kata-cli 0.8.0__tar.gz → 0.9.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {kata_cli-0.8.0 → kata_cli-0.9.2}/.claude/skills/cicd/SKILL.md +1 -1
- {kata_cli-0.8.0 → kata_cli-0.9.2}/.claude/skills/cicd/scripts/portability-lint.sh +2 -2
- {kata_cli-0.8.0 → kata_cli-0.9.2}/.claude/skills/code-lookup/SKILL.md +1 -1
- {kata_cli-0.8.0 → kata_cli-0.9.2}/.claude/skills/code-lookup/scripts/classify.sh +1 -1
- {kata_cli-0.8.0 → kata_cli-0.9.2}/.claude/skills/code-lookup/scripts/grep.sh +1 -1
- {kata_cli-0.8.0 → kata_cli-0.9.2}/.claude/skills/code-lookup/scripts/recent.sh +1 -1
- {kata_cli-0.8.0 → kata_cli-0.9.2}/.claude/skills/eval/SKILL.md +48 -48
- {kata_cli-0.8.0 → kata_cli-0.9.2}/.claude/skills/repo-map/SKILL.md +2 -2
- {kata_cli-0.8.0 → kata_cli-0.9.2}/.claude/skills/repo-map/scripts/connections.sh +1 -1
- {kata_cli-0.8.0 → kata_cli-0.9.2}/.claude/skills/repo-map/scripts/graph.sh +1 -1
- {kata_cli-0.8.0 → kata_cli-0.9.2}/.claude/skills/repo-map/scripts/profile.sh +1 -1
- {kata_cli-0.8.0 → kata_cli-0.9.2}/.github/workflows/publish.yml +5 -5
- {kata_cli-0.8.0 → kata_cli-0.9.2}/.github/workflows/security-checks.yml +2 -2
- {kata_cli-0.8.0 → kata_cli-0.9.2}/.github/workflows/tests.yml +5 -5
- {kata_cli-0.8.0 → kata_cli-0.9.2}/.markdownlint-cli2.yaml +1 -1
- {kata_cli-0.8.0 → kata_cli-0.9.2}/CHANGELOG.md +19 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/CLAUDE.md +32 -18
- kata_cli-0.9.2/PKG-INFO +71 -0
- kata_cli-0.9.2/README.md +53 -0
- {kata_cli-0.8.0/seer → kata_cli-0.9.2/antoine}/__init__.py +8 -7
- kata_cli-0.9.2/antoine/__main__.py +8 -0
- {kata_cli-0.8.0/seer → kata_cli-0.9.2/antoine}/cli/__init__.py +27 -27
- kata_cli-0.9.2/antoine/cli/_commands/__init__.py +1 -0
- {kata_cli-0.8.0/seer → kata_cli-0.9.2/antoine}/cli/_commands/classify.py +4 -4
- {kata_cli-0.8.0/seer → kata_cli-0.9.2/antoine}/cli/_commands/explain.py +7 -7
- {kata_cli-0.8.0/seer → kata_cli-0.9.2/antoine}/cli/_commands/grep.py +3 -3
- {kata_cli-0.8.0/seer → kata_cli-0.9.2/antoine}/cli/_commands/learn.py +8 -8
- {kata_cli-0.8.0/seer → kata_cli-0.9.2/antoine}/cli/_commands/recent.py +3 -3
- {kata_cli-0.8.0/seer → kata_cli-0.9.2/antoine}/cli/_commands/whoami.py +7 -7
- {kata_cli-0.8.0/seer → kata_cli-0.9.2/antoine}/cli/_errors.py +7 -7
- {kata_cli-0.8.0/seer → kata_cli-0.9.2/antoine}/cli/_output.py +4 -4
- kata_cli-0.9.2/antoine/lookup/__init__.py +25 -0
- {kata_cli-0.8.0/seer → kata_cli-0.9.2/antoine}/lookup/ast_scope.py +1 -1
- {kata_cli-0.8.0/seer → kata_cli-0.9.2/antoine}/lookup/classify.py +9 -9
- {kata_cli-0.8.0/seer → kata_cli-0.9.2/antoine}/lookup/grep_context.py +11 -11
- {kata_cli-0.8.0/seer → kata_cli-0.9.2/antoine}/lookup/recent_outline.py +16 -16
- {kata_cli-0.8.0/seer → kata_cli-0.9.2/antoine}/lookup/render.py +1 -1
- kata_cli-0.9.2/antoine/repo/__init__.py +9 -0
- {kata_cli-0.8.0/seer → kata_cli-0.9.2/antoine}/repo/__main__.py +22 -22
- {kata_cli-0.8.0/seer → kata_cli-0.9.2/antoine}/repo/connections.py +9 -9
- {kata_cli-0.8.0/seer → kata_cli-0.9.2/antoine}/repo/errors.py +17 -17
- {kata_cli-0.8.0/seer → kata_cli-0.9.2/antoine}/repo/graph.py +8 -8
- {kata_cli-0.8.0/seer → kata_cli-0.9.2/antoine}/repo/manifest.py +2 -2
- {kata_cli-0.8.0/seer → kata_cli-0.9.2/antoine}/repo/profile.py +2 -2
- {kata_cli-0.8.0/seer → kata_cli-0.9.2/antoine}/repo/render.py +7 -7
- {kata_cli-0.8.0 → kata_cli-0.9.2}/culture.yaml +1 -1
- kata_cli-0.9.2/docs/skill-sources.md +29 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/experiments/scripts_eval/README.md +2 -2
- {kata_cli-0.8.0 → kata_cli-0.9.2}/experiments/scripts_eval/RUNBOOK.md +13 -13
- {kata_cli-0.8.0 → kata_cli-0.9.2}/experiments/scripts_eval/_io.py +6 -6
- {kata_cli-0.8.0 → kata_cli-0.9.2}/experiments/scripts_eval/hooks/pre_tool.py +3 -3
- {kata_cli-0.8.0 → kata_cli-0.9.2}/experiments/scripts_eval/report.py +7 -7
- {kata_cli-0.8.0 → kata_cli-0.9.2}/experiments/scripts_eval/switch-arm.sh +9 -9
- {kata_cli-0.8.0 → kata_cli-0.9.2}/pyproject.toml +10 -10
- {kata_cli-0.8.0 → kata_cli-0.9.2}/sonar-project.properties +2 -2
- {kata_cli-0.8.0 → kata_cli-0.9.2}/tests/scripts_eval/test_hooks_post_tool.py +8 -8
- {kata_cli-0.8.0 → kata_cli-0.9.2}/tests/scripts_eval/test_hooks_pre_tool.py +11 -11
- {kata_cli-0.8.0 → kata_cli-0.9.2}/tests/scripts_eval/test_io.py +6 -6
- {kata_cli-0.8.0 → kata_cli-0.9.2}/tests/test_ast_scope.py +2 -2
- {kata_cli-0.8.0 → kata_cli-0.9.2}/tests/test_classify.py +9 -9
- {kata_cli-0.8.0 → kata_cli-0.9.2}/tests/test_classify_render.py +2 -2
- {kata_cli-0.8.0 → kata_cli-0.9.2}/tests/test_cli_chassis.py +5 -5
- {kata_cli-0.8.0 → kata_cli-0.9.2}/tests/test_cli_errors.py +10 -10
- {kata_cli-0.8.0 → kata_cli-0.9.2}/tests/test_cli_output.py +8 -8
- {kata_cli-0.8.0 → kata_cli-0.9.2}/tests/test_cli_stubs.py +8 -8
- {kata_cli-0.8.0 → kata_cli-0.9.2}/tests/test_grep_cmd.py +2 -2
- {kata_cli-0.8.0 → kata_cli-0.9.2}/tests/test_grep_context.py +7 -7
- {kata_cli-0.8.0 → kata_cli-0.9.2}/tests/test_package.py +12 -12
- {kata_cli-0.8.0 → kata_cli-0.9.2}/tests/test_recent_cmd.py +2 -2
- {kata_cli-0.8.0 → kata_cli-0.9.2}/tests/test_recent_outline.py +12 -12
- {kata_cli-0.8.0 → kata_cli-0.9.2}/tests/test_repo_cli.py +3 -3
- {kata_cli-0.8.0 → kata_cli-0.9.2}/tests/test_repo_config.py +2 -2
- {kata_cli-0.8.0 → kata_cli-0.9.2}/tests/test_repo_connections.py +5 -5
- {kata_cli-0.8.0 → kata_cli-0.9.2}/tests/test_repo_detect.py +2 -2
- {kata_cli-0.8.0 → kata_cli-0.9.2}/tests/test_repo_errors.py +4 -4
- {kata_cli-0.8.0 → kata_cli-0.9.2}/tests/test_repo_graph.py +5 -5
- {kata_cli-0.8.0 → kata_cli-0.9.2}/tests/test_repo_manifest.py +5 -5
- {kata_cli-0.8.0 → kata_cli-0.9.2}/tests/test_repo_profile.py +4 -4
- {kata_cli-0.8.0 → kata_cli-0.9.2}/tests/test_repo_render.py +6 -6
- {kata_cli-0.8.0 → kata_cli-0.9.2}/uv.lock +51 -51
- kata_cli-0.8.0/PKG-INFO +0 -36
- kata_cli-0.8.0/README.md +0 -18
- kata_cli-0.8.0/docs/skill-sources.md +0 -29
- kata_cli-0.8.0/seer/__main__.py +0 -8
- kata_cli-0.8.0/seer/cli/_commands/__init__.py +0 -1
- kata_cli-0.8.0/seer/lookup/__init__.py +0 -25
- kata_cli-0.8.0/seer/repo/__init__.py +0 -9
- {kata_cli-0.8.0 → kata_cli-0.9.2}/.claude/settings.json +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/.claude/skills/cicd/scripts/_resolve-nick.sh +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/.claude/skills/cicd/scripts/pr-reply.sh +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/.claude/skills/cicd/scripts/pr-status.sh +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/.claude/skills/cicd/scripts/workflow.sh +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/.claude/skills/communicate/SKILL.md +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/.claude/skills/communicate/scripts/fetch-issues.sh +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/.claude/skills/communicate/scripts/mesh-message.sh +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/.claude/skills/communicate/scripts/post-comment.sh +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/.claude/skills/communicate/scripts/post-issue.sh +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/.claude/skills/communicate/scripts/templates/skill-update-brief.md +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/.claude/skills/run-tests/SKILL.md +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/.claude/skills/run-tests/scripts/test.sh +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/.claude/skills/sonarclaude/SKILL.md +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/.claude/skills/sonarclaude/scripts/sonar.sh +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/.claude/skills/version-bump/SKILL.md +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/.claude/skills/version-bump/scripts/bump.py +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/.claude/skills.local.yaml.example +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/.flake8 +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/.gitignore +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/.pre-commit-config.yaml +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/LICENSE +0 -0
- {kata_cli-0.8.0/seer → kata_cli-0.9.2/antoine}/repo/config.py +0 -0
- {kata_cli-0.8.0/seer → kata_cli-0.9.2/antoine}/repo/detect.py +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/docs/eval-rounds/2026-05-15-round-01.md +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/docs/eval-rounds/2026-05-15-smoke-02-examples.md +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/docs/eval-rounds/2026-05-16-round-02.md +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/docs/superpowers/plans/2026-05-15-repo-map.md +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/docs/superpowers/plans/2026-05-15-scripts-eval-harness.md +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/docs/superpowers/plans/2026-05-16-seer-classify.md +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/docs/superpowers/specs/2026-05-15-repo-map-design.md +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/docs/superpowers/specs/2026-05-15-scripts-eval-harness-design.md +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/docs/superpowers/specs/2026-05-16-seer-classify-design.md +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/experiments/__init__.py +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/experiments/scripts_eval/__init__.py +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/experiments/scripts_eval/backfill.py +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/experiments/scripts_eval/corpus.py +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/experiments/scripts_eval/corpus.yaml +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/experiments/scripts_eval/hooks/__init__.py +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/experiments/scripts_eval/hooks/post_tool.py +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/experiments/scripts_eval/judge.py +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/experiments/scripts_eval/judge_rubric.md +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/experiments/scripts_eval/manifest.py +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/experiments/scripts_eval/results/.gitkeep +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/experiments/scripts_eval/summarize.py +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/experiments/scripts_eval/trial.py +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/experiments/scripts_eval/validate.py +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/tests/__init__.py +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/tests/scripts_eval/__init__.py +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/tests/scripts_eval/fixtures/.gitkeep +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/tests/scripts_eval/fixtures/corpus_minimal.yaml +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/tests/scripts_eval/fixtures/sidechain_min.jsonl +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/tests/scripts_eval/test_backfill.py +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/tests/scripts_eval/test_corpus.py +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/tests/scripts_eval/test_judge.py +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/tests/scripts_eval/test_manifest.py +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/tests/scripts_eval/test_report.py +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/tests/scripts_eval/test_summarize.py +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/tests/scripts_eval/test_trial.py +0 -0
- {kata_cli-0.8.0 → kata_cli-0.9.2}/tests/scripts_eval/test_validate.py +0 -0
|
@@ -9,7 +9,7 @@ description: >
|
|
|
9
9
|
review feedback, polling CI status, or the user says "create PR",
|
|
10
10
|
"review comments", "address feedback", "resolve threads". Renamed
|
|
11
11
|
from `pr-review` in steward 0.7.0; rebased on agex in 0.12.0.
|
|
12
|
-
|
|
12
|
+
antoine divergence: `scripts/portability-lint.sh` drops the GNU-only
|
|
13
13
|
`xargs -r` flag for BSD/macOS portability — see `docs/skill-sources.md`.
|
|
14
14
|
---
|
|
15
15
|
|
|
@@ -21,7 +21,7 @@ esac
|
|
|
21
21
|
[ -z "$files" ] && { echo "(no files to check)"; exit 0; }
|
|
22
22
|
|
|
23
23
|
# ----- Check 1: hard-coded /home/<user>/... paths -----
|
|
24
|
-
#
|
|
24
|
+
# antoine divergence: `xargs -r` is GNU-only and fails on BSD/macOS xargs.
|
|
25
25
|
# `$files` is already guarded non-empty above, so `-r` is redundant — dropped.
|
|
26
26
|
hits1=$(echo "$files" | xargs grep -nE '/home/[a-z][a-z0-9_-]+/' 2>/dev/null || true)
|
|
27
27
|
|
|
@@ -31,7 +31,7 @@ hits1=$(echo "$files" | xargs grep -nE '/home/[a-z][a-z0-9_-]+/' 2>/dev/null ||
|
|
|
31
31
|
# - ~/.culture/ Culture mesh data this skill is supposed to read
|
|
32
32
|
md_yaml=$(echo "$files" | grep -E '\.(md|ya?ml|toml|json|jsonc)$' || true)
|
|
33
33
|
if [ -n "$md_yaml" ]; then
|
|
34
|
-
#
|
|
34
|
+
# antoine divergence: `xargs -r` is GNU-only; `$md_yaml` is guarded
|
|
35
35
|
# non-empty by the enclosing `if`, so `-r` is redundant — dropped.
|
|
36
36
|
hits2=$(echo "$md_yaml" | xargs grep -nE '~/\.[A-Za-z]' 2>/dev/null \
|
|
37
37
|
| grep -vE '~/\.claude/skills/[^[:space:]"]+/scripts/' \
|
|
@@ -96,5 +96,5 @@ One call each, no re-grepping.
|
|
|
96
96
|
|
|
97
97
|
## Engine
|
|
98
98
|
|
|
99
|
-
`
|
|
99
|
+
`antoine/lookup/` — `python -m antoine <verb> …`. Each shell wrapper is a
|
|
100
100
|
one-liner; the agent-facing contract is the verb and its flags.
|
|
@@ -7,8 +7,8 @@ description: >
|
|
|
7
7
|
dispatches + records, then `summarize` + commit to the round's
|
|
8
8
|
accumulator file. Use when the user says "run eval set", "eval",
|
|
9
9
|
"scripts-eval", "round-NN set", or asks to execute a row of the corpus.
|
|
10
|
-
Three arms: A (banned — rider forbids the
|
|
11
|
-
— rider instructs use of
|
|
10
|
+
Three arms: A (banned — rider forbids the antoine skills), B (directed
|
|
11
|
+
— rider instructs use of antoine skills), C (organic — rider permits
|
|
12
12
|
but doesn't direct). Two judge pairs: A-vs-B ("do the skills help
|
|
13
13
|
when used") and A-vs-C ("do the skills get adopted organically").
|
|
14
14
|
`judge prepare --pair AB|AC` selects the pair.
|
|
@@ -28,17 +28,17 @@ operator procedure that sequences them per session.
|
|
|
28
28
|
Before doing anything, verify the user's intent matches the session
|
|
29
29
|
state. Stop and ask if any of these hold:
|
|
30
30
|
|
|
31
|
-
- `env | grep SEER_EVAL_RUN_ID` is empty → the harness hooks no-op, no
|
|
31
|
+
- `env | grep ANTOINE_EVAL_RUN_ID` is empty → the harness hooks no-op, no
|
|
32
32
|
metrics get captured. Operator needs to re-launch with the env vars
|
|
33
33
|
exported.
|
|
34
|
-
- `SEER_EVAL_ARM` is set to anything other than `A`, `B`, or `C` → bad config.
|
|
34
|
+
- `ANTOINE_EVAL_ARM` is set to anything other than `A`, `B`, or `C` → bad config.
|
|
35
35
|
- User says "do arm C" but the matching arm-A cells don't exist on
|
|
36
|
-
disk under `experiments/scripts_eval/results/$SEER_EVAL_RUN_ID/arm-A/`
|
|
36
|
+
disk under `experiments/scripts_eval/results/$ANTOINE_EVAL_RUN_ID/arm-A/`
|
|
37
37
|
→ arm A must complete first; there's nothing to pair against.
|
|
38
38
|
|
|
39
39
|
All three arms run with `repo-map` and `code-lookup` enabled on disk.
|
|
40
40
|
Arm-A's constraint is **verbal** — the rider in the dispatched prompt
|
|
41
|
-
is the sole guard against the subagent using the
|
|
41
|
+
is the sole guard against the subagent using the antoine skills. Do not
|
|
42
42
|
edit the rider; copy it verbatim. (Earlier versions of this skill
|
|
43
43
|
physically moved `.claude/skills/repo-map/` aside for arm A as
|
|
44
44
|
defense-in-depth; that step was dropped because the rider proved
|
|
@@ -46,13 +46,13 @@ sufficient and the move-aside dance made operator setup brittle.)
|
|
|
46
46
|
|
|
47
47
|
Three arms, three questions they answer:
|
|
48
48
|
|
|
49
|
-
- **A (banned)** — verbal rider forbids both
|
|
49
|
+
- **A (banned)** — verbal rider forbids both antoine skills. Establishes
|
|
50
50
|
the "without the new skills" baseline.
|
|
51
51
|
- **B (directed)** — verbal rider instructs the subagent to use the
|
|
52
|
-
|
|
52
|
+
antoine skills where applicable. Establishes the "with the new skills,
|
|
53
53
|
when actually used" upper bound.
|
|
54
54
|
- **C (organic)** — verbal rider permits but does not direct use of
|
|
55
|
-
the
|
|
55
|
+
the antoine skills. Measures organic adoption rate.
|
|
56
56
|
|
|
57
57
|
A-vs-B is the primary "do the skills help?" comparison; A-vs-C is the
|
|
58
58
|
adoption canary. The judge supports both pairs via the `--pair` flag.
|
|
@@ -60,7 +60,7 @@ adoption canary. The judge supports both pairs via the `--pair` flag.
|
|
|
60
60
|
## Preflight (every session)
|
|
61
61
|
|
|
62
62
|
```bash
|
|
63
|
-
env | grep -E "^SEER_EVAL_(RUN_ID|ARM)="
|
|
63
|
+
env | grep -E "^ANTOINE_EVAL_(RUN_ID|ARM)="
|
|
64
64
|
# expect both set to the intended round / arm
|
|
65
65
|
```
|
|
66
66
|
|
|
@@ -68,11 +68,11 @@ If unset, export them in your shell before launching `claude`:
|
|
|
68
68
|
|
|
69
69
|
```bash
|
|
70
70
|
# arm-A session (banned):
|
|
71
|
-
export SEER_EVAL_RUN_ID=2026-05-NN-round-XX SEER_EVAL_ARM=A
|
|
71
|
+
export ANTOINE_EVAL_RUN_ID=2026-05-NN-round-XX ANTOINE_EVAL_ARM=A
|
|
72
72
|
# arm-B session (directed):
|
|
73
|
-
export SEER_EVAL_RUN_ID=2026-05-NN-round-XX SEER_EVAL_ARM=B
|
|
73
|
+
export ANTOINE_EVAL_RUN_ID=2026-05-NN-round-XX ANTOINE_EVAL_ARM=B
|
|
74
74
|
# arm-C session (organic):
|
|
75
|
-
export SEER_EVAL_RUN_ID=2026-05-NN-round-XX SEER_EVAL_ARM=C
|
|
75
|
+
export ANTOINE_EVAL_RUN_ID=2026-05-NN-round-XX ANTOINE_EVAL_ARM=C
|
|
76
76
|
```
|
|
77
77
|
|
|
78
78
|
`experiments/scripts_eval/switch-arm.sh A|B|C <run_id>` does the same
|
|
@@ -82,7 +82,7 @@ If this is the first set of the run (idempotent, safe to re-run):
|
|
|
82
82
|
|
|
83
83
|
```bash
|
|
84
84
|
uv run --group experiments python -m experiments.scripts_eval.manifest \
|
|
85
|
-
init --run $SEER_EVAL_RUN_ID
|
|
85
|
+
init --run $ANTOINE_EVAL_RUN_ID
|
|
86
86
|
```
|
|
87
87
|
|
|
88
88
|
## Arm-A procedure
|
|
@@ -99,11 +99,11 @@ uv run --group experiments python -m experiments.scripts_eval.manifest \
|
|
|
99
99
|
```text
|
|
100
100
|
|
|
101
101
|
Constraints (verbatim):
|
|
102
|
-
- You may NOT use the `repo-map` skill, `python -m seer.repo`,
|
|
103
|
-
the `seer.repo` Python module, or any `scripts/*.sh` paths under
|
|
102
|
+
- You may NOT use the `repo-map` skill, `python -m antoine.repo`,
|
|
103
|
+
the `antoine.repo` Python module, or any `scripts/*.sh` paths under
|
|
104
104
|
`.claude/skills/repo-map/`.
|
|
105
|
-
- You may NOT use the `code-lookup` skill, the `
|
|
106
|
-
Python module, the `
|
|
105
|
+
- You may NOT use the `code-lookup` skill, the `antoine.lookup`
|
|
106
|
+
Python module, the `antoine grep` / `antoine recent` / `antoine classify`
|
|
107
107
|
CLI verbs, or any `scripts/*.sh` paths under
|
|
108
108
|
`.claude/skills/code-lookup/`.
|
|
109
109
|
If you cannot answer without them, say so explicitly and stop.
|
|
@@ -121,7 +121,7 @@ uv run --group experiments python -m experiments.scripts_eval.manifest \
|
|
|
121
121
|
|
|
122
122
|
```bash
|
|
123
123
|
TRIAL_ID=$(uv run --group experiments python -m experiments.scripts_eval.trial \
|
|
124
|
-
start --run $SEER_EVAL_RUN_ID --arm $SEER_EVAL_ARM \
|
|
124
|
+
start --run $ANTOINE_EVAL_RUN_ID --arm $ANTOINE_EVAL_ARM \
|
|
125
125
|
--target <target> --question <question_id> --trial <n>)
|
|
126
126
|
```
|
|
127
127
|
|
|
@@ -140,17 +140,17 @@ uv run --group experiments python -m experiments.scripts_eval.manifest \
|
|
|
140
140
|
```
|
|
141
141
|
|
|
142
142
|
6. Confirm the cell JSON appeared under
|
|
143
|
-
`experiments/scripts_eval/results/$SEER_EVAL_RUN_ID/arm-A/`.
|
|
143
|
+
`experiments/scripts_eval/results/$ANTOINE_EVAL_RUN_ID/arm-A/`.
|
|
144
144
|
|
|
145
145
|
**After all 3 trials**, summarize + commit:
|
|
146
146
|
|
|
147
147
|
```bash
|
|
148
148
|
uv run --group experiments python -m experiments.scripts_eval.summarize \
|
|
149
|
-
--run $SEER_EVAL_RUN_ID \
|
|
150
|
-
--out docs/eval-rounds/$SEER_EVAL_RUN_ID.md
|
|
149
|
+
--run $ANTOINE_EVAL_RUN_ID \
|
|
150
|
+
--out docs/eval-rounds/$ANTOINE_EVAL_RUN_ID.md
|
|
151
151
|
|
|
152
|
-
git add docs/eval-rounds/$SEER_EVAL_RUN_ID.md
|
|
153
|
-
git commit -m "$SEER_EVAL_RUN_ID: arm-A captured for <target>/<question_id> (3 trials)"
|
|
152
|
+
git add docs/eval-rounds/$ANTOINE_EVAL_RUN_ID.md
|
|
153
|
+
git commit -m "$ANTOINE_EVAL_RUN_ID: arm-A captured for <target>/<question_id> (3 trials)"
|
|
154
154
|
```
|
|
155
155
|
|
|
156
156
|
Report back: cell count under arm-A/, what's pending for arm-B and
|
|
@@ -161,7 +161,7 @@ accumulator file.
|
|
|
161
161
|
|
|
162
162
|
Arm-B captures the **directed** trials so the A-vs-B judge run can
|
|
163
163
|
assess "do the skills help when actually used?". Capture happens in
|
|
164
|
-
its own session (`SEER_EVAL_ARM=B`); the A-vs-B judges then run in
|
|
164
|
+
its own session (`ANTOINE_EVAL_ARM=B`); the A-vs-B judges then run in
|
|
165
165
|
the arm-C session's Judge phase, alongside the A-vs-C judges
|
|
166
166
|
(`judge prepare --pair AB`).
|
|
167
167
|
|
|
@@ -173,12 +173,12 @@ the arm-C session's Judge phase, alongside the A-vs-C judges
|
|
|
173
173
|
```text
|
|
174
174
|
|
|
175
175
|
Constraints (verbatim):
|
|
176
|
-
- For this question, you MUST use the
|
|
176
|
+
- For this question, you MUST use the antoine skills where they
|
|
177
177
|
apply:
|
|
178
178
|
* `repo-map` (`scripts/profile.sh`, `scripts/connections.sh`,
|
|
179
179
|
`scripts/graph.sh` under `.claude/skills/repo-map/`) for
|
|
180
180
|
repo overview, dependencies, and workspace shape.
|
|
181
|
-
* `code-lookup` (`
|
|
181
|
+
* `code-lookup` (`antoine grep`, `antoine recent`, `antoine classify`,
|
|
182
182
|
or the equivalent scripts under
|
|
183
183
|
`.claude/skills/code-lookup/`) for symbol references,
|
|
184
184
|
recent commit-symbol diffs, and project-kind classification.
|
|
@@ -196,7 +196,7 @@ the arm-C session's Judge phase, alongside the A-vs-C judges
|
|
|
196
196
|
|
|
197
197
|
```bash
|
|
198
198
|
TRIAL_ID=$(uv run --group experiments python -m experiments.scripts_eval.trial \
|
|
199
|
-
start --run $
|
|
199
|
+
start --run $ANTOINE_EVAL_RUN_ID --arm $ANTOINE_EVAL_ARM \
|
|
200
200
|
--target <target> --question <question_id> --trial <n>)
|
|
201
201
|
# dispatch one Explore subagent with the rendered prompt above
|
|
202
202
|
uv run --group experiments python -m experiments.scripts_eval.trial \
|
|
@@ -204,17 +204,17 @@ the arm-C session's Judge phase, alongside the A-vs-C judges
|
|
|
204
204
|
```
|
|
205
205
|
|
|
206
206
|
3. Confirm the cell JSON appeared under
|
|
207
|
-
`experiments/scripts_eval/results/$
|
|
207
|
+
`experiments/scripts_eval/results/$ANTOINE_EVAL_RUN_ID/arm-B/`.
|
|
208
208
|
|
|
209
209
|
**After all 3 trials**, summarize + commit:
|
|
210
210
|
|
|
211
211
|
```bash
|
|
212
212
|
uv run --group experiments python -m experiments.scripts_eval.summarize \
|
|
213
|
-
--run $
|
|
214
|
-
--out docs/eval-rounds/$
|
|
213
|
+
--run $ANTOINE_EVAL_RUN_ID \
|
|
214
|
+
--out docs/eval-rounds/$ANTOINE_EVAL_RUN_ID.md
|
|
215
215
|
|
|
216
|
-
git add docs/eval-rounds/$
|
|
217
|
-
git commit -m "$
|
|
216
|
+
git add docs/eval-rounds/$ANTOINE_EVAL_RUN_ID.md
|
|
217
|
+
git commit -m "$ANTOINE_EVAL_RUN_ID: arm-B captured for <target>/<question_id> (3 trials)"
|
|
218
218
|
```
|
|
219
219
|
|
|
220
220
|
Report back: cell count under arm-B/, whether the subagent actually
|
|
@@ -227,7 +227,7 @@ pending set per the run-state table.
|
|
|
227
227
|
**Precondition check (mandatory):**
|
|
228
228
|
|
|
229
229
|
```bash
|
|
230
|
-
ls experiments/scripts_eval/results/$
|
|
230
|
+
ls experiments/scripts_eval/results/$ANTOINE_EVAL_RUN_ID/arm-A/<target>-<question_id>-t*.json
|
|
231
231
|
# expect: 3 files (t1, t2, t3)
|
|
232
232
|
```
|
|
233
233
|
|
|
@@ -246,7 +246,7 @@ If fewer than 3, stop — arm A must complete first.
|
|
|
246
246
|
- You may use the `repo-map` skill (and its scripts under
|
|
247
247
|
`.claude/skills/repo-map/`) and the `code-lookup` skill (and its
|
|
248
248
|
scripts under `.claude/skills/code-lookup/`) at your discretion.
|
|
249
|
-
This includes `
|
|
249
|
+
This includes `antoine grep` / `antoine recent` / `antoine classify`.
|
|
250
250
|
- After answering, append two sections and stop:
|
|
251
251
|
### tools_used
|
|
252
252
|
- <ToolName>: <count>
|
|
@@ -258,7 +258,7 @@ If fewer than 3, stop — arm A must complete first.
|
|
|
258
258
|
|
|
259
259
|
```bash
|
|
260
260
|
TRIAL_ID=$(uv run --group experiments python -m experiments.scripts_eval.trial \
|
|
261
|
-
start --run $
|
|
261
|
+
start --run $ANTOINE_EVAL_RUN_ID --arm $ANTOINE_EVAL_ARM \
|
|
262
262
|
--target <target> --question <question_id> --trial <n>)
|
|
263
263
|
# dispatch one Explore subagent with the rendered prompt above
|
|
264
264
|
uv run --group experiments python -m experiments.scripts_eval.trial \
|
|
@@ -286,7 +286,7 @@ cells exist) and then the A-vs-B judge (if arm-B cells exist):
|
|
|
286
286
|
|
|
287
287
|
```bash
|
|
288
288
|
uv run --group experiments python -m experiments.scripts_eval.judge \
|
|
289
|
-
prepare --run $
|
|
289
|
+
prepare --run $ANTOINE_EVAL_RUN_ID \
|
|
290
290
|
--pair AC \
|
|
291
291
|
--pair-key <target>/<question_id>/<n> \
|
|
292
292
|
--seed 0 > /tmp/judge-AC-<n>.json
|
|
@@ -317,7 +317,7 @@ cells exist) and then the A-vs-B judge (if arm-B cells exist):
|
|
|
317
317
|
A_LABEL=$(jq -r .blind_label_for_A /tmp/judge-AC-<n>.json)
|
|
318
318
|
C_LABEL=$(jq -r .blind_label_for_C /tmp/judge-AC-<n>.json)
|
|
319
319
|
uv run --group experiments python -m experiments.scripts_eval.judge \
|
|
320
|
-
record --run $
|
|
320
|
+
record --run $ANTOINE_EVAL_RUN_ID \
|
|
321
321
|
--pair AC \
|
|
322
322
|
--pair-key <target>/<question_id>/<n> \
|
|
323
323
|
--blind-label-for-a "$A_LABEL" \
|
|
@@ -332,7 +332,7 @@ cells exist) and then the A-vs-B judge (if arm-B cells exist):
|
|
|
332
332
|
|
|
333
333
|
```bash
|
|
334
334
|
uv run --group experiments python -m experiments.scripts_eval.judge \
|
|
335
|
-
prepare --run $
|
|
335
|
+
prepare --run $ANTOINE_EVAL_RUN_ID \
|
|
336
336
|
--pair AB \
|
|
337
337
|
--pair-key <target>/<question_id>/<n> \
|
|
338
338
|
--seed 0 > /tmp/judge-AB-<n>.json
|
|
@@ -342,7 +342,7 @@ cells exist) and then the A-vs-B judge (if arm-B cells exist):
|
|
|
342
342
|
A_LABEL=$(jq -r .blind_label_for_A /tmp/judge-AB-<n>.json)
|
|
343
343
|
B_LABEL=$(jq -r .blind_label_for_B /tmp/judge-AB-<n>.json)
|
|
344
344
|
uv run --group experiments python -m experiments.scripts_eval.judge \
|
|
345
|
-
record --run $
|
|
345
|
+
record --run $ANTOINE_EVAL_RUN_ID \
|
|
346
346
|
--pair AB \
|
|
347
347
|
--pair-key <target>/<question_id>/<n> \
|
|
348
348
|
--blind-label-for-a "$A_LABEL" \
|
|
@@ -363,19 +363,19 @@ land under `cell["judges"]["AB"]` only.
|
|
|
363
363
|
|
|
364
364
|
```bash
|
|
365
365
|
uv run --group experiments python -m experiments.scripts_eval.validate \
|
|
366
|
-
--run $
|
|
366
|
+
--run $ANTOINE_EVAL_RUN_ID
|
|
367
367
|
|
|
368
368
|
uv run --group experiments python -m experiments.scripts_eval.summarize \
|
|
369
|
-
--run $
|
|
370
|
-
--out docs/eval-rounds/$
|
|
369
|
+
--run $ANTOINE_EVAL_RUN_ID \
|
|
370
|
+
--out docs/eval-rounds/$ANTOINE_EVAL_RUN_ID.md
|
|
371
371
|
|
|
372
|
-
git add docs/eval-rounds/$
|
|
373
|
-
git commit -m "$
|
|
372
|
+
git add docs/eval-rounds/$ANTOINE_EVAL_RUN_ID.md
|
|
373
|
+
git commit -m "$ANTOINE_EVAL_RUN_ID: completed <target>/<question_id> (both arms + judge)"
|
|
374
374
|
```
|
|
375
375
|
|
|
376
376
|
Report back:
|
|
377
377
|
- A-vs-B winners (A / B / tie) and A-vs-C winners (A / C / tie).
|
|
378
|
-
- Whether arm B and arm C actually used the
|
|
378
|
+
- Whether arm B and arm C actually used the antoine scripts (look at the
|
|
379
379
|
`### tools_used` of each cell — `B_did_not_use_scripts` and
|
|
380
380
|
`C_did_not_use_scripts` are findings, not bugs).
|
|
381
381
|
- The next pending set per the run-state table.
|
|
@@ -383,7 +383,7 @@ Report back:
|
|
|
383
383
|
## Reading the run state
|
|
384
384
|
|
|
385
385
|
The committed run-state table and per-set verdicts live in
|
|
386
|
-
`docs/eval-rounds/$
|
|
386
|
+
`docs/eval-rounds/$ANTOINE_EVAL_RUN_ID.md`, between the
|
|
387
387
|
`<!-- runstate:start -->` / `<!-- runstate:end -->` and
|
|
388
388
|
`<!-- evidence:start -->` / `<!-- evidence:end -->` markers. `summarize.py`
|
|
389
389
|
rewrites those regions idempotently — do not hand-edit them.
|
|
@@ -394,6 +394,6 @@ are needed; `judged` below the arm counts means judges still owe verdicts.
|
|
|
394
394
|
|
|
395
395
|
## Cite-don't-import
|
|
396
396
|
|
|
397
|
-
This skill is original to
|
|
397
|
+
This skill is original to antoine (the harness only exists here). When
|
|
398
398
|
promoted upstream, it would re-vendor into steward's skill suppliers —
|
|
399
399
|
update `docs/skill-sources.md` accordingly at that point.
|
|
@@ -97,8 +97,8 @@ Flags always override config.
|
|
|
97
97
|
|
|
98
98
|
## Engine
|
|
99
99
|
|
|
100
|
-
The actual logic lives in `
|
|
101
|
-
`uv run python -m
|
|
100
|
+
The actual logic lives in `antoine/repo/` and is invoked via
|
|
101
|
+
`uv run python -m antoine.repo <verb>`. The shell scripts are one-line wrappers; the
|
|
102
102
|
agent-facing contract is the verbs and their flags, not the wrappers.
|
|
103
103
|
|
|
104
104
|
> **Interpreter note:** the scripts use `uv run --directory <project-root>`
|
|
@@ -5,12 +5,12 @@ on:
|
|
|
5
5
|
branches: [main]
|
|
6
6
|
paths:
|
|
7
7
|
- "pyproject.toml"
|
|
8
|
-
- "
|
|
8
|
+
- "antoine/**"
|
|
9
9
|
pull_request:
|
|
10
10
|
branches: [main]
|
|
11
11
|
paths:
|
|
12
12
|
- "pyproject.toml"
|
|
13
|
-
- "
|
|
13
|
+
- "antoine/**"
|
|
14
14
|
|
|
15
15
|
jobs:
|
|
16
16
|
test:
|
|
@@ -57,7 +57,7 @@ jobs:
|
|
|
57
57
|
- name: Build and publish each distribution to TestPyPI
|
|
58
58
|
run: |
|
|
59
59
|
set -euo pipefail
|
|
60
|
-
for pkg in
|
|
60
|
+
for pkg in antoine-cli kata-cli code-lens-cli; do
|
|
61
61
|
echo "::group::TestPyPI publish $pkg"
|
|
62
62
|
# Run the per-package steps in a subshell so set -e failures
|
|
63
63
|
# don't skip the ::endgroup:: marker — keeps Actions logs
|
|
@@ -81,7 +81,7 @@ jobs:
|
|
|
81
81
|
- name: Print install commands
|
|
82
82
|
if: always()
|
|
83
83
|
run: |
|
|
84
|
-
for pkg in
|
|
84
|
+
for pkg in antoine-cli kata-cli code-lens-cli; do
|
|
85
85
|
echo "::notice::Test with: uv tool install --index-url https://test.pypi.org/simple/ --index-strategy unsafe-best-match $pkg==${DEV_VERSION}"
|
|
86
86
|
done
|
|
87
87
|
|
|
@@ -105,7 +105,7 @@ jobs:
|
|
|
105
105
|
- name: Build and publish each distribution
|
|
106
106
|
run: |
|
|
107
107
|
set -euo pipefail
|
|
108
|
-
for pkg in
|
|
108
|
+
for pkg in antoine-cli kata-cli code-lens-cli; do
|
|
109
109
|
echo "::group::Publishing $pkg"
|
|
110
110
|
# Run the per-package steps in a subshell so set -e failures
|
|
111
111
|
# don't skip the ::endgroup:: marker — keeps Actions logs
|
|
@@ -25,11 +25,11 @@ jobs:
|
|
|
25
25
|
- run: uv sync
|
|
26
26
|
|
|
27
27
|
- name: Run Bandit
|
|
28
|
-
run: uv run bandit -r
|
|
28
|
+
run: uv run bandit -r antoine/ -f json -o bandit-results.json -c pyproject.toml
|
|
29
29
|
continue-on-error: true
|
|
30
30
|
|
|
31
31
|
- name: Run Pylint
|
|
32
|
-
run: uv run pylint
|
|
32
|
+
run: uv run pylint antoine/ --output-format=json:pylint-results.json,text
|
|
33
33
|
continue-on-error: true
|
|
34
34
|
|
|
35
35
|
- name: Upload Security Results
|
|
@@ -30,7 +30,7 @@ jobs:
|
|
|
30
30
|
|
|
31
31
|
- run: uv sync
|
|
32
32
|
|
|
33
|
-
- run: uv run pytest -n auto --cov=
|
|
33
|
+
- run: uv run pytest -n auto --cov=antoine --cov-report=xml:coverage.xml --cov-report=term -v
|
|
34
34
|
|
|
35
35
|
- name: SonarCloud Scan
|
|
36
36
|
if: env.SONAR_TOKEN != ''
|
|
@@ -56,16 +56,16 @@ jobs:
|
|
|
56
56
|
- run: uv sync
|
|
57
57
|
|
|
58
58
|
- name: black --check
|
|
59
|
-
run: uv run black --check
|
|
59
|
+
run: uv run black --check antoine tests
|
|
60
60
|
|
|
61
61
|
- name: isort --check
|
|
62
|
-
run: uv run isort --check-only
|
|
62
|
+
run: uv run isort --check-only antoine tests
|
|
63
63
|
|
|
64
64
|
- name: flake8
|
|
65
|
-
run: uv run flake8 --config=.flake8
|
|
65
|
+
run: uv run flake8 --config=.flake8 antoine/ tests/
|
|
66
66
|
|
|
67
67
|
- name: bandit
|
|
68
|
-
run: uv run bandit -c pyproject.toml -r
|
|
68
|
+
run: uv run bandit -c pyproject.toml -r antoine
|
|
69
69
|
|
|
70
70
|
- name: markdownlint-cli2
|
|
71
71
|
run: |
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# markdownlint-cli2 config for
|
|
1
|
+
# markdownlint-cli2 config for antoine.
|
|
2
2
|
# markdownlint-cli2 stops walking at the git root, so a global
|
|
3
3
|
# markdownlint config in the user's home directory isn't picked up from
|
|
4
4
|
# inside the repo. Keep this file aligned with the global preset.
|
|
@@ -5,6 +5,25 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
Format follows [Keep a Changelog](https://keepachangelog.com/). This project
|
|
6
6
|
adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [0.9.2] - 2026-05-17
|
|
9
|
+
|
|
10
|
+
### Changed
|
|
11
|
+
|
|
12
|
+
- CLAUDE.md + README.md: add Name section explaining antoine = N to 1 (collapse N ad-hoc tool calls into one `kata` / `antoine` verb; `kata-cli` is the PyPI distribution label, not a command name).
|
|
13
|
+
- README.md: expand "What's here" to cover the three things antoine manages — the CLI surface (dual-published as `antoine-cli` / `kata-cli` / `code-lens-cli`), the `experiments/scripts_eval/` A/B harness, and the recorded results in `docs/eval-rounds/`.
|
|
14
|
+
|
|
15
|
+
## [0.9.1] - 2026-05-17
|
|
16
|
+
|
|
17
|
+
### Changed
|
|
18
|
+
|
|
19
|
+
- PyPI distribution renamed from `antoine` to `antoine-cli` to avoid name collision and stay consistent with the `kata-cli` / `code-lens-cli` alt-publish naming convention. The Python module (`antoine`) and console script (`antoine`) are unchanged; only the wheel-distribution name moves. `_resolve_version()` fallback list and `.github/workflows/publish.yml` publish loop updated to match.
|
|
20
|
+
|
|
21
|
+
## [0.9.0] - 2026-05-17
|
|
22
|
+
|
|
23
|
+
### Changed
|
|
24
|
+
|
|
25
|
+
- **Repository rename: `seer-cli` → `antoine`.** GitHub remote moved to `agentculture/antoine`; primary PyPI distribution renamed from `seer-cli` to `antoine`; `kata-cli` and `code-lens-cli` alt-publishes preserved. Python module renamed `seer/` → `antoine/`; primary console script renamed `seer` → `antoine` (the `kata` alias is retained). All imports, error classes (`SeerError` → `AntoineError`, `_SeerArgumentParser` → `_AntoineArgumentParser`), env vars (`SEER_EVAL_*` → `ANTOINE_EVAL_*`), Sonar project key (`agentculture_seer-cli` → `agentculture_antoine`), `culture.yaml` agent suffix, vendored skill bodies, and the scripts-eval harness's banned-pattern detection updated accordingly. Historical `CHANGELOG.md` entries, `docs/eval-rounds/`, and dated `docs/superpowers/{specs,plans}/` files are intentionally left referring to `seer` — those describe past state.
|
|
26
|
+
|
|
8
27
|
## [0.8.0] - 2026-05-16
|
|
9
28
|
|
|
10
29
|
### Added
|
|
@@ -2,9 +2,23 @@
|
|
|
2
2
|
|
|
3
3
|
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
|
4
4
|
|
|
5
|
+
## Name
|
|
6
|
+
|
|
7
|
+
**antoine** = **"N to 1"** (an-to-one). The name encodes the goal: collapse
|
|
8
|
+
the N ad-hoc tool calls an agent would otherwise make against a codebase
|
|
9
|
+
(`ls` + `cat` + `grep` + `git log` + `git show` + …) into **one** call to a
|
|
10
|
+
purpose-built `kata` verb (or its `antoine` alias — both console scripts are
|
|
11
|
+
defined in [`pyproject.toml`](./pyproject.toml); `kata-cli` is the PyPI
|
|
12
|
+
**distribution** name, not a command name) that returns the same information
|
|
13
|
+
as structured data. Every verb antoine ships is a bet that some recurring
|
|
14
|
+
N-call pattern
|
|
15
|
+
has a 1-call replacement that is cheaper, more reliable, and easier to
|
|
16
|
+
delegate to a subagent. You are that agent — when you face a task shape
|
|
17
|
+
covered by the dispatching table below, prefer the 1-call verb.
|
|
18
|
+
|
|
5
19
|
## Project Status
|
|
6
20
|
|
|
7
|
-
`seer` is an AgentCulture sibling repo — **codebase lookup and indexing for
|
|
21
|
+
`antoine` is an AgentCulture sibling repo — **codebase lookup and indexing for
|
|
8
22
|
agent skills**. The onboarding scaffold is in place (package, CLI chassis, CI,
|
|
9
23
|
vendored skills); the actual lookup and indexing engine — how codebases are
|
|
10
24
|
scanned, the index format, where it is stored, and how agent skills consume the
|
|
@@ -20,8 +34,8 @@ uv sync # install the package + dev dependencies
|
|
|
20
34
|
## Run
|
|
21
35
|
|
|
22
36
|
```bash
|
|
23
|
-
uv run seer --version # or: uv run python -m seer
|
|
24
|
-
uv run seer learn # placeholder verbs: learn / explain / whoami
|
|
37
|
+
uv run antoine --version # or: uv run python -m antoine
|
|
38
|
+
uv run antoine learn # placeholder verbs: learn / explain / whoami
|
|
25
39
|
```
|
|
26
40
|
|
|
27
41
|
## Test
|
|
@@ -34,9 +48,9 @@ uv run pytest tests/test_cli_chassis.py::test_no_args_prints_help_and_returns_ze
|
|
|
34
48
|
## Lint / Format
|
|
35
49
|
|
|
36
50
|
```bash
|
|
37
|
-
uv run flake8 --config=.flake8 seer/ tests/
|
|
38
|
-
uv run black seer/ tests/
|
|
39
|
-
uv run isort seer/ tests/
|
|
51
|
+
uv run flake8 --config=.flake8 antoine/ tests/
|
|
52
|
+
uv run black antoine/ tests/
|
|
53
|
+
uv run isort antoine/ tests/
|
|
40
54
|
markdownlint-cli2 "**/*.md"
|
|
41
55
|
```
|
|
42
56
|
|
|
@@ -44,14 +58,14 @@ Bandit and pylint run in CI (`.github/workflows/security-checks.yml`).
|
|
|
44
58
|
|
|
45
59
|
## Architecture
|
|
46
60
|
|
|
47
|
-
- `seer/cli/__init__.py` — the argparse CLI chassis: structured error
|
|
48
|
-
routing (`_SeerArgumentParser`), `--json` hint detection, and
|
|
49
|
-
`_dispatch` (invokes the verb handler, translating `SeerError` and bare
|
|
61
|
+
- `antoine/cli/__init__.py` — the argparse CLI chassis: structured error
|
|
62
|
+
routing (`_AntoineArgumentParser`), `--json` hint detection, and
|
|
63
|
+
`_dispatch` (invokes the verb handler, translating `AntoineError` and bare
|
|
50
64
|
exceptions to structured exit codes). `main()` is the entry point, exposed
|
|
51
|
-
as the `seer` console script and via `python -m seer`.
|
|
52
|
-
- `seer/cli/_errors.py` — `SeerError` and the exit-code policy.
|
|
53
|
-
- `seer/cli/_output.py` — strict stdout/stderr split helpers.
|
|
54
|
-
- `seer/cli/_commands/` — one module per verb, each exposing `register()`.
|
|
65
|
+
as the `antoine` console script and via `python -m antoine`.
|
|
66
|
+
- `antoine/cli/_errors.py` — `AntoineError` and the exit-code policy.
|
|
67
|
+
- `antoine/cli/_output.py` — strict stdout/stderr split helpers.
|
|
68
|
+
- `antoine/cli/_commands/` — one module per verb, each exposing `register()`.
|
|
55
69
|
All three verbs are currently greenfield stubs.
|
|
56
70
|
|
|
57
71
|
## Version Management
|
|
@@ -70,9 +84,9 @@ from `../steward/.claude/skills/<name>/`.
|
|
|
70
84
|
|
|
71
85
|
`experiments/scripts_eval/` is the round-1 A/B-test harness for the
|
|
72
86
|
`repo-map` skill. The three Claude Code hooks wired in
|
|
73
|
-
`.claude/settings.json` are **env-var-gated** (`SEER_EVAL_RUN_ID`,
|
|
74
|
-
`SEER_EVAL_ARM`) and no-op outside an active eval session, so
|
|
75
|
-
day-to-day seer work is unaffected.
|
|
87
|
+
`.claude/settings.json` are **env-var-gated** (`ANTOINE_EVAL_RUN_ID`,
|
|
88
|
+
`ANTOINE_EVAL_ARM`) and no-op outside an active eval session, so
|
|
89
|
+
day-to-day antoine work is unaffected.
|
|
76
90
|
|
|
77
91
|
When asked to run an eval round, work the harness, or interpret its
|
|
78
92
|
results, read these in order:
|
|
@@ -124,7 +138,7 @@ not subagents you dispatch):
|
|
|
124
138
|
of the PR #18 organic-adoption smokes showed that **subagents construct
|
|
125
139
|
their plan from the prompt body before consulting the skills catalog** —
|
|
126
140
|
so a description-shape change on the skill itself does not move adoption
|
|
127
|
-
(0 of 2 models picked up `seer recent` for a question perfectly tuned for
|
|
141
|
+
(0 of 2 models picked up `antoine recent` for a question perfectly tuned for
|
|
128
142
|
it). Round-3 confirmed the parent-agent path: a fresh session loading the
|
|
129
143
|
table delegated *and the subagent invoked the verb directly via the
|
|
130
144
|
injected directive*.
|
|
@@ -140,4 +154,4 @@ in row 1 above — the table does not change that.
|
|
|
140
154
|
|
|
141
155
|
## Workspace Context
|
|
142
156
|
|
|
143
|
-
The GitHub remote is `agentculture/
|
|
157
|
+
The GitHub remote is `agentculture/antoine`. When opening PRs or posting comments here as an AI assistant, sign them so it's clear they're AI-authored — e.g. `- antoine (Claude)`.
|