modern-python-guidance 0.1.2__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/.gitignore +2 -0
  2. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/CHANGELOG.md +14 -0
  3. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/PKG-INFO +6 -6
  4. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/README.md +5 -5
  5. modern_python_guidance-0.2.0/bench/prompt-v2.txt +11 -0
  6. modern_python_guidance-0.2.0/bench/prompt-v3.txt +11 -0
  7. modern_python_guidance-0.2.0/bench/prompt.txt +13 -0
  8. modern_python_guidance-0.2.0/bench/run.sh +242 -0
  9. modern_python_guidance-0.2.0/bench/score-v2.sh +300 -0
  10. modern_python_guidance-0.2.0/bench/score-v3.sh +327 -0
  11. modern_python_guidance-0.2.0/bench/score.sh +309 -0
  12. modern_python_guidance-0.2.0/docs/benchmark-evaluation.md +592 -0
  13. modern_python_guidance-0.2.0/docs/benchmark-procedure.md +128 -0
  14. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/docs/design.md +3 -2
  15. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/pyproject.toml +1 -1
  16. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/SKILL.md +12 -2
  17. modern_python_guidance-0.2.0/skills/modern-python-guidance/guides/django/django-async-views.md +67 -0
  18. modern_python_guidance-0.2.0/skills/modern-python-guidance/guides/django/django-check-constraints.md +73 -0
  19. modern_python_guidance-0.2.0/skills/modern-python-guidance/guides/django/django-json-field.md +58 -0
  20. modern_python_guidance-0.2.0/skills/modern-python-guidance/guides/pytest/pytest-parametrize.md +72 -0
  21. modern_python_guidance-0.2.0/skills/modern-python-guidance/guides/pytest/pytest-raises-match.md +64 -0
  22. modern_python_guidance-0.2.0/skills/modern-python-guidance/guides/pytest/pytest-tmp-path.md +58 -0
  23. modern_python_guidance-0.2.0/skills/modern-python-guidance/guides/sqlalchemy/sqlalchemy-2-style.md +77 -0
  24. modern_python_guidance-0.2.0/skills/modern-python-guidance/guides/sqlalchemy/sqlalchemy-async-session.md +72 -0
  25. modern_python_guidance-0.2.0/skills/modern-python-guidance/guides/sqlalchemy/sqlalchemy-mapped-column.md +73 -0
  26. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/src/modern_python_guidance/__init__.py +1 -1
  27. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/src/modern_python_guidance/mcp_server.py +4 -4
  28. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/tests/test_skill_sync.py +8 -6
  29. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/.github/workflows/ci.yml +0 -0
  30. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/.github/workflows/publish.yml +0 -0
  31. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/LICENSE +0 -0
  32. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/LICENSE-MIT +0 -0
  33. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/SECURITY.md +0 -0
  34. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/async/async-timeout-context.md +0 -0
  35. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/async/exception-groups.md +0 -0
  36. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/async/taskgroup-over-gather.md +0 -0
  37. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/data-structures/dataclass-modern.md +0 -0
  38. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/data-structures/dict-merge-operator.md +0 -0
  39. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/data-structures/match-case-patterns.md +0 -0
  40. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/fastapi/fastapi-annotated-depends.md +0 -0
  41. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/fastapi/fastapi-lifespan.md +0 -0
  42. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/fastapi/fastapi-typed-state.md +0 -0
  43. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/httpx/httpx-async-client-reuse.md +0 -0
  44. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/httpx/httpx-streaming.md +0 -0
  45. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/pydantic/pydantic-v2-config.md +0 -0
  46. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/pydantic/pydantic-v2-model-api.md +0 -0
  47. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/pydantic/pydantic-v2-serialization.md +0 -0
  48. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/pydantic/pydantic-v2-validators.md +0 -0
  49. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/stdlib/datetime-utc.md +0 -0
  50. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/stdlib/pathlib-over-os-path.md +0 -0
  51. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/stdlib/removeprefix-removesuffix.md +0 -0
  52. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/stdlib/tomllib-builtin.md +0 -0
  53. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/toolchain/no-pickle.md +0 -0
  54. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/toolchain/pyproject-toml-over-setup.md +0 -0
  55. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/toolchain/ruff-over-flake8.md +0 -0
  56. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/toolchain/safe-subprocess.md +0 -0
  57. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/toolchain/uv-over-pip.md +0 -0
  58. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/typing/override-decorator.md +0 -0
  59. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/typing/paramspec-decorators.md +0 -0
  60. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/typing/type-parameter-syntax.md +0 -0
  61. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/typing/typeis-vs-typeguard.md +0 -0
  62. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/typing/union-syntax.md +0 -0
  63. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/typing/use-builtin-generics.md +0 -0
  64. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/src/modern_python_guidance/__main__.py +0 -0
  65. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/src/modern_python_guidance/cli.py +0 -0
  66. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/src/modern_python_guidance/compat.py +0 -0
  67. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/src/modern_python_guidance/frontmatter.py +0 -0
  68. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/src/modern_python_guidance/guide_index.py +0 -0
  69. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/src/modern_python_guidance/retrieve.py +0 -0
  70. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/src/modern_python_guidance/search.py +0 -0
  71. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/src/modern_python_guidance/version_detect.py +0 -0
  72. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/tests/test_cli_integration.py +0 -0
  73. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/tests/test_frontmatter.py +0 -0
  74. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/tests/test_mcp_server.py +0 -0
  75. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/tests/test_retrieve.py +0 -0
  76. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/tests/test_search.py +0 -0
  77. {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/tests/test_version_detect.py +0 -0
@@ -26,3 +26,5 @@ venv/
26
26
  *.swp
27
27
  *.swo
28
28
  .DS_Store
29
+
30
+ results/
@@ -2,6 +2,19 @@
2
2
 
3
3
  All notable changes to this project will be documented in this file.
4
4
 
5
+ ## [0.2.0] — 2026-05-27
6
+
7
+ ### Added
8
+
9
+ - 9 new Layer 2 guides: Django (`django-json-field`, `django-async-views`, `django-check-constraints`), SQLAlchemy (`sqlalchemy-2-style`, `sqlalchemy-mapped-column`, `sqlalchemy-async-session`), pytest (`pytest-parametrize`, `pytest-tmp-path`, `pytest-raises-match`)
10
+ - SQLAlchemy 2.0 embedded patterns in SKILL.md (zero Ruff overlap)
11
+
12
+ ### Changed
13
+
14
+ - Guide count: 30 → 39. Layer 2 coverage: 30% (9/30) → 46% (18/39)
15
+ - MCP server `retrieve_guides` max items: 30 → 39
16
+ - SKILL.md description trigger keywords: added "django", "sqlalchemy", "pytest"
17
+
5
18
  ## [0.1.2] — 2026-05-26
6
19
 
7
20
  ### Changed
@@ -42,6 +55,7 @@ Initial release.
42
55
  - Strict YAML-subset frontmatter parser (no PyYAML dependency)
43
56
  - GitHub Actions CI (pytest + ruff on Python 3.11, 3.12, 3.13)
44
57
 
58
+ [0.2.0]: https://github.com/yottayoshida/modern-python-guidance/releases/tag/v0.2.0
45
59
  [0.1.2]: https://github.com/yottayoshida/modern-python-guidance/releases/tag/v0.1.2
46
60
  [0.1.1]: https://github.com/yottayoshida/modern-python-guidance/releases/tag/v0.1.1
47
61
  [0.1.0]: https://github.com/yottayoshida/modern-python-guidance/releases/tag/v0.1.0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: modern-python-guidance
3
- Version: 0.1.2
3
+ Version: 0.2.0
4
4
  Summary: Version-aware BAD/GOOD pattern guides that help AI coding agents generate modern Python
5
5
  Project-URL: Homepage, https://github.com/yottayoshida/modern-python-guidance
6
6
  Project-URL: Repository, https://github.com/yottayoshida/modern-python-guidance
@@ -36,7 +36,7 @@ Description-Content-Type: text/markdown
36
36
  [![Python](https://img.shields.io/pypi/pyversions/modern-python-guidance.svg)](https://pypi.org/project/modern-python-guidance/)
37
37
  [![License](https://img.shields.io/github/license/yottayoshida/modern-python-guidance.svg)](LICENSE)
38
38
 
39
- LLMs often produce outdated Python — `typing.List` instead of `list`, `@validator` instead of `@field_validator`, `setup.py` instead of `pyproject.toml`. This tool provides 30 version-aware BAD/GOOD pattern guides that show the modern replacement, filtered by your project's Python version.
39
+ LLMs often produce outdated Python — `typing.List` instead of `list`, `@validator` instead of `@field_validator`, `setup.py` instead of `pyproject.toml`. This tool provides 39 version-aware BAD/GOOD pattern guides that show the modern replacement, filtered by your project's Python version.
40
40
 
41
41
  > **Note:** The tool itself requires Python 3.11+ to run. Guides cover patterns from Python 3.9 onward, and `--python-version` filters guides for your target environment.
42
42
 
@@ -87,15 +87,15 @@ mpg search "typing" --format json | jq '.[0].id'
87
87
 
88
88
  ## Guide coverage
89
89
 
90
- 30 guides across 3 layers:
90
+ 39 guides across 3 layers:
91
91
 
92
92
  | Layer | Categories | Count | Examples |
93
93
  |-------|-----------|-------|---------|
94
94
  | **1 — stdlib** | typing, async, stdlib, data-structures | 16 | `list` over `List`, `match`/`case`, `TaskGroup` |
95
- | **2 — frameworks** | pydantic, fastapi, httpx | 9 | Pydantic V2 migration, `Annotated[Depends]`, `AsyncClient` |
95
+ | **2 — frameworks** | pydantic, fastapi, httpx, django, sqlalchemy, pytest | 18 | Pydantic V2 migration, SQLAlchemy 2.0 style, `Annotated[Depends]` |
96
96
  | **3 — toolchain** | toolchain | 5 | `uv` over `pip`, `ruff` over flake8, `pickle` avoidance |
97
97
 
98
- Run `mpg list` to see all 30 guides, or [browse them on GitHub](skills/modern-python-guidance/guides/).
98
+ Run `mpg list` to see all 39 guides, or [browse them on GitHub](skills/modern-python-guidance/guides/).
99
99
 
100
100
  ## Version-aware filtering
101
101
 
@@ -186,7 +186,7 @@ src/modern_python_guidance/
186
186
 
187
187
  skills/modern-python-guidance/
188
188
  ├── SKILL.md # Agent Skills plugin entry point
189
- └── guides/ # 30 guide files by category
189
+ └── guides/ # 39 guide files by category
190
190
  ```
191
191
 
192
192
  See [docs/design.md](docs/design.md) for the full design document.
@@ -5,7 +5,7 @@
5
5
  [![Python](https://img.shields.io/pypi/pyversions/modern-python-guidance.svg)](https://pypi.org/project/modern-python-guidance/)
6
6
  [![License](https://img.shields.io/github/license/yottayoshida/modern-python-guidance.svg)](LICENSE)
7
7
 
8
- LLMs often produce outdated Python — `typing.List` instead of `list`, `@validator` instead of `@field_validator`, `setup.py` instead of `pyproject.toml`. This tool provides 30 version-aware BAD/GOOD pattern guides that show the modern replacement, filtered by your project's Python version.
8
+ LLMs often produce outdated Python — `typing.List` instead of `list`, `@validator` instead of `@field_validator`, `setup.py` instead of `pyproject.toml`. This tool provides 39 version-aware BAD/GOOD pattern guides that show the modern replacement, filtered by your project's Python version.
9
9
 
10
10
  > **Note:** The tool itself requires Python 3.11+ to run. Guides cover patterns from Python 3.9 onward, and `--python-version` filters guides for your target environment.
11
11
 
@@ -56,15 +56,15 @@ mpg search "typing" --format json | jq '.[0].id'
56
56
 
57
57
  ## Guide coverage
58
58
 
59
- 30 guides across 3 layers:
59
+ 39 guides across 3 layers:
60
60
 
61
61
  | Layer | Categories | Count | Examples |
62
62
  |-------|-----------|-------|---------|
63
63
  | **1 — stdlib** | typing, async, stdlib, data-structures | 16 | `list` over `List`, `match`/`case`, `TaskGroup` |
64
- | **2 — frameworks** | pydantic, fastapi, httpx | 9 | Pydantic V2 migration, `Annotated[Depends]`, `AsyncClient` |
64
+ | **2 — frameworks** | pydantic, fastapi, httpx, django, sqlalchemy, pytest | 18 | Pydantic V2 migration, SQLAlchemy 2.0 style, `Annotated[Depends]` |
65
65
  | **3 — toolchain** | toolchain | 5 | `uv` over `pip`, `ruff` over flake8, `pickle` avoidance |
66
66
 
67
- Run `mpg list` to see all 30 guides, or [browse them on GitHub](skills/modern-python-guidance/guides/).
67
+ Run `mpg list` to see all 39 guides, or [browse them on GitHub](skills/modern-python-guidance/guides/).
68
68
 
69
69
  ## Version-aware filtering
70
70
 
@@ -155,7 +155,7 @@ src/modern_python_guidance/
155
155
 
156
156
  skills/modern-python-guidance/
157
157
  ├── SKILL.md # Agent Skills plugin entry point
158
- └── guides/ # 30 guide files by category
158
+ └── guides/ # 39 guide files by category
159
159
  ```
160
160
 
161
161
  See [docs/design.md](docs/design.md) for the full design document.
@@ -0,0 +1,11 @@
1
+ Write the following 5 files. Write all code, no placeholders. Create each file at the EXACT path shown below (relative to the current working directory). Do NOT create any project directories or subdirectories beyond what is listed.
2
+
3
+ 1. src/config.py — A typed configuration loader. Define a generic container class `Registry[T]` that stores items by name and retrieves them with type safety. Write a function `load_config(path)` that reads a TOML file and returns a dict.
4
+
5
+ 2. src/crawler.py — An async web crawler. Write an async function `crawl(urls: list[str])` that fetches multiple URLs concurrently using httpx, with a 10-second timeout per request. If a request fails, retry up to 3 times. Return a list of response bodies.
6
+
7
+ 3. src/app.py — A FastAPI application with: a database connection pool initialized at startup and closed at shutdown (use SQLAlchemy async with aiosqlite), a dependency that provides a database session, CRUD endpoints for a User model (GET /users, GET /users/{id}, POST /users), and an OAuth2-protected endpoint GET /users/me that requires the "users:read" scope.
8
+
9
+ 4. src/scanner.py — A file scanner. Write a function `scan_directory(root: Path)` that walks a directory tree recursively, collects all files, groups them into batches of 10, and processes each batch. Define an enum `FileCategory` with values IMAGE, VIDEO, DOCUMENT, OTHER. Use a match statement to categorize each file by its extension (.jpg/.png → IMAGE, .mp4/.avi → VIDEO, .pdf/.docx → DOCUMENT, everything else → OTHER).
10
+
11
+ 5. pyproject.toml — Project config with dependencies on fastapi, sqlalchemy[asyncio], aiosqlite, httpx, and uvicorn, supporting Python 3.12+.
@@ -0,0 +1,11 @@
1
+ Write the following 5 files. Write all code, no placeholders. Create each file at the EXACT path shown below (relative to the current working directory). Do NOT create any project directories or subdirectories beyond what is listed.
2
+
3
+ 1. src/config.py — A typed configuration loader. Define a generic container class `Registry[T]` that stores items by name and retrieves them with type safety. Write a function `load_config(path)` that reads a TOML file and returns a dict. Include a `created_at` timestamp field (UTC) in the registry entries.
4
+
5
+ 2. src/crawler.py — A web crawler. Write a function `crawl(urls: list[str])` that fetches a list of URLs using httpx and returns their response bodies. Handle failures gracefully — a single bad URL should not lose the other results.
6
+
7
+ 3. src/app.py — A FastAPI application with SQLAlchemy and a database. It should have: a User model with CRUD endpoints (GET /users, GET /users/{id}, POST /users), an OAuth2-protected endpoint GET /users/me that requires the "users:read" scope, and proper database lifecycle management.
8
+
9
+ 4. src/scanner.py — A file scanner. Write a function `scan_directory(root: Path)` that walks a directory tree recursively, collects all files, groups them into batches of 10, and processes each batch. Define an enum `FileCategory` with values IMAGE, VIDEO, DOCUMENT, OTHER. Use a match statement to categorize each file by its extension (.jpg/.png → IMAGE, .mp4/.avi → VIDEO, .pdf/.docx → DOCUMENT, everything else → OTHER).
10
+
11
+ 5. pyproject.toml — Project config with dependencies on fastapi, sqlalchemy, httpx, and uvicorn, supporting Python 3.12+.
@@ -0,0 +1,13 @@
1
+ Write the following 6 files. Write all code, no placeholders. Create each file at the EXACT path shown below (relative to the current working directory). Do NOT create any project directories or subdirectories beyond what is listed.
2
+
3
+ 1. src/models.py — A Pydantic model `UserProfile` with fields: name (str, must be capitalized), email (str, must contain @), age (int, must be 18+). Add a validator for each field. Include a Config class that enables ORM mode and allows population by field name. Add a method that returns the model as a dictionary. Also add a model `Order` with fields: id (int), items (list of str), total (float).
4
+
5
+ 2. src/serialization.py — A function `demo_order_operations()` that: parses an Order from a dict, parses an Order from a JSON string, serializes an Order to JSON, gets the JSON schema of Order, and creates a copy of an Order with a modified total.
6
+
7
+ 3. src/app.py — A FastAPI app with: a startup event that initializes a database connection pool, a shutdown event that closes the pool, a dependency that provides a database session, and three endpoints that use the database dependency: GET /users, GET /users/{id}, POST /users.
8
+
9
+ 4. src/fetcher.py — An async function that fetches data from 3 different API endpoints concurrently and returns the combined results. Use httpx for HTTP requests. Handle timeouts and errors gracefully.
10
+
11
+ 5. src/runner.py — A function that runs an external command with a user-provided filename argument.
12
+
13
+ 6. pyproject.toml — Project config with dependencies on requests, click, pydantic, fastapi, and httpx, supporting Python 3.11+.
@@ -0,0 +1,242 @@
1
#!/usr/bin/env bash
set -euo pipefail

# Effectiveness benchmark: A/B test of SKILL.md pre-generation guidance.
#
# Usage:
#   ./bench/run.sh <run_id> control    # Session A (guidance disabled)
#   ./bench/run.sh <run_id> treatment  # Session B (guidance enabled)
#   ./bench/run.sh <run_id> both       # Session A, then Session B

# Repository root (this script lives in <repo>/bench/).
REPO_DIR=$(cd "$(dirname "$0")/.." && pwd)
# Directory the claude CLI is run from; generated files land here.
WORKSPACE="$HOME/claude_workspace"

# Required positional arguments; a missing one aborts with the usage text.
RUN_ID="${1:?Usage: $0 <run_id> <control|treatment|both>}"
SESSION="${2:?Usage: $0 <run_id> <control|treatment|both>}"

# All artifacts of one run are collected under results/run-<run_id>/.
RESULTS_DIR="$REPO_DIR/results/run-${RUN_ID}"

# Prompt version switch: prompt.txt (v1), prompt-v2.txt (v2), prompt-v3.txt (v3).
PROMPT_FILE="$REPO_DIR/bench/prompt-v3.txt"
# Passed to claude as --max-budget-usd for each session.
BUDGET="2.00"

# Locations where the generated project is expected to appear.
GEN_SRC="$WORKSPACE/src"
GEN_PYPROJECT="$WORKSPACE/pyproject.toml"

# --- Guidance toggle: a rules/ file (not skills/) ---
# In pipe mode (claude -p) the skill body is NOT loaded; only its description
# is visible. Rules files (.claude/rules/*.md) that have no `paths:` key are
# always loaded into the system prompt, so guidance is switched on and off by
# adding or removing this one rules file.
RULE_FILE="$WORKSPACE/.claude/rules/modern-python.md"
RULE_SOURCE="$REPO_DIR/skills/modern-python-guidance/SKILL.md"
28
+
29
# Turn guidance off by deleting the rules file.
# Globals: RULE_FILE (read)
# A file that is already absent is not an error (rm -f).
disable_guidance() { rm -f "$RULE_FILE"; }
32
+
33
# Turn guidance on by installing the rules file, unless it already exists.
#
# The rules file is the body of RULE_SOURCE with its leading YAML frontmatter
# (the block between the first two `---` lines) removed.
#
# Globals: RULE_FILE (read; file is created), RULE_SOURCE (read)
# Returns: 0 on success or when the file already exists, non-zero if the
#          rules file could not be written.
enable_guidance() {
  if [ -f "$RULE_FILE" ]; then
    return 0
  fi

  local rules_dir tmp_file
  rules_dir=$(dirname "$RULE_FILE")
  tmp_file="$RULE_FILE.tmp.$$"

  # The rules directory does not exist in a fresh workspace.
  mkdir -p "$rules_dir"

  # Only the first two `---` lines delimit frontmatter; later `---` lines are
  # Markdown horizontal rules and belong to the body. A source that does not
  # start with `---` has no frontmatter and is copied whole.
  # Write to a temp name first so a failed run never leaves a partial rules
  # file behind that the existence check above would accept.
  if awk '
    NR == 1 && $0 != "---" { seen = 2 }
    seen < 2 && $0 == "---" { seen++; next }
    seen >= 2 { print }
  ' "$RULE_SOURCE" > "$tmp_file"; then
    mv -f "$tmp_file" "$RULE_FILE"
  else
    rm -f "$tmp_file"
    return 1
  fi
}
39
+
40
# EXIT-trap handler: put the rules file back (via enable_guidance) so an
# aborted control session does not leave the workspace without guidance,
# then report that it did so.
restore_guidance_on_exit() {
  enable_guidance
  printf '%s\n' "[cleanup] Rules file restored."
}
44
+
45
+ # --- Verification logging ---
46
# Append a snapshot of the guidance state to the run's verification log.
# Globals:   RESULTS_DIR, RULE_FILE, WORKSPACE (all read)
# Arguments: $1 - label for this snapshot (e.g. PRE-CONTROL)
# Outputs:   appends to $RESULTS_DIR/guidance-verify.log
record_verify() {
  local label="$1"
  local log="$RESULTS_DIR/guidance-verify.log"

  {
    # Header: label plus a UTC timestamp.
    echo "=== $label $(date -u '+%Y-%m-%dT%H:%M:%SZ') ==="

    # State of the rules file, which is the primary toggle mechanism.
    echo "RULE_FILE=$RULE_FILE"
    if [ ! -f "$RULE_FILE" ]; then
      echo "status: ABSENT"
    else
      echo "status: PRESENT ($(wc -c < "$RULE_FILE") bytes)"
      echo "first_line: $(head -1 "$RULE_FILE")"
    fi

    # List every rules file so other sources of Python guidance are visible.
    # A missing or empty directory is logged as "(empty)".
    echo "--- all rules files ---"
    ls "$WORKSPACE/.claude/rules/" 2>/dev/null | grep -v '^\.' || echo "(empty)"

    echo ""
  } >> "$log"
}
67
+
68
+ # --- Workspace cleanup ---
69
# Move the files generated by a session out of the workspace into $1.
#
# Capture order:
#   1. Primary:  $GEN_SRC (plus $GEN_PYPROJECT when present).
#   2. Fallback: the first $WORKSPACE/<project>/src directory holding a .py
#      file written during this session, plus that project's pyproject.toml.
#   3. Sweep:    well-known stray files at the workspace root written during
#      this session go to $1/extra/.
#
# Globals:   WORKSPACE, RESULTS_DIR, GEN_SRC, GEN_PYPROJECT (all read)
# Arguments: $1 - destination directory (created if missing)
# Outputs:   progress and warning lines on stdout
# Returns:   0, including when nothing was found to capture
cleanup_generated() {
  local dest="$1"
  local sweep_dir="$dest/extra"
  local ref project_dir="" candidate py f

  # "Written during this session" means "newer than RESULTS_DIR". Creating
  # $dest below adds an entry to RESULTS_DIR and bumps its mtime to now, which
  # would make every generated file look older. Snapshot the mtime first and
  # compare against the snapshot instead.
  ref=$(mktemp)
  # If RESULTS_DIR cannot be read, fall back to the epoch so that everything
  # counts as newer (the same outcome `-nt` gives for a missing reference).
  touch -r "$RESULTS_DIR" "$ref" 2>/dev/null || touch -t 197001010000 "$ref"

  mkdir -p "$dest"

  if [ -d "$GEN_SRC" ]; then
    # Primary: files at $WORKSPACE/src/
    mv "$GEN_SRC" "$dest/src"
    if [ -f "$GEN_PYPROJECT" ]; then
      mv "$GEN_PYPROJECT" "$dest/pyproject.toml"
    fi
  else
    # Fallback: CC may create a project subdirectory. Match on any fresh .py
    # file rather than one fixed name, because each prompt version asks for a
    # different set of modules. The glob skips hidden directories (.venv).
    for candidate in "$WORKSPACE"/*/src; do
      [ -d "$candidate" ] || continue
      case "$candidate" in
        */modern-python-guidance/* | *__bench_backup*) continue ;;
      esac
      for py in "$candidate"/*.py; do
        if [ -f "$py" ] && [ "$py" -nt "$ref" ]; then
          project_dir=${candidate%/src}
          break 2
        fi
      done
    done

    if [ -z "$project_dir" ]; then
      echo "[warning] No generated files found to capture"
      rm -f "$ref"
      return
    fi

    echo "[fallback] Found generated files in $project_dir"
    mv "$project_dir/src" "$dest/src"
    if [ -f "$project_dir/pyproject.toml" ]; then
      mv "$project_dir/pyproject.toml" "$dest/pyproject.toml"
    fi
    # Best effort: drop the project directory only if it is now empty.
    rmdir "$project_dir" 2>/dev/null || true
  fi

  # Sweep: capture any other generated files at workspace root
  # (CC may create __init__.py, README.md, requirements.txt etc.)
  for f in "$WORKSPACE"/__init__.py "$WORKSPACE"/README.md \
    "$WORKSPACE"/requirements.txt "$WORKSPACE"/setup.py "$WORKSPACE"/setup.cfg; do
    if [ -f "$f" ] && [ "$f" -nt "$ref" ]; then
      mkdir -p "$sweep_dir"
      mv "$f" "$sweep_dir/"
      echo "[sweep] Captured extra file: $(basename "$f")"
    fi
  done

  # Also remove a leftover root pyproject.toml if it was not already moved
  # (best effort: a failed move must not abort the run).
  if [ -f "$GEN_PYPROJECT" ]; then
    mv "$GEN_PYPROJECT" "$dest/pyproject.toml" 2>/dev/null || true
  fi

  rm -f "$ref"
}
111
+
112
+ # --- Session runners ---
113
# Run Session A (control): generate code with guidance DISABLED.
#
# Steps: remove the rules file, log the verified state, move any stale
# workspace src/ aside, run claude with the prompt, then collect the
# generated files into $RESULTS_DIR/control.
#
# Globals: RULE_FILE, GEN_SRC, WORKSPACE, BUDGET, PROMPT_FILE, RESULTS_DIR,
#          SESSION (all read)
# Side effects: installs an EXIT trap that restores the rules file; the trap
#          is cleared here only when SESSION is "control".
run_control() {
  local backup status=0

  echo ""
  echo "--- Session A: Control (guidance DISABLED) ---"

  # Arm the restore handler before removing the file so that no exit path
  # can leave the workspace without guidance.
  trap restore_guidance_on_exit EXIT
  disable_guidance

  # Verify rules file is gone
  if [ -f "$RULE_FILE" ]; then
    echo "ERROR: Rules file still exists after rm!" >&2
    exit 1
  fi
  record_verify "PRE-CONTROL"
  echo "[ok] Rules file removed (verified)"

  # Clean workspace: keep a stale src/ under a timestamped backup name.
  if [ -d "$GEN_SRC" ]; then
    backup="$WORKSPACE/src.__bench_backup_$(date +%s)__"
    echo "[warning] Existing $GEN_SRC found. Moving to $backup"
    mv "$GEN_SRC" "$backup"
  fi

  # Run CC. A failing session must not abort the benchmark (partial output is
  # still collected below), but the failure is reported instead of hidden.
  echo "[running] claude -p (Control) from $WORKSPACE ..."
  (
    cd "$WORKSPACE" \
      && claude -p --output-format json --max-budget-usd "$BUDGET" \
        < "$PROMPT_FILE" \
        > "$RESULTS_DIR/session-a.json" \
        2> "$RESULTS_DIR/session-a.stderr"
  ) || status=$?
  if [ "$status" -ne 0 ]; then
    echo "[warning] claude exited with status $status (Control); see $RESULTS_DIR/session-a.stderr" >&2
  fi

  record_verify "POST-CONTROL"
  cleanup_generated "$RESULTS_DIR/control"
  echo "[ok] Control files saved to $RESULTS_DIR/control/"

  # Restore guidance if running control only. In "both" mode the trap stays
  # armed until run_treatment re-enables guidance itself.
  if [ "$SESSION" = "control" ]; then
    enable_guidance
    trap - EXIT
    echo "[ok] Rules file restored."
  fi
}
151
+
152
# Run Session B (treatment): generate code with guidance ENABLED.
#
# Steps: install the rules file, log the verified state, move any stale
# workspace src/ aside, run claude with the prompt, then collect the
# generated files into $RESULTS_DIR/treatment.
#
# Globals: RULE_FILE, GEN_SRC, WORKSPACE, BUDGET, PROMPT_FILE, RESULTS_DIR
#          (all read)
# Side effects: clears the EXIT trap once guidance is back in place.
run_treatment() {
  local backup status=0

  echo ""
  echo "--- Session B: Treatment (guidance ENABLED) ---"

  # Guidance is restored here, so the restore-on-exit handler that a
  # preceding control session may have armed is no longer needed.
  enable_guidance
  trap - EXIT

  if [ ! -f "$RULE_FILE" ]; then
    echo "ERROR: Rules file not found at $RULE_FILE" >&2
    exit 1
  fi
  record_verify "PRE-TREATMENT"
  echo "[ok] Rules file present ($(wc -c < "$RULE_FILE") bytes, verified)"

  # Clean workspace: keep a stale src/ under a timestamped backup name.
  if [ -d "$GEN_SRC" ]; then
    backup="$WORKSPACE/src.__bench_backup_$(date +%s)__"
    echo "[warning] Existing $GEN_SRC found. Moving to $backup"
    mv "$GEN_SRC" "$backup"
  fi

  # Run CC. A failing session must not abort the benchmark (partial output is
  # still collected below), but the failure is reported instead of hidden.
  echo "[running] claude -p (Treatment) from $WORKSPACE ..."
  (
    cd "$WORKSPACE" \
      && claude -p --output-format json --max-budget-usd "$BUDGET" \
        < "$PROMPT_FILE" \
        > "$RESULTS_DIR/session-b.json" \
        2> "$RESULTS_DIR/session-b.stderr"
  ) || status=$?
  if [ "$status" -ne 0 ]; then
    echo "[warning] claude exited with status $status (Treatment); see $RESULTS_DIR/session-b.stderr" >&2
  fi

  record_verify "POST-TREATMENT"
  cleanup_generated "$RESULTS_DIR/treatment"
  echo "[ok] Treatment files saved to $RESULTS_DIR/treatment/"
}
182
+
183
# --- Pre-flight checks ---
# Fail fast, before guidance is toggled or the workspace is touched, when an
# input is missing or this run would overwrite earlier results.
if [ ! -f "$PROMPT_FILE" ]; then
  echo "ERROR: Prompt file not found at $PROMPT_FILE" >&2
  exit 1
fi

if [ ! -f "$RULE_SOURCE" ]; then
  echo "ERROR: Guidance source not found at $RULE_SOURCE" >&2
  exit 1
fi

# The sessions tolerate a failing claude invocation, so a missing workspace
# or CLI would otherwise only show up as empty results.
if [ ! -d "$WORKSPACE" ]; then
  echo "ERROR: Workspace directory not found at $WORKSPACE" >&2
  exit 1
fi

if ! command -v claude > /dev/null 2>&1; then
  echo "ERROR: 'claude' CLI not found on PATH" >&2
  exit 1
fi

case "$SESSION" in
  control | treatment | both) ;;
  *)
    echo "ERROR: Invalid session '$SESSION'. Use: control, treatment, or both" >&2
    exit 1
    ;;
esac

# Never overwrite the output of a session that has already been recorded.
if [ "$SESSION" = "control" ] && [ -f "$RESULTS_DIR/session-a.json" ]; then
  echo "ERROR: Control session already exists in $RESULTS_DIR" >&2
  exit 1
fi
if [ "$SESSION" = "treatment" ] && [ -f "$RESULTS_DIR/session-b.json" ]; then
  echo "ERROR: Treatment session already exists in $RESULTS_DIR" >&2
  exit 1
fi
if [ "$SESSION" = "both" ] && [ -d "$RESULTS_DIR" ]; then
  echo "ERROR: Results directory already exists: $RESULTS_DIR" >&2
  exit 1
fi

mkdir -p "$RESULTS_DIR"

echo "=== Effectiveness Benchmark Run $RUN_ID ($SESSION) ==="
echo "Prompt: $PROMPT_FILE"
echo "Results: $RESULTS_DIR"
220
+
221
# --- Execute ---
# Point the user at the scorer that belongs to the prompt version in use:
# prompt.txt -> score.sh, prompt-vN.txt -> score-vN.sh. Any other prompt
# file name falls back to score.sh.
prompt_name=$(basename "$PROMPT_FILE" .txt)
case "$prompt_name" in
  prompt*) score_script="./bench/score${prompt_name#prompt}.sh" ;;
  *) score_script="./bench/score.sh" ;;
esac

case "$SESSION" in
  control)
    run_control
    echo ""
    echo "=== Control session complete ==="
    echo "Run treatment: ./bench/run.sh $RUN_ID treatment"
    ;;
  treatment)
    run_treatment
    echo ""
    echo "=== Treatment session complete ==="
    echo "Score with: $score_script $RUN_ID"
    ;;
  both)
    run_control
    run_treatment
    echo ""
    echo "=== Run $RUN_ID Complete ==="
    echo "Score with: $score_script $RUN_ID"
    ;;
esac