modern-python-guidance 0.1.2__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/.gitignore +2 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/CHANGELOG.md +14 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/PKG-INFO +6 -6
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/README.md +5 -5
- modern_python_guidance-0.2.0/bench/prompt-v2.txt +11 -0
- modern_python_guidance-0.2.0/bench/prompt-v3.txt +11 -0
- modern_python_guidance-0.2.0/bench/prompt.txt +13 -0
- modern_python_guidance-0.2.0/bench/run.sh +242 -0
- modern_python_guidance-0.2.0/bench/score-v2.sh +300 -0
- modern_python_guidance-0.2.0/bench/score-v3.sh +327 -0
- modern_python_guidance-0.2.0/bench/score.sh +309 -0
- modern_python_guidance-0.2.0/docs/benchmark-evaluation.md +592 -0
- modern_python_guidance-0.2.0/docs/benchmark-procedure.md +128 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/docs/design.md +3 -2
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/pyproject.toml +1 -1
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/SKILL.md +12 -2
- modern_python_guidance-0.2.0/skills/modern-python-guidance/guides/django/django-async-views.md +67 -0
- modern_python_guidance-0.2.0/skills/modern-python-guidance/guides/django/django-check-constraints.md +73 -0
- modern_python_guidance-0.2.0/skills/modern-python-guidance/guides/django/django-json-field.md +58 -0
- modern_python_guidance-0.2.0/skills/modern-python-guidance/guides/pytest/pytest-parametrize.md +72 -0
- modern_python_guidance-0.2.0/skills/modern-python-guidance/guides/pytest/pytest-raises-match.md +64 -0
- modern_python_guidance-0.2.0/skills/modern-python-guidance/guides/pytest/pytest-tmp-path.md +58 -0
- modern_python_guidance-0.2.0/skills/modern-python-guidance/guides/sqlalchemy/sqlalchemy-2-style.md +77 -0
- modern_python_guidance-0.2.0/skills/modern-python-guidance/guides/sqlalchemy/sqlalchemy-async-session.md +72 -0
- modern_python_guidance-0.2.0/skills/modern-python-guidance/guides/sqlalchemy/sqlalchemy-mapped-column.md +73 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/src/modern_python_guidance/__init__.py +1 -1
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/src/modern_python_guidance/mcp_server.py +4 -4
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/tests/test_skill_sync.py +8 -6
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/.github/workflows/ci.yml +0 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/.github/workflows/publish.yml +0 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/LICENSE +0 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/LICENSE-MIT +0 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/SECURITY.md +0 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/async/async-timeout-context.md +0 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/async/exception-groups.md +0 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/async/taskgroup-over-gather.md +0 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/data-structures/dataclass-modern.md +0 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/data-structures/dict-merge-operator.md +0 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/data-structures/match-case-patterns.md +0 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/fastapi/fastapi-annotated-depends.md +0 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/fastapi/fastapi-lifespan.md +0 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/fastapi/fastapi-typed-state.md +0 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/httpx/httpx-async-client-reuse.md +0 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/httpx/httpx-streaming.md +0 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/pydantic/pydantic-v2-config.md +0 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/pydantic/pydantic-v2-model-api.md +0 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/pydantic/pydantic-v2-serialization.md +0 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/pydantic/pydantic-v2-validators.md +0 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/stdlib/datetime-utc.md +0 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/stdlib/pathlib-over-os-path.md +0 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/stdlib/removeprefix-removesuffix.md +0 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/stdlib/tomllib-builtin.md +0 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/toolchain/no-pickle.md +0 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/toolchain/pyproject-toml-over-setup.md +0 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/toolchain/ruff-over-flake8.md +0 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/toolchain/safe-subprocess.md +0 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/toolchain/uv-over-pip.md +0 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/typing/override-decorator.md +0 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/typing/paramspec-decorators.md +0 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/typing/type-parameter-syntax.md +0 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/typing/typeis-vs-typeguard.md +0 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/typing/union-syntax.md +0 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/skills/modern-python-guidance/guides/typing/use-builtin-generics.md +0 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/src/modern_python_guidance/__main__.py +0 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/src/modern_python_guidance/cli.py +0 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/src/modern_python_guidance/compat.py +0 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/src/modern_python_guidance/frontmatter.py +0 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/src/modern_python_guidance/guide_index.py +0 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/src/modern_python_guidance/retrieve.py +0 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/src/modern_python_guidance/search.py +0 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/src/modern_python_guidance/version_detect.py +0 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/tests/test_cli_integration.py +0 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/tests/test_frontmatter.py +0 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/tests/test_mcp_server.py +0 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/tests/test_retrieve.py +0 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/tests/test_search.py +0 -0
- {modern_python_guidance-0.1.2 → modern_python_guidance-0.2.0}/tests/test_version_detect.py +0 -0
|
@@ -2,6 +2,19 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project will be documented in this file.
|
|
4
4
|
|
|
5
|
+
## [0.2.0] — 2026-05-27
|
|
6
|
+
|
|
7
|
+
### Added
|
|
8
|
+
|
|
9
|
+
- 9 new Layer 2 guides: Django (`django-json-field`, `django-async-views`, `django-check-constraints`), SQLAlchemy (`sqlalchemy-2-style`, `sqlalchemy-mapped-column`, `sqlalchemy-async-session`), pytest (`pytest-parametrize`, `pytest-tmp-path`, `pytest-raises-match`)
|
|
10
|
+
- SQLAlchemy 2.0 embedded patterns in SKILL.md (zero Ruff overlap)
|
|
11
|
+
|
|
12
|
+
### Changed
|
|
13
|
+
|
|
14
|
+
- Guide count: 30 → 39. Layer 2 coverage: 30% (9/30) → 46% (18/39)
|
|
15
|
+
- MCP server `retrieve_guides` max items: 30 → 39
|
|
16
|
+
- SKILL.md description trigger keywords: added "django", "sqlalchemy", "pytest"
|
|
17
|
+
|
|
5
18
|
## [0.1.2] — 2026-05-26
|
|
6
19
|
|
|
7
20
|
### Changed
|
|
@@ -42,6 +55,7 @@ Initial release.
|
|
|
42
55
|
- Strict YAML-subset frontmatter parser (no PyYAML dependency)
|
|
43
56
|
- GitHub Actions CI (pytest + ruff on Python 3.11, 3.12, 3.13)
|
|
44
57
|
|
|
58
|
+
[0.2.0]: https://github.com/yottayoshida/modern-python-guidance/releases/tag/v0.2.0
|
|
45
59
|
[0.1.2]: https://github.com/yottayoshida/modern-python-guidance/releases/tag/v0.1.2
|
|
46
60
|
[0.1.1]: https://github.com/yottayoshida/modern-python-guidance/releases/tag/v0.1.1
|
|
47
61
|
[0.1.0]: https://github.com/yottayoshida/modern-python-guidance/releases/tag/v0.1.0
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: modern-python-guidance
|
|
3
|
-
Version: 0.1.2
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: Version-aware BAD/GOOD pattern guides that help AI coding agents generate modern Python
|
|
5
5
|
Project-URL: Homepage, https://github.com/yottayoshida/modern-python-guidance
|
|
6
6
|
Project-URL: Repository, https://github.com/yottayoshida/modern-python-guidance
|
|
@@ -36,7 +36,7 @@ Description-Content-Type: text/markdown
|
|
|
36
36
|
[](https://pypi.org/project/modern-python-guidance/)
|
|
37
37
|
[](LICENSE)
|
|
38
38
|
|
|
39
|
-
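LLMs often produce outdated Python — `typing.List` instead of `list`, `@validator` instead of `@field_validator`, `setup.py` instead of `pyproject.toml`. This tool provides 30 version-aware BAD/GOOD pattern guides that show the modern replacement, filtered by your project's Python version.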
|
|
39
|
+
LLMs often produce outdated Python — `typing.List` instead of `list`, `@validator` instead of `@field_validator`, `setup.py` instead of `pyproject.toml`. This tool provides 39 version-aware BAD/GOOD pattern guides that show the modern replacement, filtered by your project's Python version.
|
|
40
40
|
|
|
41
41
|
> **Note:** The tool itself requires Python 3.11+ to run. Guides cover patterns from Python 3.9 onward, and `--python-version` filters guides for your target environment.
|
|
42
42
|
|
|
@@ -87,15 +87,15 @@ mpg search "typing" --format json | jq '.[0].id'
|
|
|
87
87
|
|
|
88
88
|
## Guide coverage
|
|
89
89
|
|
|
90
|
-
|
|
90
|
+
39 guides across 3 layers:
|
|
91
91
|
|
|
92
92
|
| Layer | Categories | Count | Examples |
|
|
93
93
|
|-------|-----------|-------|---------|
|
|
94
94
|
| **1 — stdlib** | typing, async, stdlib, data-structures | 16 | `list` over `List`, `match`/`case`, `TaskGroup` |
|
|
95
|
-
| **2 — frameworks** | pydantic, fastapi, httpx |
|
|
95
|
+
| **2 — frameworks** | pydantic, fastapi, httpx, django, sqlalchemy, pytest | 18 | Pydantic V2 migration, SQLAlchemy 2.0 style, `Annotated[Depends]` |
|
|
96
96
|
| **3 — toolchain** | toolchain | 5 | `uv` over `pip`, `ruff` over flake8, `pickle` avoidance |
|
|
97
97
|
|
|
98
|
-
Run `mpg list` to see all
|
|
98
|
+
Run `mpg list` to see all 39 guides, or [browse them on GitHub](skills/modern-python-guidance/guides/).
|
|
99
99
|
|
|
100
100
|
## Version-aware filtering
|
|
101
101
|
|
|
@@ -186,7 +186,7 @@ src/modern_python_guidance/
|
|
|
186
186
|
|
|
187
187
|
skills/modern-python-guidance/
|
|
188
188
|
├── SKILL.md # Agent Skills plugin entry point
|
|
189
|
-
└── guides/ #
|
|
189
|
+
└── guides/ # 39 guide files by category
|
|
190
190
|
```
|
|
191
191
|
|
|
192
192
|
See [docs/design.md](docs/design.md) for the full design document.
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
[](https://pypi.org/project/modern-python-guidance/)
|
|
6
6
|
[](LICENSE)
|
|
7
7
|
|
|
8
|
-
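LLMs often produce outdated Python — `typing.List` instead of `list`, `@validator` instead of `@field_validator`, `setup.py` instead of `pyproject.toml`. This tool provides 30 version-aware BAD/GOOD pattern guides that show the modern replacement, filtered by your project's Python version.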
|
|
8
|
+
LLMs often produce outdated Python — `typing.List` instead of `list`, `@validator` instead of `@field_validator`, `setup.py` instead of `pyproject.toml`. This tool provides 39 version-aware BAD/GOOD pattern guides that show the modern replacement, filtered by your project's Python version.
|
|
9
9
|
|
|
10
10
|
> **Note:** The tool itself requires Python 3.11+ to run. Guides cover patterns from Python 3.9 onward, and `--python-version` filters guides for your target environment.
|
|
11
11
|
|
|
@@ -56,15 +56,15 @@ mpg search "typing" --format json | jq '.[0].id'
|
|
|
56
56
|
|
|
57
57
|
## Guide coverage
|
|
58
58
|
|
|
59
|
-
|
|
59
|
+
39 guides across 3 layers:
|
|
60
60
|
|
|
61
61
|
| Layer | Categories | Count | Examples |
|
|
62
62
|
|-------|-----------|-------|---------|
|
|
63
63
|
| **1 — stdlib** | typing, async, stdlib, data-structures | 16 | `list` over `List`, `match`/`case`, `TaskGroup` |
|
|
64
|
-
| **2 — frameworks** | pydantic, fastapi, httpx |
|
|
64
|
+
| **2 — frameworks** | pydantic, fastapi, httpx, django, sqlalchemy, pytest | 18 | Pydantic V2 migration, SQLAlchemy 2.0 style, `Annotated[Depends]` |
|
|
65
65
|
| **3 — toolchain** | toolchain | 5 | `uv` over `pip`, `ruff` over flake8, `pickle` avoidance |
|
|
66
66
|
|
|
67
|
-
Run `mpg list` to see all
|
|
67
|
+
Run `mpg list` to see all 39 guides, or [browse them on GitHub](skills/modern-python-guidance/guides/).
|
|
68
68
|
|
|
69
69
|
## Version-aware filtering
|
|
70
70
|
|
|
@@ -155,7 +155,7 @@ src/modern_python_guidance/
|
|
|
155
155
|
|
|
156
156
|
skills/modern-python-guidance/
|
|
157
157
|
├── SKILL.md # Agent Skills plugin entry point
|
|
158
|
-
└── guides/ #
|
|
158
|
+
└── guides/ # 39 guide files by category
|
|
159
159
|
```
|
|
160
160
|
|
|
161
161
|
See [docs/design.md](docs/design.md) for the full design document.
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
Write the following 5 files. Write all code, no placeholders. Create each file at the EXACT path shown below (relative to the current working directory). Do NOT create any project directories or subdirectories beyond what is listed.
|
|
2
|
+
|
|
3
|
+
1. src/config.py — A typed configuration loader. Define a generic container class `Registry[T]` that stores items by name and retrieves them with type safety. Write a function `load_config(path)` that reads a TOML file and returns a dict.
|
|
4
|
+
|
|
5
|
+
2. src/crawler.py — An async web crawler. Write an async function `crawl(urls: list[str])` that fetches multiple URLs concurrently using httpx, with a 10-second timeout per request. If a request fails, retry up to 3 times. Return a list of response bodies.
|
|
6
|
+
|
|
7
|
+
3. src/app.py — A FastAPI application with: a database connection pool initialized at startup and closed at shutdown (use SQLAlchemy async with aiosqlite), a dependency that provides a database session, CRUD endpoints for a User model (GET /users, GET /users/{id}, POST /users), and an OAuth2-protected endpoint GET /users/me that requires the "users:read" scope.
|
|
8
|
+
|
|
9
|
+
4. src/scanner.py — A file scanner. Write a function `scan_directory(root: Path)` that walks a directory tree recursively, collects all files, groups them into batches of 10, and processes each batch. Define an enum `FileCategory` with values IMAGE, VIDEO, DOCUMENT, OTHER. Use a match statement to categorize each file by its extension (.jpg/.png → IMAGE, .mp4/.avi → VIDEO, .pdf/.docx → DOCUMENT, everything else → OTHER).
|
|
10
|
+
|
|
11
|
+
5. pyproject.toml — Project config with dependencies on fastapi, sqlalchemy[asyncio], aiosqlite, httpx, and uvicorn, supporting Python 3.12+.
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
Write the following 5 files. Write all code, no placeholders. Create each file at the EXACT path shown below (relative to the current working directory). Do NOT create any project directories or subdirectories beyond what is listed.
|
|
2
|
+
|
|
3
|
+
1. src/config.py — A typed configuration loader. Define a generic container class `Registry[T]` that stores items by name and retrieves them with type safety. Write a function `load_config(path)` that reads a TOML file and returns a dict. Include a `created_at` timestamp field (UTC) in the registry entries.
|
|
4
|
+
|
|
5
|
+
2. src/crawler.py — A web crawler. Write a function `crawl(urls: list[str])` that fetches a list of URLs using httpx and returns their response bodies. Handle failures gracefully — a single bad URL should not lose the other results.
|
|
6
|
+
|
|
7
|
+
3. src/app.py — A FastAPI application with SQLAlchemy and a database. It should have: a User model with CRUD endpoints (GET /users, GET /users/{id}, POST /users), an OAuth2-protected endpoint GET /users/me that requires the "users:read" scope, and proper database lifecycle management.
|
|
8
|
+
|
|
9
|
+
4. src/scanner.py — A file scanner. Write a function `scan_directory(root: Path)` that walks a directory tree recursively, collects all files, groups them into batches of 10, and processes each batch. Define an enum `FileCategory` with values IMAGE, VIDEO, DOCUMENT, OTHER. Use a match statement to categorize each file by its extension (.jpg/.png → IMAGE, .mp4/.avi → VIDEO, .pdf/.docx → DOCUMENT, everything else → OTHER).
|
|
10
|
+
|
|
11
|
+
5. pyproject.toml — Project config with dependencies on fastapi, sqlalchemy, httpx, and uvicorn, supporting Python 3.12+.
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
Write the following 6 files. Write all code, no placeholders. Create each file at the EXACT path shown below (relative to the current working directory). Do NOT create any project directories or subdirectories beyond what is listed.
|
|
2
|
+
|
|
3
|
+
1. src/models.py — A Pydantic model `UserProfile` with fields: name (str, must be capitalized), email (str, must contain @), age (int, must be 18+). Add a validator for each field. Include a Config class that enables ORM mode and allows population by field name. Add a method that returns the model as a dictionary. Also add a model `Order` with fields: id (int), items (list of str), total (float).
|
|
4
|
+
|
|
5
|
+
2. src/serialization.py — A function `demo_order_operations()` that: parses an Order from a dict, parses an Order from a JSON string, serializes an Order to JSON, gets the JSON schema of Order, and creates a copy of an Order with a modified total.
|
|
6
|
+
|
|
7
|
+
3. src/app.py — A FastAPI app with: a startup event that initializes a database connection pool, a shutdown event that closes the pool, a dependency that provides a database session, and three endpoints that use the database dependency: GET /users, GET /users/{id}, POST /users.
|
|
8
|
+
|
|
9
|
+
4. src/fetcher.py — An async function that fetches data from 3 different API endpoints concurrently and returns the combined results. Use httpx for HTTP requests. Handle timeouts and errors gracefully.
|
|
10
|
+
|
|
11
|
+
5. src/runner.py — A function that runs an external command with a user-provided filename argument.
|
|
12
|
+
|
|
13
|
+
6. pyproject.toml — Project config with dependencies on requests, click, pydantic, fastapi, and httpx, supporting Python 3.11+.
|
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
#!/usr/bin/env bash
set -euo pipefail

# Effectiveness Benchmark: A/B test for SKILL.md pre-generation guidance
# Usage:
#   ./bench/run.sh <run_id> control    — Run Session A (skill disabled)
#   ./bench/run.sh <run_id> treatment  — Run Session B (skill enabled)
#   ./bench/run.sh <run_id> both       — Run A then B sequentially
#
# Optional environment overrides (each default equals the previously
# hard-coded value, so existing invocations behave exactly as before):
#   BENCH_WORKSPACE    directory claude is run from
#   BENCH_PROMPT_FILE  prompt file fed to claude on stdin
#   BENCH_BUDGET       value passed to claude's --max-budget-usd

REPO_DIR="$(cd "$(dirname "$0")/.." && pwd)"
WORKSPACE="${BENCH_WORKSPACE:-$HOME/claude_workspace}"
RUN_ID="${1:?Usage: $0 <run_id> <control|treatment|both>}"
SESSION="${2:?Usage: $0 <run_id> <control|treatment|both>}"
RESULTS_DIR="$REPO_DIR/results/run-${RUN_ID}"
# Switch prompt version: prompt.txt (v1), prompt-v2.txt (v2), prompt-v3.txt (v3)
# Set BENCH_PROMPT_FILE to pick another prompt without editing this script.
PROMPT_FILE="${BENCH_PROMPT_FILE:-$REPO_DIR/bench/prompt-v3.txt}"
BUDGET="${BENCH_BUDGET:-2.00}"

# Paths where the generated project is expected to land inside the workspace.
GEN_SRC="$WORKSPACE/src"
GEN_PYPROJECT="$WORKSPACE/pyproject.toml"

# --- Guidance toggle: rules/ file (not skills/) ---
# Skills body is NOT loaded in pipe mode (claude -p). Only description is visible.
# Rules files (.claude/rules/*.md) without paths: are always loaded into system prompt.
# Toggle by adding/removing the rules file.
RULE_FILE="$WORKSPACE/.claude/rules/modern-python.md"
RULE_SOURCE="$REPO_DIR/skills/modern-python-guidance/SKILL.md"
|
|
28
|
+
|
|
29
|
+
# Turn guidance off by deleting the rules file.
# Globals: RULE_FILE (read).
# Succeeds even when the file is already absent (rm -f).
disable_guidance() {
  local target="$RULE_FILE"
  rm -f "$target"
}
|
|
32
|
+
|
|
33
|
+
# Turn guidance on by materialising the rules file from SKILL.md.
# Globals: RULE_FILE (written when missing), RULE_SOURCE (read).
# Returns: 0 on success or when the rules file already exists; non-zero if
#          the directory cannot be created or the body cannot be extracted.
enable_guidance() {
  if [ -f "$RULE_FILE" ]; then
    return 0
  fi

  # The rules directory may not exist yet in a fresh workspace.
  mkdir -p "$(dirname "$RULE_FILE")" || return 1

  # Copy body only (strip YAML frontmatter between the first two --- markers).
  # Only the first two markers are treated as fences; any later '---' line is
  # a Markdown horizontal rule that belongs to the body and must be kept.
  # Write to a temp file first so a failed awk never leaves behind an empty
  # rules file that later calls would mistake for valid guidance.
  local tmp="${RULE_FILE}.tmp.$$"
  if awk 'fences < 2 && /^---$/ { fences++; next } fences >= 2 { print }' \
    "$RULE_SOURCE" > "$tmp"; then
    mv -f "$tmp" "$RULE_FILE"
  else
    rm -f "$tmp"
    return 1
  fi
}
|
|
39
|
+
|
|
40
|
+
# Put the rules file back and announce it on stdout.
# run_control installs this as its EXIT trap handler.
restore_guidance_on_exit() {
  enable_guidance
  printf '%s\n' "[cleanup] Rules file restored."
}
|
|
44
|
+
|
|
45
|
+
# --- Verification logging ---
|
|
46
|
+
# Append a snapshot of the guidance toggle state to the run's verify log.
# Globals:   RESULTS_DIR, RULE_FILE, WORKSPACE (all read).
# Arguments: $1 - label for the snapshot header (e.g. PRE-CONTROL).
# Outputs:   appends to $RESULTS_DIR/guidance-verify.log.
record_verify() {
  local tag="$1"
  local logfile="$RESULTS_DIR/guidance-verify.log"
  local rules_dir="$WORKSPACE/.claude/rules/"

  {
    echo "=== $tag $(date -u '+%Y-%m-%dT%H:%M:%SZ') ==="

    # Rules file state (primary toggle mechanism)
    echo "RULE_FILE=$RULE_FILE"
    if [ ! -f "$RULE_FILE" ]; then
      echo "status: ABSENT"
    else
      echo "status: PRESENT ($(wc -c < "$RULE_FILE") bytes)"
      echo "first_line: $(head -1 "$RULE_FILE")"
    fi

    # Check for other rules that might contain Python guidance.
    # A missing or empty directory makes grep fail, which prints "(empty)".
    echo "--- all rules files ---"
    ls "$rules_dir" 2>/dev/null | grep -v '^\.' || echo "(empty)"

    echo ""
  } >> "$logfile"
}
|
|
67
|
+
|
|
68
|
+
# --- Workspace cleanup ---
|
|
69
|
+
# Move the files a session generated out of the workspace into the results tree.
# Globals:   WORKSPACE, GEN_SRC, GEN_PYPROJECT, RESULTS_DIR (all read).
# Arguments: $1 - destination directory (created if missing).
# Outputs:   progress and warning messages on stdout.
# Returns:   0, including when nothing was found to capture.
cleanup_generated() {
  local dest="$1"
  local marker candidate found_file project_dir sweep_dir f

  # Snapshot RESULTS_DIR's mtime BEFORE touching the results tree. Creating
  # $dest inside RESULTS_DIR bumps that directory's mtime to "now", which
  # would make every generated file look older than the reference and
  # silently disable the fallback and the sweep below.
  marker=$(mktemp) || return 1
  if [ -d "$RESULTS_DIR" ]; then
    touch -r "$RESULTS_DIR" "$marker"
  fi
  mkdir -p "$dest"

  # Primary: files at $WORKSPACE/src/
  if [ -d "$GEN_SRC" ]; then
    mv "$GEN_SRC" "$dest/src"
    if [ -f "$GEN_PYPROJECT" ]; then
      mv "$GEN_PYPROJECT" "$dest/pyproject.toml"
    fi
  else
    # Fallback: CC may create a project subdirectory, i.e.
    # $WORKSPACE/<project>/src/*.py. Match any Python file rather than one
    # fixed name, because the file set differs between prompt versions.
    found_file=""
    for candidate in "$WORKSPACE"/*/src/*.py; do
      if [ ! -f "$candidate" ] || [ ! "$candidate" -nt "$marker" ]; then
        continue
      fi
      case "$candidate" in
        */.venv/* | */modern-python-guidance/* | */__bench_backup*) continue ;;
      esac
      found_file="$candidate"
      break
    done

    if [ -z "$found_file" ]; then
      echo "[warning] No generated files found to capture"
      rm -f "$marker"
      return 0
    fi

    project_dir=$(dirname "$(dirname "$found_file")")
    echo "[fallback] Found generated files in $project_dir"
    if [ -d "$project_dir/src" ]; then
      mv "$project_dir/src" "$dest/src"
    fi
    if [ -f "$project_dir/pyproject.toml" ]; then
      mv "$project_dir/pyproject.toml" "$dest/pyproject.toml"
    fi
    # Best effort: only removes the project directory if it is now empty.
    rmdir "$project_dir" 2>/dev/null || true
  fi

  # Sweep: capture any other generated files at workspace root
  # (CC may create __init__.py, README.md, requirements.txt etc.)
  sweep_dir="$dest/extra"
  for f in "$WORKSPACE"/__init__.py "$WORKSPACE"/README.md "$WORKSPACE"/requirements.txt \
    "$WORKSPACE"/setup.py "$WORKSPACE"/setup.cfg; do
    if [ -f "$f" ] && [ "$f" -nt "$marker" ]; then
      mkdir -p "$sweep_dir"
      mv "$f" "$sweep_dir/"
      echo "[sweep] Captured extra file: $(basename "$f")"
    fi
  done

  # Also remove leftover pyproject.toml if not already moved (best effort).
  if [ -f "$GEN_PYPROJECT" ]; then
    mv "$GEN_PYPROJECT" "$dest/pyproject.toml" 2>/dev/null || true
  fi

  rm -f "$marker"
  return 0
}
|
|
111
|
+
|
|
112
|
+
# --- Session runners ---
|
|
113
|
+
# Session A: run the prompt with guidance disabled and archive the output.
# Globals: WORKSPACE, GEN_SRC, RULE_FILE, PROMPT_FILE, BUDGET, RESULTS_DIR,
#          SESSION (all read).
# Side effects: removes the rules file, installs an EXIT trap that restores
#          it, writes session-a.json / session-a.stderr under RESULTS_DIR.
#          Exits the script with status 1 if the rules file cannot be removed.
run_control() {
  local backup
  local rc=0

  echo ""
  echo "--- Session A: Control (guidance DISABLED) ---"

  disable_guidance
  # Make sure guidance comes back even if the script dies mid-session.
  trap restore_guidance_on_exit EXIT

  # Verify rules file is gone
  if [ -f "$RULE_FILE" ]; then
    echo "ERROR: Rules file still exists after rm!" >&2
    exit 1
  fi
  record_verify "PRE-CONTROL"
  echo "[ok] Rules file removed (verified)"

  # Clean workspace: park any pre-existing src/ so it is not mistaken for
  # generated output.
  if [ -d "$GEN_SRC" ]; then
    backup="$WORKSPACE/src.__bench_backup_$(date +%s)__"
    echo "[warning] Existing $GEN_SRC found. Moving to $backup"
    mv "$GEN_SRC" "$backup"
  fi

  # Run CC. A failing session must not abort the benchmark (partial output is
  # still captured below), but it must not pass unnoticed either.
  echo "[running] claude -p (Control) from $WORKSPACE ..."
  (cd "$WORKSPACE" && claude -p --output-format json --max-budget-usd "$BUDGET" \
    < "$PROMPT_FILE" > "$RESULTS_DIR/session-a.json" 2> "$RESULTS_DIR/session-a.stderr") || rc=$?
  if [ "$rc" -ne 0 ]; then
    echo "[warning] claude exited with status $rc; see $RESULTS_DIR/session-a.stderr" >&2
  fi

  record_verify "POST-CONTROL"
  cleanup_generated "$RESULTS_DIR/control"
  echo "[ok] Control files saved to $RESULTS_DIR/control/"

  # Restore guidance if running control only
  if [ "$SESSION" = "control" ]; then
    enable_guidance
    trap - EXIT
    echo "[ok] Rules file restored."
  fi
}
|
|
151
|
+
|
|
152
|
+
# Session B: run the prompt with guidance enabled and archive the output.
# Globals: WORKSPACE, GEN_SRC, RULE_FILE, PROMPT_FILE, BUDGET, RESULTS_DIR
#          (all read).
# Side effects: ensures the rules file exists, clears any EXIT trap, writes
#          session-b.json / session-b.stderr under RESULTS_DIR.
#          Exits the script with status 1 if the rules file is missing.
run_treatment() {
  local backup
  local rc=0

  echo ""
  echo "--- Session B: Treatment (guidance ENABLED) ---"

  enable_guidance
  # Guidance is already in place, so no restore-on-exit handler is needed.
  trap - EXIT

  if [ ! -f "$RULE_FILE" ]; then
    echo "ERROR: Rules file not found at $RULE_FILE" >&2
    exit 1
  fi
  record_verify "PRE-TREATMENT"
  echo "[ok] Rules file present ($(wc -c < "$RULE_FILE") bytes, verified)"

  # Clean workspace: park any pre-existing src/ so it is not mistaken for
  # generated output.
  if [ -d "$GEN_SRC" ]; then
    backup="$WORKSPACE/src.__bench_backup_$(date +%s)__"
    echo "[warning] Existing $GEN_SRC found. Moving to $backup"
    mv "$GEN_SRC" "$backup"
  fi

  # Run CC. A failing session must not abort the benchmark (partial output is
  # still captured below), but it must not pass unnoticed either.
  echo "[running] claude -p (Treatment) from $WORKSPACE ..."
  (cd "$WORKSPACE" && claude -p --output-format json --max-budget-usd "$BUDGET" \
    < "$PROMPT_FILE" > "$RESULTS_DIR/session-b.json" 2> "$RESULTS_DIR/session-b.stderr") || rc=$?
  if [ "$rc" -ne 0 ]; then
    echo "[warning] claude exited with status $rc; see $RESULTS_DIR/session-b.stderr" >&2
  fi

  record_verify "POST-TREATMENT"
  cleanup_generated "$RESULTS_DIR/treatment"
  echo "[ok] Treatment files saved to $RESULTS_DIR/treatment/"
}
|
|
182
|
+
|
|
183
|
+
# --- Pre-flight checks ---
|
|
184
|
+
# Fail fast on anything that would otherwise yield an empty or misleading run.
if [ ! -f "$PROMPT_FILE" ]; then
  echo "ERROR: prompt.txt not found at $PROMPT_FILE" >&2
  exit 1
fi

if [ ! -f "$RULE_SOURCE" ]; then
  echo "ERROR: Guidance source not found at $RULE_SOURCE" >&2
  exit 1
fi

# The sessions cd into the workspace and invoke claude with failures
# tolerated, so a missing workspace or CLI would silently produce empty
# results. Check both up front.
if [ ! -d "$WORKSPACE" ]; then
  echo "ERROR: Workspace directory not found at $WORKSPACE" >&2
  exit 1
fi

if ! command -v claude > /dev/null 2>&1; then
  echo "ERROR: 'claude' CLI not found on PATH" >&2
  exit 1
fi

case "$SESSION" in
  control | treatment | both) ;;
  *)
    echo "ERROR: Invalid session '$SESSION'. Use: control, treatment, or both" >&2
    exit 1
    ;;
esac

# Refuse to overwrite results from an earlier run with the same run id.
if [ "$SESSION" = "control" ] && [ -f "$RESULTS_DIR/session-a.json" ]; then
  echo "ERROR: Control session already exists in $RESULTS_DIR" >&2
  exit 1
fi
if [ "$SESSION" = "treatment" ] && [ -f "$RESULTS_DIR/session-b.json" ]; then
  echo "ERROR: Treatment session already exists in $RESULTS_DIR" >&2
  exit 1
fi
if [ "$SESSION" = "both" ] && [ -d "$RESULTS_DIR" ]; then
  echo "ERROR: Results directory already exists: $RESULTS_DIR" >&2
  exit 1
fi

mkdir -p "$RESULTS_DIR"

echo "=== Effectiveness Benchmark Run $RUN_ID ($SESSION) ==="
echo "Prompt:  $PROMPT_FILE"
echo "Results: $RESULTS_DIR"
|
|
220
|
+
|
|
221
|
+
# --- Execute ---
|
|
222
|
+
# Derive the scorer that pairs with the configured prompt so the hint printed
# below cannot drift from PROMPT_FILE:
#   prompt.txt -> score.sh, prompt-v2.txt -> score-v2.sh, prompt-v3.txt -> score-v3.sh
SCORE_SCRIPT="./bench/score.sh"
PROMPT_BASENAME="${PROMPT_FILE##*/}"
case "$PROMPT_BASENAME" in
  prompt*.txt)
    PROMPT_BASENAME="${PROMPT_BASENAME%.txt}"
    SCORE_SCRIPT="./bench/score${PROMPT_BASENAME#prompt}.sh"
    ;;
esac

case "$SESSION" in
  control)
    run_control
    echo ""
    echo "=== Control session complete ==="
    echo "Run treatment: ./bench/run.sh $RUN_ID treatment"
    ;;
  treatment)
    run_treatment
    echo ""
    echo "=== Treatment session complete ==="
    echo "Score with: $SCORE_SCRIPT $RUN_ID"
    ;;
  both)
    run_control
    run_treatment
    echo ""
    echo "=== Run $RUN_ID Complete ==="
    echo "Score with: $SCORE_SCRIPT $RUN_ID"
    ;;
esac
|