serenecode 0.1.1__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- serenecode-0.2.0/.env +1 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/PKG-INFO +58 -51
- {serenecode-0.1.1 → serenecode-0.2.0}/README.md +57 -42
- {serenecode-0.1.1 → serenecode-0.2.0}/SERENECODE.md +4 -1
- serenecode-0.2.0/examples/dosage-serenecode/CLAUDE.md +51 -0
- serenecode-0.2.0/examples/dosage-serenecode/SERENECODE.md +255 -0
- serenecode-0.2.0/examples/dosage-serenecode/SPEC.md +160 -0
- serenecode-0.2.0/examples/dosage-serenecode/pyproject.toml +11 -0
- serenecode-0.2.0/examples/dosage-serenecode/src/dosage/__init__.py +4 -0
- serenecode-0.2.0/examples/dosage-serenecode/src/dosage/core/__init__.py +4 -0
- serenecode-0.2.0/examples/dosage-serenecode/src/dosage/core/dosage.py +81 -0
- serenecode-0.2.0/examples/dosage-serenecode/src/dosage/core/models.py +120 -0
- serenecode-0.2.0/examples/dosage-serenecode/src/dosage/core/safety.py +85 -0
- serenecode-0.2.0/examples/dosage-serenecode/tests/unit/test_dosage.py +264 -0
- serenecode-0.2.0/examples/dosage-serenecode/tests/unit/test_models.py +399 -0
- serenecode-0.2.0/examples/dosage-serenecode/tests/unit/test_safety.py +270 -0
- serenecode-0.2.0/examples/dosage-serenecode/uv.lock +57 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/pyproject.toml +1 -10
- {serenecode-0.1.1 → serenecode-0.2.0}/src/serenecode/__init__.py +28 -12
- serenecode-0.2.0/src/serenecode/adapters/__init__.py +63 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/src/serenecode/adapters/coverage_adapter.py +33 -21
- {serenecode-0.1.1 → serenecode-0.2.0}/src/serenecode/adapters/crosshair_adapter.py +22 -14
- {serenecode-0.1.1 → serenecode-0.2.0}/src/serenecode/adapters/hypothesis_adapter.py +7 -8
- {serenecode-0.1.1 → serenecode-0.2.0}/src/serenecode/adapters/mypy_adapter.py +8 -5
- {serenecode-0.1.1 → serenecode-0.2.0}/src/serenecode/checker/compositional.py +7 -4
- {serenecode-0.1.1 → serenecode-0.2.0}/src/serenecode/checker/properties.py +2 -2
- serenecode-0.2.0/src/serenecode/checker/spec_traceability.py +488 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/src/serenecode/checker/structural.py +155 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/src/serenecode/checker/symbolic.py +2 -2
- {serenecode-0.1.1 → serenecode-0.2.0}/src/serenecode/cli.py +171 -13
- {serenecode-0.1.1 → serenecode-0.2.0}/src/serenecode/config.py +61 -23
- {serenecode-0.1.1 → serenecode-0.2.0}/src/serenecode/contracts/predicates.py +26 -4
- {serenecode-0.1.1 → serenecode-0.2.0}/src/serenecode/core/pipeline.py +200 -58
- {serenecode-0.1.1 → serenecode-0.2.0}/src/serenecode/init.py +108 -14
- {serenecode-0.1.1 → serenecode-0.2.0}/src/serenecode/models.py +56 -12
- {serenecode-0.1.1 → serenecode-0.2.0}/src/serenecode/ports/coverage_analyzer.py +2 -2
- {serenecode-0.1.1 → serenecode-0.2.0}/src/serenecode/reporter.py +31 -17
- {serenecode-0.1.1 → serenecode-0.2.0}/src/serenecode/source_discovery.py +76 -7
- {serenecode-0.1.1 → serenecode-0.2.0}/src/serenecode/templates/content.py +225 -2
- {serenecode-0.1.1 → serenecode-0.2.0}/tests/conftest.py +10 -2
- {serenecode-0.1.1 → serenecode-0.2.0}/tests/e2e/test_check_command.py +73 -4
- serenecode-0.2.0/tests/e2e/test_cli.py +7 -0
- serenecode-0.2.0/tests/e2e/test_init.py +7 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/tests/fixtures/valid/class_with_invariant.py +0 -1
- {serenecode-0.1.1 → serenecode-0.2.0}/tests/integration/test_adapter_internals.py +66 -0
- serenecode-0.2.0/tests/integration/test_local_fs.py +7 -0
- serenecode-0.2.0/tests/integration/test_module_loader.py +233 -0
- serenecode-0.2.0/tests/unit/checker/__init__.py +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/tests/unit/checker/test_coverage.py +4 -4
- serenecode-0.2.0/tests/unit/checker/test_spec_traceability.py +296 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/tests/unit/checker/test_structural.py +139 -5
- {serenecode-0.1.1 → serenecode-0.2.0}/tests/unit/checker/test_symbolic.py +3 -1
- serenecode-0.2.0/tests/unit/contracts/__init__.py +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/tests/unit/contracts/test_predicates.py +8 -1
- {serenecode-0.1.1 → serenecode-0.2.0}/tests/unit/contracts/test_predicates_hypothesis.py +12 -2
- {serenecode-0.1.1 → serenecode-0.2.0}/tests/unit/test_models.py +70 -1
- {serenecode-0.1.1 → serenecode-0.2.0}/tests/unit/test_pipeline.py +113 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/tests/unit/test_reporter.py +8 -2
- {serenecode-0.1.1 → serenecode-0.2.0}/tests/unit/test_source_discovery.py +64 -1
- {serenecode-0.1.1 → serenecode-0.2.0}/uv.lock +2 -18
- serenecode-0.1.1/examples/dosage-serenecode/CLAUDE.md +0 -31
- serenecode-0.1.1/examples/dosage-serenecode/SERENECODE.md +0 -52
- serenecode-0.1.1/examples/dosage-serenecode/pyproject.toml +0 -22
- serenecode-0.1.1/examples/dosage-serenecode/src/__init__.py +0 -1
- serenecode-0.1.1/examples/dosage-serenecode/src/core/__init__.py +0 -1
- serenecode-0.1.1/examples/dosage-serenecode/src/core/dosage.py +0 -261
- serenecode-0.1.1/examples/dosage-serenecode/src/core/models.py +0 -471
- serenecode-0.1.1/examples/dosage-serenecode/tests/__init__.py +0 -1
- serenecode-0.1.1/examples/dosage-serenecode/tests/conftest.py +0 -84
- serenecode-0.1.1/examples/dosage-serenecode/tests/test_dosage.py +0 -336
- serenecode-0.1.1/examples/dosage-serenecode/tests/test_models.py +0 -313
- serenecode-0.1.1/examples/dosage-serenecode/uv.lock +0 -701
- serenecode-0.1.1/src/serenecode/adapters/__init__.py +0 -6
- {serenecode-0.1.1 → serenecode-0.2.0}/.gitignore +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/CLAUDE.md +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/LICENSE +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/examples/DOSAGE_CALC_SPEC.md +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/examples/dosage-regular/dosage_calc.py +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/examples/dosage-regular/test_dosage_calc.py +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0/examples/dosage-serenecode}/tests/__init__.py +0 -0
- {serenecode-0.1.1/tests/e2e → serenecode-0.2.0/examples/dosage-serenecode/tests/unit}/__init__.py +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/serenecode.jpg +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/src/serenecode/adapters/local_fs.py +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/src/serenecode/adapters/module_loader.py +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/src/serenecode/checker/__init__.py +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/src/serenecode/checker/coverage.py +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/src/serenecode/checker/types.py +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/src/serenecode/contracts/__init__.py +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/src/serenecode/core/__init__.py +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/src/serenecode/core/exceptions.py +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/src/serenecode/ports/__init__.py +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/src/serenecode/ports/file_system.py +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/src/serenecode/ports/property_tester.py +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/src/serenecode/ports/symbolic_checker.py +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/src/serenecode/ports/type_checker.py +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/src/serenecode/templates/__init__.py +0 -0
- {serenecode-0.1.1/tests/integration → serenecode-0.2.0/tests}/__init__.py +0 -0
- {serenecode-0.1.1/tests/unit → serenecode-0.2.0/tests/e2e}/__init__.py +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/tests/e2e/test_init_command.py +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/tests/e2e/test_report_command.py +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/tests/e2e/test_status_command.py +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/tests/fixtures/edge_cases/aliased_import.py +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/tests/fixtures/edge_cases/async_functions.py +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/tests/fixtures/edge_cases/empty_module.py +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/tests/fixtures/edge_cases/from_import.py +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/tests/fixtures/invalid/broken_postcondition.py +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/tests/fixtures/invalid/io_in_core.py +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/tests/fixtures/invalid/missing_contracts.py +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/tests/fixtures/invalid/missing_invariant.py +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/tests/fixtures/invalid/missing_types.py +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/tests/fixtures/valid/full_module.py +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/tests/fixtures/valid/simple_function.py +0 -0
- {serenecode-0.1.1/tests/unit/checker → serenecode-0.2.0/tests/integration}/__init__.py +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/tests/integration/test_checkers_real_code.py +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/tests/integration/test_coverage_adapter.py +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/tests/integration/test_crosshair_adapter.py +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/tests/integration/test_example_projects.py +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/tests/integration/test_file_adapter.py +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/tests/integration/test_hypothesis_adapter.py +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/tests/integration/test_mypy_adapter.py +0 -0
- {serenecode-0.1.1/tests/unit/contracts → serenecode-0.2.0/tests/unit}/__init__.py +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/tests/unit/checker/test_compositional.py +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/tests/unit/checker/test_properties.py +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/tests/unit/checker/test_structural_hypothesis.py +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/tests/unit/checker/test_types.py +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/tests/unit/test_api.py +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/tests/unit/test_config.py +0 -0
- {serenecode-0.1.1 → serenecode-0.2.0}/tests/unit/test_models_hypothesis.py +0 -0
serenecode-0.2.0/.env
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
PYPI_TOKEN=<REDACTED — leaked PyPI API token removed; the original credential must be revoked>
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: serenecode
|
|
3
|
-
Version: 0.1.1
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: Verification framework for AI-generated Python — test coverage, property testing, and symbolic execution
|
|
5
5
|
Project-URL: Homepage, https://github.com/helgster77/serenecode
|
|
6
6
|
Project-URL: Repository, https://github.com/helgster77/serenecode
|
|
@@ -27,16 +27,8 @@ Requires-Dist: hypothesis>=6.0
|
|
|
27
27
|
Requires-Dist: icontract>=2.7.0
|
|
28
28
|
Requires-Dist: mypy>=1.0
|
|
29
29
|
Provides-Extra: dev
|
|
30
|
-
Requires-Dist: crosshair-tool>=0.0.60; extra == 'dev'
|
|
31
|
-
Requires-Dist: hypothesis>=6.0; extra == 'dev'
|
|
32
|
-
Requires-Dist: mypy>=1.0; extra == 'dev'
|
|
33
30
|
Requires-Dist: pytest-cov>=4.0; extra == 'dev'
|
|
34
31
|
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
35
|
-
Provides-Extra: verify
|
|
36
|
-
Requires-Dist: coverage>=7.0; extra == 'verify'
|
|
37
|
-
Requires-Dist: crosshair-tool>=0.0.60; extra == 'verify'
|
|
38
|
-
Requires-Dist: hypothesis>=6.0; extra == 'verify'
|
|
39
|
-
Requires-Dist: mypy>=1.0; extra == 'verify'
|
|
40
32
|
Description-Content-Type: text/markdown
|
|
41
33
|
|
|
42
34
|
<p align="center">
|
|
@@ -45,9 +37,9 @@ Description-Content-Type: text/markdown
|
|
|
45
37
|
|
|
46
38
|
<h3 align="center">A Framework for AI-Driven Development of Verifiable Systems</h3>
|
|
47
39
|
|
|
48
|
-
SereneCode is a
|
|
40
|
+
SereneCode is a spec-to-verified-implementation framework for AI-generated Python. It ensures that every requirement in your spec is implemented, tested, and formally verified — closing the gap between what you asked for and what the AI built. The workflow starts from a spec with traceable requirements (REQ-xxx), enforces that the AI writes verifiable code with contracts and tests, then verifies at multiple levels — from structural checks and test coverage through property-based testing to symbolic execution with an SMT solver. You choose the verification depth during interactive setup: lightweight for internal tools, balanced for production systems, strict for safety-critical code. AI agents write code fast but can miss requirements and skip edge cases; SereneCode closes that gap with spec traceability, test-existence enforcement, and formal verification.
|
|
49
41
|
|
|
50
|
-
> **This framework was bootstrapped with AI under its own rules.** SereneCode's SERENECODE.md was written before the first line of code, and the codebase has been developed under those conventions from the start. The current tree passes its own `serenecode check src --level 6 --allow-code-execution`, an internal strict-config Level 6 self-check in the test suite, `mypy src examples/dosage-serenecode/src`, the shipped example's
|
|
42
|
+
> **This framework was bootstrapped with AI under its own rules.** SereneCode's SERENECODE.md was written before the first line of code, and the codebase has been developed under those conventions from the start. The current tree passes its own `serenecode check src --level 6 --allow-code-execution`, an internal strict-config Level 6 self-check in the test suite, `mypy src examples/dosage-serenecode/src`, the shipped example's check, and the full `pytest` suite (769 passing tests, 16 skipped). The verification output is transparent about scope: exempt modules (adapters, CLI, ports) and functions excluded from deep verification (non-primitive parameter types) are reported as "exempt" rather than silently omitted.
|
|
51
43
|
|
|
52
44
|
---
|
|
53
45
|
|
|
@@ -61,17 +53,17 @@ SereneCode is designed for **building new verifiable systems from scratch with A
|
|
|
61
53
|
|
|
62
54
|
### Choosing the Right Level
|
|
63
55
|
|
|
64
|
-
The cost of verification should be proportional to the cost of a bug. Each level generates a different SERENECODE.md with different requirements for the AI, so the choice shapes how code is *written*, not just how it's checked.
|
|
56
|
+
The cost of verification should be proportional to the cost of a bug. Each level generates a different SERENECODE.md with different requirements for the AI, so the choice shapes how code is *written*, not just how it's checked. You make this choice during `serenecode init` — it cannot be changed after implementation starts.
|
|
65
57
|
|
|
66
|
-
| |
|
|
58
|
+
| | **Minimal** (Level 2) | **Default** (Level 4) | **Strict** (Level 6) |
|
|
67
59
|
|---|---|---|---|
|
|
68
60
|
| **Verifies through** | L2 (structure + types) | L4 (+ test coverage + properties) | L6 (+ symbolic + compositional) |
|
|
69
61
|
| **What the AI must write** | Contracts on public functions, type annotations | + description strings, class invariants, hexagonal architecture | + contracts on *all* functions, loop invariants, domain exceptions, no exemptions |
|
|
70
62
|
| **What catches bugs** | Runtime contract checks, mypy | + L3 surfaces untested code paths and generates test suggestions; L4 tests contracts against hundreds of random inputs | + SMT solver searches for *any* counterexample within analysis bounds |
|
|
71
|
-
| **Good for** | Internal tools, scripts, prototypes
|
|
63
|
+
| **Good for** | Internal tools, scripts, prototypes | Production APIs, business logic, data pipelines | Medical, financial, infrastructure, regulated systems |
|
|
72
64
|
| **The tradeoff** | Low ceremony, but contracts are only checked at the boundaries you wrote them | Moderate overhead; architecture rules keep core logic pure and testable | Significant overhead — every loop gets an invariant comment, every helper gets a contract. Justified when the cost of an undiscovered bug is measured in patient harm, financial loss, or regulatory failure |
|
|
73
65
|
|
|
74
|
-
Pick the level that matches the stakes
|
|
66
|
+
Pick the level that matches the stakes. Safety-critical code should start at Strict.
|
|
75
67
|
|
|
76
68
|
---
|
|
77
69
|
|
|
@@ -87,44 +79,49 @@ Both versions implement the same requirements, and the plain version passes its
|
|
|
87
79
|
|---|---|---|
|
|
88
80
|
| **Dose never exceeds maximum** | Covered by unit tests | Encoded as a postcondition; bounded symbolic search found no counterexample within analysis bounds |
|
|
89
81
|
| **Renal adjustment never increases a dose** | Covered by unit tests | `result <= dose_mg` is an executable contract, not just a test expectation |
|
|
90
|
-
| **Safety result is internally consistent** | No validation — you can construct `SafetyResult(total=9999, max=100, is_safe=True)` |
|
|
91
|
-
| **Objects are truly immutable** | `frozen=True` with mutable `set` on Drug | `
|
|
82
|
+
| **Safety result is internally consistent** | No validation — you can construct `SafetyResult(total=9999, max=100, is_safe=True)` | Postcondition on `check_daily_safety` enforces `is_safe == (total <= max)` — inconsistent results cannot be produced through the contracted API |
|
|
83
|
+
| **Objects are truly immutable** | `frozen=True` with mutable `set` on Drug | `frozen=True` with class invariants enforcing valid state — mutations raise `FrozenInstanceError` and invariants guarantee internal consistency |
|
|
92
84
|
| **Boundary behavior (CrCl exactly 30.0)** | Covered by explicit tests | Boundary behavior is specified in contracts; bounded symbolic search found no counterexample |
|
|
93
85
|
| **What if someone changes the code later?** | You rely on the tests you remembered to keep | Contracts stay attached to the code and keep checking every contracted call |
|
|
94
|
-
| **Can a solver verify it?** | No executable specification for a solver to target |
|
|
86
|
+
| **Can a solver verify it?** | No executable specification for a solver to target | 42 executable contracts and a clean `serenecode check ... --level 6 --allow-code-execution` run |
|
|
95
87
|
| **Confidence in a safety-critical setting** | Better than ad hoc code, but still test-shaped confidence | Higher: behavior is formally specified, runtime-checked, and solver-checked within analysis bounds — but bounded search is not proof |
|
|
96
88
|
|
|
97
|
-
The plain version relies on 59 tests that check specific scenarios. The SereneCode version adds
|
|
89
|
+
The plain version relies on 59 tests that check specific scenarios. The SereneCode version adds 42 executable contracts across its domain models and core dosage logic. Those contracts define *what correct means* in code, get checked at runtime, and give CrossHair/Z3 something precise to search against when looking for counterexamples within analysis bounds.
|
|
98
90
|
|
|
99
91
|
> Both examples live in [`examples/dosage-regular/`](examples/dosage-regular/) and [`examples/dosage-serenecode/`](examples/dosage-serenecode/). Read them side by side.
|
|
100
92
|
|
|
101
|
-
The Serenecode dosage example currently passes `serenecode check
|
|
93
|
+
The SereneCode dosage example currently passes `serenecode check src/ --level 6 --allow-code-execution` from within the example directory. Its local `pytest` suite is also green with 67 passing tests.
|
|
102
94
|
|
|
103
95
|
---
|
|
104
96
|
|
|
105
97
|
## How It Works
|
|
106
98
|
|
|
107
|
-
### 1.
|
|
99
|
+
### 1. Interactive Setup — `serenecode init`
|
|
108
100
|
|
|
109
|
-
|
|
101
|
+
Run `serenecode init` and answer two questions:
|
|
102
|
+
|
|
103
|
+
**Spec question:** Do you already have a spec, or will you write one with your coding assistant? Both options set up spec traceability with REQ-xxx requirement identifiers — the difference is the workflow your assistant follows.
|
|
104
|
+
|
|
105
|
+
**Verification level:** Minimal (L2), Default (L4), or Strict (L6). This determines what conventions your SERENECODE.md will require and cannot be changed after implementation starts.
|
|
110
106
|
|
|
111
107
|
```bash
|
|
112
|
-
serenecode init
|
|
113
|
-
serenecode init --strict # maximum rigor — contracts on ALL functions (public and private), no exemptions
|
|
114
|
-
serenecode init --minimal # lightweight — public-function contracts only, relaxed architecture rules
|
|
108
|
+
serenecode init
|
|
115
109
|
```
|
|
116
110
|
|
|
117
|
-
This creates
|
|
111
|
+
This creates SERENECODE.md (project conventions including spec traceability) and CLAUDE.md (instructions for your AI coding assistant) tailored to your answers. The conventions become the contract between you, your coding assistant, and the verification tool. SERENECODE.md includes instructions for converting raw specs into SereneCode format (REQ-xxx identifiers), validating them with `serenecode spec SPEC.md`, creating an implementation plan, and building from it — the coding agent handles this workflow automatically.
|
|
118
112
|
|
|
119
|
-
### 2. The Checker —
|
|
113
|
+
### 2. The Checker — Structural Enforcement
|
|
120
114
|
|
|
121
|
-
A lightweight AST-based
|
|
115
|
+
A lightweight AST-based checker that validates code follows SERENECODE.md conventions in seconds. Missing a postcondition? No class invariant? No test file for a module? Caught before you waste time on heavy verification.
|
|
122
116
|
|
|
123
117
|
```bash
|
|
124
|
-
serenecode check src/ --structural
|
|
118
|
+
serenecode check src/ --structural # structural conventions
|
|
119
|
+
serenecode check src/ --spec SPEC.md # + spec traceability
|
|
125
120
|
```
|
|
126
121
|
|
|
127
|
-
|
|
122
|
+
The `--spec` flag verifies that every REQ in the spec has an `Implements: REQ-xxx` tag in the code and a `Verifies: REQ-xxx` tag in the tests. No requirement goes unimplemented or untested.
|
|
123
|
+
|
|
124
|
+
### 3. The Verifier — Deep Verification
|
|
128
125
|
|
|
129
126
|
A six-level verification pipeline that escalates from fast checks to full symbolic verification:
|
|
130
127
|
|
|
@@ -141,7 +138,7 @@ A six-level verification pipeline that escalates from fast checks to full symbol
|
|
|
141
138
|
serenecode check src/ --level 6 --allow-code-execution # verify it
|
|
142
139
|
```
|
|
143
140
|
|
|
144
|
-
**L3 Test Coverage** is where SereneCode checks that the AI's tests actually exercise the code it wrote. AI agents can be suboptimal at writing tests — they tend to cover the happy path, skip edge cases, and miss error branches. L3 runs your existing tests under coverage.py tracing, measures per-function line and branch coverage, and reports exactly which lines and branches are untested. For each coverage gap, it generates concrete test suggestions including mock necessity assessments: each dependency is classified as REQUIRED (external I/O — must mock) or OPTIONAL (internal code — consider using the real implementation). This gives the AI agent actionable feedback to improve its own tests rather than leaving coverage gaps undetected. When no tests exist for a module, L3 reports this as
|
|
141
|
+
**L3 Test Coverage** is where SereneCode checks that the AI's tests actually exercise the code it wrote. AI agents can be suboptimal at writing tests — they tend to cover the happy path, skip edge cases, and miss error branches. L3 runs your existing tests under coverage.py tracing, measures per-function line and branch coverage, and reports exactly which lines and branches are untested. For each coverage gap, it generates concrete test suggestions including mock necessity assessments: each dependency is classified as REQUIRED (external I/O — must mock) or OPTIONAL (internal code — consider using the real implementation). This gives the AI agent actionable feedback to improve its own tests rather than leaving coverage gaps undetected. When no tests exist for a module, L3 reports this as a failure — missing tests must be written. At L1, the structural checker also verifies that every non-exempt source module has a corresponding `test_<module>.py` file.
|
|
145
142
|
|
|
146
143
|
The full pipeline is thorough but not instant. Larger systems will take longer, and the deepest runs may surface skipped items when Hypothesis cannot synthesize valid values for complex domain types or when CrossHair hits its time budget. By default, L5 focuses on contracted top-level functions defined in each module and skips modules or signatures that are currently poor fits for direct symbolic execution, such as adapter/composition-root code, helper predicate modules, and object-heavy APIs. Not everything needs L5/L6. Critical paths get full symbolic and compositional verification. Utility functions get property testing. A Level 4 run only counts as achieved when at least one contracted property target was actually exercised.
|
|
147
144
|
|
|
@@ -153,19 +150,21 @@ Scoped targets keep their package/import context across verification levels. In
|
|
|
153
150
|
|
|
154
151
|
## The AI Agent Loop
|
|
155
152
|
|
|
156
|
-
SereneCode is designed for
|
|
153
|
+
SereneCode is designed for spec-driven development with AI agents:
|
|
157
154
|
|
|
158
155
|
```
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
156
|
+
serenecode init → interactive setup: spec mode + verification level
|
|
157
|
+
serenecode spec SPEC.md → validate spec is ready (REQ-xxx format, no gaps)
|
|
158
|
+
AI reads SERENECODE.md + SPEC.md → knows the conventions and what to build
|
|
159
|
+
AI implements from spec → Implements: REQ-xxx in docstrings, contracts, tests
|
|
160
|
+
serenecode check src/ --spec SPEC.md --structural → did the AI follow rules? all REQs covered?
|
|
161
|
+
serenecode check src/ --level 5 --allow-code-execution --spec SPEC.md → deep verification
|
|
162
|
+
AI reads findings → missing REQs, counterexamples, untested paths
|
|
163
|
+
AI fixes the code → adjusts implementation, adds tests, closes gaps
|
|
164
|
+
Repeat until verified → all REQs implemented + tested + no counterexamples
|
|
166
165
|
```
|
|
167
166
|
|
|
168
|
-
AI-generated code won't always pass verification on the first try — and that's the point. SereneCode gives the coding agent structured feedback on exactly what failed and why: counterexamples, violated contracts, and suggested fixes.
|
|
167
|
+
AI-generated code won't always pass verification on the first try — and that's the point. SereneCode gives the coding agent structured feedback on exactly what failed and why: missing requirement implementations, counterexamples, violated contracts, untested modules, and suggested fixes. When there are many findings, SereneCode suggests the agent spawn subagents to address groups of related issues in parallel. **The value isn't in one-shotting perfection — it's in the loop that converges on verified completeness and correctness.**
|
|
169
168
|
|
|
170
169
|
Works in Claude Code, works in the terminal, works in CI:
|
|
171
170
|
|
|
@@ -190,7 +189,7 @@ SereneCode isn't just a tool that *tells* you to write verified code. It *is* ve
|
|
|
190
189
|
|
|
191
190
|
The SERENECODE.md convention file was the first artifact created — before any Python was written. The framework has been developed under those conventions with AI as a first-class contributor, and the repository continuously checks itself with:
|
|
192
191
|
|
|
193
|
-
- `pytest` across the full suite (currently
|
|
192
|
+
- `pytest` across the full suite (currently 769 passing tests, 16 skipped)
|
|
194
193
|
- `mypy --strict` across `src/` and `examples/dosage-serenecode/src/`
|
|
195
194
|
- SereneCode's own structural, type, property, symbolic, and compositional passes
|
|
196
195
|
|
|
@@ -206,26 +205,32 @@ At Level 5, CrossHair and Z3 search for counterexamples across the codebase's sy
|
|
|
206
205
|
# Install from PyPI
|
|
207
206
|
pip install serenecode
|
|
208
207
|
|
|
209
|
-
# Initialize
|
|
208
|
+
# Initialize — interactive setup (spec mode + verification level)
|
|
210
209
|
serenecode init
|
|
211
210
|
|
|
212
|
-
#
|
|
213
|
-
#
|
|
214
|
-
|
|
211
|
+
# Place your spec in the project directory, then start a coding session.
|
|
212
|
+
# Your agent reads SERENECODE.md, converts the spec to REQ-xxx format,
|
|
213
|
+
# validates it, creates an implementation plan, and builds from it.
|
|
214
|
+
|
|
215
|
+
# Verify structure + spec traceability:
|
|
216
|
+
serenecode check src/ --spec SPEC.md --structural
|
|
215
217
|
|
|
216
|
-
#
|
|
217
|
-
serenecode check src/
|
|
218
|
+
# Go deep — test coverage, property testing, symbolic verification:
|
|
219
|
+
serenecode check src/ --level 5 --allow-code-execution --spec SPEC.md
|
|
218
220
|
```
|
|
219
221
|
|
|
220
|
-
JSON output includes top-level `passed`, `level_requested`, and `level_achieved` fields alongside the summary and per-function results.
|
|
222
|
+
JSON output (via `--format json`) includes top-level `passed`, `level_requested`, and `level_achieved` fields alongside the summary and per-function results.
|
|
221
223
|
|
|
222
|
-
When you verify a nested package or a single module, Serenecode
|
|
224
|
+
When you verify a nested package or a single module, SereneCode preserves the package root and module-path context used by mypy, Hypothesis, CrossHair, and the architectural checks. That lets package-local absolute imports, relative imports, and scoped core-module rules behave the same way they do in project-wide runs.
|
|
223
225
|
|
|
224
226
|
## CLI Reference
|
|
225
227
|
|
|
226
228
|
```bash
|
|
227
|
-
serenecode init [<path>]
|
|
229
|
+
serenecode init [<path>] # interactive setup
|
|
230
|
+
serenecode spec <SPEC.md> # validate spec readiness
|
|
231
|
+
[--format human|json]
|
|
228
232
|
serenecode check [<path>] [--level 1-6] [--allow-code-execution] # run verification
|
|
233
|
+
[--spec SPEC.md] # spec traceability
|
|
229
234
|
[--format human|json] # output format
|
|
230
235
|
[--structural] [--verify] # L1 only / L3-6 only
|
|
231
236
|
[--per-condition-timeout N] # L5 CrossHair budgets
|
|
@@ -269,10 +274,12 @@ SereneCode is honest about what it can and can't do:
|
|
|
269
274
|
SereneCode follows hexagonal architecture — the same pattern it enforces on your code:
|
|
270
275
|
|
|
271
276
|
```
|
|
272
|
-
CLI / Library API ← composition roots
|
|
277
|
+
CLI / Library API ← composition roots (interactive init, spec validation)
|
|
273
278
|
│
|
|
274
279
|
├──▸ Pipeline ← orchestrates L1 → L2 → L3 → L4 → L5 → L6
|
|
275
280
|
│ ├──▸ Structural Checker (ast)
|
|
281
|
+
│ ├──▸ Spec Traceability (REQ-xxx → Implements/Verifies)
|
|
282
|
+
│ ├──▸ Test Existence (test_<module>.py discovery)
|
|
276
283
|
│ ├──▸ Type Checker (mypy)
|
|
277
284
|
│ ├──▸ Coverage Analyzer (coverage.py)
|
|
278
285
|
│ ├──▸ Property Tester (Hypothesis)
|
|
@@ -4,9 +4,9 @@
|
|
|
4
4
|
|
|
5
5
|
<h3 align="center">A Framework for AI-Driven Development of Verifiable Systems</h3>
|
|
6
6
|
|
|
7
|
-
SereneCode is a
|
|
7
|
+
SereneCode is a spec-to-verified-implementation framework for AI-generated Python. It ensures that every requirement in your spec is implemented, tested, and formally verified — closing the gap between what you asked for and what the AI built. The workflow starts from a spec with traceable requirements (REQ-xxx), enforces that the AI writes verifiable code with contracts and tests, then verifies at multiple levels — from structural checks and test coverage through property-based testing to symbolic execution with an SMT solver. You choose the verification depth during interactive setup: lightweight for internal tools, balanced for production systems, strict for safety-critical code. AI agents write code fast but can miss requirements and skip edge cases; SereneCode closes that gap with spec traceability, test-existence enforcement, and formal verification.
|
|
8
8
|
|
|
9
|
-
> **This framework was bootstrapped with AI under its own rules.** SereneCode's SERENECODE.md was written before the first line of code, and the codebase has been developed under those conventions from the start. The current tree passes its own `serenecode check src --level 6 --allow-code-execution`, an internal strict-config Level 6 self-check in the test suite, `mypy src examples/dosage-serenecode/src`, the shipped example's
|
|
9
|
+
> **This framework was bootstrapped with AI under its own rules.** SereneCode's SERENECODE.md was written before the first line of code, and the codebase has been developed under those conventions from the start. The current tree passes its own `serenecode check src --level 6 --allow-code-execution`, an internal strict-config Level 6 self-check in the test suite, `mypy src examples/dosage-serenecode/src`, the shipped example's check, and the full `pytest` suite (769 passing tests, 16 skipped). The verification output is transparent about scope: exempt modules (adapters, CLI, ports) and functions excluded from deep verification (non-primitive parameter types) are reported as "exempt" rather than silently omitted.
|
|
10
10
|
|
|
11
11
|
---
|
|
12
12
|
|
|
@@ -20,17 +20,17 @@ SereneCode is designed for **building new verifiable systems from scratch with A
|
|
|
20
20
|
|
|
21
21
|
### Choosing the Right Level
|
|
22
22
|
|
|
23
|
-
The cost of verification should be proportional to the cost of a bug. Each level generates a different SERENECODE.md with different requirements for the AI, so the choice shapes how code is *written*, not just how it's checked.
|
|
23
|
+
The cost of verification should be proportional to the cost of a bug. Each level generates a different SERENECODE.md with different requirements for the AI, so the choice shapes how code is *written*, not just how it's checked. You make this choice during `serenecode init` — it cannot be changed after implementation starts.
|
|
24
24
|
|
|
25
|
-
| |
|
|
25
|
+
| | **Minimal** (Level 2) | **Default** (Level 4) | **Strict** (Level 6) |
|
|
26
26
|
|---|---|---|---|
|
|
27
27
|
| **Verifies through** | L2 (structure + types) | L4 (+ test coverage + properties) | L6 (+ symbolic + compositional) |
|
|
28
28
|
| **What the AI must write** | Contracts on public functions, type annotations | + description strings, class invariants, hexagonal architecture | + contracts on *all* functions, loop invariants, domain exceptions, no exemptions |
|
|
29
29
|
| **What catches bugs** | Runtime contract checks, mypy | + L3 surfaces untested code paths and generates test suggestions; L4 tests contracts against hundreds of random inputs | + SMT solver searches for *any* counterexample within analysis bounds |
|
|
30
|
-
| **Good for** | Internal tools, scripts, prototypes
|
|
30
|
+
| **Good for** | Internal tools, scripts, prototypes | Production APIs, business logic, data pipelines | Medical, financial, infrastructure, regulated systems |
|
|
31
31
|
| **The tradeoff** | Low ceremony, but contracts are only checked at the boundaries you wrote them | Moderate overhead; architecture rules keep core logic pure and testable | Significant overhead — every loop gets an invariant comment, every helper gets a contract. Justified when the cost of an undiscovered bug is measured in patient harm, financial loss, or regulatory failure |
|
|
32
32
|
|
|
33
|
-
Pick the level that matches the stakes
|
|
33
|
+
Pick the level that matches the stakes. Safety-critical code should start at Strict.
|
|
34
34
|
|
|
35
35
|
---
|
|
36
36
|
|
|
@@ -46,44 +46,49 @@ Both versions implement the same requirements, and the plain version passes its
|
|
|
46
46
|
|---|---|---|
|
|
47
47
|
| **Dose never exceeds maximum** | Covered by unit tests | Encoded as a postcondition; bounded symbolic search found no counterexample within analysis bounds |
|
|
48
48
|
| **Renal adjustment never increases a dose** | Covered by unit tests | `result <= dose_mg` is an executable contract, not just a test expectation |
|
|
49
|
-
| **Safety result is internally consistent** | No validation — you can construct `SafetyResult(total=9999, max=100, is_safe=True)` |
|
|
50
|
-
| **Objects are truly immutable** | `frozen=True` with mutable `set` on Drug | `
|
|
49
|
+
| **Safety result is internally consistent** | No validation — you can construct `SafetyResult(total=9999, max=100, is_safe=True)` | Postcondition on `check_daily_safety` enforces `is_safe == (total <= max)` — inconsistent results cannot be produced through the contracted API |
|
|
50
|
+
| **Objects are truly immutable** | `frozen=True` with mutable `set` on Drug | `frozen=True` with class invariants enforcing valid state — mutations raise `FrozenInstanceError` and invariants guarantee internal consistency |
|
|
51
51
|
| **Boundary behavior (CrCl exactly 30.0)** | Covered by explicit tests | Boundary behavior is specified in contracts; bounded symbolic search found no counterexample |
|
|
52
52
|
| **What if someone changes the code later?** | You rely on the tests you remembered to keep | Contracts stay attached to the code and keep checking every contracted call |
|
|
53
|
-
| **Can a solver verify it?** | No executable specification for a solver to target |
|
|
53
|
+
| **Can a solver verify it?** | No executable specification for a solver to target | 42 executable contracts and a clean `serenecode check ... --level 6 --allow-code-execution` run |
|
|
54
54
|
| **Confidence in a safety-critical setting** | Better than ad hoc code, but still test-shaped confidence | Higher: behavior is formally specified, runtime-checked, and solver-checked within analysis bounds — but bounded search is not proof |
|
|
55
55
|
|
|
56
|
-
The plain version relies on 59 tests that check specific scenarios. The SereneCode version adds
|
|
56
|
+
The plain version relies on 59 tests that check specific scenarios. The SereneCode version adds 42 executable contracts across its domain models and core dosage logic. Those contracts define *what correct means* in code, get checked at runtime, and give CrossHair/Z3 something precise to search against when looking for counterexamples within analysis bounds.
|
|
57
57
|
|
|
58
58
|
> Both examples live in [`examples/dosage-regular/`](examples/dosage-regular/) and [`examples/dosage-serenecode/`](examples/dosage-serenecode/). Read them side by side.
|
|
59
59
|
|
|
60
|
-
The Serenecode dosage example currently passes `serenecode check
|
|
60
|
+
The SereneCode dosage example currently passes `serenecode check src/ --level 6 --allow-code-execution` from within the example directory. Its local `pytest` suite is also green with 67 passing tests.
|
|
61
61
|
|
|
62
62
|
---
|
|
63
63
|
|
|
64
64
|
## How It Works
|
|
65
65
|
|
|
66
|
-
### 1.
|
|
66
|
+
### 1. Interactive Setup — `serenecode init`
|
|
67
67
|
|
|
68
|
-
|
|
68
|
+
Run `serenecode init` and answer two questions:
|
|
69
|
+
|
|
70
|
+
**Spec question:** Do you already have a spec, or will you write one with your coding assistant? Both options set up spec traceability with REQ-xxx requirement identifiers — the difference is the workflow your assistant follows.
|
|
71
|
+
|
|
72
|
+
**Verification level:** Minimal (L2), Default (L4), or Strict (L6). This determines what conventions your SERENECODE.md will require and cannot be changed after implementation starts.
|
|
69
73
|
|
|
70
74
|
```bash
|
|
71
|
-
serenecode init
|
|
72
|
-
serenecode init --strict # maximum rigor — contracts on ALL functions (public and private), no exemptions
|
|
73
|
-
serenecode init --minimal # lightweight — public-function contracts only, relaxed architecture rules
|
|
75
|
+
serenecode init
|
|
74
76
|
```
|
|
75
77
|
|
|
76
|
-
This creates
|
|
78
|
+
This creates SERENECODE.md (project conventions including spec traceability) and CLAUDE.md (instructions for your AI coding assistant) tailored to your answers. The conventions become the contract between you, your coding assistant, and the verification tool. SERENECODE.md includes instructions for converting raw specs into SereneCode format (REQ-xxx identifiers), validating them with `serenecode spec SPEC.md`, creating an implementation plan, and building from it — the coding agent handles this workflow automatically.
|
|
77
79
|
|
|
78
|
-
### 2. The Checker —
|
|
80
|
+
### 2. The Checker — Structural Enforcement
|
|
79
81
|
|
|
80
|
-
A lightweight AST-based
|
|
82
|
+
A lightweight AST-based checker that validates code follows SERENECODE.md conventions in seconds. Missing a postcondition? No class invariant? No test file for a module? Caught before you waste time on heavy verification.
|
|
81
83
|
|
|
82
84
|
```bash
|
|
83
|
-
serenecode check src/ --structural
|
|
85
|
+
serenecode check src/ --structural # structural conventions
|
|
86
|
+
serenecode check src/ --spec SPEC.md # + spec traceability
|
|
84
87
|
```
|
|
85
88
|
|
|
86
|
-
|
|
89
|
+
The `--spec` flag verifies that every REQ in the spec has an `Implements: REQ-xxx` tag in the code and a `Verifies: REQ-xxx` tag in the tests. No requirement goes unimplemented or untested.
|
|
90
|
+
|
|
91
|
+
### 3. The Verifier — Deep Verification
|
|
87
92
|
|
|
88
93
|
A six-level verification pipeline that escalates from fast checks to full symbolic verification:
|
|
89
94
|
|
|
@@ -100,7 +105,7 @@ A six-level verification pipeline that escalates from fast checks to full symbol
|
|
|
100
105
|
serenecode check src/ --level 6 --allow-code-execution # verify it
|
|
101
106
|
```
|
|
102
107
|
|
|
103
|
-
**L3 Test Coverage** is where SereneCode checks that the AI's tests actually exercise the code it wrote. AI agents can be suboptimal at writing tests — they tend to cover the happy path, skip edge cases, and miss error branches. L3 runs your existing tests under coverage.py tracing, measures per-function line and branch coverage, and reports exactly which lines and branches are untested. For each coverage gap, it generates concrete test suggestions including mock necessity assessments: each dependency is classified as REQUIRED (external I/O — must mock) or OPTIONAL (internal code — consider using the real implementation). This gives the AI agent actionable feedback to improve its own tests rather than leaving coverage gaps undetected. When no tests exist for a module, L3 reports this as
|
|
108
|
+
**L3 Test Coverage** is where SereneCode checks that the AI's tests actually exercise the code it wrote. AI agents can be suboptimal at writing tests — they tend to cover the happy path, skip edge cases, and miss error branches. L3 runs your existing tests under coverage.py tracing, measures per-function line and branch coverage, and reports exactly which lines and branches are untested. For each coverage gap, it generates concrete test suggestions including mock necessity assessments: each dependency is classified as REQUIRED (external I/O — must mock) or OPTIONAL (internal code — consider using the real implementation). This gives the AI agent actionable feedback to improve its own tests rather than leaving coverage gaps undetected. When no tests exist for a module, L3 reports this as a failure — missing tests must be written. At L1, the structural checker also verifies that every non-exempt source module has a corresponding `test_<module>.py` file.
|
|
104
109
|
|
|
105
110
|
The full pipeline is thorough but not instant. Larger systems will take longer, and the deepest runs may surface skipped items when Hypothesis cannot synthesize valid values for complex domain types or when CrossHair hits its time budget. By default, L5 focuses on contracted top-level functions defined in each module and skips modules or signatures that are currently poor fits for direct symbolic execution, such as adapter/composition-root code, helper predicate modules, and object-heavy APIs. Not everything needs L5/L6. Critical paths get full symbolic and compositional verification. Utility functions get property testing. A Level 4 run only counts as achieved when at least one contracted property target was actually exercised.
|
|
106
111
|
|
|
@@ -112,19 +117,21 @@ Scoped targets keep their package/import context across verification levels. In
|
|
|
112
117
|
|
|
113
118
|
## The AI Agent Loop
|
|
114
119
|
|
|
115
|
-
SereneCode is designed for
|
|
120
|
+
SereneCode is designed for spec-driven development with AI agents:
|
|
116
121
|
|
|
117
122
|
```
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
123
|
+
serenecode init → interactive setup: spec mode + verification level
|
|
124
|
+
serenecode spec SPEC.md → validate spec is ready (REQ-xxx format, no gaps)
|
|
125
|
+
AI reads SERENECODE.md + SPEC.md → knows the conventions and what to build
|
|
126
|
+
AI implements from spec → Implements: REQ-xxx in docstrings, contracts, tests
|
|
127
|
+
serenecode check src/ --spec SPEC.md --structural → did the AI follow rules? all REQs covered?
|
|
128
|
+
serenecode check src/ --level 5 --allow-code-execution --spec SPEC.md → deep verification
|
|
129
|
+
AI reads findings → missing REQs, counterexamples, untested paths
|
|
130
|
+
AI fixes the code → adjusts implementation, adds tests, closes gaps
|
|
131
|
+
Repeat until verified → all REQs implemented + tested + no counterexamples
|
|
125
132
|
```
|
|
126
133
|
|
|
127
|
-
AI-generated code won't always pass verification on the first try — and that's the point. SereneCode gives the coding agent structured feedback on exactly what failed and why: counterexamples, violated contracts, and suggested fixes.
|
|
134
|
+
AI-generated code won't always pass verification on the first try — and that's the point. SereneCode gives the coding agent structured feedback on exactly what failed and why: missing requirement implementations, counterexamples, violated contracts, untested modules, and suggested fixes. When there are many findings, SereneCode suggests the agent spawn subagents to address groups of related issues in parallel. **The value isn't in one-shotting perfection — it's in the loop that converges on verified completeness and correctness.**
|
|
128
135
|
|
|
129
136
|
Works in Claude Code, works in the terminal, works in CI:
|
|
130
137
|
|
|
@@ -149,7 +156,7 @@ SereneCode isn't just a tool that *tells* you to write verified code. It *is* ve
|
|
|
149
156
|
|
|
150
157
|
The SERENECODE.md convention file was the first artifact created — before any Python was written. The framework has been developed under those conventions with AI as a first-class contributor, and the repository continuously checks itself with:
|
|
151
158
|
|
|
152
|
-
- `pytest` across the full suite (currently
|
|
159
|
+
- `pytest` across the full suite (currently 769 passing tests, 16 skipped)
|
|
153
160
|
- `mypy --strict` across `src/` and `examples/dosage-serenecode/src/`
|
|
154
161
|
- SereneCode's own structural, type, property, symbolic, and compositional passes
|
|
155
162
|
|
|
@@ -165,26 +172,32 @@ At Level 5, CrossHair and Z3 search for counterexamples across the codebase's sy
|
|
|
165
172
|
# Install from PyPI
|
|
166
173
|
pip install serenecode
|
|
167
174
|
|
|
168
|
-
# Initialize
|
|
175
|
+
# Initialize — interactive setup (spec mode + verification level)
|
|
169
176
|
serenecode init
|
|
170
177
|
|
|
171
|
-
#
|
|
172
|
-
#
|
|
173
|
-
|
|
178
|
+
# Place your spec in the project directory, then start a coding session.
|
|
179
|
+
# Your agent reads SERENECODE.md, converts the spec to REQ-xxx format,
|
|
180
|
+
# validates it, creates an implementation plan, and builds from it.
|
|
181
|
+
|
|
182
|
+
# Verify structure + spec traceability:
|
|
183
|
+
serenecode check src/ --spec SPEC.md --structural
|
|
174
184
|
|
|
175
|
-
#
|
|
176
|
-
serenecode check src/
|
|
185
|
+
# Go deep — test coverage, property testing, symbolic verification:
|
|
186
|
+
serenecode check src/ --level 5 --allow-code-execution --spec SPEC.md
|
|
177
187
|
```
|
|
178
188
|
|
|
179
|
-
JSON output includes top-level `passed`, `level_requested`, and `level_achieved` fields alongside the summary and per-function results.
|
|
189
|
+
JSON output (via `--format json`) includes top-level `passed`, `level_requested`, and `level_achieved` fields alongside the summary and per-function results.
|
|
180
190
|
|
|
181
|
-
When you verify a nested package or a single module, Serenecode
|
|
191
|
+
When you verify a nested package or a single module, SereneCode preserves the package root and module-path context used by mypy, Hypothesis, CrossHair, and the architectural checks. That lets package-local absolute imports, relative imports, and scoped core-module rules behave the same way they do in project-wide runs.
|
|
182
192
|
|
|
183
193
|
## CLI Reference
|
|
184
194
|
|
|
185
195
|
```bash
|
|
186
|
-
serenecode init [<path>]
|
|
196
|
+
serenecode init [<path>] # interactive setup
|
|
197
|
+
serenecode spec <SPEC.md> # validate spec readiness
|
|
198
|
+
[--format human|json]
|
|
187
199
|
serenecode check [<path>] [--level 1-6] [--allow-code-execution] # run verification
|
|
200
|
+
[--spec SPEC.md] # spec traceability
|
|
188
201
|
[--format human|json] # output format
|
|
189
202
|
[--structural] [--verify] # L1 only / L3-6 only
|
|
190
203
|
[--per-condition-timeout N] # L5 CrossHair budgets
|
|
@@ -228,10 +241,12 @@ SereneCode is honest about what it can and can't do:
|
|
|
228
241
|
SereneCode follows hexagonal architecture — the same pattern it enforces on your code:
|
|
229
242
|
|
|
230
243
|
```
|
|
231
|
-
CLI / Library API ← composition roots
|
|
244
|
+
CLI / Library API ← composition roots (interactive init, spec validation)
|
|
232
245
|
│
|
|
233
246
|
├──▸ Pipeline ← orchestrates L1 → L2 → L3 → L4 → L5 → L6
|
|
234
247
|
│ ├──▸ Structural Checker (ast)
|
|
248
|
+
│ ├──▸ Spec Traceability (REQ-xxx → Implements/Verifies)
|
|
249
|
+
│ ├──▸ Test Existence (test_<module>.py discovery)
|
|
235
250
|
│ ├──▸ Type Checker (mypy)
|
|
236
251
|
│ ├──▸ Coverage Analyzer (coverage.py)
|
|
237
252
|
│ ├──▸ Property Tester (Hypothesis)
|
|
@@ -311,7 +311,7 @@ Every meaningful code change in this project MUST come with verification. Writin
|
|
|
311
311
|
|
|
312
312
|
### Verification Tiers by Module Type
|
|
313
313
|
|
|
314
|
-
**Pure core modules** (`core/`, `checker/`, `models.py`, `contracts/`, `config.py`, `reporter.py
|
|
314
|
+
**Pure core modules** (`core/`, `checker/`, `models.py`, `contracts/`, `config.py`, `reporter.py`) should remain friendly to Serenecode's full pipeline:
|
|
315
315
|
1. Structural check — required contracts present on public functions and classes.
|
|
316
316
|
2. `mypy --strict` — zero errors.
|
|
317
317
|
3. Test coverage analysis through Serenecode's coverage adapter.
|
|
@@ -319,6 +319,8 @@ Every meaningful code change in this project MUST come with verification. Writin
|
|
|
319
319
|
5. Symbolic verification through CrossHair for symbolic-friendly contracted top-level functions within analysis bounds.
|
|
320
320
|
6. Example-based unit tests for edge cases, boundary conditions, regressions, and behavior that is important but awkward for automated strategy generation.
|
|
321
321
|
|
|
322
|
+
**Infrastructure modules** (`source_discovery.py`) use filesystem operations to locate and prepare source files. They are not pure core modules but should maintain contracts and full test coverage.
|
|
323
|
+
|
|
322
324
|
**Adapter and composition-root modules** (`adapters/`, `cli.py`, `__init__.py`, `init.py`) must pass:
|
|
323
325
|
1. `mypy --strict` — zero errors.
|
|
324
326
|
2. Integration or end-to-end tests that exercise real file system, subprocess, and CLI behavior.
|
|
@@ -473,6 +475,7 @@ Steps 1-3 may be done together, but steps 4-8 MUST NOT be skipped or deferred.
|
|
|
473
475
|
The following modules are exempt from full contract requirements due to their nature:
|
|
474
476
|
- `cli.py` — Thin CLI layer, tested via integration tests.
|
|
475
477
|
- `__init__.py` — Composition roots, tested via integration tests.
|
|
478
|
+
- `init.py` — Composition root for project initialization, tested via e2e tests.
|
|
476
479
|
- `adapters/` — I/O boundary code, tested via integration tests.
|
|
477
480
|
- `templates/` — Static markdown files, not code.
|
|
478
481
|
- `tests/fixtures/` — Intentionally broken or incomplete code used for testing the checker.
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
## Serenecode (Strict Mode)
|
|
2
|
+
|
|
3
|
+
All code in this project MUST follow the standards defined in SERENECODE.md. Read SERENECODE.md before writing or modifying any code. Every function — public and private — with caller-supplied inputs must have icontract preconditions, and every function must have postconditions. Every class must have invariants. No exemptions.
|
|
4
|
+
|
|
5
|
+
### Verification
|
|
6
|
+
|
|
7
|
+
After each work iteration (implementing a feature, fixing a bug, refactoring), you MUST run verification before considering the task complete. Do not skip this.
|
|
8
|
+
|
|
9
|
+
**Quick structural check (seconds):**
|
|
10
|
+
```bash
|
|
11
|
+
serenecode check src/ --structural
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
**Full verification with property testing (minutes):**
|
|
15
|
+
```bash
|
|
16
|
+
serenecode check src/ --level 4 --allow-code-execution
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
**Full verification including symbolic and compositional (minutes):**
|
|
20
|
+
```bash
|
|
21
|
+
serenecode check src/ --level 6 --allow-code-execution
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
**Spec traceability check:**
|
|
25
|
+
```bash
|
|
26
|
+
serenecode check src/ --spec SPEC.md
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
Levels 3-6 import and execute project modules. Only use `--allow-code-execution` for trusted code.
|
|
30
|
+
|
|
31
|
+
If verification fails, read the error messages and fix the issues. Each failure includes the function name, file, line number, and a suggested fix. Iterate until all checks pass. Do not commit code that fails verification.
|
|
32
|
+
|
|
33
|
+
### Testing
|
|
34
|
+
|
|
35
|
+
You MUST write tests for every function. Do not skip this.
|
|
36
|
+
|
|
37
|
+
- Unit tests for core functions in `tests/unit/`
|
|
38
|
+
- Integration tests for adapters in `tests/integration/`
|
|
39
|
+
- Property-based tests (Hypothesis) for pure functions
|
|
40
|
+
|
|
41
|
+
Run `pytest -q` before considering any task complete. Do not commit code without passing tests.
|
|
42
|
+
|
|
43
|
+
### Spec-Driven Workflow
|
|
44
|
+
|
|
45
|
+
This project has an existing spec document. Follow the Spec Traceability section in SERENECODE.md for the full workflow. The key steps are:
|
|
46
|
+
|
|
47
|
+
1. Read the existing spec and SERENECODE.md before writing any code.
|
|
48
|
+
2. If the spec is not already in SereneCode format (REQ-xxx headings), convert it into SPEC.md following the "Preparing a SereneCode-Ready Spec" instructions in SERENECODE.md. Validate with `serenecode spec SPEC.md`.
|
|
49
|
+
3. Create an implementation plan mapping each REQ to functions, modules, and contracts. Get user approval before writing code.
|
|
50
|
+
4. Implement and tag with `Implements: REQ-xxx`. Test and tag with `Verifies: REQ-xxx`.
|
|
51
|
+
5. Run `serenecode check src/ --spec SPEC.md` to verify full traceability.
|