serenecode 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. serenecode-0.1.0/.gitignore +42 -0
  2. serenecode-0.1.0/CLAUDE.md +70 -0
  3. serenecode-0.1.0/LICENSE +21 -0
  4. serenecode-0.1.0/PKG-INFO +298 -0
  5. serenecode-0.1.0/README.md +261 -0
  6. serenecode-0.1.0/SERENECODE.md +482 -0
  7. serenecode-0.1.0/examples/DOSAGE_CALC_SPEC.md +149 -0
  8. serenecode-0.1.0/examples/dosage-regular/dosage_calc.py +142 -0
  9. serenecode-0.1.0/examples/dosage-regular/test_dosage_calc.py +431 -0
  10. serenecode-0.1.0/examples/dosage-serenecode/CLAUDE.md +31 -0
  11. serenecode-0.1.0/examples/dosage-serenecode/SERENECODE.md +52 -0
  12. serenecode-0.1.0/examples/dosage-serenecode/pyproject.toml +22 -0
  13. serenecode-0.1.0/examples/dosage-serenecode/src/__init__.py +1 -0
  14. serenecode-0.1.0/examples/dosage-serenecode/src/core/__init__.py +1 -0
  15. serenecode-0.1.0/examples/dosage-serenecode/src/core/dosage.py +261 -0
  16. serenecode-0.1.0/examples/dosage-serenecode/src/core/models.py +471 -0
  17. serenecode-0.1.0/examples/dosage-serenecode/tests/__init__.py +1 -0
  18. serenecode-0.1.0/examples/dosage-serenecode/tests/conftest.py +84 -0
  19. serenecode-0.1.0/examples/dosage-serenecode/tests/test_dosage.py +336 -0
  20. serenecode-0.1.0/examples/dosage-serenecode/tests/test_models.py +313 -0
  21. serenecode-0.1.0/examples/dosage-serenecode/uv.lock +701 -0
  22. serenecode-0.1.0/pyproject.toml +98 -0
  23. serenecode-0.1.0/serenecode.jpg +0 -0
  24. serenecode-0.1.0/src/serenecode/__init__.py +281 -0
  25. serenecode-0.1.0/src/serenecode/adapters/__init__.py +6 -0
  26. serenecode-0.1.0/src/serenecode/adapters/coverage_adapter.py +1173 -0
  27. serenecode-0.1.0/src/serenecode/adapters/crosshair_adapter.py +1069 -0
  28. serenecode-0.1.0/src/serenecode/adapters/hypothesis_adapter.py +1824 -0
  29. serenecode-0.1.0/src/serenecode/adapters/local_fs.py +169 -0
  30. serenecode-0.1.0/src/serenecode/adapters/module_loader.py +492 -0
  31. serenecode-0.1.0/src/serenecode/adapters/mypy_adapter.py +161 -0
  32. serenecode-0.1.0/src/serenecode/checker/__init__.py +6 -0
  33. serenecode-0.1.0/src/serenecode/checker/compositional.py +2216 -0
  34. serenecode-0.1.0/src/serenecode/checker/coverage.py +186 -0
  35. serenecode-0.1.0/src/serenecode/checker/properties.py +154 -0
  36. serenecode-0.1.0/src/serenecode/checker/structural.py +1504 -0
  37. serenecode-0.1.0/src/serenecode/checker/symbolic.py +178 -0
  38. serenecode-0.1.0/src/serenecode/checker/types.py +148 -0
  39. serenecode-0.1.0/src/serenecode/cli.py +478 -0
  40. serenecode-0.1.0/src/serenecode/config.py +711 -0
  41. serenecode-0.1.0/src/serenecode/contracts/__init__.py +6 -0
  42. serenecode-0.1.0/src/serenecode/contracts/predicates.py +176 -0
  43. serenecode-0.1.0/src/serenecode/core/__init__.py +6 -0
  44. serenecode-0.1.0/src/serenecode/core/exceptions.py +38 -0
  45. serenecode-0.1.0/src/serenecode/core/pipeline.py +807 -0
  46. serenecode-0.1.0/src/serenecode/init.py +307 -0
  47. serenecode-0.1.0/src/serenecode/models.py +308 -0
  48. serenecode-0.1.0/src/serenecode/ports/__init__.py +6 -0
  49. serenecode-0.1.0/src/serenecode/ports/coverage_analyzer.py +124 -0
  50. serenecode-0.1.0/src/serenecode/ports/file_system.py +95 -0
  51. serenecode-0.1.0/src/serenecode/ports/property_tester.py +69 -0
  52. serenecode-0.1.0/src/serenecode/ports/symbolic_checker.py +70 -0
  53. serenecode-0.1.0/src/serenecode/ports/type_checker.py +66 -0
  54. serenecode-0.1.0/src/serenecode/reporter.py +346 -0
  55. serenecode-0.1.0/src/serenecode/source_discovery.py +319 -0
  56. serenecode-0.1.0/src/serenecode/templates/__init__.py +5 -0
  57. serenecode-0.1.0/src/serenecode/templates/content.py +337 -0
  58. serenecode-0.1.0/tests/__init__.py +0 -0
  59. serenecode-0.1.0/tests/conftest.py +141 -0
  60. serenecode-0.1.0/tests/e2e/__init__.py +0 -0
  61. serenecode-0.1.0/tests/e2e/test_check_command.py +307 -0
  62. serenecode-0.1.0/tests/e2e/test_init_command.py +109 -0
  63. serenecode-0.1.0/tests/e2e/test_report_command.py +156 -0
  64. serenecode-0.1.0/tests/e2e/test_status_command.py +84 -0
  65. serenecode-0.1.0/tests/fixtures/edge_cases/aliased_import.py +10 -0
  66. serenecode-0.1.0/tests/fixtures/edge_cases/async_functions.py +10 -0
  67. serenecode-0.1.0/tests/fixtures/edge_cases/empty_module.py +1 -0
  68. serenecode-0.1.0/tests/fixtures/edge_cases/from_import.py +10 -0
  69. serenecode-0.1.0/tests/fixtures/invalid/broken_postcondition.py +31 -0
  70. serenecode-0.1.0/tests/fixtures/invalid/io_in_core.py +12 -0
  71. serenecode-0.1.0/tests/fixtures/invalid/missing_contracts.py +11 -0
  72. serenecode-0.1.0/tests/fixtures/invalid/missing_invariant.py +13 -0
  73. serenecode-0.1.0/tests/fixtures/invalid/missing_types.py +10 -0
  74. serenecode-0.1.0/tests/fixtures/valid/class_with_invariant.py +32 -0
  75. serenecode-0.1.0/tests/fixtures/valid/full_module.py +59 -0
  76. serenecode-0.1.0/tests/fixtures/valid/simple_function.py +10 -0
  77. serenecode-0.1.0/tests/integration/__init__.py +0 -0
  78. serenecode-0.1.0/tests/integration/test_adapter_internals.py +591 -0
  79. serenecode-0.1.0/tests/integration/test_checkers_real_code.py +206 -0
  80. serenecode-0.1.0/tests/integration/test_coverage_adapter.py +372 -0
  81. serenecode-0.1.0/tests/integration/test_crosshair_adapter.py +88 -0
  82. serenecode-0.1.0/tests/integration/test_example_projects.py +71 -0
  83. serenecode-0.1.0/tests/integration/test_file_adapter.py +95 -0
  84. serenecode-0.1.0/tests/integration/test_hypothesis_adapter.py +230 -0
  85. serenecode-0.1.0/tests/integration/test_mypy_adapter.py +132 -0
  86. serenecode-0.1.0/tests/unit/__init__.py +0 -0
  87. serenecode-0.1.0/tests/unit/checker/__init__.py +0 -0
  88. serenecode-0.1.0/tests/unit/checker/test_compositional.py +1472 -0
  89. serenecode-0.1.0/tests/unit/checker/test_coverage.py +272 -0
  90. serenecode-0.1.0/tests/unit/checker/test_properties.py +84 -0
  91. serenecode-0.1.0/tests/unit/checker/test_structural.py +949 -0
  92. serenecode-0.1.0/tests/unit/checker/test_structural_hypothesis.py +232 -0
  93. serenecode-0.1.0/tests/unit/checker/test_symbolic.py +142 -0
  94. serenecode-0.1.0/tests/unit/checker/test_types.py +81 -0
  95. serenecode-0.1.0/tests/unit/contracts/__init__.py +0 -0
  96. serenecode-0.1.0/tests/unit/contracts/test_predicates.py +172 -0
  97. serenecode-0.1.0/tests/unit/contracts/test_predicates_hypothesis.py +180 -0
  98. serenecode-0.1.0/tests/unit/test_api.py +154 -0
  99. serenecode-0.1.0/tests/unit/test_config.py +286 -0
  100. serenecode-0.1.0/tests/unit/test_models.py +405 -0
  101. serenecode-0.1.0/tests/unit/test_models_hypothesis.py +246 -0
  102. serenecode-0.1.0/tests/unit/test_pipeline.py +638 -0
  103. serenecode-0.1.0/tests/unit/test_reporter.py +245 -0
  104. serenecode-0.1.0/tests/unit/test_source_discovery.py +145 -0
  105. serenecode-0.1.0/uv.lock +1227 -0
@@ -0,0 +1,42 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ *.egg-info/
7
+ *.egg
8
+ dist/
9
+ build/
10
+ eggs/
11
+ *.whl
12
+
13
+ # Virtual environments
14
+ .venv/
15
+ venv/
16
+ env/
17
+
18
+ # IDE / AI tools
19
+ .idea/
20
+ .vscode/
21
+ .claude/
22
+ *.swp
23
+ *.swo
24
+ *~
25
+
26
+ # mypy
27
+ .mypy_cache/
28
+
29
+ # pytest / hypothesis
30
+ .pytest_cache/
31
+ .hypothesis/
32
+ htmlcov/
33
+ .coverage
34
+ coverage.xml
35
+ report.html
36
+
37
+ # Distribution
38
+ *.tar.gz
39
+
40
+ # OS
41
+ .DS_Store
42
+ Thumbs.db
@@ -0,0 +1,70 @@
1
+ ## Serenecode
2
+
3
+ All code in this project MUST follow the standards defined in SERENECODE.md. Read SERENECODE.md before writing or modifying any code. Every public function must have icontract preconditions and postconditions. Every class with state must have invariants. Follow the architectural patterns specified in SERENECODE.md.
4
+
5
+ ### Verification Commands
6
+
7
+ After each work iteration (implementing a feature, fixing a bug, refactoring), offer to run verification before considering the task complete.
8
+
9
+ **Quick structural check (seconds):**
10
+ ```bash
11
+ serenecode check src/ --structural
12
+ ```
13
+
14
+ **Verification through Level 4 — coverage and property testing (minutes):**
15
+ ```bash
16
+ serenecode check src/ --level 4 --allow-code-execution
17
+ ```
18
+
19
+ **Full verification including symbolic and compositional (minutes):**
20
+ ```bash
21
+ serenecode check src/ --level 6 --allow-code-execution
22
+ ```
23
+
24
+ ### Reading Verification Output
25
+
26
+ Each finding includes function name, file path, line number, a message, and a suggestion. The output summary uses four statuses:
27
+
28
+ - **passed** — verified at the requested level.
29
+ - **failed** — a violation was found. Read the message and suggestion to fix it.
30
+ - **skipped** — the tool could not run (e.g. tool not installed, module not importable). Investigate why.
31
+ - **exempt** — intentionally excluded from this check level (adapter code, Protocol classes, functions with non-primitive parameters). No action needed unless the function should be verified.
32
+
33
+ ### Fixing Failures by Level
34
+
35
+ **Level 1 (structural)** — Missing contracts or annotations. The suggestion names the specific parameters or return type. Add the missing decorator or type annotation.
36
+
37
+ **Level 2 (types)** — mypy type errors. The suggestion includes the mypy error code and a fix direction. Fix the type annotation or the expression.
38
+
39
+ **Level 3 (coverage)** — Test coverage is below threshold. The output shows:
40
+ - Which functions have insufficient coverage and their exact uncovered lines
41
+ - Suggested test code for each uncovered path
42
+ - Mock assessment: each dependency is classified as REQUIRED (external I/O — must mock) or OPTIONAL (internal code — consider using the real implementation)
43
+ - If the output reports "no tests found", write tests first: coverage analysis can only measure tests that already exist.
44
+
45
+ **Level 4 (properties)** — Hypothesis found inputs that violate a postcondition. The counterexample shows the exact failing inputs (e.g. `x=-1, result=-2`). Either:
46
+ 1. Fix the implementation so the postcondition holds for these inputs, OR
47
+ 2. Add a `@icontract.require` precondition to exclude these inputs if they are not valid.
48
+
49
+ **Level 5 (symbolic)** — CrossHair found a counterexample via symbolic execution. Same fix pattern as Level 4, but the counterexample comes from the solver rather than random testing.
50
+
51
+ **Level 6 (compositional)** — Cross-module architectural violations. Fix the dependency direction, add missing contracts at module boundaries, or correct interface mismatches.
52
+
53
+ ### Writing Contracts
54
+
55
+ When adding contracts, write meaningful conditions that constrain behavior:
56
+
57
+ ```python
58
+ # GOOD — constrains real behavior
59
+ @icontract.require(lambda items: len(items) > 0, "items must not be empty")
60
+ @icontract.ensure(lambda items, result: min(items) <= result <= max(items), "result within range")
61
+
62
+ # BAD — tautological, verifies nothing
63
+ @icontract.ensure(lambda result: True, "always passes")
64
+ ```
65
+
66
+ Protocol classes and stateless adapters do not need `@icontract.invariant`. Add `# no-invariant: <reason>` above the class definition if the class has no state to constrain.
67
+
68
+ ### Verification Scope
69
+
70
+ The output shows what was and wasn't checked. Exempt items (adapters, ports, non-primitive signatures) are visible in the output — not silently omitted. If verification fails, read the error messages and fix the issues. Iterate until all checks pass.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 helgster77
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,298 @@
1
+ Metadata-Version: 2.4
2
+ Name: serenecode
3
+ Version: 0.1.0
4
+ Summary: Verification framework for AI-generated Python — test coverage, property testing, and symbolic execution
5
+ Project-URL: Homepage, https://github.com/helgster77/serenecode
6
+ Project-URL: Repository, https://github.com/helgster77/serenecode
7
+ Project-URL: Issues, https://github.com/helgster77/serenecode/issues
8
+ Author: helgster77
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Software Development :: Quality Assurance
20
+ Classifier: Topic :: Software Development :: Testing
21
+ Classifier: Typing :: Typed
22
+ Requires-Python: >=3.10
23
+ Requires-Dist: click>=8.0
24
+ Requires-Dist: icontract>=2.7.0
25
+ Provides-Extra: dev
26
+ Requires-Dist: crosshair-tool>=0.0.60; extra == 'dev'
27
+ Requires-Dist: hypothesis>=6.0; extra == 'dev'
28
+ Requires-Dist: mypy>=1.0; extra == 'dev'
29
+ Requires-Dist: pytest-cov>=4.0; extra == 'dev'
30
+ Requires-Dist: pytest>=7.0; extra == 'dev'
31
+ Provides-Extra: verify
32
+ Requires-Dist: coverage>=7.0; extra == 'verify'
33
+ Requires-Dist: crosshair-tool>=0.0.60; extra == 'verify'
34
+ Requires-Dist: hypothesis>=6.0; extra == 'verify'
35
+ Requires-Dist: mypy>=1.0; extra == 'verify'
36
+ Description-Content-Type: text/markdown
37
+
38
+ <p align="center">
39
+ <img src="serenecode.jpg" alt="SereneCode" width="500">
40
+ </p>
41
+
42
+ <h3 align="center">A Framework for AI-Driven Development of Verifiable Systems</h3>
43
+
44
+ SereneCode is a verification framework for AI-generated Python. It tells the AI *how* to write verifiable code, checks that the AI followed instructions, and then verifies the code at multiple levels — from test coverage analysis that catches gaps in AI-written tests, through property-based testing that checks contracts against hundreds of random inputs, to symbolic execution that uses an SMT solver to search for *any* input that breaks a contract. You choose the verification depth that matches your project: lightweight for internal tools, balanced for production systems, strict for safety-critical code. AI agents write code fast but can be suboptimal at testing their own work; SereneCode closes that gap by surfacing untested paths, generating test suggestions, and verifying behavior beyond what hand-written tests cover.
45
+
46
+ > **This framework was bootstrapped with AI under its own rules.** SereneCode's SERENECODE.md was written before the first line of code, and the codebase has been developed under those conventions from the start. The current tree passes its own `serenecode check src --level 6 --allow-code-execution`, an internal strict-config Level 6 self-check in the test suite, `mypy src examples/dosage-serenecode/src`, the shipped example's strict Level 6 check, and the full `pytest` suite. The verification output is transparent about scope: exempt modules (adapters, CLI, ports) and functions excluded from deep verification (non-primitive parameter types) are reported as "exempt" rather than silently omitted.
47
+
48
+ ---
49
+
50
+ ## Why This Exists
51
+
52
+ AI writes code fast. But *fast* and *correct* aren't the same thing. When you're building a medical dosage calculator, a financial ledger, or an avionics controller, "it passed my tests" isn't enough. Tests check the inputs you thought of. Formal verification uses an SMT solver to search for *any* input that breaks your contracts.
53
+
54
+ The problem is that formal verification has always been expensive — too slow, too manual, too specialized. SereneCode makes it tractable by controlling the process from the start: a convention file tells the AI to write verification-ready code, a structural linter checks it followed the rules, and CrossHair + Z3 search for contract violations via symbolic execution.
55
+
56
+ SereneCode is designed for **building new verifiable systems from scratch with AI**, not for retrofitting verification onto large existing codebases. The conventions go in before the first line of code, and every module is written with verification in mind from day one. That's what makes it work. SereneCode is a best-effort tool, not a guarantee — see the [Disclaimer](#disclaimer) for important limitations on what it can and cannot assure.
57
+
58
+ ### Choosing the Right Level
59
+
60
+ The cost of verification should be proportional to the cost of a bug. Each level generates a different SERENECODE.md with different requirements for the AI, so the choice shapes how code is *written*, not just how it's checked.
61
+
62
+ | | `--minimal` | **Default** | `--strict` |
63
+ |---|---|---|---|
64
+ | **Verifies through** | L2 (structure + types) | L4 (+ test coverage + properties) | L6 (+ symbolic + compositional) |
65
+ | **What the AI must write** | Contracts on public functions, type annotations | + description strings, class invariants, hexagonal architecture | + contracts on *all* functions, loop invariants, domain exceptions, no exemptions |
66
+ | **What catches bugs** | Runtime contract checks, mypy | + L3 surfaces untested code paths and generates test suggestions; L4 tests contracts against hundreds of random inputs | + SMT solver searches for *any* counterexample within analysis bounds |
67
+ | **Good for** | Internal tools, scripts, prototypes, incremental adoption | Production APIs, business logic, data pipelines | Medical, financial, infrastructure, regulated systems |
68
+ | **The tradeoff** | Low ceremony, but contracts are only checked at the boundaries you wrote them | Moderate overhead; architecture rules keep core logic pure and testable | Significant overhead — every loop gets an invariant comment, every helper gets a contract. Justified when the cost of an undiscovered bug is measured in patient harm, financial loss, or regulatory failure |
69
+
70
+ Pick the level that matches the stakes, and pick it early. Moving up later means retrofitting contracts, invariants, and architecture onto existing code — it's not just flipping a flag. Safety-critical code should be written for `--strict` from the first line.
71
+
72
+ ---
73
+
74
+ ## See It In Action: The Medical Dosage Calculator
75
+
76
+ We built the same medical dosage calculator twice from the same spec — once with plain AI, once with SereneCode — to show the difference.
77
+
78
+ Both versions implement four functions: dose calculation with weight-based dosing and max caps, renal function adjustment with tiered CrCl thresholds, daily safety checks with explicit total-versus-threshold calculations, and contraindication detection across current medications.
79
+
80
+ Both versions implement the same requirements, and the plain version passes its 59-test suite. Here's what SereneCode adds on top:
81
+
82
+ | What can you claim? | Plain AI | SereneCode |
83
+ |---|---|---|
84
+ | **Dose never exceeds maximum** | Covered by unit tests | Encoded as a postcondition; bounded symbolic search found no counterexample within analysis bounds |
85
+ | **Renal adjustment never increases a dose** | Covered by unit tests | `result <= dose_mg` is an executable contract, not just a test expectation |
86
+ | **Safety result is internally consistent** | No validation — you can construct `SafetyResult(total=9999, max=100, is_safe=True)` | Representation invariants make inconsistent `SafetyResult` states unconstructable |
87
+ | **Objects are truly immutable** | `frozen=True` with mutable `set` on Drug | `_Frozen` mixin + immutable `tuple`/`frozenset` fields — fully locked down |
88
+ | **Boundary behavior (CrCl exactly 30.0)** | Covered by explicit tests | Boundary behavior is specified in contracts; bounded symbolic search found no counterexample |
89
+ | **What if someone changes the code later?** | You rely on the tests you remembered to keep | Contracts stay attached to the code and keep checking every contracted call |
90
+ | **Can a solver verify it?** | No executable specification for a solver to target | 120 executable contracts and a clean `serenecode check ... --level 6 --allow-code-execution` run |
91
+ | **Confidence in a safety-critical setting** | Better than ad hoc code, but still test-shaped confidence | Higher: behavior is formally specified, runtime-checked, and solver-checked within analysis bounds — but bounded search is not proof |
92
+
93
+ The plain version relies on 59 tests that check specific scenarios. The SereneCode version adds 120 executable contracts across its domain models and core dosage logic. Those contracts define *what correct means* in code, get checked at runtime, and give CrossHair/Z3 something precise to search against when looking for counterexamples within analysis bounds.
94
+
95
+ > Both examples live in [`examples/dosage-regular/`](examples/dosage-regular/) and [`examples/dosage-serenecode/`](examples/dosage-serenecode/). Read them side by side.
96
+
97
+ The SereneCode dosage example currently passes `serenecode check examples/dosage-serenecode/src --level 6 --allow-code-execution`. Its local `pytest` suite is also green with 74 passing tests.
98
+
99
+ ---
100
+
101
+ ## How It Works
102
+
103
+ ### 1. SERENECODE.md — Your AI Writes Code That's Built for Verification
104
+
105
+ A markdown file in your project root that tells AI coding agents exactly how to write code: what contracts to include, what architecture to follow, what patterns to use. When Claude Code (or another agent) reads this before generating code, it has a concrete target for producing verification-friendly output from the first keystroke.
106
+
107
+ ```bash
108
+ serenecode init # balanced defaults — contracts on public APIs, test coverage, hexagonal architecture
109
+ serenecode init --strict # maximum rigor — contracts on ALL functions (public and private), no exemptions
110
+ serenecode init --minimal # lightweight — public-function contracts only, relaxed architecture rules
111
+ ```
112
+
113
+ This creates a SERENECODE.md tailored to your project and integrates with CLAUDE.md so Claude Code follows the conventions automatically. You write the rules once, and the agent has a stable spec to follow on every iteration.
114
+
115
+ ### 2. The Checker — Instant Feedback
116
+
117
+ A lightweight AST-based linter that validates code follows SERENECODE.md conventions in seconds. Missing a postcondition? No class invariant? I/O imports in a core module? Caught before you waste time on heavy verification.
118
+
119
+ ```bash
120
+ serenecode check src/ --structural # seconds
121
+ ```
122
+
123
+ ### 3. The Verifier — Symbolic Verification
124
+
125
+ A six-level verification pipeline that escalates from fast structural checks to bounded symbolic search and cross-module analysis:
126
+
127
+ | Level | What | Speed | Backend |
128
+ |-------|------|-------|---------|
129
+ | **L1** | Structural conventions | Seconds | AST analysis |
130
+ | **L2** | Type correctness | Seconds | mypy --strict |
131
+ | **L3** | Test coverage analysis | Seconds–minutes | coverage.py |
132
+ | **L4** | Property-based testing | Seconds–minutes | Hypothesis |
133
+ | **L5** | Symbolic search (bounded) | Minutes | CrossHair / Z3 |
134
+ | **L6** | Cross-module verification | Seconds | Compositional analysis |
135
+
136
+ ```bash
137
+ serenecode check src/ --level 6 --allow-code-execution # verify it
138
+ ```
139
+
140
+ **L3 Test Coverage** is where SereneCode checks that the AI's tests actually exercise the code it wrote. AI agents can be suboptimal at writing tests — they tend to cover the happy path, skip edge cases, and miss error branches. L3 runs your existing tests under coverage.py tracing, measures per-function line and branch coverage, and reports exactly which lines and branches are untested. For each coverage gap, it generates concrete test suggestions including mock necessity assessments: each dependency is classified as REQUIRED (external I/O — must mock) or OPTIONAL (internal code — consider using the real implementation). This gives the AI agent actionable feedback to improve its own tests rather than leaving coverage gaps undetected. When no tests exist for a module, L3 reports this as informational rather than failing, so the coverage level serves as a baseline measurement before L4 property testing generates new test inputs.
141
+
142
+ The full pipeline is thorough but not instant. Larger systems will take longer, and the deepest runs may surface skipped items when Hypothesis cannot synthesize valid values for complex domain types or when CrossHair hits its time budget. By default, L5 focuses on contracted top-level functions defined in each module and skips modules or signatures that are currently poor fits for direct symbolic execution, such as adapter/composition-root code, helper predicate modules, and object-heavy APIs. Not everything needs L5/L6. Critical paths get full symbolic and compositional verification. Utility functions get property testing. A Level 4 run only counts as achieved when at least one contracted property target was actually exercised.
143
+
144
+ Levels 3-6 import and execute project modules so coverage.py, Hypothesis, and CrossHair can exercise real code. Deep runs therefore require explicit `--allow-code-execution` and should only be used on trusted code.
145
+
146
+ Scoped targets keep their package/import context across verification levels. In practice that means commands like `serenecode check src/core/ --level 4 --allow-code-execution` and `serenecode check src/core/models.py --level 3 --allow-code-execution` use the same local import roots and architectural module paths as a project-wide run instead of breaking relative imports or scoped core-module rules. Those scoped core/exemption rules are matched on path segments, not raw substrings, so names like `notcli.py`, `viewmodels.py`, and `transports/` do not accidentally change policy classification. Standalone files with non-importable names are also targeted correctly for CrossHair via `file.py:line` references.
147
+
148
+ ---
149
+
150
+ ## The AI Agent Loop
151
+
152
+ SereneCode is designed for AI agents that write code and fix their own mistakes:
153
+
154
+ ```
155
+ AI reads SERENECODE.md → knows how to write verification-ready code
156
+ AI generates code with contracts → postconditions, input preconditions, invariants
157
+ serenecode check --structural → instant: did the AI follow the rules?
158
+ serenecode check --level 5 --allow-code-execution → deep: can the solver find any counterexample?
159
+ AI reads counterexamples → "input x=[-1] violates postcondition"
160
+ AI fixes the code → adjusts implementation or contract
161
+ Repeat until verified → no counterexample found, not just tested
162
+ ```
163
+
164
+ AI-generated code won't always pass verification on the first try — and that's the point. SereneCode gives the coding agent structured feedback on exactly what failed and why: counterexamples, violated contracts, and suggested fixes. The agent uses that feedback to iterate until the code passes. The value isn't in one-shotting perfection — it's in the loop that converges on verified correctness.
165
+
166
+ Works in Claude Code, works in the terminal, works in CI:
167
+
168
+ ```python
169
+ import serenecode
170
+
171
+ result = serenecode.check(path="src/", level=5, allow_code_execution=True)
172
+ for failure in result.failures:
173
+     print(f"{failure.function} @ {failure.file}:{failure.line}")
174
+     for detail in failure.details:
175
+         if detail.counterexample is not None:
176
+             print(detail.counterexample)  # exact input that breaks the code
177
+         if detail.suggestion is not None:
178
+             print(detail.suggestion)  # proposed fix direction
179
+ ```
180
+
181
+ ---
182
+
183
+ ## Built With Its Own Medicine
184
+
185
+ SereneCode isn't just a tool that *tells* you to write verified code. It *is* verified code.
186
+
187
+ The SERENECODE.md convention file was the first artifact created — before any Python was written. The framework has been developed under those conventions with AI as a first-class contributor, and the repository continuously checks itself with:
188
+
189
+ - `pytest` across the full suite (currently 651 passing tests, 16 skipped)
190
+ - `mypy --strict` across `src/` and `examples/dosage-serenecode/src/`
191
+ - SereneCode's own structural, type, coverage, property, symbolic, and compositional passes
192
+
193
+ On the current tree, `serenecode check src --level 6 --allow-code-execution` runs all six verification levels. Items reported as exempt include adapter modules (which handle I/O and are integration-tested), port interfaces (Protocols that define abstract contracts), CLI entry points, and functions whose parameter types are too complex for automated strategy generation or symbolic execution. Exempt items are visible in the output — they are not silently omitted.
194
+
195
+ At Level 5, CrossHair and Z3 search for counterexamples across the codebase's symbolic-friendly contracted top-level functions. Functions with non-primitive parameters (custom dataclasses, Protocol implementations, Callable types) are reported as exempt because the solver cannot generate inputs for them. Level 6 adds structural compositional analysis: dependency direction, circular dependency detection, interface compliance, contract presence at module boundaries, aliased cross-module call resolution, and architectural invariants. Interface compliance follows explicit `Protocol` inheritance and checks substitutability, including extra required parameters and incompatible return annotations. Together, they provide both deep per-function verification and system-level structural guarantees — but the structural checks at L6 verify contract *presence*, not logical *sufficiency* across call chains.
196
+
197
+ ---
198
+
199
+ ## Quick Start
200
+
201
+ ```bash
202
+ # Clone and install from source
203
+ git clone https://github.com/helgster77/serenecode.git
204
+ cd serenecode
205
+ uv sync --extra verify
206
+
207
+ # Or with pip:
208
+ # pip install -e ".[verify]"
209
+
210
+ # Initialize a project with conventions
211
+ serenecode init
212
+
213
+ # Let your AI agent write code following SERENECODE.md...
214
+ # Then verify:
215
+ serenecode check src/ --structural
216
+
217
+ # Or go deep:
218
+ serenecode check src/core/ --level 5 --allow-code-execution --format json
219
+ ```
220
+
221
+ JSON output includes top-level `passed`, `level_requested`, and `level_achieved` fields alongside the summary and per-function results.
222
+
223
+ When you verify a nested package or a single module, SereneCode now preserves the package root and module-path context used by mypy, Hypothesis, CrossHair, and the architectural checks. That lets package-local absolute imports, relative imports, and scoped core-module rules behave the same way they do in project-wide runs.
224
+
225
+ ## CLI Reference
226
+
227
+ ```bash
228
+ serenecode init [<path>] [--strict | --minimal] # set up conventions
229
+ serenecode check [<path>] [--level 1-6] [--allow-code-execution] # run verification
230
+ [--format human|json] # output format
231
+ [--structural] [--verify] # L1 only / L3-6 only
232
+ [--per-condition-timeout N] # L5 CrossHair budgets
233
+ [--per-path-timeout N] [--module-timeout N] # (defaults: 30/10/300s)
234
+ [--workers N] # L5 parallel workers
235
+ serenecode status [<path>] [--format human|json] # verification status
236
+ serenecode report [<path>] [--format human|json|html] # generate reports
237
+ [--output FILE] [--allow-code-execution] # write to file
238
+ ```
239
+
240
+ **Exit codes:** 0 = passed, 1 = structural, 2 = types, 3 = coverage, 4 = properties, 5 = symbolic, 6 = compositional, 10 = internal error or deep verification refused without explicit trust (i.e., `--allow-code-execution` was not passed).
241
+
242
+ ---
243
+
244
+ ## Honest Limitations
245
+
246
+ SereneCode is honest about what it can and can't do:
247
+
248
+ **"No counterexample found" is not "proven correct."** CrossHair uses bounded symbolic execution backed by Z3 — it explores execution paths within time limits (default: 30 seconds per condition, 10 seconds per path, 300 seconds per module) and searches for counterexamples. When it reports "no counterexample found within analysis bounds," that's strong evidence of correctness for the explored paths, but it's not an unbounded proof in the Coq/Lean sense. For pure functions with simple control flow, the coverage is often effectively exhaustive. For complex code, it's bounded. The tool's output now uses this honest language rather than saying "verified."
249
+
250
+ **Contracts are only as good as you write them.** A function with weak postconditions will pass verification even if the implementation is subtly wrong. SereneCode checks that contracts exist and hold, but can't check that they fully capture your intent. Tautological contracts like `lambda self: True` are now flagged by the conventions and should not be used — they provide no verification value.
251
+
252
+ **Exempt items are visible, not hidden.** Modules exempt from structural checking (adapters, CLI, ports, `__init__.py`) and functions excluded from deep verification (non-primitive parameter types, adapter code) are reported as "exempt" in the output rather than being silently omitted. This makes the verification scope transparent: the tool reports passed, failed, skipped, and exempt counts separately so you can see exactly what was and wasn't deeply verified. Previous versions silently omitted these, inflating the apparent scope.
253
+
254
+ **Runtime checks can be disabled.** icontract decorators are checked on every call by default, but can be disabled via environment variables for performance in production. This is a feature, not a bug — but it means runtime guarantees depend on configuration.
255
+
256
+ **Not everything can be deeply verified.** Functions with complex domain-type parameters (custom dataclasses, Callable, Protocol implementations) are automatically excluded from L4/L5 because the tools cannot generate valid inputs for them — they show up as "exempt" in the output. See "Choosing the Right Level" above for guidance on which verification depth fits your system.
257
+
258
+ **Levels 3-6 execute your code.** Coverage analysis, property-based testing, and symbolic verification import project modules and run their top-level code as part of analysis. Module loading uses `compile()` + `exec()` on target source files and their transitive dependencies. There is no sandboxing or syscall filtering — a malicious `.py` file in the target directory gets full access to the host. Use `--allow-code-execution` or `allow_code_execution=True` only for code you trust. Subprocess-based backends (CrossHair, pytest/coverage) receive module paths and search paths from the source discovery layer; symlink-based directory traversal is blocked (`followlinks=False`), but the trust boundary ultimately relies on the `--allow-code-execution` gate.
259
+
260
+ **Deep runs can be incomplete by default.** A result can include skipped items even when there are no correctness failures: Hypothesis may not be able to derive strategies for some highly structured project-local types, and CrossHair can time out on solver-heavy modules once the module budget is exhausted. When a run exercises no property-testing targets at all, SereneCode does not claim L4 was achieved. When a scoped run produces no symbolic findings at all, SereneCode does not claim L5 was achieved. A verification level is only marked as achieved when results are non-empty with no failures and no skips — empty results from L3/L4/L5 backends mean "nothing was exercised," not "everything passed." Increase `--per-condition-timeout`, `--per-path-timeout`, or `--module-timeout` when you want to push harder on L5.
261
+
262
+ **Level 6 is structural, not semantic.** Compositional verification (L6) checks that contracts *exist* at module boundaries, that dependency direction is correct, and that interfaces structurally match, including explicit `Protocol` inheritance and signature-shape compatibility. It does not verify that postconditions *logically satisfy* preconditions across call chains — that would require symbolic reasoning across module boundaries, which is a planned future enhancement. L6 catches architectural violations and contract gaps, not logical insufficiency. Source files with syntax errors are now reported as skipped with an actionable message instead of silently producing an empty analysis.
263
+
264
+
265
+
266
+ ---
267
+
268
+ ## Architecture
269
+
270
+ SereneCode follows hexagonal architecture — the same pattern it enforces on your code:
271
+
272
+ ```
273
+ CLI / Library API ← composition roots
274
+
275
+ ├──▸ Pipeline ← orchestrates L1 → L2 → L3 → L4 → L5 → L6
276
+ │ ├──▸ Structural Checker (ast)
277
+ │ ├──▸ Type Checker (mypy)
278
+ │ ├──▸ Coverage Analyzer (coverage.py)
279
+ │ ├──▸ Property Tester (Hypothesis)
280
+ │ ├──▸ Symbolic Checker (CrossHair/Z3)
281
+ │ └──▸ Compositional Checker (ast)
282
+
283
+ ├──▸ Reporter ← human / JSON / HTML
284
+
285
+ └──▸ Adapters → Ports ← Protocol interfaces for all I/O
286
+ ```
287
+
288
+ Core logic is pure. All I/O goes through Protocol-defined ports. The verification engine itself is verifiable.
289
+
290
+ ## Disclaimer
291
+
292
+ SereneCode is provided as-is, without warranty of any kind. It is a best-effort tool that helps surface defects through contracts, property-based testing, and bounded symbolic execution — but it cannot guarantee the absence of bugs. "No counterexample found" means the solver did not find one within its analysis bounds, not that none exists. Verification results depend on the quality of the contracts you write, the time budgets you configure, and the inherent limitations of the underlying tools.
293
+
294
+ Users are responsible for the correctness, safety, and regulatory compliance of their own systems. SereneCode is not a substitute for independent code review, domain-expert validation, or any certification process required by your industry. If you are building safety-critical software, use this framework as one layer of assurance among many — not as the only one.
295
+
296
+ ## License
297
+
298
+ MIT