a3-python 0.1.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- a3_python-0.1.11/PKG-INFO +343 -0
- a3_python-0.1.11/README.md +328 -0
- a3_python-0.1.11/a3_python/ci/init_cmd.py +146 -0
- a3_python-0.1.11/a3_python/ci/templates/a3-pr-scan.yml +98 -0
- a3_python-0.1.11/a3_python/ci/templates/a3-scheduled-scan.yml +88 -0
- a3_python-0.1.11/a3_python.egg-info/PKG-INFO +343 -0
- a3_python-0.1.11/a3_python.egg-info/SOURCES.txt +192 -0
- a3_python-0.1.11/a3_python.egg-info/dependency_links.txt +1 -0
- a3_python-0.1.11/a3_python.egg-info/entry_points.txt +2 -0
- a3_python-0.1.11/a3_python.egg-info/requires.txt +10 -0
- a3_python-0.1.11/a3_python.egg-info/top_level.txt +1 -0
- a3_python-0.1.11/pyproject.toml +43 -0
- a3_python-0.1.11/setup.cfg +4 -0
- a3_python-0.1.11/tests/test_all_security.py +53 -0
- a3_python-0.1.11/tests/test_analyzer_dse_integration.py +101 -0
- a3_python-0.1.11/tests/test_analyzer_hybrid_witness_integration.py +29 -0
- a3_python-0.1.11/tests/test_analyzer_safe_integration.py +104 -0
- a3_python-0.1.11/tests/test_auto_template_synthesis.py +345 -0
- a3_python-0.1.11/tests/test_barrier_analysis.py +95 -0
- a3_python-0.1.11/tests/test_barrier_on_qlib.py +219 -0
- a3_python-0.1.11/tests/test_barriers.py +877 -0
- a3_python-0.1.11/tests/test_barriers_ranking.py +399 -0
- a3_python-0.1.11/tests/test_binary_op_bitwise.py +460 -0
- a3_python-0.1.11/tests/test_binary_op_power.py +211 -0
- a3_python-0.1.11/tests/test_binary_ops_extended.py +130 -0
- a3_python-0.1.11/tests/test_build_list.py +129 -0
- a3_python-0.1.11/tests/test_build_map.py +132 -0
- a3_python-0.1.11/tests/test_build_set.py +175 -0
- a3_python-0.1.11/tests/test_build_string.py +109 -0
- a3_python-0.1.11/tests/test_call_intrinsic_1.py +202 -0
- a3_python-0.1.11/tests/test_call_kw.py +262 -0
- a3_python-0.1.11/tests/test_cegis_counterexamples.py +270 -0
- a3_python-0.1.11/tests/test_cegis_synthesis.py +429 -0
- a3_python-0.1.11/tests/test_cli.py +53 -0
- a3_python-0.1.11/tests/test_cli_termination.py +179 -0
- a3_python-0.1.11/tests/test_closures.py +275 -0
- a3_python-0.1.11/tests/test_cmd_inj.py +38 -0
- a3_python-0.1.11/tests/test_collection_bugs.py +135 -0
- a3_python-0.1.11/tests/test_concrete_taint_path.py +242 -0
- a3_python-0.1.11/tests/test_confidence_null_ptr_bounds.py +164 -0
- a3_python-0.1.11/tests/test_connection_pool_taint.py +353 -0
- a3_python-0.1.11/tests/test_constraint_solver.py +348 -0
- a3_python-0.1.11/tests/test_contains_dict.py +155 -0
- a3_python-0.1.11/tests/test_contains_op.py +206 -0
- a3_python-0.1.11/tests/test_context_precision_demo.py +96 -0
- a3_python-0.1.11/tests/test_context_sensitivity.py +344 -0
- a3_python-0.1.11/tests/test_contract_matching_specificity.py +92 -0
- a3_python-0.1.11/tests/test_contracts.py +253 -0
- a3_python-0.1.11/tests/test_contracts_integration.py +274 -0
- a3_python-0.1.11/tests/test_cookie_injection.py +195 -0
- a3_python-0.1.11/tests/test_crash_summaries_compact_proofs.py +38 -0
- a3_python-0.1.11/tests/test_cross_module_taint.py +247 -0
- a3_python-0.1.11/tests/test_cursor_taint.py +243 -0
- a3_python-0.1.11/tests/test_deduplication.py +245 -0
- a3_python-0.1.11/tests/test_deepspeed.py +129 -0
- a3_python-0.1.11/tests/test_deserialization_detection.py +163 -0
- a3_python-0.1.11/tests/test_dict_merge.py +152 -0
- a3_python-0.1.11/tests/test_dict_methods.py +147 -0
- a3_python-0.1.11/tests/test_dict_taint_tracking.py +167 -0
- a3_python-0.1.11/tests/test_dict_update.py +118 -0
- a3_python-0.1.11/tests/test_django_render_sanitizer.py +163 -0
- a3_python-0.1.11/tests/test_django_template_sanitization.py +129 -0
- a3_python-0.1.11/tests/test_dse.py +429 -0
- a3_python-0.1.11/tests/test_dse_context.py +280 -0
- a3_python-0.1.11/tests/test_e2e_taint_path_filtering.py +146 -0
- a3_python-0.1.11/tests/test_ellipsis_slice.py +66 -0
- a3_python-0.1.11/tests/test_exception_bug_types.py +484 -0
- a3_python-0.1.11/tests/test_exception_handlers.py +200 -0
- a3_python-0.1.11/tests/test_exception_path_forking.py +116 -0
- a3_python-0.1.11/tests/test_extended_arg.py +138 -0
- a3_python-0.1.11/tests/test_file_object_taint.py +221 -0
- a3_python-0.1.11/tests/test_file_object_taint_vm.py +112 -0
- a3_python-0.1.11/tests/test_flaml.py +120 -0
- a3_python-0.1.11/tests/test_flask_debug.py +111 -0
- a3_python-0.1.11/tests/test_function_level_termination.py +226 -0
- a3_python-0.1.11/tests/test_heap_observers.py +233 -0
- a3_python-0.1.11/tests/test_hscc2004_barrier_certificates.py +26 -0
- a3_python-0.1.11/tests/test_hybrid_concolic_symbolic_unknown_libs.py +165 -0
- a3_python-0.1.11/tests/test_ide_precision.py +289 -0
- a3_python-0.1.11/tests/test_ide_return_propagation.py +249 -0
- a3_python-0.1.11/tests/test_implicit_flow_security.py +353 -0
- a3_python-0.1.11/tests/test_import_from.py +158 -0
- a3_python-0.1.11/tests/test_import_tracking.py +172 -0
- a3_python-0.1.11/tests/test_insecure_cookie.py +190 -0
- a3_python-0.1.11/tests/test_interprocedural.py +503 -0
- a3_python-0.1.11/tests/test_interprocedural_crash_analysis.py +207 -0
- a3_python-0.1.11/tests/test_interprocedural_security.py +658 -0
- a3_python-0.1.11/tests/test_interprocedural_sigma.py +169 -0
- a3_python-0.1.11/tests/test_intraprocedural_analysis.py +583 -0
- a3_python-0.1.11/tests/test_intraprocedural_integration.py +145 -0
- a3_python-0.1.11/tests/test_intraprocedural_phase2.py +442 -0
- a3_python-0.1.11/tests/test_intraprocedural_phase3.py +415 -0
- a3_python-0.1.11/tests/test_intraprocedural_taint.py +339 -0
- a3_python-0.1.11/tests/test_invariant_integration.py +283 -0
- a3_python-0.1.11/tests/test_is_op.py +98 -0
- a3_python-0.1.11/tests/test_iteration_601.py +34 -0
- a3_python-0.1.11/tests/test_jump_forward.py +210 -0
- a3_python-0.1.11/tests/test_kitchensink_sota.py +703 -0
- a3_python-0.1.11/tests/test_kitchensink_taxonomy.py +462 -0
- a3_python-0.1.11/tests/test_lexicographic_ranking.py +609 -0
- a3_python-0.1.11/tests/test_lightgbm.py +115 -0
- a3_python-0.1.11/tests/test_list_append.py +141 -0
- a3_python-0.1.11/tests/test_load_fast_and_clear.py +231 -0
- a3_python-0.1.11/tests/test_load_fast_borrow.py +117 -0
- a3_python-0.1.11/tests/test_lockstep_concolic_replay.py +59 -0
- a3_python-0.1.11/tests/test_loop_opcodes.py +270 -0
- a3_python-0.1.11/tests/test_map_add.py +137 -0
- a3_python-0.1.11/tests/test_module_init_detection.py +135 -0
- a3_python-0.1.11/tests/test_module_init_filtering.py +205 -0
- a3_python-0.1.11/tests/test_module_vs_function_level_detection.py +254 -0
- a3_python-0.1.11/tests/test_no_concolic_mode.py +21 -0
- a3_python-0.1.11/tests/test_opcodes_build_tuple_format.py +99 -0
- a3_python-0.1.11/tests/test_open_exception_handler.py +65 -0
- a3_python-0.1.11/tests/test_orm_taint.py +286 -0
- a3_python-0.1.11/tests/test_path_forking_unit.py +236 -0
- a3_python-0.1.11/tests/test_path_injection_detection.py +297 -0
- a3_python-0.1.11/tests/test_path_limit_soundness.py +146 -0
- a3_python-0.1.11/tests/test_path_validation.py +190 -0
- a3_python-0.1.11/tests/test_polynomial_barriers.py +485 -0
- a3_python-0.1.11/tests/test_pop_jump_if_none.py +140 -0
- a3_python-0.1.11/tests/test_program_analysis.py +371 -0
- a3_python-0.1.11/tests/test_qlib_full_analyzer.py +48 -0
- a3_python-0.1.11/tests/test_qlib_known_bugs.py +73 -0
- a3_python-0.1.11/tests/test_qlib_models.py +81 -0
- a3_python-0.1.11/tests/test_ranking_synthesis.py +446 -0
- a3_python-0.1.11/tests/test_ranking_synthesis_lexicographic_integration.py +237 -0
- a3_python-0.1.11/tests/test_receiver_taint_vm.py +176 -0
- a3_python-0.1.11/tests/test_redos_detection.py +257 -0
- a3_python-0.1.11/tests/test_regex_pattern_contracts.py +204 -0
- a3_python-0.1.11/tests/test_regex_validation_sanitizers.py +472 -0
- a3_python-0.1.11/tests/test_relational_summaries.py +266 -0
- a3_python-0.1.11/tests/test_safe_proofs_e2e.py +208 -0
- a3_python-0.1.11/tests/test_security_api.py +375 -0
- a3_python-0.1.11/tests/test_security_bugs.py +1009 -0
- a3_python-0.1.11/tests/test_semantics_concrete.py +190 -0
- a3_python-0.1.11/tests/test_semantics_generators.py +219 -0
- a3_python-0.1.11/tests/test_semantics_symbolic.py +224 -0
- a3_python-0.1.11/tests/test_sensitivity_inference.py +391 -0
- a3_python-0.1.11/tests/test_set_add.py +180 -0
- a3_python-0.1.11/tests/test_setup_annotations.py +185 -0
- a3_python-0.1.11/tests/test_sigma_taint_regression.py +338 -0
- a3_python-0.1.11/tests/test_socket_taint.py +304 -0
- a3_python-0.1.11/tests/test_sos_for_safety.py +42 -0
- a3_python-0.1.11/tests/test_sos_guarded_divzero.py +25 -0
- a3_python-0.1.11/tests/test_sos_toolbox.py +36 -0
- a3_python-0.1.11/tests/test_sota_interprocedural.py +474 -0
- a3_python-0.1.11/tests/test_sota_intraprocedural.py +441 -0
- a3_python-0.1.11/tests/test_sota_pdr_bmc_ice.py +112 -0
- a3_python-0.1.11/tests/test_ssrf_detection.py +227 -0
- a3_python-0.1.11/tests/test_star_import.py +147 -0
- a3_python-0.1.11/tests/test_stdlib_contracts_expansion.py +252 -0
- a3_python-0.1.11/tests/test_stdlib_module_relations.py +341 -0
- a3_python-0.1.11/tests/test_stdlib_stubs.py +170 -0
- a3_python-0.1.11/tests/test_step_relation.py +401 -0
- a3_python-0.1.11/tests/test_stochastic_precondition_risk.py +11 -0
- a3_python-0.1.11/tests/test_stochastic_replay.py +12 -0
- a3_python-0.1.11/tests/test_store_fast_load_fast.py +259 -0
- a3_python-0.1.11/tests/test_store_global.py +185 -0
- a3_python-0.1.11/tests/test_store_subscr.py +248 -0
- a3_python-0.1.11/tests/test_subprocess_shell_param.py +189 -0
- a3_python-0.1.11/tests/test_symbolic_violation_provenance.py +36 -0
- a3_python-0.1.11/tests/test_taint_lattice.py +349 -0
- a3_python-0.1.11/tests/test_tarslip_kwargs.py +90 -0
- a3_python-0.1.11/tests/test_termination_integration.py +261 -0
- a3_python-0.1.11/tests/test_type_annotations.py +142 -0
- a3_python-0.1.11/tests/test_type_based_sanitizers.py +203 -0
- a3_python-0.1.11/tests/test_type_tracking.py +332 -0
- a3_python-0.1.11/tests/test_unary_operations.py +465 -0
- a3_python-0.1.11/tests/test_unpack_sequence.py +228 -0
- a3_python-0.1.11/tests/test_unpack_sequence_fix.py +152 -0
- a3_python-0.1.11/tests/test_unsafe_assert_fail.py +443 -0
- a3_python-0.1.11/tests/test_unsafe_bounds.py +295 -0
- a3_python-0.1.11/tests/test_unsafe_data_race.py +478 -0
- a3_python-0.1.11/tests/test_unsafe_deadlock.py +606 -0
- a3_python-0.1.11/tests/test_unsafe_div_zero.py +330 -0
- a3_python-0.1.11/tests/test_unsafe_double_free.py +384 -0
- a3_python-0.1.11/tests/test_unsafe_fp_domain.py +204 -0
- a3_python-0.1.11/tests/test_unsafe_info_leak.py +316 -0
- a3_python-0.1.11/tests/test_unsafe_integer_overflow.py +296 -0
- a3_python-0.1.11/tests/test_unsafe_iterator_invalid.py +262 -0
- a3_python-0.1.11/tests/test_unsafe_memory_leak.py +147 -0
- a3_python-0.1.11/tests/test_unsafe_non_termination.py +396 -0
- a3_python-0.1.11/tests/test_unsafe_null_ptr.py +318 -0
- a3_python-0.1.11/tests/test_unsafe_panic.py +476 -0
- a3_python-0.1.11/tests/test_unsafe_send_sync.py +621 -0
- a3_python-0.1.11/tests/test_unsafe_stack_overflow.py +280 -0
- a3_python-0.1.11/tests/test_unsafe_timing_channel.py +376 -0
- a3_python-0.1.11/tests/test_unsafe_type_confusion.py +346 -0
- a3_python-0.1.11/tests/test_unsafe_uninit_memory.py +357 -0
- a3_python-0.1.11/tests/test_user_function_detection.py +192 -0
- a3_python-0.1.11/tests/test_user_function_module_init.py +109 -0
- a3_python-0.1.11/tests/test_xml_bomb.py +150 -0
- a3_python-0.1.11/tests/test_xxe_detection.py +218 -0
- a3_python-0.1.11/tests/test_z3_variable_tracking.py +296 -0
|
@@ -0,0 +1,343 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: a3-python
|
|
3
|
+
Version: 0.1.11
|
|
4
|
+
Summary: Catch real Python bugs before production — 99%+ accuracy, Z3 symbolic execution, LLM-powered false-positive filtering, zero-config GitHub CI
|
|
5
|
+
Requires-Python: >=3.11
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
Requires-Dist: z3-solver>=4.12.0
|
|
8
|
+
Provides-Extra: dev
|
|
9
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
10
|
+
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
11
|
+
Provides-Extra: ci
|
|
12
|
+
Requires-Dist: anthropic>=0.30.0; extra == "ci"
|
|
13
|
+
Requires-Dist: openai>=1.0.0; extra == "ci"
|
|
14
|
+
Requires-Dist: pyyaml>=6.0; extra == "ci"
|
|
15
|
+
|
|
16
|
+
# PythonFromScratch
|
|
17
|
+
|
|
18
|
+
A static analysis tool for Python that finds **real bugs** in large codebases using bytecode analysis, barrier-certificate proofs, and Z3-backed symbolic execution.
|
|
19
|
+
|
|
20
|
+
Tested on Microsoft DeepSpeed (5,000+ functions) — found **6 confirmed true positives** including silent data corruption and unguarded division-by-zero bugs, while automatically proving 87.6% of candidates as false positives.
|
|
21
|
+
|
|
22
|
+
---
|
|
23
|
+
|
|
24
|
+
## Install
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
git clone https://github.com/halleyyoung/PythonFromScratch.git
|
|
28
|
+
cd PythonFromScratch
|
|
29
|
+
pip install -e .
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
Requires **Python ≥ 3.11** and **z3-solver** (installed automatically).
|
|
33
|
+
|
|
34
|
+
---
|
|
35
|
+
|
|
36
|
+
## Walkthrough: Analyze a Real Project
|
|
37
|
+
|
|
38
|
+
### 1. Clone a target repo
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
git clone https://github.com/microsoft/DeepSpeed.git external_tools/DeepSpeed
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
### 2. Run the analyzer
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
python3.11 -m pyfromscratch external_tools/DeepSpeed/deepspeed/
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
This runs the full pipeline automatically:
|
|
51
|
+
|
|
52
|
+
```
|
|
53
|
+
======================================================================
|
|
54
|
+
PythonFromScratch — Full Project Analysis
|
|
55
|
+
Target: external_tools/DeepSpeed/deepspeed
|
|
56
|
+
======================================================================
|
|
57
|
+
|
|
58
|
+
STEP 1: BUILDING CALL GRAPH
|
|
59
|
+
Functions: 5003 (2.2s)
|
|
60
|
+
|
|
61
|
+
STEP 2: COMPUTING CRASH SUMMARIES
|
|
62
|
+
Summaries: 5003 (329.4s)
|
|
63
|
+
|
|
64
|
+
STEP 3: BUILDING CODE OBJECTS FOR DSE
|
|
65
|
+
Code objects: 5003 (0.0s)
|
|
66
|
+
|
|
67
|
+
STEP 4: BUG TYPE COVERAGE
|
|
68
|
+
2928 NULL_PTR
|
|
69
|
+
689 BOUNDS
|
|
70
|
+
358 ASSERT_FAIL
|
|
71
|
+
119 DIV_ZERO
|
|
72
|
+
35 RUNTIME_ERROR
|
|
73
|
+
...
|
|
74
|
+
|
|
75
|
+
STEP 5: BARRIER CERTIFICATE + DSE ANALYSIS
|
|
76
|
+
Total bug instances: 4613
|
|
77
|
+
Fully guarded (guards): 3008
|
|
78
|
+
Unguarded: 1605
|
|
79
|
+
|
|
80
|
+
Barrier results (35.8s):
|
|
81
|
+
Proven FP: 1031/1605
|
|
82
|
+
Remaining: 574
|
|
83
|
+
|
|
84
|
+
STEP 6: DSE RESULTS
|
|
85
|
+
DSE confirmed FP: 4
|
|
86
|
+
DSE confirmed TP: 493
|
|
87
|
+
|
|
88
|
+
STEP 7: TRUE POSITIVE CANDIDATES
|
|
89
|
+
Production code bugs: 571
|
|
90
|
+
Test-only code bugs: 3
|
|
91
|
+
|
|
92
|
+
TRUE POSITIVES (DSE-confirmed reachable):
|
|
93
|
+
⚠️ DIV_ZERO in utils.groups._ensure_divisibility
|
|
94
|
+
⚠️ DIV_ZERO in utils.timer.ThroughputTimer._is_report_boundary
|
|
95
|
+
⚠️ DIV_ZERO in inference.v2.inference_utils.ceil_div
|
|
96
|
+
...
|
|
97
|
+
|
|
98
|
+
SUMMARY
|
|
99
|
+
Functions analysed: 5003
|
|
100
|
+
Total bug instances: 4613
|
|
101
|
+
Proven false positive: 4039 (87.6%)
|
|
102
|
+
Remaining candidates: 574
|
|
103
|
+
DSE-confirmed TPs: 493
|
|
104
|
+
|
|
105
|
+
Results saved to results/deepspeed_results.pkl
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
### 3. Filter remaining false positives with Copilot
|
|
109
|
+
|
|
110
|
+
The analyzer's guard detection, barrier certificates, and DSE automatically rule out ~88% of the reported bug instances as false positives. The remaining candidates include bugs that are technically reachable but may be guarded by framework invariants invisible at the bytecode level (e.g., "this parameter is always non-None because PyTorch guarantees it").
|
|
111
|
+
|
|
112
|
+
**Ask GitHub Copilot (or any LLM) to triage the remaining candidates:**
|
|
113
|
+
|
|
114
|
+
> Look at the output from `python3.11 -m pyfromscratch external_tools/DeepSpeed/deepspeed/`. For each remaining TP candidate, read the actual source code and callers to determine if it's a real bug or a false positive. Classify each as:
|
|
115
|
+
>
|
|
116
|
+
> - **REAL_BUG** — genuinely reachable crash from user input or config
|
|
117
|
+
> - **INTENTIONAL_GUARD** — deliberate `raise` (working as designed)
|
|
118
|
+
> - **FP_SELF** — attribute access on `self` (never None)
|
|
119
|
+
> - **FP_FRAMEWORK** — parameter guaranteed by framework (pytest, argparse, etc.)
|
|
120
|
+
> - **FP_INTERNAL** — parameter guaranteed by internal plumbing
|
|
121
|
+
>
|
|
122
|
+
> Write up the confirmed true positives in a markdown report.
|
|
123
|
+
|
|
124
|
+
This step typically reduces 500+ candidates down to **5–10 real bugs** with source-level evidence.
|
|
125
|
+
|
|
126
|
+
See [docs/TRUE_POSITIVE_ANALYSIS.md](docs/TRUE_POSITIVE_ANALYSIS.md) for our full DeepSpeed investigation.
|
|
127
|
+
|
|
128
|
+
---
|
|
129
|
+
|
|
130
|
+
## Single-File Analysis
|
|
131
|
+
|
|
132
|
+
```bash
|
|
133
|
+
# Analyze one file
|
|
134
|
+
python3.11 -m pyfromscratch myfile.py
|
|
135
|
+
|
|
136
|
+
# Security analysis — treats each function as an entry point with tainted params
|
|
137
|
+
python3.11 -m pyfromscratch myfile.py --functions
|
|
138
|
+
|
|
139
|
+
# Verbose output
|
|
140
|
+
python3.11 -m pyfromscratch myfile.py --verbose
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
**Exit codes:** `0` = SAFE, `1` = BUG found, `2` = UNKNOWN, `3` = error
|
|
144
|
+
|
|
145
|
+
---
|
|
146
|
+
|
|
147
|
+
## All Options
|
|
148
|
+
|
|
149
|
+
| Option | Description |
|
|
150
|
+
|--------|-------------|
|
|
151
|
+
| `--verbose` | Detailed output |
|
|
152
|
+
| `--functions` | Treat each function as a tainted entry point |
|
|
153
|
+
| `--all-functions` | Analyze ALL functions as entry points |
|
|
154
|
+
| `--interprocedural` | Cross-function taint analysis with call graph |
|
|
155
|
+
| `--entry-points NAME,...` | Specify entry point functions |
|
|
156
|
+
| `--min-confidence 0.0-1.0` | Filter bugs by confidence score |
|
|
157
|
+
| `--deduplicate` | Deduplicate findings by type + location |
|
|
158
|
+
| `--save-results PATH` | Custom output path (default: `results/<name>_results.pkl`) |
|
|
159
|
+
| `--context-depth N` | k-CFA context sensitivity (0, 1, 2, ...) |
|
|
160
|
+
| `--check-termination` | Detect non-terminating loops |
|
|
161
|
+
| `--synthesize-invariants` | Generate inductive loop invariants |
|
|
162
|
+
| `--no-concolic` | Pure symbolic analysis (no concrete execution) |
|
|
163
|
+
|
|
164
|
+
---
|
|
165
|
+
|
|
166
|
+
## Detected Bug Types
|
|
167
|
+
|
|
168
|
+
### Security Vulnerabilities (47 types)
|
|
169
|
+
|
|
170
|
+
**Injection**
|
|
171
|
+
- `SQL_INJECTION` — Unsanitized input in SQL queries
|
|
172
|
+
- `COMMAND_INJECTION` — Shell command injection
|
|
173
|
+
- `CODE_INJECTION` — Eval/exec with untrusted data
|
|
174
|
+
- `PATH_INJECTION` — Path traversal attacks
|
|
175
|
+
- `LDAP_INJECTION`, `XPATH_INJECTION`, `NOSQL_INJECTION`
|
|
176
|
+
- `REGEX_INJECTION` — ReDoS via user-controlled patterns
|
|
177
|
+
- `HEADER_INJECTION`, `COOKIE_INJECTION`
|
|
178
|
+
|
|
179
|
+
**Cross-Site Scripting (XSS)**
|
|
180
|
+
- `REFLECTED_XSS` — User input reflected in HTML output
|
|
181
|
+
|
|
182
|
+
**Server-Side Request Forgery**
|
|
183
|
+
- `SSRF` — Requests to user-controlled URLs
|
|
184
|
+
- `PARTIAL_SSRF` — Partial URL control
|
|
185
|
+
|
|
186
|
+
**Deserialization**
|
|
187
|
+
- `UNSAFE_DESERIALIZATION` — Pickle/YAML with untrusted data
|
|
188
|
+
- `XXE` — XML External Entity injection
|
|
189
|
+
- `XML_BOMB` — Billion laughs attack
|
|
190
|
+
|
|
191
|
+
**Sensitive Data**
|
|
192
|
+
- `CLEARTEXT_LOGGING` — Passwords/secrets in logs
|
|
193
|
+
- `CLEARTEXT_STORAGE` — Unencrypted sensitive data
|
|
194
|
+
- `HARDCODED_CREDENTIALS`
|
|
195
|
+
|
|
196
|
+
**Cryptography**
|
|
197
|
+
- `WEAK_CRYPTO` — MD5/SHA1 for security
|
|
198
|
+
- `WEAK_CRYPTO_KEY` — Insufficient key sizes
|
|
199
|
+
- `BROKEN_CRYPTO_ALGORITHM` — DES, RC4, etc.
|
|
200
|
+
- `INSECURE_PROTOCOL` — HTTP, FTP, Telnet
|
|
201
|
+
|
|
202
|
+
**Web Security**
|
|
203
|
+
- `URL_REDIRECT` — Open redirect vulnerabilities
|
|
204
|
+
- `CSRF_PROTECTION_DISABLED`
|
|
205
|
+
- `FLASK_DEBUG` — Debug mode in production
|
|
206
|
+
- `INSECURE_COOKIE` — Missing Secure/HttpOnly flags
|
|
207
|
+
- `JINJA2_AUTOESCAPE_FALSE`
|
|
208
|
+
|
|
209
|
+
**File System**
|
|
210
|
+
- `TAR_SLIP` — Tar extraction path traversal
|
|
211
|
+
- `INSECURE_TEMPORARY_FILE`
|
|
212
|
+
- `WEAK_FILE_PERMISSIONS`
|
|
213
|
+
|
|
214
|
+
**Network**
|
|
215
|
+
- `BIND_TO_ALL_INTERFACES` — 0.0.0.0 binding
|
|
216
|
+
- `MISSING_HOST_KEY_VALIDATION`
|
|
217
|
+
- `CERT_VALIDATION_DISABLED`
|
|
218
|
+
|
|
219
|
+
**Regex**
|
|
220
|
+
- `REDOS` — Catastrophic backtracking
|
|
221
|
+
- `POLYNOMIAL_REDOS`
|
|
222
|
+
- `BAD_TAG_FILTER`
|
|
223
|
+
- `INCOMPLETE_HOSTNAME_REGEXP`
|
|
224
|
+
|
|
225
|
+
### Core Bug Types (20 types)
|
|
226
|
+
|
|
227
|
+
- `DIV_ZERO` — Division by zero
|
|
228
|
+
- `NULL_PTR` — None dereference
|
|
229
|
+
- `BOUNDS` — Index out of bounds
|
|
230
|
+
- `TYPE_CONFUSION` — Type errors
|
|
231
|
+
- `ASSERT_FAIL` — Failed assertions
|
|
232
|
+
- `INTEGER_OVERFLOW`
|
|
233
|
+
- `NON_TERMINATION` — Infinite loops
|
|
234
|
+
- `MEMORY_LEAK`, `USE_AFTER_FREE`, `DOUBLE_FREE`
|
|
235
|
+
- `DATA_RACE`, `DEADLOCK`
|
|
236
|
+
- `INFO_LEAK`, `TIMING_CHANNEL`
|
|
237
|
+
|
|
238
|
+
## Examples
|
|
239
|
+
|
|
240
|
+
### Finding SQL Injection
|
|
241
|
+
|
|
242
|
+
```python
|
|
243
|
+
# vulnerable.py
|
|
244
|
+
import sqlite3
|
|
245
|
+
|
|
246
|
+
def get_user(user_id):
|
|
247
|
+
conn = sqlite3.connect('users.db')
|
|
248
|
+
cursor = conn.cursor()
|
|
249
|
+
cursor.execute(f"SELECT * FROM users WHERE id = {user_id}") # BUG!
|
|
250
|
+
return cursor.fetchone()
|
|
251
|
+
```
|
|
252
|
+
|
|
253
|
+
```bash
|
|
254
|
+
$ pyfromscratch vulnerable.py --functions
|
|
255
|
+
Analyzing: vulnerable.py
|
|
256
|
+
|
|
257
|
+
Function-level entry points: 1
|
|
258
|
+
get_user: BUG
|
|
259
|
+
SQL_INJECTION: Tainted value flows to SQL query at line 7
|
|
260
|
+
|
|
261
|
+
Total bugs found: 1
|
|
262
|
+
```
|
|
263
|
+
|
|
264
|
+
### Verifying Safe Code
|
|
265
|
+
|
|
266
|
+
```python
|
|
267
|
+
# safe.py
|
|
268
|
+
import sqlite3
|
|
269
|
+
|
|
270
|
+
def get_user(user_id):
|
|
271
|
+
conn = sqlite3.connect('users.db')
|
|
272
|
+
cursor = conn.cursor()
|
|
273
|
+
cursor.execute("SELECT * FROM users WHERE id = ?", (user_id,)) # Safe!
|
|
274
|
+
return cursor.fetchone()
|
|
275
|
+
```
|
|
276
|
+
|
|
277
|
+
```bash
|
|
278
|
+
$ pyfromscratch safe.py --functions
|
|
279
|
+
Analyzing: safe.py
|
|
280
|
+
|
|
281
|
+
Function-level entry points: 1
|
|
282
|
+
get_user: SAFE
|
|
283
|
+
|
|
284
|
+
Total bugs found: 0
|
|
285
|
+
```
|
|
286
|
+
|
|
287
|
+
## How It Works
|
|
288
|
+
|
|
289
|
+
The analyzer runs a **7-step pipeline** on a project directory:
|
|
290
|
+
|
|
291
|
+
1. **Call Graph** — Builds a whole-program call graph from all `.py` files
|
|
292
|
+
2. **Crash Summaries** — Disassembles bytecode, finds unguarded divisions, None-dereferences, out-of-bounds accesses, etc.
|
|
293
|
+
3. **Code Objects** — Extracts Python code objects for symbolic execution
|
|
294
|
+
4. **Guard Detection** — Identifies candidate bug sites that are already protected by `if`, `try/except`, `assert`, or `isinstance` checks
|
|
295
|
+
5. **Barrier Certificates** — 10 proof patterns (assume-guarantee, post-condition, refinement types, inductive invariants, control flow, dataflow, disjunctive, callee return-guarantee, validated params, DSE confirmation) attempt to formally prove each remaining bug is unreachable
|
|
296
|
+
6. **DSE (Z3)** — Dynamic symbolic execution confirms whether a concrete input can trigger each surviving bug
|
|
297
|
+
7. **Classification** — Separates production code from test code, reports true positive candidates
|
|
298
|
+
|
|
299
|
+
The tool produces one of three verdicts per bug:
|
|
300
|
+
- **FP (proven)** — barrier certificate or DSE proves the bug is unreachable
|
|
301
|
+
- **TP candidate** — no proof found; needs human/LLM triage
|
|
302
|
+
- **DSE-confirmed TP** — Z3 found a satisfying assignment that reaches the bug
|
|
303
|
+
|
|
304
|
+
## Architecture
|
|
305
|
+
|
|
306
|
+
```
|
|
307
|
+
pyfromscratch/
|
|
308
|
+
├── __main__.py # python -m pyfromscratch entry point
|
|
309
|
+
├── cli.py # CLI: single-file and project-directory analysis
|
|
310
|
+
├── analyzer.py # Core analysis engine
|
|
311
|
+
├── frontend/ # Python loading, bytecode compilation
|
|
312
|
+
├── cfg/ # Control-flow graph + call graph construction
|
|
313
|
+
├── semantics/ # Symbolic bytecode execution, crash summaries
|
|
314
|
+
├── z3model/ # Z3 value/heap modeling
|
|
315
|
+
├── unsafe/ # Bug type predicates (67 types)
|
|
316
|
+
├── contracts/ # External call modeling, taint sources/sinks
|
|
317
|
+
├── dse/ # Concolic execution oracle (Z3-backed)
|
|
318
|
+
└── barriers/ # Barrier certificate synthesis (10 patterns)
|
|
319
|
+
```
|
|
320
|
+
|
|
321
|
+
## Docker
|
|
322
|
+
|
|
323
|
+
```bash
|
|
324
|
+
# Build
|
|
325
|
+
docker build -t pyfromscratch .
|
|
326
|
+
|
|
327
|
+
# Analyze a directory
|
|
328
|
+
docker run --rm -v $(pwd)/my_project:/target pyfromscratch /target
|
|
329
|
+
|
|
330
|
+
# Analyze a single file
|
|
331
|
+
docker run --rm -v $(pwd):/code pyfromscratch /code/myfile.py --functions
|
|
332
|
+
```
|
|
333
|
+
|
|
334
|
+
## Development
|
|
335
|
+
|
|
336
|
+
```bash
|
|
337
|
+
pytest # Run tests
|
|
338
|
+
pytest --cov=pyfromscratch # With coverage
|
|
339
|
+
```
|
|
340
|
+
|
|
341
|
+
## License
|
|
342
|
+
|
|
343
|
+
See LICENSE file.
|
|
@@ -0,0 +1,328 @@
|
|
|
1
|
+
# PythonFromScratch
|
|
2
|
+
|
|
3
|
+
A static analysis tool for Python that finds **real bugs** in large codebases using bytecode analysis, barrier-certificate proofs, and Z3-backed symbolic execution.
|
|
4
|
+
|
|
5
|
+
Tested on Microsoft DeepSpeed (5,000+ functions) — found **6 confirmed true positives** including silent data corruption and unguarded division-by-zero bugs, while automatically proving 87.6% of candidates as false positives.
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## Install
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
git clone https://github.com/halleyyoung/PythonFromScratch.git
|
|
13
|
+
cd PythonFromScratch
|
|
14
|
+
pip install -e .
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
Requires **Python ≥ 3.11** and **z3-solver** (installed automatically).
|
|
18
|
+
|
|
19
|
+
---
|
|
20
|
+
|
|
21
|
+
## Walkthrough: Analyze a Real Project
|
|
22
|
+
|
|
23
|
+
### 1. Clone a target repo
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
git clone https://github.com/microsoft/DeepSpeed.git external_tools/DeepSpeed
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
### 2. Run the analyzer
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
python3.11 -m pyfromscratch external_tools/DeepSpeed/deepspeed/
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
This runs the full pipeline automatically:
|
|
36
|
+
|
|
37
|
+
```
|
|
38
|
+
======================================================================
|
|
39
|
+
PythonFromScratch — Full Project Analysis
|
|
40
|
+
Target: external_tools/DeepSpeed/deepspeed
|
|
41
|
+
======================================================================
|
|
42
|
+
|
|
43
|
+
STEP 1: BUILDING CALL GRAPH
|
|
44
|
+
Functions: 5003 (2.2s)
|
|
45
|
+
|
|
46
|
+
STEP 2: COMPUTING CRASH SUMMARIES
|
|
47
|
+
Summaries: 5003 (329.4s)
|
|
48
|
+
|
|
49
|
+
STEP 3: BUILDING CODE OBJECTS FOR DSE
|
|
50
|
+
Code objects: 5003 (0.0s)
|
|
51
|
+
|
|
52
|
+
STEP 4: BUG TYPE COVERAGE
|
|
53
|
+
2928 NULL_PTR
|
|
54
|
+
689 BOUNDS
|
|
55
|
+
358 ASSERT_FAIL
|
|
56
|
+
119 DIV_ZERO
|
|
57
|
+
35 RUNTIME_ERROR
|
|
58
|
+
...
|
|
59
|
+
|
|
60
|
+
STEP 5: BARRIER CERTIFICATE + DSE ANALYSIS
|
|
61
|
+
Total bug instances: 4613
|
|
62
|
+
Fully guarded (guards): 3008
|
|
63
|
+
Unguarded: 1605
|
|
64
|
+
|
|
65
|
+
Barrier results (35.8s):
|
|
66
|
+
Proven FP: 1031/1605
|
|
67
|
+
Remaining: 574
|
|
68
|
+
|
|
69
|
+
STEP 6: DSE RESULTS
|
|
70
|
+
DSE confirmed FP: 4
|
|
71
|
+
DSE confirmed TP: 493
|
|
72
|
+
|
|
73
|
+
STEP 7: TRUE POSITIVE CANDIDATES
|
|
74
|
+
Production code bugs: 571
|
|
75
|
+
Test-only code bugs: 3
|
|
76
|
+
|
|
77
|
+
TRUE POSITIVES (DSE-confirmed reachable):
|
|
78
|
+
⚠️ DIV_ZERO in utils.groups._ensure_divisibility
|
|
79
|
+
⚠️ DIV_ZERO in utils.timer.ThroughputTimer._is_report_boundary
|
|
80
|
+
⚠️ DIV_ZERO in inference.v2.inference_utils.ceil_div
|
|
81
|
+
...
|
|
82
|
+
|
|
83
|
+
SUMMARY
|
|
84
|
+
Functions analysed: 5003
|
|
85
|
+
Total bug instances: 4613
|
|
86
|
+
Proven false positive: 4039 (87.6%)
|
|
87
|
+
Remaining candidates: 574
|
|
88
|
+
DSE-confirmed TPs: 493
|
|
89
|
+
|
|
90
|
+
Results saved to results/deepspeed_results.pkl
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
### 3. Filter remaining false positives with Copilot
|
|
94
|
+
|
|
95
|
+
The analyzer's guard detection, barrier certificates, and DSE automatically rule out ~88% of the reported bug instances as false positives. The remaining candidates include bugs that are technically reachable but may be guarded by framework invariants invisible at the bytecode level (e.g., "this parameter is always non-None because PyTorch guarantees it").
|
|
96
|
+
|
|
97
|
+
**Ask GitHub Copilot (or any LLM) to triage the remaining candidates:**
|
|
98
|
+
|
|
99
|
+
> Look at the output from `python3.11 -m pyfromscratch external_tools/DeepSpeed/deepspeed/`. For each remaining TP candidate, read the actual source code and callers to determine if it's a real bug or a false positive. Classify each as:
|
|
100
|
+
>
|
|
101
|
+
> - **REAL_BUG** — genuinely reachable crash from user input or config
|
|
102
|
+
> - **INTENTIONAL_GUARD** — deliberate `raise` (working as designed)
|
|
103
|
+
> - **FP_SELF** — attribute access on `self` (never None)
|
|
104
|
+
> - **FP_FRAMEWORK** — parameter guaranteed by framework (pytest, argparse, etc.)
|
|
105
|
+
> - **FP_INTERNAL** — parameter guaranteed by internal plumbing
|
|
106
|
+
>
|
|
107
|
+
> Write up the confirmed true positives in a markdown report.
|
|
108
|
+
|
|
109
|
+
This step typically reduces 500+ candidates down to **5–10 real bugs** with source-level evidence.
|
|
110
|
+
|
|
111
|
+
See [docs/TRUE_POSITIVE_ANALYSIS.md](docs/TRUE_POSITIVE_ANALYSIS.md) for our full DeepSpeed investigation.
|
|
112
|
+
|
|
113
|
+
---
|
|
114
|
+
|
|
115
|
+
## Single-File Analysis
|
|
116
|
+
|
|
117
|
+
```bash
|
|
118
|
+
# Analyze one file
|
|
119
|
+
python3.11 -m pyfromscratch myfile.py
|
|
120
|
+
|
|
121
|
+
# Security analysis — treats each function as an entry point with tainted params
|
|
122
|
+
python3.11 -m pyfromscratch myfile.py --functions
|
|
123
|
+
|
|
124
|
+
# Verbose output
|
|
125
|
+
python3.11 -m pyfromscratch myfile.py --verbose
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
**Exit codes:** `0` = SAFE, `1` = BUG found, `2` = UNKNOWN, `3` = error
|
|
129
|
+
|
|
130
|
+
---
|
|
131
|
+
|
|
132
|
+
## All Options
|
|
133
|
+
|
|
134
|
+
| Option | Description |
|
|
135
|
+
|--------|-------------|
|
|
136
|
+
| `--verbose` | Detailed output |
|
|
137
|
+
| `--functions` | Treat each function as a tainted entry point |
|
|
138
|
+
| `--all-functions` | Analyze ALL functions as entry points |
|
|
139
|
+
| `--interprocedural` | Cross-function taint analysis with call graph |
|
|
140
|
+
| `--entry-points NAME,...` | Specify entry point functions |
|
|
141
|
+
| `--min-confidence 0.0-1.0` | Filter bugs by confidence score |
|
|
142
|
+
| `--deduplicate` | Deduplicate findings by type + location |
|
|
143
|
+
| `--save-results PATH` | Custom output path (default: `results/<name>_results.pkl`) |
|
|
144
|
+
| `--context-depth N` | k-CFA context sensitivity (0, 1, 2, ...) |
|
|
145
|
+
| `--check-termination` | Detect non-terminating loops |
|
|
146
|
+
| `--synthesize-invariants` | Generate inductive loop invariants |
|
|
147
|
+
| `--no-concolic` | Pure symbolic analysis (no concrete execution) |
|
|
148
|
+
|
|
149
|
+
---
|
|
150
|
+
|
|
151
|
+
## Detected Bug Types
|
|
152
|
+
|
|
153
|
+
### Security Vulnerabilities (47 types)
|
|
154
|
+
|
|
155
|
+
**Injection**
|
|
156
|
+
- `SQL_INJECTION` — Unsanitized input in SQL queries
|
|
157
|
+
- `COMMAND_INJECTION` — Shell command injection
|
|
158
|
+
- `CODE_INJECTION` — Eval/exec with untrusted data
|
|
159
|
+
- `PATH_INJECTION` — Path traversal attacks
|
|
160
|
+
- `LDAP_INJECTION`, `XPATH_INJECTION`, `NOSQL_INJECTION`
|
|
161
|
+
- `REGEX_INJECTION` — ReDoS via user-controlled patterns
|
|
162
|
+
- `HEADER_INJECTION`, `COOKIE_INJECTION`
|
|
163
|
+
|
|
164
|
+
**Cross-Site Scripting (XSS)**
|
|
165
|
+
- `REFLECTED_XSS` — User input reflected in HTML output
|
|
166
|
+
|
|
167
|
+
**Server-Side Request Forgery**
|
|
168
|
+
- `SSRF` — Requests to user-controlled URLs
|
|
169
|
+
- `PARTIAL_SSRF` — Partial URL control
|
|
170
|
+
|
|
171
|
+
**Deserialization**
|
|
172
|
+
- `UNSAFE_DESERIALIZATION` — Pickle/YAML with untrusted data
|
|
173
|
+
- `XXE` — XML External Entity injection
|
|
174
|
+
- `XML_BOMB` — Billion laughs attack
|
|
175
|
+
|
|
176
|
+
**Sensitive Data**
|
|
177
|
+
- `CLEARTEXT_LOGGING` — Passwords/secrets in logs
|
|
178
|
+
- `CLEARTEXT_STORAGE` — Unencrypted sensitive data
|
|
179
|
+
- `HARDCODED_CREDENTIALS`
|
|
180
|
+
|
|
181
|
+
**Cryptography**
|
|
182
|
+
- `WEAK_CRYPTO` — MD5/SHA1 for security
|
|
183
|
+
- `WEAK_CRYPTO_KEY` — Insufficient key sizes
|
|
184
|
+
- `BROKEN_CRYPTO_ALGORITHM` — DES, RC4, etc.
|
|
185
|
+
- `INSECURE_PROTOCOL` — HTTP, FTP, Telnet
|
|
186
|
+
|
|
187
|
+
**Web Security**
|
|
188
|
+
- `URL_REDIRECT` — Open redirect vulnerabilities
|
|
189
|
+
- `CSRF_PROTECTION_DISABLED`
|
|
190
|
+
- `FLASK_DEBUG` — Debug mode in production
|
|
191
|
+
- `INSECURE_COOKIE` — Missing Secure/HttpOnly flags
|
|
192
|
+
- `JINJA2_AUTOESCAPE_FALSE`
|
|
193
|
+
|
|
194
|
+
**File System**
|
|
195
|
+
- `TAR_SLIP` — Tar extraction path traversal
|
|
196
|
+
- `INSECURE_TEMPORARY_FILE`
|
|
197
|
+
- `WEAK_FILE_PERMISSIONS`
|
|
198
|
+
|
|
199
|
+
**Network**
|
|
200
|
+
- `BIND_TO_ALL_INTERFACES` — 0.0.0.0 binding
|
|
201
|
+
- `MISSING_HOST_KEY_VALIDATION`
|
|
202
|
+
- `CERT_VALIDATION_DISABLED`
|
|
203
|
+
|
|
204
|
+
**Regex**
|
|
205
|
+
- `REDOS` — Catastrophic backtracking
|
|
206
|
+
- `POLYNOMIAL_REDOS`
|
|
207
|
+
- `BAD_TAG_FILTER`
|
|
208
|
+
- `INCOMPLETE_HOSTNAME_REGEXP`
|
|
209
|
+
|
|
210
|
+
### Core Bug Types (20 types)
|
|
211
|
+
|
|
212
|
+
- `DIV_ZERO` — Division by zero
|
|
213
|
+
- `NULL_PTR` — None dereference
|
|
214
|
+
- `BOUNDS` — Index out of bounds
|
|
215
|
+
- `TYPE_CONFUSION` — Type errors
|
|
216
|
+
- `ASSERT_FAIL` — Failed assertions
|
|
217
|
+
- `INTEGER_OVERFLOW`
|
|
218
|
+
- `NON_TERMINATION` — Infinite loops
|
|
219
|
+
- `MEMORY_LEAK`, `USE_AFTER_FREE`, `DOUBLE_FREE`
|
|
220
|
+
- `DATA_RACE`, `DEADLOCK`
|
|
221
|
+
- `INFO_LEAK`, `TIMING_CHANNEL`
|
|
222
|
+
|
|
223
|
+
## Examples
|
|
224
|
+
|
|
225
|
+
### Finding SQL Injection
|
|
226
|
+
|
|
227
|
+
```python
|
|
228
|
+
# vulnerable.py
|
|
229
|
+
import sqlite3
|
|
230
|
+
|
|
231
|
+
def get_user(user_id):
|
|
232
|
+
    conn = sqlite3.connect('users.db')
|
|
233
|
+
    cursor = conn.cursor()
|
|
234
|
+
    cursor.execute(f"SELECT * FROM users WHERE id = {user_id}") # BUG!
|
|
235
|
+
    return cursor.fetchone()
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
```bash
|
|
239
|
+
$ pyfromscratch vulnerable.py --functions
|
|
240
|
+
Analyzing: vulnerable.py
|
|
241
|
+
|
|
242
|
+
Function-level entry points: 1
|
|
243
|
+
get_user: BUG
|
|
244
|
+
SQL_INJECTION: Tainted value flows to SQL query at line 7
|
|
245
|
+
|
|
246
|
+
Total bugs found: 1
|
|
247
|
+
```
|
|
248
|
+
|
|
249
|
+
### Verifying Safe Code
|
|
250
|
+
|
|
251
|
+
```python
|
|
252
|
+
# safe.py
|
|
253
|
+
import sqlite3
|
|
254
|
+
|
|
255
|
+
def get_user(user_id):
|
|
256
|
+
    conn = sqlite3.connect('users.db')
|
|
257
|
+
    cursor = conn.cursor()
|
|
258
|
+
    cursor.execute("SELECT * FROM users WHERE id = ?", (user_id,)) # Safe!
|
|
259
|
+
    return cursor.fetchone()
|
|
260
|
+
```
|
|
261
|
+
|
|
262
|
+
```bash
|
|
263
|
+
$ pyfromscratch safe.py --functions
|
|
264
|
+
Analyzing: safe.py
|
|
265
|
+
|
|
266
|
+
Function-level entry points: 1
|
|
267
|
+
get_user: SAFE
|
|
268
|
+
|
|
269
|
+
Total bugs found: 0
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
## How It Works
|
|
273
|
+
|
|
274
|
+
The analyzer runs a **7-step pipeline** on a project directory:
|
|
275
|
+
|
|
276
|
+
1. **Call Graph** — Builds a whole-program call graph from all `.py` files
|
|
277
|
+
2. **Crash Summaries** — Disassembles bytecode, finds unguarded divisions, None-dereferences, out-of-bounds accesses, etc.
|
|
278
|
+
3. **Code Objects** — Extracts Python code objects for symbolic execution
|
|
279
|
+
4. **Guard Detection** — Identifies candidate bugs that are already guarded by `if`, `try/except`, `assert`, or `isinstance` checks
|
|
280
|
+
5. **Barrier Certificates** — 10 proof patterns (assume-guarantee, post-condition, refinement types, inductive invariants, control flow, dataflow, disjunctive, callee return-guarantee, validated params, DSE confirmation) attempt to formally prove each remaining bug is unreachable
|
|
281
|
+
6. **DSE (Z3)** — Dynamic symbolic execution checks whether a concrete input can trigger each surviving bug
|
|
282
|
+
7. **Classification** — Separates production code from test code, reports true positive candidates
|
|
283
|
+
|
|
284
|
+
The tool produces one of three verdicts per bug:
|
|
285
|
+
- **FP (proven)** — false positive: a barrier certificate or DSE proves the bug is unreachable
|
|
286
|
+
- **TP candidate** — possible true positive: no proof of unreachability was found; needs human/LLM triage
|
|
287
|
+
- **DSE-confirmed TP** — Z3 found a satisfying assignment that reaches the bug
|
|
288
|
+
|
|
289
|
+
## Architecture
|
|
290
|
+
|
|
291
|
+
```
|
|
292
|
+
pyfromscratch/
|
|
293
|
+
├── __main__.py # python -m pyfromscratch entry point
|
|
294
|
+
├── cli.py # CLI: single-file and project-directory analysis
|
|
295
|
+
├── analyzer.py # Core analysis engine
|
|
296
|
+
├── frontend/ # Python loading, bytecode compilation
|
|
297
|
+
├── cfg/ # Control-flow graph + call graph construction
|
|
298
|
+
├── semantics/ # Symbolic bytecode execution, crash summaries
|
|
299
|
+
├── z3model/ # Z3 value/heap modeling
|
|
300
|
+
├── unsafe/ # Bug type predicates (67 types)
|
|
301
|
+
├── contracts/ # External call modeling, taint sources/sinks
|
|
302
|
+
├── dse/ # Concolic execution oracle (Z3-backed)
|
|
303
|
+
└── barriers/ # Barrier certificate synthesis (10 patterns)
|
|
304
|
+
```
|
|
305
|
+
|
|
306
|
+
## Docker
|
|
307
|
+
|
|
308
|
+
```bash
|
|
309
|
+
# Build
|
|
310
|
+
docker build -t pyfromscratch .
|
|
311
|
+
|
|
312
|
+
# Analyze a directory
|
|
313
|
+
docker run --rm -v "$(pwd)/my_project:/target" pyfromscratch /target
|
|
314
|
+
|
|
315
|
+
# Analyze a single file
|
|
316
|
+
docker run --rm -v "$(pwd):/code" pyfromscratch /code/myfile.py --functions
|
|
317
|
+
```
|
|
318
|
+
|
|
319
|
+
## Development
|
|
320
|
+
|
|
321
|
+
```bash
|
|
322
|
+
pytest # Run tests
|
|
323
|
+
pytest --cov=pyfromscratch # With coverage
|
|
324
|
+
```
|
|
325
|
+
|
|
326
|
+
## License
|
|
327
|
+
|
|
328
|
+
See LICENSE file.
|