a3-python 0.1.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,146 @@
1
+ """
2
+ ``a3 init`` — bootstrap any repo with CI workflows.
3
+
4
+ Copies workflow templates into ``.github/workflows/``, creates a
5
+ ``.a3.yml`` config, and initialises an empty baseline file.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import shutil
11
+ from importlib import resources
12
+ from pathlib import Path
13
+
14
+ from .config import A3Config
15
+
16
+
17
# Workflow template filenames shipped with the package. ``init_repo`` copies
# each one from the package's ``templates`` directory into the target
# repository's ``.github/workflows/`` directory under the same name.
_WORKFLOW_TEMPLATES = [
    "a3-pr-scan.yml",
    "a3-scheduled-scan.yml",
]
22
+
23
+
24
def init_repo(
    repo_root: Path,
    *,
    overwrite: bool = False,
    enable_llm_triage: bool = False,
    llm_provider: str = "anthropic",
    llm_model: str = "claude-sonnet-4-20250514",
    copilot: bool = False,
) -> list[str]:
    """
    Bootstrap a repository with a3 CI integration.

    Creates:
      - ``.github/workflows/a3-pr-scan.yml``
      - ``.github/workflows/a3-scheduled-scan.yml``
      - ``.a3.yml``
      - ``.a3-baseline.json``

    Files that already exist are skipped (with a message on stdout) unless
    ``overwrite`` is set. All files are read and written as UTF-8.

    Parameters
    ----------
    repo_root : Path
        Root of the target git repository.
    overwrite : bool
        If True, overwrite existing files.
    enable_llm_triage : bool
        Whether to enable LLM triage in the config.
    llm_provider : str
        Provider name stored in the config's CI section.
    llm_model : str
        Model name stored in the config's CI section.
    copilot : bool
        Convenience shorthand: forces ``enable_llm_triage=True``,
        ``llm_provider="github"`` and ``llm_model="gpt-4o"``, overriding
        whatever was passed for those three parameters.

    Returns
    -------
    list[str]
        Paths of all files created (relative to repo_root).
    """
    repo_root = Path(repo_root).resolve()
    created: list[str] = []

    # --copilot is a convenience shorthand
    if copilot:
        enable_llm_triage = True
        llm_provider = "github"
        llm_model = "gpt-4o"

    # ── 1. Workflow files ────────────────────────────────────────────────
    workflows_dir = repo_root / ".github" / "workflows"
    workflows_dir.mkdir(parents=True, exist_ok=True)

    templates_dir = Path(__file__).parent / "templates"

    for template_name in _WORKFLOW_TEMPLATES:
        src = templates_dir / template_name
        dst = workflows_dir / template_name

        if dst.exists() and not overwrite:
            print(f" ⏭ {dst.relative_to(repo_root)} already exists (use --overwrite to replace)")
            continue

        # The shipped templates contain non-ASCII characters (box-drawing
        # rules, "A³", em dashes). Pin UTF-8 so the copy does not depend on
        # the platform's locale encoding.
        content = src.read_text(encoding="utf-8")
        dst.write_text(content, encoding="utf-8")
        rel = str(dst.relative_to(repo_root))
        created.append(rel)
        print(f" ✅ Created {rel}")

    # ── 2. Config file ───────────────────────────────────────────────────
    config_path = repo_root / ".a3.yml"
    if config_path.exists() and not overwrite:
        print(" ⏭ .a3.yml already exists")
    else:
        config = A3Config()
        config.ci.llm_triage = enable_llm_triage
        config.ci.llm_provider = llm_provider
        config.ci.llm_model = llm_model
        config_path.write_text(config.to_yaml(), encoding="utf-8")
        created.append(".a3.yml")
        print(" ✅ Created .a3.yml")

    # ── 3. Empty baseline ────────────────────────────────────────────────
    baseline_path = repo_root / ".a3-baseline.json"
    if baseline_path.exists() and not overwrite:
        print(" ⏭ .a3-baseline.json already exists")
    else:
        baseline_path.write_text(
            '{\n "version": 1,\n "findings": {}\n}\n',
            encoding="utf-8",
        )
        created.append(".a3-baseline.json")
        print(" ✅ Created .a3-baseline.json")

    return created
108
+
109
+
110
def cmd_init(
    repo_root: Path,
    *,
    overwrite: bool = False,
    llm_triage: bool = False,
    copilot: bool = False,
) -> int:
    """``a3 init`` CLI entry point.

    Prints a banner, delegates the actual file creation to ``init_repo``,
    then prints either the suggested git commands (when at least one file
    was created) or a "nothing to do" hint.

    Parameters
    ----------
    repo_root : Path
        Root of the target git repository; passed through to ``init_repo``.
    overwrite : bool
        Passed through to ``init_repo``.
    llm_triage : bool
        Passed to ``init_repo`` as ``enable_llm_triage``; also selects the
        API-key reminder when ``copilot`` is not set.
    copilot : bool
        Passed through to ``init_repo``; also selects the Copilot notice.

    Returns
    -------
    int
        Always ``0``.
    """
    print(f"\n🚀 Initialising a3 CI in {repo_root}\n")

    created = init_repo(
        repo_root,
        overwrite=overwrite,
        enable_llm_triage=llm_triage,
        copilot=copilot,
    )

    print(f"\n{'─' * 50}")
    if created:
        print(f"Created {len(created)} file(s). Next steps:\n")
        print(" git add .github/ .a3.yml .a3-baseline.json")
        print(" git commit -m 'ci: add a3 static analysis'")
        print(" git push")
        print()
        # The copilot notice takes precedence over the generic API-key
        # reminder, since copilot mode needs no extra secret.
        if copilot:
            print(" ✅ GitHub Copilot triage is enabled — no API keys needed!")
            print(" The workflow uses GITHUB_TOKEN which is provided automatically.")
            print()
        elif llm_triage:
            print(" ⚠ LLM triage is enabled. Make sure to add your API key")
            print(" as a repository secret: ANTHROPIC_API_KEY or OPENAI_API_KEY")
            print()
    else:
        print("Nothing to do — all files already exist.")
        print("Use --overwrite to replace existing files.")
    # NOTE(review): this trailing blank line is assumed to run for both
    # branches — confirm its nesting against the original file.
    print()
    return 0
@@ -0,0 +1,98 @@
1
+ # ──────────────────────────────────────────────────────────────────────────────
2
+ # a3 — Scan on every code change
3
+ #
4
+ # Runs a3 on pushes to main/master and on pull requests that touch Python files,
5
+ # triages findings with GitHub Copilot, diffs against the committed baseline,
6
+ # and uploads SARIF to GitHub Code Scanning.
7
+ #
8
+ # Install: a3 init .
9
+ # Docs: https://github.com/thehalleyyoung/A³
10
+ # ──────────────────────────────────────────────────────────────────────────────
11
+
12
+ name: "A³: Scan"
13
+
14
+ on:
15
+ push:
16
+ branches: [main, master]
17
+ paths:
18
+ - "**.py"
19
+ pull_request:
20
+ paths:
21
+ - "**.py"
22
+
23
permissions:
  contents: read
  security-events: write   # needed for SARIF upload
  pull-requests: write     # needed for PR comments (optional)
  # An explicit permissions block sets every unlisted scope to "none".
  # The triage step authenticates with GITHUB_TOKEN and `--provider github`,
  # assumed to be GitHub Models inference, which needs this scope — confirm.
  models: read
27
+
28
+ jobs:
29
+ a3-scan:
30
+ name: Static Analysis
31
+ runs-on: ubuntu-latest
32
+ steps:
33
+ # ── Checkout ─────────────────────────────────────────────────────
34
+ - uses: actions/checkout@v4
35
+ with:
36
+ fetch-depth: 0 # full history for diff
37
+
38
+ # ── Python setup ─────────────────────────────────────────────────
39
+ - uses: actions/setup-python@v5
40
+ with:
41
+ python-version: "3.12"
42
+
43
+
44
+ # ── Install a3 ────────────────────────────────────────
45
+ - name: Install a3
46
+ run: pip install a3-python[ci]
47
+
48
+ # ── Collect changed Python files ─────────────────────────────────
49
+ - name: Get changed files
50
+ id: changed
51
+ run: |
52
+ if [ "${{ github.event_name }}" = "pull_request" ]; then
53
+ BASE="origin/${{ github.base_ref }}"
54
+ else
55
+ BASE="${{ github.event.before }}"
56
+ fi
57
+ git diff --name-only --diff-filter=ACMR "$BASE"...HEAD -- '*.py' > changed_files.txt
58
+ echo "count=$(wc -l < changed_files.txt | tr -d ' ')" >> "$GITHUB_OUTPUT"
59
+ echo "Changed Python files:"
60
+ cat changed_files.txt
61
+
62
+ # ── Run analysis ─────────────────────────────────────────────────
63
+ - name: Run a3
64
+ if: steps.changed.outputs.count != '0'
65
+ run: |
66
+ a3 scan . \
67
+ --output-sarif a3-results.sarif
68
+ continue-on-error: true
69
+
70
+ # ── LLM triage via GitHub Copilot (zero-config — uses GITHUB_TOKEN) ──
71
+ - name: Copilot triage
72
+ if: steps.changed.outputs.count != '0'
73
+ env:
74
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
75
+ run: |
76
+ a3 triage \
77
+ --sarif a3-results.sarif \
78
+ --output-sarif a3-triaged.sarif \
79
+ --provider github \
80
+ --model gpt-4o \
81
+ --verbose
82
+ mv a3-triaged.sarif a3-results.sarif
83
+
84
+ # ── Baseline ratchet check ───────────────────────────────────────
85
+ - name: Check baseline
86
+ if: steps.changed.outputs.count != '0'
87
+ run: |
88
+ a3 baseline diff \
89
+ --sarif a3-results.sarif
90
+
91
+ # ── Upload SARIF to GitHub Code Scanning ─────────────────────────
92
+ - name: Upload SARIF
93
+ if: always() && steps.changed.outputs.count != '0'
94
+ uses: github/codeql-action/upload-sarif@v3
95
+ continue-on-error: true
96
+ with:
97
+ sarif_file: a3-results.sarif
98
+ category: a3
@@ -0,0 +1,88 @@
1
+ # ──────────────────────────────────────────────────────────────────────────────
2
+ # a3 — Scheduled Full Scan
3
+ #
4
+ # Runs a full-repo analysis weekly, triages via LLM, diffs against baseline,
5
+ # and auto-files GitHub issues for new true positives.
6
+ #
7
+ # Install: a3 init .
8
+ # Docs: https://github.com/thehalleyyoung/A³
9
+ # ──────────────────────────────────────────────────────────────────────────────
10
+
11
+ name: "A³: Weekly Scan"
12
+
13
+ on:
14
+ schedule:
15
+ - cron: "0 6 * * 1" # Every Monday at 06:00 UTC
16
+ workflow_dispatch: # Allow manual trigger
17
+
18
permissions:
  contents: write          # needed to update baseline
  security-events: write   # needed for SARIF upload
  issues: write            # needed for auto-issue creation
  # An explicit permissions block sets every unlisted scope to "none".
  # The triage step authenticates with GITHUB_TOKEN and `--provider github`,
  # assumed to be GitHub Models inference, which needs this scope — confirm.
  models: read
22
+
23
+ jobs:
24
+ a3-full-scan:
25
+ name: Full Analysis
26
+ runs-on: ubuntu-latest
27
+ steps:
28
+ # ── Checkout ─────────────────────────────────────────────────────
29
+ - uses: actions/checkout@v4
30
+
31
+ # ── Python setup ─────────────────────────────────────────────────
32
+ - uses: actions/setup-python@v5
33
+ with:
34
+ python-version: "3.12"
35
+
36
+
37
+ # ── Install a3 ────────────────────────────────────────
38
+ - name: Install a3
39
+ run: pip install a3-python[ci]
40
+
41
+ # ── Full scan ────────────────────────────────────────────────────
42
+ - name: Run full analysis
43
+ run: |
44
+ a3 scan . \
45
+ --output-sarif a3-full.sarif
46
+
47
+ # ── LLM triage via GitHub Copilot (zero-config — uses GITHUB_TOKEN) ──
48
+ - name: Copilot triage
49
+ env:
50
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
51
+ run: |
52
+ a3 triage \
53
+ --sarif a3-full.sarif \
54
+ --output-sarif a3-triaged.sarif \
55
+ --provider github \
56
+ --model gpt-4o \
57
+ --verbose
58
+ mv a3-triaged.sarif a3-full.sarif
59
+
60
+ # ── Baseline diff + auto-issue ───────────────────────────────────
61
+ - name: Check for new findings
62
+ env:
63
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
64
+ run: |
65
+ a3 baseline diff \
66
+ --sarif a3-full.sarif \
67
+ --auto-issue
68
+ continue-on-error: true
69
+
70
+ # ── Update baseline ──────────────────────────────────────────────
71
+ - name: Update baseline
72
+ run: |
73
+ a3 baseline accept --sarif a3-full.sarif
74
+ git config user.name "a3[bot]"
75
+ git config user.email "a3[bot]@users.noreply.github.com"
76
+ git add .a3-baseline.json
77
+ git diff --cached --quiet || \
78
+ git commit -m "ci: update a3 baseline [skip ci]" && \
79
+ git push
80
+
81
+ # ── Upload SARIF to GitHub Code Scanning ─────────────────────────
82
+ - name: Upload SARIF
83
+ if: always()
84
+ uses: github/codeql-action/upload-sarif@v3
85
+ continue-on-error: true
86
+ with:
87
+ sarif_file: a3-full.sarif
88
+ category: a3-weekly
@@ -0,0 +1,343 @@
1
+ Metadata-Version: 2.4
2
+ Name: a3-python
3
+ Version: 0.1.11
4
+ Summary: Catch real Python bugs before production — 99%+ accuracy, Z3 symbolic execution, LLM-powered false-positive filtering, zero-config GitHub CI
5
+ Requires-Python: >=3.11
6
+ Description-Content-Type: text/markdown
7
+ Requires-Dist: z3-solver>=4.12.0
8
+ Provides-Extra: dev
9
+ Requires-Dist: pytest>=7.0; extra == "dev"
10
+ Requires-Dist: pytest-cov>=4.0; extra == "dev"
11
+ Provides-Extra: ci
12
+ Requires-Dist: anthropic>=0.30.0; extra == "ci"
13
+ Requires-Dist: openai>=1.0.0; extra == "ci"
14
+ Requires-Dist: pyyaml>=6.0; extra == "ci"
15
+
16
+ # PythonFromScratch
17
+
18
+ A static analysis tool for Python that finds **real bugs** in large codebases using bytecode analysis, barrier-certificate proofs, and Z3-backed symbolic execution.
19
+
20
+ Tested on Microsoft DeepSpeed (5,000+ functions) — found **6 confirmed true positives** including silent data corruption and unguarded division-by-zero bugs, while automatically proving 87.6% of candidates as false positives.
21
+
22
+ ---
23
+
24
+ ## Install
25
+
26
+ ```bash
27
+ git clone https://github.com/halleyyoung/PythonFromScratch.git
28
+ cd PythonFromScratch
29
+ pip install -e .
30
+ ```
31
+
32
+ Requires **Python ≥ 3.11** and **z3-solver** (installed automatically).
33
+
34
+ ---
35
+
36
+ ## Walkthrough: Analyze a Real Project
37
+
38
+ ### 1. Clone a target repo
39
+
40
+ ```bash
41
+ git clone https://github.com/microsoft/DeepSpeed.git external_tools/DeepSpeed
42
+ ```
43
+
44
+ ### 2. Run the analyzer
45
+
46
+ ```bash
47
+ python3.11 -m pyfromscratch external_tools/DeepSpeed/deepspeed/
48
+ ```
49
+
50
+ This runs the full pipeline automatically:
51
+
52
+ ```
53
+ ======================================================================
54
+ PythonFromScratch — Full Project Analysis
55
+ Target: external_tools/DeepSpeed/deepspeed
56
+ ======================================================================
57
+
58
+ STEP 1: BUILDING CALL GRAPH
59
+ Functions: 5003 (2.2s)
60
+
61
+ STEP 2: COMPUTING CRASH SUMMARIES
62
+ Summaries: 5003 (329.4s)
63
+
64
+ STEP 3: BUILDING CODE OBJECTS FOR DSE
65
+ Code objects: 5003 (0.0s)
66
+
67
+ STEP 4: BUG TYPE COVERAGE
68
+ 2928 NULL_PTR
69
+ 689 BOUNDS
70
+ 358 ASSERT_FAIL
71
+ 119 DIV_ZERO
72
+ 35 RUNTIME_ERROR
73
+ ...
74
+
75
+ STEP 5: BARRIER CERTIFICATE + DSE ANALYSIS
76
+ Total bug instances: 4613
77
+ Fully guarded (guards): 3008
78
+ Unguarded: 1605
79
+
80
+ Barrier results (35.8s):
81
+ Proven FP: 1031/1605
82
+ Remaining: 574
83
+
84
+ STEP 6: DSE RESULTS
85
+ DSE confirmed FP: 4
86
+ DSE confirmed TP: 493
87
+
88
+ STEP 7: TRUE POSITIVE CANDIDATES
89
+ Production code bugs: 571
90
+ Test-only code bugs: 3
91
+
92
+ TRUE POSITIVES (DSE-confirmed reachable):
93
+ ⚠️ DIV_ZERO in utils.groups._ensure_divisibility
94
+ ⚠️ DIV_ZERO in utils.timer.ThroughputTimer._is_report_boundary
95
+ ⚠️ DIV_ZERO in inference.v2.inference_utils.ceil_div
96
+ ...
97
+
98
+ SUMMARY
99
+ Functions analysed: 5003
100
+ Total bug instances: 4613
101
+ Proven false positive: 4039 (87.6%)
102
+ Remaining candidates: 574
103
+ DSE-confirmed TPs: 493
104
+
105
+ Results saved to results/deepspeed_results.pkl
106
+ ```
107
+
108
+ ### 3. Filter remaining false positives with Copilot
109
+
110
+ The analyzer's guard detection, barrier certificates, and DSE automatically prove ~88% of candidate bug instances to be false positives. The remaining candidates include bugs that are technically reachable but may be guarded by framework invariants invisible at the bytecode level (e.g., "this parameter is always non-None because PyTorch guarantees it").
111
+
112
+ **Ask GitHub Copilot (or any LLM) to triage the remaining candidates:**
113
+
114
+ > Look at the output from `python3.11 -m pyfromscratch external_tools/DeepSpeed/deepspeed/`. For each remaining TP candidate, read the actual source code and callers to determine if it's a real bug or a false positive. Classify each as:
115
+ >
116
+ > - **REAL_BUG** — genuinely reachable crash from user input or config
117
+ > - **INTENTIONAL_GUARD** — deliberate `raise` (working as designed)
118
+ > - **FP_SELF** — attribute access on `self` (never None)
119
+ > - **FP_FRAMEWORK** — parameter guaranteed by framework (pytest, argparse, etc.)
120
+ > - **FP_INTERNAL** — parameter guaranteed by internal plumbing
121
+ >
122
+ > Write up the confirmed true positives in a markdown report.
123
+
124
+ This step typically reduces 500+ candidates down to **5–10 real bugs** with source-level evidence.
125
+
126
+ See [docs/TRUE_POSITIVE_ANALYSIS.md](docs/TRUE_POSITIVE_ANALYSIS.md) for our full DeepSpeed investigation.
127
+
128
+ ---
129
+
130
+ ## Single-File Analysis
131
+
132
+ ```bash
133
+ # Analyze one file
134
+ python3.11 -m pyfromscratch myfile.py
135
+
136
+ # Security analysis — treats each function as an entry point with tainted params
137
+ python3.11 -m pyfromscratch myfile.py --functions
138
+
139
+ # Verbose output
140
+ python3.11 -m pyfromscratch myfile.py --verbose
141
+ ```
142
+
143
+ **Exit codes:** `0` = SAFE, `1` = BUG found, `2` = UNKNOWN, `3` = error
144
+
145
+ ---
146
+
147
+ ## All Options
148
+
149
+ | Option | Description |
150
+ |--------|-------------|
151
+ | `--verbose` | Detailed output |
152
+ | `--functions` | Treat each function as a tainted entry point |
153
+ | `--all-functions` | Analyze ALL functions as entry points |
154
+ | `--interprocedural` | Cross-function taint analysis with call graph |
155
+ | `--entry-points NAME,...` | Specify entry point functions |
156
+ | `--min-confidence 0.0-1.0` | Filter bugs by confidence score |
157
+ | `--deduplicate` | Deduplicate findings by type + location |
158
+ | `--save-results PATH` | Custom output path (default: `results/<name>_results.pkl`) |
159
+ | `--context-depth N` | k-CFA context sensitivity (0, 1, 2, ...) |
160
+ | `--check-termination` | Detect non-terminating loops |
161
+ | `--synthesize-invariants` | Generate inductive loop invariants |
162
+ | `--no-concolic` | Pure symbolic analysis (no concrete execution) |
163
+
164
+ ---
165
+
166
+ ## Detected Bug Types
167
+
168
+ ### Security Vulnerabilities (47 types)
169
+
170
+ **Injection**
171
+ - `SQL_INJECTION` — Unsanitized input in SQL queries
172
+ - `COMMAND_INJECTION` — Shell command injection
173
+ - `CODE_INJECTION` — Eval/exec with untrusted data
174
+ - `PATH_INJECTION` — Path traversal attacks
175
+ - `LDAP_INJECTION`, `XPATH_INJECTION`, `NOSQL_INJECTION`
176
+ - `REGEX_INJECTION` — ReDoS via user-controlled patterns
177
+ - `HEADER_INJECTION`, `COOKIE_INJECTION`
178
+
179
+ **Cross-Site Scripting (XSS)**
180
+ - `REFLECTED_XSS` — User input reflected in HTML output
181
+
182
+ **Server-Side Request Forgery**
183
+ - `SSRF` — Requests to user-controlled URLs
184
+ - `PARTIAL_SSRF` — Partial URL control
185
+
186
+ **Deserialization**
187
+ - `UNSAFE_DESERIALIZATION` — Pickle/YAML with untrusted data
188
+ - `XXE` — XML External Entity injection
189
+ - `XML_BOMB` — Billion laughs attack
190
+
191
+ **Sensitive Data**
192
+ - `CLEARTEXT_LOGGING` — Passwords/secrets in logs
193
+ - `CLEARTEXT_STORAGE` — Unencrypted sensitive data
194
+ - `HARDCODED_CREDENTIALS`
195
+
196
+ **Cryptography**
197
+ - `WEAK_CRYPTO` — MD5/SHA1 for security
198
+ - `WEAK_CRYPTO_KEY` — Insufficient key sizes
199
+ - `BROKEN_CRYPTO_ALGORITHM` — DES, RC4, etc.
200
+ - `INSECURE_PROTOCOL` — HTTP, FTP, Telnet
201
+
202
+ **Web Security**
203
+ - `URL_REDIRECT` — Open redirect vulnerabilities
204
+ - `CSRF_PROTECTION_DISABLED`
205
+ - `FLASK_DEBUG` — Debug mode in production
206
+ - `INSECURE_COOKIE` — Missing Secure/HttpOnly flags
207
+ - `JINJA2_AUTOESCAPE_FALSE`
208
+
209
+ **File System**
210
+ - `TAR_SLIP` — Tar extraction path traversal
211
+ - `INSECURE_TEMPORARY_FILE`
212
+ - `WEAK_FILE_PERMISSIONS`
213
+
214
+ **Network**
215
+ - `BIND_TO_ALL_INTERFACES` — 0.0.0.0 binding
216
+ - `MISSING_HOST_KEY_VALIDATION`
217
+ - `CERT_VALIDATION_DISABLED`
218
+
219
+ **Regex**
220
+ - `REDOS` — Catastrophic backtracking
221
+ - `POLYNOMIAL_REDOS`
222
+ - `BAD_TAG_FILTER`
223
+ - `INCOMPLETE_HOSTNAME_REGEXP`
224
+
225
+ ### Core Bug Types (20 types)
226
+
227
+ - `DIV_ZERO` — Division by zero
228
+ - `NULL_PTR` — None dereference
229
+ - `BOUNDS` — Index out of bounds
230
+ - `TYPE_CONFUSION` — Type errors
231
+ - `ASSERT_FAIL` — Failed assertions
232
+ - `INTEGER_OVERFLOW`
233
+ - `NON_TERMINATION` — Infinite loops
234
+ - `MEMORY_LEAK`, `USE_AFTER_FREE`, `DOUBLE_FREE`
235
+ - `DATA_RACE`, `DEADLOCK`
236
+ - `INFO_LEAK`, `TIMING_CHANNEL`
237
+
238
+ ## Examples
239
+
240
+ ### Finding SQL Injection
241
+
242
+ ```python
243
+ # vulnerable.py
244
+ import sqlite3
245
+
246
+ def get_user(user_id):
247
+ conn = sqlite3.connect('users.db')
248
+ cursor = conn.cursor()
249
+ cursor.execute(f"SELECT * FROM users WHERE id = {user_id}") # BUG!
250
+ return cursor.fetchone()
251
+ ```
252
+
253
+ ```bash
254
+ $ pyfromscratch vulnerable.py --functions
255
+ Analyzing: vulnerable.py
256
+
257
+ Function-level entry points: 1
258
+ get_user: BUG
259
+ SQL_INJECTION: Tainted value flows to SQL query at line 7
260
+
261
+ Total bugs found: 1
262
+ ```
263
+
264
+ ### Verifying Safe Code
265
+
266
+ ```python
267
+ # safe.py
268
+ import sqlite3
269
+
270
+ def get_user(user_id):
271
+ conn = sqlite3.connect('users.db')
272
+ cursor = conn.cursor()
273
+ cursor.execute("SELECT * FROM users WHERE id = ?", (user_id,)) # Safe!
274
+ return cursor.fetchone()
275
+ ```
276
+
277
+ ```bash
278
+ $ pyfromscratch safe.py --functions
279
+ Analyzing: safe.py
280
+
281
+ Function-level entry points: 1
282
+ get_user: SAFE
283
+
284
+ Total bugs found: 0
285
+ ```
286
+
287
+ ## How It Works
288
+
289
+ The analyzer runs a **7-step pipeline** on a project directory:
290
+
291
+ 1. **Call Graph** — Builds a whole-program call graph from all `.py` files
292
+ 2. **Crash Summaries** — Disassembles bytecode, finds unguarded divisions, None-dereferences, out-of-bounds accesses, etc.
293
+ 3. **Code Objects** — Extracts Python code objects for symbolic execution
294
+ 4. **Guard Detection** — Identifies bugs already protected by `if`, `try/except`, `assert`, `isinstance` checks
295
+ 5. **Barrier Certificates** — 10 proof patterns (assume-guarantee, post-condition, refinement types, inductive invariants, control flow, dataflow, disjunctive, callee return-guarantee, validated params, DSE confirmation) attempt to formally prove each remaining bug is unreachable
296
+ 6. **DSE (Z3)** — Dynamic symbolic execution confirms whether a concrete input can trigger each surviving bug
297
+ 7. **Classification** — Separates production code from test code, reports true positive candidates
298
+
299
+ The tool produces one of three verdicts per bug:
300
+ - **FP (proven)** — barrier certificate or DSE proves the bug is unreachable
301
+ - **TP candidate** — no proof found; needs human/LLM triage
302
+ - **DSE-confirmed TP** — Z3 found a satisfying assignment that reaches the bug
303
+
304
+ ## Architecture
305
+
306
+ ```
307
+ pyfromscratch/
308
+ ├── __main__.py # python -m pyfromscratch entry point
309
+ ├── cli.py # CLI: single-file and project-directory analysis
310
+ ├── analyzer.py # Core analysis engine
311
+ ├── frontend/ # Python loading, bytecode compilation
312
+ ├── cfg/ # Control-flow graph + call graph construction
313
+ ├── semantics/ # Symbolic bytecode execution, crash summaries
314
+ ├── z3model/ # Z3 value/heap modeling
315
+ ├── unsafe/ # Bug type predicates (67 types)
316
+ ├── contracts/ # External call modeling, taint sources/sinks
317
+ ├── dse/ # Concolic execution oracle (Z3-backed)
318
+ └── barriers/ # Barrier certificate synthesis (10 patterns)
319
+ ```
320
+
321
+ ## Docker
322
+
323
+ ```bash
324
+ # Build
325
+ docker build -t pyfromscratch .
326
+
327
+ # Analyze a directory
328
+ docker run --rm -v $(pwd)/my_project:/target pyfromscratch /target
329
+
330
+ # Analyze a single file
331
+ docker run --rm -v $(pwd):/code pyfromscratch /code/myfile.py --functions
332
+ ```
333
+
334
+ ## Development
335
+
336
+ ```bash
337
+ pytest # Run tests
338
+ pytest --cov=pyfromscratch # With coverage
339
+ ```
340
+
341
+ ## License
342
+
343
+ See LICENSE file.
@@ -0,0 +1,8 @@
1
+ a3_python/ci/init_cmd.py,sha256=AYz53SnlTTml_Cde6EJ8DSMBqTZno5n8nFCxYA0Unso,4691
2
+ a3_python/ci/templates/a3-pr-scan.yml,sha256=jELLJ2Pbu3M3TMw-cBwn-Gn_J8nDZhzhZjT1N4dDZl8,4075
3
+ a3_python/ci/templates/a3-scheduled-scan.yml,sha256=DxauFcoRKa8k6xnpVmv2eZlxGbLAF86UnZUVdty55-U,3817
4
+ a3_python-0.1.11.dist-info/METADATA,sha256=Jl6pVnQlGc149m_G50xPAnTxoQ-bZb8YqtSkr1D7au8,10825
5
+ a3_python-0.1.11.dist-info/WHEEL,sha256=YCfwYGOYMi5Jhw2fU4yNgwErybb2IX5PEwBKV4ZbdBo,91
6
+ a3_python-0.1.11.dist-info/entry_points.txt,sha256=OVCyWgSb0HeehCZckDGxLyrNBgtaJ0g-L-quoLEZ8_M,42
7
+ a3_python-0.1.11.dist-info/top_level.txt,sha256=_IdpagE_q1SmPdNYZY6yETqIq68e9zk18egFV0qOJqI,10
8
+ a3_python-0.1.11.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ a3 = a3_python.cli:main
@@ -0,0 +1 @@
1
+ a3_python