sanicode 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. sanicode-0.1.0/.github/workflows/release.yml +105 -0
  2. sanicode-0.1.0/.github/workflows/test.yml +55 -0
  3. sanicode-0.1.0/.gitignore +38 -0
  4. sanicode-0.1.0/.gitleaks.toml +13 -0
  5. sanicode-0.1.0/CLAUDE.md +103 -0
  6. sanicode-0.1.0/PKG-INFO +161 -0
  7. sanicode-0.1.0/PUBLISHING.md +94 -0
  8. sanicode-0.1.0/README.md +124 -0
  9. sanicode-0.1.0/RESEARCH.md +1421 -0
  10. sanicode-0.1.0/data/.gitkeep +0 -0
  11. sanicode-0.1.0/docs/MVP_SCOPE.md +154 -0
  12. sanicode-0.1.0/prompts/.gitkeep +0 -0
  13. sanicode-0.1.0/pyproject.toml +97 -0
  14. sanicode-0.1.0/rules/.gitkeep +0 -0
  15. sanicode-0.1.0/sanicode.toml.example +76 -0
  16. sanicode-0.1.0/scripts/release.sh +114 -0
  17. sanicode-0.1.0/scripts/scaffold.sh +25 -0
  18. sanicode-0.1.0/src/sanicode/__init__.py +3 -0
  19. sanicode-0.1.0/src/sanicode/__main__.py +6 -0
  20. sanicode-0.1.0/src/sanicode/cli.py +426 -0
  21. sanicode-0.1.0/src/sanicode/compliance/__init__.py +1 -0
  22. sanicode-0.1.0/src/sanicode/compliance/enrichment.py +77 -0
  23. sanicode-0.1.0/src/sanicode/compliance/mapper.py +314 -0
  24. sanicode-0.1.0/src/sanicode/config.py +230 -0
  25. sanicode-0.1.0/src/sanicode/data/__init__.py +1 -0
  26. sanicode-0.1.0/src/sanicode/data/compliance_db.json +312 -0
  27. sanicode-0.1.0/src/sanicode/graph/__init__.py +1 -0
  28. sanicode-0.1.0/src/sanicode/graph/builder.py +305 -0
  29. sanicode-0.1.0/src/sanicode/llm/__init__.py +1 -0
  30. sanicode-0.1.0/src/sanicode/llm/client.py +128 -0
  31. sanicode-0.1.0/src/sanicode/report/__init__.py +1 -0
  32. sanicode-0.1.0/src/sanicode/report/json_report.py +43 -0
  33. sanicode-0.1.0/src/sanicode/report/markdown.py +165 -0
  34. sanicode-0.1.0/src/sanicode/report/persist.py +190 -0
  35. sanicode-0.1.0/src/sanicode/report/sarif.py +148 -0
  36. sanicode-0.1.0/src/sanicode/scanner/__init__.py +1 -0
  37. sanicode-0.1.0/src/sanicode/scanner/_sql_detect.py +27 -0
  38. sanicode-0.1.0/src/sanicode/scanner/ast_parser.py +49 -0
  39. sanicode-0.1.0/src/sanicode/scanner/data_flow.py +460 -0
  40. sanicode-0.1.0/src/sanicode/scanner/executor.py +120 -0
  41. sanicode-0.1.0/src/sanicode/scanner/patterns.py +132 -0
  42. sanicode-0.1.0/src/sanicode/server/__init__.py +4 -0
  43. sanicode-0.1.0/src/sanicode/server/app.py +280 -0
  44. sanicode-0.1.0/src/sanicode/server/models.py +80 -0
  45. sanicode-0.1.0/src/sanicode/server/state.py +73 -0
  46. sanicode-0.1.0/src/sanicode/version.py +3 -0
  47. sanicode-0.1.0/tests/__init__.py +0 -0
  48. sanicode-0.1.0/tests/test_cli.py +93 -0
  49. sanicode-0.1.0/tests/test_compliance.py +264 -0
  50. sanicode-0.1.0/tests/test_config.py +178 -0
  51. sanicode-0.1.0/tests/test_data_flow.py +461 -0
  52. sanicode-0.1.0/tests/test_executor.py +105 -0
  53. sanicode-0.1.0/tests/test_graph_builder.py +240 -0
  54. sanicode-0.1.0/tests/test_patterns.py +132 -0
  55. sanicode-0.1.0/tests/test_report.py +529 -0
  56. sanicode-0.1.0/tests/test_server.py +194 -0
@@ -0,0 +1,105 @@
1
+ name: Release
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - 'v*.*.*'
7
+
8
+ permissions:
9
+ contents: write
10
+ id-token: write
11
+
12
+ jobs:
13
+ verify:
14
+ runs-on: ubuntu-latest
15
+ steps:
16
+ - uses: actions/checkout@v4
17
+
18
+ - name: Extract tag version
19
+ id: tag
20
+ run: echo "version=${GITHUB_REF_NAME#v}" >> "$GITHUB_OUTPUT"
21
+
22
+ - name: Read version.py
23
+ id: pyver
24
+ run: |
25
+ PY_VERSION=$(grep '__version__' src/sanicode/version.py | cut -d'"' -f2)
26
+ echo "version=$PY_VERSION" >> "$GITHUB_OUTPUT"
27
+
28
+ - name: Read pyproject.toml version
29
+ id: tomlver
30
+ run: |
31
+ TOML_VERSION=$(grep '^version = ' pyproject.toml | head -1 | cut -d'"' -f2)
32
+ echo "version=$TOML_VERSION" >> "$GITHUB_OUTPUT"
33
+
34
+ - name: Verify versions match
35
+ run: |
36
+ TAG="${{ steps.tag.outputs.version }}"
37
+ PY="${{ steps.pyver.outputs.version }}"
38
+ TOML="${{ steps.tomlver.outputs.version }}"
39
+ echo "Tag: $TAG"
40
+ echo "version.py: $PY"
41
+ echo "pyproject.toml: $TOML"
42
+ if [[ "$TAG" != "$PY" || "$TAG" != "$TOML" ]]; then
43
+ echo "::error::Version mismatch! Tag=$TAG, version.py=$PY, pyproject.toml=$TOML"
44
+ exit 1
45
+ fi
46
+
47
+ test:
48
+ needs: verify
49
+ uses: ./.github/workflows/test.yml
50
+
51
+ create-release:
52
+ needs: test
53
+ runs-on: ubuntu-latest
54
+ steps:
55
+ - uses: actions/checkout@v4
56
+
57
+ - name: Extract version
58
+ id: version
59
+ run: echo "version=${GITHUB_REF_NAME#v}" >> "$GITHUB_OUTPUT"
60
+
61
+ - name: Create GitHub Release
62
+ env:
63
+ GH_TOKEN: ${{ github.token }}
64
+ run: |
65
+ gh release create "$GITHUB_REF_NAME" \
66
+ --title "sanicode v${{ steps.version.outputs.version }}" \
67
+ --generate-notes
68
+
69
+ build:
70
+ needs: create-release
71
+ runs-on: ubuntu-latest
72
+ steps:
73
+ - uses: actions/checkout@v4
74
+
75
+ - name: Set up Python
76
+ uses: actions/setup-python@v5
77
+ with:
78
+ python-version: "3.11"
79
+
80
+ - name: Install build tools
81
+ run: pip install build
82
+
83
+ - name: Build distribution
84
+ run: python -m build
85
+
86
+ - name: Upload artifacts
87
+ uses: actions/upload-artifact@v4
88
+ with:
89
+ name: dist
90
+ path: dist/
91
+
92
+ publish-to-pypi:
93
+ needs: build
94
+ runs-on: ubuntu-latest
95
+ environment: pypi
96
+
97
+ steps:
98
+ - name: Download artifacts
99
+ uses: actions/download-artifact@v4
100
+ with:
101
+ name: dist
102
+ path: dist/
103
+
104
+ - name: Publish to PyPI
105
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,55 @@
1
+ name: Tests
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ tags:
7
+ - 'v*.*.*'
8
+ pull_request:
9
+ branches: [main]
10
+ workflow_call:
11
+
12
+ jobs:
13
+ test:
14
+ runs-on: ubuntu-latest
15
+ strategy:
16
+ matrix:
17
+ python-version: ["3.10", "3.11", "3.12", "3.13"]
18
+
19
+ steps:
20
+ - uses: actions/checkout@v4
21
+
22
+ - name: Set up Python ${{ matrix.python-version }}
23
+ uses: actions/setup-python@v5
24
+ with:
25
+ python-version: ${{ matrix.python-version }}
26
+
27
+ - name: Install dependencies
28
+ run: pip install -e ".[dev]"
29
+
30
+ - name: Lint with ruff
31
+ run: ruff check src/ tests/
32
+
33
+ - name: Run tests with coverage
34
+ run: pytest --cov=sanicode --cov-report=xml --cov-report=term-missing
35
+
36
+ build:
37
+ needs: test
38
+ runs-on: ubuntu-latest
39
+
40
+ steps:
41
+ - uses: actions/checkout@v4
42
+
43
+ - name: Set up Python
44
+ uses: actions/setup-python@v5
45
+ with:
46
+ python-version: "3.11"
47
+
48
+ - name: Install build tools
49
+ run: pip install build twine
50
+
51
+ - name: Build distribution
52
+ run: python -m build
53
+
54
+ - name: Validate distribution
55
+ run: twine check dist/*
@@ -0,0 +1,38 @@
1
+ # Secrets and credentials
2
+ .env
3
+ .env.local
4
+ .env.*.local
5
+ *.key
6
+ *.pem
7
+ *.p12
8
+ *.pfx
9
+ credentials.json
10
+ secrets.yaml
11
+ secrets.yml
12
+
13
+ # Python
14
+ __pycache__/
15
+ *.py[cod]
16
+ *.egg-info/
17
+ *.egg
18
+ dist/
19
+ build/
20
+
21
+ # Virtual environments
22
+ .venv/
23
+ venv/
24
+
25
+ # Tool caches
26
+ .pytest_cache/
27
+ .ruff_cache/
28
+ .mypy_cache/
29
+ htmlcov/
30
+ .coverage
31
+
32
+ # IDE
33
+ .idea/
34
+ .vscode/
35
+ *.swp
36
+
37
+ # OS
38
+ .DS_Store
@@ -0,0 +1,13 @@
1
+ [allowlist]
2
+ description = "Project-specific allowlist for false positives"
3
+
4
+ paths = [
5
+ '''\/examples\/''',
6
+ '''\/tests?\/fixtures\/''',
7
+ '''docs\/.*\.md$''',
8
+ ]
9
+
10
+ regexes = [
11
+ '''YOUR_.*_HERE''',
12
+ '''REPLACE_WITH_YOUR''',
13
+ ]
@@ -0,0 +1,103 @@
1
+ # Sanicode — Project Context
2
+
3
+ Sanicode is a PyPI-distributed CLI tool that uses an AI agent (configurable LLM backend) to build a codebase-level knowledge graph, then outputs security/sanitization recommendations mapped to OWASP ASVS, NIST 800-53, and ASD STIG controls. The key differentiator over tools like Bandit or Semgrep is data flow context via LLM reasoning over a knowledge graph, not just AST pattern matching.
4
+
5
+ See `RESEARCH.md` for the full standards baseline, architecture rationale, and tooling landscape.
6
+
7
+ ## Getting Started
8
+
9
+ Start with issue #1 in the Phase 1 milestone. Issues have dependency notes in their descriptions — check "blocked by" before starting. Work issues in order within each milestone.
10
+
11
+ ## Tech Stack
12
+
13
+ - Python 3.10+, package name `sanicode`
14
+ - CLI: Typer (Click was the alternative considered; see issue #1)
15
+ - AST parsing: Python `ast` module (tree-sitter for multi-language, later)
16
+ - Knowledge graph: NetworkX (in-memory); Neo4j for persistent/large codebases
17
+ - LLM integration: LiteLLM (multi-provider, tiered endpoints)
18
+ - API server: FastAPI (`sanicode serve`)
19
+ - Config: TOML (`sanicode.toml` or `~/.config/sanicode/config.toml`)
20
+ - Output formats: Markdown, JSON, SARIF
21
+ - Container base: `registry.redhat.io/ubi9/python-311` (UBI9, multi-stage)
22
+ - Phase 3 operator: kopf (Python); Phase 4: Go Operator SDK if needed
23
+
24
+ ## Project Structure
25
+
26
+ ```
27
+ sanicode/
28
+ ├── src/sanicode/ # Main package
29
+ ├── data/ # Compliance cross-reference DB (JSON/TOML)
30
+ ├── rules/ # Rule definitions (YAML, Bandit-style)
31
+ ├── prompts/ # Prompt templates (YAML, with {variable} substitution)
32
+ ├── tests/ # Mirrors src/sanicode/ structure; pytest, 80%+ coverage
33
+ └── sanicode.toml # Project config (or ~/.config/sanicode/)
34
+ ```
35
+
36
+ ## Phase Structure
37
+
38
+ - **Phase 1** (starting now): Python package + CLI. Scan → knowledge graph → compliance-mapped report. Local mode + `sanicode serve` API mode.
39
+ - **Phase 2**: Containerized deployment on OpenShift. Helm chart, Prometheus metrics, Grafana dashboard, MLflow integration, Tekton task, KFP pipeline.
40
+ - **Phase 3**: Lightweight Python Operator via kopf. CRDs: SanicodeConfig, SanicodeScan, SanicodeProfile, SanicodeException. Auto-discover InferenceService CRs.
41
+ - **Phase 4**: Production Operator (Go/Operator SDK or Helm-based), OLM packaging, OperatorHub submission.
42
+
43
+ ## CLI Surface
44
+
45
+ ```
46
+ sanicode config # Configure LLM backend (tiered endpoints)
47
+ sanicode scan # Scan codebase, build knowledge graph
48
+ sanicode serve # Start FastAPI server (containerized/remote mode)
49
+ sanicode report # Generate compliance-mapped report
50
+ sanicode recommend # Output prioritized recommendations
51
+ sanicode graph # Inspect/export the knowledge graph
52
+ ```
53
+
54
+ The CLI operates in three modes: **local** (all in-process), **remote** (thin client to sanicode-api), **hybrid** (AST local, LLM remote).
55
+
56
+ ## Key Architectural Decisions
57
+
58
+ **Degraded mode is not optional.** With no LLM configured, sanicode must still produce useful output: AST pattern matching, known-bad function detection, data flow graph construction, compliance lookups, and SARIF output. LLM adds context-awareness and false-positive reduction on top.
59
+
60
+ **Compliance mapping is cross-cutting.** Every finding must carry: CWE ID, OWASP ASVS 5.0 requirement (with L1/L2/L3 level), NIST 800-53 control (SI-10, SI-15, etc.), ASD STIG check ID and CAT level (I/II/III). PCI DSS mapping where applicable.
61
+
62
+ **Knowledge graph is the differentiator.** Graph nodes: entry points, sanitization points, sink points, trust boundaries. Edges: data flow paths. LLM reasons over the graph to assess whether sanitization actually covers the identified threat.
63
+
64
+ **API is designed for machine consumption first.** A refactoring agent (Konveyor, custom agent) needs to iterate programmatically. REST endpoints: `POST /api/v1/scan`, `GET /api/v1/scan/{id}`, `GET /api/v1/scan/{id}/findings`, `GET /api/v1/scan/{id}/graph`, `POST /api/v1/analyze`, `GET /api/v1/compliance/map`.
65
+
66
+ **Offline-native, not offline-compatible.** Zero egress at runtime. All compliance data, rules, and prompt templates ship in the package. Model weights are a separate supply chain artifact (PVC or ModelCar OCI image) — never bundled with sanicode.
67
+
68
+ **Tiered model support.** `sanicode.toml` allows separate endpoints for fast (classification), analysis (data flow), and reasoning (compliance mapping + report generation) tiers. Recommended: Granite Nano → Granite Code 8B → Llama 3.1 70B.
69
+
70
+ ## Backlog Discipline
71
+
72
+ Work backlog items until they are 100% complete. 99% rounds to zero. If during implementation we discover scope that should wait (separate concern, large effort, blocked on something else), the standard procedure is:
73
+
74
+ 1. Create a new backlog issue for the carved-out work, with full context.
75
+ 2. Update the current issue to document what moved and reference the new issue(s).
76
+ 3. Only then close the current issue.
77
+
78
+ This prevents hidden technical debt from accumulating behind "done" items. An agent should never offer to close an item until the carve-out issues exist and the current item's description reflects the reduced scope.
79
+
80
+ ## Releasing
81
+
82
+ The version lives in two files that must stay in sync:
83
+ - `src/sanicode/version.py` — `__version__ = "x.y.z"`
84
+ - `pyproject.toml` — `version = "x.y.z"`
85
+
86
+ To release, run from the project root:
87
+
88
+ ```bash
89
+ ./scripts/release.sh <version> "<description>"
90
+ ```
91
+
92
+ This updates both version files, commits, tags, and pushes. GitHub Actions handles testing, GitHub Release creation, and PyPI publishing via OIDC trusted publishing. See `PUBLISHING.md` for one-time setup steps.
93
+
94
+ Never update version files manually — always use the release script to ensure consistency.
95
+
96
+ ## Don't Forget
97
+
98
+ - SARIF output is critical for ecosystem interoperability (GitHub Code Scanning, VS Code, Azure DevOps, Tekton gating). Prioritize this over custom formats.
99
+ - OWASP ASVS reference is version **5.0** (released May 2025). The chapter is now "V1: Encoding and Sanitization" — not the old V5. Use `v5.0.0-V1-x.x` identifiers.
100
+ - ASD STIG version is **v4 r11**. Key IDs: APSC-DV-002510 (command injection, CAT I), APSC-DV-002520 (XSS, CAT II), APSC-DV-002530 (input validation, CAT II).
101
+ - CWE is the lingua franca across all frameworks. Core sanitization CWEs: 20, 77, 78, 79, 89, 94, 116, 918, 1333.
102
+ - `SanicodeException` CRs (Phase 3) are critical for ATO workflows — RBAC-controlled, expiring, GitOps-managed risk acceptances.
103
+ - Expose `/metrics` (Prometheus) from day one in `sanicode serve`. Compliance score, findings by severity/CWE/namespace, LLM usage.
@@ -0,0 +1,161 @@
1
+ Metadata-Version: 2.4
2
+ Name: sanicode
3
+ Version: 0.1.0
4
+ Summary: AI-assisted code sanitization scanner with OWASP ASVS, NIST 800-53, and ASD STIG compliance mapping.
5
+ Project-URL: Homepage, https://github.com/rdwj/sanicode
6
+ Project-URL: Repository, https://github.com/rdwj/sanicode
7
+ Project-URL: Issues, https://github.com/rdwj/sanicode/issues
8
+ Author: Sanicode Contributors
9
+ License: Apache-2.0
10
+ Keywords: compliance,llm,owasp,sast,security,stig
11
+ Classifier: Development Status :: 2 - Pre-Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: Topic :: Security
19
+ Requires-Python: >=3.10
20
+ Requires-Dist: fastapi>=0.100
21
+ Requires-Dist: litellm>=1.0
22
+ Requires-Dist: networkx>=3.0
23
+ Requires-Dist: prometheus-client>=0.17
24
+ Requires-Dist: rich>=13.0
25
+ Requires-Dist: tomli>=2.0; python_version < '3.11'
26
+ Requires-Dist: typer>=0.9.0
27
+ Requires-Dist: uvicorn[standard]>=0.20
28
+ Provides-Extra: dev
29
+ Requires-Dist: build>=1.0; extra == 'dev'
30
+ Requires-Dist: httpx>=0.24; extra == 'dev'
31
+ Requires-Dist: pytest-asyncio>=0.21; extra == 'dev'
32
+ Requires-Dist: pytest-cov>=4.0; extra == 'dev'
33
+ Requires-Dist: pytest>=7.0; extra == 'dev'
34
+ Requires-Dist: ruff>=0.1.0; extra == 'dev'
35
+ Requires-Dist: twine>=5.0; extra == 'dev'
36
+ Description-Content-Type: text/markdown
37
+
38
+ # Sanicode
39
+
40
+ Sanicode scans Python codebases for input validation and sanitization gaps, builds a knowledge graph of data flow (entry points, sanitizers, sinks), and maps every finding to OWASP ASVS 5.0, NIST 800-53, and ASD STIG v4r11 controls. Output formats include SARIF (for GitHub Code Scanning integration), JSON, and Markdown.
41
+
42
+ Unlike pattern-only tools like Bandit or Semgrep, sanicode constructs a data flow graph so findings carry context about *how* tainted data reaches a sink and *whether* sanitization exists along the path.
43
+
44
+ ## Install
45
+
46
+ ```
47
+ pip install sanicode
48
+ ```
49
+
50
+ Requires Python 3.10+.
51
+
52
+ ## Quick start
53
+
54
+ Scan a codebase and generate a Markdown report:
55
+
56
+ ```
57
+ sanicode scan .
58
+ ```
59
+
60
+ Generate SARIF output for CI integration:
61
+
62
+ ```
63
+ sanicode scan . -f sarif
64
+ ```
65
+
66
+ Reports are written to `sanicode-reports/` by default.
67
+
68
+ ## API server
69
+
70
+ Start the FastAPI server for remote or hybrid scan mode:
71
+
72
+ ```
73
+ sanicode serve
74
+ ```
75
+
76
+ This starts on port 8080 with Prometheus metrics at `/metrics`.
77
+
78
+ ### Endpoints
79
+
80
+ ```
81
+ POST /api/v1/scan Submit a scan (async)
82
+ GET /api/v1/scan/{id} Poll scan status
83
+ GET /api/v1/scan/{id}/findings Retrieve findings (JSON or ?format=sarif)
84
+ GET /api/v1/scan/{id}/graph Retrieve knowledge graph
85
+ POST /api/v1/analyze Instant snippet analysis
86
+ GET /api/v1/compliance/map Compliance framework lookup
87
+ GET /api/v1/health Liveness check
88
+ GET /metrics Prometheus metrics
89
+ ```
90
+
91
+ ## CLI commands
92
+
93
+ ```
94
+ sanicode scan . # Scan codebase, generate reports
95
+ sanicode scan . -f sarif # SARIF output
96
+ sanicode scan . -f json -f sarif # Multiple formats
97
+ sanicode serve # Start API server on :8080
98
+ sanicode report scan-result.json # Re-generate reports from saved results
99
+ sanicode report scan-result.json -s high # Filter by severity
100
+ sanicode report scan-result.json --cwe 89 # Filter by CWE
101
+ sanicode config --show # Show resolved configuration
102
+ sanicode config --init # Create starter sanicode.toml
103
+ sanicode graph . --export graph.json # Export knowledge graph
104
+ ```
105
+
106
+ ## Detection rules
107
+
108
+ | Rule | Description | CWE |
109
+ |--------|----------------------------------|---------|
110
+ | SC001 | `eval()` | CWE-78 |
111
+ | SC002 | `exec()` | CWE-78 |
112
+ | SC003 | `os.system()` | CWE-78 |
113
+ | SC004 | `subprocess` with `shell=True` | CWE-78 |
114
+ | SC005 | `pickle.loads()` | CWE-502 |
115
+ | SC006 | SQL string formatting | CWE-89 |
116
+ | SC007 | `__import__()` | CWE-94 |
117
+ | SC008 | `yaml.load()` without `Loader` | CWE-502 |
118
+
119
+ Each finding is enriched with CWE metadata and mapped to the active compliance profiles.
120
+
121
+ ## Compliance frameworks
122
+
123
+ Sanicode maps findings to three frameworks out of the box:
124
+
125
+ - **OWASP ASVS 5.0** -- V1: Encoding and Sanitization requirements (L1/L2/L3)
126
+ - **NIST 800-53** -- SI-10 (Information Input Validation), SI-15 (Information Output Filtering), and related controls
127
+ - **ASD STIG v4r11** -- APSC-DV-002510 (CAT I), APSC-DV-002520 (CAT II), APSC-DV-002530 (CAT II), and related checks
128
+
129
+ ## Configuration
130
+
131
+ Create a config file:
132
+
133
+ ```
134
+ sanicode config --init
135
+ ```
136
+
137
+ This writes a `sanicode.toml` in the current directory. Config is loaded from (in order):
138
+
139
+ 1. `--config` flag
140
+ 2. `sanicode.toml` in the current directory
141
+ 3. `~/.config/sanicode/config.toml`
142
+
143
+ Sanicode runs without any configuration. LLM tiers are optional -- without them, the tool runs in degraded mode using AST pattern matching, knowledge graph construction, and compliance lookups. LLM integration, once wired in (see "Current status" below), is intended to add context-aware reasoning on top of these.
144
+
145
+ ### LLM tiers (optional)
146
+
147
+ The config supports three tiers for different task complexities, each pointing at any OpenAI-compatible endpoint (Ollama, vLLM, OpenShift AI):
148
+
149
+ | Tier | Purpose | Recommended model |
150
+ |-------------|-----------------------------------|-------------------------|
151
+ | `fast` | Classification, severity scoring | Granite Nano, Mistral 7B |
152
+ | `analysis` | Data flow reasoning | Granite Code 8B |
153
+ | `reasoning` | Compliance mapping, reports | Llama 3.1 70B |
154
+
155
+ ## Current status
156
+
157
+ Phase 1 MVP: Python-only scanning, 8 detection rules, local and API server modes. LLM integration is planned but not yet wired; the tool operates in degraded mode with AST patterns and compliance mapping.
158
+
159
+ ## License
160
+
161
+ Apache-2.0
@@ -0,0 +1,94 @@
1
+ # Publishing sanicode to PyPI
2
+
3
+ ## One-Time Setup
4
+
5
+ ### 1. Configure PyPI Trusted Publisher
6
+
7
+ Trusted publishing uses OpenID Connect (OIDC) — no API tokens needed.
8
+
9
+ 1. Go to https://pypi.org/manage/account/publishing/
10
+ 2. Click "Add a new pending publisher"
11
+ 3. Fill in:
12
+ - **PyPI Project Name:** `sanicode`
13
+ - **Owner:** `rdwj`
14
+ - **Repository name:** `sanicode`
15
+ - **Workflow name:** `release.yml`
16
+ - **Environment name:** `pypi`
17
+ 4. Click "Add"
18
+
19
+ ### 2. Create GitHub Environment
20
+
21
+ 1. Go to https://github.com/rdwj/sanicode/settings/environments
22
+ 2. Create environment named `pypi`
23
+ 3. (Optional) Add yourself as a required reviewer for manual approval before publishing
24
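+ 4. Under "Deployment branches and tags", choose "Selected branches and tags" and add a tag rule for `v*.*.*` — the release workflow runs on tag pushes, so restricting the environment to the `main` branch alone would block the publish job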
25
+
26
+ ## Releasing
27
+
28
+ ### Automated (Recommended)
29
+
30
+ From the project root:
31
+
32
+ ```bash
33
+ ./scripts/release.sh <version> "<description>"
34
+
35
+ # Example:
36
+ ./scripts/release.sh 0.2.0 "Add API server and Prometheus metrics"
37
+ ```
38
+
39
+ The script:
40
+ 1. Validates version format (x.y.z)
41
+ 2. Updates `src/sanicode/version.py` and `pyproject.toml`
42
+ 3. Verifies both files match
43
+ 4. Commits, creates annotated tag `v<version>`, pushes both
44
+
45
+ GitHub Actions then:
46
+ 1. Verifies tag/version.py/pyproject.toml all agree
47
+ 2. Runs full test suite across Python 3.10-3.13
48
+ 3. Creates a GitHub Release with auto-generated notes
49
+ 4. Builds sdist + wheel
50
+ 5. Publishes to PyPI via OIDC
51
+
52
+ ### Manual Fallback
53
+
54
+ If GitHub Actions is unavailable:
55
+
56
+ ```bash
57
+ # Build
58
+ rm -rf dist/
59
+ python -m build
60
+
61
+ # Validate
62
+ twine check dist/*
63
+
64
+ # Upload (requires PyPI API token)
65
+ twine upload dist/*
66
+ ```
67
+
68
+ ## Version Numbering
69
+
70
+ Semantic versioning: `MAJOR.MINOR.PATCH`
71
+
72
+ - **MAJOR**: Breaking API changes
73
+ - **MINOR**: New features (backward compatible)
74
+ - **PATCH**: Bug fixes
75
+
76
+ The version lives in two files that must stay in sync:
77
+ - `src/sanicode/version.py` — `__version__ = "x.y.z"`
78
+ - `pyproject.toml` — `version = "x.y.z"`
79
+
80
+ The release script and GitHub Actions both enforce this invariant.
81
+
82
+ ## Verification
83
+
84
+ After a release:
85
+
86
+ ```bash
87
+ # Check PyPI
88
+ pip install sanicode==<version>
89
+ sanicode --version
90
+
91
+ # Check GitHub
92
+ # https://github.com/rdwj/sanicode/releases
93
+ # https://github.com/rdwj/sanicode/actions
94
+ ```
@@ -0,0 +1,124 @@
1
+ # Sanicode
2
+
3
+ Sanicode scans Python codebases for input validation and sanitization gaps, builds a knowledge graph of data flow (entry points, sanitizers, sinks), and maps every finding to OWASP ASVS 5.0, NIST 800-53, and ASD STIG v4r11 controls. Output formats include SARIF (for GitHub Code Scanning integration), JSON, and Markdown.
4
+
5
+ Unlike pattern-only tools like Bandit or Semgrep, sanicode constructs a data flow graph so findings carry context about *how* tainted data reaches a sink and *whether* sanitization exists along the path.
6
+
7
+ ## Install
8
+
9
+ ```
10
+ pip install sanicode
11
+ ```
12
+
13
+ Requires Python 3.10+.
14
+
15
+ ## Quick start
16
+
17
+ Scan a codebase and generate a Markdown report:
18
+
19
+ ```
20
+ sanicode scan .
21
+ ```
22
+
23
+ Generate SARIF output for CI integration:
24
+
25
+ ```
26
+ sanicode scan . -f sarif
27
+ ```
28
+
29
+ Reports are written to `sanicode-reports/` by default.
30
+
31
+ ## API server
32
+
33
+ Start the FastAPI server for remote or hybrid scan mode:
34
+
35
+ ```
36
+ sanicode serve
37
+ ```
38
+
39
+ This starts on port 8080 with Prometheus metrics at `/metrics`.
40
+
41
+ ### Endpoints
42
+
43
+ ```
44
+ POST /api/v1/scan Submit a scan (async)
45
+ GET /api/v1/scan/{id} Poll scan status
46
+ GET /api/v1/scan/{id}/findings Retrieve findings (JSON or ?format=sarif)
47
+ GET /api/v1/scan/{id}/graph Retrieve knowledge graph
48
+ POST /api/v1/analyze Instant snippet analysis
49
+ GET /api/v1/compliance/map Compliance framework lookup
50
+ GET /api/v1/health Liveness check
51
+ GET /metrics Prometheus metrics
52
+ ```
53
+
54
+ ## CLI commands
55
+
56
+ ```
57
+ sanicode scan . # Scan codebase, generate reports
58
+ sanicode scan . -f sarif # SARIF output
59
+ sanicode scan . -f json -f sarif # Multiple formats
60
+ sanicode serve # Start API server on :8080
61
+ sanicode report scan-result.json # Re-generate reports from saved results
62
+ sanicode report scan-result.json -s high # Filter by severity
63
+ sanicode report scan-result.json --cwe 89 # Filter by CWE
64
+ sanicode config --show # Show resolved configuration
65
+ sanicode config --init # Create starter sanicode.toml
66
+ sanicode graph . --export graph.json # Export knowledge graph
67
+ ```
68
+
69
+ ## Detection rules
70
+
71
+ | Rule | Description | CWE |
72
+ |--------|----------------------------------|---------|
73
+ | SC001 | `eval()` | CWE-78 |
74
+ | SC002 | `exec()` | CWE-78 |
75
+ | SC003 | `os.system()` | CWE-78 |
76
+ | SC004 | `subprocess` with `shell=True` | CWE-78 |
77
+ | SC005 | `pickle.loads()` | CWE-502 |
78
+ | SC006 | SQL string formatting | CWE-89 |
79
+ | SC007 | `__import__()` | CWE-94 |
80
+ | SC008 | `yaml.load()` without `Loader` | CWE-502 |
81
+
82
+ Each finding is enriched with CWE metadata and mapped to the active compliance profiles.
83
+
84
+ ## Compliance frameworks
85
+
86
+ Sanicode maps findings to three frameworks out of the box:
87
+
88
+ - **OWASP ASVS 5.0** -- V1: Encoding and Sanitization requirements (L1/L2/L3)
89
+ - **NIST 800-53** -- SI-10 (Information Input Validation), SI-15 (Information Output Filtering), and related controls
90
+ - **ASD STIG v4r11** -- APSC-DV-002510 (CAT I), APSC-DV-002520 (CAT II), APSC-DV-002530 (CAT II), and related checks
91
+
92
+ ## Configuration
93
+
94
+ Create a config file:
95
+
96
+ ```
97
+ sanicode config --init
98
+ ```
99
+
100
+ This writes a `sanicode.toml` in the current directory. Config is loaded from (in order):
101
+
102
+ 1. `--config` flag
103
+ 2. `sanicode.toml` in the current directory
104
+ 3. `~/.config/sanicode/config.toml`
105
+
106
+ Sanicode runs without any configuration. LLM tiers are optional -- without them, the tool runs in degraded mode using AST pattern matching, knowledge graph construction, and compliance lookups. LLM integration, once wired in (see "Current status" below), is intended to add context-aware reasoning on top of these.
107
+
108
+ ### LLM tiers (optional)
109
+
110
+ The config supports three tiers for different task complexities, each pointing at any OpenAI-compatible endpoint (Ollama, vLLM, OpenShift AI):
111
+
112
+ | Tier | Purpose | Recommended model |
113
+ |-------------|-----------------------------------|-------------------------|
114
+ | `fast` | Classification, severity scoring | Granite Nano, Mistral 7B |
115
+ | `analysis` | Data flow reasoning | Granite Code 8B |
116
+ | `reasoning` | Compliance mapping, reports | Llama 3.1 70B |
117
+
118
+ ## Current status
119
+
120
+ Phase 1 MVP: Python-only scanning, 8 detection rules, local and API server modes. LLM integration is planned but not yet wired; the tool operates in degraded mode with AST patterns and compliance mapping.
121
+
122
+ ## License
123
+
124
+ Apache-2.0