sanicode 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sanicode-0.1.0/.github/workflows/release.yml +105 -0
- sanicode-0.1.0/.github/workflows/test.yml +55 -0
- sanicode-0.1.0/.gitignore +38 -0
- sanicode-0.1.0/.gitleaks.toml +13 -0
- sanicode-0.1.0/CLAUDE.md +103 -0
- sanicode-0.1.0/PKG-INFO +161 -0
- sanicode-0.1.0/PUBLISHING.md +94 -0
- sanicode-0.1.0/README.md +124 -0
- sanicode-0.1.0/RESEARCH.md +1421 -0
- sanicode-0.1.0/data/.gitkeep +0 -0
- sanicode-0.1.0/docs/MVP_SCOPE.md +154 -0
- sanicode-0.1.0/prompts/.gitkeep +0 -0
- sanicode-0.1.0/pyproject.toml +97 -0
- sanicode-0.1.0/rules/.gitkeep +0 -0
- sanicode-0.1.0/sanicode.toml.example +76 -0
- sanicode-0.1.0/scripts/release.sh +114 -0
- sanicode-0.1.0/scripts/scaffold.sh +25 -0
- sanicode-0.1.0/src/sanicode/__init__.py +3 -0
- sanicode-0.1.0/src/sanicode/__main__.py +6 -0
- sanicode-0.1.0/src/sanicode/cli.py +426 -0
- sanicode-0.1.0/src/sanicode/compliance/__init__.py +1 -0
- sanicode-0.1.0/src/sanicode/compliance/enrichment.py +77 -0
- sanicode-0.1.0/src/sanicode/compliance/mapper.py +314 -0
- sanicode-0.1.0/src/sanicode/config.py +230 -0
- sanicode-0.1.0/src/sanicode/data/__init__.py +1 -0
- sanicode-0.1.0/src/sanicode/data/compliance_db.json +312 -0
- sanicode-0.1.0/src/sanicode/graph/__init__.py +1 -0
- sanicode-0.1.0/src/sanicode/graph/builder.py +305 -0
- sanicode-0.1.0/src/sanicode/llm/__init__.py +1 -0
- sanicode-0.1.0/src/sanicode/llm/client.py +128 -0
- sanicode-0.1.0/src/sanicode/report/__init__.py +1 -0
- sanicode-0.1.0/src/sanicode/report/json_report.py +43 -0
- sanicode-0.1.0/src/sanicode/report/markdown.py +165 -0
- sanicode-0.1.0/src/sanicode/report/persist.py +190 -0
- sanicode-0.1.0/src/sanicode/report/sarif.py +148 -0
- sanicode-0.1.0/src/sanicode/scanner/__init__.py +1 -0
- sanicode-0.1.0/src/sanicode/scanner/_sql_detect.py +27 -0
- sanicode-0.1.0/src/sanicode/scanner/ast_parser.py +49 -0
- sanicode-0.1.0/src/sanicode/scanner/data_flow.py +460 -0
- sanicode-0.1.0/src/sanicode/scanner/executor.py +120 -0
- sanicode-0.1.0/src/sanicode/scanner/patterns.py +132 -0
- sanicode-0.1.0/src/sanicode/server/__init__.py +4 -0
- sanicode-0.1.0/src/sanicode/server/app.py +280 -0
- sanicode-0.1.0/src/sanicode/server/models.py +80 -0
- sanicode-0.1.0/src/sanicode/server/state.py +73 -0
- sanicode-0.1.0/src/sanicode/version.py +3 -0
- sanicode-0.1.0/tests/__init__.py +0 -0
- sanicode-0.1.0/tests/test_cli.py +93 -0
- sanicode-0.1.0/tests/test_compliance.py +264 -0
- sanicode-0.1.0/tests/test_config.py +178 -0
- sanicode-0.1.0/tests/test_data_flow.py +461 -0
- sanicode-0.1.0/tests/test_executor.py +105 -0
- sanicode-0.1.0/tests/test_graph_builder.py +240 -0
- sanicode-0.1.0/tests/test_patterns.py +132 -0
- sanicode-0.1.0/tests/test_report.py +529 -0
- sanicode-0.1.0/tests/test_server.py +194 -0
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
name: Release
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- 'v*.*.*'
|
|
7
|
+
|
|
8
|
+
permissions:
|
|
9
|
+
contents: write
|
|
10
|
+
id-token: write
|
|
11
|
+
|
|
12
|
+
jobs:
|
|
13
|
+
verify:
|
|
14
|
+
runs-on: ubuntu-latest
|
|
15
|
+
steps:
|
|
16
|
+
- uses: actions/checkout@v4
|
|
17
|
+
|
|
18
|
+
- name: Extract tag version
|
|
19
|
+
id: tag
|
|
20
|
+
run: echo "version=${GITHUB_REF_NAME#v}" >> "$GITHUB_OUTPUT"
|
|
21
|
+
|
|
22
|
+
- name: Read version.py
|
|
23
|
+
id: pyver
|
|
24
|
+
run: |
|
|
25
|
+
PY_VERSION=$(grep '__version__' src/sanicode/version.py | cut -d'"' -f2)
|
|
26
|
+
echo "version=$PY_VERSION" >> "$GITHUB_OUTPUT"
|
|
27
|
+
|
|
28
|
+
- name: Read pyproject.toml version
|
|
29
|
+
id: tomlver
|
|
30
|
+
run: |
|
|
31
|
+
TOML_VERSION=$(grep '^version = ' pyproject.toml | head -1 | cut -d'"' -f2)
|
|
32
|
+
echo "version=$TOML_VERSION" >> "$GITHUB_OUTPUT"
|
|
33
|
+
|
|
34
|
+
- name: Verify versions match
|
|
35
|
+
run: |
|
|
36
|
+
TAG="${{ steps.tag.outputs.version }}"
|
|
37
|
+
PY="${{ steps.pyver.outputs.version }}"
|
|
38
|
+
TOML="${{ steps.tomlver.outputs.version }}"
|
|
39
|
+
echo "Tag: $TAG"
|
|
40
|
+
echo "version.py: $PY"
|
|
41
|
+
echo "pyproject.toml: $TOML"
|
|
42
|
+
if [[ "$TAG" != "$PY" || "$TAG" != "$TOML" ]]; then
|
|
43
|
+
echo "::error::Version mismatch! Tag=$TAG, version.py=$PY, pyproject.toml=$TOML"
|
|
44
|
+
exit 1
|
|
45
|
+
fi
|
|
46
|
+
|
|
47
|
+
test:
|
|
48
|
+
needs: verify
|
|
49
|
+
uses: ./.github/workflows/test.yml
|
|
50
|
+
|
|
51
|
+
create-release:
|
|
52
|
+
needs: test
|
|
53
|
+
runs-on: ubuntu-latest
|
|
54
|
+
steps:
|
|
55
|
+
- uses: actions/checkout@v4
|
|
56
|
+
|
|
57
|
+
- name: Extract version
|
|
58
|
+
id: version
|
|
59
|
+
run: echo "version=${GITHUB_REF_NAME#v}" >> "$GITHUB_OUTPUT"
|
|
60
|
+
|
|
61
|
+
- name: Create GitHub Release
|
|
62
|
+
env:
|
|
63
|
+
GH_TOKEN: ${{ github.token }}
|
|
64
|
+
run: |
|
|
65
|
+
gh release create "$GITHUB_REF_NAME" \
|
|
66
|
+
--title "sanicode v${{ steps.version.outputs.version }}" \
|
|
67
|
+
--generate-notes
|
|
68
|
+
|
|
69
|
+
build:
|
|
70
|
+
needs: create-release
|
|
71
|
+
runs-on: ubuntu-latest
|
|
72
|
+
steps:
|
|
73
|
+
- uses: actions/checkout@v4
|
|
74
|
+
|
|
75
|
+
- name: Set up Python
|
|
76
|
+
uses: actions/setup-python@v5
|
|
77
|
+
with:
|
|
78
|
+
python-version: "3.11"
|
|
79
|
+
|
|
80
|
+
- name: Install build tools
|
|
81
|
+
run: pip install build
|
|
82
|
+
|
|
83
|
+
- name: Build distribution
|
|
84
|
+
run: python -m build
|
|
85
|
+
|
|
86
|
+
- name: Upload artifacts
|
|
87
|
+
uses: actions/upload-artifact@v4
|
|
88
|
+
with:
|
|
89
|
+
name: dist
|
|
90
|
+
path: dist/
|
|
91
|
+
|
|
92
|
+
publish-to-pypi:
|
|
93
|
+
needs: build
|
|
94
|
+
runs-on: ubuntu-latest
|
|
95
|
+
environment: pypi
|
|
96
|
+
|
|
97
|
+
steps:
|
|
98
|
+
- name: Download artifacts
|
|
99
|
+
uses: actions/download-artifact@v4
|
|
100
|
+
with:
|
|
101
|
+
name: dist
|
|
102
|
+
path: dist/
|
|
103
|
+
|
|
104
|
+
- name: Publish to PyPI
|
|
105
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
name: Tests
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
tags:
|
|
7
|
+
- 'v*.*.*'
|
|
8
|
+
pull_request:
|
|
9
|
+
branches: [main]
|
|
10
|
+
workflow_call:
|
|
11
|
+
|
|
12
|
+
jobs:
|
|
13
|
+
test:
|
|
14
|
+
runs-on: ubuntu-latest
|
|
15
|
+
strategy:
|
|
16
|
+
matrix:
|
|
17
|
+
python-version: ["3.10", "3.11", "3.12", "3.13"]
|
|
18
|
+
|
|
19
|
+
steps:
|
|
20
|
+
- uses: actions/checkout@v4
|
|
21
|
+
|
|
22
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
23
|
+
uses: actions/setup-python@v5
|
|
24
|
+
with:
|
|
25
|
+
python-version: ${{ matrix.python-version }}
|
|
26
|
+
|
|
27
|
+
- name: Install dependencies
|
|
28
|
+
run: pip install -e ".[dev]"
|
|
29
|
+
|
|
30
|
+
- name: Lint with ruff
|
|
31
|
+
run: ruff check src/ tests/
|
|
32
|
+
|
|
33
|
+
- name: Run tests with coverage
|
|
34
|
+
run: pytest --cov=sanicode --cov-report=xml --cov-report=term-missing
|
|
35
|
+
|
|
36
|
+
build:
|
|
37
|
+
needs: test
|
|
38
|
+
runs-on: ubuntu-latest
|
|
39
|
+
|
|
40
|
+
steps:
|
|
41
|
+
- uses: actions/checkout@v4
|
|
42
|
+
|
|
43
|
+
- name: Set up Python
|
|
44
|
+
uses: actions/setup-python@v5
|
|
45
|
+
with:
|
|
46
|
+
python-version: "3.11"
|
|
47
|
+
|
|
48
|
+
- name: Install build tools
|
|
49
|
+
run: pip install build twine
|
|
50
|
+
|
|
51
|
+
- name: Build distribution
|
|
52
|
+
run: python -m build
|
|
53
|
+
|
|
54
|
+
- name: Validate distribution
|
|
55
|
+
run: twine check dist/*
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# Secrets and credentials
|
|
2
|
+
.env
|
|
3
|
+
.env.local
|
|
4
|
+
.env.*.local
|
|
5
|
+
*.key
|
|
6
|
+
*.pem
|
|
7
|
+
*.p12
|
|
8
|
+
*.pfx
|
|
9
|
+
credentials.json
|
|
10
|
+
secrets.yaml
|
|
11
|
+
secrets.yml
|
|
12
|
+
|
|
13
|
+
# Python
|
|
14
|
+
__pycache__/
|
|
15
|
+
*.py[cod]
|
|
16
|
+
*.egg-info/
|
|
17
|
+
*.egg
|
|
18
|
+
dist/
|
|
19
|
+
build/
|
|
20
|
+
|
|
21
|
+
# Virtual environments
|
|
22
|
+
.venv/
|
|
23
|
+
venv/
|
|
24
|
+
|
|
25
|
+
# Tool caches
|
|
26
|
+
.pytest_cache/
|
|
27
|
+
.ruff_cache/
|
|
28
|
+
.mypy_cache/
|
|
29
|
+
htmlcov/
|
|
30
|
+
.coverage
|
|
31
|
+
|
|
32
|
+
# IDE
|
|
33
|
+
.idea/
|
|
34
|
+
.vscode/
|
|
35
|
+
*.swp
|
|
36
|
+
|
|
37
|
+
# OS
|
|
38
|
+
.DS_Store
|
sanicode-0.1.0/CLAUDE.md
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
# Sanicode — Project Context
|
|
2
|
+
|
|
3
|
+
Sanicode is a PyPI-distributed CLI tool that uses an AI agent (configurable LLM backend) to build a codebase-level knowledge graph, then outputs security/sanitization recommendations mapped to OWASP ASVS, NIST 800-53, and ASD STIG controls. The key differentiator over tools like Bandit or Semgrep is data flow context via LLM reasoning over a knowledge graph, not just AST pattern matching.
|
|
4
|
+
|
|
5
|
+
See `RESEARCH.md` for the full standards baseline, architecture rationale, and tooling landscape.
|
|
6
|
+
|
|
7
|
+
## Getting Started
|
|
8
|
+
|
|
9
|
+
Start with issue #1 in the Phase 1 milestone. Issues have dependency notes in their descriptions — check "blocked by" before starting. Work issues in order within each milestone.
|
|
10
|
+
|
|
11
|
+
## Tech Stack
|
|
12
|
+
|
|
13
|
+
- Python 3.10+, package name `sanicode`
|
|
14
|
+
- CLI: Typer (chosen over Click; decided in issue #1)
|
|
15
|
+
- AST parsing: Python `ast` module (tree-sitter for multi-language, later)
|
|
16
|
+
- Knowledge graph: NetworkX (in-memory); Neo4j for persistent/large codebases
|
|
17
|
+
- LLM integration: LiteLLM (multi-provider, tiered endpoints)
|
|
18
|
+
- API server: FastAPI (`sanicode serve`)
|
|
19
|
+
- Config: TOML (`sanicode.toml` or `~/.config/sanicode/config.toml`)
|
|
20
|
+
- Output formats: Markdown, JSON, SARIF
|
|
21
|
+
- Container base: `registry.redhat.io/ubi9/python-311` (UBI9, multi-stage)
|
|
22
|
+
- Phase 3 operator: kopf (Python); Phase 4: Go Operator SDK if needed
|
|
23
|
+
|
|
24
|
+
## Project Structure
|
|
25
|
+
|
|
26
|
+
```
|
|
27
|
+
sanicode/
|
|
28
|
+
├── src/sanicode/ # Main package
|
|
29
|
+
├── data/ # Compliance cross-reference DB (JSON/TOML)
|
|
30
|
+
├── rules/ # Rule definitions (YAML, Bandit-style)
|
|
31
|
+
├── prompts/ # Prompt templates (YAML, with {variable} substitution)
|
|
32
|
+
├── tests/ # Mirrors src/sanicode/ structure; pytest, 80%+ coverage
|
|
33
|
+
└── sanicode.toml # Project config (or ~/.config/sanicode/)
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## Phase Structure
|
|
37
|
+
|
|
38
|
+
- **Phase 1** (starting now): Python package + CLI. Scan → knowledge graph → compliance-mapped report. Local mode + `sanicode serve` API mode.
|
|
39
|
+
- **Phase 2**: Containerized deployment on OpenShift. Helm chart, Prometheus metrics, Grafana dashboard, MLflow integration, Tekton task, KFP pipeline.
|
|
40
|
+
- **Phase 3**: Lightweight Python Operator via kopf. CRDs: SanicodeConfig, SanicodeScan, SanicodeProfile, SanicodeException. Auto-discover InferenceService CRs.
|
|
41
|
+
- **Phase 4**: Production Operator (Go/Operator SDK or Helm-based), OLM packaging, OperatorHub submission.
|
|
42
|
+
|
|
43
|
+
## CLI Surface
|
|
44
|
+
|
|
45
|
+
```
|
|
46
|
+
sanicode config # Configure LLM backend (tiered endpoints)
|
|
47
|
+
sanicode scan # Scan codebase, build knowledge graph
|
|
48
|
+
sanicode serve # Start FastAPI server (containerized/remote mode)
|
|
49
|
+
sanicode report # Generate compliance-mapped report
|
|
50
|
+
sanicode recommend # Output prioritized recommendations
|
|
51
|
+
sanicode graph # Inspect/export the knowledge graph
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
The CLI operates in three modes: **local** (all in-process), **remote** (thin client to sanicode-api), **hybrid** (AST local, LLM remote).
|
|
55
|
+
|
|
56
|
+
## Key Architectural Decisions
|
|
57
|
+
|
|
58
|
+
**Degraded mode is not optional.** With no LLM configured, sanicode must still produce useful output: AST pattern matching, known-bad function detection, data flow graph construction, compliance lookups, and SARIF output. LLM adds context-awareness and false-positive reduction on top.
|
|
59
|
+
|
|
60
|
+
**Compliance mapping is cross-cutting.** Every finding must carry: CWE ID, OWASP ASVS 5.0 requirement (with L1/L2/L3 level), NIST 800-53 control (SI-10, SI-15, etc.), ASD STIG check ID and CAT level (I/II/III). PCI DSS mapping where applicable.
|
|
61
|
+
|
|
62
|
+
**Knowledge graph is the differentiator.** Graph nodes: entry points, sanitization points, sink points, trust boundaries. Edges: data flow paths. LLM reasons over the graph to assess whether sanitization actually covers the identified threat.
|
|
63
|
+
|
|
64
|
+
**API is designed for machine consumption first.** A refactoring agent (Konveyor, custom agent) needs to iterate programmatically. REST endpoints: `POST /api/v1/scan`, `GET /api/v1/scan/{id}`, `GET /api/v1/scan/{id}/findings`, `GET /api/v1/scan/{id}/graph`, `POST /api/v1/analyze`, `GET /api/v1/compliance/map`.
|
|
65
|
+
|
|
66
|
+
**Offline-native, not offline-compatible.** Zero egress at runtime. All compliance data, rules, and prompt templates ship in the package. Model weights are a separate supply chain artifact (PVC or ModelCar OCI image) — never bundled with sanicode.
|
|
67
|
+
|
|
68
|
+
**Tiered model support.** `sanicode.toml` allows separate endpoints for fast (classification), analysis (data flow), and reasoning (compliance mapping + report generation) tiers. Recommended: Granite Nano → Granite Code 8B → Llama 3.1 70B.
|
|
69
|
+
|
|
70
|
+
## Backlog Discipline
|
|
71
|
+
|
|
72
|
+
Work backlog items until they are 100% complete. 99% rounds to zero. If during implementation we discover scope that should wait (separate concern, large effort, blocked on something else), the standard procedure is:
|
|
73
|
+
|
|
74
|
+
1. Create a new backlog issue for the carved-out work, with full context.
|
|
75
|
+
2. Update the current issue to document what moved and reference the new issue(s).
|
|
76
|
+
3. Only then close the current issue.
|
|
77
|
+
|
|
78
|
+
This prevents hidden technical debt from accumulating behind "done" items. An agent should never offer to close an item until the carve-out issues exist and the current item's description reflects the reduced scope.
|
|
79
|
+
|
|
80
|
+
## Releasing
|
|
81
|
+
|
|
82
|
+
The version lives in two files that must stay in sync:
|
|
83
|
+
- `src/sanicode/version.py` — `__version__ = "x.y.z"`
|
|
84
|
+
- `pyproject.toml` — `version = "x.y.z"`
|
|
85
|
+
|
|
86
|
+
To release, run from the project root:
|
|
87
|
+
|
|
88
|
+
```bash
|
|
89
|
+
./scripts/release.sh <version> "<description>"
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
This updates both version files, commits, tags, and pushes. GitHub Actions handles testing, GitHub Release creation, and PyPI publishing via OIDC trusted publishing. See `PUBLISHING.md` for one-time setup steps.
|
|
93
|
+
|
|
94
|
+
Never update version files manually — always use the release script to ensure consistency.
|
|
95
|
+
|
|
96
|
+
## Don't Forget
|
|
97
|
+
|
|
98
|
+
- SARIF output is critical for ecosystem interoperability (GitHub Code Scanning, VS Code, Azure DevOps, Tekton gating). Prioritize this over custom formats.
|
|
99
|
+
- OWASP ASVS reference is version **5.0** (released May 2025). The chapter is now "V1: Encoding and Sanitization" — not the old V5. Use `v5.0.0-V1-x.x` identifiers.
|
|
100
|
+
- ASD STIG version is **v4 r11**. Key IDs: APSC-DV-002510 (command injection, CAT I), APSC-DV-002520 (XSS, CAT II), APSC-DV-002530 (input validation, CAT II).
|
|
101
|
+
- CWE is the lingua franca across all frameworks. Core sanitization CWEs: 20, 77, 78, 79, 89, 94, 116, 918, 1333.
|
|
102
|
+
- `SanicodeException` CRs (Phase 3) are critical for ATO workflows — RBAC-controlled, expiring, GitOps-managed risk acceptances.
|
|
103
|
+
- Expose `/metrics` (Prometheus) from day one in `sanicode serve`. Compliance score, findings by severity/CWE/namespace, LLM usage.
|
sanicode-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: sanicode
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: AI-assisted code sanitization scanner with OWASP ASVS, NIST 800-53, and ASD STIG compliance mapping.
|
|
5
|
+
Project-URL: Homepage, https://github.com/rdwj/sanicode
|
|
6
|
+
Project-URL: Repository, https://github.com/rdwj/sanicode
|
|
7
|
+
Project-URL: Issues, https://github.com/rdwj/sanicode/issues
|
|
8
|
+
Author: Sanicode Contributors
|
|
9
|
+
License: Apache-2.0
|
|
10
|
+
Keywords: compliance,llm,owasp,sast,security,stig
|
|
11
|
+
Classifier: Development Status :: 2 - Pre-Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
+
Classifier: Topic :: Security
|
|
19
|
+
Requires-Python: >=3.10
|
|
20
|
+
Requires-Dist: fastapi>=0.100
|
|
21
|
+
Requires-Dist: litellm>=1.0
|
|
22
|
+
Requires-Dist: networkx>=3.0
|
|
23
|
+
Requires-Dist: prometheus-client>=0.17
|
|
24
|
+
Requires-Dist: rich>=13.0
|
|
25
|
+
Requires-Dist: tomli>=2.0; python_version < '3.11'
|
|
26
|
+
Requires-Dist: typer>=0.9.0
|
|
27
|
+
Requires-Dist: uvicorn[standard]>=0.20
|
|
28
|
+
Provides-Extra: dev
|
|
29
|
+
Requires-Dist: build>=1.0; extra == 'dev'
|
|
30
|
+
Requires-Dist: httpx>=0.24; extra == 'dev'
|
|
31
|
+
Requires-Dist: pytest-asyncio>=0.21; extra == 'dev'
|
|
32
|
+
Requires-Dist: pytest-cov>=4.0; extra == 'dev'
|
|
33
|
+
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
34
|
+
Requires-Dist: ruff>=0.1.0; extra == 'dev'
|
|
35
|
+
Requires-Dist: twine>=5.0; extra == 'dev'
|
|
36
|
+
Description-Content-Type: text/markdown
|
|
37
|
+
|
|
38
|
+
# Sanicode
|
|
39
|
+
|
|
40
|
+
Sanicode scans Python codebases for input validation and sanitization gaps, builds a knowledge graph of data flow (entry points, sanitizers, sinks), and maps every finding to OWASP ASVS 5.0, NIST 800-53, and ASD STIG v4r11 controls. Output formats include SARIF (for GitHub Code Scanning integration), JSON, and Markdown.
|
|
41
|
+
|
|
42
|
+
Unlike pattern-only tools like Bandit or Semgrep, sanicode constructs a data flow graph so findings carry context about *how* tainted data reaches a sink and *whether* sanitization exists along the path.
|
|
43
|
+
|
|
44
|
+
## Install
|
|
45
|
+
|
|
46
|
+
```
|
|
47
|
+
pip install sanicode
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
Requires Python 3.10+.
|
|
51
|
+
|
|
52
|
+
## Quick start
|
|
53
|
+
|
|
54
|
+
Scan a codebase and generate a Markdown report:
|
|
55
|
+
|
|
56
|
+
```
|
|
57
|
+
sanicode scan .
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
Generate SARIF output for CI integration:
|
|
61
|
+
|
|
62
|
+
```
|
|
63
|
+
sanicode scan . -f sarif
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
Reports are written to `sanicode-reports/` by default.
|
|
67
|
+
|
|
68
|
+
## API server
|
|
69
|
+
|
|
70
|
+
Start the FastAPI server for remote or hybrid scan mode:
|
|
71
|
+
|
|
72
|
+
```
|
|
73
|
+
sanicode serve
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
This starts on port 8080 with Prometheus metrics at `/metrics`.
|
|
77
|
+
|
|
78
|
+
### Endpoints
|
|
79
|
+
|
|
80
|
+
```
|
|
81
|
+
POST /api/v1/scan Submit a scan (async)
|
|
82
|
+
GET /api/v1/scan/{id} Poll scan status
|
|
83
|
+
GET /api/v1/scan/{id}/findings Retrieve findings (JSON or ?format=sarif)
|
|
84
|
+
GET /api/v1/scan/{id}/graph Retrieve knowledge graph
|
|
85
|
+
POST /api/v1/analyze Instant snippet analysis
|
|
86
|
+
GET /api/v1/compliance/map Compliance framework lookup
|
|
87
|
+
GET /api/v1/health Liveness check
|
|
88
|
+
GET /metrics Prometheus metrics
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
## CLI commands
|
|
92
|
+
|
|
93
|
+
```
|
|
94
|
+
sanicode scan . # Scan codebase, generate reports
|
|
95
|
+
sanicode scan . -f sarif # SARIF output
|
|
96
|
+
sanicode scan . -f json -f sarif # Multiple formats
|
|
97
|
+
sanicode serve # Start API server on :8080
|
|
98
|
+
sanicode report scan-result.json # Re-generate reports from saved results
|
|
99
|
+
sanicode report scan-result.json -s high # Filter by severity
|
|
100
|
+
sanicode report scan-result.json --cwe 89 # Filter by CWE
|
|
101
|
+
sanicode config --show # Show resolved configuration
|
|
102
|
+
sanicode config --init # Create starter sanicode.toml
|
|
103
|
+
sanicode graph . --export graph.json # Export knowledge graph
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
## Detection rules
|
|
107
|
+
|
|
108
|
+
| Rule | Description | CWE |
|
|
109
|
+
|--------|----------------------------------|---------|
|
|
110
|
+
| SC001 | `eval()` | CWE-78 |
|
|
111
|
+
| SC002 | `exec()` | CWE-78 |
|
|
112
|
+
| SC003 | `os.system()` | CWE-78 |
|
|
113
|
+
| SC004 | `subprocess` with `shell=True` | CWE-78 |
|
|
114
|
+
| SC005 | `pickle.loads()` | CWE-502 |
|
|
115
|
+
| SC006 | SQL string formatting | CWE-89 |
|
|
116
|
+
| SC007 | `__import__()` | CWE-94 |
|
|
117
|
+
| SC008 | `yaml.load()` without `Loader` | CWE-502 |
|
|
118
|
+
|
|
119
|
+
Each finding is enriched with CWE metadata and mapped to the active compliance profiles.
|
|
120
|
+
|
|
121
|
+
## Compliance frameworks
|
|
122
|
+
|
|
123
|
+
Sanicode maps findings to three frameworks out of the box:
|
|
124
|
+
|
|
125
|
+
- **OWASP ASVS 5.0** -- V1: Encoding and Sanitization requirements (L1/L2/L3)
|
|
126
|
+
- **NIST 800-53** -- SI-10 (Information Input Validation), SI-15 (Information Output Filtering), and related controls
|
|
127
|
+
- **ASD STIG v4r11** -- APSC-DV-002510 (CAT I), APSC-DV-002520 (CAT II), APSC-DV-002530 (CAT II), and related checks
|
|
128
|
+
|
|
129
|
+
## Configuration
|
|
130
|
+
|
|
131
|
+
Create a config file:
|
|
132
|
+
|
|
133
|
+
```
|
|
134
|
+
sanicode config --init
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
This writes a `sanicode.toml` in the current directory. Config is loaded from (in order):
|
|
138
|
+
|
|
139
|
+
1. `--config` flag
|
|
140
|
+
2. `sanicode.toml` in the current directory
|
|
141
|
+
3. `~/.config/sanicode/config.toml`
|
|
142
|
+
|
|
143
|
+
Sanicode works fully without any configuration. LLM tiers are optional -- without them, the tool runs in degraded mode using AST pattern matching, knowledge graph construction, and compliance lookups. LLM integration adds context-aware reasoning on top of these.
|
|
144
|
+
|
|
145
|
+
### LLM tiers (optional)
|
|
146
|
+
|
|
147
|
+
The config supports three tiers for different task complexities, each pointing at any OpenAI-compatible endpoint (Ollama, vLLM, OpenShift AI):
|
|
148
|
+
|
|
149
|
+
| Tier | Purpose | Recommended model |
|
|
150
|
+
|-------------|-----------------------------------|-------------------------|
|
|
151
|
+
| `fast` | Classification, severity scoring | Granite Nano, Mistral 7B |
|
|
152
|
+
| `analysis` | Data flow reasoning | Granite Code 8B |
|
|
153
|
+
| `reasoning` | Compliance mapping, reports | Llama 3.1 70B |
|
|
154
|
+
|
|
155
|
+
## Current status
|
|
156
|
+
|
|
157
|
+
Phase 1 MVP: Python-only scanning, 8 detection rules, local and API server modes. LLM integration is planned but not yet wired; the tool operates in degraded mode with AST patterns and compliance mapping.
|
|
158
|
+
|
|
159
|
+
## License
|
|
160
|
+
|
|
161
|
+
Apache-2.0
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# Publishing sanicode to PyPI
|
|
2
|
+
|
|
3
|
+
## One-Time Setup
|
|
4
|
+
|
|
5
|
+
### 1. Configure PyPI Trusted Publisher
|
|
6
|
+
|
|
7
|
+
Trusted publishing uses OpenID Connect (OIDC) — no API tokens needed.
|
|
8
|
+
|
|
9
|
+
1. Go to https://pypi.org/manage/account/publishing/
|
|
10
|
+
2. Click "Add a new pending publisher"
|
|
11
|
+
3. Fill in:
|
|
12
|
+
- **PyPI Project Name:** `sanicode`
|
|
13
|
+
- **Owner:** `rdwj`
|
|
14
|
+
- **Repository name:** `sanicode`
|
|
15
|
+
- **Workflow name:** `release.yml`
|
|
16
|
+
- **Environment name:** `pypi`
|
|
17
|
+
4. Click "Add"
|
|
18
|
+
|
|
19
|
+
### 2. Create GitHub Environment
|
|
20
|
+
|
|
21
|
+
1. Go to https://github.com/rdwj/sanicode/settings/environments
|
|
22
|
+
2. Create environment named `pypi`
|
|
23
|
+
3. (Optional) Add yourself as a required reviewer for manual approval before publishing
|
|
24
|
+
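4. Set deployment branches and tags to "Selected branches and tags" and add a tag rule for `v*.*.*` (the release workflow runs on tag pushes, so a "main"-only branch rule would block the publish job)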
|
|
25
|
+
|
|
26
|
+
## Releasing
|
|
27
|
+
|
|
28
|
+
### Automated (Recommended)
|
|
29
|
+
|
|
30
|
+
From the project root:
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
./scripts/release.sh <version> "<description>"
|
|
34
|
+
|
|
35
|
+
# Example:
|
|
36
|
+
./scripts/release.sh 0.2.0 "Add API server and Prometheus metrics"
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
The script:
|
|
40
|
+
1. Validates version format (x.y.z)
|
|
41
|
+
2. Updates `src/sanicode/version.py` and `pyproject.toml`
|
|
42
|
+
3. Verifies both files match
|
|
43
|
+
4. Commits, creates annotated tag `v<version>`, pushes both
|
|
44
|
+
|
|
45
|
+
GitHub Actions then:
|
|
46
|
+
1. Verifies tag/version.py/pyproject.toml all agree
|
|
47
|
+
2. Runs full test suite across Python 3.10-3.13
|
|
48
|
+
3. Creates a GitHub Release with auto-generated notes
|
|
49
|
+
4. Builds sdist + wheel
|
|
50
|
+
5. Publishes to PyPI via OIDC
|
|
51
|
+
|
|
52
|
+
### Manual Fallback
|
|
53
|
+
|
|
54
|
+
If GitHub Actions is unavailable:
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
# Build
|
|
58
|
+
rm -rf dist/
|
|
59
|
+
python -m build
|
|
60
|
+
|
|
61
|
+
# Validate
|
|
62
|
+
twine check dist/*
|
|
63
|
+
|
|
64
|
+
# Upload (requires PyPI API token)
|
|
65
|
+
twine upload dist/*
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## Version Numbering
|
|
69
|
+
|
|
70
|
+
Semantic versioning: `MAJOR.MINOR.PATCH`
|
|
71
|
+
|
|
72
|
+
- **MAJOR**: Breaking API changes
|
|
73
|
+
- **MINOR**: New features (backward compatible)
|
|
74
|
+
- **PATCH**: Bug fixes
|
|
75
|
+
|
|
76
|
+
The version lives in two files that must stay in sync:
|
|
77
|
+
- `src/sanicode/version.py` — `__version__ = "x.y.z"`
|
|
78
|
+
- `pyproject.toml` — `version = "x.y.z"`
|
|
79
|
+
|
|
80
|
+
The release script and GitHub Actions both enforce this invariant.
|
|
81
|
+
|
|
82
|
+
## Verification
|
|
83
|
+
|
|
84
|
+
After a release:
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
# Check PyPI
|
|
88
|
+
pip install sanicode==<version>
|
|
89
|
+
sanicode --version
|
|
90
|
+
|
|
91
|
+
# Check GitHub
|
|
92
|
+
# https://github.com/rdwj/sanicode/releases
|
|
93
|
+
# https://github.com/rdwj/sanicode/actions
|
|
94
|
+
```
|
sanicode-0.1.0/README.md
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
# Sanicode
|
|
2
|
+
|
|
3
|
+
Sanicode scans Python codebases for input validation and sanitization gaps, builds a knowledge graph of data flow (entry points, sanitizers, sinks), and maps every finding to OWASP ASVS 5.0, NIST 800-53, and ASD STIG v4r11 controls. Output formats include SARIF (for GitHub Code Scanning integration), JSON, and Markdown.
|
|
4
|
+
|
|
5
|
+
Unlike pattern-only tools like Bandit or Semgrep, sanicode constructs a data flow graph so findings carry context about *how* tainted data reaches a sink and *whether* sanitization exists along the path.
|
|
6
|
+
|
|
7
|
+
## Install
|
|
8
|
+
|
|
9
|
+
```
|
|
10
|
+
pip install sanicode
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
Requires Python 3.10+.
|
|
14
|
+
|
|
15
|
+
## Quick start
|
|
16
|
+
|
|
17
|
+
Scan a codebase and generate a Markdown report:
|
|
18
|
+
|
|
19
|
+
```
|
|
20
|
+
sanicode scan .
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
Generate SARIF output for CI integration:
|
|
24
|
+
|
|
25
|
+
```
|
|
26
|
+
sanicode scan . -f sarif
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
Reports are written to `sanicode-reports/` by default.
|
|
30
|
+
|
|
31
|
+
## API server
|
|
32
|
+
|
|
33
|
+
Start the FastAPI server for remote or hybrid scan mode:
|
|
34
|
+
|
|
35
|
+
```
|
|
36
|
+
sanicode serve
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
This starts on port 8080 with Prometheus metrics at `/metrics`.
|
|
40
|
+
|
|
41
|
+
### Endpoints
|
|
42
|
+
|
|
43
|
+
```
|
|
44
|
+
POST /api/v1/scan Submit a scan (async)
|
|
45
|
+
GET /api/v1/scan/{id} Poll scan status
|
|
46
|
+
GET /api/v1/scan/{id}/findings Retrieve findings (JSON or ?format=sarif)
|
|
47
|
+
GET /api/v1/scan/{id}/graph Retrieve knowledge graph
|
|
48
|
+
POST /api/v1/analyze Instant snippet analysis
|
|
49
|
+
GET /api/v1/compliance/map Compliance framework lookup
|
|
50
|
+
GET /api/v1/health Liveness check
|
|
51
|
+
GET /metrics Prometheus metrics
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## CLI commands
|
|
55
|
+
|
|
56
|
+
```
|
|
57
|
+
sanicode scan . # Scan codebase, generate reports
|
|
58
|
+
sanicode scan . -f sarif # SARIF output
|
|
59
|
+
sanicode scan . -f json -f sarif # Multiple formats
|
|
60
|
+
sanicode serve # Start API server on :8080
|
|
61
|
+
sanicode report scan-result.json # Re-generate reports from saved results
|
|
62
|
+
sanicode report scan-result.json -s high # Filter by severity
|
|
63
|
+
sanicode report scan-result.json --cwe 89 # Filter by CWE
|
|
64
|
+
sanicode config --show # Show resolved configuration
|
|
65
|
+
sanicode config --init # Create starter sanicode.toml
|
|
66
|
+
sanicode graph . --export graph.json # Export knowledge graph
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
## Detection rules
|
|
70
|
+
|
|
71
|
+
| Rule | Description | CWE |
|
|
72
|
+
|--------|----------------------------------|---------|
|
|
73
|
+
| SC001 | `eval()` | CWE-78 |
|
|
74
|
+
| SC002 | `exec()` | CWE-78 |
|
|
75
|
+
| SC003 | `os.system()` | CWE-78 |
|
|
76
|
+
| SC004 | `subprocess` with `shell=True` | CWE-78 |
|
|
77
|
+
| SC005 | `pickle.loads()` | CWE-502 |
|
|
78
|
+
| SC006 | SQL string formatting | CWE-89 |
|
|
79
|
+
| SC007 | `__import__()` | CWE-94 |
|
|
80
|
+
| SC008 | `yaml.load()` without `Loader` | CWE-502 |
|
|
81
|
+
|
|
82
|
+
Each finding is enriched with CWE metadata and mapped to the active compliance profiles.
|
|
83
|
+
|
|
84
|
+
## Compliance frameworks
|
|
85
|
+
|
|
86
|
+
Sanicode maps findings to three frameworks out of the box:
|
|
87
|
+
|
|
88
|
+
- **OWASP ASVS 5.0** -- V1: Encoding and Sanitization requirements (L1/L2/L3)
|
|
89
|
+
- **NIST 800-53** -- SI-10 (Information Input Validation), SI-15 (Information Output Filtering), and related controls
|
|
90
|
+
- **ASD STIG v4r11** -- APSC-DV-002510 (CAT I), APSC-DV-002520 (CAT II), APSC-DV-002530 (CAT II), and related checks
|
|
91
|
+
|
|
92
|
+
## Configuration
|
|
93
|
+
|
|
94
|
+
Create a config file:
|
|
95
|
+
|
|
96
|
+
```
|
|
97
|
+
sanicode config --init
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
This writes a `sanicode.toml` in the current directory. Config is loaded from (in order):
|
|
101
|
+
|
|
102
|
+
1. `--config` flag
|
|
103
|
+
2. `sanicode.toml` in the current directory
|
|
104
|
+
3. `~/.config/sanicode/config.toml`
|
|
105
|
+
|
|
106
|
+
Sanicode works fully without any configuration. LLM tiers are optional -- without them, the tool runs in degraded mode using AST pattern matching, knowledge graph construction, and compliance lookups. LLM integration adds context-aware reasoning on top of these.
|
|
107
|
+
|
|
108
|
+
### LLM tiers (optional)
|
|
109
|
+
|
|
110
|
+
The config supports three tiers for different task complexities, each pointing at any OpenAI-compatible endpoint (Ollama, vLLM, OpenShift AI):
|
|
111
|
+
|
|
112
|
+
| Tier | Purpose | Recommended model |
|
|
113
|
+
|-------------|-----------------------------------|-------------------------|
|
|
114
|
+
| `fast` | Classification, severity scoring | Granite Nano, Mistral 7B |
|
|
115
|
+
| `analysis` | Data flow reasoning | Granite Code 8B |
|
|
116
|
+
| `reasoning` | Compliance mapping, reports | Llama 3.1 70B |
|
|
117
|
+
|
|
118
|
+
## Current status
|
|
119
|
+
|
|
120
|
+
Phase 1 MVP: Python-only scanning, 8 detection rules, local and API server modes. LLM integration is planned but not yet wired; the tool operates in degraded mode with AST patterns and compliance mapping.
|
|
121
|
+
|
|
122
|
+
## License
|
|
123
|
+
|
|
124
|
+
Apache-2.0
|