llm-security-scanner 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llm_security_scanner-0.1.0/.dockerignore +14 -0
- llm_security_scanner-0.1.0/.github/ISSUE_TEMPLATE/bug_report.md +20 -0
- llm_security_scanner-0.1.0/.github/ISSUE_TEMPLATE/feature_request.md +11 -0
- llm_security_scanner-0.1.0/.github/PULL_REQUEST_TEMPLATE.md +8 -0
- llm_security_scanner-0.1.0/.github/actions/llm-scan/action.yml +125 -0
- llm_security_scanner-0.1.0/.github/workflows/llm-scan.yml +78 -0
- llm_security_scanner-0.1.0/.github/workflows/release.yml +50 -0
- llm_security_scanner-0.1.0/.gitignore +11 -0
- llm_security_scanner-0.1.0/CHANGELOG.md +10 -0
- llm_security_scanner-0.1.0/CLAUDE.md +183 -0
- llm_security_scanner-0.1.0/CONTRIBUTING.md +29 -0
- llm_security_scanner-0.1.0/Dockerfile +18 -0
- llm_security_scanner-0.1.0/LICENSE +21 -0
- llm_security_scanner-0.1.0/PKG-INFO +540 -0
- llm_security_scanner-0.1.0/README.md +516 -0
- llm_security_scanner-0.1.0/RUNBOOK.md +513 -0
- llm_security_scanner-0.1.0/demo/chatbot_openai_app.py +83 -0
- llm_security_scanner-0.1.0/demo/vulnerable_app.py +65 -0
- llm_security_scanner-0.1.0/docs/FUNCTIONALITY.md +72 -0
- llm_security_scanner-0.1.0/examples/README.md +63 -0
- llm_security_scanner-0.1.0/examples/config/ci-url.yml +15 -0
- llm_security_scanner-0.1.0/examples/config/local-url.yml +15 -0
- llm_security_scanner-0.1.0/examples/config/ollama-target.yml +13 -0
- llm_security_scanner-0.1.0/examples/config/public-url.yml +15 -0
- llm_security_scanner-0.1.0/examples/docker/docker-compose.local.yml +36 -0
- llm_security_scanner-0.1.0/examples/github/llm-security.docker.yml +71 -0
- llm_security_scanner-0.1.0/examples/github/llm-security.yml +59 -0
- llm_security_scanner-0.1.0/examples/gitlab/llm-security.gitlab-ci.docker.yml +76 -0
- llm_security_scanner-0.1.0/examples/gitlab/llm-security.gitlab-ci.yml +60 -0
- llm_security_scanner-0.1.0/examples/llm-scan.yml +22 -0
- llm_security_scanner-0.1.0/payloads/extended/llm10_extended.yaml +41 -0
- llm_security_scanner-0.1.0/payloads/llm01_prompt_injection.yaml +44 -0
- llm_security_scanner-0.1.0/payloads/llm02_sensitive_info_disclosure.yaml +39 -0
- llm_security_scanner-0.1.0/payloads/llm03_supply_chain.yaml +37 -0
- llm_security_scanner-0.1.0/payloads/llm04_data_model_poisoning.yaml +39 -0
- llm_security_scanner-0.1.0/payloads/llm05_improper_output_handling.yaml +40 -0
- llm_security_scanner-0.1.0/payloads/llm06_excessive_agency.yaml +38 -0
- llm_security_scanner-0.1.0/payloads/llm07_system_prompt_leakage.yaml +39 -0
- llm_security_scanner-0.1.0/payloads/llm08_vector_embedding_weaknesses.yaml +40 -0
- llm_security_scanner-0.1.0/payloads/llm09_misinformation.yaml +41 -0
- llm_security_scanner-0.1.0/payloads/llm10_unbounded_consumption.yaml +41 -0
- llm_security_scanner-0.1.0/pyproject.toml +69 -0
- llm_security_scanner-0.1.0/src/llm_scanner/__init__.py +1 -0
- llm_security_scanner-0.1.0/src/llm_scanner/baselines/__init__.py +90 -0
- llm_security_scanner-0.1.0/src/llm_scanner/cli.py +643 -0
- llm_security_scanner-0.1.0/src/llm_scanner/judge/__init__.py +3 -0
- llm_security_scanner-0.1.0/src/llm_scanner/judge/ollama_judge.py +200 -0
- llm_security_scanner-0.1.0/src/llm_scanner/models.py +118 -0
- llm_security_scanner-0.1.0/src/llm_scanner/payloads/__init__.py +1 -0
- llm_security_scanner-0.1.0/src/llm_scanner/payloads/loader.py +74 -0
- llm_security_scanner-0.1.0/src/llm_scanner/preflight.py +118 -0
- llm_security_scanner-0.1.0/src/llm_scanner/reporters/__init__.py +38 -0
- llm_security_scanner-0.1.0/src/llm_scanner/reporters/html.py +33 -0
- llm_security_scanner-0.1.0/src/llm_scanner/reporters/json_reporter.py +15 -0
- llm_security_scanner-0.1.0/src/llm_scanner/reporters/markdown.py +47 -0
- llm_security_scanner-0.1.0/src/llm_scanner/reporters/sarif.py +106 -0
- llm_security_scanner-0.1.0/src/llm_scanner/reporters/text.py +45 -0
- llm_security_scanner-0.1.0/src/llm_scanner/reporters/trend.py +71 -0
- llm_security_scanner-0.1.0/src/llm_scanner/scanner.py +121 -0
- llm_security_scanner-0.1.0/src/llm_scanner/suppressions/__init__.py +104 -0
- llm_security_scanner-0.1.0/src/llm_scanner/targets/__init__.py +44 -0
- llm_security_scanner-0.1.0/src/llm_scanner/targets/base.py +29 -0
- llm_security_scanner-0.1.0/src/llm_scanner/targets/http.py +53 -0
- llm_security_scanner-0.1.0/src/llm_scanner/targets/ollama_target.py +46 -0
- llm_security_scanner-0.1.0/src/llm_scanner/templates/index.html.j2 +92 -0
- llm_security_scanner-0.1.0/src/llm_scanner/templates/report.html.j2 +57 -0
- llm_security_scanner-0.1.0/tests/conftest.py +45 -0
- llm_security_scanner-0.1.0/tests/test_baseline.py +179 -0
- llm_security_scanner-0.1.0/tests/test_cli.py +523 -0
- llm_security_scanner-0.1.0/tests/test_judge.py +157 -0
- llm_security_scanner-0.1.0/tests/test_loader.py +125 -0
- llm_security_scanner-0.1.0/tests/test_models.py +140 -0
- llm_security_scanner-0.1.0/tests/test_preflight.py +186 -0
- llm_security_scanner-0.1.0/tests/test_reporters.py +312 -0
- llm_security_scanner-0.1.0/tests/test_sarif.py +213 -0
- llm_security_scanner-0.1.0/tests/test_scanner.py +257 -0
- llm_security_scanner-0.1.0/tests/test_suppressions.py +136 -0
- llm_security_scanner-0.1.0/tests/test_targets.py +173 -0
- llm_security_scanner-0.1.0/tests/test_trend.py +101 -0
- llm_security_scanner-0.1.0/uv.lock +1562 -0
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: Bug report
|
|
3
|
+
about: Something doesn't work as expected
|
|
4
|
+
labels: bug
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
**Command run** (redact any secrets/API keys)
|
|
8
|
+
```
|
|
9
|
+
llm-scanner --target ... --target-type ... --judge-model ...
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
**Expected behavior**
|
|
13
|
+
|
|
14
|
+
**Actual behavior**
|
|
15
|
+
|
|
16
|
+
**Environment**
|
|
17
|
+
- OS:
|
|
18
|
+
- Python version:
|
|
19
|
+
- `llm-scanner` version / commit:
|
|
20
|
+
- Ollama version:
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
name: 'LLM Security Scanner'
|
|
2
|
+
description: 'Automated OWASP Top 10 for LLMs security testing — 46+ attack payloads, local Ollama judge, fully offline.'
|
|
3
|
+
author: 'Konrad Malinowski'
|
|
4
|
+
|
|
5
|
+
inputs:
|
|
6
|
+
target:
|
|
7
|
+
description: 'Target URL (HTTP endpoint) or Ollama model name to test'
|
|
8
|
+
required: false
|
|
9
|
+
target-type:
|
|
10
|
+
description: 'Type of target: url or ollama'
|
|
11
|
+
required: false
|
|
12
|
+
default: 'url'
|
|
13
|
+
judge-model:
|
|
14
|
+
description: 'Local Ollama model used as the AI evaluator'
|
|
15
|
+
required: false
|
|
16
|
+
default: 'llama3.2:3b'
|
|
17
|
+
api-key:
|
|
18
|
+
description: 'Bearer token for authenticated endpoints (never logged)'
|
|
19
|
+
required: false
|
|
20
|
+
default: ''
|
|
21
|
+
severity:
|
|
22
|
+
description: 'Minimum severity filter: critical, high, medium, low, info. Empty = all.'
|
|
23
|
+
required: false
|
|
24
|
+
default: ''
|
|
25
|
+
categories:
|
|
26
|
+
description: 'Comma-separated OWASP categories (e.g. LLM01,LLM07). Empty = all except LLM10.'
|
|
27
|
+
required: false
|
|
28
|
+
default: ''
|
|
29
|
+
include-dos-tests:
|
|
30
|
+
description: 'Include LLM10 Unbounded Consumption probes. Only use against targets you own.'
|
|
31
|
+
required: false
|
|
32
|
+
default: 'false'
|
|
33
|
+
output-dir:
|
|
34
|
+
description: 'Directory for saved report files'
|
|
35
|
+
required: false
|
|
36
|
+
default: 'llm-scan-reports'
|
|
37
|
+
fail-on-score:
|
|
38
|
+
description: 'Fail the workflow when risk score is greater than or equal to this threshold. Empty = never fail by score.'
|
|
39
|
+
required: false
|
|
40
|
+
default: ''
|
|
41
|
+
config:
|
|
42
|
+
description: 'Optional llm-scan.yml config path. Explicit action inputs override config values.'
|
|
43
|
+
required: false
|
|
44
|
+
default: ''
|
|
45
|
+
|
|
46
|
+
outputs:
|
|
47
|
+
report-html:
|
|
48
|
+
description: 'Path to the generated HTML report'
|
|
49
|
+
value: ${{ steps.scan.outputs.report-html }}
|
|
50
|
+
risk-score:
|
|
51
|
+
description: 'Numeric risk score (0.0–10.0)'
|
|
52
|
+
value: ${{ steps.scan.outputs.risk-score }}
|
|
53
|
+
|
|
54
|
+
runs:
|
|
55
|
+
using: 'composite'
|
|
56
|
+
steps:
|
|
57
|
+
- name: Set up Python
|
|
58
|
+
uses: actions/setup-python@v5
|
|
59
|
+
with:
|
|
60
|
+
python-version: '3.11'
|
|
61
|
+
|
|
62
|
+
- name: Set up uv
|
|
63
|
+
uses: astral-sh/setup-uv@v3
|
|
64
|
+
|
|
65
|
+
- name: Install llm-security-scanner
|
|
66
|
+
shell: bash
|
|
67
|
+
run: |
|
|
68
|
+
uv pip install --system -e "${{ github.action_path }}/../../.."
|
|
69
|
+
|
|
70
|
+
- name: Install Ollama
|
|
71
|
+
shell: bash
|
|
72
|
+
run: |
|
|
73
|
+
curl -fsSL https://ollama.com/install.sh | bash
|
|
74
|
+
|
|
75
|
+
- name: Start Ollama and pull judge model
|
|
76
|
+
shell: bash
|
|
77
|
+
run: |
|
|
78
|
+
ollama serve &
|
|
79
|
+
echo "Waiting for Ollama to be ready..."
|
|
80
|
+
for i in $(seq 1 30); do
|
|
81
|
+
if curl -sf http://localhost:11434/api/version >/dev/null 2>&1; then
|
|
82
|
+
echo "Ollama ready after $((i * 2))s"
|
|
83
|
+
break
|
|
84
|
+
fi
|
|
85
|
+
sleep 2
|
|
86
|
+
done
|
|
87
|
+
ollama pull "${{ inputs.judge-model }}"
|
|
88
|
+
|
|
89
|
+
- name: Run LLM security scan
|
|
90
|
+
id: scan
|
|
91
|
+
shell: bash
|
|
92
|
+
run: |
|
|
93
|
+
set -e
|
|
94
|
+
mkdir -p "${{ inputs.output-dir }}"
|
|
95
|
+
|
|
96
|
+
ARGS=()
|
|
97
|
+
|
|
98
|
+
[ -n "${{ inputs.config }}" ] && ARGS+=(--config "${{ inputs.config }}")
|
|
99
|
+
[ -n "${{ inputs.target }}" ] && ARGS+=(--target "${{ inputs.target }}")
|
|
100
|
+
[ -n "${{ inputs.target-type }}" ] && ARGS+=(--target-type "${{ inputs.target-type }}")
|
|
101
|
+
[ -n "${{ inputs.judge-model }}" ] && ARGS+=(--judge-model "${{ inputs.judge-model }}")
|
|
102
|
+
|
|
103
|
+
ARGS+=(--format json,html,sarif --output-dir "${{ inputs.output-dir }}")
|
|
104
|
+
|
|
105
|
+
[ -n "${{ inputs.api-key }}" ] && ARGS+=(--api-key "${{ inputs.api-key }}")
|
|
106
|
+
[ -n "${{ inputs.severity }}" ] && ARGS+=(--severity "${{ inputs.severity }}")
|
|
107
|
+
[ -n "${{ inputs.categories }}" ] && ARGS+=(--categories "${{ inputs.categories }}")
|
|
108
|
+
[ -n "${{ inputs.fail-on-score }}" ] && ARGS+=(--fail-on-score "${{ inputs.fail-on-score }}")
|
|
109
|
+
[ "${{ inputs.include-dos-tests }}" = "true" ] && ARGS+=(--include-dos-tests)
|
|
110
|
+
|
|
111
|
+
set +e
|
|
112
|
+
llm-scanner "${ARGS[@]}"
|
|
113
|
+
SCAN_RC=$?
|
|
114
|
+
set -e
|
|
115
|
+
|
|
116
|
+
HTML=$(find "${{ inputs.output-dir }}" -name report.html -print | head -1 || true)
|
|
117
|
+
echo "report-html=${HTML}" >> "$GITHUB_OUTPUT"
|
|
118
|
+
|
|
119
|
+
JSON=$(find "${{ inputs.output-dir }}" -name report.json -print | head -1 || true)
|
|
120
|
+
if [ -n "$JSON" ]; then
|
|
121
|
+
SCORE=$(python -c 'import json,sys; print(json.load(open(sys.argv[1])).get("risk_score", ""))' "$JSON" || true)
|
|
122
|
+
echo "risk-score=${SCORE}" >> "$GITHUB_OUTPUT"
|
|
123
|
+
fi
|
|
124
|
+
|
|
125
|
+
exit "$SCAN_RC"
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
name: LLM Security Scan
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
pull_request:
|
|
5
|
+
workflow_dispatch:
|
|
6
|
+
|
|
7
|
+
jobs:
|
|
8
|
+
llm-security-scan:
|
|
9
|
+
runs-on: ubuntu-latest
|
|
10
|
+
timeout-minutes: 60 # LLM inference can be slow on shared runners
|
|
11
|
+
env:
|
|
12
|
+
LLM_ENDPOINT: ${{ vars.LLM_ENDPOINT }}
|
|
13
|
+
LLM_JUDGE_MODEL: ${{ vars.LLM_JUDGE_MODEL || 'llama3.2:3b' }}
|
|
14
|
+
LLM_FAIL_ON_SCORE: ${{ vars.LLM_FAIL_ON_SCORE || '7.0' }}
|
|
15
|
+
LLM_SEVERITY: ${{ vars.LLM_SEVERITY }}
|
|
16
|
+
LLM_CATEGORIES: ${{ vars.LLM_CATEGORIES }}
|
|
17
|
+
LLM_INCLUDE_DOS_TESTS: ${{ vars.LLM_INCLUDE_DOS_TESTS || 'false' }}
|
|
18
|
+
|
|
19
|
+
steps:
|
|
20
|
+
- uses: actions/checkout@v4
|
|
21
|
+
|
|
22
|
+
- uses: actions/setup-python@v5
|
|
23
|
+
with:
|
|
24
|
+
python-version: '3.11'
|
|
25
|
+
|
|
26
|
+
- name: Install uv
|
|
27
|
+
run: pip install uv
|
|
28
|
+
|
|
29
|
+
- name: Install scanner
|
|
30
|
+
run: uv pip install -e . --system
|
|
31
|
+
|
|
32
|
+
- name: Install Ollama
|
|
33
|
+
run: curl -fsSL https://ollama.com/install.sh | bash
|
|
34
|
+
|
|
35
|
+
- name: Start Ollama and pull models
|
|
36
|
+
run: |
|
|
37
|
+
ollama serve &
|
|
38
|
+
# Wait for Ollama HTTP server to be ready (up to 15 * 2s = 30s)
|
|
39
|
+
for i in $(seq 1 15); do
|
|
40
|
+
if curl -s http://localhost:11434 > /dev/null; then break; fi
|
|
41
|
+
sleep 2
|
|
42
|
+
done
|
|
43
|
+
# llama3.2:3b Q4 ~2GB RAM fits within ubuntu-latest 7GB runner limit
|
|
44
|
+
# Do not use 7B+ models (llama3:7b ~4GB, llama3.1:8b ~5GB) -- they risk running out of memory (OOM) on this runner
|
|
45
|
+
ollama pull "$LLM_JUDGE_MODEL"
|
|
46
|
+
|
|
47
|
+
- name: Validate workflow variables
|
|
48
|
+
run: |
|
|
49
|
+
test -n "$LLM_ENDPOINT" || {
|
|
50
|
+
echo "Set repository variable LLM_ENDPOINT to the URL reachable from this job, for example http://localhost:5000/chat" >&2
|
|
51
|
+
exit 1
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
- name: Run security scan
|
|
55
|
+
uses: ./.github/actions/llm-scan
|
|
56
|
+
with:
|
|
57
|
+
target: ${{ env.LLM_ENDPOINT }}
|
|
58
|
+
target-type: url
|
|
59
|
+
judge-model: ${{ env.LLM_JUDGE_MODEL }}
|
|
60
|
+
severity: ${{ env.LLM_SEVERITY }}
|
|
61
|
+
categories: ${{ env.LLM_CATEGORIES }}
|
|
62
|
+
include-dos-tests: ${{ env.LLM_INCLUDE_DOS_TESTS }}
|
|
63
|
+
fail-on-score: ${{ env.LLM_FAIL_ON_SCORE }}
|
|
64
|
+
output-dir: ./reports
|
|
65
|
+
|
|
66
|
+
- name: Upload SARIF to GitHub Security
|
|
67
|
+
uses: github/codeql-action/upload-sarif@v3
|
|
68
|
+
if: always() # upload even if scan found vulnerabilities (exit code 1)
|
|
69
|
+
with:
|
|
70
|
+
sarif_file: ./reports/
|
|
71
|
+
category: llm-security
|
|
72
|
+
|
|
73
|
+
- name: Upload reports as artifact
|
|
74
|
+
uses: actions/upload-artifact@v4
|
|
75
|
+
if: always() # preserve reports even on scan failure
|
|
76
|
+
with:
|
|
77
|
+
name: llm-scan-reports
|
|
78
|
+
path: ./reports/
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
name: Release to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- "v*.*.*"
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
build:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
steps:
|
|
12
|
+
- uses: actions/checkout@v4
|
|
13
|
+
|
|
14
|
+
- uses: actions/setup-python@v5
|
|
15
|
+
with:
|
|
16
|
+
python-version: "3.11"
|
|
17
|
+
|
|
18
|
+
- name: Build sdist and wheel
|
|
19
|
+
run: |
|
|
20
|
+
pip install --no-cache-dir build
|
|
21
|
+
python -m build
|
|
22
|
+
|
|
23
|
+
- name: Verify tag matches package version
|
|
24
|
+
run: |
|
|
25
|
+
TAG_VERSION="${GITHUB_REF_NAME#v}"
|
|
26
|
+
PKG_VERSION="$(python -c 'import tomllib; print(tomllib.load(open("pyproject.toml", "rb"))["project"]["version"])')"
|
|
27
|
+
if [ "$TAG_VERSION" != "$PKG_VERSION" ]; then
|
|
28
|
+
echo "Tag $GITHUB_REF_NAME does not match pyproject.toml version $PKG_VERSION" >&2
|
|
29
|
+
exit 1
|
|
30
|
+
fi
|
|
31
|
+
|
|
32
|
+
- uses: actions/upload-artifact@v4
|
|
33
|
+
with:
|
|
34
|
+
name: dist
|
|
35
|
+
path: dist/
|
|
36
|
+
|
|
37
|
+
publish:
|
|
38
|
+
needs: build
|
|
39
|
+
runs-on: ubuntu-latest
|
|
40
|
+
environment: pypi
|
|
41
|
+
permissions:
|
|
42
|
+
id-token: write # required for PyPI trusted publishing (OIDC) — no API token secret needed
|
|
43
|
+
steps:
|
|
44
|
+
- uses: actions/download-artifact@v4
|
|
45
|
+
with:
|
|
46
|
+
name: dist
|
|
47
|
+
path: dist/
|
|
48
|
+
|
|
49
|
+
- name: Publish to PyPI
|
|
50
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
Releases are tagged on GitHub (`vX.Y.Z`); this file summarizes what changed in each one. Tagging `vX.Y.Z` (matching the version in `pyproject.toml`) triggers `.github/workflows/release.yml`, which publishes to PyPI via trusted publishing. Until the first release ships, install via `pip install git+https://github.com/konradxmalinowski/llm-security-scanner`.
|
|
4
|
+
|
|
5
|
+
## Unreleased
|
|
6
|
+
|
|
7
|
+
- Split the hosted landing page / premium service out into a private repo (`llm-security-scanner-saas`). This repo is now scan-engine-only.
|
|
8
|
+
- Added `LICENSE` (MIT), `CONTRIBUTING.md`, issue/PR templates.
|
|
9
|
+
- Fixed example CI/CD pipelines (`examples/github/`, `examples/gitlab/`): Docker variants no longer try to build the scanner image from the consumer's own checkout (they had no Dockerfile to build from); GitLab variants no longer ship an environment-specific `LLM_ENDPOINT`/`OLLAMA_HOST` default that silently defeated the "did you configure this?" check; GitLab pipelines now only trigger on merge requests/manual runs instead of every push, matching the GitHub examples.
|
|
10
|
+
- Added `.github/workflows/release.yml`: tag-triggered PyPI publish via trusted publishing (OIDC, no stored API token). Docker CI/CD examples now build a minimal `pip install llm-security-scanner` image instead of cloning this repo's full source — requires the first PyPI release to exist (see `docs/FUNCTIONALITY.md` for the one-time pending-publisher setup).
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
<!-- GSD:project-start source:PROJECT.md -->
|
|
2
|
+
|
|
3
|
+
## Project
|
|
4
|
+
|
|
5
|
+
**LLM Security Scanner**
|
|
6
|
+
|
|
7
|
+
A Python CLI tool that tests LLM-based applications against the full OWASP Top 10 for LLMs vulnerability framework. It accepts either an HTTP endpoint or a local Ollama model as a target, runs a battery of ~50 automated attacks, and uses a second Ollama model as an AI Judge to evaluate each result — producing reports in terminal, Markdown, JSON, HTML, and SARIF formats.
|
|
8
|
+
|
|
9
|
+
**Core Value:** Give security engineers a fully offline, one-command tool to test local LLM endpoints and local models against all 10 OWASP LLM attack categories — no cloud dependencies, no manual analysis.
|
|
10
|
+
|
|
11
|
+
### Constraints
|
|
12
|
+
|
|
13
|
+
- **Tech Stack**: Python 3.11+, httpx, ollama SDK, rich, Pydantic v2, jinja2, pytest — no deviations from spec
|
|
14
|
+
- **Offline-first**: All judge inference via local Ollama — no calls to OpenAI/Anthropic/etc.
|
|
15
|
+
- **Payload format**: YAML files per OWASP category; each payload must have `id`, `name`, `payload`, `judge_criteria`
|
|
16
|
+
- **Judge contract**: Must return parseable JSON `{"success": bool, "reasoning": str}` — parser must handle malformed output gracefully
|
|
17
|
+
- **OWASP coverage**: All 10 categories (LLM01–LLM10) must have at least 4 payloads each to reach ≥40 total
|
|
18
|
+
|
|
19
|
+
<!-- GSD:project-end -->
|
|
20
|
+
|
|
21
|
+
<!-- GSD:stack-start source:research/STACK.md -->
|
|
22
|
+
|
|
23
|
+
## Technology Stack
|
|
24
|
+
|
|
25
|
+
## Recommended Stack
|
|
26
|
+
|
|
27
|
+
### Core Runtime
|
|
28
|
+
|
|
29
|
+
| Technology | Version | Purpose | Why |
|
|
30
|
+
|------------|---------|---------|-----|
|
|
31
|
+
| Python | 3.11+ | Runtime | `asyncio.TaskGroup` (3.11+) for parallel attack dispatch; modern `match` for pattern-matching judge output; `tomllib` stdlib |
|
|
32
|
+
| uv | latest | Package/venv manager | Replaces pip+venv; lockfile-native; `uv run` executes CLI without install |
|
|
33
|
+
|
|
34
|
+
### HTTP Client (Attack Delivery)
|
|
35
|
+
|
|
36
|
+
| Technology | Version | Purpose | Why |
|
|
37
|
+
|------------|---------|---------|-----|
|
|
38
|
+
| httpx | 0.28.1 | Async HTTP to URL targets | Sync+async parity (same API surface); native `AsyncClient`; fine-grained `Timeout(connect=5, read=120)` for slow LLM responses; `HTTPTransport(retries=N)` for flaky endpoints. Outperforms aiohttp on DX and error handling. |
|
|
39
|
+
|
|
40
|
+
### Ollama SDK (Local Model Inference)
|
|
41
|
+
|
|
42
|
+
| Technology | Version | Purpose | Why |
|
|
43
|
+
|------------|---------|---------|-----|
|
|
44
|
+
| ollama | 0.6.2 | Chat with local models | Official SDK; `AsyncClient` mirrors httpx's async model; structured output via Pydantic schema; `format='json'` for judge responses |
|
|
45
|
+
|
|
46
|
+
- `chat()` accepts a `messages` list — supports system prompt + user turn in one call, which is exactly the judge pattern: `[{"role": "system", "content": judge_prompt}, {"role": "user", "content": f"Payload: {p}\nResponse: {r}"}]`
|
|
47
|
+
- `generate()` is for single-prompt completion without conversation history — appropriate only for raw completions, not structured judge evaluation
|
|
48
|
+
- `chat()` with `format=JudgeOutput.model_json_schema()` forces structured JSON output natively (Ollama >= 0.5.0 supports Pydantic schema in `format`)
|
|
49
|
+
|
|
50
|
+
### Terminal UI
|
|
51
|
+
|
|
52
|
+
| Technology | Version | Purpose | Why |
|
|
53
|
+
|------------|---------|---------|-----|
|
|
54
|
+
| rich | 15.0.0 | Progress bars, colored tables, console output | Industry standard for Python CLI UX; `Progress` context manager with `track()` is idiomatic; `Table` renders colored severity columns without extra dependencies |
|
|
55
|
+
|
|
56
|
+
### Data Models
|
|
57
|
+
|
|
58
|
+
| Technology | Version | Purpose | Why |
|
|
59
|
+
|------------|---------|---------|-----|
|
|
60
|
+
| pydantic | 2.13.4 | AttackResult, ScanReport, Severity models | v2 is 10–20x faster than v1; `model_dump_json()` outputs JSON directly; `ConfigDict(extra='forbid')` catches YAML schema drift; enum serialization in JSON mode outputs values not members |
|
|
61
|
+
|
|
62
|
+
### Template Engine
|
|
63
|
+
|
|
64
|
+
| Technology | Version | Purpose | Why |
|
|
65
|
+
|------------|---------|---------|-----|
|
|
66
|
+
| jinja2 | 3.1.6 | HTML report generation | Standard for Python templating; `FileSystemLoader` loads from `templates/` dir; auto-escaping via `Environment(autoescape=True)` mandatory for security tools (prevents XSS in report if payloads contain HTML) |
|
|
67
|
+
|
|
68
|
+
### YAML Payload Loader
|
|
69
|
+
|
|
70
|
+
| Technology | Version | Purpose | Why |
|
|
71
|
+
|------------|---------|---------|-----|
|
|
72
|
+
| PyYAML | 6.0.3 | Loading payload YAML files | Sufficient for read-only structured data; simpler API than ruamel.yaml; always use `yaml.safe_load()` |
|
|
73
|
+
|
|
74
|
+
### CLI Framework
|
|
75
|
+
|
|
76
|
+
| Technology | Version | Purpose | Why |
|
|
77
|
+
|------------|---------|---------|-----|
|
|
78
|
+
| argparse | stdlib | CLI argument parsing | Per project spec; zero extra dependencies; sufficient for flat argument surface (~10 flags, no subcommands); security-tool users expect POSIX-standard `--flag value` syntax |
|
|
79
|
+
|
|
80
|
+
- **Click (38.7% market share in 2025):** Better DX for complex CLIs with subcommands, but adds a dependency. Unnecessary here — no subcommands, no command groups.
|
|
81
|
+
- **Typer:** Adds Click as transitive dep; designed for type-hint-native CLIs. Overkill for a single-command tool. Also 14ms startup vs argparse 18ms (negligible, but argparse wins on dep count).
|
|
82
|
+
- **argparse wins here because:** flat single-command surface; zero deps; pip-installable by security teams in air-gapped environments without extra packages.
|
|
83
|
+
|
|
84
|
+
### Testing
|
|
85
|
+
|
|
86
|
+
| Technology | Version | Purpose | Why |
|
|
87
|
+
|------------|---------|---------|-----|
|
|
88
|
+
| pytest | 9.1.1 | Test runner | Standard |
|
|
89
|
+
| pytest-asyncio | 1.4.0 | Async test support | Required for testing `AsyncClient` judge and httpx async scanner; `asyncio_mode = "auto"` eliminates decorator noise |
|
|
90
|
+
|
|
91
|
+
With `asyncio_mode = "auto"`, no `@pytest.mark.asyncio` decorator is needed on async tests.
|
|
92
|
+
|
|
93
|
+
### Linter / Formatter
|
|
94
|
+
|
|
95
|
+
| Technology | Version | Purpose | Why |
|
|
96
|
+
|------------|---------|---------|-----|
|
|
97
|
+
| ruff | 0.15.20 | Linting + formatting | Replaces flake8 + black + isort in one tool; 100x faster; S-series rules (flake8-bandit) flag unsafe YAML, subprocess calls, hardcoded secrets — exactly what a security tool should self-enforce |
|
|
98
|
+
|
|
99
|
+
- `S506`: `yaml.load()` without `Loader=yaml.SafeLoader` → force `yaml.safe_load()`
|
|
100
|
+
- `S701`: Jinja2 `Environment(autoescape=False)` → force `autoescape=True`
|
|
101
|
+
- `S105`/`S106`: hardcoded API keys/passwords in source
|
|
102
|
+
- `S113`: requests without timeout (not applicable with httpx, but good discipline)
|
|
103
|
+
|
|
104
|
+
## Dependency Summary
|
|
105
|
+
|
|
106
|
+
### Production (`[project.dependencies]`)
|
|
107
|
+
|
|
108
|
+
### Dev (`[project.optional-dependencies]`)
|
|
109
|
+
|
|
110
|
+
## Alternatives Explicitly Rejected
|
|
111
|
+
|
|
112
|
+
| Category | Recommended | Alternative | Why Rejected |
|
|
113
|
+
|----------|-------------|-------------|--------------|
|
|
114
|
+
| HTTP client | httpx | aiohttp | aiohttp's session lifecycle is more verbose; no sync parity; DX inferior |
|
|
115
|
+
| HTTP client | httpx | requests | Blocking — incompatible with async attack loop |
|
|
116
|
+
| CLI framework | argparse | Click | Adds dependency; subcommands not needed |
|
|
117
|
+
| CLI framework | argparse | Typer | Click transitive dep; startup overhead; overkill for flat CLI |
|
|
118
|
+
| Models | pydantic v2 | dataclasses | No JSON serialization; no validation; no schema generation for Ollama `format=` |
|
|
119
|
+
| YAML | PyYAML | ruamel.yaml | Needed only for round-trip write-back (preserving comments); payload files are read-only |
|
|
120
|
+
| YAML | PyYAML | StrictYAML | No standard pip install; unnecessary for internal authored payload files |
|
|
121
|
+
| Linting | ruff | flake8+black+isort | Three tools replaced by one; ruff is 100x faster |
|
|
122
|
+
| Judge inference | ollama.chat() | ollama.generate() | generate() lacks system/user turn structure for structured evaluation |
|
|
123
|
+
| Jinja2 autoescape | autoescape=True | autoescape=False | Attack payloads contain HTML/JS; False creates XSS in rendered reports; Ruff S701 flags it |
|
|
124
|
+
|
|
125
|
+
## Sources
|
|
126
|
+
|
|
127
|
+
- httpx async client: https://www.python-httpx.org/advanced/timeouts — HIGH confidence (official docs via Context7)
|
|
128
|
+
- Ollama Python SDK: https://github.com/ollama/ollama-python/blob/main/README.md — HIGH confidence (official SDK via Context7)
|
|
129
|
+
- Ollama chat vs generate: https://context7.com/ollama/ollama-python/llms.txt — HIGH confidence (official docs)
|
|
130
|
+
- Rich tables and progress: https://rich.readthedocs.io/en/stable/tables.html — HIGH confidence (official docs via Context7)
|
|
131
|
+
- Pydantic v2 serialization: https://github.com/pydantic/pydantic/blob/main/docs/concepts/serialization.md — HIGH confidence (official docs via Context7)
|
|
132
|
+
- pytest-asyncio auto mode: https://pytest-asyncio.readthedocs.io/en/stable/concepts.html — HIGH confidence (official docs via Context7)
|
|
133
|
+
- Ruff S-series (bandit) rules: https://docs.astral.sh/ruff/rules — HIGH confidence (official docs via Context7)
|
|
134
|
+
- PyYAML vs ruamel.yaml: https://yaml.dev/doc/ruamel.yaml/pyyaml/ — MEDIUM confidence (official ruamel docs, cross-checked with PyYAML docs)
|
|
135
|
+
- Jinja2 FileSystemLoader: https://jinja.palletsprojects.com/en/stable/api — HIGH confidence (official docs via Context7)
|
|
136
|
+
- argparse vs Click vs Typer: https://dasroot.net/posts/2025/12/building-cli-tools-python-click-typer-argparse/ — MEDIUM confidence (third-party article, consistent with project spec decision)
|
|
137
|
+
- Package versions: PyPI index (pip index versions), confirmed 2026-06-25
|
|
138
|
+
|
|
139
|
+
<!-- GSD:stack-end -->
|
|
140
|
+
|
|
141
|
+
<!-- GSD:conventions-start source:CONVENTIONS.md -->
|
|
142
|
+
|
|
143
|
+
## Conventions
|
|
144
|
+
|
|
145
|
+
Conventions not yet established. Will populate as patterns emerge during development.
|
|
146
|
+
<!-- GSD:conventions-end -->
|
|
147
|
+
|
|
148
|
+
<!-- GSD:architecture-start source:ARCHITECTURE.md -->
|
|
149
|
+
|
|
150
|
+
## Architecture
|
|
151
|
+
|
|
152
|
+
Architecture not yet mapped. Follow existing patterns found in the codebase.
|
|
153
|
+
<!-- GSD:architecture-end -->
|
|
154
|
+
|
|
155
|
+
<!-- GSD:skills-start source:skills/ -->
|
|
156
|
+
|
|
157
|
+
## Project Skills
|
|
158
|
+
|
|
159
|
+
No project skills found. Add skills to any of: `.claude/skills/`, `.agents/skills/`, `.cursor/skills/`, `.github/skills/`, or `.codex/skills/` with a `SKILL.md` index file.
|
|
160
|
+
<!-- GSD:skills-end -->
|
|
161
|
+
|
|
162
|
+
<!-- GSD:workflow-start source:GSD defaults -->
|
|
163
|
+
|
|
164
|
+
## GSD Workflow Enforcement
|
|
165
|
+
|
|
166
|
+
Before using Edit, Write, or other file-changing tools, start work through a GSD command so planning artifacts and execution context stay in sync.
|
|
167
|
+
|
|
168
|
+
Use these entry points:
|
|
169
|
+
|
|
170
|
+
- `/gsd-quick` for small fixes, doc updates, and ad-hoc tasks
|
|
171
|
+
- `/gsd-debug` for investigation and bug fixing
|
|
172
|
+
- `/gsd-execute-phase` for planned phase work
|
|
173
|
+
|
|
174
|
+
Do not make direct repo edits outside a GSD workflow unless the user explicitly asks to bypass it.
|
|
175
|
+
<!-- GSD:workflow-end -->
|
|
176
|
+
|
|
177
|
+
<!-- GSD:profile-start -->
|
|
178
|
+
|
|
179
|
+
## Developer Profile
|
|
180
|
+
|
|
181
|
+
> Profile not yet configured. Run `/gsd-profile-user` to generate your developer profile.
|
|
182
|
+
> This section is managed by `generate-claude-profile` -- do not edit manually.
|
|
183
|
+
<!-- GSD:profile-end -->
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# Contributing
|
|
2
|
+
|
|
3
|
+
Thanks for considering a contribution to LLM Security Scanner.
|
|
4
|
+
|
|
5
|
+
## Setup
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
uv pip install -e ".[dev]"
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Before opening a PR
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
uv run pytest
|
|
15
|
+
uv run ruff check .
|
|
16
|
+
uv run ruff format .
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
- Keep payload YAML files (`payloads/`) matching the existing schema: `id`, `name`, `payload`, `judge_criteria`, `severity`.
|
|
20
|
+
- New payloads should map to a specific OWASP LLM Top 10 (2025) category — see `docs/FUNCTIONALITY.md` for the current category list.
|
|
21
|
+
- `ruff` runs with `flake8-bandit` (`S` rules) enabled — don't disable these without a clear reason in the PR description (e.g. `yaml.safe_load` and `Environment(autoescape=True)` are enforced project-wide for security reasons).
|
|
22
|
+
|
|
23
|
+
## Reporting bugs / requesting features
|
|
24
|
+
|
|
25
|
+
Open a GitHub issue using the provided templates. Include reproduction steps (target type, judge model, relevant flags) for bugs.
|
|
26
|
+
|
|
27
|
+
## Scope
|
|
28
|
+
|
|
29
|
+
This repo is the open-source scan engine and CLI only. Hosted/premium features are out of scope here — see `docs/FUNCTIONALITY.md` for what belongs where.
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
FROM python:3.11-slim
|
|
2
|
+
|
|
3
|
+
ENV PYTHONDONTWRITEBYTECODE=1 \
|
|
4
|
+
PYTHONUNBUFFERED=1
|
|
5
|
+
|
|
6
|
+
WORKDIR /app
|
|
7
|
+
|
|
8
|
+
RUN apt-get update \
|
|
9
|
+
&& apt-get install -y --no-install-recommends curl ca-certificates \
|
|
10
|
+
&& rm -rf /var/lib/apt/lists/*
|
|
11
|
+
|
|
12
|
+
COPY pyproject.toml README.md ./
|
|
13
|
+
COPY src ./src
|
|
14
|
+
COPY payloads ./payloads
|
|
15
|
+
|
|
16
|
+
RUN pip install --no-cache-dir .
|
|
17
|
+
|
|
18
|
+
ENTRYPOINT ["llm-scanner"]
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Konrad Malinowski
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|