llm-security-scanner 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. llm_security_scanner-0.1.0/.dockerignore +14 -0
  2. llm_security_scanner-0.1.0/.github/ISSUE_TEMPLATE/bug_report.md +20 -0
  3. llm_security_scanner-0.1.0/.github/ISSUE_TEMPLATE/feature_request.md +11 -0
  4. llm_security_scanner-0.1.0/.github/PULL_REQUEST_TEMPLATE.md +8 -0
  5. llm_security_scanner-0.1.0/.github/actions/llm-scan/action.yml +125 -0
  6. llm_security_scanner-0.1.0/.github/workflows/llm-scan.yml +78 -0
  7. llm_security_scanner-0.1.0/.github/workflows/release.yml +50 -0
  8. llm_security_scanner-0.1.0/.gitignore +11 -0
  9. llm_security_scanner-0.1.0/CHANGELOG.md +10 -0
  10. llm_security_scanner-0.1.0/CLAUDE.md +183 -0
  11. llm_security_scanner-0.1.0/CONTRIBUTING.md +29 -0
  12. llm_security_scanner-0.1.0/Dockerfile +18 -0
  13. llm_security_scanner-0.1.0/LICENSE +21 -0
  14. llm_security_scanner-0.1.0/PKG-INFO +540 -0
  15. llm_security_scanner-0.1.0/README.md +516 -0
  16. llm_security_scanner-0.1.0/RUNBOOK.md +513 -0
  17. llm_security_scanner-0.1.0/demo/chatbot_openai_app.py +83 -0
  18. llm_security_scanner-0.1.0/demo/vulnerable_app.py +65 -0
  19. llm_security_scanner-0.1.0/docs/FUNCTIONALITY.md +72 -0
  20. llm_security_scanner-0.1.0/examples/README.md +63 -0
  21. llm_security_scanner-0.1.0/examples/config/ci-url.yml +15 -0
  22. llm_security_scanner-0.1.0/examples/config/local-url.yml +15 -0
  23. llm_security_scanner-0.1.0/examples/config/ollama-target.yml +13 -0
  24. llm_security_scanner-0.1.0/examples/config/public-url.yml +15 -0
  25. llm_security_scanner-0.1.0/examples/docker/docker-compose.local.yml +36 -0
  26. llm_security_scanner-0.1.0/examples/github/llm-security.docker.yml +71 -0
  27. llm_security_scanner-0.1.0/examples/github/llm-security.yml +59 -0
  28. llm_security_scanner-0.1.0/examples/gitlab/llm-security.gitlab-ci.docker.yml +76 -0
  29. llm_security_scanner-0.1.0/examples/gitlab/llm-security.gitlab-ci.yml +60 -0
  30. llm_security_scanner-0.1.0/examples/llm-scan.yml +22 -0
  31. llm_security_scanner-0.1.0/payloads/extended/llm10_extended.yaml +41 -0
  32. llm_security_scanner-0.1.0/payloads/llm01_prompt_injection.yaml +44 -0
  33. llm_security_scanner-0.1.0/payloads/llm02_sensitive_info_disclosure.yaml +39 -0
  34. llm_security_scanner-0.1.0/payloads/llm03_supply_chain.yaml +37 -0
  35. llm_security_scanner-0.1.0/payloads/llm04_data_model_poisoning.yaml +39 -0
  36. llm_security_scanner-0.1.0/payloads/llm05_improper_output_handling.yaml +40 -0
  37. llm_security_scanner-0.1.0/payloads/llm06_excessive_agency.yaml +38 -0
  38. llm_security_scanner-0.1.0/payloads/llm07_system_prompt_leakage.yaml +39 -0
  39. llm_security_scanner-0.1.0/payloads/llm08_vector_embedding_weaknesses.yaml +40 -0
  40. llm_security_scanner-0.1.0/payloads/llm09_misinformation.yaml +41 -0
  41. llm_security_scanner-0.1.0/payloads/llm10_unbounded_consumption.yaml +41 -0
  42. llm_security_scanner-0.1.0/pyproject.toml +69 -0
  43. llm_security_scanner-0.1.0/src/llm_scanner/__init__.py +1 -0
  44. llm_security_scanner-0.1.0/src/llm_scanner/baselines/__init__.py +90 -0
  45. llm_security_scanner-0.1.0/src/llm_scanner/cli.py +643 -0
  46. llm_security_scanner-0.1.0/src/llm_scanner/judge/__init__.py +3 -0
  47. llm_security_scanner-0.1.0/src/llm_scanner/judge/ollama_judge.py +200 -0
  48. llm_security_scanner-0.1.0/src/llm_scanner/models.py +118 -0
  49. llm_security_scanner-0.1.0/src/llm_scanner/payloads/__init__.py +1 -0
  50. llm_security_scanner-0.1.0/src/llm_scanner/payloads/loader.py +74 -0
  51. llm_security_scanner-0.1.0/src/llm_scanner/preflight.py +118 -0
  52. llm_security_scanner-0.1.0/src/llm_scanner/reporters/__init__.py +38 -0
  53. llm_security_scanner-0.1.0/src/llm_scanner/reporters/html.py +33 -0
  54. llm_security_scanner-0.1.0/src/llm_scanner/reporters/json_reporter.py +15 -0
  55. llm_security_scanner-0.1.0/src/llm_scanner/reporters/markdown.py +47 -0
  56. llm_security_scanner-0.1.0/src/llm_scanner/reporters/sarif.py +106 -0
  57. llm_security_scanner-0.1.0/src/llm_scanner/reporters/text.py +45 -0
  58. llm_security_scanner-0.1.0/src/llm_scanner/reporters/trend.py +71 -0
  59. llm_security_scanner-0.1.0/src/llm_scanner/scanner.py +121 -0
  60. llm_security_scanner-0.1.0/src/llm_scanner/suppressions/__init__.py +104 -0
  61. llm_security_scanner-0.1.0/src/llm_scanner/targets/__init__.py +44 -0
  62. llm_security_scanner-0.1.0/src/llm_scanner/targets/base.py +29 -0
  63. llm_security_scanner-0.1.0/src/llm_scanner/targets/http.py +53 -0
  64. llm_security_scanner-0.1.0/src/llm_scanner/targets/ollama_target.py +46 -0
  65. llm_security_scanner-0.1.0/src/llm_scanner/templates/index.html.j2 +92 -0
  66. llm_security_scanner-0.1.0/src/llm_scanner/templates/report.html.j2 +57 -0
  67. llm_security_scanner-0.1.0/tests/conftest.py +45 -0
  68. llm_security_scanner-0.1.0/tests/test_baseline.py +179 -0
  69. llm_security_scanner-0.1.0/tests/test_cli.py +523 -0
  70. llm_security_scanner-0.1.0/tests/test_judge.py +157 -0
  71. llm_security_scanner-0.1.0/tests/test_loader.py +125 -0
  72. llm_security_scanner-0.1.0/tests/test_models.py +140 -0
  73. llm_security_scanner-0.1.0/tests/test_preflight.py +186 -0
  74. llm_security_scanner-0.1.0/tests/test_reporters.py +312 -0
  75. llm_security_scanner-0.1.0/tests/test_sarif.py +213 -0
  76. llm_security_scanner-0.1.0/tests/test_scanner.py +257 -0
  77. llm_security_scanner-0.1.0/tests/test_suppressions.py +136 -0
  78. llm_security_scanner-0.1.0/tests/test_targets.py +173 -0
  79. llm_security_scanner-0.1.0/tests/test_trend.py +101 -0
  80. llm_security_scanner-0.1.0/uv.lock +1562 -0
@@ -0,0 +1,14 @@
1
+ .git
2
+ .venv
3
+ .pytest_cache
4
+ .ruff_cache
5
+ .idea
6
+ .planning
7
+ .claude
8
+ reports
9
+ __pycache__
10
+ *.pyc
11
+ *.pyo
12
+ *.pyd
13
+ .DS_Store
14
+ commits.txt
@@ -0,0 +1,20 @@
1
+ ---
2
+ name: Bug report
3
+ about: Something doesn't work as expected
4
+ labels: bug
5
+ ---
6
+
7
+ **Command run** (redact any secrets/API keys)
8
+ ```
9
+ llm-scanner --target ... --target-type ... --judge-model ...
10
+ ```
11
+
12
+ **Expected behavior**
13
+
14
+ **Actual behavior**
15
+
16
+ **Environment**
17
+ - OS:
18
+ - Python version:
19
+ - `llm-scanner` version / commit:
20
+ - Ollama version:
@@ -0,0 +1,11 @@
1
+ ---
2
+ name: Feature request
3
+ about: Suggest a new payload, target type, reporter, or CLI flag
4
+ labels: enhancement
5
+ ---
6
+
7
+ **What's missing**
8
+
9
+ **Which OWASP LLM category (if a new payload)**
10
+
11
+ **Why it matters**
@@ -0,0 +1,8 @@
1
+ ## What this changes
2
+
3
+ ## Testing
4
+
5
+ - [ ] `uv run pytest` passes
6
+ - [ ] `uv run ruff check .` passes
7
+
8
+ ## Notes for reviewer
@@ -0,0 +1,125 @@
1
+ name: 'LLM Security Scanner'
2
+ description: 'Automated OWASP Top 10 for LLMs security testing — 46+ attack payloads, local Ollama judge, fully offline.'
3
+ author: 'Konrad Malinowski'
4
+
5
+ inputs:
6
+ target:
7
+ description: 'Target URL (HTTP endpoint) or Ollama model name to test'
8
+ required: false
9
+ target-type:
10
+ description: 'Type of target: url or ollama'
11
+ required: false
12
+ default: 'url'
13
+ judge-model:
14
+ description: 'Local Ollama model used as the AI evaluator'
15
+ required: false
16
+ default: 'llama3.2:3b'
17
+ api-key:
18
+ description: 'Bearer token for authenticated endpoints (never logged)'
19
+ required: false
20
+ default: ''
21
+ severity:
22
+ description: 'Minimum severity filter: critical, high, medium, low, info. Empty = all.'
23
+ required: false
24
+ default: ''
25
+ categories:
26
+ description: 'Comma-separated OWASP categories (e.g. LLM01,LLM07). Empty = all except LLM10.'
27
+ required: false
28
+ default: ''
29
+ include-dos-tests:
30
+ description: 'Include LLM10 Unbounded Consumption probes. Only use against targets you own.'
31
+ required: false
32
+ default: 'false'
33
+ output-dir:
34
+ description: 'Directory for saved report files'
35
+ required: false
36
+ default: 'llm-scan-reports'
37
+ fail-on-score:
38
+ description: 'Fail the workflow when risk score is greater than or equal to this threshold. Empty = never fail by score.'
39
+ required: false
40
+ default: ''
41
+ config:
42
+ description: 'Optional llm-scan.yml config path. Explicit action inputs override config values.'
43
+ required: false
44
+ default: ''
45
+
46
+ outputs:
47
+ report-html:
48
+ description: 'Path to the generated HTML report'
49
+ value: ${{ steps.scan.outputs.report-html }}
50
+ risk-score:
51
+ description: 'Numeric risk score (0.0–10.0)'
52
+ value: ${{ steps.scan.outputs.risk-score }}
53
+
54
+ runs:
55
+ using: 'composite'
56
+ steps:
57
+ - name: Set up Python
58
+ uses: actions/setup-python@v5
59
+ with:
60
+ python-version: '3.11'
61
+
62
+ - name: Set up uv
63
+ uses: astral-sh/setup-uv@v3
64
+
65
+ - name: Install llm-security-scanner
66
+ shell: bash
67
+ run: |
68
+ uv pip install --system -e "${{ github.action_path }}/../../.."
69
+
70
+ - name: Install Ollama
71
+ shell: bash
72
+ run: |
73
+ curl -fsSL https://ollama.com/install.sh | bash
74
+
75
+ - name: Start Ollama and pull judge model
76
+ shell: bash
77
+ run: |
78
+ ollama serve &
79
+ echo "Waiting for Ollama to be ready..."
80
+ for i in $(seq 1 30); do
81
+ if curl -sf http://localhost:11434/api/version >/dev/null 2>&1; then
82
+ echo "Ollama ready after $((i * 2))s"
83
+ break
84
+ fi
85
+ sleep 2
86
+ done
87
+ ollama pull "${{ inputs.judge-model }}"
88
+
89
+ - name: Run LLM security scan
90
+ id: scan
91
+ shell: bash
92
+ run: |
93
+ set -e
94
+ mkdir -p "${{ inputs.output-dir }}"
95
+
96
+ ARGS=()
97
+
98
+ [ -n "${{ inputs.config }}" ] && ARGS+=(--config "${{ inputs.config }}")
99
+ [ -n "${{ inputs.target }}" ] && ARGS+=(--target "${{ inputs.target }}")
100
+ [ -n "${{ inputs.target-type }}" ] && ARGS+=(--target-type "${{ inputs.target-type }}")
101
+ [ -n "${{ inputs.judge-model }}" ] && ARGS+=(--judge-model "${{ inputs.judge-model }}")
102
+
103
+ ARGS+=(--format json,html,sarif --output-dir "${{ inputs.output-dir }}")
104
+
105
+ [ -n "${{ inputs.api-key }}" ] && ARGS+=(--api-key "${{ inputs.api-key }}")
106
+ [ -n "${{ inputs.severity }}" ] && ARGS+=(--severity "${{ inputs.severity }}")
107
+ [ -n "${{ inputs.categories }}" ] && ARGS+=(--categories "${{ inputs.categories }}")
108
+ [ -n "${{ inputs.fail-on-score }}" ] && ARGS+=(--fail-on-score "${{ inputs.fail-on-score }}")
109
+ [ "${{ inputs.include-dos-tests }}" = "true" ] && ARGS+=(--include-dos-tests)
110
+
111
+ set +e
112
+ llm-scanner "${ARGS[@]}"
113
+ SCAN_RC=$?
114
+ set -e
115
+
116
+ HTML=$(find "${{ inputs.output-dir }}" -name report.html -print | head -1 || true)
117
+ echo "report-html=${HTML}" >> "$GITHUB_OUTPUT"
118
+
119
+ JSON=$(find "${{ inputs.output-dir }}" -name report.json -print | head -1 || true)
120
+ if [ -n "$JSON" ]; then
121
+ SCORE=$(python -c 'import json,sys; print(json.load(open(sys.argv[1])).get("risk_score", ""))' "$JSON" || true)
122
+ echo "risk-score=${SCORE}" >> "$GITHUB_OUTPUT"
123
+ fi
124
+
125
+ exit "$SCAN_RC"
@@ -0,0 +1,78 @@
1
+ name: LLM Security Scan
2
+
3
+ on:
4
+ pull_request:
5
+ workflow_dispatch:
6
+
7
+ jobs:
8
+ llm-security-scan:
9
+ runs-on: ubuntu-latest
10
+ timeout-minutes: 60 # LLM inference can be slow on shared runners
11
+ env:
12
+ LLM_ENDPOINT: ${{ vars.LLM_ENDPOINT }}
13
+ LLM_JUDGE_MODEL: ${{ vars.LLM_JUDGE_MODEL || 'llama3.2:3b' }}
14
+ LLM_FAIL_ON_SCORE: ${{ vars.LLM_FAIL_ON_SCORE || '7.0' }}
15
+ LLM_SEVERITY: ${{ vars.LLM_SEVERITY }}
16
+ LLM_CATEGORIES: ${{ vars.LLM_CATEGORIES }}
17
+ LLM_INCLUDE_DOS_TESTS: ${{ vars.LLM_INCLUDE_DOS_TESTS || 'false' }}
18
+
19
+ steps:
20
+ - uses: actions/checkout@v4
21
+
22
+ - uses: actions/setup-python@v5
23
+ with:
24
+ python-version: '3.11'
25
+
26
+ - name: Install uv
27
+ run: pip install uv
28
+
29
+ - name: Install scanner
30
+ run: uv pip install -e . --system
31
+
32
+ - name: Install Ollama
33
+ run: curl -fsSL https://ollama.com/install.sh | bash
34
+
35
+ - name: Start Ollama and pull models
36
+ run: |
37
+ ollama serve &
38
+ # Wait for Ollama HTTP server to be ready (up to 15 * 2s = 30s)
39
+ for i in $(seq 1 15); do
40
+ if curl -s http://localhost:11434 > /dev/null; then break; fi
41
+ sleep 2
42
+ done
43
+ # llama3.2:3b Q4 ~2GB RAM fits within ubuntu-latest 7GB runner limit
44
+ # do not use 7B+ models (llama3:8b ~4GB, llama3.1:8b ~5GB) -- risk OOM
45
+ ollama pull "$LLM_JUDGE_MODEL"
46
+
47
+ - name: Validate workflow variables
48
+ run: |
49
+ test -n "$LLM_ENDPOINT" || {
50
+ echo "Set repository variable LLM_ENDPOINT to the URL reachable from this job, for example http://localhost:5000/chat" >&2
51
+ exit 1
52
+ }
53
+
54
+ - name: Run security scan
55
+ uses: ./.github/actions/llm-scan
56
+ with:
57
+ target: ${{ env.LLM_ENDPOINT }}
58
+ target-type: url
59
+ judge-model: ${{ env.LLM_JUDGE_MODEL }}
60
+ severity: ${{ env.LLM_SEVERITY }}
61
+ categories: ${{ env.LLM_CATEGORIES }}
62
+ include-dos-tests: ${{ env.LLM_INCLUDE_DOS_TESTS }}
63
+ fail-on-score: ${{ env.LLM_FAIL_ON_SCORE }}
64
+ output-dir: ./reports
65
+
66
+ - name: Upload SARIF to GitHub Security
67
+ uses: github/codeql-action/upload-sarif@v3
68
+ if: always() # upload even if scan found vulnerabilities (exit code 1)
69
+ with:
70
+ sarif_file: ./reports/
71
+ category: llm-security
72
+
73
+ - name: Upload reports as artifact
74
+ uses: actions/upload-artifact@v4
75
+ if: always() # preserve reports even on scan failure
76
+ with:
77
+ name: llm-scan-reports
78
+ path: ./reports/
@@ -0,0 +1,50 @@
1
+ name: Release to PyPI
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - "v*.*.*"
7
+
8
+ jobs:
9
+ build:
10
+ runs-on: ubuntu-latest
11
+ steps:
12
+ - uses: actions/checkout@v4
13
+
14
+ - uses: actions/setup-python@v5
15
+ with:
16
+ python-version: "3.11"
17
+
18
+ - name: Build sdist and wheel
19
+ run: |
20
+ pip install --no-cache-dir build
21
+ python -m build
22
+
23
+ - name: Verify tag matches package version
24
+ run: |
25
+ TAG_VERSION="${GITHUB_REF_NAME#v}"
26
+ PKG_VERSION="$(python -c 'import tomllib; print(tomllib.load(open("pyproject.toml", "rb"))["project"]["version"])')"
27
+ if [ "$TAG_VERSION" != "$PKG_VERSION" ]; then
28
+ echo "Tag $GITHUB_REF_NAME does not match pyproject.toml version $PKG_VERSION" >&2
29
+ exit 1
30
+ fi
31
+
32
+ - uses: actions/upload-artifact@v4
33
+ with:
34
+ name: dist
35
+ path: dist/
36
+
37
+ publish:
38
+ needs: build
39
+ runs-on: ubuntu-latest
40
+ environment: pypi
41
+ permissions:
42
+ id-token: write # required for PyPI trusted publishing (OIDC) — no API token secret needed
43
+ steps:
44
+ - uses: actions/download-artifact@v4
45
+ with:
46
+ name: dist
47
+ path: dist/
48
+
49
+ - name: Publish to PyPI
50
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,11 @@
1
+ .claude
2
+ .planning
3
+ .venv
4
+ .idea
5
+ tests/__pycache__/
6
+ *.pyc
7
+ __pycache__/
8
+ .env
9
+ sessions/
10
+ plans/
11
+ reports/
@@ -0,0 +1,10 @@
1
+ # Changelog
2
+
3
+ Releases are tagged on GitHub (`vX.Y.Z`); this file summarizes what changed in each one. Tagging `vX.Y.Z` (matching the version in `pyproject.toml`) triggers `.github/workflows/release.yml`, which publishes to PyPI via trusted publishing. Until the first release ships, install via `pip install git+https://github.com/konradxmalinowski/llm-security-scanner`.
4
+
5
+ ## Unreleased
6
+
7
+ - Split the hosted landing page / premium service out into a private repo (`llm-security-scanner-saas`). This repo is now scan-engine-only.
8
+ - Added `LICENSE` (MIT), `CONTRIBUTING.md`, issue/PR templates.
9
+ - Fixed example CI/CD pipelines (`examples/github/`, `examples/gitlab/`): Docker variants no longer try to build the scanner image from the consumer's own checkout (they had no Dockerfile to build from); GitLab variants no longer ship an environment-specific `LLM_ENDPOINT`/`OLLAMA_HOST` default that silently defeated the "did you configure this?" check; GitLab pipelines now only trigger on merge requests/manual runs instead of every push, matching the GitHub examples.
10
+ - Added `.github/workflows/release.yml`: tag-triggered PyPI publish via trusted publishing (OIDC, no stored API token). Docker CI/CD examples now build a minimal `pip install llm-security-scanner` image instead of cloning this repo's full source — requires the first PyPI release to exist (see `docs/FUNCTIONALITY.md` for the one-time pending-publisher setup).
@@ -0,0 +1,183 @@
1
+ <!-- GSD:project-start source:PROJECT.md -->
2
+
3
+ ## Project
4
+
5
+ **LLM Security Scanner**
6
+
7
+ A Python CLI tool that tests LLM-based applications against the full OWASP Top 10 for LLMs vulnerability framework. It accepts either an HTTP endpoint or a local Ollama model as a target, runs a battery of ~50 automated attacks, and uses a second Ollama model as an AI Judge to evaluate each result — producing reports in terminal, Markdown, JSON, and HTML formats.
8
+
9
+ **Core Value:** Give security engineers a fully offline, one-command tool to test local LLM endpoints and local models against all 10 OWASP LLM attack categories — no cloud dependencies, no manual analysis.
10
+
11
+ ### Constraints
12
+
13
+ - **Tech Stack**: Python 3.11+, httpx, ollama SDK, rich, Pydantic v2, jinja2, pytest — no deviations from spec
14
+ - **Offline-first**: All judge inference via local Ollama — no calls to OpenAI/Anthropic/etc.
15
+ - **Payload format**: YAML files per OWASP category; each payload must have `id`, `name`, `payload`, `judge_criteria`
16
+ - **Judge contract**: Must return parseable JSON `{"success": bool, "reasoning": str}` — parser must handle malformed output gracefully
17
+ - **OWASP coverage**: All 10 categories (LLM01–LLM10) must have at least 4 payloads each to reach ≥40 total
18
+
19
+ <!-- GSD:project-end -->
20
+
21
+ <!-- GSD:stack-start source:research/STACK.md -->
22
+
23
+ ## Technology Stack
24
+
25
+ ## Recommended Stack
26
+
27
+ ### Core Runtime
28
+
29
+ | Technology | Version | Purpose | Why |
30
+ |------------|---------|---------|-----|
31
+ | Python | 3.11+ | Runtime | `asyncio.TaskGroup` (3.11+) for parallel attack dispatch; modern `match` for pattern-matching judge output; `tomllib` stdlib |
32
+ | uv | latest | Package/venv manager | Replaces pip+venv; lockfile-native; `uv run` executes CLI without install |
33
+
34
+ ### HTTP Client (Attack Delivery)
35
+
36
+ | Technology | Version | Purpose | Why |
37
+ |------------|---------|---------|-----|
38
+ | httpx | 0.28.1 | Async HTTP to URL targets | Sync+async parity (same API surface); native `AsyncClient`; fine-grained `Timeout(connect=5, read=120)` for slow LLM responses; `HTTPTransport(retries=N)` for flaky endpoints. Outperforms aiohttp on DX and error handling. |
39
+
40
+ ### Ollama SDK (Local Model Inference)
41
+
42
+ | Technology | Version | Purpose | Why |
43
+ |------------|---------|---------|-----|
44
+ | ollama | 0.6.2 | Chat with local models | Official SDK; `AsyncClient` mirrors httpx's async model; structured output via Pydantic schema; `format='json'` for judge responses |
45
+
46
+ - `chat()` accepts a `messages` list — supports system prompt + user turn in one call, which is exactly the judge pattern: `[{"role": "system", "content": judge_prompt}, {"role": "user", "content": f"Payload: {p}\nResponse: {r}"}]`
47
+ - `generate()` is for single-prompt completion without conversation history — appropriate only for raw completions, not structured judge evaluation
48
+ - `chat()` with `format=JudgeOutput.model_json_schema()` forces structured JSON output natively (Ollama >= 0.5.0 supports Pydantic schema in `format`)
49
+
50
+ ### Terminal UI
51
+
52
+ | Technology | Version | Purpose | Why |
53
+ |------------|---------|---------|-----|
54
+ | rich | 15.0.0 | Progress bars, colored tables, console output | Industry standard for Python CLI UX; `Progress` context manager with `track()` is idiomatic; `Table` renders colored severity columns without extra dependencies |
55
+
56
+ ### Data Models
57
+
58
+ | Technology | Version | Purpose | Why |
59
+ |------------|---------|---------|-----|
60
+ | pydantic | 2.13.4 | AttackResult, ScanReport, Severity models | v2 is 10–20x faster than v1; `model_dump_json()` outputs JSON directly; `ConfigDict(extra='forbid')` catches YAML schema drift; enum serialization in JSON mode outputs values not members |
61
+
62
+ ### Template Engine
63
+
64
+ | Technology | Version | Purpose | Why |
65
+ |------------|---------|---------|-----|
66
+ | jinja2 | 3.1.6 | HTML report generation | Standard for Python templating; `FileSystemLoader` loads from `templates/` dir; auto-escaping via `Environment(autoescape=True)` mandatory for security tools (prevents XSS in report if payloads contain HTML) |
67
+
68
+ ### YAML Payload Loader
69
+
70
+ | Technology | Version | Purpose | Why |
71
+ |------------|---------|---------|-----|
72
+ | PyYAML | 6.0.3 | Loading payload YAML files | Sufficient for read-only structured data; simpler API than ruamel.yaml; always use `yaml.safe_load()` |
73
+
74
+ ### CLI Framework
75
+
76
+ | Technology | Version | Purpose | Why |
77
+ |------------|---------|---------|-----|
78
+ | argparse | stdlib | CLI argument parsing | Per project spec; zero extra dependencies; sufficient for flat argument surface (~10 flags, no subcommands); security-tool users expect POSIX-standard `--flag value` syntax |
79
+
80
+ - **Click (38.7% market share in 2025):** Better DX for complex CLIs with subcommands, but adds a dependency. Unnecessary here — no subcommands, no command groups.
81
+ - **Typer:** Adds Click as transitive dep; designed for type-hint-native CLIs. Overkill for a single-command tool. Also 14ms startup vs argparse 18ms (negligible, but argparse wins on dep count).
82
+ - **argparse wins here because:** flat single-command surface; zero deps; pip-installable by security teams in air-gapped environments without extra packages.
83
+
84
+ ### Testing
85
+
86
+ | Technology | Version | Purpose | Why |
87
+ |------------|---------|---------|-----|
88
+ | pytest | 9.1.1 | Test runner | Standard |
89
+ | pytest-asyncio | 1.4.0 | Async test support | Required for testing `AsyncClient` judge and httpx async scanner; `asyncio_mode = "auto"` eliminates decorator noise |
90
+
91
+ In auto mode, async tests need no explicit `@pytest.mark.asyncio` decorator.
92
+
93
+ ### Linter / Formatter
94
+
95
+ | Technology | Version | Purpose | Why |
96
+ |------------|---------|---------|-----|
97
+ | ruff | 0.15.20 | Linting + formatting | Replaces flake8 + black + isort in one tool; 100x faster; S-series rules (flake8-bandit) flag unsafe YAML, subprocess calls, hardcoded secrets — exactly what a security tool should self-enforce |
98
+
99
+ - `S506`: `yaml.load()` without `Loader=yaml.SafeLoader` → force `yaml.safe_load()`
100
+ - `S701`: Jinja2 `Environment(autoescape=False)` → force `autoescape=True`
101
+ - `S105`/`S106`: hardcoded API keys/passwords in source
102
+ - `S113`: requests without timeout (not applicable with httpx, but good discipline)
103
+
104
+ ## Dependency Summary
105
+
106
+ ### Production (`[project.dependencies]`)
107
+
108
+ ### Dev (`[project.optional-dependencies]`)
109
+
110
+ ## Alternatives Explicitly Rejected
111
+
112
+ | Category | Recommended | Alternative | Why Rejected |
113
+ |----------|-------------|-------------|--------------|
114
+ | HTTP client | httpx | aiohttp | aiohttp's session lifecycle is more verbose; no sync parity; DX inferior |
115
+ | HTTP client | httpx | requests | Blocking — incompatible with async attack loop |
116
+ | CLI framework | argparse | Click | Adds dependency; subcommands not needed |
117
+ | CLI framework | argparse | Typer | Click transitive dep; startup overhead; overkill for flat CLI |
118
+ | Models | pydantic v2 | dataclasses | No JSON serialization; no validation; no schema generation for Ollama `format=` |
119
+ | YAML | PyYAML | ruamel.yaml | Needed only for round-trip write-back (preserving comments); payload files are read-only |
120
+ | YAML | PyYAML | StrictYAML | No standard pip install; unnecessary for internal authored payload files |
121
+ | Linting | ruff | flake8+black+isort | Three tools replaced by one; ruff is 100x faster |
122
+ | Judge inference | ollama.chat() | ollama.generate() | generate() lacks system/user turn structure for structured evaluation |
123
+ | Jinja2 autoescape | autoescape=True | autoescape=False | Attack payloads contain HTML/JS; False creates XSS in rendered reports; Ruff S701 flags it |
124
+
125
+ ## Sources
126
+
127
+ - httpx async client: https://www.python-httpx.org/advanced/timeouts — HIGH confidence (official docs via Context7)
128
+ - Ollama Python SDK: https://github.com/ollama/ollama-python/blob/main/README.md — HIGH confidence (official SDK via Context7)
129
+ - Ollama chat vs generate: https://context7.com/ollama/ollama-python/llms.txt — HIGH confidence (official docs)
130
+ - Rich tables and progress: https://rich.readthedocs.io/en/stable/tables.html — HIGH confidence (official docs via Context7)
131
+ - Pydantic v2 serialization: https://github.com/pydantic/pydantic/blob/main/docs/concepts/serialization.md — HIGH confidence (official docs via Context7)
132
+ - pytest-asyncio auto mode: https://pytest-asyncio.readthedocs.io/en/stable/concepts.html — HIGH confidence (official docs via Context7)
133
+ - Ruff S-series (bandit) rules: https://docs.astral.sh/ruff/rules — HIGH confidence (official docs via Context7)
134
+ - PyYAML vs ruamel.yaml: https://yaml.dev/doc/ruamel.yaml/pyyaml/ — MEDIUM confidence (official ruamel docs, cross-checked with PyYAML docs)
135
+ - Jinja2 FileSystemLoader: https://jinja.palletsprojects.com/en/stable/api — HIGH confidence (official docs via Context7)
136
+ - argparse vs Click vs Typer: https://dasroot.net/posts/2025/12/building-cli-tools-python-click-typer-argparse/ — MEDIUM confidence (third-party article, consistent with project spec decision)
137
+ - Package versions: PyPI index (pip index versions), confirmed 2026-06-25
138
+
139
+ <!-- GSD:stack-end -->
140
+
141
+ <!-- GSD:conventions-start source:CONVENTIONS.md -->
142
+
143
+ ## Conventions
144
+
145
+ Conventions not yet established. Will populate as patterns emerge during development.
146
+ <!-- GSD:conventions-end -->
147
+
148
+ <!-- GSD:architecture-start source:ARCHITECTURE.md -->
149
+
150
+ ## Architecture
151
+
152
+ Architecture not yet mapped. Follow existing patterns found in the codebase.
153
+ <!-- GSD:architecture-end -->
154
+
155
+ <!-- GSD:skills-start source:skills/ -->
156
+
157
+ ## Project Skills
158
+
159
+ No project skills found. Add skills to any of: `.claude/skills/`, `.agents/skills/`, `.cursor/skills/`, `.github/skills/`, or `.codex/skills/` with a `SKILL.md` index file.
160
+ <!-- GSD:skills-end -->
161
+
162
+ <!-- GSD:workflow-start source:GSD defaults -->
163
+
164
+ ## GSD Workflow Enforcement
165
+
166
+ Before using Edit, Write, or other file-changing tools, start work through a GSD command so planning artifacts and execution context stay in sync.
167
+
168
+ Use these entry points:
169
+
170
+ - `/gsd-quick` for small fixes, doc updates, and ad-hoc tasks
171
+ - `/gsd-debug` for investigation and bug fixing
172
+ - `/gsd-execute-phase` for planned phase work
173
+
174
+ Do not make direct repo edits outside a GSD workflow unless the user explicitly asks to bypass it.
175
+ <!-- GSD:workflow-end -->
176
+
177
+ <!-- GSD:profile-start -->
178
+
179
+ ## Developer Profile
180
+
181
+ > Profile not yet configured. Run `/gsd-profile-user` to generate your developer profile.
182
+ > This section is managed by `generate-claude-profile` -- do not edit manually.
183
+ <!-- GSD:profile-end -->
@@ -0,0 +1,29 @@
1
+ # Contributing
2
+
3
+ Thanks for considering a contribution to LLM Security Scanner.
4
+
5
+ ## Setup
6
+
7
+ ```bash
8
+ uv pip install -e ".[dev]"
9
+ ```
10
+
11
+ ## Before opening a PR
12
+
13
+ ```bash
14
+ uv run pytest
15
+ uv run ruff check .
16
+ uv run ruff format .
17
+ ```
18
+
19
+ - Keep payload YAML files (`payloads/`) matching the existing schema: `id`, `name`, `payload`, `judge_criteria`, `severity`.
20
+ - New payloads should map to a specific OWASP LLM Top 10 (2025) category — see `docs/FUNCTIONALITY.md` for the current category list.
21
+ - `ruff` runs with `flake8-bandit` (`S` rules) enabled — don't disable these without a clear reason in the PR description (e.g. `yaml.safe_load` and `Environment(autoescape=True)` are enforced project-wide for security reasons).
22
+
23
+ ## Reporting bugs / requesting features
24
+
25
+ Open a GitHub issue using the provided templates. Include reproduction steps (target type, judge model, relevant flags) for bugs.
26
+
27
+ ## Scope
28
+
29
+ This repo is the open-source scan engine and CLI only. Hosted/premium features are out of scope here — see `docs/FUNCTIONALITY.md` for what belongs where.
@@ -0,0 +1,18 @@
1
+ FROM python:3.11-slim
2
+
3
+ ENV PYTHONDONTWRITEBYTECODE=1 \
4
+ PYTHONUNBUFFERED=1
5
+
6
+ WORKDIR /app
7
+
8
+ RUN apt-get update \
9
+ && apt-get install -y --no-install-recommends curl ca-certificates \
10
+ && rm -rf /var/lib/apt/lists/*
11
+
12
+ COPY pyproject.toml README.md ./
13
+ COPY src ./src
14
+ COPY payloads ./payloads
15
+
16
+ RUN pip install --no-cache-dir .
17
+
18
+ ENTRYPOINT ["llm-scanner"]
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Konrad Malinowski
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.