consent-engine 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- consent_engine-0.1.0/.claude/skills/consent-audit/SKILL.md +65 -0
- consent_engine-0.1.0/.github/workflows/ci.yml +38 -0
- consent_engine-0.1.0/.github/workflows/release.yml +87 -0
- consent_engine-0.1.0/.gitignore +30 -0
- consent_engine-0.1.0/AGENTS.md +79 -0
- consent_engine-0.1.0/CHANGELOG.md +38 -0
- consent_engine-0.1.0/CLAUDE.md +79 -0
- consent_engine-0.1.0/Dockerfile +49 -0
- consent_engine-0.1.0/LICENSE +21 -0
- consent_engine-0.1.0/PKG-INFO +226 -0
- consent_engine-0.1.0/README.md +182 -0
- consent_engine-0.1.0/RELEASING.md +87 -0
- consent_engine-0.1.0/data/vendor_library/open-cookie-database.csv +2265 -0
- consent_engine-0.1.0/data/vendor_library/vendors.json +326 -0
- consent_engine-0.1.0/data/wiki/CLAUDE.md +70 -0
- consent_engine-0.1.0/data/wiki/concepts/cipa-vppa.md +75 -0
- consent_engine-0.1.0/data/wiki/concepts/cmp-failures.md +70 -0
- consent_engine-0.1.0/data/wiki/concepts/consent-mode-v2.md +126 -0
- consent_engine-0.1.0/data/wiki/concepts/dark-patterns.md +56 -0
- consent_engine-0.1.0/data/wiki/concepts/gpc-signal.md +60 -0
- consent_engine-0.1.0/data/wiki/concepts/ssgtm-risk.md +59 -0
- consent_engine-0.1.0/data/wiki/enforcement/emerging-trends.md +90 -0
- consent_engine-0.1.0/data/wiki/enforcement/gdpr-fines.md +79 -0
- consent_engine-0.1.0/data/wiki/enforcement/lawsuit-surge.md +180 -0
- consent_engine-0.1.0/data/wiki/enforcement/live-fines-db.md +190 -0
- consent_engine-0.1.0/data/wiki/enforcement/us-class-actions.md +116 -0
- consent_engine-0.1.0/data/wiki/enforcement/us-enforcement.md +88 -0
- consent_engine-0.1.0/data/wiki/index.md +74 -0
- consent_engine-0.1.0/data/wiki/log.md +58 -0
- consent_engine-0.1.0/data/wiki/regulations/ccpa.md +59 -0
- consent_engine-0.1.0/data/wiki/regulations/gdpr.md +51 -0
- consent_engine-0.1.0/data/wiki/regulations/quebec-law25.md +42 -0
- consent_engine-0.1.0/data/wiki/regulations/tcf.md +44 -0
- consent_engine-0.1.0/data/wiki/regulations/us-state-laws.md +64 -0
- consent_engine-0.1.0/data/wiki/technical/cmp-profiles.md +460 -0
- consent_engine-0.1.0/data/wiki/technical/consent-mode-impact.md +72 -0
- consent_engine-0.1.0/data/wiki/technical/google-tag-gateway.md +70 -0
- consent_engine-0.1.0/data/wiki/technical/scanner-methodology.md +191 -0
- consent_engine-0.1.0/docs/scenarios.md +125 -0
- consent_engine-0.1.0/evals/README.md +80 -0
- consent_engine-0.1.0/evals/cases/001-onetrust-marketing-site.yaml +21 -0
- consent_engine-0.1.0/evals/cases/002-anthropic-marketing-site.yaml +15 -0
- consent_engine-0.1.0/evals/cases/003-no-cmp-baseline.yaml +14 -0
- consent_engine-0.1.0/evals/cases/004-cresta-marketing-site.yaml +13 -0
- consent_engine-0.1.0/evals/run_evals.py +107 -0
- consent_engine-0.1.0/pyproject.toml +102 -0
- consent_engine-0.1.0/src/consent_engine/__init__.py +11 -0
- consent_engine-0.1.0/src/consent_engine/api.py +83 -0
- consent_engine-0.1.0/src/consent_engine/cli.py +133 -0
- consent_engine-0.1.0/src/consent_engine/config.py +37 -0
- consent_engine-0.1.0/src/consent_engine/llm/__init__.py +0 -0
- consent_engine-0.1.0/src/consent_engine/llm/client.py +50 -0
- consent_engine-0.1.0/src/consent_engine/mcp_server.py +185 -0
- consent_engine-0.1.0/src/consent_engine/models/__init__.py +0 -0
- consent_engine-0.1.0/src/consent_engine/models/audit_request.py +23 -0
- consent_engine-0.1.0/src/consent_engine/models/audit_result.py +152 -0
- consent_engine-0.1.0/src/consent_engine/models/scan_result.py +57 -0
- consent_engine-0.1.0/src/consent_engine/models/vendor.py +28 -0
- consent_engine-0.1.0/src/consent_engine/tools/__init__.py +0 -0
- consent_engine-0.1.0/src/consent_engine/tools/cmp_clicker.py +901 -0
- consent_engine-0.1.0/src/consent_engine/tools/cmp_detector.py +413 -0
- consent_engine-0.1.0/src/consent_engine/tools/cmp_injector.py +420 -0
- consent_engine-0.1.0/src/consent_engine/tools/jurisdiction_detector.py +306 -0
- consent_engine-0.1.0/src/consent_engine/tools/tool_01_gtm_parser.py +160 -0
- consent_engine-0.1.0/src/consent_engine/tools/tool_02_violation_classifier.py +146 -0
- consent_engine-0.1.0/src/consent_engine/tools/tool_03_browser_scanner.py +1945 -0
- consent_engine-0.1.0/src/consent_engine/tools/tool_04_har_analyzer.py +99 -0
- consent_engine-0.1.0/src/consent_engine/tools/tool_05_vendor_library.py +262 -0
- consent_engine-0.1.0/src/consent_engine/tools/tool_06_ssgtm_detector.py +216 -0
- consent_engine-0.1.0/src/consent_engine/tools/tool_06b_pixel_detector.py +192 -0
- consent_engine-0.1.0/src/consent_engine/tools/tool_07_rag_retriever.py +291 -0
- consent_engine-0.1.0/src/consent_engine/tools/tool_08_report_generator.py +1767 -0
- consent_engine-0.1.0/templates/audit_deck.marp.md.j2 +175 -0
- consent_engine-0.1.0/templates/audit_report.html.j2 +1070 -0
- consent_engine-0.1.0/tests/__init__.py +0 -0
- consent_engine-0.1.0/tests/test_smoke.py +5 -0
- consent_engine-0.1.0/tests/tools/__init__.py +0 -0
- consent_engine-0.1.0/tests/tools/conftest.py +340 -0
- consent_engine-0.1.0/tests/tools/test_tool_03_browser_scanner.py +667 -0
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: consent-audit
|
|
3
|
+
description: Run a forensic consent-compliance audit on a URL. Use whenever the user asks to audit, check, or scan a website for consent / cookie / CMP / CCPA / GDPR / privacy compliance. Captures every network request, classifies violations against the lawsuit-surge wiki, produces an HTML report + Marp deck + JSON audit result + evidence log.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# consent-audit
|
|
7
|
+
|
|
8
|
+
A Claude Code skill that drives the `consent-engine` Python package to run a
|
|
9
|
+
forensic consent-compliance audit and explain the results.
|
|
10
|
+
|
|
11
|
+
## When to invoke
|
|
12
|
+
|
|
13
|
+
Any of these:
|
|
14
|
+
- "Audit https://example.com for consent compliance"
|
|
15
|
+
- "Run a consent check on <url>"
|
|
16
|
+
- "Why is <vendor> firing on <url> after reject?"
|
|
17
|
+
- "Is this site CCPA / GDPR compliant?"
|
|
18
|
+
- "Generate a forensic report for this site"
|
|
19
|
+
|
|
20
|
+
## How to run it
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
uvx consent-engine audit <url> --output-dir ./consent-audits
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
Output bundle: `./consent-audits/<audit_id>/`
|
|
27
|
+
- `report.html` — full forensic report with network evidence, vendor
|
|
28
|
+
lookups, legal exposure estimates, and remediation roadmap
|
|
29
|
+
- `audit_result.json` — structured Pydantic model
|
|
30
|
+
- `evidence.jsonl` — every captured network request (timestamped)
|
|
31
|
+
- `deck.marp.md` — client-ready slide deck
|
|
32
|
+
|
|
33
|
+
## Interpreting findings
|
|
34
|
+
|
|
35
|
+
| Severity | What to do |
|
|
36
|
+
|---|---|
|
|
37
|
+
| **Definitive violation (S3)** | Tag fired after explicit reject. Quote the audit_id, evidence line, and legal exposure ($7,500 CCPA, $5,000 CIPA per event). Remediate this week. |
|
|
38
|
+
| **Server-side bypass** | Server-side GTM (sSGTM) detected in front of analytics. Client-side enforcement cannot block it. This is an architectural risk; brief engineering. |
|
|
39
|
+
| **Inconclusive (S2)** | Tag fired in an ambiguous consent state. Re-run with S3 methodology before reporting. |
|
|
40
|
+
| **Warning** | Configuration drift, deprecated tags, or compliance edge cases. Track. |
|
|
41
|
+
|
|
42
|
+
## Follow-up questions to support
|
|
43
|
+
|
|
44
|
+
- "Why did <vendor> fire?" → invoke `consent-engine chat <audit_id>` or call
|
|
45
|
+
the `consent-engine-mcp` `query_evidence` tool with `host_contains` set.
|
|
46
|
+
- "What's the financial exposure?" → read the `legal_exposure` block in
|
|
47
|
+
`audit_result.json`.
|
|
48
|
+
- "How do I remediate?" → read the `remediation` section in `report.html`
|
|
49
|
+
and cross-reference `data/wiki/technical/` pages.
|
|
50
|
+
|
|
51
|
+
## Thorough audit sequence
|
|
52
|
+
|
|
53
|
+
For high-stakes audits (regulated industry, healthcare, post-demand-letter):
|
|
54
|
+
|
|
55
|
+
1. Audit with consent in **reject** (default S3 methodology).
|
|
56
|
+
2. Audit with consent in **accept** — confirms the CMP toggles tags as
|
|
57
|
+
expected.
|
|
58
|
+
3. Audit with GPC set (`Sec-GPC: 1`) — confirms Global Privacy Control
|
|
59
|
+
is honored.
|
|
60
|
+
4. Re-audit weekly for 90 days post-remediation. Drift is real.
|
|
61
|
+
|
|
62
|
+
## Source
|
|
63
|
+
|
|
64
|
+
[github.com/kb223/consent-engine](https://github.com/kb223/consent-engine).
|
|
65
|
+
MIT license. Built by Kenneth Buchanan.
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
pull_request:
|
|
5
|
+
push:
|
|
6
|
+
branches: [main]
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
test:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
steps:
|
|
12
|
+
- uses: actions/checkout@v4
|
|
13
|
+
|
|
14
|
+
- name: Install uv
|
|
15
|
+
uses: astral-sh/setup-uv@v4
|
|
16
|
+
with:
|
|
17
|
+
version: "latest"
|
|
18
|
+
|
|
19
|
+
- name: Set up Python
|
|
20
|
+
run: uv python install 3.12
|
|
21
|
+
|
|
22
|
+
- name: Install dependencies
|
|
23
|
+
run: uv sync --group dev
|
|
24
|
+
|
|
25
|
+
- name: Lint with ruff
|
|
26
|
+
run: uv run ruff check src/
|
|
27
|
+
|
|
28
|
+
- name: Type check with mypy
|
|
29
|
+
run: uv run mypy src/ || true # warnings, not failures, on first public release
|
|
30
|
+
|
|
31
|
+
- name: Install Playwright browsers
|
|
32
|
+
run: uv run playwright install chromium --with-deps
|
|
33
|
+
|
|
34
|
+
- name: Run tests
|
|
35
|
+
run: uv run pytest tests/ -v --tb=short
|
|
36
|
+
env:
|
|
37
|
+
ANTHROPIC_API_KEY: "test-key"
|
|
38
|
+
OPENAI_API_KEY: "test-key"
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
name: Release
|
|
2
|
+
|
|
3
|
+
# Publishes consent-engine to PyPI on every pushed tag matching v*.
|
|
4
|
+
# Uses PyPI Trusted Publishing — no API token needed; auth is via GitHub
|
|
5
|
+
# OIDC. One-time setup on PyPI: https://docs.pypi.org/trusted-publishers/
|
|
6
|
+
#
|
|
7
|
+
# To release:
|
|
8
|
+
# 1. Bump version in src/consent_engine/__init__.py + pyproject.toml
|
|
9
|
+
# 2. Update CHANGELOG.md
|
|
10
|
+
# 3. git tag v0.1.0 && git push --tags
|
|
11
|
+
# 4. This workflow builds, attests, and publishes the wheel + sdist
|
|
12
|
+
# 5. A GitHub Release is created from the tag automatically
|
|
13
|
+
|
|
14
|
+
on:
|
|
15
|
+
push:
|
|
16
|
+
tags:
|
|
17
|
+
- "v*"
|
|
18
|
+
|
|
19
|
+
permissions:
|
|
20
|
+
contents: write # to create the GitHub Release
|
|
21
|
+
id-token: write # PyPI Trusted Publishing OIDC
|
|
22
|
+
|
|
23
|
+
jobs:
|
|
24
|
+
build:
|
|
25
|
+
name: Build distributions
|
|
26
|
+
runs-on: ubuntu-latest
|
|
27
|
+
steps:
|
|
28
|
+
- uses: actions/checkout@v4
|
|
29
|
+
|
|
30
|
+
- name: Install uv
|
|
31
|
+
uses: astral-sh/setup-uv@v4
|
|
32
|
+
with:
|
|
33
|
+
version: "latest"
|
|
34
|
+
|
|
35
|
+
- name: Set up Python
|
|
36
|
+
run: uv python install 3.12
|
|
37
|
+
|
|
38
|
+
- name: Build wheel + sdist
|
|
39
|
+
run: uv build
|
|
40
|
+
|
|
41
|
+
- name: Show artifacts
|
|
42
|
+
run: ls -la dist/
|
|
43
|
+
|
|
44
|
+
- uses: actions/upload-artifact@v4
|
|
45
|
+
with:
|
|
46
|
+
name: dist
|
|
47
|
+
path: dist/
|
|
48
|
+
|
|
49
|
+
publish-pypi:
|
|
50
|
+
name: Publish to PyPI
|
|
51
|
+
needs: build
|
|
52
|
+
runs-on: ubuntu-latest
|
|
53
|
+
environment:
|
|
54
|
+
name: pypi
|
|
55
|
+
url: https://pypi.org/p/consent-engine
|
|
56
|
+
steps:
|
|
57
|
+
- uses: actions/download-artifact@v4
|
|
58
|
+
with:
|
|
59
|
+
name: dist
|
|
60
|
+
path: dist/
|
|
61
|
+
|
|
62
|
+
- name: Publish via Trusted Publisher
|
|
63
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
64
|
+
# No `password:` — OIDC handles auth.
|
|
65
|
+
|
|
66
|
+
github-release:
|
|
67
|
+
name: Create GitHub Release
|
|
68
|
+
needs: publish-pypi
|
|
69
|
+
runs-on: ubuntu-latest
|
|
70
|
+
steps:
|
|
71
|
+
- uses: actions/checkout@v4
|
|
72
|
+
|
|
73
|
+
- uses: actions/download-artifact@v4
|
|
74
|
+
with:
|
|
75
|
+
name: dist
|
|
76
|
+
path: dist/
|
|
77
|
+
|
|
78
|
+
- name: Create release + attach artifacts
|
|
79
|
+
env:
|
|
80
|
+
GH_TOKEN: ${{ github.token }}
|
|
81
|
+
run: |
|
|
82
|
+
gh release create "${GITHUB_REF_NAME}" \
|
|
83
|
+
--title "consent-engine ${GITHUB_REF_NAME}" \
|
|
84
|
+
--notes-from-tag \
|
|
85
|
+
--verify-tag \
|
|
86
|
+
dist/*.whl \
|
|
87
|
+
dist/*.tar.gz
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.egg-info/
|
|
5
|
+
.venv/
|
|
6
|
+
.pytest_cache/
|
|
7
|
+
.mypy_cache/
|
|
8
|
+
.ruff_cache/
|
|
9
|
+
|
|
10
|
+
# Audit outputs (transient)
|
|
11
|
+
out/
|
|
12
|
+
.tmp/
|
|
13
|
+
|
|
14
|
+
# Local env
|
|
15
|
+
.env
|
|
16
|
+
.env.local
|
|
17
|
+
|
|
18
|
+
# IDE
|
|
19
|
+
.vscode/
|
|
20
|
+
.idea/
|
|
21
|
+
*.swp
|
|
22
|
+
|
|
23
|
+
# OS
|
|
24
|
+
.DS_Store
|
|
25
|
+
Thumbs.db
|
|
26
|
+
|
|
27
|
+
# Build artifacts
|
|
28
|
+
build/
|
|
29
|
+
dist/
|
|
30
|
+
*.egg
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
# consent-engine — agent instructions
|
|
2
|
+
|
|
3
|
+
> Public OSS repo. The full forensic engine. See `README.md` for the
|
|
4
|
+
> user-facing pitch and `docs/scenarios.md` for the system flow.
|
|
5
|
+
|
|
6
|
+
## Project purpose
|
|
7
|
+
|
|
8
|
+
Forensic audit tool that compares cookie + tag enforcement against user
|
|
9
|
+
consent preferences. Built for enterprises facing privacy-litigation
|
|
10
|
+
demand letters.
|
|
11
|
+
|
|
12
|
+
## Architecture
|
|
13
|
+
|
|
14
|
+
- **Deterministic** by design. Decisions made at build time, not runtime.
|
|
15
|
+
- 8 independently testable tools in `src/consent_engine/tools/`.
|
|
16
|
+
- LLM scoped to executive-summary generation only, behind a LiteLLM wrapper.
|
|
17
|
+
- Knowledge base is markdown (`data/wiki/`). No vector DB, no embeddings.
|
|
18
|
+
- Vendor library is JSON (`data/vendor_library/vendors.json`) + the Open
|
|
19
|
+
Cookie Database CSV.
|
|
20
|
+
|
|
21
|
+
## Domain context
|
|
22
|
+
|
|
23
|
+
- OneTrust categories: C0001 (essential), C0002 (analytics), C0003
|
|
24
|
+
(functional), C0004 (targeting).
|
|
25
|
+
- OneTrust data layer variable: `OnetrustActiveGroups`.
|
|
26
|
+
- S2 = post-opt-out without page reload = INCONCLUSIVE. Never definitive.
|
|
27
|
+
- S3 = fresh browser context with consent pre-set = DEFINITIVE.
|
|
28
|
+
- GCS=G100 in a network request = Advanced Consent Mode active (Basic
|
|
29
|
+
mode blocks the tag entirely).
|
|
30
|
+
- Server-side GTM cannot be blocked by client-side enforcement snippets.
|
|
31
|
+
- GPC (Sec-GPC: 1) signal cannot be forwarded to server-side containers.
|
|
32
|
+
|
|
33
|
+
## Commands
|
|
34
|
+
|
|
35
|
+
- Install: `uv sync`
|
|
36
|
+
- CLI: `uv run consent-engine audit <url>`
|
|
37
|
+
- API: `uv run uvicorn consent_engine.api:app --reload`
|
|
38
|
+
- MCP server: `uv run consent-engine-mcp`
|
|
39
|
+
- Test: `uv run pytest tests/ -v`
|
|
40
|
+
- Lint: `uv run ruff check src/`
|
|
41
|
+
- Type check: `uv run mypy src/`
|
|
42
|
+
- Evals: `uv run python evals/run_evals.py`
|
|
43
|
+
|
|
44
|
+
## When the user wants to audit a URL
|
|
45
|
+
|
|
46
|
+
Use the CLI directly (`uv run consent-engine audit <url>`) or call the
|
|
47
|
+
underlying tools. Output bundle lands in `./out/<audit_id>/` with
|
|
48
|
+
`report.html`, `audit_result.json`, `evidence.jsonl`, `deck.marp.md`.
|
|
49
|
+
|
|
50
|
+
## When the user wants to query a prior audit
|
|
51
|
+
|
|
52
|
+
Use `consent-engine chat <audit_id>` or, if working through MCP, the
|
|
53
|
+
`query_evidence` tool against the audit_id. The evidence.jsonl has every
|
|
54
|
+
captured network request — grounding for follow-ups.
|
|
55
|
+
|
|
56
|
+
## When adding knowledge
|
|
57
|
+
|
|
58
|
+
Knowledge lives in `data/wiki/` as markdown. Add a new page, update
|
|
59
|
+
`data/wiki/index.md`, ensure `tool_07_rag_retriever.py` knows which
|
|
60
|
+
findings should retrieve it.
|
|
61
|
+
|
|
62
|
+
## When adding a vendor
|
|
63
|
+
|
|
64
|
+
Two paths:
|
|
65
|
+
- Legally-annotated, lawsuit-relevant: edit
|
|
66
|
+
`data/vendor_library/vendors.json` (priority).
|
|
67
|
+
- Standard, well-known: edit the Open Cookie DB CSV in the same folder.
|
|
68
|
+
|
|
69
|
+
## When adding an eval case
|
|
70
|
+
|
|
71
|
+
`evals/cases/NNN-<slug>.yaml`. Run
|
|
72
|
+
`uv run python evals/run_evals.py --add-baseline evals/cases/NNN-<slug>.yaml`
|
|
73
|
+
once to populate the expected block from current behavior.
|
|
74
|
+
|
|
75
|
+
## Voice
|
|
76
|
+
|
|
77
|
+
Public-facing docs and report copy: no emojis, no em dashes, no superlatives,
|
|
78
|
+
no marketing-speak. Plain technical English. Audience is engineers, privacy
|
|
79
|
+
officers, and legal counsel.
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to consent-engine. Format loosely follows
|
|
4
|
+
[Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
|
|
5
|
+
|
|
6
|
+
## [0.1.0] — 2026-05-16 — initial public release
|
|
7
|
+
|
|
8
|
+
### Added
|
|
9
|
+
- Eight-tool deterministic audit pipeline:
|
|
10
|
+
- tool_01 GTM container parser (live interception or JSON upload)
|
|
11
|
+
- tool_02 violation classifier (S2 inconclusive vs S3 definitive)
|
|
12
|
+
- tool_03 Playwright browser scanner with consent state pre-set
|
|
13
|
+
- tool_04 HAR analyzer
|
|
14
|
+
- tool_05 vendor library lookup (custom + Open Cookie Database)
|
|
15
|
+
- tool_06 server-side GTM detector
|
|
16
|
+
- tool_06b out-of-GTM pixel detector
|
|
17
|
+
- tool_07 markdown wiki retriever (no vector DB)
|
|
18
|
+
- tool_08 report + Marp deck generator (LLM exec summary only)
|
|
19
|
+
- CLI: `consent-engine audit <url>` + `consent-engine chat <audit_id>`
|
|
20
|
+
- MCP server: `consent-engine-mcp` (Claude Desktop / Code integration)
|
|
21
|
+
- Claude Code skill at `.claude/skills/consent-audit/SKILL.md`
|
|
22
|
+
- Evals harness skeleton at `evals/` with golden-case YAML format
|
|
23
|
+
- Glass-box reporting: every captured network request persisted to
|
|
24
|
+
`evidence.jsonl` per audit, queryable from CLI + MCP
|
|
25
|
+
- Lawsuit-surge knowledge base page at
|
|
26
|
+
`data/wiki/enforcement/lawsuit-surge.md`
|
|
27
|
+
- End-to-end scenarios doc at `docs/scenarios.md` with Mermaid diagram
|
|
28
|
+
|
|
29
|
+
### Design decisions
|
|
30
|
+
- Deterministic by default. LLM scoped to executive-summary generation
|
|
31
|
+
only. Credit to Fred Pike (MeasureSummit May 2026) for the explicit
|
|
32
|
+
framing.
|
|
33
|
+
- Markdown wiki replaces vector DB. Karpathy LLM-wiki pattern. Zero
|
|
34
|
+
embeddings, zero Pinecone, zero fine-tuning. The whole knowledge layer
|
|
35
|
+
is version-controlled markdown.
|
|
36
|
+
- Multi-tier vendor library: custom legally-annotated entries take
|
|
37
|
+
precedence, then the Open Cookie Database (~3,200 entries), then
|
|
38
|
+
anything still unmatched is flagged for manual review.
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
# consent-engine — agent instructions
|
|
2
|
+
|
|
3
|
+
> Public OSS repo. The full forensic engine. See `README.md` for the
|
|
4
|
+
> user-facing pitch and `docs/scenarios.md` for the system flow.
|
|
5
|
+
|
|
6
|
+
## Project purpose
|
|
7
|
+
|
|
8
|
+
Forensic audit tool that compares cookie + tag enforcement against user
|
|
9
|
+
consent preferences. Built for enterprises facing privacy-litigation
|
|
10
|
+
demand letters.
|
|
11
|
+
|
|
12
|
+
## Architecture
|
|
13
|
+
|
|
14
|
+
- **Deterministic** by design. Decisions made at build time, not runtime.
|
|
15
|
+
- 8 independently testable tools in `src/consent_engine/tools/`.
|
|
16
|
+
- LLM scoped to executive-summary generation only, behind a LiteLLM wrapper.
|
|
17
|
+
- Knowledge base is markdown (`data/wiki/`). No vector DB, no embeddings.
|
|
18
|
+
- Vendor library is JSON (`data/vendor_library/vendors.json`) + the Open
|
|
19
|
+
Cookie Database CSV.
|
|
20
|
+
|
|
21
|
+
## Domain context
|
|
22
|
+
|
|
23
|
+
- OneTrust categories: C0001 (essential), C0002 (analytics), C0003
|
|
24
|
+
(functional), C0004 (targeting).
|
|
25
|
+
- OneTrust data layer variable: `OnetrustActiveGroups`.
|
|
26
|
+
- S2 = post-opt-out without page reload = INCONCLUSIVE. Never definitive.
|
|
27
|
+
- S3 = fresh browser context with consent pre-set = DEFINITIVE.
|
|
28
|
+
- GCS=G100 in a network request = Advanced Consent Mode active (Basic
|
|
29
|
+
mode blocks the tag entirely).
|
|
30
|
+
- Server-side GTM cannot be blocked by client-side enforcement snippets.
|
|
31
|
+
- GPC (Sec-GPC: 1) signal cannot be forwarded to server-side containers.
|
|
32
|
+
|
|
33
|
+
## Commands
|
|
34
|
+
|
|
35
|
+
- Install: `uv sync`
|
|
36
|
+
- CLI: `uv run consent-engine audit <url>`
|
|
37
|
+
- API: `uv run uvicorn consent_engine.api:app --reload`
|
|
38
|
+
- MCP server: `uv run consent-engine-mcp`
|
|
39
|
+
- Test: `uv run pytest tests/ -v`
|
|
40
|
+
- Lint: `uv run ruff check src/`
|
|
41
|
+
- Type check: `uv run mypy src/`
|
|
42
|
+
- Evals: `uv run python evals/run_evals.py`
|
|
43
|
+
|
|
44
|
+
## When the user wants to audit a URL
|
|
45
|
+
|
|
46
|
+
Use the CLI directly (`uv run consent-engine audit <url>`) or call the
|
|
47
|
+
underlying tools. Output bundle lands in `./out/<audit_id>/` with
|
|
48
|
+
`report.html`, `audit_result.json`, `evidence.jsonl`, `deck.marp.md`.
|
|
49
|
+
|
|
50
|
+
## When the user wants to query a prior audit
|
|
51
|
+
|
|
52
|
+
Use `consent-engine chat <audit_id>` or, if working through MCP, the
|
|
53
|
+
`query_evidence` tool against the audit_id. The evidence.jsonl has every
|
|
54
|
+
captured network request — grounding for follow-ups.
|
|
55
|
+
|
|
56
|
+
## When adding knowledge
|
|
57
|
+
|
|
58
|
+
Knowledge lives in `data/wiki/` as markdown. Add a new page, update
|
|
59
|
+
`data/wiki/index.md`, ensure `tool_07_rag_retriever.py` knows which
|
|
60
|
+
findings should retrieve it.
|
|
61
|
+
|
|
62
|
+
## When adding a vendor
|
|
63
|
+
|
|
64
|
+
Two paths:
|
|
65
|
+
- Legally-annotated, lawsuit-relevant: edit
|
|
66
|
+
`data/vendor_library/vendors.json` (priority).
|
|
67
|
+
- Standard, well-known: edit the Open Cookie DB CSV in the same folder.
|
|
68
|
+
|
|
69
|
+
## When adding an eval case
|
|
70
|
+
|
|
71
|
+
`evals/cases/NNN-<slug>.yaml`. Run
|
|
72
|
+
`uv run python evals/run_evals.py --add-baseline evals/cases/NNN-<slug>.yaml`
|
|
73
|
+
once to populate the expected block from current behavior.
|
|
74
|
+
|
|
75
|
+
## Voice
|
|
76
|
+
|
|
77
|
+
Public-facing docs and report copy: no emojis, no em dashes, no superlatives,
|
|
78
|
+
no marketing-speak. Plain technical English. Audience is engineers, privacy
|
|
79
|
+
officers, and legal counsel.
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
FROM python:3.12-slim
|
|
2
|
+
|
|
3
|
+
# System deps required by Playwright Chromium
|
|
4
|
+
RUN apt-get update && apt-get install -y \
|
|
5
|
+
libglib2.0-0 \
|
|
6
|
+
libnss3 \
|
|
7
|
+
libnspr4 \
|
|
8
|
+
libdbus-1-3 \
|
|
9
|
+
libatk1.0-0 \
|
|
10
|
+
libatk-bridge2.0-0 \
|
|
11
|
+
libcups2 \
|
|
12
|
+
libdrm2 \
|
|
13
|
+
libxkbcommon0 \
|
|
14
|
+
libxcomposite1 \
|
|
15
|
+
libxdamage1 \
|
|
16
|
+
libxfixes3 \
|
|
17
|
+
libxrandr2 \
|
|
18
|
+
libgbm1 \
|
|
19
|
+
libasound2 \
|
|
20
|
+
libpango-1.0-0 \
|
|
21
|
+
libpangocairo-1.0-0 \
|
|
22
|
+
libcairo2 \
|
|
23
|
+
libfontconfig1 \
|
|
24
|
+
&& rm -rf /var/lib/apt/lists/*
|
|
25
|
+
|
|
26
|
+
# Install uv
|
|
27
|
+
COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv
|
|
28
|
+
|
|
29
|
+
WORKDIR /app
|
|
30
|
+
|
|
31
|
+
# Install Python dependencies (cached layer)
|
|
32
|
+
COPY pyproject.toml ./
|
|
33
|
+
RUN uv sync --no-dev --no-install-project
|
|
34
|
+
|
|
35
|
+
# Install Playwright Chromium browser only
|
|
36
|
+
RUN uv run playwright install chromium
|
|
37
|
+
|
|
38
|
+
# Copy application source
|
|
39
|
+
COPY src/ src/
|
|
40
|
+
COPY data/ data/
|
|
41
|
+
COPY templates/ templates/
|
|
42
|
+
|
|
43
|
+
ENV PYTHONPATH=/app/src
|
|
44
|
+
|
|
45
|
+
EXPOSE 8080
|
|
46
|
+
|
|
47
|
+
# Default: HTTP API. Use `docker run consent-engine consent-engine audit <url>`
|
|
48
|
+
# for CLI mode.
|
|
49
|
+
CMD ["uv", "run", "uvicorn", "consent_engine.api:app", "--host", "0.0.0.0", "--port", "8080"]
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Kenneth Buchanan
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
|
13
|
+
all copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|