consent-engine 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,11 @@
1
+ """consent-engine — forensic consent compliance audit engine.
2
+
3
+ Public package surface:
4
+ - consent_engine.cli CLI entrypoint (`consent-engine audit ...`)
5
+ - consent_engine.mcp_server MCP server entrypoint (`consent-engine-mcp`)
6
+ - consent_engine.tools.* Eight deterministic audit tools
7
+ - consent_engine.models.* Pydantic models (AuditResult, ScanResult, ...)
8
+ - consent_engine.llm.client LiteLLM-wrapped chat surface (agentic layer)
9
+ """
10
+
11
+ __version__ = "0.1.0"
consent_engine/api.py ADDED
@@ -0,0 +1,83 @@
1
+ """Stripped FastAPI surface for the public consent-engine.
2
+
3
+ Single endpoint: POST /audit
4
+ - Accepts { "url": "https://example.com" }
5
+ - Returns the audit_result.json contents inline + a link to the report
6
+ bundle on disk (relative path).
7
+
8
+ For the full async / job-queue flow the private business app uses, fork this
9
+ file. This public version is deliberately small and synchronous so it's easy
10
+ to read.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import json
16
+ from pathlib import Path
17
+
18
+ from fastapi import FastAPI, HTTPException
19
+ from pydantic import BaseModel, HttpUrl
20
+
21
+ from consent_engine import __version__
22
+
23
# Module-level ASGI application; `cli()` below serves it via the import string
# "consent_engine.api:app". The version shown is the package's own __version__.
app = FastAPI(
    title="consent-engine",
    version=__version__,
    description="Forensic consent compliance audit engine.",
)
28
+
29
+
30
# Request body for POST /audit. A `#` comment is used instead of a docstring
# so the generated schema description is left unchanged.
class AuditRequest(BaseModel):
    # Typed as HttpUrl, so pydantic validates it before the handler runs.
    url: HttpUrl
32
+
33
+
34
@app.get("/healthz")
def healthz() -> dict[str, str]:
    # Health probe: a constant status plus the installed package version.
    payload = {"status": "ok", "version": __version__}
    return payload
37
+
38
+
39
@app.post("/audit")
async def audit(req: AuditRequest) -> dict:
    """Run a full audit and return the structured result inline.

    For long-running jobs swap this for an async job-queue (BackgroundTasks
    or a real queue like Celery/Arq).
    """
    # Imported at call time rather than at module import time.
    from consent_engine.tools.tool_02_violation_classifier import classify
    from consent_engine.tools.tool_03_browser_scanner import scan_page
    from consent_engine.tools.tool_08_report_generator import generate_report

    # NOTE(review): the URL is caller-supplied and handed straight to the
    # scanner; HttpUrl validation does not exclude internal/private hosts.
    try:
        scan = await scan_page(url=str(req.url))
    except Exception as e:  # noqa: BLE001
        # Any scanner failure is reported to the client as a 502.
        raise HTTPException(status_code=502, detail=f"scan failed: {e}") from e

    audit_result = classify(scan)
    # Serialise once; the same payload is written to disk and returned.
    result_payload = audit_result.model_dump(mode="json")

    out_dir = Path("./out") / audit_result.audit_id
    out_dir.mkdir(parents=True, exist_ok=True)

    # One JSON object per captured network request.
    with (out_dir / "evidence.jsonl").open("w", encoding="utf-8") as f:
        for r in scan.network_requests:
            f.write(json.dumps(r.model_dump(mode="json"), default=str) + "\n")

    # Explicit UTF-8: the report and deck are free text, and the locale default
    # encoding can fail or mis-encode non-ASCII characters on some platforms.
    report_html, deck_md = generate_report(audit_result)
    (out_dir / "report.html").write_text(report_html, encoding="utf-8")
    (out_dir / "deck.marp.md").write_text(deck_md, encoding="utf-8")
    (out_dir / "audit_result.json").write_text(
        json.dumps(result_payload, indent=2, default=str),
        encoding="utf-8",
    )

    return {
        "audit_id": audit_result.audit_id,
        "bundle": str(out_dir),
        "result": result_payload,
    }
74
+
75
+
76
def cli() -> None:
    """Serve the FastAPI app with uvicorn on 0.0.0.0:8080, auto-reload off."""
    import uvicorn

    # NOTE(review): 0.0.0.0 listens on every interface and the app defines no
    # authentication; confirm this is intended outside a container.
    bind_host, bind_port = "0.0.0.0", 8080
    uvicorn.run(
        "consent_engine.api:app",
        host=bind_host,
        port=bind_port,
        reload=False,
    )
80
+
81
+
82
+ if __name__ == "__main__": # pragma: no cover
83
+ cli()
consent_engine/cli.py ADDED
@@ -0,0 +1,133 @@
1
+ """consent-engine CLI.
2
+
3
+ Usage:
4
+ consent-engine audit <url> [--output-dir DIR] [--gtm-json PATH] [--har PATH]
5
+ consent-engine chat <audit_id>
6
+ consent-engine version
7
+
8
+ The `audit` command writes a full audit bundle (report.html, audit_result.json,
9
+ evidence.jsonl, deck.marp.md) to ./out/<audit_id>/.
10
+
11
+ The `chat` command opens a per-audit Claude conversation grounded in the
12
+ captured evidence + audit result + wiki context cited by the audit. Closing
13
+ the loop on Fred Pike's glass-box principle.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import argparse
19
+ import asyncio
20
+ import json
21
+ import sys
22
+ from pathlib import Path
23
+
24
+ from consent_engine import __version__
25
+
26
+
27
def _audit_command(args: argparse.Namespace) -> int:
    """Run an audit against a URL. Writes the bundle to out/<audit_id>/.

    Args:
        args: Parsed CLI namespace. ``url`` and ``output_dir`` are consumed;
            ``gtm_json`` and ``har`` are accepted by the parser but nothing in
            this function reads them, so a warning is printed if they are set.

    Returns:
        0 once the bundle has been written.
    """
    # Lazy imports so `--help` doesn't trigger Playwright load.
    from consent_engine.tools.tool_02_violation_classifier import classify
    from consent_engine.tools.tool_03_browser_scanner import scan_page
    from consent_engine.tools.tool_08_report_generator import generate_report

    # Tell the user rather than silently dropping input they supplied.
    for flag, attr in (("--gtm-json", "gtm_json"), ("--har", "har")):
        if getattr(args, attr, None):
            print(
                f"warning: {flag} is not used by this command yet; ignoring it",
                file=sys.stderr,
            )

    url = args.url
    out_dir = Path(args.output_dir or "./out")
    out_dir.mkdir(parents=True, exist_ok=True)

    print(f"[1/4] Scanning {url} …", flush=True)
    scan_result = asyncio.run(scan_page(url=url))

    print("[2/4] Classifying violations …", flush=True)
    audit_result = classify(scan_result)

    audit_dir = out_dir / audit_result.audit_id
    audit_dir.mkdir(parents=True, exist_ok=True)

    # Persist the network evidence per Fred Pike's "glass box" pattern —
    # every captured request goes to evidence.jsonl, audit-scoped.
    print("[3/4] Writing evidence log …", flush=True)
    with (audit_dir / "evidence.jsonl").open("w", encoding="utf-8") as f:
        for req in scan_result.network_requests:
            f.write(json.dumps(req.model_dump(mode="json"), default=str) + "\n")

    print("[4/4] Generating report + deck …", flush=True)
    report_html, deck_md = generate_report(audit_result)
    # Explicit UTF-8: the locale default encoding can fail or mis-encode
    # non-ASCII report text on some platforms.
    (audit_dir / "report.html").write_text(report_html, encoding="utf-8")
    (audit_dir / "deck.marp.md").write_text(deck_md, encoding="utf-8")
    (audit_dir / "audit_result.json").write_text(
        json.dumps(audit_result.model_dump(mode="json"), indent=2, default=str),
        encoding="utf-8",
    )

    print()
    print(f"Audit complete: {audit_dir}")
    print(f" Report: {audit_dir / 'report.html'}")
    print(f" Deck: {audit_dir / 'deck.marp.md'}")
    print(f" Evidence: {audit_dir / 'evidence.jsonl'}")
    print(f" Findings: {len(audit_result.violations)} violation(s), "
          f"{len(audit_result.warnings)} warning(s)")
    return 0
70
+
71
+
72
def _chat_command(args: argparse.Namespace) -> int:
    """Open a Claude conversation grounded in a completed audit.

    Args:
        args: Parsed CLI namespace; ``audit_id`` names a directory under ./out/.

    Returns:
        0 when the session ends normally (exit/quit, empty line, EOF or
        Ctrl-C); 1 when the bundle is missing or unreadable, or the chat
        backend cannot be imported.
    """
    # NOTE(review): bundles are always looked up under ./out, even though
    # `audit` can write elsewhere via --output-dir.
    audit_dir = Path("./out") / args.audit_id
    if not audit_dir.exists():
        print(f"error: no audit bundle at {audit_dir}", file=sys.stderr)
        return 1

    # NOTE(review): confirm consent_engine.llm.client exports
    # chat_with_context; any ImportError is reported as a missing extra.
    try:
        from consent_engine.llm.client import chat_with_context
    except ImportError:
        print("error: chat requires `pip install consent-engine[chat]`", file=sys.stderr)
        return 1

    # The directory can exist while a file inside it is missing or corrupt;
    # report that as a normal CLI error instead of a traceback.
    try:
        audit = json.loads(
            (audit_dir / "audit_result.json").read_text(encoding="utf-8")
        )
        evidence_lines = (
            (audit_dir / "evidence.jsonl").read_text(encoding="utf-8").splitlines()
        )
    except (OSError, ValueError) as exc:  # ValueError covers JSON/Unicode decode errors
        print(f"error: could not load audit bundle at {audit_dir}: {exc}", file=sys.stderr)
        return 1

    print(f"Loaded audit {args.audit_id}. {len(evidence_lines)} network "
          f"events captured. Type 'exit' to quit.\n")

    while True:
        try:
            question = input("you> ").strip()
        except (EOFError, KeyboardInterrupt):
            print()
            return 0
        # An empty line ends the session, same as 'exit' / 'quit'.
        if not question or question.lower() in {"exit", "quit"}:
            return 0
        answer = chat_with_context(
            question=question,
            audit_result=audit,
            evidence=evidence_lines,
        )
        print(f"claude> {answer}\n")
105
+
106
+
107
def main(argv: list[str] | None = None) -> int:
    """Parse the command line and run the selected subcommand.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        The exit code returned by the subcommand handler.
    """

    def _print_version(_: argparse.Namespace) -> int:
        # Handler for `version`: print the banner and report success.
        print(f"consent-engine {__version__}")
        return 0

    parser = argparse.ArgumentParser(
        prog="consent-engine",
        description="Forensic consent compliance audit engine.",
    )
    subcommands = parser.add_subparsers(dest="command", required=True)

    # audit <url> [--output-dir DIR] [--gtm-json PATH] [--har PATH]
    audit_parser = subcommands.add_parser("audit", help="Run an audit against a URL.")
    audit_parser.add_argument("url", help="The URL to audit.")
    audit_parser.add_argument("--output-dir", help="Output directory (default: ./out).")
    audit_parser.add_argument("--gtm-json", help="Optional GTM container JSON export.")
    audit_parser.add_argument("--har", help="Optional HAR file.")
    audit_parser.set_defaults(func=_audit_command)

    # chat <audit_id>
    chat_parser = subcommands.add_parser("chat", help="Chat over a completed audit.")
    chat_parser.add_argument("audit_id", help="Audit ID (the directory name under ./out/).")
    chat_parser.set_defaults(func=_chat_command)

    # version
    version_parser = subcommands.add_parser("version", help="Print version + exit.")
    version_parser.set_defaults(func=_print_version)

    parsed = parser.parse_args(argv)
    handler = parsed.func
    return handler(parsed)
130
+
131
+
132
+ if __name__ == "__main__":
133
+ sys.exit(main())
@@ -0,0 +1,37 @@
1
+ from functools import lru_cache
2
+
3
+ from pydantic import field_validator
4
+ from pydantic_settings import BaseSettings, SettingsConfigDict
5
+
6
+
7
class Settings(BaseSettings):
    # Values are read from the environment and from a UTF-8 `.env` file;
    # keys that match no field are ignored.
    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )

    # LLM
    anthropic_api_key: str | None = None
    default_audit_model: str = "vertex_ai/gemini-2.5-pro"
    default_classify_model: str = "vertex_ai/gemini-2.5-flash"

    # Gemini / Vertex AI
    gemini_api_key: str | None = None
    vertex_project: str | None = None  # GCP project ID for Vertex AI
    vertex_location: str = "us-central1"

    # App
    environment: str = "development"
    log_level: str = "INFO"

    # Playwright proxy (optional — empty string treated as None)
    playwright_proxy_url: str | None = None

    @field_validator("playwright_proxy_url", mode="before")
    @classmethod
    def empty_str_to_none(cls, v: object) -> object:
        """Map blank or ``"placeholder"`` proxy strings to ``None``.

        Non-string input and every other string are returned unchanged.
        """
        if not isinstance(v, str):
            return v
        trimmed = v.strip()
        if trimmed in ("", "placeholder"):
            return None
        return v
33
+
34
+
35
@lru_cache(maxsize=1)
def get_settings() -> Settings:
    """Return the cached Settings instance, constructing it on first call."""
    instance = Settings()  # type: ignore[call-arg,unused-ignore]
    return instance
File without changes
@@ -0,0 +1,50 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from typing import Any
5
+
6
+ import litellm
7
+
8
+
9
def _propagate_api_keys() -> None:
    """Propagate API keys from pydantic-settings into os.environ for LiteLLM.

    pydantic-settings reads .env into a Python object but does NOT set OS env
    vars. LiteLLM reads provider keys directly from os.environ, so we bridge
    the gap here without overwriting keys that were already set at process start.

    This is best-effort: any failure is logged as a warning and otherwise
    ignored, so the function never raises.
    """
    import logging  # local import; only needed on the failure path

    try:
        from consent_engine.config import get_settings  # local import to avoid circular

        settings = get_settings()
        pairs = [
            ("GEMINI_API_KEY", settings.gemini_api_key),
            ("ANTHROPIC_API_KEY", settings.anthropic_api_key),
        ]
        for env_var, value in pairs:
            # Only fill variables that are unset or empty; never override.
            if value and not os.environ.get(env_var):
                os.environ[env_var] = value
    except Exception as exc:  # noqa: BLE001
        # Still best-effort, but no longer silent: a broken .env or settings
        # import would otherwise surface later as an opaque auth error.
        logging.getLogger(__name__).warning(
            "Could not propagate API keys from settings: %s", exc
        )
29
+
30
+
31
class LLMClient:
    """Thin LiteLLM wrapper. Swap `model` string to change LLM providers."""

    def __init__(self, model: str) -> None:
        """Store the model identifier, then bridge configured API keys into the environment."""
        self.model = model
        _propagate_api_keys()

    async def complete(
        self,
        messages: list[dict[str, Any]],
        tools: list[dict[str, Any]] | None = None,
        system: str | None = None,
    ) -> dict[str, Any]:
        """Send one chat completion request via ``litellm.acompletion``.

        Args:
            messages: Conversation messages, passed through as given.
            tools: Optional tool definitions; omitted from the call when falsy.
            system: Optional system prompt; when truthy it is prepended as a
                ``{"role": "system"}`` message.

        Returns:
            The LiteLLM response converted with ``dict()``.
        """
        if system:
            outgoing = [{"role": "system", "content": system}] + messages
        else:
            outgoing = messages
        request: dict[str, Any] = {"model": self.model, "messages": outgoing}
        if tools:
            request["tools"] = tools
        reply = await litellm.acompletion(**request)
        return dict(reply)
@@ -0,0 +1,185 @@
1
+ """MCP server wrapper for consent-engine.
2
+
3
+ Exposes the audit pipeline as Model Context Protocol tools so Claude Desktop
4
+ (and any other MCP host) can run an audit, read the result, and query the
5
+ captured evidence from a conversation.
6
+
7
+ Run standalone:
8
+ uvx consent-engine-mcp
9
+
10
+ Register in Claude Desktop config:
11
+ {
12
+ "mcpServers": {
13
+ "consent-engine": {
14
+ "command": "uvx",
15
+ "args": ["consent-engine-mcp"]
16
+ }
17
+ }
18
+ }
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ import asyncio
24
+ import json
25
+ from pathlib import Path
26
+ from typing import Any
27
+
28
+ # `mcp` is an optional dependency. If the user installed
29
+ # `pip install consent-engine[mcp]` we get it; otherwise we surface a clear
30
+ # error rather than failing on import.
31
+ try:
32
+ from mcp.server import Server
33
+ from mcp.server.stdio import stdio_server
34
+ from mcp.types import TextContent, Tool
35
+ except ImportError as e: # pragma: no cover
36
+ raise SystemExit(
37
+ "MCP support requires the optional [mcp] extra:\n"
38
+ " pip install 'consent-engine[mcp]'\n"
39
+ ) from e
40
+
41
+
42
# Module-level MCP server instance; the list_tools/call_tool handlers below
# register on it and `_run()` serves it over stdio.
server: Server = Server("consent-engine")
43
+
44
+
45
@server.list_tools()
async def list_tools() -> list[Tool]:
    """Advertise the three tools this server exposes, with their input schemas.

    Each description is what the MCP host shows the model, so it must describe
    only parameters that the schema actually accepts.
    """
    return [
        Tool(
            name="audit_url",
            description=(
                "Run a forensic consent-compliance audit against a URL. "
                "Returns the audit_id, a one-paragraph executive summary, "
                "and a violations count. Use read_audit_result / "
                "query_evidence to drill into specifics."
            ),
            inputSchema={
                "type": "object",
                "properties": {"url": {"type": "string"}},
                "required": ["url"],
            },
        ),
        Tool(
            name="read_audit_result",
            description=(
                "Load the structured audit_result.json for a prior audit. "
                "Returns the full Pydantic model as JSON."
            ),
            inputSchema={
                "type": "object",
                "properties": {"audit_id": {"type": "string"}},
                "required": ["audit_id"],
            },
        ),
        Tool(
            name="query_evidence",
            # The earlier text promised a time-window filter that the schema
            # does not offer; the description now lists only real filters.
            description=(
                "Filter the captured network evidence for a prior audit. "
                "Use this when the user asks 'why did X fire' or which "
                "requests went to a given host. Filter by url substring "
                "and/or host substring; max_results caps the number of "
                "events returned."
            ),
            inputSchema={
                "type": "object",
                "properties": {
                    "audit_id": {"type": "string"},
                    "url_contains": {"type": "string"},
                    "host_contains": {"type": "string"},
                    "max_results": {"type": "integer", "default": 20},
                },
                "required": ["audit_id"],
            },
        ),
    ]
94
+
95
+
96
@server.call_tool()
async def call_tool(name: str, arguments: dict[str, Any]) -> list[TextContent]:
    """Route an MCP tool invocation to its implementation.

    Raises:
        ValueError: If ``name`` is not one of the advertised tools.
        KeyError: If a required argument is missing from ``arguments``.
    """
    if name == "audit_url":
        result = await _audit_url(arguments["url"])
    elif name == "read_audit_result":
        result = _read_audit_result(arguments["audit_id"])
    elif name == "query_evidence":
        result = _query_evidence(arguments)
    else:
        raise ValueError(f"Unknown tool: {name}")
    return result
105
+
106
+
107
async def _audit_url(url: str) -> list[TextContent]:
    """Scan and classify ``url``, persist the bundle under ./out, return a text summary.

    Writes ``audit_result.json`` and ``evidence.jsonl`` into
    ``./out/<audit_id>/`` and replies with the violation and warning counts
    plus the generated executive summary.
    """
    # Lazy import — avoids pulling Playwright at MCP server start
    from consent_engine.tools.tool_02_violation_classifier import classify
    from consent_engine.tools.tool_03_browser_scanner import scan_page
    from consent_engine.tools.tool_08_report_generator import generate_executive_summary

    scan_result = await scan_page(url=url)
    result = classify(scan_result)

    bundle_dir = Path("./out") / result.audit_id
    bundle_dir.mkdir(parents=True, exist_ok=True)

    # Structured result first, then one JSON line per captured request.
    result_json = json.dumps(result.model_dump(mode="json"), indent=2, default=str)
    (bundle_dir / "audit_result.json").write_text(result_json)
    with (bundle_dir / "evidence.jsonl").open("w") as sink:
        for captured in scan_result.network_requests:
            record = json.dumps(captured.model_dump(mode="json"), default=str)
            sink.write(record + "\n")

    summary = generate_executive_summary(result)
    body = (
        f"Audit complete: {result.audit_id}\n"
        f" URL: {url}\n"
        f" Violations: {len(result.violations)}\n"
        f" Warnings: {len(result.warnings)}\n\n"
        f"Summary:\n{summary}"
    )
    return [TextContent(type="text", text=body)]
134
+
135
+
136
def _read_audit_result(audit_id: str) -> list[TextContent]:
    """Return the stored ``audit_result.json`` for ``audit_id`` as text.

    Args:
        audit_id: Directory name under ./out/. It must be a single path
            component; anything containing separators, ``..`` or an absolute
            path is rejected so the lookup cannot leave ./out.

    Returns:
        One TextContent holding the file contents, or an explanatory message
        when the id is invalid or no bundle exists.
    """
    # The id arrives from the MCP host, so treat it as untrusted input.
    if (
        not isinstance(audit_id, str)
        or audit_id in {"", ".", ".."}
        or Path(audit_id).name != audit_id
    ):
        return [TextContent(type="text", text=f"Invalid audit_id: {audit_id!r}")]

    path = Path("./out") / audit_id / "audit_result.json"
    if not path.exists():
        return [TextContent(type="text", text=f"No audit bundle at {path}")]
    return [TextContent(type="text", text=path.read_text(encoding="utf-8"))]
141
+
142
+
143
def _query_evidence(args: dict[str, Any]) -> list[TextContent]:
    """Filter the ``evidence.jsonl`` of a prior audit.

    Args:
        args: Tool arguments. ``audit_id`` is required; ``url_contains`` and
            ``host_contains`` are optional case-insensitive substrings;
            ``max_results`` caps the number of events (default 20).

    Returns:
        One TextContent with the match count and the matching events as JSON,
        or an explanatory message when the id is invalid or no evidence exists.
    """
    from urllib.parse import urlsplit  # local import: only this tool needs it

    audit_id = args["audit_id"]
    max_results = args.get("max_results")
    if max_results is None:  # key absent or explicitly null
        max_results = 20
    url_contains = (args.get("url_contains") or "").lower()
    host_contains = (args.get("host_contains") or "").lower()

    # The id arrives from the MCP host; keep the lookup inside ./out.
    if (
        not isinstance(audit_id, str)
        or audit_id in {"", ".", ".."}
        or Path(audit_id).name != audit_id
    ):
        return [TextContent(type="text", text=f"Invalid audit_id: {audit_id!r}")]

    path = Path("./out") / audit_id / "evidence.jsonl"
    if not path.exists():
        return [TextContent(type="text", text=f"No evidence at {path}")]

    matches: list[dict[str, Any]] = []
    with path.open(encoding="utf-8") as fh:
        for line in fh:
            # Check the cap before appending so max_results <= 0 yields nothing.
            if len(matches) >= max_results:
                break
            try:
                evt = json.loads(line)
            except json.JSONDecodeError:
                continue  # blank or corrupt line
            if not isinstance(evt, dict):
                continue  # valid JSON but not an event object
            u = str(evt.get("url") or "").lower()
            if url_contains and url_contains not in u:
                continue
            if host_contains:
                # Match the host only, not the path or query string. If no host
                # can be parsed, fall back to the whole URL.
                try:
                    host = urlsplit(u).hostname or u
                except ValueError:
                    host = u
                if host_contains not in host:
                    continue
            matches.append(evt)

    return [TextContent(
        type="text",
        text=f"{len(matches)} match(es):\n" + json.dumps(matches, indent=2, default=str),
    )]
172
+
173
+
174
def cli() -> None:
    """Entrypoint registered as `consent-engine-mcp` in pyproject.toml."""
    # Drive the async server loop to completion on a fresh event loop.
    serve = _run()
    asyncio.run(serve)
177
+
178
+
179
async def _run() -> None:
    """Run the MCP server over the stdio transport."""
    async with stdio_server() as streams:
        reader, writer = streams
        init_options = server.create_initialization_options()
        await server.run(reader, writer, init_options)
182
+
183
+
184
+ if __name__ == "__main__": # pragma: no cover
185
+ cli()
File without changes
@@ -0,0 +1,23 @@
1
+ from enum import StrEnum
2
+ from typing import Literal
3
+
4
+ from pydantic import BaseModel
5
+
6
+
7
# Consent-management platforms known to the request model; OneTrust is the
# only member defined here.
class CMPProvider(StrEnum):
    ONETRUST = "onetrust"
9
+
10
+
11
# Consent states a request can ask for.
class ConsentState(StrEnum):
    OPTED_IN = "opted_in"
    OPTED_OUT = "opted_out"
    # NOTE(review): presumably an opt-out asserted via Global Privacy Control — confirm.
    GPC_OPTED_OUT = "gpc_opted_out"
15
+
16
+
17
# Audit request model. Only `url` is required; every other field has a default.
class AuditRequest(BaseModel):
    url: str  # plain string; no URL validation is applied by this model
    # The Literal restricts this field to OneTrust.
    cmp_provider: Literal[CMPProvider.ONETRUST] = CMPProvider.ONETRUST
    consent_state: ConsentState = ConsentState.OPTED_OUT  # defaults to opted-out
    # Optional inputs, all defaulting to None.
    # NOTE(review): presumably a GTM export, a OneTrust receipt JWT and a path
    # to a HAR file — confirm against the code that consumes them.
    gtm_container_json: str | None = None
    onetrust_receipt_jwt: str | None = None
    har_file_path: str | None = None
@@ -0,0 +1,152 @@
1
+ from datetime import datetime
2
+ from enum import StrEnum
3
+ from typing import Literal
4
+
5
+ from pydantic import BaseModel
6
+
7
+ from .vendor import Vendor
8
+
9
+
10
# How the GTM container data for an audit was obtained.
class GTMExtractionMethod(StrEnum):
    LIVE = "live"  # Intercepted from gtm.js during scan — strongest evidence
    PROVIDED = "provided"  # User-supplied JSON export
    NONE = "none"  # No container data available
14
+
15
+
16
# Verdict attached to a vendor finding.
class ViolationStatus(StrEnum):
    CONFIRMED = "confirmed_violation"
    LIKELY = "likely_violation"
    REQUIRES_INVESTIGATION = "requires_further_investigation"
    NO_EVIDENCE = "no_evidence_of_violation"
    ACM_COMPLIANT = "acm_cookieless_ping"  # Google tag firing cookieless (correct ACM behaviour)
22
+
23
+
24
# Scan methodology labels (S1–S3) plus two qualified S3 outcomes.
class MethodologyFlag(StrEnum):
    S1 = "s1_baseline"
    S2 = "s2_post_optout_no_reload"
    S3 = "s3_fresh_load_optout_preset"
    # S3 run completed, but consent injection could not be verified against
    # a denied post-injection Consent Mode signal (e.g. unknown CMP, or
    # injection silently did not suppress tracking). Treat as non-definitive.
    INCONCLUSIVE_UNKNOWN_CMP = "s3_inconclusive_unknown_cmp"
    # S3 run completed with a recognised CMP and a matching injection plan,
    # but Google Consent Mode beacons continued firing with GCS=G111
    # throughout the scan. This is definitive evidence that the site's tag
    # wiring fires before or regardless of the consent state the CMP stores
    # — the CMP is working; the integration is broken. Findings are legally
    # defensible.
    S3_CONSENT_WIRING_BROKEN = "s3_consent_wiring_broken"
39
+
40
+
41
# A GCS consent signal split into three string fields.
# NOTE(review): presumably `raw` is the value as observed on the wire (e.g.
# "G111") and the other two are decoded from it — confirm with the parser.
class GCSValue(BaseModel):
    raw: str
    ad_storage: str
    analytics_storage: str
45
+
46
+
47
class TagConsentEntry(BaseModel):
    """Per-tag GTM consent configuration extracted by Tool 1."""

    tag_id: int
    tag_name: str
    tag_type: str  # GTM function code, e.g. "__html", "__ua", "__ga4"
    is_google_tag: bool
    consent_types: list[str] = []  # e.g. ["ad_storage", "analytics_storage"]
    # No default, so this must be supplied; it is limited to the four labels below.
    requirement: Literal[
        "required",  # explicit consent settings present, enforced
        "optional",  # explicit consent settings, default_value=1
        "acm_managed",  # Google tag — ACM handles consent via cookieless ping
        "missing",  # NON-Google tag with no consent settings — VIOLATION
    ]
61
+
62
+
63
class GCSHit(BaseModel):
    """A single GCS signal observation from HAR analysis (Tool 4)."""

    url: str
    gcs_value: GCSValue
    gcd_raw: str | None = None  # optional; None when not supplied
    timestamp_ms: float  # milliseconds from first HAR entry
70
+
71
+
72
class HarAnalysis(BaseModel):
    """Output of Tool 4 HAR file analysis."""

    # All three collections default to empty lists.
    gcs_timeline: list[GCSHit] = []
    post_payloads: list[str] = []  # raw POST bodies (beacons, dataLayer pushes)
    consent_api_responses: list[str] = []  # response bodies from consent endpoints
78
+
79
+
80
class PixelFiring(BaseModel):
    """A tracking pixel endpoint observed firing in network traffic post-consent-denial.

    This is the primary evidence method used by plaintiff attorneys — detecting
    known ad/analytics pixel endpoints in HAR/network traffic regardless of cookies.
    """

    vendor_name: str  # e.g. "Meta Pixel", "TikTok Pixel", "LinkedIn Insight Tag"
    url: str  # full request URL observed
    # `category` and `legal_exposure` are plain strings; the values listed in
    # the comments are not enforced by the type.
    category: str  # "advertising" | "analytics" | "session_recording"
    legal_exposure: str  # "high" | "medium"
    matched_pattern: str  # the pattern that triggered this match
    is_acm_ping: bool = (
        False  # True = Google ACM cookieless ping (G100+npa=1) — expected behavior, not a violation
    )
95
+
96
+
97
# Finding for a single vendor: a verdict, the methodology it was reached
# under, and the supporting observations.
class VendorFinding(BaseModel):
    vendor: Vendor
    status: ViolationStatus
    methodology: MethodologyFlag
    cookies_observed: list[str] = []
    gcs_value: GCSValue | None = None
    gpc_honored: bool | None = None  # tri-state: True / False / None
    evidence: list[str] = []
    notes: str = ""
106
+
107
+
108
# Top-level audit record. `audit_id`, `url`, `timestamp` and `methodology`
# are required; every other field has a default.
class AuditResult(BaseModel):
    audit_id: str
    url: str
    timestamp: datetime
    methodology: MethodologyFlag
    gtm_extraction_method: GTMExtractionMethod = GTMExtractionMethod.NONE
    gtm_container_id: str | None = None  # e.g. "GTM-XXXXXX"
    ssgtm_detected: bool = False
    ssgtm_domain: str | None = None
    gpc_tested: bool = False
    # GPC signal test — populated when a dedicated GPC scan is run alongside
    # the primary S3 scan. Lets the report show a clear pass/fail on whether
    # the site respected the Global Privacy Control opt-out signal.
    gpc_header_sent: bool = False  # Sec-GPC: 1 HTTP header on all requests
    gpc_navigator_api_set: bool = False  # navigator.globalPrivacyControl = true injected
    gpc_signal_respected: bool | None = None  # True = pixel count dropped; None = not tested
    gpc_vendors_after_signal: int = 0  # vendors still firing after GPC asserted
    gpc_pixel_count_baseline: int = 0  # pixel firings during primary S3 opt-out scan
    gpc_pixel_count_with_gpc: int = 0  # pixel firings during GPC scan
    # Scan-level consent signals from primary scan network traffic
    gcs_value: GCSValue | None = None
    gcd_raw: str | None = None
    cmp_interaction_method: str | None = (
        None  # "cookie_injection" | "banner_click" | "banner_click_inconclusive" | "banner_click_failed" | "banner_click_reverted"
    )
    detected_cmp: str | None = None
    cmp_detection_confidence: str | None = None
    bot_detection_encountered: bool = False
    scan_mode_used: Literal["playwright", "stealthy"] = "playwright"
    # Records whether the primary Chromium scan succeeded ("playwright") or the
    # Scrapling/Camoufox stealthy fallback had to be engaged ("stealthy") — set
    # when the primary scan hit a WAF/bot challenge.
    detected_jurisdiction: str | None = (
        None  # "EU" | "US" | "CA"; str (not Literal) to allow extension without schema migration
    )
    # Collections below all default to empty lists.
    tag_consent_map: list[TagConsentEntry] = []
    gcs_timeline: list[GCSHit] = []
    post_payloads: list[str] = []
    consent_api_responses: list[str] = []
    findings: list[VendorFinding] = []
    pixel_firings: list[
        PixelFiring
    ] = []  # Network-level pixel endpoint detections (plaintiff evidence)
    open_gaps: list[str] = []
    remediation: list[str] = []