PyPI - clausekeeper-mcp - Versions diffs - 0.1.0__tar.gz - Mend

clausekeeper-mcp 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

clausekeeper_mcp-0.1.0/PKG-INFO +122 -0
clausekeeper_mcp-0.1.0/README.md +112 -0
clausekeeper_mcp-0.1.0/clausekeeper_core/__init__.py +1 -0
clausekeeper_mcp-0.1.0/clausekeeper_core/clause_rules.py +173 -0
clausekeeper_mcp-0.1.0/clausekeeper_core/scanner.py +132 -0
clausekeeper_mcp-0.1.0/clausekeeper_mcp.egg-info/PKG-INFO +122 -0
clausekeeper_mcp-0.1.0/clausekeeper_mcp.egg-info/SOURCES.txt +12 -0
clausekeeper_mcp-0.1.0/clausekeeper_mcp.egg-info/dependency_links.txt +1 -0
clausekeeper_mcp-0.1.0/clausekeeper_mcp.egg-info/entry_points.txt +2 -0
clausekeeper_mcp-0.1.0/clausekeeper_mcp.egg-info/requires.txt +1 -0
clausekeeper_mcp-0.1.0/clausekeeper_mcp.egg-info/top_level.txt +2 -0
clausekeeper_mcp-0.1.0/pyproject.toml +22 -0
clausekeeper_mcp-0.1.0/server.py +115 -0
clausekeeper_mcp-0.1.0/setup.cfg +4 -0

clausekeeper_mcp-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,122 @@
+Metadata-Version: 2.4
+Name: clausekeeper-mcp
+Version: 0.1.0
+Summary: MCP server exposing ClauseKeeper's deterministic compliance scanner to agents.
+Author: ClauseKeeper contributors
+License: AGPL-3.0-or-later
+Requires-Python: >=3.11
+Description-Content-Type: text/markdown
+Requires-Dist: mcp>=1.0.0
+# ClauseKeeper MCP Server
+`clausekeeper-mcp` exposes [ClauseKeeper](../README.md)'s deterministic compliance scanner as Model Context Protocol (MCP) tools for Claude Desktop, Cursor, and other MCP hosts.
+It is a thin wrapper around a vendored copy of the real ClauseKeeper scanner in `clausekeeper_core/scanner.py` and rule library in `clausekeeper_core/clause_rules.py`:
+- no LLM calls
+- no API keys
+- no paid services
+- stdio transport via the official Python `mcp` SDK
+## Tools
+- `scan_policy_text(text: str)` — scans pasted policy/legal text with the vendored ClauseKeeper scanner and returns the score, grade, counts, categories, and missing/stale clause checklist.
+- `scan_policy_url(url: str)` — fetches a URL, strips HTML with ClauseKeeper's `html_to_text`, then scans the visible text.
+- `list_clause_rules()` — returns the vendored clause rules and categories.
+## Run locally
+From the main ClauseKeeper repository:
+```bash
+cd mcp
+uv run clausekeeper-mcp
+```
+`uv run` creates/uses the local project environment and installs the `mcp` SDK dependency automatically. Because this is a stdio MCP server, `uv run clausekeeper-mcp` waits for an MCP client on stdin/stdout.
+You can also install it into an explicit virtual environment:
+```bash
+cd mcp
+uv venv
+uv pip install --python .venv/bin/python -e .
+.venv/bin/clausekeeper-mcp
+```
+Or run it directly:
+```bash
+cd /path/to/clausekeeper/mcp
+uv run python server.py
+```
+## MCP host configuration
+Use an absolute path to this `mcp` directory.
+### Claude Desktop
+Add this to your Claude Desktop MCP configuration file:
+```json
+{
+  "mcpServers": {
+    "clausekeeper": {
+      "command": "uv",
+      "args": [
+        "--directory",
+        "/absolute/path/to/clausekeeper/mcp",
+        "run",
+        "clausekeeper-mcp"
+      ]
+    }
+  }
+}
+```
+### Cursor or other MCP hosts
+Use the same server definition in your MCP settings:
+```json
+{
+  "mcpServers": {
+    "clausekeeper": {
+      "command": "uv",
+      "args": [
+        "--directory",
+        "/absolute/path/to/clausekeeper/mcp",
+        "run",
+        "clausekeeper-mcp"
+      ]
+    }
+  }
+}
+```
+If your host does not support `uv --directory`, use the installed console script from a virtual environment instead:
+```json
+{
+  "mcpServers": {
+    "clausekeeper": {
+      "command": "/absolute/path/to/clausekeeper/mcp/.venv/bin/clausekeeper-mcp",
+      "args": []
+    }
+  }
+}
+```
+## Keeping the vendored scanner in sync
+The `clausekeeper_core/` package is vendored from the main app's `app/scanner.py` and `app/clause_rules.py` so the MCP package is self-contained for PyPI/registry installs. When the scanner or clause rules change in `app/`, update the matching vendored files here and re-run the MCP self-containment and parity checks.
+## Publishing metadata
+No `server.json` is included here because the current OSS MCP Community Registry / GitHub MCP Registry metadata format should be confirmed against the registry documentation at publication time. When publishing, add the registry-required metadata file with the package name, description, license, repository URL, runtime command, and tool list.
+## Main project
+See the main ClauseKeeper project at [../README.md](../README.md).

clausekeeper_mcp-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,112 @@
+# ClauseKeeper MCP Server
+`clausekeeper-mcp` exposes [ClauseKeeper](../README.md)'s deterministic compliance scanner as Model Context Protocol (MCP) tools for Claude Desktop, Cursor, and other MCP hosts.
+It is a thin wrapper around a vendored copy of the real ClauseKeeper scanner in `clausekeeper_core/scanner.py` and rule library in `clausekeeper_core/clause_rules.py`:
+- no LLM calls
+- no API keys
+- no paid services
+- stdio transport via the official Python `mcp` SDK
+## Tools
+- `scan_policy_text(text: str)` — scans pasted policy/legal text with the vendored ClauseKeeper scanner and returns the score, grade, counts, categories, and missing/stale clause checklist.
+- `scan_policy_url(url: str)` — fetches a URL, strips HTML with ClauseKeeper's `html_to_text`, then scans the visible text.
+- `list_clause_rules()` — returns the vendored clause rules and categories.
+## Run locally
+From the main ClauseKeeper repository:
+```bash
+cd mcp
+uv run clausekeeper-mcp
+```
+`uv run` creates/uses the local project environment and installs the `mcp` SDK dependency automatically. Because this is a stdio MCP server, `uv run clausekeeper-mcp` waits for an MCP client on stdin/stdout.
+You can also install it into an explicit virtual environment:
+```bash
+cd mcp
+uv venv
+uv pip install --python .venv/bin/python -e .
+.venv/bin/clausekeeper-mcp
+```
+Or run it directly:
+```bash
+cd /path/to/clausekeeper/mcp
+uv run python server.py
+```
+## MCP host configuration
+Use an absolute path to this `mcp` directory.
+### Claude Desktop
+Add this to your Claude Desktop MCP configuration file:
+```json
+{
+  "mcpServers": {
+    "clausekeeper": {
+      "command": "uv",
+      "args": [
+        "--directory",
+        "/absolute/path/to/clausekeeper/mcp",
+        "run",
+        "clausekeeper-mcp"
+      ]
+    }
+  }
+}
+```
+### Cursor or other MCP hosts
+Use the same server definition in your MCP settings:
+```json
+{
+  "mcpServers": {
+    "clausekeeper": {
+      "command": "uv",
+      "args": [
+        "--directory",
+        "/absolute/path/to/clausekeeper/mcp",
+        "run",
+        "clausekeeper-mcp"
+      ]
+    }
+  }
+}
+```
+If your host does not support `uv --directory`, use the installed console script from a virtual environment instead:
+```json
+{
+  "mcpServers": {
+    "clausekeeper": {
+      "command": "/absolute/path/to/clausekeeper/mcp/.venv/bin/clausekeeper-mcp",
+      "args": []
+    }
+  }
+}
+```
+## Keeping the vendored scanner in sync
+The `clausekeeper_core/` package is vendored from the main app's `app/scanner.py` and `app/clause_rules.py` so the MCP package is self-contained for PyPI/registry installs. When the scanner or clause rules change in `app/`, update the matching vendored files here and re-run the MCP self-containment and parity checks.
+## Publishing metadata
+No `server.json` is included here yet, because the metadata format required by the OSS MCP Community Registry / GitHub MCP Registry still needs to be confirmed against the current registry documentation at publication time. When publishing, add the registry-required metadata file with the package name, description, license, repository URL, runtime command, and tool list.
+## Main project
+See the main ClauseKeeper project at [../README.md](../README.md).

clausekeeper_mcp-0.1.0/clausekeeper_core/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ """Vendored ClauseKeeper scanner core for the standalone MCP package."""

clausekeeper_mcp-0.1.0/clausekeeper_core/clause_rules.py ADDED Viewed

@@ -0,0 +1,173 @@
+# Vendored from app/clause_rules.py — keep in sync. Source of truth is the main ClauseKeeper app.
+"""
+ClauseKeeper clause-rules configuration.
+This is the heart of the FREE compliance scanner. Each rule is a self-contained,
+$0-to-run heuristic: it looks for keyword/phrase signals in a policy document and
+decides whether the clause is PRESENT, MISSING, or STALE (present but using
+out-of-date language for 2026).
+Rule-based on purpose: no paid LLM dependency, deterministic, auditable, and cheap.
+To tune the scanner, edit this file only — the scanner engine reads it generically.
+Each rule:
+  key:        stable id
+  label:      human title in the scorecard
+  weight:     points contributed to the /100 score when satisfied
+  signals:    phrases whose presence indicates the clause EXISTS (case-insensitive)
+  stale_signals (optional): phrases that, IF the clause exists, suggest it is
+              up-to-date for 2026. If the clause is present but NONE of these
+              appear, we flag it STALE.
+  why:        short explanation shown to the user
+  fix:        one-line guidance on what to add
+  category:   grouping for the scorecard
+"""
+# Ordered table of clause rules. The scanner walks this list in order, so the
+# scorecard rows (and the order in which categories first appear) follow it.
+# Signal phrases are lowercase because the scanner lowercases the policy text and
+# tests each phrase with a plain substring check; short tokens therefore also match
+# inside longer words (e.g. "dpo" inside "endpoint", "version" inside "conversion").
+# Rules without "stale_signals" can only be present or missing, never needs_update.
+CLAUSE_RULES = [
+    {
+        "key": "gdpr",
+        "label": "GDPR (EU) data-rights language",
+        "category": "Privacy regimes",
+        "weight": 12,
+        "signals": [
+            "gdpr", "general data protection regulation", "lawful basis",
+            "right to erasure", "right to be forgotten", "data subject",
+            "supervisory authority",
+        ],
+        "stale_signals": ["lawful basis", "data subject rights", "right to erasure"],
+        "why": "EU/EEA users trigger GDPR. You must state lawful basis, data-subject rights, and how to exercise them.",
+        "fix": "Add a GDPR section covering lawful basis, the 8 data-subject rights, and EU representative/contact.",
+    },
+    {
+        "key": "ccpa",
+        "label": "CCPA/CPRA (California) rights",
+        "category": "Privacy regimes",
+        "weight": 12,
+        "signals": [
+            "ccpa", "cpra", "california consumer privacy", "do not sell",
+            "do not share", "sale of personal information", "california residents",
+            "right to opt out",
+        ],
+        "stale_signals": ["cpra", "do not sell or share", "sensitive personal information", "right to opt out"],
+        "why": "California's CPRA (effective successor to CCPA) adds 'do not sell OR share' and sensitive-data rights.",
+        "fix": "Add a California section with 'Do Not Sell or Share My Personal Information' and sensitive-PI opt-out.",
+    },
+    {
+        "key": "cookie_consent",
+        "label": "Cookie consent / tracking disclosure",
+        "category": "Tracking & cookies",
+        "weight": 10,
+        # "tracking technolog" is a stem: it matches both "technology" and "technologies".
+        "signals": [
+            "cookie", "cookies", "tracking technolog", "pixel", "local storage",
+            "web beacon", "consent banner",
+        ],
+        "stale_signals": ["consent", "manage cookie", "reject all", "cookie preferences"],
+        "why": "You must disclose cookies/trackers and, for EU users, obtain prior consent (not just notice).",
+        "fix": "Add a cookie section listing categories and a consent mechanism with a 'Reject all' option.",
+    },
+    {
+        "key": "data_retention",
+        "label": "Data-retention policy",
+        "category": "Data handling",
+        "weight": 9,
+        "signals": [
+            "retention", "retain your", "how long we keep", "retention period",
+            "delete your data", "data is kept",
+        ],
+        "why": "Modern privacy laws require stating how long you keep personal data and the deletion criteria.",
+        "fix": "Add a retention section: state periods (or criteria) and what happens to data after.",
+    },
+    {
+        "key": "ai_disclosure",
+        "label": "AI-use disclosure (2026 wedge)",
+        "category": "AI & automation (2026)",
+        "weight": 14,
+        "signals": [
+            "artificial intelligence", "automated decision", "machine learning",
+            "ai model", "ai-powered", "ai features", "generative ai",
+            "profiling", "algorithm",
+        ],
+        "stale_signals": [
+            "eu ai act", "ai act", "automated decision-making", "human review",
+            "training data", "ai disclosure", "generative ai",
+        ],
+        "why": "2026 rules (EU AI Act phased obligations, US state AI laws) require disclosing AI/automated processing and user rights around it.",
+        "fix": "Add an AI-disclosure section: what AI you use, automated decisions, human-review rights, and training-data stance.",
+    },
+    {
+        "key": "automated_decision",
+        "label": "Automated decision-making & profiling rights",
+        "category": "AI & automation (2026)",
+        "weight": 8,
+        "signals": [
+            "automated decision", "profiling", "solely automated",
+            "automated processing", "human intervention", "human review",
+        ],
+        "why": "GDPR Art. 22 + 2026 AI rules give users rights re: decisions made about them by algorithms.",
+        "fix": "State whether you make solely-automated decisions and how users can request human review.",
+    },
+    {
+        "key": "contact_dpo",
+        "label": "Contact / DPO information",
+        "category": "Accountability",
+        "weight": 8,
+        # The bare "@" signal means any e-mail address in the text satisfies this rule's signals.
+        "signals": [
+            "contact us", "data protection officer", "dpo", "privacy@",
+            "email us", "reach us", "@",
+        ],
+        "stale_signals": ["data protection officer", "dpo", "privacy@", "contact"],
+        "why": "Users must be able to reach you to exercise rights; EU often expects a DPO/representative contact.",
+        "fix": "Add a clear contact email (e.g., privacy@yourdomain) and, if applicable, a DPO/EU rep.",
+    },
+    {
+        "key": "coppa_children",
+        "label": "Children's data (COPPA / age limits)",
+        "category": "Special categories",
+        "weight": 7,
+        "signals": [
+            "coppa", "children", "under 13", "under 16", "minor",
+            "parental consent", "age of", "not directed to children",
+        ],
+        "why": "COPPA (US) and GDPR-K require handling of minors' data; even 'we don't serve children' must be stated.",
+        "fix": "Add a children's-data section: minimum age, no knowing collection, and parental-contact path.",
+    },
+    {
+        "key": "data_sharing",
+        "label": "Third-party sharing / sub-processors",
+        "category": "Data handling",
+        "weight": 6,
+        "signals": [
+            "third party", "third-party", "service provider", "sub-processor",
+            "subprocessor", "share your", "disclose your", "partners",
+        ],
+        "why": "You must disclose who you share data with (analytics, payment, hosting, AI vendors).",
+        "fix": "List categories of recipients/sub-processors (e.g., hosting, analytics, payments, AI providers).",
+    },
+    {
+        "key": "intl_transfer",
+        "label": "International data transfers",
+        "category": "Privacy regimes",
+        "weight": 6,
+        "signals": [
+            "international transfer", "transfer your data", "outside the eea",
+            "standard contractual clauses", "scc", "cross-border", "data transfer",
+        ],
+        "why": "Transferring EU data abroad requires a lawful transfer mechanism (e.g., SCCs) and disclosure.",
+        "fix": "Add a transfers section naming your mechanism (SCCs / adequacy) and destination regions.",
+    },
+    {
+        "key": "last_updated",
+        "label": "'Last updated' date / version stamp",
+        "category": "Accountability",
+        "weight": 8,
+        "signals": [
+            "last updated", "effective date", "last revised", "version",
+            "last modified",
+        ],
+        "why": "A visible last-updated date is expected and signals the policy is maintained — core to staying compliant.",
+        "fix": "Add a 'Last updated: <date>' line at the top and bump it whenever you change the policy.",
+    },
+]
+# Maximum achievable raw score (sum of weights) — used to normalize to /100.
+# With the rules above the weights add up to exactly 100; the scanner still divides
+# by this value, so the table can be re-weighted without touching the engine.
+MAX_RAW_SCORE = sum(r["weight"] for r in CLAUSE_RULES)

clausekeeper_mcp-0.1.0/clausekeeper_core/scanner.py ADDED Viewed

@@ -0,0 +1,132 @@
+# Vendored from app/scanner.py — keep in sync. Source of truth is the main ClauseKeeper app.
+"""
+ClauseKeeper compliance scanner engine.
+Pure, rule-based ($0 to run). Takes raw policy text (pasted or fetched from a URL)
+and evaluates it against CLAUSE_RULES, producing a scorecard.
+Status per clause:
+  present       -> signals found AND (no stale_signals OR a stale_signal found)
+  needs_update  -> signals found BUT stale_signals defined and none matched (STALE)
+  missing       -> no signals found
+"""
+import re
+from html.parser import HTMLParser
+from .clause_rules import CLAUSE_RULES, MAX_RAW_SCORE
+class _TextExtractor(HTMLParser):
+    """Strip HTML to visible text. Skips script/style content."""
+    def __init__(self):
+        super().__init__()
+        self._chunks = []
+        self._skip = False
+    def handle_starttag(self, tag, attrs):
+        if tag in ("script", "style", "noscript"):
+            self._skip = True
+    def handle_endtag(self, tag):
+        if tag in ("script", "style", "noscript"):
+            self._skip = False
+    def handle_data(self, data):
+        if not self._skip:
+            self._chunks.append(data)
+    def text(self):
+        return " ".join(self._chunks)
+def html_to_text(html: str) -> str:
+    parser = _TextExtractor()
+    try:
+        parser.feed(html)
+    except Exception:
+        # Fall back to a crude tag-strip if the parser chokes on malformed HTML.
+        return re.sub(r"<[^>]+>", " ", html)
+    return parser.text()
+def _contains_any(haystack: str, needles) -> bool:
+    return any(n in haystack for n in needles)
+def scan_text(raw_text: str) -> dict:
+    """Run all clause rules against raw policy text. Returns a scorecard dict."""
+    # Normalize: lowercase, collapse whitespace. Heuristic signals are lowercase.
+    text = re.sub(r"\s+", " ", (raw_text or "")).lower()
+    word_count = len(text.split())
+    results = []
+    raw_score = 0
+    present_ct = stale_ct = missing_ct = 0
+    for rule in CLAUSE_RULES:
+        has_signal = _contains_any(text, rule["signals"])
+        stale_signals = rule.get("stale_signals")
+        if not has_signal:
+            status = "missing"
+            earned = 0
+            missing_ct += 1
+        elif stale_signals and not _contains_any(text, stale_signals):
+            # Present but using outdated/incomplete language for 2026.
+            status = "needs_update"
+            earned = rule["weight"] // 2  # partial credit
+            stale_ct += 1
+        else:
+            status = "present"
+            earned = rule["weight"]
+            present_ct += 1
+        raw_score += earned
+        results.append({
+            "key": rule["key"],
+            "label": rule["label"],
+            "category": rule["category"],
+            "status": status,
+            "weight": rule["weight"],
+            "earned": earned,
+            "why": rule["why"],
+            "fix": rule["fix"],
+        })
+    score_100 = round((raw_score / MAX_RAW_SCORE) * 100) if MAX_RAW_SCORE else 0
+    # 0-10 headline score for the marketing "X/10" framing.
+    score_10 = round(score_100 / 10)
+    if score_100 >= 85:
+        grade, verdict = "A", "Strong — minor freshness checks only."
+    elif score_100 >= 70:
+        grade, verdict = "B", "Decent, but missing some 2026-critical clauses."
+    elif score_100 >= 50:
+        grade, verdict = "C", "Notable gaps — likely not fully 2026-compliant."
+    elif score_100 >= 30:
+        grade, verdict = "D", "Major gaps. High exposure on privacy/AI rules."
+    else:
+        grade, verdict = "F", "Little to no compliant policy language detected."
+    # Group results by category for display.
+    by_category = {}
+    for r in results:
+        by_category.setdefault(r["category"], []).append(r)
+    return {
+        "score_100": score_100,
+        "score_10": score_10,
+        "grade": grade,
+        "verdict": verdict,
+        "word_count": word_count,
+        "counts": {
+            "present": present_ct,
+            "needs_update": stale_ct,
+            "missing": missing_ct,
+            "total": len(CLAUSE_RULES),
+        },
+        "results": results,
+        "by_category": by_category,
+        "missing_or_stale": [r for r in results if r["status"] != "present"],
+    }

clausekeeper_mcp-0.1.0/clausekeeper_mcp.egg-info/PKG-INFO ADDED Viewed

@@ -0,0 +1,122 @@
+Metadata-Version: 2.4
+Name: clausekeeper-mcp
+Version: 0.1.0
+Summary: MCP server exposing ClauseKeeper's deterministic compliance scanner to agents.
+Author: ClauseKeeper contributors
+License: AGPL-3.0-or-later
+Requires-Python: >=3.11
+Description-Content-Type: text/markdown
+Requires-Dist: mcp>=1.0.0
+# ClauseKeeper MCP Server
+`clausekeeper-mcp` exposes [ClauseKeeper](../README.md)'s deterministic compliance scanner as Model Context Protocol (MCP) tools for Claude Desktop, Cursor, and other MCP hosts.
+It is a thin wrapper around a vendored copy of the real ClauseKeeper scanner in `clausekeeper_core/scanner.py` and rule library in `clausekeeper_core/clause_rules.py`:
+- no LLM calls
+- no API keys
+- no paid services
+- stdio transport via the official Python `mcp` SDK
+## Tools
+- `scan_policy_text(text: str)` — scans pasted policy/legal text with the vendored ClauseKeeper scanner and returns the score, grade, counts, categories, and missing/stale clause checklist.
+- `scan_policy_url(url: str)` — fetches a URL, strips HTML with ClauseKeeper's `html_to_text`, then scans the visible text.
+- `list_clause_rules()` — returns the vendored clause rules and categories.
+## Run locally
+From the main ClauseKeeper repository:
+```bash
+cd mcp
+uv run clausekeeper-mcp
+```
+`uv run` creates/uses the local project environment and installs the `mcp` SDK dependency automatically. Because this is a stdio MCP server, `uv run clausekeeper-mcp` waits for an MCP client on stdin/stdout.
+You can also install it into an explicit virtual environment:
+```bash
+cd mcp
+uv venv
+uv pip install --python .venv/bin/python -e .
+.venv/bin/clausekeeper-mcp
+```
+Or run it directly:
+```bash
+cd /path/to/clausekeeper/mcp
+uv run python server.py
+```
+## MCP host configuration
+Use an absolute path to this `mcp` directory.
+### Claude Desktop
+Add this to your Claude Desktop MCP configuration file:
+```json
+{
+  "mcpServers": {
+    "clausekeeper": {
+      "command": "uv",
+      "args": [
+        "--directory",
+        "/absolute/path/to/clausekeeper/mcp",
+        "run",
+        "clausekeeper-mcp"
+      ]
+    }
+  }
+}
+```
+### Cursor or other MCP hosts
+Use the same server definition in your MCP settings:
+```json
+{
+  "mcpServers": {
+    "clausekeeper": {
+      "command": "uv",
+      "args": [
+        "--directory",
+        "/absolute/path/to/clausekeeper/mcp",
+        "run",
+        "clausekeeper-mcp"
+      ]
+    }
+  }
+}
+```
+If your host does not support `uv --directory`, use the installed console script from a virtual environment instead:
+```json
+{
+  "mcpServers": {
+    "clausekeeper": {
+      "command": "/absolute/path/to/clausekeeper/mcp/.venv/bin/clausekeeper-mcp",
+      "args": []
+    }
+  }
+}
+```
+## Keeping the vendored scanner in sync
+The `clausekeeper_core/` package is vendored from the main app's `app/scanner.py` and `app/clause_rules.py` so the MCP package is self-contained for PyPI/registry installs. When the scanner or clause rules change in `app/`, update the matching vendored files here and re-run the MCP self-containment and parity checks.
+## Publishing metadata
+No `server.json` is included here because the current OSS MCP Community Registry / GitHub MCP Registry metadata format should be confirmed against the registry documentation at publication time. When publishing, add the registry-required metadata file with the package name, description, license, repository URL, runtime command, and tool list.
+## Main project
+See the main ClauseKeeper project at [../README.md](../README.md).

clausekeeper_mcp-0.1.0/clausekeeper_mcp.egg-info/SOURCES.txt ADDED Viewed

@@ -0,0 +1,12 @@
+README.md
+pyproject.toml
+server.py
+clausekeeper_core/__init__.py
+clausekeeper_core/clause_rules.py
+clausekeeper_core/scanner.py
+clausekeeper_mcp.egg-info/PKG-INFO
+clausekeeper_mcp.egg-info/SOURCES.txt
+clausekeeper_mcp.egg-info/dependency_links.txt
+clausekeeper_mcp.egg-info/entry_points.txt
+clausekeeper_mcp.egg-info/requires.txt
+clausekeeper_mcp.egg-info/top_level.txt

clausekeeper_mcp-0.1.0/clausekeeper_mcp.egg-info/dependency_links.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+

clausekeeper_mcp-0.1.0/clausekeeper_mcp.egg-info/entry_points.txt ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ [console_scripts]
2	+ clausekeeper-mcp = server:main

clausekeeper_mcp-0.1.0/clausekeeper_mcp.egg-info/requires.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ mcp>=1.0.0

clausekeeper_mcp-0.1.0/clausekeeper_mcp.egg-info/top_level.txt ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ clausekeeper_core
2	+ server

clausekeeper_mcp-0.1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,22 @@
+[project]
+name = "clausekeeper-mcp"
+version = "0.1.0"
+description = "MCP server exposing ClauseKeeper's deterministic compliance scanner to agents."
+readme = "README.md"
+requires-python = ">=3.11"
+license = { text = "AGPL-3.0-or-later" }
+authors = [{ name = "ClauseKeeper contributors" }]
+dependencies = [
+    "mcp>=1.0.0",
+]
+[project.scripts]
+clausekeeper-mcp = "server:main"
+[build-system]
+requires = ["setuptools>=68"]
+build-backend = "setuptools.build_meta"
+[tool.setuptools]
+py-modules = ["server"]
+packages = ["clausekeeper_core"]

clausekeeper_mcp-0.1.0/server.py ADDED Viewed

@@ -0,0 +1,115 @@
+"""ClauseKeeper MCP server.
+Exposes the deterministic ClauseKeeper compliance scanner as Model Context
+Protocol tools. This module intentionally keeps wrappers thin and delegates all
+scoring to the vendored ClauseKeeper scanner core.
+"""
+from __future__ import annotations
+import re
+import urllib.request
+from typing import Any
+from clausekeeper_core.clause_rules import CLAUSE_RULES, MAX_RAW_SCORE
+from clausekeeper_core.scanner import html_to_text, scan_text
+from mcp.server.fastmcp import FastMCP
+# User-Agent header sent with every policy fetch.
+# NOTE(review): "+https://github.com/" is the bare GitHub root, presumably a
+# placeholder for the project URL — confirm before the next release.
+USER_AGENT = "ClauseKeeperMCP/0.1 (+https://github.com/)"
+# Upper bound, in bytes, on a fetched response body; larger responses are rejected.
+MAX_FETCH_BYTES = 2_000_000
+# Server instance: the @mcp.tool() decorators in this module register tools on it
+# and main() runs it.
+mcp = FastMCP(
+    "clausekeeper-mcp",
+    instructions=(
+        "Use these tools to scan privacy policies, terms, cookie policies, "
+        "and related compliance documents with ClauseKeeper's deterministic "
+        "rule-based scanner. No LLM or API key is used."
+    ),
+)
+def _checklist(result: dict[str, Any]) -> list[dict[str, Any]]:
+    """Return the missing/stale clause checklist from a scanner scorecard."""
+    return [
+        {
+            "key": item["key"],
+            "label": item["label"],
+            "category": item["category"],
+            "status": item["status"],
+            "weight": item["weight"],
+            "earned": item["earned"],
+            "why": item["why"],
+            "fix": item["fix"],
+        }
+        for item in result.get("missing_or_stale", [])
+    ]
+def _scan_response(raw_text: str, *, source: str) -> dict[str, Any]:
+    """Run the real scanner and shape a compact agent-facing response."""
+    result = scan_text(raw_text)
+    return {
+        "source": source,
+        "score_100": result["score_100"],
+        "score_10": result["score_10"],
+        "grade": result["grade"],
+        "verdict": result["verdict"],
+        "word_count": result["word_count"],
+        "counts": result["counts"],
+        "missing_or_stale": _checklist(result),
+        "by_category": result["by_category"],
+    }
+def _fetch_url_text(url: str) -> str:
+    """Fetch visible text from a policy URL using stdlib urllib + scanner HTML strip."""
+    clean_url = (url or "").strip()
+    if not clean_url:
+        raise ValueError("url is required")
+    if not clean_url.startswith(("http://", "https://")):
+        clean_url = "https://" + clean_url
+    request = urllib.request.Request(clean_url, headers={"User-Agent": USER_AGENT})
+    with urllib.request.urlopen(request, timeout=15) as response:  # noqa: S310 - user-requested scan URL
+        content_type = response.headers.get("Content-Type", "")
+        charset_match = re.search(r"charset=([^;]+)", content_type, re.I)
+        charset = charset_match.group(1).strip() if charset_match else "utf-8"
+        raw = response.read(MAX_FETCH_BYTES + 1)
+        if len(raw) > MAX_FETCH_BYTES:
+            raise ValueError(f"URL response exceeds {MAX_FETCH_BYTES} byte limit")
+        html = raw.decode(charset, errors="replace")
+    return html_to_text(html)
+# NOTE(review): the docstring below is presumably published to MCP clients as the
+# tool description — treat it as client-facing text and confirm before rewording.
+@mcp.tool()
+def scan_policy_text(text: str) -> dict[str, Any]:
+    """Scan pasted policy text and return score plus missing/stale clause checklist."""
+    # A falsy value (None or "") is scanned as the empty string; the response's
+    # "source" field is always the literal "pasted text".
+    return _scan_response(text or "", source="pasted text")
+# NOTE(review): the docstring below is presumably published to MCP clients as the
+# tool description — treat it as client-facing text and confirm before rewording.
+@mcp.tool()
+def scan_policy_url(url: str) -> dict[str, Any]:
+    """Fetch a policy URL, strip HTML, and scan its visible text."""
+    # "source" echoes the caller's url string as given, not the normalized URL that
+    # _fetch_url_text actually requests. Fetch errors are not caught here and
+    # propagate to the caller.
+    return _scan_response(_fetch_url_text(url), source=url)
+@mcp.tool()
+def list_clause_rules() -> dict[str, Any]:
+    """List the ClauseKeeper rules/categories used by the scanner."""
+    categories: dict[str, int] = {}
+    for rule in CLAUSE_RULES:
+        categories[rule["category"]] = categories.get(rule["category"], 0) + 1
+    return {
+        "total_rules": len(CLAUSE_RULES),
+        "max_raw_score": MAX_RAW_SCORE,
+        "categories": categories,
+        "rules": [dict(rule) for rule in CLAUSE_RULES],
+    }
+def main() -> None:
+    """Run the MCP server over stdio transport."""
+    # Target of the "clausekeeper-mcp" console script ("server:main" in pyproject.toml).
+    mcp.run(transport="stdio")
+# Also allow running the module directly, e.g. `python server.py`.
+if __name__ == "__main__":
+    main()

clausekeeper_mcp-0.1.0/setup.cfg ADDED Viewed

@@ -0,0 +1,4 @@
+[egg_info]
+tag_build =
+tag_date = 0