clausekeeper-mcp 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,122 @@
1
+ Metadata-Version: 2.4
2
+ Name: clausekeeper-mcp
3
+ Version: 0.1.0
4
+ Summary: MCP server exposing ClauseKeeper's deterministic compliance scanner to agents.
5
+ Author: ClauseKeeper contributors
6
+ License: AGPL-3.0-or-later
7
+ Requires-Python: >=3.11
8
+ Description-Content-Type: text/markdown
9
+ Requires-Dist: mcp>=1.0.0
10
+
11
+ # ClauseKeeper MCP Server
12
+
13
+ `clausekeeper-mcp` exposes [ClauseKeeper](../README.md)'s deterministic compliance scanner as Model Context Protocol (MCP) tools for Claude Desktop, Cursor, and other MCP hosts.
14
+
15
+ It is a thin wrapper around a vendored copy of the real ClauseKeeper scanner in `clausekeeper_core/scanner.py` and rule library in `clausekeeper_core/clause_rules.py`:
16
+
17
+ - no LLM calls
18
+ - no API keys
19
+ - no paid services
20
+ - stdio transport via the official Python `mcp` SDK
21
+
22
+ ## Tools
23
+
24
+ - `scan_policy_text(text: str)` — scans pasted policy/legal text with the vendored ClauseKeeper scanner and returns the score, grade, counts, categories, and missing/stale clause checklist.
25
+ - `scan_policy_url(url: str)` — fetches a URL, strips HTML with ClauseKeeper's `html_to_text`, then scans the visible text.
26
+ - `list_clause_rules()` — returns the vendored clause rules and categories.
27
+
28
+ ## Run locally
29
+
30
+ From the main ClauseKeeper repository:
31
+
32
+ ```bash
33
+ cd mcp
34
+ uv run clausekeeper-mcp
35
+ ```
36
+
37
+ `uv run` creates/uses the local project environment and installs the `mcp` SDK dependency automatically. Because this is a stdio MCP server, `uv run clausekeeper-mcp` waits for an MCP client on stdin/stdout.
38
+
39
+ You can also install it into an explicit virtual environment:
40
+
41
+ ```bash
42
+ cd mcp
43
+ uv venv
44
+ uv pip install --python .venv/bin/python -e .
45
+ .venv/bin/clausekeeper-mcp
46
+ ```
47
+
48
+ Or run it directly:
49
+
50
+ ```bash
51
+ cd /path/to/clausekeeper/mcp
52
+ uv run python server.py
53
+ ```
54
+
55
+ ## MCP host configuration
56
+
57
+ Use an absolute path to this `mcp` directory.
58
+
59
+ ### Claude Desktop
60
+
61
+ Add this to your Claude Desktop MCP configuration file:
62
+
63
+ ```json
64
+ {
65
+ "mcpServers": {
66
+ "clausekeeper": {
67
+ "command": "uv",
68
+ "args": [
69
+ "--directory",
70
+ "/absolute/path/to/clausekeeper/mcp",
71
+ "run",
72
+ "clausekeeper-mcp"
73
+ ]
74
+ }
75
+ }
76
+ }
77
+ ```
78
+
79
+ ### Cursor or other MCP hosts
80
+
81
+ Use the same server definition in your MCP settings:
82
+
83
+ ```json
84
+ {
85
+ "mcpServers": {
86
+ "clausekeeper": {
87
+ "command": "uv",
88
+ "args": [
89
+ "--directory",
90
+ "/absolute/path/to/clausekeeper/mcp",
91
+ "run",
92
+ "clausekeeper-mcp"
93
+ ]
94
+ }
95
+ }
96
+ }
97
+ ```
98
+
99
+ If your host does not support `uv --directory`, use the installed console script from a virtual environment instead:
100
+
101
+ ```json
102
+ {
103
+ "mcpServers": {
104
+ "clausekeeper": {
105
+ "command": "/absolute/path/to/clausekeeper/mcp/.venv/bin/clausekeeper-mcp",
106
+ "args": []
107
+ }
108
+ }
109
+ }
110
+ ```
111
+
112
+ ## Keeping the vendored scanner in sync
113
+
114
+ The `clausekeeper_core/` package is vendored from the main app's `app/scanner.py` and `app/clause_rules.py` so the MCP package is self-contained for PyPI/registry installs. When the scanner or clause rules change in `app/`, update the matching vendored files here and re-run the MCP self-containment and parity checks.
115
+
116
+ ## Publishing metadata
117
+
118
+ No `server.json` is included here because the current OSS MCP Community Registry / GitHub MCP Registry metadata format should be confirmed against the registry documentation at publication time. When publishing, add the registry-required metadata file with the package name, description, license, repository URL, runtime command, and tool list.
119
+
120
+ ## Main project
121
+
122
+ See the main ClauseKeeper project at [../README.md](../README.md).
@@ -0,0 +1,112 @@
1
+ # ClauseKeeper MCP Server
2
+
3
+ `clausekeeper-mcp` exposes [ClauseKeeper](../README.md)'s deterministic compliance scanner as Model Context Protocol (MCP) tools for Claude Desktop, Cursor, and other MCP hosts.
4
+
5
+ It is a thin wrapper around a vendored copy of the real ClauseKeeper scanner in `clausekeeper_core/scanner.py` and rule library in `clausekeeper_core/clause_rules.py`:
6
+
7
+ - no LLM calls
8
+ - no API keys
9
+ - no paid services
10
+ - stdio transport via the official Python `mcp` SDK
11
+
12
+ ## Tools
13
+
14
+ - `scan_policy_text(text: str)` — scans pasted policy/legal text with the vendored ClauseKeeper scanner and returns the score, grade, counts, categories, and missing/stale clause checklist.
15
+ - `scan_policy_url(url: str)` — fetches a URL, strips HTML with ClauseKeeper's `html_to_text`, then scans the visible text.
16
+ - `list_clause_rules()` — returns the vendored clause rules and categories.
17
+
18
+ ## Run locally
19
+
20
+ From the main ClauseKeeper repository:
21
+
22
+ ```bash
23
+ cd mcp
24
+ uv run clausekeeper-mcp
25
+ ```
26
+
27
+ `uv run` creates/uses the local project environment and installs the `mcp` SDK dependency automatically. Because this is a stdio MCP server, `uv run clausekeeper-mcp` waits for an MCP client on stdin/stdout.
28
+
29
+ You can also install it into an explicit virtual environment:
30
+
31
+ ```bash
32
+ cd mcp
33
+ uv venv
34
+ uv pip install --python .venv/bin/python -e .
35
+ .venv/bin/clausekeeper-mcp
36
+ ```
37
+
38
+ Or run it directly:
39
+
40
+ ```bash
41
+ cd /path/to/clausekeeper/mcp
42
+ uv run python server.py
43
+ ```
44
+
45
+ ## MCP host configuration
46
+
47
+ Use an absolute path to this `mcp` directory.
48
+
49
+ ### Claude Desktop
50
+
51
+ Add this to your Claude Desktop MCP configuration file:
52
+
53
+ ```json
54
+ {
55
+ "mcpServers": {
56
+ "clausekeeper": {
57
+ "command": "uv",
58
+ "args": [
59
+ "--directory",
60
+ "/absolute/path/to/clausekeeper/mcp",
61
+ "run",
62
+ "clausekeeper-mcp"
63
+ ]
64
+ }
65
+ }
66
+ }
67
+ ```
68
+
69
+ ### Cursor or other MCP hosts
70
+
71
+ Use the same server definition in your MCP settings:
72
+
73
+ ```json
74
+ {
75
+ "mcpServers": {
76
+ "clausekeeper": {
77
+ "command": "uv",
78
+ "args": [
79
+ "--directory",
80
+ "/absolute/path/to/clausekeeper/mcp",
81
+ "run",
82
+ "clausekeeper-mcp"
83
+ ]
84
+ }
85
+ }
86
+ }
87
+ ```
88
+
89
+ If your host does not support `uv --directory`, use the installed console script from a virtual environment instead:
90
+
91
+ ```json
92
+ {
93
+ "mcpServers": {
94
+ "clausekeeper": {
95
+ "command": "/absolute/path/to/clausekeeper/mcp/.venv/bin/clausekeeper-mcp",
96
+ "args": []
97
+ }
98
+ }
99
+ }
100
+ ```
101
+
102
+ ## Keeping the vendored scanner in sync
103
+
104
+ The `clausekeeper_core/` package is vendored from the main app's `app/scanner.py` and `app/clause_rules.py` so the MCP package is self-contained for PyPI/registry installs. When the scanner or clause rules change in `app/`, update the matching vendored files here and re-run the MCP self-containment and parity checks.
105
+
106
+ ## Publishing metadata
107
+
108
+ No `server.json` is included yet: the metadata format expected by the OSS MCP Community Registry / GitHub MCP Registry should be confirmed against the registry documentation at publication time. When publishing, add the registry-required metadata file with the package name, description, license, repository URL, runtime command, and tool list.
109
+
110
+ ## Main project
111
+
112
+ See the main ClauseKeeper project at [../README.md](../README.md).
@@ -0,0 +1 @@
1
+ """Vendored ClauseKeeper scanner core for the standalone MCP package."""
@@ -0,0 +1,173 @@
1
+ # Vendored from app/clause_rules.py — keep in sync. Source of truth is the main ClauseKeeper app.
2
+ """
3
+ ClauseKeeper clause-rules configuration.
4
+
5
+ This is the heart of the FREE compliance scanner. Each rule is a self-contained,
6
+ $0-to-run heuristic: it looks for keyword/phrase signals in a policy document and
7
+ decides whether the clause is PRESENT, MISSING, or STALE (present but using
8
+ out-of-date language for 2026).
9
+
10
+ Rule-based on purpose: no paid LLM dependency, deterministic, auditable, and cheap.
11
+ To tune the scanner, edit this file only — the scanner engine reads it generically.
12
+
13
+ Each rule:
14
+ key: stable id
15
+ label: human title in the scorecard
16
+ weight: points contributed to the /100 score when satisfied
17
+ signals: phrases whose presence indicates the clause EXISTS (case-insensitive)
18
+ stale_signals (optional): phrases that, IF the clause exists, suggest it is
19
+ up-to-date for 2026. If the clause is present but NONE of these
20
+ appear, we flag it STALE.
21
+ why: short explanation shown to the user
22
+ fix: one-line guidance on what to add
23
+ category: grouping for the scorecard
24
+ """
25
+
26
+ CLAUSE_RULES = [
27
+ {
28
+ "key": "gdpr",
29
+ "label": "GDPR (EU) data-rights language",
30
+ "category": "Privacy regimes",
31
+ "weight": 12,
32
+ "signals": [
33
+ "gdpr", "general data protection regulation", "lawful basis",
34
+ "right to erasure", "right to be forgotten", "data subject",
35
+ "supervisory authority",
36
+ ],
37
+ "stale_signals": ["lawful basis", "data subject rights", "right to erasure"],
38
+ "why": "EU/EEA users trigger GDPR. You must state lawful basis, data-subject rights, and how to exercise them.",
39
+ "fix": "Add a GDPR section covering lawful basis, the 8 data-subject rights, and EU representative/contact.",
40
+ },
41
+ {
42
+ "key": "ccpa",
43
+ "label": "CCPA/CPRA (California) rights",
44
+ "category": "Privacy regimes",
45
+ "weight": 12,
46
+ "signals": [
47
+ "ccpa", "cpra", "california consumer privacy", "do not sell",
48
+ "do not share", "sale of personal information", "california residents",
49
+ "right to opt out",
50
+ ],
51
+ "stale_signals": ["cpra", "do not sell or share", "sensitive personal information", "right to opt out"],
52
+ "why": "California's CPRA (effective successor to CCPA) adds 'do not sell OR share' and sensitive-data rights.",
53
+ "fix": "Add a California section with 'Do Not Sell or Share My Personal Information' and sensitive-PI opt-out.",
54
+ },
55
+ {
56
+ "key": "cookie_consent",
57
+ "label": "Cookie consent / tracking disclosure",
58
+ "category": "Tracking & cookies",
59
+ "weight": 10,
60
+ "signals": [
61
+ "cookie", "cookies", "tracking technolog", "pixel", "local storage",
62
+ "web beacon", "consent banner",
63
+ ],
64
+ "stale_signals": ["consent", "manage cookie", "reject all", "cookie preferences"],
65
+ "why": "You must disclose cookies/trackers and, for EU users, obtain prior consent (not just notice).",
66
+ "fix": "Add a cookie section listing categories and a consent mechanism with a 'Reject all' option.",
67
+ },
68
+ {
69
+ "key": "data_retention",
70
+ "label": "Data-retention policy",
71
+ "category": "Data handling",
72
+ "weight": 9,
73
+ "signals": [
74
+ "retention", "retain your", "how long we keep", "retention period",
75
+ "delete your data", "data is kept",
76
+ ],
77
+ "why": "Modern privacy laws require stating how long you keep personal data and the deletion criteria.",
78
+ "fix": "Add a retention section: state periods (or criteria) and what happens to data after.",
79
+ },
80
+ {
81
+ "key": "ai_disclosure",
82
+ "label": "AI-use disclosure (2026 wedge)",
83
+ "category": "AI & automation (2026)",
84
+ "weight": 14,
85
+ "signals": [
86
+ "artificial intelligence", "automated decision", "machine learning",
87
+ "ai model", "ai-powered", "ai features", "generative ai",
88
+ "profiling", "algorithm",
89
+ ],
90
+ "stale_signals": [
91
+ "eu ai act", "ai act", "automated decision-making", "human review",
92
+ "training data", "ai disclosure", "generative ai",
93
+ ],
94
+ "why": "2026 rules (EU AI Act phased obligations, US state AI laws) require disclosing AI/automated processing and user rights around it.",
95
+ "fix": "Add an AI-disclosure section: what AI you use, automated decisions, human-review rights, and training-data stance.",
96
+ },
97
+ {
98
+ "key": "automated_decision",
99
+ "label": "Automated decision-making & profiling rights",
100
+ "category": "AI & automation (2026)",
101
+ "weight": 8,
102
+ "signals": [
103
+ "automated decision", "profiling", "solely automated",
104
+ "automated processing", "human intervention", "human review",
105
+ ],
106
+ "why": "GDPR Art. 22 + 2026 AI rules give users rights re: decisions made about them by algorithms.",
107
+ "fix": "State whether you make solely-automated decisions and how users can request human review.",
108
+ },
109
+ {
110
+ "key": "contact_dpo",
111
+ "label": "Contact / DPO information",
112
+ "category": "Accountability",
113
+ "weight": 8,
114
+ "signals": [
115
+ "contact us", "data protection officer", "dpo", "privacy@",
116
+ "email us", "reach us", "@",
117
+ ],
118
+ "stale_signals": ["data protection officer", "dpo", "privacy@", "contact"],
119
+ "why": "Users must be able to reach you to exercise rights; EU often expects a DPO/representative contact.",
120
+ "fix": "Add a clear contact email (e.g., privacy@yourdomain) and, if applicable, a DPO/EU rep.",
121
+ },
122
+ {
123
+ "key": "coppa_children",
124
+ "label": "Children's data (COPPA / age limits)",
125
+ "category": "Special categories",
126
+ "weight": 7,
127
+ "signals": [
128
+ "coppa", "children", "under 13", "under 16", "minor",
129
+ "parental consent", "age of", "not directed to children",
130
+ ],
131
+ "why": "COPPA (US) and GDPR-K require handling of minors' data; even 'we don't serve children' must be stated.",
132
+ "fix": "Add a children's-data section: minimum age, no knowing collection, and parental-contact path.",
133
+ },
134
+ {
135
+ "key": "data_sharing",
136
+ "label": "Third-party sharing / sub-processors",
137
+ "category": "Data handling",
138
+ "weight": 6,
139
+ "signals": [
140
+ "third party", "third-party", "service provider", "sub-processor",
141
+ "subprocessor", "share your", "disclose your", "partners",
142
+ ],
143
+ "why": "You must disclose who you share data with (analytics, payment, hosting, AI vendors).",
144
+ "fix": "List categories of recipients/sub-processors (e.g., hosting, analytics, payments, AI providers).",
145
+ },
146
+ {
147
+ "key": "intl_transfer",
148
+ "label": "International data transfers",
149
+ "category": "Privacy regimes",
150
+ "weight": 6,
151
+ "signals": [
152
+ "international transfer", "transfer your data", "outside the eea",
153
+ "standard contractual clauses", "scc", "cross-border", "data transfer",
154
+ ],
155
+ "why": "Transferring EU data abroad requires a lawful transfer mechanism (e.g., SCCs) and disclosure.",
156
+ "fix": "Add a transfers section naming your mechanism (SCCs / adequacy) and destination regions.",
157
+ },
158
+ {
159
+ "key": "last_updated",
160
+ "label": "'Last updated' date / version stamp",
161
+ "category": "Accountability",
162
+ "weight": 8,
163
+ "signals": [
164
+ "last updated", "effective date", "last revised", "version",
165
+ "last modified",
166
+ ],
167
+ "why": "A visible last-updated date is expected and signals the policy is maintained — core to staying compliant.",
168
+ "fix": "Add a 'Last updated: <date>' line at the top and bump it whenever you change the policy.",
169
+ },
170
+ ]
171
+
172
+ # Maximum achievable raw score (sum of weights) — used to normalize to /100.
173
+ MAX_RAW_SCORE = sum(r["weight"] for r in CLAUSE_RULES)
@@ -0,0 +1,132 @@
1
+ # Vendored from app/scanner.py — keep in sync. Source of truth is the main ClauseKeeper app.
2
+ """
3
+ ClauseKeeper compliance scanner engine.
4
+
5
+ Pure, rule-based ($0 to run). Takes raw policy text (pasted or fetched from a URL)
6
+ and evaluates it against CLAUSE_RULES, producing a scorecard.
7
+
8
+ Status per clause:
9
+ present -> signals found AND (no stale_signals OR a stale_signal found)
10
+ needs_update -> signals found BUT stale_signals defined and none matched (STALE)
11
+ missing -> no signals found
12
+ """
13
+ import re
14
+ from html.parser import HTMLParser
15
+
16
+ from .clause_rules import CLAUSE_RULES, MAX_RAW_SCORE
17
+
18
+
19
+ class _TextExtractor(HTMLParser):
20
+ """Strip HTML to visible text. Skips script/style content."""
21
+
22
+ def __init__(self):
23
+ super().__init__()
24
+ self._chunks = []
25
+ self._skip = False
26
+
27
+ def handle_starttag(self, tag, attrs):
28
+ if tag in ("script", "style", "noscript"):
29
+ self._skip = True
30
+
31
+ def handle_endtag(self, tag):
32
+ if tag in ("script", "style", "noscript"):
33
+ self._skip = False
34
+
35
+ def handle_data(self, data):
36
+ if not self._skip:
37
+ self._chunks.append(data)
38
+
39
+ def text(self):
40
+ return " ".join(self._chunks)
41
+
42
+
43
def html_to_text(html: str) -> str:
    """Strip *html* down to its visible text.

    The markup is run through ``_TextExtractor``; if the parser raises, the
    function falls back to a crude regex that replaces every ``<...>`` tag
    with a space.

    Args:
        html: Raw HTML (or plain text) to convert.

    Returns:
        The extracted text. Whitespace is not normalized here.
    """
    parser = _TextExtractor()
    try:
        parser.feed(html)
        # feed() may hold back trailing text that is not followed by a tag
        # (for example a tail containing a bare "&", as in "AT&T"); close()
        # forces that buffered text through handle_data so it is not lost.
        parser.close()
    except Exception:
        # Deliberate best-effort: fall back to a crude tag-strip if the
        # parser chokes on malformed HTML.
        return re.sub(r"<[^>]+>", " ", html)
    return parser.text()
51
+
52
+
53
+ def _contains_any(haystack: str, needles) -> bool:
54
+ return any(n in haystack for n in needles)
55
+
56
+
57
def scan_text(raw_text: str) -> dict:
    """Evaluate every rule in CLAUSE_RULES against *raw_text*.

    Returns a scorecard dict with the /100 and /10 scores, letter grade,
    verdict, word count, per-status counts, the per-rule results, the results
    grouped by category, and the subset of results that are not "present".
    """
    # Signals are lowercase, so match against lowercased text with every
    # whitespace run collapsed to a single space.
    normalized = re.sub(r"\s+", " ", (raw_text or "")).lower()

    tally = {"present": 0, "needs_update": 0, "missing": 0}
    entries = []

    for rule in CLAUSE_RULES:
        found = _contains_any(normalized, rule["signals"])
        freshness = rule.get("stale_signals")

        if not found:
            status, earned = "missing", 0
        elif freshness and not _contains_any(normalized, freshness):
            # Clause exists but none of its stale_signals matched:
            # half credit, rounded down.
            status, earned = "needs_update", rule["weight"] // 2
        else:
            status, earned = "present", rule["weight"]

        tally[status] += 1
        entries.append({
            "key": rule["key"],
            "label": rule["label"],
            "category": rule["category"],
            "status": status,
            "weight": rule["weight"],
            "earned": earned,
            "why": rule["why"],
            "fix": rule["fix"],
        })

    raw_score = sum(entry["earned"] for entry in entries)
    score_100 = round((raw_score / MAX_RAW_SCORE) * 100) if MAX_RAW_SCORE else 0
    # Headline "X/10" figure derived from the /100 score.
    score_10 = round(score_100 / 10)

    # Grade bands, highest floor first; anything below the last floor is "F".
    bands = (
        (85, "A", "Strong — minor freshness checks only."),
        (70, "B", "Decent, but missing some 2026-critical clauses."),
        (50, "C", "Notable gaps — likely not fully 2026-compliant."),
        (30, "D", "Major gaps. High exposure on privacy/AI rules."),
    )
    grade, verdict = "F", "Little to no compliant policy language detected."
    for floor, letter, summary in bands:
        if score_100 >= floor:
            grade, verdict = letter, summary
            break

    # Bucket the per-rule entries by category, keeping rule order.
    grouped = {}
    for entry in entries:
        bucket = grouped.get(entry["category"])
        if bucket is None:
            bucket = grouped[entry["category"]] = []
        bucket.append(entry)

    outstanding = [entry for entry in entries if entry["status"] != "present"]

    return {
        "score_100": score_100,
        "score_10": score_10,
        "grade": grade,
        "verdict": verdict,
        "word_count": len(normalized.split()),
        "counts": {
            "present": tally["present"],
            "needs_update": tally["needs_update"],
            "missing": tally["missing"],
            "total": len(CLAUSE_RULES),
        },
        "results": entries,
        "by_category": grouped,
        "missing_or_stale": outstanding,
    }
@@ -0,0 +1,122 @@
1
+ Metadata-Version: 2.4
2
+ Name: clausekeeper-mcp
3
+ Version: 0.1.0
4
+ Summary: MCP server exposing ClauseKeeper's deterministic compliance scanner to agents.
5
+ Author: ClauseKeeper contributors
6
+ License: AGPL-3.0-or-later
7
+ Requires-Python: >=3.11
8
+ Description-Content-Type: text/markdown
9
+ Requires-Dist: mcp>=1.0.0
10
+
11
+ # ClauseKeeper MCP Server
12
+
13
+ `clausekeeper-mcp` exposes [ClauseKeeper](../README.md)'s deterministic compliance scanner as Model Context Protocol (MCP) tools for Claude Desktop, Cursor, and other MCP hosts.
14
+
15
+ It is a thin wrapper around a vendored copy of the real ClauseKeeper scanner in `clausekeeper_core/scanner.py` and rule library in `clausekeeper_core/clause_rules.py`:
16
+
17
+ - no LLM calls
18
+ - no API keys
19
+ - no paid services
20
+ - stdio transport via the official Python `mcp` SDK
21
+
22
+ ## Tools
23
+
24
+ - `scan_policy_text(text: str)` — scans pasted policy/legal text with the vendored ClauseKeeper scanner and returns the score, grade, counts, categories, and missing/stale clause checklist.
25
+ - `scan_policy_url(url: str)` — fetches a URL, strips HTML with ClauseKeeper's `html_to_text`, then scans the visible text.
26
+ - `list_clause_rules()` — returns the vendored clause rules and categories.
27
+
28
+ ## Run locally
29
+
30
+ From the main ClauseKeeper repository:
31
+
32
+ ```bash
33
+ cd mcp
34
+ uv run clausekeeper-mcp
35
+ ```
36
+
37
+ `uv run` creates/uses the local project environment and installs the `mcp` SDK dependency automatically. Because this is a stdio MCP server, `uv run clausekeeper-mcp` waits for an MCP client on stdin/stdout.
38
+
39
+ You can also install it into an explicit virtual environment:
40
+
41
+ ```bash
42
+ cd mcp
43
+ uv venv
44
+ uv pip install --python .venv/bin/python -e .
45
+ .venv/bin/clausekeeper-mcp
46
+ ```
47
+
48
+ Or run it directly:
49
+
50
+ ```bash
51
+ cd /path/to/clausekeeper/mcp
52
+ uv run python server.py
53
+ ```
54
+
55
+ ## MCP host configuration
56
+
57
+ Use an absolute path to this `mcp` directory.
58
+
59
+ ### Claude Desktop
60
+
61
+ Add this to your Claude Desktop MCP configuration file:
62
+
63
+ ```json
64
+ {
65
+ "mcpServers": {
66
+ "clausekeeper": {
67
+ "command": "uv",
68
+ "args": [
69
+ "--directory",
70
+ "/absolute/path/to/clausekeeper/mcp",
71
+ "run",
72
+ "clausekeeper-mcp"
73
+ ]
74
+ }
75
+ }
76
+ }
77
+ ```
78
+
79
+ ### Cursor or other MCP hosts
80
+
81
+ Use the same server definition in your MCP settings:
82
+
83
+ ```json
84
+ {
85
+ "mcpServers": {
86
+ "clausekeeper": {
87
+ "command": "uv",
88
+ "args": [
89
+ "--directory",
90
+ "/absolute/path/to/clausekeeper/mcp",
91
+ "run",
92
+ "clausekeeper-mcp"
93
+ ]
94
+ }
95
+ }
96
+ }
97
+ ```
98
+
99
+ If your host does not support `uv --directory`, use the installed console script from a virtual environment instead:
100
+
101
+ ```json
102
+ {
103
+ "mcpServers": {
104
+ "clausekeeper": {
105
+ "command": "/absolute/path/to/clausekeeper/mcp/.venv/bin/clausekeeper-mcp",
106
+ "args": []
107
+ }
108
+ }
109
+ }
110
+ ```
111
+
112
+ ## Keeping the vendored scanner in sync
113
+
114
+ The `clausekeeper_core/` package is vendored from the main app's `app/scanner.py` and `app/clause_rules.py` so the MCP package is self-contained for PyPI/registry installs. When the scanner or clause rules change in `app/`, update the matching vendored files here and re-run the MCP self-containment and parity checks.
115
+
116
+ ## Publishing metadata
117
+
118
+ No `server.json` is included here because the current OSS MCP Community Registry / GitHub MCP Registry metadata format should be confirmed against the registry documentation at publication time. When publishing, add the registry-required metadata file with the package name, description, license, repository URL, runtime command, and tool list.
119
+
120
+ ## Main project
121
+
122
+ See the main ClauseKeeper project at [../README.md](../README.md).
@@ -0,0 +1,12 @@
1
+ README.md
2
+ pyproject.toml
3
+ server.py
4
+ clausekeeper_core/__init__.py
5
+ clausekeeper_core/clause_rules.py
6
+ clausekeeper_core/scanner.py
7
+ clausekeeper_mcp.egg-info/PKG-INFO
8
+ clausekeeper_mcp.egg-info/SOURCES.txt
9
+ clausekeeper_mcp.egg-info/dependency_links.txt
10
+ clausekeeper_mcp.egg-info/entry_points.txt
11
+ clausekeeper_mcp.egg-info/requires.txt
12
+ clausekeeper_mcp.egg-info/top_level.txt
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ clausekeeper-mcp = server:main
@@ -0,0 +1,2 @@
1
+ clausekeeper_core
2
+ server
@@ -0,0 +1,22 @@
1
+ [project]
2
+ name = "clausekeeper-mcp"
3
+ version = "0.1.0"
4
+ description = "MCP server exposing ClauseKeeper's deterministic compliance scanner to agents."
5
+ readme = "README.md"
6
+ requires-python = ">=3.11"
7
+ license = { text = "AGPL-3.0-or-later" }
8
+ authors = [{ name = "ClauseKeeper contributors" }]
9
+ dependencies = [
10
+ "mcp>=1.0.0",
11
+ ]
12
+
13
+ [project.scripts]
14
+ clausekeeper-mcp = "server:main"
15
+
16
+ [build-system]
17
+ requires = ["setuptools>=68"]
18
+ build-backend = "setuptools.build_meta"
19
+
20
+ [tool.setuptools]
21
+ py-modules = ["server"]
22
+ packages = ["clausekeeper_core"]
@@ -0,0 +1,115 @@
1
+ """ClauseKeeper MCP server.
2
+
3
+ Exposes the deterministic ClauseKeeper compliance scanner as Model Context
4
+ Protocol tools. This module intentionally keeps wrappers thin and delegates all
5
+ scoring to the vendored ClauseKeeper scanner core.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import re
10
+ import urllib.request
11
+ from typing import Any
12
+
13
+ from clausekeeper_core.clause_rules import CLAUSE_RULES, MAX_RAW_SCORE
14
+ from clausekeeper_core.scanner import html_to_text, scan_text
15
+ from mcp.server.fastmcp import FastMCP
16
+
17
+ USER_AGENT = "ClauseKeeperMCP/0.1 (+https://github.com/)"
18
+ MAX_FETCH_BYTES = 2_000_000
19
+
20
+ mcp = FastMCP(
21
+ "clausekeeper-mcp",
22
+ instructions=(
23
+ "Use these tools to scan privacy policies, terms, cookie policies, "
24
+ "and related compliance documents with ClauseKeeper's deterministic "
25
+ "rule-based scanner. No LLM or API key is used."
26
+ ),
27
+ )
28
+
29
+
30
+ def _checklist(result: dict[str, Any]) -> list[dict[str, Any]]:
31
+ """Return the missing/stale clause checklist from a scanner scorecard."""
32
+ return [
33
+ {
34
+ "key": item["key"],
35
+ "label": item["label"],
36
+ "category": item["category"],
37
+ "status": item["status"],
38
+ "weight": item["weight"],
39
+ "earned": item["earned"],
40
+ "why": item["why"],
41
+ "fix": item["fix"],
42
+ }
43
+ for item in result.get("missing_or_stale", [])
44
+ ]
45
+
46
+
47
def _scan_response(raw_text: str, *, source: str) -> dict[str, Any]:
    """Scan *raw_text* and return the compact response handed back to agents.

    The response carries *source* plus the scorecard's scores, grade, verdict,
    word count and counts, the trimmed missing/stale checklist, and the
    per-category grouping.
    """
    scorecard = scan_text(raw_text)
    response: dict[str, Any] = {"source": source}
    # Fields copied through from the scanner scorecard unchanged.
    for field in ("score_100", "score_10", "grade", "verdict", "word_count", "counts"):
        response[field] = scorecard[field]
    response["missing_or_stale"] = _checklist(scorecard)
    response["by_category"] = scorecard["by_category"]
    return response
61
+
62
+
63
def _fetch_url_text(url: str) -> str:
    """Fetch *url* and return its visible text.

    The page is downloaded with stdlib ``urllib`` and converted to text with
    the scanner's ``html_to_text``.

    Args:
        url: Address of the policy page. ``https://`` is prepended when no
            ``http://`` or ``https://`` scheme is present (case-insensitive).

    Returns:
        The visible text of the fetched document.

    Raises:
        ValueError: If *url* is empty or the body exceeds ``MAX_FETCH_BYTES``.
        urllib.error.URLError: Propagated from ``urlopen`` on fetch failures.
    """
    clean_url = (url or "").strip()
    if not clean_url:
        raise ValueError("url is required")
    # Compare case-insensitively so "HTTPS://example.com" is not turned into
    # "https://HTTPS://example.com".
    if not clean_url.lower().startswith(("http://", "https://")):
        clean_url = "https://" + clean_url

    # NOTE(review): the URL is fetched as given, including hosts on private
    # networks. Consider restricting targets if this server can be driven by
    # untrusted callers.
    request = urllib.request.Request(clean_url, headers={"User-Agent": USER_AGENT})
    with urllib.request.urlopen(request, timeout=15) as response:  # noqa: S310 - user-requested scan URL
        # The header parser handles quoted values such as charset="utf-8",
        # which a hand-rolled regex would pass to decode() with the quotes.
        charset = response.headers.get_content_charset() or "utf-8"
        # Read one byte past the limit so an oversized body can be detected.
        raw = response.read(MAX_FETCH_BYTES + 1)
    if len(raw) > MAX_FETCH_BYTES:
        raise ValueError(f"URL response exceeds {MAX_FETCH_BYTES} byte limit")
    try:
        html = raw.decode(charset, errors="replace")
    except LookupError:
        # The server advertised a charset Python does not know; fall back to
        # UTF-8 instead of failing the whole scan.
        html = raw.decode("utf-8", errors="replace")
    return html_to_text(html)
81
+
82
+
83
@mcp.tool()
def scan_policy_text(text: str) -> dict[str, Any]:
    """Scan pasted policy text and return score plus missing/stale clause checklist."""
    # NOTE(review): the docstring above is presumably what FastMCP advertises
    # as the tool description, so it is kept verbatim.
    # A falsy argument (None or "") is scanned as an empty document.
    pasted = text if text else ""
    return _scan_response(pasted, source="pasted text")
87
+
88
+
89
@mcp.tool()
def scan_policy_url(url: str) -> dict[str, Any]:
    """Fetch a policy URL, strip HTML, and scan its visible text."""
    # NOTE(review): the docstring above is presumably what FastMCP advertises
    # as the tool description, so it is kept verbatim.
    visible_text = _fetch_url_text(url)
    # The response reports the URL exactly as the caller supplied it.
    return _scan_response(visible_text, source=url)
93
+
94
+
95
@mcp.tool()
def list_clause_rules() -> dict[str, Any]:
    """List the ClauseKeeper rules/categories used by the scanner."""
    # NOTE(review): the docstring above is presumably what FastMCP advertises
    # as the tool description, so it is kept verbatim.
    # Count rules per category, keeping first-seen category order.
    per_category: dict[str, int] = {}
    for rule in CLAUSE_RULES:
        name = rule["category"]
        per_category.setdefault(name, 0)
        per_category[name] += 1

    # Shallow-copy each rule so the top-level keys of the module-level
    # definitions are not shared with the returned payload.
    rule_copies = []
    for rule in CLAUSE_RULES:
        rule_copies.append(dict(rule))

    return {
        "total_rules": len(CLAUSE_RULES),
        "max_raw_score": MAX_RAW_SCORE,
        "categories": per_category,
        "rules": rule_copies,
    }
107
+
108
+
109
def main() -> None:
    """Console-script entry point: serve the MCP tools over stdio."""
    transport = "stdio"
    mcp.run(transport=transport)
112
+
113
+
114
+ if __name__ == "__main__":
115
+ main()
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+