@aborruso/ckan-mcp-server 0.4.11 → 0.4.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +7 -7
- package/LOG.md +66 -0
- package/NOTICE.md +77 -0
- package/PRD.md +6 -6
- package/README.md +48 -20
- package/dist/index.js +150 -3
- package/dist/worker.js +60 -56
- package/examples/langgraph/01_basic_workflow.py +277 -0
- package/examples/langgraph/02_data_exploration.py +366 -0
- package/examples/langgraph/README.md +719 -0
- package/examples/langgraph/metadata_quality.py +299 -0
- package/examples/langgraph/requirements.txt +12 -0
- package/examples/langgraph/setup.sh +32 -0
- package/examples/langgraph/test_setup.py +106 -0
- package/openspec/changes/add-ckan-host-allowlist-env/design.md +38 -0
- package/openspec/changes/add-ckan-host-allowlist-env/proposal.md +16 -0
- package/openspec/changes/add-ckan-host-allowlist-env/specs/ckan-request-allowlist/spec.md +15 -0
- package/openspec/changes/add-ckan-host-allowlist-env/specs/cloudflare-deployment/spec.md +11 -0
- package/openspec/changes/add-ckan-host-allowlist-env/tasks.md +12 -0
- package/openspec/changes/add-escape-text-query/proposal.md +12 -0
- package/openspec/changes/add-escape-text-query/specs/ckan-search/spec.md +11 -0
- package/openspec/changes/add-escape-text-query/tasks.md +8 -0
- package/openspec/changes/archive/2026-01-15-add-mcp-resource-filters/proposal.md +13 -0
- package/openspec/changes/archive/2026-01-15-add-mcp-resource-filters/specs/mcp-resources/spec.md +38 -0
- package/openspec/changes/archive/2026-01-15-add-mcp-resource-filters/tasks.md +10 -0
- package/openspec/changes/archive/2026-01-19-update-repo-owner-ondata/proposal.md +13 -0
- package/openspec/changes/archive/2026-01-19-update-repo-owner-ondata/specs/repository-metadata/spec.md +14 -0
- package/openspec/changes/archive/2026-01-19-update-repo-owner-ondata/tasks.md +12 -0
- package/openspec/specs/ckan-insights/spec.md +5 -1
- package/openspec/specs/ckan-search/spec.md +16 -0
- package/openspec/specs/mcp-prompts/spec.md +26 -0
- package/openspec/specs/mcp-resources/spec.md +30 -4
- package/openspec/specs/repository-metadata/spec.md +19 -0
- package/package.json +1 -1
- package/private/commenti-privati.yaml +14 -0
- /package/openspec/changes/{add-mcp-prompts → archive/2026-01-15-add-mcp-prompts}/proposal.md +0 -0
- /package/openspec/changes/{add-mcp-prompts → archive/2026-01-15-add-mcp-prompts}/specs/mcp-prompts/spec.md +0 -0
- /package/openspec/changes/{add-mcp-prompts → archive/2026-01-15-add-mcp-prompts}/tasks.md +0 -0
- /package/openspec/changes/{update-search-parser-config → archive/2026-01-19-update-search-parser-config}/proposal.md +0 -0
- /package/openspec/changes/{update-search-parser-config → archive/2026-01-19-update-search-parser-config}/specs/ckan-insights/spec.md +0 -0
- /package/openspec/changes/{update-search-parser-config → archive/2026-01-19-update-search-parser-config}/specs/ckan-search/spec.md +0 -0
- /package/openspec/changes/{update-search-parser-config → archive/2026-01-19-update-search-parser-config}/tasks.md +0 -0

package/examples/langgraph/metadata_quality.py
ADDED
@@ -0,0 +1,299 @@
#!/usr/bin/env python3
"""
Metadata Quality Scoring for CKAN Datasets

Advanced quality scoring system based on:
- Completeness (required and recommended fields)
- Richness (descriptions, tags, temporal coverage)
- Resources quality (formats, accessibility)
- Temporal freshness

Score: 0-100 points
"""

from datetime import datetime
from typing import Any


class MetadataQualityScorer:
    """Calculate metadata quality score for CKAN datasets."""

    # Quality thresholds
    EXCELLENT = 80
    GOOD = 60
    ACCEPTABLE = 40
    POOR = 0

    @classmethod
    def score_dataset(cls, dataset: dict[str, Any]) -> dict[str, Any]:
        """
        Calculate comprehensive quality score.

        Returns:
            {
                "score": 75,        # Total score 0-100
                "level": "good",    # excellent/good/acceptable/poor
                "breakdown": {
                    "completeness": 20,  # out of 30
                    "richness": 15,      # out of 30
                    "resources": 25,     # out of 30
                    "freshness": 8       # out of 10
                },
                "issues": ["Missing license", ...]
            }
        """
        issues = []
        breakdown = {
            "completeness": cls._score_completeness(dataset, issues),
            "richness": cls._score_richness(dataset, issues),
            "resources": cls._score_resources(dataset, issues),
            "freshness": cls._score_freshness(dataset, issues),
        }

        total_score = sum(breakdown.values())
        level = cls._get_level(total_score)

        return {
            "score": total_score,
            "level": level,
            "breakdown": breakdown,
            "issues": issues,
        }

    @classmethod
    def _score_completeness(cls, dataset: dict, issues: list) -> int:
        """Score 0-30: Required and recommended fields."""
        score = 0

        # Required fields (15 points)
        if dataset.get("title"):
            score += 5
        else:
            issues.append("Missing title")

        if dataset.get("notes"):  # Description
            score += 5
        else:
            issues.append("Missing description")

        if dataset.get("name"):  # Identifier
            score += 5
        else:
            issues.append("Missing identifier")

        # Recommended fields (15 points)
        if dataset.get("license_id"):
            score += 3
        else:
            issues.append("Missing license")

        if dataset.get("author") or dataset.get("maintainer"):
            score += 3
        else:
            issues.append("Missing author/maintainer")

        if dataset.get("author_email") or dataset.get("maintainer_email"):
            score += 3
        else:
            issues.append("Missing contact email")

        # Organization
        if dataset.get("organization"):
            score += 3
        else:
            issues.append("Not assigned to organization")

        # Geographical coverage
        if dataset.get("extras"):
            has_geo = any(
                e.get("key") in ["spatial", "geographic_coverage"]
                for e in dataset.get("extras", [])
            )
            if has_geo:
                score += 3

        return score

    @classmethod
    def _score_richness(cls, dataset: dict, issues: list) -> int:
        """Score 0-30: Richness of metadata."""
        score = 0

        # Description quality (10 points)
        notes = dataset.get("notes", "")
        if len(notes) > 200:
            score += 10
        elif len(notes) > 100:
            score += 5
        elif len(notes) > 0:
            score += 2
        else:
            issues.append("Very short or missing description")

        # Tags (10 points)
        tags = dataset.get("tags", [])
        num_tags = len(tags)
        if num_tags >= 5:
            score += 10
        elif num_tags >= 3:
            score += 6
        elif num_tags >= 1:
            score += 3
        else:
            issues.append("No tags")

        # Temporal coverage (5 points)
        extras = {e.get("key"): e.get("value") for e in dataset.get("extras", [])}
        if "temporal_start" in extras or "temporal_end" in extras:
            score += 3

        # Frequency/update schedule (5 points)
        if extras.get("frequency") or extras.get("update_frequency"):
            score += 2

        return score

    @classmethod
    def _score_resources(cls, dataset: dict, issues: list) -> int:
        """Score 0-30: Resources quality."""
        score = 0
        resources = dataset.get("resources", [])

        if not resources:
            issues.append("No resources")
            return 0

        # At least one resource (5 points)
        score += 5

        # Check formats (10 points)
        formats = {r.get("format", "").upper() for r in resources}
        open_formats = {"CSV", "JSON", "GEOJSON", "XML", "RDF", "JSONLD"}
        if formats & open_formats:
            score += 10
            if "CSV" in formats:
                score += 2  # Bonus for CSV
        else:
            issues.append("No open formats (CSV/JSON/XML)")

        # Resource descriptions (5 points)
        described = sum(1 for r in resources if r.get("description"))
        if described == len(resources):
            score += 5
        elif described > 0:
            score += 2

        # DataStore availability (5 points)
        has_datastore = any(r.get("datastore_active") for r in resources)
        if has_datastore:
            score += 5

        # URLs validity (5 points)
        valid_urls = sum(
            1 for r in resources if r.get("url") and r["url"].startswith("http")
        )
        if valid_urls == len(resources):
            score += 5
        elif valid_urls > 0:
            score += 2
        else:
            issues.append("Invalid or missing resource URLs")

        return score

    @classmethod
    def _score_freshness(cls, dataset: dict, issues: list) -> int:
        """Score 0-10: Temporal freshness."""
        score = 0

        # Check metadata_modified
        modified_str = dataset.get("metadata_modified")
        if not modified_str:
            issues.append("No last modified date")
            return 0

        try:
            modified = datetime.fromisoformat(modified_str.replace("Z", "+00:00"))
            now = datetime.now(modified.tzinfo)
            days_old = (now - modified).days

            if days_old < 90:  # < 3 months
                score = 10
            elif days_old < 180:  # < 6 months
                score = 7
            elif days_old < 365:  # < 1 year
                score = 5
            elif days_old < 730:  # < 2 years
                score = 3
            else:
                score = 1
                issues.append(f"Last updated {days_old} days ago")

        except (ValueError, AttributeError):
            issues.append("Invalid date format")

        return score

    @classmethod
    def _get_level(cls, score: int) -> str:
        """Convert score to quality level."""
        if score >= cls.EXCELLENT:
            return "excellent"
        elif score >= cls.GOOD:
            return "good"
        elif score >= cls.ACCEPTABLE:
            return "acceptable"
        else:
            return "poor"


# Example usage
if __name__ == "__main__":
    # Sample dataset
    sample_dataset = {
        "title": "Sample Dataset",
        "name": "sample-dataset",
        "notes": "This is a sample dataset with a detailed description " * 5,
        "license_id": "cc-by-4.0",
        "author": "Mario Rossi",
        "author_email": "mario@example.com",
        "organization": {"name": "comune-roma"},
        "tags": [
            {"name": "environment"},
            {"name": "air-quality"},
            {"name": "open-data"},
        ],
        "resources": [
            {
                "format": "CSV",
                "url": "https://example.com/data.csv",
                "description": "Data in CSV format",
                "datastore_active": True,
            },
            {
                "format": "JSON",
                "url": "https://example.com/data.json",
                "description": "Data in JSON format",
            },
        ],
        "metadata_modified": "2025-01-15T10:00:00Z",
    }

    scorer = MetadataQualityScorer()
    result = scorer.score_dataset(sample_dataset)

    print("Metadata Quality Assessment")
    print("=" * 50)
    print(f"Overall Score: {result['score']}/100")
    print(f"Quality Level: {result['level'].upper()}")
    print(f"\nBreakdown:")
    for category, score in result["breakdown"].items():
        print(
            f"  {category.capitalize():15} {score:2}/30"
            if category != "freshness"
            else f"  {category.capitalize():15} {score:2}/10"
        )
    if result["issues"]:
        print(f"\nIssues ({len(result['issues'])}):")
        for issue in result["issues"]:
            print(f"  - {issue}")

package/examples/langgraph/requirements.txt
ADDED
@@ -0,0 +1,12 @@
# LangGraph Examples - Python Dependencies

# Core dependencies
langgraph>=0.2.0
langchain-core>=0.3.0

# MCP Python SDK for client connection
mcp>=1.0.0

# Optional: LangSmith for debugging/tracing
# Uncomment if you want to use LangSmith
# langsmith>=0.1.0

package/examples/langgraph/setup.sh
ADDED
@@ -0,0 +1,32 @@
#!/bin/bash
# Setup script for LangGraph examples

set -e

echo "Setting up LangGraph examples environment..."

# Check Python version
python3 --version

# Create virtual environment if it doesn't exist
if [ ! -d "venv" ]; then
    echo "Creating virtual environment..."
    python3 -m venv venv
fi

# Activate virtual environment
source venv/bin/activate

# Install dependencies
echo "Installing dependencies..."
pip install -r requirements.txt

echo ""
echo "✓ Setup complete!"
echo ""
echo "To activate the environment:"
echo "  source venv/bin/activate"
echo ""
echo "To run examples:"
echo "  python 01_basic_workflow.py"
echo "  python 02_data_exploration.py"

package/examples/langgraph/test_setup.py
ADDED
@@ -0,0 +1,106 @@
#!/usr/bin/env python3
"""
Quick test to verify LangGraph + MCP setup

Run:
    uvx --with langgraph --with mcp --with langchain-core python test_setup.py
"""

import sys


def test_imports():
    """Test that all required packages are available."""
    print("Testing imports...")
    errors = []

    try:
        import langgraph  # noqa: F401

        print("✓ langgraph")
    except ImportError as e:
        errors.append(f"✗ langgraph: {e}")

    try:
        import mcp  # noqa: F401

        print("✓ mcp")
    except ImportError as e:
        errors.append(f"✗ mcp: {e}")

    try:
        import langchain_core  # noqa: F401

        print("✓ langchain_core")
    except ImportError as e:
        errors.append(f"✗ langchain_core: {e}")

    return errors


def test_mcp_server():
    """Test that MCP server file exists."""
    import os

    print("\nTesting MCP server...")
    server_path = os.path.join(os.path.dirname(__file__), "../../dist/index.js")

    if os.path.exists(server_path):
        print(f"✓ MCP server found: {server_path}")
        return []
    else:
        return [
            f"✗ MCP server not found: {server_path}",
            "  Run: cd ../.. && npm run build",
        ]


def test_node():
    """Test that Node.js is available."""
    import subprocess

    print("\nTesting Node.js...")
    try:
        result = subprocess.run(
            ["node", "--version"], capture_output=True, text=True, check=True
        )
        version = result.stdout.strip()
        print(f"✓ Node.js {version}")
        return []
    except (subprocess.CalledProcessError, FileNotFoundError):
        return ["✗ Node.js not found or not in PATH"]


def main():
    """Run all tests."""
    print("=" * 60)
    print("LangGraph + CKAN MCP Setup Test")
    print("=" * 60)

    all_errors = []

    # Run tests
    all_errors.extend(test_imports())
    all_errors.extend(test_node())
    all_errors.extend(test_mcp_server())

    # Summary
    print("\n" + "=" * 60)
    if all_errors:
        print("SETUP INCOMPLETE")
        print("=" * 60)
        for error in all_errors:
            print(error)
        print("\nSee README.md for setup instructions")
        sys.exit(1)
    else:
        print("✓ ALL TESTS PASSED")
        print("=" * 60)
        print("\nYou can now run:")
        print("  python 01_basic_workflow.py")
        print("  python 02_data_exploration.py")
        sys.exit(0)


if __name__ == "__main__":
    main()

package/openspec/changes/add-ckan-host-allowlist-env/design.md
ADDED
@@ -0,0 +1,38 @@
# Design: CKAN Host Allowlist via Environment

## Overview
Introduce an optional, environment-driven allowlist for CKAN hosts. When configured, all CKAN requests must target hosts in the allowlist. When not configured, behavior remains unchanged.

## Configuration
- Environment variable: `ALLOWED_CKAN_HOSTS`
- Format: comma-separated hostnames (e.g., `dati.gov.it,data.gov,catalog.data.gov`)
- Parsing rules:
  - Split on commas
  - Trim whitespace
  - Lowercase
  - Drop empty entries

## Enforcement
- Validate hostnames extracted from:
  - Tool inputs: `server_url`
  - Resource URIs: `ckan://{server}/...`
- Reject requests where the hostname is not in the allowlist.
- Error message should be explicit: `Host not allowed: <hostname>`.

## Runtime Scope
- Node.js (stdio/http) and Workers runtimes share the same validation utility.
- Workers reads env via `env.ALLOWED_CKAN_HOSTS` (wrangler var), Node via `process.env.ALLOWED_CKAN_HOSTS`.

## Integration Points
- Add a small utility module, e.g. `src/utils/allowlist.ts`:
  - `parseAllowedHosts(value?: string): Set<string> | null`
  - `assertAllowedHost(serverUrl: string, allowed: Set<string> | null): void`
- Call `assertAllowedHost` inside CKAN request flow or immediately in each tool/resource handler before network calls.

## Backwards Compatibility
- If `ALLOWED_CKAN_HOSTS` is unset or empty, allow all hosts (no behavior change).

## Testing
- Unit tests for parsing behavior (case, whitespace, empty entries).
- Unit tests for allow/deny logic with known hostnames and invalid URLs.
- Tool/resource tests to verify rejection when allowlist is set.
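
Editor's note: for orientation, here is a minimal TypeScript sketch of the allowlist utility the design above describes. The two signatures come from the design; the module path `src/utils/allowlist.ts`, the function bodies, and the usage lines are assumptions for illustration, not the code shipped in `dist/`.

// Sketch only: signatures from the design, bodies assumed by the editor.
export function parseAllowedHosts(value?: string): Set<string> | null {
  if (!value || value.trim() === "") {
    return null; // allowlist not configured: no restriction
  }
  const hosts = value
    .split(",")                               // split on commas
    .map((host) => host.trim().toLowerCase()) // trim whitespace, lowercase
    .filter((host) => host.length > 0);       // drop empty entries
  return hosts.length > 0 ? new Set(hosts) : null;
}

export function assertAllowedHost(
  serverUrl: string,
  allowed: Set<string> | null,
): void {
  if (allowed === null) return; // unset or empty env var: allow all hosts
  const hostname = new URL(serverUrl).hostname.toLowerCase();
  if (!allowed.has(hostname)) {
    throw new Error(`Host not allowed: ${hostname}`);
  }
}

// Assumed usage, before any network call to CKAN:
//   const allowed = parseAllowedHosts(process.env.ALLOWED_CKAN_HOSTS); // Node
//   assertAllowedHost("https://dati.gov.it", allowed);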

package/openspec/changes/add-ckan-host-allowlist-env/proposal.md
ADDED
@@ -0,0 +1,16 @@
# Change: Add allowlist for CKAN hosts via environment

## Why
Public HTTP/Workers deployments can be abused to proxy requests to arbitrary hosts. A host allowlist provides a simple, configurable barrier without breaking local usage.

## What Changes
- Add `ALLOWED_CKAN_HOSTS` env var (comma-separated hostnames) to restrict `server_url` targets.
- Validate `server_url` host against the allowlist for all tools/resources that call CKAN.
- Document and expose the env var in `wrangler.toml` for Workers deployments.

## Design Reference
See `openspec/changes/add-ckan-host-allowlist-env/design.md` for configuration, enforcement, and runtime details.

## Impact
- Affected specs: `cloudflare-deployment`, new `ckan-request-allowlist`
- Affected code: request validation utilities; Workers/Node configuration handling; tools/resources that accept `server_url` or `ckan://` URIs.

package/openspec/changes/add-ckan-host-allowlist-env/specs/ckan-request-allowlist/spec.md
ADDED
@@ -0,0 +1,15 @@
## ADDED Requirements
### Requirement: CKAN host allowlist validation
The system SHALL validate every CKAN request target against an optional allowlist configured via `ALLOWED_CKAN_HOSTS`.

#### Scenario: Allowed host
- **WHEN** a tool or resource is called with a `server_url` whose hostname is in `ALLOWED_CKAN_HOSTS`
- **THEN** the request proceeds as normal

#### Scenario: Disallowed host
- **WHEN** a tool or resource is called with a `server_url` whose hostname is not in `ALLOWED_CKAN_HOSTS`
- **THEN** the request is rejected with a clear error indicating the host is not allowed

#### Scenario: Allowlist not set
- **WHEN** `ALLOWED_CKAN_HOSTS` is unset or empty
- **THEN** the system accepts any valid `server_url`

package/openspec/changes/add-ckan-host-allowlist-env/specs/cloudflare-deployment/spec.md
ADDED
@@ -0,0 +1,11 @@
## ADDED Requirements
### Requirement: Workers allowlist configuration
The system SHALL allow configuring an allowlist of CKAN hosts for Workers deployments via environment variable.

#### Scenario: Wrangler allowlist configuration
- **WHEN** `wrangler.toml` sets `ALLOWED_CKAN_HOSTS` to a comma-separated list of hostnames
- **THEN** the Workers runtime reads the variable and restricts CKAN requests to those hosts

#### Scenario: Allowlist not set
- **WHEN** `ALLOWED_CKAN_HOSTS` is unset or empty in the Workers environment
- **THEN** the Workers runtime allows requests to any CKAN host
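
Editor's note: a hedged illustration of how the variable could reach the shared helper in each runtime. The variable name matches the spec above; the Workers handler shape and the import path are assumptions, and the published `dist/worker.js` is not reproduced here.

// Hypothetical wiring only; not the shipped worker code.
import { parseAllowedHosts } from "./utils/allowlist"; // module suggested in the design above

interface Env {
  ALLOWED_CKAN_HOSTS?: string; // set via [vars] in wrangler.toml, e.g. "dati.gov.it,catalog.data.gov"
}

export default {
  async fetch(request: Request, env: Env): Promise<Response> {
    // Workers read configuration from env bindings rather than process.env.
    const allowed = parseAllowedHosts(env.ALLOWED_CKAN_HOSTS);
    // ...hand `allowed` to request handling so every CKAN call is checked...
    return new Response("ok");
  },
};

// The Node (stdio/http) entry point would instead read:
//   const allowed = parseAllowedHosts(process.env.ALLOWED_CKAN_HOSTS);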

package/openspec/changes/add-ckan-host-allowlist-env/tasks.md
ADDED
@@ -0,0 +1,12 @@
## 1. Implementation
- [ ] Add allowlist parsing utility for `ALLOWED_CKAN_HOSTS` (comma-separated hostnames, case-insensitive, trim whitespace).
- [ ] Enforce allowlist for all CKAN requests (tools and resource templates) with clear error messaging.
- [ ] Ensure allowlist applies to both Node and Workers runtimes.

## 2. Configuration
- [ ] Add `ALLOWED_CKAN_HOSTS` to `wrangler.toml` with example values.
- [ ] Update docs/README to describe the env var and behavior (optional if required by spec).

## 3. Tests
- [ ] Add unit tests for allowlist parsing and validation.
- [ ] Add tool/resource tests verifying rejection for non-allowed hosts when env var is set.

package/openspec/changes/add-escape-text-query/proposal.md
ADDED
@@ -0,0 +1,12 @@
# Change: Escape text-field query wrapping in search parser

## Why
Wrapping arbitrary user input in `text:(...)` without escaping allows query parser errors or unintended semantics when the input contains Solr/Lucene metacharacters (e.g., `"` or `)`).

## What Changes
- Escape Solr/Lucene special characters before wrapping user input in `text:(...)`.
- Add tests to confirm escaped output and prevent regressions.

## Impact
- Affected specs: `ckan-search`
- Affected code: `src/utils/search.ts`, package search tool behavior, related tests.

package/openspec/changes/add-escape-text-query/specs/ckan-search/spec.md
ADDED
@@ -0,0 +1,11 @@
## MODIFIED Requirements
### Requirement: Package search parser override
The system SHALL support a per-portal default and a per-request override to force package search queries through the `text` field when needed, and SHALL escape Solr/Lucene special characters when wrapping queries in `text:(...)`.

#### Scenario: Portal default applies
- **WHEN** a portal is configured to force the text-field parser
- **THEN** `ckan_package_search` uses `text:(...)` for non-fielded queries by default with escaped query content

#### Scenario: Request override applies
- **WHEN** a client explicitly requests the text-field parser
- **THEN** `ckan_package_search` uses `text:(...)` regardless of portal defaults with escaped query content

package/openspec/changes/add-escape-text-query/tasks.md
ADDED
@@ -0,0 +1,8 @@
## 1. Implementation
- [x] Add a Solr/Lucene escaping helper for text-field queries.
- [x] Apply escaping when `resolveSearchQuery` forces `text:(...)`.
- [x] Ensure behavior is unchanged when not forcing the text parser.

## 2. Tests
- [x] Add unit tests for escaping behavior (quotes, parentheses, backslashes, colons).
- [x] Add tests to cover `resolveSearchQuery` effectiveQuery output with forced text parser.
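
Editor's note: to make the escaping described in the spec and tasks above concrete, a small TypeScript sketch follows. The exact character set and the helper names (`escapeLucene`, `wrapInTextField`) are assumptions; the actual helper lives in `src/utils/search.ts` and is not reproduced in this diff.

// Illustrative sketch only; the shipped helper may differ in character set and wrapping logic.
const LUCENE_SPECIAL = /[+\-&|!(){}\[\]^"~*?:\\\/]/g;

function escapeLucene(input: string): string {
  // Backslash-escape Solr/Lucene metacharacters (quotes, parentheses, colons, backslashes, ...).
  return input.replace(LUCENE_SPECIAL, (ch) => `\\${ch}`);
}

function wrapInTextField(query: string): string {
  // Force the query through the `text` field, escaping the user input first.
  return `text:(${escapeLucene(query)})`;
}

// Example: wrapInTextField('scuole "Roma" (2024)')
//   -> text:(scuole \"Roma\" \(2024\))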

package/openspec/changes/archive/2026-01-15-add-mcp-resource-filters/proposal.md
ADDED
@@ -0,0 +1,13 @@
# Change: add filtered CKAN dataset resource templates

## Why
Users need quick, direct access to filtered dataset lists (by theme, publisher, tag, format) without building complex tool queries.

## What Changes
- Add new MCP resource templates under the existing `ckan://{server}/...` scheme for group, organization, tag, and format dataset filters.
- Extend resource discovery to list the new templates.
- Document new resource URIs and examples.

## Impact
- Affected specs: mcp-resources
- Affected code: `src/resources/*`, `src/resources/uri.ts`, README/docs

package/openspec/changes/archive/2026-01-15-add-mcp-resource-filters/specs/mcp-resources/spec.md
ADDED
@@ -0,0 +1,38 @@
## ADDED Requirements

### Requirement: Group Datasets Resource Template
The system SHALL expose a resource template for accessing CKAN datasets by group via the URI pattern `ckan://{server}/group/{name}/datasets`.

#### Scenario: Read datasets by group
- **WHEN** client reads `ckan://dati.gov.it/group/governo/datasets`
- **THEN** server returns JSON with the matching datasets from `package_search`

### Requirement: Organization Datasets Resource Template
The system SHALL expose a resource template for accessing CKAN datasets by organization via the URI pattern `ckan://{server}/organization/{name}/datasets`.

#### Scenario: Read datasets by organization
- **WHEN** client reads `ckan://dati.gov.it/organization/regione-toscana/datasets`
- **THEN** server returns JSON with the matching datasets from `package_search`

### Requirement: Tag Datasets Resource Template
The system SHALL expose a resource template for accessing CKAN datasets by tag via the URI pattern `ckan://{server}/tag/{name}/datasets`.

#### Scenario: Read datasets by tag
- **WHEN** client reads `ckan://dati.gov.it/tag/turismo/datasets`
- **THEN** server returns JSON with the matching datasets from `package_search`

### Requirement: Format Datasets Resource Template
The system SHALL expose a resource template for accessing CKAN datasets by resource format via the URI pattern `ckan://{server}/format/{format}/datasets`.

#### Scenario: Read datasets by format
- **WHEN** client reads `ckan://dati.gov.it/format/csv/datasets`
- **THEN** server returns JSON with the matching datasets from `package_search` filtered by resource format

## MODIFIED Requirements

### Requirement: Resource Discovery
The system SHALL list available resource templates when client requests resource list, including dataset, resource, organization, and dataset filter templates (group, organization, tag, format).

#### Scenario: List resource templates
- **WHEN** client calls `resources/listTemplates`
- **THEN** server returns list of available URI templates with descriptions, including the dataset filter templates
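
Editor's note: a sketch of how one of the filter URIs above could map onto a `package_search` call. The fq field names (`groups`, `organization`, `tags`, `res_format`) are standard CKAN Solr fields; the helper name `filterUriToSearchUrl` and the mapping itself are illustrative assumptions, not the handlers in `src/resources/`.

// Illustrative mapping from a filter resource URI to a package_search request.
type FilterKind = "group" | "organization" | "tag" | "format";

const FQ_FIELD: Record<FilterKind, string> = {
  group: "groups",
  organization: "organization",
  tag: "tags",
  format: "res_format",
};

function filterUriToSearchUrl(uri: string): string {
  // Expected shape: ckan://{server}/{kind}/{name}/datasets
  const match = uri.match(
    /^ckan:\/\/([^/]+)\/(group|organization|tag|format)\/([^/]+)\/datasets$/,
  );
  if (!match) throw new Error(`Unsupported resource URI: ${uri}`);
  const [, server, kind, name] = match;
  const fq = `${FQ_FIELD[kind as FilterKind]}:"${name}"`;
  return `https://${server}/api/3/action/package_search?fq=${encodeURIComponent(fq)}`;
}

// Example: filterUriToSearchUrl("ckan://dati.gov.it/tag/turismo/datasets")
//   -> https://dati.gov.it/api/3/action/package_search?fq=tags%3A%22turismo%22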

package/openspec/changes/archive/2026-01-15-add-mcp-resource-filters/tasks.md
ADDED
@@ -0,0 +1,10 @@
## 1. Implementation
- [x] Add new resource handlers for group, organization datasets, tag, and format filters under `src/resources/`.
- [x] Register new resource templates in `src/resources/index.ts`.
- [x] Update URI parsing/validation to accept the new dataset filter paths.
- [x] Add tests for each new resource template and error cases.
- [x] Update README/docs with new `ckan://{server}/...` examples.

## 2. Validation
- [x] Run `npm test` (or targeted resource tests).
- [x] Run `openspec validate add-mcp-resource-filters --strict`.

package/openspec/changes/archive/2026-01-19-update-repo-owner-ondata/proposal.md
ADDED
@@ -0,0 +1,13 @@
# Change: Move repository ownership to ondata organization

## Why
The repository is moving from the personal account to the ondata organization. Documentation, badges, and published references must reflect the new canonical URL to avoid confusion and broken links.

## What Changes
- Update documentation and in-app links to the new repository URL under the ondata organization.
- Preserve npm package ownership and scope as @aborruso (no npm ownership change).
- Confirm GitHub Pages and related references align with the new organization ownership.

## Impact
- Affected specs: repository-metadata, cloudflare-deployment (docs references)
- Affected code: README.md, docs/*, src/worker.ts, .github/ISSUE_TEMPLATE/config.yml

package/openspec/changes/archive/2026-01-19-update-repo-owner-ondata/specs/repository-metadata/spec.md
ADDED
@@ -0,0 +1,14 @@
## ADDED Requirements
### Requirement: Canonical repository ownership references
The project documentation and UI MUST reference the canonical repository under the ondata organization once the repository is migrated.

#### Scenario: Repository references updated
- **WHEN** a user follows documentation or UI links to the repository
- **THEN** the links point to the ondata organization repository URL

### Requirement: NPM package ownership remains personal
The project MUST continue to document the npm package under the @aborruso scope unless an explicit npm ownership change is approved.

#### Scenario: npm install instructions unchanged
- **WHEN** a user follows installation instructions
- **THEN** the package name remains @aborruso/ckan-mcp-server
|