@aborruso/ckan-mcp-server 0.4.17 → 0.4.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. package/LOG.md +64 -0
  2. package/README.md +104 -34
  3. package/dist/index.js +161 -45
  4. package/dist/worker.js +42 -42
  5. package/package.json +12 -1
  6. package/.devin/wiki.json +0 -273
  7. package/CLAUDE.md +0 -398
  8. package/PRD.md +0 -999
  9. package/REFACTORING.md +0 -238
  10. package/examples/langgraph/01_basic_workflow.py +0 -277
  11. package/examples/langgraph/02_data_exploration.py +0 -366
  12. package/examples/langgraph/README.md +0 -719
  13. package/examples/langgraph/metadata_quality.py +0 -299
  14. package/examples/langgraph/requirements.txt +0 -12
  15. package/examples/langgraph/setup.sh +0 -32
  16. package/examples/langgraph/test_setup.py +0 -106
  17. package/openspec/AGENTS.md +0 -456
  18. package/openspec/changes/add-ckan-analyze-dataset-structure/proposal.md +0 -17
  19. package/openspec/changes/add-ckan-analyze-dataset-structure/specs/ckan-insights/spec.md +0 -7
  20. package/openspec/changes/add-ckan-analyze-dataset-structure/tasks.md +0 -6
  21. package/openspec/changes/add-ckan-analyze-dataset-updates/proposal.md +0 -17
  22. package/openspec/changes/add-ckan-analyze-dataset-updates/specs/ckan-insights/spec.md +0 -7
  23. package/openspec/changes/add-ckan-analyze-dataset-updates/tasks.md +0 -6
  24. package/openspec/changes/add-ckan-audit-tool/proposal.md +0 -17
  25. package/openspec/changes/add-ckan-audit-tool/specs/ckan-insights/spec.md +0 -7
  26. package/openspec/changes/add-ckan-audit-tool/tasks.md +0 -6
  27. package/openspec/changes/add-ckan-dataset-insights/proposal.md +0 -17
  28. package/openspec/changes/add-ckan-dataset-insights/specs/ckan-insights/spec.md +0 -7
  29. package/openspec/changes/add-ckan-dataset-insights/tasks.md +0 -6
  30. package/openspec/changes/add-ckan-host-allowlist-env/design.md +0 -38
  31. package/openspec/changes/add-ckan-host-allowlist-env/proposal.md +0 -16
  32. package/openspec/changes/add-ckan-host-allowlist-env/specs/ckan-request-allowlist/spec.md +0 -15
  33. package/openspec/changes/add-ckan-host-allowlist-env/specs/cloudflare-deployment/spec.md +0 -11
  34. package/openspec/changes/add-ckan-host-allowlist-env/tasks.md +0 -12
  35. package/openspec/changes/add-escape-text-query/proposal.md +0 -12
  36. package/openspec/changes/add-escape-text-query/specs/ckan-search/spec.md +0 -11
  37. package/openspec/changes/add-escape-text-query/tasks.md +0 -8
  38. package/openspec/changes/add-mqa-quality-tool/proposal.md +0 -21
  39. package/openspec/changes/add-mqa-quality-tool/specs/ckan-quality/spec.md +0 -71
  40. package/openspec/changes/add-mqa-quality-tool/tasks.md +0 -29
  41. package/openspec/changes/archive/2026-01-08-add-mcp-resources/design.md +0 -115
  42. package/openspec/changes/archive/2026-01-08-add-mcp-resources/proposal.md +0 -52
  43. package/openspec/changes/archive/2026-01-08-add-mcp-resources/specs/mcp-resources/spec.md +0 -92
  44. package/openspec/changes/archive/2026-01-08-add-mcp-resources/tasks.md +0 -56
  45. package/openspec/changes/archive/2026-01-08-expand-test-coverage-specs/design.md +0 -355
  46. package/openspec/changes/archive/2026-01-08-expand-test-coverage-specs/proposal.md +0 -161
  47. package/openspec/changes/archive/2026-01-08-expand-test-coverage-specs/tasks.md +0 -162
  48. package/openspec/changes/archive/2026-01-08-translate-project-to-english/proposal.md +0 -115
  49. package/openspec/changes/archive/2026-01-08-translate-project-to-english/specs/documentation-language/spec.md +0 -32
  50. package/openspec/changes/archive/2026-01-08-translate-project-to-english/tasks.md +0 -115
  51. package/openspec/changes/archive/2026-01-10-add-ckan-find-relevant-datasets/proposal.md +0 -17
  52. package/openspec/changes/archive/2026-01-10-add-ckan-find-relevant-datasets/specs/ckan-insights/spec.md +0 -7
  53. package/openspec/changes/archive/2026-01-10-add-ckan-find-relevant-datasets/tasks.md +0 -6
  54. package/openspec/changes/archive/2026-01-10-add-cloudflare-workers/design.md +0 -734
  55. package/openspec/changes/archive/2026-01-10-add-cloudflare-workers/proposal.md +0 -183
  56. package/openspec/changes/archive/2026-01-10-add-cloudflare-workers/specs/cloudflare-deployment/spec.md +0 -389
  57. package/openspec/changes/archive/2026-01-10-add-cloudflare-workers/tasks.md +0 -519
  58. package/openspec/changes/archive/2026-01-15-add-mcp-prompts/proposal.md +0 -13
  59. package/openspec/changes/archive/2026-01-15-add-mcp-prompts/specs/mcp-prompts/spec.md +0 -22
  60. package/openspec/changes/archive/2026-01-15-add-mcp-prompts/tasks.md +0 -10
  61. package/openspec/changes/archive/2026-01-15-add-mcp-resource-filters/proposal.md +0 -13
  62. package/openspec/changes/archive/2026-01-15-add-mcp-resource-filters/specs/mcp-resources/spec.md +0 -38
  63. package/openspec/changes/archive/2026-01-15-add-mcp-resource-filters/tasks.md +0 -10
  64. package/openspec/changes/archive/2026-01-19-update-repo-owner-ondata/proposal.md +0 -13
  65. package/openspec/changes/archive/2026-01-19-update-repo-owner-ondata/specs/repository-metadata/spec.md +0 -14
  66. package/openspec/changes/archive/2026-01-19-update-repo-owner-ondata/tasks.md +0 -12
  67. package/openspec/changes/archive/2026-01-19-update-search-parser-config/proposal.md +0 -13
  68. package/openspec/changes/archive/2026-01-19-update-search-parser-config/specs/ckan-insights/spec.md +0 -11
  69. package/openspec/changes/archive/2026-01-19-update-search-parser-config/specs/ckan-search/spec.md +0 -11
  70. package/openspec/changes/archive/2026-01-19-update-search-parser-config/tasks.md +0 -6
  71. package/openspec/changes/archive/add-automated-tests/design.md +0 -324
  72. package/openspec/changes/archive/add-automated-tests/proposal.md +0 -167
  73. package/openspec/changes/archive/add-automated-tests/specs/automated-testing/spec.md +0 -143
  74. package/openspec/changes/archive/add-automated-tests/tasks.md +0 -132
  75. package/openspec/project.md +0 -115
  76. package/openspec/specs/ckan-insights/spec.md +0 -23
  77. package/openspec/specs/ckan-search/spec.md +0 -16
  78. package/openspec/specs/cloudflare-deployment/spec.md +0 -344
  79. package/openspec/specs/documentation-language/spec.md +0 -32
  80. package/openspec/specs/mcp-prompts/spec.md +0 -26
  81. package/openspec/specs/mcp-resources/spec.md +0 -120
  82. package/openspec/specs/repository-metadata/spec.md +0 -19
  83. package/private/commenti-privati.yaml +0 -14
  84. package/testo.md +0 -12
  85. package/web-gui/PRD.md +0 -158
  86. package/web-gui/public/index.html +0 -883
  87. package/wrangler.toml +0 -6
@@ -1,366 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Data Exploration Workflow with State and Conditionals
4
-
5
- Advanced workflow demonstrating:
6
- - Conditional branching (DataStore vs CSV)
7
- - State persistence across decisions
8
- - Human-in-the-loop for resource selection
9
- - SQL queries on DataStore resources
10
-
11
- Run:
12
- python 02_data_exploration.py
13
- """
14
-
15
- import asyncio
16
- import json
17
- import os
18
- from typing import Annotated, Literal
19
-
20
- from langgraph.graph import StateGraph, START, END
21
- from langgraph.graph.message import add_messages
22
- from mcp import ClientSession, StdioServerParameters
23
- from mcp.client.stdio import stdio_client
24
-
25
-
26
# Configuration
# CKAN portal that every tool call in this example is pointed at.
CKAN_SERVER = "https://www.dati.gov.it/opendata"
# Built MCP server entry point, located relative to this script's directory.
MCP_SERVER_PATH = os.path.join(os.path.dirname(__file__), "../../dist/index.js")
# Default number of search hits requested per query. Kept small because the
# tool calls below ask for JSON and an over-long reply carries a
# "[Response truncated" marker, after which the JSON usually no longer parses.
# Prefer specific queries such as "trasporti" over generic ones such as "CSV"
# or "popolazione", which return very large metadata.
SEARCH_ROWS = 5
32
-
33
-
34
- # State definition
35
class ExplorationState(dict):
    """Dictionary-based state shared by the nodes of the exploration graph.

    The annotations below declare the keys the workflow reads and writes;
    the nodes access them with ordinary ``state["key"]`` / ``state.get``.
    """

    # Message list, annotated with the ``add_messages`` reducer.
    messages: Annotated[list, add_messages]
    # Search text handed to the package search.
    query: str
    # Dataset dicts stored by the search step.
    datasets: list[dict]
    # Dataset picked by the dataset-selection step, if any.
    selected_dataset: dict | None
    # Resource picked by the resource-selection step, if any.
    selected_resource: dict | None
    # Classification of the picked resource; drives the conditional routing.
    resource_type: Literal["datastore", "csv", "unknown"] | None
    # Summary produced by whichever analysis step ran.
    analysis_result: dict | None
    # First error message recorded by any step; later steps return early on it.
    error: str | None
46
-
47
-
48
- # MCP Client
49
class CKANMCPClient:
    """Helper for the CKAN MCP tool calls used by this workflow.

    Wraps an MCP ``ClientSession``. Every call targets ``CKAN_SERVER`` and
    requests JSON output. Problems decoding a reply are reported as
    ``{"error": ...}`` dicts instead of being raised.
    """

    # Marker looked for in reply text; everything from it onwards is dropped
    # before the JSON is parsed.
    _TRUNCATION_MARKER = "[Response truncated"

    def __init__(self, session: ClientSession):
        """Keep the MCP session that all tool calls go through."""
        self.session = session

    @classmethod
    def _parse_json_result(cls, result) -> dict:
        """Decode the first text item of a tool result as JSON.

        Returns the parsed value, ``{"error": "JSON parse error: ..."}`` when
        that first text item is not valid JSON, or
        ``{"error": "No content in response"}`` when there is no text item.
        """
        for content in result.content:
            if content.type != "text":
                continue
            text = content.text
            if cls._TRUNCATION_MARKER in text:
                # Keep only what precedes the marker. A cut-off payload will
                # normally still fail to parse and is then reported below.
                text = text.split(cls._TRUNCATION_MARKER)[0].strip()
            try:
                return json.loads(text)
            except json.JSONDecodeError as e:
                return {"error": f"JSON parse error: {e}"}
        return {"error": "No content in response"}

    async def _call_json_tool(self, tool_name: str, arguments: dict) -> dict:
        """Call ``tool_name`` against ``CKAN_SERVER`` and parse the JSON reply."""
        request = {
            "server_url": CKAN_SERVER,
            **arguments,
            "response_format": "json",
        }
        result = await self.session.call_tool(tool_name, arguments=request)
        return self._parse_json_result(result)

    async def search_packages(self, query: str, rows: int = SEARCH_ROWS) -> dict:
        """Search packages.

        Args:
            query: Search text sent as the ``q`` argument.
            rows: Number of results to request.

        Returns:
            The decoded JSON reply, or an ``{"error": ...}`` dict.
        """
        return await self._call_json_tool(
            "ckan_package_search", {"q": query, "rows": rows}
        )

    async def datastore_search(self, resource_id: str, limit: int = 3) -> dict:
        """Query DataStore.

        Args:
            resource_id: Identifier of the resource to query.
            limit: Number of records to request.

        Returns:
            The decoded JSON reply, or an ``{"error": ...}`` dict.
        """
        return await self._call_json_tool(
            "ckan_datastore_search", {"resource_id": resource_id, "limit": limit}
        )
98
-
99
-
100
- # Workflow nodes
101
async def search_node(
    state: ExplorationState, mcp_client: CKANMCPClient
) -> ExplorationState:
    """Search for datasets and store them in the state.

    Args:
        state: Workflow state; ``state["query"]`` is the search text.
        mcp_client: Client whose ``search_packages`` coroutine runs the search.

    Returns:
        The same ``state`` object. On success ``state["datasets"]`` holds the
        ``results`` list of the reply. On any failure ``state["error"]`` holds
        the message, which is also printed.
    """
    print(f"\n[SEARCH] Query: '{state['query']}'")

    failure = None
    try:
        response = await mcp_client.search_packages(state["query"])

        if "error" in response:
            failure = response["error"]
        elif "results" in response:
            datasets = response["results"]
            state["datasets"] = datasets
            print(
                f" ✓ Found {response.get('count', len(datasets))} total, showing {len(datasets)}"
            )
        else:
            failure = "Unexpected response structure"
    except Exception as e:
        # Node boundary: record the failure in the state so the graph can
        # route to its end instead of aborting the run.
        failure = str(e)

    if failure is not None:
        state["error"] = failure
        # Report every failure path the same way, including the
        # unexpected-structure case.
        print(f" ✗ Error: {failure}")

    return state
129
-
130
-
131
async def select_dataset_node(state: ExplorationState) -> ExplorationState:
    """Human-in-the-loop: select dataset.

    Lists up to three of ``state["datasets"]`` and stores the first one in
    ``state["selected_dataset"]``. The choice is hard-coded to simulate a
    user; a real application would prompt instead.

    Returns:
        The same ``state`` object, unchanged when an error is already
        recorded or there are no datasets.
    """
    print("\n[SELECT DATASET] Available datasets:")

    if state.get("error") or not state.get("datasets"):
        return state

    # Show top 3 datasets
    for i, ds in enumerate(state["datasets"][:3], 1):
        # ``organization`` can be present with a None value, in which case
        # ``.get("organization", {})`` would return None, so fall back
        # explicitly to an empty dict.
        org = ds.get("organization") or {}
        print(f"\n{i}. {ds.get('title', 'Untitled')}")
        print(f" Resources: {ds.get('num_resources', 0)}")
        print(f" Org: {org.get('title', 'N/A')}")

    # Simulate user selection (in real app, use input())
    selection = 0  # Select first
    chosen = state["datasets"][selection]
    state["selected_dataset"] = chosen
    print(f"\n → Selected: {chosen.get('title', 'Untitled')}")

    return state
150
-
151
-
152
async def select_resource_node(state: ExplorationState) -> ExplorationState:
    """Select resource and detect type.

    Picks the first resource of ``state["selected_dataset"]`` and classifies
    it as ``"datastore"`` (``datastore_active`` is truthy), ``"csv"``
    (``format`` equals "csv", case-insensitively) or ``"unknown"``.

    Returns:
        The same ``state`` object. ``state["error"]`` is set to
        "No resources available" when the dataset has no resources; the state
        is unchanged when an error is already recorded or no dataset is
        selected.
    """
    print("\n[SELECT RESOURCE]")

    if state.get("error") or not state.get("selected_dataset"):
        return state

    resources = state["selected_dataset"].get("resources", [])
    if not resources:
        state["error"] = "No resources available"
        return state

    print("Available resources:")
    for i, res in enumerate(resources[:3], 1):
        print(f"{i}. {res.get('name', 'Untitled')} ({res.get('format', 'N/A')})")

    # Select first resource
    selected = resources[0]
    state["selected_resource"] = selected

    # Detect type
    if selected.get("datastore_active"):
        state["resource_type"] = "datastore"
        print("\n → Type: DataStore (SQL queries available)")
    # ``format`` may be present with a None value; treat that like an
    # empty format instead of calling ``.lower()`` on None.
    elif (selected.get("format") or "").lower() == "csv":
        state["resource_type"] = "csv"
        print("\n → Type: CSV (download required)")
    else:
        state["resource_type"] = "unknown"
        print("\n → Type: Unknown format")

    return state
184
-
185
-
186
async def analyze_datastore_node(
    state: ExplorationState, mcp_client: CKANMCPClient
) -> ExplorationState:
    """Analyze DataStore resource.

    Fetches up to three sample records for ``state["selected_resource"]``
    through ``mcp_client.datastore_search`` and stores a summary in
    ``state["analysis_result"]``.

    Returns:
        The same ``state`` object. On failure ``state["error"]`` holds the
        message, which is also printed; the state is unchanged when an error
        is already recorded.
    """
    print("\n[ANALYZE DATASTORE]")

    if state.get("error"):
        return state

    failure = None
    try:
        resource_id = (state.get("selected_resource") or {}).get("id")
        if not resource_id:
            # Report a readable message instead of the TypeError/KeyError
            # text that subscripting a missing resource would produce.
            failure = "No DataStore resource selected"
        else:
            result = await mcp_client.datastore_search(resource_id, limit=3)

            if "error" in result:
                failure = result["error"]
            elif "records" in result:
                records = result["records"]
                fields = result.get("fields", [])

                state["analysis_result"] = {
                    "type": "datastore",
                    "record_count": len(records),
                    # Skip entries that are not dicts or carry no "id" so one
                    # odd field description does not abort the analysis.
                    "fields": [
                        f["id"] for f in fields if isinstance(f, dict) and "id" in f
                    ],
                    "sample_records": records,
                }

                print(f" ✓ Fields: {', '.join(state['analysis_result']['fields'][:5])}")
                print(f" ✓ Sample: {len(records)} records")
            else:
                failure = "DataStore query failed"
    except Exception as e:
        # Node boundary: record the failure in the state rather than
        # aborting the whole graph run.
        failure = str(e)

    if failure is not None:
        state["error"] = failure
        print(f" ✗ Error: {failure}")

    return state
225
-
226
-
227
async def analyze_csv_node(state: ExplorationState) -> ExplorationState:
    """Record where a CSV resource lives (placeholder for a real analysis).

    Nothing is downloaded: the node only copies the selected resource's
    ``url`` and ``format`` into ``state["analysis_result"]``. The state is
    returned untouched when an error is already recorded.
    """
    print("\n[ANALYZE CSV]")

    if not state.get("error"):
        resource = state["selected_resource"]
        # A real application would download the file here and inspect it
        # with pandas or DuckDB.
        summary = {
            "type": "csv",
            "url": resource.get("url"),
            "format": resource.get("format"),
        }
        state["analysis_result"] = summary

        print(f" → URL: {summary['url']}")
        print(" (Download and analyze with DuckDB/pandas)")

    return state
245
-
246
-
247
async def skip_analysis_node(state: ExplorationState) -> ExplorationState:
    """Mark the analysis as skipped because the resource format is unknown."""
    print("\n[SKIP ANALYSIS] Unknown format, cannot analyze")
    skipped_marker = {"type": "unknown", "skipped": True}
    state["analysis_result"] = skipped_marker
    return state
252
-
253
-
254
- # Routing function
255
def route_by_resource_type(state: ExplorationState) -> str:
    """Pick the branch label that follows resource selection.

    Returns ``"end"`` when an error is recorded, otherwise
    ``"analyze_datastore"`` or ``"analyze_csv"`` for those resource types,
    and ``"skip_analysis"`` for anything else.
    """
    if state.get("error"):
        return "end"

    kind = state.get("resource_type")
    if kind == "datastore":
        return "analyze_datastore"
    return "analyze_csv" if kind == "csv" else "skip_analysis"
267
-
268
-
269
- # Build workflow
270
async def build_workflow(mcp_client: CKANMCPClient) -> StateGraph:
    """Assemble and compile the exploration graph.

    The graph runs search -> select_dataset -> select_resource, then
    branches on ``route_by_resource_type`` to one of the analysis nodes
    (or straight to the end), each of which leads to the end.

    Args:
        mcp_client: Client bound into the nodes that need to call MCP tools.

    Returns:
        Whatever ``StateGraph.compile()`` produces for the assembled graph.
    """

    # Nodes are registered as single-argument callables, so the two that
    # need the client get it through these closures.
    async def run_search(state: ExplorationState) -> ExplorationState:
        return await search_node(state, mcp_client)

    async def run_datastore_analysis(state: ExplorationState) -> ExplorationState:
        return await analyze_datastore_node(state, mcp_client)

    builder = StateGraph(ExplorationState)

    node_table = (
        ("search", run_search),
        ("select_dataset", select_dataset_node),
        ("select_resource", select_resource_node),
        ("analyze_datastore", run_datastore_analysis),
        ("analyze_csv", analyze_csv_node),
        ("skip_analysis", skip_analysis_node),
    )
    for node_name, handler in node_table:
        builder.add_node(node_name, handler)

    # Fixed, linear part of the flow.
    linear_edges = (
        (START, "search"),
        ("search", "select_dataset"),
        ("select_dataset", "select_resource"),
    )
    for source, target in linear_edges:
        builder.add_edge(source, target)

    # Branch on the detected resource type.
    branch_targets = {
        "analyze_datastore": "analyze_datastore",
        "analyze_csv": "analyze_csv",
        "skip_analysis": "skip_analysis",
        "end": END,
    }
    builder.add_conditional_edges(
        "select_resource", route_by_resource_type, branch_targets
    )

    # Every analysis branch terminates the run.
    for terminal_node in ("analyze_datastore", "analyze_csv", "skip_analysis"):
        builder.add_edge(terminal_node, END)

    return builder.compile()
311
-
312
-
313
async def main():
    """Start the MCP server, run the exploration graph once, print the outcome."""
    banner = "=" * 60
    print(banner)
    print("LangGraph + CKAN MCP - Data Exploration Workflow")
    print(banner)

    # The MCP server is launched as a Node child process speaking over stdio.
    server_params = StdioServerParameters(command="node", args=[MCP_SERVER_PATH])

    async with stdio_client(server_params) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            print("\n✓ Connected to CKAN MCP Server")

            workflow = await build_workflow(CKANMCPClient(session))

            # Starting state: only the query is filled in.
            initial_state: ExplorationState = {
                "messages": [],
                "query": "trasporti",  # Query that returns manageable datasets
                "datasets": [],
                "selected_dataset": None,
                "selected_resource": None,
                "resource_type": None,
                "analysis_result": None,
                "error": None,
            }

            result = await workflow.ainvoke(initial_state)

            # Report the outcome.
            print("\n" + banner)
            print("WORKFLOW RESULT")
            print(banner)

            failure = result.get("error")
            analysis = None if failure else result.get("analysis_result")

            if failure:
                print(f"\n✗ Error: {result['error']}")
            elif analysis:
                print(f"\nAnalysis Type: {analysis['type']}")

                analysis_type = analysis["type"]
                if analysis_type == "datastore":
                    print(f"Fields: {', '.join(analysis['fields'][:5])}")
                    print(f"Records sampled: {analysis['record_count']}")
                elif analysis_type == "csv":
                    print(f"URL: {analysis['url']}")
                else:
                    print("Skipped (unknown format)")

            print("\n" + banner)
363
-
364
-
365
if __name__ == "__main__":
    # Script entry point: run the async workflow to completion.
    asyncio.run(main())