@aborruso/ckan-mcp-server 0.4.16 → 0.4.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LOG.md +78 -0
- package/README.md +104 -34
- package/dist/index.js +161 -45
- package/dist/worker.js +42 -42
- package/package.json +12 -1
- package/.devin/wiki.json +0 -273
- package/CLAUDE.md +0 -398
- package/PRD.md +0 -999
- package/REFACTORING.md +0 -238
- package/examples/langgraph/01_basic_workflow.py +0 -277
- package/examples/langgraph/02_data_exploration.py +0 -366
- package/examples/langgraph/README.md +0 -719
- package/examples/langgraph/metadata_quality.py +0 -299
- package/examples/langgraph/requirements.txt +0 -12
- package/examples/langgraph/setup.sh +0 -32
- package/examples/langgraph/test_setup.py +0 -106
- package/openspec/AGENTS.md +0 -456
- package/openspec/changes/add-ckan-analyze-dataset-structure/proposal.md +0 -17
- package/openspec/changes/add-ckan-analyze-dataset-structure/specs/ckan-insights/spec.md +0 -7
- package/openspec/changes/add-ckan-analyze-dataset-structure/tasks.md +0 -6
- package/openspec/changes/add-ckan-analyze-dataset-updates/proposal.md +0 -17
- package/openspec/changes/add-ckan-analyze-dataset-updates/specs/ckan-insights/spec.md +0 -7
- package/openspec/changes/add-ckan-analyze-dataset-updates/tasks.md +0 -6
- package/openspec/changes/add-ckan-audit-tool/proposal.md +0 -17
- package/openspec/changes/add-ckan-audit-tool/specs/ckan-insights/spec.md +0 -7
- package/openspec/changes/add-ckan-audit-tool/tasks.md +0 -6
- package/openspec/changes/add-ckan-dataset-insights/proposal.md +0 -17
- package/openspec/changes/add-ckan-dataset-insights/specs/ckan-insights/spec.md +0 -7
- package/openspec/changes/add-ckan-dataset-insights/tasks.md +0 -6
- package/openspec/changes/add-ckan-host-allowlist-env/design.md +0 -38
- package/openspec/changes/add-ckan-host-allowlist-env/proposal.md +0 -16
- package/openspec/changes/add-ckan-host-allowlist-env/specs/ckan-request-allowlist/spec.md +0 -15
- package/openspec/changes/add-ckan-host-allowlist-env/specs/cloudflare-deployment/spec.md +0 -11
- package/openspec/changes/add-ckan-host-allowlist-env/tasks.md +0 -12
- package/openspec/changes/add-escape-text-query/proposal.md +0 -12
- package/openspec/changes/add-escape-text-query/specs/ckan-search/spec.md +0 -11
- package/openspec/changes/add-escape-text-query/tasks.md +0 -8
- package/openspec/changes/add-mqa-quality-tool/proposal.md +0 -21
- package/openspec/changes/add-mqa-quality-tool/specs/ckan-quality/spec.md +0 -71
- package/openspec/changes/add-mqa-quality-tool/tasks.md +0 -29
- package/openspec/changes/archive/2026-01-08-add-mcp-resources/design.md +0 -115
- package/openspec/changes/archive/2026-01-08-add-mcp-resources/proposal.md +0 -52
- package/openspec/changes/archive/2026-01-08-add-mcp-resources/specs/mcp-resources/spec.md +0 -92
- package/openspec/changes/archive/2026-01-08-add-mcp-resources/tasks.md +0 -56
- package/openspec/changes/archive/2026-01-08-expand-test-coverage-specs/design.md +0 -355
- package/openspec/changes/archive/2026-01-08-expand-test-coverage-specs/proposal.md +0 -161
- package/openspec/changes/archive/2026-01-08-expand-test-coverage-specs/tasks.md +0 -162
- package/openspec/changes/archive/2026-01-08-translate-project-to-english/proposal.md +0 -115
- package/openspec/changes/archive/2026-01-08-translate-project-to-english/specs/documentation-language/spec.md +0 -32
- package/openspec/changes/archive/2026-01-08-translate-project-to-english/tasks.md +0 -115
- package/openspec/changes/archive/2026-01-10-add-ckan-find-relevant-datasets/proposal.md +0 -17
- package/openspec/changes/archive/2026-01-10-add-ckan-find-relevant-datasets/specs/ckan-insights/spec.md +0 -7
- package/openspec/changes/archive/2026-01-10-add-ckan-find-relevant-datasets/tasks.md +0 -6
- package/openspec/changes/archive/2026-01-10-add-cloudflare-workers/design.md +0 -734
- package/openspec/changes/archive/2026-01-10-add-cloudflare-workers/proposal.md +0 -183
- package/openspec/changes/archive/2026-01-10-add-cloudflare-workers/specs/cloudflare-deployment/spec.md +0 -389
- package/openspec/changes/archive/2026-01-10-add-cloudflare-workers/tasks.md +0 -519
- package/openspec/changes/archive/2026-01-15-add-mcp-prompts/proposal.md +0 -13
- package/openspec/changes/archive/2026-01-15-add-mcp-prompts/specs/mcp-prompts/spec.md +0 -22
- package/openspec/changes/archive/2026-01-15-add-mcp-prompts/tasks.md +0 -10
- package/openspec/changes/archive/2026-01-15-add-mcp-resource-filters/proposal.md +0 -13
- package/openspec/changes/archive/2026-01-15-add-mcp-resource-filters/specs/mcp-resources/spec.md +0 -38
- package/openspec/changes/archive/2026-01-15-add-mcp-resource-filters/tasks.md +0 -10
- package/openspec/changes/archive/2026-01-19-update-repo-owner-ondata/proposal.md +0 -13
- package/openspec/changes/archive/2026-01-19-update-repo-owner-ondata/specs/repository-metadata/spec.md +0 -14
- package/openspec/changes/archive/2026-01-19-update-repo-owner-ondata/tasks.md +0 -12
- package/openspec/changes/archive/2026-01-19-update-search-parser-config/proposal.md +0 -13
- package/openspec/changes/archive/2026-01-19-update-search-parser-config/specs/ckan-insights/spec.md +0 -11
- package/openspec/changes/archive/2026-01-19-update-search-parser-config/specs/ckan-search/spec.md +0 -11
- package/openspec/changes/archive/2026-01-19-update-search-parser-config/tasks.md +0 -6
- package/openspec/changes/archive/add-automated-tests/design.md +0 -324
- package/openspec/changes/archive/add-automated-tests/proposal.md +0 -167
- package/openspec/changes/archive/add-automated-tests/specs/automated-testing/spec.md +0 -143
- package/openspec/changes/archive/add-automated-tests/tasks.md +0 -132
- package/openspec/project.md +0 -115
- package/openspec/specs/ckan-insights/spec.md +0 -23
- package/openspec/specs/ckan-search/spec.md +0 -16
- package/openspec/specs/cloudflare-deployment/spec.md +0 -344
- package/openspec/specs/documentation-language/spec.md +0 -32
- package/openspec/specs/mcp-prompts/spec.md +0 -26
- package/openspec/specs/mcp-resources/spec.md +0 -120
- package/openspec/specs/repository-metadata/spec.md +0 -19
- package/private/commenti-privati.yaml +0 -14
- package/testo.md +0 -12
- package/web-gui/PRD.md +0 -158
- package/web-gui/public/index.html +0 -883
- package/wrangler.toml +0 -6
package/REFACTORING.md
DELETED
|
@@ -1,238 +0,0 @@
|
|
|
1
|
-
# Refactoring Documentation
|
|
2
|
-
|
|
3
|
-
## Overview
|
|
4
|
-
|
|
5
|
-
Il codebase è stato refactorizzato da un singolo file monolitico di 1021 righe a una struttura modulare di 11 file.
|
|
6
|
-
|
|
7
|
-
## Motivazione
|
|
8
|
-
|
|
9
|
-
**Problemi del file unico**:
|
|
10
|
-
- 1021 righe difficili da navigare
|
|
11
|
-
- Modifiche rischiose (alta probabilità di errori)
|
|
12
|
-
- Testing complesso
|
|
13
|
-
- Code review difficili
|
|
14
|
-
- Merge conflicts probabili in collaborazione
|
|
15
|
-
|
|
16
|
-
**Vantaggi della struttura modulare**:
|
|
17
|
-
- File più piccoli (max 350 righe)
|
|
18
|
-
- Modifiche localizzate e sicure
|
|
19
|
-
- Testing isolato per tool
|
|
20
|
-
- Manutenzione semplificata
|
|
21
|
-
- Collaborazione efficiente
|
|
22
|
-
|
|
23
|
-
## Nuova Struttura
|
|
24
|
-
|
|
25
|
-
```
|
|
26
|
-
src/
|
|
27
|
-
├── index.ts # Entry point (39 lines)
|
|
28
|
-
├── server.ts # MCP server setup (12 lines)
|
|
29
|
-
├── types.ts # Types & schemas (16 lines)
|
|
30
|
-
├── utils/
|
|
31
|
-
│ ├── http.ts # CKAN API client (51 lines)
|
|
32
|
-
│ └── formatting.ts # Output formatting (37 lines)
|
|
33
|
-
├── tools/
|
|
34
|
-
│ ├── package.ts # Package tools (350 lines)
|
|
35
|
-
│ ├── organization.ts # Organization tools (341 lines)
|
|
36
|
-
│ ├── datastore.ts # DataStore tools (146 lines)
|
|
37
|
-
│ └── status.ts # Status tools (66 lines)
|
|
38
|
-
└── transport/
|
|
39
|
-
├── stdio.ts # Stdio transport (12 lines)
|
|
40
|
-
└── http.ts # HTTP transport (27 lines)
|
|
41
|
-
```
|
|
42
|
-
|
|
43
|
-
**Total**: 1097 lines (vs 1021 original, +76 lines for better organization)
|
|
44
|
-
|
|
45
|
-
## File Descriptions
|
|
46
|
-
|
|
47
|
-
### Core Files
|
|
48
|
-
|
|
49
|
-
**`index.ts`** (Entry Point)
|
|
50
|
-
- Importa e registra tutti i tool
|
|
51
|
-
- Sceglie transport (stdio/http)
|
|
52
|
-
- Gestisce startup e error handling
|
|
53
|
-
|
|
54
|
-
**`server.ts`** (Server Configuration)
|
|
55
|
-
- Crea istanza MCP server
|
|
56
|
-
- Configurazione base (name, version)
|
|
57
|
-
|
|
58
|
-
**`types.ts`** (Type Definitions)
|
|
59
|
-
- `ResponseFormat` enum
|
|
60
|
-
- `ResponseFormatSchema` Zod validator
|
|
61
|
-
- `CHARACTER_LIMIT` constant
|
|
62
|
-
|
|
63
|
-
### Utils
|
|
64
|
-
|
|
65
|
-
**`utils/http.ts`** (HTTP Client)
|
|
66
|
-
- `makeCkanRequest<T>()` - HTTP client per CKAN API
|
|
67
|
-
- Normalizzazione URL
|
|
68
|
-
- Gestione errori (timeout, 404, network)
|
|
69
|
-
- User-Agent header
|
|
70
|
-
|
|
71
|
-
**`utils/formatting.ts`** (Output Formatting)
|
|
72
|
-
- `truncateText()` - Limita output a CHARACTER_LIMIT
|
|
73
|
-
- `formatDate()` - Format date in ISO `YYYY-MM-DD`
|
|
74
|
-
- `formatBytes()` - Human-readable file sizes
|
|
75
|
-
|
|
76
|
-
### Tools
|
|
77
|
-
|
|
78
|
-
**`tools/package.ts`** (Package/Dataset Tools)
|
|
79
|
-
- `ckan_package_search` - Search datasets (182 lines handler)
|
|
80
|
-
- `ckan_package_show` - Dataset details (138 lines handler)
|
|
81
|
-
|
|
82
|
-
**`tools/organization.ts`** (Organization Tools)
|
|
83
|
-
- `ckan_organization_list` - List organizations (118 lines)
|
|
84
|
-
- `ckan_organization_show` - Org details (95 lines)
|
|
85
|
-
- `ckan_organization_search` - Search orgs (102 lines)
|
|
86
|
-
|
|
87
|
-
**`tools/datastore.ts`** (DataStore Tools)
|
|
88
|
-
- `ckan_datastore_search` - Query tabular data (130 lines)
|
|
89
|
-
|
|
90
|
-
**`tools/status.ts`** (Status Tools)
|
|
91
|
-
- `ckan_status_show` - Check server status (51 lines)
|
|
92
|
-
|
|
93
|
-
### Transport
|
|
94
|
-
|
|
95
|
-
**`transport/stdio.ts`** (Stdio Transport)
|
|
96
|
-
- `runStdio()` - Standard input/output transport
|
|
97
|
-
- For Claude Desktop and local MCP clients
|
|
98
|
-
|
|
99
|
-
**`transport/http.ts`** (HTTP Transport)
|
|
100
|
-
- `runHTTP()` - HTTP server on configurable port
|
|
101
|
-
- Single shared transport per process
|
|
102
|
-
- For remote access via HTTP POST
|
|
103
|
-
|
|
104
|
-
## Build Configuration
|
|
105
|
-
|
|
106
|
-
**No changes needed** - esbuild continua a funzionare:
|
|
107
|
-
```bash
|
|
108
|
-
npm run build # 15ms build time
|
|
109
|
-
```
|
|
110
|
-
|
|
111
|
-
esbuild automaticamente:
|
|
112
|
-
- Bundle tutti i moduli interni
|
|
113
|
-
- Tree-shake codice non usato
|
|
114
|
-
- Mantiene external dependencies separate
|
|
115
|
-
- Produce singolo file `dist/index.js`
|
|
116
|
-
|
|
117
|
-
## Testing Results
|
|
118
|
-
|
|
119
|
-
✅ **Build**: Successful (15ms)
|
|
120
|
-
✅ **Tool Registration**: All 7 tools listed
|
|
121
|
-
✅ **Tool Execution**: `ckan_status_show` tested successfully
|
|
122
|
-
✅ **Backward Compatibility**: 100% - stessa API MCP
|
|
123
|
-
|
|
124
|
-
## Benefits Achieved
|
|
125
|
-
|
|
126
|
-
### Maintainability
|
|
127
|
-
- Ogni file ≤ 350 righe (easy to understand)
|
|
128
|
-
- Moduli isolati (modifiche localizzate)
|
|
129
|
-
- Clear separation of concerns
|
|
130
|
-
|
|
131
|
-
### Testing
|
|
132
|
-
- Tool handlers isolati e testabili
|
|
133
|
-
- Utilities testabili indipendentemente
|
|
134
|
-
- Mock più facili per unit tests
|
|
135
|
-
|
|
136
|
-
### Collaboration
|
|
137
|
-
- Merge conflicts ridotti
|
|
138
|
-
- Code review più veloci
|
|
139
|
-
- Feature development parallelo possibile
|
|
140
|
-
|
|
141
|
-
### Performance
|
|
142
|
-
- Build time invariato: ~15ms
|
|
143
|
-
- Bundle size invariato: ~30KB
|
|
144
|
-
- Runtime performance identico
|
|
145
|
-
- Tree-shaking preservato
|
|
146
|
-
|
|
147
|
-
## Migration Notes
|
|
148
|
-
|
|
149
|
-
### Old Structure
|
|
150
|
-
```typescript
|
|
151
|
-
// src/index.ts (1021 lines)
|
|
152
|
-
// Everything in one file
|
|
153
|
-
```
|
|
154
|
-
|
|
155
|
-
### New Structure
|
|
156
|
-
```typescript
|
|
157
|
-
// src/index.ts (39 lines)
|
|
158
|
-
import { createServer } from "./server.js";
|
|
159
|
-
import { registerPackageTools } from "./tools/package.js";
|
|
160
|
-
// ...
|
|
161
|
-
|
|
162
|
-
const server = createServer();
|
|
163
|
-
registerPackageTools(server);
|
|
164
|
-
// ...
|
|
165
|
-
```
|
|
166
|
-
|
|
167
|
-
### Backward Compatibility
|
|
168
|
-
|
|
169
|
-
✅ **Zero breaking changes**:
|
|
170
|
-
- Stessi tool MCP esposti
|
|
171
|
-
- Stessi parametri input
|
|
172
|
-
- Stesso formato output
|
|
173
|
-
- Stesso comportamento
|
|
174
|
-
|
|
175
|
-
### Old File Preserved
|
|
176
|
-
|
|
177
|
-
`src/index-old.ts` - Backup del file originale (per riferimento)
|
|
178
|
-
|
|
179
|
-
## Future Enhancements Enabled
|
|
180
|
-
|
|
181
|
-
Con la nuova struttura diventa più facile:
|
|
182
|
-
|
|
183
|
-
1. **Unit Testing**: Test singoli tool in isolamento
|
|
184
|
-
2. **New Tools**: Aggiungere tool in file separati
|
|
185
|
-
3. **Shared Logic**: Utilities riutilizzabili
|
|
186
|
-
4. **Documentation**: JSDoc per ogni modulo
|
|
187
|
-
5. **Type Safety**: Types centralizzati
|
|
188
|
-
6. **Optimization**: Ottimizzare singoli moduli
|
|
189
|
-
|
|
190
|
-
## Maintenance Guide
|
|
191
|
-
|
|
192
|
-
### Adding a New Tool
|
|
193
|
-
|
|
194
|
-
1. Create `src/tools/newtool.ts`:
|
|
195
|
-
```typescript
|
|
196
|
-
import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
197
|
-
|
|
198
|
-
export function registerNewTools(server: McpServer) {
|
|
199
|
-
server.registerTool("tool_name", { ... }, async (params) => { ... });
|
|
200
|
-
}
|
|
201
|
-
```
|
|
202
|
-
|
|
203
|
-
2. Import in `src/index.ts`:
|
|
204
|
-
```typescript
|
|
205
|
-
import { registerNewTools } from "./tools/newtool.js";
|
|
206
|
-
registerNewTools(server);
|
|
207
|
-
```
|
|
208
|
-
|
|
209
|
-
3. Build and test:
|
|
210
|
-
```bash
|
|
211
|
-
npm run build
|
|
212
|
-
npm start
|
|
213
|
-
```
|
|
214
|
-
|
|
215
|
-
### Modifying a Tool
|
|
216
|
-
|
|
217
|
-
1. Edit relevant file in `src/tools/`
|
|
218
|
-
2. Changes are isolated
|
|
219
|
-
3. Build and test
|
|
220
|
-
4. No risk to other tools
|
|
221
|
-
|
|
222
|
-
### Adding Utilities
|
|
223
|
-
|
|
224
|
-
1. Create in `src/utils/`
|
|
225
|
-
2. Export function
|
|
226
|
-
3. Import where needed
|
|
227
|
-
4. Automatic tree-shaking if unused
|
|
228
|
-
|
|
229
|
-
## Conclusion
|
|
230
|
-
|
|
231
|
-
Il refactoring è stato completato con successo:
|
|
232
|
-
- ✅ Struttura modulare
|
|
233
|
-
- ✅ Build funzionante
|
|
234
|
-
- ✅ Backward compatible
|
|
235
|
-
- ✅ Testing verificato
|
|
236
|
-
- ✅ Performance preserved
|
|
237
|
-
|
|
238
|
-
Il codice è ora più manutenibile, testabile e scalabile, mantenendo tutta la funzionalità originale.
|
|
package/examples/langgraph/01_basic_workflow.py
DELETED
|
@@ -1,277 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""
|
|
3
|
-
Basic LangGraph Workflow with CKAN MCP Server
|
|
4
|
-
|
|
5
|
-
Demonstrates simple sequential workflow:
|
|
6
|
-
1. Search datasets by keyword
|
|
7
|
-
2. Filter by metadata quality using scoring system
|
|
8
|
-
3. Extract CSV resources
|
|
9
|
-
4. Display results
|
|
10
|
-
|
|
11
|
-
Run:
|
|
12
|
-
uvx --with langgraph --with mcp --with langchain-core python 01_basic_workflow.py
|
|
13
|
-
"""
|
|
14
|
-
|
|
15
|
-
import asyncio
|
|
16
|
-
import json
|
|
17
|
-
import os
|
|
18
|
-
from typing import Annotated, TypedDict
|
|
19
|
-
|
|
20
|
-
from langgraph.graph import StateGraph, START, END
|
|
21
|
-
from langgraph.graph.message import add_messages
|
|
22
|
-
from mcp import ClientSession, StdioServerParameters
|
|
23
|
-
from mcp.client.stdio import stdio_client
|
|
24
|
-
|
|
25
|
-
from metadata_quality import MetadataQualityScorer
|
|
26
|
-
|
|
27
|
-
# --- Script configuration ---------------------------------------------------
# Directory containing this script; the server bundle is located relative to it.
_SCRIPT_DIR = os.path.dirname(__file__)

# Base URL of the CKAN portal passed as `server_url` to the MCP tools.
CKAN_SERVER = "https://www.dati.gov.it/opendata"

# Entry point of the MCP server bundle, two directories above this script.
MCP_SERVER_PATH = os.path.join(_SCRIPT_DIR, "../../dist/index.js")

# Minimum quality score (0-100) a dataset must reach to pass the filter step.
QUALITY_THRESHOLD = 40

# Rows requested per search; kept low to avoid JSON truncation in MCP responses.
SEARCH_ROWS = 5
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
# State definition
|
|
35
|
-
class WorkflowState(TypedDict):
    """Shared state that each workflow node reads and updates."""

    # Progress log of the run; annotated with the `add_messages` reducer.
    messages: Annotated[list, add_messages]
    # Free-text search expression sent to CKAN.
    query: str
    # Datasets returned by the search step.
    datasets: list[dict]
    # Subset of `datasets` that passed the quality filter.
    filtered_datasets: list[dict]
    # CSV resources collected by the extraction step.
    csv_resources: list[dict]
    # Message of the first failure, or None while the run is healthy.
    error: str | None
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
# MCP Client helper
|
|
47
|
-
class CKANMCPClient:
    """Helper for calling CKAN MCP Server tools through an MCP session."""

    # Text the server appends when it cuts a long response short.
    _TRUNCATION_MARKER = "[Response truncated"

    def __init__(self, session: ClientSession):
        """Keep the MCP client session used for all tool calls."""
        self.session = session

    async def search_packages(self, query: str, rows: int = SEARCH_ROWS) -> dict:
        """Search CKAN packages and return the decoded JSON payload.

        Args:
            query: Search expression forwarded as the tool's ``q`` argument.
            rows: Maximum number of datasets to request.

        Returns:
            The decoded JSON object on success. On a tool-reported error, an
            undecodable or non-object payload, or a response without text
            content, a dict with a single ``"error"`` key is returned instead
            of raising.
        """
        result = await self.session.call_tool(
            "ckan_package_search",
            arguments={
                "server_url": CKAN_SERVER,
                "q": query,
                "rows": rows,
                "response_format": "json",
            },
        )

        # NOTE(review): MCP tool results are expected to carry an `isError`
        # flag; getattr keeps this safe if the attribute is absent.
        tool_failed = bool(getattr(result, "isError", False))

        # Only the first text item is considered, as before.
        for content in result.content:
            if content.type != "text":
                continue

            text = content.text

            # An error result carries a plain message, not JSON: surface it
            # verbatim rather than reporting a misleading parse failure.
            if tool_failed:
                return {"error": text}

            truncated = self._TRUNCATION_MARKER in text
            if truncated:
                # Drop the marker; the remainder may still be incomplete JSON.
                text = text.split(self._TRUNCATION_MARKER)[0].strip()

            try:
                payload = json.loads(text)
            except json.JSONDecodeError as e:
                hint = (
                    " (response was truncated; try requesting fewer rows)"
                    if truncated
                    else ""
                )
                return {"error": f"JSON parse error: {e}{hint}"}

            # Callers index the payload by key, so anything but an object is
            # reported as an error instead of being passed through.
            if not isinstance(payload, dict):
                return {
                    "error": f"Unexpected JSON payload type: {type(payload).__name__}"
                }
            return payload

        return {"error": "No content in response"}
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
# Workflow nodes
|
|
81
|
-
async def search_datasets_node(
    state: WorkflowState, mcp_client: CKANMCPClient
) -> WorkflowState:
    """Node 1: run the dataset search and record the outcome in ``state``.

    On success ``state["datasets"]`` receives the returned results and a
    progress message is appended. On any failure (error payload, missing
    ``results`` key, or an exception) ``state["error"]`` is set instead.
    The same ``state`` object is returned in every case.
    """
    print(f"\n[1/3] Searching datasets for: '{state['query']}'")

    try:
        payload = await mcp_client.search_packages(state["query"])

        if "error" in payload:
            state["error"] = payload["error"]
            print(f" ✗ Error: {payload['error']}")
        elif "results" not in payload:
            state["error"] = "Unexpected response structure"
            print(" ✗ Error: missing 'results' key")
        else:
            found = payload["results"]
            shown = len(found)
            state["datasets"] = found
            state["messages"].append(
                {"role": "assistant", "content": f"Found {shown} datasets"}
            )
            # `count` is the portal-wide total when present; fall back to the
            # number of rows actually returned.
            print(f" ✓ Found {payload.get('count', shown)} total, showing {shown}")
    except Exception as exc:
        # Demo-level boundary: any failure becomes a workflow error message.
        state["error"] = str(exc)
        print(f" ✗ Error: {exc}")

    return state
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
async def filter_quality_node(state: WorkflowState) -> WorkflowState:
    """Node 2: keep only datasets whose quality score meets the threshold.

    Each dataset in ``state["datasets"]`` is scored with
    ``MetadataQualityScorer``; the scoring result is attached to the dataset
    under ``"_quality"``. Datasets scoring at least ``QUALITY_THRESHOLD`` are
    stored in ``state["filtered_datasets"]``. If an earlier step recorded an
    error, the state is returned untouched.
    """
    print("\n[2/3] Filtering by metadata quality")

    if state.get("error"):
        return state

    scorer = MetadataQualityScorer()
    filtered = []

    for ds in state["datasets"]:
        quality = scorer.score_dataset(ds)
        ds["_quality"] = quality  # Attach quality info to dataset

        # A missing or null title must not crash the progress output.
        title = str(ds.get("title") or "Untitled")[:50]

        if quality["score"] >= QUALITY_THRESHOLD:
            filtered.append(ds)
            print(f" ✓ {title}: {quality['score']}/100 ({quality['level']})")
        else:
            print(f" ✗ {title}: {quality['score']}/100 (rejected)")

    state["filtered_datasets"] = filtered
    state["messages"].append(
        {
            "role": "assistant",
            "content": f"Filtered to {len(filtered)} quality datasets",
        }
    )
    print(
        f"\n → {len(filtered)}/{len(state['datasets'])} datasets pass quality threshold ({QUALITY_THRESHOLD})"
    )

    # The older title/notes/num_resources filter that used to follow this
    # return was unreachable and has been removed.
    return state
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
async def extract_csv_node(state: WorkflowState) -> WorkflowState:
    """Node 3: collect CSV resources from the first five filtered datasets.

    A resource counts as CSV when its ``format`` field equals ``"csv"``
    ignoring case. Results are stored in ``state["csv_resources"]`` as dicts
    with ``dataset_name``, ``dataset_title``, ``resource_name`` and ``url``.
    If an earlier step recorded an error, the state is returned untouched.
    """
    print("\n[3/3] Extracting CSV resources")

    if state.get("error"):
        return state

    csv_resources = []
    for dataset in state["filtered_datasets"][:5]:  # Limit to first 5
        # `resources` may be absent or explicitly null.
        for resource in dataset.get("resources") or []:
            # `format` may be present but null, so `.get("format", "")` alone
            # would still hand None to `.lower()`.
            if (resource.get("format") or "").lower() != "csv":
                continue
            csv_resources.append(
                {
                    "dataset_name": dataset.get("name"),
                    "dataset_title": dataset.get("title"),
                    "resource_name": resource.get("name", "Untitled"),
                    "url": resource.get("url"),
                }
            )

    state["csv_resources"] = csv_resources
    state["messages"].append(
        {
            "role": "assistant",
            "content": f"Extracted {len(csv_resources)} CSV resources",
        }
    )
    print(f" ✓ Found {len(csv_resources)} CSV resources")

    return state
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
# Build graph
|
|
202
|
-
async def build_workflow(mcp_client: CKANMCPClient) -> StateGraph:
    """Assemble and compile the search -> filter -> extract workflow.

    Args:
        mcp_client: Client handed to the search node on every invocation.

    Returns:
        The result of ``StateGraph.compile()``.
        NOTE(review): the annotation says ``StateGraph``, but ``compile()``
        presumably returns a compiled-graph type — confirm and adjust.
    """

    # The search node needs the client, so it is bound through a closure.
    async def run_search(state: WorkflowState) -> WorkflowState:
        return await search_datasets_node(state, mcp_client)

    builder = StateGraph(WorkflowState)

    node_table = (
        ("search", run_search),
        ("filter", filter_quality_node),
        ("extract", extract_csv_node),
    )
    for node_name, handler in node_table:
        builder.add_node(node_name, handler)

    # Strictly linear pipeline: each stage feeds the next one.
    stages = (START, "search", "filter", "extract", END)
    for source, target in zip(stages, stages[1:]):
        builder.add_edge(source, target)

    return builder.compile()
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
async def main():
    """Start the MCP server over stdio, run the workflow and print a summary."""
    banner = "=" * 60

    print(banner)
    print("LangGraph + CKAN MCP Server - Basic Workflow")
    print(banner)

    # The server is launched as a `node` child process speaking MCP over stdio.
    server_params = StdioServerParameters(command="node", args=[MCP_SERVER_PATH])

    async with stdio_client(server_params) as (read, write), ClientSession(
        read, write
    ) as session:
        await session.initialize()
        print("\n✓ Connected to CKAN MCP Server")

        workflow = await build_workflow(CKANMCPClient(session))

        initial_state: WorkflowState = {
            "messages": [],
            "query": "mobilità urbana",
            "datasets": [],
            "filtered_datasets": [],
            "csv_resources": [],
            "error": None,
        }
        result = await workflow.ainvoke(initial_state)

        print("\n" + banner)
        print("RESULTS")
        print(banner)

        failure = result["error"]
        if failure:
            print(f"\n✗ Workflow failed: {failure}")
        else:
            print(f"\nQuery: {result['query']}")
            print(f"Total datasets found: {len(result['datasets'])}")
            print(f"Quality datasets: {len(result['filtered_datasets'])}")
            print(f"CSV resources: {len(result['csv_resources'])}")

            csv_hits = result["csv_resources"]
            if csv_hits:
                print("\nFirst 3 CSV resources:")
                for position, hit in enumerate(csv_hits[:3], 1):
                    print(f"\n{position}. {hit['resource_name']}")
                    print(f" Dataset: {hit['dataset_title']}")
                    print(f" URL: {hit['url']}")

        print("\n" + banner)


if __name__ == "__main__":
    asyncio.run(main())
|