@aborruso/ckan-mcp-server 0.4.4 → 0.4.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. package/EXAMPLES.md +9 -0
  2. package/LOG.md +12 -0
  3. package/README.md +12 -1
  4. package/dist/index.js +363 -3
  5. package/dist/worker.js +62 -28
  6. package/openspec/changes/add-ckan-analyze-dataset-structure/proposal.md +17 -0
  7. package/openspec/changes/add-ckan-analyze-dataset-structure/specs/ckan-insights/spec.md +7 -0
  8. package/openspec/changes/add-ckan-analyze-dataset-structure/tasks.md +6 -0
  9. package/openspec/changes/add-ckan-analyze-dataset-updates/proposal.md +17 -0
  10. package/openspec/changes/add-ckan-analyze-dataset-updates/specs/ckan-insights/spec.md +7 -0
  11. package/openspec/changes/add-ckan-analyze-dataset-updates/tasks.md +6 -0
  12. package/openspec/changes/add-ckan-audit-tool/proposal.md +17 -0
  13. package/openspec/changes/add-ckan-audit-tool/specs/ckan-insights/spec.md +7 -0
  14. package/openspec/changes/add-ckan-audit-tool/tasks.md +6 -0
  15. package/openspec/changes/add-ckan-dataset-insights/proposal.md +17 -0
  16. package/openspec/changes/add-ckan-dataset-insights/specs/ckan-insights/spec.md +7 -0
  17. package/openspec/changes/add-ckan-dataset-insights/tasks.md +6 -0
  18. package/openspec/changes/add-ckan-find-relevant-datasets/proposal.md +17 -0
  19. package/openspec/changes/add-ckan-find-relevant-datasets/specs/ckan-insights/spec.md +7 -0
  20. package/openspec/changes/add-ckan-find-relevant-datasets/tasks.md +6 -0
  21. package/openspec/specs/ckan-insights/spec.md +12 -0
  22. package/openspec/specs/cloudflare-deployment/spec.md +344 -0
  23. package/package.json +1 -1
  24. /package/openspec/changes/{add-cloudflare-workers → archive/2026-01-10-add-cloudflare-workers}/design.md +0 -0
  25. /package/openspec/changes/{add-cloudflare-workers → archive/2026-01-10-add-cloudflare-workers}/proposal.md +0 -0
  26. /package/openspec/changes/{add-cloudflare-workers → archive/2026-01-10-add-cloudflare-workers}/specs/cloudflare-deployment/spec.md +0 -0
  27. /package/openspec/changes/{add-cloudflare-workers → archive/2026-01-10-add-cloudflare-workers}/tasks.md +0 -0
package/EXAMPLES.md CHANGED
@@ -20,6 +20,15 @@ ckan_package_search({
20
20
  })
21
21
  ```
22
22
 
23
+ ### Find relevant datasets
24
+ ```typescript
25
+ ckan_find_relevant_datasets({
26
+ server_url: "https://demo.ckan.org",
27
+ query: "open data transport",
28
+ limit: 5
29
+ })
30
+ ```
31
+
23
32
  ## Italy Examples - dati.gov.it
24
33
 
25
34
  ### Search recent datasets
package/LOG.md CHANGED
@@ -2,6 +2,18 @@
2
2
 
3
3
  ## 2026-01-10
4
4
 
5
+ ### Version 0.4.6 - Relevance ranking
6
+ - **Tool**: Added `ckan_find_relevant_datasets`
7
+ - **Docs**: Updated README/EXAMPLES
8
+ - **Tests**: Added relevance scoring checks
9
+
10
+ ## 2026-01-10
11
+
12
+ ### Version 0.4.5 - Health version
13
+ - **Workers**: /health version/tools updated
14
+
15
+ ## 2026-01-10
16
+
5
17
  ### Version 0.4.4 - DataStore SQL
6
18
  - **Tool**: Added `ckan_datastore_search_sql`
7
19
  - **Docs**: Updated README/EXAMPLES/PRD for SQL support
package/README.md CHANGED
@@ -56,7 +56,7 @@ TRANSPORT=http PORT=3000 npm start
56
56
 
57
57
  The server will be available at `http://localhost:3000/mcp`
58
58
 
59
- ## Deployment Options
59
+ ## Usage Options
60
60
 
61
61
  ### Option 1: Local Installation (stdio mode)
62
62
 
@@ -173,6 +173,7 @@ Use the public Cloudflare Workers deployment (no local installation required):
173
173
  ### Search and Discovery
174
174
 
175
175
  - **ckan_package_search**: Search datasets with Solr queries
176
+ - **ckan_find_relevant_datasets**: Rank datasets by relevance score
176
177
  - **ckan_package_show**: Complete details of a dataset
177
178
  - **ckan_package_list**: List all datasets
178
179
  - **ckan_tag_list**: List tags with counts
@@ -225,6 +226,16 @@ ckan_package_search({
225
226
  })
226
227
  ```
227
228
 
229
+ ### Rank datasets by relevance
230
+
231
+ ```typescript
232
+ ckan_find_relevant_datasets({
233
+ server_url: "https://www.dati.gov.it/opendata",
234
+ query: "mobilità urbana",
235
+ limit: 5
236
+ })
237
+ ```
238
+
228
239
  ### Filter by organization
229
240
 
230
241
  ```typescript
package/dist/index.js CHANGED
@@ -126,6 +126,91 @@ function getOrganizationViewUrl(serverUrl, org) {
126
126
  }
127
127
 
128
128
  // src/tools/package.ts
129
+ var DEFAULT_RELEVANCE_WEIGHTS = {
130
+ title: 4,
131
+ notes: 2,
132
+ tags: 3,
133
+ organization: 1
134
+ };
135
+ var QUERY_STOPWORDS = /* @__PURE__ */ new Set([
136
+ "a",
137
+ "an",
138
+ "the",
139
+ "and",
140
+ "or",
141
+ "but",
142
+ "in",
143
+ "on",
144
+ "at",
145
+ "to",
146
+ "for",
147
+ "of",
148
+ "with",
149
+ "by",
150
+ "from",
151
+ "as",
152
+ "is",
153
+ "was",
154
+ "are",
155
+ "were",
156
+ "be",
157
+ "been",
158
+ "being",
159
+ "have",
160
+ "has",
161
+ "had",
162
+ "do",
163
+ "does",
164
+ "did",
165
+ "will",
166
+ "would",
167
+ "could",
168
+ "should",
169
+ "may",
170
+ "might",
171
+ "must",
172
+ "can",
173
+ "this",
174
+ "that",
175
+ "these",
176
+ "those"
177
+ ]);
178
+ var extractQueryTerms = (query) => {
179
+ const matches = query.toLowerCase().match(/[\p{L}\p{N}]+/gu) ?? [];
180
+ const terms = matches.filter((term) => term.length > 1 && !QUERY_STOPWORDS.has(term));
181
+ return Array.from(new Set(terms));
182
+ };
183
+ var escapeRegExp = (value) => value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
184
+ var textMatchesTerms = (text, terms) => {
185
+ if (!text || terms.length === 0) return false;
186
+ const normalized = text.toLowerCase().replace(/_/g, " ");
187
+ return terms.some((term) => new RegExp(`\\b${escapeRegExp(term)}\\b`, "i").test(normalized));
188
+ };
189
+ var scoreTextField = (text, terms, weight) => {
190
+ return textMatchesTerms(text, terms) ? weight : 0;
191
+ };
192
+ var scoreDatasetRelevance = (query, dataset, weights = DEFAULT_RELEVANCE_WEIGHTS) => {
193
+ const terms = extractQueryTerms(query);
194
+ const titleText = dataset.title || dataset.name || "";
195
+ const notesText = dataset.notes || "";
196
+ const orgText = dataset.organization?.title || dataset.organization?.name || dataset.owner_org || "";
197
+ const breakdown = {
198
+ title: scoreTextField(titleText, terms, weights.title),
199
+ notes: scoreTextField(notesText, terms, weights.notes),
200
+ tags: 0,
201
+ organization: scoreTextField(orgText, terms, weights.organization),
202
+ total: 0
203
+ };
204
+ if (Array.isArray(dataset.tags) && dataset.tags.length > 0 && terms.length > 0) {
205
+ const tagMatch = dataset.tags.some((tag) => {
206
+ const tagValue = typeof tag === "string" ? tag : tag?.name;
207
+ return textMatchesTerms(tagValue, terms);
208
+ });
209
+ breakdown.tags = tagMatch ? weights.tags : 0;
210
+ }
211
+ breakdown.total = breakdown.title + breakdown.notes + breakdown.tags + breakdown.organization;
212
+ return { total: breakdown.total, breakdown, terms };
213
+ };
129
214
  function registerPackageTools(server2) {
130
215
  server2.registerTool(
131
216
  "ckan_package_search",
@@ -137,7 +222,7 @@ Supports full Solr search capabilities including filters, facets, and sorting.
137
222
  Use this to discover datasets matching specific criteria.
138
223
 
139
224
  Args:
140
- - server_url (string): Base URL of CKAN server (e.g., "https://dati.gov.it")
225
+ - server_url (string): Base URL of CKAN server (e.g., "https://dati.gov.it/opendata")
141
226
  - q (string): Search query using Solr syntax (default: "*:*" for all)
142
227
  - fq (string): Filter query (e.g., "organization:comune-palermo")
143
228
  - rows (number): Number of results to return (default: 10, max: 1000)
@@ -335,6 +420,177 @@ ${params.fq ? `**Filter**: ${params.fq}
335
420
  }
336
421
  }
337
422
  );
423
+ server2.registerTool(
424
+ "ckan_find_relevant_datasets",
425
+ {
426
+ title: "Find Relevant CKAN Datasets",
427
+ description: `Find and rank datasets by relevance to a query using weighted fields.
428
+
429
+ Uses package_search for discovery and applies a local scoring model.
430
+
431
+ Args:
432
+ - server_url (string): Base URL of CKAN server (e.g., "https://dati.gov.it/opendata")
433
+ - query (string): Search query text
434
+ - limit (number): Number of datasets to return (default: 10)
435
+ - weights (object): Optional weights for title/notes/tags/organization
436
+ - response_format ('markdown' | 'json'): Output format
437
+
438
+ Returns:
439
+ Ranked datasets with relevance scores and breakdowns
440
+
441
+ Examples:
442
+ - { server_url: "https://dati.gov.it/opendata", query: "mobilit\xE0" }
443
+ - { server_url: "...", query: "trasporti", limit: 5, weights: { title: 5, notes: 2 } }`,
444
+ inputSchema: z2.object({
445
+ server_url: z2.string().url().describe("Base URL of the CKAN server (e.g., https://dati.gov.it/opendata)"),
446
+ query: z2.string().min(2).describe("Search query text"),
447
+ limit: z2.number().int().min(1).max(50).optional().default(10).describe("Number of datasets to return"),
448
+ weights: z2.object({
449
+ title: z2.number().min(0).optional(),
450
+ notes: z2.number().min(0).optional(),
451
+ tags: z2.number().min(0).optional(),
452
+ organization: z2.number().min(0).optional()
453
+ }).optional().describe("Optional weights per field"),
454
+ response_format: ResponseFormatSchema
455
+ }).strict(),
456
+ annotations: {
457
+ readOnlyHint: true,
458
+ destructiveHint: false,
459
+ idempotentHint: true,
460
+ openWorldHint: true
461
+ }
462
+ },
463
+ async (params) => {
464
+ try {
465
+ const weights = {
466
+ ...DEFAULT_RELEVANCE_WEIGHTS,
467
+ ...params.weights ?? {}
468
+ };
469
+ const rows = Math.min(Math.max(params.limit * 5, params.limit), 100);
470
+ const searchResult = await makeCkanRequest(
471
+ params.server_url,
472
+ "package_search",
473
+ {
474
+ q: params.query,
475
+ rows,
476
+ start: 0
477
+ }
478
+ );
479
+ const scored = (searchResult.results || []).map((dataset) => {
480
+ const { total, breakdown } = scoreDatasetRelevance(
481
+ params.query,
482
+ dataset,
483
+ weights
484
+ );
485
+ return {
486
+ dataset,
487
+ score: total,
488
+ breakdown
489
+ };
490
+ });
491
+ scored.sort((a, b) => b.score - a.score);
492
+ const top = scored.slice(0, params.limit).map((item) => {
493
+ const dataset = item.dataset;
494
+ return {
495
+ id: dataset.id,
496
+ name: dataset.name,
497
+ title: dataset.title || dataset.name,
498
+ organization: dataset.organization?.title || dataset.organization?.name || dataset.owner_org,
499
+ tags: Array.isArray(dataset.tags) ? dataset.tags.map((tag) => tag.name) : [],
500
+ metadata_modified: dataset.metadata_modified,
501
+ score: item.score,
502
+ breakdown: item.breakdown
503
+ };
504
+ });
505
+ const payload = {
506
+ query: params.query,
507
+ terms: extractQueryTerms(params.query),
508
+ weights,
509
+ total_results: searchResult.count ?? 0,
510
+ returned: top.length,
511
+ results: top
512
+ };
513
+ if (params.response_format === "json" /* JSON */) {
514
+ return {
515
+ content: [{ type: "text", text: truncateText(JSON.stringify(payload, null, 2)) }],
516
+ structuredContent: payload
517
+ };
518
+ }
519
+ let markdown = `# Relevant CKAN Datasets
520
+
521
+ `;
522
+ markdown += `**Server**: ${params.server_url}
523
+ `;
524
+ markdown += `**Query**: ${params.query}
525
+ `;
526
+ markdown += `**Terms**: ${payload.terms.length > 0 ? payload.terms.join(", ") : "n/a"}
527
+ `;
528
+ markdown += `**Total Results**: ${payload.total_results}
529
+ `;
530
+ markdown += `**Returned**: ${payload.returned}
531
+
532
+ `;
533
+ markdown += `## Weights
534
+
535
+ `;
536
+ markdown += `- **Title**: ${weights.title}
537
+ `;
538
+ markdown += `- **Notes**: ${weights.notes}
539
+ `;
540
+ markdown += `- **Tags**: ${weights.tags}
541
+ `;
542
+ markdown += `- **Organization**: ${weights.organization}
543
+
544
+ `;
545
+ if (top.length === 0) {
546
+ markdown += "No datasets matched the query terms.\n";
547
+ } else {
548
+ markdown += `## Results
549
+
550
+ `;
551
+ markdown += `| Rank | Dataset | Score | Title | Org | Tags |
552
+ `;
553
+ markdown += `| --- | --- | --- | --- | --- | --- |
554
+ `;
555
+ top.forEach((dataset, index) => {
556
+ const tags = dataset.tags.slice(0, 3).join(", ");
557
+ markdown += `| ${index + 1} | ${dataset.name} | ${dataset.score} | ${dataset.title} | ${dataset.organization || "-"} | ${tags || "-"} |
558
+ `;
559
+ });
560
+ markdown += `
561
+ ### Score Breakdown
562
+
563
+ `;
564
+ top.forEach((dataset, index) => {
565
+ markdown += `**${index + 1}. ${dataset.title}**
566
+ `;
567
+ markdown += `- Title: ${dataset.breakdown.title}
568
+ `;
569
+ markdown += `- Notes: ${dataset.breakdown.notes}
570
+ `;
571
+ markdown += `- Tags: ${dataset.breakdown.tags}
572
+ `;
573
+ markdown += `- Organization: ${dataset.breakdown.organization}
574
+ `;
575
+ markdown += `- Total: ${dataset.breakdown.total}
576
+
577
+ `;
578
+ });
579
+ }
580
+ return {
581
+ content: [{ type: "text", text: truncateText(markdown) }]
582
+ };
583
+ } catch (error) {
584
+ return {
585
+ content: [{
586
+ type: "text",
587
+ text: `Error ranking datasets: ${error instanceof Error ? error.message : String(error)}`
588
+ }],
589
+ isError: true
590
+ };
591
+ }
592
+ }
593
+ );
338
594
  server2.registerTool(
339
595
  "ckan_package_show",
340
596
  {
@@ -353,7 +609,7 @@ Returns:
353
609
  Complete dataset object with all metadata and resources
354
610
 
355
611
  Examples:
356
- - { server_url: "https://dati.gov.it", id: "dataset-name" }
612
+ - { server_url: "https://dati.gov.it/opendata", id: "dataset-name" }
357
613
  - { server_url: "...", id: "abc-123-def", include_tracking: true }`,
358
614
  inputSchema: z2.object({
359
615
  server_url: z2.string().url().describe("Base URL of the CKAN server"),
@@ -998,6 +1254,110 @@ Examples:
998
1254
  }
999
1255
  }
1000
1256
  );
1257
+ server2.registerTool(
1258
+ "ckan_datastore_search_sql",
1259
+ {
1260
+ title: "Search CKAN DataStore with SQL",
1261
+ description: `Run SQL queries on a CKAN DataStore resource.
1262
+
1263
+ This endpoint is only available on CKAN portals with DataStore enabled and SQL access exposed.
1264
+
1265
+ Args:
1266
+ - server_url (string): Base URL of CKAN server
1267
+ - sql (string): SQL query (e.g., SELECT * FROM "resource_id" LIMIT 10)
1268
+ - response_format ('markdown' | 'json'): Output format
1269
+
1270
+ Returns:
1271
+ SQL query results from DataStore
1272
+
1273
+ Examples:
1274
+ - { server_url: "...", sql: "SELECT * FROM "abc-123" LIMIT 10" }
1275
+ - { server_url: "...", sql: "SELECT COUNT(*) AS total FROM "abc-123"" }`,
1276
+ inputSchema: z4.object({
1277
+ server_url: z4.string().url(),
1278
+ sql: z4.string().min(1),
1279
+ response_format: ResponseFormatSchema
1280
+ }).strict(),
1281
+ annotations: {
1282
+ readOnlyHint: true,
1283
+ destructiveHint: false,
1284
+ idempotentHint: true,
1285
+ openWorldHint: false
1286
+ }
1287
+ },
1288
+ async (params) => {
1289
+ try {
1290
+ const result = await makeCkanRequest(
1291
+ params.server_url,
1292
+ "datastore_search_sql",
1293
+ { sql: params.sql }
1294
+ );
1295
+ if (params.response_format === "json" /* JSON */) {
1296
+ return {
1297
+ content: [{ type: "text", text: truncateText(JSON.stringify(result, null, 2)) }],
1298
+ structuredContent: result
1299
+ };
1300
+ }
1301
+ const records = result.records || [];
1302
+ const fieldIds = result.fields?.map((field) => field.id) || Object.keys(records[0] || {});
1303
+ let markdown = `# DataStore SQL Results
1304
+
1305
+ `;
1306
+ markdown += `**Server**: ${params.server_url}
1307
+ `;
1308
+ markdown += `**SQL**: \`${params.sql}\`
1309
+ `;
1310
+ markdown += `**Returned**: ${records.length} records
1311
+
1312
+ `;
1313
+ if (result.fields && result.fields.length > 0) {
1314
+ markdown += `## Fields
1315
+
1316
+ `;
1317
+ markdown += result.fields.map((field) => `- **${field.id}** (${field.type})`).join("\n") + "\n\n";
1318
+ }
1319
+ if (records.length > 0 && fieldIds.length > 0) {
1320
+ markdown += `## Records
1321
+
1322
+ `;
1323
+ const displayFields = fieldIds.slice(0, 8);
1324
+ markdown += `| ${displayFields.join(" | ")} |
1325
+ `;
1326
+ markdown += `| ${displayFields.map(() => "---").join(" | ")} |
1327
+ `;
1328
+ for (const record of records.slice(0, 50)) {
1329
+ const values = displayFields.map((field) => {
1330
+ const value = record[field];
1331
+ if (value === null || value === void 0) return "-";
1332
+ const text = String(value);
1333
+ return text.length > 50 ? text.substring(0, 47) + "..." : text;
1334
+ });
1335
+ markdown += `| ${values.join(" | ")} |
1336
+ `;
1337
+ }
1338
+ if (records.length > 50) {
1339
+ markdown += `
1340
+ ... and ${records.length - 50} more records
1341
+ `;
1342
+ }
1343
+ markdown += "\n";
1344
+ } else {
1345
+ markdown += "No records returned by the SQL query.\n";
1346
+ }
1347
+ return {
1348
+ content: [{ type: "text", text: truncateText(markdown) }]
1349
+ };
1350
+ } catch (error) {
1351
+ return {
1352
+ content: [{
1353
+ type: "text",
1354
+ text: `Error querying DataStore SQL: ${error instanceof Error ? error.message : String(error)}`
1355
+ }],
1356
+ isError: true
1357
+ };
1358
+ }
1359
+ }
1360
+ );
1001
1361
  }
1002
1362
 
1003
1363
  // src/tools/status.ts
@@ -1727,7 +2087,7 @@ function registerAllResources(server2) {
1727
2087
  function createServer() {
1728
2088
  return new McpServer({
1729
2089
  name: "ckan-mcp-server",
1730
- version: "0.4.3"
2090
+ version: "0.4.5"
1731
2091
  });
1732
2092
  }
1733
2093
  function registerAll(server2) {