@aborruso/ckan-mcp-server 0.4.98 → 0.4.105

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. package/LOG.md +71 -0
  2. package/dist/index.js +442 -87
  3. package/dist/worker.js +207 -185
  4. package/package.json +1 -1
package/dist/index.js CHANGED
@@ -163,6 +163,16 @@ var portals_default = {
163
163
  force_text_field: false
164
164
  }
165
165
  },
166
+ {
167
+ id: "bdap-rgs-mef",
168
+ name: "bdap-opendata.rgs.mef.gov.it",
169
+ api_url: "https://bdap-opendata.rgs.mef.gov.it/SpodCkanApi",
170
+ api_url_aliases: [
171
+ "http://bdap-opendata.rgs.mef.gov.it/SpodCkanApi"
172
+ ],
173
+ dataset_view_url: "https://bdap-opendata.rgs.mef.gov.it/SpodCkanApi/dataset/{name}",
174
+ organization_view_url: "https://bdap-opendata.rgs.mef.gov.it/SpodCkanApi/organization/{name}"
175
+ },
166
176
  {
167
177
  id: "govdata-de",
168
178
  name: "govdata.de",
@@ -252,7 +262,257 @@ function requiresMultilingualNormalization(serverUrl) {
252
262
  return portal?.normalize === "multilingual";
253
263
  }
254
264
 
265
+ // src/utils/cache.ts
266
// Catalogue metadata actions; cached for 300 seconds.
var TTL_METADATA = /* @__PURE__ */ new Set([
  "package_search",
  "package_show",
  "current_package_list_with_resources",
  "resource_show",
  "organization_show",
  "organization_list",
  "organization_search",
  "group_show",
  "group_list",
  "group_search",
  "tag_list",
  "tag_show",
  "tag_search"
]);
// Portal status actions; cached for 3600 seconds.
var TTL_STATUS = /* @__PURE__ */ new Set(["status_show", "site_read"]);
// DataStore query actions; cached for 60 seconds.
var TTL_DATASTORE = /* @__PURE__ */ new Set(["datastore_search", "datastore_search_sql"]);
/**
 * Resolve the cache lifetime (in seconds) for a CKAN action.
 *
 * @param {string} action - CKAN action name, e.g. "package_show".
 * @param {number} fallback - TTL returned when the action is in none of the known sets.
 * @returns {number} TTL in seconds for the action.
 */
function getTtlForAction(action, fallback) {
  const tiers = [
    [TTL_METADATA, 300],
    [TTL_STATUS, 3600],
    [TTL_DATASTORE, 60]
  ];
  const tier = tiers.find(([actions]) => actions.has(action));
  return tier ? tier[1] : fallback;
}
289
/**
 * Read an environment variable, treating "unset" and "empty string" alike.
 *
 * @param {string} name - Environment variable name.
 * @returns {string|undefined} The value, or undefined when `process.env` is
 *   unavailable or the variable is unset or empty.
 */
function readEnv(name) {
  const env = typeof process === "undefined" ? void 0 : process.env;
  if (!env) return void 0;
  const raw = env[name];
  return raw === "" ? void 0 : raw;
}
294
/**
 * Build the cache configuration from environment variables.
 *
 * - CKAN_CACHE_ENABLED: any value other than "false" enables caching; when
 *   unset, caching is on unless VITEST is "true".
 * - CKAN_CACHE_TTL_DEFAULT: default TTL in seconds (default 300).
 * - CKAN_CACHE_MAX_ENTRIES: maximum number of cached entries (default 500).
 * - CKAN_CACHE_MAX_ENTRY_BYTES: maximum serialized entry size (default 1 MiB).
 *
 * Numeric settings that are not finite and strictly positive fall back to
 * their defaults. Previously a negative or fractional limit was accepted and
 * silently made the cache evict or reject every entry.
 *
 * @returns {{enabled: boolean, ttlDefault: number, maxEntries: number, maxEntryBytes: number}}
 */
function getCacheConfig() {
  // Parse a strictly positive, finite number; anything else yields the fallback.
  const positiveNumber = (name, fallback) => {
    const parsed = Number(readEnv(name));
    return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback;
  };
  // Counts and sizes must be whole numbers of at least 1.
  const positiveInteger = (name, fallback) => {
    const whole = Math.floor(positiveNumber(name, fallback));
    return whole >= 1 ? whole : fallback;
  };
  const enabledRaw = readEnv("CKAN_CACHE_ENABLED");
  const isTest = readEnv("VITEST") === "true";
  const enabled = enabledRaw !== void 0 ? enabledRaw !== "false" : !isTest;
  return {
    enabled,
    ttlDefault: positiveNumber("CKAN_CACHE_TTL_DEFAULT", 300),
    maxEntries: positiveInteger("CKAN_CACHE_MAX_ENTRIES", 500),
    maxEntryBytes: positiveInteger("CKAN_CACHE_MAX_ENTRY_BYTES", 1024 * 1024)
  };
}
303
/**
 * Serialize request parameters into a deterministic string for cache keys.
 *
 * Top-level keys are sorted and `undefined`/`null` values are skipped.
 * Object values are JSON-encoded with their keys sorted recursively, so two
 * logically equal filters produce the same string regardless of insertion
 * order. The delimiter characters `%`, `&` and `=` are percent-escaped inside
 * keys and values; without this, `{a: "1&b=2"}` and `{a: "1", b: "2"}`
 * serialized identically and would share a cache entry.
 *
 * @param {Record<string, unknown>} params - Request parameters.
 * @returns {string} Canonical `key=value&key=value` representation.
 */
function canonicalizeParams(params) {
  // Escape only the delimiters; unlike encodeURIComponent this never throws.
  const escapeDelimiters = (text) => text.replace(/[%&=]/g, (ch) => `%${ch.charCodeAt(0).toString(16)}`);
  // JSON with plain-object keys sorted at every depth (array order is preserved).
  const stableJson = (value) => JSON.stringify(value, (_key, nested) => {
    if (nested !== null && typeof nested === "object" && !Array.isArray(nested)) {
      return Object.fromEntries(Object.keys(nested).sort().map((k) => [k, nested[k]]));
    }
    return nested;
  });
  const pairs = [];
  for (const key of Object.keys(params).sort()) {
    const value = params[key];
    if (value === void 0 || value === null) continue;
    const serialized = typeof value === "object" ? stableJson(value) : String(value);
    pairs.push(`${escapeDelimiters(key)}=${escapeDelimiters(serialized)}`);
  }
  return pairs.join("&");
}
314
/**
 * Compute the SHA-1 digest of a string's UTF-8 encoding via Web Crypto.
 *
 * @param {string} input - Text to hash.
 * @returns {Promise<string>} Lowercase hexadecimal digest (40 characters).
 */
async function sha1Hex(input) {
  const digest = await crypto.subtle.digest("SHA-1", new TextEncoder().encode(input));
  return Array.from(new Uint8Array(digest), (byte) => byte.toString(16).padStart(2, "0")).join("");
}
322
/**
 * Derive the cache key for a request: the SHA-1 hex digest of
 * `serverUrl|action|canonicalized-params`.
 *
 * @param {string} serverUrl - Portal base URL the request targets.
 * @param {string} action - CKAN action name.
 * @param {Record<string, unknown>} params - Request parameters.
 * @returns {Promise<string>} Hex digest used as the cache key.
 */
async function buildCacheKey(serverUrl, action, params) {
  const canonical = canonicalizeParams(params);
  return sha1Hex("".concat(serverUrl, "|", action, "|", canonical));
}
326
/**
 * In-memory cache with per-entry expiry and least-recently-used eviction.
 * A Map's insertion order doubles as the recency order: the first key is the
 * least recently used, and re-inserting a key moves it to the end.
 */
var MemoryLruCache = class {
  /** @param {number} maxEntries - Entry count above which the oldest entries are evicted. */
  constructor(maxEntries) {
    this.maxEntries = maxEntries;
  }
  store = /* @__PURE__ */ new Map();
  /**
   * Look up a key, dropping it if expired and marking it most recently used otherwise.
   * @param {string} key
   * @returns {Promise<unknown>} The cached value, or undefined on miss/expiry.
   */
  async get(key) {
    const hit = this.store.get(key);
    if (!hit) return void 0;
    // Remove in both cases: an expired entry stays gone, a live one is re-appended below.
    this.store.delete(key);
    if (hit.expiresAt <= Date.now()) return void 0;
    this.store.set(key, hit);
    return hit.value;
  }
  /**
   * Store a value for `ttlSeconds`; a non-positive TTL stores nothing.
   * @param {string} key
   * @param {unknown} value
   * @param {number} ttlSeconds
   */
  async set(key, value, ttlSeconds) {
    if (ttlSeconds <= 0) return;
    this.store.delete(key);
    const expiresAt = Date.now() + ttlSeconds * 1e3;
    this.store.set(key, { value, expiresAt });
    // Trim from the least recently used end until the size limit holds.
    for (const oldest of this.store.keys()) {
      if (!(this.store.size > this.maxEntries)) break;
      if (oldest === void 0) break;
      this.store.delete(oldest);
    }
  }
  /** Remove every entry. */
  clear() {
    this.store.clear();
  }
  /** @returns {number} Number of stored entries (expired ones included until touched). */
  size() {
    return this.store.size;
  }
};
359
/**
 * Cache backend built on the global `caches.default` object.
 * Entries are stored as JSON responses under a synthetic URL derived from the
 * key. Both operations are best-effort: any failure is treated as a miss
 * (get) or silently skipped (set).
 */
var WorkersCacheApi = class {
  origin = "https://ckan-mcp-cache.internal";
  /**
   * @param {string} key
   * @returns {Promise<unknown>} Parsed JSON value, or undefined on miss or error.
   */
  async get(key) {
    try {
      const hit = await caches.default.match(`${this.origin}/${key}`);
      return hit ? await hit.json() : void 0;
    } catch {
      return void 0;
    }
  }
  /**
   * Store a JSON-serializable value; a non-positive TTL stores nothing.
   * @param {string} key
   * @param {unknown} value
   * @param {number} ttlSeconds - Lifetime advertised through `s-maxage`.
   */
  async set(key, value, ttlSeconds) {
    if (ttlSeconds <= 0) return;
    try {
      const payload = JSON.stringify(value);
      const headers = {
        "Content-Type": "application/json",
        "Cache-Control": `public, s-maxage=${ttlSeconds}`
      };
      const entry = new Response(payload, { headers });
      await caches.default.put(`${this.origin}/${key}`, entry);
    } catch {
    }
  }
};
385
// Lazily created cache backend shared by every request.
var sharedCache = null;
/**
 * Return the shared cache backend, creating it on first use.
 * `WorkersCacheApi` is chosen when a global `caches.default` exists and the
 * runtime does not report a Node.js version; otherwise a `MemoryLruCache`
 * sized from `getCacheConfig().maxEntries` is used.
 *
 * @returns {object} The shared cache instance.
 */
function getCache() {
  if (!sharedCache) {
    const workersCacheAvailable = typeof caches !== "undefined" && typeof caches.default !== "undefined";
    const runsOnNode = typeof process !== "undefined" && !!process.versions?.node;
    sharedCache = workersCacheAvailable && !runsOnNode
      ? new WorkersCacheApi()
      : new MemoryLruCache(getCacheConfig().maxEntries);
  }
  return sharedCache;
}
397
+
398
+ // src/utils/rate-limiter.ts
399
/**
 * Raised when obtaining a rate-limit token would exceed the configured
 * maximum wait. The offending hostname and the required wait are exposed as
 * properties so callers can react without parsing the message.
 */
var RateLimitError = class extends Error {
  /** Hostname whose bucket was exhausted. */
  hostname;
  /** Total wait in milliseconds that would have been required (unrounded). */
  waitMs;
  /**
   * @param {string} hostname - Upstream host being throttled.
   * @param {number} waitMs - Milliseconds the caller would have had to wait.
   */
  constructor(hostname, waitMs) {
    super(
      `Rate limit exceeded for ${hostname}: would need to wait ${Math.round(waitMs)}ms`
    );
    this.name = "RateLimitError";
    this.hostname = hostname;
    this.waitMs = waitMs;
  }
};
407
// Upper bound on the number of per-host buckets kept by the rate limiter.
var MAX_BUCKETS = 200;
/**
 * Read an environment variable, mapping "unset" and "empty string" to undefined.
 *
 * @param {string} name - Environment variable name.
 * @returns {string|undefined} The value, or undefined when `process.env` is
 *   unavailable or the variable is unset or empty.
 */
function readEnv2(name) {
  const env = typeof process === "undefined" ? void 0 : process.env;
  if (!env) return void 0;
  const raw = env[name];
  return raw === "" ? void 0 : raw;
}
413
/**
 * Build the upstream rate-limit configuration from environment variables.
 *
 * - CKAN_RATE_LIMIT_ENABLED: any value other than "false" enables limiting;
 *   when unset, limiting is on unless VITEST is "true".
 * - CKAN_RATE_LIMIT_RPS: token refill rate per second (default 5).
 * - CKAN_RATE_LIMIT_BURST: bucket capacity (default 10).
 * - CKAN_RATE_LIMIT_MAX_WAIT_MS: longest total wait for a token (default 5000).
 *
 * `rps` must be finite and positive, and `burst` finite and at least 1;
 * otherwise the default is used. A negative or infinite rate makes the
 * limiter's wait computation negative or NaN so the wait loop never ends, and
 * a burst below 1 caps the bucket under the one token each request needs.
 *
 * @returns {{enabled: boolean, rps: number, burst: number, maxWaitMs: number}}
 */
function getRateLimitConfig() {
  const enabledRaw = readEnv2("CKAN_RATE_LIMIT_ENABLED");
  const isTest = readEnv2("VITEST") === "true";
  const enabled = enabledRaw !== void 0 ? enabledRaw !== "false" : !isTest;
  const rpsRaw = Number(readEnv2("CKAN_RATE_LIMIT_RPS"));
  const burstRaw = Number(readEnv2("CKAN_RATE_LIMIT_BURST"));
  return {
    enabled,
    rps: Number.isFinite(rpsRaw) && rpsRaw > 0 ? rpsRaw : 5,
    burst: Number.isFinite(burstRaw) && burstRaw >= 1 ? burstRaw : 10,
    maxWaitMs: Number(readEnv2("CKAN_RATE_LIMIT_MAX_WAIT_MS")) || 5e3
  };
}
424
/**
 * Resolve after a delay scheduled with `setTimeout`.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
427
/**
 * Per-hostname token-bucket limiter for outgoing requests.
 * Each hostname has a bucket holding up to `burst` tokens that refills at
 * `rps` tokens per second; every `acquire` consumes one token.
 */
var UpstreamRateLimiter = class {
  /** @param {{rps: number, burst: number, maxWaitMs: number}} config */
  constructor(config) {
    this.config = config;
  }
  buckets = /* @__PURE__ */ new Map();
  /**
   * Wait until a token is available for `hostname`, then consume it.
   *
   * @param {string} hostname - Upstream host the request targets.
   * @returns {Promise<void>}
   * @throws {RateLimitError} When the accumulated wait would exceed `maxWaitMs`.
   */
  async acquire(hostname) {
    const { rps, burst, maxWaitMs } = this.config;
    // Look the bucket up on every pass: while this call sleeps, another
    // caller may evict it (MAX_BUCKETS reached) or `clear()` may run.
    // Dereferencing the missing bucket used to throw a TypeError.
    const ensureBucket = () => {
      let bucket = this.buckets.get(hostname);
      if (!bucket) {
        if (this.buckets.size >= MAX_BUCKETS) {
          // Drop the earliest-inserted bucket to keep the map bounded.
          const oldest = this.buckets.keys().next().value;
          if (oldest !== void 0) this.buckets.delete(oldest);
        }
        bucket = { tokens: burst, lastRefill: Date.now() };
        this.buckets.set(hostname, bucket);
      }
      return bucket;
    };
    let waited = 0;
    while (true) {
      const bucket = ensureBucket();
      const now = Date.now();
      const elapsed = now - bucket.lastRefill;
      // Refill proportionally to elapsed time, capped at the burst size.
      bucket.tokens = Math.min(burst, bucket.tokens + elapsed / 1e3 * rps);
      bucket.lastRefill = now;
      if (bucket.tokens >= 1) {
        bucket.tokens -= 1;
        return;
      }
      // Time until the missing fraction of a token has been refilled.
      const waitNeeded = (1 - bucket.tokens) / rps * 1e3;
      if (waited + waitNeeded > maxWaitMs) {
        throw new RateLimitError(hostname, waited + waitNeeded);
      }
      await sleep(waitNeeded);
      waited += waitNeeded;
    }
  }
  /** Forget every bucket. */
  clear() {
    this.buckets.clear();
  }
};
464
// Lazily created limiter shared by every request.
var sharedLimiter = null;
/**
 * Return the shared upstream rate limiter, creating it from
 * `getRateLimitConfig()` on first use.
 *
 * @returns {object} The shared `UpstreamRateLimiter` instance.
 */
function getRateLimiter() {
  if (sharedLimiter) return sharedLimiter;
  sharedLimiter = new UpstreamRateLimiter(getRateLimitConfig());
  return sharedLimiter;
}
471
+
255
472
  // src/utils/http.ts
473
/**
 * Error carrying the HTTP status (when there is one) and the CKAN action of
 * a failed API call, so callers can branch on them instead of parsing the
 * message.
 */
var CkanApiError = class extends Error {
  status;
  action;
  /**
   * @param {string} message - Human-readable description.
   * @param {number|undefined} status - HTTP status code, or undefined when none applies.
   * @param {string} action - CKAN action that failed.
   */
  constructor(message, status, action) {
    super(message);
    Object.assign(this, { name: "CkanApiError", status, action });
  }
};
483
/**
 * Turn an error into the text returned to the tool caller.
 * For a `CkanApiError`, an actionable hint chosen from the HTTP status and
 * CKAN action is appended on a second line; any other error yields just its
 * message (or its string form).
 *
 * @param {unknown} error - The caught value.
 * @param {string} [_toolName] - Name of the calling tool; currently unused.
 * @returns {string} Message, optionally followed by a newline and a hint.
 */
function formatCkanError(error, _toolName) {
  if (!(error instanceof CkanApiError)) {
    if (error instanceof Error) return error.message;
    return String(error);
  }
  const { status, action, message } = error;
  const pickHint = () => {
    switch (status) {
      case 404:
        if (action.startsWith("datastore_search")) {
          return "\u2192 Get a valid resource_id first: call `ckan_package_show` on a dataset, then pick a resource where `datastore_active` is true.";
        }
        if (action === "package_show") {
          return "\u2192 Use `ckan_package_search` to find a valid dataset name or ID.";
        }
        if (action === "organization_show") {
          return "\u2192 Use `ckan_organization_list` or `ckan_organization_search` to discover valid organization names.";
        }
        return "";
      case 400:
        // The SQL action is tested first; it also matches the generic prefix below.
        if (action === "datastore_search_sql") {
          return "\u2192 Invalid SQL syntax or unknown column \u2014 check column names with `ckan_datastore_search` before writing SQL.";
        }
        if (action.startsWith("datastore_search")) {
          return "\u2192 Bad request \u2014 likely an invalid field name or filter syntax; check column names with a `SELECT *` query first.";
        }
        return "";
      case 409:
      case 422:
        return "\u2192 Portal rejected the request \u2014 parameters may conflict; simplify filters and retry.";
      case 502:
      case 503:
      case 504:
        return "\u2192 Portal temporarily unavailable \u2014 retry in a few seconds.";
      case 500:
        return "\u2192 Portal internal error \u2014 try a different portal or retry later.";
      case void 0:
        return "\u2192 The portal may not support this action, or the endpoint is unavailable.";
      default:
        return "";
    }
  };
  const hint = pickHint();
  return hint ? `${message}\n${hint}` : message;
}
515
+ var _lastCacheHit = null;
256
516
  var loadZlib = /* @__PURE__ */ (() => {
257
517
  let cached = null;
258
518
  return async () => {
@@ -427,8 +687,32 @@ function validateServerUrl(serverUrl) {
427
687
  throw new Error(`Access to private/internal IPv6 addresses is not allowed.`);
428
688
  }
429
689
  }
690
+ const rawAllowed = typeof process !== "undefined" ? process.env.CKAN_ALLOWED_DOMAINS ?? "" : "";
691
+ const allowedDomains = rawAllowed.split(",").map((s) => s.trim()).filter(Boolean);
692
+ if (allowedDomains.length > 0 && !allowedDomains.includes(hostname)) {
693
+ throw new Error(`Domain "${hostname}" is not in the allowed list (CKAN_ALLOWED_DOMAINS).`);
694
+ }
695
+ }
696
/**
 * Write one JSON line describing a CKAN request to stderr.
 * Does nothing when the runtime does not report a Node.js version. Only a
 * fixed set of parameters is logged, and `sql` is cut to 200 characters.
 * Write failures are ignored so logging can never break a request.
 *
 * @param {string} serverUrl - Portal URL as supplied by the caller.
 * @param {string} action - CKAN action name.
 * @param {Record<string, unknown>} params - Request parameters.
 * @param {boolean} cacheHit - Whether the response came from the cache.
 */
function auditLog(serverUrl, action, params, cacheHit) {
  const onNode = typeof process !== "undefined" && Boolean(process.versions?.node);
  if (!onNode) return;
  const entry = {
    ts: (/* @__PURE__ */ new Date()).toISOString(),
    server: serverUrl,
    action,
    cache_hit: cacheHit
  };
  const keep = (value) => value;
  // Fields are copied in a fixed order so every log line has a stable key layout.
  const loggedFields = [
    ["q", keep],
    ["fq", keep],
    ["sql", (sql) => String(sql).slice(0, 200)],
    ["id", keep],
    ["rows", keep],
    ["limit", keep]
  ];
  for (const [field, transform] of loggedFields) {
    if (params[field] !== void 0) entry[field] = transform(params[field]);
  }
  try {
    process.stderr.write(JSON.stringify(entry) + "\n");
  } catch {
  }
}
431
- async function makeCkanRequest(serverUrl, action, params = {}) {
715
+ async function makeCkanRequest(serverUrl, action, params = {}, opts = {}) {
432
716
  const isNode = typeof process !== "undefined" && !!process.versions?.node;
433
717
  validateServerUrl(serverUrl);
434
718
  let resolvedServerUrl = serverUrl;
@@ -443,6 +727,26 @@ async function makeCkanRequest(serverUrl, action, params = {}) {
443
727
  const baseUrl = resolvedServerUrl.replace(/\/$/, "");
444
728
  const apiPath = getPortalApiPath(resolvedServerUrl);
445
729
  const url = `${baseUrl}${apiPath}/${action}`;
730
+ const cacheConfig = getCacheConfig();
731
+ const cacheEnabled = cacheConfig.enabled && opts.cache !== false;
732
+ const ttl = getTtlForAction(action, cacheConfig.ttlDefault);
733
+ const cache = cacheEnabled && ttl > 0 ? getCache() : null;
734
+ const cacheKey = cache ? await buildCacheKey(resolvedServerUrl, action, params) : "";
735
+ if (cache) {
736
+ const cached = await cache.get(cacheKey);
737
+ if (cached !== void 0) {
738
+ _lastCacheHit = true;
739
+ auditLog(serverUrl, action, params, true);
740
+ return cached;
741
+ }
742
+ }
743
+ _lastCacheHit = false;
744
+ const rateLimitConfig = getRateLimitConfig();
745
+ const rateLimitEnabled = rateLimitConfig.enabled && opts.rateLimit !== false;
746
+ if (rateLimitEnabled) {
747
+ const hostname = new URL(resolvedServerUrl).hostname;
748
+ await getRateLimiter().acquire(hostname);
749
+ }
446
750
  try {
447
751
  let decodedData;
448
752
  if (isNode) {
@@ -491,7 +795,7 @@ async function makeCkanRequest(serverUrl, action, params = {}) {
491
795
  clearTimeout(timeoutId);
492
796
  }
493
797
  if (!response.ok) {
494
- throw new Error(`CKAN API error (${response.status}): ${response.statusText}`);
798
+ throw new CkanApiError(`CKAN API error (${response.status}): ${response.statusText}`, response.status, action);
495
799
  }
496
800
  const buffer = await response.arrayBuffer();
497
801
  const headers = {};
@@ -501,20 +805,34 @@ async function makeCkanRequest(serverUrl, action, params = {}) {
501
805
  decodedData = await decodePossiblyCompressed(buffer, headers);
502
806
  }
503
807
  if (decodedData && decodedData.success === true) {
504
- return decodedData.result;
808
+ const result = decodedData.result;
809
+ if (cache) {
810
+ try {
811
+ const serialized = JSON.stringify(result);
812
+ if (serialized.length <= cacheConfig.maxEntryBytes) {
813
+ await cache.set(cacheKey, result, ttl);
814
+ }
815
+ } catch {
816
+ }
817
+ }
818
+ auditLog(serverUrl, action, params, false);
819
+ return result;
505
820
  } else {
506
- throw new Error(
507
- `CKAN API returned success=false: ${JSON.stringify(decodedData)}`
821
+ throw new CkanApiError(
822
+ `CKAN API returned success=false: ${JSON.stringify(decodedData)}`,
823
+ void 0,
824
+ action
508
825
  );
509
826
  }
510
827
  } catch (error) {
828
+ if (error instanceof CkanApiError) throw error;
511
829
  if (axios.isAxiosError(error)) {
512
830
  const axiosError = error;
513
831
  if (axiosError.response) {
514
832
  const status = axiosError.response.status;
515
833
  const data = axiosError.response.data;
516
834
  const errorMsg = data?.error?.message || data?.error || "Unknown error";
517
- throw new Error(`CKAN API error (${status}): ${errorMsg}`);
835
+ throw new CkanApiError(`CKAN API error (${status}): ${errorMsg}`, status, action);
518
836
  } else if (axiosError.code === "ECONNABORTED") {
519
837
  throw new Error(`Request timeout connecting to ${serverUrl}`);
520
838
  } else if (axiosError.code === "ENOTFOUND") {
@@ -594,6 +912,25 @@ function addDemoFooter(text) {
594
912
  }
595
913
 
596
914
  // src/utils/url-generator.ts
915
// Matches "/resource/<uuid>" in a URL path, capturing the UUID (case-insensitive).
var UUID_RE = /\/resource\/([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})/i;
/**
 * Detect a resource URL that lives on a different host than the queried
 * portal and carries a resource UUID in its path.
 *
 * @param {string} resourceUrl - Download URL of the resource.
 * @param {string} serverUrl - URL of the portal that was queried.
 * @returns {{portalUrl: string, resourceId: string}|null} Scheme-and-host of
 *   the other portal plus the extracted UUID; null when the URL is empty,
 *   either URL cannot be parsed, the hostnames match, or no UUID is found.
 */
function extractSourcePortal(resourceUrl, serverUrl) {
  if (!resourceUrl) return null;
  const toUrl = (raw) => {
    try {
      return new URL(raw);
    } catch {
      return null;
    }
  };
  const resource = toUrl(resourceUrl);
  const server = resource && toUrl(serverUrl);
  if (!resource || !server) return null;
  if (resource.hostname === server.hostname) return null;
  const found = UUID_RE.exec(resource.pathname);
  if (found === null) return null;
  const portalUrl = `${resource.protocol}//${resource.host}`;
  return { portalUrl, resourceId: found[1] };
}
597
934
  function getDatasetViewUrl(serverUrl, pkg) {
598
935
  const cleanServerUrl = normalizePortalUrl(serverUrl);
599
936
  const portal = getPortalConfig(serverUrl);
@@ -665,7 +1002,8 @@ function resolveSearchQuery(serverUrl, query, parserOverride) {
665
1002
  const portalForce = portalSearchConfig.force_text_field ?? false;
666
1003
  let forceTextField = false;
667
1004
  if (parserOverride === "text") {
668
- forceTextField = true;
1005
+ const trimmedQuery = query.trim();
1006
+ forceTextField = trimmedQuery !== DEFAULT_SEARCH_QUERY && !isFieldedQuery(trimmedQuery);
669
1007
  } else if (parserOverride === "default") {
670
1008
  forceTextField = false;
671
1009
  } else if (portalForce) {
@@ -697,7 +1035,9 @@ var DEFAULT_RELEVANCE_WEIGHTS = {
697
1035
  title: 4,
698
1036
  notes: 2,
699
1037
  tags: 3,
700
- organization: 1
1038
+ organization: 1,
1039
+ holder: 4,
1040
+ publisher: 2
701
1041
  };
702
1042
  var QUERY_STOPWORDS = /* @__PURE__ */ new Set([
703
1043
  "a",
@@ -756,16 +1096,31 @@ var textMatchesTerms = (text, terms) => {
756
1096
  var scoreTextField = (text, terms, weight) => {
757
1097
  return textMatchesTerms(text, terms) ? weight : 0;
758
1098
  };
1099
/**
 * Read a metadata value from a dataset, preferring its `extras` list.
 * The first `extras` item whose `key` matches and whose `value` is a
 * non-empty string wins; otherwise the same-named top-level property is
 * used when it is a string.
 *
 * @param {object} dataset - Dataset object, optionally with an `extras` array.
 * @param {string} key - Field name, e.g. "holder_name".
 * @returns {string} The value found, or "" when there is none.
 */
var readDcatExtra = (dataset, key) => {
  if (Array.isArray(dataset.extras)) {
    const hit = dataset.extras.find(
      (item) => item && typeof item === "object" && item.key === key && typeof item.value === "string" && item.value.length > 0
    );
    if (hit) return hit.value;
  }
  const rootValue = dataset[key];
  if (typeof rootValue !== "string") return "";
  return rootValue;
};
759
1110
  var scoreDatasetRelevance = (query, dataset, weights = DEFAULT_RELEVANCE_WEIGHTS) => {
760
1111
  const terms = extractQueryTerms(query);
761
1112
  const titleText = dataset.title || dataset.name || "";
762
1113
  const notesText = dataset.notes || "";
763
1114
  const orgText = dataset.organization?.title || dataset.organization?.name || dataset.owner_org || "";
1115
+ const holderText = readDcatExtra(dataset, "holder_name");
1116
+ const publisherText = readDcatExtra(dataset, "publisher_name");
764
1117
  const breakdown = {
765
1118
  title: scoreTextField(titleText, terms, weights.title),
766
1119
  notes: scoreTextField(notesText, terms, weights.notes),
767
1120
  tags: 0,
768
1121
  organization: scoreTextField(orgText, terms, weights.organization),
1122
+ holder: scoreTextField(holderText, terms, weights.holder),
1123
+ publisher: scoreTextField(publisherText, terms, weights.publisher),
769
1124
  total: 0
770
1125
  };
771
1126
  if (Array.isArray(dataset.tags) && dataset.tags.length > 0 && terms.length > 0) {
@@ -775,7 +1130,7 @@ var scoreDatasetRelevance = (query, dataset, weights = DEFAULT_RELEVANCE_WEIGHTS
775
1130
  });
776
1131
  breakdown.tags = tagMatch ? weights.tags : 0;
777
1132
  }
778
- breakdown.total = breakdown.title + breakdown.notes + breakdown.tags + breakdown.organization;
1133
+ breakdown.total = breakdown.title + breakdown.notes + breakdown.tags + breakdown.organization + breakdown.holder + breakdown.publisher;
779
1134
  return { total: breakdown.total, breakdown, terms };
780
1135
  };
781
1136
  var parseAccessServices = (resource) => {
@@ -1467,10 +1822,7 @@ Note: showing top ${sorted.length} only. Use \`response_format: json\` for full
1467
1822
  };
1468
1823
  } catch (error) {
1469
1824
  return {
1470
- content: [{
1471
- type: "text",
1472
- text: `Error searching packages: ${error instanceof Error ? error.message : String(error)}`
1473
- }],
1825
+ content: [{ type: "text", text: formatCkanError(error, "ckan_package_search") }],
1474
1826
  isError: true
1475
1827
  };
1476
1828
  }
@@ -1493,7 +1845,13 @@ Args:
1493
1845
  - query (string): Natural language or keyword query (e.g., "mobilit\xE0 urbana", "air quality")
1494
1846
  - limit (number): Number of datasets to return (default: 10)
1495
1847
  - weights (object): Field weights for scoring \u2014 higher weight = more influence on rank
1496
- Default: title=4, tags=3, notes=2, organization=1
1848
+ Default: title=4, tags=3, notes=2, organization=1, holder=4, publisher=2
1849
+ Note on holder vs organization: on federated catalogs (e.g. dati.gov.it), \`organization\`
1850
+ is the harvesting catalog (e.g. Regione Puglia), while \`holder\` (DCAT-AP_IT dct:rightsHolder)
1851
+ is the actual data owner (e.g. Comune di Lecce). Queries like "datasets from a specific Comune"
1852
+ match \`holder\` correctly; matching only \`organization\` misses datasets harvested via
1853
+ aggregators. \`publisher\` (dct:publisher) is scored separately at lower weight as it can
1854
+ contain technical roles ("Redazione OD") rather than the institutional owner.
1497
1855
  - query_parser ('default' | 'text'): Override search parser behavior
1498
1856
  - response_format ('markdown' | 'json'): Output format
1499
1857
 
@@ -1503,17 +1861,20 @@ Returns:
1503
1861
  Examples:
1504
1862
  - { server_url: "https://dati.gov.it/opendata", query: "mobilit\xE0" }
1505
1863
  - { server_url: "...", query: "trasporti", limit: 5, weights: { title: 5, notes: 2 } }
1864
+ - { server_url: "...", query: "defibrillatori Comune di Lecce", weights: { holder: 5 } }
1506
1865
 
1507
1866
  Typical workflow: ckan_find_relevant_datasets \u2192 ckan_package_show (inspect top results) \u2192 ckan_datastore_search (query data)`,
1508
1867
  inputSchema: z2.object({
1509
1868
  server_url: z2.string().url().describe("Base URL of the CKAN server (e.g., https://dati.gov.it/opendata)"),
1510
- query: z2.string().min(2).describe("Natural language or keyword query to match against dataset title, notes, tags, and organization"),
1869
+ query: z2.string().min(2).describe("Natural language or keyword query to match against dataset title, notes, tags, organization, holder and publisher"),
1511
1870
  limit: z2.coerce.number().int().min(1).max(50).optional().default(10).describe("Number of datasets to return"),
1512
1871
  weights: z2.object({
1513
1872
  title: z2.coerce.number().min(0).optional().describe("Weight for title match (default 4)"),
1514
1873
  notes: z2.coerce.number().min(0).optional().describe("Weight for description match (default 2)"),
1515
1874
  tags: z2.coerce.number().min(0).optional().describe("Weight for tag match (default 3)"),
1516
- organization: z2.coerce.number().min(0).optional().describe("Weight for organization match (default 1)")
1875
+ organization: z2.coerce.number().min(0).optional().describe("Weight for organization (CKAN catalog / harvester) match (default 1)"),
1876
+ holder: z2.coerce.number().min(0).optional().describe("Weight for holder_name match \u2014 DCAT-AP_IT dct:rightsHolder, the actual data owner (default 4)"),
1877
+ publisher: z2.coerce.number().min(0).optional().describe("Weight for publisher_name match \u2014 DCAT-AP_IT dct:publisher (default 2)")
1517
1878
  }).optional().describe("Per-field scoring weights; unspecified fields use defaults"),
1518
1879
  query_parser: z2.enum(["default", "text"]).optional().describe("Override search parser ('text' forces text:(...) on non-fielded queries)"),
1519
1880
  response_format: ResponseFormatSchema
@@ -1610,6 +1971,10 @@ Typical workflow: ckan_find_relevant_datasets \u2192 ckan_package_show (inspect
1610
1971
  markdown += `- **Tags**: ${weights.tags}
1611
1972
  `;
1612
1973
  markdown += `- **Organization**: ${weights.organization}
1974
+ `;
1975
+ markdown += `- **Holder**: ${weights.holder}
1976
+ `;
1977
+ markdown += `- **Publisher**: ${weights.publisher}
1613
1978
 
1614
1979
  `;
1615
1980
  if (top.length === 0) {
@@ -1644,6 +2009,10 @@ Typical workflow: ckan_find_relevant_datasets \u2192 ckan_package_show (inspect
1644
2009
  markdown += `- Tags: ${dataset.breakdown.tags}
1645
2010
  `;
1646
2011
  markdown += `- Organization: ${dataset.breakdown.organization}
2012
+ `;
2013
+ markdown += `- Holder: ${dataset.breakdown.holder}
2014
+ `;
2015
+ markdown += `- Publisher: ${dataset.breakdown.publisher}
1647
2016
  `;
1648
2017
  markdown += `- Total: ${dataset.breakdown.total}
1649
2018
 
@@ -1655,10 +2024,7 @@ Typical workflow: ckan_find_relevant_datasets \u2192 ckan_package_show (inspect
1655
2024
  };
1656
2025
  } catch (error) {
1657
2026
  return {
1658
- content: [{
1659
- type: "text",
1660
- text: `Error ranking datasets: ${error instanceof Error ? error.message : String(error)}`
1661
- }],
2027
+ content: [{ type: "text", text: formatCkanError(error, "ckan_find_relevant_datasets") }],
1662
2028
  isError: true
1663
2029
  };
1664
2030
  }
@@ -1734,15 +2100,20 @@ Typical workflow: ckan_package_show \u2192 pick a resource with datastore_active
1734
2100
  };
1735
2101
  } catch (error) {
1736
2102
  return {
1737
- content: [{
1738
- type: "text",
1739
- text: `Error fetching package: ${error instanceof Error ? error.message : String(error)}`
1740
- }],
2103
+ content: [{ type: "text", text: formatCkanError(error, "ckan_package_show") }],
1741
2104
  isError: true
1742
2105
  };
1743
2106
  }
1744
2107
  }
1745
2108
  );
2109
/**
 * Probe another portal for DataStore access to a resource by issuing an
 * uncached, zero-row `datastore_search` request.
 *
 * @param {string} portalUrl - Base URL of the portal to probe.
 * @param {string} resourceId - Resource identifier to look up.
 * @returns {Promise<boolean>} true when the request succeeds, false on any failure.
 */
async function checkSourceDatastore(portalUrl, resourceId) {
  const probeParams = { resource_id: resourceId, limit: 0 };
  let available = true;
  try {
    await makeCkanRequest(portalUrl, "datastore_search", probeParams, { cache: false });
  } catch {
    // Any failure simply means the DataStore cannot be used for this resource.
    available = false;
  }
  return available;
}
1746
2117
  server.registerTool(
1747
2118
  "ckan_list_resources",
1748
2119
  {
@@ -1765,11 +2136,17 @@ Examples:
1765
2136
  - { server_url: "https://dati.gov.it/opendata", id: "dataset-name" }
1766
2137
  - { server_url: "...", id: "dataset-name", format_filter: "CSV" }
1767
2138
 
1768
- Typical workflow: ckan_package_search \u2192 ckan_list_resources (assess available files) \u2192 ckan_datastore_search (for resources with DataStore=true)`,
2139
+ Typical workflow: ckan_package_search \u2192 ckan_list_resources (assess available files) \u2192 ckan_datastore_search (for resources with DataStore=true)
2140
+
2141
+ When a resource has DataStore=false but its download URL belongs to a different (source) portal,
2142
+ the tool automatically probes the source portal for DataStore availability and reports
2143
+ source_datastore_active and source_portal_url so you can query the data there instead.
2144
+ Set check_source_portal=false to skip these extra HTTP calls.`,
1769
2145
  inputSchema: z2.object({
1770
2146
  server_url: z2.string().url().describe("Base URL of the CKAN server"),
1771
2147
  id: z2.string().min(1).describe("Dataset ID or name"),
1772
2148
  format_filter: z2.string().optional().describe("Filter resources by format, case-insensitive (e.g., 'CSV', 'json', 'XLSX')"),
2149
+ check_source_portal: z2.boolean().optional().describe("When true (default), probes the source portal for DataStore availability when a resource URL points to a different CKAN instance"),
1773
2150
  response_format: ResponseFormatSchema
1774
2151
  }).strict(),
1775
2152
  annotations: {
@@ -1788,6 +2165,7 @@ Typical workflow: ckan_package_search \u2192 ckan_list_resources (assess availab
1788
2165
  );
1789
2166
  const resources = Array.isArray(result.resources) ? result.resources : [];
1790
2167
  const formatFilter = params.format_filter?.toUpperCase();
2168
+ const doSourceCheck = params.check_source_portal !== false;
1791
2169
  const summary = resources.filter((r) => !formatFilter || (r.format || "").toUpperCase() === formatFilter).map((r) => {
1792
2170
  const effectiveUrl = resolveDownloadUrl(r);
1793
2171
  return {
@@ -1799,6 +2177,18 @@ Typical workflow: ckan_package_search \u2192 ckan_list_resources (assess availab
1799
2177
  url: effectiveUrl
1800
2178
  };
1801
2179
  });
2180
+ if (doSourceCheck) {
2181
+ await Promise.all(
2182
+ summary.map(async (item, idx) => {
2183
+ if (item.datastore_active) return;
2184
+ const extracted = extractSourcePortal(item.url, params.server_url);
2185
+ if (!extracted) return;
2186
+ const active = await checkSourceDatastore(extracted.portalUrl, extracted.resourceId);
2187
+ summary[idx].source_datastore_active = active;
2188
+ summary[idx].source_portal_url = active ? extracted.portalUrl : null;
2189
+ })
2190
+ );
2191
+ }
1802
2192
  if (params.response_format === "json" /* JSON */) {
1803
2193
  const payload = {
1804
2194
  dataset_id: result.id,
@@ -1853,6 +2243,16 @@ Typical workflow: ckan_package_search \u2192 ckan_list_resources (assess availab
1853
2243
  `;
1854
2244
  for (const r of dsResources) {
1855
2245
  markdown += `- **${r.name}** (${r.format}): \`${r.id}\`
2246
+ `;
2247
+ }
2248
+ }
2249
+ const sourceResources = summary.filter((r) => r.source_datastore_active && r.source_portal_url);
2250
+ if (sourceResources.length > 0) {
2251
+ markdown += `
2252
+ **Available on source portal** (use \`ckan_datastore_search\` with the source portal URL):
2253
+ `;
2254
+ for (const r of sourceResources) {
2255
+ markdown += `- **${r.name}** (${r.format}): \`${r.id}\` on ${r.source_portal_url}
1856
2256
  `;
1857
2257
  }
1858
2258
  }
@@ -1862,10 +2262,7 @@ Typical workflow: ckan_package_search \u2192 ckan_list_resources (assess availab
1862
2262
  };
1863
2263
  } catch (error) {
1864
2264
  return {
1865
- content: [{
1866
- type: "text",
1867
- text: `Error listing resources: ${error instanceof Error ? error.message : String(error)}`
1868
- }],
2265
+ content: [{ type: "text", text: formatCkanError(error, "ckan_list_resources") }],
1869
2266
  isError: true
1870
2267
  };
1871
2268
  }
@@ -2044,8 +2441,7 @@ Typical workflow: ckan_organization_list \u2192 ckan_organization_show (inspect
2044
2441
  }
2045
2442
  );
2046
2443
  } catch (error) {
2047
- const message = error instanceof Error ? error.message : String(error);
2048
- if (message.includes("CKAN API error (500)")) {
2444
+ if (error instanceof CkanApiError && error.status === 500) {
2049
2445
  const searchResult = await makeCkanRequest(
2050
2446
  params.server_url,
2051
2447
  "package_search",
@@ -2153,10 +2549,7 @@ Note: organization_list returned 500; using package_search facets.
2153
2549
  };
2154
2550
  } catch (error) {
2155
2551
  return {
2156
- content: [{
2157
- type: "text",
2158
- text: `Error listing organizations: ${error instanceof Error ? error.message : String(error)}`
2159
- }],
2552
+ content: [{ type: "text", text: formatCkanError(error, "ckan_organization_list") }],
2160
2553
  isError: true
2161
2554
  };
2162
2555
  }
@@ -2217,10 +2610,7 @@ Typical workflow: ckan_organization_show \u2192 ckan_package_show (inspect a dat
2217
2610
  };
2218
2611
  } catch (error) {
2219
2612
  return {
2220
- content: [{
2221
- type: "text",
2222
- text: `Error fetching organization: ${error instanceof Error ? error.message : String(error)}`
2223
- }],
2613
+ content: [{ type: "text", text: formatCkanError(error, "ckan_organization_show") }],
2224
2614
  isError: true
2225
2615
  };
2226
2616
  }
@@ -2328,10 +2718,7 @@ Typical workflow: ckan_organization_search \u2192 ckan_organization_show (get de
2328
2718
  };
2329
2719
  } catch (error) {
2330
2720
  return {
2331
- content: [{
2332
- type: "text",
2333
- text: `Error searching organizations: ${error instanceof Error ? error.message : String(error)}`
2334
- }],
2721
+ content: [{ type: "text", text: formatCkanError(error, "ckan_organization_search") }],
2335
2722
  isError: true
2336
2723
  };
2337
2724
  }
@@ -2543,10 +2930,7 @@ Typical workflow: ckan_package_search \u2192 ckan_package_show (find resource_id
2543
2930
  };
2544
2931
  } catch (error) {
2545
2932
  return {
2546
- content: [{
2547
- type: "text",
2548
- text: `Error querying DataStore: ${error instanceof Error ? error.message : String(error)}`
2549
- }],
2933
+ content: [{ type: "text", text: formatCkanError(error, "ckan_datastore_search") }],
2550
2934
  isError: true
2551
2935
  };
2552
2936
  }
@@ -2607,10 +2991,7 @@ Security note: SQL queries are forwarded directly to the CKAN DataStore API. The
2607
2991
  };
2608
2992
  } catch (error) {
2609
2993
  return {
2610
- content: [{
2611
- type: "text",
2612
- text: `Error querying DataStore SQL: ${error instanceof Error ? error.message : String(error)}`
2613
- }],
2994
+ content: [{ type: "text", text: formatCkanError(error, "ckan_datastore_search_sql") }],
2614
2995
  isError: true
2615
2996
  };
2616
2997
  }
@@ -3062,10 +3443,7 @@ Typical workflow: ckan_group_list \u2192 ckan_group_show (inspect one) \u2192 ck
3062
3443
  };
3063
3444
  } catch (error) {
3064
3445
  return {
3065
- content: [{
3066
- type: "text",
3067
- text: `Error listing groups: ${error instanceof Error ? error.message : String(error)}`
3068
- }],
3446
+ content: [{ type: "text", text: formatCkanError(error, "ckan_group_list") }],
3069
3447
  isError: true
3070
3448
  };
3071
3449
  }
@@ -3123,10 +3501,7 @@ Typical workflow: ckan_group_show \u2192 ckan_package_show (inspect a dataset) \
3123
3501
  };
3124
3502
  } catch (error) {
3125
3503
  return {
3126
- content: [{
3127
- type: "text",
3128
- text: `Error fetching group: ${error instanceof Error ? error.message : String(error)}`
3129
- }],
3504
+ content: [{ type: "text", text: formatCkanError(error, "ckan_group_show") }],
3130
3505
  isError: true
3131
3506
  };
3132
3507
  }
@@ -3228,10 +3603,7 @@ Typical workflow: ckan_group_search \u2192 ckan_group_show (get details) \u2192
3228
3603
  };
3229
3604
  } catch (error) {
3230
3605
  return {
3231
- content: [{
3232
- type: "text",
3233
- text: `Error searching groups: ${error instanceof Error ? error.message : String(error)}`
3234
- }],
3606
+ content: [{ type: "text", text: formatCkanError(error, "ckan_group_search") }],
3235
3607
  isError: true
3236
3608
  };
3237
3609
  }
@@ -3900,12 +4272,8 @@ The MQA (Metadata Quality Assurance) system is operated by data.europa.eu and on
3900
4272
  }]
3901
4273
  };
3902
4274
  } catch (error) {
3903
- const errorMessage = error instanceof Error ? error.message : String(error);
3904
4275
  return {
3905
- content: [{
3906
- type: "text",
3907
- text: `Error retrieving quality metrics: ${errorMessage}`
3908
- }]
4276
+ content: [{ type: "text", text: `Error retrieving quality metrics: ${formatCkanError(error, "ckan_get_mqa_quality")}` }]
3909
4277
  };
3910
4278
  }
3911
4279
  }
@@ -3949,12 +4317,8 @@ The MQA (Metadata Quality Assurance) system is operated by data.europa.eu and on
3949
4317
  }]
3950
4318
  };
3951
4319
  } catch (error) {
3952
- const errorMessage = error instanceof Error ? error.message : String(error);
3953
4320
  return {
3954
- content: [{
3955
- type: "text",
3956
- text: `Error retrieving quality details: ${errorMessage}`
3957
- }]
4321
+ content: [{ type: "text", text: `Error retrieving quality details: ${formatCkanError(error, "ckan_get_mqa_quality_details")}` }]
3958
4322
  };
3959
4323
  }
3960
4324
  }
@@ -4134,10 +4498,7 @@ Typical workflow: ckan_analyze_datasets \u2192 ckan_datastore_search (with known
4134
4498
  };
4135
4499
  } catch (error) {
4136
4500
  return {
4137
- content: [{
4138
- type: "text",
4139
- text: `Error analyzing datasets: ${error instanceof Error ? error.message : String(error)}`
4140
- }],
4501
+ content: [{ type: "text", text: formatCkanError(error, "ckan_analyze_datasets") }],
4141
4502
  isError: true
4142
4503
  };
4143
4504
  }
@@ -4225,10 +4586,7 @@ Typical workflow: ckan_catalog_stats (understand the portal) \u2192 ckan_package
4225
4586
  };
4226
4587
  } catch (error) {
4227
4588
  return {
4228
- content: [{
4229
- type: "text",
4230
- text: `Error retrieving catalog stats: ${error instanceof Error ? error.message : String(error)}`
4231
- }],
4589
+ content: [{ type: "text", text: formatCkanError(error, "ckan_catalog_stats") }],
4232
4590
  isError: true
4233
4591
  };
4234
4592
  }
@@ -4533,11 +4891,8 @@ Typical workflow: ckan_find_portals (discover portal URL) \u2192 ckan_status_sho
4533
4891
  };
4534
4892
  } catch (error) {
4535
4893
  return {
4536
- content: [{
4537
- type: "text",
4538
- text: `Could not fetch portal list from datashades.info:
4539
- ${error instanceof Error ? error.message : String(error)}`
4540
- }],
4894
+ content: [{ type: "text", text: `Could not fetch portal list from datashades.info:
4895
+ ${formatCkanError(error, "ckan_find_portals")}` }],
4541
4896
  isError: true
4542
4897
  };
4543
4898
  }
@@ -5159,7 +5514,7 @@ var registerAllPrompts = (server) => {
5159
5514
  function createServer() {
5160
5515
  return new McpServer({
5161
5516
  name: "ckan-mcp-server",
5162
- version: "0.4.98"
5517
+ version: "0.4.105"
5163
5518
  });
5164
5519
  }
5165
5520
  function registerAll(server) {