@aborruso/ckan-mcp-server 0.4.99 → 0.4.106

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. package/LOG.md +72 -0
  2. package/dist/index.js +436 -87
  3. package/dist/worker.js +207 -185
  4. package/package.json +1 -1
package/dist/index.js CHANGED
@@ -262,7 +262,257 @@ function requiresMultilingualNormalization(serverUrl) {
262
262
  return portal?.normalize === "multilingual";
263
263
  }
264
264
 
265
+ // src/utils/cache.ts
266
// CKAN actions grouped by cache-lifetime tier.
// Catalogue metadata: datasets, resources, organizations, groups and tags.
var TTL_METADATA = /* @__PURE__ */ new Set([
  "package_search",
  "package_show",
  "current_package_list_with_resources",
  "resource_show",
  "organization_show",
  "organization_list",
  "organization_search",
  "group_show",
  "group_list",
  "group_search",
  "tag_list",
  "tag_show",
  "tag_search"
]);
// Portal status actions.
var TTL_STATUS = /* @__PURE__ */ new Set(["status_show", "site_read"]);
// DataStore query actions.
var TTL_DATASTORE = /* @__PURE__ */ new Set(["datastore_search", "datastore_search_sql"]);

/**
 * Looks up the cache TTL (in seconds) for a CKAN action.
 *
 * @param {string} action - CKAN action name, e.g. "package_show".
 * @param {number} fallback - TTL returned when the action is in no tier.
 * @returns {number} 300 for metadata actions, 3600 for status actions,
 *   60 for DataStore actions, otherwise `fallback`.
 */
function getTtlForAction(action, fallback) {
  const tiers = [
    [TTL_METADATA, 300],
    [TTL_STATUS, 3600],
    [TTL_DATASTORE, 60]
  ];
  const tier = tiers.find(([actions]) => actions.has(action));
  return tier ? tier[1] : fallback;
}
289
/**
 * Reads an environment variable, treating "unset" and "empty string" alike.
 *
 * @param {string} name - Environment variable name.
 * @returns {string | undefined} The value, or undefined when there is no
 *   `process`/`process.env` or the variable is unset or empty.
 */
function readEnv(name) {
  // `process` may not exist at all outside Node, so probe it with typeof first.
  const env = typeof process !== "undefined" ? process.env : null;
  if (!env) return void 0;
  const raw = env[name];
  if (raw === void 0 || raw === "") return void 0;
  return raw;
}
294
/**
 * Builds the cache configuration from environment variables.
 *
 * - CKAN_CACHE_ENABLED: any value other than "false" enables caching; when
 *   unset, caching is on unless VITEST is "true".
 * - CKAN_CACHE_TTL_DEFAULT (default 300), CKAN_CACHE_MAX_ENTRIES (default 500),
 *   CKAN_CACHE_MAX_ENTRY_BYTES (default 1 MiB): numeric settings; unset,
 *   non-numeric or zero values fall back to the default.
 *
 * @returns {{enabled: boolean, ttlDefault: number, maxEntries: number, maxEntryBytes: number}}
 */
function getCacheConfig() {
  const numberOr = (name, fallback) => Number(readEnv(name)) || fallback;
  const explicit = readEnv("CKAN_CACHE_ENABLED");
  const underVitest = readEnv("VITEST") === "true";
  return {
    enabled: explicit === void 0 ? !underVitest : explicit !== "false",
    ttlDefault: numberOr("CKAN_CACHE_TTL_DEFAULT", 300),
    maxEntries: numberOr("CKAN_CACHE_MAX_ENTRIES", 500),
    maxEntryBytes: numberOr("CKAN_CACHE_MAX_ENTRY_BYTES", 1024 * 1024)
  };
}
303
/**
 * Serializes request parameters into a deterministic string for cache keys.
 *
 * Top-level keys are sorted, and undefined/null values are skipped. Object
 * values are JSON-encoded with their own keys sorted recursively, so two
 * objects that differ only in key order serialize identically.
 *
 * The separators `&` and `=` (and the escape character `%`) are
 * percent-escaped inside keys and values. Without that, `{a: "1&b=2"}` and
 * `{a: "1", b: "2"}` would both serialize to `a=1&b=2` and share a cache
 * entry.
 *
 * @param {Record<string, unknown>} params - Request parameters.
 * @returns {string} `key=value` pairs joined by `&`; "" for no usable params.
 */
function canonicalizeParams(params) {
  // Escapes only the three characters that carry structure in the output.
  // A manual replace is used because it can never throw on malformed input.
  const escapePart = (text) =>
    text.replace(/[%&=]/g, (ch) => `%${ch.charCodeAt(0).toString(16).toUpperCase()}`);
  // JSON.stringify replacer: rebuilds every plain object with sorted keys.
  const sortObjectKeys = (_key, value) => {
    if (value === null || typeof value !== "object" || Array.isArray(value)) return value;
    const entries = Object.entries(value).sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0));
    return Object.fromEntries(entries);
  };
  const pairs = [];
  for (const key of Object.keys(params).sort()) {
    const value = params[key];
    if (value === void 0 || value === null) continue;
    const serialized = typeof value === "object" ? JSON.stringify(value, sortObjectKeys) : String(value);
    pairs.push(`${escapePart(key)}=${escapePart(String(serialized))}`);
  }
  return pairs.join("&");
}
314
+ async function sha1Hex(input) {
315
+ const data = new TextEncoder().encode(input);
316
+ const hashBuffer = await crypto.subtle.digest("SHA-1", data);
317
+ const bytes = new Uint8Array(hashBuffer);
318
+ let hex = "";
319
+ for (const b of bytes) hex += b.toString(16).padStart(2, "0");
320
+ return hex;
321
+ }
322
/**
 * Derives the cache key for one CKAN request.
 *
 * @param {string} serverUrl - Portal base URL.
 * @param {string} action - CKAN action name.
 * @param {Record<string, unknown>} params - Request parameters.
 * @returns {Promise<string>} SHA-1 hex digest of `serverUrl|action|<canonical params>`.
 */
async function buildCacheKey(serverUrl, action, params) {
  const canonical = canonicalizeParams(params);
  const fingerprint = `${serverUrl}|${action}|${canonical}`;
  return sha1Hex(fingerprint);
}
326
/**
 * In-memory cache with per-entry expiry and least-recently-used eviction.
 *
 * Recency is tracked through Map insertion order: the first key is the least
 * recently used and the last key the most recently used.
 */
var MemoryLruCache = class {
  /**
   * @param {number} maxEntries - Entry count above which the oldest entries are evicted.
   */
  constructor(maxEntries) {
    this.maxEntries = maxEntries;
  }
  store = /* @__PURE__ */ new Map();
  /**
   * Returns the cached value, or undefined when the key is missing or expired.
   * A hit moves the entry to the most-recently-used position.
   */
  async get(key) {
    const hit = this.store.get(key);
    if (!hit) return void 0;
    // Always remove first: an expired entry stays removed, a live one is
    // re-inserted below so it becomes the newest key.
    this.store.delete(key);
    if (hit.expiresAt <= Date.now()) return void 0;
    this.store.set(key, hit);
    return hit.value;
  }
  /**
   * Stores a value for `ttlSeconds` seconds; a TTL <= 0 stores nothing.
   * Evicts oldest entries while the store is larger than `maxEntries`.
   */
  async set(key, value, ttlSeconds) {
    if (ttlSeconds <= 0) return;
    const expiresAt = Date.now() + ttlSeconds * 1e3;
    // Delete before set so an existing key moves to the newest position.
    this.store.delete(key);
    this.store.set(key, { value, expiresAt });
    while (this.store.size > this.maxEntries) {
      const { value: oldestKey } = this.store.keys().next();
      if (oldestKey === void 0) break;
      this.store.delete(oldestKey);
    }
  }
  /** Removes every entry. */
  clear() {
    this.store.clear();
  }
  /** Returns the number of stored entries (expired ones included until read). */
  size() {
    return this.store.size;
  }
};
359
/**
 * Cache backend built on `caches.default` (the Workers Cache API).
 *
 * Values are stored as JSON responses under synthetic URLs of the form
 * `<origin>/<key>`. All failures are swallowed: a broken cache behaves like a
 * cache miss and never fails the request.
 */
var WorkersCacheApi = class {
  origin = "https://ckan-mcp-cache.internal";
  /**
   * Returns the cached JSON value, or undefined on a miss or any error.
   */
  async get(key) {
    try {
      const hit = await caches.default.match(`${this.origin}/${key}`);
      return hit ? await hit.json() : void 0;
    } catch {
      return void 0;
    }
  }
  /**
   * Stores a JSON-serializable value with an `s-maxage` of `ttlSeconds`.
   * A TTL <= 0 stores nothing; errors are ignored (best effort).
   */
  async set(key, value, ttlSeconds) {
    if (ttlSeconds <= 0) return;
    try {
      const headers = {
        "Content-Type": "application/json",
        "Cache-Control": `public, s-maxage=${ttlSeconds}`
      };
      const payload = new Response(JSON.stringify(value), { headers });
      await caches.default.put(`${this.origin}/${key}`, payload);
    } catch {
      // Deliberately ignored: failing to cache must not fail the caller.
    }
  }
};
385
// Lazily created cache backend shared by every caller of getCache().
var sharedCache = null;
/**
 * Returns the shared cache backend, creating it on first use.
 *
 * WorkersCacheApi is chosen when `caches.default` exists and the runtime is
 * not Node; otherwise a MemoryLruCache sized by getCacheConfig().maxEntries.
 */
function getCache() {
  if (!sharedCache) {
    const runningOnNode = typeof process !== "undefined" && !!process.versions?.node;
    const workersCacheAvailable = typeof caches !== "undefined" && typeof caches.default !== "undefined";
    sharedCache = workersCacheAvailable && !runningOnNode
      ? new WorkersCacheApi()
      : new MemoryLruCache(getCacheConfig().maxEntries);
  }
  return sharedCache;
}
397
+
398
+ // src/utils/rate-limiter.ts
399
/**
 * Error describing a request that would have to wait too long for a
 * rate-limit token.
 */
var RateLimitError = class extends Error {
  /**
   * @param {string} hostname - Host whose limit was hit.
   * @param {number} waitMs - Wait that would have been required, in
   *   milliseconds (rounded in the message).
   */
  constructor(hostname, waitMs) {
    const roundedWait = Math.round(waitMs);
    super(`Rate limit exceeded for ${hostname}: would need to wait ${roundedWait}ms`);
    this.name = "RateLimitError";
  }
};
407
// Upper bound on the number of per-hostname token buckets kept by
// UpstreamRateLimiter; when reached, the oldest bucket is dropped.
var MAX_BUCKETS = 200;
408
/**
 * Reads an environment variable for the rate limiter configuration.
 *
 * @param {string} name - Environment variable name.
 * @returns {string | undefined} The value, or undefined when `process` or
 *   `process.env` is unavailable or the variable is unset or empty.
 */
function readEnv2(name) {
  if (typeof process === "undefined") return void 0;
  const value = process.env?.[name];
  // An empty string counts as "not set"; undefined passes through unchanged.
  return value === "" ? void 0 : value;
}
413
/**
 * Builds the upstream rate limiter configuration from environment variables.
 *
 * - CKAN_RATE_LIMIT_ENABLED: any value other than "false" enables limiting;
 *   when unset, limiting is on unless VITEST is "true".
 * - CKAN_RATE_LIMIT_RPS (default 5): tokens refilled per second, must be > 0.
 * - CKAN_RATE_LIMIT_BURST (default 10): bucket capacity, must be >= 1 because
 *   a request needs one whole token.
 * - CKAN_RATE_LIMIT_MAX_WAIT_MS (default 5000): longest acceptable wait, > 0.
 *
 * Values that are unset, non-numeric, non-finite or out of range fall back to
 * the default. Range checking matters here: a negative or infinite RPS makes
 * the limiter's wait computation negative/NaN so `acquire` never terminates,
 * and a burst below 1 can never satisfy a request.
 *
 * @returns {{enabled: boolean, rps: number, burst: number, maxWaitMs: number}}
 */
function getRateLimitConfig() {
  const numberFromEnv = (name, fallback, isValid) => {
    const parsed = Number(readEnv2(name));
    return Number.isFinite(parsed) && isValid(parsed) ? parsed : fallback;
  };
  const isPositive = (n) => n > 0;
  const enabledRaw = readEnv2("CKAN_RATE_LIMIT_ENABLED");
  const isTest = readEnv2("VITEST") === "true";
  const enabled = enabledRaw !== void 0 ? enabledRaw !== "false" : !isTest;
  return {
    enabled,
    rps: numberFromEnv("CKAN_RATE_LIMIT_RPS", 5, isPositive),
    burst: numberFromEnv("CKAN_RATE_LIMIT_BURST", 10, (n) => n >= 1),
    maxWaitMs: numberFromEnv("CKAN_RATE_LIMIT_MAX_WAIT_MS", 5e3, isPositive)
  };
}
424
/**
 * Returns a promise that resolves (with undefined) after `ms` milliseconds.
 *
 * @param {number} ms - Delay passed to setTimeout.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
427
/**
 * Token-bucket rate limiter keyed by upstream hostname.
 *
 * Each hostname gets a bucket holding up to `config.burst` tokens, refilled
 * at `config.rps` tokens per second. A request consumes one token and waits
 * when none is available, up to `config.maxWaitMs` in total.
 */
var UpstreamRateLimiter = class {
  /**
   * @param {{rps: number, burst: number, maxWaitMs: number}} config
   */
  constructor(config) {
    this.config = config;
  }
  buckets = /* @__PURE__ */ new Map();
  /**
   * Takes one token for `hostname`, sleeping until one is available.
   *
   * @param {string} hostname - Upstream host being called.
   * @returns {Promise<void>} Resolves once a token has been consumed.
   * @throws {RateLimitError} When the accumulated wait would exceed
   *   `config.maxWaitMs`.
   */
  async acquire(hostname) {
    const { rps, burst, maxWaitMs } = this.config;
    let bucket = this.buckets.get(hostname);
    if (!bucket) {
      // Bound memory: drop the oldest-inserted bucket once the cap is reached.
      if (this.buckets.size >= MAX_BUCKETS) {
        const oldest = this.buckets.keys().next().value;
        if (oldest !== void 0) this.buckets.delete(oldest);
      }
      bucket = { tokens: burst, lastRefill: Date.now() };
      this.buckets.set(hostname, bucket);
    }
    // `bucket` is held by reference for the whole call. While this call
    // sleeps, another acquire() may evict the bucket or clear() may empty the
    // map; looking it up again after the sleep would yield undefined and
    // crash. A detached bucket still throttles this waiter correctly.
    let waited = 0;
    while (true) {
      const now = Date.now();
      const refill = (now - bucket.lastRefill) / 1e3 * rps;
      bucket.tokens = Math.min(burst, bucket.tokens + refill);
      bucket.lastRefill = now;
      if (bucket.tokens >= 1) {
        bucket.tokens -= 1;
        return;
      }
      // Time until the missing fraction of a token has been refilled.
      const waitNeeded = (1 - bucket.tokens) / rps * 1e3;
      if (waited + waitNeeded > maxWaitMs) {
        throw new RateLimitError(hostname, waited + waitNeeded);
      }
      await sleep(waitNeeded);
      waited += waitNeeded;
    }
  }
  /** Forgets every bucket. */
  clear() {
    this.buckets.clear();
  }
};
464
// Lazily created limiter shared by every caller of getRateLimiter().
var sharedLimiter = null;
/**
 * Returns the shared UpstreamRateLimiter, creating it on first use with the
 * configuration read by getRateLimitConfig() at that moment.
 */
function getRateLimiter() {
  sharedLimiter ??= new UpstreamRateLimiter(getRateLimitConfig());
  return sharedLimiter;
}
471
+
265
472
  // src/utils/http.ts
473
/**
 * Error raised for a failed CKAN API call, carrying the HTTP status (when
 * known) and the CKAN action that failed.
 */
var CkanApiError = class extends Error {
  status;
  action;
  /**
   * @param {string} message - Human-readable description.
   * @param {number | undefined} status - HTTP status code, or undefined when
   *   none applies.
   * @param {string} action - CKAN action name, e.g. "package_show".
   */
  constructor(message, status, action) {
    super(message);
    Object.assign(this, { name: "CkanApiError", status, action });
  }
};
483
/**
 * Turns an error into the text shown to the tool caller.
 *
 * For a CkanApiError the message is followed, on a new line, by a hint chosen
 * from the HTTP status and CKAN action, when one exists. Any other Error
 * yields its message, and non-Error values are stringified.
 *
 * @param {unknown} error - The caught value.
 * @param {string} _toolName - Name of the calling tool (currently unused).
 * @returns {string} Text for the error response.
 */
function formatCkanError(error, _toolName) {
  if (!(error instanceof CkanApiError)) {
    return error instanceof Error ? error.message : String(error);
  }
  const { status, action, message } = error;
  const hint = (() => {
    switch (status) {
      case 404:
        if (action.startsWith("datastore_search")) {
          return "\u2192 Get a valid resource_id first: call `ckan_package_show` on a dataset, then pick a resource where `datastore_active` is true.";
        }
        if (action === "package_show") {
          return "\u2192 Use `ckan_package_search` to find a valid dataset name or ID.";
        }
        if (action === "organization_show") {
          return "\u2192 Use `ckan_organization_list` or `ckan_organization_search` to discover valid organization names.";
        }
        return "";
      case 400:
        // The SQL action is tested first: it also matches the prefix below.
        if (action === "datastore_search_sql") {
          return "\u2192 Invalid SQL syntax or unknown column \u2014 check column names with `ckan_datastore_search` before writing SQL.";
        }
        if (action.startsWith("datastore_search")) {
          return "\u2192 Bad request \u2014 likely an invalid field name or filter syntax; check column names with a `SELECT *` query first.";
        }
        return "";
      case 409:
      case 422:
        return "\u2192 Portal rejected the request \u2014 parameters may conflict; simplify filters and retry.";
      case 502:
      case 503:
      case 504:
        return "\u2192 Portal temporarily unavailable \u2014 retry in a few seconds.";
      case 500:
        return "\u2192 Portal internal error \u2014 try a different portal or retry later.";
      case void 0:
        return "\u2192 The portal may not support this action, or the endpoint is unavailable.";
      default:
        return "";
    }
  })();
  return hint ? `${message}\n${hint}` : message;
}
515
// Outcome of the most recent cache lookup in makeCkanRequest: true when the
// result was served from cache, false otherwise, null before any request.
var _lastCacheHit = null;
266
516
  var loadZlib = /* @__PURE__ */ (() => {
267
517
  let cached = null;
268
518
  return async () => {
@@ -406,7 +656,12 @@ function validateServerUrl(serverUrl) {
406
656
  throw new Error(`Disallowed protocol "${parsed.protocol}". Only http and https are allowed.`);
407
657
  }
408
658
  const hostname = parsed.hostname.toLowerCase();
409
- if (hostname === "localhost") {
659
+ const BLOCKED_HOSTNAMES = /* @__PURE__ */ new Set([
660
+ "localhost",
661
+ "ip6-localhost",
662
+ "ip6-loopback"
663
+ ]);
664
+ if (BLOCKED_HOSTNAMES.has(hostname)) {
410
665
  throw new Error(`Access to "${hostname}" is not allowed.`);
411
666
  }
412
667
  const ipv4 = hostname.match(/^(\d+)\.(\d+)\.(\d+)\.(\d+)$/);
@@ -437,8 +692,32 @@ function validateServerUrl(serverUrl) {
437
692
  throw new Error(`Access to private/internal IPv6 addresses is not allowed.`);
438
693
  }
439
694
  }
695
+ const rawAllowed = typeof process !== "undefined" ? process.env.CKAN_ALLOWED_DOMAINS ?? "" : "";
696
+ const allowedDomains = rawAllowed.split(",").map((s) => s.trim()).filter(Boolean);
697
+ if (allowedDomains.length > 0 && !allowedDomains.includes(hostname)) {
698
+ throw new Error(`Domain "${hostname}" is not in the allowed list (CKAN_ALLOWED_DOMAINS).`);
699
+ }
700
+ }
701
/**
 * Writes one JSON audit line per CKAN request to stderr (Node only).
 *
 * The entry always has `ts`, `server`, `action` and `cache_hit`. Of the
 * request parameters only `q`, `fq`, `sql`, `id`, `rows` and `limit` are
 * copied, each only when defined; `sql` is stringified and cut to 200
 * characters. Serialization and write errors are ignored so that logging can
 * never fail a request.
 *
 * @param {string} serverUrl - Portal URL as given by the caller.
 * @param {string} action - CKAN action name.
 * @param {Record<string, unknown>} params - Request parameters.
 * @param {boolean} cacheHit - Whether the result came from cache.
 */
function auditLog(serverUrl, action, params, cacheHit) {
  if (typeof process === "undefined" || !process.versions?.node) return;
  const entry = {
    ts: (/* @__PURE__ */ new Date()).toISOString(),
    server: serverUrl,
    action,
    cache_hit: cacheHit
  };
  // The array order fixes the key order of the emitted JSON.
  for (const field of ["q", "fq", "sql", "id", "rows", "limit"]) {
    const value = params[field];
    if (value === void 0) continue;
    entry[field] = field === "sql" ? String(value).slice(0, 200) : value;
  }
  try {
    process.stderr.write(`${JSON.stringify(entry)}\n`);
  } catch {
    // Best effort: an unserializable value or closed stream is not fatal.
  }
}
441
- async function makeCkanRequest(serverUrl, action, params = {}) {
720
+ async function makeCkanRequest(serverUrl, action, params = {}, opts = {}) {
442
721
  const isNode = typeof process !== "undefined" && !!process.versions?.node;
443
722
  validateServerUrl(serverUrl);
444
723
  let resolvedServerUrl = serverUrl;
@@ -453,6 +732,26 @@ async function makeCkanRequest(serverUrl, action, params = {}) {
453
732
  const baseUrl = resolvedServerUrl.replace(/\/$/, "");
454
733
  const apiPath = getPortalApiPath(resolvedServerUrl);
455
734
  const url = `${baseUrl}${apiPath}/${action}`;
735
+ const cacheConfig = getCacheConfig();
736
+ const cacheEnabled = cacheConfig.enabled && opts.cache !== false;
737
+ const ttl = getTtlForAction(action, cacheConfig.ttlDefault);
738
+ const cache = cacheEnabled && ttl > 0 ? getCache() : null;
739
+ const cacheKey = cache ? await buildCacheKey(resolvedServerUrl, action, params) : "";
740
+ if (cache) {
741
+ const cached = await cache.get(cacheKey);
742
+ if (cached !== void 0) {
743
+ _lastCacheHit = true;
744
+ auditLog(serverUrl, action, params, true);
745
+ return cached;
746
+ }
747
+ }
748
+ _lastCacheHit = false;
749
+ const rateLimitConfig = getRateLimitConfig();
750
+ const rateLimitEnabled = rateLimitConfig.enabled && opts.rateLimit !== false;
751
+ if (rateLimitEnabled) {
752
+ const hostname = new URL(resolvedServerUrl).hostname;
753
+ await getRateLimiter().acquire(hostname);
754
+ }
456
755
  try {
457
756
  let decodedData;
458
757
  if (isNode) {
@@ -501,7 +800,7 @@ async function makeCkanRequest(serverUrl, action, params = {}) {
501
800
  clearTimeout(timeoutId);
502
801
  }
503
802
  if (!response.ok) {
504
- throw new Error(`CKAN API error (${response.status}): ${response.statusText}`);
803
+ throw new CkanApiError(`CKAN API error (${response.status}): ${response.statusText}`, response.status, action);
505
804
  }
506
805
  const buffer = await response.arrayBuffer();
507
806
  const headers = {};
@@ -511,20 +810,34 @@ async function makeCkanRequest(serverUrl, action, params = {}) {
511
810
  decodedData = await decodePossiblyCompressed(buffer, headers);
512
811
  }
513
812
  if (decodedData && decodedData.success === true) {
514
- return decodedData.result;
813
+ const result = decodedData.result;
814
+ if (cache) {
815
+ try {
816
+ const serialized = JSON.stringify(result);
817
+ if (serialized.length <= cacheConfig.maxEntryBytes) {
818
+ await cache.set(cacheKey, result, ttl);
819
+ }
820
+ } catch {
821
+ }
822
+ }
823
+ auditLog(serverUrl, action, params, false);
824
+ return result;
515
825
  } else {
516
- throw new Error(
517
- `CKAN API returned success=false: ${JSON.stringify(decodedData)}`
826
+ throw new CkanApiError(
827
+ `CKAN API returned success=false: ${JSON.stringify(decodedData)}`,
828
+ void 0,
829
+ action
518
830
  );
519
831
  }
520
832
  } catch (error) {
833
+ if (error instanceof CkanApiError) throw error;
521
834
  if (axios.isAxiosError(error)) {
522
835
  const axiosError = error;
523
836
  if (axiosError.response) {
524
837
  const status = axiosError.response.status;
525
838
  const data = axiosError.response.data;
526
839
  const errorMsg = data?.error?.message || data?.error || "Unknown error";
527
- throw new Error(`CKAN API error (${status}): ${errorMsg}`);
840
+ throw new CkanApiError(`CKAN API error (${status}): ${errorMsg}`, status, action);
528
841
  } else if (axiosError.code === "ECONNABORTED") {
529
842
  throw new Error(`Request timeout connecting to ${serverUrl}`);
530
843
  } else if (axiosError.code === "ENOTFOUND") {
@@ -604,6 +917,25 @@ function addDemoFooter(text) {
604
917
  }
605
918
 
606
919
  // src/utils/url-generator.ts
920
// Matches "/resource/<uuid>" in a URL path and captures the UUID.
var UUID_RE = /\/resource\/([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})/i;
/**
 * Detects a resource whose download URL lives on a different host than the
 * queried portal and exposes a resource UUID in its path.
 *
 * @param {string | null | undefined} resourceUrl - Resource download URL.
 * @param {string} serverUrl - URL of the portal that was queried.
 * @returns {{portalUrl: string, resourceId: string} | null} Scheme+host of
 *   the other portal and the UUID from the path; null when the URL is empty,
 *   either URL cannot be parsed, both hosts are equal, or the path has no
 *   `/resource/<uuid>` segment.
 */
function extractSourcePortal(resourceUrl, serverUrl) {
  if (!resourceUrl) return null;
  const parse = (text) => {
    try {
      return new URL(text);
    } catch {
      return null;
    }
  };
  const resource = parse(resourceUrl);
  const server = resource && parse(serverUrl);
  if (!resource || !server) return null;
  if (resource.hostname === server.hostname) return null;
  const found = UUID_RE.exec(resource.pathname);
  return found
    ? { portalUrl: `${resource.protocol}//${resource.host}`, resourceId: found[1] }
    : null;
}
607
939
  function getDatasetViewUrl(serverUrl, pkg) {
608
940
  const cleanServerUrl = normalizePortalUrl(serverUrl);
609
941
  const portal = getPortalConfig(serverUrl);
@@ -708,7 +1040,9 @@ var DEFAULT_RELEVANCE_WEIGHTS = {
708
1040
  title: 4,
709
1041
  notes: 2,
710
1042
  tags: 3,
711
- organization: 1
1043
+ organization: 1,
1044
+ holder: 4,
1045
+ publisher: 2
712
1046
  };
713
1047
  var QUERY_STOPWORDS = /* @__PURE__ */ new Set([
714
1048
  "a",
@@ -767,16 +1101,31 @@ var textMatchesTerms = (text, terms) => {
767
1101
  var scoreTextField = (text, terms, weight) => {
768
1102
  return textMatchesTerms(text, terms) ? weight : 0;
769
1103
  };
1104
/**
 * Reads a string field that may live either in the dataset's `extras` list
 * or directly on the dataset object.
 *
 * @param {object} dataset - Dataset record.
 * @param {string} key - Field name, e.g. "holder_name".
 * @returns {string} Value of the first `extras` item with that key and a
 *   non-empty string value; otherwise `dataset[key]` when it is a string;
 *   otherwise "".
 */
var readDcatExtra = (dataset, key) => {
  const extras = Array.isArray(dataset.extras) ? dataset.extras : [];
  const hit = extras.find(
    (extra) => extra && typeof extra === "object" && extra.key === key && typeof extra.value === "string" && extra.value.length > 0
  );
  if (hit) return hit.value;
  const rootValue = dataset[key];
  return typeof rootValue === "string" ? rootValue : "";
};
770
1115
  var scoreDatasetRelevance = (query, dataset, weights = DEFAULT_RELEVANCE_WEIGHTS) => {
771
1116
  const terms = extractQueryTerms(query);
772
1117
  const titleText = dataset.title || dataset.name || "";
773
1118
  const notesText = dataset.notes || "";
774
1119
  const orgText = dataset.organization?.title || dataset.organization?.name || dataset.owner_org || "";
1120
+ const holderText = readDcatExtra(dataset, "holder_name");
1121
+ const publisherText = readDcatExtra(dataset, "publisher_name");
775
1122
  const breakdown = {
776
1123
  title: scoreTextField(titleText, terms, weights.title),
777
1124
  notes: scoreTextField(notesText, terms, weights.notes),
778
1125
  tags: 0,
779
1126
  organization: scoreTextField(orgText, terms, weights.organization),
1127
+ holder: scoreTextField(holderText, terms, weights.holder),
1128
+ publisher: scoreTextField(publisherText, terms, weights.publisher),
780
1129
  total: 0
781
1130
  };
782
1131
  if (Array.isArray(dataset.tags) && dataset.tags.length > 0 && terms.length > 0) {
@@ -786,7 +1135,7 @@ var scoreDatasetRelevance = (query, dataset, weights = DEFAULT_RELEVANCE_WEIGHTS
786
1135
  });
787
1136
  breakdown.tags = tagMatch ? weights.tags : 0;
788
1137
  }
789
- breakdown.total = breakdown.title + breakdown.notes + breakdown.tags + breakdown.organization;
1138
+ breakdown.total = breakdown.title + breakdown.notes + breakdown.tags + breakdown.organization + breakdown.holder + breakdown.publisher;
790
1139
  return { total: breakdown.total, breakdown, terms };
791
1140
  };
792
1141
  var parseAccessServices = (resource) => {
@@ -1478,10 +1827,7 @@ Note: showing top ${sorted.length} only. Use \`response_format: json\` for full
1478
1827
  };
1479
1828
  } catch (error) {
1480
1829
  return {
1481
- content: [{
1482
- type: "text",
1483
- text: `Error searching packages: ${error instanceof Error ? error.message : String(error)}`
1484
- }],
1830
+ content: [{ type: "text", text: formatCkanError(error, "ckan_package_search") }],
1485
1831
  isError: true
1486
1832
  };
1487
1833
  }
@@ -1504,7 +1850,13 @@ Args:
1504
1850
  - query (string): Natural language or keyword query (e.g., "mobilit\xE0 urbana", "air quality")
1505
1851
  - limit (number): Number of datasets to return (default: 10)
1506
1852
  - weights (object): Field weights for scoring \u2014 higher weight = more influence on rank
1507
- Default: title=4, tags=3, notes=2, organization=1
1853
+ Default: title=4, tags=3, notes=2, organization=1, holder=4, publisher=2
1854
+ Note on holder vs organization: on federated catalogs (e.g. dati.gov.it), \`organization\`
1855
+ is the harvesting catalog (e.g. Regione Puglia), while \`holder\` (DCAT-AP_IT dct:rightsHolder)
1856
+ is the actual data owner (e.g. Comune di Lecce). Queries like "datasets from a specific Comune"
1857
+ match \`holder\` correctly; matching only \`organization\` misses datasets harvested via
1858
+ aggregators. \`publisher\` (dct:publisher) is scored separately at lower weight as it can
1859
+ contain technical roles ("Redazione OD") rather than the institutional owner.
1508
1860
  - query_parser ('default' | 'text'): Override search parser behavior
1509
1861
  - response_format ('markdown' | 'json'): Output format
1510
1862
 
@@ -1514,17 +1866,20 @@ Returns:
1514
1866
  Examples:
1515
1867
  - { server_url: "https://dati.gov.it/opendata", query: "mobilit\xE0" }
1516
1868
  - { server_url: "...", query: "trasporti", limit: 5, weights: { title: 5, notes: 2 } }
1869
+ - { server_url: "...", query: "defibrillatori Comune di Lecce", weights: { holder: 5 } }
1517
1870
 
1518
1871
  Typical workflow: ckan_find_relevant_datasets \u2192 ckan_package_show (inspect top results) \u2192 ckan_datastore_search (query data)`,
1519
1872
  inputSchema: z2.object({
1520
1873
  server_url: z2.string().url().describe("Base URL of the CKAN server (e.g., https://dati.gov.it/opendata)"),
1521
- query: z2.string().min(2).describe("Natural language or keyword query to match against dataset title, notes, tags, and organization"),
1874
+ query: z2.string().min(2).describe("Natural language or keyword query to match against dataset title, notes, tags, organization, holder and publisher"),
1522
1875
  limit: z2.coerce.number().int().min(1).max(50).optional().default(10).describe("Number of datasets to return"),
1523
1876
  weights: z2.object({
1524
1877
  title: z2.coerce.number().min(0).optional().describe("Weight for title match (default 4)"),
1525
1878
  notes: z2.coerce.number().min(0).optional().describe("Weight for description match (default 2)"),
1526
1879
  tags: z2.coerce.number().min(0).optional().describe("Weight for tag match (default 3)"),
1527
- organization: z2.coerce.number().min(0).optional().describe("Weight for organization match (default 1)")
1880
+ organization: z2.coerce.number().min(0).optional().describe("Weight for organization (CKAN catalog / harvester) match (default 1)"),
1881
+ holder: z2.coerce.number().min(0).optional().describe("Weight for holder_name match \u2014 DCAT-AP_IT dct:rightsHolder, the actual data owner (default 4)"),
1882
+ publisher: z2.coerce.number().min(0).optional().describe("Weight for publisher_name match \u2014 DCAT-AP_IT dct:publisher (default 2)")
1528
1883
  }).optional().describe("Per-field scoring weights; unspecified fields use defaults"),
1529
1884
  query_parser: z2.enum(["default", "text"]).optional().describe("Override search parser ('text' forces text:(...) on non-fielded queries)"),
1530
1885
  response_format: ResponseFormatSchema
@@ -1621,6 +1976,10 @@ Typical workflow: ckan_find_relevant_datasets \u2192 ckan_package_show (inspect
1621
1976
  markdown += `- **Tags**: ${weights.tags}
1622
1977
  `;
1623
1978
  markdown += `- **Organization**: ${weights.organization}
1979
+ `;
1980
+ markdown += `- **Holder**: ${weights.holder}
1981
+ `;
1982
+ markdown += `- **Publisher**: ${weights.publisher}
1624
1983
 
1625
1984
  `;
1626
1985
  if (top.length === 0) {
@@ -1655,6 +2014,10 @@ Typical workflow: ckan_find_relevant_datasets \u2192 ckan_package_show (inspect
1655
2014
  markdown += `- Tags: ${dataset.breakdown.tags}
1656
2015
  `;
1657
2016
  markdown += `- Organization: ${dataset.breakdown.organization}
2017
+ `;
2018
+ markdown += `- Holder: ${dataset.breakdown.holder}
2019
+ `;
2020
+ markdown += `- Publisher: ${dataset.breakdown.publisher}
1658
2021
  `;
1659
2022
  markdown += `- Total: ${dataset.breakdown.total}
1660
2023
 
@@ -1666,10 +2029,7 @@ Typical workflow: ckan_find_relevant_datasets \u2192 ckan_package_show (inspect
1666
2029
  };
1667
2030
  } catch (error) {
1668
2031
  return {
1669
- content: [{
1670
- type: "text",
1671
- text: `Error ranking datasets: ${error instanceof Error ? error.message : String(error)}`
1672
- }],
2032
+ content: [{ type: "text", text: formatCkanError(error, "ckan_find_relevant_datasets") }],
1673
2033
  isError: true
1674
2034
  };
1675
2035
  }
@@ -1745,15 +2105,20 @@ Typical workflow: ckan_package_show \u2192 pick a resource with datastore_active
1745
2105
  };
1746
2106
  } catch (error) {
1747
2107
  return {
1748
- content: [{
1749
- type: "text",
1750
- text: `Error fetching package: ${error instanceof Error ? error.message : String(error)}`
1751
- }],
2108
+ content: [{ type: "text", text: formatCkanError(error, "ckan_package_show") }],
1752
2109
  isError: true
1753
2110
  };
1754
2111
  }
1755
2112
  }
1756
2113
  );
2114
/**
 * Probes whether a resource is queryable through the DataStore of a portal.
 *
 * Issues an uncached `datastore_search` with `limit: 0` for the resource.
 *
 * @param {string} portalUrl - Base URL of the portal to probe.
 * @param {string} resourceId - Resource ID to look up.
 * @returns {Promise<boolean>} true when the request succeeds, false when it
 *   fails for any reason.
 */
async function checkSourceDatastore(portalUrl, resourceId) {
  const probeParams = { resource_id: resourceId, limit: 0 };
  let available = true;
  try {
    await makeCkanRequest(portalUrl, "datastore_search", probeParams, { cache: false });
  } catch {
    available = false;
  }
  return available;
}
1757
2122
  server.registerTool(
1758
2123
  "ckan_list_resources",
1759
2124
  {
@@ -1776,11 +2141,17 @@ Examples:
1776
2141
  - { server_url: "https://dati.gov.it/opendata", id: "dataset-name" }
1777
2142
  - { server_url: "...", id: "dataset-name", format_filter: "CSV" }
1778
2143
 
1779
- Typical workflow: ckan_package_search \u2192 ckan_list_resources (assess available files) \u2192 ckan_datastore_search (for resources with DataStore=true)`,
2144
+ Typical workflow: ckan_package_search \u2192 ckan_list_resources (assess available files) \u2192 ckan_datastore_search (for resources with DataStore=true)
2145
+
2146
+ When a resource has DataStore=false but its download URL belongs to a different (source) portal,
2147
+ the tool automatically probes the source portal for DataStore availability and reports
2148
+ source_datastore_active and source_portal_url so you can query the data there instead.
2149
+ Set check_source_portal=false to skip these extra HTTP calls.`,
1780
2150
  inputSchema: z2.object({
1781
2151
  server_url: z2.string().url().describe("Base URL of the CKAN server"),
1782
2152
  id: z2.string().min(1).describe("Dataset ID or name"),
1783
2153
  format_filter: z2.string().optional().describe("Filter resources by format, case-insensitive (e.g., 'CSV', 'json', 'XLSX')"),
2154
+ check_source_portal: z2.boolean().optional().describe("When true (default), probes the source portal for DataStore availability when a resource URL points to a different CKAN instance"),
1784
2155
  response_format: ResponseFormatSchema
1785
2156
  }).strict(),
1786
2157
  annotations: {
@@ -1799,6 +2170,7 @@ Typical workflow: ckan_package_search \u2192 ckan_list_resources (assess availab
1799
2170
  );
1800
2171
  const resources = Array.isArray(result.resources) ? result.resources : [];
1801
2172
  const formatFilter = params.format_filter?.toUpperCase();
2173
+ const doSourceCheck = params.check_source_portal !== false;
1802
2174
  const summary = resources.filter((r) => !formatFilter || (r.format || "").toUpperCase() === formatFilter).map((r) => {
1803
2175
  const effectiveUrl = resolveDownloadUrl(r);
1804
2176
  return {
@@ -1810,6 +2182,18 @@ Typical workflow: ckan_package_search \u2192 ckan_list_resources (assess availab
1810
2182
  url: effectiveUrl
1811
2183
  };
1812
2184
  });
2185
+ if (doSourceCheck) {
2186
+ await Promise.all(
2187
+ summary.map(async (item, idx) => {
2188
+ if (item.datastore_active) return;
2189
+ const extracted = extractSourcePortal(item.url, params.server_url);
2190
+ if (!extracted) return;
2191
+ const active = await checkSourceDatastore(extracted.portalUrl, extracted.resourceId);
2192
+ summary[idx].source_datastore_active = active;
2193
+ summary[idx].source_portal_url = active ? extracted.portalUrl : null;
2194
+ })
2195
+ );
2196
+ }
1813
2197
  if (params.response_format === "json" /* JSON */) {
1814
2198
  const payload = {
1815
2199
  dataset_id: result.id,
@@ -1864,6 +2248,16 @@ Typical workflow: ckan_package_search \u2192 ckan_list_resources (assess availab
1864
2248
  `;
1865
2249
  for (const r of dsResources) {
1866
2250
  markdown += `- **${r.name}** (${r.format}): \`${r.id}\`
2251
+ `;
2252
+ }
2253
+ }
2254
+ const sourceResources = summary.filter((r) => r.source_datastore_active && r.source_portal_url);
2255
+ if (sourceResources.length > 0) {
2256
+ markdown += `
2257
+ **Available on source portal** (use \`ckan_datastore_search\` with the source portal URL):
2258
+ `;
2259
+ for (const r of sourceResources) {
2260
+ markdown += `- **${r.name}** (${r.format}): \`${r.id}\` on ${r.source_portal_url}
1867
2261
  `;
1868
2262
  }
1869
2263
  }
@@ -1873,10 +2267,7 @@ Typical workflow: ckan_package_search \u2192 ckan_list_resources (assess availab
1873
2267
  };
1874
2268
  } catch (error) {
1875
2269
  return {
1876
- content: [{
1877
- type: "text",
1878
- text: `Error listing resources: ${error instanceof Error ? error.message : String(error)}`
1879
- }],
2270
+ content: [{ type: "text", text: formatCkanError(error, "ckan_list_resources") }],
1880
2271
  isError: true
1881
2272
  };
1882
2273
  }
@@ -2055,8 +2446,7 @@ Typical workflow: ckan_organization_list \u2192 ckan_organization_show (inspect
2055
2446
  }
2056
2447
  );
2057
2448
  } catch (error) {
2058
- const message = error instanceof Error ? error.message : String(error);
2059
- if (message.includes("CKAN API error (500)")) {
2449
+ if (error instanceof CkanApiError && error.status === 500) {
2060
2450
  const searchResult = await makeCkanRequest(
2061
2451
  params.server_url,
2062
2452
  "package_search",
@@ -2164,10 +2554,7 @@ Note: organization_list returned 500; using package_search facets.
2164
2554
  };
2165
2555
  } catch (error) {
2166
2556
  return {
2167
- content: [{
2168
- type: "text",
2169
- text: `Error listing organizations: ${error instanceof Error ? error.message : String(error)}`
2170
- }],
2557
+ content: [{ type: "text", text: formatCkanError(error, "ckan_organization_list") }],
2171
2558
  isError: true
2172
2559
  };
2173
2560
  }
@@ -2228,10 +2615,7 @@ Typical workflow: ckan_organization_show \u2192 ckan_package_show (inspect a dat
2228
2615
  };
2229
2616
  } catch (error) {
2230
2617
  return {
2231
- content: [{
2232
- type: "text",
2233
- text: `Error fetching organization: ${error instanceof Error ? error.message : String(error)}`
2234
- }],
2618
+ content: [{ type: "text", text: formatCkanError(error, "ckan_organization_show") }],
2235
2619
  isError: true
2236
2620
  };
2237
2621
  }
@@ -2339,10 +2723,7 @@ Typical workflow: ckan_organization_search \u2192 ckan_organization_show (get de
2339
2723
  };
2340
2724
  } catch (error) {
2341
2725
  return {
2342
- content: [{
2343
- type: "text",
2344
- text: `Error searching organizations: ${error instanceof Error ? error.message : String(error)}`
2345
- }],
2726
+ content: [{ type: "text", text: formatCkanError(error, "ckan_organization_search") }],
2346
2727
  isError: true
2347
2728
  };
2348
2729
  }
@@ -2554,10 +2935,7 @@ Typical workflow: ckan_package_search \u2192 ckan_package_show (find resource_id
2554
2935
  };
2555
2936
  } catch (error) {
2556
2937
  return {
2557
- content: [{
2558
- type: "text",
2559
- text: `Error querying DataStore: ${error instanceof Error ? error.message : String(error)}`
2560
- }],
2938
+ content: [{ type: "text", text: formatCkanError(error, "ckan_datastore_search") }],
2561
2939
  isError: true
2562
2940
  };
2563
2941
  }
@@ -2618,10 +2996,7 @@ Security note: SQL queries are forwarded directly to the CKAN DataStore API. The
2618
2996
  };
2619
2997
  } catch (error) {
2620
2998
  return {
2621
- content: [{
2622
- type: "text",
2623
- text: `Error querying DataStore SQL: ${error instanceof Error ? error.message : String(error)}`
2624
- }],
2999
+ content: [{ type: "text", text: formatCkanError(error, "ckan_datastore_search_sql") }],
2625
3000
  isError: true
2626
3001
  };
2627
3002
  }
@@ -3073,10 +3448,7 @@ Typical workflow: ckan_group_list \u2192 ckan_group_show (inspect one) \u2192 ck
3073
3448
  };
3074
3449
  } catch (error) {
3075
3450
  return {
3076
- content: [{
3077
- type: "text",
3078
- text: `Error listing groups: ${error instanceof Error ? error.message : String(error)}`
3079
- }],
3451
+ content: [{ type: "text", text: formatCkanError(error, "ckan_group_list") }],
3080
3452
  isError: true
3081
3453
  };
3082
3454
  }
@@ -3134,10 +3506,7 @@ Typical workflow: ckan_group_show \u2192 ckan_package_show (inspect a dataset) \
3134
3506
  };
3135
3507
  } catch (error) {
3136
3508
  return {
3137
- content: [{
3138
- type: "text",
3139
- text: `Error fetching group: ${error instanceof Error ? error.message : String(error)}`
3140
- }],
3509
+ content: [{ type: "text", text: formatCkanError(error, "ckan_group_show") }],
3141
3510
  isError: true
3142
3511
  };
3143
3512
  }
@@ -3239,10 +3608,7 @@ Typical workflow: ckan_group_search \u2192 ckan_group_show (get details) \u2192
3239
3608
  };
3240
3609
  } catch (error) {
3241
3610
  return {
3242
- content: [{
3243
- type: "text",
3244
- text: `Error searching groups: ${error instanceof Error ? error.message : String(error)}`
3245
- }],
3611
+ content: [{ type: "text", text: formatCkanError(error, "ckan_group_search") }],
3246
3612
  isError: true
3247
3613
  };
3248
3614
  }
@@ -3911,12 +4277,8 @@ The MQA (Metadata Quality Assurance) system is operated by data.europa.eu and on
3911
4277
  }]
3912
4278
  };
3913
4279
  } catch (error) {
3914
- const errorMessage = error instanceof Error ? error.message : String(error);
3915
4280
  return {
3916
- content: [{
3917
- type: "text",
3918
- text: `Error retrieving quality metrics: ${errorMessage}`
3919
- }]
4281
+ content: [{ type: "text", text: `Error retrieving quality metrics: ${formatCkanError(error, "ckan_get_mqa_quality")}` }]
3920
4282
  };
3921
4283
  }
3922
4284
  }
@@ -3960,12 +4322,8 @@ The MQA (Metadata Quality Assurance) system is operated by data.europa.eu and on
3960
4322
  }]
3961
4323
  };
3962
4324
  } catch (error) {
3963
- const errorMessage = error instanceof Error ? error.message : String(error);
3964
4325
  return {
3965
- content: [{
3966
- type: "text",
3967
- text: `Error retrieving quality details: ${errorMessage}`
3968
- }]
4326
+ content: [{ type: "text", text: `Error retrieving quality details: ${formatCkanError(error, "ckan_get_mqa_quality_details")}` }]
3969
4327
  };
3970
4328
  }
3971
4329
  }
@@ -4145,10 +4503,7 @@ Typical workflow: ckan_analyze_datasets \u2192 ckan_datastore_search (with known
4145
4503
  };
4146
4504
  } catch (error) {
4147
4505
  return {
4148
- content: [{
4149
- type: "text",
4150
- text: `Error analyzing datasets: ${error instanceof Error ? error.message : String(error)}`
4151
- }],
4506
+ content: [{ type: "text", text: formatCkanError(error, "ckan_analyze_datasets") }],
4152
4507
  isError: true
4153
4508
  };
4154
4509
  }
@@ -4236,10 +4591,7 @@ Typical workflow: ckan_catalog_stats (understand the portal) \u2192 ckan_package
4236
4591
  };
4237
4592
  } catch (error) {
4238
4593
  return {
4239
- content: [{
4240
- type: "text",
4241
- text: `Error retrieving catalog stats: ${error instanceof Error ? error.message : String(error)}`
4242
- }],
4594
+ content: [{ type: "text", text: formatCkanError(error, "ckan_catalog_stats") }],
4243
4595
  isError: true
4244
4596
  };
4245
4597
  }
@@ -4544,11 +4896,8 @@ Typical workflow: ckan_find_portals (discover portal URL) \u2192 ckan_status_sho
4544
4896
  };
4545
4897
  } catch (error) {
4546
4898
  return {
4547
- content: [{
4548
- type: "text",
4549
- text: `Could not fetch portal list from datashades.info:
4550
- ${error instanceof Error ? error.message : String(error)}`
4551
- }],
4899
+ content: [{ type: "text", text: `Could not fetch portal list from datashades.info:
4900
+ ${formatCkanError(error, "ckan_find_portals")}` }],
4552
4901
  isError: true
4553
4902
  };
4554
4903
  }
@@ -5170,7 +5519,7 @@ var registerAllPrompts = (server) => {
5170
5519
  function createServer() {
5171
5520
  return new McpServer({
5172
5521
  name: "ckan-mcp-server",
5173
- version: "0.4.99"
5522
+ version: "0.4.106"
5174
5523
  });
5175
5524
  }
5176
5525
  function registerAll(server) {