@aborruso/ckan-mcp-server 0.4.99 → 0.4.105

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. package/LOG.md +65 -0
  2. package/dist/index.js +430 -86
  3. package/dist/worker.js +207 -185
  4. package/package.json +1 -1
package/dist/index.js CHANGED
@@ -262,7 +262,257 @@ function requiresMultilingualNormalization(serverUrl) {
262
262
  return portal?.normalize === "multilingual";
263
263
  }
264
264
 
265
+ // src/utils/cache.ts
266
+ var TTL_METADATA = /* @__PURE__ */ new Set([
267
+ "package_search",
268
+ "package_show",
269
+ "current_package_list_with_resources",
270
+ "resource_show",
271
+ "organization_show",
272
+ "organization_list",
273
+ "organization_search",
274
+ "group_show",
275
+ "group_list",
276
+ "group_search",
277
+ "tag_list",
278
+ "tag_show",
279
+ "tag_search"
280
+ ]);
281
+ var TTL_STATUS = /* @__PURE__ */ new Set(["status_show", "site_read"]);
282
+ var TTL_DATASTORE = /* @__PURE__ */ new Set(["datastore_search", "datastore_search_sql"]);
283
+ function getTtlForAction(action, fallback) {
284
+ if (TTL_METADATA.has(action)) return 300;
285
+ if (TTL_STATUS.has(action)) return 3600;
286
+ if (TTL_DATASTORE.has(action)) return 60;
287
+ return fallback;
288
+ }
289
+ function readEnv(name) {
290
+ if (typeof process === "undefined" || !process.env) return void 0;
291
+ const value = process.env[name];
292
+ return value === void 0 || value === "" ? void 0 : value;
293
+ }
294
+ function getCacheConfig() {
295
+ const enabledRaw = readEnv("CKAN_CACHE_ENABLED");
296
+ const isTest = readEnv("VITEST") === "true";
297
+ const enabled = enabledRaw !== void 0 ? enabledRaw !== "false" : !isTest;
298
+ const ttlDefault = Number(readEnv("CKAN_CACHE_TTL_DEFAULT")) || 300;
299
+ const maxEntries = Number(readEnv("CKAN_CACHE_MAX_ENTRIES")) || 500;
300
+ const maxEntryBytes = Number(readEnv("CKAN_CACHE_MAX_ENTRY_BYTES")) || 1024 * 1024;
301
+ return { enabled, ttlDefault, maxEntries, maxEntryBytes };
302
+ }
303
+ function canonicalizeParams(params) {
304
+ const keys = Object.keys(params).sort();
305
+ const pairs = [];
306
+ for (const key of keys) {
307
+ const value = params[key];
308
+ if (value === void 0 || value === null) continue;
309
+ const serialized = typeof value === "object" ? JSON.stringify(value) : String(value);
310
+ pairs.push(`${key}=${serialized}`);
311
+ }
312
+ return pairs.join("&");
313
+ }
314
+ async function sha1Hex(input) {
315
+ const data = new TextEncoder().encode(input);
316
+ const hashBuffer = await crypto.subtle.digest("SHA-1", data);
317
+ const bytes = new Uint8Array(hashBuffer);
318
+ let hex = "";
319
+ for (const b of bytes) hex += b.toString(16).padStart(2, "0");
320
+ return hex;
321
+ }
322
+ async function buildCacheKey(serverUrl, action, params) {
323
+ const raw = `${serverUrl}|${action}|${canonicalizeParams(params)}`;
324
+ return sha1Hex(raw);
325
+ }
326
+ var MemoryLruCache = class {
327
+ constructor(maxEntries) {
328
+ this.maxEntries = maxEntries;
329
+ }
330
+ store = /* @__PURE__ */ new Map();
331
+ async get(key) {
332
+ const entry = this.store.get(key);
333
+ if (!entry) return void 0;
334
+ if (entry.expiresAt <= Date.now()) {
335
+ this.store.delete(key);
336
+ return void 0;
337
+ }
338
+ this.store.delete(key);
339
+ this.store.set(key, entry);
340
+ return entry.value;
341
+ }
342
+ async set(key, value, ttlSeconds) {
343
+ if (ttlSeconds <= 0) return;
344
+ if (this.store.has(key)) this.store.delete(key);
345
+ this.store.set(key, { value, expiresAt: Date.now() + ttlSeconds * 1e3 });
346
+ while (this.store.size > this.maxEntries) {
347
+ const oldest = this.store.keys().next().value;
348
+ if (oldest === void 0) break;
349
+ this.store.delete(oldest);
350
+ }
351
+ }
352
+ clear() {
353
+ this.store.clear();
354
+ }
355
+ size() {
356
+ return this.store.size;
357
+ }
358
+ };
359
+ var WorkersCacheApi = class {
360
+ origin = "https://ckan-mcp-cache.internal";
361
+ async get(key) {
362
+ try {
363
+ const response = await caches.default.match(`${this.origin}/${key}`);
364
+ if (!response) return void 0;
365
+ return await response.json();
366
+ } catch {
367
+ return void 0;
368
+ }
369
+ }
370
+ async set(key, value, ttlSeconds) {
371
+ if (ttlSeconds <= 0) return;
372
+ try {
373
+ const body = JSON.stringify(value);
374
+ const response = new Response(body, {
375
+ headers: {
376
+ "Content-Type": "application/json",
377
+ "Cache-Control": `public, s-maxage=${ttlSeconds}`
378
+ }
379
+ });
380
+ await caches.default.put(`${this.origin}/${key}`, response);
381
+ } catch {
382
+ }
383
+ }
384
+ };
385
+ var sharedCache = null;
386
+ function getCache() {
387
+ if (sharedCache) return sharedCache;
388
+ const hasWorkersCaches = typeof caches !== "undefined" && typeof caches.default !== "undefined";
389
+ const isNode = typeof process !== "undefined" && !!process.versions?.node;
390
+ if (hasWorkersCaches && !isNode) {
391
+ sharedCache = new WorkersCacheApi();
392
+ } else {
393
+ sharedCache = new MemoryLruCache(getCacheConfig().maxEntries);
394
+ }
395
+ return sharedCache;
396
+ }
397
+
398
+ // src/utils/rate-limiter.ts
399
+ var RateLimitError = class extends Error {
400
+ constructor(hostname, waitMs) {
401
+ super(
402
+ `Rate limit exceeded for ${hostname}: would need to wait ${Math.round(waitMs)}ms`
403
+ );
404
+ this.name = "RateLimitError";
405
+ }
406
+ };
407
+ var MAX_BUCKETS = 200;
408
+ function readEnv2(name) {
409
+ if (typeof process === "undefined" || !process.env) return void 0;
410
+ const v = process.env[name];
411
+ return v === void 0 || v === "" ? void 0 : v;
412
+ }
413
+ function getRateLimitConfig() {
414
+ const enabledRaw = readEnv2("CKAN_RATE_LIMIT_ENABLED");
415
+ const isTest = readEnv2("VITEST") === "true";
416
+ const enabled = enabledRaw !== void 0 ? enabledRaw !== "false" : !isTest;
417
+ return {
418
+ enabled,
419
+ rps: Number(readEnv2("CKAN_RATE_LIMIT_RPS")) || 5,
420
+ burst: Number(readEnv2("CKAN_RATE_LIMIT_BURST")) || 10,
421
+ maxWaitMs: Number(readEnv2("CKAN_RATE_LIMIT_MAX_WAIT_MS")) || 5e3
422
+ };
423
+ }
424
+ function sleep(ms) {
425
+ return new Promise((resolve) => setTimeout(resolve, ms));
426
+ }
427
+ var UpstreamRateLimiter = class {
428
+ constructor(config) {
429
+ this.config = config;
430
+ }
431
+ buckets = /* @__PURE__ */ new Map();
432
+ async acquire(hostname) {
433
+ const { rps, burst, maxWaitMs } = this.config;
434
+ if (!this.buckets.has(hostname)) {
435
+ if (this.buckets.size >= MAX_BUCKETS) {
436
+ const oldest = this.buckets.keys().next().value;
437
+ if (oldest !== void 0) this.buckets.delete(oldest);
438
+ }
439
+ this.buckets.set(hostname, { tokens: burst, lastRefill: Date.now() });
440
+ }
441
+ let waited = 0;
442
+ while (true) {
443
+ const bucket = this.buckets.get(hostname);
444
+ const now = Date.now();
445
+ const elapsed = now - bucket.lastRefill;
446
+ bucket.tokens = Math.min(burst, bucket.tokens + elapsed / 1e3 * rps);
447
+ bucket.lastRefill = now;
448
+ if (bucket.tokens >= 1) {
449
+ bucket.tokens -= 1;
450
+ return;
451
+ }
452
+ const waitNeeded = (1 - bucket.tokens) / rps * 1e3;
453
+ if (waited + waitNeeded > maxWaitMs) {
454
+ throw new RateLimitError(hostname, waited + waitNeeded);
455
+ }
456
+ await sleep(waitNeeded);
457
+ waited += waitNeeded;
458
+ }
459
+ }
460
+ clear() {
461
+ this.buckets.clear();
462
+ }
463
+ };
464
+ var sharedLimiter = null;
465
+ function getRateLimiter() {
466
+ if (!sharedLimiter) {
467
+ sharedLimiter = new UpstreamRateLimiter(getRateLimitConfig());
468
+ }
469
+ return sharedLimiter;
470
+ }
471
+
265
472
  // src/utils/http.ts
473
+ var CkanApiError = class extends Error {
474
+ status;
475
+ action;
476
+ constructor(message, status, action) {
477
+ super(message);
478
+ this.name = "CkanApiError";
479
+ this.status = status;
480
+ this.action = action;
481
+ }
482
+ };
483
+ function formatCkanError(error, _toolName) {
484
+ if (!(error instanceof CkanApiError)) {
485
+ return error instanceof Error ? error.message : String(error);
486
+ }
487
+ const { status, action, message } = error;
488
+ let hint = "";
489
+ if (status === 404) {
490
+ if (action.startsWith("datastore_search")) {
491
+ hint = "\u2192 Get a valid resource_id first: call `ckan_package_show` on a dataset, then pick a resource where `datastore_active` is true.";
492
+ } else if (action === "package_show") {
493
+ hint = "\u2192 Use `ckan_package_search` to find a valid dataset name or ID.";
494
+ } else if (action === "organization_show") {
495
+ hint = "\u2192 Use `ckan_organization_list` or `ckan_organization_search` to discover valid organization names.";
496
+ }
497
+ } else if (status === 400) {
498
+ if (action === "datastore_search_sql") {
499
+ hint = "\u2192 Invalid SQL syntax or unknown column \u2014 check column names with `ckan_datastore_search` before writing SQL.";
500
+ } else if (action.startsWith("datastore_search")) {
501
+ hint = "\u2192 Bad request \u2014 likely an invalid field name or filter syntax; check column names with a `SELECT *` query first.";
502
+ }
503
+ } else if (status === 409 || status === 422) {
504
+ hint = "\u2192 Portal rejected the request \u2014 parameters may conflict; simplify filters and retry.";
505
+ } else if (status === 503 || status === 502 || status === 504) {
506
+ hint = "\u2192 Portal temporarily unavailable \u2014 retry in a few seconds.";
507
+ } else if (status === 500) {
508
+ hint = "\u2192 Portal internal error \u2014 try a different portal or retry later.";
509
+ } else if (status === void 0) {
510
+ hint = "\u2192 The portal may not support this action, or the endpoint is unavailable.";
511
+ }
512
+ return hint ? `${message}
513
+ ${hint}` : message;
514
+ }
515
+ var _lastCacheHit = null;
266
516
  var loadZlib = /* @__PURE__ */ (() => {
267
517
  let cached = null;
268
518
  return async () => {
@@ -437,8 +687,32 @@ function validateServerUrl(serverUrl) {
437
687
  throw new Error(`Access to private/internal IPv6 addresses is not allowed.`);
438
688
  }
439
689
  }
690
+ const rawAllowed = typeof process !== "undefined" ? process.env.CKAN_ALLOWED_DOMAINS ?? "" : "";
691
+ const allowedDomains = rawAllowed.split(",").map((s) => s.trim()).filter(Boolean);
692
+ if (allowedDomains.length > 0 && !allowedDomains.includes(hostname)) {
693
+ throw new Error(`Domain "${hostname}" is not in the allowed list (CKAN_ALLOWED_DOMAINS).`);
694
+ }
695
+ }
696
+ function auditLog(serverUrl, action, params, cacheHit) {
697
+ if (typeof process === "undefined" || !process.versions?.node) return;
698
+ const entry = {
699
+ ts: (/* @__PURE__ */ new Date()).toISOString(),
700
+ server: serverUrl,
701
+ action,
702
+ cache_hit: cacheHit
703
+ };
704
+ if (params.q !== void 0) entry.q = params.q;
705
+ if (params.fq !== void 0) entry.fq = params.fq;
706
+ if (params.sql !== void 0) entry.sql = String(params.sql).slice(0, 200);
707
+ if (params.id !== void 0) entry.id = params.id;
708
+ if (params.rows !== void 0) entry.rows = params.rows;
709
+ if (params.limit !== void 0) entry.limit = params.limit;
710
+ try {
711
+ process.stderr.write(JSON.stringify(entry) + "\n");
712
+ } catch {
713
+ }
440
714
  }
441
- async function makeCkanRequest(serverUrl, action, params = {}) {
715
+ async function makeCkanRequest(serverUrl, action, params = {}, opts = {}) {
442
716
  const isNode = typeof process !== "undefined" && !!process.versions?.node;
443
717
  validateServerUrl(serverUrl);
444
718
  let resolvedServerUrl = serverUrl;
@@ -453,6 +727,26 @@ async function makeCkanRequest(serverUrl, action, params = {}) {
453
727
  const baseUrl = resolvedServerUrl.replace(/\/$/, "");
454
728
  const apiPath = getPortalApiPath(resolvedServerUrl);
455
729
  const url = `${baseUrl}${apiPath}/${action}`;
730
+ const cacheConfig = getCacheConfig();
731
+ const cacheEnabled = cacheConfig.enabled && opts.cache !== false;
732
+ const ttl = getTtlForAction(action, cacheConfig.ttlDefault);
733
+ const cache = cacheEnabled && ttl > 0 ? getCache() : null;
734
+ const cacheKey = cache ? await buildCacheKey(resolvedServerUrl, action, params) : "";
735
+ if (cache) {
736
+ const cached = await cache.get(cacheKey);
737
+ if (cached !== void 0) {
738
+ _lastCacheHit = true;
739
+ auditLog(serverUrl, action, params, true);
740
+ return cached;
741
+ }
742
+ }
743
+ _lastCacheHit = false;
744
+ const rateLimitConfig = getRateLimitConfig();
745
+ const rateLimitEnabled = rateLimitConfig.enabled && opts.rateLimit !== false;
746
+ if (rateLimitEnabled) {
747
+ const hostname = new URL(resolvedServerUrl).hostname;
748
+ await getRateLimiter().acquire(hostname);
749
+ }
456
750
  try {
457
751
  let decodedData;
458
752
  if (isNode) {
@@ -501,7 +795,7 @@ async function makeCkanRequest(serverUrl, action, params = {}) {
501
795
  clearTimeout(timeoutId);
502
796
  }
503
797
  if (!response.ok) {
504
- throw new Error(`CKAN API error (${response.status}): ${response.statusText}`);
798
+ throw new CkanApiError(`CKAN API error (${response.status}): ${response.statusText}`, response.status, action);
505
799
  }
506
800
  const buffer = await response.arrayBuffer();
507
801
  const headers = {};
@@ -511,20 +805,34 @@ async function makeCkanRequest(serverUrl, action, params = {}) {
511
805
  decodedData = await decodePossiblyCompressed(buffer, headers);
512
806
  }
513
807
  if (decodedData && decodedData.success === true) {
514
- return decodedData.result;
808
+ const result = decodedData.result;
809
+ if (cache) {
810
+ try {
811
+ const serialized = JSON.stringify(result);
812
+ if (serialized.length <= cacheConfig.maxEntryBytes) {
813
+ await cache.set(cacheKey, result, ttl);
814
+ }
815
+ } catch {
816
+ }
817
+ }
818
+ auditLog(serverUrl, action, params, false);
819
+ return result;
515
820
  } else {
516
- throw new Error(
517
- `CKAN API returned success=false: ${JSON.stringify(decodedData)}`
821
+ throw new CkanApiError(
822
+ `CKAN API returned success=false: ${JSON.stringify(decodedData)}`,
823
+ void 0,
824
+ action
518
825
  );
519
826
  }
520
827
  } catch (error) {
828
+ if (error instanceof CkanApiError) throw error;
521
829
  if (axios.isAxiosError(error)) {
522
830
  const axiosError = error;
523
831
  if (axiosError.response) {
524
832
  const status = axiosError.response.status;
525
833
  const data = axiosError.response.data;
526
834
  const errorMsg = data?.error?.message || data?.error || "Unknown error";
527
- throw new Error(`CKAN API error (${status}): ${errorMsg}`);
835
+ throw new CkanApiError(`CKAN API error (${status}): ${errorMsg}`, status, action);
528
836
  } else if (axiosError.code === "ECONNABORTED") {
529
837
  throw new Error(`Request timeout connecting to ${serverUrl}`);
530
838
  } else if (axiosError.code === "ENOTFOUND") {
@@ -604,6 +912,25 @@ function addDemoFooter(text) {
604
912
  }
605
913
 
606
914
  // src/utils/url-generator.ts
915
+ var UUID_RE = /\/resource\/([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})/i;
916
+ function extractSourcePortal(resourceUrl, serverUrl) {
917
+ if (!resourceUrl) return null;
918
+ let rParsed;
919
+ let sParsed;
920
+ try {
921
+ rParsed = new URL(resourceUrl);
922
+ sParsed = new URL(serverUrl);
923
+ } catch {
924
+ return null;
925
+ }
926
+ if (rParsed.hostname === sParsed.hostname) return null;
927
+ const match = rParsed.pathname.match(UUID_RE);
928
+ if (!match) return null;
929
+ return {
930
+ portalUrl: `${rParsed.protocol}//${rParsed.host}`,
931
+ resourceId: match[1]
932
+ };
933
+ }
607
934
  function getDatasetViewUrl(serverUrl, pkg) {
608
935
  const cleanServerUrl = normalizePortalUrl(serverUrl);
609
936
  const portal = getPortalConfig(serverUrl);
@@ -708,7 +1035,9 @@ var DEFAULT_RELEVANCE_WEIGHTS = {
708
1035
  title: 4,
709
1036
  notes: 2,
710
1037
  tags: 3,
711
- organization: 1
1038
+ organization: 1,
1039
+ holder: 4,
1040
+ publisher: 2
712
1041
  };
713
1042
  var QUERY_STOPWORDS = /* @__PURE__ */ new Set([
714
1043
  "a",
@@ -767,16 +1096,31 @@ var textMatchesTerms = (text, terms) => {
767
1096
  var scoreTextField = (text, terms, weight) => {
768
1097
  return textMatchesTerms(text, terms) ? weight : 0;
769
1098
  };
1099
+ var readDcatExtra = (dataset, key) => {
1100
+ const extras = Array.isArray(dataset.extras) ? dataset.extras : [];
1101
+ for (const e of extras) {
1102
+ if (e && typeof e === "object" && e.key === key) {
1103
+ const value = e.value;
1104
+ if (typeof value === "string" && value.length > 0) return value;
1105
+ }
1106
+ }
1107
+ const rootValue = dataset[key];
1108
+ return typeof rootValue === "string" ? rootValue : "";
1109
+ };
770
1110
  var scoreDatasetRelevance = (query, dataset, weights = DEFAULT_RELEVANCE_WEIGHTS) => {
771
1111
  const terms = extractQueryTerms(query);
772
1112
  const titleText = dataset.title || dataset.name || "";
773
1113
  const notesText = dataset.notes || "";
774
1114
  const orgText = dataset.organization?.title || dataset.organization?.name || dataset.owner_org || "";
1115
+ const holderText = readDcatExtra(dataset, "holder_name");
1116
+ const publisherText = readDcatExtra(dataset, "publisher_name");
775
1117
  const breakdown = {
776
1118
  title: scoreTextField(titleText, terms, weights.title),
777
1119
  notes: scoreTextField(notesText, terms, weights.notes),
778
1120
  tags: 0,
779
1121
  organization: scoreTextField(orgText, terms, weights.organization),
1122
+ holder: scoreTextField(holderText, terms, weights.holder),
1123
+ publisher: scoreTextField(publisherText, terms, weights.publisher),
780
1124
  total: 0
781
1125
  };
782
1126
  if (Array.isArray(dataset.tags) && dataset.tags.length > 0 && terms.length > 0) {
@@ -786,7 +1130,7 @@ var scoreDatasetRelevance = (query, dataset, weights = DEFAULT_RELEVANCE_WEIGHTS
786
1130
  });
787
1131
  breakdown.tags = tagMatch ? weights.tags : 0;
788
1132
  }
789
- breakdown.total = breakdown.title + breakdown.notes + breakdown.tags + breakdown.organization;
1133
+ breakdown.total = breakdown.title + breakdown.notes + breakdown.tags + breakdown.organization + breakdown.holder + breakdown.publisher;
790
1134
  return { total: breakdown.total, breakdown, terms };
791
1135
  };
792
1136
  var parseAccessServices = (resource) => {
@@ -1478,10 +1822,7 @@ Note: showing top ${sorted.length} only. Use \`response_format: json\` for full
1478
1822
  };
1479
1823
  } catch (error) {
1480
1824
  return {
1481
- content: [{
1482
- type: "text",
1483
- text: `Error searching packages: ${error instanceof Error ? error.message : String(error)}`
1484
- }],
1825
+ content: [{ type: "text", text: formatCkanError(error, "ckan_package_search") }],
1485
1826
  isError: true
1486
1827
  };
1487
1828
  }
@@ -1504,7 +1845,13 @@ Args:
1504
1845
  - query (string): Natural language or keyword query (e.g., "mobilit\xE0 urbana", "air quality")
1505
1846
  - limit (number): Number of datasets to return (default: 10)
1506
1847
  - weights (object): Field weights for scoring \u2014 higher weight = more influence on rank
1507
- Default: title=4, tags=3, notes=2, organization=1
1848
+ Default: title=4, tags=3, notes=2, organization=1, holder=4, publisher=2
1849
+ Note on holder vs organization: on federated catalogs (e.g. dati.gov.it), \`organization\`
1850
+ is the harvesting catalog (e.g. Regione Puglia), while \`holder\` (DCAT-AP_IT dct:rightsHolder)
1851
+ is the actual data owner (e.g. Comune di Lecce). Queries like "datasets from a specific Comune"
1852
+ match \`holder\` correctly; matching only \`organization\` misses datasets harvested via
1853
+ aggregators. \`publisher\` (dct:publisher) is scored separately at lower weight as it can
1854
+ contain technical roles ("Redazione OD") rather than the institutional owner.
1508
1855
  - query_parser ('default' | 'text'): Override search parser behavior
1509
1856
  - response_format ('markdown' | 'json'): Output format
1510
1857
 
@@ -1514,17 +1861,20 @@ Returns:
1514
1861
  Examples:
1515
1862
  - { server_url: "https://dati.gov.it/opendata", query: "mobilit\xE0" }
1516
1863
  - { server_url: "...", query: "trasporti", limit: 5, weights: { title: 5, notes: 2 } }
1864
+ - { server_url: "...", query: "defibrillatori Comune di Lecce", weights: { holder: 5 } }
1517
1865
 
1518
1866
  Typical workflow: ckan_find_relevant_datasets \u2192 ckan_package_show (inspect top results) \u2192 ckan_datastore_search (query data)`,
1519
1867
  inputSchema: z2.object({
1520
1868
  server_url: z2.string().url().describe("Base URL of the CKAN server (e.g., https://dati.gov.it/opendata)"),
1521
- query: z2.string().min(2).describe("Natural language or keyword query to match against dataset title, notes, tags, and organization"),
1869
+ query: z2.string().min(2).describe("Natural language or keyword query to match against dataset title, notes, tags, organization, holder and publisher"),
1522
1870
  limit: z2.coerce.number().int().min(1).max(50).optional().default(10).describe("Number of datasets to return"),
1523
1871
  weights: z2.object({
1524
1872
  title: z2.coerce.number().min(0).optional().describe("Weight for title match (default 4)"),
1525
1873
  notes: z2.coerce.number().min(0).optional().describe("Weight for description match (default 2)"),
1526
1874
  tags: z2.coerce.number().min(0).optional().describe("Weight for tag match (default 3)"),
1527
- organization: z2.coerce.number().min(0).optional().describe("Weight for organization match (default 1)")
1875
+ organization: z2.coerce.number().min(0).optional().describe("Weight for organization (CKAN catalog / harvester) match (default 1)"),
1876
+ holder: z2.coerce.number().min(0).optional().describe("Weight for holder_name match \u2014 DCAT-AP_IT dct:rightsHolder, the actual data owner (default 4)"),
1877
+ publisher: z2.coerce.number().min(0).optional().describe("Weight for publisher_name match \u2014 DCAT-AP_IT dct:publisher (default 2)")
1528
1878
  }).optional().describe("Per-field scoring weights; unspecified fields use defaults"),
1529
1879
  query_parser: z2.enum(["default", "text"]).optional().describe("Override search parser ('text' forces text:(...) on non-fielded queries)"),
1530
1880
  response_format: ResponseFormatSchema
@@ -1621,6 +1971,10 @@ Typical workflow: ckan_find_relevant_datasets \u2192 ckan_package_show (inspect
1621
1971
  markdown += `- **Tags**: ${weights.tags}
1622
1972
  `;
1623
1973
  markdown += `- **Organization**: ${weights.organization}
1974
+ `;
1975
+ markdown += `- **Holder**: ${weights.holder}
1976
+ `;
1977
+ markdown += `- **Publisher**: ${weights.publisher}
1624
1978
 
1625
1979
  `;
1626
1980
  if (top.length === 0) {
@@ -1655,6 +2009,10 @@ Typical workflow: ckan_find_relevant_datasets \u2192 ckan_package_show (inspect
1655
2009
  markdown += `- Tags: ${dataset.breakdown.tags}
1656
2010
  `;
1657
2011
  markdown += `- Organization: ${dataset.breakdown.organization}
2012
+ `;
2013
+ markdown += `- Holder: ${dataset.breakdown.holder}
2014
+ `;
2015
+ markdown += `- Publisher: ${dataset.breakdown.publisher}
1658
2016
  `;
1659
2017
  markdown += `- Total: ${dataset.breakdown.total}
1660
2018
 
@@ -1666,10 +2024,7 @@ Typical workflow: ckan_find_relevant_datasets \u2192 ckan_package_show (inspect
1666
2024
  };
1667
2025
  } catch (error) {
1668
2026
  return {
1669
- content: [{
1670
- type: "text",
1671
- text: `Error ranking datasets: ${error instanceof Error ? error.message : String(error)}`
1672
- }],
2027
+ content: [{ type: "text", text: formatCkanError(error, "ckan_find_relevant_datasets") }],
1673
2028
  isError: true
1674
2029
  };
1675
2030
  }
@@ -1745,15 +2100,20 @@ Typical workflow: ckan_package_show \u2192 pick a resource with datastore_active
1745
2100
  };
1746
2101
  } catch (error) {
1747
2102
  return {
1748
- content: [{
1749
- type: "text",
1750
- text: `Error fetching package: ${error instanceof Error ? error.message : String(error)}`
1751
- }],
2103
+ content: [{ type: "text", text: formatCkanError(error, "ckan_package_show") }],
1752
2104
  isError: true
1753
2105
  };
1754
2106
  }
1755
2107
  }
1756
2108
  );
2109
+ async function checkSourceDatastore(portalUrl, resourceId) {
2110
+ try {
2111
+ await makeCkanRequest(portalUrl, "datastore_search", { resource_id: resourceId, limit: 0 }, { cache: false });
2112
+ return true;
2113
+ } catch {
2114
+ return false;
2115
+ }
2116
+ }
1757
2117
  server.registerTool(
1758
2118
  "ckan_list_resources",
1759
2119
  {
@@ -1776,11 +2136,17 @@ Examples:
1776
2136
  - { server_url: "https://dati.gov.it/opendata", id: "dataset-name" }
1777
2137
  - { server_url: "...", id: "dataset-name", format_filter: "CSV" }
1778
2138
 
1779
- Typical workflow: ckan_package_search \u2192 ckan_list_resources (assess available files) \u2192 ckan_datastore_search (for resources with DataStore=true)`,
2139
+ Typical workflow: ckan_package_search \u2192 ckan_list_resources (assess available files) \u2192 ckan_datastore_search (for resources with DataStore=true)
2140
+
2141
+ When a resource has DataStore=false but its download URL belongs to a different (source) portal,
2142
+ the tool automatically probes the source portal for DataStore availability and reports
2143
+ source_datastore_active and source_portal_url so you can query the data there instead.
2144
+ Set check_source_portal=false to skip these extra HTTP calls.`,
1780
2145
  inputSchema: z2.object({
1781
2146
  server_url: z2.string().url().describe("Base URL of the CKAN server"),
1782
2147
  id: z2.string().min(1).describe("Dataset ID or name"),
1783
2148
  format_filter: z2.string().optional().describe("Filter resources by format, case-insensitive (e.g., 'CSV', 'json', 'XLSX')"),
2149
+ check_source_portal: z2.boolean().optional().describe("When true (default), probes the source portal for DataStore availability when a resource URL points to a different CKAN instance"),
1784
2150
  response_format: ResponseFormatSchema
1785
2151
  }).strict(),
1786
2152
  annotations: {
@@ -1799,6 +2165,7 @@ Typical workflow: ckan_package_search \u2192 ckan_list_resources (assess availab
1799
2165
  );
1800
2166
  const resources = Array.isArray(result.resources) ? result.resources : [];
1801
2167
  const formatFilter = params.format_filter?.toUpperCase();
2168
+ const doSourceCheck = params.check_source_portal !== false;
1802
2169
  const summary = resources.filter((r) => !formatFilter || (r.format || "").toUpperCase() === formatFilter).map((r) => {
1803
2170
  const effectiveUrl = resolveDownloadUrl(r);
1804
2171
  return {
@@ -1810,6 +2177,18 @@ Typical workflow: ckan_package_search \u2192 ckan_list_resources (assess availab
1810
2177
  url: effectiveUrl
1811
2178
  };
1812
2179
  });
2180
+ if (doSourceCheck) {
2181
+ await Promise.all(
2182
+ summary.map(async (item, idx) => {
2183
+ if (item.datastore_active) return;
2184
+ const extracted = extractSourcePortal(item.url, params.server_url);
2185
+ if (!extracted) return;
2186
+ const active = await checkSourceDatastore(extracted.portalUrl, extracted.resourceId);
2187
+ summary[idx].source_datastore_active = active;
2188
+ summary[idx].source_portal_url = active ? extracted.portalUrl : null;
2189
+ })
2190
+ );
2191
+ }
1813
2192
  if (params.response_format === "json" /* JSON */) {
1814
2193
  const payload = {
1815
2194
  dataset_id: result.id,
@@ -1864,6 +2243,16 @@ Typical workflow: ckan_package_search \u2192 ckan_list_resources (assess availab
1864
2243
  `;
1865
2244
  for (const r of dsResources) {
1866
2245
  markdown += `- **${r.name}** (${r.format}): \`${r.id}\`
2246
+ `;
2247
+ }
2248
+ }
2249
+ const sourceResources = summary.filter((r) => r.source_datastore_active && r.source_portal_url);
2250
+ if (sourceResources.length > 0) {
2251
+ markdown += `
2252
+ **Available on source portal** (use \`ckan_datastore_search\` with the source portal URL):
2253
+ `;
2254
+ for (const r of sourceResources) {
2255
+ markdown += `- **${r.name}** (${r.format}): \`${r.id}\` on ${r.source_portal_url}
1867
2256
  `;
1868
2257
  }
1869
2258
  }
@@ -1873,10 +2262,7 @@ Typical workflow: ckan_package_search \u2192 ckan_list_resources (assess availab
1873
2262
  };
1874
2263
  } catch (error) {
1875
2264
  return {
1876
- content: [{
1877
- type: "text",
1878
- text: `Error listing resources: ${error instanceof Error ? error.message : String(error)}`
1879
- }],
2265
+ content: [{ type: "text", text: formatCkanError(error, "ckan_list_resources") }],
1880
2266
  isError: true
1881
2267
  };
1882
2268
  }
@@ -2055,8 +2441,7 @@ Typical workflow: ckan_organization_list \u2192 ckan_organization_show (inspect
2055
2441
  }
2056
2442
  );
2057
2443
  } catch (error) {
2058
- const message = error instanceof Error ? error.message : String(error);
2059
- if (message.includes("CKAN API error (500)")) {
2444
+ if (error instanceof CkanApiError && error.status === 500) {
2060
2445
  const searchResult = await makeCkanRequest(
2061
2446
  params.server_url,
2062
2447
  "package_search",
@@ -2164,10 +2549,7 @@ Note: organization_list returned 500; using package_search facets.
2164
2549
  };
2165
2550
  } catch (error) {
2166
2551
  return {
2167
- content: [{
2168
- type: "text",
2169
- text: `Error listing organizations: ${error instanceof Error ? error.message : String(error)}`
2170
- }],
2552
+ content: [{ type: "text", text: formatCkanError(error, "ckan_organization_list") }],
2171
2553
  isError: true
2172
2554
  };
2173
2555
  }
@@ -2228,10 +2610,7 @@ Typical workflow: ckan_organization_show \u2192 ckan_package_show (inspect a dat
2228
2610
  };
2229
2611
  } catch (error) {
2230
2612
  return {
2231
- content: [{
2232
- type: "text",
2233
- text: `Error fetching organization: ${error instanceof Error ? error.message : String(error)}`
2234
- }],
2613
+ content: [{ type: "text", text: formatCkanError(error, "ckan_organization_show") }],
2235
2614
  isError: true
2236
2615
  };
2237
2616
  }
@@ -2339,10 +2718,7 @@ Typical workflow: ckan_organization_search \u2192 ckan_organization_show (get de
2339
2718
  };
2340
2719
  } catch (error) {
2341
2720
  return {
2342
- content: [{
2343
- type: "text",
2344
- text: `Error searching organizations: ${error instanceof Error ? error.message : String(error)}`
2345
- }],
2721
+ content: [{ type: "text", text: formatCkanError(error, "ckan_organization_search") }],
2346
2722
  isError: true
2347
2723
  };
2348
2724
  }
@@ -2554,10 +2930,7 @@ Typical workflow: ckan_package_search \u2192 ckan_package_show (find resource_id
2554
2930
  };
2555
2931
  } catch (error) {
2556
2932
  return {
2557
- content: [{
2558
- type: "text",
2559
- text: `Error querying DataStore: ${error instanceof Error ? error.message : String(error)}`
2560
- }],
2933
+ content: [{ type: "text", text: formatCkanError(error, "ckan_datastore_search") }],
2561
2934
  isError: true
2562
2935
  };
2563
2936
  }
@@ -2618,10 +2991,7 @@ Security note: SQL queries are forwarded directly to the CKAN DataStore API. The
2618
2991
  };
2619
2992
  } catch (error) {
2620
2993
  return {
2621
- content: [{
2622
- type: "text",
2623
- text: `Error querying DataStore SQL: ${error instanceof Error ? error.message : String(error)}`
2624
- }],
2994
+ content: [{ type: "text", text: formatCkanError(error, "ckan_datastore_search_sql") }],
2625
2995
  isError: true
2626
2996
  };
2627
2997
  }
@@ -3073,10 +3443,7 @@ Typical workflow: ckan_group_list \u2192 ckan_group_show (inspect one) \u2192 ck
3073
3443
  };
3074
3444
  } catch (error) {
3075
3445
  return {
3076
- content: [{
3077
- type: "text",
3078
- text: `Error listing groups: ${error instanceof Error ? error.message : String(error)}`
3079
- }],
3446
+ content: [{ type: "text", text: formatCkanError(error, "ckan_group_list") }],
3080
3447
  isError: true
3081
3448
  };
3082
3449
  }
@@ -3134,10 +3501,7 @@ Typical workflow: ckan_group_show \u2192 ckan_package_show (inspect a dataset) \
3134
3501
  };
3135
3502
  } catch (error) {
3136
3503
  return {
3137
- content: [{
3138
- type: "text",
3139
- text: `Error fetching group: ${error instanceof Error ? error.message : String(error)}`
3140
- }],
3504
+ content: [{ type: "text", text: formatCkanError(error, "ckan_group_show") }],
3141
3505
  isError: true
3142
3506
  };
3143
3507
  }
@@ -3239,10 +3603,7 @@ Typical workflow: ckan_group_search \u2192 ckan_group_show (get details) \u2192
3239
3603
  };
3240
3604
  } catch (error) {
3241
3605
  return {
3242
- content: [{
3243
- type: "text",
3244
- text: `Error searching groups: ${error instanceof Error ? error.message : String(error)}`
3245
- }],
3606
+ content: [{ type: "text", text: formatCkanError(error, "ckan_group_search") }],
3246
3607
  isError: true
3247
3608
  };
3248
3609
  }
@@ -3911,12 +4272,8 @@ The MQA (Metadata Quality Assurance) system is operated by data.europa.eu and on
3911
4272
  }]
3912
4273
  };
3913
4274
  } catch (error) {
3914
- const errorMessage = error instanceof Error ? error.message : String(error);
3915
4275
  return {
3916
- content: [{
3917
- type: "text",
3918
- text: `Error retrieving quality metrics: ${errorMessage}`
3919
- }]
4276
+ content: [{ type: "text", text: `Error retrieving quality metrics: ${formatCkanError(error, "ckan_get_mqa_quality")}` }]
3920
4277
  };
3921
4278
  }
3922
4279
  }
@@ -3960,12 +4317,8 @@ The MQA (Metadata Quality Assurance) system is operated by data.europa.eu and on
3960
4317
  }]
3961
4318
  };
3962
4319
  } catch (error) {
3963
- const errorMessage = error instanceof Error ? error.message : String(error);
3964
4320
  return {
3965
- content: [{
3966
- type: "text",
3967
- text: `Error retrieving quality details: ${errorMessage}`
3968
- }]
4321
+ content: [{ type: "text", text: `Error retrieving quality details: ${formatCkanError(error, "ckan_get_mqa_quality_details")}` }]
3969
4322
  };
3970
4323
  }
3971
4324
  }
@@ -4145,10 +4498,7 @@ Typical workflow: ckan_analyze_datasets \u2192 ckan_datastore_search (with known
4145
4498
  };
4146
4499
  } catch (error) {
4147
4500
  return {
4148
- content: [{
4149
- type: "text",
4150
- text: `Error analyzing datasets: ${error instanceof Error ? error.message : String(error)}`
4151
- }],
4501
+ content: [{ type: "text", text: formatCkanError(error, "ckan_analyze_datasets") }],
4152
4502
  isError: true
4153
4503
  };
4154
4504
  }
@@ -4236,10 +4586,7 @@ Typical workflow: ckan_catalog_stats (understand the portal) \u2192 ckan_package
4236
4586
  };
4237
4587
  } catch (error) {
4238
4588
  return {
4239
- content: [{
4240
- type: "text",
4241
- text: `Error retrieving catalog stats: ${error instanceof Error ? error.message : String(error)}`
4242
- }],
4589
+ content: [{ type: "text", text: formatCkanError(error, "ckan_catalog_stats") }],
4243
4590
  isError: true
4244
4591
  };
4245
4592
  }
@@ -4544,11 +4891,8 @@ Typical workflow: ckan_find_portals (discover portal URL) \u2192 ckan_status_sho
4544
4891
  };
4545
4892
  } catch (error) {
4546
4893
  return {
4547
- content: [{
4548
- type: "text",
4549
- text: `Could not fetch portal list from datashades.info:
4550
- ${error instanceof Error ? error.message : String(error)}`
4551
- }],
4894
+ content: [{ type: "text", text: `Could not fetch portal list from datashades.info:
4895
+ ${formatCkanError(error, "ckan_find_portals")}` }],
4552
4896
  isError: true
4553
4897
  };
4554
4898
  }
@@ -5170,7 +5514,7 @@ var registerAllPrompts = (server) => {
5170
5514
  function createServer() {
5171
5515
  return new McpServer({
5172
5516
  name: "ckan-mcp-server",
5173
- version: "0.4.99"
5517
+ version: "0.4.105"
5174
5518
  });
5175
5519
  }
5176
5520
  function registerAll(server) {