@aborruso/ckan-mcp-server 0.4.73 → 0.4.76

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LOG.md CHANGED
@@ -1,5 +1,14 @@
1
1
  # LOG
2
2
 
3
+ ## 2026-03-06 (v0.4.75)
4
+
5
+ - fix(`ckan_find_portals`): deduplicate portals by hostname, preferring https over http
6
+ - feat: new tool `ckan_find_portals` — discovers CKAN portals from datashades.info registry (~950 portals); filters by country, keyword, min_datasets, language, has_datastore; LLM translates country to English
7
+
8
+ ## 2026-03-06 (v0.4.74)
9
+
10
+ - fix: use `z.coerce.number()` for all numeric tool parameters — fixes validation errors when MCP clients pass numbers as strings (closes #16)
11
+
3
12
  ## 2026-03-05 (v0.4.73)
4
13
 
5
14
  - feat: `package_show` now includes `api_json_url` for dataset and each resource (direct CKAN API JSON link)
package/dist/index.js CHANGED
@@ -1062,16 +1062,16 @@ Typical workflow: ckan_package_search \u2192 ckan_package_show (get full metadat
1062
1062
  server_url: z2.string().url("Must be a valid URL").describe("Base URL of the CKAN server"),
1063
1063
  q: z2.string().optional().default("*:*").describe("Search query in Solr syntax"),
1064
1064
  fq: z2.string().optional().describe(`Filter query in Solr syntax; applied after scoring, does not affect relevance. CKAN extras fields use prefix 'extras_' (e.g. extras_hvd_category). For OR on same field use field:(val1 OR val2), never field:val1 OR field:val2 (silently breaks). Examples: 'organization:comune-palermo', 'res_format:CSV', 'extras_hvd_category:("uri1" OR "uri2")'.`),
1065
- rows: z2.number().int().min(0).max(1e3).optional().default(10).describe("Number of results to return"),
1066
- start: z2.number().int().min(0).optional().default(0).describe("Offset for pagination"),
1065
+ rows: z2.coerce.number().int().min(0).max(1e3).optional().default(10).describe("Number of results to return"),
1066
+ start: z2.coerce.number().int().min(0).optional().default(0).describe("Offset for pagination"),
1067
1067
  sort: z2.string().optional().describe("Sort field and direction (e.g., 'metadata_modified desc')"),
1068
1068
  facet_field: z2.array(z2.string()).optional().describe("Fields to facet on"),
1069
- facet_limit: z2.number().int().min(1).optional().default(50).describe("Maximum facet values per field"),
1070
- page: z2.number().int().min(1).optional().describe("Page number (1-based); alias for start. Overrides start if provided."),
1071
- page_size: z2.number().int().min(1).max(1e3).optional().default(10).describe("Results per page when using page (default: 10)"),
1069
+ facet_limit: z2.coerce.number().int().min(1).optional().default(50).describe("Maximum facet values per field"),
1070
+ page: z2.coerce.number().int().min(1).optional().describe("Page number (1-based); alias for start. Overrides start if provided."),
1071
+ page_size: z2.coerce.number().int().min(1).max(1e3).optional().default(10).describe("Results per page when using page (default: 10)"),
1072
1072
  include_drafts: z2.boolean().optional().default(false).describe("Include draft datasets"),
1073
1073
  content_recent: z2.boolean().optional().default(false).describe("Use issued date with fallback to metadata_created for recent content"),
1074
- content_recent_days: z2.number().int().min(1).optional().default(30).describe("Day window for content_recent (default 30)"),
1074
+ content_recent_days: z2.coerce.number().int().min(1).optional().default(30).describe("Day window for content_recent (default 30)"),
1075
1075
  query_parser: z2.enum(["default", "text"]).optional().describe("Override search parser ('text' forces text:(...) on non-fielded queries)"),
1076
1076
  response_format: ResponseFormatSchema
1077
1077
  }).strict(),
@@ -1312,12 +1312,12 @@ Typical workflow: ckan_find_relevant_datasets \u2192 ckan_package_show (inspect
1312
1312
  inputSchema: z2.object({
1313
1313
  server_url: z2.string().url().describe("Base URL of the CKAN server (e.g., https://dati.gov.it/opendata)"),
1314
1314
  query: z2.string().min(2).describe("Natural language or keyword query to match against dataset title, notes, tags, and organization"),
1315
- limit: z2.number().int().min(1).max(50).optional().default(10).describe("Number of datasets to return"),
1315
+ limit: z2.coerce.number().int().min(1).max(50).optional().default(10).describe("Number of datasets to return"),
1316
1316
  weights: z2.object({
1317
- title: z2.number().min(0).optional().describe("Weight for title match (default 4)"),
1318
- notes: z2.number().min(0).optional().describe("Weight for description match (default 2)"),
1319
- tags: z2.number().min(0).optional().describe("Weight for tag match (default 3)"),
1320
- organization: z2.number().min(0).optional().describe("Weight for organization match (default 1)")
1317
+ title: z2.coerce.number().min(0).optional().describe("Weight for title match (default 4)"),
1318
+ notes: z2.coerce.number().min(0).optional().describe("Weight for description match (default 2)"),
1319
+ tags: z2.coerce.number().min(0).optional().describe("Weight for tag match (default 3)"),
1320
+ organization: z2.coerce.number().min(0).optional().describe("Weight for organization match (default 1)")
1321
1321
  }).optional().describe("Per-field scoring weights; unspecified fields use defaults"),
1322
1322
  query_parser: z2.enum(["default", "text"]).optional().describe("Override search parser ('text' forces text:(...) on non-fielded queries)"),
1323
1323
  response_format: ResponseFormatSchema
@@ -1795,8 +1795,8 @@ Typical workflow: ckan_organization_list \u2192 ckan_organization_show (inspect
1795
1795
  server_url: z3.string().url().describe("Base URL of the CKAN server (e.g., https://dati.gov.it/opendata)"),
1796
1796
  all_fields: z3.boolean().optional().default(false).describe("Return full organization objects (true) or just name slugs (false)"),
1797
1797
  sort: z3.string().optional().default("name asc").describe("Sort field and direction (e.g., 'name asc', 'package_count desc')"),
1798
- limit: z3.number().int().min(0).optional().default(100).describe("Max organizations to return. Use 0 to get only the count via faceting"),
1799
- offset: z3.number().int().min(0).optional().default(0).describe("Pagination offset"),
1798
+ limit: z3.coerce.number().int().min(0).optional().default(100).describe("Max organizations to return. Use 0 to get only the count via faceting"),
1799
+ offset: z3.coerce.number().int().min(0).optional().default(0).describe("Pagination offset"),
1800
1800
  response_format: ResponseFormatSchema
1801
1801
  }).strict(),
1802
1802
  annotations: {
@@ -2301,8 +2301,8 @@ Typical workflow: ckan_package_search \u2192 ckan_package_show (find resource_id
2301
2301
  resource_id: z4.string().min(1).describe("UUID of the DataStore resource (from ckan_package_show resource.id where datastore_active is true)"),
2302
2302
  q: z4.string().optional().describe("Full-text search across all fields"),
2303
2303
  filters: z4.record(z4.any()).optional().describe('Key-value filters for exact matches (e.g., { "regione": "Sicilia", "anno": 2023 })'),
2304
- limit: z4.number().int().min(0).max(32e3).optional().default(100).describe("Max rows to return (default 100, max 32000); use 0 to get only column names without data"),
2305
- offset: z4.number().int().min(0).optional().default(0).describe("Pagination offset"),
2304
+ limit: z4.coerce.number().int().min(0).max(32e3).optional().default(100).describe("Max rows to return (default 100, max 32000); use 0 to get only column names without data"),
2305
+ offset: z4.coerce.number().int().min(0).optional().default(0).describe("Pagination offset"),
2306
2306
  fields: z4.array(z4.string()).optional().describe("Specific field names to return; omit to return all fields"),
2307
2307
  sort: z4.string().optional().describe("Sort expression (e.g., 'anno desc', 'nome asc')"),
2308
2308
  distinct: z4.boolean().optional().default(false).describe("Return only distinct rows"),
@@ -4215,6 +4215,132 @@ ${error instanceof Error ? error.message : String(error)}`
4215
4215
  );
4216
4216
  }
4217
4217
 
4218
+ // src/tools/portal-discovery.ts
4219
+ import { z as z11 } from "zod";
4220
+ import axios3 from "axios";
4221
+ var DATASHADES_URL = "https://datashades.info/api/portal/list";
/**
 * Download the portal registry from datashades.info.
 *
 * Performs a GET on DATASHADES_URL with a 15 second timeout and a fixed
 * User-Agent header, then returns the `portals` array of the response body.
 *
 * @returns {Promise<Array<object>>} The raw portal records.
 * @throws {Error} When the HTTP request fails (error raised by axios) or when
 *   the response body does not contain a `portals` array.
 */
async function fetchPortals() {
  const resp = await axios3.get(DATASHADES_URL, {
    timeout: 15e3,
    headers: { "User-Agent": "CKAN-MCP-Server/1.0" }
  });
  const portals = resp.data && resp.data.portals;
  // Fail here with a clear message instead of letting callers hit
  // "Cannot read properties of undefined" on the first array operation.
  if (!Array.isArray(portals)) {
    throw new Error("Unexpected response from datashades.info: missing `portals` array");
  }
  return portals;
}
/**
 * Collapse portal records that share a hostname, preferring https over http.
 *
 * The first record seen for a hostname is kept; any later record for the same
 * hostname replaces it only when that later record uses the https scheme.
 * The scheme is read from the parsed URL, so `HTTPS://…` or an Href with
 * surrounding whitespace is recognised as https as well.
 *
 * Records whose `Href` is missing or cannot be parsed as an absolute URL are
 * dropped (best effort: one malformed registry entry must not fail the call).
 *
 * @param {Array<object>} portals Portal records carrying an `Href` string.
 * @returns {Array<object>} One record per hostname, in first-seen order.
 */
function deduplicateByHostname(portals) {
  const byHostname = /* @__PURE__ */ new Map();
  for (const portal of portals) {
    let url;
    try {
      url = new URL(portal.Href);
    } catch {
      // Deliberate skip: null record, missing Href, or unparseable URL.
      continue;
    }
    // URL parsing lower-cases both scheme and hostname, so the comparison
    // below is case-insensitive with respect to the raw Href.
    if (!byHostname.has(url.hostname) || url.protocol === "https:") {
      byHostname.set(url.hostname, portal);
    }
  }
  return Array.from(byHostname.values());
}
/**
 * Select the portals matching the given filters, de-duplicate them by
 * hostname, rank them by dataset count (largest first) and truncate the list.
 *
 * Only records with `status === "active"` and a truthy `Href` are considered.
 * Text filters are case-insensitive. A record that lacks the field a filter
 * inspects (for example no `SiteInfo`) simply does not match that filter,
 * rather than throwing and aborting the whole search.
 *
 * @param {Array<object>} portals Raw portal records.
 * @param {object} params Filter options.
 * @param {string} [params.country] Substring of `Coordinates.country_name`.
 * @param {string} [params.query] Substring of `SiteInfo.site_title`.
 * @param {number} [params.min_datasets] Lower bound for `DatasetsNumber`.
 * @param {string} [params.language] Prefix of `SiteInfo.locale_default`.
 * @param {boolean} [params.has_datastore] Require "datastore" in `Plugins`.
 * @param {number} [params.limit] Maximum number of records returned.
 * @returns {Array<object>} Matching records, highest dataset count first.
 */
function filterPortals(portals, params) {
  const lower = (value) => typeof value === "string" ? value.toLowerCase() : "";
  const country = lower(params.country);
  const query = lower(params.query);
  const language = lower(params.language);
  // Missing or non-numeric counts rank as 0 so the comparator never yields NaN.
  const datasetCount = (p) => Number(p.DatasetsNumber) || 0;
  const filtered = portals.filter((p) => {
    if (!p || p.status !== "active" || !p.Href) return false;
    const coordinates = p.Coordinates || {};
    const siteInfo = p.SiteInfo || {};
    if (country && !lower(coordinates.country_name).includes(country)) return false;
    if (query && !lower(siteInfo.site_title).includes(query)) return false;
    if (params.min_datasets !== void 0 && !(p.DatasetsNumber >= params.min_datasets)) return false;
    if (language && !lower(siteInfo.locale_default).startsWith(language)) return false;
    if (params.has_datastore && !(p.Plugins || []).includes("datastore")) return false;
    return true;
  });
  return deduplicateByHostname(filtered).sort((a, b) => datasetCount(b) - datasetCount(a)).slice(0, params.limit);
}
/**
 * Render a list of portal records as a Markdown report with a results table.
 *
 * Missing fields (`SiteInfo`, `Coordinates`, `Version`, `DatasetsNumber`)
 * render as empty cells instead of throwing. Pipe characters and line breaks
 * inside cell values are escaped/flattened so a portal title cannot break the
 * table layout.
 *
 * @param {Array<object>} portals Records to list, already filtered and ranked.
 * @param {number} total Number of active portals in the registry (header text).
 * @param {number} [limit] Currently unused; kept so existing callers that pass
 *   it keep working.
 * @returns {string} Markdown text, or a fixed "no results" sentence when
 *   `portals` is empty.
 */
function formatMarkdown(portals, total, limit) {
  if (portals.length === 0) return "No CKAN portals found matching the given filters.";
  // Make any value safe to place inside a single Markdown table cell.
  const cell = (value) => value == null ? "" : String(value).replace(/\|/g, "\\|").replace(/\s*[\r\n]+\s*/g, " ");
  const rows = portals.map((p) => {
    const siteInfo = p.SiteInfo || {};
    const coordinates = p.Coordinates || {};
    const datasets = typeof p.DatasetsNumber === "number" ? p.DatasetsNumber.toLocaleString() : cell(p.DatasetsNumber);
    const datastore = (p.Plugins || []).includes("datastore") ? "\u2705" : "\u274C";
    return `| [${cell(siteInfo.site_title || p.Href)}](${p.Href}) | ${cell(coordinates.country_name)} | ${cell(p.Version)} | ${datasets} | ${cell(siteInfo.locale_default)} | ${datastore} |`;
  }).join("\n");
  return [
    "# CKAN Portals",
    "",
    `**Source**: [datashades.info](https://datashades.info/portals) \u2014 live registry of ${total} active portals`,
    `**Showing**: ${portals.length} of ${total} (filtered, sorted by dataset count)`,
    "",
    "| Portal | Country | CKAN | Datasets | Locale | DataStore |",
    "|--------|---------|------|----------|--------|-----------|",
    rows,
    "",
    "---",
    "\u{1F4A1} Use the portal URL as `server_url` in any CKAN tool."
  ].join("\n");
}
// Registers the "ckan_find_portals" tool on `server2` via server2.registerTool().
// The handler downloads the portal list with fetchPortals(), narrows it with
// filterPortals(), and returns two views of the same result: a Markdown report
// (text content, passed through addDemoFooter) and a `structuredContent.portals`
// array of plain objects. Any exception raised inside the handler is converted
// into an `isError: true` text result instead of being rethrown.
4264
+ function registerPortalDiscoveryTools(server2) {
4265
+ server2.registerTool(
4266
+ "ckan_find_portals",
4267
+ {
4268
+ title: "Find CKAN Portals",
4269
+ description: `Search the live datashades.info registry of ~950 CKAN portals worldwide.
4270
+
4271
+ Use this tool to discover which CKAN portals exist for a country, language, or topic
4272
+ before querying them with other CKAN tools.
4273
+
4274
+ **IMPORTANT \u2014 country parameter**: always pass country name in English.
4275
+ If the user writes in another language (e.g. "Italia", "Espa\xF1a", "Brasil"),
4276
+ translate to English ("Italy", "Spain", "Brazil") before calling this tool.
4277
+
4278
+ Args:
4279
+ - country (string): Country name in English (e.g. "Italy", "Brazil", "France")
4280
+ - query (string): Keyword to match against portal title (e.g. "transport", "health")
4281
+ - min_datasets (number): Minimum number of datasets (e.g. 100)
4282
+ - language (string): Portal default locale code (e.g. "it", "en", "pt_BR", "fr")
4283
+ - has_datastore (boolean): If true, return only portals with DataStore enabled (supports SQL queries)
4284
+ - limit (number): Max results to return (default 10, max 50)
4285
+
4286
+ Returns:
4287
+ Ranked list of matching portals with URL, country, CKAN version, dataset count, and DataStore status.
4288
+
4289
+ Typical workflow: ckan_find_portals (discover portal URL) \u2192 ckan_status_show (verify) \u2192 ckan_package_search (search datasets)`,
// Every filter is optional; only `limit` has a default (10). The two numeric
// fields use z.coerce.number(), so string-encoded numbers are accepted, while
// `has_datastore` is a plain (non-coerced) boolean. `.strict()` rejects
// unknown keys.
4290
+ inputSchema: z11.object({
4291
+ country: z11.string().optional().describe("Country name in English (e.g. 'Italy', 'Brazil'). Translate from any language before passing."),
4292
+ query: z11.string().optional().describe("Keyword matched against portal title (case-insensitive)"),
4293
+ min_datasets: z11.coerce.number().int().min(0).optional().describe("Minimum number of datasets"),
4294
+ language: z11.string().optional().describe("Portal default locale code (e.g. 'it', 'en', 'pt_BR')"),
4295
+ has_datastore: z11.boolean().optional().describe("If true, return only portals with DataStore plugin (required for SQL queries)"),
4296
+ limit: z11.coerce.number().int().min(1).max(50).optional().default(10).describe("Max results (default 10, max 50)")
4297
+ }).strict(),
4298
+ annotations: {
4299
+ readOnlyHint: true,
4300
+ destructiveHint: false,
4301
+ idempotentHint: true,
4302
+ openWorldHint: true
4303
+ }
4304
+ },
4305
+ async (params) => {
4306
+ try {
4307
+ const all = await fetchPortals();
// `active` is used only for its length: the count of all active portals,
// before any user filter, which formatMarkdown prints as the registry total.
4308
+ const active = all.filter((p) => p.status === "active");
4309
+ const results = filterPortals(all, {
4310
+ country: params.country,
4311
+ query: params.query,
4312
+ min_datasets: params.min_datasets,
4313
+ language: params.language,
4314
+ has_datastore: params.has_datastore,
4315
+ limit: params.limit
4316
+ });
4317
+ const markdown = formatMarkdown(results, active.length, params.limit);
4318
+ return {
4319
+ content: [{ type: "text", text: addDemoFooter(markdown) }],
4320
+ structuredContent: { portals: results.map((p) => ({
4321
+ url: p.Href,
4322
+ title: p.SiteInfo.site_title,
4323
+ country: p.Coordinates.country_name,
4324
+ version: p.Version,
4325
+ datasets: p.DatasetsNumber,
4326
+ locale: p.SiteInfo.locale_default,
4327
+ has_datastore: (p.Plugins || []).includes("datastore")
4328
+ })) }
4329
+ };
// NOTE(review): this catch covers the whole try block, so filtering or
// formatting failures (e.g. a record without SiteInfo/Coordinates) are also
// reported with the "Could not fetch portal list" wording below.
4330
+ } catch (error) {
4331
+ return {
4332
+ content: [{
4333
+ type: "text",
4334
+ text: `Could not fetch portal list from datashades.info:
4335
+ ${error instanceof Error ? error.message : String(error)}`
4336
+ }],
4337
+ isError: true
4338
+ };
4339
+ }
4340
+ }
4341
+ );
4342
+ }
4343
+
4218
4344
  // src/resources/dataset.ts
4219
4345
  import { ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
4220
4346
 
@@ -4484,7 +4610,7 @@ function registerAllResources(server2) {
4484
4610
  }
4485
4611
 
4486
4612
  // src/prompts/theme.ts
4487
- import { z as z11 } from "zod";
4613
+ import { z as z12 } from "zod";
4488
4614
 
4489
4615
  // src/prompts/types.ts
4490
4616
  var createTextPrompt = (text) => ({
@@ -4545,9 +4671,9 @@ var registerThemePrompt = (server2) => {
4545
4671
  title: "Search datasets by theme",
4546
4672
  description: "Guided prompt to discover a theme and search datasets under it.",
4547
4673
  argsSchema: {
4548
- server_url: z11.string().url().describe("Base URL of the CKAN server"),
4549
- theme: z11.string().min(1).describe("Theme or group name to search"),
4550
- rows: z11.coerce.number().int().positive().default(10).describe("Max results to return")
4674
+ server_url: z12.string().url().describe("Base URL of the CKAN server"),
4675
+ theme: z12.string().min(1).describe("Theme or group name to search"),
4676
+ rows: z12.coerce.number().int().positive().default(10).describe("Max results to return")
4551
4677
  }
4552
4678
  },
4553
4679
  async ({ server_url, theme, rows }) => createTextPrompt(buildThemePromptText(server_url, theme, rows))
@@ -4555,7 +4681,7 @@ var registerThemePrompt = (server2) => {
4555
4681
  };
4556
4682
 
4557
4683
  // src/prompts/organization.ts
4558
- import { z as z12 } from "zod";
4684
+ import { z as z13 } from "zod";
4559
4685
  var ORGANIZATION_PROMPT_NAME = "ckan-search-by-organization";
4560
4686
  var buildOrganizationPromptText = (serverUrl, organization, rows) => `# Guided search: datasets by organization
4561
4687
 
@@ -4590,9 +4716,9 @@ var registerOrganizationPrompt = (server2) => {
4590
4716
  title: "Search datasets by organization",
4591
4717
  description: "Guided prompt to find a publisher and list its datasets.",
4592
4718
  argsSchema: {
4593
- server_url: z12.string().url().describe("Base URL of the CKAN server"),
4594
- organization: z12.string().min(1).describe("Organization name or keyword"),
4595
- rows: z12.coerce.number().int().positive().default(10).describe("Max results to return")
4719
+ server_url: z13.string().url().describe("Base URL of the CKAN server"),
4720
+ organization: z13.string().min(1).describe("Organization name or keyword"),
4721
+ rows: z13.coerce.number().int().positive().default(10).describe("Max results to return")
4596
4722
  }
4597
4723
  },
4598
4724
  async ({ server_url, organization, rows }) => createTextPrompt(buildOrganizationPromptText(server_url, organization, rows))
@@ -4600,7 +4726,7 @@ var registerOrganizationPrompt = (server2) => {
4600
4726
  };
4601
4727
 
4602
4728
  // src/prompts/format.ts
4603
- import { z as z13 } from "zod";
4729
+ import { z as z14 } from "zod";
4604
4730
  var FORMAT_PROMPT_NAME = "ckan-search-by-format";
4605
4731
  var buildFormatPromptText = (serverUrl, format, rows) => `# Guided search: datasets by resource format
4606
4732
 
@@ -4624,9 +4750,9 @@ var registerFormatPrompt = (server2) => {
4624
4750
  title: "Search datasets by resource format",
4625
4751
  description: "Guided prompt to find datasets with a given resource format.",
4626
4752
  argsSchema: {
4627
- server_url: z13.string().url().describe("Base URL of the CKAN server"),
4628
- format: z13.string().min(1).describe("Resource format (e.g., CSV, JSON)"),
4629
- rows: z13.coerce.number().int().positive().default(10).describe("Max results to return")
4753
+ server_url: z14.string().url().describe("Base URL of the CKAN server"),
4754
+ format: z14.string().min(1).describe("Resource format (e.g., CSV, JSON)"),
4755
+ rows: z14.coerce.number().int().positive().default(10).describe("Max results to return")
4630
4756
  }
4631
4757
  },
4632
4758
  async ({ server_url, format, rows }) => createTextPrompt(buildFormatPromptText(server_url, format, rows))
@@ -4634,7 +4760,7 @@ var registerFormatPrompt = (server2) => {
4634
4760
  };
4635
4761
 
4636
4762
  // src/prompts/recent.ts
4637
- import { z as z14 } from "zod";
4763
+ import { z as z15 } from "zod";
4638
4764
  var RECENT_PROMPT_NAME = "ckan-recent-datasets";
4639
4765
  var buildRecentPromptText = (serverUrl, rows) => `# Guided search: recent datasets
4640
4766
 
@@ -4682,8 +4808,8 @@ var registerRecentPrompt = (server2) => {
4682
4808
  title: "Find recently updated datasets",
4683
4809
  description: "Guided prompt to list recently updated datasets on a CKAN portal.",
4684
4810
  argsSchema: {
4685
- server_url: z14.string().url().describe("Base URL of the CKAN server"),
4686
- rows: z14.coerce.number().int().positive().default(10).describe("Max results to return")
4811
+ server_url: z15.string().url().describe("Base URL of the CKAN server"),
4812
+ rows: z15.coerce.number().int().positive().default(10).describe("Max results to return")
4687
4813
  }
4688
4814
  },
4689
4815
  async ({ server_url, rows }) => createTextPrompt(buildRecentPromptText(server_url, rows))
@@ -4691,7 +4817,7 @@ var registerRecentPrompt = (server2) => {
4691
4817
  };
4692
4818
 
4693
4819
  // src/prompts/dataset-analysis.ts
4694
- import { z as z15 } from "zod";
4820
+ import { z as z16 } from "zod";
4695
4821
  var DATASET_ANALYSIS_PROMPT_NAME = "ckan-analyze-dataset";
4696
4822
  var buildDatasetAnalysisPromptText = (serverUrl, id) => `# Guided analysis: dataset
4697
4823
 
@@ -4733,8 +4859,8 @@ var registerDatasetAnalysisPrompt = (server2) => {
4733
4859
  title: "Analyze a dataset",
4734
4860
  description: "Guided prompt to inspect dataset metadata and explore DataStore tables.",
4735
4861
  argsSchema: {
4736
- server_url: z15.string().url().describe("Base URL of the CKAN server"),
4737
- id: z15.string().min(1).describe("Dataset id or name (CKAN package id)")
4862
+ server_url: z16.string().url().describe("Base URL of the CKAN server"),
4863
+ id: z16.string().min(1).describe("Dataset id or name (CKAN package id)")
4738
4864
  }
4739
4865
  },
4740
4866
  async ({ server_url, id }) => createTextPrompt(buildDatasetAnalysisPromptText(server_url, id))
@@ -4742,7 +4868,7 @@ var registerDatasetAnalysisPrompt = (server2) => {
4742
4868
  };
4743
4869
 
4744
4870
  // src/prompts/hvd.ts
4745
- import { z as z16 } from "zod";
4871
+ import { z as z17 } from "zod";
4746
4872
  var HVD_PROMPT_NAME = "ckan-search-hvd";
4747
4873
  var buildHvdPromptText = (serverUrl, rows, categoryField) => {
4748
4874
  if (!categoryField) {
@@ -4804,8 +4930,8 @@ var registerHvdPrompt = (server2) => {
4804
4930
  title: "Search High-Value Datasets (HVD)",
4805
4931
  description: "Guided prompt to find High-Value Datasets (HVD) on a CKAN portal. Automatically uses the correct filter field from portal configuration.",
4806
4932
  argsSchema: {
4807
- server_url: z16.string().url().describe("Base URL of the CKAN server"),
4808
- rows: z16.coerce.number().int().positive().default(10).describe("Max results to return")
4933
+ server_url: z17.string().url().describe("Base URL of the CKAN server"),
4934
+ rows: z17.coerce.number().int().positive().default(10).describe("Max results to return")
4809
4935
  }
4810
4936
  },
4811
4937
  async ({ server_url, rows }) => {
@@ -4829,7 +4955,7 @@ var registerAllPrompts = (server2) => {
4829
4955
  function createServer() {
4830
4956
  return new McpServer({
4831
4957
  name: "ckan-mcp-server",
4832
- version: "0.4.73"
4958
+ version: "0.4.76"
4833
4959
  });
4834
4960
  }
4835
4961
  function registerAll(server2) {
@@ -4843,6 +4969,7 @@ function registerAll(server2) {
4843
4969
  registerAnalyzeTools(server2);
4844
4970
  registerCatalogStatsTools(server2);
4845
4971
  registerSparqlTools(server2);
4972
+ registerPortalDiscoveryTools(server2);
4846
4973
  registerAllResources(server2);
4847
4974
  registerAllPrompts(server2);
4848
4975
  }