@aborruso/ckan-mcp-server 0.4.64 → 0.4.66

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LOG.md CHANGED
@@ -1,5 +1,15 @@
1
1
  # LOG
2
2
 
3
+ ## 2026-03-04 (v0.4.66)
4
+
5
+ - feat: add `sparql_query` tool — execute SPARQL SELECT against any public HTTPS endpoint (e.g. data.europa.eu/sparql, DBpedia, Wikidata)
6
+ - feat(europa): update `europa_dataset_search` description to suggest `sparql_query` for publisher aggregations and queries not exposed as facets
7
+ - test: add 12 tests for `sparql_query` (querySparqlEndpoint, formatSparqlMarkdown, formatSparqlJson)
8
+
9
+ ## 2026-03-04 (v0.4.65)
10
+
11
+ - feat(europa): add `publisher` to `ALLOWED_FACETS` in `europa_dataset_search` — now shows top publishers per query
12
+
3
13
  ## 2026-03-04 (v0.4.64)
4
14
 
5
15
  - fix(europa): `q=*` now correctly returns all 1.7M+ datasets — Europa API ignores `q` when omitted; sending `q=*` was causing Elasticsearch to return only ~6k results; match-all queries (`*`, `*:*`) now omit the `q` parameter
package/dist/index.js CHANGED
@@ -4188,7 +4188,8 @@ var ALLOWED_FACETS = /* @__PURE__ */ new Set([
4188
4188
  "scoring",
4189
4189
  "language",
4190
4190
  "subject",
4191
- "hvdCategory"
4191
+ "hvdCategory",
4192
+ "publisher"
4192
4193
  ]);
4193
4194
  function resolveFacetTitle(title, lang) {
4194
4195
  if (typeof title === "string") return title;
@@ -4262,6 +4263,15 @@ Args:
4262
4263
  - page_size (number): Results per page (default 10, max 50)
4263
4264
  - response_format: "markdown" or "json" (JSON is compact: description truncated, max 3 distributions)
4264
4265
 
4266
+ Available facets (fixed \u2014 returned automatically): country, categories, format, language, is_hvd, hvdCategory, scoring.
4267
+ Publisher/organization breakdown is NOT available as a facet here. Use sparql_query on https://data.europa.eu/sparql for that.
4268
+
4269
+ When to use sparql_query instead of this tool:
4270
+ - Count or list datasets grouped by publisher/organization
4271
+ - Query any property not exposed as a facet (e.g., license URI, spatial coverage, themes)
4272
+ - Cross-join datasets with EU controlled vocabularies (publications.europa.eu)
4273
+ - Any aggregation or filter not supported by the search API
4274
+
4265
4275
  Examples:
4266
4276
  - { q: "environment", country: ["IT"], page_size: 5 }
4267
4277
  - { q: "transport", sort: "modified", order: "desc" }
@@ -4356,6 +4366,138 @@ Examples:
4356
4366
  );
4357
4367
  }
4358
4368
 
4369
+ // src/tools/sparql.ts
4370
+ import { z as z11 } from "zod";
4371
/**
 * Sends a SPARQL query to an HTTPS endpoint and returns the parsed JSON body.
 *
 * The query text is POSTed unmodified with `Content-Type: application/sparql-query`
 * and the endpoint is asked for `application/sparql-results+json`.
 * A single 15-second budget covers both the request and the download/parse of
 * the response body.
 *
 * @param {string} endpointUrl - Absolute URL of the SPARQL endpoint; must use `https:`.
 * @param {string} query - SPARQL query text, sent as the request body.
 * @returns {Promise<any>} The parsed JSON response body.
 * @throws {TypeError} If `endpointUrl` cannot be parsed as a URL.
 * @throws {Error} If the URL is not HTTPS, the request times out, or the
 *   endpoint answers with a non-2xx status. JSON parse errors propagate as-is.
 */
async function querySparqlEndpoint(endpointUrl, query) {
  const url = new URL(endpointUrl);
  if (url.protocol !== "https:") {
    throw new Error("Only HTTPS endpoints are allowed");
  }
  const timeoutMs = 15e3;
  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
  try {
    const response = await fetch(endpointUrl, {
      method: "POST",
      signal: controller.signal,
      headers: {
        "Content-Type": "application/sparql-query",
        "Accept": "application/sparql-results+json"
      },
      body: query
    });
    if (!response.ok) {
      throw new Error(`SPARQL endpoint error (${response.status}): ${response.statusText}`);
    }
    // Parse the body while the timer is still armed: a stalled body stream
    // must be aborted too, not only a slow response header.
    return await response.json();
  } catch (error) {
    if (controller.signal.aborted) {
      // Replace the generic abort error with a message naming the real cause.
      throw new Error(`SPARQL endpoint timed out after ${timeoutMs / 1e3} seconds`, { cause: error });
    }
    throw error;
  } finally {
    clearTimeout(timeoutId);
  }
}
4397
/**
 * Renders a SPARQL JSON result document as a markdown report.
 *
 * The report starts with a title and the endpoint URL. For a result carrying
 * `head.vars` and `results.bindings` it then prints the row count and a table
 * with one column per variable; unbound variables become empty cells. For a
 * document carrying a top-level `boolean` member it prints that value instead
 * of a table.
 *
 * @param {object} data - Parsed SPARQL JSON results document.
 * @param {string} endpointUrl - Endpoint URL shown in the report header.
 * @returns {string} Markdown text ending with a newline.
 */
function formatSparqlMarkdown(data, endpointUrl) {
  // Missing sections are treated as empty so a well-formed response without
  // `head.vars` or `results` does not raise a TypeError.
  const vars = data?.head?.vars ?? [];
  const bindings = data?.results?.bindings ?? [];
  // A raw "|" or line break inside a cell would split the table row, so pipes
  // are backslash-escaped and line breaks are collapsed to a single space.
  const escapeCell = (text) => String(text).replace(/\|/g, "\\|").replace(/\r\n|\r|\n/g, " ");

  let md = "# SPARQL Query Results\n\n";
  md += `**Endpoint**: ${endpointUrl}\n`;
  if (typeof data?.boolean === "boolean") {
    return `${md}**Result**: ${data.boolean}\n`;
  }
  md += `**Rows**: ${bindings.length}\n\n`;
  if (bindings.length === 0) {
    return md + "_No results_\n";
  }
  md += `| ${vars.map((v) => escapeCell(v)).join(" | ")} |\n`;
  md += `| ${vars.map(() => "---").join(" | ")} |\n`;
  for (const row of bindings) {
    const cells = vars.map((v) => escapeCell(row[v]?.value ?? ""));
    md += `| ${cells.join(" | ")} |\n`;
  }
  return md;
}
4425
/**
 * Flattens a SPARQL JSON result document into a compact row-oriented object.
 *
 * Each binding becomes a plain object keyed by variable name whose values are
 * the bindings' `value` strings; unbound variables map to an empty string.
 *
 * @param {object} data - Parsed SPARQL JSON results document.
 * @returns {{count: number, columns: string[], rows: object[], boolean?: boolean}}
 *   `count` is the number of rows and `columns` the variable names.
 *   `boolean` is included only when the document carries a top-level boolean.
 */
function formatSparqlJson(data) {
  // Missing sections are treated as empty so a well-formed response without
  // `head.vars` or `results` does not raise a TypeError.
  const vars = data?.head?.vars ?? [];
  const bindings = data?.results?.bindings ?? [];
  // Object.fromEntries defines own properties, so a variable named
  // "__proto__" is kept as a column instead of being silently dropped.
  const rows = bindings.map((row) =>
    Object.fromEntries(vars.map((v) => [v, row[v]?.value ?? ""]))
  );
  const result = { count: rows.length, columns: vars, rows };
  if (typeof data?.boolean === "boolean") {
    result.boolean = data.boolean;
  }
  return result;
}
4436
/**
 * Registers the `sparql_query` tool on the given MCP server.
 *
 * The tool validates its input with a strict zod schema, runs the query via
 * `querySparqlEndpoint`, and returns the result either as compact JSON (text
 * plus `structuredContent`) or as a markdown report. Any error thrown while
 * querying or formatting is returned as a tool result flagged `isError`
 * rather than being rethrown.
 *
 * @param {object} server2 - Server instance exposing `registerTool`.
 */
function registerSparqlTools(server2) {
  server2.registerTool(
    "sparql_query",
    {
      title: "SPARQL Query",
      description: `Execute a SPARQL SELECT query against any public HTTPS SPARQL endpoint.

Useful for querying open data portals and knowledge graphs that expose SPARQL endpoints, including:
- data.europa.eu (European open data portal)
- publications.europa.eu (EU Publications Office)
- DBpedia, Wikidata
- Any DCAT-AP compliant data catalog

Only HTTPS endpoints are allowed. Queries timeout after 15 seconds.
Only SELECT queries are supported (read-only).

Args:
- endpoint_url (string): HTTPS URL of the SPARQL endpoint
- query (string): SPARQL SELECT query to execute
- response_format ('markdown' | 'json'): Output format

Examples:
- Count Italian HVD datasets by publisher on data.europa.eu
- Query Wikidata for entities related to a dataset topic
- Explore EU controlled vocabularies on publications.europa.eu

Typical workflow: sparql_query (explore schema) \u2192 sparql_query (targeted query) \u2192 ckan_package_search (get dataset details)`,
      inputSchema: z11.object({
        endpoint_url: z11.string().url().describe("HTTPS URL of the SPARQL endpoint"),
        query: z11.string().min(1).describe("SPARQL SELECT query to execute"),
        response_format: ResponseFormatSchema
      }).strict(),
      annotations: {
        readOnlyHint: true,
        destructiveHint: false,
        idempotentHint: true,
        openWorldHint: true
      }
    },
    async (params) => {
      try {
        const data = await querySparqlEndpoint(params.endpoint_url, params.query);
        if (params.response_format === "json" /* JSON */) {
          // Convert once and reuse, so the text and structured payloads are
          // guaranteed to describe the same object.
          const structured = formatSparqlJson(data);
          return {
            content: [{ type: "text", text: JSON.stringify(structured, null, 2) }],
            structuredContent: structured
          };
        }
        return {
          content: [{ type: "text", text: formatSparqlMarkdown(data, params.endpoint_url) }]
        };
      } catch (error) {
        return {
          content: [{
            type: "text",
            text: `SPARQL query failed:
${error instanceof Error ? error.message : String(error)}`
          }],
          isError: true
        };
      }
    }
  );
}
4500
+
4359
4501
  // src/resources/dataset.ts
4360
4502
  import { ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
4361
4503
 
@@ -4625,7 +4767,7 @@ function registerAllResources(server2) {
4625
4767
  }
4626
4768
 
4627
4769
  // src/prompts/theme.ts
4628
- import { z as z11 } from "zod";
4770
+ import { z as z12 } from "zod";
4629
4771
 
4630
4772
  // src/prompts/types.ts
4631
4773
  var createTextPrompt = (text) => ({
@@ -4686,9 +4828,9 @@ var registerThemePrompt = (server2) => {
4686
4828
  title: "Search datasets by theme",
4687
4829
  description: "Guided prompt to discover a theme and search datasets under it.",
4688
4830
  argsSchema: {
4689
- server_url: z11.string().url().describe("Base URL of the CKAN server"),
4690
- theme: z11.string().min(1).describe("Theme or group name to search"),
4691
- rows: z11.coerce.number().int().positive().default(10).describe("Max results to return")
4831
+ server_url: z12.string().url().describe("Base URL of the CKAN server"),
4832
+ theme: z12.string().min(1).describe("Theme or group name to search"),
4833
+ rows: z12.coerce.number().int().positive().default(10).describe("Max results to return")
4692
4834
  }
4693
4835
  },
4694
4836
  async ({ server_url, theme, rows }) => createTextPrompt(buildThemePromptText(server_url, theme, rows))
@@ -4696,7 +4838,7 @@ var registerThemePrompt = (server2) => {
4696
4838
  };
4697
4839
 
4698
4840
  // src/prompts/organization.ts
4699
- import { z as z12 } from "zod";
4841
+ import { z as z13 } from "zod";
4700
4842
  var ORGANIZATION_PROMPT_NAME = "ckan-search-by-organization";
4701
4843
  var buildOrganizationPromptText = (serverUrl, organization, rows) => `# Guided search: datasets by organization
4702
4844
 
@@ -4731,9 +4873,9 @@ var registerOrganizationPrompt = (server2) => {
4731
4873
  title: "Search datasets by organization",
4732
4874
  description: "Guided prompt to find a publisher and list its datasets.",
4733
4875
  argsSchema: {
4734
- server_url: z12.string().url().describe("Base URL of the CKAN server"),
4735
- organization: z12.string().min(1).describe("Organization name or keyword"),
4736
- rows: z12.coerce.number().int().positive().default(10).describe("Max results to return")
4876
+ server_url: z13.string().url().describe("Base URL of the CKAN server"),
4877
+ organization: z13.string().min(1).describe("Organization name or keyword"),
4878
+ rows: z13.coerce.number().int().positive().default(10).describe("Max results to return")
4737
4879
  }
4738
4880
  },
4739
4881
  async ({ server_url, organization, rows }) => createTextPrompt(buildOrganizationPromptText(server_url, organization, rows))
@@ -4741,7 +4883,7 @@ var registerOrganizationPrompt = (server2) => {
4741
4883
  };
4742
4884
 
4743
4885
  // src/prompts/format.ts
4744
- import { z as z13 } from "zod";
4886
+ import { z as z14 } from "zod";
4745
4887
  var FORMAT_PROMPT_NAME = "ckan-search-by-format";
4746
4888
  var buildFormatPromptText = (serverUrl, format, rows) => `# Guided search: datasets by resource format
4747
4889
 
@@ -4765,9 +4907,9 @@ var registerFormatPrompt = (server2) => {
4765
4907
  title: "Search datasets by resource format",
4766
4908
  description: "Guided prompt to find datasets with a given resource format.",
4767
4909
  argsSchema: {
4768
- server_url: z13.string().url().describe("Base URL of the CKAN server"),
4769
- format: z13.string().min(1).describe("Resource format (e.g., CSV, JSON)"),
4770
- rows: z13.coerce.number().int().positive().default(10).describe("Max results to return")
4910
+ server_url: z14.string().url().describe("Base URL of the CKAN server"),
4911
+ format: z14.string().min(1).describe("Resource format (e.g., CSV, JSON)"),
4912
+ rows: z14.coerce.number().int().positive().default(10).describe("Max results to return")
4771
4913
  }
4772
4914
  },
4773
4915
  async ({ server_url, format, rows }) => createTextPrompt(buildFormatPromptText(server_url, format, rows))
@@ -4775,7 +4917,7 @@ var registerFormatPrompt = (server2) => {
4775
4917
  };
4776
4918
 
4777
4919
  // src/prompts/recent.ts
4778
- import { z as z14 } from "zod";
4920
+ import { z as z15 } from "zod";
4779
4921
  var RECENT_PROMPT_NAME = "ckan-recent-datasets";
4780
4922
  var buildRecentPromptText = (serverUrl, rows) => `# Guided search: recent datasets
4781
4923
 
@@ -4823,8 +4965,8 @@ var registerRecentPrompt = (server2) => {
4823
4965
  title: "Find recently updated datasets",
4824
4966
  description: "Guided prompt to list recently updated datasets on a CKAN portal.",
4825
4967
  argsSchema: {
4826
- server_url: z14.string().url().describe("Base URL of the CKAN server"),
4827
- rows: z14.coerce.number().int().positive().default(10).describe("Max results to return")
4968
+ server_url: z15.string().url().describe("Base URL of the CKAN server"),
4969
+ rows: z15.coerce.number().int().positive().default(10).describe("Max results to return")
4828
4970
  }
4829
4971
  },
4830
4972
  async ({ server_url, rows }) => createTextPrompt(buildRecentPromptText(server_url, rows))
@@ -4832,7 +4974,7 @@ var registerRecentPrompt = (server2) => {
4832
4974
  };
4833
4975
 
4834
4976
  // src/prompts/dataset-analysis.ts
4835
- import { z as z15 } from "zod";
4977
+ import { z as z16 } from "zod";
4836
4978
  var DATASET_ANALYSIS_PROMPT_NAME = "ckan-analyze-dataset";
4837
4979
  var buildDatasetAnalysisPromptText = (serverUrl, id) => `# Guided analysis: dataset
4838
4980
 
@@ -4874,8 +5016,8 @@ var registerDatasetAnalysisPrompt = (server2) => {
4874
5016
  title: "Analyze a dataset",
4875
5017
  description: "Guided prompt to inspect dataset metadata and explore DataStore tables.",
4876
5018
  argsSchema: {
4877
- server_url: z15.string().url().describe("Base URL of the CKAN server"),
4878
- id: z15.string().min(1).describe("Dataset id or name (CKAN package id)")
5019
+ server_url: z16.string().url().describe("Base URL of the CKAN server"),
5020
+ id: z16.string().min(1).describe("Dataset id or name (CKAN package id)")
4879
5021
  }
4880
5022
  },
4881
5023
  async ({ server_url, id }) => createTextPrompt(buildDatasetAnalysisPromptText(server_url, id))
@@ -4895,7 +5037,7 @@ var registerAllPrompts = (server2) => {
4895
5037
  function createServer() {
4896
5038
  return new McpServer({
4897
5039
  name: "ckan-mcp-server",
4898
- version: "0.4.63"
5040
+ version: "0.4.66"
4899
5041
  });
4900
5042
  }
4901
5043
  function registerAll(server2) {
@@ -4909,6 +5051,7 @@ function registerAll(server2) {
4909
5051
  registerAnalyzeTools(server2);
4910
5052
  registerCatalogStatsTools(server2);
4911
5053
  registerEuropaTools(server2);
5054
+ registerSparqlTools(server2);
4912
5055
  registerAllResources(server2);
4913
5056
  registerAllPrompts(server2);
4914
5057
  }