@aborruso/ckan-mcp-server 0.4.65 → 0.4.67

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/LOG.md CHANGED
@@ -1,5 +1,16 @@
1
1
  # LOG
2
2
 
3
+ ## 2026-03-04 (v0.4.67)
4
+
5
+ - improve: `sparql_query` — validate SELECT-only, auto-inject LIMIT (default 25, max 1000), truncate output at CHARACTER_LIMIT; +11 tests (310 total)
6
+ - remove: `europa_dataset_search` tool and related files (`src/tools/europa.ts`, `src/utils/europa-http.ts`, Europa types in `src/types.ts`, docs, tests); tool count 19→18
7
+
8
+ ## 2026-03-04 (v0.4.66)
9
+
10
+ - feat: add `sparql_query` tool — execute SPARQL SELECT against any public HTTPS endpoint (e.g. data.europa.eu/sparql, DBpedia, Wikidata)
11
+ - feat(europa): update `europa_dataset_search` description to suggest `sparql_query` for publisher aggregations and queries not exposed as facets
12
+ - test: add 12 tests for `sparql_query` (querySparqlEndpoint, formatSparqlMarkdown, formatSparqlJson)
13
+
3
14
  ## 2026-03-04 (v0.4.65)
4
15
 
5
16
  - feat(europa): add `publisher` to `ALLOWED_FACETS` in `europa_dataset_search` — now shows top publishers per query
package/dist/index.js CHANGED
@@ -3982,301 +3982,122 @@ Typical workflow: ckan_catalog_stats (understand the portal) \u2192 ckan_package
3982
3982
  );
3983
3983
  }
3984
3984
 
3985
- // src/tools/europa.ts
3985
+ // src/tools/sparql.ts
3986
3986
  import { z as z10 } from "zod";
3987
-
3988
- // src/utils/europa-http.ts
3989
- import axios3 from "axios";
3990
- var EUROPA_SEARCH_URL = "https://data.europa.eu/api/hub/search/search";
3991
- async function makeEuropaSearchRequest(params) {
3992
- const isNode = typeof process !== "undefined" && !!process.versions?.node;
3993
- const searchParams = new URLSearchParams();
3994
- const isMatchAll = !params.q || params.q === "*" || params.q === "*:*";
3995
- if (!isMatchAll) {
3996
- searchParams.set("q", params.q);
3997
- }
3998
- searchParams.set("page", String(params.page ?? 0));
3999
- searchParams.set("limit", String(params.limit ?? 10));
4000
- searchParams.set("filters", "dataset");
4001
- if (params.facets && Object.keys(params.facets).length > 0) {
4002
- searchParams.set("facets", JSON.stringify(params.facets));
4003
- }
4004
- if (params.sort) {
4005
- searchParams.set("sort", params.sort);
4006
- }
4007
- const url = `${EUROPA_SEARCH_URL}?${searchParams.toString()}`;
4008
- let data;
4009
- if (isNode) {
4010
- const response = await axios3.get(url, {
4011
- timeout: 3e4,
4012
- headers: {
4013
- Accept: "application/json",
4014
- "User-Agent": "CKAN-MCP-Server/1.0"
4015
- }
4016
- });
4017
- data = response.data;
4018
- } else {
4019
- const controller = new AbortController();
4020
- const timeoutId = setTimeout(() => controller.abort(), 3e4);
4021
- try {
4022
- const response = await fetch(url, {
4023
- method: "GET",
4024
- signal: controller.signal,
4025
- headers: {
4026
- Accept: "application/json",
4027
- "User-Agent": "CKAN-MCP-Server/1.0"
4028
- }
4029
- });
4030
- if (!response.ok) {
4031
- throw new Error(`Europa API error (${response.status}): ${response.statusText}`);
4032
- }
4033
- data = await response.json();
4034
- } finally {
4035
- clearTimeout(timeoutId);
4036
- }
3987
+ var DEFAULT_LIMIT = 25;
3988
+ var MAX_LIMIT = 1e3;
3989
+ function validateSelectQuery(query) {
3990
+ const stripped = query.replace(/#[^\n]*/g, "");
3991
+ if (!/\bSELECT\b/i.test(stripped)) {
3992
+ throw new Error(
3993
+ "Only SELECT queries are supported (not CONSTRUCT, ASK, DESCRIBE, or write operations)."
3994
+ );
4037
3995
  }
4038
- const envelope = data;
4039
- return {
4040
- count: envelope?.result?.count ?? 0,
4041
- results: envelope?.result?.results ?? [],
4042
- rawFacets: envelope?.result?.facets ?? []
4043
- };
4044
3996
  }
4045
-
4046
- // src/tools/europa.ts
4047
- function sanitizeText(text) {
4048
- return text.replace(/[\x00-\x09\x0B\x0C\x0E-\x1F]/g, " ").replace(/[\r\n]+/g, " ");
4049
- }
4050
- function pickLang(obj, lang) {
4051
- if (!obj || typeof obj !== "object") return "";
4052
- const raw = obj[lang] || (lang !== "en" ? obj["en"] : void 0);
4053
- if (raw) return sanitizeText(raw);
4054
- const keys = Object.keys(obj);
4055
- return keys.length > 0 ? sanitizeText(obj[keys[0]]) : "";
4056
- }
4057
- function extractFormats(dataset) {
4058
- if (!Array.isArray(dataset.distributions)) return [];
4059
- const formats = /* @__PURE__ */ new Set();
4060
- for (const dist of dataset.distributions) {
4061
- const label = dist.format?.label;
4062
- if (label) formats.add(label.toUpperCase());
4063
- }
4064
- return Array.from(formats).sort();
3997
+ function injectLimit(query, limit) {
3998
+ if (/\bLIMIT\b/i.test(query)) return query;
3999
+ return `${query.trimEnd()}
4000
+ LIMIT ${limit}`;
4065
4001
  }
4066
- function extractKeywords(dataset, lang) {
4067
- if (!Array.isArray(dataset.keywords)) return [];
4068
- const kws = [];
4069
- for (const kw of dataset.keywords) {
4070
- if (kw.language === lang && kw.label) {
4071
- kws.push(kw.label);
4072
- }
4002
+ async function querySparqlEndpoint(endpointUrl, query) {
4003
+ const url = new URL(endpointUrl);
4004
+ if (url.protocol !== "https:") {
4005
+ throw new Error("Only HTTPS endpoints are allowed");
4073
4006
  }
4074
- if (kws.length === 0) {
4075
- for (const kw of dataset.keywords) {
4076
- if (kw.language === "en" && kw.label) {
4077
- kws.push(kw.label);
4078
- }
4079
- }
4007
+ const controller = new AbortController();
4008
+ const timeoutId = setTimeout(() => controller.abort(), 15e3);
4009
+ let response;
4010
+ try {
4011
+ response = await fetch(endpointUrl, {
4012
+ method: "POST",
4013
+ signal: controller.signal,
4014
+ headers: {
4015
+ "Content-Type": "application/sparql-query",
4016
+ "Accept": "application/sparql-results+json"
4017
+ },
4018
+ body: query
4019
+ });
4020
+ } finally {
4021
+ clearTimeout(timeoutId);
4080
4022
  }
4081
- if (kws.length === 0) {
4082
- for (const kw of dataset.keywords) {
4083
- if (kw.label) kws.push(kw.label);
4084
- }
4023
+ if (!response.ok) {
4024
+ throw new Error(`SPARQL endpoint error (${response.status}): ${response.statusText}`);
4085
4025
  }
4086
- return [...new Set(kws)];
4026
+ return response.json();
4087
4027
  }
4088
- function extractDistributions(dataset) {
4089
- if (!Array.isArray(dataset.distributions)) return [];
4090
- const dists = [];
4091
- for (const dist of dataset.distributions) {
4092
- const format = dist.format?.label?.toUpperCase() || "UNKNOWN";
4093
- const rawDownload = dist.download_url;
4094
- const rawAccess = dist.access_url;
4095
- const download = Array.isArray(rawDownload) ? rawDownload[0] : rawDownload;
4096
- const access = Array.isArray(rawAccess) ? rawAccess[0] : rawAccess;
4097
- const url = download || access || "";
4098
- if (url) dists.push({ format, url });
4099
- }
4100
- return dists;
4101
- }
4102
- function renderEuropaSearchMarkdown(results, count, params) {
4103
- const { q, country, lang, page, page_size } = params;
4104
- let md = `# European Data Portal \u2014 Search Results
4105
-
4106
- `;
4107
- md += `**Query**: ${q}`;
4108
- if (country && country.length > 0) {
4109
- md += ` | **Country**: ${country.map((c) => c.toUpperCase()).join(", ")}`;
4110
- }
4111
- md += ` | **Results**: ${count} (page ${page}, ${page_size} per page)
4112
-
4113
- `;
4114
- if (results.length === 0) {
4115
- md += `No datasets found matching your query.
4116
- `;
4117
- return md;
4118
- }
4119
- md += `---
4120
-
4121
- `;
4122
- results.forEach((dataset, i) => {
4123
- const title = pickLang(dataset.title, lang) || dataset.id;
4124
- const description = pickLang(dataset.description, lang);
4125
- const formats = extractFormats(dataset);
4126
- const keywords = extractKeywords(dataset, lang);
4127
- const countryLabel = dataset.country?.id?.toUpperCase() || "";
4128
- md += `### ${i + 1}. ${title}
4129
-
4130
- `;
4131
- md += `| Field | Value |
4132
- |---|---|
4133
- `;
4134
- if (countryLabel) md += `| Country | ${countryLabel} |
4135
- `;
4136
- if (dataset.issued) md += `| Issued | ${formatDate(dataset.issued)} |
4137
- `;
4138
- if (dataset.modified) md += `| Modified | ${formatDate(dataset.modified)} |
4139
- `;
4140
- if (formats.length > 0) md += `| Formats | ${formats.join(", ")} |
4141
- `;
4142
- if (keywords.length > 0) md += `| Keywords | ${keywords.slice(0, 8).join(", ")}${keywords.length > 8 ? ", ..." : ""} |
4143
- `;
4144
- if (dataset.is_hvd) md += `| HVD | Yes |
4145
- `;
4146
- md += `
4147
- `;
4148
- if (description) {
4149
- const truncated = description.length > 200 ? description.substring(0, 200) + "..." : description;
4150
- md += `${truncated}
4028
+ function formatSparqlMarkdown(data, endpointUrl) {
4029
+ const vars = data.head.vars;
4030
+ const bindings = data.results.bindings;
4031
+ let md = `# SPARQL Query Results
4151
4032
 
4152
4033
  `;
4153
- }
4154
- md += `Link: https://data.europa.eu/data/datasets/${dataset.id}
4155
-
4156
- `;
4157
- const dists = extractDistributions(dataset);
4158
- if (dists.length > 0) {
4159
- md += `**Resources** (${dists.length}):
4160
- `;
4161
- for (const d of dists.slice(0, 5)) {
4162
- md += `- ${d.format}: ${d.url}
4163
- `;
4164
- }
4165
- if (dists.length > 5) {
4166
- md += `- ... and ${dists.length - 5} more
4167
- `;
4168
- }
4169
- md += `
4034
+ md += `**Endpoint**: ${endpointUrl}
4170
4035
  `;
4171
- }
4172
- md += `---
4036
+ md += `**Rows**: ${bindings.length}
4173
4037
 
4174
4038
  `;
4175
- });
4176
- if (count > page * page_size) {
4177
- md += `> More results available \u2014 use \`page: ${page + 1}\`
4178
- `;
4179
- }
4180
- return md;
4181
- }
4182
- var MAX_FACET_ITEMS = 15;
4183
- var ALLOWED_FACETS = /* @__PURE__ */ new Set([
4184
- "country",
4185
- "categories",
4186
- "format",
4187
- "is_hvd",
4188
- "scoring",
4189
- "language",
4190
- "subject",
4191
- "hvdCategory",
4192
- "publisher"
4193
- ]);
4194
- function resolveFacetTitle(title, lang) {
4195
- if (typeof title === "string") return title;
4196
- return title[lang] || title["en"] || Object.values(title)[0] || "";
4197
- }
4198
- function resolveRawFacets(rawFacets, lang) {
4199
- const result = [];
4200
- for (const raw of rawFacets) {
4201
- if (!ALLOWED_FACETS.has(raw.id)) continue;
4202
- if (!raw.items || raw.items.length === 0) continue;
4203
- const items = raw.items.sort((a, b) => b.count - a.count).slice(0, MAX_FACET_ITEMS).map((item) => ({
4204
- id: item.id,
4205
- title: resolveFacetTitle(item.title, lang),
4206
- count: item.count
4207
- }));
4208
- result.push({ id: raw.id, title: raw.title, items });
4039
+ if (bindings.length === 0) {
4040
+ return md + "_No results_\n";
4209
4041
  }
4210
- return result;
4211
- }
4212
- function renderFacetsMarkdown(facets) {
4213
- if (!facets || facets.length === 0) return "";
4214
- let md = `## Facets
4215
-
4216
- `;
4217
- for (const facet of facets) {
4218
- if (!facet.items || facet.items.length === 0) continue;
4219
- md += `### ${facet.title || facet.id}
4220
-
4221
- `;
4222
- md += `| Value | Count |
4223
- |---|---:|
4042
+ md += `| ${vars.join(" | ")} |
4224
4043
  `;
4225
- for (const item of facet.items) {
4226
- md += `| ${item.title || item.id} | ${item.count} |
4044
+ md += `| ${vars.map(() => "---").join(" | ")} |
4227
4045
  `;
4228
- }
4229
- md += `
4046
+ for (const row of bindings) {
4047
+ const cells = vars.map((v) => {
4048
+ const val = row[v]?.value ?? "";
4049
+ return val.replace(/\|/g, "\\|");
4050
+ });
4051
+ md += `| ${cells.join(" | ")} |
4230
4052
  `;
4231
4053
  }
4232
4054
  return md;
4233
4055
  }
4234
- function facetsToCompactJson(facets) {
4235
- const result = {};
4236
- for (const facet of facets) {
4237
- if (!facet.items || facet.items.length === 0) continue;
4238
- result[facet.id] = facet.items.map((item) => ({
4239
- id: item.id,
4240
- title: item.title,
4241
- count: item.count
4242
- }));
4243
- }
4244
- return result;
4056
+ function formatSparqlJson(data) {
4057
+ const vars = data.head.vars;
4058
+ const rows = data.results.bindings.map((row) => {
4059
+ const obj = {};
4060
+ for (const v of vars) {
4061
+ obj[v] = row[v]?.value ?? "";
4062
+ }
4063
+ return obj;
4064
+ });
4065
+ return { count: rows.length, columns: vars, rows };
4245
4066
  }
4246
- function registerEuropaTools(server2) {
4067
+ function registerSparqlTools(server2) {
4247
4068
  server2.registerTool(
4248
- "europa_dataset_search",
4069
+ "sparql_query",
4249
4070
  {
4250
- title: "Search European Data Portal",
4251
- description: `Search datasets on the European Data Portal (data.europa.eu), which aggregates 1.7M+ datasets from all EU countries.
4071
+ title: "SPARQL Query",
4072
+ description: `Execute a SPARQL SELECT query against any public HTTPS SPARQL endpoint.
4073
+
4074
+ Useful for querying open data portals and knowledge graphs that expose SPARQL endpoints, including:
4075
+ - data.europa.eu (European open data portal)
4076
+ - publications.europa.eu (EU Publications Office)
4077
+ - DBpedia, Wikidata
4078
+ - Any DCAT-AP compliant data catalog
4252
4079
 
4253
- This is NOT a CKAN portal. Use this tool specifically for EU-wide open data discovery.
4080
+ Only HTTPS endpoints are allowed. Queries timeout after 15 seconds.
4081
+ Only SELECT queries are supported (read-only).
4082
+
4083
+ If the query does not contain a LIMIT clause, one is injected automatically (default: ${DEFAULT_LIMIT}, max: ${MAX_LIMIT}).
4254
4084
 
4255
4085
  Args:
4256
- - q (string): Search query
4257
- - country (string[]): ISO 3166-1 alpha-2 country codes (e.g., ["IT", "DE"])
4258
- - is_hvd (boolean): Filter only High Value Datasets
4259
- - lang (string): Language for multilingual fields (default "en")
4260
- - sort (enum): Sort by "relevance", "issued", "modified", "title"
4261
- - order (enum): Sort direction "asc" or "desc" (default "desc")
4262
- - page (number): Page number, 1-based (default 1)
4263
- - page_size (number): Results per page (default 10, max 50)
4264
- - response_format: "markdown" or "json" (JSON is compact: description truncated, max 3 distributions)
4086
+ - endpoint_url (string): HTTPS URL of the SPARQL endpoint
4087
+ - query (string): SPARQL SELECT query to execute
4088
+ - limit (number): Max rows to return (default: ${DEFAULT_LIMIT}). Ignored if query already contains LIMIT.
4089
+ - response_format ('markdown' | 'json'): Output format
4265
4090
 
4266
4091
  Examples:
4267
- - { q: "environment", country: ["IT"], page_size: 5 }
4268
- - { q: "transport", sort: "modified", order: "desc" }
4269
- - { q: "health data", lang: "it" }
4270
- - { q: "transport", is_hvd: true }`,
4092
+ - Count Italian HVD datasets by publisher on data.europa.eu
4093
+ - Query Wikidata for entities related to a dataset topic
4094
+ - Explore EU controlled vocabularies on publications.europa.eu
4095
+
4096
+ Typical workflow: sparql_query (explore schema) \u2192 sparql_query (targeted query) \u2192 ckan_package_search (get dataset details)`,
4271
4097
  inputSchema: z10.object({
4272
- q: z10.string().min(1).describe("Search query"),
4273
- country: z10.array(z10.string().length(2)).optional().describe("ISO 3166-1 alpha-2 country codes"),
4274
- is_hvd: z10.boolean().optional().describe("Filter only High Value Datasets (HVD)"),
4275
- lang: z10.string().optional().default("en").describe("Language for multilingual fields"),
4276
- sort: z10.enum(["relevance", "issued", "modified", "title"]).optional().describe("Sort field"),
4277
- order: z10.enum(["asc", "desc"]).optional().default("desc").describe("Sort direction"),
4278
- page: z10.number().int().min(1).optional().default(1).describe("Page number (1-based)"),
4279
- page_size: z10.number().int().min(1).max(50).optional().default(10).describe("Results per page"),
4098
+ endpoint_url: z10.string().url().describe("HTTPS URL of the SPARQL endpoint"),
4099
+ query: z10.string().min(1).describe("SPARQL SELECT query to execute"),
4100
+ limit: z10.number().int().min(1).max(MAX_LIMIT).default(DEFAULT_LIMIT).describe(`Max rows to return (default: ${DEFAULT_LIMIT}, max: ${MAX_LIMIT}). Injected as SPARQL LIMIT if not already present in query.`),
4280
4101
  response_format: ResponseFormatSchema
4281
4102
  }).strict(),
4282
4103
  annotations: {
@@ -4288,67 +4109,31 @@ Examples:
4288
4109
  },
4289
4110
  async (params) => {
4290
4111
  try {
4291
- const filterFacets = {};
4292
- if (params.country && params.country.length > 0) {
4293
- filterFacets.country = params.country.map((c) => c.toLowerCase());
4294
- }
4295
- if (params.is_hvd) {
4296
- filterFacets.is_hvd = ["true"];
4297
- }
4298
- let sortParam;
4299
- if (params.sort && params.sort !== "relevance") {
4300
- sortParam = `${params.sort}+${params.order}`;
4301
- }
4302
- const { count, results, rawFacets } = await makeEuropaSearchRequest({
4303
- q: params.q,
4304
- page: params.page - 1,
4305
- limit: params.page_size,
4306
- facets: Object.keys(filterFacets).length > 0 ? filterFacets : void 0,
4307
- sort: sortParam
4308
- });
4309
- const facets = resolveRawFacets(rawFacets, params.lang);
4112
+ validateSelectQuery(params.query);
4113
+ const limitedQuery = injectLimit(params.query, params.limit);
4114
+ const data = await querySparqlEndpoint(params.endpoint_url, limitedQuery);
4310
4115
  if (params.response_format === "json" /* JSON */) {
4311
- const filtered = results.map((d) => {
4312
- const desc = pickLang(d.description, params.lang);
4313
- const dists = extractDistributions(d);
4314
- return {
4315
- id: d.id,
4316
- title: pickLang(d.title, params.lang),
4317
- description: desc.length > 200 ? desc.substring(0, 200) + "..." : desc,
4318
- issued: d.issued ?? null,
4319
- modified: d.modified ?? null,
4320
- country: d.country?.id?.toUpperCase() || null,
4321
- formats: extractFormats(d),
4322
- keywords: extractKeywords(d, params.lang).slice(0, 5),
4323
- is_hvd: d.is_hvd ?? false,
4324
- link: `https://data.europa.eu/data/datasets/${d.id}`,
4325
- distributions: dists.slice(0, 3)
4326
- };
4327
- });
4328
- const compactFacets = facetsToCompactJson(facets);
4116
+ const result = formatSparqlJson(data);
4117
+ let text2 = JSON.stringify(result, null, 2);
4118
+ if (text2.length > CHARACTER_LIMIT) {
4119
+ text2 = text2.slice(0, CHARACTER_LIMIT) + "\n/* output truncated */";
4120
+ }
4329
4121
  return {
4330
- content: [{
4331
- type: "text",
4332
- text: truncateText(JSON.stringify({ count, results: filtered, facets: compactFacets }, null, 2))
4333
- }]
4122
+ content: [{ type: "text", text: text2 }],
4123
+ structuredContent: result
4334
4124
  };
4335
4125
  }
4336
- let markdown = renderEuropaSearchMarkdown(results, count, {
4337
- q: params.q,
4338
- country: params.country,
4339
- lang: params.lang,
4340
- page: params.page,
4341
- page_size: params.page_size
4342
- });
4343
- markdown += renderFacetsMarkdown(facets);
4344
- return {
4345
- content: [{ type: "text", text: truncateText(addDemoFooter(markdown)) }]
4346
- };
4126
+ let text = formatSparqlMarkdown(data, params.endpoint_url);
4127
+ if (text.length > CHARACTER_LIMIT) {
4128
+ text = text.slice(0, CHARACTER_LIMIT) + "\n\n_Output truncated._";
4129
+ }
4130
+ return { content: [{ type: "text", text }] };
4347
4131
  } catch (error) {
4348
4132
  return {
4349
4133
  content: [{
4350
4134
  type: "text",
4351
- text: `Error searching European Data Portal: ${error instanceof Error ? error.message : String(error)}`
4135
+ text: `SPARQL query failed:
4136
+ ${error instanceof Error ? error.message : String(error)}`
4352
4137
  }],
4353
4138
  isError: true
4354
4139
  };
@@ -4896,7 +4681,7 @@ var registerAllPrompts = (server2) => {
4896
4681
  function createServer() {
4897
4682
  return new McpServer({
4898
4683
  name: "ckan-mcp-server",
4899
- version: "0.4.63"
4684
+ version: "0.4.67"
4900
4685
  });
4901
4686
  }
4902
4687
  function registerAll(server2) {
@@ -4909,7 +4694,7 @@ function registerAll(server2) {
4909
4694
  registerQualityTools(server2);
4910
4695
  registerAnalyzeTools(server2);
4911
4696
  registerCatalogStatsTools(server2);
4912
- registerEuropaTools(server2);
4697
+ registerSparqlTools(server2);
4913
4698
  registerAllResources(server2);
4914
4699
  registerAllPrompts(server2);
4915
4700
  }