mcp-chilegob-dataset 0.2.1 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -4
- package/dist/src/ckan.js +211 -0
- package/dist/{server.js → src/server.js} +8 -1
- package/dist/{stdio.js → src/stdio.js} +1 -0
- package/dist/src/tools/organizations.js +31 -0
- package/dist/{tools → src/tools}/resource.js +9 -11
- package/dist/src/tools/schema.js +24 -0
- package/dist/{tools → src/tools}/search.js +2 -2
- package/dist/vitest.config.js +6 -0
- package/package.json +5 -4
- package/src/__tests__/ckan.test.ts +122 -0
- package/src/ckan.ts +157 -18
- package/src/server.ts +9 -1
- package/src/stdio.ts +1 -0
- package/src/tools/organizations.ts +36 -0
- package/src/tools/resource.ts +10 -12
- package/src/tools/schema.ts +30 -0
- package/src/tools/search.ts +2 -2
- package/dist/ckan.js +0 -97
- /package/dist/{index.js → src/index.js} +0 -0
- /package/dist/{tools → src/tools}/dataset.js +0 -0
package/README.md
CHANGED
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
# mcp-chilegob-dataset
|
|
2
2
|
|
|
3
|
+
[mcp-chilegob-dataset en npm](https://www.npmjs.com/package/mcp-chilegob-dataset)
|
|
4
|
+
[Licencia MIT](./LICENSE)
|
|
5
|
+
|
|
6
|
+
> **English summary:** MCP server that exposes Chile's open government data portal ([datos.gob.cl](https://datos.gob.cl)) as tools for AI assistants. Search and read thousands of public datasets from Chilean government institutions — health, education, transport, environment, and more. No API key required.
|
|
7
|
+
|
|
3
8
|
Servidor MCP que expone el portal de datos abiertos del gobierno de Chile — [datos.gob.cl](https://datos.gob.cl) — como herramientas para asistentes de inteligencia artificial.
|
|
4
9
|
|
|
5
10
|
Construido con [Hono](https://hono.dev) y el [SDK de TypeScript del Model Context Protocol](https://github.com/modelcontextprotocol/typescript-sdk).
|
|
@@ -18,7 +23,17 @@ El **Model Context Protocol (MCP)** es un estándar abierto que permite a los as
|
|
|
18
23
|
|
|
19
24
|
## Instalación y uso rápido
|
|
20
25
|
|
|
21
|
-
### Con Claude
|
|
26
|
+
### Con Claude Code (un comando)
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
claude mcp add chilegob -- npx -y mcp-chilegob-dataset
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
Eso es todo. El servidor queda registrado globalmente en tu Claude Code. Reinicia la sesión y ya puedes usarlo.
|
|
33
|
+
|
|
34
|
+
---
|
|
35
|
+
|
|
36
|
+
### Con Claude Desktop
|
|
22
37
|
|
|
23
38
|
**Paso 1** — Abre la configuración de Claude Desktop.
|
|
24
39
|
|
|
@@ -180,7 +195,7 @@ Lee filas tabulares de un recurso CKAN. Intenta primero el datastore y, si no es
|
|
|
180
195
|
"parseable": false,
|
|
181
196
|
"format": "XLS",
|
|
182
197
|
"url": "https://datosabiertos.mineduc.cl/archivo.xls",
|
|
183
|
-
"message": "This resource is a XLS file and cannot be parsed automatically. Download it directly from the URL
|
|
198
|
+
"message": "This resource is a XLS file and cannot be parsed automatically. Download it directly from the URL provided."
|
|
184
199
|
}
|
|
185
200
|
```
|
|
186
201
|
|
|
@@ -279,7 +294,8 @@ export function registerTuHerramienta(server: McpServer): void {
|
|
|
279
294
|
- **Disponibilidad del datastore** — No todos los recursos tienen datastore habilitado en CKAN. `get_resource_data` intenta automáticamente descargar el archivo (CSV, TSV, JSON); los formatos binarios (XLS, PDF) requieren descarga manual desde la URL devuelta.
|
|
280
295
|
- **Archivos grandes** — La descarga directa carga el archivo completo en memoria antes de paginar. Para archivos muy grandes (>100 MB) esto puede ser lento o fallar.
|
|
281
296
|
- **Encoding** — Los archivos CSV de datos.gob.cl pueden estar en ISO-8859-1 (Latin-1). La herramienta intenta leerlos como UTF-8; si los caracteres aparecen corruptos, descarga el archivo directamente.
|
|
282
|
-
- **
|
|
297
|
+
- **Caché en memoria (5 min)** — `search_datasets` y `get_dataset` usan caché en memoria con TTL de 5 minutos. `get_resource_data` siempre consulta en vivo. No hay límites de tasa documentados en datos.gob.cl.
|
|
298
|
+
- **Timeout de red (10s)** — Todas las solicitudes a datos.gob.cl tienen un timeout de 10 segundos. Si el portal está lento o caído, las herramientas devuelven un error claro en lugar de colgar indefinidamente.
|
|
283
299
|
- **Paquetes en alpha** — `@modelcontextprotocol/hono` y `@modelcontextprotocol/server` están en versión alpha.
|
|
284
300
|
|
|
285
301
|
---
|
|
@@ -290,7 +306,6 @@ Las contribuciones son bienvenidas. Algunas ideas:
|
|
|
290
306
|
|
|
291
307
|
- [x] Herramienta `list_organizations` — listar instituciones disponibles
|
|
292
308
|
- [x] Herramienta `get_resource_schema` — tipos y descripciones de columnas
|
|
293
|
-
- [ ] Caché en memoria para reducir llamadas a la API
|
|
294
309
|
- [ ] MCP Resources con URI templates (`datos-gob-cl://dataset/{id}`)
|
|
295
310
|
|
|
296
311
|
Por favor, abre un issue antes de enviar un PR grande.
|
package/dist/src/ckan.js
ADDED
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
const CKAN_BASE = 'https://datos.gob.cl/api/3/action';
|
|
2
|
+
const FETCH_TIMEOUT_MS = 10_000;
|
|
3
|
+
const CACHE_TTL_MS = 5 * 60 * 1000;
|
|
4
|
+
class TTLCache {
|
|
5
|
+
store = new Map();
|
|
6
|
+
get(key) {
|
|
7
|
+
const entry = this.store.get(key);
|
|
8
|
+
if (entry === undefined)
|
|
9
|
+
return undefined;
|
|
10
|
+
if (Date.now() > entry.expiresAt) {
|
|
11
|
+
this.store.delete(key);
|
|
12
|
+
return undefined;
|
|
13
|
+
}
|
|
14
|
+
return entry.value;
|
|
15
|
+
}
|
|
16
|
+
set(key, value, ttlMs) {
|
|
17
|
+
this.store.set(key, { value, expiresAt: Date.now() + ttlMs });
|
|
18
|
+
}
|
|
19
|
+
}
|
|
20
|
+
/**
 * Thrown when a resource's file format is not one this module can parse.
 * Carries the format and the download URL so callers can report both.
 */
export class NotParseableError extends Error {
    format;
    url;
    /**
     * @param format Format label of the resource.
     * @param url Location of the file that could not be parsed.
     */
    constructor(format, url) {
        const description = `Format not parseable: ${format} (${url})`;
        super(description);
        this.format = format;
        this.url = url;
        this.name = 'NotParseableError';
    }
}
|
|
30
|
+
/**
 * Thrown when an HTTP response comes back with a non-success status.
 * Exposes the numeric status and its text so callers can branch on them.
 */
export class CkanHttpError extends Error {
    statusCode;
    statusText;
    /**
     * @param statusCode HTTP status code of the response.
     * @param statusText HTTP status text of the response.
     */
    constructor(statusCode, statusText) {
        const description = `CKAN HTTP error: ${statusCode} ${statusText}`;
        super(description);
        this.statusCode = statusCode;
        this.statusText = statusText;
        this.name = 'CkanHttpError';
    }
}
|
|
40
|
+
/**
 * Thrown when the CKAN API answers with a body whose `success` flag is falsy.
 * `errorType` holds the error category reported by the API.
 */
export class CkanApiError extends Error {
    errorType;
    /**
     * @param message Human-readable error text; prefixed with "CKAN API error: ".
     * @param errorType Error category reported by the API.
     */
    constructor(message, errorType) {
        const description = `CKAN API error: ${message}`;
        super(description);
        this.errorType = errorType;
        this.name = 'CkanApiError';
    }
}
|
|
48
|
+
/**
 * Call one CKAN action endpoint and return the `result` field of its JSON reply.
 *
 * The request, including reading the JSON body, is aborted after
 * FETCH_TIMEOUT_MS milliseconds.
 *
 * @param action Action name appended to CKAN_BASE.
 * @param params Query parameters; every value is stringified with `String()`.
 * @returns The `result` property of the response body.
 * @throws CkanHttpError when the HTTP status is not OK.
 * @throws CkanApiError when the body's `success` flag is falsy.
 * @throws Error with a "Request timed out" message when the request is aborted.
 */
async function ckanAction(action, params) {
    const endpoint = new URL(`${CKAN_BASE}/${action}`);
    Object.entries(params).forEach(([name, raw]) => {
        endpoint.searchParams.set(name, String(raw));
    });
    const abort = new AbortController();
    const timeoutHandle = setTimeout(() => abort.abort(), FETCH_TIMEOUT_MS);
    try {
        const response = await fetch(endpoint.toString(), { signal: abort.signal });
        if (!response.ok) {
            throw new CkanHttpError(response.status, response.statusText);
        }
        const payload = await response.json();
        if (payload.success) {
            return payload.result;
        }
        // The API reported a failure: surface its category and message.
        const errorType = payload.error?.__type ?? 'Unknown Error';
        const message = payload.error?.message ?? 'Unknown error';
        throw new CkanApiError(message, errorType);
    }
    catch (err) {
        // An abort triggered by our own timer is reported as a timeout; anything else passes through.
        const timedOut = err instanceof Error && err.name === 'AbortError';
        if (timedOut) {
            throw new Error(`Request timed out after ${FETCH_TIMEOUT_MS / 1000}s`);
        }
        throw err;
    }
    finally {
        clearTimeout(timeoutHandle);
    }
}
|
|
78
|
+
const searchCache = new TTLCache();
|
|
79
|
+
const datasetCache = new TTLCache();
|
|
80
|
+
/**
 * Search datasets through the `package_search` action, with in-memory caching.
 *
 * Results are cached per (query, limit) pair for CACHE_TTL_MS milliseconds.
 *
 * @param query Search text sent as the `q` parameter.
 * @param limit Maximum number of rows requested (default 10).
 * @returns `{ total, results }`, where `total` is the `count` reported by the API.
 */
export async function searchDatasets(query, limit = 10) {
    const cacheKey = `search:${query}:${limit}`;
    const hit = searchCache.get(cacheKey);
    if (hit !== undefined) {
        return hit;
    }
    const response = await ckanAction('package_search', { q: query, rows: limit });
    const fresh = { total: response.count, results: response.results };
    searchCache.set(cacheKey, fresh, CACHE_TTL_MS);
    return fresh;
}
|
|
90
|
+
/**
 * Fetch one dataset through the `package_show` action, with in-memory caching.
 *
 * @param id Dataset identifier passed as the `id` parameter.
 * @returns The dataset object; served from cache for CACHE_TTL_MS milliseconds.
 */
export async function getDataset(id) {
    const cacheKey = `dataset:${id}`;
    const hit = datasetCache.get(cacheKey);
    if (hit !== undefined) {
        return hit;
    }
    const fetched = await ckanAction('package_show', { id });
    datasetCache.set(cacheKey, fetched, CACHE_TTL_MS);
    return fetched;
}
|
|
99
|
+
/**
 * Read a page of rows for a resource through the `datastore_search` action.
 * This call is not cached.
 *
 * @param resourceId Resource identifier sent as `resource_id`.
 * @param limit Page size (default 50).
 * @param offset Number of rows to skip (default 0).
 * @returns The action's `result` payload.
 */
export async function getResourceData(resourceId, limit = 50, offset = 0) {
    const query = { resource_id: resourceId, limit, offset };
    return ckanAction('datastore_search', query);
}
|
|
106
|
+
/**
 * Fetch a resource's metadata through the `resource_show` action.
 *
 * @param resourceId Resource identifier sent as `id`.
 * @returns The action's `result` payload.
 */
export async function getResource(resourceId) {
    const query = { id: resourceId };
    return ckanAction('resource_show', query);
}
|
|
109
|
+
/**
 * List organizations through the `organization_list` action.
 * `all_fields: true` is sent so the API returns full objects for each entry.
 *
 * @returns The action's `result` payload.
 */
export async function listOrganizations() {
    const query = { all_fields: true };
    return ckanAction('organization_list', query);
}
|
|
112
|
+
/**
 * Describe the columns of a datastore resource through the `datastore_info` action.
 *
 * The field whose id is `_id` is left out (presumably the datastore's
 * internal row id; confirm against the CKAN docs).
 *
 * @param resourceId Resource identifier sent as `id`.
 * @returns One `{ id, type, label, description }` entry per remaining field;
 *          `label` and `description` are `null` when the field has no `info`.
 */
export async function getResourceSchema(resourceId) {
    const info = await ckanAction('datastore_info', { id: resourceId });
    const visibleFields = info.fields.filter(column => column.id !== '_id');
    return visibleFields.map(column => {
        const label = column.info?.label ?? null;
        const description = column.info?.notes ?? null;
        return { id: column.id, type: column.type, label, description };
    });
}
|
|
123
|
+
const PARSEABLE_FORMATS = new Set(['CSV', 'TSV', 'JSON']);
|
|
124
|
+
function decodeText(buffer, contentType) {
|
|
125
|
+
const charsetMatch = /charset=([^\s;]+)/i.exec(contentType);
|
|
126
|
+
const charset = (charsetMatch?.[1] ?? 'utf-8').replace(/^"|"$/g, '');
|
|
127
|
+
const text = new TextDecoder(charset).decode(buffer);
|
|
128
|
+
if (text.includes('\uFFFD') && charset.toLowerCase() === 'utf-8') {
|
|
129
|
+
return new TextDecoder('iso-8859-1').decode(buffer);
|
|
130
|
+
}
|
|
131
|
+
return text;
|
|
132
|
+
}
|
|
133
|
+
/**
 * Download a CSV, TSV or JSON file and return one page of its rows.
 *
 * The whole file is downloaded and parsed in memory before paging. The
 * FETCH_TIMEOUT_MS timer covers both the request and the body download.
 *
 * Delimited files are split on line breaks before fields are parsed, so a
 * quoted field that contains a line break is not supported.
 *
 * @param url Location of the file.
 * @param format Format label; compared case-insensitively after trimming.
 * @param limit Maximum number of rows to return.
 * @param offset Number of data rows to skip.
 * @returns `{ fields, records, total, source: 'file' }`; `total` counts all
 *          rows in the file, not just the returned page.
 * @throws NotParseableError when the format is not in PARSEABLE_FORMATS.
 * @throws CkanHttpError when the HTTP status is not OK.
 * @throws Error with a "Request timed out" message when the request is aborted.
 */
export async function fetchAndParseFile(url, format, limit, offset) {
    const normalizedFormat = format.toUpperCase().trim();
    if (!PARSEABLE_FORMATS.has(normalizedFormat)) {
        throw new NotParseableError(normalizedFormat, url);
    }
    const isJson = normalizedFormat === 'JSON';
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);
    let json;
    let text = '';
    try {
        const response = await fetch(url, { signal: controller.signal });
        if (!response.ok) {
            throw new CkanHttpError(response.status, response.statusText);
        }
        // Read the body while the timer is still armed: fetch() resolves as soon
        // as headers arrive, so a stalled download would otherwise never time out.
        if (isJson) {
            json = await response.json();
        }
        else {
            const buffer = await response.arrayBuffer();
            text = decodeText(buffer, response.headers.get('content-type') ?? '');
        }
    }
    catch (err) {
        if (err instanceof Error && err.name === 'AbortError') {
            throw new Error(`Request timed out after ${FETCH_TIMEOUT_MS / 1000}s`);
        }
        throw err;
    }
    finally {
        clearTimeout(timer);
    }
    if (isJson) {
        // A non-array document is exposed as a single row holding the document.
        const rows = Array.isArray(json)
            ? json
            : [{ data: json }];
        const page = rows.slice(offset, offset + limit);
        const firstRow = page[0];
        // Only derive column names from a real object; `Object.keys(null)` would throw.
        const fields = firstRow !== null && typeof firstRow === 'object'
            ? Object.keys(firstRow).map(key => ({ id: key, type: 'text' }))
            : [];
        return { fields, records: page, total: rows.length, source: 'file' };
    }
    // CSV / TSV
    const separator = normalizedFormat === 'TSV' ? '\t' : ',';
    const lines = text.split(/\r?\n/).filter(l => l.trim() !== '');
    if (lines.length === 0) {
        return { fields: [], records: [], total: 0, source: 'file' };
    }
    // The first non-blank line is the header row.
    const headers = parseDelimitedLine(lines[0], separator);
    const dataLines = lines.slice(1);
    const page = dataLines.slice(offset, offset + limit);
    const records = page.map(line => {
        const values = parseDelimitedLine(line, separator);
        // Short rows are padded with empty strings; extra values are dropped.
        return Object.fromEntries(headers.map((h, i) => [h, values[i] ?? '']));
    });
    const fields = headers.map(h => ({ id: h, type: 'text' }));
    return { fields, records, total: dataLines.length, source: 'file' };
}
|
|
186
|
+
/**
 * Split one line of delimited text into its fields.
 *
 * A double quote toggles quoted mode; inside quoted mode the separator is
 * treated as ordinary text and a doubled quote (`""`) yields one literal quote.
 * The quote characters themselves are not part of the output.
 *
 * @param line A single line without its line terminator.
 * @param separator The field separator character.
 * @returns The field values in order; an empty line yields `['']`.
 */
function parseDelimitedLine(line, separator) {
    const fields = [];
    let pending = '';
    let quoted = false;
    let pos = 0;
    while (pos < line.length) {
        const ch = line[pos];
        if (ch !== '"') {
            if (ch === separator && !quoted) {
                // Unquoted separator: the current field is complete.
                fields.push(pending);
                pending = '';
            }
            else {
                pending += ch;
            }
        }
        else if (quoted && line[pos + 1] === '"') {
            // Escaped quote inside a quoted field: emit one quote, skip its twin.
            pending += '"';
            pos++;
        }
        else {
            quoted = !quoted;
        }
        pos++;
    }
    fields.push(pending);
    return fields;
}
|
|
@@ -1,11 +1,18 @@
|
|
|
1
|
+
import { createRequire } from 'node:module';
|
|
1
2
|
import { McpServer } from '@modelcontextprotocol/server';
|
|
2
3
|
import { registerSearchTool } from './tools/search.js';
|
|
3
4
|
import { registerDatasetTool } from './tools/dataset.js';
|
|
4
5
|
import { registerResourceTool } from './tools/resource.js';
|
|
6
|
+
import { registerOrganizationsTool } from './tools/organizations.js';
|
|
7
|
+
import { registerSchemaTool } from './tools/schema.js';
|
|
8
|
+
const _require = createRequire(import.meta.url);
|
|
9
|
+
const { version } = _require('../package.json');
|
|
5
10
|
export const server = new McpServer({
|
|
6
11
|
name: 'datos-gob-cl',
|
|
7
|
-
version
|
|
12
|
+
version,
|
|
8
13
|
});
|
|
9
14
|
registerSearchTool(server);
|
|
10
15
|
registerDatasetTool(server);
|
|
11
16
|
registerResourceTool(server);
|
|
17
|
+
registerOrganizationsTool(server);
|
|
18
|
+
registerSchemaTool(server);
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
import { listOrganizations } from '../ckan.js';
|
|
3
|
+
/**
 * Register the `list_organizations` tool on the given MCP server.
 *
 * The tool takes no input. It returns, as pretty-printed JSON text, the
 * organizations that have at least one dataset, sorted by dataset count in
 * descending order, with descriptions cut to 150 characters. Failures are
 * returned as a tool error result rather than thrown.
 *
 * @param server Server object exposing `registerTool`.
 */
export function registerOrganizationsTool(server) {
    const handler = async () => {
        try {
            const all = await listOrganizations();
            const publishing = all.filter(org => org.package_count > 0);
            publishing.sort((left, right) => right.package_count - left.package_count);
            const summary = publishing.map(org => ({
                id: org.name,
                title: org.title,
                description: org.description?.slice(0, 150) ?? '',
                dataset_count: org.package_count,
            }));
            return {
                content: [{ type: 'text', text: JSON.stringify(summary, null, 2) }],
            };
        }
        catch (error) {
            const detail = error instanceof Error ? error.message : String(error);
            return {
                content: [{ type: 'text', text: `Error: ${detail}` }],
                isError: true,
            };
        }
    };
    server.registerTool('list_organizations', {
        title: 'List Organizations',
        description: 'List all government institutions (organizations) that publish datasets on datos.gob.cl. Use this to discover which institutions have data before searching.',
        inputSchema: z.object({}),
    }, handler);
}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { z } from 'zod';
|
|
2
|
-
import { getResourceData, getResource, fetchAndParseFile } from '../ckan.js';
|
|
2
|
+
import { getResourceData, getResource, fetchAndParseFile, NotParseableError, CkanHttpError, CkanApiError } from '../ckan.js';
|
|
3
3
|
export function registerResourceTool(server) {
|
|
4
4
|
server.registerTool('get_resource_data', {
|
|
5
5
|
title: 'Get Resource Data',
|
|
@@ -30,11 +30,10 @@ export function registerResourceTool(server) {
|
|
|
30
30
|
};
|
|
31
31
|
}
|
|
32
32
|
catch (datastoreError) {
|
|
33
|
-
const
|
|
34
|
-
|
|
35
|
-
dsMessage.includes('404') ||
|
|
36
|
-
dsMessage.toUpperCase().includes('NOT FOUND');
|
|
33
|
+
const isNoDatastore = (datastoreError instanceof CkanHttpError && datastoreError.statusCode === 404) ||
|
|
34
|
+
(datastoreError instanceof CkanApiError && datastoreError.errorType.toLowerCase().includes('not found'));
|
|
37
35
|
if (!isNoDatastore) {
|
|
36
|
+
const dsMessage = datastoreError instanceof Error ? datastoreError.message : String(datastoreError);
|
|
38
37
|
return {
|
|
39
38
|
content: [{ type: 'text', text: `Error: ${dsMessage}` }],
|
|
40
39
|
isError: true,
|
|
@@ -62,23 +61,22 @@ export function registerResourceTool(server) {
|
|
|
62
61
|
};
|
|
63
62
|
}
|
|
64
63
|
catch (fileError) {
|
|
65
|
-
const fileMessage = fileError instanceof Error ? fileError.message : String(fileError);
|
|
66
64
|
// Format not parseable — return the URL so the AI can guide the user
|
|
67
|
-
if (
|
|
68
|
-
const [, fmt, url] = fileMessage.split(':');
|
|
65
|
+
if (fileError instanceof NotParseableError) {
|
|
69
66
|
return {
|
|
70
67
|
content: [{
|
|
71
68
|
type: 'text',
|
|
72
69
|
text: JSON.stringify({
|
|
73
70
|
source: 'file',
|
|
74
71
|
parseable: false,
|
|
75
|
-
format:
|
|
76
|
-
url,
|
|
77
|
-
message: `This resource is a ${
|
|
72
|
+
format: fileError.format,
|
|
73
|
+
url: fileError.url,
|
|
74
|
+
message: `This resource is a ${fileError.format} file and cannot be parsed automatically. Download it directly from the URL above.`,
|
|
78
75
|
}, null, 2),
|
|
79
76
|
}],
|
|
80
77
|
};
|
|
81
78
|
}
|
|
79
|
+
const fileMessage = fileError instanceof Error ? fileError.message : String(fileError);
|
|
82
80
|
return {
|
|
83
81
|
content: [{ type: 'text', text: `Error reading file: ${fileMessage}` }],
|
|
84
82
|
isError: true,
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
import { getResourceSchema } from '../ckan.js';
|
|
3
|
+
/**
 * Register the `get_resource_schema` tool on the given MCP server.
 *
 * The tool takes a `resource_id` and returns the resource's column schema as
 * pretty-printed JSON text. Failures are returned as a tool error result
 * rather than thrown.
 *
 * @param server Server object exposing `registerTool`.
 */
export function registerSchemaTool(server) {
    const handler = async ({ resource_id }) => {
        try {
            const schema = await getResourceSchema(resource_id);
            return {
                content: [{ type: 'text', text: JSON.stringify(schema, null, 2) }],
            };
        }
        catch (error) {
            const detail = error instanceof Error ? error.message : String(error);
            return {
                content: [{ type: 'text', text: `Error: ${detail}` }],
                isError: true,
            };
        }
    };
    server.registerTool('get_resource_schema', {
        title: 'Get Resource Schema',
        description: 'Get the column schema (field names, types, and descriptions) for a CKAN datastore resource. Only works for resources that have datastore enabled (datastore_available: true from get_dataset). Use this before reading data to understand the structure.',
        inputSchema: z.object({
            resource_id: z.string().describe('UUID of the resource (from get_dataset)'),
        }),
    }, handler);
}
|
|
@@ -11,7 +11,7 @@ export function registerSearchTool(server) {
|
|
|
11
11
|
}, async ({ query, limit }) => {
|
|
12
12
|
try {
|
|
13
13
|
const datasets = await searchDatasets(query, limit ?? 10);
|
|
14
|
-
const formatted = datasets.map(d => ({
|
|
14
|
+
const formatted = datasets.results.map(d => ({
|
|
15
15
|
id: d.name,
|
|
16
16
|
title: d.title,
|
|
17
17
|
description: d.notes?.slice(0, 200) ?? '',
|
|
@@ -19,7 +19,7 @@ export function registerSearchTool(server) {
|
|
|
19
19
|
resource_count: d.num_resources,
|
|
20
20
|
}));
|
|
21
21
|
return {
|
|
22
|
-
content: [{ type: 'text', text: JSON.stringify(formatted, null, 2) }],
|
|
22
|
+
content: [{ type: 'text', text: JSON.stringify({ total: datasets.total, returned: formatted.length, results: formatted }, null, 2) }],
|
|
23
23
|
};
|
|
24
24
|
}
|
|
25
25
|
catch (error) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mcp-chilegob-dataset",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.3.1",
|
|
4
4
|
"description": "MCP server exposing Chile's open government dataset portal (datos.gob.cl / CKAN API v3)",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"license": "MIT",
|
|
@@ -34,10 +34,10 @@
|
|
|
34
34
|
"build": "tsc",
|
|
35
35
|
"start": "node dist/index.js",
|
|
36
36
|
"typecheck": "tsc --noEmit",
|
|
37
|
-
"prepublishOnly": "tsc"
|
|
37
|
+
"prepublishOnly": "tsc",
|
|
38
|
+
"test": "vitest run"
|
|
38
39
|
},
|
|
39
40
|
"dependencies": {
|
|
40
|
-
"@cfworker/json-schema": "^4.1.1",
|
|
41
41
|
"@hono/node-server": "^1.19.13",
|
|
42
42
|
"@modelcontextprotocol/hono": "^2.0.0-alpha.2",
|
|
43
43
|
"@modelcontextprotocol/server": "^2.0.0-alpha.2",
|
|
@@ -47,6 +47,7 @@
|
|
|
47
47
|
"devDependencies": {
|
|
48
48
|
"@types/node": "^20.11.17",
|
|
49
49
|
"tsx": "^4.7.1",
|
|
50
|
-
"typescript": "^5.8.3"
|
|
50
|
+
"typescript": "^5.8.3",
|
|
51
|
+
"vitest": "^4.1.4"
|
|
51
52
|
}
|
|
52
53
|
}
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
import { describe, it, expect, vi, afterEach } from 'vitest'
|
|
2
|
+
import {
|
|
3
|
+
NotParseableError,
|
|
4
|
+
CkanHttpError,
|
|
5
|
+
CkanApiError,
|
|
6
|
+
fetchAndParseFile,
|
|
7
|
+
} from '../ckan.ts'
|
|
8
|
+
|
|
9
|
+
// Restore every spy after each test so a mocked fetch never leaks into the next one.
afterEach((): void => {
  vi.restoreAllMocks()
})
|
|
12
|
+
|
|
13
|
+
// Each custom error must expose its constructor arguments and a matching `name`.
describe('Error classes', () => {
  it('NotParseableError sets format and url', () => {
    const fileUrl = 'https://example.com/file.xls'
    const error = new NotParseableError('XLS', fileUrl)

    expect(error).toBeInstanceOf(Error)
    expect(error.name).toBe('NotParseableError')
    expect(error.format).toBe('XLS')
    expect(error.url).toBe(fileUrl)
  })

  it('CkanHttpError sets statusCode and statusText', () => {
    const error = new CkanHttpError(404, 'Not Found')

    expect(error.name).toBe('CkanHttpError')
    expect(error.statusCode).toBe(404)
    expect(error.statusText).toBe('Not Found')
  })

  it('CkanApiError sets errorType', () => {
    const error = new CkanApiError('something failed', 'Validation Error')

    expect(error.name).toBe('CkanApiError')
    expect(error.errorType).toBe('Validation Error')
  })
})
|
|
35
|
+
|
|
36
|
+
// Exercises fetchAndParseFile against a mocked global fetch; no network access.
describe('fetchAndParseFile', () => {
  // Queue one canned Response for the next global fetch() call.
  const stubFetchOnce = (body: string, init: ResponseInit) =>
    vi.spyOn(global, 'fetch').mockResolvedValueOnce(new Response(body, init))

  // Unsupported formats are rejected before any request is made.
  it.each([
    ['XLS', 'https://x.com/f.xls'],
    ['PDF', 'https://x.com/f.pdf'],
  ])('throws NotParseableError for %s format', async (format, url) => {
    await expect(fetchAndParseFile(url, format, 10, 0))
      .rejects.toBeInstanceOf(NotParseableError)
  })

  it('parses CSV and returns correct fields and records', async () => {
    stubFetchOnce(
      'Region,Provincia,Comuna\nCOQUIMBO,ELQUI,LA SERENA\nATACAMA,COPIAPO,COPIAPO\n',
      { status: 200, headers: { 'content-type': 'text/csv; charset=utf-8' } },
    )

    const parsed = await fetchAndParseFile('https://x.com/data.csv', 'CSV', 10, 0)

    expect(parsed.source).toBe('file')
    expect(parsed.total).toBe(2)
    expect(parsed.fields).toEqual([
      { id: 'Region', type: 'text' },
      { id: 'Provincia', type: 'text' },
      { id: 'Comuna', type: 'text' },
    ])
    expect(parsed.records).toHaveLength(2)
    expect(parsed.records[0]).toEqual({ Region: 'COQUIMBO', Provincia: 'ELQUI', Comuna: 'LA SERENA' })
  })

  it('respects limit and offset', async () => {
    const body = ['A,B', '1,2', '3,4', '5,6', '7,8'].join('\n')
    stubFetchOnce(body, { status: 200, headers: { 'content-type': 'text/csv' } })

    const parsed = await fetchAndParseFile('https://x.com/data.csv', 'CSV', 2, 1)

    expect(parsed.total).toBe(4)
    expect(parsed.records).toHaveLength(2)
    expect(parsed.records[0]).toEqual({ A: '3', B: '4' })
  })

  it('parses TSV and splits on tab', async () => {
    stubFetchOnce('A\tB\n1\t2\n3\t4\n', {
      status: 200,
      headers: { 'content-type': 'text/tab-separated-values' },
    })

    const parsed = await fetchAndParseFile('https://x.com/data.tsv', 'TSV', 10, 0)

    expect(parsed.source).toBe('file')
    expect(parsed.total).toBe(2)
    expect(parsed.records[0]).toEqual({ A: '1', B: '2' })
  })

  it('parses JSON array and returns records', async () => {
    const body = JSON.stringify([{ name: 'Chile', code: 'CL' }, { name: 'Peru', code: 'PE' }])
    stubFetchOnce(body, { status: 200, headers: { 'content-type': 'application/json' } })

    const parsed = await fetchAndParseFile('https://x.com/data.json', 'JSON', 10, 0)

    expect(parsed.source).toBe('file')
    expect(parsed.total).toBe(2)
    expect(parsed.fields).toEqual([{ id: 'name', type: 'text' }, { id: 'code', type: 'text' }])
    expect(parsed.records[0]).toEqual({ name: 'Chile', code: 'CL' })
  })

  it('returns empty result for empty CSV body', async () => {
    // A body made only of blank lines counts as empty.
    stubFetchOnce('\n\n  \n', { status: 200, headers: { 'content-type': 'text/csv' } })

    const parsed = await fetchAndParseFile('https://x.com/empty.csv', 'CSV', 10, 0)

    expect(parsed.total).toBe(0)
    expect(parsed.fields).toEqual([])
    expect(parsed.records).toEqual([])
  })

  it('throws CkanHttpError with correct status on non-ok response', async () => {
    stubFetchOnce('Not Found', { status: 404, statusText: 'Not Found' })

    const failure = await fetchAndParseFile('https://x.com/data.csv', 'CSV', 10, 0).catch(e => e)

    expect(failure).toBeInstanceOf(CkanHttpError)
    expect(failure.statusCode).toBe(404)
    expect(failure.statusText).toBe('Not Found')
  })
})
|
package/src/ckan.ts
CHANGED
|
@@ -1,4 +1,54 @@
|
|
|
1
1
|
const CKAN_BASE = 'https://datos.gob.cl/api/3/action'
|
|
2
|
+
const FETCH_TIMEOUT_MS = 10_000
|
|
3
|
+
const CACHE_TTL_MS = 5 * 60 * 1000
|
|
4
|
+
|
|
5
|
+
/**
 * Minimal in-memory key/value cache whose entries expire after a per-entry TTL.
 *
 * Expired entries are dropped when they are read and, additionally, swept on
 * every write. Without the sweep, a key that is written once and never read
 * again would stay in the map for the lifetime of the process.
 */
class TTLCache<V> {
  private readonly store = new Map<string, { value: V; expiresAt: number }>()

  /**
   * Look up a cached value.
   * @param key Cache key.
   * @returns The stored value, or `undefined` when the key is absent or expired.
   */
  get(key: string): V | undefined {
    const entry = this.store.get(key)
    if (entry === undefined) return undefined
    if (Date.now() > entry.expiresAt) {
      this.store.delete(key)
      return undefined
    }
    return entry.value
  }

  /**
   * Store `value` under `key` for `ttlMs` milliseconds.
   * @param key Cache key.
   * @param value Value to cache.
   * @param ttlMs Lifetime of the entry in milliseconds, counted from now.
   */
  set(key: string, value: V, ttlMs: number): void {
    const now = Date.now()
    // Evict everything that has already expired so the map cannot grow without bound.
    // Deleting the current entry while iterating a Map is safe.
    for (const [existingKey, existing] of this.store) {
      if (now > existing.expiresAt) {
        this.store.delete(existingKey)
      }
    }
    this.store.set(key, { value, expiresAt: now + ttlMs })
  }
}
|
|
22
|
+
|
|
23
|
+
/**
 * Thrown when a resource's file format is not one this module can parse.
 * Carries the format and the download URL so callers can report both.
 */
export class NotParseableError extends Error {
  public readonly format: string
  public readonly url: string

  /**
   * @param format Format label of the resource.
   * @param url Location of the file that could not be parsed.
   */
  constructor(format: string, url: string) {
    super(`Format not parseable: ${format} (${url})`)
    this.format = format
    this.url = url
    this.name = 'NotParseableError'
  }
}
|
|
32
|
+
|
|
33
|
+
/**
 * Thrown when an HTTP response comes back with a non-success status.
 * Exposes the numeric status and its text so callers can branch on them.
 */
export class CkanHttpError extends Error {
  public readonly statusCode: number
  public readonly statusText: string

  /**
   * @param statusCode HTTP status code of the response.
   * @param statusText HTTP status text of the response.
   */
  constructor(statusCode: number, statusText: string) {
    super(`CKAN HTTP error: ${statusCode} ${statusText}`)
    this.statusCode = statusCode
    this.statusText = statusText
    this.name = 'CkanHttpError'
  }
}
|
|
42
|
+
|
|
43
|
+
/**
 * Thrown when the CKAN API answers with a body whose `success` flag is falsy.
 * `errorType` holds the error category reported by the API.
 */
export class CkanApiError extends Error {
  public readonly errorType: string

  /**
   * @param message Human-readable error text; prefixed with "CKAN API error: ".
   * @param errorType Error category reported by the API.
   */
  constructor(message: string, errorType: string) {
    super(`CKAN API error: ${message}`)
    this.errorType = errorType
    this.name = 'CkanApiError'
  }
}
|
|
2
52
|
|
|
3
53
|
export interface CkanDataset {
|
|
4
54
|
id: string
|
|
@@ -43,26 +93,64 @@ async function ckanAction<T>(action: string, params: Record<string, unknown>): P
|
|
|
43
93
|
url.searchParams.set(key, String(value))
|
|
44
94
|
}
|
|
45
95
|
|
|
46
|
-
const
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
96
|
+
const controller = new AbortController()
|
|
97
|
+
const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS)
|
|
98
|
+
try {
|
|
99
|
+
const response = await fetch(url.toString(), { signal: controller.signal })
|
|
100
|
+
if (!response.ok) {
|
|
101
|
+
throw new CkanHttpError(response.status, response.statusText)
|
|
102
|
+
}
|
|
50
103
|
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
104
|
+
const data = await response.json() as { success: boolean; result: T; error?: { __type: string; message?: string } }
|
|
105
|
+
if (!data.success) {
|
|
106
|
+
const errorType = data.error?.__type ?? 'Unknown Error'
|
|
107
|
+
const message = data.error?.message ?? 'Unknown error'
|
|
108
|
+
throw new CkanApiError(message, errorType)
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
return data.result
|
|
112
|
+
} catch (err) {
|
|
113
|
+
if (err instanceof Error && err.name === 'AbortError') {
|
|
114
|
+
throw new Error(`Request timed out after ${FETCH_TIMEOUT_MS / 1000}s`)
|
|
115
|
+
}
|
|
116
|
+
throw err
|
|
117
|
+
} finally {
|
|
118
|
+
clearTimeout(timer)
|
|
54
119
|
}
|
|
120
|
+
}
|
|
55
121
|
|
|
56
|
-
|
|
122
|
+
/** One organization entry as returned by `listOrganizations`. */
export interface CkanOrganization {
  /** Identifier of the organization. */
  name: string
  /** Display title. */
  title: string
  /** Free-text description; may be `null`. */
  description: string | null
  /** Number of datasets attributed to the organization. */
  package_count: number
}
|
|
58
128
|
|
|
59
|
-
export
|
|
60
|
-
|
|
61
|
-
|
|
129
|
+
/** Outcome of a dataset search: the page of datasets plus the overall match count. */
export interface CkanSearchResult {
  /** Match count reported by the API (its `count` field). */
  total: number
  /** Datasets returned for this request. */
  results: CkanDataset[]
}
|
|
133
|
+
|
|
134
|
+
const searchCache = new TTLCache<CkanSearchResult>()
|
|
135
|
+
const datasetCache = new TTLCache<CkanDataset>()
|
|
136
|
+
|
|
137
|
+
/**
 * Search datasets through the `package_search` action, with in-memory caching.
 *
 * Results are cached per (query, limit) pair for CACHE_TTL_MS milliseconds.
 *
 * @param query Search text sent as the `q` parameter.
 * @param limit Maximum number of rows requested (default 10).
 * @returns The matching datasets and the total count reported by the API.
 */
export async function searchDatasets(query: string, limit: number = 10): Promise<CkanSearchResult> {
  const cacheKey = `search:${query}:${limit}`
  const hit = searchCache.get(cacheKey)
  if (hit !== undefined) {
    return hit
  }

  const response = await ckanAction<{ count: number; results: CkanDataset[] }>(
    'package_search',
    { q: query, rows: limit },
  )
  const fresh: CkanSearchResult = { total: response.count, results: response.results }
  searchCache.set(cacheKey, fresh, CACHE_TTL_MS)
  return fresh
}
|
|
63
146
|
|
|
64
147
|
/**
 * Fetch one dataset through the `package_show` action, with in-memory caching.
 *
 * @param id Dataset identifier passed as the `id` parameter.
 * @returns The dataset; served from cache for CACHE_TTL_MS milliseconds.
 */
export async function getDataset(id: string): Promise<CkanDataset> {
  const cacheKey = `dataset:${id}`
  const hit = datasetCache.get(cacheKey)
  if (hit !== undefined) {
    return hit
  }

  const fetched = await ckanAction<CkanDataset>('package_show', { id })
  datasetCache.set(cacheKey, fetched, CACHE_TTL_MS)
  return fetched
}
|
|
67
155
|
|
|
68
156
|
export async function getResourceData(
|
|
@@ -81,8 +169,47 @@ export async function getResource(resourceId: string): Promise<CkanResourceDetai
|
|
|
81
169
|
return ckanAction<CkanResourceDetail>('resource_show', { id: resourceId })
|
|
82
170
|
}
|
|
83
171
|
|
|
172
|
+
/**
 * List organizations through the CKAN `organization_list` action,
 * requesting full records (`all_fields: true`).
 *
 * @returns The organization records returned by the API.
 */
export async function listOrganizations(): Promise<CkanOrganization[]> {
  const organizations = await ckanAction<CkanOrganization[]>('organization_list', { all_fields: true })
  return organizations
}
|
|
175
|
+
|
|
176
|
+
/**
 * Column description produced by `getResourceSchema` for one datastore field.
 */
export interface CkanFieldSchema {
  /** Field identifier. */
  id: string

  /** Field type string as reported by the API. */
  type: string

  /** Label taken from the field's `info.label`, or `null` when absent. */
  label: string | null

  /** Description taken from the field's `info.notes`, or `null` when absent. */
  description: string | null
}
|
|
182
|
+
|
|
183
|
+
/**
 * Field entry as typed for the `datastore_info` response, before it is
 * flattened into a `CkanFieldSchema`.
 */
interface RawField {
  /** Field identifier. */
  id: string

  /** Field type string. */
  type: string

  /** Optional metadata; `label` and `notes` may each be missing. */
  info?: {
    label?: string
    notes?: string
  }
}
|
|
188
|
+
|
|
189
|
+
/**
 * Read the column schema of a resource through the CKAN `datastore_info` action.
 *
 * The field whose id is `_id` is left out, and each remaining field is
 * flattened so `info.label` / `info.notes` become `label` / `description`
 * (`null` when missing).
 *
 * @param resourceId - Resource identifier passed as the `id` parameter.
 * @returns One schema entry per remaining field, in response order.
 */
export async function getResourceSchema(resourceId: string): Promise<CkanFieldSchema[]> {
  const info = await ckanAction<{ id: string; fields: RawField[] }>('datastore_info', { id: resourceId })

  const schema: CkanFieldSchema[] = []
  for (const raw of info.fields) {
    if (raw.id === '_id') {
      continue
    }
    const meta = raw.info
    schema.push({
      id: raw.id,
      type: raw.type,
      label: meta?.label ?? null,
      description: meta?.notes ?? null,
    })
  }
  return schema
}
|
|
200
|
+
|
|
84
201
|
// Upper-cased formats that fetchAndParseFile accepts; any other format
// makes it throw NotParseableError.
const PARSEABLE_FORMATS = new Set(['CSV', 'TSV', 'JSON'])
|
|
85
202
|
|
|
203
|
+
/**
 * Decode a downloaded file body to text using the charset announced in the
 * Content-Type header, defaulting to UTF-8.
 *
 * Two safeguards apply:
 * - An unknown or empty charset label falls back to UTF-8 instead of letting
 *   the `TextDecoder` constructor throw a `RangeError`.
 * - If the bytes were decoded as UTF-8 (under any alias such as `utf8`) and
 *   the result contains U+FFFD replacement characters, the buffer is decoded
 *   again as `iso-8859-1`, on the assumption that the file is a mislabeled
 *   single-byte encoding.
 *
 * @param buffer - Raw response bytes.
 * @param contentType - Value of the Content-Type header (may be empty).
 * @returns The decoded text.
 */
function decodeText(buffer: ArrayBuffer, contentType: string): string {
  // The charset value may be wrapped in double quotes; strip them.
  const label = /charset=([^\s;]+)/i.exec(contentType)?.[1]?.replace(/^"|"$/g, '') ?? 'utf-8'

  let decoder: TextDecoder
  try {
    decoder = new TextDecoder(label)
  } catch {
    // Label not recognised by the runtime: treat the body as UTF-8.
    decoder = new TextDecoder('utf-8')
  }

  const text = decoder.decode(buffer)

  // `decoder.encoding` is the canonical name, so aliases of UTF-8 are covered too.
  if (decoder.encoding === 'utf-8' && text.includes('\uFFFD')) {
    return new TextDecoder('iso-8859-1').decode(buffer)
  }
  return text
}
|
|
212
|
+
|
|
86
213
|
export async function fetchAndParseFile(
|
|
87
214
|
url: string,
|
|
88
215
|
format: string,
|
|
@@ -92,14 +219,24 @@ export async function fetchAndParseFile(
|
|
|
92
219
|
const normalizedFormat = format.toUpperCase().trim()
|
|
93
220
|
|
|
94
221
|
if (!PARSEABLE_FORMATS.has(normalizedFormat)) {
|
|
95
|
-
throw new
|
|
96
|
-
`FORMAT_NOT_PARSEABLE:${normalizedFormat}:${url}`
|
|
97
|
-
)
|
|
222
|
+
throw new NotParseableError(normalizedFormat, url)
|
|
98
223
|
}
|
|
99
224
|
|
|
100
|
-
const
|
|
225
|
+
const controller = new AbortController()
|
|
226
|
+
const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS)
|
|
227
|
+
let response: Response
|
|
228
|
+
try {
|
|
229
|
+
response = await fetch(url, { signal: controller.signal })
|
|
230
|
+
} catch (err) {
|
|
231
|
+
if (err instanceof Error && err.name === 'AbortError') {
|
|
232
|
+
throw new Error(`Request timed out after ${FETCH_TIMEOUT_MS / 1000}s`)
|
|
233
|
+
}
|
|
234
|
+
throw err
|
|
235
|
+
} finally {
|
|
236
|
+
clearTimeout(timer)
|
|
237
|
+
}
|
|
101
238
|
if (!response.ok) {
|
|
102
|
-
throw new
|
|
239
|
+
throw new CkanHttpError(response.status, response.statusText)
|
|
103
240
|
}
|
|
104
241
|
|
|
105
242
|
if (normalizedFormat === 'JSON') {
|
|
@@ -117,7 +254,9 @@ export async function fetchAndParseFile(
|
|
|
117
254
|
}
|
|
118
255
|
|
|
119
256
|
// CSV / TSV
|
|
120
|
-
const
|
|
257
|
+
const buffer = await response.arrayBuffer()
|
|
258
|
+
const contentType = response.headers.get('content-type') ?? ''
|
|
259
|
+
const text = decodeText(buffer, contentType)
|
|
121
260
|
const separator = normalizedFormat === 'TSV' ? '\t' : ','
|
|
122
261
|
const lines = text.split(/\r?\n/).filter(l => l.trim() !== '')
|
|
123
262
|
|
package/src/server.ts
CHANGED
|
@@ -1,13 +1,21 @@
|
|
|
1
|
+
import { createRequire } from 'node:module'
|
|
1
2
|
import { McpServer } from '@modelcontextprotocol/server'
|
|
2
3
|
import { registerSearchTool } from './tools/search.js'
|
|
3
4
|
import { registerDatasetTool } from './tools/dataset.js'
|
|
4
5
|
import { registerResourceTool } from './tools/resource.js'
|
|
6
|
+
import { registerOrganizationsTool } from './tools/organizations.js'
|
|
7
|
+
import { registerSchemaTool } from './tools/schema.js'
|
|
8
|
+
|
|
9
|
+
const _require = createRequire(import.meta.url)

/**
 * Read the package version from package.json.
 *
 * The path is resolved relative to this module, so it depends on where the
 * module runs from: one level up from `src/`, but two levels up when the
 * compiled file lives in `dist/src/`. Both locations are tried in that order.
 * If neither yields a string version, a placeholder is returned so the server
 * can still start.
 */
function readPackageVersion(): string {
  for (const candidate of ['../package.json', '../../package.json']) {
    try {
      const pkg = _require(candidate) as { version?: unknown }
      if (typeof pkg.version === 'string') {
        return pkg.version
      }
    } catch {
      // Not found at this location; try the next candidate.
    }
  }
  return '0.0.0'
}

const version = readPackageVersion()

/** MCP server instance exposing the datos.gob.cl tools. */
export const server = new McpServer({
  name: 'datos-gob-cl',
  version,
})

// Each register* call attaches one tool to the shared server instance.
registerSearchTool(server)
registerDatasetTool(server)
registerResourceTool(server)
registerOrganizationsTool(server)
registerSchemaTool(server)
|
package/src/stdio.ts
CHANGED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import type { McpServer } from '@modelcontextprotocol/server'
|
|
2
|
+
import { z } from 'zod'
|
|
3
|
+
import { listOrganizations } from '../ckan.js'
|
|
4
|
+
|
|
5
|
+
/**
 * Register the `list_organizations` tool on the given MCP server.
 *
 * The tool takes no input. It fetches all organizations, keeps those with at
 * least one package, orders them by package count (largest first) and returns
 * a JSON text payload with `id`, `title`, `description` (first 150 characters,
 * empty string when null) and `dataset_count`. Failures are reported as a text
 * result with `isError: true` rather than thrown.
 *
 * @param server - Server instance to register the tool on.
 */
export function registerOrganizationsTool(server: McpServer): void {
  server.registerTool(
    'list_organizations',
    {
      title: 'List Organizations',
      description: 'List all government institutions (organizations) that publish datasets on datos.gob.cl. Use this to discover which institutions have data before searching.',
      inputSchema: z.object({}),
    },
    async () => {
      try {
        // filter() yields a fresh array, so the in-place sort does not touch the fetched list.
        const publishers = (await listOrganizations()).filter(org => org.package_count > 0)
        publishers.sort((left, right) => right.package_count - left.package_count)

        const summary = publishers.map(org => {
          const blurb = org.description?.slice(0, 150) ?? ''
          return {
            id: org.name,
            title: org.title,
            description: blurb,
            dataset_count: org.package_count,
          }
        })

        return {
          content: [{ type: 'text', text: JSON.stringify(summary, null, 2) }],
        }
      } catch (error) {
        const reason = error instanceof Error ? error.message : String(error)
        return {
          content: [{ type: 'text', text: `Error: ${reason}` }],
          isError: true,
        }
      }
    }
  )
}
|
package/src/tools/resource.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import type { McpServer } from '@modelcontextprotocol/server'
|
|
2
2
|
import { z } from 'zod'
|
|
3
|
-
import { getResourceData, getResource, fetchAndParseFile } from '../ckan.js'
|
|
3
|
+
import { getResourceData, getResource, fetchAndParseFile, NotParseableError, CkanHttpError, CkanApiError } from '../ckan.js'
|
|
4
4
|
|
|
5
5
|
export function registerResourceTool(server: McpServer): void {
|
|
6
6
|
server.registerTool(
|
|
@@ -36,13 +36,12 @@ export function registerResourceTool(server: McpServer): void {
|
|
|
36
36
|
}],
|
|
37
37
|
}
|
|
38
38
|
} catch (datastoreError) {
|
|
39
|
-
const dsMessage = datastoreError instanceof Error ? datastoreError.message : String(datastoreError)
|
|
40
39
|
const isNoDatastore =
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
dsMessage.toUpperCase().includes('NOT FOUND')
|
|
40
|
+
(datastoreError instanceof CkanHttpError && datastoreError.statusCode === 404) ||
|
|
41
|
+
(datastoreError instanceof CkanApiError && datastoreError.errorType.toLowerCase().includes('not found'))
|
|
44
42
|
|
|
45
43
|
if (!isNoDatastore) {
|
|
44
|
+
const dsMessage = datastoreError instanceof Error ? datastoreError.message : String(datastoreError)
|
|
46
45
|
return {
|
|
47
46
|
content: [{ type: 'text', text: `Error: ${dsMessage}` }],
|
|
48
47
|
isError: true,
|
|
@@ -71,25 +70,24 @@ export function registerResourceTool(server: McpServer): void {
|
|
|
71
70
|
}],
|
|
72
71
|
}
|
|
73
72
|
} catch (fileError) {
|
|
74
|
-
const fileMessage = fileError instanceof Error ? fileError.message : String(fileError)
|
|
75
|
-
|
|
76
73
|
// Format not parseable — return the URL so the AI can guide the user
|
|
77
|
-
if (
|
|
78
|
-
const [, fmt, url] = fileMessage.split(':')
|
|
74
|
+
if (fileError instanceof NotParseableError) {
|
|
79
75
|
return {
|
|
80
76
|
content: [{
|
|
81
77
|
type: 'text',
|
|
82
78
|
text: JSON.stringify({
|
|
83
79
|
source: 'file',
|
|
84
80
|
parseable: false,
|
|
85
|
-
format:
|
|
86
|
-
url,
|
|
87
|
-
message: `This resource is a ${
|
|
81
|
+
format: fileError.format,
|
|
82
|
+
url: fileError.url,
|
|
83
|
+
message: `This resource is a ${fileError.format} file and cannot be parsed automatically. Download it directly from the URL above.`,
|
|
88
84
|
}, null, 2),
|
|
89
85
|
}],
|
|
90
86
|
}
|
|
91
87
|
}
|
|
92
88
|
|
|
89
|
+
const fileMessage = fileError instanceof Error ? fileError.message : String(fileError)
|
|
90
|
+
|
|
93
91
|
return {
|
|
94
92
|
content: [{ type: 'text', text: `Error reading file: ${fileMessage}` }],
|
|
95
93
|
isError: true,
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import type { McpServer } from '@modelcontextprotocol/server'
|
|
2
|
+
import { z } from 'zod'
|
|
3
|
+
import { getResourceSchema } from '../ckan.js'
|
|
4
|
+
|
|
5
|
+
/**
 * Register the `get_resource_schema` tool on the given MCP server.
 *
 * The tool takes a `resource_id`, looks up the resource's field schema and
 * returns it as pretty-printed JSON text. Failures are reported as a text
 * result with `isError: true` rather than thrown.
 *
 * @param server - Server instance to register the tool on.
 */
export function registerSchemaTool(server: McpServer): void {
  server.registerTool(
    'get_resource_schema',
    {
      title: 'Get Resource Schema',
      description:
        'Get the column schema (field names, types, and descriptions) for a CKAN datastore resource. Only works for resources that have datastore enabled (datastore_available: true from get_dataset). Use this before reading data to understand the structure.',
      inputSchema: z.object({
        resource_id: z.string().describe('UUID of the resource (from get_dataset)'),
      }),
    },
    async ({ resource_id }) => {
      try {
        const schema = await getResourceSchema(resource_id)
        const payload = JSON.stringify(schema, null, 2)
        return {
          content: [{ type: 'text', text: payload }],
        }
      } catch (error) {
        const reason = error instanceof Error ? error.message : String(error)
        return {
          content: [{ type: 'text', text: `Error: ${reason}` }],
          isError: true,
        }
      }
    }
  )
}
|
package/src/tools/search.ts
CHANGED
|
@@ -16,7 +16,7 @@ export function registerSearchTool(server: McpServer): void {
|
|
|
16
16
|
async ({ query, limit }) => {
|
|
17
17
|
try {
|
|
18
18
|
const datasets = await searchDatasets(query, limit ?? 10)
|
|
19
|
-
const formatted = datasets.map(d => ({
|
|
19
|
+
const formatted = datasets.results.map(d => ({
|
|
20
20
|
id: d.name,
|
|
21
21
|
title: d.title,
|
|
22
22
|
description: d.notes?.slice(0, 200) ?? '',
|
|
@@ -24,7 +24,7 @@ export function registerSearchTool(server: McpServer): void {
|
|
|
24
24
|
resource_count: d.num_resources,
|
|
25
25
|
}))
|
|
26
26
|
return {
|
|
27
|
-
content: [{ type: 'text', text: JSON.stringify(formatted, null, 2) }],
|
|
27
|
+
content: [{ type: 'text', text: JSON.stringify({ total: datasets.total, returned: formatted.length, results: formatted }, null, 2) }],
|
|
28
28
|
}
|
|
29
29
|
} catch (error) {
|
|
30
30
|
return {
|
package/dist/ckan.js
DELETED
|
@@ -1,97 +0,0 @@
|
|
|
1
|
-
const CKAN_BASE = 'https://datos.gob.cl/api/3/action';
|
|
2
|
-
async function ckanAction(action, params) {
|
|
3
|
-
const url = new URL(`${CKAN_BASE}/${action}`);
|
|
4
|
-
for (const [key, value] of Object.entries(params)) {
|
|
5
|
-
url.searchParams.set(key, String(value));
|
|
6
|
-
}
|
|
7
|
-
const response = await fetch(url.toString());
|
|
8
|
-
if (!response.ok) {
|
|
9
|
-
throw new Error(`CKAN API error: ${response.status} ${response.statusText}`);
|
|
10
|
-
}
|
|
11
|
-
const data = await response.json();
|
|
12
|
-
if (!data.success) {
|
|
13
|
-
throw new Error(`CKAN error: ${data.error?.message ?? 'Unknown error'}`);
|
|
14
|
-
}
|
|
15
|
-
return data.result;
|
|
16
|
-
}
|
|
17
|
-
export async function searchDatasets(query, limit = 10) {
|
|
18
|
-
const result = await ckanAction('package_search', { q: query, rows: limit });
|
|
19
|
-
return result.results;
|
|
20
|
-
}
|
|
21
|
-
export async function getDataset(id) {
|
|
22
|
-
return ckanAction('package_show', { id });
|
|
23
|
-
}
|
|
24
|
-
export async function getResourceData(resourceId, limit = 50, offset = 0) {
|
|
25
|
-
return ckanAction('datastore_search', {
|
|
26
|
-
resource_id: resourceId,
|
|
27
|
-
limit,
|
|
28
|
-
offset,
|
|
29
|
-
});
|
|
30
|
-
}
|
|
31
|
-
export async function getResource(resourceId) {
|
|
32
|
-
return ckanAction('resource_show', { id: resourceId });
|
|
33
|
-
}
|
|
34
|
-
const PARSEABLE_FORMATS = new Set(['CSV', 'TSV', 'JSON']);
|
|
35
|
-
export async function fetchAndParseFile(url, format, limit, offset) {
|
|
36
|
-
const normalizedFormat = format.toUpperCase().trim();
|
|
37
|
-
if (!PARSEABLE_FORMATS.has(normalizedFormat)) {
|
|
38
|
-
throw new Error(`FORMAT_NOT_PARSEABLE:${normalizedFormat}:${url}`);
|
|
39
|
-
}
|
|
40
|
-
const response = await fetch(url);
|
|
41
|
-
if (!response.ok) {
|
|
42
|
-
throw new Error(`Failed to fetch file: ${response.status} ${response.statusText}`);
|
|
43
|
-
}
|
|
44
|
-
if (normalizedFormat === 'JSON') {
|
|
45
|
-
const json = await response.json();
|
|
46
|
-
const rows = Array.isArray(json)
|
|
47
|
-
? json
|
|
48
|
-
: [{ data: json }];
|
|
49
|
-
const page = rows.slice(offset, offset + limit);
|
|
50
|
-
const fields = page.length > 0
|
|
51
|
-
? Object.keys(page[0]).map(key => ({ id: key, type: 'text' }))
|
|
52
|
-
: [];
|
|
53
|
-
return { fields, records: page, total: rows.length, source: 'file' };
|
|
54
|
-
}
|
|
55
|
-
// CSV / TSV
|
|
56
|
-
const text = await response.text();
|
|
57
|
-
const separator = normalizedFormat === 'TSV' ? '\t' : ',';
|
|
58
|
-
const lines = text.split(/\r?\n/).filter(l => l.trim() !== '');
|
|
59
|
-
if (lines.length === 0) {
|
|
60
|
-
return { fields: [], records: [], total: 0, source: 'file' };
|
|
61
|
-
}
|
|
62
|
-
const headers = parseDelimitedLine(lines[0], separator);
|
|
63
|
-
const dataLines = lines.slice(1);
|
|
64
|
-
const page = dataLines.slice(offset, offset + limit);
|
|
65
|
-
const records = page.map(line => {
|
|
66
|
-
const values = parseDelimitedLine(line, separator);
|
|
67
|
-
return Object.fromEntries(headers.map((h, i) => [h, values[i] ?? '']));
|
|
68
|
-
});
|
|
69
|
-
const fields = headers.map(h => ({ id: h, type: 'text' }));
|
|
70
|
-
return { fields, records, total: dataLines.length, source: 'file' };
|
|
71
|
-
}
|
|
72
|
-
function parseDelimitedLine(line, separator) {
|
|
73
|
-
const result = [];
|
|
74
|
-
let current = '';
|
|
75
|
-
let inQuotes = false;
|
|
76
|
-
for (let i = 0; i < line.length; i++) {
|
|
77
|
-
const char = line[i];
|
|
78
|
-
if (char === '"') {
|
|
79
|
-
if (inQuotes && line[i + 1] === '"') {
|
|
80
|
-
current += '"';
|
|
81
|
-
i++;
|
|
82
|
-
}
|
|
83
|
-
else {
|
|
84
|
-
inQuotes = !inQuotes;
|
|
85
|
-
}
|
|
86
|
-
}
|
|
87
|
-
else if (char === separator && !inQuotes) {
|
|
88
|
-
result.push(current);
|
|
89
|
-
current = '';
|
|
90
|
-
}
|
|
91
|
-
else {
|
|
92
|
-
current += char;
|
|
93
|
-
}
|
|
94
|
-
}
|
|
95
|
-
result.push(current);
|
|
96
|
-
return result;
|
|
97
|
-
}
|
|
File without changes
|
|
File without changes
|