mcp-chilegob-dataset 0.2.1 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,5 +1,10 @@
1
1
  # mcp-chilegob-dataset
2
2
 
3
+ [![npm version](https://img.shields.io/npm/v/mcp-chilegob-dataset.svg)](https://www.npmjs.com/package/mcp-chilegob-dataset)
4
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](./LICENSE)
5
+
6
+ > **English summary:** MCP server that exposes Chile's open government data portal ([datos.gob.cl](https://datos.gob.cl)) as tools for AI assistants. Search and read thousands of public datasets from Chilean government institutions — health, education, transport, environment, and more. No API key required.
7
+
3
8
  Servidor MCP que expone el portal de datos abiertos del gobierno de Chile — [datos.gob.cl](https://datos.gob.cl) — como herramientas para asistentes de inteligencia artificial.
4
9
 
5
10
  Construido con [Hono](https://hono.dev) y el [SDK de TypeScript del Model Context Protocol](https://github.com/modelcontextprotocol/typescript-sdk).
@@ -18,7 +23,17 @@ El **Model Context Protocol (MCP)** es un estándar abierto que permite a los as
18
23
 
19
24
  ## Instalación y uso rápido
20
25
 
21
- ### Con Claude Desktop (recomendado)
26
+ ### Con Claude Code (un comando)
27
+
28
+ ```bash
29
+ claude mcp add chilegob -- npx -y mcp-chilegob-dataset
30
+ ```
31
+
32
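+ Eso es todo. El servidor queda registrado en tu Claude Code para el proyecto actual (agrega `--scope user` al comando si quieres que esté disponible en todos tus proyectos). Reinicia la sesión y ya puedes usarlo.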
33
+
34
+ ---
35
+
36
+ ### Con Claude Desktop
22
37
 
23
38
  **Paso 1** — Abre la configuración de Claude Desktop.
24
39
 
@@ -180,7 +195,7 @@ Lee filas tabulares de un recurso CKAN. Intenta primero el datastore y, si no es
180
195
  "parseable": false,
181
196
  "format": "XLS",
182
197
  "url": "https://datosabiertos.mineduc.cl/archivo.xls",
183
- "message": "This resource is a XLS file and cannot be parsed automatically. Download it directly from the URL above."
198
+ "message": "This resource is a XLS file and cannot be parsed automatically. Download it directly from the URL provided."
184
199
  }
185
200
  ```
186
201
 
@@ -279,7 +294,8 @@ export function registerTuHerramienta(server: McpServer): void {
279
294
  - **Disponibilidad del datastore** — No todos los recursos tienen datastore habilitado en CKAN. `get_resource_data` intenta automáticamente descargar el archivo (CSV, TSV, JSON); los formatos binarios (XLS, PDF) requieren descarga manual desde la URL devuelta.
280
295
  - **Archivos grandes** — La descarga directa carga el archivo completo en memoria antes de paginar. Para archivos muy grandes (>100 MB) esto puede ser lento o fallar.
281
296
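  - **Encoding** — Los archivos CSV de datos.gob.cl pueden estar en ISO-8859-1 (Latin-1). La herramienta respeta el `charset` declarado en la cabecera `Content-Type`; si no hay ninguno asume UTF-8 y, cuando la decodificación UTF-8 produce caracteres inválidos, reintenta como ISO-8859-1. Si aun así los caracteres aparecen corruptos, descarga el archivo directamente.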
282
- - **Sin caché**Cada llamada hace una solicitud en vivo a datos.gob.cl. No hay límites de tasa documentados.
297
+ - **Caché en memoria (5 min)** — `search_datasets` y `get_dataset` usan caché en memoria con TTL de 5 minutos. `get_resource_data` siempre consulta en vivo. No hay límites de tasa documentados en datos.gob.cl.
298
+ - **Timeout de red (10s)** — Todas las solicitudes a datos.gob.cl tienen un timeout de 10 segundos. Si el portal está lento o caído, las herramientas devuelven un error claro en lugar de colgar indefinidamente.
283
299
  - **Paquetes en alpha** — `@modelcontextprotocol/hono` y `@modelcontextprotocol/server` están en versión alpha.
284
300
 
285
301
  ---
@@ -290,7 +306,6 @@ Las contribuciones son bienvenidas. Algunas ideas:
290
306
 
291
307
  - [ ] Herramienta `list_organizations` — listar instituciones disponibles
292
308
  - [ ] Herramienta `get_resource_schema` — tipos y descripciones de columnas
293
- - [ ] Caché en memoria para reducir llamadas a la API
294
309
  - [ ] MCP Resources con URI templates (`datos-gob-cl://dataset/{id}`)
295
310
 
296
311
  Por favor, abre un issue antes de enviar un PR grande.
@@ -0,0 +1,211 @@
1
+ const CKAN_BASE = 'https://datos.gob.cl/api/3/action';
2
+ const FETCH_TIMEOUT_MS = 10_000;
3
+ const CACHE_TTL_MS = 5 * 60 * 1000;
4
/**
 * Minimal in-memory cache whose entries expire after a per-entry TTL.
 *
 * Expired entries are dropped when they are read and are additionally swept
 * on every write. Without the sweep, a key that is written once and never
 * requested again (for example a one-off search query) would stay in the map
 * for the lifetime of the process.
 */
class TTLCache {
    store = new Map();
    /**
     * Returns the cached value for `key`, or `undefined` when the key is
     * absent or its entry has expired (the expired entry is removed).
     */
    get(key) {
        const entry = this.store.get(key);
        if (entry === undefined)
            return undefined;
        if (Date.now() > entry.expiresAt) {
            this.store.delete(key);
            return undefined;
        }
        return entry.value;
    }
    /**
     * Stores `value` under `key` for `ttlMs` milliseconds, first evicting
     * every entry that has already expired.
     */
    set(key, value, ttlMs) {
        const now = Date.now();
        // Deleting from a Map while iterating over it is well-defined.
        for (const [existingKey, existing] of this.store) {
            if (now > existing.expiresAt) {
                this.store.delete(existingKey);
            }
        }
        this.store.set(key, { value, expiresAt: now + ttlMs });
    }
}
20
/**
 * Raised when a resource's format is not one the file parser supports.
 * Carries the format and the download URL so callers can point the user at
 * the raw file.
 */
export class NotParseableError extends Error {
    format;
    url;
    constructor(format, url) {
        const description = `Format not parseable: ${format} (${url})`;
        super(description);
        Object.assign(this, { format, url, name: 'NotParseableError' });
    }
}
30
/**
 * Raised when an HTTP request completes with a non-2xx status.
 * Exposes the numeric status code and the status text of the response.
 */
export class CkanHttpError extends Error {
    statusCode;
    statusText;
    constructor(statusCode, statusText) {
        const description = `CKAN HTTP error: ${statusCode} ${statusText}`;
        super(description);
        Object.assign(this, { statusCode, statusText, name: 'CkanHttpError' });
    }
}
40
/**
 * Raised when the CKAN API answers with `success: false`.
 * `errorType` holds the `__type` reported by the API (or a placeholder
 * chosen by the caller when the API did not supply one).
 */
export class CkanApiError extends Error {
    errorType;
    constructor(message, errorType) {
        const description = `CKAN API error: ${message}`;
        super(description);
        Object.assign(this, { errorType, name: 'CkanApiError' });
    }
}
48
/**
 * Calls one CKAN action endpoint and returns the `result` member of its
 * JSON envelope.
 *
 * Every entry of `params` is stringified and sent as a query parameter. The
 * whole exchange (request and body read) is aborted after FETCH_TIMEOUT_MS.
 *
 * @throws CkanHttpError when the HTTP status is not ok.
 * @throws CkanApiError when the envelope reports `success: false`.
 * @throws Error with a "timed out" message when the request is aborted.
 */
async function ckanAction(action, params) {
    const endpoint = new URL(`${CKAN_BASE}/${action}`);
    Object.entries(params).forEach(([name, raw]) => {
        endpoint.searchParams.set(name, String(raw));
    });
    const abort = new AbortController();
    const timeoutHandle = setTimeout(() => abort.abort(), FETCH_TIMEOUT_MS);
    try {
        const response = await fetch(endpoint.toString(), { signal: abort.signal });
        if (!response.ok) {
            throw new CkanHttpError(response.status, response.statusText);
        }
        const envelope = await response.json();
        if (envelope.success) {
            return envelope.result;
        }
        const errorType = envelope.error?.__type ?? 'Unknown Error';
        const message = envelope.error?.message ?? 'Unknown error';
        throw new CkanApiError(message, errorType);
    }
    catch (failure) {
        // An abort caused by our own timer is reported as a timeout.
        const timedOut = failure instanceof Error && failure.name === 'AbortError';
        throw timedOut
            ? new Error(`Request timed out after ${FETCH_TIMEOUT_MS / 1000}s`)
            : failure;
    }
    finally {
        clearTimeout(timeoutHandle);
    }
}
78
// Separate caches for search results and for individual dataset records.
const searchCache = new TTLCache();
const datasetCache = new TTLCache();
/**
 * Searches datasets via the `package_search` action.
 *
 * Results are cached per (query, limit) pair for CACHE_TTL_MS.
 *
 * @returns `{ total, results }` where `total` is the match count reported by
 *          the API and `results` is the page of datasets returned.
 */
export async function searchDatasets(query, limit = 10) {
    const cacheKey = `search:${query}:${limit}`;
    const hit = searchCache.get(cacheKey);
    if (hit !== undefined) {
        return hit;
    }
    const { count, results } = await ckanAction('package_search', { q: query, rows: limit });
    const fresh = { total: count, results };
    searchCache.set(cacheKey, fresh, CACHE_TTL_MS);
    return fresh;
}
90
/**
 * Fetches one dataset via the `package_show` action, serving it from the
 * dataset cache when an unexpired copy exists and caching it for
 * CACHE_TTL_MS otherwise.
 */
export async function getDataset(id) {
    const cacheKey = `dataset:${id}`;
    let dataset = datasetCache.get(cacheKey);
    if (dataset === undefined) {
        dataset = await ckanAction('package_show', { id });
        datasetCache.set(cacheKey, dataset, CACHE_TTL_MS);
    }
    return dataset;
}
99
/**
 * Reads a page of rows for a resource via the `datastore_search` action.
 * Not cached: every call goes to the API.
 */
export async function getResourceData(resourceId, limit = 50, offset = 0) {
    const query = { resource_id: resourceId, limit, offset };
    return ckanAction('datastore_search', query);
}
106
/** Fetches the metadata of a single resource via the `resource_show` action. */
export async function getResource(resourceId) {
    const lookup = { id: resourceId };
    return ckanAction('resource_show', lookup);
}
109
/**
 * Lists every organization, with full fields, via `organization_list`.
 *
 * With `all_fields=true` CKAN caps a single response (25 organizations by
 * default, configurable per site), so one request returns only the first
 * page. This walks the list with `limit`/`offset` until a page contributes
 * nothing new.
 *
 * The offset advances by the number of organizations actually received, so a
 * site-specific cap smaller than the requested page size is handled too.
 * Organizations already seen (by `name`) are skipped, which also ends the
 * loop if a server ignores `offset` and keeps returning the same page.
 */
export async function listOrganizations() {
    const PAGE_SIZE = 25;
    // Hard stop so a misbehaving server cannot keep the loop running.
    const MAX_REQUESTS = 100;
    const organizations = [];
    const seenNames = new Set();
    for (let request = 0; request < MAX_REQUESTS; request++) {
        const batch = await ckanAction('organization_list', {
            all_fields: true,
            limit: PAGE_SIZE,
            offset: organizations.length,
        });
        let added = 0;
        for (const organization of batch) {
            if (seenNames.has(organization.name))
                continue;
            seenNames.add(organization.name);
            organizations.push(organization);
            added++;
        }
        if (added === 0)
            break;
    }
    return organizations;
}
112
/**
 * Returns the column schema of a datastore resource via `datastore_info`.
 *
 * The `_id` column is omitted. Each remaining field is reduced to its id,
 * type, and the optional `info.label` / `info.notes` (null when missing).
 */
export async function getResourceSchema(resourceId) {
    const info = await ckanAction('datastore_info', { id: resourceId });
    return info.fields.flatMap(column => {
        if (column.id === '_id') {
            return [];
        }
        const described = {
            id: column.id,
            type: column.type,
            label: column.info?.label ?? null,
            description: column.info?.notes ?? null,
        };
        return [described];
    });
}
123
+ const PARSEABLE_FORMATS = new Set(['CSV', 'TSV', 'JSON']);
124
/**
 * Decodes a downloaded file body to text.
 *
 * The charset is taken from the `charset=` parameter of `contentType`
 * (surrounding double quotes are stripped); UTF-8 is assumed when none is
 * declared or when the declared label is not a known encoding. If the bytes
 * are not valid UTF-8 they are re-decoded as ISO-8859-1.
 *
 * Invalid input is detected with a fatal decoder rather than by searching
 * the output for U+FFFD, so a file that legitimately contains U+FFFD is not
 * mis-decoded, and the check works for any spelling of the UTF-8 label
 * (`utf8`, `UTF-8`, ...) because `decoder.encoding` is normalized.
 *
 * @param buffer raw bytes of the response body
 * @param contentType value of the Content-Type header ('' when absent)
 * @returns the decoded text
 */
function decodeText(buffer, contentType) {
    const charsetMatch = /charset=([^\s;]+)/i.exec(contentType);
    const label = (charsetMatch?.[1] ?? 'utf-8').replace(/^"|"$/g, '');
    let decoder;
    try {
        decoder = new TextDecoder(label, { fatal: true });
    }
    catch {
        // Unknown or malformed charset label: fall back to the default.
        decoder = new TextDecoder('utf-8', { fatal: true });
    }
    try {
        return decoder.decode(buffer);
    }
    catch {
        // Invalid byte sequence. For UTF-8 assume the file is really Latin-1;
        // for any other encoding decode leniently with replacement characters.
        const fallback = decoder.encoding === 'utf-8' ? 'iso-8859-1' : decoder.encoding;
        return new TextDecoder(fallback).decode(buffer);
    }
}
133
/**
 * Downloads a resource file and returns one page of its rows.
 *
 * Supported formats are those in PARSEABLE_FORMATS (matched
 * case-insensitively after trimming). JSON bodies that are not arrays are
 * wrapped as a single `{ data }` row. CSV/TSV bodies are split on line
 * breaks, blank lines are dropped, and the first remaining line is the
 * header.
 *
 * The timeout covers the body download as well as the initial response:
 * the timer is only cleared once the body has been read, so a server that
 * sends headers quickly and then stalls cannot hang the call.
 *
 * @param url download URL of the file
 * @param format declared format of the resource (e.g. "csv")
 * @param limit maximum number of rows to return (negative values count as 0)
 * @param offset number of rows to skip (negative values count as 0)
 * @returns `{ fields, records, total, source: 'file' }`
 * @throws NotParseableError when the format is not supported
 * @throws CkanHttpError when the download answers with a non-ok status
 * @throws Error with a "timed out" message when the download is aborted
 */
export async function fetchAndParseFile(url, format, limit, offset) {
    const normalizedFormat = format.toUpperCase().trim();
    if (!PARSEABLE_FORMATS.has(normalizedFormat)) {
        throw new NotParseableError(normalizedFormat, url);
    }
    // Clamp so a negative offset/limit cannot become a slice-from-the-end.
    const start = Math.max(0, offset);
    const end = start + Math.max(0, limit);
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);
    // Parsed JSON for JSON resources, decoded text for CSV/TSV.
    let payload;
    try {
        const response = await fetch(url, { signal: controller.signal });
        if (!response.ok) {
            throw new CkanHttpError(response.status, response.statusText);
        }
        if (normalizedFormat === 'JSON') {
            payload = await response.json();
        }
        else {
            const buffer = await response.arrayBuffer();
            const contentType = response.headers.get('content-type') ?? '';
            payload = decodeText(buffer, contentType);
        }
    }
    catch (err) {
        if (err instanceof Error && err.name === 'AbortError') {
            throw new Error(`Request timed out after ${FETCH_TIMEOUT_MS / 1000}s`);
        }
        throw err;
    }
    finally {
        clearTimeout(timer);
    }
    if (normalizedFormat === 'JSON') {
        const rows = Array.isArray(payload)
            ? payload
            : [{ data: payload }];
        const page = rows.slice(start, end);
        const first = page[0];
        // Only derive columns from an actual object; Object.keys(null) throws.
        const fields = first !== null && typeof first === 'object'
            ? Object.keys(first).map(key => ({ id: key, type: 'text' }))
            : [];
        return { fields, records: page, total: rows.length, source: 'file' };
    }
    // CSV / TSV
    const separator = normalizedFormat === 'TSV' ? '\t' : ',';
    const lines = payload.split(/\r?\n/).filter(l => l.trim() !== '');
    if (lines.length === 0) {
        return { fields: [], records: [], total: 0, source: 'file' };
    }
    const headers = parseDelimitedLine(lines[0], separator);
    const dataLines = lines.slice(1);
    const page = dataLines.slice(start, end);
    const records = page.map(line => {
        const values = parseDelimitedLine(line, separator);
        // Short rows are padded with empty strings; extra cells are ignored.
        return Object.fromEntries(headers.map((h, i) => [h, values[i] ?? '']));
    });
    const fields = headers.map(h => ({ id: h, type: 'text' }));
    return { fields, records, total: dataLines.length, source: 'file' };
}
186
/**
 * Splits one delimited line into its cells.
 *
 * A double quote toggles quoted mode; inside quoted mode a doubled quote
 * (`""`) yields a literal quote. The separator only ends a cell outside
 * quoted mode. Quote characters themselves are not part of the output.
 *
 * @param line a single line without its line terminator
 * @param separator the one-character cell separator
 * @returns the cells in order (always at least one element)
 */
function parseDelimitedLine(line, separator) {
    const cells = [];
    let cell = '';
    let quoted = false;
    let pos = 0;
    while (pos < line.length) {
        const ch = line[pos];
        if (ch !== '"') {
            if (ch === separator && !quoted) {
                cells.push(cell);
                cell = '';
            }
            else {
                cell += ch;
            }
        }
        else if (quoted && line[pos + 1] === '"') {
            // Escaped quote: emit one quote and skip its partner.
            cell += '"';
            pos += 1;
        }
        else {
            quoted = !quoted;
        }
        pos += 1;
    }
    cells.push(cell);
    return cells;
}
@@ -1,11 +1,18 @@
1
+ import { createRequire } from 'node:module';
1
2
  import { McpServer } from '@modelcontextprotocol/server';
2
3
  import { registerSearchTool } from './tools/search.js';
3
4
  import { registerDatasetTool } from './tools/dataset.js';
4
5
  import { registerResourceTool } from './tools/resource.js';
6
+ import { registerOrganizationsTool } from './tools/organizations.js';
7
+ import { registerSchemaTool } from './tools/schema.js';
8
+ const _require = createRequire(import.meta.url);
9
+ const { version } = _require('../package.json');
5
10
  export const server = new McpServer({
6
11
  name: 'datos-gob-cl',
7
- version: '1.0.0',
12
+ version,
8
13
  });
9
14
  registerSearchTool(server);
10
15
  registerDatasetTool(server);
11
16
  registerResourceTool(server);
17
+ registerOrganizationsTool(server);
18
+ registerSchemaTool(server);
@@ -1,3 +1,4 @@
1
+ #!/usr/bin/env node
1
2
  import { StdioServerTransport } from '@modelcontextprotocol/server';
2
3
  import { server } from './server.js';
3
4
  const transport = new StdioServerTransport();
@@ -0,0 +1,31 @@
1
+ import { z } from 'zod';
2
+ import { listOrganizations } from '../ckan.js';
3
/**
 * Registers the `list_organizations` tool on the given server.
 *
 * The tool takes no input. It returns, as pretty-printed JSON text, the
 * organizations that have at least one dataset, ordered by dataset count
 * (highest first), with descriptions cut to 150 characters. Failures are
 * returned as an `isError` result rather than thrown.
 */
export function registerOrganizationsTool(server) {
    const definition = {
        title: 'List Organizations',
        description: 'List all government institutions (organizations) that publish datasets on datos.gob.cl. Use this to discover which institutions have data before searching.',
        inputSchema: z.object({}),
    };
    const handler = async () => {
        try {
            const organizations = await listOrganizations();
            const publishers = organizations.filter(org => org.package_count > 0);
            // `publishers` is a fresh array, so sorting in place is safe.
            publishers.sort((left, right) => right.package_count - left.package_count);
            const summary = publishers.map(org => ({
                id: org.name,
                title: org.title,
                description: org.description?.slice(0, 150) ?? '',
                dataset_count: org.package_count,
            }));
            return {
                content: [{ type: 'text', text: JSON.stringify(summary, null, 2) }],
            };
        }
        catch (error) {
            const reason = error instanceof Error ? error.message : String(error);
            return {
                content: [{ type: 'text', text: `Error: ${reason}` }],
                isError: true,
            };
        }
    };
    server.registerTool('list_organizations', definition, handler);
}
@@ -1,5 +1,5 @@
1
1
  import { z } from 'zod';
2
- import { getResourceData, getResource, fetchAndParseFile } from '../ckan.js';
2
+ import { getResourceData, getResource, fetchAndParseFile, NotParseableError, CkanHttpError, CkanApiError } from '../ckan.js';
3
3
  export function registerResourceTool(server) {
4
4
  server.registerTool('get_resource_data', {
5
5
  title: 'Get Resource Data',
@@ -30,11 +30,10 @@ export function registerResourceTool(server) {
30
30
  };
31
31
  }
32
32
  catch (datastoreError) {
33
- const dsMessage = datastoreError instanceof Error ? datastoreError.message : String(datastoreError);
34
- const isNoDatastore = dsMessage.toLowerCase().includes('datastore') ||
35
- dsMessage.includes('404') ||
36
- dsMessage.toUpperCase().includes('NOT FOUND');
33
+ const isNoDatastore = (datastoreError instanceof CkanHttpError && datastoreError.statusCode === 404) ||
34
+ (datastoreError instanceof CkanApiError && datastoreError.errorType.toLowerCase().includes('not found'));
37
35
  if (!isNoDatastore) {
36
+ const dsMessage = datastoreError instanceof Error ? datastoreError.message : String(datastoreError);
38
37
  return {
39
38
  content: [{ type: 'text', text: `Error: ${dsMessage}` }],
40
39
  isError: true,
@@ -62,23 +61,22 @@ export function registerResourceTool(server) {
62
61
  };
63
62
  }
64
63
  catch (fileError) {
65
- const fileMessage = fileError instanceof Error ? fileError.message : String(fileError);
66
64
  // Format not parseable — return the URL so the AI can guide the user
67
- if (fileMessage.startsWith('FORMAT_NOT_PARSEABLE:')) {
68
- const [, fmt, url] = fileMessage.split(':');
65
+ if (fileError instanceof NotParseableError) {
69
66
  return {
70
67
  content: [{
71
68
  type: 'text',
72
69
  text: JSON.stringify({
73
70
  source: 'file',
74
71
  parseable: false,
75
- format: fmt,
76
- url,
77
- message: `This resource is a ${fmt} file and cannot be parsed automatically. Download it directly from the URL above.`,
72
+ format: fileError.format,
73
+ url: fileError.url,
74
+ message: `This resource is a ${fileError.format} file and cannot be parsed automatically. Download it directly from the URL above.`,
78
75
  }, null, 2),
79
76
  }],
80
77
  };
81
78
  }
79
+ const fileMessage = fileError instanceof Error ? fileError.message : String(fileError);
82
80
  return {
83
81
  content: [{ type: 'text', text: `Error reading file: ${fileMessage}` }],
84
82
  isError: true,
@@ -0,0 +1,24 @@
1
+ import { z } from 'zod';
2
+ import { getResourceSchema } from '../ckan.js';
3
/**
 * Registers the `get_resource_schema` tool on the given server.
 *
 * The tool takes a `resource_id` and returns the resource's column schema as
 * pretty-printed JSON text. Failures are returned as an `isError` result
 * rather than thrown.
 */
export function registerSchemaTool(server) {
    const definition = {
        title: 'Get Resource Schema',
        description: 'Get the column schema (field names, types, and descriptions) for a CKAN datastore resource. Only works for resources that have datastore enabled (datastore_available: true from get_dataset). Use this before reading data to understand the structure.',
        inputSchema: z.object({
            resource_id: z.string().describe('UUID of the resource (from get_dataset)'),
        }),
    };
    const handler = async ({ resource_id }) => {
        try {
            const schema = await getResourceSchema(resource_id);
            return {
                content: [{ type: 'text', text: JSON.stringify(schema, null, 2) }],
            };
        }
        catch (error) {
            const reason = error instanceof Error ? error.message : String(error);
            return {
                content: [{ type: 'text', text: `Error: ${reason}` }],
                isError: true,
            };
        }
    };
    server.registerTool('get_resource_schema', definition, handler);
}
@@ -11,7 +11,7 @@ export function registerSearchTool(server) {
11
11
  }, async ({ query, limit }) => {
12
12
  try {
13
13
  const datasets = await searchDatasets(query, limit ?? 10);
14
- const formatted = datasets.map(d => ({
14
+ const formatted = datasets.results.map(d => ({
15
15
  id: d.name,
16
16
  title: d.title,
17
17
  description: d.notes?.slice(0, 200) ?? '',
@@ -19,7 +19,7 @@ export function registerSearchTool(server) {
19
19
  resource_count: d.num_resources,
20
20
  }));
21
21
  return {
22
- content: [{ type: 'text', text: JSON.stringify(formatted, null, 2) }],
22
+ content: [{ type: 'text', text: JSON.stringify({ total: datasets.total, returned: formatted.length, results: formatted }, null, 2) }],
23
23
  };
24
24
  }
25
25
  catch (error) {
@@ -0,0 +1,6 @@
1
+ import { defineConfig } from 'vitest/config';
2
+ export default defineConfig({
3
+ test: {
4
+ environment: 'node',
5
+ },
6
+ });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mcp-chilegob-dataset",
3
- "version": "0.2.1",
3
+ "version": "0.3.1",
4
4
  "description": "MCP server exposing Chile's open government dataset portal (datos.gob.cl / CKAN API v3)",
5
5
  "type": "module",
6
6
  "license": "MIT",
@@ -34,10 +34,10 @@
34
34
  "build": "tsc",
35
35
  "start": "node dist/index.js",
36
36
  "typecheck": "tsc --noEmit",
37
- "prepublishOnly": "tsc"
37
+ "prepublishOnly": "tsc",
38
+ "test": "vitest run"
38
39
  },
39
40
  "dependencies": {
40
- "@cfworker/json-schema": "^4.1.1",
41
41
  "@hono/node-server": "^1.19.13",
42
42
  "@modelcontextprotocol/hono": "^2.0.0-alpha.2",
43
43
  "@modelcontextprotocol/server": "^2.0.0-alpha.2",
@@ -47,6 +47,7 @@
47
47
  "devDependencies": {
48
48
  "@types/node": "^20.11.17",
49
49
  "tsx": "^4.7.1",
50
- "typescript": "^5.8.3"
50
+ "typescript": "^5.8.3",
51
+ "vitest": "^4.1.4"
51
52
  }
52
53
  }
@@ -0,0 +1,122 @@
1
+ import { describe, it, expect, vi, afterEach } from 'vitest'
2
+ import {
3
+ NotParseableError,
4
+ CkanHttpError,
5
+ CkanApiError,
6
+ fetchAndParseFile,
7
+ } from '../ckan.ts'
8
+
9
+ afterEach(() => {
10
+ vi.restoreAllMocks()
11
+ })
12
+
13
+ describe('Error classes', () => {
14
+ it('NotParseableError sets format and url', () => {
15
+ const err = new NotParseableError('XLS', 'https://example.com/file.xls')
16
+ expect(err.format).toBe('XLS')
17
+ expect(err.url).toBe('https://example.com/file.xls')
18
+ expect(err.name).toBe('NotParseableError')
19
+ expect(err).toBeInstanceOf(Error)
20
+ })
21
+
22
+ it('CkanHttpError sets statusCode and statusText', () => {
23
+ const err = new CkanHttpError(404, 'Not Found')
24
+ expect(err.statusCode).toBe(404)
25
+ expect(err.statusText).toBe('Not Found')
26
+ expect(err.name).toBe('CkanHttpError')
27
+ })
28
+
29
+ it('CkanApiError sets errorType', () => {
30
+ const err = new CkanApiError('something failed', 'Validation Error')
31
+ expect(err.errorType).toBe('Validation Error')
32
+ expect(err.name).toBe('CkanApiError')
33
+ })
34
+ })
35
+
36
+ describe('fetchAndParseFile', () => {
37
+ it('throws NotParseableError for XLS format', async () => {
38
+ await expect(fetchAndParseFile('https://x.com/f.xls', 'XLS', 10, 0))
39
+ .rejects.toBeInstanceOf(NotParseableError)
40
+ })
41
+
42
+ it('throws NotParseableError for PDF format', async () => {
43
+ await expect(fetchAndParseFile('https://x.com/f.pdf', 'PDF', 10, 0))
44
+ .rejects.toBeInstanceOf(NotParseableError)
45
+ })
46
+
47
+ it('parses CSV and returns correct fields and records', async () => {
48
+ const csvContent = 'Region,Provincia,Comuna\nCOQUIMBO,ELQUI,LA SERENA\nATACAMA,COPIAPO,COPIAPO\n'
49
+ vi.spyOn(global, 'fetch').mockResolvedValueOnce(
50
+ new Response(csvContent, {
51
+ status: 200,
52
+ headers: { 'content-type': 'text/csv; charset=utf-8' },
53
+ })
54
+ )
55
+
56
+ const result = await fetchAndParseFile('https://x.com/data.csv', 'CSV', 10, 0)
57
+ expect(result.fields).toEqual([
58
+ { id: 'Region', type: 'text' },
59
+ { id: 'Provincia', type: 'text' },
60
+ { id: 'Comuna', type: 'text' },
61
+ ])
62
+ expect(result.records).toHaveLength(2)
63
+ expect(result.records[0]).toEqual({ Region: 'COQUIMBO', Provincia: 'ELQUI', Comuna: 'LA SERENA' })
64
+ expect(result.total).toBe(2)
65
+ expect(result.source).toBe('file')
66
+ })
67
+
68
+ it('respects limit and offset', async () => {
69
+ const lines = ['A,B', '1,2', '3,4', '5,6', '7,8'].join('\n')
70
+ vi.spyOn(global, 'fetch').mockResolvedValueOnce(
71
+ new Response(lines, { status: 200, headers: { 'content-type': 'text/csv' } })
72
+ )
73
+
74
+ const result = await fetchAndParseFile('https://x.com/data.csv', 'CSV', 2, 1)
75
+ expect(result.records).toHaveLength(2)
76
+ expect(result.records[0]).toEqual({ A: '3', B: '4' })
77
+ expect(result.total).toBe(4)
78
+ })
79
+
80
+ it('parses TSV and splits on tab', async () => {
81
+ const tsv = 'A\tB\n1\t2\n3\t4\n'
82
+ vi.spyOn(global, 'fetch').mockResolvedValueOnce(
83
+ new Response(tsv, { status: 200, headers: { 'content-type': 'text/tab-separated-values' } })
84
+ )
85
+ const result = await fetchAndParseFile('https://x.com/data.tsv', 'TSV', 10, 0)
86
+ expect(result.records[0]).toEqual({ A: '1', B: '2' })
87
+ expect(result.total).toBe(2)
88
+ expect(result.source).toBe('file')
89
+ })
90
+
91
+ it('parses JSON array and returns records', async () => {
92
+ const json = JSON.stringify([{ name: 'Chile', code: 'CL' }, { name: 'Peru', code: 'PE' }])
93
+ vi.spyOn(global, 'fetch').mockResolvedValueOnce(
94
+ new Response(json, { status: 200, headers: { 'content-type': 'application/json' } })
95
+ )
96
+ const result = await fetchAndParseFile('https://x.com/data.json', 'JSON', 10, 0)
97
+ expect(result.total).toBe(2)
98
+ expect(result.records[0]).toEqual({ name: 'Chile', code: 'CL' })
99
+ expect(result.fields).toEqual([{ id: 'name', type: 'text' }, { id: 'code', type: 'text' }])
100
+ expect(result.source).toBe('file')
101
+ })
102
+
103
+ it('returns empty result for empty CSV body', async () => {
104
+ vi.spyOn(global, 'fetch').mockResolvedValueOnce(
105
+ new Response('\n\n \n', { status: 200, headers: { 'content-type': 'text/csv' } })
106
+ )
107
+ const result = await fetchAndParseFile('https://x.com/empty.csv', 'CSV', 10, 0)
108
+ expect(result.fields).toEqual([])
109
+ expect(result.records).toEqual([])
110
+ expect(result.total).toBe(0)
111
+ })
112
+
113
+ it('throws CkanHttpError with correct status on non-ok response', async () => {
114
+ vi.spyOn(global, 'fetch').mockResolvedValueOnce(
115
+ new Response('Not Found', { status: 404, statusText: 'Not Found' })
116
+ )
117
+ const err = await fetchAndParseFile('https://x.com/data.csv', 'CSV', 10, 0).catch(e => e)
118
+ expect(err).toBeInstanceOf(CkanHttpError)
119
+ expect(err.statusCode).toBe(404)
120
+ expect(err.statusText).toBe('Not Found')
121
+ })
122
+ })
package/src/ckan.ts CHANGED
@@ -1,4 +1,54 @@
1
1
  const CKAN_BASE = 'https://datos.gob.cl/api/3/action'
2
+ const FETCH_TIMEOUT_MS = 10_000
3
+ const CACHE_TTL_MS = 5 * 60 * 1000
4
+
5
/**
 * Minimal in-memory cache whose entries expire after a per-entry TTL.
 *
 * Expired entries are dropped when they are read and are additionally swept
 * on every write. Without the sweep, a key that is written once and never
 * requested again (for example a one-off search query) would stay in the map
 * for the lifetime of the process.
 */
class TTLCache<V> {
  private readonly store = new Map<string, { value: V; expiresAt: number }>()

  /**
   * Returns the cached value for `key`, or `undefined` when the key is
   * absent or its entry has expired (the expired entry is removed).
   */
  get(key: string): V | undefined {
    const entry = this.store.get(key)
    if (entry === undefined) return undefined
    if (Date.now() > entry.expiresAt) {
      this.store.delete(key)
      return undefined
    }
    return entry.value
  }

  /**
   * Stores `value` under `key` for `ttlMs` milliseconds, first evicting
   * every entry that has already expired.
   */
  set(key: string, value: V, ttlMs: number): void {
    const now = Date.now()
    // Deleting from a Map while iterating over it is well-defined.
    for (const [existingKey, existing] of this.store) {
      if (now > existing.expiresAt) {
        this.store.delete(existingKey)
      }
    }
    this.store.set(key, { value, expiresAt: now + ttlMs })
  }
}
22
+
23
+ export class NotParseableError extends Error {
24
+ constructor(
25
+ public readonly format: string,
26
+ public readonly url: string,
27
+ ) {
28
+ super(`Format not parseable: ${format} (${url})`)
29
+ this.name = 'NotParseableError'
30
+ }
31
+ }
32
+
33
+ export class CkanHttpError extends Error {
34
+ constructor(
35
+ public readonly statusCode: number,
36
+ public readonly statusText: string,
37
+ ) {
38
+ super(`CKAN HTTP error: ${statusCode} ${statusText}`)
39
+ this.name = 'CkanHttpError'
40
+ }
41
+ }
42
+
43
+ export class CkanApiError extends Error {
44
+ constructor(
45
+ message: string,
46
+ public readonly errorType: string,
47
+ ) {
48
+ super(`CKAN API error: ${message}`)
49
+ this.name = 'CkanApiError'
50
+ }
51
+ }
2
52
 
3
53
  export interface CkanDataset {
4
54
  id: string
@@ -43,26 +93,64 @@ async function ckanAction<T>(action: string, params: Record<string, unknown>): P
43
93
  url.searchParams.set(key, String(value))
44
94
  }
45
95
 
46
- const response = await fetch(url.toString())
47
- if (!response.ok) {
48
- throw new Error(`CKAN API error: ${response.status} ${response.statusText}`)
49
- }
96
+ const controller = new AbortController()
97
+ const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS)
98
+ try {
99
+ const response = await fetch(url.toString(), { signal: controller.signal })
100
+ if (!response.ok) {
101
+ throw new CkanHttpError(response.status, response.statusText)
102
+ }
50
103
 
51
- const data = await response.json() as { success: boolean; result: T; error?: { message: string } }
52
- if (!data.success) {
53
- throw new Error(`CKAN error: ${data.error?.message ?? 'Unknown error'}`)
104
+ const data = await response.json() as { success: boolean; result: T; error?: { __type: string; message?: string } }
105
+ if (!data.success) {
106
+ const errorType = data.error?.__type ?? 'Unknown Error'
107
+ const message = data.error?.message ?? 'Unknown error'
108
+ throw new CkanApiError(message, errorType)
109
+ }
110
+
111
+ return data.result
112
+ } catch (err) {
113
+ if (err instanceof Error && err.name === 'AbortError') {
114
+ throw new Error(`Request timed out after ${FETCH_TIMEOUT_MS / 1000}s`)
115
+ }
116
+ throw err
117
+ } finally {
118
+ clearTimeout(timer)
54
119
  }
120
+ }
55
121
 
56
- return data.result
122
+ export interface CkanOrganization {
123
+ name: string
124
+ title: string
125
+ description: string | null
126
+ package_count: number
57
127
  }
58
128
 
59
- export async function searchDatasets(query: string, limit: number = 10): Promise<CkanDataset[]> {
60
- const result = await ckanAction<{ results: CkanDataset[] }>('package_search', { q: query, rows: limit })
61
- return result.results
129
+ export interface CkanSearchResult {
130
+ total: number
131
+ results: CkanDataset[]
132
+ }
133
+
134
+ const searchCache = new TTLCache<CkanSearchResult>()
135
+ const datasetCache = new TTLCache<CkanDataset>()
136
+
137
+ export async function searchDatasets(query: string, limit: number = 10): Promise<CkanSearchResult> {
138
+ const key = `search:${query}:${limit}`
139
+ const cached = searchCache.get(key)
140
+ if (cached !== undefined) return cached
141
+ const result = await ckanAction<{ count: number; results: CkanDataset[] }>('package_search', { q: query, rows: limit })
142
+ const value: CkanSearchResult = { total: result.count, results: result.results }
143
+ searchCache.set(key, value, CACHE_TTL_MS)
144
+ return value
62
145
  }
63
146
 
64
147
  export async function getDataset(id: string): Promise<CkanDataset> {
65
- return ckanAction<CkanDataset>('package_show', { id })
148
+ const key = `dataset:${id}`
149
+ const cached = datasetCache.get(key)
150
+ if (cached !== undefined) return cached
151
+ const dataset = await ckanAction<CkanDataset>('package_show', { id })
152
+ datasetCache.set(key, dataset, CACHE_TTL_MS)
153
+ return dataset
66
154
  }
67
155
 
68
156
  export async function getResourceData(
@@ -81,8 +169,47 @@ export async function getResource(resourceId: string): Promise<CkanResourceDetai
81
169
  return ckanAction<CkanResourceDetail>('resource_show', { id: resourceId })
82
170
  }
83
171
 
172
/**
 * Lists every organization, with full fields, via `organization_list`.
 *
 * With `all_fields=true` CKAN caps a single response (25 organizations by
 * default, configurable per site), so one request returns only the first
 * page. This walks the list with `limit`/`offset` until a page contributes
 * nothing new.
 *
 * The offset advances by the number of organizations actually received, so a
 * site-specific cap smaller than the requested page size is handled too.
 * Organizations already seen (by `name`) are skipped, which also ends the
 * loop if a server ignores `offset` and keeps returning the same page.
 */
export async function listOrganizations(): Promise<CkanOrganization[]> {
  const PAGE_SIZE = 25
  // Hard stop so a misbehaving server cannot keep the loop running.
  const MAX_REQUESTS = 100
  const organizations: CkanOrganization[] = []
  const seenNames = new Set<string>()

  for (let request = 0; request < MAX_REQUESTS; request++) {
    const batch = await ckanAction<CkanOrganization[]>('organization_list', {
      all_fields: true,
      limit: PAGE_SIZE,
      offset: organizations.length,
    })
    let added = 0
    for (const organization of batch) {
      if (seenNames.has(organization.name)) continue
      seenNames.add(organization.name)
      organizations.push(organization)
      added++
    }
    if (added === 0) break
  }

  return organizations
}
175
+
176
+ export interface CkanFieldSchema {
177
+ id: string
178
+ type: string
179
+ label: string | null
180
+ description: string | null
181
+ }
182
+
183
+ interface RawField {
184
+ id: string
185
+ type: string
186
+ info?: { label?: string; notes?: string }
187
+ }
188
+
189
+ export async function getResourceSchema(resourceId: string): Promise<CkanFieldSchema[]> {
190
+ const result = await ckanAction<{ id: string; fields: RawField[] }>('datastore_info', { id: resourceId })
191
+ return result.fields
192
+ .filter(field => field.id !== '_id')
193
+ .map(field => ({
194
+ id: field.id,
195
+ type: field.type,
196
+ label: field.info?.label ?? null,
197
+ description: field.info?.notes ?? null,
198
+ }))
199
+ }
200
+
84
201
  const PARSEABLE_FORMATS = new Set(['CSV', 'TSV', 'JSON'])
85
202
 
203
/**
 * Decodes a downloaded file body to text.
 *
 * The charset is taken from the `charset=` parameter of `contentType`
 * (surrounding double quotes are stripped); UTF-8 is assumed when none is
 * declared or when the declared label is not a known encoding. If the bytes
 * are not valid UTF-8 they are re-decoded as ISO-8859-1.
 *
 * Invalid input is detected with a fatal decoder rather than by searching
 * the output for U+FFFD, so a file that legitimately contains U+FFFD is not
 * mis-decoded, and the check works for any spelling of the UTF-8 label
 * (`utf8`, `UTF-8`, ...) because `decoder.encoding` is normalized.
 *
 * @param buffer raw bytes of the response body
 * @param contentType value of the Content-Type header ('' when absent)
 * @returns the decoded text
 */
function decodeText(buffer: ArrayBuffer, contentType: string): string {
  const charsetMatch = /charset=([^\s;]+)/i.exec(contentType)
  const label = (charsetMatch?.[1] ?? 'utf-8').replace(/^"|"$/g, '')

  let decoder: TextDecoder
  try {
    decoder = new TextDecoder(label, { fatal: true })
  } catch {
    // Unknown or malformed charset label: fall back to the default.
    decoder = new TextDecoder('utf-8', { fatal: true })
  }

  try {
    return decoder.decode(buffer)
  } catch {
    // Invalid byte sequence. For UTF-8 assume the file is really Latin-1;
    // for any other encoding decode leniently with replacement characters.
    const fallback = decoder.encoding === 'utf-8' ? 'iso-8859-1' : decoder.encoding
    return new TextDecoder(fallback).decode(buffer)
  }
}
212
+
86
213
  export async function fetchAndParseFile(
87
214
  url: string,
88
215
  format: string,
@@ -92,14 +219,24 @@ export async function fetchAndParseFile(
92
219
  const normalizedFormat = format.toUpperCase().trim()
93
220
 
94
221
  if (!PARSEABLE_FORMATS.has(normalizedFormat)) {
95
- throw new Error(
96
- `FORMAT_NOT_PARSEABLE:${normalizedFormat}:${url}`
97
- )
222
+ throw new NotParseableError(normalizedFormat, url)
98
223
  }
99
224
 
100
- const response = await fetch(url)
225
+ const controller = new AbortController()
226
+ const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS)
227
+ let response: Response
228
+ try {
229
+ response = await fetch(url, { signal: controller.signal })
230
+ } catch (err) {
231
+ if (err instanceof Error && err.name === 'AbortError') {
232
+ throw new Error(`Request timed out after ${FETCH_TIMEOUT_MS / 1000}s`)
233
+ }
234
+ throw err
235
+ } finally {
236
+ clearTimeout(timer)
237
+ }
101
238
  if (!response.ok) {
102
- throw new Error(`Failed to fetch file: ${response.status} ${response.statusText}`)
239
+ throw new CkanHttpError(response.status, response.statusText)
103
240
  }
104
241
 
105
242
  if (normalizedFormat === 'JSON') {
@@ -117,7 +254,9 @@ export async function fetchAndParseFile(
117
254
  }
118
255
 
119
256
  // CSV / TSV
120
- const text = await response.text()
257
+ const buffer = await response.arrayBuffer()
258
+ const contentType = response.headers.get('content-type') ?? ''
259
+ const text = decodeText(buffer, contentType)
121
260
  const separator = normalizedFormat === 'TSV' ? '\t' : ','
122
261
  const lines = text.split(/\r?\n/).filter(l => l.trim() !== '')
123
262
 
package/src/server.ts CHANGED
@@ -1,13 +1,21 @@
1
+ import { createRequire } from 'node:module'
1
2
  import { McpServer } from '@modelcontextprotocol/server'
2
3
  import { registerSearchTool } from './tools/search.js'
3
4
  import { registerDatasetTool } from './tools/dataset.js'
4
5
  import { registerResourceTool } from './tools/resource.js'
6
+ import { registerOrganizationsTool } from './tools/organizations.js'
7
+ import { registerSchemaTool } from './tools/schema.js'
8
+
9
+ const _require = createRequire(import.meta.url)
10
+ const { version } = _require('../package.json') as { version: string }
5
11
 
6
12
  export const server = new McpServer({
7
13
  name: 'datos-gob-cl',
8
- version: '1.0.0',
14
+ version,
9
15
  })
10
16
 
11
17
  registerSearchTool(server)
12
18
  registerDatasetTool(server)
13
19
  registerResourceTool(server)
20
+ registerOrganizationsTool(server)
21
+ registerSchemaTool(server)
package/src/stdio.ts CHANGED
@@ -1,3 +1,4 @@
1
+ #!/usr/bin/env node
1
2
  import { StdioServerTransport } from '@modelcontextprotocol/server'
2
3
  import { server } from './server.js'
3
4
 
@@ -0,0 +1,36 @@
1
+ import type { McpServer } from '@modelcontextprotocol/server'
2
+ import { z } from 'zod'
3
+ import { listOrganizations } from '../ckan.js'
4
+
5
+ export function registerOrganizationsTool(server: McpServer): void {
6
+ server.registerTool(
7
+ 'list_organizations',
8
+ {
9
+ title: 'List Organizations',
10
+ description: 'List all government institutions (organizations) that publish datasets on datos.gob.cl. Use this to discover which institutions have data before searching.',
11
+ inputSchema: z.object({}),
12
+ },
13
+ async () => {
14
+ try {
15
+ const orgs = await listOrganizations()
16
+ const formatted = orgs
17
+ .filter(o => o.package_count > 0)
18
+ .sort((a, b) => b.package_count - a.package_count)
19
+ .map(o => ({
20
+ id: o.name,
21
+ title: o.title,
22
+ description: o.description?.slice(0, 150) ?? '',
23
+ dataset_count: o.package_count,
24
+ }))
25
+ return {
26
+ content: [{ type: 'text', text: JSON.stringify(formatted, null, 2) }],
27
+ }
28
+ } catch (error) {
29
+ return {
30
+ content: [{ type: 'text', text: `Error: ${error instanceof Error ? error.message : String(error)}` }],
31
+ isError: true,
32
+ }
33
+ }
34
+ }
35
+ )
36
+ }
@@ -1,6 +1,6 @@
1
1
  import type { McpServer } from '@modelcontextprotocol/server'
2
2
  import { z } from 'zod'
3
- import { getResourceData, getResource, fetchAndParseFile } from '../ckan.js'
3
+ import { getResourceData, getResource, fetchAndParseFile, NotParseableError, CkanHttpError, CkanApiError } from '../ckan.js'
4
4
 
5
5
  export function registerResourceTool(server: McpServer): void {
6
6
  server.registerTool(
@@ -36,13 +36,12 @@ export function registerResourceTool(server: McpServer): void {
36
36
  }],
37
37
  }
38
38
  } catch (datastoreError) {
39
- const dsMessage = datastoreError instanceof Error ? datastoreError.message : String(datastoreError)
40
39
  const isNoDatastore =
41
- dsMessage.toLowerCase().includes('datastore') ||
42
- dsMessage.includes('404') ||
43
- dsMessage.toUpperCase().includes('NOT FOUND')
40
+ (datastoreError instanceof CkanHttpError && datastoreError.statusCode === 404) ||
41
+ (datastoreError instanceof CkanApiError && datastoreError.errorType.toLowerCase().includes('not found'))
44
42
 
45
43
  if (!isNoDatastore) {
44
+ const dsMessage = datastoreError instanceof Error ? datastoreError.message : String(datastoreError)
46
45
  return {
47
46
  content: [{ type: 'text', text: `Error: ${dsMessage}` }],
48
47
  isError: true,
@@ -71,25 +70,24 @@ export function registerResourceTool(server: McpServer): void {
71
70
  }],
72
71
  }
73
72
  } catch (fileError) {
74
- const fileMessage = fileError instanceof Error ? fileError.message : String(fileError)
75
-
76
73
  // Format not parseable — return the URL so the AI can guide the user
77
- if (fileMessage.startsWith('FORMAT_NOT_PARSEABLE:')) {
78
- const [, fmt, url] = fileMessage.split(':')
74
+ if (fileError instanceof NotParseableError) {
79
75
  return {
80
76
  content: [{
81
77
  type: 'text',
82
78
  text: JSON.stringify({
83
79
  source: 'file',
84
80
  parseable: false,
85
- format: fmt,
86
- url,
87
- message: `This resource is a ${fmt} file and cannot be parsed automatically. Download it directly from the URL above.`,
81
+ format: fileError.format,
82
+ url: fileError.url,
83
+ message: `This resource is a ${fileError.format} file and cannot be parsed automatically. Download it directly from the URL above.`,
88
84
  }, null, 2),
89
85
  }],
90
86
  }
91
87
  }
92
88
 
89
+ const fileMessage = fileError instanceof Error ? fileError.message : String(fileError)
90
+
93
91
  return {
94
92
  content: [{ type: 'text', text: `Error reading file: ${fileMessage}` }],
95
93
  isError: true,
@@ -0,0 +1,30 @@
1
+ import type { McpServer } from '@modelcontextprotocol/server'
2
+ import { z } from 'zod'
3
+ import { getResourceSchema } from '../ckan.js'
4
+
5
+ export function registerSchemaTool(server: McpServer): void {
6
+ server.registerTool(
7
+ 'get_resource_schema',
8
+ {
9
+ title: 'Get Resource Schema',
10
+ description:
11
+ 'Get the column schema (field names, types, and descriptions) for a CKAN datastore resource. Only works for resources that have datastore enabled (datastore_available: true from get_dataset). Use this before reading data to understand the structure.',
12
+ inputSchema: z.object({
13
+ resource_id: z.string().describe('UUID of the resource (from get_dataset)'),
14
+ }),
15
+ },
16
+ async ({ resource_id }) => {
17
+ try {
18
+ const fields = await getResourceSchema(resource_id)
19
+ return {
20
+ content: [{ type: 'text', text: JSON.stringify(fields, null, 2) }],
21
+ }
22
+ } catch (error) {
23
+ return {
24
+ content: [{ type: 'text', text: `Error: ${error instanceof Error ? error.message : String(error)}` }],
25
+ isError: true,
26
+ }
27
+ }
28
+ }
29
+ )
30
+ }
@@ -16,7 +16,7 @@ export function registerSearchTool(server: McpServer): void {
16
16
  async ({ query, limit }) => {
17
17
  try {
18
18
  const datasets = await searchDatasets(query, limit ?? 10)
19
- const formatted = datasets.map(d => ({
19
+ const formatted = datasets.results.map(d => ({
20
20
  id: d.name,
21
21
  title: d.title,
22
22
  description: d.notes?.slice(0, 200) ?? '',
@@ -24,7 +24,7 @@ export function registerSearchTool(server: McpServer): void {
24
24
  resource_count: d.num_resources,
25
25
  }))
26
26
  return {
27
- content: [{ type: 'text', text: JSON.stringify(formatted, null, 2) }],
27
+ content: [{ type: 'text', text: JSON.stringify({ total: datasets.total, returned: formatted.length, results: formatted }, null, 2) }],
28
28
  }
29
29
  } catch (error) {
30
30
  return {
package/dist/ckan.js DELETED
@@ -1,97 +0,0 @@
1
- const CKAN_BASE = 'https://datos.gob.cl/api/3/action';
2
- async function ckanAction(action, params) {
3
- const url = new URL(`${CKAN_BASE}/${action}`);
4
- for (const [key, value] of Object.entries(params)) {
5
- url.searchParams.set(key, String(value));
6
- }
7
- const response = await fetch(url.toString());
8
- if (!response.ok) {
9
- throw new Error(`CKAN API error: ${response.status} ${response.statusText}`);
10
- }
11
- const data = await response.json();
12
- if (!data.success) {
13
- throw new Error(`CKAN error: ${data.error?.message ?? 'Unknown error'}`);
14
- }
15
- return data.result;
16
- }
17
- export async function searchDatasets(query, limit = 10) {
18
- const result = await ckanAction('package_search', { q: query, rows: limit });
19
- return result.results;
20
- }
21
- export async function getDataset(id) {
22
- return ckanAction('package_show', { id });
23
- }
24
- export async function getResourceData(resourceId, limit = 50, offset = 0) {
25
- return ckanAction('datastore_search', {
26
- resource_id: resourceId,
27
- limit,
28
- offset,
29
- });
30
- }
31
- export async function getResource(resourceId) {
32
- return ckanAction('resource_show', { id: resourceId });
33
- }
34
- const PARSEABLE_FORMATS = new Set(['CSV', 'TSV', 'JSON']);
35
- export async function fetchAndParseFile(url, format, limit, offset) {
36
- const normalizedFormat = format.toUpperCase().trim();
37
- if (!PARSEABLE_FORMATS.has(normalizedFormat)) {
38
- throw new Error(`FORMAT_NOT_PARSEABLE:${normalizedFormat}:${url}`);
39
- }
40
- const response = await fetch(url);
41
- if (!response.ok) {
42
- throw new Error(`Failed to fetch file: ${response.status} ${response.statusText}`);
43
- }
44
- if (normalizedFormat === 'JSON') {
45
- const json = await response.json();
46
- const rows = Array.isArray(json)
47
- ? json
48
- : [{ data: json }];
49
- const page = rows.slice(offset, offset + limit);
50
- const fields = page.length > 0
51
- ? Object.keys(page[0]).map(key => ({ id: key, type: 'text' }))
52
- : [];
53
- return { fields, records: page, total: rows.length, source: 'file' };
54
- }
55
- // CSV / TSV
56
- const text = await response.text();
57
- const separator = normalizedFormat === 'TSV' ? '\t' : ',';
58
- const lines = text.split(/\r?\n/).filter(l => l.trim() !== '');
59
- if (lines.length === 0) {
60
- return { fields: [], records: [], total: 0, source: 'file' };
61
- }
62
- const headers = parseDelimitedLine(lines[0], separator);
63
- const dataLines = lines.slice(1);
64
- const page = dataLines.slice(offset, offset + limit);
65
- const records = page.map(line => {
66
- const values = parseDelimitedLine(line, separator);
67
- return Object.fromEntries(headers.map((h, i) => [h, values[i] ?? '']));
68
- });
69
- const fields = headers.map(h => ({ id: h, type: 'text' }));
70
- return { fields, records, total: dataLines.length, source: 'file' };
71
- }
72
- function parseDelimitedLine(line, separator) {
73
- const result = [];
74
- let current = '';
75
- let inQuotes = false;
76
- for (let i = 0; i < line.length; i++) {
77
- const char = line[i];
78
- if (char === '"') {
79
- if (inQuotes && line[i + 1] === '"') {
80
- current += '"';
81
- i++;
82
- }
83
- else {
84
- inQuotes = !inQuotes;
85
- }
86
- }
87
- else if (char === separator && !inQuotes) {
88
- result.push(current);
89
- current = '';
90
- }
91
- else {
92
- current += char;
93
- }
94
- }
95
- result.push(current);
96
- return result;
97
- }
File without changes
File without changes