mcp-chilegob-dataset 0.3.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -1
- package/dist/ckan.js +30 -4
- package/dist/server.js +4 -0
- package/dist/tools/organizations.js +31 -0
- package/dist/tools/schema.js +24 -0
- package/dist/tools/search.js +2 -2
- package/package.json +7 -4
- package/src/__tests__/ckan.test.ts +122 -0
- package/src/ckan.ts +61 -7
- package/src/server.ts +4 -0
- package/src/tools/organizations.ts +36 -0
- package/src/tools/schema.ts +30 -0
- package/src/tools/search.ts +2 -2
package/README.md
CHANGED
|
@@ -23,7 +23,17 @@ El **Model Context Protocol (MCP)** es un estándar abierto que permite a los as
|
|
|
23
23
|
|
|
24
24
|
## Instalación y uso rápido
|
|
25
25
|
|
|
26
|
-
### Con Claude
|
|
26
|
+
### Con Claude Code (un comando)
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
claude mcp add chilegob -- npx -y mcp-chilegob-dataset
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
Eso es todo. El servidor queda registrado globalmente en tu Claude Code. Reinicia la sesión y ya puedes usarlo.
|
|
33
|
+
|
|
34
|
+
---
|
|
35
|
+
|
|
36
|
+
### Con Claude Desktop
|
|
27
37
|
|
|
28
38
|
**Paso 1** — Abre la configuración de Claude Desktop.
|
|
29
39
|
|
package/dist/ckan.js
CHANGED
|
@@ -83,8 +83,9 @@ export async function searchDatasets(query, limit = 10) {
|
|
|
83
83
|
if (cached !== undefined)
|
|
84
84
|
return cached;
|
|
85
85
|
const result = await ckanAction('package_search', { q: query, rows: limit });
|
|
86
|
-
|
|
87
|
-
|
|
86
|
+
const value = { total: result.count, results: result.results };
|
|
87
|
+
searchCache.set(key, value, CACHE_TTL_MS);
|
|
88
|
+
return value;
|
|
88
89
|
}
|
|
89
90
|
export async function getDataset(id) {
|
|
90
91
|
const key = `dataset:${id}`;
|
|
@@ -105,7 +106,30 @@ export async function getResourceData(resourceId, limit = 50, offset = 0) {
|
|
|
105
106
|
export async function getResource(resourceId) {
|
|
106
107
|
return ckanAction('resource_show', { id: resourceId });
|
|
107
108
|
}
|
|
109
|
+
export async function listOrganizations() {
|
|
110
|
+
return ckanAction('organization_list', { all_fields: true });
|
|
111
|
+
}
|
|
112
|
+
export async function getResourceSchema(resourceId) {
|
|
113
|
+
const result = await ckanAction('datastore_info', { id: resourceId });
|
|
114
|
+
return result.fields
|
|
115
|
+
.filter(field => field.id !== '_id')
|
|
116
|
+
.map(field => ({
|
|
117
|
+
id: field.id,
|
|
118
|
+
type: field.type,
|
|
119
|
+
label: field.info?.label ?? null,
|
|
120
|
+
description: field.info?.notes ?? null,
|
|
121
|
+
}));
|
|
122
|
+
}
|
|
108
123
|
const PARSEABLE_FORMATS = new Set(['CSV', 'TSV', 'JSON']);
|
|
124
|
+
function decodeText(buffer, contentType) {
|
|
125
|
+
const charsetMatch = /charset=([^\s;]+)/i.exec(contentType);
|
|
126
|
+
const charset = (charsetMatch?.[1] ?? 'utf-8').replace(/^"|"$/g, '');
|
|
127
|
+
const text = new TextDecoder(charset).decode(buffer);
|
|
128
|
+
if (text.includes('\uFFFD') && charset.toLowerCase() === 'utf-8') {
|
|
129
|
+
return new TextDecoder('iso-8859-1').decode(buffer);
|
|
130
|
+
}
|
|
131
|
+
return text;
|
|
132
|
+
}
|
|
109
133
|
export async function fetchAndParseFile(url, format, limit, offset) {
|
|
110
134
|
const normalizedFormat = format.toUpperCase().trim();
|
|
111
135
|
if (!PARSEABLE_FORMATS.has(normalizedFormat)) {
|
|
@@ -127,7 +151,7 @@ export async function fetchAndParseFile(url, format, limit, offset) {
|
|
|
127
151
|
clearTimeout(timer);
|
|
128
152
|
}
|
|
129
153
|
if (!response.ok) {
|
|
130
|
-
throw new
|
|
154
|
+
throw new CkanHttpError(response.status, response.statusText);
|
|
131
155
|
}
|
|
132
156
|
if (normalizedFormat === 'JSON') {
|
|
133
157
|
const json = await response.json();
|
|
@@ -141,7 +165,9 @@ export async function fetchAndParseFile(url, format, limit, offset) {
|
|
|
141
165
|
return { fields, records: page, total: rows.length, source: 'file' };
|
|
142
166
|
}
|
|
143
167
|
// CSV / TSV
|
|
144
|
-
const text = await response.text();
|
|
168
|
+
const buffer = await response.arrayBuffer();
|
|
169
|
+
const contentType = response.headers.get('content-type') ?? '';
|
|
170
|
+
const text = decodeText(buffer, contentType);
|
|
145
171
|
const separator = normalizedFormat === 'TSV' ? '\t' : ',';
|
|
146
172
|
const lines = text.split(/\r?\n/).filter(l => l.trim() !== '');
|
|
147
173
|
if (lines.length === 0) {
|
package/dist/server.js
CHANGED
|
@@ -3,6 +3,8 @@ import { McpServer } from '@modelcontextprotocol/server';
|
|
|
3
3
|
import { registerSearchTool } from './tools/search.js';
|
|
4
4
|
import { registerDatasetTool } from './tools/dataset.js';
|
|
5
5
|
import { registerResourceTool } from './tools/resource.js';
|
|
6
|
+
import { registerOrganizationsTool } from './tools/organizations.js';
|
|
7
|
+
import { registerSchemaTool } from './tools/schema.js';
|
|
6
8
|
const _require = createRequire(import.meta.url);
|
|
7
9
|
const { version } = _require('../package.json');
|
|
8
10
|
export const server = new McpServer({
|
|
@@ -12,3 +14,5 @@ export const server = new McpServer({
|
|
|
12
14
|
registerSearchTool(server);
|
|
13
15
|
registerDatasetTool(server);
|
|
14
16
|
registerResourceTool(server);
|
|
17
|
+
registerOrganizationsTool(server);
|
|
18
|
+
registerSchemaTool(server);
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
import { listOrganizations } from '../ckan.js';
|
|
3
|
+
export function registerOrganizationsTool(server) {
|
|
4
|
+
server.registerTool('list_organizations', {
|
|
5
|
+
title: 'List Organizations',
|
|
6
|
+
description: 'List all government institutions (organizations) that publish datasets on datos.gob.cl. Use this to discover which institutions have data before searching.',
|
|
7
|
+
inputSchema: z.object({}),
|
|
8
|
+
}, async () => {
|
|
9
|
+
try {
|
|
10
|
+
const orgs = await listOrganizations();
|
|
11
|
+
const formatted = orgs
|
|
12
|
+
.filter(o => o.package_count > 0)
|
|
13
|
+
.sort((a, b) => b.package_count - a.package_count)
|
|
14
|
+
.map(o => ({
|
|
15
|
+
id: o.name,
|
|
16
|
+
title: o.title,
|
|
17
|
+
description: o.description?.slice(0, 150) ?? '',
|
|
18
|
+
dataset_count: o.package_count,
|
|
19
|
+
}));
|
|
20
|
+
return {
|
|
21
|
+
content: [{ type: 'text', text: JSON.stringify(formatted, null, 2) }],
|
|
22
|
+
};
|
|
23
|
+
}
|
|
24
|
+
catch (error) {
|
|
25
|
+
return {
|
|
26
|
+
content: [{ type: 'text', text: `Error: ${error instanceof Error ? error.message : String(error)}` }],
|
|
27
|
+
isError: true,
|
|
28
|
+
};
|
|
29
|
+
}
|
|
30
|
+
});
|
|
31
|
+
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
import { getResourceSchema } from '../ckan.js';
|
|
3
|
+
export function registerSchemaTool(server) {
|
|
4
|
+
server.registerTool('get_resource_schema', {
|
|
5
|
+
title: 'Get Resource Schema',
|
|
6
|
+
description: 'Get the column schema (field names, types, and descriptions) for a CKAN datastore resource. Only works for resources that have datastore enabled (datastore_available: true from get_dataset). Use this before reading data to understand the structure.',
|
|
7
|
+
inputSchema: z.object({
|
|
8
|
+
resource_id: z.string().describe('UUID of the resource (from get_dataset)'),
|
|
9
|
+
}),
|
|
10
|
+
}, async ({ resource_id }) => {
|
|
11
|
+
try {
|
|
12
|
+
const fields = await getResourceSchema(resource_id);
|
|
13
|
+
return {
|
|
14
|
+
content: [{ type: 'text', text: JSON.stringify(fields, null, 2) }],
|
|
15
|
+
};
|
|
16
|
+
}
|
|
17
|
+
catch (error) {
|
|
18
|
+
return {
|
|
19
|
+
content: [{ type: 'text', text: `Error: ${error instanceof Error ? error.message : String(error)}` }],
|
|
20
|
+
isError: true,
|
|
21
|
+
};
|
|
22
|
+
}
|
|
23
|
+
});
|
|
24
|
+
}
|
package/dist/tools/search.js
CHANGED
|
@@ -11,7 +11,7 @@ export function registerSearchTool(server) {
|
|
|
11
11
|
}, async ({ query, limit }) => {
|
|
12
12
|
try {
|
|
13
13
|
const datasets = await searchDatasets(query, limit ?? 10);
|
|
14
|
-
const formatted = datasets.map(d => ({
|
|
14
|
+
const formatted = datasets.results.map(d => ({
|
|
15
15
|
id: d.name,
|
|
16
16
|
title: d.title,
|
|
17
17
|
description: d.notes?.slice(0, 200) ?? '',
|
|
@@ -19,7 +19,7 @@ export function registerSearchTool(server) {
|
|
|
19
19
|
resource_count: d.num_resources,
|
|
20
20
|
}));
|
|
21
21
|
return {
|
|
22
|
-
content: [{ type: 'text', text: JSON.stringify(formatted, null, 2) }],
|
|
22
|
+
content: [{ type: 'text', text: JSON.stringify({ total: datasets.total, returned: formatted.length, results: formatted }, null, 2) }],
|
|
23
23
|
};
|
|
24
24
|
}
|
|
25
25
|
catch (error) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mcp-chilegob-dataset",
|
|
3
|
-
"version": "0.3.0",
|
|
3
|
+
"version": "0.3.2",
|
|
4
4
|
"description": "MCP server exposing Chile's open government dataset portal (datos.gob.cl / CKAN API v3)",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"license": "MIT",
|
|
@@ -34,10 +34,12 @@
|
|
|
34
34
|
"build": "tsc",
|
|
35
35
|
"start": "node dist/index.js",
|
|
36
36
|
"typecheck": "tsc --noEmit",
|
|
37
|
-
"prepublishOnly": "
|
|
37
|
+
"prepublishOnly": "npm run build",
|
|
38
|
+
"postbuild": "node scripts/add-shebang.mjs",
|
|
39
|
+
"test": "vitest run"
|
|
38
40
|
},
|
|
39
41
|
"dependencies": {
|
|
40
|
-
"@hono/node-server": "^1.19.13",
|
|
42
|
+
"@hono/node-server": "^1.19.13",
|
|
41
43
|
"@modelcontextprotocol/hono": "^2.0.0-alpha.2",
|
|
42
44
|
"@modelcontextprotocol/server": "^2.0.0-alpha.2",
|
|
43
45
|
"hono": "^4.12.12",
|
|
@@ -46,6 +48,7 @@
|
|
|
46
48
|
"devDependencies": {
|
|
47
49
|
"@types/node": "^20.11.17",
|
|
48
50
|
"tsx": "^4.7.1",
|
|
49
|
-
"typescript": "^5.8.3"
|
|
51
|
+
"typescript": "^5.8.3",
|
|
52
|
+
"vitest": "^4.1.4"
|
|
50
53
|
}
|
|
51
54
|
}
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
import { describe, it, expect, vi, afterEach } from 'vitest'
|
|
2
|
+
import {
|
|
3
|
+
NotParseableError,
|
|
4
|
+
CkanHttpError,
|
|
5
|
+
CkanApiError,
|
|
6
|
+
fetchAndParseFile,
|
|
7
|
+
} from '../ckan.ts'
|
|
8
|
+
|
|
9
|
+
afterEach(() => {
|
|
10
|
+
vi.restoreAllMocks()
|
|
11
|
+
})
|
|
12
|
+
|
|
13
|
+
describe('Error classes', () => {
|
|
14
|
+
it('NotParseableError sets format and url', () => {
|
|
15
|
+
const err = new NotParseableError('XLS', 'https://example.com/file.xls')
|
|
16
|
+
expect(err.format).toBe('XLS')
|
|
17
|
+
expect(err.url).toBe('https://example.com/file.xls')
|
|
18
|
+
expect(err.name).toBe('NotParseableError')
|
|
19
|
+
expect(err).toBeInstanceOf(Error)
|
|
20
|
+
})
|
|
21
|
+
|
|
22
|
+
it('CkanHttpError sets statusCode and statusText', () => {
|
|
23
|
+
const err = new CkanHttpError(404, 'Not Found')
|
|
24
|
+
expect(err.statusCode).toBe(404)
|
|
25
|
+
expect(err.statusText).toBe('Not Found')
|
|
26
|
+
expect(err.name).toBe('CkanHttpError')
|
|
27
|
+
})
|
|
28
|
+
|
|
29
|
+
it('CkanApiError sets errorType', () => {
|
|
30
|
+
const err = new CkanApiError('something failed', 'Validation Error')
|
|
31
|
+
expect(err.errorType).toBe('Validation Error')
|
|
32
|
+
expect(err.name).toBe('CkanApiError')
|
|
33
|
+
})
|
|
34
|
+
})
|
|
35
|
+
|
|
36
|
+
describe('fetchAndParseFile', () => {
|
|
37
|
+
it('throws NotParseableError for XLS format', async () => {
|
|
38
|
+
await expect(fetchAndParseFile('https://x.com/f.xls', 'XLS', 10, 0))
|
|
39
|
+
.rejects.toBeInstanceOf(NotParseableError)
|
|
40
|
+
})
|
|
41
|
+
|
|
42
|
+
it('throws NotParseableError for PDF format', async () => {
|
|
43
|
+
await expect(fetchAndParseFile('https://x.com/f.pdf', 'PDF', 10, 0))
|
|
44
|
+
.rejects.toBeInstanceOf(NotParseableError)
|
|
45
|
+
})
|
|
46
|
+
|
|
47
|
+
it('parses CSV and returns correct fields and records', async () => {
|
|
48
|
+
const csvContent = 'Region,Provincia,Comuna\nCOQUIMBO,ELQUI,LA SERENA\nATACAMA,COPIAPO,COPIAPO\n'
|
|
49
|
+
vi.spyOn(global, 'fetch').mockResolvedValueOnce(
|
|
50
|
+
new Response(csvContent, {
|
|
51
|
+
status: 200,
|
|
52
|
+
headers: { 'content-type': 'text/csv; charset=utf-8' },
|
|
53
|
+
})
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
const result = await fetchAndParseFile('https://x.com/data.csv', 'CSV', 10, 0)
|
|
57
|
+
expect(result.fields).toEqual([
|
|
58
|
+
{ id: 'Region', type: 'text' },
|
|
59
|
+
{ id: 'Provincia', type: 'text' },
|
|
60
|
+
{ id: 'Comuna', type: 'text' },
|
|
61
|
+
])
|
|
62
|
+
expect(result.records).toHaveLength(2)
|
|
63
|
+
expect(result.records[0]).toEqual({ Region: 'COQUIMBO', Provincia: 'ELQUI', Comuna: 'LA SERENA' })
|
|
64
|
+
expect(result.total).toBe(2)
|
|
65
|
+
expect(result.source).toBe('file')
|
|
66
|
+
})
|
|
67
|
+
|
|
68
|
+
it('respects limit and offset', async () => {
|
|
69
|
+
const lines = ['A,B', '1,2', '3,4', '5,6', '7,8'].join('\n')
|
|
70
|
+
vi.spyOn(global, 'fetch').mockResolvedValueOnce(
|
|
71
|
+
new Response(lines, { status: 200, headers: { 'content-type': 'text/csv' } })
|
|
72
|
+
)
|
|
73
|
+
|
|
74
|
+
const result = await fetchAndParseFile('https://x.com/data.csv', 'CSV', 2, 1)
|
|
75
|
+
expect(result.records).toHaveLength(2)
|
|
76
|
+
expect(result.records[0]).toEqual({ A: '3', B: '4' })
|
|
77
|
+
expect(result.total).toBe(4)
|
|
78
|
+
})
|
|
79
|
+
|
|
80
|
+
it('parses TSV and splits on tab', async () => {
|
|
81
|
+
const tsv = 'A\tB\n1\t2\n3\t4\n'
|
|
82
|
+
vi.spyOn(global, 'fetch').mockResolvedValueOnce(
|
|
83
|
+
new Response(tsv, { status: 200, headers: { 'content-type': 'text/tab-separated-values' } })
|
|
84
|
+
)
|
|
85
|
+
const result = await fetchAndParseFile('https://x.com/data.tsv', 'TSV', 10, 0)
|
|
86
|
+
expect(result.records[0]).toEqual({ A: '1', B: '2' })
|
|
87
|
+
expect(result.total).toBe(2)
|
|
88
|
+
expect(result.source).toBe('file')
|
|
89
|
+
})
|
|
90
|
+
|
|
91
|
+
it('parses JSON array and returns records', async () => {
|
|
92
|
+
const json = JSON.stringify([{ name: 'Chile', code: 'CL' }, { name: 'Peru', code: 'PE' }])
|
|
93
|
+
vi.spyOn(global, 'fetch').mockResolvedValueOnce(
|
|
94
|
+
new Response(json, { status: 200, headers: { 'content-type': 'application/json' } })
|
|
95
|
+
)
|
|
96
|
+
const result = await fetchAndParseFile('https://x.com/data.json', 'JSON', 10, 0)
|
|
97
|
+
expect(result.total).toBe(2)
|
|
98
|
+
expect(result.records[0]).toEqual({ name: 'Chile', code: 'CL' })
|
|
99
|
+
expect(result.fields).toEqual([{ id: 'name', type: 'text' }, { id: 'code', type: 'text' }])
|
|
100
|
+
expect(result.source).toBe('file')
|
|
101
|
+
})
|
|
102
|
+
|
|
103
|
+
it('returns empty result for empty CSV body', async () => {
|
|
104
|
+
vi.spyOn(global, 'fetch').mockResolvedValueOnce(
|
|
105
|
+
new Response('\n\n \n', { status: 200, headers: { 'content-type': 'text/csv' } })
|
|
106
|
+
)
|
|
107
|
+
const result = await fetchAndParseFile('https://x.com/empty.csv', 'CSV', 10, 0)
|
|
108
|
+
expect(result.fields).toEqual([])
|
|
109
|
+
expect(result.records).toEqual([])
|
|
110
|
+
expect(result.total).toBe(0)
|
|
111
|
+
})
|
|
112
|
+
|
|
113
|
+
it('throws CkanHttpError with correct status on non-ok response', async () => {
|
|
114
|
+
vi.spyOn(global, 'fetch').mockResolvedValueOnce(
|
|
115
|
+
new Response('Not Found', { status: 404, statusText: 'Not Found' })
|
|
116
|
+
)
|
|
117
|
+
const err = await fetchAndParseFile('https://x.com/data.csv', 'CSV', 10, 0).catch(e => e)
|
|
118
|
+
expect(err).toBeInstanceOf(CkanHttpError)
|
|
119
|
+
expect(err.statusCode).toBe(404)
|
|
120
|
+
expect(err.statusText).toBe('Not Found')
|
|
121
|
+
})
|
|
122
|
+
})
|
package/src/ckan.ts
CHANGED
|
@@ -119,16 +119,29 @@ async function ckanAction<T>(action: string, params: Record<string, unknown>): P
|
|
|
119
119
|
}
|
|
120
120
|
}
|
|
121
121
|
|
|
122
|
-
|
|
122
|
+
export interface CkanOrganization {
|
|
123
|
+
name: string
|
|
124
|
+
title: string
|
|
125
|
+
description: string | null
|
|
126
|
+
package_count: number
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
export interface CkanSearchResult {
|
|
130
|
+
total: number
|
|
131
|
+
results: CkanDataset[]
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
const searchCache = new TTLCache<CkanSearchResult>()
|
|
123
135
|
const datasetCache = new TTLCache<CkanDataset>()
|
|
124
136
|
|
|
125
|
-
export async function searchDatasets(query: string, limit: number = 10): Promise<CkanDataset[]> {
|
|
137
|
+
export async function searchDatasets(query: string, limit: number = 10): Promise<CkanSearchResult> {
|
|
126
138
|
const key = `search:${query}:${limit}`
|
|
127
139
|
const cached = searchCache.get(key)
|
|
128
140
|
if (cached !== undefined) return cached
|
|
129
|
-
const result = await ckanAction<{ results: CkanDataset[] }>('package_search', { q: query, rows: limit })
|
|
130
|
-
|
|
131
|
-
|
|
141
|
+
const result = await ckanAction<{ count: number; results: CkanDataset[] }>('package_search', { q: query, rows: limit })
|
|
142
|
+
const value: CkanSearchResult = { total: result.count, results: result.results }
|
|
143
|
+
searchCache.set(key, value, CACHE_TTL_MS)
|
|
144
|
+
return value
|
|
132
145
|
}
|
|
133
146
|
|
|
134
147
|
export async function getDataset(id: string): Promise<CkanDataset> {
|
|
@@ -156,8 +169,47 @@ export async function getResource(resourceId: string): Promise<CkanResourceDetai
|
|
|
156
169
|
return ckanAction<CkanResourceDetail>('resource_show', { id: resourceId })
|
|
157
170
|
}
|
|
158
171
|
|
|
172
|
+
export async function listOrganizations(): Promise<CkanOrganization[]> {
|
|
173
|
+
return ckanAction<CkanOrganization[]>('organization_list', { all_fields: true })
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
export interface CkanFieldSchema {
|
|
177
|
+
id: string
|
|
178
|
+
type: string
|
|
179
|
+
label: string | null
|
|
180
|
+
description: string | null
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
interface RawField {
|
|
184
|
+
id: string
|
|
185
|
+
type: string
|
|
186
|
+
info?: { label?: string; notes?: string }
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
export async function getResourceSchema(resourceId: string): Promise<CkanFieldSchema[]> {
|
|
190
|
+
const result = await ckanAction<{ id: string; fields: RawField[] }>('datastore_info', { id: resourceId })
|
|
191
|
+
return result.fields
|
|
192
|
+
.filter(field => field.id !== '_id')
|
|
193
|
+
.map(field => ({
|
|
194
|
+
id: field.id,
|
|
195
|
+
type: field.type,
|
|
196
|
+
label: field.info?.label ?? null,
|
|
197
|
+
description: field.info?.notes ?? null,
|
|
198
|
+
}))
|
|
199
|
+
}
|
|
200
|
+
|
|
159
201
|
const PARSEABLE_FORMATS = new Set(['CSV', 'TSV', 'JSON'])
|
|
160
202
|
|
|
203
|
+
function decodeText(buffer: ArrayBuffer, contentType: string): string {
|
|
204
|
+
const charsetMatch = /charset=([^\s;]+)/i.exec(contentType)
|
|
205
|
+
const charset = (charsetMatch?.[1] ?? 'utf-8').replace(/^"|"$/g, '')
|
|
206
|
+
const text = new TextDecoder(charset).decode(buffer)
|
|
207
|
+
if (text.includes('\uFFFD') && charset.toLowerCase() === 'utf-8') {
|
|
208
|
+
return new TextDecoder('iso-8859-1').decode(buffer)
|
|
209
|
+
}
|
|
210
|
+
return text
|
|
211
|
+
}
|
|
212
|
+
|
|
161
213
|
export async function fetchAndParseFile(
|
|
162
214
|
url: string,
|
|
163
215
|
format: string,
|
|
@@ -184,7 +236,7 @@ export async function fetchAndParseFile(
|
|
|
184
236
|
clearTimeout(timer)
|
|
185
237
|
}
|
|
186
238
|
if (!response.ok) {
|
|
187
|
-
throw new
|
|
239
|
+
throw new CkanHttpError(response.status, response.statusText)
|
|
188
240
|
}
|
|
189
241
|
|
|
190
242
|
if (normalizedFormat === 'JSON') {
|
|
@@ -202,7 +254,9 @@ export async function fetchAndParseFile(
|
|
|
202
254
|
}
|
|
203
255
|
|
|
204
256
|
// CSV / TSV
|
|
205
|
-
const text = await response.text()
|
|
257
|
+
const buffer = await response.arrayBuffer()
|
|
258
|
+
const contentType = response.headers.get('content-type') ?? ''
|
|
259
|
+
const text = decodeText(buffer, contentType)
|
|
206
260
|
const separator = normalizedFormat === 'TSV' ? '\t' : ','
|
|
207
261
|
const lines = text.split(/\r?\n/).filter(l => l.trim() !== '')
|
|
208
262
|
|
package/src/server.ts
CHANGED
|
@@ -3,6 +3,8 @@ import { McpServer } from '@modelcontextprotocol/server'
|
|
|
3
3
|
import { registerSearchTool } from './tools/search.js'
|
|
4
4
|
import { registerDatasetTool } from './tools/dataset.js'
|
|
5
5
|
import { registerResourceTool } from './tools/resource.js'
|
|
6
|
+
import { registerOrganizationsTool } from './tools/organizations.js'
|
|
7
|
+
import { registerSchemaTool } from './tools/schema.js'
|
|
6
8
|
|
|
7
9
|
const _require = createRequire(import.meta.url)
|
|
8
10
|
const { version } = _require('../package.json') as { version: string }
|
|
@@ -15,3 +17,5 @@ export const server = new McpServer({
|
|
|
15
17
|
registerSearchTool(server)
|
|
16
18
|
registerDatasetTool(server)
|
|
17
19
|
registerResourceTool(server)
|
|
20
|
+
registerOrganizationsTool(server)
|
|
21
|
+
registerSchemaTool(server)
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import type { McpServer } from '@modelcontextprotocol/server'
|
|
2
|
+
import { z } from 'zod'
|
|
3
|
+
import { listOrganizations } from '../ckan.js'
|
|
4
|
+
|
|
5
|
+
export function registerOrganizationsTool(server: McpServer): void {
|
|
6
|
+
server.registerTool(
|
|
7
|
+
'list_organizations',
|
|
8
|
+
{
|
|
9
|
+
title: 'List Organizations',
|
|
10
|
+
description: 'List all government institutions (organizations) that publish datasets on datos.gob.cl. Use this to discover which institutions have data before searching.',
|
|
11
|
+
inputSchema: z.object({}),
|
|
12
|
+
},
|
|
13
|
+
async () => {
|
|
14
|
+
try {
|
|
15
|
+
const orgs = await listOrganizations()
|
|
16
|
+
const formatted = orgs
|
|
17
|
+
.filter(o => o.package_count > 0)
|
|
18
|
+
.sort((a, b) => b.package_count - a.package_count)
|
|
19
|
+
.map(o => ({
|
|
20
|
+
id: o.name,
|
|
21
|
+
title: o.title,
|
|
22
|
+
description: o.description?.slice(0, 150) ?? '',
|
|
23
|
+
dataset_count: o.package_count,
|
|
24
|
+
}))
|
|
25
|
+
return {
|
|
26
|
+
content: [{ type: 'text', text: JSON.stringify(formatted, null, 2) }],
|
|
27
|
+
}
|
|
28
|
+
} catch (error) {
|
|
29
|
+
return {
|
|
30
|
+
content: [{ type: 'text', text: `Error: ${error instanceof Error ? error.message : String(error)}` }],
|
|
31
|
+
isError: true,
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
)
|
|
36
|
+
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import type { McpServer } from '@modelcontextprotocol/server'
|
|
2
|
+
import { z } from 'zod'
|
|
3
|
+
import { getResourceSchema } from '../ckan.js'
|
|
4
|
+
|
|
5
|
+
export function registerSchemaTool(server: McpServer): void {
|
|
6
|
+
server.registerTool(
|
|
7
|
+
'get_resource_schema',
|
|
8
|
+
{
|
|
9
|
+
title: 'Get Resource Schema',
|
|
10
|
+
description:
|
|
11
|
+
'Get the column schema (field names, types, and descriptions) for a CKAN datastore resource. Only works for resources that have datastore enabled (datastore_available: true from get_dataset). Use this before reading data to understand the structure.',
|
|
12
|
+
inputSchema: z.object({
|
|
13
|
+
resource_id: z.string().describe('UUID of the resource (from get_dataset)'),
|
|
14
|
+
}),
|
|
15
|
+
},
|
|
16
|
+
async ({ resource_id }) => {
|
|
17
|
+
try {
|
|
18
|
+
const fields = await getResourceSchema(resource_id)
|
|
19
|
+
return {
|
|
20
|
+
content: [{ type: 'text', text: JSON.stringify(fields, null, 2) }],
|
|
21
|
+
}
|
|
22
|
+
} catch (error) {
|
|
23
|
+
return {
|
|
24
|
+
content: [{ type: 'text', text: `Error: ${error instanceof Error ? error.message : String(error)}` }],
|
|
25
|
+
isError: true,
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
)
|
|
30
|
+
}
|
package/src/tools/search.ts
CHANGED
|
@@ -16,7 +16,7 @@ export function registerSearchTool(server: McpServer): void {
|
|
|
16
16
|
async ({ query, limit }) => {
|
|
17
17
|
try {
|
|
18
18
|
const datasets = await searchDatasets(query, limit ?? 10)
|
|
19
|
-
const formatted = datasets.map(d => ({
|
|
19
|
+
const formatted = datasets.results.map(d => ({
|
|
20
20
|
id: d.name,
|
|
21
21
|
title: d.title,
|
|
22
22
|
description: d.notes?.slice(0, 200) ?? '',
|
|
@@ -24,7 +24,7 @@ export function registerSearchTool(server: McpServer): void {
|
|
|
24
24
|
resource_count: d.num_resources,
|
|
25
25
|
}))
|
|
26
26
|
return {
|
|
27
|
-
content: [{ type: 'text', text: JSON.stringify(formatted, null, 2) }],
|
|
27
|
+
content: [{ type: 'text', text: JSON.stringify({ total: datasets.total, returned: formatted.length, results: formatted }, null, 2) }],
|
|
28
28
|
}
|
|
29
29
|
} catch (error) {
|
|
30
30
|
return {
|