@ansvar/eu-regulations-mcp 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +242 -0
- package/data/seed/ai-act.json +1026 -0
- package/data/seed/applicability/dora.json +92 -0
- package/data/seed/applicability/gdpr.json +74 -0
- package/data/seed/applicability/nis2.json +83 -0
- package/data/seed/cra.json +690 -0
- package/data/seed/cybersecurity-act.json +534 -0
- package/data/seed/dora.json +719 -0
- package/data/seed/gdpr.json +732 -0
- package/data/seed/mappings/iso27001-dora.json +106 -0
- package/data/seed/mappings/iso27001-gdpr.json +114 -0
- package/data/seed/mappings/iso27001-nis2.json +98 -0
- package/data/seed/nis2.json +492 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +271 -0
- package/dist/index.js.map +1 -0
- package/dist/tools/applicability.d.ts +20 -0
- package/dist/tools/applicability.d.ts.map +1 -0
- package/dist/tools/applicability.js +42 -0
- package/dist/tools/applicability.js.map +1 -0
- package/dist/tools/article.d.ts +17 -0
- package/dist/tools/article.d.ts.map +1 -0
- package/dist/tools/article.js +29 -0
- package/dist/tools/article.js.map +1 -0
- package/dist/tools/compare.d.ts +18 -0
- package/dist/tools/compare.d.ts.map +1 -0
- package/dist/tools/compare.js +60 -0
- package/dist/tools/compare.js.map +1 -0
- package/dist/tools/definitions.d.ts +14 -0
- package/dist/tools/definitions.d.ts.map +1 -0
- package/dist/tools/definitions.js +26 -0
- package/dist/tools/definitions.js.map +1 -0
- package/dist/tools/list.d.ts +22 -0
- package/dist/tools/list.d.ts.map +1 -0
- package/dist/tools/list.js +67 -0
- package/dist/tools/list.js.map +1 -0
- package/dist/tools/map.d.ts +19 -0
- package/dist/tools/map.d.ts.map +1 -0
- package/dist/tools/map.js +44 -0
- package/dist/tools/map.js.map +1 -0
- package/dist/tools/search.d.ts +15 -0
- package/dist/tools/search.d.ts.map +1 -0
- package/dist/tools/search.js +62 -0
- package/dist/tools/search.js.map +1 -0
- package/package.json +70 -0
- package/scripts/build-db.ts +292 -0
- package/scripts/check-updates.ts +192 -0
- package/scripts/ingest-eurlex.ts +219 -0
- package/src/index.ts +294 -0
- package/src/tools/applicability.ts +84 -0
- package/src/tools/article.ts +61 -0
- package/src/tools/compare.ts +94 -0
- package/src/tools/definitions.ts +54 -0
- package/src/tools/list.ts +116 -0
- package/src/tools/map.ts +84 -0
- package/src/tools/search.ts +95 -0
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
#!/usr/bin/env npx tsx
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Check for updates to EU regulations from EUR-Lex.
|
|
5
|
+
* Compares current database versions against EUR-Lex metadata.
|
|
6
|
+
*
|
|
7
|
+
* Usage: npx tsx scripts/check-updates.ts
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import Database from 'better-sqlite3';
|
|
11
|
+
import { existsSync } from 'fs';
|
|
12
|
+
import { join, dirname } from 'path';
|
|
13
|
+
import { fileURLToPath } from 'url';
|
|
14
|
+
|
|
15
|
+
// ES modules have no built-in __filename/__dirname, so derive them from import.meta.url.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

// Location of the SQLite database, resolved relative to this script's directory.
const DB_PATH = join(__dirname, '..', 'data', 'regulations.db');
|
|
19
|
+
|
|
20
|
+
/** Row shape of the `source_registry` table as consumed by this script. */
interface SourceRecord {
  /** Regulation identifier stored in the registry. */
  regulation: string;
  /** CELEX number; used as the lookup key when matching monitored regulations. */
  celex_id: string;
  /** Version marker recorded at ingestion time, or null when none was stored. */
  eur_lex_version: string | null;
  /** Timestamp of the last ingestion, or null when never fetched. */
  last_fetched: string | null;
  /** Number of articles expected for the regulation, if recorded. */
  articles_expected: number | null;
  /** Number of articles actually parsed, if recorded. */
  articles_parsed: number | null;
  /** Ingestion quality marker; the update check treats anything but 'complete' as needing attention. */
  quality_status: string;
}
|
|
29
|
+
|
|
30
|
+
/** Metadata scraped from a EUR-Lex document page. */
interface EurLexMetadata {
  /** CELEX number the metadata was requested for. */
  celexId: string;
  /** Best-effort version date; the literal 'unknown' when nothing could be extracted. */
  lastModified: string;
  /** Page title, or 'Unknown' when the page has none. */
  title: string;
  /** "Date of document" value as printed on the page, or 'unknown'. */
  dateDocument: string;
  /** Optional list of consolidated versions. */
  consolidatedVersions?: string[];
}
|
|
37
|
+
|
|
38
|
+
// Regulations covered by the update check, listed as [CELEX number, short id, display name].
const MONITORED_REGULATIONS = (
  [
    ['32016R0679', 'GDPR', 'General Data Protection Regulation'],
    ['32022L2555', 'NIS2', 'NIS2 Directive'],
    ['32022R2554', 'DORA', 'Digital Operational Resilience Act'],
    ['32024R1689', 'AI_ACT', 'Artificial Intelligence Act'],
    ['32024R2847', 'CRA', 'Cyber Resilience Act'],
    ['32019R0881', 'CYBERSECURITY_ACT', 'EU Cybersecurity Act'],
  ] as Array<[string, string, string]>
).map(([celex_id, id, name]) => ({ celex_id, id, name }));
|
|
47
|
+
|
|
48
|
+
/**
 * Download the EUR-Lex page for a document and scrape basic metadata from its HTML.
 *
 * The values are extracted with regular expressions over the raw page, so they are
 * best-effort: fields that cannot be found fall back to 'unknown' / 'Unknown'.
 *
 * @param celexId CELEX number of the document (e.g. "32016R0679").
 * @returns The scraped metadata, or null when the request fails or returns a non-2xx status
 *          (the failure is logged to stderr rather than thrown).
 */
async function fetchEurLexMetadata(celexId: string): Promise<EurLexMetadata | null> {
  const infoUrl = `https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX:${celexId}`;

  try {
    const response = await fetch(infoUrl, {
      headers: {
        'User-Agent': 'Mozilla/5.0 (compatible; EU-Compliance-MCP/1.0)',
        'Accept': 'text/html',
      },
    });

    if (!response.ok) {
      console.error(`Failed to fetch metadata for ${celexId}: ${response.status}`);
      return null;
    }

    const html = await response.text();

    // "Date of document: dd/mm/yyyy" as printed on the page.
    const dateMatch = html.match(/Date of document:\s*(\d{2}\/\d{2}\/\d{4})/i);
    // First ISO date that follows an "ELI" marker on the same line.
    const lastModMatch = html.match(/ELI.*?(\d{4}-\d{2}-\d{2})/i);
    const titleMatch = html.match(/<title>([^<]+)<\/title>/i);

    return {
      celexId,
      // Prefer the ELI date; fall back to the document date, then to the 'unknown' sentinel.
      lastModified: lastModMatch?.[1] || dateMatch?.[1] || 'unknown',
      title: titleMatch?.[1]?.trim() || 'Unknown',
      dateDocument: dateMatch?.[1] || 'unknown',
    };
  } catch (error) {
    console.error(`Error fetching metadata for ${celexId}:`, error);
    return null;
  }
}
|
|
89
|
+
|
|
90
|
+
/**
 * Compare every monitored regulation against the local source registry and the
 * current EUR-Lex page, print a status report, and list the commands needed to
 * refresh anything that is missing, changed, or incomplete.
 *
 * Exits the process with code 1 when the database file does not exist.
 * The database is opened read-only and is always closed, even if a check throws.
 */
async function checkForUpdates(): Promise<void> {
  console.log('Checking EUR-Lex for regulation updates...\n');

  // Check if database exists
  if (!existsSync(DB_PATH)) {
    console.log('Database not found. Run `npm run build:db` first.');
    process.exit(1);
  }

  const db = new Database(DB_PATH, { readonly: true });
  const updates: Array<{ id: string; celex_id: string; reason: string }> = [];

  try {
    // Only the columns needed for the report are selected.
    const sources = db.prepare(`
      SELECT regulation, celex_id, eur_lex_version, last_fetched, quality_status
      FROM source_registry
    `).all() as Array<
      Pick<SourceRecord, 'regulation' | 'celex_id' | 'eur_lex_version' | 'last_fetched' | 'quality_status'>
    >;

    const sourceMap = new Map(sources.map(s => [s.celex_id, s]));

    console.log('Status Report');
    console.log('='.repeat(80));

    for (const reg of MONITORED_REGULATIONS) {
      const source = sourceMap.get(reg.celex_id);

      process.stdout.write(`\n${reg.id.padEnd(20)} (${reg.celex_id}): `);

      if (!source) {
        console.log('NOT INGESTED');
        updates.push({ id: reg.id, celex_id: reg.celex_id, reason: 'Not yet ingested' });
        continue;
      }

      // Fetch current EUR-Lex metadata
      const metadata = await fetchEurLexMetadata(reg.celex_id);

      if (!metadata) {
        console.log('FETCH FAILED');
        continue;
      }

      const lastFetched = source.last_fetched || 'never';
      const localVersion = source.eur_lex_version;
      const remoteVersion = metadata.lastModified;
      // 'unknown' is the sentinel returned when no date could be scraped; it is not a real version.
      const remoteKnown = remoteVersion !== 'unknown';

      if (localVersion && remoteKnown && localVersion !== remoteVersion) {
        console.log('UPDATE AVAILABLE');
        console.log(` Local version: ${localVersion}`);
        console.log(` EUR-Lex version: ${remoteVersion}`);
        updates.push({ id: reg.id, celex_id: reg.celex_id, reason: `Version changed: ${localVersion} -> ${remoteVersion}` });
      } else if (source.quality_status !== 'complete') {
        console.log(`INCOMPLETE (${source.quality_status})`);
        updates.push({ id: reg.id, celex_id: reg.celex_id, reason: `Quality status: ${source.quality_status}` });
      } else if (!remoteKnown) {
        // Cannot tell either way; report it instead of claiming a change or an up-to-date state.
        console.log('REMOTE VERSION UNKNOWN');
        console.log(` Last fetched: ${lastFetched}`);
      } else {
        console.log('UP TO DATE');
        console.log(` Last fetched: ${lastFetched}`);
      }
    }
  } finally {
    db.close();
  }

  // Summary
  console.log('\n' + '='.repeat(80));
  console.log('Summary');
  console.log('='.repeat(80));

  if (updates.length === 0) {
    console.log('\n✓ All monitored regulations are up to date.');
    return;
  }

  console.log(`\n⚠ ${updates.length} regulation(s) need attention:\n`);
  for (const u of updates) {
    console.log(` - ${u.id}: ${u.reason}`);
  }

  console.log('\nTo update, run:');
  for (const u of updates) {
    // Seed files are named with hyphens (ai-act.json, cybersecurity-act.json), ids with underscores.
    const seedName = u.id.toLowerCase().replace(/_/g, '-');
    console.log(` npx tsx scripts/ingest-eurlex.ts ${u.celex_id} data/seed/${seedName}.json`);
  }
  console.log('\nThen: npm run build:db');
}
|
|
172
|
+
|
|
173
|
+
// Also provide a function to update the source registry after ingestion
|
|
174
|
+
/**
 * Record a completed ingestion in the `source_registry` table.
 *
 * Inserts or replaces the row for the regulation, stamping it with the current
 * time, using today's date (YYYY-MM-DD) as the stored version, and marking it 'complete'.
 *
 * @param db Open, writable database handle.
 * @param regulation Regulation identifier to store.
 * @param celexId CELEX number of the regulation.
 * @param articleCount Stored as both the expected and the parsed article count.
 */
export async function updateSourceRegistry(
  db: Database.Database,
  regulation: string,
  celexId: string,
  articleCount: number
): Promise<void> {
  const fetchedAt = new Date().toISOString();
  // Date portion of the ISO timestamp (everything before the 'T').
  const [versionDate] = fetchedAt.split('T');

  const upsert = db.prepare(`
    INSERT OR REPLACE INTO source_registry
    (regulation, celex_id, eur_lex_version, last_fetched, articles_expected, articles_parsed, quality_status)
    VALUES (?, ?, ?, ?, ?, ?, 'complete')
  `);

  upsert.run(regulation, celexId, versionDate, fetchedAt, articleCount, articleCount);
}
|
|
188
|
+
|
|
189
|
+
// Script entry point: run the check and turn any unhandled failure into exit code 1.
// NOTE(review): this runs whenever the module is loaded, including when it is imported
// only for `updateSourceRegistry` — confirm that is intended.
checkForUpdates().catch((failure) => {
  console.error('Error:', failure);
  process.exit(1);
});
|
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
#!/usr/bin/env npx tsx
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Ingest EU regulations from EUR-Lex.
|
|
5
|
+
*
|
|
6
|
+
* Usage: npx tsx scripts/ingest-eurlex.ts <celex_id> <output_file>
|
|
7
|
+
* Example: npx tsx scripts/ingest-eurlex.ts 32016R0679 data/seed/gdpr.json
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { writeFileSync } from 'fs';
|
|
11
|
+
import { JSDOM } from 'jsdom';
|
|
12
|
+
|
|
13
|
+
/** A single article extracted from a regulation's text. */
interface Article {
  /** Article number as written in the heading (digits only, kept as a string). */
  number: string;
  /** Heading line following the article number, when one was detected. */
  title?: string;
  /** Body text of the article. */
  text: string;
  /** Roman numeral of the chapter the article was found under, if any. */
  chapter?: string;
}
|
|
19
|
+
|
|
20
|
+
/** A defined term extracted from a regulation's definitions article. */
interface Definition {
  /** The defined term. */
  term: string;
  /** Text of the definition. */
  definition: string;
  /** Number of the article the definition was taken from. */
  article: string;
}
|
|
25
|
+
|
|
26
|
+
/** Shape of the JSON seed file written for one regulation. */
interface RegulationData {
  /** Short identifier (e.g. "GDPR"); falls back to the CELEX number for unknown documents. */
  id: string;
  /** Human-readable name of the regulation. */
  full_name: string;
  /** CELEX number the data was ingested from. */
  celex_id: string;
  /** Date the regulation takes effect, when known. */
  effective_date?: string;
  /** Link to the regulation on EUR-Lex. */
  eur_lex_url: string;
  /** Parsed articles. */
  articles: Article[];
  /** Parsed definitions. */
  definitions: Definition[];
}
|
|
35
|
+
|
|
36
|
+
/** Static metadata for the regulations this script knows about, keyed by CELEX number. */
const REGULATION_METADATA: Record<string, { id: string; full_name: string; effective_date?: string }> = {
  '32016R0679': {
    id: 'GDPR',
    full_name: 'General Data Protection Regulation',
    effective_date: '2018-05-25',
  },
  '32022L2555': {
    id: 'NIS2',
    full_name: 'Directive on measures for a high common level of cybersecurity across the Union',
    effective_date: '2024-10-17',
  },
  '32022R2554': {
    id: 'DORA',
    full_name: 'Digital Operational Resilience Act',
    effective_date: '2025-01-17',
  },
  '32024R1689': {
    id: 'AI_ACT',
    full_name: 'Artificial Intelligence Act',
    effective_date: '2024-08-01',
  },
  '32024R2847': {
    id: 'CRA',
    full_name: 'Cyber Resilience Act',
    effective_date: '2024-12-10',
  },
  '32019R0881': {
    id: 'CYBERSECURITY_ACT',
    full_name: 'EU Cybersecurity Act',
    effective_date: '2019-06-27',
  },
};
|
|
44
|
+
|
|
45
|
+
/**
 * Download the English HTML rendition of a EUR-Lex document.
 *
 * @param celexId CELEX number of the document.
 * @returns The response body as text.
 * @throws Error when the server answers with a non-2xx status.
 */
async function fetchEurLexHtml(celexId: string): Promise<string> {
  const url = `https://eur-lex.europa.eu/legal-content/EN/TXT/HTML/?uri=CELEX:${celexId}`;
  console.log(`Fetching: ${url}`);

  const requestHeaders = {
    'User-Agent': 'Mozilla/5.0 (compatible; EU-Compliance-MCP/1.0; +https://github.com/Ansvar-Systems/EU_compliance_MCP)',
    'Accept': 'text/html',
  };
  const response = await fetch(url, { headers: requestHeaders });

  if (response.ok) {
    return response.text();
  }
  throw new Error(`Failed to fetch: ${response.status} ${response.statusText}`);
}
|
|
62
|
+
|
|
63
|
+
/**
 * Split the flattened text of a EUR-Lex HTML page into articles and extract the
 * numbered entries of the definitions article.
 *
 * Articles are recognised by lines consisting solely of "Article <n>"; the first short
 * line after such a heading that does not end with a period is taken as the title.
 * When the same article number occurs more than once, the occurrence with the longest
 * text wins. Definitions are read from the first article whose title contains "definition".
 *
 * @param html Raw HTML of the document.
 * @param celexId CELEX number of the document (currently unused by the parser).
 * @returns Articles sorted by number, plus any definitions found.
 */
function parseArticles(html: string, celexId: string): { articles: Article[]; definitions: Definition[] } {
  const dom = new JSDOM(html);
  const doc = dom.window.document;

  const articles: Article[] = [];
  const definitions: Definition[] = [];
  let currentChapter = '';

  // Get all text content and split by article markers
  const allText = doc.body?.textContent || '';
  const lines = allText.split('\n').map(l => l.trim()).filter(l => l);

  // The chapter is captured when the article STARTS: by the time the article is flushed
  // the parser may already have seen the next chapter heading.
  let currentArticle: { number: string; title?: string; chapter: string; lines: string[] } | null = null;

  // Turn the accumulator into a finished Article.
  const finish = (pending: { number: string; title?: string; chapter: string; lines: string[] }): Article => ({
    number: pending.number,
    title: pending.title,
    text: pending.lines.join('\n\n'),
    chapter: pending.chapter || undefined,
  });

  for (const line of lines) {
    const articleStart = line.match(/^Article\s+(\d+)$/i);
    if (articleStart) {
      if (currentArticle && currentArticle.lines.length > 0) {
        articles.push(finish(currentArticle));
      }
      currentArticle = { number: articleStart[1], chapter: currentChapter, lines: [] };
      continue;
    }

    const chapterStart = line.match(/^CHAPTER\s+([IVXLC]+)/i);
    if (chapterStart) {
      currentChapter = chapterStart[1];
      // A stand-alone upper-case heading line closes the running article, so the chapter
      // title that follows is not appended to the previous article's text. Looser matches
      // (which could be body text) only update the chapter, as before.
      if (/^CHAPTER\s+[IVXLC]+$/.test(line)) {
        if (currentArticle && currentArticle.lines.length > 0) {
          articles.push(finish(currentArticle));
        }
        currentArticle = null;
      }
      continue;
    }

    if (currentArticle) {
      // Check if this is a title line (short, no period at end)
      if (!currentArticle.title && currentArticle.lines.length === 0 && line.length < 100 && !line.endsWith('.')) {
        currentArticle.title = line;
      } else if (line.length > 0) {
        currentArticle.lines.push(line);
      }
    }
  }

  // Don't forget the last article
  if (currentArticle && currentArticle.lines.length > 0) {
    articles.push(finish(currentArticle));
  }

  // Deduplicate articles - keep the one with the most content for each number
  const articleMap = new Map<string, Article>();
  for (const article of articles) {
    const existing = articleMap.get(article.number);
    if (!existing || article.text.length > existing.text.length) {
      articleMap.set(article.number, article);
    }
  }
  const deduplicatedArticles = Array.from(articleMap.values())
    .sort((a, b) => parseInt(a.number, 10) - parseInt(b.number, 10));

  // Find the definitions article from the deduplicated list
  const defsArticle = deduplicatedArticles.find(a =>
    a.title?.toLowerCase().includes('definition')
  );

  if (defsArticle && defsArticle.text.includes('means')) {
    // Normalize text: collapse whitespace and normalize quotes
    const normalizedText = defsArticle.text
      .replace(/\s+/g, ' ')
      .replace(/[\u2018\u2019]/g, "'"); // Curly quotes to straight

    // Parse definitions by extracting content between consecutive numbered entries
    // This handles:
    // - Complex definitions with internal periods/semicolons
    // - 'term' or 'alternate' means... patterns (NIS2 Art 6)
    // - 'term1', 'term2' and 'term3' mean... patterns (CRA Art 3)
    // - 'term' of the something means... patterns (GDPR Art 4)
    // - mean, respectively... patterns (CRA Art 3)
    // - means: (a) ... patterns (complex definitions with sub-parts)
    const defRegex = /\((\d+)\)\s*'([^']+)'(?:[^(]*?)means?[,:;]?\s+(.+?)(?=\(\d+\)\s*'|$)/g;
    let defMatch;
    while ((defMatch = defRegex.exec(normalizedText)) !== null) {
      const term = defMatch[2].trim().toLowerCase();
      const definition = defMatch[3].trim();
      // Only add if we got meaningful content
      if (term.length > 0 && definition.length > 10) {
        definitions.push({
          term,
          definition,
          article: defsArticle.number,
        });
      }
    }
  }

  return { articles: deduplicatedArticles, definitions };
}
|
|
167
|
+
|
|
168
|
+
/**
 * Fetch a regulation from EUR-Lex, parse it, and write the result as a JSON seed file.
 *
 * When no articles can be parsed, the raw HTML is saved next to the intended output
 * (same path with an `.html` extension) for debugging, no JSON is written, and the
 * process exit code is set to 1.
 *
 * @param celexId CELEX number of the document to ingest.
 * @param outputPath Path of the JSON file to write.
 * @throws Error propagated from the download when EUR-Lex answers with a non-2xx status.
 */
async function ingestRegulation(celexId: string, outputPath: string): Promise<void> {
  const metadata = REGULATION_METADATA[celexId];
  if (!metadata) {
    console.warn(`Unknown CELEX ID: ${celexId}. Using generic metadata.`);
  }

  const html = await fetchEurLexHtml(celexId);
  console.log(`Fetched ${html.length} bytes`);

  const { articles, definitions } = parseArticles(html, celexId);
  console.log(`Parsed ${articles.length} articles, ${definitions.length} definitions`);

  if (articles.length === 0) {
    console.error('No articles found! The HTML structure may have changed.');
    console.log('Saving raw HTML for debugging...');
    // Swap a trailing ".json" for ".html"; append ".html" otherwise so the dump never
    // lands on the output path itself.
    writeFileSync(outputPath.replace(/\.json$/i, '') + '.html', html);
    // Signal the failed ingestion to the caller without aborting mid-function.
    process.exitCode = 1;
    return;
  }

  const regulation: RegulationData = {
    id: metadata?.id || celexId,
    full_name: metadata?.full_name || `Regulation ${celexId}`,
    celex_id: celexId,
    effective_date: metadata?.effective_date,
    // Link to the document that was actually ingested (previously always the GDPR URL).
    eur_lex_url: `https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX:${celexId}`,
    articles,
    definitions,
  };

  writeFileSync(outputPath, JSON.stringify(regulation, null, 2));
  console.log(`\nSaved to: ${outputPath}`);
  console.log(`Articles: ${articles.length}`);
  console.log(`Definitions: ${definitions.length}`);
}
|
|
202
|
+
|
|
203
|
+
// Main
|
|
204
|
+
// Command-line entry point: expects <celex_id> <output_file> after the script name.
const celexId = process.argv[2];
const outputPath = process.argv[3];

if (!celexId || !outputPath) {
  // Missing arguments: print usage plus the known CELEX ids, then exit with an error code.
  console.log('Usage: npx tsx scripts/ingest-eurlex.ts <celex_id> <output_file>');
  console.log('Example: npx tsx scripts/ingest-eurlex.ts 32016R0679 data/seed/gdpr.json');
  console.log('\nKnown CELEX IDs:');
  for (const [id, meta] of Object.entries(REGULATION_METADATA)) {
    console.log(` ${id} - ${meta.id} (${meta.full_name})`);
  }
  process.exit(1);
}

ingestRegulation(celexId, outputPath).catch((failure) => {
  console.error('Error:', failure);
  process.exit(1);
});
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,294 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
|
|
4
|
+
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
|
5
|
+
import {
|
|
6
|
+
CallToolRequestSchema,
|
|
7
|
+
ListToolsRequestSchema,
|
|
8
|
+
} from '@modelcontextprotocol/sdk/types.js';
|
|
9
|
+
import Database from 'better-sqlite3';
|
|
10
|
+
import { fileURLToPath } from 'url';
|
|
11
|
+
import { dirname, join } from 'path';
|
|
12
|
+
|
|
13
|
+
import { searchRegulations, type SearchInput } from './tools/search.js';
|
|
14
|
+
import { getArticle, type GetArticleInput } from './tools/article.js';
|
|
15
|
+
import { listRegulations, type ListInput } from './tools/list.js';
|
|
16
|
+
import { compareRequirements, type CompareInput } from './tools/compare.js';
|
|
17
|
+
import { mapControls, type MapControlsInput } from './tools/map.js';
|
|
18
|
+
import { checkApplicability, type ApplicabilityInput } from './tools/applicability.js';
|
|
19
|
+
import { getDefinitions, type DefinitionsInput } from './tools/definitions.js';
|
|
20
|
+
|
|
21
|
+
// ES modules have no built-in __filename/__dirname, so derive them from import.meta.url.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

// Database location: the EU_COMPLIANCE_DB_PATH environment variable when set (and non-empty),
// otherwise regulations.db in the data folder one level above this module.
const DB_PATH = process.env.EU_COMPLIANCE_DB_PATH || join(__dirname, '..', 'data', 'regulations.db');
|
|
26
|
+
|
|
27
|
+
// Shared connection, created on first use by getDatabase().
let db: Database.Database;

/**
 * Return the shared read-only database connection, opening it on the first call.
 *
 * @returns The open database handle; later calls return the same instance.
 * @throws Error naming the database path when the file cannot be opened.
 */
function getDatabase(): Database.Database {
  if (db) {
    return db;
  }

  try {
    db = new Database(DB_PATH, { readonly: true });
  } catch (error) {
    throw new Error(`Failed to open database at ${DB_PATH}: ${error}`);
  }
  return db;
}
|
|
39
|
+
|
|
40
|
+
// MCP server instance: identifies itself as eu-regulations-mcp 0.1.0 and advertises
// the tools capability only.
const server = new Server(
  { name: 'eu-regulations-mcp', version: '0.1.0' },
  { capabilities: { tools: {} } }
);
|
|
51
|
+
|
|
52
|
+
// Define available tools
|
|
53
|
+
/** JSON-schema fragment for a string property; `choices` adds an `enum` constraint. */
function stringProp(description: string, choices?: string[]) {
  return choices
    ? { type: 'string', enum: choices, description }
    : { type: 'string', description };
}

/** JSON-schema fragment for an array-of-strings property. */
function stringListProp(description: string) {
  return { type: 'array', items: { type: 'string' }, description };
}

/** JSON-schema fragment for a numeric property. */
function numberProp(description: string) {
  return { type: 'number', description };
}

/** Assemble one tool entry; `required` is omitted from the schema when not given. */
function defineTool(
  name: string,
  description: string,
  properties: Record<string, unknown>,
  required?: string[]
) {
  const inputSchema = required
    ? { type: 'object' as const, properties, required }
    : { type: 'object' as const, properties };
  return { name, description, inputSchema };
}

// Advertise the available tools and their input schemas.
server.setRequestHandler(ListToolsRequestSchema, async () => ({
  tools: [
    defineTool(
      'search_regulations',
      'Search across all EU regulations for articles matching a query. Returns relevant articles with snippets highlighting matches.',
      {
        query: stringProp('Search query (e.g., "incident reporting", "personal data breach")'),
        regulations: stringListProp('Optional: filter to specific regulations (e.g., ["GDPR", "NIS2"])'),
        limit: numberProp('Maximum results to return (default: 10)'),
      },
      ['query']
    ),
    defineTool(
      'get_article',
      'Retrieve the full text of a specific article from a regulation.',
      {
        regulation: stringProp('Regulation ID (e.g., "GDPR", "NIS2", "DORA")'),
        article: stringProp('Article number (e.g., "17", "23")'),
      },
      ['regulation', 'article']
    ),
    defineTool(
      'list_regulations',
      'List available regulations and their structure. Without parameters, lists all regulations. With a regulation specified, shows chapters and articles.',
      {
        regulation: stringProp('Optional: specific regulation to get detailed structure for'),
      }
    ),
    defineTool(
      'compare_requirements',
      'Compare requirements across multiple regulations on a specific topic. Useful for understanding differences in how regulations address similar concerns.',
      {
        topic: stringProp('Topic to compare (e.g., "incident reporting", "risk assessment")'),
        regulations: stringListProp('Regulations to compare (e.g., ["DORA", "NIS2"])'),
      },
      ['topic', 'regulations']
    ),
    defineTool(
      'map_controls',
      'Map ISO 27001:2022 controls to EU regulation requirements. Shows which articles satisfy specific security controls.',
      {
        framework: stringProp('Control framework (currently only ISO27001 supported)', ['ISO27001']),
        control: stringProp('Optional: specific control ID (e.g., "A.5.1", "A.6.8")'),
        regulation: stringProp('Optional: filter mappings to specific regulation'),
      },
      ['framework']
    ),
    defineTool(
      'check_applicability',
      'Determine which EU regulations apply to an organization based on sector and characteristics.',
      {
        sector: stringProp('Organization sector', [
          'financial',
          'healthcare',
          'energy',
          'transport',
          'digital_infrastructure',
          'public_administration',
          'manufacturing',
          'other',
        ]),
        subsector: stringProp('Optional: more specific subsector (e.g., "bank", "insurance" for financial)'),
        member_state: stringProp('Optional: EU member state (ISO country code)'),
        size: stringProp('Optional: organization size', ['sme', 'large']),
      },
      ['sector']
    ),
    defineTool(
      'get_definitions',
      'Look up official definitions of terms from EU regulations. Terms are defined in each regulation\'s definitions article.',
      {
        term: stringProp('Term to look up (e.g., "personal data", "incident", "processing")'),
        regulation: stringProp('Optional: filter to specific regulation'),
      },
      ['term']
    ),
  ],
}));
|
|
199
|
+
|
|
200
|
+
// Handle tool calls
|
|
201
|
+
// Dispatch a tool call to its implementation. Results are returned as pretty-printed
// JSON text; any thrown error (including a failure to open the database) is reported
// as an error result instead of propagating.
server.setRequestHandler(CallToolRequestSchema, async (request) => {
  const toolName = request.params.name;
  const rawArgs = request.params.arguments;

  // Successful result: the payload serialised as indented JSON.
  const jsonResult = (payload: unknown) => ({
    content: [{ type: 'text' as const, text: JSON.stringify(payload, null, 2) }],
  });
  // Failure result carrying a plain-text message.
  const errorResult = (message: string) => ({
    content: [{ type: 'text' as const, text: message }],
    isError: true,
  });

  try {
    // Opened before dispatch, so an unavailable database is reported for every tool name.
    const database = getDatabase();

    switch (toolName) {
      case 'search_regulations':
        return jsonResult(await searchRegulations(database, rawArgs as unknown as SearchInput));

      case 'get_article': {
        const lookup = rawArgs as unknown as GetArticleInput;
        const found = await getArticle(database, lookup);
        return found
          ? jsonResult(found)
          : errorResult(`Article ${lookup.article} not found in ${lookup.regulation}`);
      }

      case 'list_regulations':
        // Arguments are optional for this tool; default to an empty input.
        return jsonResult(await listRegulations(database, (rawArgs ?? {}) as unknown as ListInput));

      case 'compare_requirements':
        return jsonResult(await compareRequirements(database, rawArgs as unknown as CompareInput));

      case 'map_controls':
        return jsonResult(await mapControls(database, rawArgs as unknown as MapControlsInput));

      case 'check_applicability':
        return jsonResult(await checkApplicability(database, rawArgs as unknown as ApplicabilityInput));

      case 'get_definitions':
        return jsonResult(await getDefinitions(database, rawArgs as unknown as DefinitionsInput));

      default:
        return errorResult(`Unknown tool: ${toolName}`);
    }
  } catch (error) {
    return errorResult(`Error: ${error instanceof Error ? error.message : String(error)}`);
  }
});
|
|
283
|
+
|
|
284
|
+
// Start the server
|
|
285
|
+
/**
 * Connect the server to a stdio transport and announce start-up.
 * The announcement is written with console.error (stderr).
 */
async function main() {
  await server.connect(new StdioServerTransport());
  console.error('EU Regulations MCP server started');
}

// Start the server; a start-up failure is logged and ends the process with exit code 1.
main().catch((failure) => {
  console.error('Fatal error:', failure);
  process.exit(1);
});
|