@ansvar/eu-regulations-mcp 0.8.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. package/README.md +76 -29
  2. package/data/regulations.db +0 -0
  3. package/data/seed/applicability/chips-act.json +67 -0
  4. package/data/seed/applicability/crma.json +85 -0
  5. package/data/seed/chips-act.json +714 -0
  6. package/data/seed/crma.json +877 -0
  7. package/data/seed/mappings/iso27001-chips-act.json +50 -0
  8. package/data/seed/mappings/iso27001-crma.json +50 -0
  9. package/data/seed/mappings/nist-csf-chips-act.json +56 -0
  10. package/data/seed/mappings/nist-csf-crma.json +56 -0
  11. package/dist/database/sqlite-adapter.d.ts +2 -2
  12. package/dist/database/sqlite-adapter.d.ts.map +1 -1
  13. package/dist/database/sqlite-adapter.js.map +1 -1
  14. package/dist/http-server.js +27 -5
  15. package/dist/http-server.js.map +1 -1
  16. package/dist/index.js +27 -4
  17. package/dist/index.js.map +1 -1
  18. package/dist/tools/about.d.ts +40 -0
  19. package/dist/tools/about.d.ts.map +1 -0
  20. package/dist/tools/about.js +61 -0
  21. package/dist/tools/about.js.map +1 -0
  22. package/dist/tools/list.d.ts +7 -0
  23. package/dist/tools/list.d.ts.map +1 -1
  24. package/dist/tools/list.js +73 -8
  25. package/dist/tools/list.js.map +1 -1
  26. package/dist/tools/registry.d.ts +11 -1
  27. package/dist/tools/registry.d.ts.map +1 -1
  28. package/dist/tools/registry.js +56 -4
  29. package/dist/tools/registry.js.map +1 -1
  30. package/dist/worker.d.ts.map +1 -1
  31. package/dist/worker.js +17 -5
  32. package/dist/worker.js.map +1 -1
  33. package/package.json +8 -7
  34. package/scripts/add-cross-references.sql +0 -200
  35. package/scripts/analyze-survey-responses.ts +0 -285
  36. package/scripts/build-db.ts +0 -421
  37. package/scripts/bulk-reingest-all.ts +0 -331
  38. package/scripts/check-updates.ts +0 -294
  39. package/scripts/extract-eprivacy-recitals.ts +0 -98
  40. package/scripts/ingest-eurlex-browser.ts +0 -113
  41. package/scripts/ingest-eurlex.ts +0 -346
  42. package/scripts/ingest-unece.ts +0 -382
  43. package/scripts/migrate-postgres.ts +0 -445
  44. package/scripts/migrate-to-postgres.ts +0 -353
  45. package/scripts/reingest-all-with-recitals.sh +0 -81
  46. package/scripts/sync-versions.ts +0 -206
  47. package/scripts/test-cross-refs.js +0 -26
  48. package/scripts/test-postgres-adapter.ts +0 -146
  49. package/scripts/update-dora-rts-metadata.ts +0 -112
  50. package/src/database/postgres-adapter.ts +0 -84
  51. package/src/database/sqlite-adapter.ts +0 -44
  52. package/src/database/types.ts +0 -10
  53. package/src/http-server.ts +0 -149
  54. package/src/index.ts +0 -61
  55. package/src/middleware/rate-limit.ts +0 -104
  56. package/src/tools/applicability.ts +0 -167
  57. package/src/tools/article.ts +0 -81
  58. package/src/tools/compare.ts +0 -217
  59. package/src/tools/definitions.ts +0 -49
  60. package/src/tools/evidence.ts +0 -84
  61. package/src/tools/list.ts +0 -124
  62. package/src/tools/map.ts +0 -86
  63. package/src/tools/recital.ts +0 -60
  64. package/src/tools/registry.ts +0 -311
  65. package/src/tools/search.ts +0 -297
  66. package/src/worker.ts +0 -708
@@ -1,382 +0,0 @@
1
- #!/usr/bin/env npx tsx
2
-
3
- /**
4
- * Ingest UN/ECE regulations from EUR-Lex.
5
- * UN regulations use numbered sections (1., 2., etc.) instead of "Article X".
6
- *
7
- * Usage: npx tsx scripts/ingest-unece.ts <celex_id> <output_file>
8
- * Example: npx tsx scripts/ingest-unece.ts 42021X0387 data/seed/un-r155.json
9
- */
10
-
11
- import { writeFileSync } from 'fs';
12
- import { JSDOM } from 'jsdom';
13
-
14
/** One numbered section or annex extracted from a UN regulation document. */
interface Article {
  /** Section number such as "5", or an annex label such as "Annex 5". */
  number: string;
  /** Human-readable heading, when one is known. */
  title?: string;
  /** Body text; the parser joins the collected text fragments with blank lines. */
  text: string;
  /** Grouping label; the parser sets "Annexes" on annex entries. */
  chapter?: string;
}

/** A defined term together with the section it was extracted from. */
interface Definition {
  /** The term being defined (the parser stores it lower-cased). */
  term: string;
  /** The definition text. */
  definition: string;
  /** Number of the section the definition came from. */
  article: string;
}

/** Shape of the JSON document this script writes for one regulation. */
interface RegulationData {
  /** Short regulation identifier, e.g. "UN_R155". */
  id: string;
  full_name: string;
  /** CELEX identifier the document was fetched with. */
  celex_id: string;
  effective_date?: string;
  eur_lex_url: string;
  articles: Article[];
  definitions: Definition[];
}
36
-
37
/**
 * Known UN/ECE documents, keyed by CELEX identifier.
 * Two CELEX ids map to UN_R155: the original text and Supplement 3.
 */
const UN_REGULATION_METADATA: Record<string, { id: string; full_name: string; effective_date?: string }> = {
  '42021X0387': {
    id: 'UN_R155',
    full_name: 'UN Regulation No. 155 - Cyber security and cyber security management system',
    effective_date: '2021-01-22',
  },
  '42025X0005': {
    id: 'UN_R155',
    full_name: 'UN Regulation No. 155 - Cyber security and cyber security management system (Supplement 3)',
    effective_date: '2025-01-10',
  },
  '42021X0388': {
    id: 'UN_R156',
    full_name: 'UN Regulation No. 156 - Software update and software update management system',
    effective_date: '2021-01-22',
  },
};

/**
 * Section titles shared by the UN regulations handled here, keyed by section number.
 * Section 6 is deliberately absent: its title differs per regulation and is
 * listed in REGULATION_SECTION_TITLES instead.
 */
const COMMON_SECTION_TITLES: Record<string, string> = {
  '1': 'Scope',
  '2': 'Definitions',
  '3': 'Application for approval',
  '4': 'Markings',
  '5': 'Approval',
  '7': 'Specifications',
  '8': 'Modification of vehicle type and extension of type approval',
  '9': 'Conformity of production',
  '10': 'Penalties for non-conformity of production',
  '11': 'Production definitively discontinued',
  '12': 'Names and addresses of Technical Services responsible for conducting approval tests, and of Type Approval Authorities',
};

/** Per-regulation section titles (regulation id -> section number -> title). */
const REGULATION_SECTION_TITLES: Record<string, Record<string, string>> = {
  UN_R155: {
    '6': 'Certificate of Compliance for Cybersecurity Management System',
  },
  UN_R156: {
    '6': 'Certificate of Compliance for Software Update Management System',
  },
};
79
-
80
/**
 * Resolve the display title of a numbered section.
 *
 * Lookup order: the regulation-specific table, then the shared table, and
 * finally a generic "Section N" label, so the result is never empty.
 *
 * @param sectionNum Section number as it appears in the document (e.g. "6").
 * @param regulationId Regulation identifier used to select specific titles (e.g. "UN_R155").
 * @returns The title to record for the section.
 */
function getSectionTitle(sectionNum: string, regulationId: string): string {
  const specificTitle = REGULATION_SECTION_TITLES[regulationId]?.[sectionNum];
  if (specificTitle) {
    return specificTitle;
  }

  const sharedTitle = COMMON_SECTION_TITLES[sectionNum];
  return sharedTitle ? sharedTitle : `Section ${sectionNum}`;
}
85
-
86
/**
 * Download the English HTML rendition of a document from EUR-Lex.
 *
 * @param celexId CELEX identifier of the document to download.
 * @returns The response body as text.
 * @throws Error when the server answers with a non-success status.
 */
async function fetchEurLexHtml(celexId: string): Promise<string> {
  const url = `https://eur-lex.europa.eu/legal-content/EN/TXT/HTML/?uri=CELEX:${celexId}`;
  console.log(`Fetching: ${url}`);

  const requestHeaders = {
    'User-Agent': 'Mozilla/5.0 (compatible; EU-Compliance-MCP/1.0; +https://github.com/Ansvar-Systems/EU_compliance_MCP)',
    Accept: 'text/html',
  };
  const response = await fetch(url, { headers: requestHeaders });

  if (response.ok) {
    return response.text();
  }
  throw new Error(`Failed to fetch: ${response.status} ${response.statusText}`);
}
103
-
104
/**
 * Parse the EUR-Lex HTML of a UN regulation into numbered sections, annexes
 * and the definitions found in Section 2.
 *
 * The document is walked element by element (`p`, `span`, `td`). Headings
 * carrying the `oj-ti-grseq-1` class and shaped like "8. MODIFICATION OF ..."
 * open a new section; text starting with "Annex N" (or an `oj-doc-ti` element
 * mentioning "Annex") opens a new annex; everything else is appended to
 * whichever section or annex is currently open.
 *
 * @param html Raw HTML of the regulation.
 * @param celexId CELEX identifier; used to look up the regulation id that
 *   selects regulation-specific section titles. Unknown ids fall back to "UN_R155".
 * @returns Sections followed by annexes (each group in numeric order) and the
 *   extracted definitions.
 */
function parseUnRegulation(html: string, celexId: string): { articles: Article[]; definitions: Definition[] } {
  const dom = new JSDOM(html);
  const doc = dom.window.document;

  // Fix: this function used to read an undeclared `metadata` variable, which
  // throws a ReferenceError at the first section heading (optional chaining
  // does not guard undeclared identifiers). Resolve the id from `celexId`.
  const regulationId = UN_REGULATION_METADATA[celexId]?.id || 'UN_R155';

  type PendingBlock = { number: string; title: string; lines: string[] };

  const articles: Article[] = [];
  // Numbers already emitted; a later block with the same number is ignored.
  const seenSections = new Set<string>();

  // Emit a collected section unless it is empty or its number was already emitted.
  const saveSection = (section: PendingBlock | null): void => {
    if (!section || section.lines.length === 0 || seenSections.has(section.number)) return;
    articles.push({
      number: section.number,
      title: section.title,
      text: section.lines.join('\n\n'),
    });
    seenSections.add(section.number);
  };

  // Emit a collected annex (as "Annex N", chapter "Annexes") under the same rules.
  const saveAnnex = (annex: PendingBlock | null): void => {
    if (!annex || annex.lines.length === 0) return;
    const label = `Annex ${annex.number}`;
    if (seenSections.has(label)) return;
    articles.push({
      number: label,
      title: annex.title,
      text: annex.lines.join('\n\n'),
      chapter: 'Annexes',
    });
    seenSections.add(label);
  };

  let currentSection: PendingBlock | null = null;
  let currentAnnex: PendingBlock | null = null;
  let inAnnex = false;
  let inTableOfContents = true; // Skip the table of contents at the start

  // UN regulations keep their content in paragraphs, spans and table cells.
  for (const el of Array.from(doc.querySelectorAll('p, span, td'))) {
    const text = el.textContent?.trim() || '';
    if (!text || text.length < 2) continue;

    const isSectionHeadingElement = el.classList?.contains('oj-ti-grseq-1');

    // The table of contents ends at the first real section heading.
    if (isSectionHeadingElement && /^(\d{1,2})\.\s+[A-Z][A-Z\s]+$/.test(text)) {
      inTableOfContents = false;
    }
    if (inTableOfContents && !isSectionHeadingElement) {
      continue;
    }

    // Main section headings, e.g. "8. MODIFICATION OF VEHICLE TYPE ...".
    // Titles may contain commas and hyphens (e.g. "NON-CONFORMITY").
    const sectionHeaderMatch = text.match(/^(\d{1,2})\.\s+([A-Z][A-Z\s,\-]+)$/);
    if (sectionHeaderMatch && isSectionHeadingElement) {
      const sectionNum = sectionHeaderMatch[1];

      saveSection(currentSection);
      // Fix: an annex still open at this point used to be discarded unsaved.
      saveAnnex(currentAnnex);

      currentSection = {
        number: sectionNum,
        // getSectionTitle always yields a non-empty label, so the heading text
        // is only a defensive fallback.
        title: getSectionTitle(sectionNum, regulationId) || sectionHeaderMatch[2].trim(),
        lines: [],
      };
      currentAnnex = null;
      inAnnex = false;
      continue;
    }

    // Annex headings (the `i` flag covers both "Annex" and "ANNEX").
    const annexMatch = text.match(/^Annex\s+(\d+)/i);
    if (annexMatch || (el.classList?.contains('oj-doc-ti') && text.includes('Annex'))) {
      saveSection(currentSection);
      saveAnnex(currentAnnex);

      const annexNum = annexMatch?.[1] || text.match(/Annex\s+(\d+)/i)?.[1];
      if (annexNum) {
        inAnnex = true;
        currentAnnex = { number: annexNum, title: extractAnnexTitle(text), lines: [] };
        currentSection = null;
      }
      continue;
    }

    // Skip metadata and navigation elements
    if (
      text.includes('Official Journal') ||
      text.includes('EUR-Lex') ||
      text.includes('CONTENTS') ||
      /^[A-Z]+$/.test(text) ||
      /^L\s+\d+\/\d+$/.test(text) ||
      /^\d+\.\d+\.\d+\s+EN$/.test(text)
    ) {
      continue;
    }

    // Append content to whichever block is open.
    if (inAnnex && currentAnnex) {
      currentAnnex.lines.push(text);
    } else if (currentSection) {
      currentSection.lines.push(text);
    }
  }

  // Flush whatever was still open when the document ended.
  saveSection(currentSection);
  saveAnnex(currentAnnex);

  // Definitions live in Section 2.
  const defsSection = articles.find((a) => a.number === '2');
  const definitions = defsSection ? extractSectionDefinitions(defsSection.text) : [];

  // Deduplicate by number, keeping the longest text.
  const articleMap = new Map<string, Article>();
  for (const article of articles) {
    const existing = articleMap.get(article.number);
    if (!existing || article.text.length > existing.text.length) {
      articleMap.set(article.number, article);
    }
  }

  const sortedArticles = Array.from(articleMap.values()).sort(compareArticleNumbers);

  return { articles: sortedArticles, definitions };
}

/**
 * Extract definitions of the form `2.1. 'term' means ...` / `2.1. 'term' refers to ...`
 * from the text of the definitions section. Curly quotes are treated as straight quotes.
 */
function extractSectionDefinitions(sectionText: string): Definition[] {
  const found: Definition[] = [];

  // Collapse all whitespace (including newlines) and normalise curly quotes.
  const normalizedText = sectionText.replace(/\s+/g, ' ').replace(/[\u2018\u2019]/g, "'");

  // Each definition runs until the next "N.N. '" marker or the end of the text.
  const defRegex = /(\d+\.\d+\.)\s*'([^']+)'\s+(?:means|refers to)\s+(.+?)(?=\d+\.\d+\.\s*'|$)/g;
  for (const match of normalizedText.matchAll(defRegex)) {
    const term = match[2].trim().toLowerCase();
    // Drop a trailing section number and a final ';' or '.'.
    const definition = match[3]
      .trim()
      .replace(/\s*\d+\.\d+\.\s*$/, '')
      .replace(/[;.]$/, '')
      .trim();
    if (term && definition.length > 10) {
      found.push({ term, definition, article: '2' });
    }
  }

  return found;
}

/**
 * Ordering for parsed articles: numbered sections first, then annexes, each
 * group by number and then by an optional lower-case letter suffix ("5" < "5a").
 * Numbers that do not fit the expected pattern compare as equal.
 */
function compareArticleNumbers(a: Article, b: Article): number {
  const aIsAnnex = a.number.startsWith('Annex');
  const bIsAnnex = b.number.startsWith('Annex');
  if (aIsAnnex !== bIsAnnex) {
    return aIsAnnex ? 1 : -1;
  }

  const pattern = aIsAnnex ? /Annex (\d+)([a-z]?)/ : /^(\d+)([a-z]?)$/;
  const matchA = a.number.match(pattern);
  const matchB = b.number.match(pattern);
  if (!matchA || !matchB) return 0;

  const numA = parseInt(matchA[1], 10);
  const numB = parseInt(matchB[1], 10);
  if (numA !== numB) return numA - numB;
  return (matchA[2] || '').localeCompare(matchB[2] || '');
}
297
-
298
/**
 * Work out the title of an annex from its heading text.
 *
 * Text following "Annex N" on the same line (after an optional dash) is used
 * when present; otherwise a built-in table of UN R155 annex titles is
 * consulted.
 *
 * @param text Heading text that mentions the annex.
 * @returns The title, or an empty string when none can be determined.
 */
function extractAnnexTitle(text: string): string {
  // Common annex titles for R155, used when the heading carries no title of its own.
  const fallbackTitles: Record<string, string> = {
    '1': 'Information document',
    '2': 'Communication',
    '3': 'Arrangements of the approval mark',
    '4': 'Certificate of Compliance for CSMS',
    '5': 'List of threats and corresponding mitigations',
  };

  const inlineTitle = /Annex\s+\d+\s*[–—-]?\s*(.*)/i.exec(text)?.[1];
  if (inlineTitle) {
    return inlineTitle.trim();
  }

  const annexNumber = /Annex\s+(\d+)/i.exec(text)?.[1];
  return (annexNumber && fallbackTitles[annexNumber]) || '';
}
321
-
322
/**
 * Fetch a UN regulation from EUR-Lex, parse it and write the result as JSON.
 *
 * The raw HTML is also written next to the output file for debugging. When no
 * sections can be parsed, nothing is written to `outputPath`, an error is
 * logged and the process exit code is set to 1.
 *
 * @param celexId CELEX identifier of the regulation.
 * @param outputPath Destination of the JSON file.
 * @throws Error when the download fails (propagated from fetchEurLexHtml).
 */
async function ingestUnRegulation(celexId: string, outputPath: string): Promise<void> {
  const metadata = UN_REGULATION_METADATA[celexId];
  if (!metadata) {
    console.warn(`Unknown CELEX ID: ${celexId}. Using generic metadata.`);
  }

  const html = await fetchEurLexHtml(celexId);
  console.log(`Fetched ${html.length} bytes`);

  // Save HTML for debugging. Only a trailing ".json" extension is swapped;
  // any other path gets ".html" appended, so the debug file can never be the
  // output file itself (a plain string replace would hit the first ".json"
  // anywhere in the path, or nothing at all).
  const jsonExtension = /\.json$/i;
  const debugHtmlPath = jsonExtension.test(outputPath)
    ? outputPath.replace(jsonExtension, '.html')
    : `${outputPath}.html`;
  writeFileSync(debugHtmlPath, html);

  const { articles, definitions } = parseUnRegulation(html, celexId);
  console.log(`Parsed ${articles.length} articles/sections, ${definitions.length} definitions`);

  if (articles.length === 0) {
    console.error('No sections found! The HTML structure may have changed.');
    // Report the failed ingest through the exit status instead of ending with 0.
    process.exitCode = 1;
    return;
  }

  const regulation: RegulationData = {
    id: metadata?.id || celexId,
    full_name: metadata?.full_name || `UN Regulation ${celexId}`,
    celex_id: celexId,
    effective_date: metadata?.effective_date,
    eur_lex_url: `https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX:${celexId}`,
    articles,
    definitions,
  };

  writeFileSync(outputPath, JSON.stringify(regulation, null, 2));

  const annexCount = articles.filter((a) => a.number.startsWith('Annex')).length;
  console.log(`\nSaved to: ${outputPath}`);
  console.log(`Sections: ${articles.length - annexCount}`);
  console.log(`Annexes: ${annexCount}`);
  console.log(`Definitions: ${definitions.length}`);

  // Print a one-line preview of every section and annex.
  console.log('\nSections found:');
  for (const article of articles) {
    const preview = article.text.substring(0, 60).replace(/\n/g, ' ');
    console.log(` ${article.number}: ${article.title || '(no title)'} - ${preview}...`);
  }
}
365
-
366
- // Main
367
// Command-line entry point: expects <celex_id> <output_file> after the script name.
const cliArguments = process.argv.slice(2);
const celexId = cliArguments[0];
const outputPath = cliArguments[1];

if (!(celexId && outputPath)) {
  // Print usage together with the CELEX ids this script knows about, then fail.
  console.log('Usage: npx tsx scripts/ingest-unece.ts <celex_id> <output_file>');
  console.log('Example: npx tsx scripts/ingest-unece.ts 42021X0387 data/seed/un-r155.json');
  console.log('\nKnown UN/ECE CELEX IDs:');
  for (const [id, meta] of Object.entries(UN_REGULATION_METADATA)) {
    console.log(` ${id} - ${meta.id} (${meta.full_name})`);
  }
  process.exit(1);
}

// Any failure during ingestion is reported and turned into a non-zero exit status.
ingestUnRegulation(celexId, outputPath).catch((err) => {
  console.error('Error:', err);
  process.exit(1);
});