@ansvar/eu-regulations-mcp 0.1.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156) hide show
  1. package/LICENSE +190 -21
  2. package/README.md +159 -26
  3. package/data/seed/aifmd.json +432 -0
  4. package/data/seed/applicability/ai-act.json +87 -0
  5. package/data/seed/applicability/aifmd.json +74 -0
  6. package/data/seed/applicability/cbam.json +74 -0
  7. package/data/seed/applicability/cer.json +74 -0
  8. package/data/seed/applicability/cra.json +77 -0
  9. package/data/seed/applicability/csddd.json +74 -0
  10. package/data/seed/applicability/csrd.json +74 -0
  11. package/data/seed/applicability/cyber_solidarity.json +74 -0
  12. package/data/seed/applicability/cybersecurity-act.json +69 -0
  13. package/data/seed/applicability/data-act.json +71 -0
  14. package/data/seed/applicability/dga.json +74 -0
  15. package/data/seed/applicability/dma.json +77 -0
  16. package/data/seed/applicability/dsa.json +71 -0
  17. package/data/seed/applicability/eecc.json +74 -0
  18. package/data/seed/applicability/ehds.json +74 -0
  19. package/data/seed/applicability/eidas2.json +86 -0
  20. package/data/seed/applicability/eprivacy.json +74 -0
  21. package/data/seed/applicability/eu_taxonomy.json +74 -0
  22. package/data/seed/applicability/eucc.json +74 -0
  23. package/data/seed/applicability/eudr.json +74 -0
  24. package/data/seed/applicability/gpsr.json +74 -0
  25. package/data/seed/applicability/ivdr.json +74 -0
  26. package/data/seed/applicability/led.json +74 -0
  27. package/data/seed/applicability/machinery.json +74 -0
  28. package/data/seed/applicability/mdr.json +74 -0
  29. package/data/seed/applicability/mica.json +74 -0
  30. package/data/seed/applicability/mifid2.json +74 -0
  31. package/data/seed/applicability/mifir.json +74 -0
  32. package/data/seed/applicability/pld.json +74 -0
  33. package/data/seed/applicability/psd2.json +74 -0
  34. package/data/seed/applicability/red.json +74 -0
  35. package/data/seed/applicability/sfdr.json +74 -0
  36. package/data/seed/applicability/un-r155.json +68 -0
  37. package/data/seed/applicability/un-r156.json +68 -0
  38. package/data/seed/cbam.json +397 -0
  39. package/data/seed/cer.json +233 -0
  40. package/data/seed/csddd.json +205 -0
  41. package/data/seed/csrd.json +50 -0
  42. package/data/seed/cyber_solidarity.json +252 -0
  43. package/data/seed/data-act.json +517 -0
  44. package/data/seed/dga.json +342 -0
  45. package/data/seed/dma.json +499 -0
  46. package/data/seed/dsa.json +686 -0
  47. package/data/seed/eecc.json +981 -0
  48. package/data/seed/ehds.json +638 -0
  49. package/data/seed/eidas2.json +590 -0
  50. package/data/seed/eprivacy.json +115 -0
  51. package/data/seed/eu_taxonomy.json +285 -0
  52. package/data/seed/eucc.json +386 -0
  53. package/data/seed/eudr.json +401 -0
  54. package/data/seed/gpsr.json +462 -0
  55. package/data/seed/ivdr.json +1036 -0
  56. package/data/seed/led.json +480 -0
  57. package/data/seed/machinery.json +513 -0
  58. package/data/seed/mappings/iso27001-ai-act.json +114 -0
  59. package/data/seed/mappings/iso27001-aifmd.json +50 -0
  60. package/data/seed/mappings/iso27001-cbam.json +26 -0
  61. package/data/seed/mappings/iso27001-cer.json +74 -0
  62. package/data/seed/mappings/iso27001-cra.json +130 -0
  63. package/data/seed/mappings/iso27001-csddd.json +50 -0
  64. package/data/seed/mappings/iso27001-csrd.json +26 -0
  65. package/data/seed/mappings/iso27001-cyber_solidarity.json +82 -0
  66. package/data/seed/mappings/iso27001-cybersecurity-act.json +90 -0
  67. package/data/seed/mappings/iso27001-data-act.json +66 -0
  68. package/data/seed/mappings/iso27001-dga.json +50 -0
  69. package/data/seed/mappings/iso27001-dma.json +50 -0
  70. package/data/seed/mappings/iso27001-dsa.json +58 -0
  71. package/data/seed/mappings/iso27001-eecc.json +74 -0
  72. package/data/seed/mappings/iso27001-ehds.json +90 -0
  73. package/data/seed/mappings/iso27001-eidas2.json +106 -0
  74. package/data/seed/mappings/iso27001-eprivacy.json +66 -0
  75. package/data/seed/mappings/iso27001-eu_taxonomy.json +34 -0
  76. package/data/seed/mappings/iso27001-eucc.json +66 -0
  77. package/data/seed/mappings/iso27001-eudr.json +34 -0
  78. package/data/seed/mappings/iso27001-gpsr.json +42 -0
  79. package/data/seed/mappings/iso27001-ivdr.json +66 -0
  80. package/data/seed/mappings/iso27001-led.json +74 -0
  81. package/data/seed/mappings/iso27001-machinery.json +50 -0
  82. package/data/seed/mappings/iso27001-mdr.json +82 -0
  83. package/data/seed/mappings/iso27001-mica.json +66 -0
  84. package/data/seed/mappings/iso27001-mifid2.json +66 -0
  85. package/data/seed/mappings/iso27001-mifir.json +42 -0
  86. package/data/seed/mappings/iso27001-pld.json +26 -0
  87. package/data/seed/mappings/iso27001-psd2.json +82 -0
  88. package/data/seed/mappings/iso27001-red.json +42 -0
  89. package/data/seed/mappings/iso27001-sfdr.json +50 -0
  90. package/data/seed/mappings/iso27001-un-r155.json +130 -0
  91. package/data/seed/mappings/iso27001-un-r156.json +106 -0
  92. package/data/seed/mappings/nist-csf-ai-act.json +138 -0
  93. package/data/seed/mappings/nist-csf-aifmd.json +58 -0
  94. package/data/seed/mappings/nist-csf-cbam.json +42 -0
  95. package/data/seed/mappings/nist-csf-cer.json +90 -0
  96. package/data/seed/mappings/nist-csf-cra.json +130 -0
  97. package/data/seed/mappings/nist-csf-csddd.json +50 -0
  98. package/data/seed/mappings/nist-csf-csrd.json +34 -0
  99. package/data/seed/mappings/nist-csf-cyber_solidarity.json +90 -0
  100. package/data/seed/mappings/nist-csf-cybersecurity-act.json +90 -0
  101. package/data/seed/mappings/nist-csf-data-act.json +50 -0
  102. package/data/seed/mappings/nist-csf-dga.json +58 -0
  103. package/data/seed/mappings/nist-csf-dma.json +42 -0
  104. package/data/seed/mappings/nist-csf-dora.json +210 -0
  105. package/data/seed/mappings/nist-csf-dsa.json +82 -0
  106. package/data/seed/mappings/nist-csf-eecc.json +90 -0
  107. package/data/seed/mappings/nist-csf-ehds.json +98 -0
  108. package/data/seed/mappings/nist-csf-eidas2.json +114 -0
  109. package/data/seed/mappings/nist-csf-eprivacy.json +58 -0
  110. package/data/seed/mappings/nist-csf-eu_taxonomy.json +34 -0
  111. package/data/seed/mappings/nist-csf-eucc.json +66 -0
  112. package/data/seed/mappings/nist-csf-eudr.json +58 -0
  113. package/data/seed/mappings/nist-csf-gdpr.json +178 -0
  114. package/data/seed/mappings/nist-csf-gpsr.json +58 -0
  115. package/data/seed/mappings/nist-csf-ivdr.json +66 -0
  116. package/data/seed/mappings/nist-csf-led.json +74 -0
  117. package/data/seed/mappings/nist-csf-machinery.json +58 -0
  118. package/data/seed/mappings/nist-csf-mdr.json +66 -0
  119. package/data/seed/mappings/nist-csf-mica.json +98 -0
  120. package/data/seed/mappings/nist-csf-mifid2.json +74 -0
  121. package/data/seed/mappings/nist-csf-mifir.json +50 -0
  122. package/data/seed/mappings/nist-csf-nis2.json +194 -0
  123. package/data/seed/mappings/nist-csf-pld.json +34 -0
  124. package/data/seed/mappings/nist-csf-psd2.json +98 -0
  125. package/data/seed/mappings/nist-csf-red.json +58 -0
  126. package/data/seed/mappings/nist-csf-sfdr.json +42 -0
  127. package/data/seed/mappings/nist-csf-un-r155.json +130 -0
  128. package/data/seed/mappings/nist-csf-un-r156.json +98 -0
  129. package/data/seed/mdr.json +1066 -0
  130. package/data/seed/mica.json +1003 -0
  131. package/data/seed/mifid2.json +906 -0
  132. package/data/seed/mifir.json +512 -0
  133. package/data/seed/pld.json +244 -0
  134. package/data/seed/psd2.json +827 -0
  135. package/data/seed/red.json +452 -0
  136. package/data/seed/sfdr.json +228 -0
  137. package/data/seed/un-r155.json +166 -0
  138. package/data/seed/un-r156.json +150 -0
  139. package/dist/http-server.d.ts +9 -0
  140. package/dist/http-server.d.ts.map +1 -0
  141. package/dist/http-server.js +342 -0
  142. package/dist/http-server.js.map +1 -0
  143. package/dist/index.js +4 -4
  144. package/dist/index.js.map +1 -1
  145. package/dist/tools/map.d.ts +1 -1
  146. package/dist/tools/map.d.ts.map +1 -1
  147. package/dist/tools/map.js +3 -3
  148. package/dist/tools/map.js.map +1 -1
  149. package/package.json +8 -3
  150. package/scripts/build-db.ts +20 -8
  151. package/scripts/check-updates.ts +141 -39
  152. package/scripts/ingest-eurlex.ts +9 -1
  153. package/scripts/ingest-unece.ts +368 -0
  154. package/src/http-server.ts +380 -0
  155. package/src/index.ts +4 -4
  156. package/src/tools/map.ts +4 -4
@@ -0,0 +1,368 @@
1
+ #!/usr/bin/env npx tsx
2
+
3
+ /**
4
+ * Ingest UN/ECE regulations from EUR-Lex.
5
+ * UN regulations use numbered sections (1., 2., etc.) instead of "Article X".
6
+ *
7
+ * Usage: npx tsx scripts/ingest-unece.ts <celex_id> <output_file>
8
+ * Example: npx tsx scripts/ingest-unece.ts 42021X0387 data/seed/un-r155.json
9
+ */
10
+
11
+ import { writeFileSync } from 'fs';
12
+ import { JSDOM } from 'jsdom';
13
+
14
/**
 * One entry of the seed file's `articles` array.
 *
 * For UN regulations an "article" is either a numbered main section
 * (`number` is e.g. `"7"`) or an annex (`number` is e.g. `"Annex 5"`).
 */
interface Article {
  /** Section number, or `Annex N` for annexes. */
  number: string;
  /** Human-readable heading, when one could be determined. */
  title?: string;
  /** Body text; paragraphs are separated by blank lines. */
  text: string;
  /** Grouping label; this script only sets it (to `Annexes`) on annex entries. */
  chapter?: string;
}
20
+
21
/** A defined term extracted from a regulation's definitions section. */
interface Definition {
  /** The term being defined (this script stores it lower-cased). */
  term: string;
  /** The definition text that follows "means" / "refers to". */
  definition: string;
  /** Number of the article/section the definition was taken from. */
  article: string;
}
26
+
27
/** Shape of the JSON seed file this script writes for one regulation. */
interface RegulationData {
  /** Regulation identifier (e.g. `UN_R155`), or the CELEX id when no metadata is known. */
  id: string;
  /** Full display name of the regulation. */
  full_name: string;
  /** CELEX identifier the document was fetched with. */
  celex_id: string;
  /** Effective date taken from the metadata table, when available. */
  effective_date?: string;
  /** Link to the regulation's EUR-Lex page. */
  eur_lex_url: string;
  /** Parsed sections and annexes. */
  articles: Article[];
  /** Parsed definitions. */
  definitions: Definition[];
}
36
+
37
/** Static metadata recorded for one known UN regulation document. */
interface UnRegulationMetadata {
  id: string;
  full_name: string;
  effective_date?: string;
}

/**
 * Known UN/ECE documents, keyed by CELEX id.
 *
 * Two CELEX ids map to `UN_R155`: the original text and Supplement 3.
 */
const UN_REGULATION_METADATA: Record<string, UnRegulationMetadata> = {
  '42021X0387': {
    id: 'UN_R155',
    full_name: 'UN Regulation No. 155 - Cyber security and cyber security management system',
    effective_date: '2021-01-22',
  },
  '42025X0005': {
    id: 'UN_R155',
    full_name: 'UN Regulation No. 155 - Cyber security and cyber security management system (Supplement 3)',
    effective_date: '2025-01-10',
  },
  '42021X0388': {
    id: 'UN_R156',
    full_name: 'UN Regulation No. 156 - Software update and software update management system',
    effective_date: '2021-01-22',
  },
};
54
+
55
/**
 * Curated main-section titles, keyed by section number.
 *
 * Section 6 is deliberately absent: its title differs per regulation and is
 * kept in REGULATION_SECTION_TITLES.
 */
const COMMON_SECTION_TITLES: Record<string, string> = {
  '1': 'Scope',
  '2': 'Definitions',
  '3': 'Application for approval',
  '4': 'Markings',
  '5': 'Approval',
  '7': 'Specifications',
  '8': 'Modification of vehicle type and extension of type approval',
  '9': 'Conformity of production',
  '10': 'Penalties for non-conformity of production',
  '11': 'Production definitively discontinued',
  '12': 'Names and addresses of Technical Services responsible for conducting approval tests, and of Type Approval Authorities',
};
69
+
70
/**
 * Section titles that differ between regulations, keyed first by regulation
 * id and then by section number. Currently only section 6 is listed.
 */
const REGULATION_SECTION_TITLES: Record<string, Record<string, string>> = {
  UN_R155: {
    '6': 'Certificate of Compliance for Cybersecurity Management System',
  },
  UN_R156: {
    '6': 'Certificate of Compliance for Software Update Management System',
  },
};
79
+
80
/**
 * Look up the curated title for a main section.
 *
 * Precedence: regulation-specific title, then the shared title, then the
 * generic label `Section <number>`.
 *
 * @param sectionNum - Section number as it appears in the document (e.g. `"6"`).
 * @param regulationId - Regulation id used as the key into REGULATION_SECTION_TITLES.
 * @returns The first non-empty title found, or the generic label.
 */
function getSectionTitle(sectionNum: string, regulationId: string): string {
  const overrides = REGULATION_SECTION_TITLES[regulationId];
  const candidates = [overrides?.[sectionNum], COMMON_SECTION_TITLES[sectionNum]];

  for (const candidate of candidates) {
    if (candidate) {
      return candidate;
    }
  }

  return `Section ${sectionNum}`;
}
85
+
86
/**
 * Download the English HTML rendering of a document from EUR-Lex.
 *
 * @param celexId - CELEX identifier, interpolated into the request URL as-is.
 * @returns The response body as text.
 * @throws Error when the response status is not OK; the message carries the
 *         HTTP status code and status text.
 */
async function fetchEurLexHtml(celexId: string): Promise<string> {
  const requestUrl = `https://eur-lex.europa.eu/legal-content/EN/TXT/HTML/?uri=CELEX:${celexId}`;
  console.log(`Fetching: ${requestUrl}`);

  const requestHeaders = {
    'User-Agent': 'Mozilla/5.0 (compatible; EU-Compliance-MCP/1.0; +https://github.com/Ansvar-Systems/EU_compliance_MCP)',
    Accept: 'text/html',
  };
  const res = await fetch(requestUrl, { headers: requestHeaders });

  if (res.ok) {
    return res.text();
  }

  throw new Error(`Failed to fetch: ${res.status} ${res.statusText}`);
}
103
+
104
/**
 * Parse the EUR-Lex HTML of a UN regulation into numbered sections, annexes
 * and the definitions found in Section 2.
 *
 * The document is walked as one flat sequence of `p`, `span` and `td`
 * elements. Everything before the first main-section header is treated as
 * table of contents and skipped. After that, each text fragment is appended
 * to whichever section or annex is currently open.
 *
 * @param html - Raw HTML as returned by EUR-Lex.
 * @param celexId - CELEX id of the document. It is resolved through
 *   UN_REGULATION_METADATA to a regulation id so that regulation-specific
 *   curated section titles can be applied; unknown ids only get the shared
 *   titles.
 * @returns `articles`: one entry per section/annex number, numbered sections
 *   first in ascending order, then annexes in ascending order.
 *   `definitions`: terms extracted from Section 2.
 */
function parseUnRegulation(html: string, celexId: string): { articles: Article[]; definitions: Definition[] } {
  type PendingBlock = { number: string; title: string; lines: string[] };

  // Main section header, e.g. "8. MODIFICATION OF VEHICLE TYPE...".
  // Titles may contain commas and hyphens (e.g. "NON-CONFORMITY").
  const SECTION_HEADER = /^(\d{1,2})\.\s+([A-Z][A-Z\s,\-]+)$/;
  const SECTION_HEADER_CLASS = 'oj-ti-grseq-1';
  const ANNEX_AT_START = /^Annex\s+(\d+)/i;
  const ANNEX_ANYWHERE = /Annex\s+(\d+)/i;
  // Page furniture that must not end up in section text.
  const ALL_CAPS_WORD = /^[A-Z]+$/;
  const OJ_PAGE_REFERENCE = /^L\s+\d+\/\d+$/;
  const OJ_DATE_STAMP = /^\d+\.\d+\.\d+\s+EN$/;

  const doc = new JSDOM(html).window.document;

  const articles: Article[] = [];
  const definitions: Definition[] = [];
  const seenSections = new Set<string>();

  // Curated titles: shared table overlaid with the regulation-specific one.
  // (The original code read an undefined `R155_SECTION_TITLES` here.)
  const regulationId = UN_REGULATION_METADATA[celexId]?.id;
  const curatedTitles: Record<string, string> = {
    ...COMMON_SECTION_TITLES,
    ...(regulationId ? REGULATION_SECTION_TITLES[regulationId] : undefined),
  };

  // Store a finished main section unless it is empty or its number was already stored.
  const saveSection = (pending: PendingBlock | null): void => {
    if (!pending || pending.lines.length === 0 || seenSections.has(pending.number)) return;
    articles.push({
      number: pending.number,
      title: pending.title,
      text: pending.lines.join('\n\n'),
    });
    seenSections.add(pending.number);
  };

  // Store a finished annex unless it is empty or its number was already stored.
  const saveAnnex = (pending: PendingBlock | null): void => {
    if (!pending || pending.lines.length === 0) return;
    const annexKey = `Annex ${pending.number}`;
    if (seenSections.has(annexKey)) return;
    articles.push({
      number: annexKey,
      title: pending.title,
      text: pending.lines.join('\n\n'),
      chapter: 'Annexes',
    });
    seenSections.add(annexKey);
  };

  let currentSection: PendingBlock | null = null;
  let currentAnnex: PendingBlock | null = null;
  let inAnnex = false;
  let inTableOfContents = true; // Skip TOC at start

  // NOTE(review): the selector also matches nested elements (e.g. a span
  // inside a p inside a td) and textContent includes descendant text, so the
  // same passage can be appended more than once — confirm this is intended.
  for (const el of Array.from(doc.querySelectorAll('p, span, td'))) {
    const text = el.textContent?.trim() || '';
    if (text.length < 2) continue;

    const isHeaderElement = el.classList.contains(SECTION_HEADER_CLASS);
    const sectionHeaderMatch = isHeaderElement ? SECTION_HEADER.exec(text) : null;

    // The first real section header marks the end of the table of contents.
    if (sectionHeaderMatch) {
      inTableOfContents = false;
    }

    // Skip if still in table of contents
    if (inTableOfContents && !isHeaderElement) {
      continue;
    }

    if (sectionHeaderMatch) {
      saveSection(currentSection);

      const sectionNum = sectionHeaderMatch[1];
      currentSection = {
        number: sectionNum,
        // Prefer the curated title; otherwise keep the header text as found.
        title: curatedTitles[sectionNum] || sectionHeaderMatch[2].trim(),
        lines: [],
      };
      // NOTE(review): an annex that is still open here is dropped without
      // being stored, as in the original code — confirm this is intended.
      currentAnnex = null;
      inAnnex = false;
      continue;
    }

    // Check for Annex headers
    const annexMatch = ANNEX_AT_START.exec(text);
    if (annexMatch || (el.classList.contains('oj-doc-ti') && text.includes('Annex'))) {
      saveSection(currentSection);
      saveAnnex(currentAnnex);

      const annexNum = annexMatch?.[1] || ANNEX_ANYWHERE.exec(text)?.[1];
      if (annexNum) {
        inAnnex = true;
        currentAnnex = { number: annexNum, title: extractAnnexTitle(text), lines: [] };
        currentSection = null;
      }
      continue;
    }

    // Skip metadata and navigation elements
    const isPageFurniture =
      text.includes('Official Journal') ||
      text.includes('EUR-Lex') ||
      text.includes('CONTENTS') ||
      ALL_CAPS_WORD.test(text) ||
      OJ_PAGE_REFERENCE.test(text) ||
      OJ_DATE_STAMP.test(text);
    if (isPageFurniture) {
      continue;
    }

    // Add content to current section or annex
    if (inAnnex && currentAnnex) {
      currentAnnex.lines.push(text);
    } else if (currentSection) {
      currentSection.lines.push(text);
    }
  }

  // Don't forget last section/annex
  saveSection(currentSection);
  saveAnnex(currentAnnex);

  // Extract definitions from Section 2
  // UN regulations use format: 2.1. 'term' means/refers to definition
  // Note: Uses curly quotes (Unicode 8216/8217) not straight quotes
  const defsSection = articles.find((a) => a.number === '2');
  if (defsSection) {
    // Normalize text: collapse whitespace, convert curly quotes to straight
    const normalizedText = defsSection.text
      .replace(/\n+/g, ' ')
      .replace(/\s+/g, ' ')
      .replace(/[\u2018\u2019]/g, "'");

    // Match patterns like: 2.1. 'Vehicle type' means/refers to ...
    // A definition runs until the next "N.N. '" marker or the end of the text.
    const defRegex = /(\d+\.\d+\.)\s*'([^']+)'\s+(?:means|refers to)\s+(.+?)(?=\d+\.\d+\.\s*'|$)/g;
    for (const match of normalizedText.matchAll(defRegex)) {
      const term = match[2].trim().toLowerCase();
      // Clean up the definition - remove trailing section numbers and punctuation
      const definition = match[3]
        .trim()
        .replace(/\s*\d+\.\d+\.\s*$/, '')
        .replace(/[;.]$/, '')
        .trim();
      // Very short remainders are treated as parse noise and dropped.
      if (term && definition.length > 10) {
        definitions.push({ term, definition, article: '2' });
      }
    }
  }

  // Deduplicate: for a repeated number keep the entry with the longest text
  const articleMap = new Map<string, Article>();
  for (const article of articles) {
    const existing = articleMap.get(article.number);
    if (!existing || article.text.length > existing.text.length) {
      articleMap.set(article.number, article);
    }
  }

  // Sort numbered sections first, then annexes, each in ascending numeric order
  const sortedArticles = Array.from(articleMap.values()).sort((a, b) => {
    const aIsAnnex = a.number.startsWith('Annex');
    const bIsAnnex = b.number.startsWith('Annex');
    if (aIsAnnex && !bIsAnnex) return 1;
    if (!aIsAnnex && bIsAnnex) return -1;
    if (aIsAnnex && bIsAnnex) {
      return parseInt(a.number.replace('Annex ', ''), 10) - parseInt(b.number.replace('Annex ', ''), 10);
    }
    return parseInt(a.number, 10) - parseInt(b.number, 10);
  });

  return { articles: sortedArticles, definitions };
}
283
+
284
/**
 * Derive an annex title from its header text.
 *
 * If anything follows "Annex N" (optionally after a dash) on the same line,
 * that remainder is the title. Otherwise a built-in table of common R155
 * annex titles is consulted.
 *
 * @param text - Header text expected to contain "Annex <number>".
 * @returns The title, or an empty string when none can be determined.
 */
function extractAnnexTitle(text: string): string {
  // Text after "Annex X", with an optional en dash, em dash or hyphen in between
  const inlineTitle = /Annex\s+\d+\s*[–—-]?\s*(.*)/i.exec(text)?.[1];
  if (inlineTitle) {
    return inlineTitle.trim();
  }

  // Common annex titles for R155
  const fallbackTitles: Record<string, string> = {
    '1': 'Information document',
    '2': 'Communication',
    '3': 'Arrangements of the approval mark',
    '4': 'Certificate of Compliance for CSMS',
    '5': 'List of threats and corresponding mitigations',
  };

  const annexNumber = /Annex\s+(\d+)/i.exec(text)?.[1];
  if (annexNumber === undefined) {
    return '';
  }

  return fallbackTitles[annexNumber] || '';
}
307
+
308
/**
 * Fetch a UN regulation from EUR-Lex, parse it and write the seed JSON file.
 *
 * Side effects: writes the raw HTML next to the output file for debugging,
 * writes the JSON seed file, and logs a summary to the console.
 *
 * @param celexId - CELEX id of the document. Ids missing from
 *   UN_REGULATION_METADATA are still processed, with generic metadata.
 * @param outputPath - Destination of the JSON seed file.
 * @returns Resolves when the file has been written, or when parsing found no
 *   sections (in which case no JSON is written and the process exit code is
 *   set to 1).
 * @throws Propagates errors from the fetch and from the file writes.
 */
async function ingestUnRegulation(celexId: string, outputPath: string): Promise<void> {
  const metadata = UN_REGULATION_METADATA[celexId];
  if (!metadata) {
    console.warn(`Unknown CELEX ID: ${celexId}. Using generic metadata.`);
  }

  const html = await fetchEurLexHtml(celexId);
  console.log(`Fetched ${html.length} bytes`);

  // Save HTML for debugging. Only a trailing ".json" is swapped for ".html";
  // any other path gets ".html" appended, so the debug dump can never be
  // written to outputPath itself.
  const jsonSuffix = /\.json$/i;
  const debugHtmlPath = jsonSuffix.test(outputPath)
    ? outputPath.replace(jsonSuffix, '.html')
    : `${outputPath}.html`;
  writeFileSync(debugHtmlPath, html);

  const { articles, definitions } = parseUnRegulation(html, celexId);
  console.log(`Parsed ${articles.length} articles/sections, ${definitions.length} definitions`);

  if (articles.length === 0) {
    console.error('No sections found! The HTML structure may have changed.');
    // Report the failure through the exit status so automation does not
    // mistake an empty parse for a successful ingest.
    process.exitCode = 1;
    return;
  }

  const regulation: RegulationData = {
    id: metadata?.id || celexId,
    full_name: metadata?.full_name || `UN Regulation ${celexId}`,
    celex_id: celexId,
    effective_date: metadata?.effective_date,
    eur_lex_url: `https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX:${celexId}`,
    articles,
    definitions,
  };

  writeFileSync(outputPath, JSON.stringify(regulation, null, 2));
  console.log(`\nSaved to: ${outputPath}`);
  console.log(`Sections: ${articles.filter((a) => !a.number.startsWith('Annex')).length}`);
  console.log(`Annexes: ${articles.filter((a) => a.number.startsWith('Annex')).length}`);
  console.log(`Definitions: ${definitions.length}`);

  // Print summary: one line per section/annex with the first 60 characters of text
  console.log('\nSections found:');
  for (const article of articles) {
    const preview = article.text.substring(0, 60).replace(/\n/g, ' ');
    console.log(`  ${article.number}: ${article.title || '(no title)'} - ${preview}...`);
  }
}
351
+
352
// Entry point: read <celex_id> and <output_file> from the command line.
const cliArgs = process.argv.slice(2);
const celexId = cliArgs[0];
const outputPath = cliArgs[1];

// Both arguments are required; otherwise print usage plus the known ids and exit with status 1.
if (!(celexId && outputPath)) {
  const usageLines = [
    'Usage: npx tsx scripts/ingest-unece.ts <celex_id> <output_file>',
    'Example: npx tsx scripts/ingest-unece.ts 42021X0387 data/seed/un-r155.json',
    '\nKnown UN/ECE CELEX IDs:',
  ];
  for (const line of usageLines) {
    console.log(line);
  }
  for (const [id, meta] of Object.entries(UN_REGULATION_METADATA)) {
    console.log(`  ${id} - ${meta.id} (${meta.full_name})`);
  }
  process.exit(1);
}

// Any failure during ingestion is logged and reported through a non-zero exit status.
const reportFailure = (err: unknown): void => {
  console.error('Error:', err);
  process.exit(1);
};

ingestUnRegulation(celexId, outputPath).catch(reportFailure);