@ansvar/eu-regulations-mcp 0.8.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. package/README.md +76 -29
  2. package/data/regulations.db +0 -0
  3. package/data/seed/applicability/chips-act.json +67 -0
  4. package/data/seed/applicability/crma.json +85 -0
  5. package/data/seed/chips-act.json +714 -0
  6. package/data/seed/crma.json +877 -0
  7. package/data/seed/mappings/iso27001-chips-act.json +50 -0
  8. package/data/seed/mappings/iso27001-crma.json +50 -0
  9. package/data/seed/mappings/nist-csf-chips-act.json +56 -0
  10. package/data/seed/mappings/nist-csf-crma.json +56 -0
  11. package/dist/database/sqlite-adapter.d.ts +2 -2
  12. package/dist/database/sqlite-adapter.d.ts.map +1 -1
  13. package/dist/database/sqlite-adapter.js.map +1 -1
  14. package/dist/http-server.js +27 -5
  15. package/dist/http-server.js.map +1 -1
  16. package/dist/index.js +27 -4
  17. package/dist/index.js.map +1 -1
  18. package/dist/tools/about.d.ts +40 -0
  19. package/dist/tools/about.d.ts.map +1 -0
  20. package/dist/tools/about.js +61 -0
  21. package/dist/tools/about.js.map +1 -0
  22. package/dist/tools/list.d.ts +7 -0
  23. package/dist/tools/list.d.ts.map +1 -1
  24. package/dist/tools/list.js +73 -8
  25. package/dist/tools/list.js.map +1 -1
  26. package/dist/tools/registry.d.ts +11 -1
  27. package/dist/tools/registry.d.ts.map +1 -1
  28. package/dist/tools/registry.js +56 -4
  29. package/dist/tools/registry.js.map +1 -1
  30. package/dist/worker.d.ts.map +1 -1
  31. package/dist/worker.js +17 -5
  32. package/dist/worker.js.map +1 -1
  33. package/package.json +8 -7
  34. package/scripts/add-cross-references.sql +0 -200
  35. package/scripts/analyze-survey-responses.ts +0 -285
  36. package/scripts/build-db.ts +0 -421
  37. package/scripts/bulk-reingest-all.ts +0 -331
  38. package/scripts/check-updates.ts +0 -294
  39. package/scripts/extract-eprivacy-recitals.ts +0 -98
  40. package/scripts/ingest-eurlex-browser.ts +0 -113
  41. package/scripts/ingest-eurlex.ts +0 -346
  42. package/scripts/ingest-unece.ts +0 -382
  43. package/scripts/migrate-postgres.ts +0 -445
  44. package/scripts/migrate-to-postgres.ts +0 -353
  45. package/scripts/reingest-all-with-recitals.sh +0 -81
  46. package/scripts/sync-versions.ts +0 -206
  47. package/scripts/test-cross-refs.js +0 -26
  48. package/scripts/test-postgres-adapter.ts +0 -146
  49. package/scripts/update-dora-rts-metadata.ts +0 -112
  50. package/src/database/postgres-adapter.ts +0 -84
  51. package/src/database/sqlite-adapter.ts +0 -44
  52. package/src/database/types.ts +0 -10
  53. package/src/http-server.ts +0 -149
  54. package/src/index.ts +0 -61
  55. package/src/middleware/rate-limit.ts +0 -104
  56. package/src/tools/applicability.ts +0 -167
  57. package/src/tools/article.ts +0 -81
  58. package/src/tools/compare.ts +0 -217
  59. package/src/tools/definitions.ts +0 -49
  60. package/src/tools/evidence.ts +0 -84
  61. package/src/tools/list.ts +0 -124
  62. package/src/tools/map.ts +0 -86
  63. package/src/tools/recital.ts +0 -60
  64. package/src/tools/registry.ts +0 -311
  65. package/src/tools/search.ts +0 -297
  66. package/src/worker.ts +0 -708
@@ -1,331 +0,0 @@
1
- #!/usr/bin/env npx tsx
2
-
3
- /**
4
- * Bulk re-ingestion script for all 37 EU regulations.
5
- *
6
- * Uses Puppeteer-based browser ingestion to bypass EUR-Lex WAF.
7
- * Processes regulations in batches of 3 parallel browser instances
8
- * with 2s delays between batches for rate limiting.
9
- *
10
- * Usage:
11
- * npx tsx scripts/bulk-reingest-all.ts
12
- * npx tsx scripts/bulk-reingest-all.ts --dry-run # Preview without executing
13
- *
14
- * SECURITY NOTE: Uses execFile (not exec) to prevent command injection.
15
- */
16
-
17
- import { execFile } from 'child_process';
18
- import { promisify } from 'util';
19
- import { readdir } from 'fs/promises';
20
- import { join, basename } from 'path';
21
-
22
- const execFileAsync = promisify(execFile);
23
-
24
- interface Regulation {
25
- celexId: string;
26
- filename: string;
27
- filepath: string;
28
- }
29
-
30
- interface IngestionResult {
31
- regulation: string;
32
- success: boolean;
33
- error?: string;
34
- duration?: number;
35
- }
36
-
37
- // Configuration
38
- const BATCH_SIZE = 3; // Parallel browser instances
39
- const BATCH_DELAY_MS = 2000; // Rate limiting between batches
40
- const TIMEOUT_MS = 120000; // 2 minutes per regulation
41
-
42
- /**
43
- * Discover all regulation JSON files in data/seed/
44
- */
45
- async function discoverRegulations(): Promise<Regulation[]> {
46
- const seedDir = join(process.cwd(), 'data', 'seed');
47
- const files = await readdir(seedDir);
48
-
49
- const regulations: Regulation[] = [];
50
-
51
- for (const file of files) {
52
- if (!file.endsWith('.json')) continue;
53
-
54
- const filepath = join(seedDir, file);
55
- const filename = basename(file, '.json');
56
-
57
- // Read JSON to extract CELEX ID
58
- try {
59
- const { readFileSync } = await import('fs');
60
- const content = readFileSync(filepath, 'utf-8');
61
- const data = JSON.parse(content);
62
-
63
- if (data.celex_id) {
64
- regulations.push({
65
- celexId: data.celex_id,
66
- filename,
67
- filepath,
68
- });
69
- } else {
70
- console.warn(`⚠️ Warning: ${file} has no celex_id field`);
71
- }
72
- } catch (err) {
73
- console.warn(`⚠️ Warning: Failed to parse ${file}:`, (err as Error).message);
74
- }
75
- }
76
-
77
- return regulations.sort((a, b) => a.filename.localeCompare(b.filename));
78
- }
79
-
80
- /**
81
- * Ingest a single regulation using the appropriate script
82
- */
83
- async function ingestRegulation(regulation: Regulation): Promise<IngestionResult> {
84
- const startTime = Date.now();
85
-
86
- try {
87
- // Determine which ingestion script to use
88
- const isUNECE = regulation.celexId.startsWith('42021X');
89
- const scriptName = isUNECE ? 'ingest-unece.ts' : 'ingest-eurlex.ts';
90
- const scriptPath = join(process.cwd(), 'scripts', scriptName);
91
-
92
- // SECURITY: Using execFile (not exec) to prevent command injection
93
- const args = ['tsx', scriptPath, regulation.celexId, regulation.filepath];
94
-
95
- // Add --browser flag for EUR-Lex regulations to bypass WAF
96
- if (!isUNECE) {
97
- args.push('--browser');
98
- }
99
-
100
- const { stdout, stderr } = await execFileAsync(
101
- 'npx',
102
- args,
103
- {
104
- timeout: TIMEOUT_MS,
105
- cwd: process.cwd(),
106
- maxBuffer: 10 * 1024 * 1024, // 10MB buffer for large outputs
107
- }
108
- );
109
-
110
- const duration = Date.now() - startTime;
111
-
112
- // Log any warnings from stderr
113
- if (stderr) {
114
- console.log(` [stderr] ${stderr.trim()}`);
115
- }
116
-
117
- return {
118
- regulation: regulation.filename,
119
- success: true,
120
- duration,
121
- };
122
- } catch (err: any) {
123
- const duration = Date.now() - startTime;
124
-
125
- return {
126
- regulation: regulation.filename,
127
- success: false,
128
- error: err.message || String(err),
129
- duration,
130
- };
131
- }
132
- }
133
-
134
- /**
135
- * Process regulations in batches with rate limiting
136
- */
137
- async function processBatches(regulations: Regulation[]): Promise<IngestionResult[]> {
138
- const results: IngestionResult[] = [];
139
- const totalBatches = Math.ceil(regulations.length / BATCH_SIZE);
140
-
141
- for (let i = 0; i < regulations.length; i += BATCH_SIZE) {
142
- const batch = regulations.slice(i, i + BATCH_SIZE);
143
- const batchNum = Math.floor(i / BATCH_SIZE) + 1;
144
-
145
- console.log(`\n📦 Batch ${batchNum}/${totalBatches} (${batch.length} regulations)`);
146
- console.log('─'.repeat(60));
147
-
148
- // Process batch in parallel
149
- const batchPromises = batch.map(async (reg, idx) => {
150
- const regNum = i + idx + 1;
151
- console.log(`[${regNum}/${regulations.length}] Starting: ${reg.filename} (${reg.celexId})`);
152
-
153
- const result = await ingestRegulation(reg);
154
-
155
- if (result.success) {
156
- console.log(`[${regNum}/${regulations.length}] ✅ ${reg.filename} (${result.duration}ms)`);
157
- } else {
158
- console.log(`[${regNum}/${regulations.length}] ❌ ${reg.filename}: ${result.error}`);
159
- }
160
-
161
- return result;
162
- });
163
-
164
- const batchResults = await Promise.all(batchPromises);
165
- results.push(...batchResults);
166
-
167
- // Rate limiting: delay between batches (except after last batch)
168
- if (i + BATCH_SIZE < regulations.length) {
169
- console.log(`\n⏸️ Waiting ${BATCH_DELAY_MS}ms before next batch...`);
170
- await new Promise(resolve => setTimeout(resolve, BATCH_DELAY_MS));
171
- }
172
- }
173
-
174
- return results;
175
- }
176
-
177
- /**
178
- * Print summary report
179
- */
180
- function printSummary(results: IngestionResult[]) {
181
- console.log('\n' + '═'.repeat(60));
182
- console.log('📊 INGESTION SUMMARY');
183
- console.log('═'.repeat(60));
184
-
185
- const successful = results.filter(r => r.success);
186
- const failed = results.filter(r => !r.success);
187
-
188
- console.log(`\n✅ Successful: ${successful.length}/${results.length}`);
189
- console.log(`❌ Failed: ${failed.length}/${results.length}`);
190
-
191
- if (successful.length > 0) {
192
- const totalDuration = successful.reduce((sum, r) => sum + (r.duration || 0), 0);
193
- const avgDuration = totalDuration / successful.length;
194
- console.log(`⏱️ Average duration: ${Math.round(avgDuration)}ms`);
195
- }
196
-
197
- if (failed.length > 0) {
198
- console.log('\n❌ Failed regulations:');
199
- for (const result of failed) {
200
- console.log(` - ${result.regulation}: ${result.error}`);
201
- }
202
- }
203
-
204
- console.log('\n' + '═'.repeat(60));
205
- }
206
-
207
- /**
208
- * Rebuild database and verify recitals
209
- */
210
- async function rebuildDatabase() {
211
- console.log('\n🔨 Rebuilding database...');
212
-
213
- try {
214
- const { stdout } = await execFileAsync('npm', ['run', 'build:db'], {
215
- cwd: process.cwd(),
216
- timeout: 60000, // 1 minute timeout
217
- });
218
-
219
- console.log(stdout);
220
- console.log('✅ Database rebuilt successfully');
221
- } catch (err: any) {
222
- console.error('❌ Database rebuild failed:', err.message);
223
- throw err;
224
- }
225
- }
226
-
227
- /**
228
- * Query and display recital counts
229
- */
230
- async function verifyRecitals() {
231
- console.log('\n📊 Verifying recital counts...');
232
-
233
- try {
234
- const { stdout } = await execFileAsync(
235
- 'sqlite3',
236
- [
237
- 'data/regulations.db',
238
- 'SELECT regulation, COUNT(*) as recital_count FROM recitals GROUP BY regulation ORDER BY recital_count DESC LIMIT 10;',
239
- ],
240
- { cwd: process.cwd() }
241
- );
242
-
243
- console.log('\nTop 10 regulations by recital count:');
244
- console.log(stdout);
245
-
246
- // Get total count
247
- const { stdout: totalStdout } = await execFileAsync(
248
- 'sqlite3',
249
- ['data/regulations.db', 'SELECT COUNT(*) FROM recitals;'],
250
- { cwd: process.cwd() }
251
- );
252
-
253
- const totalRecitals = parseInt(totalStdout.trim());
254
- console.log(`\n📈 Total recitals in database: ${totalRecitals}`);
255
-
256
- if (totalRecitals < 2000) {
257
- console.warn(`⚠️ Warning: Expected ~2,500+ recitals, got ${totalRecitals}`);
258
- } else {
259
- console.log('✅ Recital count looks good!');
260
- }
261
- } catch (err: any) {
262
- console.error('❌ Verification failed:', err.message);
263
- }
264
- }
265
-
266
- /**
267
- * Main execution
268
- */
269
- async function main() {
270
- const isDryRun = process.argv.includes('--dry-run');
271
-
272
- console.log('🚀 EU Regulations Bulk Re-Ingestion');
273
- console.log('═'.repeat(60));
274
- console.log(`Mode: ${isDryRun ? 'DRY RUN' : 'LIVE'}`);
275
- console.log(`Batch size: ${BATCH_SIZE} parallel instances`);
276
- console.log(`Batch delay: ${BATCH_DELAY_MS}ms`);
277
- console.log(`Timeout per regulation: ${TIMEOUT_MS}ms`);
278
- console.log('═'.repeat(60));
279
-
280
- // Step 1: Discover regulations
281
- console.log('\n🔍 Discovering regulations...');
282
- const regulations = await discoverRegulations();
283
- console.log(`Found ${regulations.length} regulations in data/seed/`);
284
-
285
- if (regulations.length === 0) {
286
- console.error('❌ No regulations found. Exiting.');
287
- process.exit(1);
288
- }
289
-
290
- // List regulations
291
- console.log('\nRegulations to process:');
292
- for (const reg of regulations) {
293
- const type = reg.celexId.startsWith('42021X') ? '[UNECE]' : '[EUR-Lex]';
294
- console.log(` ${type} ${reg.filename} (${reg.celexId})`);
295
- }
296
-
297
- if (isDryRun) {
298
- console.log('\n✅ Dry run complete. No ingestion performed.');
299
- return;
300
- }
301
-
302
- // Confirm before starting
303
- console.log('\n⚠️ This will re-ingest all regulations using browser automation.');
304
- console.log(`⏱️ Estimated time: ${Math.ceil((regulations.length / BATCH_SIZE) * (BATCH_DELAY_MS / 1000))} seconds + ingestion time`);
305
-
306
- // Step 2: Process batches
307
- const results = await processBatches(regulations);
308
-
309
- // Step 3: Print summary
310
- printSummary(results);
311
-
312
- // Step 4: Rebuild database
313
- if (results.some(r => r.success)) {
314
- await rebuildDatabase();
315
-
316
- // Step 5: Verify recitals
317
- await verifyRecitals();
318
- } else {
319
- console.error('\n❌ No successful ingestions. Skipping database rebuild.');
320
- }
321
-
322
- // Exit with appropriate code
323
- const hasFailures = results.some(r => !r.success);
324
- process.exit(hasFailures ? 1 : 0);
325
- }
326
-
327
- // Run main
328
- main().catch(err => {
329
- console.error('💥 Fatal error:', err);
330
- process.exit(1);
331
- });
@@ -1,294 +0,0 @@
1
- #!/usr/bin/env npx tsx
2
-
3
- /**
4
- * Check for updates to EU regulations from EUR-Lex.
5
- * Compares current database versions against EUR-Lex metadata.
6
- *
7
- * Usage: npx tsx scripts/check-updates.ts
8
- */
9
-
10
- import Database from 'better-sqlite3';
11
- import { existsSync } from 'fs';
12
- import { join, dirname } from 'path';
13
- import { fileURLToPath } from 'url';
14
-
15
- const __filename = fileURLToPath(import.meta.url);
16
- const __dirname = dirname(__filename);
17
-
18
- const DB_PATH = join(__dirname, '..', 'data', 'regulations.db');
19
-
20
- interface SourceRecord {
21
- regulation: string;
22
- celex_id: string;
23
- eur_lex_version: string | null;
24
- last_fetched: string | null;
25
- articles_expected: number | null;
26
- articles_parsed: number | null;
27
- quality_status: string;
28
- }
29
-
30
- interface EurLexMetadata {
31
- celexId: string;
32
- lastModified: string;
33
- title: string;
34
- dateDocument: string;
35
- consolidatedVersions?: string[];
36
- }
37
-
38
- // No hardcoded list - source_registry table IS the source of truth
39
- // To add a new regulation: ingest it, and it's automatically monitored
40
-
41
- async function fetchEurLexMetadata(celexId: string): Promise<EurLexMetadata | null> {
42
- const infoUrl = `https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX:${celexId}`;
43
-
44
- try {
45
- const response = await fetch(infoUrl, {
46
- headers: {
47
- 'User-Agent': 'Mozilla/5.0 (compatible; EU-Compliance-MCP/1.0)',
48
- 'Accept': 'text/html',
49
- },
50
- });
51
-
52
- if (!response.ok) {
53
- console.error(`Failed to fetch metadata for ${celexId}: ${response.status}`);
54
- return null;
55
- }
56
-
57
- const html = await response.text();
58
-
59
- // Try multiple extraction methods in order of reliability:
60
-
61
- // 1. ELI metadata (works for all document types including UNECE)
62
- const eliDateDoc = html.match(/property="eli:date_document"[^>]*content="(\d{4}-\d{2}-\d{2})"/);
63
- const eliDatePub = html.match(/property="eli:date_publication"[^>]*content="(\d{4}-\d{2}-\d{2})"/);
64
- // Also try the reverse attribute order
65
- const eliDateDoc2 = html.match(/content="(\d{4}-\d{2}-\d{2})"[^>]*property="eli:date_document"/);
66
- const eliDatePub2 = html.match(/content="(\d{4}-\d{2}-\d{2})"[^>]*property="eli:date_publication"/);
67
-
68
- // 2. Visible text patterns
69
- const dateMatch = html.match(/Date of document:\s*(\d{2}\/\d{2}\/\d{4})/i);
70
-
71
- // 3. Generic ELI date pattern (fallback)
72
- const genericEli = html.match(/eli[^>]*(\d{4}-\d{2}-\d{2})/i);
73
-
74
- const titleMatch = html.match(/<title>([^<]+)<\/title>/i);
75
-
76
- // Use the best available date (prefer publication date for tracking updates)
77
- const lastModified =
78
- eliDatePub?.[1] || eliDatePub2?.[1] ||
79
- eliDateDoc?.[1] || eliDateDoc2?.[1] ||
80
- genericEli?.[1] ||
81
- (dateMatch?.[1] ? convertDateFormat(dateMatch[1]) : null) ||
82
- 'unknown';
83
-
84
- return {
85
- celexId,
86
- lastModified,
87
- title: titleMatch?.[1]?.trim() || 'Unknown',
88
- dateDocument: eliDateDoc?.[1] || eliDateDoc2?.[1] || 'unknown',
89
- };
90
- } catch (error) {
91
- console.error(`Error fetching metadata for ${celexId}:`, error);
92
- return null;
93
- }
94
- }
95
-
96
- // Convert DD/MM/YYYY to YYYY-MM-DD
97
- function convertDateFormat(date: string): string {
98
- const parts = date.split('/');
99
- if (parts.length === 3) {
100
- return `${parts[2]}-${parts[1]}-${parts[0]}`;
101
- }
102
- return date;
103
- }
104
-
105
- // Sync mode: update database with current EUR-Lex versions
106
- async function syncVersions(): Promise<void> {
107
- console.log('Syncing EUR-Lex versions to database...\n');
108
-
109
- if (!existsSync(DB_PATH)) {
110
- console.log('Database not found. Run `npm run build:db` first.');
111
- process.exit(1);
112
- }
113
-
114
- const db = new Database(DB_PATH);
115
-
116
- const sources = db.prepare(`
117
- SELECT regulation, celex_id FROM source_registry
118
- WHERE celex_id IS NOT NULL AND celex_id != ''
119
- `).all() as SourceRecord[];
120
-
121
- const updateStmt = db.prepare(`
122
- UPDATE source_registry SET eur_lex_version = ?, last_fetched = ?
123
- WHERE regulation = ?
124
- `);
125
-
126
- const now = new Date().toISOString();
127
- let updated = 0;
128
-
129
- for (const source of sources) {
130
- process.stdout.write(`${source.regulation}: `);
131
- const metadata = await fetchEurLexMetadata(source.celex_id);
132
-
133
- if (metadata && metadata.lastModified !== 'unknown') {
134
- updateStmt.run(metadata.lastModified, now, source.regulation);
135
- console.log(`synced to ${metadata.lastModified}`);
136
- updated++;
137
- } else {
138
- console.log('skipped (unknown version)');
139
- }
140
- }
141
-
142
- db.close();
143
- console.log(`\n✓ Synced ${updated} regulation(s)`);
144
- }
145
-
146
- async function checkForUpdates(): Promise<void> {
147
- console.log('Checking EUR-Lex for regulation updates...\n');
148
-
149
- // Check if database exists
150
- if (!existsSync(DB_PATH)) {
151
- console.log('Database not found. Run `npm run build:db` first.');
152
- process.exit(1);
153
- }
154
-
155
- const db = new Database(DB_PATH, { readonly: true });
156
-
157
- // Get all regulations from source_registry - this IS the source of truth
158
- const sources = db.prepare(`
159
- SELECT regulation, celex_id, eur_lex_version, last_fetched, quality_status
160
- FROM source_registry
161
- WHERE celex_id IS NOT NULL AND celex_id != ''
162
- ORDER BY regulation
163
- `).all() as SourceRecord[];
164
-
165
- if (sources.length === 0) {
166
- console.log('No regulations found in source_registry.');
167
- console.log('Ingest regulations first with: npx tsx scripts/ingest-eurlex.ts <CELEX_ID> <output.json>');
168
- db.close();
169
- process.exit(0);
170
- }
171
-
172
- console.log(`Found ${sources.length} regulation(s) to check\n`);
173
- console.log('Status Report');
174
- console.log('='.repeat(80));
175
-
176
- const updates: Array<{ id: string; celex_id: string; reason: string }> = [];
177
-
178
- for (const source of sources) {
179
- process.stdout.write(`\n${source.regulation.padEnd(20)} (${source.celex_id}): `);
180
-
181
- // Fetch current EUR-Lex metadata
182
- const metadata = await fetchEurLexMetadata(source.celex_id);
183
-
184
- if (!metadata) {
185
- console.log('FETCH FAILED');
186
- continue;
187
- }
188
-
189
- const lastFetched = source.last_fetched || 'never';
190
- const eurLexVersion = metadata.lastModified;
191
-
192
- // Helper to compare dates (returns true if eurLex is newer)
193
- const isNewer = (eurLex: string, local: string): boolean => {
194
- if (eurLex === 'unknown' || !eurLex) return false;
195
- try {
196
- const eurLexDate = new Date(eurLex);
197
- const localDate = new Date(local);
198
- return eurLexDate > localDate;
199
- } catch {
200
- return false;
201
- }
202
- };
203
-
204
- if (eurLexVersion === 'unknown') {
205
- // UNECE or non-standard documents - can't auto-check
206
- console.log('MANUAL CHECK REQUIRED');
207
- console.log(` Source type: Non-standard (UNECE/consolidated)`);
208
- console.log(` Last fetched: ${lastFetched}`);
209
- } else if (!source.eur_lex_version) {
210
- // First time checking - record the version but don't flag as update
211
- console.log('VERSION NOT TRACKED');
212
- console.log(` EUR-Lex version: ${eurLexVersion}`);
213
- console.log(` Run ingest again to record version`);
214
- } else if (isNewer(eurLexVersion, source.eur_lex_version)) {
215
- // EUR-Lex has a newer version
216
- console.log('UPDATE AVAILABLE');
217
- console.log(` Local version: ${source.eur_lex_version}`);
218
- console.log(` EUR-Lex version: ${eurLexVersion}`);
219
- updates.push({
220
- id: source.regulation,
221
- celex_id: source.celex_id,
222
- reason: `Newer version: ${source.eur_lex_version} -> ${eurLexVersion}`
223
- });
224
- } else if (source.quality_status !== 'complete') {
225
- console.log(`INCOMPLETE (${source.quality_status})`);
226
- updates.push({
227
- id: source.regulation,
228
- celex_id: source.celex_id,
229
- reason: `Quality status: ${source.quality_status}`
230
- });
231
- } else {
232
- console.log('UP TO DATE');
233
- console.log(` EUR-Lex version: ${eurLexVersion}`);
234
- console.log(` Last fetched: ${lastFetched}`);
235
- }
236
- }
237
-
238
- db.close();
239
-
240
- // Summary
241
- console.log('\n' + '='.repeat(80));
242
- console.log('Summary');
243
- console.log('='.repeat(80));
244
-
245
- if (updates.length === 0) {
246
- console.log('\n✓ All monitored regulations are up to date.');
247
- } else {
248
- console.log(`\n⚠ ${updates.length} regulation(s) need attention:\n`);
249
- for (const u of updates) {
250
- console.log(` - ${u.id}: ${u.reason}`);
251
- }
252
-
253
- console.log('\nTo update, run:');
254
- for (const u of updates) {
255
- console.log(` npx tsx scripts/ingest-eurlex.ts ${u.celex_id} data/seed/${u.id.toLowerCase()}.json`);
256
- }
257
- console.log('\nThen: npm run build:db');
258
- }
259
-
260
- // Output for CI: write CELEX IDs to file for workflow to use
261
- const celexList = sources.map(s => s.celex_id).join('|');
262
- console.log(`\n::set-output name=celex_pattern::${celexList}`);
263
- }
264
-
265
- // Also provide a function to update the source registry after ingestion
266
- export async function updateSourceRegistry(
267
- db: Database.Database,
268
- regulation: string,
269
- celexId: string,
270
- articleCount: number
271
- ): Promise<void> {
272
- const now = new Date().toISOString();
273
-
274
- db.prepare(`
275
- INSERT OR REPLACE INTO source_registry
276
- (regulation, celex_id, eur_lex_version, last_fetched, articles_expected, articles_parsed, quality_status)
277
- VALUES (?, ?, ?, ?, ?, ?, 'complete')
278
- `).run(regulation, celexId, now.split('T')[0], now, articleCount, articleCount);
279
- }
280
-
281
- // Main execution
282
- const args = process.argv.slice(2);
283
-
284
- if (args.includes('--sync')) {
285
- syncVersions().catch(err => {
286
- console.error('Error:', err);
287
- process.exit(1);
288
- });
289
- } else {
290
- checkForUpdates().catch(err => {
291
- console.error('Error:', err);
292
- process.exit(1);
293
- });
294
- }