@ansvar/rwandan-law-mcp 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +110 -0
- package/README.md +83 -0
- package/dist/__tests__/contract/golden.test.d.ts +9 -0
- package/dist/__tests__/contract/golden.test.d.ts.map +1 -0
- package/dist/__tests__/contract/golden.test.js +76 -0
- package/dist/__tests__/contract/golden.test.js.map +1 -0
- package/dist/api/health.d.ts +3 -0
- package/dist/api/health.d.ts.map +1 -0
- package/dist/api/health.js +101 -0
- package/dist/api/health.js.map +1 -0
- package/dist/api/mcp.d.ts +3 -0
- package/dist/api/mcp.d.ts.map +1 -0
- package/dist/api/mcp.js +119 -0
- package/dist/api/mcp.js.map +1 -0
- package/dist/scripts/build-db.d.ts +11 -0
- package/dist/scripts/build-db.d.ts.map +1 -0
- package/dist/scripts/build-db.js +375 -0
- package/dist/scripts/build-db.js.map +1 -0
- package/dist/scripts/drift-detect.d.ts +9 -0
- package/dist/scripts/drift-detect.d.ts.map +1 -0
- package/dist/scripts/drift-detect.js +62 -0
- package/dist/scripts/drift-detect.js.map +1 -0
- package/dist/scripts/ingest.d.ts +10 -0
- package/dist/scripts/ingest.d.ts.map +1 -0
- package/dist/scripts/ingest.js +333 -0
- package/dist/scripts/ingest.js.map +1 -0
- package/dist/scripts/lib/fetcher.d.ts +26 -0
- package/dist/scripts/lib/fetcher.d.ts.map +1 -0
- package/dist/scripts/lib/fetcher.js +102 -0
- package/dist/scripts/lib/fetcher.js.map +1 -0
- package/dist/scripts/lib/parser.d.ts +60 -0
- package/dist/scripts/lib/parser.d.ts.map +1 -0
- package/dist/scripts/lib/parser.js +560 -0
- package/dist/scripts/lib/parser.js.map +1 -0
- package/dist/scripts/lib/pdf.d.ts +7 -0
- package/dist/scripts/lib/pdf.d.ts.map +1 -0
- package/dist/scripts/lib/pdf.js +124 -0
- package/dist/scripts/lib/pdf.js.map +1 -0
- package/dist/src/capabilities.d.ts +16 -0
- package/dist/src/capabilities.d.ts.map +1 -0
- package/dist/src/capabilities.js +43 -0
- package/dist/src/capabilities.js.map +1 -0
- package/dist/src/constants.d.ts +7 -0
- package/dist/src/constants.d.ts.map +1 -0
- package/dist/src/constants.js +7 -0
- package/dist/src/constants.js.map +1 -0
- package/dist/src/index.d.ts +8 -0
- package/dist/src/index.d.ts.map +1 -0
- package/dist/src/index.js +80 -0
- package/dist/src/index.js.map +1 -0
- package/dist/src/tools/about.d.ts +45 -0
- package/dist/src/tools/about.d.ts.map +1 -0
- package/dist/src/tools/about.js +54 -0
- package/dist/src/tools/about.js.map +1 -0
- package/dist/src/tools/build-legal-stance.d.ts +21 -0
- package/dist/src/tools/build-legal-stance.d.ts.map +1 -0
- package/dist/src/tools/build-legal-stance.js +46 -0
- package/dist/src/tools/build-legal-stance.js.map +1 -0
- package/dist/src/tools/check-currency.d.ts +20 -0
- package/dist/src/tools/check-currency.d.ts.map +1 -0
- package/dist/src/tools/check-currency.js +41 -0
- package/dist/src/tools/check-currency.js.map +1 -0
- package/dist/src/tools/format-citation.d.ts +14 -0
- package/dist/src/tools/format-citation.d.ts.map +1 -0
- package/dist/src/tools/format-citation.js +28 -0
- package/dist/src/tools/format-citation.js.map +1 -0
- package/dist/src/tools/get-eu-basis.d.ts +21 -0
- package/dist/src/tools/get-eu-basis.d.ts.map +1 -0
- package/dist/src/tools/get-eu-basis.js +52 -0
- package/dist/src/tools/get-eu-basis.js.map +1 -0
- package/dist/src/tools/get-provision-eu-basis.d.ts +20 -0
- package/dist/src/tools/get-provision-eu-basis.d.ts.map +1 -0
- package/dist/src/tools/get-provision-eu-basis.js +45 -0
- package/dist/src/tools/get-provision-eu-basis.js.map +1 -0
- package/dist/src/tools/get-provision.d.ts +24 -0
- package/dist/src/tools/get-provision.d.ts.map +1 -0
- package/dist/src/tools/get-provision.js +80 -0
- package/dist/src/tools/get-provision.js.map +1 -0
- package/dist/src/tools/get-rwandan-implementations.d.ts +21 -0
- package/dist/src/tools/get-rwandan-implementations.d.ts.map +1 -0
- package/dist/src/tools/get-rwandan-implementations.js +42 -0
- package/dist/src/tools/get-rwandan-implementations.js.map +1 -0
- package/dist/src/tools/list-sources.d.ts +25 -0
- package/dist/src/tools/list-sources.d.ts.map +1 -0
- package/dist/src/tools/list-sources.js +41 -0
- package/dist/src/tools/list-sources.js.map +1 -0
- package/dist/src/tools/registry.d.ts +13 -0
- package/dist/src/tools/registry.d.ts.map +1 -0
- package/dist/src/tools/registry.js +365 -0
- package/dist/src/tools/registry.js.map +1 -0
- package/dist/src/tools/search-eu-implementations.d.ts +24 -0
- package/dist/src/tools/search-eu-implementations.d.ts.map +1 -0
- package/dist/src/tools/search-eu-implementations.js +58 -0
- package/dist/src/tools/search-eu-implementations.js.map +1 -0
- package/dist/src/tools/search-legislation.d.ts +24 -0
- package/dist/src/tools/search-legislation.d.ts.map +1 -0
- package/dist/src/tools/search-legislation.js +54 -0
- package/dist/src/tools/search-legislation.js.map +1 -0
- package/dist/src/tools/validate-citation.d.ts +20 -0
- package/dist/src/tools/validate-citation.d.ts.map +1 -0
- package/dist/src/tools/validate-citation.js +107 -0
- package/dist/src/tools/validate-citation.js.map +1 -0
- package/dist/src/tools/validate-eu-compliance.d.ts +20 -0
- package/dist/src/tools/validate-eu-compliance.d.ts.map +1 -0
- package/dist/src/tools/validate-eu-compliance.js +98 -0
- package/dist/src/tools/validate-eu-compliance.js.map +1 -0
- package/dist/src/utils/as-of-date.d.ts +9 -0
- package/dist/src/utils/as-of-date.d.ts.map +1 -0
- package/dist/src/utils/as-of-date.js +25 -0
- package/dist/src/utils/as-of-date.js.map +1 -0
- package/dist/src/utils/fts-query.d.ts +19 -0
- package/dist/src/utils/fts-query.d.ts.map +1 -0
- package/dist/src/utils/fts-query.js +47 -0
- package/dist/src/utils/fts-query.js.map +1 -0
- package/dist/src/utils/metadata.d.ts +16 -0
- package/dist/src/utils/metadata.d.ts.map +1 -0
- package/dist/src/utils/metadata.js +22 -0
- package/dist/src/utils/metadata.js.map +1 -0
- package/dist/src/utils/statute-id.d.ts +18 -0
- package/dist/src/utils/statute-id.d.ts.map +1 -0
- package/dist/src/utils/statute-id.js +34 -0
- package/dist/src/utils/statute-id.js.map +1 -0
- package/package.json +74 -0
- package/server.json +30 -0
|
@@ -0,0 +1,333 @@
|
|
|
1
|
+
#!/usr/bin/env tsx
|
|
2
|
+
/**
|
|
3
|
+
* Rwanda Law MCP -- full-catalog ingestion.
|
|
4
|
+
*
|
|
5
|
+
* Sources:
|
|
6
|
+
* - RwandaLII law catalog API (`/search/api/documents/`)
|
|
7
|
+
* - RwandaLII law detail pages (AKN HTML or PDF-backed)
|
|
8
|
+
*/
|
|
9
|
+
import * as fs from 'fs';
|
|
10
|
+
import * as path from 'path';
|
|
11
|
+
import { fileURLToPath } from 'url';
|
|
12
|
+
import { fetchBinaryWithRateLimit, fetchWithRateLimit } from './lib/fetcher.js';
|
|
13
|
+
import { buildDocumentIdFromHref, extractLawPageMetadata, parseAknLawHtml, parseCatalogResultsHtml, parsePdfExtractedText, } from './lib/parser.js';
|
|
14
|
+
import { extractTextFromPdf } from './lib/pdf.js';
|
|
15
|
+
// ESM has no built-in __filename/__dirname, so derive them from this module's URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Remote endpoints: site root (used to absolutise catalog hrefs) and the search API.
const ROOT_URL = 'https://rwandalii.org';
const SEARCH_API = 'https://rwandalii.org/search/api/documents/';

// Local working directories, resolved relative to this script's location.
const SOURCE_DIR = path.resolve(__dirname, '../data/source');
const SEED_DIR = path.resolve(__dirname, '../data/seed');

// Cached copy of the law catalog, stored alongside the downloaded sources.
const CATALOG_CACHE_PATH = path.join(SOURCE_DIR, '_catalog-laws.json');
|
|
22
|
+
/**
 * Parse the ingestion CLI flags from `process.argv`.
 *
 * Recognised flags:
 *   --limit <n>         maximum number of catalog rows to process (0 means no limit downstream)
 *   --offset <n>        number of catalog rows to skip; negative values are clamped to 0
 *   --append            keep existing seed files
 *   --refresh-catalog   ignore the cached catalog
 *   --skip-fetch        reuse already-downloaded sources where present
 *
 * A `--limit`/`--offset` flag that is the last argument (no value) is ignored.
 * Unknown arguments are ignored.
 *
 * @returns {{ limit: number | null, offset: number, append: boolean, refreshCatalog: boolean, skipFetch: boolean }}
 * @throws {Error} when `--limit` or `--offset` is not an integer, or `--limit` is negative.
 *   Previously such values became NaN (silently meaning "no limit", i.e. a full run) or a
 *   negative slice end (silently dropping the tail of the catalog).
 */
function parseArgs() {
    const args = process.argv.slice(2);
    // Number.parseInt yields NaN for non-numeric input; reject it instead of passing it on.
    const parseInteger = (flag, raw) => {
        const value = Number.parseInt(raw, 10);
        if (!Number.isInteger(value)) {
            throw new Error(`Invalid value for ${flag}: "${raw}" (expected an integer)`);
        }
        return value;
    };
    let limit = null;
    let offset = 0;
    let append = false;
    let refreshCatalog = false;
    let skipFetch = false;
    for (let i = 0; i < args.length; i++) {
        const flag = args[i];
        const value = args[i + 1];
        if (flag === '--limit' && value) {
            limit = parseInteger(flag, value);
            if (limit < 0) {
                throw new Error(`Invalid value for --limit: "${value}" (must not be negative)`);
            }
            i++; // consume the value
            continue;
        }
        if (flag === '--offset' && value) {
            offset = parseInteger(flag, value);
            i++; // consume the value
            continue;
        }
        if (flag === '--append') {
            append = true;
            continue;
        }
        if (flag === '--refresh-catalog') {
            refreshCatalog = true;
            continue;
        }
        if (flag === '--skip-fetch') {
            skipFetch = true;
        }
    }
    return { limit, offset: Math.max(0, offset), append, refreshCatalog, skipFetch };
}
|
|
54
|
+
/**
 * Create the source and seed directories (including missing parents) if they
 * do not exist yet.
 */
function ensureDirectories() {
    for (const dir of [SOURCE_DIR, SEED_DIR]) {
        fs.mkdirSync(dir, { recursive: true });
    }
}
|
|
58
|
+
/**
 * Delete every `.json` file directly inside the seed directory.
 * Other files and sub-directories are left untouched.
 */
function resetSeedDirectory() {
    for (const entry of fs.readdirSync(SEED_DIR)) {
        if (!entry.endsWith('.json')) {
            continue;
        }
        fs.unlinkSync(path.join(SEED_DIR, entry));
    }
}
|
|
64
|
+
/**
 * Parse a JSON document.
 *
 * @param content JSON text.
 * @returns the parsed value.
 * @throws {SyntaxError} when `content` is not valid JSON.
 */
function readJson(content) {
    const parsed = JSON.parse(content);
    return parsed;
}
|
|
67
|
+
/**
 * Resolve a (possibly relative) href against the site root.
 *
 * @param href link taken from a catalog row.
 * @returns the absolute URL as a string.
 */
function absoluteUrl(href) {
    const resolved = new URL(href, ROOT_URL);
    return resolved.href;
}
|
|
70
|
+
/**
 * Path of the cached HTML page for a document id inside the source directory.
 *
 * @param id document identifier used as the file stem.
 */
function sourceHtmlPath(id) {
    const fileName = `${id}.html`;
    return path.join(SOURCE_DIR, fileName);
}
|
|
73
|
+
/**
 * Path of the cached PDF for a document id inside the source directory.
 *
 * @param id document identifier used as the file stem.
 */
function sourcePdfPath(id) {
    const fileName = `${id}.pdf`;
    return path.join(SOURCE_DIR, fileName);
}
|
|
76
|
+
/**
 * Path of the extracted-text dump for a document id inside the source directory.
 *
 * @param id document identifier used as the file stem.
 */
function sourceTextPath(id) {
    const fileName = `${id}.txt`;
    return path.join(SOURCE_DIR, fileName);
}
|
|
79
|
+
/**
 * Path of a seed file: `<index zero-padded to at least 3 digits>-<actId>.json`
 * inside the seed directory.
 *
 * @param index sequence number of the seed file.
 * @param actId document identifier.
 */
function seedPath(index, actId) {
    const fileName = `${String(index).padStart(3, '0')}-${actId}.json`;
    return path.join(SEED_DIR, fileName);
}
|
|
83
|
+
/**
 * Next free seed sequence number, for `--append` runs.
 *
 * Scans the seed directory for files named `<digits>-<anything>.json` and
 * returns the highest numeric prefix plus one, or 1 when no such file exists.
 *
 * The prefix is matched as three OR MORE digits: seed names are zero-padded to
 * three digits but not truncated, so index 1000 and above yields a 4-digit
 * prefix. Matching exactly three digits ignored those files and made append
 * runs reuse index 1000 over and over.
 *
 * @returns {number} the sequence number to use for the next seed file.
 */
function currentSeedIndex() {
    let highest = 0;
    for (const file of fs.readdirSync(SEED_DIR)) {
        const match = /^(\d{3,})-.*\.json$/.exec(file);
        if (!match) {
            continue;
        }
        const index = Number.parseInt(match[1], 10);
        if (Number.isFinite(index) && index > highest) {
            highest = index;
        }
    }
    return highest + 1;
}
|
|
92
|
+
/**
 * Collect the law catalog from the search API.
 *
 * Runs one paginated search for the term "law" and one for every year from
 * 2000 up to the current UTC year, and merges the parsed rows, keeping the
 * first row seen for each `href`.
 *
 * Paging for a query stops when a page beyond the first answers HTTP 400,
 * when a page parses to zero rows, or when `page * rowsOnThisPage` reaches the
 * `count` reported by the API.
 *
 * @returns the de-duplicated catalog rows in first-seen order.
 * @throws {Error} on any other non-200 response.
 */
async function fetchCatalogLaws() {
    const rowsByHref = new Map();
    const firstYear = 2000;
    const thisYear = new Date().getUTCFullYear();
    const searchTerms = ['law'];
    for (let year = firstYear; year <= thisYear; year++) {
        searchTerms.push(String(year));
    }
    for (const term of searchTerms) {
        for (let page = 1;; page++) {
            const url = `${SEARCH_API}?search=${encodeURIComponent(term)}` +
                `&page=${page}&ordering=-date&mode=text&doc_type=legislation&nature=Law`;
            const response = await fetchWithRateLimit(url);
            const status = response.status;
            const rawBody = response.body;
            // RwandaLII search API currently returns HTTP 400 for pages beyond the valid range.
            if (status === 400 && page > 1) {
                break;
            }
            if (status !== 200) {
                throw new Error(`Catalog fetch failed for query "${term}" page ${page}: HTTP ${status}`);
            }
            const payload = readJson(rawBody);
            const total = payload.count;
            const rows = parseCatalogResultsHtml(payload.results_html ?? '');
            if (rows.length === 0) {
                break;
            }
            for (const row of rows) {
                if (!rowsByHref.has(row.href)) {
                    rowsByHref.set(row.href, row);
                }
            }
            // The size of the page just received stands in for the API's page size.
            if (page * rows.length >= total) {
                break;
            }
        }
    }
    return [...rowsByHref.values()];
}
|
|
134
|
+
/**
 * Load the catalog cache from disk.
 *
 * @returns the cached array, or `null` when the cache file is missing,
 *   unreadable, not valid JSON, not an array, or an empty array.
 */
function loadCachedCatalog() {
    if (!fs.existsSync(CATALOG_CACHE_PATH)) {
        return null;
    }
    let cached;
    try {
        cached = JSON.parse(fs.readFileSync(CATALOG_CACHE_PATH, 'utf-8'));
    }
    catch {
        // An unreadable or corrupt cache is treated the same as no cache.
        return null;
    }
    return Array.isArray(cached) && cached.length > 0 ? cached : null;
}
|
|
148
|
+
/**
 * Write the catalog to the cache file as 2-space-indented JSON followed by a
 * trailing newline.
 *
 * @param catalog catalog rows to persist.
 */
function writeCatalogCache(catalog) {
    const serialized = JSON.stringify(catalog, null, 2);
    fs.writeFileSync(CATALOG_CACHE_PATH, `${serialized}\n`);
}
|
|
151
|
+
/**
 * Get the HTML of a law page, using the on-disk copy when allowed.
 *
 * @param lawUrl absolute URL of the law page.
 * @param provisionalId id used to name the cached `.html` file.
 * @param skipFetch when true and a cached file exists, return it without any request.
 * @returns the page HTML.
 * @throws {Error} `HTTP <status>` when the response status is not 200.
 */
async function fetchLawHtml(lawUrl, provisionalId, skipFetch) {
    const cachePath = sourceHtmlPath(provisionalId);
    const cacheUsable = skipFetch && fs.existsSync(cachePath);
    if (cacheUsable) {
        return fs.readFileSync(cachePath, 'utf-8');
    }
    const { status, body } = await fetchWithRateLimit(lawUrl);
    if (status !== 200) {
        throw new Error(`HTTP ${status}`);
    }
    // Keep a copy on disk so later runs can reuse it with --skip-fetch.
    fs.writeFileSync(cachePath, body);
    return body;
}
|
|
163
|
+
/**
 * Make sure the PDF for a law is present on disk and return its path.
 *
 * @param pdfUrl URL of the PDF.
 * @param id document id used to name the cached `.pdf` file.
 * @param skipFetch when true and the file already exists, no request is made.
 * @returns the path of the PDF file.
 * @throws {Error} `PDF HTTP <status>` when the response status is not 200.
 */
async function fetchLawPdf(pdfUrl, id, skipFetch) {
    const target = sourcePdfPath(id);
    const cacheUsable = skipFetch && fs.existsSync(target);
    if (cacheUsable) {
        return target;
    }
    const { status, body } = await fetchBinaryWithRateLimit(pdfUrl);
    if (status !== 200) {
        throw new Error(`PDF HTTP ${status}`);
    }
    fs.writeFileSync(target, body);
    return target;
}
|
|
175
|
+
/**
 * Turn a fetched law into a parsed act.
 *
 * AKN pages are parsed straight from `html`. Any other source type is treated
 * as PDF-backed: text is extracted from the already-downloaded PDF, dumped to
 * the `.txt` file for the document, and then parsed.
 *
 * @param metadata page metadata; reads `source_type`, `pdf_url` and `id`.
 * @param html HTML of the law page (only used for AKN pages).
 * @param skipFetch only selects which error message is used when the PDF is absent.
 * @returns `{ act, warnings }`; for PDFs the warnings are the extraction warnings
 *   followed by a `pdf_text_method=<method>` entry, for AKN an empty list.
 * @throws {Error} when `pdf_url` is missing or the PDF file is not on disk.
 */
function parseAct(metadata, html, skipFetch) {
    if (metadata.source_type === 'akn') {
        const aknAct = parseAknLawHtml(html, metadata);
        return { act: aknAct, warnings: [] };
    }
    if (!metadata.pdf_url) {
        throw new Error('PDF source URL missing for PDF-backed law page');
    }
    const pdfPath = sourcePdfPath(metadata.id);
    if (!fs.existsSync(pdfPath)) {
        // Same failure, worded by mode: a fresh download vs. an expected cache hit.
        const message = skipFetch
            ? `PDF cache not found at ${pdfPath}`
            : `PDF file missing at ${pdfPath}`;
        throw new Error(message);
    }
    const extraction = extractTextFromPdf(pdfPath);
    const extractedText = extraction.text;
    fs.writeFileSync(sourceTextPath(metadata.id), extractedText);
    const act = parsePdfExtractedText(extractedText, metadata);
    const warnings = [...extraction.warnings, `pdf_text_method=${extraction.method}`];
    return { act, warnings };
}
|
|
195
|
+
/**
 * Entry point of the ingestion script.
 *
 * Steps:
 *  1. Parse CLI flags and print the active modes.
 *  2. Ensure the working directories exist; unless `--append`, clear the seed directory.
 *  3. Load the catalog from cache (unless `--refresh-catalog`) or fetch and cache it.
 *  4. Apply `--offset` / `--limit` and process each row: fetch the page, extract
 *     metadata, download the PDF when needed, parse, and write one seed file.
 *  5. Write `_ingestion-report.json` to the seed directory and print a summary.
 *
 * A failure on one law is recorded in the report and does not stop the run.
 * Errors whose message contains "no text extracted from pdf" are recorded as
 * `skipped`; everything else is `failed`. `process.exitCode` is set to 1 when
 * at least one law failed.
 *
 * Changes against the previous version:
 *  - Failed/skipped entries report the source type detected from the page
 *    metadata instead of always claiming `pdf` (`pdf` remains the fallback
 *    when the failure happened before metadata was available).
 *  - The "AKN parsed" / "PDF parsed" counters are incremented only after the
 *    law was parsed and its seed file written, so they no longer count failures.
 */
async function run() {
    const { limit, offset, append, refreshCatalog, skipFetch } = parseArgs();
    console.log('Rwandan Law MCP -- Full Law Ingestion');
    console.log('=====================================\n');
    console.log(`Catalog source: ${SEARCH_API}`);
    if (limit)
        console.log(`Mode: --limit ${limit}`);
    if (offset)
        console.log(`Mode: --offset ${offset}`);
    if (append)
        console.log('Mode: --append');
    if (refreshCatalog)
        console.log('Mode: --refresh-catalog');
    if (skipFetch)
        console.log('Mode: --skip-fetch');
    console.log('');
    ensureDirectories();
    if (!append) {
        resetSeedDirectory();
    }
    let catalog = null;
    if (!refreshCatalog) {
        catalog = loadCachedCatalog();
    }
    if (catalog) {
        console.log(`Using cached catalog: ${CATALOG_CACHE_PATH}`);
    }
    else {
        console.log('Fetching law catalog...');
        catalog = await fetchCatalogLaws();
        writeCatalogCache(catalog);
    }
    const sliced = offset > 0 ? catalog.slice(offset) : catalog;
    const targetRows = limit ? sliced.slice(0, limit) : sliced;
    console.log(`Catalog size: ${catalog.length} | Processing: ${targetRows.length}\n`);
    let seedIndex = append ? currentSeedIndex() : 1;
    let totalProvisions = 0;
    let totalDefinitions = 0;
    let aknCount = 0;
    let pdfCount = 0;
    const results = [];
    for (const row of targetRows) {
        const lawUrl = absoluteUrl(row.href);
        const provisionalId = buildDocumentIdFromHref(row.href);
        // Filled in once metadata is available so a later failure is attributed correctly.
        let detectedSourceType = null;
        process.stdout.write(` ${provisionalId} ...`);
        try {
            const html = await fetchLawHtml(lawUrl, provisionalId, skipFetch);
            const metadata = extractLawPageMetadata(html, lawUrl);
            detectedSourceType = metadata.source_type;
            if (metadata.id !== provisionalId) {
                // The page reports a different id than the href suggested: move the
                // cached HTML to its final name, unless that name is already taken.
                // NOTE(review): fetchLawHtml looks the cache up by provisional id, so a
                // renamed file is presumably not found again with --skip-fetch -- confirm.
                const oldPath = sourceHtmlPath(provisionalId);
                const newPath = sourceHtmlPath(metadata.id);
                if (fs.existsSync(oldPath) && !fs.existsSync(newPath)) {
                    fs.renameSync(oldPath, newPath);
                }
            }
            if (metadata.source_type === 'pdf') {
                if (!metadata.pdf_url) {
                    throw new Error('Missing pdf_url in metadata');
                }
                await fetchLawPdf(metadata.pdf_url, metadata.id, skipFetch);
            }
            const { act, warnings } = parseAct(metadata, html, skipFetch);
            const seedFilePath = seedPath(seedIndex, metadata.id);
            fs.writeFileSync(seedFilePath, `${JSON.stringify(act, null, 2)}\n`);
            seedIndex++;
            // Count only laws that were actually parsed and written.
            if (metadata.source_type === 'pdf') {
                pdfCount++;
            }
            else {
                aknCount++;
            }
            totalProvisions += act.provisions.length;
            totalDefinitions += act.definitions.length;
            results.push({
                id: metadata.id,
                url: metadata.url,
                source_type: metadata.source_type,
                status: 'ok',
                provisions: act.provisions.length,
                definitions: act.definitions.length,
                seed_file: path.basename(seedFilePath),
                warnings: warnings.length > 0 ? warnings : undefined,
            });
            console.log(` OK (${metadata.source_type}, ${act.provisions.length} provisions, ${act.definitions.length} definitions)`);
        }
        catch (error) {
            const reason = error instanceof Error ? error.message : String(error);
            // PDFs without a text layer are expected and reported as skipped, not failed.
            const isImageOnlyPdf = reason.toLowerCase().includes('no text extracted from pdf');
            const status = isImageOnlyPdf ? 'skipped' : 'failed';
            results.push({
                id: provisionalId,
                url: lawUrl,
                // Fall back to 'pdf' (the previous fixed value) when the type is unknown.
                source_type: detectedSourceType ?? 'pdf',
                status,
                provisions: 0,
                definitions: 0,
                reason: isImageOnlyPdf
                    ? `${reason}; source appears image-only (no text layer available for parsing)`
                    : reason,
            });
            console.log(` ${status.toUpperCase()} (${reason})`);
        }
    }
    const success = results.filter(r => r.status === 'ok').length;
    const skipped = results.filter(r => r.status === 'skipped').length;
    const failed = results.filter(r => r.status === 'failed').length;
    const report = {
        generated_at: new Date().toISOString(),
        catalog_total: catalog.length,
        processed_total: targetRows.length,
        success,
        skipped,
        failed,
        akn_success: results.filter(r => r.status === 'ok' && r.source_type === 'akn').length,
        pdf_success: results.filter(r => r.status === 'ok' && r.source_type === 'pdf').length,
        total_provisions: totalProvisions,
        total_definitions: totalDefinitions,
        results,
    };
    const reportPath = path.join(SEED_DIR, '_ingestion-report.json');
    fs.writeFileSync(reportPath, `${JSON.stringify(report, null, 2)}\n`);
    console.log('\nIngestion report');
    console.log('----------------');
    console.log(`Catalog entries: ${catalog.length}`);
    console.log(`Processed: ${targetRows.length}`);
    console.log(`Success: ${success}`);
    console.log(`Skipped: ${skipped}`);
    console.log(`Failed: ${failed}`);
    console.log(`AKN parsed: ${aknCount}`);
    console.log(`PDF parsed: ${pdfCount}`);
    console.log(`Provisions: ${totalProvisions}`);
    console.log(`Definitions: ${totalDefinitions}`);
    console.log(`Report file: ${reportPath}`);
    if (failed > 0) {
        // Signal partial failure without cutting the process short.
        process.exitCode = 1;
    }
}
|
|
329
|
+
// Start the ingestion; anything that escapes run() is fatal: log it and exit with status 1.
run().catch((fatalError) => {
    console.error('Fatal ingestion error:', fatalError);
    process.exit(1);
});
|
|
333
|
+
//# sourceMappingURL=ingest.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ingest.js","sourceRoot":"","sources":["../../scripts/ingest.ts"],"names":[],"mappings":";AACA;;;;;;GAMG;AAEH,OAAO,KAAK,EAAE,MAAM,IAAI,CAAC;AACzB,OAAO,KAAK,IAAI,MAAM,MAAM,CAAC;AAC7B,OAAO,EAAE,aAAa,EAAE,MAAM,KAAK,CAAC;AACpC,OAAO,EAAE,wBAAwB,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAC;AAChF,OAAO,EACL,uBAAuB,EACvB,sBAAsB,EACtB,eAAe,EACf,uBAAuB,EACvB,qBAAqB,GAItB,MAAM,iBAAiB,CAAC;AACzB,OAAO,EAAE,kBAAkB,EAAE,MAAM,cAAc,CAAC;AAElD,MAAM,UAAU,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAClD,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;AAE3C,MAAM,QAAQ,GAAG,uBAAuB,CAAC;AACzC,MAAM,UAAU,GAAG,6CAA6C,CAAC;AACjE,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,gBAAgB,CAAC,CAAC;AAC7D,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,cAAc,CAAC,CAAC;AACzD,MAAM,kBAAkB,GAAG,IAAI,CAAC,OAAO,CAAC,UAAU,EAAE,oBAAoB,CAAC,CAAC;AA2B1E,SAAS,SAAS;IAChB,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IACnC,IAAI,KAAK,GAAkB,IAAI,CAAC;IAChC,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,IAAI,MAAM,GAAG,KAAK,CAAC;IACnB,IAAI,cAAc,GAAG,KAAK,CAAC;IAC3B,IAAI,SAAS,GAAG,KAAK,CAAC;IAEtB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,IAAI,IAAI,CAAC,CAAC,CAAC,KAAK,SAAS,IAAI,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC;YACzC,KAAK,GAAG,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YACzC,CAAC,EAAE,CAAC;YACJ,SAAS;QACX,CAAC;QACD,IAAI,IAAI,CAAC,CAAC,CAAC,KAAK,UAAU,IAAI,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC;YAC1C,MAAM,GAAG,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YAC1C,CAAC,EAAE,CAAC;YACJ,SAAS;QACX,CAAC;QACD,IAAI,IAAI,CAAC,CAAC,CAAC,KAAK,UAAU,EAAE,CAAC;YAC3B,MAAM,GAAG,IAAI,CAAC;YACd,SAAS;QACX,CAAC;QACD,IAAI,IAAI,CAAC,CAAC,CAAC,KAAK,mBAAmB,EAAE,CAAC;YACpC,cAAc,GAAG,IAAI,CAAC;YACtB,SAAS;QACX,CAAC;QACD,IAAI,IAAI,CAAC,CAAC,CAAC,KAAK,cAAc,EAAE,CAAC;YAC/B,SAAS,GAAG,IAAI,CAAC;QACnB,CAAC;IACH,CAAC;IAED,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,MAAM,CAAC,EAAE,MAAM,EAAE,cAAc,EAAE,SAAS,EAAE,CAAC;AACnF,CAAC;AAED,SAAS
,iBAAiB;IACxB,EAAE,CAAC,SAAS,CAAC,UAAU,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAC9C,EAAE,CAAC,SAAS,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;AAC9C,CAAC;AAED,SAAS,kBAAkB;IACzB,MAAM,KAAK,GAAG,EAAE,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC;IAC9E,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,EAAE,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC,CAAC;IAC3C,CAAC;AACH,CAAC;AAED,SAAS,QAAQ,CAAI,OAAe;IAClC,OAAO,IAAI,CAAC,KAAK,CAAC,OAAO,CAAM,CAAC;AAClC,CAAC;AAED,SAAS,WAAW,CAAC,IAAY;IAC/B,OAAO,IAAI,GAAG,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC,QAAQ,EAAE,CAAC;AAC5C,CAAC;AAED,SAAS,cAAc,CAAC,EAAU;IAChC,OAAO,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,GAAG,EAAE,OAAO,CAAC,CAAC;AAC7C,CAAC;AAED,SAAS,aAAa,CAAC,EAAU;IAC/B,OAAO,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,GAAG,EAAE,MAAM,CAAC,CAAC;AAC5C,CAAC;AAED,SAAS,cAAc,CAAC,EAAU;IAChC,OAAO,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,GAAG,EAAE,MAAM,CAAC,CAAC;AAC5C,CAAC;AAED,SAAS,QAAQ,CAAC,KAAa,EAAE,KAAa;IAC5C,MAAM,GAAG,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;IAC3C,OAAO,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,GAAG,GAAG,IAAI,KAAK,OAAO,CAAC,CAAC;AACrD,CAAC;AAED,SAAS,gBAAgB;IACvB,MAAM,QAAQ,GAAG,EAAE,CAAC,WAAW,CAAC,QAAQ,CAAC;SACtC,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;SAC7C,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;SAClD,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;IACnC,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IACpC,OAAO,IAAI,CAAC,GAAG,CAAC,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC;AACnC,CAAC;AAED,KAAK,UAAU,gBAAgB;IAC7B,MAAM,SAAS,GAAG,IAAI,GAAG,EAAsB,CAAC;IAChD,MAAM,WAAW,GAAG,IAAI,IAAI,EAAE,CAAC,cAAc,EAAE,CAAC;IAChD,MAAM,SAAS,GAAG,IAAI,CAAC;IACvB,MAAM,WAAW,GAAG,KAAK,CAAC,IAAI,CAC5B,EAAE,MAAM,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,WAAW,GAAG,SAAS,GAAG,CAAC,CAAC,EAAE,EACpD,CAAC,CAAC,EAAE,KAAK,EAAE,EAAE,CAAC,MAAM,CAAC,SAAS,GAAG,KAAK,CAAC,CACxC,CAAC;IACF,MAAM,OAAO,GAAG,CAAC,KAAK,EAAE,GAAG,WAAW,CAAC
,CAAC;IAExC,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC5B,IAAI,IAAI,GAAG,CAAC,CAAC;QACb,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,IAAI,QAAQ,GAAG,EAAE,CAAC;QAElB,OAAO,IAAI,EAAE,CAAC;YACZ,MAAM,GAAG,GACP,GAAG,UAAU,WAAW,kBAAkB,CAAC,KAAK,CAAC,EAAE;gBACnD,SAAS,IAAI,2DAA2D,CAAC;YAC3E,MAAM,QAAQ,GAAG,MAAM,kBAAkB,CAAC,GAAG,CAAC,CAAC;YAC/C,MAAM,cAAc,GAAG,QAAQ,CAAC,MAAM,CAAC;YACvC,MAAM,YAAY,GAAG,QAAQ,CAAC,IAAI,CAAC;YAEnC,IAAI,cAAc,KAAK,GAAG,IAAI,IAAI,GAAG,CAAC,EAAE,CAAC;gBACvC,oFAAoF;gBACpF,MAAM;YACR,CAAC;YACD,IAAI,cAAc,KAAK,GAAG,EAAE,CAAC;gBAC3B,MAAM,IAAI,KAAK,CAAC,mCAAmC,KAAK,UAAU,IAAI,UAAU,cAAc,EAAE,CAAC,CAAC;YACpG,CAAC;YAED,MAAM,IAAI,GAAG,QAAQ,CAAoB,YAAY,CAAC,CAAC;YACvD,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC;YACnB,MAAM,IAAI,GAAG,uBAAuB,CAAC,IAAI,CAAC,YAAY,IAAI,EAAE,CAAC,CAAC;YAC9D,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;gBAAE,MAAM;YAE7B,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC;YACvB,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;gBACvB,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;oBAC7B,SAAS,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;gBAC/B,CAAC;YACH,CAAC;YAED,MAAM,UAAU,GAAG,IAAI,GAAG,QAAQ,IAAI,KAAK,CAAC;YAC5C,IAAI,UAAU;gBAAE,MAAM;YACtB,IAAI,EAAE,CAAC;QACT,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,CAAC,CAAC;AACxC,CAAC;AAED,SAAS,iBAAiB;IACxB,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,kBAAkB,CAAC;QAAE,OAAO,IAAI,CAAC;IACpD,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,EAAE,CAAC,YAAY,CAAC,kBAAkB,EAAE,OAAO,CAAC,CAAC;QACzD,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAiB,CAAC;QAC7C,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,IAAI,CAAC;QAC3D,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,SAAS,iBAAiB,CAAC,OAAqB;IAC9C,EAAE,CAAC,aAAa,CAAC,kBAAkB,EAAE,GAAG,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC;AAChF,CAAC;AAED,KAAK,UAAU,YAAY,CACzB,MAAc,EACd,aAAqB,EACrB,SAAkB;IAElB,MAAM,QAAQ,GAAG,cAAc,CAAC,aAAa,CAAC,CAAC;IAC/C,IAAI,SAAS,IAAI,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;QACzC,OAAO,EAAE,CAAC,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IAC5C,CAAC;IA
ED,MAAM,QAAQ,GAAG,MAAM,kBAAkB,CAAC,MAAM,CAAC,CAAC;IAClD,IAAI,QAAQ,CAAC,MAAM,KAAK,GAAG,EAAE,CAAC;QAC5B,MAAM,IAAI,KAAK,CAAC,QAAQ,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;IAC7C,CAAC;IACD,EAAE,CAAC,aAAa,CAAC,QAAQ,EAAE,QAAQ,CAAC,IAAI,CAAC,CAAC;IAC1C,OAAO,QAAQ,CAAC,IAAI,CAAC;AACvB,CAAC;AAED,KAAK,UAAU,WAAW,CACxB,MAAc,EACd,EAAU,EACV,SAAkB;IAElB,MAAM,OAAO,GAAG,aAAa,CAAC,EAAE,CAAC,CAAC;IAClC,IAAI,SAAS,IAAI,EAAE,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;QACxC,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,MAAM,QAAQ,GAAG,MAAM,wBAAwB,CAAC,MAAM,CAAC,CAAC;IACxD,IAAI,QAAQ,CAAC,MAAM,KAAK,GAAG,EAAE,CAAC;QAC5B,MAAM,IAAI,KAAK,CAAC,YAAY,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;IACjD,CAAC;IACD,EAAE,CAAC,aAAa,CAAC,OAAO,EAAE,QAAQ,CAAC,IAAI,CAAC,CAAC;IACzC,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,SAAS,QAAQ,CACf,QAAyB,EACzB,IAAY,EACZ,SAAkB;IAElB,IAAI,QAAQ,CAAC,WAAW,KAAK,KAAK,EAAE,CAAC;QACnC,OAAO,EAAE,GAAG,EAAE,eAAe,CAAC,IAAI,EAAE,QAAQ,CAAC,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;IAChE,CAAC;IAED,IAAI,CAAC,QAAQ,CAAC,OAAO,EAAE,CAAC;QACtB,MAAM,IAAI,KAAK,CAAC,gDAAgD,CAAC,CAAC;IACpE,CAAC;IAED,MAAM,OAAO,GAAG,aAAa,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;IAC3C,IAAI,CAAC,SAAS,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;QAC1C,MAAM,IAAI,KAAK,CAAC,uBAAuB,OAAO,EAAE,CAAC,CAAC;IACpD,CAAC;IACD,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;QAC5B,MAAM,IAAI,KAAK,CAAC,0BAA0B,OAAO,EAAE,CAAC,CAAC;IACvD,CAAC;IAED,MAAM,UAAU,GAAG,kBAAkB,CAAC,OAAO,CAAC,CAAC;IAC/C,EAAE,CAAC,aAAa,CAAC,cAAc,CAAC,QAAQ,CAAC,EAAE,CAAC,EAAE,UAAU,CAAC,IAAI,CAAC,CAAC;IAE/D,MAAM,GAAG,GAAG,qBAAqB,CAAC,UAAU,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;IAC7D,MAAM,QAAQ,GAAG,CAAC,GAAG,UAAU,CAAC,QAAQ,EAAE,mBAAmB,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC;IAClF,OAAO,EAAE,GAAG,EAAE,QAAQ,EAAE,CAAC;AAC3B,CAAC;AAED,KAAK,UAAU,GAAG;IAChB,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,cAAc,EAAE,SAAS,EAAE,GAAG,SAAS,EAAE,CAAC;IAEzE,OAAO,CAAC,GAAG,CAAC,uCAAuC,CAAC,CAAC;IACrD,OAAO,CAAC,GAAG,CAAC,yCAAyC,CAAC,CAAC;IACvD,OAAO,CAAC,GAAG,CAAC,mBAAmB,UAAU,EAAE,CAAC,CAAC;IAC7C,IAAI,KAAK;QAAE,OAAO,CAAC,GAAG,CAAC,iBAAiB,KAAK,EAAE,CAAC,CAAC;IACjD,IAAI,MAAM;QAAE,OAAO,CAA
C,GAAG,CAAC,kBAAkB,MAAM,EAAE,CAAC,CAAC;IACpD,IAAI,MAAM;QAAE,OAAO,CAAC,GAAG,CAAC,gBAAgB,CAAC,CAAC;IAC1C,IAAI,cAAc;QAAE,OAAO,CAAC,GAAG,CAAC,yBAAyB,CAAC,CAAC;IAC3D,IAAI,SAAS;QAAE,OAAO,CAAC,GAAG,CAAC,oBAAoB,CAAC,CAAC;IACjD,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IAEhB,iBAAiB,EAAE,CAAC;IACpB,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,kBAAkB,EAAE,CAAC;IACvB,CAAC;IAED,IAAI,OAAO,GAAwB,IAAI,CAAC;IACxC,IAAI,CAAC,cAAc,EAAE,CAAC;QACpB,OAAO,GAAG,iBAAiB,EAAE,CAAC;IAChC,CAAC;IAED,IAAI,OAAO,EAAE,CAAC;QACZ,OAAO,CAAC,GAAG,CAAC,yBAAyB,kBAAkB,EAAE,CAAC,CAAC;IAC7D,CAAC;SAAM,CAAC;QACN,OAAO,CAAC,GAAG,CAAC,yBAAyB,CAAC,CAAC;QACvC,OAAO,GAAG,MAAM,gBAAgB,EAAE,CAAC;QACnC,iBAAiB,CAAC,OAAO,CAAC,CAAC;IAC7B,CAAC;IACD,MAAM,MAAM,GAAG,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC;IAC5D,MAAM,UAAU,GAAG,KAAK,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;IAC3D,OAAO,CAAC,GAAG,CAAC,iBAAiB,OAAO,CAAC,MAAM,kBAAkB,UAAU,CAAC,MAAM,IAAI,CAAC,CAAC;IAEpF,IAAI,SAAS,GAAG,MAAM,CAAC,CAAC,CAAC,gBAAgB,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;IAChD,IAAI,eAAe,GAAG,CAAC,CAAC;IACxB,IAAI,gBAAgB,GAAG,CAAC,CAAC;IACzB,IAAI,QAAQ,GAAG,CAAC,CAAC;IACjB,IAAI,QAAQ,GAAG,CAAC,CAAC;IAEjB,MAAM,OAAO,GAAsB,EAAE,CAAC;IAEtC,KAAK,MAAM,GAAG,IAAI,UAAU,EAAE,CAAC;QAC7B,MAAM,MAAM,GAAG,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACrC,MAAM,aAAa,GAAG,uBAAuB,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACxD,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,KAAK,aAAa,MAAM,CAAC,CAAC;QAE/C,IAAI,CAAC;YACH,MAAM,IAAI,GAAG,MAAM,YAAY,CAAC,MAAM,EAAE,aAAa,EAAE,SAAS,CAAC,CAAC;YAClE,MAAM,QAAQ,GAAG,sBAAsB,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;YAEtD,IAAI,QAAQ,CAAC,EAAE,KAAK,aAAa,EAAE,CAAC;gBAClC,MAAM,OAAO,GAAG,cAAc,CAAC,aAAa,CAAC,CAAC;gBAC9C,MAAM,OAAO,GAAG,cAAc,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;gBAC5C,IAAI,EAAE,CAAC,UAAU,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;oBACtD,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;gBAClC,CAAC;YACH,CAAC;YAED,IAAI,QAAQ,CAAC,WAAW,KAAK,KAAK,EAAE,CAAC;gBACnC,IAAI,CAAC,QAAQ,CAAC,OAAO,EAAE,CAAC;oBACtB,MAAM,IAAI,KAAK,CAAC,6BAA6B,
CAAC,CAAC;gBACjD,CAAC;gBACD,MAAM,WAAW,CAAC,QAAQ,CAAC,OAAO,EAAE,QAAQ,CAAC,EAAE,EAAE,SAAS,CAAC,CAAC;gBAC5D,QAAQ,EAAE,CAAC;YACb,CAAC;iBAAM,CAAC;gBACN,QAAQ,EAAE,CAAC;YACb,CAAC;YAED,MAAM,EAAE,GAAG,EAAE,QAAQ,EAAE,GAAG,QAAQ,CAAC,QAAQ,EAAE,IAAI,EAAE,SAAS,CAAC,CAAC;YAC9D,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,SAAS,EAAE,QAAQ,CAAC,EAAE,CAAC,CAAC,CAAC;YACjE,EAAE,CAAC,aAAa,CAAC,QAAQ,CAAC,SAAS,EAAE,QAAQ,CAAC,EAAE,CAAC,EAAE,GAAG,IAAI,CAAC,SAAS,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC;YACxF,SAAS,EAAE,CAAC;YAEZ,eAAe,IAAI,GAAG,CAAC,UAAU,CAAC,MAAM,CAAC;YACzC,gBAAgB,IAAI,GAAG,CAAC,WAAW,CAAC,MAAM,CAAC;YAE3C,OAAO,CAAC,IAAI,CAAC;gBACX,EAAE,EAAE,QAAQ,CAAC,EAAE;gBACf,GAAG,EAAE,QAAQ,CAAC,GAAG;gBACjB,WAAW,EAAE,QAAQ,CAAC,WAAW;gBACjC,MAAM,EAAE,IAAI;gBACZ,UAAU,EAAE,GAAG,CAAC,UAAU,CAAC,MAAM;gBACjC,WAAW,EAAE,GAAG,CAAC,WAAW,CAAC,MAAM;gBACnC,SAAS,EAAE,QAAQ;gBACnB,QAAQ,EAAE,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS;aACrD,CAAC,CAAC;YAEH,OAAO,CAAC,GAAG,CACT,QAAQ,QAAQ,CAAC,WAAW,KAAK,GAAG,CAAC,UAAU,CAAC,MAAM,gBAAgB,GAAG,CAAC,WAAW,CAAC,MAAM,eAAe,CAC5G,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,MAAM,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YACtE,MAAM,cAAc,GAAG,MAAM,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,4BAA4B,CAAC,CAAC;YACnF,MAAM,MAAM,GAA8B,cAAc,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,QAAQ,CAAC;YAEhF,OAAO,CAAC,IAAI,CAAC;gBACX,EAAE,EAAE,aAAa;gBACjB,GAAG,EAAE,MAAM;gBACX,WAAW,EAAE,KAAK;gBAClB,MAAM;gBACN,UAAU,EAAE,CAAC;gBACb,WAAW,EAAE,CAAC;gBACd,MAAM,EAAE,cAAc;oBACpB,CAAC,CAAC,GAAG,MAAM,mEAAmE;oBAC9E,CAAC,CAAC,MAAM;aACX,CAAC,CAAC;YACH,OAAO,CAAC,GAAG,CAAC,IAAI,MAAM,CAAC,WAAW,EAAE,KAAK,MAAM,GAAG,CAAC,CAAC;QACtD,CAAC;IACH,CAAC;IAED,MAAM,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,IAAI,CAAC,CAAC,MAAM,CAAC;IAC9D,MAAM,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,SAAS,CAAC,CAAC,MAAM,CAAC;IACnE,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,QAAQ,CAAC,CAAC,MAAM,
CAAC;IAEjE,MAAM,MAAM,GAAG;QACb,YAAY,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACtC,aAAa,EAAE,OAAO,CAAC,MAAM;QAC7B,eAAe,EAAE,UAAU,CAAC,MAAM;QAClC,OAAO;QACP,OAAO;QACP,MAAM;QACN,WAAW,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,IAAI,IAAI,CAAC,CAAC,WAAW,KAAK,KAAK,CAAC,CAAC,MAAM;QACrF,WAAW,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,IAAI,IAAI,CAAC,CAAC,WAAW,KAAK,KAAK,CAAC,CAAC,MAAM;QACrF,gBAAgB,EAAE,eAAe;QACjC,iBAAiB,EAAE,gBAAgB;QACnC,OAAO;KACR,CAAC;IACF,EAAE,CAAC,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,wBAAwB,CAAC,EAAE,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC;IAExG,OAAO,CAAC,GAAG,CAAC,oBAAoB,CAAC,CAAC;IAClC,OAAO,CAAC,GAAG,CAAC,kBAAkB,CAAC,CAAC;IAChC,OAAO,CAAC,GAAG,CAAC,oBAAoB,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;IAClD,OAAO,CAAC,GAAG,CAAC,oBAAoB,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC;IACrD,OAAO,CAAC,GAAG,CAAC,oBAAoB,OAAO,EAAE,CAAC,CAAC;IAC3C,OAAO,CAAC,GAAG,CAAC,oBAAoB,OAAO,EAAE,CAAC,CAAC;IAC3C,OAAO,CAAC,GAAG,CAAC,oBAAoB,MAAM,EAAE,CAAC,CAAC;IAC1C,OAAO,CAAC,GAAG,CAAC,oBAAoB,QAAQ,EAAE,CAAC,CAAC;IAC5C,OAAO,CAAC,GAAG,CAAC,oBAAoB,QAAQ,EAAE,CAAC,CAAC;IAC5C,OAAO,CAAC,GAAG,CAAC,oBAAoB,eAAe,EAAE,CAAC,CAAC;IACnD,OAAO,CAAC,GAAG,CAAC,oBAAoB,gBAAgB,EAAE,CAAC,CAAC;IACpD,OAAO,CAAC,GAAG,CAAC,oBAAoB,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,wBAAwB,CAAC,EAAE,CAAC,CAAC;IAEjF,IAAI,MAAM,GAAG,CAAC,EAAE,CAAC;QACf,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC;IACvB,CAAC;AACH,CAAC;AAED,GAAG,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE;IAClB,OAAO,CAAC,KAAK,CAAC,wBAAwB,EAAE,KAAK,CAAC,CAAC;IAC/C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Rate-limited HTTP client for Rwanda legislation pages hosted on RwandaLII.
|
|
3
|
+
*
|
|
4
|
+
* Source:
|
|
5
|
+
* https://rwandalii.org/akn/rw/act/law/{year}/{number}/eng@{date}
|
|
6
|
+
*
|
|
7
|
+
* Notes:
|
|
8
|
+
* - Uses a 1.2s minimum delay between requests to respect remote servers.
|
|
9
|
+
* - Retries transient failures (429/5xx/network) with exponential backoff.
|
|
10
|
+
* - Uses an explicit User-Agent for ingestion transparency.
|
|
11
|
+
*/
|
|
12
|
+
/**
 * Result of a text fetch, as resolved by `fetchWithRateLimit`.
 */
export interface FetchResult {
    /** HTTP status code of the response that was finally returned. */
    status: number;
    /** Response body read as text. */
    body: string;
    /** Value of the `content-type` response header; empty string when the header is absent. */
    contentType: string;
    /** URL reported by the response (redirects are followed, so it can differ from the requested URL). */
    url: string;
}
|
|
18
|
+
/**
 * Result of a binary fetch, as resolved by `fetchBinaryWithRateLimit`.
 */
export interface FetchBinaryResult {
    /** HTTP status code of the response that was finally returned. */
    status: number;
    /** Raw response bytes. */
    body: Buffer;
    /** Value of the `content-type` response header; empty string when the header is absent. */
    contentType: string;
    /** URL reported by the response (redirects are followed, so it can differ from the requested URL). */
    url: string;
}
|
|
24
|
+
/**
 * Fetches `url` as text through the shared rate limiter, retrying transient
 * failures (429/5xx/network) with exponential backoff.
 *
 * @param url - Address to request.
 * @param maxRetries - Retries allowed after the first attempt; the compiled implementation defaults this to 3.
 * @returns The status, text body, content type and final URL of the returned response.
 */
export declare function fetchWithRateLimit(url: string, maxRetries?: number): Promise<FetchResult>;
|
|
25
|
+
/**
 * Fetches `url` as raw bytes through the shared rate limiter, retrying transient
 * failures (429/5xx/network) with exponential backoff.
 *
 * @param url - Address to request.
 * @param maxRetries - Retries allowed after the first attempt; the compiled implementation defaults this to 3.
 * @returns The status, binary body, content type and final URL of the returned response.
 */
export declare function fetchBinaryWithRateLimit(url: string, maxRetries?: number): Promise<FetchBinaryResult>;
|
|
26
|
+
//# sourceMappingURL=fetcher.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fetcher.d.ts","sourceRoot":"","sources":["../../../scripts/lib/fetcher.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAsBH,MAAM,WAAW,WAAW;IAC1B,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,GAAG,EAAE,MAAM,CAAC;CACb;AAED,MAAM,WAAW,iBAAiB;IAChC,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,GAAG,EAAE,MAAM,CAAC;CACb;AAED,wBAAsB,kBAAkB,CAAC,GAAG,EAAE,MAAM,EAAE,UAAU,SAAI,GAAG,OAAO,CAAC,WAAW,CAAC,CAuC1F;AAED,wBAAsB,wBAAwB,CAAC,GAAG,EAAE,MAAM,EAAE,UAAU,SAAI,GAAG,OAAO,CAAC,iBAAiB,CAAC,CAuCtG"}
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Rate-limited HTTP client for Rwanda legislation pages hosted on RwandaLII.
|
|
3
|
+
*
|
|
4
|
+
* Source:
|
|
5
|
+
* https://rwandalii.org/akn/rw/act/law/{year}/{number}/eng@{date}
|
|
6
|
+
*
|
|
7
|
+
* Notes:
|
|
8
|
+
* - Uses a 1.2s minimum delay between requests to respect remote servers.
|
|
9
|
+
* - Retries transient failures (429/5xx/network) with exponential backoff.
|
|
10
|
+
* - Uses an explicit User-Agent for ingestion transparency.
|
|
11
|
+
*/
|
|
12
|
+
// Sent as the User-Agent header on every request so the remote server can identify this client.
const USER_AGENT = 'Rwandan-Law-MCP/1.0 (+https://github.com/Ansvar-Systems/Rwandan-law-mcp)';

// Smallest allowed gap between two requests, in milliseconds (1.2 s).
const MIN_DELAY_MS = 1200;

// HTTP statuses treated as transient: rate limiting (429) and server-side errors.
const RETRYABLE_STATUSES = new Set([
    429,
    500,
    502,
    503,
    504,
]);

// Per-request abort timeout, in milliseconds (45 s).
const REQUEST_TIMEOUT_MS = 45000;

// Epoch-millisecond timestamp maintained by the rate limiter; 0 until the first request.
let lastRequestAt = 0;
|
|
17
|
+
/**
 * Returns a promise that resolves (with no value) once a timer of `ms`
 * milliseconds has fired.
 */
function sleep(ms) {
    return new Promise((done) => {
        setTimeout(done, ms);
    });
}
|
|
20
|
+
/**
 * Delays the caller until at least MIN_DELAY_MS has passed since the
 * previously scheduled request, then returns.
 *
 * The next slot is reserved in `lastRequestAt` *before* awaiting. Recording
 * the timestamp only after the sleep would let overlapping callers all read
 * the same stale value, wait the same amount, and fire simultaneously.
 */
async function enforceRateLimit() {
    const now = Date.now();
    // Earliest moment this caller may proceed: right now, or one full gap
    // after the last reserved slot, whichever is later.
    const scheduledAt = Math.max(now, lastRequestAt + MIN_DELAY_MS);
    // Claim the slot synchronously so concurrent callers queue behind it.
    lastRequestAt = scheduledAt;
    if (scheduledAt > now) {
        await sleep(scheduledAt - now);
    }
}
|
|
28
|
+
/**
 * Fetches `url` as text, pacing requests through the shared rate limiter and
 * retrying transient failures with exponential backoff (2s, 4s, 8s, ...).
 *
 * A response whose status is not in RETRYABLE_STATUSES is returned as-is, even
 * when it is an error status; so is a retryable status on the final attempt.
 *
 * @param url - Address to request.
 * @param maxRetries - Retries allowed after the first attempt (default 3).
 * @returns `{ status, body, contentType, url }` of the returned response;
 *          `contentType` is '' when the header is missing.
 * @throws The error from the last attempt when every attempt threw, or an
 *         Error when no attempt could be made (e.g. negative `maxRetries`).
 */
export async function fetchWithRateLimit(url, maxRetries = 3) {
    for (let attempt = 0; attempt <= maxRetries; attempt++) {
        // Pace every attempt, not only the first: retries are requests too, and
        // this keeps lastRequestAt current so the next caller cannot fire
        // within the minimum gap of a retried request.
        await enforceRateLimit();
        try {
            const response = await fetch(url, {
                headers: {
                    'User-Agent': USER_AGENT,
                    'Accept': 'text/html,application/xhtml+xml,*/*',
                },
                signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS),
                redirect: 'follow',
            });
            // Read the body before the status check so it is consumed even
            // when the response is discarded for a retry.
            const body = await response.text();
            if (RETRYABLE_STATUSES.has(response.status) && attempt < maxRetries) {
                const backoffMs = Math.pow(2, attempt + 1) * 1000;
                console.log(` HTTP ${response.status} from ${url}; retrying in ${backoffMs}ms...`);
                await sleep(backoffMs);
                continue;
            }
            return {
                status: response.status,
                body,
                contentType: response.headers.get('content-type') ?? '',
                url: response.url,
            };
        }
        catch (error) {
            // Out of retries: surface the underlying failure to the caller.
            if (attempt >= maxRetries) {
                throw error;
            }
            const backoffMs = Math.pow(2, attempt + 1) * 1000;
            console.log(` Network error for ${url}; retrying in ${backoffMs}ms...`);
            await sleep(backoffMs);
        }
    }
    // Only reached when the loop body never ran.
    throw new Error(`Failed to fetch ${url} after ${maxRetries + 1} attempts`);
}
|
|
65
|
+
/**
 * Fetches `url` as raw bytes, pacing requests through the shared rate limiter
 * and retrying transient failures with exponential backoff (2s, 4s, 8s, ...).
 *
 * A response whose status is not in RETRYABLE_STATUSES is returned as-is, even
 * when it is an error status; so is a retryable status on the final attempt.
 *
 * @param url - Address to request.
 * @param maxRetries - Retries allowed after the first attempt (default 3).
 * @returns `{ status, body, contentType, url }` of the returned response, with
 *          `body` as a Buffer; `contentType` is '' when the header is missing.
 * @throws The error from the last attempt when every attempt threw, or an
 *         Error when no attempt could be made (e.g. negative `maxRetries`).
 */
export async function fetchBinaryWithRateLimit(url, maxRetries = 3) {
    for (let attempt = 0; attempt <= maxRetries; attempt++) {
        // Pace every attempt, not only the first: retries are requests too, and
        // this keeps lastRequestAt current so the next caller cannot fire
        // within the minimum gap of a retried request.
        await enforceRateLimit();
        try {
            const response = await fetch(url, {
                headers: {
                    'User-Agent': USER_AGENT,
                    'Accept': 'application/pdf,application/octet-stream,*/*',
                },
                signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS),
                redirect: 'follow',
            });
            // Read the body before the status check so it is consumed even
            // when the response is discarded for a retry.
            const body = Buffer.from(await response.arrayBuffer());
            if (RETRYABLE_STATUSES.has(response.status) && attempt < maxRetries) {
                const backoffMs = Math.pow(2, attempt + 1) * 1000;
                console.log(` HTTP ${response.status} from ${url}; retrying in ${backoffMs}ms...`);
                await sleep(backoffMs);
                continue;
            }
            return {
                status: response.status,
                body,
                contentType: response.headers.get('content-type') ?? '',
                url: response.url,
            };
        }
        catch (error) {
            // Out of retries: surface the underlying failure to the caller.
            if (attempt >= maxRetries) {
                throw error;
            }
            const backoffMs = Math.pow(2, attempt + 1) * 1000;
            console.log(` Network error for ${url}; retrying in ${backoffMs}ms...`);
            await sleep(backoffMs);
        }
    }
    // Only reached when the loop body never ran.
    throw new Error(`Failed to fetch ${url} after ${maxRetries + 1} attempts`);
}
|
|
102
|
+
//# sourceMappingURL=fetcher.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fetcher.js","sourceRoot":"","sources":["../../../scripts/lib/fetcher.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,MAAM,UAAU,GAAG,0EAA0E,CAAC;AAC9F,MAAM,YAAY,GAAG,IAAI,CAAC;AAC1B,MAAM,kBAAkB,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC;AAC9D,MAAM,kBAAkB,GAAG,KAAK,CAAC;AAEjC,IAAI,aAAa,GAAG,CAAC,CAAC;AAEtB,SAAS,KAAK,CAAC,EAAU;IACvB,OAAO,IAAI,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC,CAAC;AACzD,CAAC;AAED,KAAK,UAAU,gBAAgB;IAC7B,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACvB,MAAM,OAAO,GAAG,GAAG,GAAG,aAAa,CAAC;IACpC,IAAI,OAAO,GAAG,YAAY,EAAE,CAAC;QAC3B,MAAM,KAAK,CAAC,YAAY,GAAG,OAAO,CAAC,CAAC;IACtC,CAAC;IACD,aAAa,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;AAC7B,CAAC;AAgBD,MAAM,CAAC,KAAK,UAAU,kBAAkB,CAAC,GAAW,EAAE,UAAU,GAAG,CAAC;IAClE,MAAM,gBAAgB,EAAE,CAAC;IAEzB,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,IAAI,UAAU,EAAE,OAAO,EAAE,EAAE,CAAC;QACvD,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;gBAChC,OAAO,EAAE;oBACP,YAAY,EAAE,UAAU;oBACxB,QAAQ,EAAE,qCAAqC;iBAChD;gBACD,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC,kBAAkB,CAAC;gBAC/C,QAAQ,EAAE,QAAQ;aACnB,CAAC,CAAC;YAEH,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YACnC,IAAI,kBAAkB,CAAC,GAAG,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,OAAO,GAAG,UAAU,EAAE,CAAC;gBACpE,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC;gBAClD,OAAO,CAAC,GAAG,CAAC,UAAU,QAAQ,CAAC,MAAM,SAAS,GAAG,iBAAiB,SAAS,OAAO,CAAC,CAAC;gBACpF,MAAM,KAAK,CAAC,SAAS,CAAC,CAAC;gBACvB,SAAS;YACX,CAAC;YAED,OAAO;gBACL,MAAM,EAAE,QAAQ,CAAC,MAAM;gBACvB,IAAI;gBACJ,WAAW,EAAE,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,EAAE;gBACvD,GAAG,EAAE,QAAQ,CAAC,GAAG;aAClB,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,IAAI,OAAO,IAAI,UAAU,EAAE,CAAC;gBAC1B,MAAM,KAAK,CAAC;YACd,CAAC;YACD,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC;YAClD,OAAO,CAAC,GAAG,CAAC,uBAAuB,GAAG,iBAAiB,SAAS,OAAO,CAAC,CAAC;YACzE,MAAM,KAAK,CAAC,SAAS,CAAC,CAAC;QACzB,CAAC;IACH,CAAC;IAED,MAAM,IAAI,KAAK,CAAC,mBAAmB,GAAG,UAA
U,UAAU,GAAG,CAAC,WAAW,CAAC,CAAC;AAC7E,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,wBAAwB,CAAC,GAAW,EAAE,UAAU,GAAG,CAAC;IACxE,MAAM,gBAAgB,EAAE,CAAC;IAEzB,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,IAAI,UAAU,EAAE,OAAO,EAAE,EAAE,CAAC;QACvD,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;gBAChC,OAAO,EAAE;oBACP,YAAY,EAAE,UAAU;oBACxB,QAAQ,EAAE,8CAA8C;iBACzD;gBACD,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC,kBAAkB,CAAC;gBAC/C,QAAQ,EAAE,QAAQ;aACnB,CAAC,CAAC;YAEH,MAAM,IAAI,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM,QAAQ,CAAC,WAAW,EAAE,CAAC,CAAC;YACvD,IAAI,kBAAkB,CAAC,GAAG,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,OAAO,GAAG,UAAU,EAAE,CAAC;gBACpE,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC;gBAClD,OAAO,CAAC,GAAG,CAAC,UAAU,QAAQ,CAAC,MAAM,SAAS,GAAG,iBAAiB,SAAS,OAAO,CAAC,CAAC;gBACpF,MAAM,KAAK,CAAC,SAAS,CAAC,CAAC;gBACvB,SAAS;YACX,CAAC;YAED,OAAO;gBACL,MAAM,EAAE,QAAQ,CAAC,MAAM;gBACvB,IAAI;gBACJ,WAAW,EAAE,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,EAAE;gBACvD,GAAG,EAAE,QAAQ,CAAC,GAAG;aAClB,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,IAAI,OAAO,IAAI,UAAU,EAAE,CAAC;gBAC1B,MAAM,KAAK,CAAC;YACd,CAAC;YACD,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC;YAClD,OAAO,CAAC,GAAG,CAAC,uBAAuB,GAAG,iBAAiB,SAAS,OAAO,CAAC,CAAC;YACzE,MAAM,KAAK,CAAC,SAAS,CAAC,CAAC;QACzB,CAAC;IACH,CAAC;IAED,MAAM,IAAI,KAAK,CAAC,mBAAmB,GAAG,UAAU,UAAU,GAAG,CAAC,WAAW,CAAC,CAAC;AAC7E,CAAC"}
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Rwanda legislation parser utilities.
|
|
3
|
+
*
|
|
4
|
+
* Supports:
|
|
5
|
+
* - law catalog extraction from RwandaLII search HTML snippets
|
|
6
|
+
* - metadata extraction from law detail pages
|
|
7
|
+
* - provision parsing from AKN HTML pages
|
|
8
|
+
* - provision parsing from PDF-extracted text
|
|
9
|
+
*/
|
|
10
|
+
/**
 * Kind of source a law was read from: an AKN HTML page (`'akn'`) or a PDF (`'pdf'`).
 */
export type LawSourceType = 'akn' | 'pdf';
|
|
11
|
+
/**
 * One law entry extracted from catalog results HTML (the element type returned
 * by `parseCatalogResultsHtml`).
 */
export interface CatalogLaw {
    /** Link target of the entry. NOTE(review): presumably a RwandaLII path or URL; confirm against the parser. */
    href: string;
    /** Title of the law as listed in the results. */
    title: string;
    /** Citation string for the law. NOTE(review): exact format is not visible from the declarations. */
    citation: string;
}
|
|
16
|
+
/**
 * Metadata describing a single law, as returned by `extractLawPageMetadata`
 * and consumed by the AKN and PDF parsers.
 */
export interface LawPageMetadata {
    /** Document identifier. NOTE(review): presumably produced by `buildDocumentIdFromHref`; confirm. */
    id: string;
    /** Title of the law. */
    title: string;
    /** English title of the law. */
    title_en: string;
    /** Short display name of the law. */
    short_name: string;
    /** Legal status of the law. */
    status:
        | 'in_force'
        | 'amended'
        | 'repealed'
        | 'not_yet_in_force';
    /** Date the law was issued. NOTE(review): string format (likely ISO date) is not visible here. */
    issued_date: string;
    /** Date the law entered into force. NOTE(review): string format is not visible here. */
    in_force_date: string;
    /** URL associated with the law page. */
    url: string;
    /** Whether the law text comes from an AKN HTML page or a PDF. */
    source_type: LawSourceType;
    /** Work-level FRBR URI, when available. */
    work_frbr_uri?: string;
    /** URL of the PDF rendition, when available. */
    pdf_url?: string;
}
|
|
29
|
+
/**
 * A single provision extracted from a law (element type of `ParsedAct.provisions`).
 */
export interface ParsedProvision {
    /** Reference identifying the provision. NOTE(review): exact format is defined by the parser, not visible here. */
    provision_ref: string;
    /** Chapter the provision belongs to, when known. */
    chapter?: string;
    /** Section label of the provision. */
    section: string;
    /** Heading of the provision. */
    title: string;
    /** Text content of the provision. */
    content: string;
}
|
|
36
|
+
/**
 * A defined term extracted from a law (element type of `ParsedAct.definitions`).
 */
export interface ParsedDefinition {
    /** The term being defined. */
    term: string;
    /** The text defining the term. */
    definition: string;
    /** Provision the definition was taken from, when known. */
    source_provision?: string;
}
|
|
41
|
+
/**
 * A fully parsed law: its metadata plus extracted provisions and definitions.
 * Returned by both `parseAknLawHtml` and `parsePdfExtractedText`.
 */
export interface ParsedAct {
    /** Document identifier. */
    id: string;
    /** Document kind; always the literal `'statute'`. */
    type: 'statute';
    /** Title of the law. */
    title: string;
    /** English title of the law. */
    title_en: string;
    /** Short display name of the law. */
    short_name: string;
    /** Legal status of the law. */
    status:
        | 'in_force'
        | 'amended'
        | 'repealed'
        | 'not_yet_in_force';
    /** Date the law was issued. NOTE(review): string format is not visible here. */
    issued_date: string;
    /** Date the law entered into force. NOTE(review): string format is not visible here. */
    in_force_date: string;
    /** URL associated with the law. */
    url: string;
    /** Optional free-text description. */
    description?: string;
    /** Provisions extracted from the law text. */
    provisions: ParsedProvision[];
    /** Defined terms extracted from the law text. */
    definitions: ParsedDefinition[];
}
|
|
55
|
+
/**
 * Extracts law catalog entries from a results HTML string (RwandaLII search
 * HTML snippets, per the file header).
 *
 * @param resultsHtml - HTML markup containing the catalog results.
 * @returns The catalog entries found in the markup.
 */
export declare function parseCatalogResultsHtml(resultsHtml: string): CatalogLaw[];
|
|
56
|
+
/**
 * Builds a document identifier string from a law's href or work URI.
 * NOTE(review): the identifier format is defined in the implementation, which is not visible here.
 *
 * @param hrefOrWorkUri - Either a link href or a work URI for the law.
 * @returns The derived document identifier.
 */
export declare function buildDocumentIdFromHref(hrefOrWorkUri: string): string;
|
|
57
|
+
/**
 * Extracts metadata for a law from its detail page.
 *
 * @param html - HTML of the law detail page.
 * @param pageUrl - URL the page was loaded from.
 * @returns The metadata extracted for the law.
 */
export declare function extractLawPageMetadata(html: string, pageUrl: string): LawPageMetadata;
|
|
58
|
+
/**
 * Parses a law's AKN HTML page into a `ParsedAct`.
 *
 * @param html - AKN HTML of the law.
 * @param metadata - Previously extracted metadata for the same law.
 * @returns The parsed act, including provisions and definitions.
 */
export declare function parseAknLawHtml(html: string, metadata: LawPageMetadata): ParsedAct;
|
|
59
|
+
/**
 * Parses text extracted from a law's PDF into a `ParsedAct`.
 *
 * @param pdfText - Plain text previously extracted from the PDF.
 * @param metadata - Previously extracted metadata for the same law.
 * @returns The parsed act, including provisions and definitions.
 */
export declare function parsePdfExtractedText(pdfText: string, metadata: LawPageMetadata): ParsedAct;
|
|
60
|
+
//# sourceMappingURL=parser.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parser.d.ts","sourceRoot":"","sources":["../../../scripts/lib/parser.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,MAAM,MAAM,aAAa,GAAG,KAAK,GAAG,KAAK,CAAC;AAE1C,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,eAAe;IAC9B,EAAE,EAAE,MAAM,CAAC;IACX,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,UAAU,GAAG,SAAS,GAAG,UAAU,GAAG,kBAAkB,CAAC;IACjE,WAAW,EAAE,MAAM,CAAC;IACpB,aAAa,EAAE,MAAM,CAAC;IACtB,GAAG,EAAE,MAAM,CAAC;IACZ,WAAW,EAAE,aAAa,CAAC;IAC3B,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,eAAe;IAC9B,aAAa,EAAE,MAAM,CAAC;IACtB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,MAAM,CAAC;IACnB,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAED,MAAM,WAAW,SAAS;IACxB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,SAAS,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,UAAU,GAAG,SAAS,GAAG,UAAU,GAAG,kBAAkB,CAAC;IACjE,WAAW,EAAE,MAAM,CAAC;IACpB,aAAa,EAAE,MAAM,CAAC;IACtB,GAAG,EAAE,MAAM,CAAC;IACZ,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,EAAE,eAAe,EAAE,CAAC;IAC9B,WAAW,EAAE,gBAAgB,EAAE,CAAC;CACjC;AA6YD,wBAAgB,uBAAuB,CAAC,WAAW,EAAE,MAAM,GAAG,UAAU,EAAE,CAczE;AAED,wBAAgB,uBAAuB,CAAC,aAAa,EAAE,MAAM,GAAG,MAAM,CASrE;AAED,wBAAgB,sBAAsB,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,eAAe,CAqDrF;AAED,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,eAAe,GAAG,SAAS,CA8ClF;AA8BD,wBAAgB,qBAAqB,CAAC,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,eAAe,GAAG,SAAS,CAmF3F"}
|