@lexbuild/fr 1.14.0 → 1.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/dist/index.d.ts +104 -6
- package/dist/index.js +256 -102
- package/dist/index.js.map +1 -1
- package/package.json +13 -13
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Chris Thomas
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/dist/index.d.ts
CHANGED
|
@@ -35,6 +35,8 @@ interface FrDocumentXmlMeta {
|
|
|
35
35
|
rin?: string | undefined;
|
|
36
36
|
/** FR document number extracted from FRDOC text */
|
|
37
37
|
documentNumber?: string | undefined;
|
|
38
|
+
/** Publication date inferred from FRDOC filing date (YYYY-MM-DD) */
|
|
39
|
+
publicationDate?: string | undefined;
|
|
38
40
|
}
|
|
39
41
|
/**
|
|
40
42
|
* Federal Register AST Builder.
|
|
@@ -274,10 +276,22 @@ declare function buildYearDir(year: string, outputRoot: string): string;
|
|
|
274
276
|
* enriches frontmatter with JSON sidecar metadata, renders via core's
|
|
275
277
|
* renderDocument, and writes structured Markdown output.
|
|
276
278
|
*
|
|
277
|
-
* Processes FR documents in
|
|
278
|
-
*
|
|
279
|
+
* Processes FR documents in a single streaming pass: parse each XML file,
|
|
280
|
+
* render Markdown, and write output immediately. No link pre-registration
|
|
281
|
+
* since FR documents rarely cross-reference each other.
|
|
279
282
|
*/
|
|
280
283
|
|
|
284
|
+
/** Progress info for conversion callback */
|
|
285
|
+
interface FrConvertProgress {
|
|
286
|
+
/** Documents converted so far */
|
|
287
|
+
documentsConverted: number;
|
|
288
|
+
/** XML files processed so far */
|
|
289
|
+
filesProcessed: number;
|
|
290
|
+
/** Total XML files to process */
|
|
291
|
+
totalFiles: number;
|
|
292
|
+
/** Current XML file being processed */
|
|
293
|
+
currentFile: string;
|
|
294
|
+
}
|
|
281
295
|
/** Options for converting FR documents */
|
|
282
296
|
interface FrConvertOptions {
|
|
283
297
|
/** Path to input file or directory containing .xml/.json files */
|
|
@@ -294,6 +308,8 @@ interface FrConvertOptions {
|
|
|
294
308
|
to?: string | undefined;
|
|
295
309
|
/** Filter: document types */
|
|
296
310
|
types?: FrDocumentType[] | undefined;
|
|
311
|
+
/** Progress callback */
|
|
312
|
+
onProgress?: ((progress: FrConvertProgress) => void) | undefined;
|
|
297
313
|
}
|
|
298
314
|
/** Result of a conversion operation */
|
|
299
315
|
interface FrConvertResult {
|
|
@@ -339,8 +355,8 @@ interface FrDownloadOptions {
|
|
|
339
355
|
types?: FrDocumentType[] | undefined;
|
|
340
356
|
/** Maximum number of documents to download (for testing) */
|
|
341
357
|
limit?: number | undefined;
|
|
342
|
-
/**
|
|
343
|
-
|
|
358
|
+
/** Number of concurrent XML downloads (default 10) */
|
|
359
|
+
concurrency?: number | undefined;
|
|
344
360
|
/** Progress callback */
|
|
345
361
|
onProgress?: ((progress: FrDownloadProgress) => void) | undefined;
|
|
346
362
|
}
|
|
@@ -401,7 +417,8 @@ declare function buildFrApiListUrl(from: string, to: string, page: number, types
|
|
|
401
417
|
* Download FR documents for a date range.
|
|
402
418
|
*
|
|
403
419
|
* Automatically chunks large date ranges into month-sized windows to stay
|
|
404
|
-
* under the API's 10,000 result cap per query.
|
|
420
|
+
* under the API's 10,000 result cap per query. Within each chunk, document
|
|
421
|
+
* XML files are downloaded concurrently (default 10 at a time).
|
|
405
422
|
*/
|
|
406
423
|
declare function downloadFrDocuments(options: FrDownloadOptions): Promise<FrDownloadResult>;
|
|
407
424
|
/**
|
|
@@ -411,4 +428,85 @@ declare function downloadFrDocuments(options: FrDownloadOptions): Promise<FrDown
|
|
|
411
428
|
*/
|
|
412
429
|
declare function downloadSingleFrDocument(documentNumber: string, output: string): Promise<FrDownloadedFile>;
|
|
413
430
|
|
|
414
|
-
|
|
431
|
+
/**
|
|
432
|
+
* Federal Register govinfo bulk downloader.
|
|
433
|
+
*
|
|
434
|
+
* Downloads complete daily-issue XML files from govinfo.gov. Each file contains
|
|
435
|
+
* all FR documents published on a single day (~150 documents, ~2.4 MB average).
|
|
436
|
+
* This is dramatically faster than the per-document API for historical backfill.
|
|
437
|
+
*
|
|
438
|
+
* URL pattern: https://www.govinfo.gov/content/pkg/FR-{YYYY-MM-DD}/xml/FR-{YYYY-MM-DD}.xml
|
|
439
|
+
*
|
|
440
|
+
* The existing FrASTBuilder handles daily-issue XML natively: FEDREG root is a
|
|
441
|
+
* passthrough, section containers (RULES, NOTICES, etc.) are passthroughs, and
|
|
442
|
+
* individual document elements emit via onEmit. No splitter needed.
|
|
443
|
+
*/
|
|
444
|
+
/** Options for downloading FR bulk XML from govinfo */
|
|
445
|
+
interface FrGovinfoBulkOptions {
|
|
446
|
+
/** Download directory (e.g., "./downloads/fr") */
|
|
447
|
+
output: string;
|
|
448
|
+
/** Start date (YYYY-MM-DD, inclusive) */
|
|
449
|
+
from: string;
|
|
450
|
+
/** End date (YYYY-MM-DD, inclusive). Defaults to today. */
|
|
451
|
+
to?: string | undefined;
|
|
452
|
+
/** Number of concurrent downloads (default 10) */
|
|
453
|
+
concurrency?: number | undefined;
|
|
454
|
+
/** Progress callback */
|
|
455
|
+
onProgress?: ((progress: FrGovinfoProgress) => void) | undefined;
|
|
456
|
+
}
|
|
457
|
+
/** Progress info for govinfo download callback */
|
|
458
|
+
interface FrGovinfoProgress {
|
|
459
|
+
/** Files downloaded so far */
|
|
460
|
+
downloaded: number;
|
|
461
|
+
/** Total publishing days in date range */
|
|
462
|
+
totalDays: number;
|
|
463
|
+
/** Skipped days (weekends/holidays — 404) */
|
|
464
|
+
skipped: number;
|
|
465
|
+
/** Failed downloads */
|
|
466
|
+
failed: number;
|
|
467
|
+
/** Current date being downloaded */
|
|
468
|
+
currentDate: string;
|
|
469
|
+
}
|
|
470
|
+
/** A successfully downloaded bulk file */
|
|
471
|
+
interface FrGovinfoDownloadedFile {
|
|
472
|
+
/** Absolute path to the downloaded XML file */
|
|
473
|
+
path: string;
|
|
474
|
+
/** Publication date (YYYY-MM-DD) */
|
|
475
|
+
date: string;
|
|
476
|
+
/** File size in bytes */
|
|
477
|
+
size: number;
|
|
478
|
+
}
|
|
479
|
+
/** Result of a govinfo bulk download */
|
|
480
|
+
interface FrGovinfoResult {
|
|
481
|
+
/** Number of daily files downloaded */
|
|
482
|
+
filesDownloaded: number;
|
|
483
|
+
/** Downloaded files */
|
|
484
|
+
files: FrGovinfoDownloadedFile[];
|
|
485
|
+
/** Total bytes downloaded */
|
|
486
|
+
totalBytes: number;
|
|
487
|
+
/** Date range covered */
|
|
488
|
+
dateRange: {
|
|
489
|
+
from: string;
|
|
490
|
+
to: string;
|
|
491
|
+
};
|
|
492
|
+
/** Days skipped (no issue published — weekends/holidays) */
|
|
493
|
+
skipped: number;
|
|
494
|
+
/** Days that failed to download */
|
|
495
|
+
failed: number;
|
|
496
|
+
}
|
|
497
|
+
/**
|
|
498
|
+
* Build the govinfo download URL for a single day's FR issue.
|
|
499
|
+
*/
|
|
500
|
+
declare function buildGovinfoFrUrl(date: string): string;
|
|
501
|
+
/**
|
|
502
|
+
* Build the local file path for a downloaded daily-issue XML.
|
|
503
|
+
* Stored as: {output}/bulk/{YYYY}/FR-{YYYY-MM-DD}.xml
|
|
504
|
+
*/
|
|
505
|
+
declare function buildGovinfoBulkPath(date: string, outputDir: string): string;
|
|
506
|
+
/**
|
|
507
|
+
* Download FR daily-issue XML files from govinfo for a date range.
|
|
508
|
+
* Skips weekends/holidays (404 responses) and retries transient errors.
|
|
509
|
+
*/
|
|
510
|
+
declare function downloadFrBulk(options: FrGovinfoBulkOptions): Promise<FrGovinfoResult>;
|
|
511
|
+
|
|
512
|
+
export { FR_BLOCK_ELEMENTS, FR_CONTENT_ELEMENTS, FR_DOCUMENT_ELEMENTS, FR_DOCUMENT_TYPE_KEYS, FR_DOCUMENT_TYPE_MAP, FR_EMPHASIS_MAP, FR_HD_SOURCE_TO_DEPTH, FR_HEADING_ELEMENT, FR_IGNORE_ELEMENTS, FR_INLINE_ELEMENTS, FR_NOTE_ELEMENTS, FR_PASSTHROUGH_ELEMENTS, FR_PREAMBLE_META_ELEMENTS, FR_PREAMBLE_SECTIONS, FR_PRESIDENTIAL_SUBTYPES, FR_REGTEXT_ELEMENTS, FR_SECTION_CONTAINERS, FR_SIGNATURE_ELEMENTS, FR_SKIP_ELEMENTS, FR_TABLE_ELEMENTS, FrASTBuilder, type FrASTBuilderOptions, type FrConvertOptions, type FrConvertProgress, type FrConvertResult, type FrDocumentJsonMeta, type FrDocumentType, type FrDocumentXmlMeta, type FrDownloadFailure, type FrDownloadOptions, type FrDownloadProgress, type FrDownloadResult, type FrDownloadedFile, type FrGovinfoBulkOptions, type FrGovinfoDownloadedFile, type FrGovinfoProgress, type FrGovinfoResult, buildFrApiListUrl, buildFrDownloadJsonPath, buildFrDownloadXmlPath, buildFrFrontmatter, buildFrOutputPath, buildGovinfoBulkPath, buildGovinfoFrUrl, buildMonthDir, buildYearDir, convertFrDocuments, downloadFrBulk, downloadFrDocuments, downloadSingleFrDocument };
|