@lexbuild/fr 1.14.0 → 1.15.0

This diff compares the contents of two publicly available versions of a package as released to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Chris Thomas
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/dist/index.d.ts CHANGED
@@ -35,6 +35,8 @@ interface FrDocumentXmlMeta {
35
35
  rin?: string | undefined;
36
36
  /** FR document number extracted from FRDOC text */
37
37
  documentNumber?: string | undefined;
38
+ /** Publication date inferred from FRDOC filing date (YYYY-MM-DD) */
39
+ publicationDate?: string | undefined;
38
40
  }
39
41
  /**
40
42
  * Federal Register AST Builder.
@@ -274,10 +276,22 @@ declare function buildYearDir(year: string, outputRoot: string): string;
274
276
  * enriches frontmatter with JSON sidecar metadata, renders via core's
275
277
  * renderDocument, and writes structured Markdown output.
276
278
  *
277
- * Processes FR documents in two passes: (1) parse all files and register
278
- * identifiers for link resolution, (2) render and write output files.
279
+ * Processes FR documents in a single streaming pass: parse each XML file,
280
+ * render Markdown, and write output immediately. No link pre-registration
281
+ * since FR documents rarely cross-reference each other.
279
282
  */
280
283
 
284
+ /** Progress snapshot passed to the `onProgress` callback of FrConvertOptions */
285
+ interface FrConvertProgress {
286
+ /** Documents converted so far (can differ from filesProcessed: a daily-issue XML file holds many documents) */
287
+ documentsConverted: number;
288
+ /** XML files processed so far */
289
+ filesProcessed: number;
290
+ /** Total XML files to process */
291
+ totalFiles: number;
292
+ /** Current XML file being processed (presumably its path; confirm against the implementation) */
293
+ currentFile: string;
294
+ }
281
295
  /** Options for converting FR documents */
282
296
  interface FrConvertOptions {
283
297
  /** Path to input file or directory containing .xml/.json files */
@@ -294,6 +308,8 @@ interface FrConvertOptions {
294
308
  to?: string | undefined;
295
309
  /** Filter: document types */
296
310
  types?: FrDocumentType[] | undefined;
311
+ /** Progress callback */
312
+ onProgress?: ((progress: FrConvertProgress) => void) | undefined;
297
313
  }
298
314
  /** Result of a conversion operation */
299
315
  interface FrConvertResult {
@@ -339,8 +355,8 @@ interface FrDownloadOptions {
339
355
  types?: FrDocumentType[] | undefined;
340
356
  /** Maximum number of documents to download (for testing) */
341
357
  limit?: number | undefined;
342
- /** Delay between XML fetches in milliseconds */
343
- fetchDelayMs?: number | undefined;
358
+ /** Number of concurrent XML downloads (default 10) */
359
+ concurrency?: number | undefined;
344
360
  /** Progress callback */
345
361
  onProgress?: ((progress: FrDownloadProgress) => void) | undefined;
346
362
  }
@@ -401,7 +417,8 @@ declare function buildFrApiListUrl(from: string, to: string, page: number, types
401
417
  * Download FR documents for a date range.
402
418
  *
403
419
  * Automatically chunks large date ranges into month-sized windows to stay
404
- * under the API's 10,000 result cap per query.
420
+ * under the API's 10,000 result cap per query. Within each chunk, document
421
+ * XML files are downloaded concurrently (default 10 at a time).
405
422
  */
406
423
  declare function downloadFrDocuments(options: FrDownloadOptions): Promise<FrDownloadResult>;
407
424
  /**
@@ -411,4 +428,85 @@ declare function downloadFrDocuments(options: FrDownloadOptions): Promise<FrDown
411
428
  */
412
429
  declare function downloadSingleFrDocument(documentNumber: string, output: string): Promise<FrDownloadedFile>;
413
430
 
414
- export { FR_BLOCK_ELEMENTS, FR_CONTENT_ELEMENTS, FR_DOCUMENT_ELEMENTS, FR_DOCUMENT_TYPE_KEYS, FR_DOCUMENT_TYPE_MAP, FR_EMPHASIS_MAP, FR_HD_SOURCE_TO_DEPTH, FR_HEADING_ELEMENT, FR_IGNORE_ELEMENTS, FR_INLINE_ELEMENTS, FR_NOTE_ELEMENTS, FR_PASSTHROUGH_ELEMENTS, FR_PREAMBLE_META_ELEMENTS, FR_PREAMBLE_SECTIONS, FR_PRESIDENTIAL_SUBTYPES, FR_REGTEXT_ELEMENTS, FR_SECTION_CONTAINERS, FR_SIGNATURE_ELEMENTS, FR_SKIP_ELEMENTS, FR_TABLE_ELEMENTS, FrASTBuilder, type FrASTBuilderOptions, type FrConvertOptions, type FrConvertResult, type FrDocumentJsonMeta, type FrDocumentType, type FrDocumentXmlMeta, type FrDownloadFailure, type FrDownloadOptions, type FrDownloadProgress, type FrDownloadResult, type FrDownloadedFile, buildFrApiListUrl, buildFrDownloadJsonPath, buildFrDownloadXmlPath, buildFrFrontmatter, buildFrOutputPath, buildMonthDir, buildYearDir, convertFrDocuments, downloadFrDocuments, downloadSingleFrDocument };
431
+ /**
432
+ * Federal Register govinfo bulk downloader.
433
+ *
434
+ * Downloads complete daily-issue XML files from govinfo.gov. Each file contains
435
+ * all FR documents published on a single day (~150 documents, ~2.4 MB average).
436
+ * This is dramatically faster than the per-document API for historical backfill.
437
+ *
438
+ * URL pattern: https://www.govinfo.gov/content/pkg/FR-{YYYY-MM-DD}/xml/FR-{YYYY-MM-DD}.xml
439
+ *
440
+ * The existing FrASTBuilder handles daily-issue XML natively: FEDREG root is a
441
+ * passthrough, section containers (RULES, NOTICES, etc.) are passthroughs, and
442
+ * individual document elements emit via onEmit. No splitter needed.
443
+ */
444
+ /** Options accepted by downloadFrBulk for downloading FR daily-issue XML from govinfo */
445
+ interface FrGovinfoBulkOptions {
446
+ /** Download root directory (e.g., "./downloads/fr"); buildGovinfoBulkPath documents the layout as {output}/bulk/{YYYY}/FR-{YYYY-MM-DD}.xml */
447
+ output: string;
448
+ /** Start date (YYYY-MM-DD, inclusive) */
449
+ from: string;
450
+ /** End date (YYYY-MM-DD, inclusive). Defaults to today. */
451
+ to?: string | undefined;
452
+ /** Number of concurrent downloads (default 10) */
453
+ concurrency?: number | undefined;
454
+ /** Progress callback; receives an FrGovinfoProgress snapshot */
455
+ onProgress?: ((progress: FrGovinfoProgress) => void) | undefined;
456
+ }
457
+ /** Progress snapshot passed to the `onProgress` callback of FrGovinfoBulkOptions */
458
+ interface FrGovinfoProgress {
459
+ /** Daily-issue files downloaded so far */
460
+ downloaded: number;
461
+ /** Total publishing days in date range (NOTE(review): unclear whether days later counted in `skipped` are included; confirm) */
462
+ totalDays: number;
463
+ /** Days skipped so far: no issue published (weekends/holidays — 404 response) */
464
+ skipped: number;
465
+ /** Days that failed to download so far */
466
+ failed: number;
467
+ /** Current date being downloaded (presumably YYYY-MM-DD, like the `from`/`to` options; confirm) */
468
+ currentDate: string;
469
+ }
470
+ /** A successfully downloaded daily-issue XML file (element type of FrGovinfoResult.files) */
471
+ interface FrGovinfoDownloadedFile {
472
+ /** Absolute path to the downloaded XML file */
473
+ path: string;
474
+ /** Publication date of the daily issue (YYYY-MM-DD) */
475
+ date: string;
476
+ /** File size in bytes */
477
+ size: number;
478
+ }
479
+ /** Result of a govinfo bulk download (the value downloadFrBulk's promise resolves to) */
480
+ interface FrGovinfoResult {
481
+ /** Number of daily files downloaded */
482
+ filesDownloaded: number;
483
+ /** Downloaded files, one FrGovinfoDownloadedFile per daily issue */
484
+ files: FrGovinfoDownloadedFile[];
485
+ /** Total bytes downloaded */
486
+ totalBytes: number;
487
+ /** Date range covered (presumably YYYY-MM-DD strings mirroring the `from`/`to` options; confirm) */
488
+ dateRange: {
489
+ from: string;
490
+ to: string;
491
+ };
492
+ /** Days skipped (no issue published — weekends/holidays; reported by govinfo as 404 responses) */
493
+ skipped: number;
494
+ /** Days that failed to download */
495
+ failed: number;
496
+ }
497
+ /**
498
+ * Build the govinfo download URL for a single day's FR issue. URL pattern: https://www.govinfo.gov/content/pkg/FR-{YYYY-MM-DD}/xml/FR-{YYYY-MM-DD}.xml, with `date` presumably supplying the YYYY-MM-DD part (confirm).
499
+ */
500
+ declare function buildGovinfoFrUrl(date: string): string;
501
+ /**
502
+ * Build the local file path for a downloaded daily-issue XML.
503
+ * Stored as: {outputDir}/bulk/{YYYY}/FR-{YYYY-MM-DD}.xml, where {YYYY} and {YYYY-MM-DD} presumably come from `date` (confirm).
504
+ */
505
+ declare function buildGovinfoBulkPath(date: string, outputDir: string): string;
506
+ /**
507
+ * Download FR daily-issue XML files from govinfo for the inclusive date range `options.from` to `options.to` (`to` defaults to today).
508
+ * Skips weekends/holidays (404 responses, reported as `skipped`) and retries transient errors; days that still fail are presumably reported as `failed` (confirm).
509
+ */
510
+ declare function downloadFrBulk(options: FrGovinfoBulkOptions): Promise<FrGovinfoResult>;
511
+
512
+ export { FR_BLOCK_ELEMENTS, FR_CONTENT_ELEMENTS, FR_DOCUMENT_ELEMENTS, FR_DOCUMENT_TYPE_KEYS, FR_DOCUMENT_TYPE_MAP, FR_EMPHASIS_MAP, FR_HD_SOURCE_TO_DEPTH, FR_HEADING_ELEMENT, FR_IGNORE_ELEMENTS, FR_INLINE_ELEMENTS, FR_NOTE_ELEMENTS, FR_PASSTHROUGH_ELEMENTS, FR_PREAMBLE_META_ELEMENTS, FR_PREAMBLE_SECTIONS, FR_PRESIDENTIAL_SUBTYPES, FR_REGTEXT_ELEMENTS, FR_SECTION_CONTAINERS, FR_SIGNATURE_ELEMENTS, FR_SKIP_ELEMENTS, FR_TABLE_ELEMENTS, FrASTBuilder, type FrASTBuilderOptions, type FrConvertOptions, type FrConvertProgress, type FrConvertResult, type FrDocumentJsonMeta, type FrDocumentType, type FrDocumentXmlMeta, type FrDownloadFailure, type FrDownloadOptions, type FrDownloadProgress, type FrDownloadResult, type FrDownloadedFile, type FrGovinfoBulkOptions, type FrGovinfoDownloadedFile, type FrGovinfoProgress, type FrGovinfoResult, buildFrApiListUrl, buildFrDownloadJsonPath, buildFrDownloadXmlPath, buildFrFrontmatter, buildFrOutputPath, buildGovinfoBulkPath, buildGovinfoFrUrl, buildMonthDir, buildYearDir, convertFrDocuments, downloadFrBulk, downloadFrDocuments, downloadSingleFrDocument };