@lexbuild/ecfr 1.9.0 → 1.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/dist/index.d.ts +208 -0
- package/dist/index.js +1344 -0
- package/dist/index.js.map +1 -0
- package/package.json +12 -12
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Chris Thomas
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
import { LevelType, LevelNode, EmitContext, Attributes, InlineType } from '@lexbuild/core';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* eCFR conversion orchestrator.
|
|
5
|
+
*
|
|
6
|
+
* Follows the same collect-then-write pattern as the USC converter:
|
|
7
|
+
* 1. Parse XML via SAX → feed EcfrASTBuilder
|
|
8
|
+
* 2. Collect emitted sections/parts/titles
|
|
9
|
+
* 3. Two-pass link registration (with duplicate detection)
|
|
10
|
+
* 4. Write Markdown files, _meta.json, and README.md
|
|
11
|
+
*/
|
|
12
|
+
/** Options for converting an eCFR XML file */
|
|
13
|
+
interface EcfrConvertOptions {
|
|
14
|
+
/** Path to input eCFR XML file */
|
|
15
|
+
input: string;
|
|
16
|
+
/** Output root directory */
|
|
17
|
+
output: string;
|
|
18
|
+
/** Output granularity: section (default), part, chapter, or title */
|
|
19
|
+
granularity: "section" | "part" | "chapter" | "title";
|
|
20
|
+
/** Link style for cross-references */
|
|
21
|
+
linkStyle: "relative" | "canonical" | "plaintext";
|
|
22
|
+
/** Include source credits in output */
|
|
23
|
+
includeSourceCredits: boolean;
|
|
24
|
+
/** Include all notes */
|
|
25
|
+
includeNotes: boolean;
|
|
26
|
+
/** Selectively include editorial notes */
|
|
27
|
+
includeEditorialNotes: boolean;
|
|
28
|
+
/** Selectively include statutory/regulatory notes */
|
|
29
|
+
includeStatutoryNotes: boolean;
|
|
30
|
+
/** Selectively include amendment history */
|
|
31
|
+
includeAmendments: boolean;
|
|
32
|
+
/** Parse only, don't write files */
|
|
33
|
+
dryRun: boolean;
|
|
34
|
+
}
|
|
35
|
+
/** Result of an eCFR conversion */
|
|
36
|
+
interface EcfrConvertResult {
|
|
37
|
+
/** Number of sections/parts/chapters/titles written (unit follows the chosen granularity) */
|
|
38
|
+
sectionsWritten: number;
|
|
39
|
+
/** Paths of written files */
|
|
40
|
+
files: string[];
|
|
41
|
+
/** Title number from XML metadata */
|
|
42
|
+
titleNumber: string;
|
|
43
|
+
/** Title name from XML metadata */
|
|
44
|
+
titleName: string;
|
|
45
|
+
/** Whether this was a dry run */
|
|
46
|
+
dryRun: boolean;
|
|
47
|
+
/** Number of unique parts */
|
|
48
|
+
partCount: number;
|
|
49
|
+
/** Total estimated tokens */
|
|
50
|
+
totalTokenEstimate: number;
|
|
51
|
+
/** Peak RSS in bytes during conversion */
|
|
52
|
+
peakMemoryBytes: number;
|
|
53
|
+
}
|
|
54
|
+
/**
|
|
55
|
+
* Convert an eCFR XML file to structured Markdown.
|
|
56
|
+
*/
|
|
57
|
+
declare function convertEcfrTitle(options: EcfrConvertOptions): Promise<EcfrConvertResult>;
|
|
58
|
+
|
|
59
|
+
/**
|
|
60
|
+
* eCFR bulk XML downloader.
|
|
61
|
+
*
|
|
62
|
+
* Downloads individual title XML files from govinfo.gov's bulk data repository.
|
|
63
|
+
* Unlike the files handled by the USC downloader, eCFR files are plain XML (not ZIP archives).
|
|
64
|
+
*/
|
|
65
|
+
/** Total number of CFR titles */
|
|
66
|
+
declare const ECFR_TITLE_COUNT = 50;
|
|
67
|
+
/** All eCFR title numbers (1-50) */
|
|
68
|
+
declare const ECFR_TITLE_NUMBERS: number[];
|
|
69
|
+
/** Options for downloading eCFR titles */
|
|
70
|
+
interface EcfrDownloadOptions {
|
|
71
|
+
/** Download directory */
|
|
72
|
+
output: string;
|
|
73
|
+
/** Specific titles to download (1-50), or undefined for all */
|
|
74
|
+
titles?: number[] | undefined;
|
|
75
|
+
}
|
|
76
|
+
/** Result of a successful download */
|
|
77
|
+
interface EcfrDownloadResult {
|
|
78
|
+
/** Number of titles successfully downloaded */
|
|
79
|
+
titlesDownloaded: number;
|
|
80
|
+
/** Metadata (path, title number, size) for each downloaded file */
|
|
81
|
+
files: EcfrDownloadedFile[];
|
|
82
|
+
/** Total bytes downloaded */
|
|
83
|
+
totalBytes: number;
|
|
84
|
+
}
|
|
85
|
+
/** Metadata for a single downloaded file */
|
|
86
|
+
interface EcfrDownloadedFile {
|
|
87
|
+
/** Absolute path to the downloaded file */
|
|
88
|
+
path: string;
|
|
89
|
+
/** Title number */
|
|
90
|
+
titleNumber: number;
|
|
91
|
+
/** File size in bytes */
|
|
92
|
+
size: number;
|
|
93
|
+
}
|
|
94
|
+
/** Error for a failed download */
|
|
95
|
+
interface EcfrDownloadError {
|
|
96
|
+
/** Title number that failed */
|
|
97
|
+
titleNumber: number;
|
|
98
|
+
/** HTTP status code or error message */
|
|
99
|
+
error: string;
|
|
100
|
+
}
|
|
101
|
+
/**
|
|
102
|
+
* Build the download URL for an eCFR title.
|
|
103
|
+
*/
|
|
104
|
+
declare function buildEcfrDownloadUrl(titleNumber: number): string;
|
|
105
|
+
/**
|
|
106
|
+
* Download eCFR XML files from govinfo bulk data.
|
|
107
|
+
*/
|
|
108
|
+
declare function downloadEcfrTitles(options: EcfrDownloadOptions): Promise<EcfrDownloadResult>;
|
|
109
|
+
|
|
110
|
+
/**
|
|
111
|
+
* eCFR AST Builder — converts SAX events from GPO/SGML-derived XML into AST nodes.
|
|
112
|
+
*
|
|
113
|
+
* Follows the same stack-based, emit-at-level pattern as the USLM builder in core,
|
|
114
|
+
* but dispatches on eCFR element names (DIV1-DIV9 with TYPE attributes, HEAD, P, etc.)
|
|
115
|
+
* instead of USLM semantic element names.
|
|
116
|
+
*/
|
|
117
|
+
|
|
118
|
+
/** Options for configuring the eCFR AST builder */
|
|
119
|
+
interface EcfrASTBuilderOptions {
|
|
120
|
+
/** Emit completed nodes at this level instead of accumulating */
|
|
121
|
+
emitAt: LevelType;
|
|
122
|
+
/** Callback when a completed node is ready */
|
|
123
|
+
onEmit: (node: LevelNode, context: EmitContext) => void | Promise<void>;
|
|
124
|
+
}
|
|
125
|
+
/**
|
|
126
|
+
* eCFR AST Builder. Consumes SAX events and produces LexBuild AST nodes.
|
|
127
|
+
*/
|
|
128
|
+
declare class EcfrASTBuilder {
|
|
129
|
+
private readonly options;
|
|
130
|
+
private readonly stack;
|
|
131
|
+
private documentMeta;
|
|
132
|
+
private readonly emitAtIndex;
|
|
133
|
+
/** Track title number from metadata header */
|
|
134
|
+
private titleNumber;
|
|
135
|
+
/** Depth inside CFRTOC or other ignored container */
|
|
136
|
+
private ignoredContainerDepth;
|
|
137
|
+
/** Part-level notes (authority/source) keyed by part identifier */
|
|
138
|
+
private readonly partNotes;
|
|
139
|
+
constructor(options: EcfrASTBuilderOptions);
|
|
140
|
+
/** Get part-level notes (authority/source) captured during parsing */
|
|
141
|
+
getPartNotes(): ReadonlyMap<string, {
|
|
142
|
+
authority?: string | undefined;
|
|
143
|
+
regulatorySource?: string | undefined;
|
|
144
|
+
}>;
|
|
145
|
+
/** Handle SAX open element */
|
|
146
|
+
onOpenElement(name: string, attrs: Attributes): void;
|
|
147
|
+
/** Handle SAX close element */
|
|
148
|
+
onCloseElement(name: string): void;
|
|
149
|
+
/** Handle SAX text content */
|
|
150
|
+
onText(text: string): void;
|
|
151
|
+
private openLevel;
|
|
152
|
+
private closeLevel;
|
|
153
|
+
private openContent;
|
|
154
|
+
private closeContent;
|
|
155
|
+
private openInline;
|
|
156
|
+
private openRef;
|
|
157
|
+
private closeInline;
|
|
158
|
+
private openNote;
|
|
159
|
+
private closeNote;
|
|
160
|
+
private closeTable;
|
|
161
|
+
private closeTableRow;
|
|
162
|
+
private closeTableCell;
|
|
163
|
+
private popFrame;
|
|
164
|
+
private findParentLevel;
|
|
165
|
+
private findParentNote;
|
|
166
|
+
private findTableFrame;
|
|
167
|
+
private extractNoteText;
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
/**
|
|
171
|
+
* eCFR GPO/SGML-derived XML element classification.
|
|
172
|
+
*
|
|
173
|
+
* The eCFR XML uses a numbered DIV system (DIV1-DIV9) where the TYPE
|
|
174
|
+
* attribute determines the semantic level, not the element name.
|
|
175
|
+
*
|
|
176
|
+
* This element vocabulary is shared by the eCFR bulk data and the
|
|
177
|
+
* annual CFR bulk data on govinfo. If a future @lexbuild/cfr package
|
|
178
|
+
* is created for the annual edition, it can import these classifications.
|
|
179
|
+
*/
|
|
180
|
+
|
|
181
|
+
/** Map from DIV TYPE attribute values to LexBuild level types */
|
|
182
|
+
declare const ECFR_TYPE_TO_LEVEL: Readonly<Record<string, LevelType>>;
|
|
183
|
+
/** DIV element names (all route to the TYPE-based level mapping) */
|
|
184
|
+
declare const ECFR_DIV_ELEMENTS: Set<string>;
|
|
185
|
+
/** Elements that contain text content directly */
|
|
186
|
+
declare const ECFR_CONTENT_ELEMENTS: Set<string>;
|
|
187
|
+
/** Elements that contain inline formatting */
|
|
188
|
+
declare const ECFR_INLINE_ELEMENTS: Set<string>;
|
|
189
|
+
/** Map from E element T attribute to InlineType */
|
|
190
|
+
declare const ECFR_EMPHASIS_MAP: Readonly<Record<string, InlineType>>;
|
|
191
|
+
/** Note-like elements */
|
|
192
|
+
declare const ECFR_NOTE_ELEMENTS: Set<string>;
|
|
193
|
+
/** Sub-heading elements within sections/appendices */
|
|
194
|
+
declare const ECFR_HEADING_ELEMENTS: Set<string>;
|
|
195
|
+
/** Block-level elements that wrap content */
|
|
196
|
+
declare const ECFR_BLOCK_ELEMENTS: Set<string>;
|
|
197
|
+
/** Elements to fully ignore (skip entire subtree) */
|
|
198
|
+
declare const ECFR_IGNORE_ELEMENTS: Set<string>;
|
|
199
|
+
/** Elements that are transparent wrappers — pass through without creating frames */
|
|
200
|
+
declare const ECFR_PASSTHROUGH_ELEMENTS: Set<string>;
|
|
201
|
+
/** Self-contained elements to skip (no subtree concerns) */
|
|
202
|
+
declare const ECFR_SKIP_ELEMENTS: Set<string>;
|
|
203
|
+
/** Cross-reference elements */
|
|
204
|
+
declare const ECFR_REF_ELEMENTS: Set<string>;
|
|
205
|
+
/** Table elements (HTML-style) */
|
|
206
|
+
declare const ECFR_TABLE_ELEMENTS: Set<string>;
|
|
207
|
+
|
|
208
|
+
export { ECFR_BLOCK_ELEMENTS, ECFR_CONTENT_ELEMENTS, ECFR_DIV_ELEMENTS, ECFR_EMPHASIS_MAP, ECFR_HEADING_ELEMENTS, ECFR_IGNORE_ELEMENTS, ECFR_INLINE_ELEMENTS, ECFR_NOTE_ELEMENTS, ECFR_PASSTHROUGH_ELEMENTS, ECFR_REF_ELEMENTS, ECFR_SKIP_ELEMENTS, ECFR_TABLE_ELEMENTS, ECFR_TITLE_COUNT, ECFR_TITLE_NUMBERS, ECFR_TYPE_TO_LEVEL, EcfrASTBuilder, type EcfrASTBuilderOptions, type EcfrConvertOptions, type EcfrConvertResult, type EcfrDownloadError, type EcfrDownloadOptions, type EcfrDownloadResult, type EcfrDownloadedFile, buildEcfrDownloadUrl, convertEcfrTitle, downloadEcfrTitles };
|