@lexbuild/ecfr 1.9.0 → 1.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/dist/index.d.ts +208 -0
- package/dist/index.js +1344 -0
- package/dist/index.js.map +1 -0
- package/package.json +12 -12
package/dist/index.js
ADDED
|
@@ -0,0 +1,1344 @@
|
|
|
1
|
+
// src/converter.ts
|
|
2
|
+
import { createReadStream } from "fs";
|
|
3
|
+
import { mkdir, writeFile } from "fs/promises";
|
|
4
|
+
import { join as join2, dirname, basename, relative } from "path";
|
|
5
|
+
import {
|
|
6
|
+
XMLParser,
|
|
7
|
+
renderDocument,
|
|
8
|
+
createLinkResolver,
|
|
9
|
+
FORMAT_VERSION,
|
|
10
|
+
GENERATOR
|
|
11
|
+
} from "@lexbuild/core";
|
|
12
|
+
|
|
13
|
+
// src/ecfr-builder.ts
|
|
14
|
+
import { LEVEL_TYPES } from "@lexbuild/core";
|
|
15
|
+
|
|
16
|
+
// src/ecfr-elements.ts
|
|
17
|
+
// Hierarchy level assigned to a DIVn element, keyed by its TYPE attribute.
// SUBJGRP (subject group) deliberately shares the "subpart" level.
var ECFR_TYPE_TO_LEVEL = {
  TITLE: "title",
  SUBTITLE: "subtitle",
  CHAPTER: "chapter",
  SUBCHAP: "subchapter",
  PART: "part",
  SUBPART: "subpart",
  SUBJGRP: "subpart",
  SECTION: "section",
  APPENDIX: "appendix"
};

// Structural container elements, DIV1 through DIV9.
var ECFR_DIV_ELEMENTS = /* @__PURE__ */ new Set([
  "DIV1", "DIV2", "DIV3", "DIV4", "DIV5", "DIV6", "DIV7", "DIV8", "DIV9"
]);

// Paragraph-like content elements:
//   P        paragraph (primary content element)
//   FP       flush paragraph
//   FP-1/-2  indented flush paragraph, level 1 / level 2
//   FP-DASH  dash-leader flush paragraph (form lines)
//   FP1-2    alternative indented paragraph
//   FRP      flush right paragraph
var ECFR_CONTENT_ELEMENTS = /* @__PURE__ */ new Set([
  "P", "FP", "FP-1", "FP-2", "FP-DASH", "FP1-2", "FRP"
]);

// Inline formatting elements:
//   I italic · B bold · E emphasis (kind chosen by the T attribute)
//   SU superscript · FR fraction · AC accent/diacritical
var ECFR_INLINE_ELEMENTS = /* @__PURE__ */ new Set(["I", "B", "E", "SU", "FR", "AC"]);

// Inline type for each value of the T attribute on <E>.
var ECFR_EMPHASIS_MAP = {
  "01": "bold",
  "02": "italic",
  "03": "bold", // bold italic in print; treated as bold for Markdown
  "04": "italic", // italic in headings
  "05": "italic", // small caps; rendered as italic
  "51": "sub",
  "52": "sub",
  "54": "sub", // subscript (math)
  "7462": "italic" // special terms (et seq., De minimis)
};

// Note-like elements:
//   AUTH authority citation · SOURCE source/provenance note · EDNOTE editorial note
//   EFFDNOT effective date note · CITA citation / amendment history
//   APPRO OMB approval note · NOTE general note · CROSSREF cross-reference block
//   SECAUTH section-level authority · FTNT footnote
var ECFR_NOTE_ELEMENTS = /* @__PURE__ */ new Set([
  "AUTH", "SOURCE", "EDNOTE", "EFFDNOT", "CITA",
  "APPRO", "NOTE", "CROSSREF", "SECAUTH", "FTNT"
]);

// Sub-heading elements that appear inside content.
var ECFR_HEADING_ELEMENTS = /* @__PURE__ */ new Set(["HD1", "HD2", "HD3"]);

// Block wrappers: EXTRACT (extracted/quoted text) and EXAMPLE (example text).
var ECFR_BLOCK_ELEMENTS = /* @__PURE__ */ new Set(["EXTRACT", "EXAMPLE"]);

// Containers whose whole subtree is skipped:
// CFRTOC (table of contents) and HEADER (file metadata header).
var ECFR_IGNORE_ELEMENTS = /* @__PURE__ */ new Set(["CFRTOC", "HEADER"]);

// Wrapper elements that produce no frame of their own.
var ECFR_PASSTHROUGH_ELEMENTS = /* @__PURE__ */ new Set(["DLPSTEXTCLASS", "TEXT", "BODY", "ECFRBRWS"]);

// Elements skipped together with their content:
//   PTHD part heading in TOC · CHAPTI chapter item in TOC · SECHD section heading in TOC
//   SUBJECT subject text in TOC · RESERVED reserved placeholder · PG page number
//   STARS visual separator · AMDDATE amendment date
var ECFR_SKIP_ELEMENTS = /* @__PURE__ */ new Set([
  "PTHD", "CHAPTI", "SECHD", "SUBJECT", "RESERVED", "PG", "STARS", "AMDDATE"
]);

// Reference elements: XREF (cross-reference link) and FTREF (footnote reference marker).
var ECFR_REF_ELEMENTS = /* @__PURE__ */ new Set(["XREF", "FTREF"]);

// HTML-style table elements.
var ECFR_TABLE_ELEMENTS = /* @__PURE__ */ new Set(["TABLE", "TR", "TH", "TD"]);
|
|
149
|
+
|
|
150
|
+
// src/ecfr-builder.ts
|
|
151
|
+
/**
 * Builds AST nodes from a stream of SAX events over eCFR XML.
 *
 * Callers feed `onOpenElement`, `onCloseElement` and `onText`. The builder keeps
 * a stack of frames, one per open element it tracks. When a level element closes
 * it is either handed to `options.onEmit` or appended to the nearest enclosing
 * level (see `closeLevel`).
 *
 * Every tracked open element pushes exactly one frame and every matching close
 * removes exactly one, so an element that appears in an unexpected place cannot
 * unbalance the stack.
 */
var EcfrASTBuilder = class {
  /** Options passed to the constructor; `emitAt` and `onEmit` are read here. */
  options;
  /** Open-element frames: `{ kind, elementName, textBuffer, node?, ...tableState }`. */
  stack = [];
  /** Document metadata, shallow-copied into every emit context. */
  documentMeta = {};
  /** Index of `options.emitAt` in LEVEL_TYPES (-1 when it is not listed). */
  emitAtIndex;
  /** Title number, captured when the title-level DIV is opened. */
  titleNumber = "";
  /** Nesting depth inside an ignored or skipped container (0 = not ignoring). */
  ignoredContainerDepth = 0;
  /** Part-level notes (authority/source) keyed by part identifier. */
  partNotes = /* @__PURE__ */ new Map();

  /**
   * @param options Builder options; must provide `emitAt` (a LEVEL_TYPES entry)
   *   and `onEmit(node, context)`.
   */
  constructor(options) {
    this.options = options;
    this.emitAtIndex = LEVEL_TYPES.indexOf(options.emitAt);
  }

  /** Get part-level notes (authority/source) captured during parsing. */
  getPartNotes() {
    return this.partNotes;
  }

  /**
   * Handle a SAX open-element event.
   *
   * @param name Element name as reported by the parser.
   * @param attrs Attribute map for the element.
   */
  onOpenElement(name, attrs) {
    // Inside an ignored subtree only the depth is tracked.
    if (this.ignoredContainerDepth > 0) {
      this.ignoredContainerDepth++;
      return;
    }
    if (ECFR_IGNORE_ELEMENTS.has(name)) {
      this.ignoredContainerDepth = 1;
      return;
    }
    if (ECFR_PASSTHROUGH_ELEMENTS.has(name)) {
      return;
    }
    if (ECFR_SKIP_ELEMENTS.has(name)) {
      this.ignoredContainerDepth = 1;
      return;
    }
    if (ECFR_DIV_ELEMENTS.has(name)) {
      const divType = attrs["TYPE"];
      // Own-property check: an attribute value such as "constructor" must not
      // resolve to something inherited from Object.prototype.
      if (divType && Object.hasOwn(ECFR_TYPE_TO_LEVEL, divType)) {
        this.openLevel(ECFR_TYPE_TO_LEVEL[divType], name, attrs);
        return;
      }
      this.stack.push({ kind: "ignore", elementName: name, textBuffer: "" });
      return;
    }
    if (name === "HEAD" || name === "HED") {
      this.stack.push({ kind: "heading", elementName: name, textBuffer: "" });
      return;
    }
    if (name === "PSPACE") {
      this.stack.push({ kind: "noteContent", elementName: name, textBuffer: "" });
      return;
    }
    if (ECFR_CONTENT_ELEMENTS.has(name) || ECFR_HEADING_ELEMENTS.has(name)) {
      this.openContent(name);
      return;
    }
    if (ECFR_INLINE_ELEMENTS.has(name)) {
      this.openInline(name, attrs);
      return;
    }
    if (ECFR_REF_ELEMENTS.has(name)) {
      this.openRef(name, attrs);
      return;
    }
    if (ECFR_NOTE_ELEMENTS.has(name)) {
      this.openNote(name, attrs);
      return;
    }
    if (ECFR_BLOCK_ELEMENTS.has(name)) {
      this.stack.push({ kind: "block", elementName: name, textBuffer: "" });
      return;
    }
    if (name === "TABLE") {
      this.stack.push({
        kind: "table",
        elementName: name,
        textBuffer: "",
        headers: [],
        rows: [],
        currentRow: [],
        isHeaderRow: false
      });
      return;
    }
    if (name === "TR") {
      const tableFrame = this.findTableFrame();
      if (tableFrame) {
        tableFrame.currentRow = [];
        tableFrame.isHeaderRow = false;
      }
      // Always push, even without an enclosing table, so the close handler
      // removes this frame and not an unrelated one.
      this.stack.push({ kind: "tableRow", elementName: name, textBuffer: "" });
      return;
    }
    if (name === "TH") {
      const tableFrame = this.findTableFrame();
      if (tableFrame) {
        tableFrame.isHeaderRow = true;
      }
      // Always push, mirroring TD, so closeTableCell finds its own frame.
      this.stack.push({ kind: "tableCell", elementName: name, textBuffer: "" });
      return;
    }
    if (name === "TD") {
      this.stack.push({ kind: "tableCell", elementName: name, textBuffer: "" });
      return;
    }
    if (name === "img") {
      // No frame is pushed; onCloseElement skips "img" as well.
      return;
    }
    // "DIV"/"div" and every unrecognised element: track it so its close event
    // is matched, but contribute nothing to the tree.
    this.stack.push({ kind: "ignore", elementName: name, textBuffer: "" });
  }

  /**
   * Handle a SAX close-element event.
   *
   * @param name Element name as reported by the parser.
   */
  onCloseElement(name) {
    if (this.ignoredContainerDepth > 0) {
      this.ignoredContainerDepth--;
      return;
    }
    if (ECFR_PASSTHROUGH_ELEMENTS.has(name)) {
      return;
    }
    if (name === "HEAD") {
      const frame = this.popFrame(name);
      if (frame) {
        const parentLevel = this.findParentLevel();
        if (parentLevel?.node && parentLevel.node.type === "level") {
          const levelNode = parentLevel.node;
          const headText = frame.textBuffer.trim();
          if (levelNode.levelType === "section" && levelNode.numValue) {
            // Section headings repeat "§ <number>"; strip it when present.
            const prefix = `\xA7 ${levelNode.numValue}`;
            let stripped = headText;
            if (stripped.startsWith(prefix)) {
              stripped = stripped.slice(prefix.length).replace(/^[\s.]+/, "").trim();
            }
            levelNode.heading = stripped || headText;
          } else {
            levelNode.heading = stripLevelPrefix(headText);
          }
        }
      }
      return;
    }
    if (name === "HED") {
      // The text collected for HED is discarded.
      this.popFrame(name);
      return;
    }
    if (name === "PSPACE") {
      const frame = this.popFrame(name);
      if (frame) {
        const parentNote = this.findParentNote();
        if (parentNote?.node && parentNote.node.type === "note") {
          const noteNode = parentNote.node;
          const textNode = {
            type: "inline",
            inlineType: "text",
            text: frame.textBuffer.trim()
          };
          const contentNode = {
            type: "content",
            variant: "content",
            children: [textNode]
          };
          noteNode.children.push(contentNode);
        }
      }
      return;
    }
    if (ECFR_DIV_ELEMENTS.has(name)) {
      this.closeLevel(name);
      return;
    }
    if (ECFR_CONTENT_ELEMENTS.has(name) || ECFR_HEADING_ELEMENTS.has(name)) {
      this.closeContent(name);
      return;
    }
    if (ECFR_INLINE_ELEMENTS.has(name) || ECFR_REF_ELEMENTS.has(name)) {
      this.closeInline(name);
      return;
    }
    if (ECFR_NOTE_ELEMENTS.has(name)) {
      this.closeNote(name);
      return;
    }
    if (ECFR_BLOCK_ELEMENTS.has(name)) {
      this.popFrame(name);
      return;
    }
    if (name === "TABLE") {
      this.closeTable();
      return;
    }
    if (name === "TR") {
      this.closeTableRow();
      return;
    }
    if (name === "TH" || name === "TD") {
      this.closeTableCell();
      return;
    }
    if (name === "img") {
      return;
    }
    if (this.stack.length > 0 && this.stack[this.stack.length - 1]?.elementName === name) {
      this.stack.pop();
    }
  }

  /**
   * Handle SAX text content by routing it to the innermost open frame.
   *
   * @param text Raw text chunk from the parser (not trimmed here).
   */
  onText(text) {
    if (this.ignoredContainerDepth > 0) return;
    const frame = this.stack[this.stack.length - 1];
    if (!frame) return;
    if (frame.kind === "heading" || frame.kind === "noteContent" || frame.kind === "tableCell") {
      frame.textBuffer += text;
      return;
    }
    if (frame.kind === "content" && frame.node?.type === "content") {
      // NOTE(review): for HD1/HD2/HD3 this appends text next to the bold wrapper
      // created in openContent, not inside it, whereas closeInline places inline
      // children inside the wrapper. Confirm which is intended.
      if (text) {
        frame.node.children.push({
          type: "inline",
          inlineType: "text",
          text
        });
      }
      return;
    }
    if (frame.kind === "inline" && frame.node?.type === "inline") {
      const inlineNode = frame.node;
      if (inlineNode.children) {
        inlineNode.children.push({
          type: "inline",
          inlineType: "text",
          text
        });
      } else {
        // Nodes created without a children array (FTREF) keep plain text.
        inlineNode.text = (inlineNode.text ?? "") + text;
      }
      return;
    }
    if (frame.kind === "note" && frame.node?.type === "note") {
      frame.textBuffer += text;
      return;
    }
    // Text directly under any other frame kind (level, block, table, ...) is dropped.
  }

  // ---- Private helpers ----

  /**
   * Push a level frame for a DIVn element.
   *
   * @param levelType Level type resolved from the TYPE attribute.
   * @param elementName The DIVn element name.
   * @param attrs Element attributes; `N` and `NODE` are read.
   */
  openLevel(levelType, elementName, attrs) {
    const nAttr = attrs["N"] ?? "";
    const nodeAttr = attrs["NODE"] ?? "";
    let numValue = nAttr.replace(/^§\s*/, "").trim();
    const num = nAttr.trim();
    let identifier;
    if (levelType === "title") {
      // For titles, the number before the first ":" of NODE wins when present.
      const titleFromNode = nodeAttr.split(":")[0];
      if (titleFromNode) {
        numValue = titleFromNode;
      }
      identifier = `/us/cfr/t${numValue}`;
      this.titleNumber = numValue;
    } else if (levelType === "section") {
      identifier = `/us/cfr/t${this.titleNumber}/s${numValue}`;
    } else if (levelType === "part") {
      identifier = `/us/cfr/t${this.titleNumber}/pt${numValue}`;
    } else if (levelType === "chapter") {
      identifier = `/us/cfr/t${this.titleNumber}/ch${numValue}`;
    }
    // Other level types get no identifier.
    const node = {
      type: "level",
      levelType,
      num: num || void 0,
      numValue: numValue || void 0,
      identifier,
      children: [],
      sourceElement: elementName
    };
    this.stack.push({ kind: "level", elementName, node, textBuffer: "" });
  }

  /**
   * Pop a level frame, record part-level notes, then emit the level or attach
   * it to its parent level.
   *
   * @param elementName The DIVn element name being closed.
   */
  closeLevel(elementName) {
    const frame = this.popFrame(elementName);
    if (!frame || frame.kind !== "level" || !frame.node) return;
    const levelNode = frame.node;
    const levelIndex = LEVEL_TYPES.indexOf(levelNode.levelType);
    if (levelNode.levelType === "part" && levelNode.identifier) {
      // Keep the first authority and first source note found directly under the part.
      let authority;
      let regulatorySource;
      for (const child of levelNode.children) {
        if (child.type === "note") {
          if (child.noteType === "authority" && !authority) {
            authority = this.extractNoteText(child);
          }
          if (child.noteType === "regulatorySource" && !regulatorySource) {
            regulatorySource = this.extractNoteText(child);
          }
        }
      }
      if (authority || regulatorySource) {
        this.partNotes.set(levelNode.identifier, { authority, regulatorySource });
      }
    }
    if (levelIndex >= 0 && levelIndex >= this.emitAtIndex) {
      // Emit with the chain of level frames still open, outermost first.
      const ancestors = [];
      for (const f of this.stack) {
        if (f.kind === "level" && f.node?.type === "level") {
          const ln = f.node;
          ancestors.push({
            levelType: ln.levelType,
            numValue: ln.numValue,
            heading: ln.heading,
            identifier: ln.identifier
          });
        }
      }
      const context = {
        ancestors,
        documentMeta: { ...this.documentMeta }
      };
      this.options.onEmit(levelNode, context);
    } else {
      const parentLevel = this.findParentLevel();
      if (parentLevel?.node && parentLevel.node.type === "level") {
        parentLevel.node.children.push(levelNode);
      }
    }
  }

  /**
   * Push a content frame. Sub-heading elements (HD1-HD3) start with an empty
   * bold inline as their first child.
   *
   * @param elementName Content or sub-heading element name.
   */
  openContent(elementName) {
    const node = {
      type: "content",
      variant: "content",
      children: []
    };
    if (ECFR_HEADING_ELEMENTS.has(elementName)) {
      node.children.push({
        type: "inline",
        inlineType: "bold",
        children: []
      });
    }
    this.stack.push({ kind: "content", elementName, node, textBuffer: "" });
  }

  /**
   * Pop a content frame and attach its node to a parent.
   *
   * @param elementName Content or sub-heading element name being closed.
   */
  closeContent(elementName) {
    const frame = this.popFrame(elementName);
    if (!frame || !frame.node) return;
    const contentNode = frame.node;
    if (ECFR_HEADING_ELEMENTS.has(elementName)) {
      // A sub-heading holding nothing but its empty bold wrapper is dropped.
      const boldNode = contentNode.children[0];
      if (boldNode && boldNode.type === "inline" && boldNode.inlineType === "bold") {
        if (!boldNode.text && (!boldNode.children || boldNode.children.length === 0) && contentNode.children.length <= 1) {
          return;
        }
      }
    }
    // NOTE(review): the enclosing level is preferred over an enclosing note, so
    // content inside a note reaches the note only when no level is open.
    // Confirm this precedence is intended.
    const parent = this.findParentLevel() ?? this.findParentNote();
    if (parent?.node && (parent.node.type === "level" || parent.node.type === "note")) {
      parent.node.children.push(contentNode);
    }
  }

  /**
   * Push an inline frame for a formatting element.
   *
   * @param elementName Inline element name (I, B, E, SU, FR, AC).
   * @param attrs Element attributes; `T` selects the emphasis kind for `E`.
   */
  openInline(elementName, attrs) {
    let inlineType = "text";
    if (elementName === "I") {
      inlineType = "italic";
    } else if (elementName === "B") {
      inlineType = "bold";
    } else if (elementName === "SU") {
      inlineType = "sup";
    } else if (elementName === "E") {
      const tValue = attrs["T"] ?? "";
      // Own-property check so an unexpected T value cannot resolve to an
      // inherited Object.prototype member; unknown values default to italic.
      inlineType = Object.hasOwn(ECFR_EMPHASIS_MAP, tValue) ? ECFR_EMPHASIS_MAP[tValue] : "italic";
    }
    // FR and AC keep the default "text" type.
    const node = {
      type: "inline",
      inlineType,
      children: []
    };
    this.stack.push({ kind: "inline", elementName, node, textBuffer: "" });
  }

  /**
   * Push an inline frame for a reference element.
   *
   * @param elementName "FTREF" (footnote marker) or any other reference element.
   * @param attrs Element attributes; `ID` is read.
   */
  openRef(elementName, attrs) {
    const node = elementName === "FTREF" ? {
      type: "inline",
      inlineType: "footnoteRef",
      idref: attrs["ID"]
    } : {
      type: "inline",
      inlineType: "ref",
      href: attrs["ID"],
      children: []
    };
    this.stack.push({ kind: "inline", elementName, node, textBuffer: "" });
  }

  /**
   * Pop an inline frame and attach its node to the enclosing content or inline.
   *
   * @param elementName Inline or reference element name being closed.
   */
  closeInline(elementName) {
    const frame = this.popFrame(elementName);
    if (!frame || !frame.node) return;
    const inlineNode = frame.node;
    const parentFrame = this.stack[this.stack.length - 1];
    if (!parentFrame) return;
    if (parentFrame.kind === "content" && parentFrame.node?.type === "content") {
      const parentContent = parentFrame.node;
      const firstChild = parentContent.children[0];
      const hasBoldWrapper = ECFR_HEADING_ELEMENTS.has(parentFrame.elementName) && parentContent.children.length > 0 && firstChild?.type === "inline" && firstChild.inlineType === "bold";
      if (hasBoldWrapper) {
        // Inside a sub-heading, nested inlines go into the bold wrapper.
        if (firstChild.children) {
          firstChild.children.push(inlineNode);
        }
      } else {
        parentContent.children.push(inlineNode);
      }
    } else if (parentFrame.kind === "inline" && parentFrame.node?.type === "inline") {
      if (parentFrame.node.children) {
        parentFrame.node.children.push(inlineNode);
      }
    }
    // NOTE(review): under any other parent (heading, noteContent, tableCell,
    // note, ...) the inline node and its text are discarded. Confirm that
    // formatted text in headings, PSPACE and table cells should be dropped.
  }

  /**
   * Push a note frame.
   *
   * @param elementName Note element name (see ECFR_NOTE_ELEMENTS).
   * @param _attrs Unused.
   */
  openNote(elementName, _attrs) {
    const noteTypeMap = {
      AUTH: "authority",
      SOURCE: "regulatorySource",
      EDNOTE: "editorial",
      EFFDNOT: "effectiveDate",
      CITA: "citation",
      APPRO: "approval",
      NOTE: "general",
      CROSSREF: "crossReference",
      SECAUTH: "sectionAuthority",
      FTNT: "footnote"
    };
    const noteType = noteTypeMap[elementName] ?? elementName.toLowerCase();
    const node = {
      type: "note",
      noteType,
      children: []
    };
    this.stack.push({ kind: "note", elementName, node, textBuffer: "" });
  }

  /**
   * Pop a note frame and attach the note to the nearest level. A source note
   * additionally produces a `sourceCredit` child, placed just before the note.
   *
   * @param elementName Note element name being closed.
   */
  closeNote(elementName) {
    const frame = this.popFrame(elementName);
    if (!frame || !frame.node) return;
    const noteNode = frame.node;
    const bufferedText = frame.textBuffer.trim();
    // Text typed directly inside the note is used only when nothing else
    // produced children for it.
    if (bufferedText && noteNode.children.length === 0) {
      noteNode.children.push({
        type: "content",
        variant: "content",
        children: [{ type: "inline", inlineType: "text", text: bufferedText }]
      });
    }
    const parentLevel = this.findParentLevel();
    if (parentLevel?.node && parentLevel.node.type === "level") {
      const levelNode = parentLevel.node;
      if (noteNode.noteType === "regulatorySource") {
        const sourceText = this.extractNoteText(noteNode);
        if (sourceText) {
          levelNode.children.push({
            type: "sourceCredit",
            children: [{ type: "inline", inlineType: "text", text: sourceText }]
          });
        }
      }
      levelNode.children.push(noteNode);
    }
  }

  /** Pop the table frame and attach a table node to the nearest level. */
  closeTable() {
    const frame = this.popFrame("TABLE");
    if (!frame || frame.kind !== "table") return;
    const tableNode = {
      type: "table",
      variant: "xhtml",
      headers: frame.headers ?? [],
      rows: frame.rows ?? []
    };
    const parentLevel = this.findParentLevel();
    if (parentLevel?.node && parentLevel.node.type === "level") {
      parentLevel.node.children.push(tableNode);
    }
  }

  /** Pop the row frame and file the collected cells as a header row or body row. */
  closeTableRow() {
    const rowFrame = this.popFrame("TR");
    if (!rowFrame) return;
    const tableFrame = this.findTableFrame();
    if (tableFrame && tableFrame.currentRow) {
      if (tableFrame.isHeaderRow) {
        tableFrame.headers?.push([...tableFrame.currentRow]);
      } else {
        tableFrame.rows?.push([...tableFrame.currentRow]);
      }
      tableFrame.currentRow = [];
    }
  }

  /** Pop the cell frame and append its trimmed text to the current row. */
  closeTableCell() {
    // Check before popping so a non-cell frame on top is never removed.
    const top = this.stack[this.stack.length - 1];
    if (!top || top.kind !== "tableCell") return;
    this.stack.pop();
    const tableFrame = this.findTableFrame();
    if (tableFrame?.currentRow) {
      tableFrame.currentRow.push(top.textBuffer.trim());
    }
  }

  /**
   * Remove and return the innermost frame opened by `elementName`.
   *
   * @param elementName Element name to look for.
   * @returns The removed frame, or undefined when no frame matches. The stack
   *   is left untouched in that case; an unrelated frame is never popped.
   */
  popFrame(elementName) {
    for (let i = this.stack.length - 1; i >= 0; i--) {
      if (this.stack[i]?.elementName === elementName) {
        return this.stack.splice(i, 1)[0];
      }
    }
    return void 0;
  }

  /** @returns The innermost frame of kind "level", or undefined. */
  findParentLevel() {
    for (let i = this.stack.length - 1; i >= 0; i--) {
      if (this.stack[i]?.kind === "level") {
        return this.stack[i];
      }
    }
    return void 0;
  }

  /** @returns The innermost frame of kind "note", or undefined. */
  findParentNote() {
    for (let i = this.stack.length - 1; i >= 0; i--) {
      if (this.stack[i]?.kind === "note") {
        return this.stack[i];
      }
    }
    return void 0;
  }

  /** @returns The innermost frame of kind "table", or undefined. */
  findTableFrame() {
    for (let i = this.stack.length - 1; i >= 0; i--) {
      if (this.stack[i]?.kind === "table") {
        return this.stack[i];
      }
    }
    return void 0;
  }

  /**
   * Concatenate the `text` of the direct children of every content child of a note.
   *
   * @param noteNode Note node to read.
   * @returns The joined text, trimmed.
   */
  extractNoteText(noteNode) {
    const parts = [];
    for (const child of noteNode.children) {
      if (child.type === "content") {
        for (const inline of child.children) {
          if (inline.text) parts.push(inline.text);
        }
      }
    }
    return parts.join("").trim();
  }
};
|
|
738
|
+
/**
 * Remove a leading structural label from a heading.
 *
 * Two label shapes are recognised, both case-insensitively and only at the very
 * start of the string:
 *   - "<CHAPTER|PART|SUBCHAPTER|SUBPART|SUBTITLE|DIVISION|ARTICLE> <id> <dash> "
 *   - "Title <digits> <dash> ", after which a trailing "--Volume ..." is cut too
 *
 * @param {string} heading Heading text.
 * @returns {string} The heading without its label, trimmed. When stripping would
 *   leave nothing, or no label matches, the trimmed original is returned.
 */
function stripLevelPrefix(heading) {
  const untouched = heading.trim();

  const levelLabel = heading.match(
    /^(?:CHAPTER|PART|SUBCHAPTER|SUBPART|SUBTITLE|DIVISION|ARTICLE)\s+[A-Za-z0-9]+\s*[—–-]\s*/i
  );
  if (levelLabel !== null) {
    const remainder = heading.slice(levelLabel[0].length).trim();
    return remainder === "" ? untouched : remainder;
  }

  const titleLabel = heading.match(/^Title\s+\d+\s*[—–-]\s*/i);
  if (titleLabel === null) {
    return untouched;
  }

  const afterLabel = heading.slice(titleLabel[0].length).trim();
  const volumeAt = afterLabel.search(/--Volume\s/i);
  const titleName = volumeAt === -1 ? afterLabel : afterLabel.slice(0, volumeAt).trim();
  return titleName === "" ? untouched : titleName;
}
|
|
757
|
+
|
|
758
|
+
// src/ecfr-frontmatter.ts
|
|
759
|
+
/**
 * Assemble the frontmatter record for an emitted eCFR level node.
 *
 * @param node Emitted level node (`levelType`, `numValue`, `heading`,
 *   `identifier`, `status` and `children` are read).
 * @param context Emit context; `ancestors` and `documentMeta.dcTitle` are read.
 * @returns Frontmatter object. Keys are inserted in a fixed order and optional
 *   keys appear only when a value is available.
 */
function buildEcfrFrontmatter(node, context) {
  // First ancestor of the requested level type, if any.
  const ancestorAt = (levelType) => context.ancestors.find((entry) => entry.levelType === levelType);
  const titleLevel = ancestorAt("title");
  const partLevel = ancestorAt("part");
  const chapterLevel = ancestorAt("chapter");
  const subchapterLevel = ancestorAt("subchapter");
  const nodeIsPart = node.levelType === "part";

  const titleNum = parseInt(titleLevel?.numValue ?? node.numValue ?? "0", 10);
  const sectionNum = node.numValue ?? "0";
  const sectionName = node.heading?.trim() ?? "";
  const titleName = titleLevel?.heading?.trim() ?? context.documentMeta.dcTitle ?? "";

  let displayTitle;
  switch (node.levelType) {
    case "title":
      displayTitle = `Title ${titleNum} \u2014 ${titleName}`;
      break;
    case "part":
      displayTitle = `${titleNum} CFR Part ${sectionNum} - ${sectionName}`;
      break;
    default:
      displayTitle = `${titleNum} CFR \xA7 ${sectionNum} - ${sectionName}`;
  }

  // Notes found on the node itself take precedence over anything from ancestors.
  const authorityText = extractNoteText(node, "authority") ?? extractNoteTextFromAncestors(context, "authority");
  const sourceText = extractNoteText(node, "regulatorySource") ?? extractNoteTextFromAncestors(context, "regulatorySource");
  const creditText = extractSourceCreditText(node);

  // The same generation date (YYYY-MM-DD, from toISOString) fills both date fields.
  const generatedOn = (/* @__PURE__ */ new Date()).toISOString().slice(0, 10);

  const fm = {
    source: "ecfr",
    legal_status: "authoritative_unofficial",
    identifier: node.identifier ?? `/us/cfr/t${titleNum}/s${sectionNum}`,
    title: displayTitle,
    title_number: titleNum,
    title_name: titleName,
    // Regulations, not legislation
    positive_law: false,
    currency: generatedOn,
    last_updated: generatedOn
  };

  if (nodeIsPart || node.levelType === "section") {
    fm.section_number = sectionNum;
    fm.section_name = sectionName;
  }

  // chapter_number is written only when the chapter number parses as an integer.
  const chapterNumber = chapterLevel?.numValue ? parseInt(chapterLevel.numValue, 10) : NaN;
  if (!Number.isNaN(chapterNumber)) {
    fm.chapter_number = chapterNumber;
  }
  if (chapterLevel?.heading) {
    fm.chapter_name = chapterLevel.heading.trim();
  }

  if (subchapterLevel?.numValue) {
    fm.subchapter_number = subchapterLevel.numValue;
  }
  if (subchapterLevel?.heading) {
    fm.subchapter_name = subchapterLevel.heading.trim();
  }

  // Part fields come from the part ancestor, or from the node when it is the part.
  const partNumber = partLevel?.numValue || (nodeIsPart ? sectionNum : void 0);
  if (partNumber !== void 0) {
    fm.part_number = partNumber;
    fm.cfr_part = partNumber;
  }
  const partName = partLevel?.heading ? partLevel.heading.trim() : nodeIsPart ? sectionName : void 0;
  if (partName !== void 0) {
    fm.part_name = partName;
  }

  if (authorityText) {
    fm.authority = authorityText;
  }
  if (sourceText) {
    fm.regulatory_source = sourceText;
  }
  if (creditText) {
    fm.source_credit = creditText;
  }
  if (node.status) {
    fm.status = node.status;
  }
  return fm;
}
|
|
839
|
+
/**
 * Flatten the text of the first direct child note of the given type.
 *
 * @param node Node whose `children` are searched (not searched recursively).
 * @param {string} noteType Note type to look for, e.g. "authority".
 * @returns {string | undefined} Flattened note text, or undefined when the node
 *   has no direct child note of that type.
 */
function extractNoteText(node, noteType) {
  const firstMatch = node.children.find(
    (candidate) => candidate.type === "note" && candidate.noteType === noteType
  );
  return firstMatch === void 0 ? void 0 : flattenNoteText(firstMatch);
}
|
|
847
|
+
/**
 * Placeholder for looking up note text on ancestor levels.
 *
 * Neither argument is inspected; the result is always undefined, so callers
 * fall back to "no value".
 *
 * @param _context Emit context (unused).
 * @param _noteType Note type (unused).
 * @returns {undefined}
 */
function extractNoteTextFromAncestors(_context, _noteType) {
  return undefined;
}
|
|
850
|
+
/**
 * Read the text of the first direct `sourceCredit` child of a node.
 *
 * Only that first source credit is considered: if its text is empty the result
 * is undefined even when later source credits exist.
 *
 * @param node Node whose `children` are searched.
 * @returns {string | undefined} Joined, trimmed text of the credit's inline
 *   children that have a `text` property, or undefined when empty or absent.
 */
function extractSourceCreditText(node) {
  const credit = node.children.find((candidate) => candidate.type === "sourceCredit");
  if (credit === void 0) {
    return void 0;
  }
  const joined = credit.children
    .filter((piece) => piece.type === "inline" && "text" in piece)
    .map((piece) => piece.text)
    .join("")
    .trim();
  return joined || void 0;
}
|
|
865
|
+
/**
 * Flatten a note-like AST node into a single trimmed string.
 *
 * Walks the direct children of `node`:
 * - `content` children contribute the text of their inline children,
 * - inline children with non-empty `text` contribute that text,
 * - anything else is flattened recursively.
 *
 * Inline nodes inside a `content` child that carry no `text` of their own but
 * wrap further inline `children` are descended into, so their text is kept
 * instead of being dropped.
 * NOTE(review): assumes wrapper inlines (e.g. emphasis) hold their text in
 * nested inline children - confirm against the AST builder.
 *
 * @param {object} node - AST node; nodes without a `children` array yield "".
 * @returns {string} Concatenated text with surrounding whitespace trimmed.
 */
function flattenNoteText(node) {
  const parts = [];

  // Collects inline text without trimming so that whitespace at the edges of
  // a wrapped span still separates it from neighbouring text.
  const collectInline = (inline) => {
    if (inline.type !== "inline") {
      return;
    }
    if ("text" in inline && inline.text) {
      parts.push(inline.text);
    } else if ("children" in inline && Array.isArray(inline.children)) {
      for (const nested of inline.children) {
        collectInline(nested);
      }
    }
  };

  if ("children" in node && Array.isArray(node.children)) {
    for (const child of node.children) {
      if (child.type === "content" && "children" in child) {
        for (const inline of child.children) {
          collectInline(inline);
        }
      } else if (child.type === "inline" && "text" in child && child.text) {
        parts.push(child.text);
      } else {
        parts.push(flattenNoteText(child));
      }
    }
  }
  return parts.join("").trim();
}
|
|
884
|
+
|
|
885
|
+
// src/ecfr-path.ts
|
|
886
|
+
import { join } from "path";
|
|
887
|
+
/**
 * Compute the Markdown output path for an emitted eCFR node.
 *
 * Layout produced under `<outputRoot>/ecfr`:
 * - title:    `title-NN.md`
 * - chapter:  `title-NN/chapter-<num>.md`
 * - part:     `title-NN[/chapter-<c>]/part-<num>.md`
 * - appendix: `title-NN[/chapter-<c>][/part-<p>]/<sanitized name>.md`
 * - other:    `title-NN[/chapter-<c>][/part-<p>]/section-<num>.md`
 *
 * The title number comes from the title ancestor, falling back to the node's
 * own `numValue` and then "0". Missing node numbers also fall back to "0".
 *
 * @param {object} node - Emitted node (`levelType`, `numValue`, `heading` are read).
 * @param {object} context - Emit context whose `ancestors` are searched.
 * @param {string} outputRoot - Root output directory.
 * @returns {string} Joined file path.
 */
function buildEcfrOutputPath(node, context, outputRoot) {
  const resolvedTitle = findAncestorValue(context, "title") ?? node.numValue ?? "0";
  const titleDir = `title-${padTwo(resolvedTitle)}`;

  // Title and chapter files never sit below a chapter/part directory.
  switch (node.levelType) {
    case "title":
      return join(outputRoot, "ecfr", `${titleDir}.md`);
    case "chapter":
      return join(outputRoot, "ecfr", titleDir, `chapter-${node.numValue ?? "0"}.md`);
    default:
      break;
  }

  const chapterNum = findAncestorValue(context, "chapter");
  const partNum = findAncestorValue(context, "part");
  const pathPieces = [outputRoot, "ecfr", titleDir];
  if (chapterNum) {
    pathPieces.push(`chapter-${chapterNum}`);
  }

  if (node.levelType === "part") {
    pathPieces.push(`part-${node.numValue ?? "0"}.md`);
    return join(...pathPieces);
  }

  if (partNum) {
    pathPieces.push(`part-${partNum}`);
  }
  const fileName =
    node.levelType === "appendix"
      ? `${sanitizeFilename(node.numValue ?? node.heading ?? "appendix")}.md`
      : `section-${node.numValue ?? "0"}.md`;
  pathPieces.push(fileName);
  return join(...pathPieces);
}
|
|
918
|
+
/**
 * Build the directory path that holds a title's output files.
 *
 * @param {string} titleNum - Title number; formatted through `padTwo`.
 * @param {string} outputRoot - Root output directory.
 * @returns {string} `<outputRoot>/ecfr/title-<padded number>`.
 */
function buildTitleDir(titleNum, outputRoot) {
  const directoryName = `title-${padTwo(titleNum)}`;
  return join(outputRoot, "ecfr", directoryName);
}
|
|
921
|
+
/**
 * Look up the `numValue` of the first ancestor with the given level type.
 *
 * @param {object} context - Emit context; `context.ancestors` is scanned in order.
 * @param {string} levelType - Level type to match (e.g. "title", "part").
 * @returns {string|undefined} The ancestor's `numValue`, or `undefined` when
 *   no ancestor matches.
 */
function findAncestorValue(context, levelType) {
  for (const ancestor of context.ancestors) {
    if (ancestor.levelType === levelType) {
      return ancestor.numValue;
    }
  }
  return void 0;
}
|
|
924
|
+
/**
 * Left-pad a numeric string to at least two digits.
 *
 * The input is parsed as a base-10 integer; when that fails the input is
 * returned unchanged.
 *
 * @param {string} num - Number-like string (e.g. "5").
 * @returns {string} Zero-padded integer string (e.g. "05"), or `num` itself
 *   when it does not start with an integer.
 */
function padTwo(num) {
  const parsed = Number.parseInt(num, 10);
  if (Number.isNaN(parsed)) {
    return num;
  }
  return `${parsed}`.padStart(2, "0");
}
|
|
928
|
+
/**
 * Turn an arbitrary label into a lowercase, hyphen-separated filename stem.
 *
 * Every run of characters outside `[a-z0-9]` becomes a single hyphen, leading
 * and trailing hyphens are removed, and the result is capped at 50
 * characters. A hyphen exposed at the end by the truncation is removed as
 * well, so the stem never ends in "-".
 *
 * @param {string} name - Raw label (e.g. an appendix number or heading).
 * @returns {string} Sanitized stem, or "appendix" when nothing usable remains.
 */
function sanitizeFilename(name) {
  const slug = name
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, "-")
    .replace(/^-|-$/g, "")
    .slice(0, 50)
    // Truncation can cut right after a separator; drop that dangling hyphen.
    .replace(/-$/, "");
  return slug || "appendix";
}
|
|
932
|
+
|
|
933
|
+
// src/converter.ts
|
|
934
|
+
/**
 * Convert one eCFR title XML file into Markdown files plus metadata.
 *
 * The XML is streamed through `XMLParser` into an `EcfrASTBuilder`, which
 * emits nodes at title, part or section level. Depending on `granularity`:
 * - "section": one file per section, duplicate part/section numbers get a
 *   `-<n>` suffix, links are resolved between sections, and `_meta.json` /
 *   `README.md` files are written via `writeMetaFiles`;
 * - "chapter": emitted sections are grouped by chapter ancestor and rendered
 *   as one synthetic chapter document each;
 * - anything else: every emitted node at the emit level is written as-is.
 *
 * @param {object} options - Conversion options.
 * @param {string} options.input - Path of the XML file to read.
 * @param {string} options.output - Root output directory.
 * @param {string} options.granularity - "title", "part", "chapter" or "section".
 * @param {boolean} [options.dryRun] - When truthy nothing is written and the
 *   result comes from `buildDryRunResult`.
 * @param {*} [options.linkStyle] - Forwarded to the renderer unchanged.
 *   The whole `options` object is also handed to `buildNotesFilter`.
 * @returns {Promise<object>} Summary with `sectionsWritten`, `files`,
 *   `titleNumber`, `titleName`, `dryRun`, `partCount`, `totalTokenEstimate`
 *   (rendered characters / 4, rounded up) and `peakMemoryBytes`.
 */
async function convertEcfrTitle(options) {
  const { input, output, granularity, dryRun } = options;

  let peakMemory = process.memoryUsage().rss;
  const trackPeakMemory = () => {
    const rss = process.memoryUsage().rss;
    if (rss > peakMemory) peakMemory = rss;
  };
  const findPartAncestor = (context) => context.ancestors.find((a) => a.levelType === "part");

  // Chapter output is assembled from sections, so only title/part change the emit level.
  const emitAt = granularity === "title" ? "title" : granularity === "part" ? "part" : "section";
  const collected = [];
  const builder = new EcfrASTBuilder({
    emitAt,
    onEmit: (node, context) => {
      collected.push({ node, context });
    }
  });
  const parser = new XMLParser({ defaultNamespace: "" });
  parser.on("openElement", (name, attrs) => builder.onOpenElement(name, attrs));
  parser.on("closeElement", (name) => builder.onCloseElement(name));
  parser.on("text", (text) => builder.onText(text));

  const stream = createReadStream(input, "utf-8");
  try {
    await parser.parseStream(stream);
  } finally {
    // Release the file handle even when parsing fails part-way; destroying an
    // already finished stream is a no-op.
    stream.destroy();
  }
  trackPeakMemory();

  const partNotes = builder.getPartNotes();

  // Title number/name come from the first emitted item: its title ancestor,
  // or the item itself when it is the title node.
  let titleNumber = "0";
  let titleName = "";
  const firstCollected = collected[0];
  if (firstCollected) {
    const firstCtx = firstCollected.context;
    const titleAncestor = firstCtx.ancestors.find((a) => a.levelType === "title");
    if (titleAncestor) {
      titleNumber = titleAncestor.numValue ?? "0";
      titleName = titleAncestor.heading ?? firstCtx.documentMeta.dcTitle ?? "";
    } else if (firstCollected.node.levelType === "title") {
      titleNumber = firstCollected.node.numValue ?? "0";
      titleName = firstCollected.node.heading ?? "";
    }
  }

  const notesFilter = buildNotesFilter(options);
  const renderOpts = {
    headingOffset: 0,
    linkStyle: options.linkStyle,
    notesFilter
  };

  if (dryRun) {
    return buildDryRunResult(collected, granularity, titleNumber, titleName, peakMemory);
  }

  const linkResolver = createLinkResolver();
  const sectionMetas = [];

  if (granularity === "section") {
    // Pass 1: decide every output path and register link targets up front so
    // that references to sections rendered later still resolve.
    const seen = /* @__PURE__ */ new Map();
    const planned = [];
    for (const { node, context } of collected) {
      const partAncestor = findPartAncestor(context);
      const partNum = partAncestor?.numValue ?? "__root__";
      const secNum = node.numValue ?? "0";
      const key = `${partNum}/${secNum}`;
      const occurrence = (seen.get(key) ?? 0) + 1;
      seen.set(key, occurrence);
      const basePath = buildEcfrOutputPath(node, context, output);
      // Second and later occurrences of the same part/section get "-2", "-3", ...
      const filePath = occurrence > 1 ? basePath.replace(/\.md$/, `-${occurrence}.md`) : basePath;
      planned.push({ node, context, partAncestor, partNum, secNum, filePath });
      if (node.identifier && occurrence === 1) {
        linkResolver.register(node.identifier, filePath);
      }
    }

    // Pass 2: render and write each section, collecting metadata as we go.
    const titleDir = buildTitleDir(titleNumber, output);
    for (const { node, context, partAncestor, partNum, secNum, filePath } of planned) {
      const frontmatter = buildEcfrFrontmatter(node, context);
      const partId = partAncestor?.identifier;
      if (partId && (!frontmatter.authority || !frontmatter.regulatory_source)) {
        // Fill gaps from the notes the builder recorded for the enclosing part.
        const partNoteData = partNotes.get(partId);
        if (partNoteData) {
          if (!frontmatter.authority && partNoteData.authority) {
            frontmatter.authority = partNoteData.authority;
          }
          if (!frontmatter.regulatory_source && partNoteData.regulatorySource) {
            frontmatter.regulatory_source = partNoteData.regulatorySource;
          }
        }
      }
      const markdown = renderDocument(node, frontmatter, {
        ...renderOpts,
        resolveLink: (identifier) => linkResolver.resolve(identifier, filePath)
      });
      await mkdir(dirname(filePath), { recursive: true });
      await writeFile(filePath, markdown, "utf-8");
      const hasNotes = node.children.some((c) => c.type === "note" || c.type === "notesContainer");
      sectionMetas.push({
        identifier: node.identifier ?? `/us/cfr/t${titleNumber}/s${secNum}`,
        number: secNum,
        name: node.heading?.trim() ?? "",
        fileName: basename(filePath),
        relativeFile: relative(titleDir, filePath),
        contentLength: markdown.length,
        hasNotes,
        status: node.status ?? "current",
        partIdentifier: partAncestor?.identifier ?? "",
        partNumber: partNum,
        partName: partAncestor?.heading?.trim() ?? ""
      });
      trackPeakMemory();
    }

    await writeMetaFiles(sectionMetas, titleNumber, titleName, output, granularity, input);
    return {
      sectionsWritten: sectionMetas.length,
      files: sectionMetas.map((m) => join2(titleDir, m.relativeFile)),
      titleNumber,
      titleName,
      dryRun: false,
      partCount: new Set(sectionMetas.map((s) => s.partNumber)).size,
      totalTokenEstimate: Math.ceil(sectionMetas.reduce((sum, m) => sum + m.contentLength, 0) / 4),
      peakMemoryBytes: peakMemory
    };
  }

  const files = [];
  let totalLength = 0;
  // Shared render-and-write step for the chapter and title/part paths.
  const writeNode = async (node, context) => {
    const frontmatter = buildEcfrFrontmatter(node, context);
    const markdown = renderDocument(node, frontmatter, renderOpts);
    const filePath = buildEcfrOutputPath(node, context, output);
    await mkdir(dirname(filePath), { recursive: true });
    await writeFile(filePath, markdown, "utf-8");
    files.push(filePath);
    totalLength += markdown.length;
  };

  if (granularity === "chapter") {
    // Group emitted sections by chapter number; sections without a chapter
    // ancestor share the "__root__" bucket.
    const chapterMap = /* @__PURE__ */ new Map();
    for (const item of collected) {
      const chapterAnc = item.context.ancestors.find((a) => a.levelType === "chapter");
      const chapterKey = chapterAnc?.numValue ?? "__root__";
      const existing = chapterMap.get(chapterKey);
      if (existing) {
        existing.sections.push(item);
      } else {
        chapterMap.set(chapterKey, {
          sections: [item],
          chapterAncestor: chapterAnc ?? { levelType: "chapter", numValue: chapterKey },
          firstContext: item.context
        });
      }
    }
    for (const { sections, chapterAncestor, firstContext } of chapterMap.values()) {
      // Synthetic chapter node wrapping the grouped sections.
      const chapterNode = {
        type: "level",
        levelType: "chapter",
        num: chapterAncestor.numValue,
        numValue: chapterAncestor.numValue,
        heading: chapterAncestor.heading,
        identifier: chapterAncestor.identifier,
        children: sections.map((s) => s.node)
      };
      await writeNode(chapterNode, firstContext);
    }
  } else {
    for (const { node, context } of collected) {
      if (node.levelType === emitAt) {
        await writeNode(node, context);
      }
    }
  }

  const partCount = granularity === "part" ? files.length : granularity === "chapter" ? new Set(
    collected.map((c) => findPartAncestor(c.context)?.numValue).filter(Boolean)
  ).size : 0;
  return {
    sectionsWritten: files.length,
    files,
    titleNumber,
    titleName,
    dryRun: false,
    partCount,
    totalTokenEstimate: Math.ceil(totalLength / 4),
    peakMemoryBytes: peakMemory
  };
}
|
|
1124
|
+
/**
 * Build the conversion summary returned for a dry run (nothing is written).
 *
 * For "chapter" granularity every collected item counts towards the token
 * estimate and `sectionsWritten` is the number of distinct chapter keys
 * (items without a chapter ancestor share "__root__"). For every other
 * granularity only items whose `levelType` equals the target level ("title",
 * "part", otherwise "section") are counted and estimated.
 *
 * @param {Array<{node: object, context: object}>} collected - Emitted items.
 * @param {string} granularity - Requested output granularity.
 * @param {string} titleNumber - Title number to report.
 * @param {string} titleName - Title name to report.
 * @param {number} peakMemory - Peak RSS in bytes to report.
 * @returns {object} Summary with `dryRun: true`, an empty `files` list and
 *   `partCount` fixed at 0.
 */
function buildDryRunResult(collected, granularity, titleNumber, titleName, peakMemory) {
  let counted;
  let count;
  if (granularity === "chapter") {
    const chapterKeys = new Set(
      collected.map(
        ({ context }) => context.ancestors.find((a) => a.levelType === "chapter")?.numValue ?? "__root__"
      )
    );
    counted = collected;
    count = chapterKeys.size;
  } else {
    const targetLevel = granularity === "title" || granularity === "part" ? granularity : "section";
    counted = collected.filter(({ node }) => node.levelType === targetLevel);
    count = counted.length;
  }
  const totalEstimate = counted.reduce((sum, { node }) => sum + estimateTokens(node), 0);
  return {
    sectionsWritten: count,
    files: [],
    titleNumber,
    titleName,
    dryRun: true,
    partCount: 0,
    totalTokenEstimate: totalEstimate,
    peakMemoryBytes: peakMemory
  };
}
|
|
1155
|
+
/**
 * Estimate the token count of an AST subtree.
 *
 * Sums the `text` length of every inline node in the subtree (the node itself
 * included) and divides by four, rounding up.
 *
 * @param {object} node - Root of the subtree to measure.
 * @returns {number} Estimated token count.
 */
function estimateTokens(node) {
  let charCount = 0;
  // Explicit work list instead of recursion; visiting order does not matter
  // because only a sum is accumulated.
  const pending = [node];
  while (pending.length > 0) {
    const current = pending.pop();
    if (current.type === "inline" && "text" in current && current.text) {
      charCount += current.text.length;
    }
    if ("children" in current && Array.isArray(current.children)) {
      for (const child of current.children) {
        pending.push(child);
      }
    }
  }
  return Math.ceil(charCount / 4);
}
|
|
1170
|
+
/**
 * Derive the renderer's notes filter from the conversion options.
 *
 * - `includeNotes` truthy: no filter (`undefined`).
 * - none of the selective flags truthy: every category is set to `false`.
 * - otherwise: each category mirrors its option value as given.
 *
 * @param {object} options - Options carrying `includeNotes`,
 *   `includeEditorialNotes`, `includeStatutoryNotes` and `includeAmendments`.
 * @returns {{editorial: *, statutory: *, amendments: *}|undefined} Filter
 *   object, or `undefined` when all notes are included.
 */
function buildNotesFilter(options) {
  if (options.includeNotes) {
    return undefined;
  }
  const { includeEditorialNotes, includeStatutoryNotes, includeAmendments } = options;
  const anySelected = includeEditorialNotes || includeStatutoryNotes || includeAmendments;
  return anySelected
    ? {
        editorial: includeEditorialNotes,
        statutory: includeStatutoryNotes,
        amendments: includeAmendments
      }
    : { editorial: false, statutory: false, amendments: false };
}
|
|
1182
|
+
/**
 * Write the metadata sidecar files for a title converted at section level.
 *
 * Produces, under the title directory:
 * - one `_meta.json` per part directory listing that part's sections,
 * - a title-level `_meta.json` with statistics and the full part index,
 * - a `README.md` built by `buildReadme`.
 *
 * Sections are grouped by `partNumber` in first-seen order. Token estimates
 * are character counts divided by four, rounded up.
 *
 * @param {Array<object>} sectionMetas - Per-section metadata records.
 * @param {string} titleNumber - Title number (also parsed as a base-10 integer).
 * @param {string} titleName - Title heading.
 * @param {string} outputRoot - Root output directory.
 * @param {string} granularity - Granularity recorded in the metadata.
 * @param {string} sourceXml - Source XML path; only its basename is recorded.
 * @returns {Promise<void>} Resolves once all files are written.
 */
async function writeMetaFiles(sectionMetas, titleNumber, titleName, outputRoot, granularity, sourceXml) {
  const sectionsByPart = /* @__PURE__ */ new Map();
  for (const meta of sectionMetas) {
    const bucket = sectionsByPart.get(meta.partNumber);
    if (bucket) {
      bucket.push(meta);
    } else {
      sectionsByPart.set(meta.partNumber, [meta]);
    }
  }

  const parts = [];
  for (const [partNum, sections] of sectionsByPart) {
    // Buckets are created with their first element, so sections[0] always exists.
    const first = sections[0];
    parts.push({
      identifier: first.partIdentifier || `/us/cfr/t${titleNumber}/pt${partNum}`,
      number: partNum,
      name: first.partName,
      // NOTE(review): the part's files may live below a chapter directory
      // (see getPartDirPath), but this field records only `part-<n>` - confirm
      // whether consumers expect a path relative to the title directory.
      directory: `part-${partNum}`,
      sections: sections.map((s) => ({
        identifier: s.identifier,
        number: s.number,
        name: s.name,
        file: s.fileName,
        token_estimate: Math.ceil(s.contentLength / 4),
        has_notes: s.hasNotes,
        status: s.status
      }))
    });
  }

  const titleDir = buildTitleDir(titleNumber, outputRoot);
  const titleNumberInt = Number.parseInt(titleNumber, 10);
  await mkdir(titleDir, { recursive: true });

  for (const part of parts) {
    const partDir = join2(titleDir, getPartDirPath(sectionMetas, part.number));
    await mkdir(partDir, { recursive: true });
    const partMeta = {
      format_version: FORMAT_VERSION,
      identifier: part.identifier,
      part_number: part.number,
      part_name: part.name,
      title_number: titleNumberInt,
      section_count: part.sections.length,
      sections: part.sections
    };
    await writeFile(join2(partDir, "_meta.json"), JSON.stringify(partMeta, null, 2) + "\n", "utf-8");
  }

  const totalChars = sectionMetas.reduce((sum, m) => sum + m.contentLength, 0);
  // Read the clock once so `generated_at` and `currency` can never disagree
  // across a UTC date boundary.
  const generatedAt = (/* @__PURE__ */ new Date()).toISOString();
  const titleMeta = {
    format_version: FORMAT_VERSION,
    generator: GENERATOR,
    generated_at: generatedAt,
    identifier: `/us/cfr/t${titleNumber}`,
    title_number: titleNumberInt,
    title_name: titleName,
    source: "ecfr",
    legal_status: "authoritative_unofficial",
    currency: generatedAt.slice(0, 10),
    source_xml: basename(sourceXml),
    granularity,
    stats: {
      part_count: parts.length,
      section_count: sectionMetas.length,
      total_files: sectionMetas.length,
      total_tokens_estimate: Math.ceil(totalChars / 4)
    },
    parts
  };
  await writeFile(join2(titleDir, "_meta.json"), JSON.stringify(titleMeta, null, 2) + "\n", "utf-8");

  const readme = buildReadme(titleNumber, titleName, parts, sectionMetas, granularity);
  await writeFile(join2(titleDir, "README.md"), readme, "utf-8");
}
|
|
1251
|
+
/**
 * Resolve the directory (relative to the title directory) that holds a part.
 *
 * Uses the directory of the first section recorded for the part. Falls back
 * to `part-<partNumber>` when the part has no sections or its first section
 * sits directly in the title directory.
 *
 * @param {Array<object>} sectionMetas - Section records with `partNumber`
 *   and `relativeFile`.
 * @param {string} partNumber - Part number to look up.
 * @returns {string} Relative directory path for the part.
 */
function getPartDirPath(sectionMetas, partNumber) {
  const fallbackDir = `part-${partNumber}`;
  const firstOfPart = sectionMetas.find((meta) => meta.partNumber === partNumber);
  if (!firstOfPart) {
    return fallbackDir;
  }
  const parentDir = dirname(firstOfPart.relativeFile);
  return parentDir === "." ? fallbackDir : parentDir;
}
|
|
1257
|
+
/**
 * Render the README.md summary for a converted title.
 *
 * The document has a heading, a metrics table (source, legal status, part
 * and section counts, estimated tokens, granularity), one `###` entry per
 * part and a footer. Estimated tokens are total characters divided by four,
 * rounded up; counts are formatted with `toLocaleString()`.
 *
 * @param {string} titleNumber - Title number shown in the heading.
 * @param {string} titleName - Title name shown in the heading.
 * @param {Array<object>} parts - Part entries (`number`, `name`, `sections`).
 * @param {Array<object>} sectionMetas - Section records (`contentLength`).
 * @param {string} granularity - Granularity shown in the table.
 * @returns {string} Markdown text ending with a newline.
 */
function buildReadme(titleNumber, titleName, parts, sectionMetas, granularity) {
  const totalChars = sectionMetas.reduce((sum, m) => sum + m.contentLength, 0);
  const totalTokens = Math.ceil(totalChars / 4);

  const header = [
    `# Title ${titleNumber} \u2014 ${titleName}`,
    "",
    "| Metric | Value |",
    "|--------|-------|",
    `| Source | eCFR (govinfo.gov) |`,
    `| Legal Status | Authoritative, unofficial |`,
    `| Parts | ${parts.length.toLocaleString()} |`,
    `| Sections | ${sectionMetas.length.toLocaleString()} |`,
    `| Estimated Tokens | ${totalTokens.toLocaleString()} |`,
    `| Granularity | ${granularity} |`,
    "",
    "## Parts",
    ""
  ];
  // Each part contributes its heading line followed by a blank line.
  const partEntries = parts.flatMap((part) => [
    `### Part ${part.number} \u2014 ${part.name} (${part.sections.length} sections)`,
    ""
  ]);
  const footer = ["---", "", "Generated by LexBuild", ""];

  return [...header, ...partEntries, ...footer].join("\n");
}
|
|
1283
|
+
|
|
1284
|
+
// src/downloader.ts
|
|
1285
|
+
import { createWriteStream } from "fs";
|
|
1286
|
+
import { mkdir as mkdir2, stat } from "fs/promises";
|
|
1287
|
+
import { join as join3 } from "path";
|
|
1288
|
+
import { pipeline } from "stream/promises";
|
|
1289
|
+
import { Readable } from "stream";
|
|
1290
|
+
// Base URL under which the per-title eCFR bulk XML files are requested.
var ECFR_BULK_BASE = "https://www.govinfo.gov/bulkdata/ECFR";
// Number of title numbers enumerated in ECFR_TITLE_NUMBERS.
var ECFR_TITLE_COUNT = 50;
// Title numbers 1 through ECFR_TITLE_COUNT, in ascending order.
var ECFR_TITLE_NUMBERS = Array.from(Array(ECFR_TITLE_COUNT).keys(), (index) => index + 1);
// Title numbers that downloadEcfrTitles skips without issuing a request.
var RESERVED_TITLES = /* @__PURE__ */ new Set([35]);
|
|
1294
|
+
/**
 * Build the bulk-data download URL for one eCFR title.
 *
 * @param {number|string} titleNumber - Title number inserted verbatim.
 * @returns {string} `<ECFR_BULK_BASE>/title-<n>/ECFR-title<n>.xml`.
 */
function buildEcfrDownloadUrl(titleNumber) {
  const titleFolder = `title-${titleNumber}`;
  const xmlFileName = `ECFR-title${titleNumber}.xml`;
  return [ECFR_BULK_BASE, titleFolder, xmlFileName].join("/");
}
|
|
1297
|
+
/**
 * Download eCFR title XML files into a directory, one title at a time.
 *
 * Titles listed in `RESERVED_TITLES` are skipped. A non-OK HTTP response or a
 * response without a body is reported with `console.warn` and skipped, so the
 * remaining titles are still attempted. Network or file-system failures
 * (rejections from `fetch`, `pipeline` or `stat`) propagate to the caller.
 *
 * @param {object} options - Download options.
 * @param {string} options.output - Directory to write into (created if missing).
 * @param {number[]} [options.titles] - Title numbers to fetch; defaults to
 *   `ECFR_TITLE_NUMBERS`.
 * @returns {Promise<{titlesDownloaded: number, files: Array<{path: string,
 *   titleNumber: number, size: number}>, totalBytes: number}>} Summary of the
 *   files that were written.
 */
async function downloadEcfrTitles(options) {
  const { output } = options;
  const titles = options.titles ?? ECFR_TITLE_NUMBERS;
  await mkdir2(output, { recursive: true });

  const files = [];
  let totalBytes = 0;
  for (const titleNum of titles) {
    if (RESERVED_TITLES.has(titleNum)) continue;

    const url = buildEcfrDownloadUrl(titleNum);
    const filePath = join3(output, `ECFR-title${titleNum}.xml`);
    const response = await fetch(url);

    if (!response.ok) {
      console.warn(`Failed to download eCFR Title ${titleNum}: ${response.status}`);
      // An unread body keeps its connection busy; cancel it so the connection
      // is released before the next request.
      try {
        await response.body?.cancel();
      } catch {
        // Best effort only: a failed cancel must not abort the remaining titles.
      }
      continue;
    }

    const body = response.body;
    if (!body) {
      console.warn(`Failed to download eCFR Title ${titleNum}: empty response body`);
      continue;
    }

    await pipeline(Readable.fromWeb(body), createWriteStream(filePath));
    // Report the size actually written to disk.
    const { size } = await stat(filePath);
    totalBytes += size;
    files.push({ path: filePath, titleNumber: titleNum, size });
  }
  return { titlesDownloaded: files.length, files, totalBytes };
}
|
|
1323
|
+
export {
|
|
1324
|
+
ECFR_BLOCK_ELEMENTS,
|
|
1325
|
+
ECFR_CONTENT_ELEMENTS,
|
|
1326
|
+
ECFR_DIV_ELEMENTS,
|
|
1327
|
+
ECFR_EMPHASIS_MAP,
|
|
1328
|
+
ECFR_HEADING_ELEMENTS,
|
|
1329
|
+
ECFR_IGNORE_ELEMENTS,
|
|
1330
|
+
ECFR_INLINE_ELEMENTS,
|
|
1331
|
+
ECFR_NOTE_ELEMENTS,
|
|
1332
|
+
ECFR_PASSTHROUGH_ELEMENTS,
|
|
1333
|
+
ECFR_REF_ELEMENTS,
|
|
1334
|
+
ECFR_SKIP_ELEMENTS,
|
|
1335
|
+
ECFR_TABLE_ELEMENTS,
|
|
1336
|
+
ECFR_TITLE_COUNT,
|
|
1337
|
+
ECFR_TITLE_NUMBERS,
|
|
1338
|
+
ECFR_TYPE_TO_LEVEL,
|
|
1339
|
+
EcfrASTBuilder,
|
|
1340
|
+
buildEcfrDownloadUrl,
|
|
1341
|
+
convertEcfrTitle,
|
|
1342
|
+
downloadEcfrTitles
|
|
1343
|
+
};
|
|
1344
|
+
//# sourceMappingURL=index.js.map
|