@lexbuild/fr 1.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +231 -0
- package/dist/index.d.ts +414 -0
- package/dist/index.js +1502 -0
- package/dist/index.js.map +1 -0
- package/package.json +67 -0
package/README.md
ADDED
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
# @lexbuild/fr
|
|
2
|
+
|
|
3
|
+
[npm package](https://www.npmjs.com/package/@lexbuild/fr)
|
|
4
|
+
[MIT License](https://github.com/chris-c-thomas/LexBuild/blob/main/LICENSE)
|
|
5
|
+
|
|
6
|
+
Converts [Federal Register](https://www.federalregister.gov/) XML into structured Markdown optimized for AI, RAG pipelines, and semantic search. Downloads documents via the FederalRegister.gov API with rich JSON metadata (agencies, CFR references, docket IDs, effective dates) alongside XML full text.
|
|
7
|
+
|
|
8
|
+
> **Tip:** For command-line usage, install [`@lexbuild/cli`](https://www.npmjs.com/package/@lexbuild/cli) instead. This package is the programmatic API.
|
|
9
|
+
|
|
10
|
+
## Install
|
|
11
|
+
|
|
12
|
+
```bash
|
|
13
|
+
npm install @lexbuild/fr
|
|
14
|
+
# or
|
|
15
|
+
pnpm add @lexbuild/fr
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
**Peer dependency:** [`@lexbuild/core`](https://www.npmjs.com/package/@lexbuild/core) (installed automatically via workspace protocol in the monorepo).
|
|
19
|
+
|
|
20
|
+
## Quick Start
|
|
21
|
+
|
|
22
|
+
### Download and Convert
|
|
23
|
+
|
|
24
|
+
```ts
|
|
25
|
+
import { downloadFrDocuments, convertFrDocuments } from "@lexbuild/fr";
|
|
26
|
+
|
|
27
|
+
// Download Federal Register documents published in March 2026
|
|
28
|
+
const download = await downloadFrDocuments({
|
|
29
|
+
output: "./downloads/fr",
|
|
30
|
+
from: "2026-03-01",
|
|
31
|
+
to: "2026-03-31",
|
|
32
|
+
});
|
|
33
|
+
console.log(`Downloaded ${download.documentsDownloaded} documents`);
|
|
34
|
+
|
|
35
|
+
// Convert to Markdown
|
|
36
|
+
const result = await convertFrDocuments({
|
|
37
|
+
input: "./downloads/fr",
|
|
38
|
+
output: "./output",
|
|
39
|
+
linkStyle: "plaintext",
|
|
40
|
+
dryRun: false,
|
|
41
|
+
});
|
|
42
|
+
|
|
43
|
+
console.log(`${result.documentsConverted} documents, ${result.totalTokenEstimate} est. tokens`);
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
### Download by Document Type
|
|
47
|
+
|
|
48
|
+
```ts
|
|
49
|
+
import { downloadFrDocuments } from "@lexbuild/fr";
|
|
50
|
+
|
|
51
|
+
// Download only final rules from Q1 2026
|
|
52
|
+
const result = await downloadFrDocuments({
|
|
53
|
+
output: "./downloads/fr",
|
|
54
|
+
from: "2026-01-01",
|
|
55
|
+
to: "2026-03-31",
|
|
56
|
+
types: ["RULE"],
|
|
57
|
+
});
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
### Download a Single Document
|
|
61
|
+
|
|
62
|
+
```ts
|
|
63
|
+
import { downloadSingleFrDocument } from "@lexbuild/fr";
|
|
64
|
+
|
|
65
|
+
const file = await downloadSingleFrDocument("2026-06029", "./downloads/fr");
|
|
66
|
+
console.log(`XML: ${file.xmlPath}`);
|
|
67
|
+
console.log(`JSON: ${file.jsonPath}`);
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
### Convert with Type Filtering
|
|
71
|
+
|
|
72
|
+
```ts
|
|
73
|
+
import { convertFrDocuments } from "@lexbuild/fr";
|
|
74
|
+
|
|
75
|
+
const result = await convertFrDocuments({
|
|
76
|
+
input: "./downloads/fr",
|
|
77
|
+
output: "./output",
|
|
78
|
+
linkStyle: "plaintext",
|
|
79
|
+
dryRun: false,
|
|
80
|
+
from: "2026-01-01",
|
|
81
|
+
to: "2026-03-31",
|
|
82
|
+
types: ["RULE", "PRORULE"], // Only rules and proposed rules
|
|
83
|
+
});
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
## API Reference
|
|
87
|
+
|
|
88
|
+
### Functions
|
|
89
|
+
|
|
90
|
+
| Export | Description |
|
|
91
|
+
|--------|-------------|
|
|
92
|
+
| `convertFrDocuments(options)` | Convert FR XML files to Markdown |
|
|
93
|
+
| `downloadFrDocuments(options)` | Download FR documents by date range from the API |
|
|
94
|
+
| `downloadSingleFrDocument(number, output)` | Download a single document by document number |
|
|
95
|
+
| `buildFrApiListUrl(from, to, page, types?)` | Build the API listing URL for a date range |
|
|
96
|
+
| `buildFrFrontmatter(node, context, xmlMeta, jsonMeta?)` | Build frontmatter from AST node and metadata |
|
|
97
|
+
| `buildFrOutputPath(number, date, root)` | Build output file path for a document |
|
|
98
|
+
| `buildFrDownloadXmlPath(number, date, root)` | Build download XML file path |
|
|
99
|
+
| `buildFrDownloadJsonPath(number, date, root)` | Build download JSON file path |
|
|
100
|
+
|
|
101
|
+
### Types
|
|
102
|
+
|
|
103
|
+
| Export | Description |
|
|
104
|
+
|--------|-------------|
|
|
105
|
+
| `FrConvertOptions` | Options for `convertFrDocuments()` — input, output, link style, dry run, date/type filters |
|
|
106
|
+
| `FrConvertResult` | Conversion result — documents converted, files, token estimate |
|
|
107
|
+
| `FrDownloadOptions` | Options for `downloadFrDocuments()` — output, date range, types, limit, fetch delay, progress callback |
|
|
108
|
+
| `FrDownloadResult` | Download result — documents downloaded, files, bytes, skipped, failed |
|
|
109
|
+
| `FrDownloadedFile` | Single downloaded file metadata (XML path, JSON path, size) |
|
|
110
|
+
| `FrDownloadFailure` | Failed download metadata (document number, error) |
|
|
111
|
+
| `FrDownloadProgress` | Progress info for the download callback |
|
|
112
|
+
| `FrDocumentType` | `"RULE" \| "PRORULE" \| "NOTICE" \| "PRESDOCU"` |
|
|
113
|
+
| `FrDocumentJsonMeta` | JSON metadata structure from the FederalRegister.gov API |
|
|
114
|
+
| `FrDocumentXmlMeta` | Metadata extracted from FR XML during SAX parsing |
|
|
115
|
+
|
|
116
|
+
### Classes
|
|
117
|
+
|
|
118
|
+
| Export | Description |
|
|
119
|
+
|--------|-------------|
|
|
120
|
+
| `FrASTBuilder` | SAX-to-AST builder for FR GPO/SGML XML. Emits one section-level node per document. |
|
|
121
|
+
|
|
122
|
+
### Constants
|
|
123
|
+
|
|
124
|
+
| Export | Description |
|
|
125
|
+
|--------|-------------|
|
|
126
|
+
| `FR_DOCUMENT_ELEMENTS` | Set of document type elements (`RULE`, `NOTICE`, etc.) |
|
|
127
|
+
| `FR_DOCUMENT_TYPE_MAP` | Map from element names to normalized type strings |
|
|
128
|
+
| `FR_EMPHASIS_MAP` | Map from `E` element `T` attribute to inline formatting types |
|
|
129
|
+
| `FR_HD_SOURCE_TO_DEPTH` | Map from `HD` `SOURCE` attribute to heading depth |
|
|
130
|
+
|
|
131
|
+
## Output
|
|
132
|
+
|
|
133
|
+
Each document produces one Markdown file, placed in year/month directories based on its publication date:
|
|
134
|
+
|
|
135
|
+
| Output Path | Description |
|
|
136
|
+
|---|---|
|
|
137
|
+
| `fr/2026/03/2026-06029.md` | Individual document |
|
|
138
|
+
|
|
139
|
+
No granularity options — FR documents are already atomic (one file per document).
|
|
140
|
+
|
|
141
|
+
### Frontmatter
|
|
142
|
+
|
|
143
|
+
FR documents include source-specific metadata alongside standard LexBuild fields. When a JSON sidecar from the API is available, frontmatter is enriched with structured agency, CFR reference, docket, and date information:
|
|
144
|
+
|
|
145
|
+
```yaml
|
|
146
|
+
---
|
|
147
|
+
identifier: "/us/fr/2026-06029"
|
|
148
|
+
source: "fr"
|
|
149
|
+
legal_status: "authoritative_unofficial"
|
|
150
|
+
title: "Meeting of the Advisory Board on Radiation and Worker Health"
|
|
151
|
+
title_number: 0
|
|
152
|
+
title_name: "Federal Register"
|
|
153
|
+
section_number: "2026-06029"
|
|
154
|
+
positive_law: false
|
|
155
|
+
currency: "2026-03-30"
|
|
156
|
+
last_updated: "2026-03-30"
|
|
157
|
+
agency: "Health and Human Services Department"
|
|
158
|
+
document_number: "2026-06029"
|
|
159
|
+
document_type: "notice"
|
|
160
|
+
fr_citation: "91 FR 15619"
|
|
161
|
+
fr_volume: 91
|
|
162
|
+
publication_date: "2026-03-30"
|
|
163
|
+
agencies:
|
|
164
|
+
- "Health and Human Services Department"
|
|
165
|
+
- "Centers for Disease Control and Prevention"
|
|
166
|
+
fr_action: "Notice of meeting."
|
|
167
|
+
---
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
Rules and proposed rules include additional fields:
|
|
171
|
+
|
|
172
|
+
```yaml
|
|
173
|
+
cfr_references:
|
|
174
|
+
- "10 CFR Part 53"
|
|
175
|
+
- "10 CFR Part 50"
|
|
176
|
+
docket_ids:
|
|
177
|
+
- "NRC-2019-0062"
|
|
178
|
+
rin: "3150-AK31"
|
|
179
|
+
effective_date: "2026-04-29"
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
## Data Source
|
|
183
|
+
|
|
184
|
+
| Field | Detail |
|
|
185
|
+
|-------|--------|
|
|
186
|
+
| **API** | `federalregister.gov/api/v1/` |
|
|
187
|
+
| **Authentication** | None required |
|
|
188
|
+
| **Rate limits** | No documented limits |
|
|
189
|
+
| **Coverage** | JSON metadata from 1994, XML full text from 2000 |
|
|
190
|
+
| **Update cadence** | Daily (each business day) |
|
|
191
|
+
| **Volume** | ~28,000-31,000 documents/year |
|
|
192
|
+
| **Legal status** | Unofficial — only the authenticated PDF has legal standing |
|
|
193
|
+
|
|
194
|
+
The downloader fetches both JSON metadata (40+ structured fields) and XML full text per document. Large date ranges are automatically chunked by month to stay under the API's 10,000-result cap per query.
|
|
195
|
+
|
|
196
|
+
## Document Types
|
|
197
|
+
|
|
198
|
+
| Type | Element | Annual Volume | Description |
|
|
199
|
+
|------|---------|---------------|-------------|
|
|
200
|
+
| Notice | `NOTICE` | ~22,000-25,000 | Agency announcements, meetings, information collections |
|
|
201
|
+
| Rule | `RULE` | ~3,000-3,200 | Final rules and regulations |
|
|
202
|
+
| Proposed Rule | `PRORULE` | ~1,700-2,100 | Notices of proposed rulemaking (NPRMs) |
|
|
203
|
+
| Presidential Document | `PRESDOCU` | ~300-470 | Executive orders, memoranda, proclamations |
|
|
204
|
+
|
|
205
|
+
## Compatibility
|
|
206
|
+
|
|
207
|
+
- **Node.js** >= 22
|
|
208
|
+
- **ESM only** — no CommonJS build
|
|
209
|
+
- **TypeScript** — ships `.d.ts` type declarations
|
|
210
|
+
|
|
211
|
+
## Monorepo Context
|
|
212
|
+
|
|
213
|
+
Part of the [LexBuild](https://github.com/chris-c-thomas/LexBuild) monorepo. Depends on `@lexbuild/core` for XML parsing, AST types, and Markdown rendering.
|
|
214
|
+
|
|
215
|
+
```bash
|
|
216
|
+
pnpm turbo build --filter=@lexbuild/fr
|
|
217
|
+
pnpm turbo test --filter=@lexbuild/fr
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
## Related Packages
|
|
221
|
+
|
|
222
|
+
| Package | Description |
|
|
223
|
+
|---------|-------------|
|
|
224
|
+
| [`@lexbuild/cli`](https://www.npmjs.com/package/@lexbuild/cli) | CLI tool — the easiest way to use LexBuild |
|
|
225
|
+
| [`@lexbuild/core`](https://www.npmjs.com/package/@lexbuild/core) | Shared parsing, AST, and rendering infrastructure |
|
|
226
|
+
| [`@lexbuild/usc`](https://www.npmjs.com/package/@lexbuild/usc) | U.S. Code (USLM XML) converter |
|
|
227
|
+
| [`@lexbuild/ecfr`](https://www.npmjs.com/package/@lexbuild/ecfr) | eCFR (Code of Federal Regulations) converter |
|
|
228
|
+
|
|
229
|
+
## License
|
|
230
|
+
|
|
231
|
+
[MIT](https://github.com/chris-c-thomas/LexBuild/blob/main/LICENSE)
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,414 @@
|
|
|
1
|
+
import { LevelNode, EmitContext, Attributes, InlineType, FrontmatterData } from '@lexbuild/core';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Federal Register AST Builder — converts SAX events from FR XML into AST nodes.
|
|
5
|
+
*
|
|
6
|
+
* Follows the stack-based pattern from the eCFR builder but adapted for FR's
|
|
7
|
+
* flat, document-centric structure. Each FR document (RULE, NOTICE, PRORULE,
|
|
8
|
+
* PRESDOCU) becomes a single section-level LevelNode emitted via onEmit.
|
|
9
|
+
*
|
|
10
|
+
* FR XML is GPO/SGML-derived with no namespace. It shares inline formatting
|
|
11
|
+
* (E T="nn", SU, FTNT) with eCFR but uses a different document structure:
|
|
12
|
+
* preamble (PREAMB) → supplementary info (SUPLINF) → signature (SIG).
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
/** Options for configuring the FR AST builder */
|
|
16
|
+
interface FrASTBuilderOptions {
|
|
17
|
+
/** Callback when a completed document node is ready */
|
|
18
|
+
onEmit: (node: LevelNode, context: EmitContext) => void | Promise<void>;
|
|
19
|
+
}
|
|
20
|
+
/** Metadata extracted from the FR document XML during parsing */
|
|
21
|
+
interface FrDocumentXmlMeta {
|
|
22
|
+
/** Document type element name (RULE, NOTICE, etc.) */
|
|
23
|
+
documentType: string;
|
|
24
|
+
/** Normalized document type (rule, proposed_rule, etc.) */
|
|
25
|
+
documentTypeNormalized: string;
|
|
26
|
+
/** Agency name from AGENCY element */
|
|
27
|
+
agency?: string | undefined;
|
|
28
|
+
/** Sub-agency name from SUBAGY element */
|
|
29
|
+
subAgency?: string | undefined;
|
|
30
|
+
/** Subject/title from SUBJECT element */
|
|
31
|
+
subject?: string | undefined;
|
|
32
|
+
/** CFR citation from CFR element */
|
|
33
|
+
cfrCitation?: string | undefined;
|
|
34
|
+
/** Regulation Identifier Number from RIN element */
|
|
35
|
+
rin?: string | undefined;
|
|
36
|
+
/** FR document number extracted from FRDOC text */
|
|
37
|
+
documentNumber?: string | undefined;
|
|
38
|
+
}
|
|
39
|
+
/**
|
|
40
|
+
* Federal Register AST Builder.
|
|
41
|
+
*
|
|
42
|
+
* Consumes SAX events and produces LexBuild AST nodes. Each FR document
|
|
43
|
+
* (RULE, NOTICE, PRORULE, PRESDOCU) is emitted as a single section-level
|
|
44
|
+
* LevelNode via the onEmit callback.
|
|
45
|
+
*/
|
|
46
|
+
declare class FrASTBuilder {
|
|
47
|
+
private readonly options;
|
|
48
|
+
private readonly stack;
|
|
49
|
+
/** Depth inside fully-ignored elements (CNTNTS, GPH) */
|
|
50
|
+
private ignoredContainerDepth;
|
|
51
|
+
/** Metadata extracted from current document */
|
|
52
|
+
private currentDocMeta;
|
|
53
|
+
/** All document metadata collected during parsing */
|
|
54
|
+
private readonly documentMetas;
|
|
55
|
+
constructor(options: FrASTBuilderOptions);
|
|
56
|
+
/** Get metadata for all documents parsed so far */
|
|
57
|
+
getDocumentMetas(): readonly FrDocumentXmlMeta[];
|
|
58
|
+
/** Handle SAX open element */
|
|
59
|
+
onOpenElement(name: string, attrs: Attributes): void;
|
|
60
|
+
/** Handle SAX close element */
|
|
61
|
+
onCloseElement(name: string): void;
|
|
62
|
+
/** Handle SAX text content */
|
|
63
|
+
onText(text: string): void;
|
|
64
|
+
private openDocument;
|
|
65
|
+
private closeDocument;
|
|
66
|
+
private closePreambleMeta;
|
|
67
|
+
private openHeading;
|
|
68
|
+
private closeHeading;
|
|
69
|
+
private openContent;
|
|
70
|
+
private closeContent;
|
|
71
|
+
private openInline;
|
|
72
|
+
private closeInline;
|
|
73
|
+
private openNote;
|
|
74
|
+
private closeNote;
|
|
75
|
+
private openRegtext;
|
|
76
|
+
private closeRegtext;
|
|
77
|
+
private openSignature;
|
|
78
|
+
private closeSignature;
|
|
79
|
+
private openTableElement;
|
|
80
|
+
private closeTableElement;
|
|
81
|
+
private closeGpoTable;
|
|
82
|
+
private closeTableHeader;
|
|
83
|
+
private closeTableRow;
|
|
84
|
+
private closeTableCell;
|
|
85
|
+
private closeFrdoc;
|
|
86
|
+
private addToDocument;
|
|
87
|
+
private findParentDocument;
|
|
88
|
+
private findParentNote;
|
|
89
|
+
private findTableFrame;
|
|
90
|
+
private findFrame;
|
|
91
|
+
private popFrame;
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
/**
|
|
95
|
+
* Federal Register XML element classification.
|
|
96
|
+
*
|
|
97
|
+
* The FR XML is GPO/SGML-derived with no namespace. It shares many
|
|
98
|
+
* inline formatting elements with eCFR (E T="nn", SU, FTNT) but uses
|
|
99
|
+
* a flat document-centric structure rather than a hierarchical DIV system.
|
|
100
|
+
*
|
|
101
|
+
* Each FR document (RULE, PRORULE, NOTICE, PRESDOCU) contains a preamble
|
|
102
|
+
* (PREAMB) with structured metadata, supplementary information (SUPLINF)
|
|
103
|
+
* with the document body, and optional regulatory text (REGTEXT).
|
|
104
|
+
*/
|
|
105
|
+
|
|
106
|
+
/** FR document type element names as a const tuple — single source of truth */
|
|
107
|
+
declare const FR_DOCUMENT_TYPE_KEYS: readonly ["RULE", "PRORULE", "NOTICE", "PRESDOCU"];
|
|
108
|
+
/** FR document types supported by the API and XML */
|
|
109
|
+
type FrDocumentType = (typeof FR_DOCUMENT_TYPE_KEYS)[number];
|
|
110
|
+
/** Top-level document elements — each becomes an emitted section-level node */
|
|
111
|
+
declare const FR_DOCUMENT_ELEMENTS: Set<string>;
|
|
112
|
+
/** Container elements that group documents within daily issues */
|
|
113
|
+
declare const FR_SECTION_CONTAINERS: Set<string>;
|
|
114
|
+
/** Map from document element name to normalized document type string */
|
|
115
|
+
declare const FR_DOCUMENT_TYPE_MAP: Readonly<Record<string, string>>;
|
|
116
|
+
/** Preamble section elements containing structured content */
|
|
117
|
+
declare const FR_PREAMBLE_SECTIONS: Set<string>;
|
|
118
|
+
/** Preamble metadata elements — text extracted for frontmatter */
|
|
119
|
+
declare const FR_PREAMBLE_META_ELEMENTS: Set<string>;
|
|
120
|
+
/** Elements that contain paragraph text */
|
|
121
|
+
declare const FR_CONTENT_ELEMENTS: Set<string>;
|
|
122
|
+
/** Heading element — level determined by SOURCE attribute */
|
|
123
|
+
declare const FR_HEADING_ELEMENT = "HD";
|
|
124
|
+
/**
|
|
125
|
+
* Map from HD SOURCE attribute to heading depth.
|
|
126
|
+
* HED = top-level (section-like), HD1 = subsection, etc.
|
|
127
|
+
*/
|
|
128
|
+
declare const FR_HD_SOURCE_TO_DEPTH: Readonly<Record<string, number>>;
|
|
129
|
+
/** Inline formatting elements */
|
|
130
|
+
declare const FR_INLINE_ELEMENTS: Set<string>;
|
|
131
|
+
/**
|
|
132
|
+
* Map from E element T attribute to InlineType.
|
|
133
|
+
* Duplicated from eCFR — source packages must not import each other.
|
|
134
|
+
*/
|
|
135
|
+
declare const FR_EMPHASIS_MAP: Readonly<Record<string, InlineType>>;
|
|
136
|
+
/** Regulatory text amendment elements (within SUPLINF) */
|
|
137
|
+
declare const FR_REGTEXT_ELEMENTS: Set<string>;
|
|
138
|
+
/** Signature block elements */
|
|
139
|
+
declare const FR_SIGNATURE_ELEMENTS: Set<string>;
|
|
140
|
+
/** Presidential document subtype containers */
|
|
141
|
+
declare const FR_PRESIDENTIAL_SUBTYPES: Set<string>;
|
|
142
|
+
/** Footnote and editorial note elements */
|
|
143
|
+
declare const FR_NOTE_ELEMENTS: Set<string>;
|
|
144
|
+
/** Block-level content wrappers */
|
|
145
|
+
declare const FR_BLOCK_ELEMENTS: Set<string>;
|
|
146
|
+
/** GPOTABLE elements */
|
|
147
|
+
declare const FR_TABLE_ELEMENTS: Set<string>;
|
|
148
|
+
/** Elements whose entire subtree should be skipped */
|
|
149
|
+
declare const FR_IGNORE_ELEMENTS: Set<string>;
|
|
150
|
+
/** Self-contained elements to skip — metadata extracted elsewhere or irrelevant */
|
|
151
|
+
declare const FR_SKIP_ELEMENTS: Set<string>;
|
|
152
|
+
/** Transparent wrappers — pass through without creating frames */
|
|
153
|
+
declare const FR_PASSTHROUGH_ELEMENTS: Set<string>;
|
|
154
|
+
|
|
155
|
+
/**
|
|
156
|
+
* Federal Register frontmatter builder.
|
|
157
|
+
*
|
|
158
|
+
* Constructs FrontmatterData from an emitted FR AST node, its context,
|
|
159
|
+
* and optional JSON metadata from the FederalRegister.gov API.
|
|
160
|
+
*/
|
|
161
|
+
|
|
162
|
+
/**
|
|
163
|
+
* Metadata from the FederalRegister.gov API JSON response.
|
|
164
|
+
* Stored as a sidecar `.json` file alongside each `.xml` download.
|
|
165
|
+
*/
|
|
166
|
+
interface FrDocumentJsonMeta {
|
|
167
|
+
/** FR document number (e.g., "2026-06029") */
|
|
168
|
+
document_number: string;
|
|
169
|
+
/** Document type (Rule, Proposed Rule, Notice, Presidential Document) */
|
|
170
|
+
type: string;
|
|
171
|
+
/** Document title */
|
|
172
|
+
title: string;
|
|
173
|
+
/** Publication date (YYYY-MM-DD) */
|
|
174
|
+
publication_date: string;
|
|
175
|
+
/** Full FR citation (e.g., "91 FR 14523") */
|
|
176
|
+
citation: string;
|
|
177
|
+
/** FR volume number */
|
|
178
|
+
volume: number;
|
|
179
|
+
/** Start page number */
|
|
180
|
+
start_page: number;
|
|
181
|
+
/** End page number */
|
|
182
|
+
end_page: number;
|
|
183
|
+
/** Agencies with hierarchy info */
|
|
184
|
+
agencies: Array<{
|
|
185
|
+
name: string;
|
|
186
|
+
id: number;
|
|
187
|
+
slug: string;
|
|
188
|
+
parent_id?: number | null;
|
|
189
|
+
raw_name?: string;
|
|
190
|
+
}>;
|
|
191
|
+
/** CFR title/part references */
|
|
192
|
+
cfr_references: Array<{
|
|
193
|
+
title: number;
|
|
194
|
+
part: number;
|
|
195
|
+
}>;
|
|
196
|
+
/** Docket identifiers */
|
|
197
|
+
docket_ids: string[];
|
|
198
|
+
/** Regulation Identifier Numbers */
|
|
199
|
+
regulation_id_numbers: string[];
|
|
200
|
+
/** Effective date (YYYY-MM-DD) */
|
|
201
|
+
effective_on?: string | null;
|
|
202
|
+
/** Comment period end date (YYYY-MM-DD) */
|
|
203
|
+
comments_close_on?: string | null;
|
|
204
|
+
/** Action description (e.g., "Final rule.") */
|
|
205
|
+
action?: string | null;
|
|
206
|
+
/** Document abstract */
|
|
207
|
+
abstract?: string | null;
|
|
208
|
+
/** Whether the document is significant */
|
|
209
|
+
significant?: boolean | null;
|
|
210
|
+
/** Topics/keywords */
|
|
211
|
+
topics: string[];
|
|
212
|
+
/** URL to full text XML */
|
|
213
|
+
full_text_xml_url: string;
|
|
214
|
+
}
|
|
215
|
+
/**
|
|
216
|
+
* Build FrontmatterData from an FR document node with optional JSON metadata.
|
|
217
|
+
*
|
|
218
|
+
* If JSON metadata is available (from the API sidecar file), it enriches
|
|
219
|
+
* the frontmatter with structured agency, CFR reference, docket, and
|
|
220
|
+
* date information that isn't available in the XML alone.
|
|
221
|
+
*/
|
|
222
|
+
declare function buildFrFrontmatter(node: LevelNode, _context: EmitContext, xmlMeta: FrDocumentXmlMeta, jsonMeta?: FrDocumentJsonMeta): FrontmatterData;
|
|
223
|
+
|
|
224
|
+
/**
|
|
225
|
+
* Output path builder for Federal Register directory structure.
|
|
226
|
+
*
|
|
227
|
+
* FR path structure:
|
|
228
|
+
* output/fr/{YYYY}/{MM}/{document_number}.md
|
|
229
|
+
*
|
|
230
|
+
* Downloads path structure:
|
|
231
|
+
* downloads/fr/{YYYY}/{MM}/{document_number}.xml
|
|
232
|
+
* downloads/fr/{YYYY}/{MM}/{document_number}.json
|
|
233
|
+
*/
|
|
234
|
+
/**
|
|
235
|
+
* Build the output file path for an FR document.
|
|
236
|
+
*
|
|
237
|
+
* @param documentNumber - FR document number (e.g., "2026-06029")
|
|
238
|
+
* @param publicationDate - Publication date in YYYY-MM-DD format
|
|
239
|
+
* @param outputRoot - Output root directory (e.g., "./output")
|
|
240
|
+
* @returns Full output file path (e.g., "output/fr/2026/03/2026-06029.md")
|
|
241
|
+
*/
|
|
242
|
+
declare function buildFrOutputPath(documentNumber: string, publicationDate: string, outputRoot: string): string;
|
|
243
|
+
/**
|
|
244
|
+
* Build the download file path for an FR document XML.
|
|
245
|
+
*
|
|
246
|
+
* @param documentNumber - FR document number
|
|
247
|
+
* @param publicationDate - Publication date in YYYY-MM-DD format
|
|
248
|
+
* @param downloadRoot - Download root directory (e.g., "./downloads/fr")
|
|
249
|
+
* @returns Full download file path (e.g., "downloads/fr/2026/03/2026-06029.xml")
|
|
250
|
+
*/
|
|
251
|
+
declare function buildFrDownloadXmlPath(documentNumber: string, publicationDate: string, downloadRoot: string): string;
|
|
252
|
+
/**
|
|
253
|
+
* Build the download file path for an FR document JSON metadata.
|
|
254
|
+
*
|
|
255
|
+
* @param documentNumber - FR document number
|
|
256
|
+
* @param publicationDate - Publication date in YYYY-MM-DD format
|
|
257
|
+
* @param downloadRoot - Download root directory
|
|
258
|
+
* @returns Full download file path (e.g., "downloads/fr/2026/03/2026-06029.json")
|
|
259
|
+
*/
|
|
260
|
+
declare function buildFrDownloadJsonPath(documentNumber: string, publicationDate: string, downloadRoot: string): string;
|
|
261
|
+
/**
|
|
262
|
+
* Build the directory path for a year/month within the FR output structure.
|
|
263
|
+
*/
|
|
264
|
+
declare function buildMonthDir(year: string, month: string, outputRoot: string): string;
|
|
265
|
+
/**
|
|
266
|
+
* Build the directory path for a year.
|
|
267
|
+
*/
|
|
268
|
+
declare function buildYearDir(year: string, outputRoot: string): string;
|
|
269
|
+
|
|
270
|
+
/**
|
|
271
|
+
* Federal Register conversion orchestrator.
|
|
272
|
+
*
|
|
273
|
+
* Discovers downloaded FR XML files, parses them with FrASTBuilder,
|
|
274
|
+
* enriches frontmatter with JSON sidecar metadata, renders via core's
|
|
275
|
+
* renderDocument, and writes structured Markdown output.
|
|
276
|
+
*
|
|
277
|
+
* Processes FR documents in two passes: (1) parse all files and register
|
|
278
|
+
* identifiers for link resolution, (2) render and write output files.
|
|
279
|
+
*/
|
|
280
|
+
|
|
281
|
+
/** Options for converting FR documents */
|
|
282
|
+
interface FrConvertOptions {
|
|
283
|
+
/** Path to input file or directory containing .xml/.json files */
|
|
284
|
+
input: string;
|
|
285
|
+
/** Output root directory */
|
|
286
|
+
output: string;
|
|
287
|
+
/** Link style for cross-references */
|
|
288
|
+
linkStyle: "relative" | "canonical" | "plaintext";
|
|
289
|
+
/** Parse only, don't write files */
|
|
290
|
+
dryRun: boolean;
|
|
291
|
+
/** Filter: start date (YYYY-MM-DD) */
|
|
292
|
+
from?: string | undefined;
|
|
293
|
+
/** Filter: end date (YYYY-MM-DD) */
|
|
294
|
+
to?: string | undefined;
|
|
295
|
+
/** Filter: document types */
|
|
296
|
+
types?: FrDocumentType[] | undefined;
|
|
297
|
+
}
|
|
298
|
+
/** Result of a conversion operation */
|
|
299
|
+
interface FrConvertResult {
|
|
300
|
+
/** Number of documents converted */
|
|
301
|
+
documentsConverted: number;
|
|
302
|
+
/** Paths of written files */
|
|
303
|
+
files: string[];
|
|
304
|
+
/** Total estimated tokens */
|
|
305
|
+
totalTokenEstimate: number;
|
|
306
|
+
/** Peak RSS in bytes */
|
|
307
|
+
peakMemoryBytes: number;
|
|
308
|
+
/** Whether this was a dry run */
|
|
309
|
+
dryRun: boolean;
|
|
310
|
+
}
|
|
311
|
+
/**
|
|
312
|
+
* Convert FR XML documents to Markdown.
|
|
313
|
+
*
|
|
314
|
+
* Supports both single-file mode (input is a .xml path) and batch mode
|
|
315
|
+
* (input is a directory containing year/month/doc.xml structure).
|
|
316
|
+
*/
|
|
317
|
+
declare function convertFrDocuments(options: FrConvertOptions): Promise<FrConvertResult>;
|
|
318
|
+
|
|
319
|
+
/**
|
|
320
|
+
* Federal Register API downloader.
|
|
321
|
+
*
|
|
322
|
+
* Downloads FR documents (XML + JSON metadata) from the FederalRegister.gov API.
|
|
323
|
+
* The API provides per-document endpoints, rich JSON metadata, and requires no
|
|
324
|
+
* authentication. Results are paginated (max 200/page) with a 10,000 result cap
|
|
325
|
+
* per query — the downloader auto-chunks by month for large date ranges.
|
|
326
|
+
*
|
|
327
|
+
* API base: https://www.federalregister.gov/api/v1/
|
|
328
|
+
*/
|
|
329
|
+
|
|
330
|
+
/** Options for downloading FR documents */
|
|
331
|
+
interface FrDownloadOptions {
|
|
332
|
+
/** Download directory (e.g., "./downloads/fr") */
|
|
333
|
+
output: string;
|
|
334
|
+
/** Start date (YYYY-MM-DD, inclusive) */
|
|
335
|
+
from: string;
|
|
336
|
+
/** End date (YYYY-MM-DD, inclusive). Defaults to today. */
|
|
337
|
+
to?: string | undefined;
|
|
338
|
+
/** Document types to download. All types if omitted. */
|
|
339
|
+
types?: FrDocumentType[] | undefined;
|
|
340
|
+
/** Maximum number of documents to download (for testing) */
|
|
341
|
+
limit?: number | undefined;
|
|
342
|
+
/** Delay between XML fetches in milliseconds */
|
|
343
|
+
fetchDelayMs?: number | undefined;
|
|
344
|
+
/** Progress callback */
|
|
345
|
+
onProgress?: ((progress: FrDownloadProgress) => void) | undefined;
|
|
346
|
+
}
|
|
347
|
+
/** Progress info for download callback */
|
|
348
|
+
interface FrDownloadProgress {
|
|
349
|
+
/** Documents downloaded so far */
|
|
350
|
+
documentsDownloaded: number;
|
|
351
|
+
/** Total documents found across all pages */
|
|
352
|
+
totalDocuments: number;
|
|
353
|
+
/** Current document number being downloaded */
|
|
354
|
+
currentDocument: string;
|
|
355
|
+
/** Current date chunk being processed (YYYY-MM) */
|
|
356
|
+
currentChunk: string;
|
|
357
|
+
}
|
|
358
|
+
/** A successfully downloaded FR document */
|
|
359
|
+
interface FrDownloadedFile {
|
|
360
|
+
/** Absolute path to the XML file */
|
|
361
|
+
xmlPath: string;
|
|
362
|
+
/** Absolute path to the JSON metadata file */
|
|
363
|
+
jsonPath: string;
|
|
364
|
+
/** Document number */
|
|
365
|
+
documentNumber: string;
|
|
366
|
+
/** Publication date */
|
|
367
|
+
publicationDate: string;
|
|
368
|
+
/** Combined size in bytes (XML + JSON) */
|
|
369
|
+
size: number;
|
|
370
|
+
}
|
|
371
|
+
/** A failed download */
|
|
372
|
+
interface FrDownloadFailure {
|
|
373
|
+
/** Document number */
|
|
374
|
+
documentNumber: string;
|
|
375
|
+
/** Error message */
|
|
376
|
+
error: string;
|
|
377
|
+
}
|
|
378
|
+
/** Result of a download operation */
|
|
379
|
+
interface FrDownloadResult {
|
|
380
|
+
/** Number of documents downloaded */
|
|
381
|
+
documentsDownloaded: number;
|
|
382
|
+
/** Metadata for each downloaded document (XML/JSON paths, document number, publication date, size) */
|
|
383
|
+
files: FrDownloadedFile[];
|
|
384
|
+
/** Total bytes downloaded */
|
|
385
|
+
totalBytes: number;
|
|
386
|
+
/** Date range covered */
|
|
387
|
+
dateRange: {
|
|
388
|
+
from: string;
|
|
389
|
+
to: string;
|
|
390
|
+
};
|
|
391
|
+
/** Number of documents without XML full text (pre-2000) */
|
|
392
|
+
skipped: number;
|
|
393
|
+
/** Documents that failed to download */
|
|
394
|
+
failed: FrDownloadFailure[];
|
|
395
|
+
}
|
|
396
|
+
/**
|
|
397
|
+
* Build the API documents listing URL for a date range.
|
|
398
|
+
*/
|
|
399
|
+
declare function buildFrApiListUrl(from: string, to: string, page: number, types?: FrDocumentType[]): string;
|
|
400
|
+
/**
|
|
401
|
+
* Download FR documents for a date range.
|
|
402
|
+
*
|
|
403
|
+
* Automatically chunks large date ranges into month-sized windows to stay
|
|
404
|
+
* under the API's 10,000 result cap per query.
|
|
405
|
+
*/
|
|
406
|
+
declare function downloadFrDocuments(options: FrDownloadOptions): Promise<FrDownloadResult>;
|
|
407
|
+
/**
|
|
408
|
+
* Download a single FR document by document number.
|
|
409
|
+
*
|
|
410
|
+
* Fetches both the JSON metadata and XML full text.
|
|
411
|
+
*/
|
|
412
|
+
declare function downloadSingleFrDocument(documentNumber: string, output: string): Promise<FrDownloadedFile>;
|
|
413
|
+
|
|
414
|
+
export { FR_BLOCK_ELEMENTS, FR_CONTENT_ELEMENTS, FR_DOCUMENT_ELEMENTS, FR_DOCUMENT_TYPE_KEYS, FR_DOCUMENT_TYPE_MAP, FR_EMPHASIS_MAP, FR_HD_SOURCE_TO_DEPTH, FR_HEADING_ELEMENT, FR_IGNORE_ELEMENTS, FR_INLINE_ELEMENTS, FR_NOTE_ELEMENTS, FR_PASSTHROUGH_ELEMENTS, FR_PREAMBLE_META_ELEMENTS, FR_PREAMBLE_SECTIONS, FR_PRESIDENTIAL_SUBTYPES, FR_REGTEXT_ELEMENTS, FR_SECTION_CONTAINERS, FR_SIGNATURE_ELEMENTS, FR_SKIP_ELEMENTS, FR_TABLE_ELEMENTS, FrASTBuilder, type FrASTBuilderOptions, type FrConvertOptions, type FrConvertResult, type FrDocumentJsonMeta, type FrDocumentType, type FrDocumentXmlMeta, type FrDownloadFailure, type FrDownloadOptions, type FrDownloadProgress, type FrDownloadResult, type FrDownloadedFile, buildFrApiListUrl, buildFrDownloadJsonPath, buildFrDownloadXmlPath, buildFrFrontmatter, buildFrOutputPath, buildMonthDir, buildYearDir, convertFrDocuments, downloadFrDocuments, downloadSingleFrDocument };
|