@ansvar/us-regulations-mcp 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +190 -0
- package/README.md +275 -0
- package/data/.gitkeep +0 -0
- package/data/regulations.db +0 -0
- package/data/seed/applicability/rules.json +74 -0
- package/data/seed/mappings/ccpa-nist-csf.json +144 -0
- package/data/seed/mappings/hipaa-nist-800-53.json +377 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +41 -0
- package/dist/index.js.map +1 -0
- package/dist/ingest/adapters/california-leginfo.d.ts +72 -0
- package/dist/ingest/adapters/california-leginfo.d.ts.map +1 -0
- package/dist/ingest/adapters/california-leginfo.js +270 -0
- package/dist/ingest/adapters/california-leginfo.js.map +1 -0
- package/dist/ingest/adapters/ecfr.d.ts +76 -0
- package/dist/ingest/adapters/ecfr.d.ts.map +1 -0
- package/dist/ingest/adapters/ecfr.js +355 -0
- package/dist/ingest/adapters/ecfr.js.map +1 -0
- package/dist/ingest/adapters/regulations-gov.d.ts +47 -0
- package/dist/ingest/adapters/regulations-gov.d.ts.map +1 -0
- package/dist/ingest/adapters/regulations-gov.js +91 -0
- package/dist/ingest/adapters/regulations-gov.js.map +1 -0
- package/dist/ingest/framework.d.ts +84 -0
- package/dist/ingest/framework.d.ts.map +1 -0
- package/dist/ingest/framework.js +8 -0
- package/dist/ingest/framework.js.map +1 -0
- package/dist/tools/action-items.d.ts +23 -0
- package/dist/tools/action-items.d.ts.map +1 -0
- package/dist/tools/action-items.js +118 -0
- package/dist/tools/action-items.js.map +1 -0
- package/dist/tools/applicability.d.ts +26 -0
- package/dist/tools/applicability.d.ts.map +1 -0
- package/dist/tools/applicability.js +49 -0
- package/dist/tools/applicability.js.map +1 -0
- package/dist/tools/compare.d.ts +20 -0
- package/dist/tools/compare.d.ts.map +1 -0
- package/dist/tools/compare.js +35 -0
- package/dist/tools/compare.js.map +1 -0
- package/dist/tools/definitions.d.ts +22 -0
- package/dist/tools/definitions.d.ts.map +1 -0
- package/dist/tools/definitions.js +43 -0
- package/dist/tools/definitions.js.map +1 -0
- package/dist/tools/evidence.d.ts +23 -0
- package/dist/tools/evidence.d.ts.map +1 -0
- package/dist/tools/evidence.js +27 -0
- package/dist/tools/evidence.js.map +1 -0
- package/dist/tools/list.d.ts +25 -0
- package/dist/tools/list.d.ts.map +1 -0
- package/dist/tools/list.js +66 -0
- package/dist/tools/list.js.map +1 -0
- package/dist/tools/map.d.ts +26 -0
- package/dist/tools/map.d.ts.map +1 -0
- package/dist/tools/map.js +58 -0
- package/dist/tools/map.js.map +1 -0
- package/dist/tools/registry.d.ts +19 -0
- package/dist/tools/registry.d.ts.map +1 -0
- package/dist/tools/registry.js +260 -0
- package/dist/tools/registry.js.map +1 -0
- package/dist/tools/search.d.ts +15 -0
- package/dist/tools/search.d.ts.map +1 -0
- package/dist/tools/search.js +94 -0
- package/dist/tools/search.js.map +1 -0
- package/dist/tools/section.d.ts +19 -0
- package/dist/tools/section.d.ts.map +1 -0
- package/dist/tools/section.js +50 -0
- package/dist/tools/section.js.map +1 -0
- package/package.json +76 -0
- package/scripts/build-db.ts +268 -0
- package/scripts/ingest.ts +214 -0
- package/scripts/load-seed-data.ts +133 -0
- package/scripts/quality-test.ts +346 -0
- package/scripts/test-mcp-tools.ts +187 -0
- package/scripts/test-remaining-tools.ts +107 -0
- package/src/index.ts +55 -0
- package/src/ingest/adapters/california-leginfo.ts +322 -0
- package/src/ingest/adapters/ecfr.ts +403 -0
- package/src/ingest/adapters/regulations-gov.ts +112 -0
- package/src/ingest/framework.ts +92 -0
- package/src/tools/action-items.ts +164 -0
- package/src/tools/applicability.ts +91 -0
- package/src/tools/compare.ts +61 -0
- package/src/tools/definitions.ts +79 -0
- package/src/tools/evidence.ts +53 -0
- package/src/tools/list.ts +120 -0
- package/src/tools/map.ts +100 -0
- package/src/tools/registry.ts +275 -0
- package/src/tools/search.ts +132 -0
- package/src/tools/section.ts +85 -0
|
@@ -0,0 +1,403 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* eCFR Adapter (Electronic Code of Federal Regulations)
|
|
3
|
+
*
|
|
4
|
+
* Fetches HIPAA regulations from ecfr.gov API.
|
|
5
|
+
* Source: 45 CFR Parts 160, 162, 164 (Privacy Rule, Security Rule, Breach Notification)
|
|
6
|
+
*
|
|
7
|
+
* PRODUCTION IMPLEMENTATION
|
|
8
|
+
* Uses the eCFR API: https://www.ecfr.gov/developers/documentation/api/v1
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import {
|
|
12
|
+
SourceAdapter,
|
|
13
|
+
RegulationMetadata,
|
|
14
|
+
Section,
|
|
15
|
+
Definition,
|
|
16
|
+
UpdateStatus,
|
|
17
|
+
} from '../framework.js';
|
|
18
|
+
import { XMLParser } from 'fast-xml-parser';
|
|
19
|
+
|
|
20
|
+
/**
 * Source adapter that reads regulation text for selected CFR parts from the
 * eCFR versioner API.
 *
 * One instance covers a single CFR title and a list of part numbers inside
 * that title. The whole title is downloaded as XML and only the requested
 * parts are turned into `Section` records.
 */
export class EcfrAdapter implements SourceAdapter {
  private readonly regulationId: string;
  private readonly cfr_title: number;
  private readonly cfr_parts: number[];

  /**
   * @param regulationId Identifier reported as `id` in the metadata.
   * @param cfr_title    CFR title number (e.g. 45).
   * @param cfr_parts    Part numbers within the title to ingest.
   */
  constructor(regulationId: string, cfr_title: number, cfr_parts: number[]) {
    this.regulationId = regulationId;
    this.cfr_title = cfr_title;
    this.cfr_parts = cfr_parts;
  }

  /**
   * Build metadata for the configured title and parts.
   *
   * Citation and source URL are derived from the constructor arguments. The
   * HIPAA name and dates are static placeholders and are only attached when
   * the adapter is configured for Title 45 with HIPAA parts (160/162/164);
   * nothing here is fetched from the eCFR API.
   *
   * @returns Metadata record for this adapter's regulation.
   */
  async fetchMetadata(): Promise<RegulationMetadata> {
    const citation = `${this.cfr_title} CFR Parts ${this.cfr_parts.join(', ')}`;
    const source_url = `https://www.ecfr.gov/current/title-${this.cfr_title}`;

    const hipaaParts = [160, 162, 164];
    const isHipaa =
      this.cfr_title === 45 &&
      this.cfr_parts.length > 0 &&
      this.cfr_parts.every(part => hipaaParts.includes(part));

    if (isHipaa) {
      return {
        id: this.regulationId,
        full_name: 'Health Insurance Portability and Accountability Act',
        citation,
        effective_date: '2003-04-14',
        last_amended: '2013-01-25',
        source_url,
        jurisdiction: 'federal',
        regulation_type: 'rule',
      };
    }

    // No curated name or dates are known for other titles, so the optional
    // date fields are left out instead of reporting HIPAA's.
    return {
      id: this.regulationId,
      full_name: citation,
      citation,
      source_url,
      jurisdiction: 'federal',
      regulation_type: 'rule',
    };
  }

  /**
   * Download the title XML and stream the sections of the configured parts.
   *
   * Endpoint: https://www.ecfr.gov/api/versioner/v1/full/{date}/title-{title}.xml
   *
   * @yields Batches of at most 50 parsed sections.
   * @throws Whatever `fetchWithRetry` throws when the download fails.
   */
  async *fetchSections(): AsyncGenerator<Section[]> {
    const batchSize = 50;

    const date = await this.getLatestDate();
    const url = `https://www.ecfr.gov/api/versioner/v1/full/${date}/title-${this.cfr_title}.xml`;

    console.log(`Fetching eCFR Title ${this.cfr_title} from ${url}...`);

    const response = await this.fetchWithRetry(url);
    const xmlText = await response.text();

    const parser = new XMLParser({
      ignoreAttributes: false,
      attributeNamePrefix: '@_',
      textNodeName: '#text',
      preserveOrder: false,
    });
    const xmlDoc = parser.parse(xmlText);

    // Expected nesting: ECFR -> DIV1 (title) -> ... -> DIV5 (part) -> DIV6 (subpart) -> DIV8 (section)
    const title = xmlDoc.ECFR?.DIV1;
    if (!title) {
      console.warn(`No title found in eCFR XML for Title ${this.cfr_title}`);
      return;
    }

    const parts = this.collectDivs(title, 'DIV5');
    console.log(`  Found ${parts.length} parts in Title ${this.cfr_title}`);

    const pending: Section[] = [];

    for (const { node: part } of parts) {
      const partNum = this.extractNumber(part['@_N']);

      // Only process our target parts
      if (!this.cfr_parts.includes(partNum)) {
        continue;
      }

      console.log(`  Processing Part ${partNum}...`);

      const sectionDivs = this.collectDivs(part, 'DIV8');
      console.log(`    Found ${sectionDivs.length} sections in Part ${partNum}`);

      for (const { node, subpartId } of sectionDivs) {
        const section = this.parseSection(node, partNum, subpartId);
        if (section) {
          pending.push(section);
        }
      }

      // Drain every full batch; a single part can hold several batches.
      while (pending.length >= batchSize) {
        yield pending.splice(0, batchSize);
      }
    }

    if (pending.length > 0) {
      yield pending;
    }
  }

  /**
   * Recursively collect every element stored under `target` (e.g. 'DIV5' or
   * 'DIV8') beneath `node`, descending only through keys that start with
   * 'DIV'. Matches found directly on a node are listed before matches found
   * deeper; collected elements are not searched for nested matches.
   *
   * @param node      Parsed XML node (object, array of nodes, or anything else).
   * @param target    Key name of the elements to collect.
   * @param subpartId Subpart label inherited from an enclosing DIV6, if any.
   * @returns Each match together with the subpart label in effect for it.
   */
  private collectDivs(
    node: any,
    target: string,
    subpartId = '',
  ): Array<{ node: any; subpartId: string }> {
    if (!node || typeof node !== 'object') return [];

    const found: Array<{ node: any; subpartId: string }> = [];

    if (Array.isArray(node)) {
      for (const item of node) {
        found.push(...this.collectDivs(item, target, subpartId));
      }
      return found;
    }

    // The parser yields a bare object for a single child and an array for several.
    const asList = (value: any): any[] =>
      (Array.isArray(value) ? value : [value]).filter(Boolean);

    if (node[target]) {
      for (const hit of asList(node[target])) {
        found.push({ node: hit, subpartId });
      }
    }

    for (const key of Object.keys(node)) {
      if (!key.startsWith('DIV') || key === target || typeof node[key] !== 'object') {
        continue;
      }
      for (const child of asList(node[key])) {
        // Assumes a DIV6 carries its subpart letter in the N attribute, the
        // same way DIV5/DIV8 carry their numbers - TODO confirm against live XML.
        const label = key === 'DIV6' && typeof child === 'object' ? child['@_N'] : undefined;
        const childSubpart =
          label === undefined || label === null || label === ''
            ? subpartId
            : String(label).replace(/^subpart\s+/i, '').trim();
        found.push(...this.collectDivs(child, target, childSubpart));
      }
    }

    return found;
  }

  /**
   * Convert one parsed DIV8 element into a `Section`.
   *
   * @param div       Parsed DIV8 node.
   * @param partNum   Number of the part that contains the section.
   * @param subpartId Subpart label, or '' when unknown.
   * @returns The section, or `null` when the node has no `N` attribute or
   *          its body text is shorter than 10 characters.
   */
  private parseSection(div: any, partNum: number, subpartId: string): Section | null {
    const rawNum = div?.['@_N'];
    if (rawNum === undefined || rawNum === null) return null;

    // Tolerate a leading section sign so the part prefix is not duplicated.
    const sectionNum = String(rawNum).replace(/^§+\s*/, '').trim();
    if (!sectionNum) return null;

    const text = this.extractText(div);
    if (!text || text.length < 10) return null;

    // HEAD is skipped by extractText, so the heading is read separately.
    const heading =
      div.HEAD !== undefined && div.HEAD !== null ? this.extractText(div.HEAD) : '';
    const title = heading || undefined;

    // N normally already includes the part number (e.g. "164.308").
    const fullSectionNum = sectionNum.startsWith(`${partNum}.`)
      ? sectionNum
      : `${partNum}.${sectionNum}`;

    const crossReferences = this.extractCrossReferences(div);

    return {
      sectionNumber: fullSectionNum,
      title,
      text,
      chapter: subpartId ? `Part ${partNum}, Subpart ${subpartId}` : `Part ${partNum}`,
      parentSection: undefined, // TODO: implement parent detection for nested sections
      crossReferences: crossReferences.length > 0 ? crossReferences : undefined,
    };
  }

  /**
   * Flatten a parsed XML value into plain text.
   *
   * Strings are trimmed, numbers and booleans are stringified, arrays and
   * objects are walked recursively and their pieces joined with single
   * spaces. Attribute keys (`@_` prefix) and `HEAD` children are skipped.
   * Text is emitted in the key order of the parsed object, which is not
   * guaranteed to be document order because the parser runs with
   * `preserveOrder: false`.
   *
   * @param element Any value produced by the XML parser.
   * @returns The collected text, or '' when there is none.
   */
  private extractText(element: any): string {
    if (element === null || element === undefined) return '';

    if (typeof element === 'string') return element.trim();

    // Purely numeric/boolean content may arrive as a non-string primitive.
    if (typeof element === 'number' || typeof element === 'boolean') {
      return String(element);
    }

    if (typeof element !== 'object') return '';

    const pieces: string[] = [];

    if (Array.isArray(element)) {
      for (const item of element) {
        const piece = this.extractText(item);
        if (piece) pieces.push(piece);
      }
      return pieces.join(' ').trim();
    }

    for (const key of Object.keys(element)) {
      if (key.startsWith('@_')) continue; // Skip attributes
      if (key === 'HEAD') continue; // Skip title

      // '#text' is handled like any other child so that text in sibling
      // elements is kept alongside it.
      const piece = this.extractText(element[key]);
      if (piece) pieces.push(piece);
    }

    return pieces.join(' ').trim();
  }

  /**
   * Collect the text of every `CITA` element found anywhere under `element`.
   *
   * @param element Parsed XML node to search.
   * @returns Non-empty CITA texts in traversal order.
   */
  private extractCrossReferences(element: any): string[] {
    const refs: string[] = [];

    const visit = (node: any): void => {
      if (!node || typeof node !== 'object') return;

      for (const key of Object.keys(node)) {
        const value = node[key];
        if (key === 'CITA') {
          const items = Array.isArray(value) ? value : [value];
          for (const item of items) {
            const ref = this.extractText(item);
            if (ref) refs.push(ref);
          }
        } else if (typeof value === 'object') {
          visit(value);
        }
      }
    };

    visit(element);
    return refs;
  }

  /**
   * Return the first run of digits in `str` as an integer.
   *
   * @param str Attribute value such as "164" or "Part 164".
   * @returns The parsed number, or 0 when the input is empty or has no digits.
   */
  private extractNumber(str: string | number | undefined): number {
    if (str === undefined || str === null || str === '') return 0;
    const match = String(str).match(/\d+/);
    return match ? parseInt(match[0], 10) : 0;
  }

  /**
   * Look up `latest_issue_date` for this title in the eCFR titles listing.
   *
   * @returns The latest issue date, or today's date (YYYY-MM-DD) when the
   *          lookup fails or the title is not listed.
   */
  private async getLatestDate(): Promise<string> {
    const today = (): string => new Date().toISOString().split('T')[0];

    try {
      const response = await this.fetchWithRetry('https://www.ecfr.gov/api/versioner/v1/titles');
      const data: any = await response.json();

      const titles: any[] = Array.isArray(data?.titles) ? data.titles : [];
      const titleInfo = titles.find((t: any) => Number(t?.number) === this.cfr_title);
      if (titleInfo && titleInfo.latest_issue_date) {
        return titleInfo.latest_issue_date;
      }

      // Fallback to current date
      return today();
    } catch (error) {
      console.warn('Failed to fetch latest date, using current date:', error);
      return today();
    }
  }

  /**
   * GET `url`, retrying with exponential backoff on HTTP 429 and on errors.
   *
   * @param url        Address to fetch.
   * @param maxRetries Total number of attempts.
   * @returns The first successful (2xx) response.
   * @throws The last fetch/HTTP error, or `Error('Max retries exceeded')`
   *         when every attempt was rate limited.
   */
  private async fetchWithRetry(url: string, maxRetries = 3): Promise<Response> {
    for (let attempt = 0; attempt < maxRetries; attempt++) {
      const isLastAttempt = attempt === maxRetries - 1;

      try {
        const response = await fetch(url);

        if (response.status === 429) {
          // No point sleeping when there is no attempt left to make.
          if (isLastAttempt) break;

          // Rate limited - backoff with jitter, capped at 30s
          const delay = Math.min(1000 * 2 ** attempt + Math.random() * 1000, 30000);
          console.warn(`Rate limited, retrying in ${delay}ms...`);
          await new Promise(resolve => setTimeout(resolve, delay));
          continue;
        }

        if (!response.ok) {
          throw new Error(`HTTP ${response.status}: ${response.statusText}`);
        }

        return response;
      } catch (error) {
        if (isLastAttempt) throw error;
        const delay = 1000 * 2 ** attempt;
        console.warn(`Fetch failed, retrying in ${delay}ms...`, error);
        await new Promise(resolve => setTimeout(resolve, delay));
      }
    }

    throw new Error('Max retries exceeded');
  }

  /**
   * Compare the `last-modified` header of the title XML with `lastFetched`.
   *
   * The HEAD request targets the same dated URL that `fetchSections`
   * downloads. Any failure (network error, non-2xx status, missing or
   * unparsable header) is logged and reported as "no changes".
   *
   * @param lastFetched Time of the previous successful ingest.
   * @returns Update status; `lastModified` is the current time when the
   *          source gave no usable date.
   */
  async checkForUpdates(lastFetched: Date): Promise<UpdateStatus> {
    const noChanges = (): UpdateStatus => ({
      hasChanges: false,
      lastModified: new Date(),
      changes: [],
    });

    try {
      const date = await this.getLatestDate();
      const url = `https://www.ecfr.gov/api/versioner/v1/full/${date}/title-${this.cfr_title}.xml`;

      const response = await fetch(url, { method: 'HEAD' });
      if (!response.ok) {
        console.warn(`eCFR HEAD request failed: HTTP ${response.status}`);
        return noChanges();
      }

      const lastModifiedHeader = response.headers.get('last-modified');
      if (!lastModifiedHeader) {
        console.warn('No last-modified header from eCFR');
        return noChanges();
      }

      const lastModified = new Date(lastModifiedHeader);
      if (Number.isNaN(lastModified.getTime())) {
        console.warn(`Unparseable last-modified header from eCFR: ${lastModifiedHeader}`);
        return noChanges();
      }

      const hasChanges = lastModified > lastFetched;
      return {
        hasChanges,
        lastModified,
        changes: hasChanges
          ? [`Title ${this.cfr_title} updated on ${lastModified.toISOString()}`]
          : [],
      };
    } catch (error) {
      console.error('Error checking for updates:', error);
      return noChanges();
    }
  }

  /**
   * Definition extraction is not implemented yet.
   *
   * TODO: parse the definition sections (45 CFR 160.103, 164.103, 164.501),
   * which follow a "Term means definition." pattern.
   *
   * @returns Always an empty array.
   */
  async extractDefinitions(): Promise<Definition[]> {
    return [];
  }
}
|
|
397
|
+
|
|
398
|
+
/**
 * Factory for the HIPAA adapter.
 *
 * @returns An `EcfrAdapter` with id 'HIPAA' configured for Title 45,
 *          Parts 160, 162 and 164.
 */
export function createHipaaAdapter(): EcfrAdapter {
  return new EcfrAdapter('HIPAA', 45, [160, 162, 164]);
}
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SOX Adapter
|
|
3
|
+
*
|
|
4
|
+
* Fetches SOX regulations from eCFR (for SEC implementing rules).
|
|
5
|
+
* Source: 17 CFR Part 229 (Regulation S-K, Item 308) and Part 240 (Exchange Act Rules)
|
|
6
|
+
*
|
|
7
|
+
* PRODUCTION IMPLEMENTATION
|
|
8
|
+
* Uses eCFR API for SEC regulations implementing Sarbanes-Oxley Section 404
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import {
|
|
12
|
+
SourceAdapter,
|
|
13
|
+
RegulationMetadata,
|
|
14
|
+
Section,
|
|
15
|
+
Definition,
|
|
16
|
+
UpdateStatus,
|
|
17
|
+
} from '../framework.js';
|
|
18
|
+
import { XMLParser } from 'fast-xml-parser';
|
|
19
|
+
import { EcfrAdapter } from './ecfr.js';
|
|
20
|
+
|
|
21
|
+
/**
 * Adapter for the SEC rules that implement Sarbanes-Oxley.
 *
 * Wraps an `EcfrAdapter` for Title 17 (Parts 229 and 240) and narrows its
 * output to a fixed list of SOX-related sections.
 */
export class SoxAdapter implements SourceAdapter {
  /**
   * Section numbers kept by `fetchSections`:
   * - 229.308    Item 308: internal control over financial reporting
   * - 240.13a-15 / 240.15d-15  Controls and procedures
   * - 240.13a-14 / 240.15d-14  Certifications
   */
  private static readonly RELEVANT_SECTIONS: readonly string[] = [
    '229.308',
    '240.13a-15',
    '240.15d-15',
    '240.13a-14',
    '240.15d-14',
  ];

  private readonly regulationId: string;
  private readonly ecfrAdapter: EcfrAdapter;

  /**
   * @param regulationId Identifier reported as `id` in the metadata.
   */
  constructor(regulationId: string) {
    this.regulationId = regulationId;
    // Use eCFR adapter for Title 17 (SEC regulations)
    this.ecfrAdapter = new EcfrAdapter('SOX-SEC', 17, [229, 240]);
  }

  /**
   * Return static SOX metadata.
   *
   * @returns Metadata record for the SEC implementing regulations.
   */
  async fetchMetadata(): Promise<RegulationMetadata> {
    // NOTE(review): last_amended is simply today's date, not a date read
    // from the source - confirm whether consumers rely on it.
    const today = new Date().toISOString().split('T')[0];

    return {
      id: this.regulationId,
      full_name: 'Sarbanes-Oxley Act - SEC Implementing Regulations',
      citation: '17 CFR Parts 229, 240 (Regulation S-K Item 308, Exchange Act Rules)',
      effective_date: '2003-06-05',
      last_amended: today,
      source_url: 'https://www.ecfr.gov/current/title-17',
      jurisdiction: 'federal',
      regulation_type: 'rule',
    };
  }

  /**
   * Stream the SOX-relevant sections of 17 CFR Parts 229 and 240.
   *
   * Each batch from the wrapped eCFR adapter is filtered to sections whose
   * number contains one of the relevant section numbers; batches left empty
   * by the filter are not yielded.
   *
   * @yields Non-empty batches of matching sections.
   */
  async *fetchSections(): AsyncGenerator<Section[]> {
    console.log('Fetching SOX sections from eCFR (Title 17)...');

    const isRelevant = (section: Section): boolean =>
      SoxAdapter.RELEVANT_SECTIONS.some(wanted => section.sectionNumber.includes(wanted));

    for await (const batch of this.ecfrAdapter.fetchSections()) {
      const kept = batch.filter(isRelevant);
      if (kept.length > 0) {
        yield kept;
      }
    }
  }

  /**
   * Check for updates since the last fetch by delegating to the eCFR adapter.
   *
   * @param lastFetched Time of the previous successful ingest.
   * @returns The wrapped adapter's update status.
   */
  async checkForUpdates(lastFetched: Date): Promise<UpdateStatus> {
    return this.ecfrAdapter.checkForUpdates(lastFetched);
  }

  /**
   * Extract definitions by delegating to the eCFR adapter.
   *
   * @returns Whatever the wrapped adapter's `extractDefinitions` returns.
   */
  async extractDefinitions(): Promise<Definition[]> {
    return this.ecfrAdapter.extractDefinitions();
  }
}
|
|
106
|
+
|
|
107
|
+
/**
 * Factory for the SOX adapter.
 *
 * @returns A `SoxAdapter` whose regulation id is 'SOX'.
 */
export function createSoxAdapter(): SoxAdapter {
  return new SoxAdapter('SOX');
}
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Ingestion Framework
|
|
3
|
+
*
|
|
4
|
+
* Defines interfaces for regulation data ingestion from multiple sources.
|
|
5
|
+
* Supports automated fetching from APIs (regulations.gov, ecfr.gov, California LegInfo).
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
/**
 * Metadata about a regulation
 */
export interface RegulationMetadata {
  /** Regulation identifier, e.g. 'HIPAA', 'CCPA', 'SOX'. */
  id: string;
  /** Full name, e.g. 'Health Insurance Portability and Accountability Act'. */
  full_name: string;
  /** Legal citation, e.g. 'Pub. L. 104-191' or 'Cal. Civ. Code § 1798.100'. */
  citation: string;
  /** ISO 8601 date string. */
  effective_date?: string;
  /** ISO 8601 date string. */
  last_amended?: string;
  /** API endpoint or official source URL. */
  source_url: string;
  /** 'federal', 'california', 'virginia', etc. */
  jurisdiction: string;
  /** 'statute', 'rule', 'guidance'. */
  regulation_type: string;
}
|
|
21
|
+
|
|
22
|
+
/**
 * A single section within a regulation
 */
export interface Section {
  /** Section number, e.g. '164.308(a)(1)(ii)(A)' for HIPAA. */
  sectionNumber: string;
  /** Section title/heading. */
  title?: string;
  /** Full text content. */
  text: string;
  /** Chapter or part designation. */
  chapter?: string;
  /** Parent section for nested structures. */
  parentSection?: string;
  /** Array of referenced section numbers. */
  crossReferences?: string[];
}
|
|
33
|
+
|
|
34
|
+
/**
 * A term definition from a regulation
 */
export interface Definition {
  /** Regulation ID. */
  regulation: string;
  /** Defined term. */
  term: string;
  /** Full definition text. */
  definition: string;
  /** Section where defined. */
  section: string;
}
|
|
43
|
+
|
|
44
|
+
/**
 * Status of potential updates from source
 */
export interface UpdateStatus {
  /** Whether updates were detected. */
  hasChanges: boolean;
  /** Last modification date from source. */
  lastModified?: Date;
  /** Description of changes (if available). */
  changes?: string[];
  /** Count of new sections. */
  sectionsAdded?: number;
  /** Count of modified sections. */
  sectionsModified?: number;
}
|
|
54
|
+
|
|
55
|
+
/**
 * Source adapter interface
 *
 * Each regulation source (regulations.gov, ecfr.gov, etc.) implements this
 * interface to provide normalized access to regulation data.
 */
export interface SourceAdapter {
  /**
   * Fetch regulation metadata.
   */
  fetchMetadata(): Promise<RegulationMetadata>;

  /**
   * Fetch all sections, delivered in batches through an async generator so
   * callers can stream them instead of holding everything in memory.
   */
  fetchSections(): AsyncGenerator<Section[]>;

  /**
   * Check if the source has updates since the last fetch.
   */
  checkForUpdates(lastFetched: Date): Promise<UpdateStatus>;

  /**
   * Extract definitions from regulation text.
   */
  extractDefinitions(): Promise<Definition[]>;
}
|
|
83
|
+
|
|
84
|
+
/**
 * Regulation source configuration
 */
export interface RegulationSource {
  /** Regulation identifier, e.g. 'HIPAA', 'CCPA', 'SOX'. */
  id: string;
  /** Full regulation name. */
  name: string;
  /** Kind of source the adapter reads from. */
  sourceType: 'api' | 'html' | 'pdf';
  /** Adapter used to fetch this regulation. */
  adapter: SourceAdapter;
}
|