@ansvar/us-regulations-mcp 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. package/LICENSE +190 -0
  2. package/README.md +275 -0
  3. package/data/.gitkeep +0 -0
  4. package/data/regulations.db +0 -0
  5. package/data/seed/applicability/rules.json +74 -0
  6. package/data/seed/mappings/ccpa-nist-csf.json +144 -0
  7. package/data/seed/mappings/hipaa-nist-800-53.json +377 -0
  8. package/dist/index.d.ts +3 -0
  9. package/dist/index.d.ts.map +1 -0
  10. package/dist/index.js +41 -0
  11. package/dist/index.js.map +1 -0
  12. package/dist/ingest/adapters/california-leginfo.d.ts +72 -0
  13. package/dist/ingest/adapters/california-leginfo.d.ts.map +1 -0
  14. package/dist/ingest/adapters/california-leginfo.js +270 -0
  15. package/dist/ingest/adapters/california-leginfo.js.map +1 -0
  16. package/dist/ingest/adapters/ecfr.d.ts +76 -0
  17. package/dist/ingest/adapters/ecfr.d.ts.map +1 -0
  18. package/dist/ingest/adapters/ecfr.js +355 -0
  19. package/dist/ingest/adapters/ecfr.js.map +1 -0
  20. package/dist/ingest/adapters/regulations-gov.d.ts +47 -0
  21. package/dist/ingest/adapters/regulations-gov.d.ts.map +1 -0
  22. package/dist/ingest/adapters/regulations-gov.js +91 -0
  23. package/dist/ingest/adapters/regulations-gov.js.map +1 -0
  24. package/dist/ingest/framework.d.ts +84 -0
  25. package/dist/ingest/framework.d.ts.map +1 -0
  26. package/dist/ingest/framework.js +8 -0
  27. package/dist/ingest/framework.js.map +1 -0
  28. package/dist/tools/action-items.d.ts +23 -0
  29. package/dist/tools/action-items.d.ts.map +1 -0
  30. package/dist/tools/action-items.js +118 -0
  31. package/dist/tools/action-items.js.map +1 -0
  32. package/dist/tools/applicability.d.ts +26 -0
  33. package/dist/tools/applicability.d.ts.map +1 -0
  34. package/dist/tools/applicability.js +49 -0
  35. package/dist/tools/applicability.js.map +1 -0
  36. package/dist/tools/compare.d.ts +20 -0
  37. package/dist/tools/compare.d.ts.map +1 -0
  38. package/dist/tools/compare.js +35 -0
  39. package/dist/tools/compare.js.map +1 -0
  40. package/dist/tools/definitions.d.ts +22 -0
  41. package/dist/tools/definitions.d.ts.map +1 -0
  42. package/dist/tools/definitions.js +43 -0
  43. package/dist/tools/definitions.js.map +1 -0
  44. package/dist/tools/evidence.d.ts +23 -0
  45. package/dist/tools/evidence.d.ts.map +1 -0
  46. package/dist/tools/evidence.js +27 -0
  47. package/dist/tools/evidence.js.map +1 -0
  48. package/dist/tools/list.d.ts +25 -0
  49. package/dist/tools/list.d.ts.map +1 -0
  50. package/dist/tools/list.js +66 -0
  51. package/dist/tools/list.js.map +1 -0
  52. package/dist/tools/map.d.ts +26 -0
  53. package/dist/tools/map.d.ts.map +1 -0
  54. package/dist/tools/map.js +58 -0
  55. package/dist/tools/map.js.map +1 -0
  56. package/dist/tools/registry.d.ts +19 -0
  57. package/dist/tools/registry.d.ts.map +1 -0
  58. package/dist/tools/registry.js +260 -0
  59. package/dist/tools/registry.js.map +1 -0
  60. package/dist/tools/search.d.ts +15 -0
  61. package/dist/tools/search.d.ts.map +1 -0
  62. package/dist/tools/search.js +94 -0
  63. package/dist/tools/search.js.map +1 -0
  64. package/dist/tools/section.d.ts +19 -0
  65. package/dist/tools/section.d.ts.map +1 -0
  66. package/dist/tools/section.js +50 -0
  67. package/dist/tools/section.js.map +1 -0
  68. package/package.json +76 -0
  69. package/scripts/build-db.ts +268 -0
  70. package/scripts/ingest.ts +214 -0
  71. package/scripts/load-seed-data.ts +133 -0
  72. package/scripts/quality-test.ts +346 -0
  73. package/scripts/test-mcp-tools.ts +187 -0
  74. package/scripts/test-remaining-tools.ts +107 -0
  75. package/src/index.ts +55 -0
  76. package/src/ingest/adapters/california-leginfo.ts +322 -0
  77. package/src/ingest/adapters/ecfr.ts +403 -0
  78. package/src/ingest/adapters/regulations-gov.ts +112 -0
  79. package/src/ingest/framework.ts +92 -0
  80. package/src/tools/action-items.ts +164 -0
  81. package/src/tools/applicability.ts +91 -0
  82. package/src/tools/compare.ts +61 -0
  83. package/src/tools/definitions.ts +79 -0
  84. package/src/tools/evidence.ts +53 -0
  85. package/src/tools/list.ts +120 -0
  86. package/src/tools/map.ts +100 -0
  87. package/src/tools/registry.ts +275 -0
  88. package/src/tools/search.ts +132 -0
  89. package/src/tools/section.ts +85 -0
@@ -0,0 +1,403 @@
1
+ /**
2
+ * eCFR Adapter (Electronic Code of Federal Regulations)
3
+ *
4
+ * Fetches CFR regulation text (e.g. HIPAA, 45 CFR) from the ecfr.gov API.
5
+ * Source: 45 CFR Parts 160, 162, 164 (Privacy Rule, Security Rule, Breach Notification)
6
+ *
7
+ * PRODUCTION IMPLEMENTATION
8
+ * Uses the eCFR API: https://www.ecfr.gov/developers/documentation/api/v1
9
+ */
10
+
11
+ import {
12
+ SourceAdapter,
13
+ RegulationMetadata,
14
+ Section,
15
+ Definition,
16
+ UpdateStatus,
17
+ } from '../framework.js';
18
+ import { XMLParser } from 'fast-xml-parser';
19
+
20
/**
 * Adapter that downloads one CFR title from the eCFR versioner API and
 * exposes the sections belonging to a configured set of parts.
 *
 * The title and parts are constructor arguments, so the class is not tied to
 * HIPAA; note however that `fetchMetadata()` still returns HIPAA placeholder
 * values for the regulation name and dates.
 */
export class EcfrAdapter implements SourceAdapter {
  /** Maximum number of sections emitted per batch by `fetchSections()`. */
  private static readonly BATCH_SIZE = 50;

  private readonly regulationId: string;
  private readonly cfr_title: number;
  private readonly cfr_parts: number[];

  /**
   * @param regulationId Identifier reported as `id` in the metadata.
   * @param cfr_title CFR title number to download (e.g. 45).
   * @param cfr_parts Part numbers within the title whose sections are kept.
   */
  constructor(regulationId: string, cfr_title: number, cfr_parts: number[]) {
    this.regulationId = regulationId;
    this.cfr_title = cfr_title;
    this.cfr_parts = cfr_parts;
  }

  /**
   * Return regulation metadata.
   *
   * PLACEHOLDER: the name and dates are hardcoded HIPAA values; only the
   * citation and source URL are derived from the configured title and parts.
   * TODO: Integrate with eCFR API to fetch live metadata
   *
   * @returns Metadata record for the configured regulation id.
   */
  async fetchMetadata(): Promise<RegulationMetadata> {
    const partLabel = this.cfr_parts.length === 1 ? 'Part' : 'Parts';
    return {
      id: this.regulationId,
      full_name: 'Health Insurance Portability and Accountability Act',
      citation: `${this.cfr_title} CFR ${partLabel} ${this.cfr_parts.join(', ')}`,
      effective_date: '2003-04-14',
      last_amended: '2013-01-25',
      source_url: `https://www.ecfr.gov/current/title-${this.cfr_title}`,
      jurisdiction: 'federal',
      regulation_type: 'rule',
    };
  }

  /**
   * Download the full title XML and yield the sections of the target parts.
   *
   * API endpoint: https://www.ecfr.gov/api/versioner/v1/full/{date}/title-{title}.xml
   * Expected nesting: ECFR → DIV1 (title) → ... → DIV5 (part) → DIV6 (subpart) → DIV8 (section).
   *
   * @returns Async generator of section batches, each holding at most 50 sections.
   * @throws Error when the XML download keeps failing (see `fetchWithRetry`).
   */
  async *fetchSections(): AsyncGenerator<Section[]> {
    // The full-title endpoint is addressed by issue date, so resolve it first.
    const date = await this.getLatestDate();
    const url = `https://www.ecfr.gov/api/versioner/v1/full/${date}/title-${this.cfr_title}.xml`;

    console.log(`Fetching eCFR Title ${this.cfr_title} from ${url}...`);

    const response = await this.fetchWithRetry(url);
    const xmlText = await response.text();

    const parser = new XMLParser({
      ignoreAttributes: false,
      attributeNamePrefix: '@_',
      textNodeName: '#text',
      preserveOrder: false,
      // Keep leaf values as strings: legal text such as "164.500" must not be
      // coerced to the number 164.5.
      parseTagValue: false,
    });
    const xmlDoc: unknown = parser.parse(xmlText);

    const ecfrRoot = EcfrAdapter.isNode(xmlDoc) ? xmlDoc['ECFR'] : undefined;
    const title = EcfrAdapter.isNode(ecfrRoot) ? ecfrRoot['DIV1'] : undefined;
    if (!title) {
      console.warn(`No title found in eCFR XML for Title ${this.cfr_title}`);
      return;
    }

    const parts = this.collectDivs(title, 'DIV5');
    console.log(` Found ${parts.length} parts in Title ${this.cfr_title}`);

    const pending: Section[] = [];
    for (const { div: part } of parts) {
      const partNum = this.extractNumber(EcfrAdapter.attr(part, 'N'));

      // Only process our target parts
      if (!this.cfr_parts.includes(partNum)) {
        continue;
      }

      console.log(` Processing Part ${partNum}...`);

      const sectionDivs = this.collectDivs(part, 'DIV8');
      console.log(` Found ${sectionDivs.length} sections in Part ${partNum}`);

      for (const { div, subpartId } of sectionDivs) {
        const section = this.parseSection(div, partNum, subpartId);
        if (section) {
          pending.push(section);
        }
      }

      // Drain every full batch; a single part can hold far more than one batch.
      while (pending.length >= EcfrAdapter.BATCH_SIZE) {
        yield pending.splice(0, EcfrAdapter.BATCH_SIZE);
      }
    }

    // Yield remaining sections
    if (pending.length > 0) {
      yield pending;
    }
  }

  /**
   * Convert one parsed DIV8 element into a `Section`.
   *
   * @param div Parsed DIV8 element.
   * @param partNum Number of the part that contains the section.
   * @param subpartId Subpart designation, or '' when the section has none.
   * @returns The section, or null when the element has no `N` attribute or
   *   fewer than 10 characters of text.
   */
  private parseSection(
    div: Record<string, unknown>,
    partNum: number,
    subpartId: string
  ): Section | null {
    const sectionNum = EcfrAdapter.attr(div, 'N')?.trim();
    if (!sectionNum) return null;

    const text = this.extractText(div);
    if (text.length < 10) return null;

    // HEAD is skipped by extractText(div), so read it separately as the title.
    const title = this.extractText(div['HEAD']) || undefined;

    // eCFR @_N may already include the part number (e.g., "164.308");
    // only prepend it when it is missing.
    const fullSectionNum = sectionNum.startsWith(`${partNum}.`)
      ? sectionNum
      : `${partNum}.${sectionNum}`;

    const crossReferences = this.extractCrossReferences(div);

    return {
      sectionNumber: fullSectionNum,
      title,
      text,
      // Omit the subpart clause entirely when no subpart is known.
      chapter: subpartId ? `Part ${partNum}, Subpart ${subpartId}` : `Part ${partNum}`,
      parentSection: undefined, // TODO: implement parent detection for nested sections
      crossReferences: crossReferences.length > 0 ? crossReferences : undefined,
    };
  }

  /**
   * Recursively gather the text content of a parsed XML value.
   *
   * Attributes (`@_*`) and `HEAD` children are skipped; text nodes and child
   * element text are joined with single spaces. Because the parser runs with
   * `preserveOrder: false`, interleaving of text and inline elements is not
   * reproduced exactly.
   *
   * @param element Parsed value: string, number, array, object, or null/undefined.
   * @returns Trimmed text, or '' when there is none.
   */
  private extractText(element: unknown): string {
    if (typeof element === 'string') {
      return element.trim();
    }
    if (typeof element === 'number' || typeof element === 'boolean') {
      return String(element);
    }
    if (Array.isArray(element)) {
      return element
        .map(item => this.extractText(item))
        .filter(Boolean)
        .join(' ')
        .trim();
    }
    if (!EcfrAdapter.isNode(element)) {
      return ''; // null, undefined, functions, ...
    }

    const texts: string[] = [];
    for (const [key, child] of Object.entries(element)) {
      if (key.startsWith('@_')) continue; // Skip attributes
      if (key === 'HEAD') continue; // Skip title

      for (const item of EcfrAdapter.toArray(child)) {
        const text = this.extractText(item);
        if (text) texts.push(text);
      }
    }
    return texts.join(' ').trim();
  }

  /**
   * Collect the text of every CITA element found anywhere below `element`.
   *
   * @param element Parsed XML value to search.
   * @returns Non-empty CITA texts in traversal order.
   */
  private extractCrossReferences(element: unknown): string[] {
    const refs: string[] = [];

    const visit = (value: unknown): void => {
      if (Array.isArray(value)) {
        value.forEach(visit);
        return;
      }
      if (!EcfrAdapter.isNode(value)) return;

      for (const [key, child] of Object.entries(value)) {
        if (key === 'CITA') {
          for (const cita of EcfrAdapter.toArray(child)) {
            const ref = this.extractText(cita);
            if (ref) refs.push(ref);
          }
        } else {
          visit(child);
        }
      }
    };

    visit(element);
    return refs;
  }

  /**
   * Extract the first run of decimal digits from a string.
   *
   * @param str Text such as a part designation.
   * @returns The parsed integer, or 0 when `str` is empty or has no digits.
   */
  private extractNumber(str: string | undefined): number {
    if (!str) return 0;
    const match = /\d+/.exec(str);
    return match ? parseInt(match[0], 10) : 0;
  }

  /**
   * Look up the latest issue date of this title from the eCFR titles API.
   *
   * @returns `latest_issue_date` for the title, or today's date (YYYY-MM-DD)
   *   when the lookup fails or the title is not listed.
   */
  private async getLatestDate(): Promise<string> {
    const today = (): string => new Date().toISOString().slice(0, 10);

    try {
      const response = await fetch('https://www.ecfr.gov/api/versioner/v1/titles');
      if (!response.ok) {
        throw new Error(`HTTP ${response.status}: ${response.statusText}`);
      }
      const data: unknown = await response.json();

      const titles: unknown[] =
        EcfrAdapter.isNode(data) && Array.isArray(data['titles']) ? data['titles'] : [];
      const titleInfo = titles.find(
        t => EcfrAdapter.isNode(t) && t['number'] === this.cfr_title
      );
      const latest = EcfrAdapter.isNode(titleInfo) ? titleInfo['latest_issue_date'] : undefined;
      if (typeof latest === 'string' && latest) {
        return latest;
      }

      // Fallback to current date
      return today();
    } catch (error) {
      console.warn('Failed to fetch latest date, using current date:', error);
      return today();
    }
  }

  /**
   * Fetch a URL, retrying with exponential backoff on HTTP 429 and on errors.
   *
   * @param url Address to fetch.
   * @param maxRetries Total number of attempts.
   * @returns The first successful (2xx) response.
   * @throws The last fetch/HTTP error, or 'Max retries exceeded' when every
   *   attempt was rate limited.
   */
  private async fetchWithRetry(url: string, maxRetries = 3): Promise<Response> {
    const sleep = (ms: number): Promise<void> =>
      new Promise(resolve => setTimeout(resolve, ms));

    for (let attempt = 0; attempt < maxRetries; attempt++) {
      const isLastAttempt = attempt === maxRetries - 1;
      try {
        const response = await fetch(url);

        if (response.status === 429) {
          // No point in sleeping when there is no attempt left.
          if (isLastAttempt) break;
          // Rate limited - backoff with jitter, capped at 30s
          const delay = Math.round(
            Math.min(1000 * 2 ** attempt + Math.random() * 1000, 30000)
          );
          console.warn(`Rate limited, retrying in ${delay}ms...`);
          await sleep(delay);
          continue;
        }

        if (!response.ok) {
          throw new Error(`HTTP ${response.status}: ${response.statusText}`);
        }

        return response;
      } catch (error) {
        if (isLastAttempt) throw error;
        const delay = 1000 * 2 ** attempt;
        console.warn(`Fetch failed, retrying in ${delay}ms...`, error);
        await sleep(delay);
      }
    }

    throw new Error('Max retries exceeded');
  }

  /**
   * Check whether the title changed after `lastFetched`.
   *
   * Issues a HEAD request for the latest issue of the title and compares the
   * `last-modified` response header with `lastFetched`. Any failure (network
   * error, non-2xx status, missing or unparseable header) is reported as
   * "no changes" with `lastModified` set to the current time.
   *
   * @param lastFetched Time of the previous successful fetch.
   * @returns Update status; never rejects.
   */
  async checkForUpdates(lastFetched: Date): Promise<UpdateStatus> {
    const unchanged = (): UpdateStatus => ({
      hasChanges: false,
      lastModified: new Date(),
      changes: [],
    });

    try {
      // Use the same issue date as fetchSections() so the URL addresses an
      // issue that actually exists.
      const date = await this.getLatestDate();
      const url = `https://www.ecfr.gov/api/versioner/v1/full/${date}/title-${this.cfr_title}.xml`;

      const response = await fetch(url, { method: 'HEAD' });
      if (!response.ok) {
        console.warn(`eCFR update check failed: HTTP ${response.status}`);
        return unchanged();
      }

      const lastModifiedHeader = response.headers.get('last-modified');
      if (!lastModifiedHeader) {
        console.warn('No last-modified header from eCFR');
        return unchanged();
      }

      const lastModified = new Date(lastModifiedHeader);
      if (Number.isNaN(lastModified.getTime())) {
        console.warn(`Unparseable last-modified header from eCFR: ${lastModifiedHeader}`);
        return unchanged();
      }

      const hasChanges = lastModified.getTime() > lastFetched.getTime();
      return {
        hasChanges,
        lastModified,
        changes: hasChanges
          ? [`Title ${this.cfr_title} updated on ${lastModified.toISOString()}`]
          : [],
      };
    } catch (error) {
      console.error('Error checking for updates:', error);
      return unchanged();
    }
  }

  /**
   * Extract definitions from the regulation text.
   *
   * Not implemented yet: always resolves to an empty array.
   * TODO: Implement definition extraction. HIPAA definitions are in
   * 45 CFR 160.103, 164.103 and 164.501.
   *
   * @returns An empty array.
   */
  async extractDefinitions(): Promise<Definition[]> {
    return [];
  }

  /**
   * Collect every `tag` element below `root`, remembering the enclosing subpart.
   *
   * Matches directly under a node come first, followed by matches found by
   * descending into its other `DIV*` children. The subpart id is taken from
   * the `N` attribute of the nearest enclosing DIV6 (assumed to be the
   * subpart level, per the nesting described on `fetchSections`).
   *
   * @param root Parsed XML node (or array of nodes) to search.
   * @param tag Element name to collect, e.g. 'DIV5' or 'DIV8'.
   * @returns Matching elements paired with their subpart id ('' when none).
   */
  private collectDivs(
    root: unknown,
    tag: string
  ): Array<{ div: Record<string, unknown>; subpartId: string }> {
    const matches: Array<{ div: Record<string, unknown>; subpartId: string }> = [];

    const visit = (node: unknown, subpartId: string): void => {
      if (!EcfrAdapter.isNode(node)) return;

      for (const div of EcfrAdapter.toArray(node[tag])) {
        if (EcfrAdapter.isNode(div)) matches.push({ div, subpartId });
      }

      for (const [key, value] of Object.entries(node)) {
        if (!key.startsWith('DIV') || key === tag) continue;
        for (const child of EcfrAdapter.toArray(value)) {
          if (!EcfrAdapter.isNode(child)) continue;
          const ownSubpart =
            key === 'DIV6'
              ? EcfrAdapter.attr(child, 'N')?.replace(/^subpart\s+/i, '').trim()
              : undefined;
          visit(child, ownSubpart ?? subpartId);
        }
      }
    };

    for (const node of EcfrAdapter.toArray(root)) {
      visit(node, '');
    }
    return matches;
  }

  /** True for non-null, non-array objects (i.e. parsed XML elements). */
  private static isNode(value: unknown): value is Record<string, unknown> {
    return typeof value === 'object' && value !== null && !Array.isArray(value);
  }

  /** Normalise "one value, many values or nothing" to an array. */
  private static toArray(value: unknown): unknown[] {
    if (value === undefined || value === null) return [];
    return Array.isArray(value) ? value : [value];
  }

  /** Read XML attribute `name` (stored under the `@_` prefix) as a string. */
  private static attr(node: Record<string, unknown>, name: string): string | undefined {
    const value = node[`@_${name}`];
    if (typeof value === 'string') return value;
    if (typeof value === 'number') return String(value);
    return undefined;
  }
}
397
+
398
/**
 * Build the eCFR adapter preconfigured for HIPAA.
 *
 * @returns An `EcfrAdapter` with regulation id 'HIPAA', CFR title 45 and
 *   parts 160, 162 and 164.
 */
export function createHipaaAdapter(): EcfrAdapter {
  const hipaaTitle = 45;
  const hipaaParts = [160, 162, 164];
  const adapter = new EcfrAdapter('HIPAA', hipaaTitle, hipaaParts);
  return adapter;
}
@@ -0,0 +1,112 @@
1
+ /**
2
+ * SOX Adapter
3
+ *
4
+ * Fetches SOX regulations from eCFR (for SEC implementing rules).
5
+ * Source: 17 CFR Part 229 (Regulation S-K, Item 308) and Part 240 (Exchange Act Rules)
6
+ *
7
+ * PRODUCTION IMPLEMENTATION
8
+ * Uses eCFR API for SEC regulations implementing Sarbanes-Oxley Section 404
9
+ */
10
+
11
+ import {
12
+ SourceAdapter,
13
+ RegulationMetadata,
14
+ Section,
15
+ Definition,
16
+ UpdateStatus,
17
+ } from '../framework.js';
18
+ import { XMLParser } from 'fast-xml-parser';
19
+ import { EcfrAdapter } from './ecfr.js';
20
+
21
/**
 * Adapter for the SEC regulations implementing Sarbanes-Oxley.
 *
 * Delegates all fetching to an `EcfrAdapter` configured for Title 17,
 * Parts 229 and 240, and keeps only the SOX-relevant sections.
 */
export class SoxAdapter implements SourceAdapter {
  /**
   * Section numbers kept by `fetchSections()`:
   * - 17 CFR 229.308 (Item 308: Internal control over financial reporting)
   * - 17 CFR 240.13a-15 / 240.15d-15 (Controls and procedures)
   * - 17 CFR 240.13a-14 / 240.15d-14 (Certifications)
   */
  private static readonly RELEVANT_SECTIONS: readonly string[] = [
    '229.308',
    '240.13a-15',
    '240.15d-15',
    '240.13a-14',
    '240.15d-14',
  ];

  /**
   * One whole-token pattern per relevant section: the number must not be
   * preceded by a digit, letter or dot, nor followed by a digit, letter or
   * hyphen, so '229.308' does not match '229.3080' or '1229.308'.
   */
  private static readonly RELEVANT_PATTERNS: readonly RegExp[] =
    SoxAdapter.RELEVANT_SECTIONS.map(section => {
      const escaped = section.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
      return new RegExp(`(?<![0-9A-Za-z.])${escaped}(?![0-9A-Za-z-])`);
    });

  private readonly regulationId: string;
  private readonly ecfrAdapter: EcfrAdapter;

  /**
   * @param regulationId Identifier reported as `id` in the metadata.
   */
  constructor(regulationId: string) {
    this.regulationId = regulationId;
    // Use eCFR adapter for Title 17 (SEC regulations)
    this.ecfrAdapter = new EcfrAdapter('SOX-SEC', 17, [229, 240]);
  }

  /**
   * Return static SOX metadata.
   *
   * @returns Metadata record for the configured regulation id.
   */
  async fetchMetadata(): Promise<RegulationMetadata> {
    return {
      id: this.regulationId,
      full_name: 'Sarbanes-Oxley Act - SEC Implementing Regulations',
      citation: '17 CFR Parts 229, 240 (Regulation S-K Item 308, Exchange Act Rules)',
      effective_date: '2003-06-05',
      // NOTE(review): this reports the date of the call, not an actual
      // amendment date — confirm whether a real value should be used.
      last_amended: new Date().toISOString().split('T')[0],
      source_url: 'https://www.ecfr.gov/current/title-17',
      jurisdiction: 'federal',
      regulation_type: 'rule',
    };
  }

  /**
   * Stream the SOX-relevant sections of 17 CFR Parts 229 and 240.
   *
   * Every batch produced by the eCFR adapter is filtered down to the
   * relevant sections; batches left empty by the filter are not yielded.
   *
   * @returns Async generator of non-empty section batches.
   */
  async *fetchSections(): AsyncGenerator<Section[]> {
    console.log('Fetching SOX sections from eCFR (Title 17)...');

    for await (const sectionBatch of this.ecfrAdapter.fetchSections()) {
      const filtered = sectionBatch.filter(section =>
        SoxAdapter.isRelevantSection(section.sectionNumber)
      );

      if (filtered.length > 0) {
        yield filtered;
      }
    }
  }

  /**
   * Check for updates since last fetch by delegating to the eCFR adapter.
   *
   * @param lastFetched Time of the previous successful fetch.
   * @returns The update status reported by the eCFR adapter.
   */
  async checkForUpdates(lastFetched: Date): Promise<UpdateStatus> {
    return this.ecfrAdapter.checkForUpdates(lastFetched);
  }

  /**
   * Extract definitions by delegating to the eCFR adapter.
   *
   * @returns Whatever definitions the eCFR adapter produces.
   */
  async extractDefinitions(): Promise<Definition[]> {
    return this.ecfrAdapter.extractDefinitions();
  }

  /**
   * Decide whether a section number denotes one of the relevant sections.
   *
   * The match is on whole section tokens rather than raw substrings, while
   * still tolerating surrounding decoration such as a leading "§ ".
   *
   * @param sectionNumber Section number as produced by the eCFR adapter.
   * @returns True when the number contains a relevant section as a whole token.
   */
  private static isRelevantSection(sectionNumber: string): boolean {
    return SoxAdapter.RELEVANT_PATTERNS.some(pattern => pattern.test(sectionNumber));
  }
}
106
+
107
/**
 * Build the adapter for the SOX implementing regulations.
 *
 * @returns A `SoxAdapter` whose regulation id is 'SOX'.
 */
export function createSoxAdapter(): SoxAdapter {
  const soxRegulationId = 'SOX';
  const adapter = new SoxAdapter(soxRegulationId);
  return adapter;
}
@@ -0,0 +1,92 @@
1
+ /**
2
+ * Ingestion Framework
3
+ *
4
+ * Defines interfaces for regulation data ingestion from multiple sources.
5
+ * Supports automated fetching from APIs (regulations.gov, ecfr.gov, California LegInfo).
6
+ */
7
+
8
/**
 * Descriptive metadata for a single regulation.
 */
export interface RegulationMetadata {
  /** Regulation identifier, e.g. 'HIPAA', 'CCPA', 'SOX'. */
  id: string;
  /** Full name, e.g. 'Health Insurance Portability and Accountability Act'. */
  full_name: string;
  /** Legal citation, e.g. 'Pub. L. 104-191' or 'Cal. Civ. Code § 1798.100'. */
  citation: string;
  /** Effective date as an ISO 8601 date string. */
  effective_date?: string;
  /** Date of the last amendment as an ISO 8601 date string. */
  last_amended?: string;
  /** API endpoint or official source URL. */
  source_url: string;
  /** Jurisdiction, e.g. 'federal', 'california', 'virginia'. */
  jurisdiction: string;
  /** Kind of regulation: 'statute', 'rule', 'guidance'. */
  regulation_type: string;
}
21
+
22
/**
 * One section of a regulation.
 */
export interface Section {
  /** Section number, e.g. '164.308(a)(1)(ii)(A)' for HIPAA. */
  sectionNumber: string;
  /** Section title/heading. */
  title?: string;
  /** Full text content. */
  text: string;
  /** Chapter or part designation. */
  chapter?: string;
  /** Parent section for nested structures. */
  parentSection?: string;
  /** Referenced section numbers. */
  crossReferences?: string[];
}
33
+
34
/**
 * A term defined by a regulation.
 */
export interface Definition {
  /** Regulation ID. */
  regulation: string;
  /** Defined term. */
  term: string;
  /** Full definition text. */
  definition: string;
  /** Section where the term is defined. */
  section: string;
}
43
+
44
/**
 * Result of checking a source for updates.
 */
export interface UpdateStatus {
  /** Whether updates were detected. */
  hasChanges: boolean;
  /** Last modification date reported by the source. */
  lastModified?: Date;
  /** Description of changes (if available). */
  changes?: string[];
  /** Count of new sections. */
  sectionsAdded?: number;
  /** Count of modified sections. */
  sectionsModified?: number;
}
54
+
55
/**
 * Contract implemented by every regulation source.
 *
 * Each source (regulations.gov, ecfr.gov, etc.) provides an implementation so
 * that regulation data can be consumed in one normalized shape.
 */
export interface SourceAdapter {
  /**
   * Fetch regulation metadata.
   *
   * @returns The metadata record for the regulation.
   */
  fetchMetadata(): Promise<RegulationMetadata>;

  /**
   * Fetch all sections with pagination support.
   *
   * @returns An async generator that yields sections in batches, for
   *   memory-efficient streaming.
   */
  fetchSections(): AsyncGenerator<Section[]>;

  /**
   * Check if the source has updates since the last fetch.
   *
   * @param lastFetched Time of the previous fetch.
   * @returns The update status reported for the source.
   */
  checkForUpdates(lastFetched: Date): Promise<UpdateStatus>;

  /**
   * Extract definitions from regulation text.
   *
   * @returns The definitions found.
   */
  extractDefinitions(): Promise<Definition[]>;
}
83
+
84
/**
 * Configuration entry describing one regulation source.
 */
export interface RegulationSource {
  /** Regulation identifier, e.g. 'HIPAA', 'CCPA', 'SOX'. */
  id: string;
  /** Full regulation name. */
  name: string;
  /** Kind of source the data comes from. */
  sourceType: 'api' | 'html' | 'pdf';
  /** Adapter used to read the source. */
  adapter: SourceAdapter;
}