@ansvar/us-regulations-mcp 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. package/LICENSE +190 -0
  2. package/README.md +275 -0
  3. package/data/.gitkeep +0 -0
  4. package/data/regulations.db +0 -0
  5. package/data/seed/applicability/rules.json +74 -0
  6. package/data/seed/mappings/ccpa-nist-csf.json +144 -0
  7. package/data/seed/mappings/hipaa-nist-800-53.json +377 -0
  8. package/dist/index.d.ts +3 -0
  9. package/dist/index.d.ts.map +1 -0
  10. package/dist/index.js +41 -0
  11. package/dist/index.js.map +1 -0
  12. package/dist/ingest/adapters/california-leginfo.d.ts +72 -0
  13. package/dist/ingest/adapters/california-leginfo.d.ts.map +1 -0
  14. package/dist/ingest/adapters/california-leginfo.js +270 -0
  15. package/dist/ingest/adapters/california-leginfo.js.map +1 -0
  16. package/dist/ingest/adapters/ecfr.d.ts +76 -0
  17. package/dist/ingest/adapters/ecfr.d.ts.map +1 -0
  18. package/dist/ingest/adapters/ecfr.js +355 -0
  19. package/dist/ingest/adapters/ecfr.js.map +1 -0
  20. package/dist/ingest/adapters/regulations-gov.d.ts +47 -0
  21. package/dist/ingest/adapters/regulations-gov.d.ts.map +1 -0
  22. package/dist/ingest/adapters/regulations-gov.js +91 -0
  23. package/dist/ingest/adapters/regulations-gov.js.map +1 -0
  24. package/dist/ingest/framework.d.ts +84 -0
  25. package/dist/ingest/framework.d.ts.map +1 -0
  26. package/dist/ingest/framework.js +8 -0
  27. package/dist/ingest/framework.js.map +1 -0
  28. package/dist/tools/action-items.d.ts +23 -0
  29. package/dist/tools/action-items.d.ts.map +1 -0
  30. package/dist/tools/action-items.js +118 -0
  31. package/dist/tools/action-items.js.map +1 -0
  32. package/dist/tools/applicability.d.ts +26 -0
  33. package/dist/tools/applicability.d.ts.map +1 -0
  34. package/dist/tools/applicability.js +49 -0
  35. package/dist/tools/applicability.js.map +1 -0
  36. package/dist/tools/compare.d.ts +20 -0
  37. package/dist/tools/compare.d.ts.map +1 -0
  38. package/dist/tools/compare.js +35 -0
  39. package/dist/tools/compare.js.map +1 -0
  40. package/dist/tools/definitions.d.ts +22 -0
  41. package/dist/tools/definitions.d.ts.map +1 -0
  42. package/dist/tools/definitions.js +43 -0
  43. package/dist/tools/definitions.js.map +1 -0
  44. package/dist/tools/evidence.d.ts +23 -0
  45. package/dist/tools/evidence.d.ts.map +1 -0
  46. package/dist/tools/evidence.js +27 -0
  47. package/dist/tools/evidence.js.map +1 -0
  48. package/dist/tools/list.d.ts +25 -0
  49. package/dist/tools/list.d.ts.map +1 -0
  50. package/dist/tools/list.js +66 -0
  51. package/dist/tools/list.js.map +1 -0
  52. package/dist/tools/map.d.ts +26 -0
  53. package/dist/tools/map.d.ts.map +1 -0
  54. package/dist/tools/map.js +58 -0
  55. package/dist/tools/map.js.map +1 -0
  56. package/dist/tools/registry.d.ts +19 -0
  57. package/dist/tools/registry.d.ts.map +1 -0
  58. package/dist/tools/registry.js +260 -0
  59. package/dist/tools/registry.js.map +1 -0
  60. package/dist/tools/search.d.ts +15 -0
  61. package/dist/tools/search.d.ts.map +1 -0
  62. package/dist/tools/search.js +94 -0
  63. package/dist/tools/search.js.map +1 -0
  64. package/dist/tools/section.d.ts +19 -0
  65. package/dist/tools/section.d.ts.map +1 -0
  66. package/dist/tools/section.js +50 -0
  67. package/dist/tools/section.js.map +1 -0
  68. package/package.json +76 -0
  69. package/scripts/build-db.ts +268 -0
  70. package/scripts/ingest.ts +214 -0
  71. package/scripts/load-seed-data.ts +133 -0
  72. package/scripts/quality-test.ts +346 -0
  73. package/scripts/test-mcp-tools.ts +187 -0
  74. package/scripts/test-remaining-tools.ts +107 -0
  75. package/src/index.ts +55 -0
  76. package/src/ingest/adapters/california-leginfo.ts +322 -0
  77. package/src/ingest/adapters/ecfr.ts +403 -0
  78. package/src/ingest/adapters/regulations-gov.ts +112 -0
  79. package/src/ingest/framework.ts +92 -0
  80. package/src/tools/action-items.ts +164 -0
  81. package/src/tools/applicability.ts +91 -0
  82. package/src/tools/compare.ts +61 -0
  83. package/src/tools/definitions.ts +79 -0
  84. package/src/tools/evidence.ts +53 -0
  85. package/src/tools/list.ts +120 -0
  86. package/src/tools/map.ts +100 -0
  87. package/src/tools/registry.ts +275 -0
  88. package/src/tools/search.ts +132 -0
  89. package/src/tools/section.ts +85 -0
@@ -0,0 +1,322 @@
1
+ /**
2
+ * California Legislative Information Adapter
3
+ *
4
+ * Fetches CCPA/CPRA regulations from California LegInfo.
5
+ * Source: California Civil Code § 1798.100-1798.199
6
+ *
7
+ * PRODUCTION IMPLEMENTATION
8
+ * Uses HTML scraping with fail-fast DOM validation
9
+ */
10
+
11
+ import {
12
+ SourceAdapter,
13
+ RegulationMetadata,
14
+ Section,
15
+ Definition,
16
+ UpdateStatus,
17
+ } from '../framework.js';
18
+ import * as cheerio from 'cheerio';
19
+
20
/**
 * Error type used to signal that a scraped page did not have the expected
 * DOM structure. Callers distinguish it from other failures with
 * `instanceof ScrapingError`.
 */
class ScrapingError extends Error {
  // Own-property override of Error's default name, set on every instance.
  readonly name: string = 'ScrapingError';

  constructor(message: string) {
    super(message);
  }
}
29
+
30
/**
 * Thresholds describing what a scraped page is expected to contain.
 *
 * NOTE(review): nothing in the visible part of this file references this
 * interface; the field meanings below are inferred from their names only.
 */
interface DOMSchema {
  /** Presumably the smallest number of sections a valid page should yield. */
  minExpectedSections: number;

  /** Presumably the smallest acceptable length of extracted section text. */
  minTextLength: number;
}
37
+
38
/**
 * Adapter for fetching CCPA/CPRA from California Legislative Information.
 *
 * Section text is scraped from the per-section HTML page
 * (`codes_displaySection.xhtml`). A page without the expected content
 * container raises a `ScrapingError`, which aborts the whole run; any other
 * per-section failure is logged and that section is skipped.
 */
export class CaliforniaLeginfoAdapter implements SourceAdapter {
  /** Sections whose cleaned text is shorter than this are treated as empty/truncated. */
  private static readonly MIN_SECTION_TEXT_LENGTH = 100;

  /** Element ids that may hold the section body, in order of preference. */
  private static readonly CONTENT_SELECTOR = '#single_law_section, #codeLawSectionNoHead';

  private readonly regulationId: string;
  // NOTE(review): the two range fields are stored but not read anywhere in
  // this class; the section list in fetchSections() is hard-coded.
  private readonly civilCodeStart: number;
  private readonly civilCodeEnd: number;

  /**
   * @param regulationId   Identifier reported as `id` in the metadata.
   * @param civilCodeStart First Civil Code section of the range (currently unused).
   * @param civilCodeEnd   Last Civil Code section of the range (currently unused).
   */
  constructor(regulationId: string, civilCodeStart: number, civilCodeEnd: number) {
    this.regulationId = regulationId;
    this.civilCodeStart = civilCodeStart;
    this.civilCodeEnd = civilCodeEnd;
  }

  /**
   * Fetch CCPA metadata.
   *
   * PLACEHOLDER: every field except `id` is a hard-coded constant; no network
   * request is made.
   * TODO: Integrate with California LegInfo to fetch live metadata
   *
   * @returns Static CCPA metadata with `id` set to this adapter's regulation id.
   */
  async fetchMetadata(): Promise<RegulationMetadata> {
    return {
      id: this.regulationId,
      full_name: 'California Consumer Privacy Act',
      citation: 'Cal. Civ. Code § 1798.100-1798.199',
      effective_date: '2020-01-01',
      last_amended: '2023-01-01',
      source_url: 'https://leginfo.legislature.ca.gov/faces/codes_displayText.xhtml?division=3.&part=4.&lawCode=CIV&title=1.81.5',
      jurisdiction: 'california',
      regulation_type: 'statute',
    };
  }

  /**
   * Fetch all CCPA sections, yielding them in batches of up to 10.
   *
   * Each section is requested from the `codes_displaySection.xhtml` endpoint
   * with a 500ms pause before every request.
   *
   * @yields Arrays of parsed sections (at most 10 per batch; the final batch
   *         holds whatever remains).
   * @throws ScrapingError when a fetched page fails DOM validation; all other
   *         per-section errors are logged and the section is skipped.
   */
  async *fetchSections(): AsyncGenerator<Section[]> {
    const BATCH_SIZE = 10;
    const REQUEST_DELAY_MS = 500;
    const pending: Section[] = [];

    // CCPA main sections (based on actual structure)
    // Use strings to preserve trailing zeros (1798.100 not 1798.1)
    const sectionNumbers: readonly string[] = [
      '1798.100', '1798.105', '1798.110', '1798.115', '1798.120', '1798.121', '1798.125',
      '1798.130', '1798.135', '1798.140', '1798.145', '1798.150', '1798.155', '1798.160',
      '1798.175', '1798.180', '1798.185', '1798.190', '1798.192', '1798.194', '1798.196',
      '1798.198', '1798.199',
    ];

    console.log(`Fetching ${sectionNumbers.length} CCPA sections from California LegInfo...`);

    for (const sectionNum of sectionNumbers) {
      try {
        // Use displaySection endpoint which has cleaner structure
        const url = `https://leginfo.legislature.ca.gov/faces/codes_displaySection.xhtml?lawCode=CIV&sectionNum=${sectionNum}`;

        // Polite delay between requests
        await this.sleep(REQUEST_DELAY_MS);
        const response = await this.fetchWithRetry(url);
        const html = await response.text();

        const $ = cheerio.load(html);
        this.validateDOM($);

        const section = this.parseSection($, sectionNum);
        if (section) {
          pending.push(section);
          console.log(` Fetched § ${sectionNum}`);
        }
      } catch (error) {
        if (error instanceof ScrapingError) {
          console.error(`Scraping failed for § ${sectionNum}:`, error.message);
          throw error; // Fail fast on DOM structure issues
        }
        console.warn(`Failed to fetch § ${sectionNum}, continuing...`, error);
      }

      // Yield outside the try block so an error thrown into the generator by
      // the consumer is not mistaken for a fetch failure and swallowed.
      if (pending.length >= BATCH_SIZE) {
        yield pending.splice(0, BATCH_SIZE);
      }
    }

    if (pending.length > 0) {
      yield pending;
    }
  }

  /**
   * Validate DOM structure with fail-fast assertions.
   *
   * @param $ Parsed document.
   * @throws ScrapingError when neither content container is present, or when
   *         the title contains "Error" or the body text contains "not found".
   */
  private validateDOM($: cheerio.Root): void {
    const sectionContent = $(CaliforniaLeginfoAdapter.CONTENT_SELECTOR);

    if (sectionContent.length === 0) {
      throw new ScrapingError(
        'DOM structure changed: no section content found. Expected #single_law_section or #codeLawSectionNoHead'
      );
    }

    // NOTE(review): the body check is a plain substring match, so statute text
    // that itself contains "not found" would also trip it.
    if ($('title').text().includes('Error') || $('body').text().includes('not found')) {
      throw new ScrapingError('Section not found or error page returned');
    }
  }

  /**
   * Parse a section from a validated document.
   *
   * @param $          Parsed document.
   * @param sectionNum Section number to record on the result.
   * @returns The parsed section, or `null` when no content container exists
   *          or the cleaned text is shorter than 100 characters.
   */
  private parseSection($: cheerio.Root, sectionNum: string): Section | null {
    // Same containers validateDOM() accepts. first() picks the earliest match
    // in document order so the text of two matching containers is never
    // concatenated.
    const sectionDiv = $(CaliforniaLeginfoAdapter.CONTENT_SELECTOR).first();
    if (sectionDiv.length === 0) {
      console.warn(`Section § ${sectionNum} not found in HTML`);
      return null;
    }

    // Collapse every whitespace run (including newlines) to a single space.
    const text = sectionDiv.text().trim().replace(/\s+/g, ' ');

    if (!text || text.length < CaliforniaLeginfoAdapter.MIN_SECTION_TEXT_LENGTH) {
      console.warn(`Section § ${sectionNum} text too short (${text.length} chars) - may be empty or truncated`);
      return null;
    }

    // Title comes from the first <h6> or <p> inside the container, with a
    // leading section number (e.g. "1798.100." or "1798.199.10.") stripped.
    let title: string | undefined;
    const titleElement = sectionDiv.find('h6, p').first();
    if (titleElement.length > 0) {
      title = titleElement.text().trim().replace(/^1798\.\d+(?:\.\d+)*\.?\s*/, '');
    }

    const crossReferences = this.extractCrossReferences(text);

    return {
      sectionNumber: sectionNum,
      title,
      text,
      chapter: 'Title 1.81.5 - California Consumer Privacy Act',
      parentSection: undefined,
      crossReferences: crossReferences.length > 0 ? crossReferences : undefined,
    };
  }

  /**
   * Extract cross-references from text.
   *
   * Recognises "Section 1798.X", "Sections 1798.X", "§ 1798.X" and "§§ 1798.X"
   * (whitespace after the marker optional), including multi-part numbers such
   * as 1798.199.10. For a plural list only the first number after the marker
   * is captured.
   *
   * @param text Section text to scan.
   * @returns Distinct section numbers in order of first appearance.
   */
  private extractCrossReferences(text: string): string[] {
    const pattern = /(?:Sections?|§§?)\s*(1798\.\d+(?:\.\d+)*)/g;
    const refs = new Set<string>();

    let match: RegExpExecArray | null;
    while ((match = pattern.exec(text)) !== null) {
      const ref = match[1];
      if (ref) {
        refs.add(ref);
      }
    }

    return Array.from(refs);
  }

  /**
   * Fetch with retry logic and exponential backoff.
   *
   * Network errors, HTTP 429 and HTTP 5xx are retried; any other non-OK
   * status fails immediately because repeating the request cannot succeed.
   * No delay is spent after the final attempt.
   *
   * @param url        URL to request.
   * @param maxRetries Maximum number of attempts (default 3).
   * @returns The first OK response.
   * @throws Error with `HTTP <status>: <statusText>` for a non-retryable or
   *         final non-OK response, the underlying error when the last attempt
   *         fails at the network level, or `Max retries exceeded` when
   *         `maxRetries` allows no attempt.
   */
  private async fetchWithRetry(url: string, maxRetries = 3): Promise<Response> {
    for (let attempt = 0; attempt < maxRetries; attempt++) {
      const isLastAttempt = attempt === maxRetries - 1;

      let response: Response;
      try {
        response = await fetch(url);
      } catch (error) {
        if (isLastAttempt) throw error;
        const delay = 1000 * 2 ** attempt;
        console.warn(`Fetch failed, retrying in ${delay}ms...`, error);
        await this.sleep(delay);
        continue;
      }

      if (response.ok) {
        return response;
      }

      const httpError = new Error(`HTTP ${response.status}: ${response.statusText}`);
      const rateLimited = response.status === 429;
      const retryable = rateLimited || response.status >= 500;
      if (!retryable || isLastAttempt) {
        throw httpError;
      }

      if (rateLimited) {
        // Exponential backoff with up to 1s of jitter, capped at 30s.
        const delay = Math.round(Math.min(1000 * 2 ** attempt + Math.random() * 1000, 30000));
        console.warn(`Rate limited, retrying in ${delay}ms...`);
        await this.sleep(delay);
      } else {
        const delay = 1000 * 2 ** attempt;
        console.warn(`Fetch failed, retrying in ${delay}ms...`, httpError);
        await this.sleep(delay);
      }
    }

    throw new Error('Max retries exceeded');
  }

  /**
   * Sleep helper.
   *
   * @param ms Delay in milliseconds.
   * @returns A promise that resolves after the timer fires.
   */
  private sleep(ms: number): Promise<void> {
    return new Promise(resolve => setTimeout(resolve, ms));
  }

  /**
   * Check for updates since last fetch.
   *
   * Compares the `last-modified` response header of a sample page with
   * `lastFetched`. This is best-effort: when the request fails, the response
   * is not OK, or the header is missing or unparseable, the result reports no
   * changes and uses the current time as `lastModified`.
   *
   * @param lastFetched Time of the previous successful fetch.
   * @returns Update status; never rejects.
   */
  async checkForUpdates(lastFetched: Date): Promise<UpdateStatus> {
    const noChanges = (): UpdateStatus => ({
      hasChanges: false,
      lastModified: new Date(),
      changes: [],
    });

    try {
      // Fetch a sample section to check for updates
      const url = 'https://leginfo.legislature.ca.gov/faces/codes_displayText.xhtml?lawCode=CIV&division=3.&part=4.&section=1798.100';
      const response = await fetch(url);

      if (!response.ok) {
        console.warn(`California LegInfo returned HTTP ${response.status} while checking for updates`);
        return noChanges();
      }

      const lastModifiedHeader = response.headers.get('last-modified');
      if (!lastModifiedHeader) {
        console.warn('No last-modified header from California LegInfo');
        return noChanges();
      }

      const lastModified = new Date(lastModifiedHeader);
      if (Number.isNaN(lastModified.getTime())) {
        // Avoid handing an Invalid Date back to the caller.
        console.warn(`Unparseable last-modified header from California LegInfo: ${lastModifiedHeader}`);
        return noChanges();
      }

      const hasChanges = lastModified > lastFetched;
      return {
        hasChanges,
        lastModified,
        changes: hasChanges
          ? [`California Civil Code § 1798 (CCPA) may have been updated on ${lastModified.toISOString()}`]
          : [],
      };
    } catch (error) {
      console.error('Error checking for updates:', error);
      return noChanges();
    }
  }

  /**
   * Extract definitions from CCPA sections.
   *
   * Not implemented yet: always resolves to an empty array.
   *
   * @returns An empty list.
   */
  async extractDefinitions(): Promise<Definition[]> {
    // TODO: Implement definition extraction
    // CCPA definitions are primarily in § 1798.140
    // Format: "(a) 'Term' means definition."
    // Would need careful parsing to extract all term-definition pairs
    return [];
  }
}
316
+
317
/**
 * Factory for the CCPA adapter: a `CaliforniaLeginfoAdapter` constructed with
 * regulation id 'CCPA' and the Civil Code bounds 1798.100 and 1798.199.
 *
 * @returns A new adapter instance.
 */
export function createCcpaAdapter(): CaliforniaLeginfoAdapter {
  const regulationId = 'CCPA';
  // Numeric literals: 1798.100 is the same number as 1798.1.
  const firstSection = 1798.100;
  const lastSection = 1798.199;

  return new CaliforniaLeginfoAdapter(regulationId, firstSection, lastSection);
}