@ansvar/us-regulations-mcp 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +190 -0
- package/README.md +275 -0
- package/data/.gitkeep +0 -0
- package/data/regulations.db +0 -0
- package/data/seed/applicability/rules.json +74 -0
- package/data/seed/mappings/ccpa-nist-csf.json +144 -0
- package/data/seed/mappings/hipaa-nist-800-53.json +377 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +41 -0
- package/dist/index.js.map +1 -0
- package/dist/ingest/adapters/california-leginfo.d.ts +72 -0
- package/dist/ingest/adapters/california-leginfo.d.ts.map +1 -0
- package/dist/ingest/adapters/california-leginfo.js +270 -0
- package/dist/ingest/adapters/california-leginfo.js.map +1 -0
- package/dist/ingest/adapters/ecfr.d.ts +76 -0
- package/dist/ingest/adapters/ecfr.d.ts.map +1 -0
- package/dist/ingest/adapters/ecfr.js +355 -0
- package/dist/ingest/adapters/ecfr.js.map +1 -0
- package/dist/ingest/adapters/regulations-gov.d.ts +47 -0
- package/dist/ingest/adapters/regulations-gov.d.ts.map +1 -0
- package/dist/ingest/adapters/regulations-gov.js +91 -0
- package/dist/ingest/adapters/regulations-gov.js.map +1 -0
- package/dist/ingest/framework.d.ts +84 -0
- package/dist/ingest/framework.d.ts.map +1 -0
- package/dist/ingest/framework.js +8 -0
- package/dist/ingest/framework.js.map +1 -0
- package/dist/tools/action-items.d.ts +23 -0
- package/dist/tools/action-items.d.ts.map +1 -0
- package/dist/tools/action-items.js +118 -0
- package/dist/tools/action-items.js.map +1 -0
- package/dist/tools/applicability.d.ts +26 -0
- package/dist/tools/applicability.d.ts.map +1 -0
- package/dist/tools/applicability.js +49 -0
- package/dist/tools/applicability.js.map +1 -0
- package/dist/tools/compare.d.ts +20 -0
- package/dist/tools/compare.d.ts.map +1 -0
- package/dist/tools/compare.js +35 -0
- package/dist/tools/compare.js.map +1 -0
- package/dist/tools/definitions.d.ts +22 -0
- package/dist/tools/definitions.d.ts.map +1 -0
- package/dist/tools/definitions.js +43 -0
- package/dist/tools/definitions.js.map +1 -0
- package/dist/tools/evidence.d.ts +23 -0
- package/dist/tools/evidence.d.ts.map +1 -0
- package/dist/tools/evidence.js +27 -0
- package/dist/tools/evidence.js.map +1 -0
- package/dist/tools/list.d.ts +25 -0
- package/dist/tools/list.d.ts.map +1 -0
- package/dist/tools/list.js +66 -0
- package/dist/tools/list.js.map +1 -0
- package/dist/tools/map.d.ts +26 -0
- package/dist/tools/map.d.ts.map +1 -0
- package/dist/tools/map.js +58 -0
- package/dist/tools/map.js.map +1 -0
- package/dist/tools/registry.d.ts +19 -0
- package/dist/tools/registry.d.ts.map +1 -0
- package/dist/tools/registry.js +260 -0
- package/dist/tools/registry.js.map +1 -0
- package/dist/tools/search.d.ts +15 -0
- package/dist/tools/search.d.ts.map +1 -0
- package/dist/tools/search.js +94 -0
- package/dist/tools/search.js.map +1 -0
- package/dist/tools/section.d.ts +19 -0
- package/dist/tools/section.d.ts.map +1 -0
- package/dist/tools/section.js +50 -0
- package/dist/tools/section.js.map +1 -0
- package/package.json +76 -0
- package/scripts/build-db.ts +268 -0
- package/scripts/ingest.ts +214 -0
- package/scripts/load-seed-data.ts +133 -0
- package/scripts/quality-test.ts +346 -0
- package/scripts/test-mcp-tools.ts +187 -0
- package/scripts/test-remaining-tools.ts +107 -0
- package/src/index.ts +55 -0
- package/src/ingest/adapters/california-leginfo.ts +322 -0
- package/src/ingest/adapters/ecfr.ts +403 -0
- package/src/ingest/adapters/regulations-gov.ts +112 -0
- package/src/ingest/framework.ts +92 -0
- package/src/tools/action-items.ts +164 -0
- package/src/tools/applicability.ts +91 -0
- package/src/tools/compare.ts +61 -0
- package/src/tools/definitions.ts +79 -0
- package/src/tools/evidence.ts +53 -0
- package/src/tools/list.ts +120 -0
- package/src/tools/map.ts +100 -0
- package/src/tools/registry.ts +275 -0
- package/src/tools/search.ts +132 -0
- package/src/tools/section.ts +85 -0
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
#!/usr/bin/env npx tsx
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Build the regulations.db SQLite database for US regulations.
|
|
5
|
+
* Run with: npm run build:db
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import Database from 'better-sqlite3';
|
|
9
|
+
import { readFileSync, existsSync, mkdirSync, unlinkSync, readdirSync } from 'fs';
|
|
10
|
+
import { join, dirname } from 'path';
|
|
11
|
+
import { fileURLToPath } from 'url';
|
|
12
|
+
|
|
13
|
+
// ES modules have no built-in __filename/__dirname, so derive them from import.meta.url.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

// All paths are resolved relative to this script's own directory (../data).
const DATA_DIR = join(__dirname, '..', 'data');
// Directory whose top-level *.json files buildDatabase() loads as regulation seeds.
const SEED_DIR = join(DATA_DIR, 'seed');
// SQLite file that buildDatabase() deletes (if present) and recreates.
const DB_PATH = join(DATA_DIR, 'regulations.db');
|
|
19
|
+
|
|
20
|
+
/**
 * Full DDL for regulations.db, executed once against a freshly created database.
 *
 * `sections_fts` is an external-content FTS5 index over `sections`; the three
 * triggers keep it in sync for plain INSERT / UPDATE / DELETE statements.
 *
 * `applicability_rules` carries an extra unique expression index because SQLite
 * treats NULLs as distinct inside a UNIQUE constraint: without it, rules whose
 * `subsector` is NULL are never seen as duplicates and pile up on every reload.
 */
const SCHEMA = `
-- Core regulation metadata
CREATE TABLE IF NOT EXISTS regulations (
  id TEXT PRIMARY KEY,
  full_name TEXT NOT NULL,
  short_name TEXT,
  citation TEXT NOT NULL,
  effective_date TEXT,
  last_amended TEXT,
  source_url TEXT,
  jurisdiction TEXT,
  regulation_type TEXT
);

-- Sections table (equivalent to articles in EU regulations)
CREATE TABLE IF NOT EXISTS sections (
  rowid INTEGER PRIMARY KEY,
  regulation TEXT NOT NULL REFERENCES regulations(id),
  section_number TEXT NOT NULL,
  title TEXT,
  text TEXT NOT NULL,
  part TEXT,
  subpart TEXT,
  chapter TEXT,
  parent_section TEXT,
  cross_references TEXT,
  UNIQUE(regulation, section_number)
);

-- FTS5 virtual table for full-text search with Porter stemming
CREATE VIRTUAL TABLE IF NOT EXISTS sections_fts USING fts5(
  regulation,
  section_number,
  title,
  text,
  content='sections',
  content_rowid='rowid',
  tokenize='porter unicode61'
);

-- FTS5 triggers
CREATE TRIGGER IF NOT EXISTS sections_ai AFTER INSERT ON sections BEGIN
  INSERT INTO sections_fts(rowid, regulation, section_number, title, text)
  VALUES (new.rowid, new.regulation, new.section_number, new.title, new.text);
END;

CREATE TRIGGER IF NOT EXISTS sections_ad AFTER DELETE ON sections BEGIN
  INSERT INTO sections_fts(sections_fts, rowid, regulation, section_number, title, text)
  VALUES('delete', old.rowid, old.regulation, old.section_number, old.title, old.text);
END;

CREATE TRIGGER IF NOT EXISTS sections_au AFTER UPDATE ON sections BEGIN
  INSERT INTO sections_fts(sections_fts, rowid, regulation, section_number, title, text)
  VALUES('delete', old.rowid, old.regulation, old.section_number, old.title, old.text);
  INSERT INTO sections_fts(rowid, regulation, section_number, title, text)
  VALUES (new.rowid, new.regulation, new.section_number, new.title, new.text);
END;

-- Definitions
CREATE TABLE IF NOT EXISTS definitions (
  id INTEGER PRIMARY KEY,
  regulation TEXT NOT NULL REFERENCES regulations(id),
  term TEXT NOT NULL,
  definition TEXT NOT NULL,
  section TEXT NOT NULL,
  UNIQUE(regulation, term)
);

-- Control mappings (e.g., to NIST, ISO, etc.)
CREATE TABLE IF NOT EXISTS control_mappings (
  id INTEGER PRIMARY KEY,
  framework TEXT NOT NULL DEFAULT 'NIST_CSF',
  control_id TEXT NOT NULL,
  control_name TEXT NOT NULL,
  regulation TEXT NOT NULL REFERENCES regulations(id),
  sections TEXT NOT NULL,
  coverage TEXT CHECK(coverage IN ('full', 'partial', 'related')),
  notes TEXT,
  confidence INTEGER,
  generated_by TEXT,
  UNIQUE(framework, control_id, regulation, sections)
);

-- Applicability rules (which sectors/industries each regulation applies to)
CREATE TABLE IF NOT EXISTS applicability_rules (
  id INTEGER PRIMARY KEY,
  regulation TEXT NOT NULL REFERENCES regulations(id),
  sector TEXT NOT NULL,
  subsector TEXT,
  applies INTEGER NOT NULL,
  confidence TEXT CHECK(confidence IN ('definite', 'likely', 'possible')),
  basis_section TEXT,
  notes TEXT,
  rationale TEXT,
  UNIQUE(regulation, sector, subsector)
);

-- NULLs are distinct in the UNIQUE constraint above, so rules with a NULL
-- subsector need this expression index to be deduplicated.
CREATE UNIQUE INDEX IF NOT EXISTS idx_applicability_rules_scope
  ON applicability_rules(regulation, sector, IFNULL(subsector, ''));

-- Source registry for tracking data quality
CREATE TABLE IF NOT EXISTS source_registry (
  regulation TEXT PRIMARY KEY REFERENCES regulations(id),
  source_type TEXT NOT NULL,
  source_url TEXT NOT NULL,
  api_endpoint TEXT,
  last_fetched TEXT,
  sections_expected INTEGER,
  sections_parsed INTEGER,
  quality_status TEXT CHECK(quality_status IN ('complete', 'review', 'incomplete')),
  notes TEXT
);
`;
|
|
130
|
+
|
|
131
|
+
/**
 * Shape of one regulation seed file (a top-level *.json file in the seed
 * directory) as read by buildDatabase().
 */
interface RegulationSeed {
  /** Written to regulations.id and reused as the `regulation` key on child rows. */
  id: string;
  full_name: string;
  short_name?: string;
  citation: string;
  effective_date?: string;
  source_url?: string;
  jurisdiction?: string;
  regulation_type?: string;
  /** One entry per row inserted into the sections table. */
  sections: {
    /** Stored as sections.section_number. */
    number: string;
    title?: string;
    text: string;
    part?: string;
    subpart?: string;
    chapter?: string;
    parent_section?: string;
    /** Serialized with JSON.stringify before being stored. */
    cross_references?: string[];
  }[];
  /** Optional glossary entries inserted into the definitions table. */
  definitions?: {
    term: string;
    definition: string;
    section: string;
  }[];
}
|
|
156
|
+
|
|
157
|
+
/**
 * Rebuild regulations.db from scratch.
 *
 * Steps: ensure the data directory exists, delete any existing database file,
 * create the schema, then load every top-level `*.json` file in the seed
 * directory as one regulation (metadata, sections, definitions and a
 * source_registry row).
 *
 * Each seed file is written inside its own transaction, so a malformed file
 * cannot leave a half-inserted regulation behind, and the database handle is
 * always closed, even when loading throws.
 */
function buildDatabase() {
  console.log('Building US regulations database...');

  // Ensure data directory exists
  if (!existsSync(DATA_DIR)) {
    mkdirSync(DATA_DIR, { recursive: true });
  }

  // Delete existing database
  if (existsSync(DB_PATH)) {
    console.log('Removing existing database...');
    unlinkSync(DB_PATH);
  }

  // Create new database
  const db = new Database(DB_PATH);

  try {
    db.pragma('foreign_keys = ON');

    // Create schema
    console.log('Creating schema...');
    db.exec(SCHEMA);

    // Load and insert seed files
    if (existsSync(SEED_DIR)) {
      // Statements are prepared once (after the schema exists) and reused for every file.
      const insertRegulation = db.prepare(`
        INSERT INTO regulations (id, full_name, short_name, citation, effective_date, source_url, jurisdiction, regulation_type)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
      `);
      const insertSection = db.prepare(`
        INSERT INTO sections (regulation, section_number, title, text, part, subpart, chapter, parent_section, cross_references)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
      `);
      const insertDefinition = db.prepare(`
        INSERT OR IGNORE INTO definitions (regulation, term, definition, section)
        VALUES (?, ?, ?, ?)
      `);
      const insertSource = db.prepare(`
        INSERT INTO source_registry (regulation, source_type, source_url, api_endpoint, last_fetched, sections_expected, sections_parsed, quality_status)
        VALUES (?, ?, ?, ?, ?, ?, ?, 'complete')
      `);

      // All rows of one regulation are committed together or not at all.
      const loadRegulation = db.transaction((regulation: RegulationSeed) => {
        insertRegulation.run(
          regulation.id,
          regulation.full_name,
          regulation.short_name || null,
          regulation.citation,
          regulation.effective_date || null,
          regulation.source_url || null,
          regulation.jurisdiction || null,
          regulation.regulation_type || null
        );

        for (const section of regulation.sections) {
          insertSection.run(
            regulation.id,
            section.number,
            section.title || null,
            section.text,
            section.part || null,
            section.subpart || null,
            section.chapter || null,
            section.parent_section || null,
            section.cross_references ? JSON.stringify(section.cross_references) : null
          );
        }

        for (const def of regulation.definitions ?? []) {
          insertDefinition.run(regulation.id, def.term, def.definition, def.section);
        }

        // Source type is a heuristic on the URL text: "api" wins, then ".pdf", else "html".
        const isApi = regulation.source_url?.includes('api') ?? false;
        const sourceType = isApi ? 'api' : regulation.source_url?.includes('.pdf') ? 'pdf' : 'html';
        insertSource.run(
          regulation.id,
          sourceType,
          regulation.source_url || '',
          isApi ? regulation.source_url : null,
          new Date().toISOString(),
          regulation.sections.length,
          regulation.sections.length
        );
      });

      const seedFiles = readdirSync(SEED_DIR).filter((f: string) => f.endsWith('.json'));

      for (const file of seedFiles) {
        if (file.startsWith('mappings')) continue;

        console.log(`Loading ${file}...`);
        const content = readFileSync(join(SEED_DIR, file), 'utf-8');
        const regulation: RegulationSeed = JSON.parse(content);

        loadRegulation(regulation);

        console.log(` Loaded ${regulation.sections.length} sections, ${regulation.definitions?.length || 0} definitions`);
      }

      // Note: Control mappings and applicability rules are loaded separately
      // using scripts/load-seed-data.ts after data ingestion
      console.log('\nNote: Run "npm run load-seed" to load control mappings and applicability rules');
    } else {
      console.log('No seed directory found. Database created with empty tables.');
      console.log(`Create seed files in: ${SEED_DIR}`);
    }
  } finally {
    // Release the file handle even if schema creation or a seed file failed.
    db.close();
  }

  console.log(`\nDatabase created at: ${DB_PATH}`);
}

buildDatabase();
|
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
#!/usr/bin/env npx tsx
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Ingestion Orchestrator
|
|
5
|
+
*
|
|
6
|
+
* Coordinates all three regulation adapters (HIPAA, CCPA, SOX) to populate the database.
|
|
7
|
+
* Run with: npx tsx scripts/ingest.ts
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import Database from 'better-sqlite3';
|
|
11
|
+
import { join, dirname } from 'path';
|
|
12
|
+
import { fileURLToPath } from 'url';
|
|
13
|
+
import { createHipaaAdapter } from '../src/ingest/adapters/ecfr.js';
|
|
14
|
+
import { createCcpaAdapter } from '../src/ingest/adapters/california-leginfo.js';
|
|
15
|
+
import { createSoxAdapter } from '../src/ingest/adapters/regulations-gov.js';
|
|
16
|
+
import type { SourceAdapter } from '../src/ingest/framework.js';
|
|
17
|
+
|
|
18
|
+
// ES modules have no built-in __filename/__dirname, so derive them from import.meta.url.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

// SQLite database opened by ingestAll(), resolved relative to this script (../data/regulations.db).
const DB_PATH = join(__dirname, '..', 'data', 'regulations.db');
|
|
22
|
+
|
|
23
|
+
/** Outcome of ingesting one regulation, as reported in the summary table. */
interface IngestResult {
  /** Regulation identifier the run was started with. */
  regulation: string;
  /** False when ingestion threw; `error` then carries the message. */
  success: boolean;
  /** Failure message; only set when `success` is false. */
  error?: string;
  /** Number of section rows written (0 on failure). */
  sections_added: number;
  /** Number of definition rows written (0 on failure). */
  definitions_added: number;
  /** Wall-clock time of the whole ingest attempt, in milliseconds. */
  duration_ms: number;
}
|
|
31
|
+
|
|
32
|
+
/**
 * Ingest a single regulation.
 *
 * All data (metadata, every section batch, definitions) is fetched from the
 * adapter first; the rows are then written in one synchronous better-sqlite3
 * transaction. Any error is caught, logged and reported through the returned
 * result rather than thrown.
 *
 * @param db - Open database handle; the tables written here must already exist.
 * @param regulationId - Identifier stored on the section, definition and source_registry rows.
 * @param adapter - Source adapter supplying metadata, sections and definitions.
 * @returns Row counts and duration on success; `success: false` with the error message on failure.
 */
async function ingestRegulation(
  db: Database.Database,
  regulationId: string,
  adapter: SourceAdapter
): Promise<IngestResult> {
  // Batch type yielded by adapter.fetchSections(), derived from the adapter
  // interface. (The loop variable itself is not in scope outside its `for await`,
  // so `typeof sectionBatch` cannot be used for this.)
  type SectionStream = ReturnType<SourceAdapter['fetchSections']>;
  type SectionBatch = SectionStream extends AsyncIterable<infer Batch>
    ? Batch
    : SectionStream extends Iterable<infer Batch>
      ? Batch
      : never;

  const startTime = Date.now();

  try {
    console.log(`\n📥 Ingesting ${regulationId}...`);

    let sectionsAdded = 0;
    let definitionsAdded = 0;

    // Fetch all data first (async operations)
    console.log(` Fetching metadata...`);
    const metadata = await adapter.fetchMetadata();

    console.log(` Fetching sections...`);
    const allSections: SectionBatch = [];
    for await (const sectionBatch of adapter.fetchSections()) {
      allSections.push(...sectionBatch);
      console.log(` ${allSections.length} sections...`);
    }

    console.log(` Fetching definitions...`);
    const definitions = await adapter.extractDefinitions();

    // Then insert everything in one synchronous transaction
    console.log(` Writing to database...`);
    const insertTransaction = db.transaction(() => {
      // 1. Insert regulation metadata
      // NOTE(review): this row is keyed by metadata.id while the child rows below
      // use regulationId — presumably the two always match; confirm against the adapters.
      db.prepare(`
        INSERT OR REPLACE INTO regulations
        (id, full_name, short_name, citation, effective_date, last_amended, source_url, jurisdiction, regulation_type)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
      `).run(
        metadata.id,
        metadata.full_name,
        null, // short_name
        metadata.citation,
        metadata.effective_date,
        metadata.last_amended,
        metadata.source_url,
        metadata.jurisdiction,
        metadata.regulation_type
      );

      // 2. Insert sections
      // An upsert is used instead of INSERT OR REPLACE: REPLACE removes the
      // conflicting row without firing DELETE triggers (recursive triggers are off
      // by default), which would leave stale entries in the trigger-maintained
      // full-text index. DO UPDATE keeps the rowid and fires the UPDATE trigger.
      const insertSection = db.prepare(`
        INSERT INTO sections
        (regulation, section_number, title, text, part, subpart, chapter, parent_section, cross_references)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
        ON CONFLICT(regulation, section_number) DO UPDATE SET
          title = excluded.title,
          text = excluded.text,
          part = excluded.part,
          subpart = excluded.subpart,
          chapter = excluded.chapter,
          parent_section = excluded.parent_section,
          cross_references = excluded.cross_references
      `);

      for (const section of allSections) {
        insertSection.run(
          regulationId,
          section.sectionNumber,
          section.title || null,
          section.text,
          null, // part (deprecated, using chapter)
          null, // subpart (deprecated, using chapter)
          section.chapter || null,
          section.parentSection || null,
          section.crossReferences ? JSON.stringify(section.crossReferences) : null
        );
        sectionsAdded++;
      }

      // 3. Insert definitions
      const insertDef = db.prepare(`
        INSERT OR REPLACE INTO definitions (regulation, term, definition, section)
        VALUES (?, ?, ?, ?)
      `);

      for (const def of definitions) {
        insertDef.run(regulationId, def.term, def.definition, def.section);
        definitionsAdded++;
      }

      // 4. Update source registry
      db.prepare(`
        INSERT OR REPLACE INTO source_registry
        (regulation, source_type, source_url, last_fetched, sections_expected, sections_parsed, quality_status)
        VALUES (?, ?, ?, ?, ?, ?, ?)
      `).run(
        regulationId,
        'api',
        metadata.source_url,
        new Date().toISOString(),
        sectionsAdded,
        sectionsAdded,
        sectionsAdded > 0 ? 'complete' : 'incomplete'
      );
    });

    // Execute transaction
    insertTransaction();

    const duration = Date.now() - startTime;

    console.log(`✅ ${regulationId}: ${sectionsAdded} sections, ${definitionsAdded} definitions (${duration}ms)`);

    return {
      regulation: regulationId,
      success: true,
      sections_added: sectionsAdded,
      definitions_added: definitionsAdded,
      duration_ms: duration,
    };
  } catch (error) {
    const duration = Date.now() - startTime;
    const errorMessage = error instanceof Error ? error.message : String(error);

    console.error(`❌ ${regulationId} failed: ${errorMessage}`);

    // Counts are reported as 0 on failure.
    return {
      regulation: regulationId,
      success: false,
      sections_added: 0,
      definitions_added: 0,
      error: errorMessage,
      duration_ms: duration,
    };
  }
}
|
|
161
|
+
|
|
162
|
+
/**
 * Main ingestion function.
 *
 * Opens the database, runs the HIPAA, CCPA and SOX adapters one after another
 * (a failure in one does not stop the others, because ingestRegulation reports
 * errors in its result), then prints a summary table and totals.
 *
 * @returns One result per regulation, in the order they were ingested.
 */
async function ingestAll(): Promise<IngestResult[]> {
  console.log('🚀 Starting US Compliance MCP Ingestion...\n');
  console.log(`Database: ${DB_PATH}`);

  const db = new Database(DB_PATH);
  const results: IngestResult[] = [];

  const adapters: Array<{ id: string; adapter: SourceAdapter }> = [
    { id: 'HIPAA', adapter: createHipaaAdapter() },
    { id: 'CCPA', adapter: createCcpaAdapter() },
    { id: 'SOX', adapter: createSoxAdapter() },
  ];

  try {
    // Sequential on purpose: each regulation is fetched and written before the next starts.
    for (const { id, adapter } of adapters) {
      const result = await ingestRegulation(db, id, adapter);
      results.push(result);
    }
  } finally {
    // Always release the database handle, even on an unexpected throw.
    db.close();
  }

  console.log('\n📊 Ingestion Summary:');
  console.table(
    results.map(r => ({
      Regulation: r.regulation,
      Status: r.success ? '✅ Success' : '❌ Failed',
      Sections: r.sections_added,
      Definitions: r.definitions_added,
      'Duration (s)': (r.duration_ms / 1000).toFixed(2),
      Error: r.error || '-',
    }))
  );

  const totalSections = results.reduce((sum, r) => sum + r.sections_added, 0);
  const successCount = results.filter(r => r.success).length;

  console.log(`\n✨ Total: ${totalSections} sections from ${successCount}/${adapters.length} regulations`);

  return results;
}
|
|
204
|
+
|
|
205
|
+
// Script entry point: run the full ingestion and map the outcome to the exit
// code — 0 only when every regulation succeeded, 1 otherwise (including an
// unexpected rejection from ingestAll itself).
void (async () => {
  try {
    const outcomes = await ingestAll();
    const everyOk = outcomes.every(outcome => outcome.success);
    process.exit(everyOk ? 0 : 1);
  } catch (error) {
    console.error('Fatal error:', error);
    process.exit(1);
  }
})();
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
#!/usr/bin/env npx tsx
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Load Seed Data
|
|
5
|
+
*
|
|
6
|
+
* Loads pre-generated control mappings and applicability rules into the database
|
|
7
|
+
* Run with: npx tsx scripts/load-seed-data.ts
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import Database from 'better-sqlite3';
|
|
11
|
+
import { readFileSync, readdirSync } from 'fs';
|
|
12
|
+
import { join, dirname } from 'path';
|
|
13
|
+
import { fileURLToPath } from 'url';
|
|
14
|
+
|
|
15
|
+
// ES modules have no built-in __filename/__dirname, so derive them from import.meta.url.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

const DB_PATH = join(__dirname, '..', 'data', 'regulations.db');
const MAPPINGS_DIR = join(__dirname, '..', 'data', 'seed', 'mappings');
const APPLICABILITY_DIR = join(__dirname, '..', 'data', 'seed', 'applicability');

/** One entry of a mapping file's `mappings` array. */
interface Mapping {
  section_number: string;
  /** May be absent; it is then built from function/category/subcategory. */
  control_id?: string;
  /** May be absent; the category (or 'Unknown') is used instead. */
  control_name?: string;
  coverage: 'full' | 'partial' | 'related';
  confidence: number;
  rationale: string;
  function?: string;
  category?: string;
  subcategory?: string;
}

/** One entry of an applicability file's `rules` array. */
interface ApplicabilityRule {
  regulation: string;
  sector: string;
  subsector: string | null;
  confidence: 'definite' | 'likely' | 'possible';
  rationale: string;
}

console.log('🌱 Loading seed data into database...\n');

const db = Database(DB_PATH);

let totalMappings = 0;
let totalRules = 0;

try {
  // Load control mappings
  console.log('📋 Loading control mappings...');

  // Prepared once and reused for every mapping file.
  const insertMapping = db.prepare(`
    INSERT OR REPLACE INTO control_mappings
    (framework, control_id, control_name, regulation, sections, coverage, notes, confidence, generated_by)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
  `);

  const mappingFiles = readdirSync(MAPPINGS_DIR).filter(f => f.endsWith('.json'));

  for (const file of mappingFiles) {
    const filePath = join(MAPPINGS_DIR, file);
    const data: {
      framework: string;
      regulation: string;
      generated_by?: string;
      mappings: Mapping[];
    } = JSON.parse(readFileSync(filePath, 'utf-8'));

    console.log(` ${file}: ${data.mappings.length} mappings`);

    // One transaction per file: either every mapping in the file is stored or none.
    db.transaction(() => {
      for (const mapping of data.mappings) {
        // For NIST CSF, construct control_id from function/category/subcategory
        const controlId = mapping.control_id ||
          (mapping.function && mapping.category ?
            `${mapping.function}.${mapping.category}${mapping.subcategory ? `.${mapping.subcategory}` : ''}` :
            'UNKNOWN');

        const controlName = mapping.control_name ||
          (mapping.category ? mapping.category : 'Unknown');

        insertMapping.run(
          data.framework,
          controlId,
          controlName,
          data.regulation,
          JSON.stringify([mapping.section_number]),
          mapping.coverage,
          mapping.rationale,
          mapping.confidence,
          data.generated_by ?? null
        );
        totalMappings++;
      }
    })();
  }

  console.log(`✅ Loaded ${totalMappings} control mappings\n`);

  // Load applicability rules
  console.log('🎯 Loading applicability rules...');

  // SQLite treats NULLs as distinct in UNIQUE constraints, so INSERT OR REPLACE
  // alone never replaces a rule whose subsector is NULL and re-running this
  // script would duplicate it. `IS ?` matches NULL as well as concrete values,
  // so the old row is removed explicitly before the insert.
  const deleteRule = db.prepare(`
    DELETE FROM applicability_rules
    WHERE regulation = ? AND sector = ? AND subsector IS ?
  `);

  const insertRule = db.prepare(`
    INSERT OR REPLACE INTO applicability_rules
    (regulation, sector, subsector, applies, confidence, rationale)
    VALUES (?, ?, ?, ?, ?, ?)
  `);

  const applicabilityFiles = readdirSync(APPLICABILITY_DIR).filter(f => f.endsWith('.json'));

  for (const file of applicabilityFiles) {
    const filePath = join(APPLICABILITY_DIR, file);
    const data: { rules: ApplicabilityRule[] } = JSON.parse(readFileSync(filePath, 'utf-8'));

    console.log(` ${file}: ${data.rules.length} rules`);

    db.transaction(() => {
      for (const rule of data.rules) {
        const subsector = rule.subsector ?? null;
        deleteRule.run(rule.regulation, rule.sector, subsector);
        insertRule.run(
          rule.regulation,
          rule.sector,
          subsector,
          1, // applies = true
          rule.confidence,
          rule.rationale
        );
        totalRules++;
      }
    })();
  }

  console.log(`✅ Loaded ${totalRules} applicability rules\n`);
} finally {
  // Release the database handle even when a seed file is missing or malformed.
  db.close();
}

console.log('🎉 Seed data loading complete!');
console.log(`\nSummary:`);
console.log(` Control mappings: ${totalMappings}`);
console.log(` Applicability rules: ${totalRules}`);
|