@ansvar/us-regulations-mcp 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. package/LICENSE +190 -0
  2. package/README.md +275 -0
  3. package/data/.gitkeep +0 -0
  4. package/data/regulations.db +0 -0
  5. package/data/seed/applicability/rules.json +74 -0
  6. package/data/seed/mappings/ccpa-nist-csf.json +144 -0
  7. package/data/seed/mappings/hipaa-nist-800-53.json +377 -0
  8. package/dist/index.d.ts +3 -0
  9. package/dist/index.d.ts.map +1 -0
  10. package/dist/index.js +41 -0
  11. package/dist/index.js.map +1 -0
  12. package/dist/ingest/adapters/california-leginfo.d.ts +72 -0
  13. package/dist/ingest/adapters/california-leginfo.d.ts.map +1 -0
  14. package/dist/ingest/adapters/california-leginfo.js +270 -0
  15. package/dist/ingest/adapters/california-leginfo.js.map +1 -0
  16. package/dist/ingest/adapters/ecfr.d.ts +76 -0
  17. package/dist/ingest/adapters/ecfr.d.ts.map +1 -0
  18. package/dist/ingest/adapters/ecfr.js +355 -0
  19. package/dist/ingest/adapters/ecfr.js.map +1 -0
  20. package/dist/ingest/adapters/regulations-gov.d.ts +47 -0
  21. package/dist/ingest/adapters/regulations-gov.d.ts.map +1 -0
  22. package/dist/ingest/adapters/regulations-gov.js +91 -0
  23. package/dist/ingest/adapters/regulations-gov.js.map +1 -0
  24. package/dist/ingest/framework.d.ts +84 -0
  25. package/dist/ingest/framework.d.ts.map +1 -0
  26. package/dist/ingest/framework.js +8 -0
  27. package/dist/ingest/framework.js.map +1 -0
  28. package/dist/tools/action-items.d.ts +23 -0
  29. package/dist/tools/action-items.d.ts.map +1 -0
  30. package/dist/tools/action-items.js +118 -0
  31. package/dist/tools/action-items.js.map +1 -0
  32. package/dist/tools/applicability.d.ts +26 -0
  33. package/dist/tools/applicability.d.ts.map +1 -0
  34. package/dist/tools/applicability.js +49 -0
  35. package/dist/tools/applicability.js.map +1 -0
  36. package/dist/tools/compare.d.ts +20 -0
  37. package/dist/tools/compare.d.ts.map +1 -0
  38. package/dist/tools/compare.js +35 -0
  39. package/dist/tools/compare.js.map +1 -0
  40. package/dist/tools/definitions.d.ts +22 -0
  41. package/dist/tools/definitions.d.ts.map +1 -0
  42. package/dist/tools/definitions.js +43 -0
  43. package/dist/tools/definitions.js.map +1 -0
  44. package/dist/tools/evidence.d.ts +23 -0
  45. package/dist/tools/evidence.d.ts.map +1 -0
  46. package/dist/tools/evidence.js +27 -0
  47. package/dist/tools/evidence.js.map +1 -0
  48. package/dist/tools/list.d.ts +25 -0
  49. package/dist/tools/list.d.ts.map +1 -0
  50. package/dist/tools/list.js +66 -0
  51. package/dist/tools/list.js.map +1 -0
  52. package/dist/tools/map.d.ts +26 -0
  53. package/dist/tools/map.d.ts.map +1 -0
  54. package/dist/tools/map.js +58 -0
  55. package/dist/tools/map.js.map +1 -0
  56. package/dist/tools/registry.d.ts +19 -0
  57. package/dist/tools/registry.d.ts.map +1 -0
  58. package/dist/tools/registry.js +260 -0
  59. package/dist/tools/registry.js.map +1 -0
  60. package/dist/tools/search.d.ts +15 -0
  61. package/dist/tools/search.d.ts.map +1 -0
  62. package/dist/tools/search.js +94 -0
  63. package/dist/tools/search.js.map +1 -0
  64. package/dist/tools/section.d.ts +19 -0
  65. package/dist/tools/section.d.ts.map +1 -0
  66. package/dist/tools/section.js +50 -0
  67. package/dist/tools/section.js.map +1 -0
  68. package/package.json +76 -0
  69. package/scripts/build-db.ts +268 -0
  70. package/scripts/ingest.ts +214 -0
  71. package/scripts/load-seed-data.ts +133 -0
  72. package/scripts/quality-test.ts +346 -0
  73. package/scripts/test-mcp-tools.ts +187 -0
  74. package/scripts/test-remaining-tools.ts +107 -0
  75. package/src/index.ts +55 -0
  76. package/src/ingest/adapters/california-leginfo.ts +322 -0
  77. package/src/ingest/adapters/ecfr.ts +403 -0
  78. package/src/ingest/adapters/regulations-gov.ts +112 -0
  79. package/src/ingest/framework.ts +92 -0
  80. package/src/tools/action-items.ts +164 -0
  81. package/src/tools/applicability.ts +91 -0
  82. package/src/tools/compare.ts +61 -0
  83. package/src/tools/definitions.ts +79 -0
  84. package/src/tools/evidence.ts +53 -0
  85. package/src/tools/list.ts +120 -0
  86. package/src/tools/map.ts +100 -0
  87. package/src/tools/registry.ts +275 -0
  88. package/src/tools/search.ts +132 -0
  89. package/src/tools/section.ts +85 -0
@@ -0,0 +1,275 @@
1
+ import { Server } from '@modelcontextprotocol/sdk/server/index.js';
2
+ import {
3
+ CallToolRequestSchema,
4
+ ListToolsRequestSchema,
5
+ } from '@modelcontextprotocol/sdk/types.js';
6
+ import Database from 'better-sqlite3';
7
+ import { searchRegulations, SearchInput } from './search.js';
8
+ import { getSection, GetSectionInput } from './section.js';
9
+ import { listRegulations, ListInput } from './list.js';
10
+ import { compareRequirements, CompareInput } from './compare.js';
11
+ import { mapControls, MapControlsInput } from './map.js';
12
+ import { checkApplicability, ApplicabilityInput } from './applicability.js';
13
+ import { getDefinitions, DefinitionsInput } from './definitions.js';
14
+ import { getEvidenceRequirements, EvidenceInput } from './evidence.js';
15
+ import { getComplianceActionItems, ActionItemsInput } from './action-items.js';
16
+
17
/**
 * Describes one entry of the tool registry: the metadata advertised to
 * clients plus the function that executes the tool.
 */
export interface ToolDefinition {
  /** Tool name; incoming tool calls are matched against this value. */
  name: string;
  /** Human-readable summary returned when tools are listed. */
  description: string;
  /** JSON-schema-style object describing the accepted arguments. */
  inputSchema: any;
  /** Executes the tool against the given database with the caller's arguments. */
  handler: (db: Database.Database, args: any) => Promise<any>;
}
23
+
24
/** Builds the schema fragment for a plain string argument. */
const stringProperty = (description: string) => ({ type: 'string', description });

/** Builds the schema fragment for an array-of-strings argument. */
const stringListProperty = (description: string) => ({
  type: 'array',
  items: { type: 'string' },
  description,
});

/**
 * Registry of every MCP tool exposed by this package.
 * The stdio and HTTP servers both read from this single list.
 */
export const TOOLS: ToolDefinition[] = [
  {
    name: 'search_regulations',
    description: 'Search across all US regulations using full-text search. Returns relevant sections with highlighted snippets. Token-efficient: returns 32-token snippets with >>> <<< markers around matched terms.',
    inputSchema: {
      type: 'object',
      properties: {
        query: stringProperty('Search query (supports natural language and technical terms)'),
        regulations: stringListProperty('Optional: Filter results to specific regulations (e.g., ["HIPAA", "CCPA"])'),
        limit: {
          type: 'number',
          description: 'Maximum number of results to return (default: 10, max: 1000)',
          default: 10,
        },
      },
      required: ['query'],
    },
    handler: async (db: Database.Database, args: any) => searchRegulations(db, args as SearchInput),
  },
  {
    name: 'get_section',
    description: 'Retrieve the full text of a specific regulation section. Returns section content, metadata, and cross-references. Large sections are automatically truncated with a warning.',
    inputSchema: {
      type: 'object',
      properties: {
        regulation: stringProperty('Regulation ID (e.g., "HIPAA", "CCPA")'),
        section: stringProperty('Section number (e.g., "164.502", "1798.100")'),
      },
      required: ['regulation', 'section'],
    },
    handler: async (db: Database.Database, args: any) => getSection(db, args as GetSectionInput),
  },
  {
    name: 'list_regulations',
    description: 'List all available regulations or get the structure of a specific regulation. Without parameters, returns all regulations with metadata. With a regulation ID, returns chapters and sections organized hierarchically.',
    inputSchema: {
      type: 'object',
      properties: {
        regulation: stringProperty('Optional: Regulation ID to get detailed structure for (e.g., "HIPAA")'),
      },
    },
    handler: async (db: Database.Database, args: any) => listRegulations(db, args as ListInput),
  },
  {
    name: 'compare_requirements',
    description: 'Compare requirements across multiple regulations for a specific topic. Searches each regulation and returns the top matching sections with relevance scores.',
    inputSchema: {
      type: 'object',
      properties: {
        topic: stringProperty('Topic to compare (e.g., "breach notification", "access controls")'),
        regulations: stringListProperty('List of regulations to compare (e.g., ["HIPAA", "CCPA"])'),
      },
      required: ['topic', 'regulations'],
    },
    handler: async (db: Database.Database, args: any) => compareRequirements(db, args as CompareInput),
  },
  {
    name: 'map_controls',
    description: 'Map NIST controls (800-53, CSF) to regulation sections. Shows which regulatory requirements satisfy specific control objectives. Can filter by control ID or regulation.',
    inputSchema: {
      type: 'object',
      properties: {
        framework: stringProperty('Control framework (e.g., "NIST_CSF", "NIST_800_53", "ISO27001")'),
        control: stringProperty('Optional: Specific control ID (e.g., "AC-1", "PR.AC-1")'),
        regulation: stringProperty('Optional: Filter to specific regulation (e.g., "HIPAA")'),
      },
      required: ['framework'],
    },
    handler: async (db: Database.Database, args: any) => mapControls(db, args as MapControlsInput),
  },
  {
    name: 'check_applicability',
    description: 'Determine which regulations apply to a specific sector or subsector. Returns applicable regulations with confidence levels (definite, likely, possible).',
    inputSchema: {
      type: 'object',
      properties: {
        sector: stringProperty('Industry sector (e.g., "healthcare", "financial", "retail", "technology")'),
        subsector: stringProperty('Optional: Specific subsector (e.g., "hospital", "bank", "e-commerce")'),
      },
      required: ['sector'],
    },
    handler: async (db: Database.Database, args: any) => checkApplicability(db, args as ApplicabilityInput),
  },
  {
    name: 'get_definitions',
    description: 'Look up official term definitions across regulations. Uses partial matching to find terms (e.g., "health" matches "protected health information").',
    inputSchema: {
      type: 'object',
      properties: {
        term: stringProperty('Term to look up (e.g., "protected health information", "personal data")'),
        regulation: stringProperty('Optional: Filter to specific regulation (e.g., "HIPAA")'),
      },
      required: ['term'],
    },
    handler: async (db: Database.Database, args: any) => getDefinitions(db, args as DefinitionsInput),
  },
  {
    name: 'get_evidence_requirements',
    description: 'Get compliance evidence requirements for a specific section (e.g., audit logs, policies, procedures). MVP: Returns placeholder until evidence data is seeded.',
    inputSchema: {
      type: 'object',
      properties: {
        regulation: stringProperty('Regulation ID (e.g., "HIPAA")'),
        section: stringProperty('Section number (e.g., "164.312(b)")'),
      },
      required: ['regulation', 'section'],
    },
    handler: async (db: Database.Database, args: any) => getEvidenceRequirements(db, args as EvidenceInput),
  },
  {
    name: 'get_compliance_action_items',
    description: 'Generate structured compliance action items from regulation sections. Extracts priority (high/medium/low) based on regulatory language and identifies evidence needed.',
    inputSchema: {
      type: 'object',
      properties: {
        regulation: stringProperty('Regulation ID (e.g., "HIPAA", "CCPA")'),
        sections: stringListProperty('Section numbers to generate action items for (e.g., ["164.308(a)(1)(ii)(A)", "164.312(b)"])'),
      },
      required: ['regulation', 'sections'],
    },
    handler: async (db: Database.Database, args: any) => getComplianceActionItems(db, args as ActionItemsInput),
  },
];
226
+
227
/**
 * Wire the tool registry into an MCP server.
 *
 * Installs two request handlers: one that lists every entry of `TOOLS`
 * (name, description, input schema) and one that dispatches a tool call to
 * the matching handler. Shared by the stdio and HTTP servers so both expose
 * the same tools.
 *
 * @param server MCP server instance to attach the handlers to
 * @param db database handle passed through to every tool handler
 */
export function registerTools(server: Server, db: Database.Database): void {
  // Tool discovery: expose only the public metadata, never the handler itself.
  server.setRequestHandler(ListToolsRequestSchema, async () => {
    const tools = TOOLS.map(({ name, description, inputSchema }) => ({
      name,
      description,
      inputSchema,
    }));
    return { tools };
  });

  // Tool invocation: look the tool up by name and run it.
  server.setRequestHandler(CallToolRequestSchema, async (request) => {
    const requestedName = request.params.name;
    const toolArgs = request.params.arguments || {};
    const match = TOOLS.find(candidate => candidate.name === requestedName);

    if (match === undefined) {
      return {
        content: [{ type: 'text', text: `Unknown tool: ${requestedName}` }],
        isError: true,
      };
    }

    try {
      const outcome = await match.handler(db, toolArgs);
      // Strings pass through untouched; everything else is pretty-printed JSON.
      // Serialization stays inside the try so its failures are reported too.
      const body = typeof outcome === 'string' ? outcome : JSON.stringify(outcome, null, 2);
      return {
        content: [{ type: 'text', text: body }],
      };
    } catch (error) {
      const reason = error instanceof Error ? error.message : 'Unknown error';
      return {
        content: [{ type: 'text', text: `Error: ${reason}` }],
        isError: true,
      };
    }
  });
}
@@ -0,0 +1,132 @@
1
+ import type { Database } from 'better-sqlite3';
2
+
3
/** Arguments accepted by `searchRegulations`. */
export interface SearchInput {
  /** Free-form search text; must not be empty or whitespace-only. */
  query: string;
  /** When present and non-empty, restricts matches to these regulation IDs. */
  regulations?: string[];
  /** Maximum rows to return; defaults to 10 and is capped at 1000. */
  limit?: number;
}
8
+
9
/** One row returned by `searchRegulations`. */
export interface SearchResult {
  /** Regulation the matching section belongs to. */
  regulation: string;
  /** Section number of the match. */
  section: string;
  /** Section title. */
  title: string;
  /** Excerpt with matched terms wrapped in `>>>` / `<<<` markers. */
  snippet: string;
  /** Absolute value of the FTS5 bm25 score for the row. */
  relevance: number;
}
16
+
17
/**
 * Build a safe FTS5 MATCH expression from free-form user text.
 *
 * Processing steps:
 * - Quotes are removed and hyphens become spaces, so "third-party" is searched
 *   as the two terms "third" and "party".
 * - Terms of two characters or fewer, common stopwords, and terms that contain
 *   no letter or digit are dropped.
 * - Every remaining term is emitted as a double-quoted FTS5 string followed by
 *   `*` (prefix match). Quoting keeps punctuation such as `.`, `(`, `:` and the
 *   upper-case keywords AND / OR / NOT from being parsed as FTS5 syntax, which
 *   would otherwise raise a syntax error and yield an empty result.
 *
 * Combination logic:
 * - 1-2 terms: implicit AND, for precision
 *   (e.g. "incident reporting" → "incident"* "reporting"*)
 * - 3+ terms: OR, for recall; BM25 still ranks rows with more matches higher
 *   (e.g. "encryption transmission storage" →
 *   "encryption"* OR "transmission"* OR "storage"*)
 *
 * @param query raw user query
 * @returns the FTS5 expression, or an empty string when no searchable term remains
 */
function escapeFts5Query(query: string): string {
  // Common stopwords that add noise to searches
  const stopwords = new Set(['a', 'an', 'the', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by']);

  // A term is only searchable if it contains at least one letter or digit.
  const hasSearchableChar = /[\p{L}\p{N}]/u;

  const terms = query
    .replace(/['"]/g, '') // Remove quotes so each term can be wrapped in its own pair below
    .replace(/-/g, ' ') // Convert hyphens to spaces ("third-party" → "third party")
    .split(/\s+/)
    .filter(
      word =>
        word.length > 2 &&
        hasSearchableChar.test(word) &&
        !stopwords.has(word.toLowerCase())
    )
    // Quoted string + star = prefix match on the final token of the string.
    .map(word => `"${word}"*`);

  if (terms.length === 0) {
    return '';
  }

  // Short queries are ANDed (FTS5 treats whitespace as AND); longer ones are ORed.
  return terms.join(terms.length <= 2 ? ' ' : ' OR ');
}
55
+
56
/**
 * Full-text search over regulation sections, best matches first.
 *
 * @param db database handle containing the `sections_fts` table
 * @param input search text, optional regulation filter and optional row limit
 * @returns matching sections with a highlighted snippet and a positive
 *   relevance score; an empty array when the query has no searchable terms
 *   or the database reports an error whose message mentions "fts5"
 * @throws Error when the query is empty or whitespace-only; any database
 *   error not related to FTS5 is rethrown
 */
export async function searchRegulations(
  db: Database,
  input: SearchInput
): Promise<SearchResult[]> {
  const { query, regulations } = input;

  // Normalise the limit: fall back to 10 for missing/invalid values, then
  // round down and cap at 1000.
  const requestedLimit = input.limit === undefined ? 10 : input.limit;
  const usableLimit =
    Number.isFinite(requestedLimit) && requestedLimit >= 0 ? requestedLimit : 10;
  const rowLimit = Math.min(Math.floor(usableLimit), 1000);

  if (!query || query.trim().length === 0) {
    throw new Error('Query cannot be empty. Please provide a search term.');
  }

  const matchExpression = escapeFts5Query(query);
  if (!matchExpression) {
    return [];
  }

  // Bind order: MATCH expression, optional regulation IDs, LIMIT.
  const bindings: (string | number)[] = [matchExpression];
  let regulationFilter = '';
  if (regulations && regulations.length > 0) {
    regulationFilter = ` AND regulation IN (${regulations.map(() => '?').join(', ')})`;
    bindings.push(...regulations);
  }
  bindings.push(rowLimit);

  const sectionsQuery = `
    SELECT
      sections_fts.regulation,
      sections_fts.section_number as section,
      sections_fts.title,
      snippet(sections_fts, 3, '>>>', '<<<', '...', 32) as snippet,
      bm25(sections_fts) as relevance
    FROM sections_fts
    WHERE sections_fts MATCH ?
    ${regulationFilter}
    ORDER BY bm25(sections_fts)
    LIMIT ?
  `;

  try {
    const rows = db.prepare(sectionsQuery).all(...bindings) as SearchResult[];

    // bm25() yields negative scores; report them as positive numbers.
    return rows.map(row => ({ ...row, relevance: Math.abs(row.relevance) }));
  } catch (error) {
    // An FTS5 failure (e.g. a syntax error) is reported as "no results".
    const isFtsFailure = error instanceof Error && error.message.includes('fts5');
    if (!isFtsFailure) {
      throw error;
    }
    return [];
  }
}
@@ -0,0 +1,85 @@
1
+ import type { Database } from 'better-sqlite3';
2
+
3
/** Arguments accepted by `getSection`. */
export interface GetSectionInput {
  /** Regulation ID, compared for equality with the `regulation` column. */
  regulation: string;
  /** Section number, compared for equality with the `section_number` column. */
  section: string;
}
7
+
8
/** A single regulation section as returned by `getSection`. */
export interface SectionData {
  /** Regulation the section belongs to. */
  regulation: string;
  /** Section number within the regulation. */
  section_number: string;
  /** Section title, if one is stored. */
  title: string | null;
  /** Section body; shortened and suffixed with a notice when `truncated` is true. */
  text: string;
  /** Chapter value stored for the section, if any. */
  chapter: string | null;
  /** Parent section value stored for the section, if any. */
  parent_section: string | null;
  /** Cross-references decoded from the stored JSON, or null when absent or unreadable. */
  cross_references: string[] | null;
  /** True when `text` was cut because the stored text exceeded the size limit. */
  truncated?: boolean;
  /** Character count of the untruncated text; only set when `truncated` is true. */
  original_length?: number;
  /** Rough token count of the untruncated text; only set when `truncated` is true. */
  token_estimate?: number;
}
20
+
21
/**
 * Fetch one regulation section by regulation ID and section number.
 *
 * Text longer than 50,000 characters is cut at that length and suffixed with a
 * notice; in that case `truncated` is true and `original_length` /
 * `token_estimate` describe the untruncated text.
 *
 * The `cross_references` column is stored as JSON text. It is decoded here and
 * returned only when it is a JSON array; unparsable JSON or any other JSON
 * value is logged with `console.warn` and reported as null, so callers never
 * receive a non-array under the `string[]` type.
 *
 * @param db database handle containing the `sections` table
 * @param input regulation ID and section number to look up
 * @returns the section, or null when no row matches
 */
export async function getSection(
  db: Database,
  input: GetSectionInput
): Promise<SectionData | null> {
  const { regulation, section } = input;

  const sql = `
    SELECT
      regulation,
      section_number,
      title,
      text,
      chapter,
      parent_section,
      cross_references
    FROM sections
    WHERE regulation = ? AND section_number = ?
  `;

  const row = db.prepare(sql).get(regulation, section) as {
    regulation: string;
    section_number: string;
    title: string | null;
    text: string;
    chapter: string | null;
    parent_section: string | null;
    cross_references: string | null;
  } | undefined;

  if (!row) {
    return null;
  }

  // Token management: truncate very large sections to prevent context overflow.
  const MAX_CHARS = 50000; // ~12,500 tokens at ~4 chars per token
  const originalLength = row.text.length;
  const tokenEstimate = Math.ceil(originalLength / 4); // ~4 chars per token
  const truncated = originalLength > MAX_CHARS;
  const text = truncated
    ? `${row.text.substring(0, MAX_CHARS)}\n\n[... Section truncated due to length. Original: ${originalLength} chars (~${tokenEstimate} tokens). Use search_regulations to find specific content.]`
    : row.text;

  // Decode cross-references defensively: the column is free-form text, so the
  // parsed value is checked before it is exposed under the string[] type.
  let crossReferences: string[] | null = null;
  if (row.cross_references) {
    let parsed: unknown;
    let parsedOk = true;
    try {
      parsed = JSON.parse(row.cross_references);
    } catch {
      parsedOk = false;
    }

    if (parsedOk && Array.isArray(parsed)) {
      crossReferences = parsed;
    } else {
      console.warn(`Invalid cross_references JSON for ${row.regulation} ${row.section_number}`);
    }
  }

  return {
    regulation: row.regulation,
    section_number: row.section_number,
    title: row.title,
    text,
    chapter: row.chapter,
    parent_section: row.parent_section,
    cross_references: crossReferences,
    truncated,
    original_length: truncated ? originalLength : undefined,
    token_estimate: truncated ? tokenEstimate : undefined,
  };
}