driftdetect-core 0.4.1 → 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/dist/boundaries/boundary-scanner.d.ts +76 -0
- package/dist/boundaries/boundary-scanner.d.ts.map +1 -0
- package/dist/boundaries/boundary-scanner.js +801 -0
- package/dist/boundaries/boundary-scanner.js.map +1 -0
- package/dist/boundaries/data-access-learner.d.ts +126 -0
- package/dist/boundaries/data-access-learner.d.ts.map +1 -0
- package/dist/boundaries/data-access-learner.js +486 -0
- package/dist/boundaries/data-access-learner.js.map +1 -0
- package/dist/boundaries/index.d.ts +6 -0
- package/dist/boundaries/index.d.ts.map +1 -1
- package/dist/boundaries/index.js +6 -0
- package/dist/boundaries/index.js.map +1 -1
- package/dist/boundaries/security-prioritizer.d.ts +118 -0
- package/dist/boundaries/security-prioritizer.d.ts.map +1 -0
- package/dist/boundaries/security-prioritizer.js +316 -0
- package/dist/boundaries/security-prioritizer.js.map +1 -0
- package/dist/call-graph/analysis/coverage-analyzer.d.ts +201 -0
- package/dist/call-graph/analysis/coverage-analyzer.d.ts.map +1 -0
- package/dist/call-graph/analysis/coverage-analyzer.js +553 -0
- package/dist/call-graph/analysis/coverage-analyzer.js.map +1 -0
- package/dist/call-graph/analysis/dead-code-detector.d.ts +145 -0
- package/dist/call-graph/analysis/dead-code-detector.d.ts.map +1 -0
- package/dist/call-graph/analysis/dead-code-detector.js +391 -0
- package/dist/call-graph/analysis/dead-code-detector.js.map +1 -0
- package/dist/call-graph/analysis/graph-builder.d.ts +142 -0
- package/dist/call-graph/analysis/graph-builder.d.ts.map +1 -0
- package/dist/call-graph/analysis/graph-builder.js +624 -0
- package/dist/call-graph/analysis/graph-builder.js.map +1 -0
- package/dist/call-graph/analysis/impact-analyzer.d.ts +150 -0
- package/dist/call-graph/analysis/impact-analyzer.d.ts.map +1 -0
- package/dist/call-graph/analysis/impact-analyzer.js +329 -0
- package/dist/call-graph/analysis/impact-analyzer.js.map +1 -0
- package/dist/call-graph/analysis/index.d.ts +11 -0
- package/dist/call-graph/analysis/index.d.ts.map +1 -0
- package/dist/call-graph/analysis/index.js +9 -0
- package/dist/call-graph/analysis/index.js.map +1 -0
- package/dist/call-graph/analysis/path-finder.d.ts +117 -0
- package/dist/call-graph/analysis/path-finder.d.ts.map +1 -0
- package/dist/call-graph/analysis/path-finder.js +360 -0
- package/dist/call-graph/analysis/path-finder.js.map +1 -0
- package/dist/call-graph/analysis/reachability.d.ts +56 -0
- package/dist/call-graph/analysis/reachability.d.ts.map +1 -0
- package/dist/call-graph/analysis/reachability.js +357 -0
- package/dist/call-graph/analysis/reachability.js.map +1 -0
- package/dist/call-graph/demo.d.ts +11 -0
- package/dist/call-graph/demo.d.ts.map +1 -0
- package/dist/call-graph/demo.js +339 -0
- package/dist/call-graph/demo.js.map +1 -0
- package/dist/call-graph/enrichment/enrichment-engine.d.ts +126 -0
- package/dist/call-graph/enrichment/enrichment-engine.d.ts.map +1 -0
- package/dist/call-graph/enrichment/enrichment-engine.js +760 -0
- package/dist/call-graph/enrichment/enrichment-engine.js.map +1 -0
- package/dist/call-graph/enrichment/impact-scorer.d.ts +59 -0
- package/dist/call-graph/enrichment/impact-scorer.d.ts.map +1 -0
- package/dist/call-graph/enrichment/impact-scorer.js +328 -0
- package/dist/call-graph/enrichment/impact-scorer.js.map +1 -0
- package/dist/call-graph/enrichment/index.d.ts +12 -0
- package/dist/call-graph/enrichment/index.d.ts.map +1 -0
- package/dist/call-graph/enrichment/index.js +15 -0
- package/dist/call-graph/enrichment/index.js.map +1 -0
- package/dist/call-graph/enrichment/remediation-generator.d.ts +41 -0
- package/dist/call-graph/enrichment/remediation-generator.d.ts.map +1 -0
- package/dist/call-graph/enrichment/remediation-generator.js +609 -0
- package/dist/call-graph/enrichment/remediation-generator.js.map +1 -0
- package/dist/call-graph/enrichment/sensitivity-classifier.d.ts +71 -0
- package/dist/call-graph/enrichment/sensitivity-classifier.d.ts.map +1 -0
- package/dist/call-graph/enrichment/sensitivity-classifier.js +454 -0
- package/dist/call-graph/enrichment/sensitivity-classifier.js.map +1 -0
- package/dist/call-graph/enrichment/types.d.ts +402 -0
- package/dist/call-graph/enrichment/types.d.ts.map +1 -0
- package/dist/call-graph/enrichment/types.js +9 -0
- package/dist/call-graph/enrichment/types.js.map +1 -0
- package/dist/call-graph/extractors/base-extractor.d.ts +112 -0
- package/dist/call-graph/extractors/base-extractor.d.ts.map +1 -0
- package/dist/call-graph/extractors/base-extractor.js +140 -0
- package/dist/call-graph/extractors/base-extractor.js.map +1 -0
- package/dist/call-graph/extractors/csharp-data-access-extractor.d.ts +76 -0
- package/dist/call-graph/extractors/csharp-data-access-extractor.d.ts.map +1 -0
- package/dist/call-graph/extractors/csharp-data-access-extractor.js +387 -0
- package/dist/call-graph/extractors/csharp-data-access-extractor.js.map +1 -0
- package/dist/call-graph/extractors/csharp-extractor.d.ts +87 -0
- package/dist/call-graph/extractors/csharp-extractor.d.ts.map +1 -0
- package/dist/call-graph/extractors/csharp-extractor.js +470 -0
- package/dist/call-graph/extractors/csharp-extractor.js.map +1 -0
- package/dist/call-graph/extractors/data-access-extractor.d.ts +76 -0
- package/dist/call-graph/extractors/data-access-extractor.d.ts.map +1 -0
- package/dist/call-graph/extractors/data-access-extractor.js +234 -0
- package/dist/call-graph/extractors/data-access-extractor.js.map +1 -0
- package/dist/call-graph/extractors/index.d.ts +26 -0
- package/dist/call-graph/extractors/index.d.ts.map +1 -0
- package/dist/call-graph/extractors/index.js +36 -0
- package/dist/call-graph/extractors/index.js.map +1 -0
- package/dist/call-graph/extractors/java-data-access-extractor.d.ts +101 -0
- package/dist/call-graph/extractors/java-data-access-extractor.d.ts.map +1 -0
- package/dist/call-graph/extractors/java-data-access-extractor.js +611 -0
- package/dist/call-graph/extractors/java-data-access-extractor.js.map +1 -0
- package/dist/call-graph/extractors/java-extractor.d.ts +87 -0
- package/dist/call-graph/extractors/java-extractor.d.ts.map +1 -0
- package/dist/call-graph/extractors/java-extractor.js +510 -0
- package/dist/call-graph/extractors/java-extractor.js.map +1 -0
- package/dist/call-graph/extractors/php-data-access-extractor.d.ts +93 -0
- package/dist/call-graph/extractors/php-data-access-extractor.d.ts.map +1 -0
- package/dist/call-graph/extractors/php-data-access-extractor.js +589 -0
- package/dist/call-graph/extractors/php-data-access-extractor.js.map +1 -0
- package/dist/call-graph/extractors/php-extractor.d.ts +104 -0
- package/dist/call-graph/extractors/php-extractor.d.ts.map +1 -0
- package/dist/call-graph/extractors/php-extractor.js +619 -0
- package/dist/call-graph/extractors/php-extractor.js.map +1 -0
- package/dist/call-graph/extractors/python-data-access-extractor.d.ts +90 -0
- package/dist/call-graph/extractors/python-data-access-extractor.d.ts.map +1 -0
- package/dist/call-graph/extractors/python-data-access-extractor.js +537 -0
- package/dist/call-graph/extractors/python-data-access-extractor.js.map +1 -0
- package/dist/call-graph/extractors/python-extractor.d.ts +98 -0
- package/dist/call-graph/extractors/python-extractor.d.ts.map +1 -0
- package/dist/call-graph/extractors/python-extractor.js +681 -0
- package/dist/call-graph/extractors/python-extractor.js.map +1 -0
- package/dist/call-graph/extractors/semantic-data-access-scanner.d.ts +91 -0
- package/dist/call-graph/extractors/semantic-data-access-scanner.d.ts.map +1 -0
- package/dist/call-graph/extractors/semantic-data-access-scanner.js +498 -0
- package/dist/call-graph/extractors/semantic-data-access-scanner.js.map +1 -0
- package/dist/call-graph/extractors/typescript-data-access-extractor.d.ts +122 -0
- package/dist/call-graph/extractors/typescript-data-access-extractor.d.ts.map +1 -0
- package/dist/call-graph/extractors/typescript-data-access-extractor.js +788 -0
- package/dist/call-graph/extractors/typescript-data-access-extractor.js.map +1 -0
- package/dist/call-graph/extractors/typescript-extractor.d.ts +145 -0
- package/dist/call-graph/extractors/typescript-extractor.d.ts.map +1 -0
- package/dist/call-graph/extractors/typescript-extractor.js +904 -0
- package/dist/call-graph/extractors/typescript-extractor.js.map +1 -0
- package/dist/call-graph/index.d.ts +127 -0
- package/dist/call-graph/index.d.ts.map +1 -0
- package/dist/call-graph/index.js +247 -0
- package/dist/call-graph/index.js.map +1 -0
- package/dist/call-graph/store/call-graph-store.d.ts +70 -0
- package/dist/call-graph/store/call-graph-store.d.ts.map +1 -0
- package/dist/call-graph/store/call-graph-store.js +210 -0
- package/dist/call-graph/store/call-graph-store.js.map +1 -0
- package/dist/call-graph/store/index.d.ts +7 -0
- package/dist/call-graph/store/index.d.ts.map +1 -0
- package/dist/call-graph/store/index.js +7 -0
- package/dist/call-graph/store/index.js.map +1 -0
- package/dist/call-graph/types.d.ts +376 -0
- package/dist/call-graph/types.d.ts.map +1 -0
- package/dist/call-graph/types.js +8 -0
- package/dist/call-graph/types.js.map +1 -0
- package/dist/index.d.ts +8 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +12 -0
- package/dist/index.js.map +1 -1
- package/dist/lake/callgraph-shard-store.d.ts +168 -0
- package/dist/lake/callgraph-shard-store.d.ts.map +1 -0
- package/dist/lake/callgraph-shard-store.js +466 -0
- package/dist/lake/callgraph-shard-store.js.map +1 -0
- package/dist/lake/examples-store.d.ts +127 -0
- package/dist/lake/examples-store.d.ts.map +1 -0
- package/dist/lake/examples-store.js +389 -0
- package/dist/lake/examples-store.js.map +1 -0
- package/dist/lake/index-store.d.ts +82 -0
- package/dist/lake/index-store.d.ts.map +1 -0
- package/dist/lake/index-store.js +359 -0
- package/dist/lake/index-store.js.map +1 -0
- package/dist/lake/index.d.ts +93 -0
- package/dist/lake/index.d.ts.map +1 -0
- package/dist/lake/index.js +138 -0
- package/dist/lake/index.js.map +1 -0
- package/dist/lake/lake.bak/index-store.d.ts +82 -0
- package/dist/lake/lake.bak/index-store.d.ts.map +1 -0
- package/dist/lake/lake.bak/index-store.js +357 -0
- package/dist/lake/lake.bak/index-store.js.map +1 -0
- package/dist/lake/lake.bak/index.d.ts +81 -0
- package/dist/lake/lake.bak/index.d.ts.map +1 -0
- package/dist/lake/lake.bak/index.js +114 -0
- package/dist/lake/lake.bak/index.js.map +1 -0
- package/dist/lake/lake.bak/manifest-store.d.ts +51 -0
- package/dist/lake/lake.bak/manifest-store.d.ts.map +1 -0
- package/dist/lake/lake.bak/manifest-store.js +347 -0
- package/dist/lake/lake.bak/manifest-store.js.map +1 -0
- package/dist/lake/lake.bak/query-engine.d.ts +112 -0
- package/dist/lake/lake.bak/query-engine.d.ts.map +1 -0
- package/dist/lake/lake.bak/query-engine.js +370 -0
- package/dist/lake/lake.bak/query-engine.js.map +1 -0
- package/dist/lake/lake.bak/types.d.ts +428 -0
- package/dist/lake/lake.bak/types.d.ts.map +1 -0
- package/dist/lake/lake.bak/types.js +46 -0
- package/dist/lake/lake.bak/types.js.map +1 -0
- package/dist/lake/lake.bak/view-materializer.d.ts +70 -0
- package/dist/lake/lake.bak/view-materializer.d.ts.map +1 -0
- package/dist/lake/lake.bak/view-materializer.js +314 -0
- package/dist/lake/lake.bak/view-materializer.js.map +1 -0
- package/dist/lake/lake.bak/view-store.d.ts +57 -0
- package/dist/lake/lake.bak/view-store.d.ts.map +1 -0
- package/dist/lake/lake.bak/view-store.js +348 -0
- package/dist/lake/lake.bak/view-store.js.map +1 -0
- package/dist/lake/manifest-store.d.ts +51 -0
- package/dist/lake/manifest-store.d.ts.map +1 -0
- package/dist/lake/manifest-store.js +348 -0
- package/dist/lake/manifest-store.js.map +1 -0
- package/dist/lake/pattern-shard-store.d.ts +87 -0
- package/dist/lake/pattern-shard-store.d.ts.map +1 -0
- package/dist/lake/pattern-shard-store.js +347 -0
- package/dist/lake/pattern-shard-store.js.map +1 -0
- package/dist/lake/query-engine.d.ts +124 -0
- package/dist/lake/query-engine.d.ts.map +1 -0
- package/dist/lake/query-engine.js +453 -0
- package/dist/lake/query-engine.js.map +1 -0
- package/dist/lake/security-shard-store.d.ts +156 -0
- package/dist/lake/security-shard-store.d.ts.map +1 -0
- package/dist/lake/security-shard-store.js +498 -0
- package/dist/lake/security-shard-store.js.map +1 -0
- package/dist/lake/types.d.ts +428 -0
- package/dist/lake/types.d.ts.map +1 -0
- package/dist/lake/types.js +46 -0
- package/dist/lake/types.js.map +1 -0
- package/dist/lake/view-materializer.d.ts +70 -0
- package/dist/lake/view-materializer.d.ts.map +1 -0
- package/dist/lake/view-materializer.js +314 -0
- package/dist/lake/view-materializer.js.map +1 -0
- package/dist/lake/view-store.d.ts +57 -0
- package/dist/lake/view-store.d.ts.map +1 -0
- package/dist/lake/view-store.js +348 -0
- package/dist/lake/view-store.js.map +1 -0
- package/dist/parsers/tree-sitter/index.d.ts +1 -0
- package/dist/parsers/tree-sitter/index.d.ts.map +1 -1
- package/dist/parsers/tree-sitter/index.js +4 -0
- package/dist/parsers/tree-sitter/index.js.map +1 -1
- package/dist/parsers/tree-sitter/typescript-loader.d.ts +58 -0
- package/dist/parsers/tree-sitter/typescript-loader.d.ts.map +1 -0
- package/dist/parsers/tree-sitter/typescript-loader.js +250 -0
- package/dist/parsers/tree-sitter/typescript-loader.js.map +1 -0
- package/dist/store/project-config.d.ts +154 -0
- package/dist/store/project-config.d.ts.map +1 -0
- package/dist/store/project-config.js +235 -0
- package/dist/store/project-config.js.map +1 -0
- package/dist/store/project-registry.d.ts +241 -0
- package/dist/store/project-registry.d.ts.map +1 -0
- package/dist/store/project-registry.js +557 -0
- package/dist/store/project-registry.js.map +1 -0
- package/package.json +16 -14
|
@@ -0,0 +1,801 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Boundary Scanner - Detects data access patterns in source code
|
|
3
|
+
*
|
|
4
|
+
* Uses a two-phase approach following Drift's philosophy:
|
|
5
|
+
* 1. LEARN: First pass discovers patterns from YOUR codebase
|
|
6
|
+
* 2. DETECT: Second pass uses learned patterns, regex as fallback
|
|
7
|
+
*
|
|
8
|
+
* This ensures we capture how YOUR code accesses data, not hardcoded assumptions.
|
|
9
|
+
*/
|
|
10
|
+
import * as fs from 'node:fs/promises';
|
|
11
|
+
import * as path from 'node:path';
|
|
12
|
+
import { minimatch } from 'minimatch';
|
|
13
|
+
import { BoundaryStore, createBoundaryStore } from './boundary-store.js';
|
|
14
|
+
import { DataAccessLearner, createDataAccessLearner } from './data-access-learner.js';
|
|
15
|
+
// ============================================================================
|
|
16
|
+
// Language Detection
|
|
17
|
+
// ============================================================================
|
|
18
|
+
/**
 * Resolve the scanner language identifier for a file from its extension.
 *
 * The extension is lower-cased before lookup, so `Foo.PY` and `foo.py`
 * are treated the same.
 *
 * @param {string} filePath - Path or bare file name to classify.
 * @returns {string|null} 'python', 'typescript', 'javascript', 'csharp',
 *   'php' or 'java'; null when the extension is not recognised.
 */
function getLanguage(filePath) {
    const languageByExtension = new Map([
        ['.py', 'python'],
        ['.pyw', 'python'],
        ['.ts', 'typescript'],
        ['.tsx', 'typescript'],
        ['.js', 'javascript'],
        ['.jsx', 'javascript'],
        ['.mjs', 'javascript'],
        ['.cjs', 'javascript'],
        ['.cs', 'csharp'],
        ['.php', 'php'],
        ['.java', 'java'],
    ]);
    const extension = path.extname(filePath).toLowerCase();
    return languageByExtension.get(extension) ?? null;
}
|
|
42
|
+
/**
 * Cheap pre-filter: decide whether a file is worth scanning for data access.
 *
 * Files whose path looks like a test file are rejected outright. Otherwise the
 * file qualifies as soon as its text contains any one of the ORM / SQL /
 * query-builder marker substrings (plain, case-sensitive substring checks).
 *
 * @param {string} filePath - Path of the file being considered.
 * @param {string} content - Full text of the file.
 * @returns {boolean} true when at least one marker occurs in a non-test file.
 */
function isDataAccessFile(filePath, content) {
    // Test files are excluded from boundary detection.
    const testPathMarkers = ['.test.', '.spec.', '__tests__', '/tests/'];
    if (testPathMarkers.some((marker) => filePath.includes(marker))) {
        return false;
    }

    const dataAccessMarkers = [
        'DbSet', 'DbContext', 'Entity', 'Table', 'Column',
        'models.Model', 'CharField', 'ForeignKey', 'ManyToMany',
        'declarative_base', 'relationship',
        'prisma.', '@prisma/client',
        '@Entity', '@Column', 'getRepository',
        'sequelize.define', 'DataTypes',
        'drizzle-orm',
        'SELECT', 'INSERT', 'UPDATE', 'DELETE', 'FROM',
        'execute', 'query', 'rawQuery',
        'supabase', '.from(', '.select(', '.insert(', '.update(',
    ];
    for (const marker of dataAccessMarkers) {
        if (content.includes(marker)) {
            return true;
        }
    }
    return false;
}
|
|
63
|
+
// ============================================================================
|
|
64
|
+
// Sensitive Field Detection
|
|
65
|
+
// ============================================================================
|
|
66
|
+
/**
 * Matchers for sensitive identifiers, grouped by sensitivity category.
 *
 * Every term listed below is compiled to the case-insensitive, word-bounded
 * pattern /\b<term>\b/i. Category order and term order are significant to
 * consumers that iterate and stop at the first hit.
 */
const SENSITIVE_PATTERNS = Object.fromEntries(
    Object.entries({
        pii: [
            'ssn', 'social_security', 'date_of_birth',
            'dob', 'address', 'phone', 'phone_number',
            'email', 'full_name', 'first_name', 'last_name',
        ],
        credentials: [
            'password', 'password_hash', 'secret', 'token',
            'api_key', 'private_key', 'hash', 'salt',
            'refresh_token', 'access_token',
        ],
        financial: [
            'credit_card', 'card_number', 'cvv',
            'bank_account', 'salary', 'income',
            'payment', 'balance',
        ],
        health: [
            'diagnosis', 'prescription', 'medical', 'health',
        ],
    }).map(([category, terms]) => [
        category,
        terms.map((term) => new RegExp(`\\b${term}\\b`, 'i')),
    ]),
);
|
|
86
|
+
/**
 * Scan file text line by line for identifiers that look sensitive.
 *
 * Empty lines and lines whose trimmed text starts with `//`, `#`, `*` or `/*`
 * are skipped. For every remaining line, each category in SENSITIVE_PATTERNS
 * contributes at most one finding (the first pattern of that category that
 * matches), so one line can yield several findings of different categories.
 *
 * @param {string} content - Full text of the file.
 * @param {string} file - File identifier copied verbatim into each finding.
 * @returns {Array<{field: string, table: (string|null), sensitivityType: string,
 *   file: string, line: number, confidence: number}>} Findings in line order;
 *   `line` is 1-based and `confidence` is always 0.8.
 */
function detectSensitiveFields(content, file) {
    const COMMENT_PREFIXES = ['//', '#', '*', '/*'];
    const LOOKBACK_LINES = 10;
    const lines = content.split('\n');

    // Best-effort owner lookup: inspect up to LOOKBACK_LINES lines strictly
    // before `lineIndex`. Later lines override earlier ones, and within one
    // line the precedence (lowest to highest) is class, model, Table(), .from().
    const tableNear = (lineIndex) => {
        let found = null;
        const preceding = lines.slice(Math.max(0, lineIndex - LOOKBACK_LINES), lineIndex);
        for (const candidate of preceding) {
            if (!candidate) {
                continue;
            }
            const captures = [
                candidate.match(/class\s+(\w+)/),
                candidate.match(/model\s+(\w+)/),
                candidate.match(/Table\s*\(\s*["'](\w+)["']/),
                candidate.match(/\.from\s*\(\s*["'](\w+)["']/),
            ];
            for (const capture of captures) {
                if (capture?.[1]) {
                    found = capture[1];
                }
            }
        }
        return found;
    };

    const findings = [];
    lines.forEach((line, index) => {
        if (!line) {
            return;
        }
        const stripped = line.trim();
        if (COMMENT_PREFIXES.some((prefix) => stripped.startsWith(prefix))) {
            return;
        }
        for (const [sensitivityType, patterns] of Object.entries(SENSITIVE_PATTERNS)) {
            // Only the first matching pattern of a category is reported.
            let hit = null;
            for (const pattern of patterns) {
                hit = line.match(pattern);
                if (hit) {
                    break;
                }
            }
            if (!hit) {
                continue;
            }
            findings.push({
                field: hit[0],
                table: tableNear(index),
                sensitivityType,
                file,
                line: index + 1,
                confidence: 0.8,
            });
        }
    });
    return findings;
}
|
|
138
|
+
// ============================================================================
|
|
139
|
+
// ORM Model Detection
|
|
140
|
+
// ============================================================================
|
|
141
|
+
/**
 * Find ORM model declarations in file text using per-framework regexes.
 *
 * Frameworks are tried in a fixed order (EF Core, Django, SQLAlchemy, TypeORM,
 * Prisma, Sequelize) and results are appended in that order, so a declaration
 * that satisfies two patterns is reported twice. Prisma is only attempted when
 * `file` ends with `.prisma`.
 *
 * Table names are guesses: EF Core uses the lower-cased DbSet property name;
 * every other framework uses the lower-cased model name with an 's' appended.
 *
 * @param {string} content - Full text of the file.
 * @param {string} file - File path; copied into each result and used for the
 *   `.prisma` check.
 * @returns {Array<{name: string, tableName: string, fields: Array, file: string,
 *   line: number, framework: string, confidence: number}>} Detected models;
 *   `line` is the 1-based line on which the regex match starts and `fields`
 *   is always empty.
 */
function detectORMModels(content, file) {
    const pluralOfModel = (hit) => `${hit[1].toLowerCase()}s`;
    const prismaDetectors = file.endsWith('.prisma')
        ? [{ framework: 'prisma', confidence: 0.95, regex: /model\s+(\w+)\s*\{/g, tableFor: pluralOfModel }]
        : [];
    const detectors = [
        {
            framework: 'efcore',
            confidence: 0.9,
            regex: /DbSet<(\w+)>\s+(\w+)/g,
            // EF Core: the table guess comes from the property name, not the entity type.
            tableFor: (hit) => hit[2].toLowerCase(),
        },
        {
            framework: 'django',
            confidence: 0.85,
            regex: /class\s+(\w+)\s*\([^)]*models\.Model[^)]*\)/g,
            tableFor: pluralOfModel,
        },
        {
            framework: 'sqlalchemy',
            confidence: 0.85,
            regex: /class\s+(\w+)\s*\([^)]*(?:Base|DeclarativeBase)[^)]*\)/g,
            tableFor: pluralOfModel,
        },
        {
            framework: 'typeorm',
            confidence: 0.9,
            regex: /@Entity\s*\([^)]*\)\s*(?:export\s+)?class\s+(\w+)/g,
            tableFor: pluralOfModel,
        },
        ...prismaDetectors,
        {
            framework: 'sequelize',
            confidence: 0.85,
            regex: /sequelize\.define\s*\(\s*['"](\w+)['"]/g,
            tableFor: pluralOfModel,
        },
    ];

    // 1-based line number of a character offset: count newlines before it.
    const lineAt = (offset) => content.substring(0, offset).split('\n').length;

    const models = [];
    for (const { framework, confidence, regex, tableFor } of detectors) {
        for (const hit of content.matchAll(regex)) {
            models.push({
                name: hit[1],
                tableName: tableFor(hit),
                fields: [],
                file,
                line: lineAt(hit.index),
                framework,
                confidence,
            });
        }
    }
    return models;
}
|
|
251
|
+
// ============================================================================
|
|
252
|
+
// Query Access Detection
|
|
253
|
+
// ============================================================================
|
|
254
|
+
/**
 * Common table name patterns - helps identify tables from variable names.
 * Maps a lower-cased identifier (singular or plural) to a likely table name.
 *
 * Built on a null-prototype object because lookups are keyed by identifiers
 * taken from scanned source code: on an ordinary object literal, names such as
 * `constructor` or `toString` would resolve to inherited Object.prototype
 * members and be mistaken for a (truthy) hint. Insertion order is preserved
 * for code that iterates the entries.
 */
const TABLE_NAME_HINTS = Object.assign(Object.create(null), {
    // User-related
    'user': 'users',
    'users': 'users',
    'account': 'accounts',
    'accounts': 'accounts',
    'profile': 'profiles',
    'profiles': 'profiles',
    'auth': 'auth',
    'session': 'sessions',
    'sessions': 'sessions',
    'token': 'tokens',
    'tokens': 'tokens',
    // Content
    'post': 'posts',
    'posts': 'posts',
    'comment': 'comments',
    'comments': 'comments',
    'article': 'articles',
    'articles': 'articles',
    'document': 'documents',
    'documents': 'documents',
    'file': 'files',
    'files': 'files',
    'image': 'images',
    'images': 'images',
    'media': 'media',
    // E-commerce
    'order': 'orders',
    'orders': 'orders',
    'product': 'products',
    'products': 'products',
    'cart': 'carts',
    'carts': 'carts',
    'item': 'items',
    'items': 'items',
    'payment': 'payments',
    'payments': 'payments',
    'transaction': 'transactions',
    'transactions': 'transactions',
    'invoice': 'invoices',
    'invoices': 'invoices',
    // Organization
    'company': 'companies',
    'companies': 'companies',
    'organization': 'organizations',
    'organizations': 'organizations',
    'team': 'teams',
    'teams': 'teams',
    'member': 'members',
    'members': 'members',
    'role': 'roles',
    'roles': 'roles',
    'permission': 'permissions',
    'permissions': 'permissions',
    // Communication
    'message': 'messages',
    'messages': 'messages',
    'notification': 'notifications',
    'notifications': 'notifications',
    'email': 'emails',
    'emails': 'emails',
    // Analytics
    'event': 'events',
    'events': 'events',
    'log': 'logs',
    'logs': 'logs',
    'audit': 'audit_logs',
    'metric': 'metrics',
    'metrics': 'metrics',
    // Settings
    'setting': 'settings',
    'settings': 'settings',
    'config': 'configs',
    'preference': 'preferences',
    'preferences': 'preferences',
    // Relationships
    'subscription': 'subscriptions',
    'subscriptions': 'subscriptions',
    'follower': 'followers',
    'followers': 'followers',
    'friend': 'friends',
    'friends': 'friends',
    'connection': 'connections',
    'connections': 'connections',
    // Generic
    'record': 'records',
    'records': 'records',
    'entry': 'entries',
    'entries': 'entries',
    'data': 'data',
    'result': 'results',
    'results': 'results',
});
|
|
352
|
+
/**
 * Common field patterns for different data types.
 *
 * Each listed column name yields one entry of the form
 * `{ pattern: /\b<name>\b/i, fields: [<name>] }`, in the order given.
 */
const COMMON_FIELD_PATTERNS = [
    // ID fields
    'id', 'user_id', 'account_id', 'organization_id',
    // Timestamps
    'created_at', 'updated_at', 'deleted_at',
    // User fields
    'email', 'username', 'password', 'name',
    'first_name', 'last_name', 'phone', 'address',
    // Status fields
    'status', 'state', 'active', 'enabled',
    // Content fields
    'title', 'description', 'content', 'body',
    // Financial
    'amount', 'price', 'total', 'balance',
].map((fieldName) => ({
    pattern: new RegExp(`\\b${fieldName}\\b`, 'i'),
    fields: [fieldName],
}));
|
|
390
|
+
/**
|
|
391
|
+
* Extract table name from a line of code using multiple strategies
|
|
392
|
+
*/
|
|
393
|
+
function extractTableName(line, context) {
|
|
394
|
+
// Strategy 1: Supabase .from('table_name')
|
|
395
|
+
const supabaseFrom = line.match(/\.from\s*\(\s*["']([a-zA-Z_][a-zA-Z0-9_]*)["']/i);
|
|
396
|
+
if (supabaseFrom?.[1])
|
|
397
|
+
return supabaseFrom[1];
|
|
398
|
+
// Strategy 2: Supabase with template literal .from(`table_name`)
|
|
399
|
+
const supabaseTemplate = line.match(/\.from\s*\(\s*`([a-zA-Z_][a-zA-Z0-9_]*)`/i);
|
|
400
|
+
if (supabaseTemplate?.[1])
|
|
401
|
+
return supabaseTemplate[1];
|
|
402
|
+
// Strategy 3: Prisma prisma.tableName.
|
|
403
|
+
const prisma = line.match(/prisma\.([a-zA-Z_][a-zA-Z0-9_]*)\./i);
|
|
404
|
+
if (prisma?.[1])
|
|
405
|
+
return prisma[1];
|
|
406
|
+
// Strategy 4: Drizzle db.select().from(tableName) or db.insert(tableName)
|
|
407
|
+
const drizzleFrom = line.match(/\.from\s*\(\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*\)/i);
|
|
408
|
+
if (drizzleFrom?.[1] && !drizzleFrom[1].startsWith('"') && !drizzleFrom[1].startsWith("'")) {
|
|
409
|
+
const hint = TABLE_NAME_HINTS[drizzleFrom[1].toLowerCase()];
|
|
410
|
+
if (hint)
|
|
411
|
+
return hint;
|
|
412
|
+
return drizzleFrom[1];
|
|
413
|
+
}
|
|
414
|
+
// Strategy 5: SQL FROM clause
|
|
415
|
+
const sqlFrom = line.match(/FROM\s+["'`]?([a-zA-Z_][a-zA-Z0-9_]*)["'`]?/i);
|
|
416
|
+
if (sqlFrom?.[1])
|
|
417
|
+
return sqlFrom[1];
|
|
418
|
+
// Strategy 6: SQL INTO clause
|
|
419
|
+
const sqlInto = line.match(/INTO\s+["'`]?([a-zA-Z_][a-zA-Z0-9_]*)["'`]?/i);
|
|
420
|
+
if (sqlInto?.[1])
|
|
421
|
+
return sqlInto[1];
|
|
422
|
+
// Strategy 7: SQL UPDATE clause
|
|
423
|
+
const sqlUpdate = line.match(/UPDATE\s+["'`]?([a-zA-Z_][a-zA-Z0-9_]*)["'`]?/i);
|
|
424
|
+
if (sqlUpdate?.[1])
|
|
425
|
+
return sqlUpdate[1];
|
|
426
|
+
// Strategy 8: SQL DELETE FROM clause
|
|
427
|
+
const sqlDelete = line.match(/DELETE\s+FROM\s+["'`]?([a-zA-Z_][a-zA-Z0-9_]*)["'`]?/i);
|
|
428
|
+
if (sqlDelete?.[1])
|
|
429
|
+
return sqlDelete[1];
|
|
430
|
+
// Strategy 9: Django Model.objects
|
|
431
|
+
const djangoModel = line.match(/([A-Z][a-zA-Z0-9]*)\.objects/);
|
|
432
|
+
if (djangoModel?.[1])
|
|
433
|
+
return djangoModel[1].toLowerCase() + 's';
|
|
434
|
+
// Strategy 10: SQLAlchemy session.query(Model)
|
|
435
|
+
const sqlalchemyQuery = line.match(/\.query\s*\(\s*([A-Z][a-zA-Z0-9]*)\s*\)/);
|
|
436
|
+
if (sqlalchemyQuery?.[1])
|
|
437
|
+
return sqlalchemyQuery[1].toLowerCase() + 's';
|
|
438
|
+
// Strategy 11: TypeORM getRepository(Entity)
|
|
439
|
+
const typeormRepo = line.match(/getRepository\s*\(\s*([A-Z][a-zA-Z0-9]*)\s*\)/);
|
|
440
|
+
if (typeormRepo?.[1])
|
|
441
|
+
return typeormRepo[1].toLowerCase() + 's';
|
|
442
|
+
// Strategy 12: Sequelize Model.findAll/create/etc
|
|
443
|
+
const sequelizeModel = line.match(/([A-Z][a-zA-Z0-9]*)\.(?:find|create|update|destroy|bulkCreate)/);
|
|
444
|
+
if (sequelizeModel?.[1])
|
|
445
|
+
return sequelizeModel[1].toLowerCase() + 's';
|
|
446
|
+
// Strategy 13: Mongoose Model.find/save/etc
|
|
447
|
+
const mongooseModel = line.match(/([A-Z][a-zA-Z0-9]*)\.(?:find|findOne|findById|create|save|updateOne|deleteOne)/);
|
|
448
|
+
if (mongooseModel?.[1])
|
|
449
|
+
return mongooseModel[1].toLowerCase() + 's';
|
|
450
|
+
// Strategy 14: Knex table('name')
|
|
451
|
+
const knexTable = line.match(/\.table\s*\(\s*["']([a-zA-Z_][a-zA-Z0-9_]*)["']/i);
|
|
452
|
+
if (knexTable?.[1])
|
|
453
|
+
return knexTable[1];
|
|
454
|
+
// Strategy 15: Generic ORM pattern - look for variable assignment with table hint
|
|
455
|
+
const varAssignment = line.match(/(?:const|let|var)\s+(\w+)\s*=.*\.(?:from|table|query)/i);
|
|
456
|
+
if (varAssignment?.[1]) {
|
|
457
|
+
const hint = TABLE_NAME_HINTS[varAssignment[1].toLowerCase()];
|
|
458
|
+
if (hint)
|
|
459
|
+
return hint;
|
|
460
|
+
}
|
|
461
|
+
// Strategy 16: Look in surrounding context for table hints
|
|
462
|
+
for (const ctxLine of context) {
|
|
463
|
+
const tableHint = ctxLine.match(/table\s*[=:]\s*["']([a-zA-Z_][a-zA-Z0-9_]*)["']/i);
|
|
464
|
+
if (tableHint?.[1])
|
|
465
|
+
return tableHint[1];
|
|
466
|
+
const collectionHint = ctxLine.match(/collection\s*[=:]\s*["']([a-zA-Z_][a-zA-Z0-9_]*)["']/i);
|
|
467
|
+
if (collectionHint?.[1])
|
|
468
|
+
return collectionHint[1];
|
|
469
|
+
}
|
|
470
|
+
// Strategy 17: Infer from common variable names in the line
|
|
471
|
+
for (const [hint, table] of Object.entries(TABLE_NAME_HINTS)) {
|
|
472
|
+
const varPattern = new RegExp(`\\b${hint}(?:s|Data|Result|Response|Query)?\\b`, 'i');
|
|
473
|
+
if (varPattern.test(line)) {
|
|
474
|
+
return table;
|
|
475
|
+
}
|
|
476
|
+
}
|
|
477
|
+
return 'unknown';
|
|
478
|
+
}
|
|
479
|
+
/**
 * Extract field (column) names referenced on a single line of code.
 *
 * Sources, in the order they are collected:
 *  1. The string argument of `.select('a, b')` (a bare `*` is skipped).
 *  2. The first string argument of EVERY `.eq('field', ...)` call on the line,
 *     so chained filters such as `.eq('a', x).eq('b', y)` yield both names.
 *  3. The keys of the object literal passed to `.insert({...})`,
 *     `.update({...})` or `.upsert({...})`. Only text up to the first `}` is
 *     inspected, so keys after a nested object's closing brace are not seen.
 *  4. The `fields` of each COMMON_FIELD_PATTERNS entry whose `pattern`
 *     matches the line.
 *
 * @param {string} line - One line of source code.
 * @returns {string[]} De-duplicated field names in first-seen order.
 */
function extractFields(line) {
    const fields = [];
    // 1. .select('field1, field2')
    const selectMatch = line.match(/\.select\s*\(\s*["'`]([^"'`]+)["'`]/i);
    if (selectMatch?.[1]) {
        for (const part of selectMatch[1].split(',')) {
            const name = part.trim();
            if (name && name !== '*') {
                fields.push(name);
            }
        }
    }
    // 2. .eq('field', value) - a fresh /g literal per call, so no shared lastIndex state.
    for (const eqMatch of line.matchAll(/\.eq\s*\(\s*["'](\w+)["']/gi)) {
        if (eqMatch[1]) {
            fields.push(eqMatch[1]);
        }
    }
    // 3. Object keys in .insert({ field: value }) / .update / .upsert
    const writeMatch = line.match(/\.(?:insert|update|upsert)\s*\(\s*\{([^}]+)\}/i);
    if (writeMatch?.[1]) {
        // A key must sit at the start of the literal or directly after `,` / `{`.
        // This keeps colons inside values ('http://...', '12:30', ternaries) from
        // being reported as fields, and also accepts quoted keys ('user_id': 1).
        const keyPattern = /(?:^|[,{])\s*["'`]?(\w+)["'`]?\s*:/g;
        for (const keyMatch of writeMatch[1].matchAll(keyPattern)) {
            if (keyMatch[1]) {
                fields.push(keyMatch[1]);
            }
        }
    }
    // 4. Common fields implied by well-known patterns
    for (const { pattern, fields: patternFields } of COMMON_FIELD_PATTERNS) {
        if (pattern.test(line)) {
            fields.push(...patternFields);
        }
    }
    // Deduplicate while preserving first-seen order
    return [...new Set(fields)];
}
|
|
512
|
+
// ============================================================================
|
|
513
|
+
// Boundary Scanner Class
|
|
514
|
+
// ============================================================================
|
|
515
|
+
/**
 * Boundary Scanner - Scans source files for data access patterns
 *
 * Uses a two-phase approach:
 * 1. LEARN: First pass discovers patterns from the codebase
 * 2. DETECT: Second pass uses learned patterns + regex fallback
 */
export class BoundaryScanner {
    /** Scanner configuration; this class reads `rootDir`, `skipLearning` and `verbose`. */
    config;
    /** Boundary store created from `config.rootDir`; receives models, access points and sensitive fields. */
    store;
    /** Data access learner used for the LEARN phase and for variable-name inference. */
    learner;
    /** Result of the last `finalizeLearning` call, or null when learning has not run. */
    learnedConventions = null;
    /**
     * @param {object} config - Scanner configuration (see `config` field).
     */
    constructor(config) {
        this.config = config;
        this.store = createBoundaryStore({ rootDir: config.rootDir });
        this.learner = createDataAccessLearner();
    }
    /**
     * Initialize the underlying store. Call before scanning.
     */
    async initialize() {
        await this.store.initialize();
    }
    /**
     * Scan files for data access patterns using learning approach
     *
     * @param {string[]} files - File paths relative to `config.rootDir`.
     * @returns {Promise<object>} `{ accessMap, violations, stats }` where `stats`
     *   holds the counters and the scan duration in milliseconds.
     */
    async scanFiles(files) {
        const startTime = Date.now();
        let filesScanned = 0;
        const allModels = [];
        const allAccessPoints = [];
        const allSensitiveFields = [];
        // Reset learner for fresh scan
        this.learner.reset();
        // ========================================================================
        // PHASE 1: LEARN - Discover patterns from the codebase
        // ========================================================================
        if (!this.config.skipLearning) {
            if (this.config.verbose) {
                console.log('Phase 1: Learning data access patterns from codebase...');
            }
            for (const file of files) {
                const filePath = path.join(this.config.rootDir, file);
                const language = getLanguage(file);
                if (!language && !file.endsWith('.prisma'))
                    continue;
                try {
                    const content = await fs.readFile(filePath, 'utf-8');
                    if (isDataAccessFile(file, content) || file.endsWith('.prisma')) {
                        this.learner.learnFromFile(content, file);
                    }
                }
                catch {
                    // Deliberately best-effort: skip files that can't be read during
                    // learning; the detect phase reports read errors in verbose mode.
                }
            }
            // Finalize learning
            this.learnedConventions = this.learner.finalizeLearning(files.length);
            if (this.config.verbose) {
                console.log(`  Learned ${this.learnedConventions.tables.size} tables`);
                console.log(`  Primary framework: ${this.learnedConventions.primaryFramework ?? 'mixed'}`);
                console.log(`  Naming convention: ${this.learnedConventions.tableNamingConvention}`);
            }
        }
        // ========================================================================
        // PHASE 2: DETECT - Use learned patterns + regex fallback
        // ========================================================================
        if (this.config.verbose) {
            console.log('Phase 2: Detecting data access with learned patterns...');
        }
        for (const file of files) {
            const filePath = path.join(this.config.rootDir, file);
            const language = getLanguage(file);
            if (!language && !file.endsWith('.prisma'))
                continue;
            try {
                const content = await fs.readFile(filePath, 'utf-8');
                if (isDataAccessFile(file, content) || file.endsWith('.prisma')) {
                    filesScanned++;
                    allModels.push(...detectORMModels(content, file));
                    // Use learned patterns for detection, with regex fallback
                    const accessPoints = this.detectWithLearning(content, file);
                    allAccessPoints.push(...accessPoints);
                    allSensitiveFields.push(...detectSensitiveFields(content, file));
                }
            }
            catch (error) {
                if (this.config.verbose) {
                    // A thrown value is not guaranteed to be an Error instance.
                    const reason = error instanceof Error ? error.message : String(error);
                    console.error(`Error scanning ${file}:`, reason);
                }
            }
        }
        for (const model of allModels) {
            this.store.addModel(model);
        }
        for (const accessPoint of allAccessPoints) {
            this.store.addAccessPoint(accessPoint);
        }
        for (const field of allSensitiveFields) {
            this.store.addSensitiveField(field);
        }
        const accessMap = this.store.getAccessMap();
        await this.store.saveAccessMap(accessMap);
        const violations = this.store.checkAllViolations();
        const duration = Date.now() - startTime;
        return {
            accessMap,
            violations,
            stats: {
                filesScanned,
                tablesFound: Object.keys(accessMap.tables).length,
                accessPointsFound: Object.keys(accessMap.accessPoints).length,
                sensitiveFieldsFound: accessMap.sensitiveFields.length,
                violationsFound: violations.length,
                scanDurationMs: duration,
            },
        };
    }
    /**
     * Detect data access using learned patterns first, regex as fallback
     *
     * Confidence values assigned here: 0.95 when a learned pattern resolves the
     * table, 0.7 when the regex fallback resolves it, 0.3 when the table stays
     * 'unknown'.
     *
     * @param {string} content - Full text of the file.
     * @param {string} file - File path recorded on each access point.
     * @returns {object[]} Access points found, at most one per source line.
     */
    detectWithLearning(content, file) {
        const accessPoints = [];
        const seenIds = new Set();
        // Accept both LF and CRLF so lines never carry a trailing '\r'.
        const lines = content.split(/\r?\n/);
        for (let i = 0; i < lines.length; i++) {
            const line = lines[i];
            if (!line)
                continue;
            // Skip comments
            const trimmed = line.trim();
            if (trimmed.startsWith('//') || trimmed.startsWith('#') ||
                trimmed.startsWith('*') || trimmed.startsWith('/*')) {
                continue;
            }
            // Check if this line has a data access pattern
            const operation = this.detectOperation(line);
            if (!operation)
                continue;
            // Surrounding context: up to 5 lines before and 5 lines after the
            // current line. slice() excludes its end index, hence `i + 6`.
            const contextStart = Math.max(0, i - 5);
            const contextEnd = Math.min(lines.length, i + 6);
            const context = lines.slice(contextStart, contextEnd);
            // Try to extract table name using multiple strategies
            let table = 'unknown';
            let confidence = 0.5;
            // Strategy 1: Use learned patterns (highest confidence)
            if (this.learnedConventions?.hasEnoughData) {
                const learnedTable = this.extractTableWithLearning(line, context);
                if (learnedTable) {
                    table = learnedTable;
                    confidence = 0.95; // High confidence for learned patterns
                }
            }
            // Strategy 2: Regex fallback (lower confidence)
            if (table === 'unknown') {
                table = extractTableName(line, context);
                confidence = table === 'unknown' ? 0.3 : 0.7;
            }
            const fields = extractFields(line);
            const id = `${file}:${i + 1}:0:${table}`;
            if (seenIds.has(id))
                continue;
            seenIds.add(id);
            accessPoints.push({
                id,
                table,
                fields,
                operation,
                file,
                line: i + 1,
                column: 0,
                context: trimmed.slice(0, 100),
                isRawSql: /\b(SELECT|INSERT|UPDATE|DELETE)\b/i.test(line),
                confidence,
            });
        }
        return accessPoints;
    }
    /**
     * Extract table name using learned patterns
     *
     * Resolution order: (1) a learned table name appearing in the line, quoted
     * or as a whole word; (2) a table inferred by the learner from the variable
     * being declared/assigned on the line; (3) a learned table name appearing
     * quoted in any context line.
     *
     * @param {string} line - The line being analysed.
     * @param {string[]} context - Surrounding lines.
     * @returns {string|null} The learned table name, or null when nothing matches.
     */
    extractTableWithLearning(line, context) {
        if (!this.learnedConventions)
            return null;
        // Learned names come from scanned source, so they may contain regex
        // metacharacters (e.g. "schema.table"); escape before interpolating.
        const escapeRegExp = (text) => String(text).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
        // Compile each table's patterns once per call rather than once per
        // (context line x table) pair.
        const candidates = [];
        for (const [tableName] of this.learnedConventions.tables) {
            if (!tableName)
                continue; // an empty name would match every line
            const escaped = escapeRegExp(tableName);
            candidates.push({
                tableName,
                inLine: new RegExp(`['"\`]${escaped}['"\`]|\\b${escaped}\\b`, 'i'),
                quoted: new RegExp(`['"\`]${escaped}['"\`]`, 'i'),
            });
        }
        // Check if any learned table name appears in the line
        for (const candidate of candidates) {
            if (candidate.inLine.test(line)) {
                return candidate.tableName;
            }
        }
        // Try to infer from variable names
        const varMatch = line.match(/(?:const|let|var)\s+(\w+)|(\w+)\s*=\s*(?:await\s+)?/);
        if (varMatch) {
            const varName = (varMatch[1] || varMatch[2])?.toLowerCase();
            if (varName) {
                const inferredTable = this.learner.inferTableFromVariable(varName);
                if (inferredTable)
                    return inferredTable;
            }
        }
        // Check context for learned tables (quoted occurrences only)
        for (const ctxLine of context) {
            for (const candidate of candidates) {
                if (candidate.quoted.test(ctxLine)) {
                    return candidate.tableName;
                }
            }
        }
        return null;
    }
    /**
     * Detect the operation type from a line
     *
     * Query-builder chains usually contain read-looking calls (`.from(`,
     * `.where(`, `.filter(`, `.query(`) even when the statement writes or
     * deletes, e.g. `db.from('t').delete()`. The most destructive match
     * therefore wins: delete, then write, then read.
     *
     * @param {string} line - One line of source code.
     * @returns {'read'|'write'|'delete'|null} Operation type, or null when the
     *   line contains no recognised data access pattern.
     */
    detectOperation(line) {
        // Delete operations
        if (/\.delete\s*\(|\.remove\s*\(|\.destroy\s*\(|\bDELETE\b/i.test(line)) {
            return 'delete';
        }
        // Write operations
        if (/\.create\s*\(|\.save\s*\(|\.update\s*\(|\.insert\s*\(|\.upsert\s*\(|\bINSERT\b|\bUPDATE\b/i.test(line)) {
            return 'write';
        }
        // Read operations
        if (/\.find\w*\s*\(|\.get\w*\s*\(|\.where\s*\(|\.select\s*\(|\.filter\s*\(|\.all\s*\(|\.objects\.|\.query\s*\(|\.from\s*\(|\bSELECT\b/i.test(line)) {
            return 'read';
        }
        return null;
    }
    /**
     * Scan directory with glob patterns
     *
     * @param {object} [options]
     * @param {string[]} [options.patterns] - Globs for files to include;
     *   defaults to TS/TSX/JS/JSX/PY files at any depth.
     * @param {string[]} [options.ignorePatterns] - Directory names to skip.
     * @returns {Promise<object>} Same result shape as `scanFiles`.
     */
    async scanDirectory(options = {}) {
        const patterns = options.patterns ?? ['**/*.ts', '**/*.tsx', '**/*.js', '**/*.jsx', '**/*.py'];
        const ignorePatterns = options.ignorePatterns ?? ['node_modules', '.git', 'dist', 'build', '__pycache__', '.drift'];
        const files = await this.findFiles(patterns, ignorePatterns);
        return this.scanFiles(files);
    }
    /**
     * Get learned conventions (for debugging/inspection)
     */
    getLearnedConventions() {
        return this.learnedConventions;
    }
    /**
     * Get the underlying store
     */
    getStore() {
        return this.store;
    }
    /**
     * Find files matching patterns
     *
     * Walks `config.rootDir` recursively. A directory is skipped when its name
     * is listed in `ignorePatterns` (exact name comparison, not a glob) or
     * starts with a dot. Returned paths are relative to the root and always
     * use '/' as separator.
     *
     * @param {string[]} patterns - Globs tested against the relative path.
     * @param {string[]} ignorePatterns - Directory names to skip.
     * @returns {Promise<string[]>} Relative paths of matching files.
     */
    async findFiles(patterns, ignorePatterns) {
        const files = [];
        const walk = async (dir, relativePath = '') => {
            const entries = await fs.readdir(dir, { withFileTypes: true });
            for (const entry of entries) {
                const fullPath = path.join(dir, entry.name);
                const relPath = relativePath ? `${relativePath}/${entry.name}` : entry.name;
                if (entry.isDirectory()) {
                    if (!ignorePatterns.includes(entry.name) && !entry.name.startsWith('.')) {
                        await walk(fullPath, relPath);
                    }
                }
                else if (entry.isFile()) {
                    if (patterns.some((pattern) => minimatch(relPath, pattern))) {
                        files.push(relPath);
                    }
                }
            }
        };
        await walk(this.config.rootDir);
        return files;
    }
}
|
|
792
|
+
// ============================================================================
|
|
793
|
+
// Factory Function
|
|
794
|
+
// ============================================================================
|
|
795
|
+
/**
 * Factory for BoundaryScanner.
 *
 * @param {object} config - Passed unchanged to the BoundaryScanner constructor.
 * @returns {BoundaryScanner} A newly constructed scanner; `initialize()` is not called here.
 */
export function createBoundaryScanner(config) {
    const scanner = new BoundaryScanner(config);
    return scanner;
}
|
|
801
|
+
//# sourceMappingURL=boundary-scanner.js.map
|