n8n-nodes-redactor 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +42 -0
- package/README.dev.md +153 -0
- package/README.md +443 -0
- package/README.npm.md +443 -0
- package/dist/nodes/PiiRedactor/PiiRedactor.node.d.ts +5 -0
- package/dist/nodes/PiiRedactor/PiiRedactor.node.js +1093 -0
- package/dist/nodes/PiiRedactor/__tests__/encryption.test.d.ts +1 -0
- package/dist/nodes/PiiRedactor/__tests__/encryption.test.js +200 -0
- package/dist/nodes/PiiRedactor/__tests__/engine.test.d.ts +1 -0
- package/dist/nodes/PiiRedactor/__tests__/engine.test.js +524 -0
- package/dist/nodes/PiiRedactor/__tests__/operations.test.d.ts +1 -0
- package/dist/nodes/PiiRedactor/__tests__/operations.test.js +316 -0
- package/dist/nodes/PiiRedactor/__tests__/patterns-global.test.d.ts +1 -0
- package/dist/nodes/PiiRedactor/__tests__/patterns-global.test.js +427 -0
- package/dist/nodes/PiiRedactor/__tests__/patterns.test.d.ts +1 -0
- package/dist/nodes/PiiRedactor/__tests__/patterns.test.js +481 -0
- package/dist/nodes/PiiRedactor/__tests__/phase1.test.d.ts +1 -0
- package/dist/nodes/PiiRedactor/__tests__/phase1.test.js +343 -0
- package/dist/nodes/PiiRedactor/__tests__/phase3.test.d.ts +1 -0
- package/dist/nodes/PiiRedactor/__tests__/phase3.test.js +275 -0
- package/dist/nodes/PiiRedactor/__tests__/phase4.test.d.ts +1 -0
- package/dist/nodes/PiiRedactor/__tests__/phase4.test.js +184 -0
- package/dist/nodes/PiiRedactor/__tests__/presidio.test.d.ts +1 -0
- package/dist/nodes/PiiRedactor/__tests__/presidio.test.js +170 -0
- package/dist/nodes/PiiRedactor/__tests__/security.test.d.ts +1 -0
- package/dist/nodes/PiiRedactor/__tests__/security.test.js +178 -0
- package/dist/nodes/PiiRedactor/__tests__/semantic.test.d.ts +1 -0
- package/dist/nodes/PiiRedactor/__tests__/semantic.test.js +319 -0
- package/dist/nodes/PiiRedactor/__tests__/vault.test.d.ts +1 -0
- package/dist/nodes/PiiRedactor/__tests__/vault.test.js +247 -0
- package/dist/nodes/PiiRedactor/audit.d.ts +48 -0
- package/dist/nodes/PiiRedactor/audit.js +192 -0
- package/dist/nodes/PiiRedactor/classification.d.ts +33 -0
- package/dist/nodes/PiiRedactor/classification.js +118 -0
- package/dist/nodes/PiiRedactor/context.d.ts +57 -0
- package/dist/nodes/PiiRedactor/context.js +260 -0
- package/dist/nodes/PiiRedactor/encryption.d.ts +45 -0
- package/dist/nodes/PiiRedactor/encryption.js +158 -0
- package/dist/nodes/PiiRedactor/engine.d.ts +23 -0
- package/dist/nodes/PiiRedactor/engine.js +888 -0
- package/dist/nodes/PiiRedactor/injection.d.ts +46 -0
- package/dist/nodes/PiiRedactor/injection.js +425 -0
- package/dist/nodes/PiiRedactor/names.d.ts +25 -0
- package/dist/nodes/PiiRedactor/names.js +188 -0
- package/dist/nodes/PiiRedactor/patterns.d.ts +17 -0
- package/dist/nodes/PiiRedactor/patterns.js +1742 -0
- package/dist/nodes/PiiRedactor/presidio.d.ts +77 -0
- package/dist/nodes/PiiRedactor/presidio.js +264 -0
- package/dist/nodes/PiiRedactor/profiles.d.ts +47 -0
- package/dist/nodes/PiiRedactor/profiles.js +139 -0
- package/dist/nodes/PiiRedactor/pseudonymize.d.ts +20 -0
- package/dist/nodes/PiiRedactor/pseudonymize.js +203 -0
- package/dist/nodes/PiiRedactor/redact.png +0 -0
- package/dist/nodes/PiiRedactor/redact.svg +3 -0
- package/dist/nodes/PiiRedactor/ropa.d.ts +63 -0
- package/dist/nodes/PiiRedactor/ropa.js +70 -0
- package/dist/nodes/PiiRedactor/types.d.ts +82 -0
- package/dist/nodes/PiiRedactor/types.js +3 -0
- package/dist/nodes/PiiRedactor/vault.d.ts +61 -0
- package/dist/nodes/PiiRedactor/vault.js +352 -0
- package/package.json +87 -0
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Persistent Audit Log
|
|
4
|
+
*
|
|
5
|
+
* JSONL (JSON Lines) audit logger for compliance recording.
|
|
6
|
+
* One JSON object per line, append-only, auto-rotating.
|
|
7
|
+
*
|
|
8
|
+
* Compliant with:
|
|
9
|
+
* - GDPR Article 30 (Records of Processing Activities)
|
|
10
|
+
* - HIPAA (6-year retention)
|
|
11
|
+
* - SOX (7-year retention)
|
|
12
|
+
* - PCI DSS (1-year minimum)
|
|
13
|
+
*
|
|
14
|
+
* Format: JSONL (compatible with Splunk, ELK, Datadog, CloudWatch)
|
|
15
|
+
* Location: ~/.n8n/pii-audit/pii-audit-YYYY-MM-DD.jsonl
|
|
16
|
+
* Rotation: Daily + 100MB size limit
|
|
17
|
+
*/
|
|
18
|
+
// TypeScript-emitted interop helper; an existing `this.__createBinding` is reused when present.
// Exposes m[k] on o under the name k2 (k2 defaults to k).
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    // Substitute a live getter when m has no own descriptor for k, when the
    // descriptor is an accessor on a module not flagged __esModule, or when it
    // is a writable or configurable data property.
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
        desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    // Fallback when Object.create is unavailable: copy the current value.
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
|
|
29
|
+
// TypeScript-emitted interop helper; an existing `this.__setModuleDefault` is reused when present.
// Stores v as the "default" property of o: an enumerable property defined via
// Object.defineProperty, or a plain assignment when Object.create is unavailable.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
|
|
34
|
+
// TypeScript-emitted interop helper; an existing `this.__importStar` is reused when present.
// Returns mod unchanged when it is flagged __esModule. Otherwise returns a new
// object that binds every own property of mod except "default" and whose
// "default" property is mod itself.
var __importStar = (this && this.__importStar) || (function () {
    // On first call, ownKeys replaces itself with Object.getOwnPropertyNames,
    // or with a for-in/hasOwnProperty fallback when that is unavailable.
    var ownKeys = function(o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var result = {};
        // "default" is skipped here because __setModuleDefault sets it below.
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
|
|
51
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
52
|
+
exports.writeAuditLog = writeAuditLog;
|
|
53
|
+
exports.readAuditLog = readAuditLog;
|
|
54
|
+
const crypto = __importStar(require("crypto"));
const fs = __importStar(require("fs"));
const path = __importStar(require("path"));
|
|
56
|
+
// Size (in bytes) at which writeAuditLog rotates the current day's log file.
const MAX_FILE_SIZE = 104857600; // 100MB
// Audit directory used when a caller passes none: <HOME>/.n8n/pii-audit, with
// '/tmp' standing in for the home directory when the HOME environment
// variable is unset or empty.
// NOTE(review): HOME is commonly unset on Windows, where this resolves under \tmp — confirm that is intended.
const DEFAULT_AUDIT_DIR = path.join(process.env.HOME || '/tmp', '.n8n', 'pii-audit');
|
|
58
|
+
/**
 * Generate a random version-4 UUID to identify an audit event.
 *
 * Delegates to Node's `crypto.randomUUID()` rather than assembling the bytes
 * from `Math.random()`, which is not designed to produce unique identifiers.
 *
 * @returns {string} Lowercase UUID in 8-4-4-4-12 hexadecimal form.
 */
function generateEventId() {
    return crypto.randomUUID();
}
|
|
68
|
+
/**
 * Build the path of the audit log file for the current UTC date.
 *
 * @param {string} auditDir - Directory that holds the audit logs.
 * @returns {string} `<auditDir>/pii-audit-YYYY-MM-DD.jsonl`.
 */
function getLogFilePath(auditDir) {
    // toISOString() is always UTC; the part before 'T' is the calendar date.
    const [datePart] = new Date().toISOString().split('T');
    const fileName = `pii-audit-${datePart}.jsonl`;
    return path.join(auditDir, fileName);
}
|
|
75
|
+
/**
|
|
76
|
+
* Determine severity from classification level and hit count.
|
|
77
|
+
*/
|
|
78
|
+
function determineSeverity(classificationLevel, totalHits = 0) {
|
|
79
|
+
if (classificationLevel === 'RESTRICTED' || totalHits >= 50)
|
|
80
|
+
return 'CRITICAL';
|
|
81
|
+
if (classificationLevel === 'CONFIDENTIAL' || totalHits >= 20)
|
|
82
|
+
return 'HIGH';
|
|
83
|
+
if (classificationLevel === 'INTERNAL' || totalHits >= 5)
|
|
84
|
+
return 'MEDIUM';
|
|
85
|
+
return 'LOW';
|
|
86
|
+
}
|
|
87
|
+
/**
 * Append one audit record to today's JSONL log file.
 *
 * Creates the audit directory when it is missing, rotates the current file by
 * renaming it to `<file>.<seq>` once it has reached MAX_FILE_SIZE, then
 * appends the record as a single JSON line.
 *
 * Best-effort by design: every failure is swallowed so that audit logging can
 * never break the workflow. Nothing is returned and nothing is thrown.
 *
 * @param {*} action - Stored verbatim in the record.
 * @param {*} mode - Stored verbatim in the record.
 * @param {Iterable<{category: string, patternLabel: string}>|null|undefined} hits -
 *   Detections to summarise; null or undefined is recorded as zero hits.
 * @param {*} inputItemCount - Stored verbatim in the record.
 * @param {*} processingTimeMs - Stored verbatim in the record.
 * @param {*} patternsUsed - Stored verbatim in the record.
 * @param {*} presidioEnabled - Stored verbatim in the record.
 * @param {{level?: string}|null|undefined} classification - Its `level` is
 *   recorded and also feeds the severity.
 * @param {*} sessionId - Stored verbatim in the record.
 * @param {string} [auditDir] - Target directory; defaults to DEFAULT_AUDIT_DIR.
 */
function writeAuditLog(action, mode, hits, inputItemCount, processingTimeMs, patternsUsed, presidioEnabled, classification, sessionId, auditDir) {
    try {
        const dir = auditDir || DEFAULT_AUDIT_DIR;
        // A recursive mkdir succeeds when the directory already exists, so no
        // separate existence check is needed.
        fs.mkdirSync(dir, { recursive: true });
        // A missing hit list still produces a (zero-hit) record instead of
        // aborting the whole write.
        const hitList = hits ?? [];
        // Null-prototype counters: labels such as "constructor" or "__proto__"
        // are counted like any other key instead of hitting Object.prototype.
        const hitsByCategory = Object.create(null);
        const hitsByPattern = Object.create(null);
        for (const hit of hitList) {
            hitsByCategory[hit.category] = (hitsByCategory[hit.category] || 0) + 1;
            hitsByPattern[hit.patternLabel] = (hitsByPattern[hit.patternLabel] || 0) + 1;
        }
        const entry = {
            version: '1.0',
            timestamp: new Date().toISOString(),
            eventId: generateEventId(),
            severity: determineSeverity(classification?.level, hitList.length),
            action,
            mode,
            status: 'SUCCESS',
            totalHits: hitList.length,
            hitsByCategory,
            hitsByPattern,
            classificationLevel: classification?.level,
            inputItemCount,
            processingTimeMs,
            patternsUsed,
            presidioEnabled,
            sessionId,
        };
        const logFile = getLogFilePath(dir);
        // Size-based rotation. statSync throws when the file does not exist
        // yet, which lands in the catch below and simply skips rotation.
        try {
            if (fs.statSync(logFile).size >= MAX_FILE_SIZE) {
                // Rotate: rename the current file with the first unused sequence number.
                let seq = 1;
                while (fs.existsSync(`${logFile}.${seq}`)) {
                    seq++;
                }
                fs.renameSync(logFile, `${logFile}.${seq}`);
            }
        }
        catch {
            // Rotation failed or nothing to rotate; keep writing to the current file.
        }
        // Append entry as JSONL (one JSON object per line)
        fs.appendFileSync(logFile, JSON.stringify(entry) + '\n', 'utf-8');
    }
    catch {
        // Audit logging should NEVER crash the workflow
        // Silently fail — the redaction itself already succeeded
    }
}
|
|
151
|
+
/**
 * Read and parse audit log entries, optionally limited to a date range.
 *
 * Considers every `pii-audit-<date>.jsonl` file in the audit directory and
 * also the size-rotated siblings `pii-audit-<date>.jsonl.<seq>` that
 * writeAuditLog creates. Files are read by ascending date; within one date
 * the rotated files come first in ascending sequence order, followed by the
 * active file, so entries come back oldest first.
 *
 * Never throws: unreadable files and malformed lines are skipped, and any
 * other failure yields an empty array.
 *
 * @param {string} [startDate] - Inclusive lower bound, `YYYY-MM-DD` (compared as a string).
 * @param {string} [endDate] - Inclusive upper bound, `YYYY-MM-DD` (compared as a string).
 * @param {string} [auditDir] - Directory to read; defaults to DEFAULT_AUDIT_DIR.
 * @returns {object[]} Parsed entries.
 */
function readAuditLog(startDate, endDate, auditDir) {
    try {
        const dir = auditDir || DEFAULT_AUDIT_DIR;
        if (!fs.existsSync(dir))
            return [];
        // Group 1 is the date part of the name; group 2 is the rotation
        // sequence number and is absent for the active file.
        const logName = /^pii-audit-(.*)\.jsonl(?:\.(\d+))?$/;
        const selected = [];
        for (const name of fs.readdirSync(dir)) {
            const parts = logName.exec(name);
            if (!parts)
                continue;
            const fileDate = parts[1];
            // Filter by date range if provided
            if (startDate && fileDate < startDate)
                continue;
            if (endDate && fileDate > endDate)
                continue;
            // The active file is the newest of its day, so it sorts last.
            const seq = parts[2] === undefined ? Number.MAX_SAFE_INTEGER : Number(parts[2]);
            selected.push({ name, fileDate, seq });
        }
        selected.sort((a, b) => {
            if (a.fileDate !== b.fileDate)
                return a.fileDate < b.fileDate ? -1 : 1;
            return a.seq - b.seq;
        });
        const entries = [];
        for (const { name } of selected) {
            try {
                const content = fs.readFileSync(path.join(dir, name), 'utf-8');
                for (const line of content.split('\n')) {
                    if (line.trim().length === 0)
                        continue;
                    try {
                        entries.push(JSON.parse(line));
                    }
                    catch {
                        // Skip malformed lines
                    }
                }
            }
            catch {
                // Skip unreadable files
            }
        }
        return entries;
    }
    catch {
        return [];
    }
}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Data Classification Engine
|
|
3
|
+
*
|
|
4
|
+
* Assigns sensitivity labels to data based on what PII categories were found.
|
|
5
|
+
* Modeled on Microsoft Purview and Google Cloud DLP classification taxonomy.
|
|
6
|
+
*
|
|
7
|
+
* Levels:
|
|
8
|
+
* PUBLIC (0) - No PII detected
|
|
9
|
+
* INTERNAL (1) - Low-sensitivity PII (network, location, vehicle, temporal)
|
|
10
|
+
* CONFIDENTIAL (2) - Personal PII (names, emails, phones, addresses, identity docs)
|
|
11
|
+
* RESTRICTED (3) - High-sensitivity PII (financial, medical, biometric, enterprise secrets)
|
|
12
|
+
*
|
|
13
|
+
* Escalation rules (based on Purview instance-count thresholds):
|
|
14
|
+
* - Any RESTRICTED-category hit -> RESTRICTED
|
|
15
|
+
* - 10+ CONFIDENTIAL hits -> escalate to RESTRICTED
|
|
16
|
+
* - 5+ INTERNAL hits -> escalate to CONFIDENTIAL
|
|
17
|
+
*/
|
|
18
|
+
import { PiiCategory, RedactionHit } from './types';
|
|
19
|
+
/** Sensitivity labels, listed from least (PUBLIC, 0) to most sensitive (RESTRICTED, 3). */
export type SensitivityLevel = 'PUBLIC' | 'INTERNAL' | 'CONFIDENTIAL' | 'RESTRICTED';
|
|
20
|
+
/** Result of classifying a set of redaction hits. */
export interface ClassificationLabel {
    /** Final sensitivity label, after any volume-based escalation. */
    level: SensitivityLevel;
    /** Numeric form of `level`: PUBLIC 0, INTERNAL 1, CONFIDENTIAL 2, RESTRICTED 3. */
    numericLevel: 0 | 1 | 2 | 3;
    /** Human-readable summary of the detected categories, noting any escalation. */
    reason: string;
    /** Category with the most hits, or 'none' when there were no hits. */
    topCategory: PiiCategory | 'none';
    /** Number of hits whose category maps to each base sensitivity level. */
    hitCountByLevel: Record<SensitivityLevel, number>;
    /** True when the level was raised because of the number of hits. */
    escalated: boolean;
    /** Total number of hits that were classified. */
    totalHits: number;
    /** Mean hit confidence rounded to two decimals; 0 when there were no hits. */
    averageConfidence: number;
}
|
|
30
|
+
/**
 * Classify data sensitivity based on detected PII hits.
 *
 * @param hits - Detected PII hits; an empty array yields the PUBLIC label.
 * @returns The sensitivity label together with per-level hit counts and summary statistics.
 */
export declare function classifyData(hits: RedactionHit[]): ClassificationLabel;
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Data Classification Engine
|
|
4
|
+
*
|
|
5
|
+
* Assigns sensitivity labels to data based on what PII categories were found.
|
|
6
|
+
* Modeled on Microsoft Purview and Google Cloud DLP classification taxonomy.
|
|
7
|
+
*
|
|
8
|
+
* Levels:
|
|
9
|
+
* PUBLIC (0) - No PII detected
|
|
10
|
+
* INTERNAL (1) - Low-sensitivity PII (network, location, vehicle, temporal)
|
|
11
|
+
* CONFIDENTIAL (2) - Personal PII (names, emails, phones, addresses, identity docs)
|
|
12
|
+
* RESTRICTED (3) - High-sensitivity PII (financial, medical, biometric, enterprise secrets)
|
|
13
|
+
*
|
|
14
|
+
* Escalation rules (based on Purview instance-count thresholds):
|
|
15
|
+
* - Any RESTRICTED-category hit -> RESTRICTED
|
|
16
|
+
* - 10+ CONFIDENTIAL hits -> escalate to RESTRICTED
|
|
17
|
+
* - 5+ INTERNAL hits -> escalate to CONFIDENTIAL
|
|
18
|
+
*/
|
|
19
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
20
|
+
exports.classifyData = classifyData;
|
|
21
|
+
/** Map PII categories to base sensitivity levels */
const CATEGORY_SENSITIVITY = {
    // RESTRICTED (3) - highest sensitivity
    financial: 'RESTRICTED',
    medical: 'RESTRICTED',
    biometric: 'RESTRICTED',
    enterprise: 'RESTRICTED',
    crypto: 'RESTRICTED',
    // CONFIDENTIAL (2) - personal data
    identity: 'CONFIDENTIAL',
    contact: 'CONFIDENTIAL',
    // INTERNAL (1) - low sensitivity
    network: 'INTERNAL',
    location: 'INTERNAL',
    vehicle: 'INTERNAL',
    temporal: 'INTERNAL',
    other: 'INTERNAL',
};
/** Numeric rank of each sensitivity level. */
const LEVEL_NUMERIC = {
    PUBLIC: 0,
    INTERNAL: 1,
    CONFIDENTIAL: 2,
    RESTRICTED: 3,
};
/** Confidence assumed for a hit that carries no usable numeric score. */
const DEFAULT_HIT_CONFIDENCE = 0.65;
/**
 * Classify data sensitivity based on detected PII hits.
 *
 * The base level is the highest level any hit's category maps to; categories
 * missing from CATEGORY_SENSITIVITY count as INTERNAL. Volume escalation then
 * applies: 10+ CONFIDENTIAL hits raise CONFIDENTIAL to RESTRICTED, and 5+
 * INTERNAL hits raise INTERNAL to CONFIDENTIAL.
 *
 * @param {Array<{category: string, confidence?: number}>} hits - Detected PII hits.
 * @returns {object} Classification label: level, numericLevel, reason,
 *   topCategory, hitCountByLevel, escalated, totalHits and averageConfidence
 *   (mean confidence rounded to two decimals).
 */
function classifyData(hits) {
    if (hits.length === 0) {
        return {
            level: 'PUBLIC',
            numericLevel: 0,
            reason: 'No sensitive data detected.',
            topCategory: 'none',
            hitCountByLevel: { PUBLIC: 0, INTERNAL: 0, CONFIDENTIAL: 0, RESTRICTED: 0 },
            escalated: false,
            totalHits: 0,
            averageConfidence: 0,
        };
    }
    // Count hits by sensitivity level
    const hitCountByLevel = {
        PUBLIC: 0,
        INTERNAL: 0,
        CONFIDENTIAL: 0,
        RESTRICTED: 0,
    };
    // A Map keeps arbitrary category names (e.g. "constructor") away from
    // Object.prototype and preserves first-seen order for ties.
    const categoryCounts = new Map();
    let totalConfidence = 0;
    for (const hit of hits) {
        const category = String(hit.category);
        // Own-property check: inherited names such as "toString" must not be
        // mistaken for a configured sensitivity level.
        const isKnown = Object.prototype.hasOwnProperty.call(CATEGORY_SENSITIVITY, category);
        const catLevel = isKnown ? CATEGORY_SENSITIVITY[category] : 'INTERNAL';
        hitCountByLevel[catLevel]++;
        categoryCounts.set(category, (categoryCounts.get(category) ?? 0) + 1);
        // A score of 0 is a real score; only missing or non-numeric values fall back.
        totalConfidence += Number.isFinite(hit.confidence) ? hit.confidence : DEFAULT_HIT_CONFIDENCE;
    }
    // Determine base level from highest-sensitivity hit
    let level = 'PUBLIC';
    if (hitCountByLevel.RESTRICTED > 0)
        level = 'RESTRICTED';
    else if (hitCountByLevel.CONFIDENTIAL > 0)
        level = 'CONFIDENTIAL';
    else if (hitCountByLevel.INTERNAL > 0)
        level = 'INTERNAL';
    // Escalation rules
    let escalated = false;
    // 10+ CONFIDENTIAL hits -> RESTRICTED
    if (level === 'CONFIDENTIAL' && hitCountByLevel.CONFIDENTIAL >= 10) {
        level = 'RESTRICTED';
        escalated = true;
    }
    // 5+ INTERNAL hits -> CONFIDENTIAL
    if (level === 'INTERNAL' && hitCountByLevel.INTERNAL >= 5) {
        level = 'CONFIDENTIAL';
        escalated = true;
    }
    // Rank categories once, most frequent first; reused for topCategory and reason.
    const ranked = [...categoryCounts.entries()].sort((a, b) => b[1] - a[1]);
    const topCategory = ranked[0][0];
    // Build reason string
    const reasonParts = ranked.map(([cat, count]) => `${count} ${cat}`);
    let reason = `Contains: ${reasonParts.join(', ')}.`;
    if (escalated) {
        reason += ` Escalated due to high volume of detections.`;
    }
    return {
        level,
        numericLevel: LEVEL_NUMERIC[level],
        reason,
        topCategory,
        hitCountByLevel,
        escalated,
        totalHits: hits.length,
        averageConfidence: Math.round((totalConfidence / hits.length) * 100) / 100,
    };
}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Context words and confidence scoring for PII patterns.
|
|
3
|
+
*
|
|
4
|
+
* Context words are keywords that appear near a PII match and boost confidence.
|
|
5
|
+
* For example, "SSN" appearing near "123-45-6789" adds CONTEXT_BOOST (0.20) to the ssn base confidence of 0.85, capped at 1.00.
|
|
6
|
+
*
|
|
7
|
+
* Base confidence scores reflect how reliable a pattern is:
|
|
8
|
+
* - 0.95: Checksum validated (Luhn, IBAN, PESEL, etc.)
|
|
9
|
+
* - 0.90: Semantic field-name match
|
|
10
|
+
* - 0.85: Context-aware ambiguous field
|
|
11
|
+
* - +0.20: Boost added to a regex match WITH context words nearby (result capped at 1.00)
|
|
12
|
+
* - 0.65: Default for a regex match WITHOUT context words when the pattern has no BASE_CONFIDENCE entry
|
|
13
|
+
* - 0.70: Custom pattern match
|
|
14
|
+
*/
|
|
15
|
+
/** Context words by pattern name. Case-insensitive matching. */
export declare const CONTEXT_WORDS: Record<string, string[]>;
/**
 * Base confidence scores by pattern name.
 * Patterns with checksum validation get higher scores.
 */
export declare const BASE_CONFIDENCE: Record<string, number>;
/**
 * Default base confidence for patterns not in the BASE_CONFIDENCE map.
 */
export declare const DEFAULT_BASE_CONFIDENCE = 0.65;
/**
 * Confidence boost added when context words are found nearby.
 */
export declare const CONTEXT_BOOST = 0.2;
/**
 * Confidence score for semantic field-name matches.
 */
export declare const SEMANTIC_CONFIDENCE = 0.9;
/**
 * Confidence score for context-aware ambiguous field matches.
 */
export declare const AMBIGUOUS_FIELD_CONFIDENCE = 0.85;
/**
 * Confidence score for custom pattern matches.
 */
export declare const CUSTOM_PATTERN_CONFIDENCE = 0.7;
/**
 * Confidence score for deny list matches.
 */
export declare const DENY_LIST_CONFIDENCE = 1;
/**
 * Window size (characters) searched for context words on each side of a match.
 */
export declare const CONTEXT_WINDOW = 80;
|
|
50
|
+
/**
 * Check if any context words appear near a match position in the text.
 *
 * @param text - Full text that contains the match.
 * @param matchStart - Index in `text` where the match starts.
 * @param matchEnd - Index in `text` where the match ends.
 * @param words - Context words to look for; an empty list yields false.
 * @returns True when at least one word occurs within CONTEXT_WINDOW characters of the match.
 */
export declare function hasContextWords(text: string, matchStart: number, matchEnd: number, words: string[]): boolean;
|
|
54
|
+
/**
 * Calculate confidence score for a pattern match.
 *
 * @param patternName - Key used to look up BASE_CONFIDENCE and CONTEXT_WORDS.
 * @param hasValidator - Whether the pattern has a validator.
 * @param validatorPassed - Whether that validator accepted the match.
 * @param text - Full text that contains the match.
 * @param matchStart - Index in `text` where the match starts.
 * @param matchEnd - Index in `text` where the match ends.
 * @returns Confidence rounded to two decimals, at most 1.
 */
export declare function calculateConfidence(patternName: string, hasValidator: boolean, validatorPassed: boolean, text: string, matchStart: number, matchEnd: number): number;
|
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Context words and confidence scoring for PII patterns.
|
|
4
|
+
*
|
|
5
|
+
* Context words are keywords that appear near a PII match and boost confidence.
|
|
6
|
+
* For example, "SSN" appearing near "123-45-6789" adds CONTEXT_BOOST (0.20) to the ssn base confidence of 0.85, capped at 1.00.
|
|
7
|
+
*
|
|
8
|
+
* Base confidence scores reflect how reliable a pattern is:
|
|
9
|
+
* - 0.95: Checksum validated (Luhn, IBAN, PESEL, etc.)
|
|
10
|
+
* - 0.90: Semantic field-name match
|
|
11
|
+
* - 0.85: Context-aware ambiguous field
|
|
12
|
+
* - +0.20: Boost added to a regex match WITH context words nearby (result capped at 1.00)
|
|
13
|
+
* - 0.65: Default for a regex match WITHOUT context words when the pattern has no BASE_CONFIDENCE entry
|
|
14
|
+
* - 0.70: Custom pattern match
|
|
15
|
+
*/
|
|
16
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
17
|
+
exports.CONTEXT_WINDOW = exports.DENY_LIST_CONFIDENCE = exports.CUSTOM_PATTERN_CONFIDENCE = exports.AMBIGUOUS_FIELD_CONFIDENCE = exports.SEMANTIC_CONFIDENCE = exports.CONTEXT_BOOST = exports.DEFAULT_BASE_CONFIDENCE = exports.BASE_CONFIDENCE = exports.CONTEXT_WORDS = void 0;
|
|
18
|
+
exports.hasContextWords = hasContextWords;
|
|
19
|
+
exports.calculateConfidence = calculateConfidence;
|
|
20
|
+
/**
 * Context words by pattern name. Case-insensitive matching.
 *
 * hasContextWords tests each entry as a plain substring of the text around a
 * match, so short entries (e.g. 'IP', 'tel', 'IC') also match inside longer
 * words. Keys are looked up by the pattern name passed to calculateConfidence.
 */
exports.CONTEXT_WORDS = {
    // Contact
    email: ['email', 'e-mail', 'mail', 'contact', 'reach', 'send', 'Kontakt', 'correo', 'posta'],
    phone: ['phone', 'call', 'mobile', 'cell', 'tel', 'telephone', 'dial', 'ring', 'Telefon', 'anrufen', 'Handy', 'appeler', 'llamar'],
    phoneDE: ['Telefon', 'anrufen', 'Handy', 'Rufnummer', 'Festnetz', 'mobil'],
    phoneUK: ['phone', 'call', 'mobile', 'ring', 'dial', 'landline'],
    phoneAT: ['Telefon', 'anrufen', 'Handy'],
    phoneCH: ['Telefon', 'anrufen', 'Natel', 'Handy'],
    phoneFR: ['telephone', 'appeler', 'portable', 'fixe', 'numero'],
    phoneNL: ['telefoon', 'bellen', 'mobiel'],
    phoneES: ['telefono', 'llamar', 'movil', 'celular'],
    phoneIT: ['telefono', 'chiamare', 'cellulare'],
    phoneAU: ['phone', 'call', 'mobile'],
    phoneIN: ['phone', 'call', 'mobile'],
    phoneBR: ['telefone', 'ligar', 'celular'],
    phoneCN: ['phone', 'call', 'mobile'],
    phoneKR: ['phone', 'call'],
    phoneRU: ['telefon', 'pozvonit'],
    phoneMX: ['telefono', 'llamar', 'celular'],
    phoneZA: ['phone', 'call', 'mobile'],
    // Identity - person names
    personName: ['name', 'person', 'contact', 'customer', 'client', 'patient', 'employee', 'user', 'Mr', 'Mrs', 'Ms', 'Dr', 'Herr', 'Frau'],
    // Identity - government IDs
    ssn: ['social security', 'SSN', 'social', 'security number', 'Sozialversicherung'],
    itinUS: ['ITIN', 'taxpayer', 'individual taxpayer'],
    sinCA: ['SIN', 'social insurance', 'insurance number'],
    ninoUK: ['national insurance', 'NINO', 'NI number'],
    nhsNumber: ['NHS', 'national health', 'health service', 'patient'],
    passportUS: ['passport', 'travel document', 'Reisepass'],
    passportEU: ['passport', 'travel document', 'Reisepass', 'passeport'],
    passportDE: ['Reisepass', 'passport', 'Passnummer'],
    nationalIdDE: ['Personalausweis', 'Ausweis', 'ID card', 'identity card'],
    taxIdUS: ['EIN', 'employer identification', 'tax ID', 'federal tax'],
    taxIdDE: ['Steuer', 'Identifikationsnummer', 'Finanzamt'],
    sozialversicherungDE: ['Sozialversicherung', 'Rentenversicherung', 'SV-Nummer'],
    ahvCH: ['AHV', 'AVS', 'Sozialversicherung', 'assurance'],
    nationalIdFR: ['securite sociale', 'NIR', 'numero national'],
    codiceFiscaleIT: ['codice fiscale', 'fiscal code', 'CF'],
    dniES: ['DNI', 'documento nacional', 'identidad'],
    nieES: ['NIE', 'extranjero', 'foreigner'],
    peselPL: ['PESEL', 'numer identyfikacyjny'],
    bsnNL: ['BSN', 'burgerservicenummer', 'burger service'],
    ppsIE: ['PPS', 'personal public service'],
    tfnAU: ['TFN', 'tax file', 'tax number'],
    nricSG: ['NRIC', 'identity card', 'IC'],
    panIN: ['PAN', 'permanent account', 'income tax'],
    aadhaarIN: ['Aadhaar', 'UID', 'unique identification'],
    cpfBR: ['CPF', 'cadastro', 'pessoa fisica'],
    hetuFI: ['HETU', 'henkilotunnus', 'personal identity'],
    personnummerSE: ['personnummer', 'personal number'],
    fodselsnummerNO: ['fodselsnummer', 'personal number'],
    cprDK: ['CPR', 'personnummer'],
    nifPT: ['NIF', 'numero fiscal', 'contribuinte'],
    nationalIdCN: ['identity card', 'resident card', 'shenfenzheng'],
    nationalIdTR: ['TC Kimlik', 'kimlik no', 'identity'],
    curpMX: ['CURP', 'poblacion'],
    rfcMX: ['RFC', 'registro federal'],
    emiratesIdUAE: ['emirates ID', 'UAE ID'],
    // Financial
    creditCard: ['card', 'credit', 'debit', 'visa', 'mastercard', 'amex', 'payment', 'Kreditkarte', 'carte', 'tarjeta'],
    amex: ['amex', 'american express', 'card', 'payment'],
    iban: ['IBAN', 'bank account', 'Kontonummer', 'compte', 'cuenta', 'conto'],
    bic: ['BIC', 'SWIFT', 'bank', 'routing'],
    vatEU: ['VAT', 'MwSt', 'Umsatzsteuer', 'TVA', 'IVA', 'BTW'],
    abaRouting: ['routing', 'ABA', 'bank', 'transfer'],
    sortCodeUK: ['sort code', 'bank', 'branch'],
    cardExpiry: ['expiry', 'expiration', 'valid', 'exp', 'Gultig'],
    cvvCtx: ['CVV', 'CVC', 'security code', 'verification'],
    bankAccountCtx: ['account', 'bank', 'checking', 'savings', 'Konto'],
    insurancePolicyCtx: ['insurance', 'policy', 'Versicherung', 'assurance', 'polizza'],
    salaryCtx: ['salary', 'compensation', 'pay', 'wage', 'Gehalt', 'salaire'],
    // Network
    ipv4: ['IP', 'address', 'server', 'host', 'network'],
    ipv6: ['IP', 'address', 'server', 'IPv6'],
    macAddress: ['MAC', 'hardware', 'network', 'device', 'adapter'],
    url: ['URL', 'link', 'website', 'http', 'visit'],
    privateIp: ['internal', 'private', 'LAN', 'intranet', 'network'],
    // Location
    addressDE: ['Adresse', 'Anschrift', 'wohnt', 'Straße', 'address'],
    addressLabeledEN: ['address', 'lives', 'located', 'residing'],
    addressLabeledDE: ['Adresse', 'Anschrift', 'wohnt', 'Wohnort'],
    gpsCoordinates: ['GPS', 'coordinates', 'location', 'latitude', 'longitude', 'Standort'],
    // Medical
    medicalRecordNumber: ['MRN', 'medical record', 'patient', 'chart', 'Krankenakte'],
    deaNumber: ['DEA', 'drug enforcement', 'prescriber'],
    npiNumber: ['NPI', 'provider', 'physician', 'doctor'],
    rxNumber: ['prescription', 'Rx', 'pharmacy', 'medication', 'Rezept'],
    healthPlanCtx: ['health plan', 'insurance', 'member', 'subscriber', 'beneficiary'],
    bloodTypeCtx: ['blood', 'type', 'group', 'Blutgruppe'],
    // Enterprise
    awsAccessKey: ['AWS', 'amazon', 'access key', 'credential'],
    gcpApiKey: ['Google', 'GCP', 'API key', 'cloud'],
    stripeKey: ['Stripe', 'payment', 'API key'],
    openaiKey: ['OpenAI', 'API key', 'GPT'],
    githubToken: ['GitHub', 'token', 'personal access'],
    slackToken: ['Slack', 'token', 'bot'],
    jwtToken: ['JWT', 'token', 'auth', 'bearer', 'session'],
    pemPrivateKey: ['private key', 'RSA', 'certificate', 'SSL', 'TLS'],
    sshPublicKey: ['SSH', 'key', 'public key', 'authorized'],
    genericSecret: ['password', 'secret', 'credential', 'API key', 'token'],
    dbConnectionString: ['database', 'connection', 'JDBC', 'mongo', 'postgres', 'mysql'],
    internalHostname: ['server', 'host', 'internal', 'intranet'],
    // Vehicle
    vin: ['VIN', 'vehicle', 'chassis', 'Fahrgestell', 'vehicule'],
    // Crypto
    bitcoinAddress: ['bitcoin', 'BTC', 'wallet', 'crypto'],
    ethereumAddress: ['ethereum', 'ETH', 'wallet', 'crypto', 'contract'],
};
|
|
129
|
+
/**
 * Base confidence scores by pattern name.
 * Patterns with checksum validation get higher scores.
 *
 * Pattern names missing from this map fall back to DEFAULT_BASE_CONFIDENCE
 * in calculateConfidence.
 */
exports.BASE_CONFIDENCE = {
    // Checksum validated = 0.95
    creditCard: 0.95,
    amex: 0.95,
    iban: 0.95,
    nhsNumber: 0.95,
    sinCA: 0.95,
    peselPL: 0.95,
    bsnNL: 0.95,
    nifPT: 0.95,
    tfnAU: 0.95,
    abaRouting: 0.95,
    // Strong format (unique structure) = 0.85-0.90
    email: 0.90,
    codiceFiscaleIT: 0.90,
    hetuFI: 0.90,
    ahvCH: 0.90,
    nricSG: 0.90,
    panIN: 0.90,
    passportUS: 0.85,
    passportEU: 0.85,
    passportDE: 0.85,
    ssn: 0.85,
    ninoUK: 0.85,
    dniES: 0.85,
    nieES: 0.85,
    nationalIdCN: 0.85,
    nationalIdTH: 0.85,
    emiratesIdUAE: 0.85,
    curpMX: 0.85,
    bitcoinAddress: 0.85,
    ethereumAddress: 0.85,
    vin: 0.85,
    jwtToken: 0.85,
    awsAccessKey: 0.90,
    gcpApiKey: 0.90,
    stripeKey: 0.90,
    openaiKey: 0.90,
    githubToken: 0.90,
    pemPrivateKey: 0.90,
    sshPublicKey: 0.90,
    // Medium format = 0.65-0.80
    phone: 0.70,
    phoneDE: 0.75,
    phoneUK: 0.75,
    phoneAT: 0.75,
    phoneCH: 0.75,
    phoneFR: 0.75,
    personName: 0.80,
    ipv4: 0.70,
    ipv6: 0.80,
    macAddress: 0.80,
    url: 0.75,
    vatEU: 0.80,
    bic: 0.65,
    // Broad patterns (high false positive risk) = 0.40-0.60
    postalCodeDE: 0.40,
    postalCodeAT: 0.40,
    postalCodeCH: 0.40,
    postalCodeUS: 0.50,
    postalCodeIT: 0.40,
    postalCodeES: 0.50,
    postalCodeIN: 0.40,
    dateSlash: 0.50,
    dateDash: 0.50,
    dateDot: 0.50,
    sortCodeUK: 0.50,
    cardExpiry: 0.50,
    socialHandle: 0.60,
    licensePlateDE: 0.55,
    dnaSequence: 0.50,
};
|
|
205
|
+
/**
 * Default base confidence for patterns not in the BASE_CONFIDENCE map.
 */
exports.DEFAULT_BASE_CONFIDENCE = 0.65;
/**
 * Confidence boost added when context words are found nearby
 * (calculateConfidence caps the boosted score at 1.0).
 */
exports.CONTEXT_BOOST = 0.20;
/**
 * Confidence score for semantic field-name matches.
 */
exports.SEMANTIC_CONFIDENCE = 0.90;
/**
 * Confidence score for context-aware ambiguous field matches.
 */
exports.AMBIGUOUS_FIELD_CONFIDENCE = 0.85;
/**
 * Confidence score for custom pattern matches.
 */
exports.CUSTOM_PATTERN_CONFIDENCE = 0.70;
/**
 * Confidence score for deny list matches.
 */
exports.DENY_LIST_CONFIDENCE = 1.0;
/**
 * Window size (characters) searched for context words on each side of a match.
 */
exports.CONTEXT_WINDOW = 80;
|
|
233
|
+
/**
 * Check if any context words appear near a match position in the text.
 *
 * The searched window spans CONTEXT_WINDOW characters before `matchStart` to
 * CONTEXT_WINDOW characters after `matchEnd`, clamped to the text, and
 * includes the match itself. Each word is tested as a substring, ignoring
 * case and diacritics, so an ASCII keyword such as 'Gultig' or 'telefono'
 * also matches "gültig" or "Teléfono" in the text.
 *
 * @param {string} text - Full text that contains the match.
 * @param {number} matchStart - Index in `text` where the match starts.
 * @param {number} matchEnd - Index in `text` where the match ends.
 * @param {string[]} words - Context words to look for.
 * @returns {boolean} True when at least one word occurs inside the window;
 *   false for a missing or empty word list.
 */
function hasContextWords(text, matchStart, matchEnd, words) {
    if (!words || words.length === 0)
        return false;
    // Decompose accented letters (NFD), drop the combining marks, then lowercase.
    const fold = (value) => value.normalize('NFD').replace(/\p{M}+/gu, '').toLowerCase();
    // The window is cut on the original text so the match indices stay valid.
    const windowStart = Math.max(0, matchStart - exports.CONTEXT_WINDOW);
    const windowEnd = Math.min(text.length, matchEnd + exports.CONTEXT_WINDOW);
    const surrounding = fold(text.slice(windowStart, windowEnd));
    return words.some((word) => surrounding.includes(fold(word)));
}
|
|
244
|
+
/**
 * Calculate confidence score for a pattern match.
 *
 * Starts from the pattern's base confidence (DEFAULT_BASE_CONFIDENCE when the
 * pattern has no entry), raises it to at least 0.95 when a validator exists
 * and passed, adds CONTEXT_BOOST when one of the pattern's context words is
 * near the match (capped at 1.0), and rounds to two decimals.
 *
 * @param {string} patternName - Key into BASE_CONFIDENCE and CONTEXT_WORDS.
 * @param {boolean} hasValidator - Whether the pattern has a validator.
 * @param {boolean} validatorPassed - Whether that validator accepted the match.
 * @param {string} text - Full text that contains the match.
 * @param {number} matchStart - Index in `text` where the match starts.
 * @param {number} matchEnd - Index in `text` where the match ends.
 * @returns {number} Confidence rounded to two decimals.
 */
function calculateConfidence(patternName, hasValidator, validatorPassed, text, matchStart, matchEnd) {
    const base = exports.BASE_CONFIDENCE[patternName] ?? exports.DEFAULT_BASE_CONFIDENCE;
    // A passed validator guarantees a floor of 0.95.
    const validated = hasValidator && validatorPassed;
    let score = validated ? Math.max(base, 0.95) : base;
    // Patterns without a context-word list never receive the boost.
    const nearbyWords = exports.CONTEXT_WORDS[patternName];
    const inContext = Boolean(nearbyWords) && hasContextWords(text, matchStart, matchEnd, nearbyWords);
    if (inContext) {
        score = Math.min(1.0, score + exports.CONTEXT_BOOST);
    }
    return Math.round(score * 100) / 100;
}
|