rehydra 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +22 -0
- package/README.md +615 -0
- package/dist/crypto/index.d.ts +6 -0
- package/dist/crypto/index.d.ts.map +1 -0
- package/dist/crypto/index.js +6 -0
- package/dist/crypto/index.js.map +1 -0
- package/dist/crypto/pii-map-crypto.d.ts +114 -0
- package/dist/crypto/pii-map-crypto.d.ts.map +1 -0
- package/dist/crypto/pii-map-crypto.js +228 -0
- package/dist/crypto/pii-map-crypto.js.map +1 -0
- package/dist/index.d.ts +180 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +384 -0
- package/dist/index.js.map +1 -0
- package/dist/ner/bio-decoder.d.ts +64 -0
- package/dist/ner/bio-decoder.d.ts.map +1 -0
- package/dist/ner/bio-decoder.js +216 -0
- package/dist/ner/bio-decoder.js.map +1 -0
- package/dist/ner/index.d.ts +10 -0
- package/dist/ner/index.d.ts.map +1 -0
- package/dist/ner/index.js +10 -0
- package/dist/ner/index.js.map +1 -0
- package/dist/ner/model-manager.d.ts +111 -0
- package/dist/ner/model-manager.d.ts.map +1 -0
- package/dist/ner/model-manager.js +325 -0
- package/dist/ner/model-manager.js.map +1 -0
- package/dist/ner/ner-model.d.ts +114 -0
- package/dist/ner/ner-model.d.ts.map +1 -0
- package/dist/ner/ner-model.js +253 -0
- package/dist/ner/ner-model.js.map +1 -0
- package/dist/ner/onnx-runtime.d.ts +46 -0
- package/dist/ner/onnx-runtime.d.ts.map +1 -0
- package/dist/ner/onnx-runtime.js +130 -0
- package/dist/ner/onnx-runtime.js.map +1 -0
- package/dist/ner/tokenizer.d.ts +118 -0
- package/dist/ner/tokenizer.d.ts.map +1 -0
- package/dist/ner/tokenizer.js +332 -0
- package/dist/ner/tokenizer.js.map +1 -0
- package/dist/pipeline/index.d.ts +12 -0
- package/dist/pipeline/index.d.ts.map +1 -0
- package/dist/pipeline/index.js +12 -0
- package/dist/pipeline/index.js.map +1 -0
- package/dist/pipeline/prenormalize.d.ts +48 -0
- package/dist/pipeline/prenormalize.d.ts.map +1 -0
- package/dist/pipeline/prenormalize.js +94 -0
- package/dist/pipeline/prenormalize.js.map +1 -0
- package/dist/pipeline/resolver.d.ts +56 -0
- package/dist/pipeline/resolver.d.ts.map +1 -0
- package/dist/pipeline/resolver.js +239 -0
- package/dist/pipeline/resolver.js.map +1 -0
- package/dist/pipeline/semantic-data-loader.d.ts +165 -0
- package/dist/pipeline/semantic-data-loader.d.ts.map +1 -0
- package/dist/pipeline/semantic-data-loader.js +655 -0
- package/dist/pipeline/semantic-data-loader.js.map +1 -0
- package/dist/pipeline/semantic-enricher.d.ts +112 -0
- package/dist/pipeline/semantic-enricher.d.ts.map +1 -0
- package/dist/pipeline/semantic-enricher.js +318 -0
- package/dist/pipeline/semantic-enricher.js.map +1 -0
- package/dist/pipeline/tagger.d.ts +114 -0
- package/dist/pipeline/tagger.d.ts.map +1 -0
- package/dist/pipeline/tagger.js +374 -0
- package/dist/pipeline/tagger.js.map +1 -0
- package/dist/pipeline/title-extractor.d.ts +79 -0
- package/dist/pipeline/title-extractor.d.ts.map +1 -0
- package/dist/pipeline/title-extractor.js +801 -0
- package/dist/pipeline/title-extractor.js.map +1 -0
- package/dist/pipeline/validator.d.ts +65 -0
- package/dist/pipeline/validator.d.ts.map +1 -0
- package/dist/pipeline/validator.js +264 -0
- package/dist/pipeline/validator.js.map +1 -0
- package/dist/recognizers/base.d.ts +78 -0
- package/dist/recognizers/base.d.ts.map +1 -0
- package/dist/recognizers/base.js +100 -0
- package/dist/recognizers/base.js.map +1 -0
- package/dist/recognizers/bic-swift.d.ts +10 -0
- package/dist/recognizers/bic-swift.d.ts.map +1 -0
- package/dist/recognizers/bic-swift.js +107 -0
- package/dist/recognizers/bic-swift.js.map +1 -0
- package/dist/recognizers/credit-card.d.ts +32 -0
- package/dist/recognizers/credit-card.d.ts.map +1 -0
- package/dist/recognizers/credit-card.js +160 -0
- package/dist/recognizers/credit-card.js.map +1 -0
- package/dist/recognizers/custom-id.d.ts +28 -0
- package/dist/recognizers/custom-id.d.ts.map +1 -0
- package/dist/recognizers/custom-id.js +116 -0
- package/dist/recognizers/custom-id.js.map +1 -0
- package/dist/recognizers/email.d.ts +10 -0
- package/dist/recognizers/email.d.ts.map +1 -0
- package/dist/recognizers/email.js +75 -0
- package/dist/recognizers/email.js.map +1 -0
- package/dist/recognizers/iban.d.ts +14 -0
- package/dist/recognizers/iban.d.ts.map +1 -0
- package/dist/recognizers/iban.js +67 -0
- package/dist/recognizers/iban.js.map +1 -0
- package/dist/recognizers/index.d.ts +20 -0
- package/dist/recognizers/index.d.ts.map +1 -0
- package/dist/recognizers/index.js +42 -0
- package/dist/recognizers/index.js.map +1 -0
- package/dist/recognizers/ip-address.d.ts +14 -0
- package/dist/recognizers/ip-address.d.ts.map +1 -0
- package/dist/recognizers/ip-address.js +183 -0
- package/dist/recognizers/ip-address.js.map +1 -0
- package/dist/recognizers/phone.d.ts +10 -0
- package/dist/recognizers/phone.d.ts.map +1 -0
- package/dist/recognizers/phone.js +145 -0
- package/dist/recognizers/phone.js.map +1 -0
- package/dist/recognizers/registry.d.ts +59 -0
- package/dist/recognizers/registry.d.ts.map +1 -0
- package/dist/recognizers/registry.js +113 -0
- package/dist/recognizers/registry.js.map +1 -0
- package/dist/recognizers/url.d.ts +14 -0
- package/dist/recognizers/url.d.ts.map +1 -0
- package/dist/recognizers/url.js +121 -0
- package/dist/recognizers/url.js.map +1 -0
- package/dist/types/index.d.ts +197 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +80 -0
- package/dist/types/index.js.map +1 -0
- package/dist/types/pii-types.d.ts +50 -0
- package/dist/types/pii-types.d.ts.map +1 -0
- package/dist/types/pii-types.js +114 -0
- package/dist/types/pii-types.js.map +1 -0
- package/dist/utils/iban-checksum.d.ts +23 -0
- package/dist/utils/iban-checksum.d.ts.map +1 -0
- package/dist/utils/iban-checksum.js +106 -0
- package/dist/utils/iban-checksum.js.map +1 -0
- package/dist/utils/index.d.ts +10 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/index.js +10 -0
- package/dist/utils/index.js.map +1 -0
- package/dist/utils/luhn.d.ts +17 -0
- package/dist/utils/luhn.d.ts.map +1 -0
- package/dist/utils/luhn.js +55 -0
- package/dist/utils/luhn.js.map +1 -0
- package/dist/utils/offsets.d.ts +86 -0
- package/dist/utils/offsets.d.ts.map +1 -0
- package/dist/utils/offsets.js +124 -0
- package/dist/utils/offsets.js.map +1 -0
- package/dist/utils/path.d.ts +34 -0
- package/dist/utils/path.d.ts.map +1 -0
- package/dist/utils/path.js +96 -0
- package/dist/utils/path.js.map +1 -0
- package/dist/utils/storage-browser.d.ts +51 -0
- package/dist/utils/storage-browser.d.ts.map +1 -0
- package/dist/utils/storage-browser.js +381 -0
- package/dist/utils/storage-browser.js.map +1 -0
- package/dist/utils/storage-node.d.ts +43 -0
- package/dist/utils/storage-node.d.ts.map +1 -0
- package/dist/utils/storage-node.js +93 -0
- package/dist/utils/storage-node.js.map +1 -0
- package/dist/utils/storage.d.ts +70 -0
- package/dist/utils/storage.d.ts.map +1 -0
- package/dist/utils/storage.js +69 -0
- package/dist/utils/storage.js.map +1 -0
- package/package.json +66 -0
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Recognizer Registry
|
|
3
|
+
* Central registry for all PII recognizers
|
|
4
|
+
*/
|
|
5
|
+
/**
|
|
6
|
+
* Registry for managing PII recognizers
|
|
7
|
+
*/
|
|
8
|
+
export class RecognizerRegistry {
|
|
9
|
+
recognizers = new Map();
|
|
10
|
+
/**
|
|
11
|
+
* Registers a recognizer for a PII type
|
|
12
|
+
*/
|
|
13
|
+
register(recognizer) {
|
|
14
|
+
const existing = this.recognizers.get(recognizer.type) ?? [];
|
|
15
|
+
existing.push(recognizer);
|
|
16
|
+
this.recognizers.set(recognizer.type, existing);
|
|
17
|
+
}
|
|
18
|
+
/**
|
|
19
|
+
* Registers multiple recognizers
|
|
20
|
+
*/
|
|
21
|
+
registerAll(recognizers) {
|
|
22
|
+
for (const recognizer of recognizers) {
|
|
23
|
+
this.register(recognizer);
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
/**
|
|
27
|
+
* Gets all recognizers for a specific type
|
|
28
|
+
*/
|
|
29
|
+
getRecognizers(type) {
|
|
30
|
+
return this.recognizers.get(type) ?? [];
|
|
31
|
+
}
|
|
32
|
+
/**
|
|
33
|
+
* Gets all registered recognizers
|
|
34
|
+
*/
|
|
35
|
+
getAllRecognizers() {
|
|
36
|
+
const all = [];
|
|
37
|
+
for (const recognizers of this.recognizers.values()) {
|
|
38
|
+
all.push(...recognizers);
|
|
39
|
+
}
|
|
40
|
+
return all;
|
|
41
|
+
}
|
|
42
|
+
/**
|
|
43
|
+
* Gets all registered PII types
|
|
44
|
+
*/
|
|
45
|
+
getRegisteredTypes() {
|
|
46
|
+
return Array.from(this.recognizers.keys());
|
|
47
|
+
}
|
|
48
|
+
/**
|
|
49
|
+
* Checks if a recognizer is registered for a type
|
|
50
|
+
*/
|
|
51
|
+
hasRecognizer(type) {
|
|
52
|
+
const recognizers = this.recognizers.get(type);
|
|
53
|
+
return recognizers !== undefined && recognizers.length > 0;
|
|
54
|
+
}
|
|
55
|
+
/**
|
|
56
|
+
* Removes all recognizers for a type
|
|
57
|
+
*/
|
|
58
|
+
unregister(type) {
|
|
59
|
+
this.recognizers.delete(type);
|
|
60
|
+
}
|
|
61
|
+
/**
|
|
62
|
+
* Clears all recognizers
|
|
63
|
+
*/
|
|
64
|
+
clear() {
|
|
65
|
+
this.recognizers.clear();
|
|
66
|
+
}
|
|
67
|
+
/**
|
|
68
|
+
* Runs all enabled recognizers on text and returns matches
|
|
69
|
+
* @param text - Text to analyze
|
|
70
|
+
* @param policy - Anonymization policy to determine which types to detect
|
|
71
|
+
*/
|
|
72
|
+
findAll(text, policy) {
|
|
73
|
+
const matches = [];
|
|
74
|
+
for (const [type, recognizers] of this.recognizers) {
|
|
75
|
+
// Skip types not enabled in policy
|
|
76
|
+
if (!policy.enabledTypes.has(type) || !policy.regexEnabledTypes.has(type)) {
|
|
77
|
+
continue;
|
|
78
|
+
}
|
|
79
|
+
// Get confidence threshold for this type
|
|
80
|
+
const threshold = policy.confidenceThresholds.get(type) ?? 0.5;
|
|
81
|
+
for (const recognizer of recognizers) {
|
|
82
|
+
const typeMatches = recognizer.find(text);
|
|
83
|
+
// Filter by confidence threshold
|
|
84
|
+
for (const match of typeMatches) {
|
|
85
|
+
if (match.confidence >= threshold) {
|
|
86
|
+
matches.push(match);
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
return matches;
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
/**
 * Module-level registry instance shared by all callers of
 * getGlobalRegistry(); stays null until the first call.
 */
let globalRegistry = null;
/**
 * Returns the shared recognizer registry, creating it on first use.
 * Every call returns the same instance.
 */
export function getGlobalRegistry() {
    globalRegistry ??= new RecognizerRegistry();
    return globalRegistry;
}
|
|
107
|
+
/**
 * Builds a brand-new registry instance on every call, independent of the
 * global singleton (useful for testing).
 */
export function createRegistry() {
    const isolated = new RecognizerRegistry();
    return isolated;
}
|
|
113
|
+
//# sourceMappingURL=registry.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"registry.js","sourceRoot":"","sources":["../../src/recognizers/registry.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAKH;;GAEG;AACH,MAAM,OAAO,kBAAkB;IACrB,WAAW,GAA+B,IAAI,GAAG,EAAE,CAAC;IAE5D;;OAEG;IACH,QAAQ,CAAC,UAAsB;QAC7B,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;QAC7D,QAAQ,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QAC1B,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,UAAU,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;IAClD,CAAC;IAED;;OAEG;IACH,WAAW,CAAC,WAAyB;QACnC,KAAK,MAAM,UAAU,IAAI,WAAW,EAAE,CAAC;YACrC,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC;QAC5B,CAAC;IACH,CAAC;IAED;;OAEG;IACH,cAAc,CAAC,IAAa;QAC1B,OAAO,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;IAC1C,CAAC;IAED;;OAEG;IACH,iBAAiB;QACf,MAAM,GAAG,GAAiB,EAAE,CAAC;QAC7B,KAAK,MAAM,WAAW,IAAI,IAAI,CAAC,WAAW,CAAC,MAAM,EAAE,EAAE,CAAC;YACpD,GAAG,CAAC,IAAI,CAAC,GAAG,WAAW,CAAC,CAAC;QAC3B,CAAC;QACD,OAAO,GAAG,CAAC;IACb,CAAC;IAED;;OAEG;IACH,kBAAkB;QAChB,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC,CAAC;IAC7C,CAAC;IAED;;OAEG;IACH,aAAa,CAAC,IAAa;QACzB,MAAM,WAAW,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QAC/C,OAAO,WAAW,KAAK,SAAS,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC;IAC7D,CAAC;IAED;;OAEG;IACH,UAAU,CAAC,IAAa;QACtB,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IAChC,CAAC;IAED;;OAEG;IACH,KAAK;QACH,IAAI,CAAC,WAAW,CAAC,KAAK,EAAE,CAAC;IAC3B,CAAC;IAED;;;;OAIG;IACH,OAAO,CAAC,IAAY,EAAE,MAA2B;QAC/C,MAAM,OAAO,GAAgB,EAAE,CAAC;QAEhC,KAAK,MAAM,CAAC,IAAI,EAAE,WAAW,CAAC,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;YACnD,mCAAmC;YACnC,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,iBAAiB,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC1E,SAAS;YACX,CAAC;YAED,yCAAyC;YACzC,MAAM,SAAS,GAAG,MAAM,CAAC,oBAAoB,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC;YAE/D,KAAK,MAAM,UAAU,IAAI,WAAW,EAAE,CAAC;gBACrC,MAAM,WAAW,GAAG,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBAE1C,iCAAiC;gBACjC,KAAK,MAAM,KAAK,IAAI,WAAW,EAAE,CAAC;oBAChC,IAAI,KAAK,CAAC,UAAU,IAAI,SAAS,EAAE,CAAC;wBAClC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;oBACtB,CAAC;gBACH,CAAC;YACH,CAAC;
QACH,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;CACF;AAED;;GAEG;AACH,IAAI,cAAc,GAA8B,IAAI,CAAC;AAErD;;GAEG;AACH,MAAM,UAAU,iBAAiB;IAC/B,IAAI,cAAc,KAAK,IAAI,EAAE,CAAC;QAC5B,cAAc,GAAG,IAAI,kBAAkB,EAAE,CAAC;IAC5C,CAAC;IACD,OAAO,cAAc,CAAC;AACxB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,cAAc;IAC5B,OAAO,IAAI,kBAAkB,EAAE,CAAC;AAClC,CAAC"}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* URL Recognizer
|
|
3
|
+
* Detects URLs with various protocols
|
|
4
|
+
*/
|
|
5
|
+
import type { Recognizer } from './base.js';
|
|
6
|
+
/**
|
|
7
|
+
* URL recognizer
|
|
8
|
+
*/
|
|
9
|
+
// Typed through the Recognizer interface that this file imports (type-only) from './base.js'.
export declare const urlRecognizer: Recognizer;
|
|
10
|
+
/**
|
|
11
|
+
* Extracts the domain from a URL
|
|
12
|
+
*/
|
|
13
|
+
// Callers must handle the null branch of the return type before using the result as a string.
export declare function extractDomain(url: string): string | null;
|
|
14
|
+
//# sourceMappingURL=url.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"url.d.ts","sourceRoot":"","sources":["../../src/recognizers/url.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,WAAW,CAAC;AAoB5C;;GAEG;AACH,eAAO,MAAM,aAAa,EAAE,UA6D3B,CAAC;AA4BF;;GAEG;AACH,wBAAgB,aAAa,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAgBxD"}
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* URL Recognizer
|
|
3
|
+
* Detects URLs with various protocols
|
|
4
|
+
*/
|
|
5
|
+
import { PIIType, DetectionSource } from '../types/index.js';
|
|
6
|
+
/**
|
|
7
|
+
* URL pattern - matches common URL formats
|
|
8
|
+
 * Supports: http, https, ftp, file protocols (mailto: is matched separately by MAILTO_PATTERN)
|
|
9
|
+
*/
|
|
10
|
+
// The final character class omits ? ! : , . ; so a match can never end on one of those punctuation characters.
const URL_PATTERN = /\b(?:https?|ftp|file):\/\/[-A-Za-z0-9+&@#/%?=~_|!:,.;]*[-A-Za-z0-9+&@#/%=~_|]/g;
|
|
11
|
+
/**
|
|
12
|
+
* Pattern for URLs without explicit protocol (www.)
|
|
13
|
+
*/
|
|
14
|
+
// Same body and final character class as URL_PATTERN, but anchored on a literal "www." prefix instead of a scheme.
const WWW_PATTERN = /\bwww\.[-A-Za-z0-9+&@#/%?=~_|!:,.;]*[-A-Za-z0-9+&@#/%=~_|]/g;
|
|
15
|
+
/**
|
|
16
|
+
* Pattern for mailto: URLs
|
|
17
|
+
*/
|
|
18
|
+
// Requires local@domain.tld after "mailto:", where the TLD is at least two ASCII letters.
const MAILTO_PATTERN = /\bmailto:[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g;
|
|
19
|
+
/**
 * URL recognizer.
 *
 * Scans text with URL_PATTERN, WWW_PATTERN and MAILTO_PATTERN and reports
 * every validated hit as a PIIType.URL span.
 */
export const urlRecognizer = {
    type: PIIType.URL,
    name: 'URL',
    defaultConfidence: 0.92,
    /**
     * Finds URL spans in the given text.
     * @param text - Text to scan
     * @returns The matches left after deduplicateOverlapping() has removed overlaps
     */
    find(text) {
        const matches = [];
        // "start:end" keys of spans already recorded, so two patterns cannot add the same span twice
        const seen = new Set();
        const patterns = [URL_PATTERN, WWW_PATTERN, MAILTO_PATTERN];
        for (const pattern of patterns) {
            // Scan with a fresh RegExp instead of the shared module-level one
            const globalPattern = new RegExp(pattern.source, 'g');
            for (const match of text.matchAll(globalPattern)) {
                if (match.index === undefined)
                    continue;
                const url = match[0];
                const key = `${match.index}:${match.index + url.length}`;
                if (seen.has(key))
                    continue;
                if (!this.validate(url))
                    continue;
                seen.add(key);
                matches.push({
                    type: PIIType.URL,
                    start: match.index,
                    end: match.index + url.length,
                    confidence: this.defaultConfidence,
                    source: DetectionSource.REGEX,
                    text: url,
                });
            }
        }
        // Remove overlapping matches (www. might be substring of http://www.)
        return deduplicateOverlapping(matches);
    },
    /**
     * Decides whether a regex hit looks like a real URL.
     *
     * A candidate passes when it is at least 5 characters long, contains a
     * dot after the scheme, and ends its dotted name in a label of two or
     * more characters. The label is taken from the authority (the part
     * before the first "/", "?" or "#") when that part contains a dot, so a
     * dot inside the path or query (e.g. "https://example.com/v1.2") no
     * longer causes a valid URL to be rejected. The previous whole-string
     * check is kept as a fallback so everything accepted before is still
     * accepted.
     *
     * @param url - Candidate text matched by one of the URL patterns
     * @returns true when the candidate should be reported
     */
    validate(url) {
        // Basic length check
        if (url.length < 5)
            return false;
        // Strip the scheme; "mailto:" has no slashes and needs its own alternative
        const withoutProtocol = url.replace(/^(?:(?:https?|ftp|file|mailto):\/\/?|mailto:)/, '');
        // Should have at least one dot after the protocol
        if (!withoutProtocol.includes('.'))
            return false;
        // Length of the text after the last dot, cut at the first / ? #
        const lastLabelLength = (value) => {
            const label = value.slice(value.lastIndexOf('.') + 1);
            return (label.split(/[/?#]/)[0] ?? '').length;
        };
        // Preferred check: TLD of the authority only, ignoring path/query/fragment
        const authority = withoutProtocol.split(/[/?#]/)[0] ?? '';
        if (authority.includes('.') && lastLabelLength(authority) >= 2)
            return true;
        // Fallback: original check on the whole remainder (covers e.g. file:///dir/name.txt)
        return lastLabelLength(withoutProtocol) >= 2;
    },
    /**
     * Normalizes a matched URL by trimming surrounding whitespace.
     * @param url - Matched URL text
     * @returns The trimmed text
     */
    normalize(url) {
        return url.trim();
    },
};
|
|
78
|
+
/**
|
|
79
|
+
* Remove overlapping matches
|
|
80
|
+
*/
|
|
81
|
+
function deduplicateOverlapping(matches) {
|
|
82
|
+
if (matches.length <= 1)
|
|
83
|
+
return matches;
|
|
84
|
+
const sorted = [...matches].sort((a, b) => a.start - b.start);
|
|
85
|
+
const result = [];
|
|
86
|
+
for (const match of sorted) {
|
|
87
|
+
const last = result[result.length - 1];
|
|
88
|
+
if (last !== undefined && match.start < last.end) {
|
|
89
|
+
// Overlapping - keep the longer one
|
|
90
|
+
if (match.end > last.end) {
|
|
91
|
+
result.pop();
|
|
92
|
+
result.push(match);
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
else {
|
|
96
|
+
result.push(match);
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
return result;
|
|
100
|
+
}
|
|
101
|
+
/**
 * Extracts the domain (hostname) from a URL.
 *
 * Input without a leading scheme (for example "www.example.com/path") is
 * parsed as if it started with "https://". Only a scheme at the very start
 * of the string counts: a "://" that appears later, such as inside a query
 * parameter, does not suppress the default scheme.
 *
 * @param url - URL text, with or without a scheme; surrounding whitespace is ignored
 * @returns The hostname reported by the URL parser, or null when the input cannot be parsed
 */
export function extractDomain(url) {
    try {
        const candidate = url.trim();
        // scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) followed by "://"
        const hasScheme = /^[a-z][a-z0-9+.-]*:\/\//i.test(candidate);
        // Add protocol if missing for URL parsing
        const parsed = new URL(hasScheme ? candidate : 'https://' + candidate);
        return parsed.hostname;
    }
    catch {
        // Unparseable input (or a non-string argument) yields null instead of throwing
        return null;
    }
}
|
|
121
|
+
//# sourceMappingURL=url.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"url.js","sourceRoot":"","sources":["../../src/recognizers/url.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,OAAO,EAAa,eAAe,EAAE,MAAM,mBAAmB,CAAC;AAGxE;;;GAGG;AACH,MAAM,WAAW,GACf,gFAAgF,CAAC;AAEnF;;GAEG;AACH,MAAM,WAAW,GAAG,6DAA6D,CAAC;AAElF;;GAEG;AACH,MAAM,cAAc,GAClB,0DAA0D,CAAC;AAE7D;;GAEG;AACH,MAAM,CAAC,MAAM,aAAa,GAAe;IACvC,IAAI,EAAE,OAAO,CAAC,GAAG;IACjB,IAAI,EAAE,KAAK;IACX,iBAAiB,EAAE,IAAI;IAEvB,IAAI,CAAC,IAAY;QACf,MAAM,OAAO,GAAgB,EAAE,CAAC;QAChC,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;QAE/B,MAAM,QAAQ,GAAG,CAAC,WAAW,EAAE,WAAW,EAAE,cAAc,CAAC,CAAC;QAE5D,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;YAC/B,MAAM,aAAa,GAAG,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;YAEtD,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,EAAE,CAAC;gBACjD,IAAI,KAAK,CAAC,KAAK,KAAK,SAAS;oBAAE,SAAS;gBAExC,MAAM,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;gBACrB,MAAM,GAAG,GAAG,GAAG,KAAK,CAAC,KAAK,IAAI,KAAK,CAAC,KAAK,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC;gBAEzD,IAAI,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;oBAAE,SAAS;gBAC5B,IAAI,CAAC,IAAI,CAAC,QAAS,CAAC,GAAG,CAAC;oBAAE,SAAS;gBAEnC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;gBACd,OAAO,CAAC,IAAI,CAAC;oBACX,IAAI,EAAE,OAAO,CAAC,GAAG;oBACjB,KAAK,EAAE,KAAK,CAAC,KAAK;oBAClB,GAAG,EAAE,KAAK,CAAC,KAAK,GAAG,GAAG,CAAC,MAAM;oBAC7B,UAAU,EAAE,IAAI,CAAC,iBAAiB;oBAClC,MAAM,EAAE,eAAe,CAAC,KAAK;oBAC7B,IAAI,EAAE,GAAG;iBACV,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,sEAAsE;QACtE,OAAO,sBAAsB,CAAC,OAAO,CAAC,CAAC;IACzC,CAAC;IAED,QAAQ,CAAC,GAAW;QAClB,qBAAqB;QACrB,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC;YAAE,OAAO,KAAK,CAAC;QAEjC,kDAAkD;QAClD,MAAM,eAAe,GAAG,GAAG,CAAC,OAAO,CAAC,mCAAmC,EAAE,EAAE,CAAC,CAAC;QAC7E,IAAI,CAAC,eAAe,CAAC,QAAQ,CAAC,GAAG,CAAC;YAAE,OAAO,KAAK,CAAC;QAEjD,sCAAsC;QACtC,MAAM,KAAK,GAAG,eAAe,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QACzC,MAAM,GAAG,GAAG,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QACpC,IAAI,GAAG,KAAK,SAAS;YAAE,OAAO,KAAK,CAAC;QACpC,iCAAiC;QACjC,MAAM,QAAQ,GAAG,GAAG,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC;QACvC,IAAI,QAAQ,KAAK,SAAS,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC;YAAE,OAAO,KAAK,CAAC;
QAEhE,OAAO,IAAI,CAAC;IACd,CAAC;IAED,SAAS,CAAC,GAAW;QACnB,OAAO,GAAG,CAAC,IAAI,EAAE,CAAC;IACpB,CAAC;CACF,CAAC;AAEF;;GAEG;AACH,SAAS,sBAAsB,CAAC,OAAoB;IAClD,IAAI,OAAO,CAAC,MAAM,IAAI,CAAC;QAAE,OAAO,OAAO,CAAC;IAExC,MAAM,MAAM,GAAG,CAAC,GAAG,OAAO,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;IAC9D,MAAM,MAAM,GAAgB,EAAE,CAAC;IAE/B,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,MAAM,IAAI,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAEvC,IAAI,IAAI,KAAK,SAAS,IAAI,KAAK,CAAC,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YACjD,oCAAoC;YACpC,IAAI,KAAK,CAAC,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;gBACzB,MAAM,CAAC,GAAG,EAAE,CAAC;gBACb,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACrB,CAAC;QACH,CAAC;aAAM,CAAC;YACN,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACrB,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,aAAa,CAAC,GAAW;IACvC,IAAI,CAAC;QACH,0CAA0C;QAC1C,IAAI,aAAa,GAAG,GAAG,CAAC;QACxB,IAAI,GAAG,CAAC,UAAU,CAAC,MAAM,CAAC,EAAE,CAAC;YAC3B,aAAa,GAAG,UAAU,GAAG,GAAG,CAAC;QACnC,CAAC;QACD,IAAI,CAAC,aAAa,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;YACnC,aAAa,GAAG,UAAU,GAAG,aAAa,CAAC;QAC7C,CAAC;QAED,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,aAAa,CAAC,CAAC;QACtC,OAAO,MAAM,CAAC,QAAQ,CAAC;IACzB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
import { PIIType } from "./pii-types.js";
|
|
2
|
+
export * from "./pii-types.js";
|
|
3
|
+
/**
 * Identifies how an entity was detected.
 * Each member's value is the same string as its name.
 */
export declare enum DetectionSource {
    REGEX = "REGEX",
    NER = "NER",
    HYBRID = "HYBRID"
}
|
|
11
|
+
/**
|
|
12
|
+
* Gender attribute for PERSON entities
|
|
13
|
+
* Used to preserve grammatical agreement during machine translation
|
|
14
|
+
*/
|
|
15
|
+
// Closed set of four string literals; "unknown" is itself one of the allowed values.
export type PersonGender = "male" | "female" | "neutral" | "unknown";
|
|
16
|
+
/**
|
|
17
|
+
* Scope attribute for LOCATION entities
|
|
18
|
+
* Helps MT engines select correct prepositions (e.g., "in Berlin" vs "in Germany")
|
|
19
|
+
*/
|
|
20
|
+
// Closed set of four string literals: three concrete scopes plus "unknown".
export type LocationScope = "city" | "country" | "region" | "unknown";
|
|
21
|
+
/**
 * Optional linguistic hints attached to a PII entity so that the
 * surrounding text keeps its context when translated.
 */
export interface SemanticAttributes {
    /** PERSON entities: gender */
    gender?: PersonGender;
    /** LOCATION entities: geographic scope */
    scope?: LocationScope;
    /** PERSON entities: extracted honorific title, e.g. "Dr." or "Mrs." */
    title?: string;
}
|
|
33
|
+
/**
 * Callback that receives progress reports while semantic data is downloaded.
 * `totalBytes` and `percent` are typed as nullable.
 */
export type SemanticDownloadProgressCallback = (progress: {
    file: string;
    bytesDownloaded: number;
    totalBytes: number | null;
    percent: number | null;
}) => void;
|
|
42
|
+
/**
 * Configuration for semantic enrichment.
 * Governs automatic downloading and loading of the semantic data used for
 * MT-friendly PII tags.
 */
export interface SemanticConfig {
    /**
     * Turns semantic masking on (gender/scope attributes are added to PII tags)
     * @default false
     */
    enabled: boolean;
    /**
     * Download semantic data automatically when it is not present.
     * The data files hold name-gender mappings (~40K names) and location
     * data (~25K cities); total download size is about 4 MB.
     * @default true when enabled is true
     */
    autoDownload?: boolean;
    /**
     * Invoked with download progress
     */
    onDownloadProgress?: SemanticDownloadProgressCallback;
    /**
     * Invoked with status messages
     */
    onStatus?: (status: string) => void;
}
|
|
68
|
+
/**
 * One detected PII entity: where it sits in the text plus its metadata.
 */
export interface DetectedEntity {
    /** Category of PII */
    type: PIIType;
    /** Identifier unique within the document (1-based, monotonically increasing) */
    id: number;
    /** Offset of the first character in the original text (0-based, inclusive) */
    start: number;
    /** Offset just past the last character in the original text (0-based, exclusive) */
    end: number;
    /** Confidence score of the detection (0.0 to 1.0) */
    confidence: number;
    /** Which detector produced this entity */
    source: DetectionSource;
    /** Original text (only stored in encrypted pii_map, never logged) */
    original: string;
    /** Semantic attributes for MT-friendly tags (gender, scope, etc.) */
    semantic?: SemanticAttributes;
}
|
|
89
|
+
/**
 * A span reported by a recognizer, before an ID has been assigned to it.
 */
export interface SpanMatch {
    /** Category of PII */
    type: PIIType;
    /** Offset of the first character (0-based, inclusive) */
    start: number;
    /** Offset just past the last character (0-based, exclusive) */
    end: number;
    /** Confidence score of the detection (0.0 to 1.0) */
    confidence: number;
    /** Which detector produced this span */
    source: DetectionSource;
    /** The text that was matched */
    text: string;
    /** Semantic attributes for MT-friendly tags (gender, scope, etc.) */
    semantic?: SemanticAttributes;
}
|
|
108
|
+
/**
 * Configuration of one custom ID pattern.
 */
export interface CustomIdPattern {
    /** Name used to identify the pattern */
    name: string;
    /** Regular expression that matches the identifier */
    pattern: RegExp;
    /** PII type assigned to matches (typically CASE_ID or CUSTOMER_ID) */
    type: PIIType;
    /** Optional validation function applied to the matched string */
    validate?: (match: string) => boolean;
}
|
|
121
|
+
/**
 * Configuration of an anonymization policy.
 */
export interface AnonymizationPolicy {
    /** PII types to detect (applies to both regex and NER) */
    enabledTypes: Set<PIIType>;
    /** PII types detected via regex */
    regexEnabledTypes: Set<PIIType>;
    /** PII types detected via NER */
    nerEnabledTypes: Set<PIIType>;
    /** Order used to resolve overlapping entities (higher index = higher priority) */
    typePriority: PIIType[];
    /** Per-type minimum confidence thresholds (default: 0.5) */
    confidenceThresholds: Map<PIIType, number>;
    /** Custom ID patterns for domain-specific identifiers */
    customIdPatterns: CustomIdPattern[];
    /** Terms that must not be treated as PII (case-insensitive) */
    allowlistTerms: Set<string>;
    /** Patterns whose matches are always treated as PII */
    denylistPatterns: RegExp[];
    /** Reuse the same ID when an identical PII string repeats */
    reuseIdsForRepeatedPII: boolean;
    /** Run a leak scan on the anonymized output */
    enableLeakScan: boolean;
    /** Enrich tags with semantic attributes for MT-friendly output (gender, location scope) */
    enableSemanticMasking: boolean;
}
|
|
148
|
+
/**
 * Encrypted PII map entry; all three fields are base64 strings.
 */
export interface EncryptedPIIMap {
    /** Data encrypted with AES-256-GCM (base64) */
    ciphertext: string;
    /** Initialization vector (base64) */
    iv: string;
    /** Authentication tag (base64) */
    authTag: string;
}
|
|
159
|
+
/**
 * Figures describing one anonymization run.
 */
export interface AnonymizationStats {
    /** Number of detected entities per PII type */
    countsByType: Record<PIIType, number>;
    /** Number of detected entities overall */
    totalEntities: number;
    /** Version of the NER model that was used */
    modelVersion: string;
    /** Version/identifier of the policy */
    policyVersion: string;
    /** Processing time in milliseconds */
    processingTimeMs: number;
    /** Whether the leak scan passed (if enabled) */
    leakScanPassed?: boolean;
}
|
|
176
|
+
/**
 * Everything produced by one anonymization run.
 */
export interface AnonymizationResult {
    /** The text with PII replaced by placeholder tags */
    anonymizedText: string;
    /** Detected entities; the `original` field is omitted from the type for safety */
    entities: Omit<DetectedEntity, "original">[];
    /** Encrypted mapping of (type, id) -> original string */
    piiMap: EncryptedPIIMap;
    /** Statistics about the anonymization */
    stats: AnonymizationStats;
}
|
|
189
|
+
/**
|
|
190
|
+
* Creates a default anonymization policy with all types enabled
|
|
191
|
+
*/
|
|
192
|
+
// Takes no arguments; the implementation in index.js enables leak scanning and leaves semantic masking and ID reuse off.
export declare function createDefaultPolicy(): AnonymizationPolicy;
|
|
193
|
+
/**
|
|
194
|
+
* Merges a partial policy with defaults
|
|
195
|
+
*/
|
|
196
|
+
// Accepts any subset of policy fields (Partial) and always returns a complete AnonymizationPolicy.
export declare function mergePolicy(partial: Partial<AnonymizationPolicy>): AnonymizationPolicy;
|
|
197
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/types/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAyB,MAAM,gBAAgB,CAAC;AAEhE,cAAc,gBAAgB,CAAC;AAE/B;;GAEG;AACH,oBAAY,eAAe;IACzB,KAAK,UAAU;IACf,GAAG,QAAQ;IACX,MAAM,WAAW;CAClB;AAMD;;;GAGG;AACH,MAAM,MAAM,YAAY,GAAG,MAAM,GAAG,QAAQ,GAAG,SAAS,GAAG,SAAS,CAAC;AAErE;;;GAGG;AACH,MAAM,MAAM,aAAa,GAAG,MAAM,GAAG,SAAS,GAAG,QAAQ,GAAG,SAAS,CAAC;AAEtE;;;GAGG;AACH,MAAM,WAAW,kBAAkB;IACjC,iCAAiC;IACjC,MAAM,CAAC,EAAE,YAAY,CAAC;IACtB,6CAA6C;IAC7C,KAAK,CAAC,EAAE,aAAa,CAAC;IACtB,2EAA2E;IAC3E,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,MAAM,MAAM,gCAAgC,GAAG,CAAC,QAAQ,EAAE;IACxD,IAAI,EAAE,MAAM,CAAC;IACb,eAAe,EAAE,MAAM,CAAC;IACxB,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;CACxB,KAAK,IAAI,CAAC;AAEX;;;GAGG;AACH,MAAM,WAAW,cAAc;IAC7B;;;OAGG;IACH,OAAO,EAAE,OAAO,CAAC;IAEjB;;;;;OAKG;IACH,YAAY,CAAC,EAAE,OAAO,CAAC;IAEvB;;OAEG;IACH,kBAAkB,CAAC,EAAE,gCAAgC,CAAC;IAEtD;;OAEG;IACH,QAAQ,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,IAAI,CAAC;CACrC;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,mBAAmB;IACnB,IAAI,EAAE,OAAO,CAAC;IACd,gFAAgF;IAChF,EAAE,EAAE,MAAM,CAAC;IACX,mEAAmE;IACnE,KAAK,EAAE,MAAM,CAAC;IACd,iEAAiE;IACjE,GAAG,EAAE,MAAM,CAAC;IACZ,8CAA8C;IAC9C,UAAU,EAAE,MAAM,CAAC;IACnB,mCAAmC;IACnC,MAAM,EAAE,eAAe,CAAC;IACxB,qEAAqE;IACrE,QAAQ,EAAE,MAAM,CAAC;IACjB,qEAAqE;IACrE,QAAQ,CAAC,EAAE,kBAAkB,CAAC;CAC/B;AAED;;GAEG;AACH,MAAM,WAAW,SAAS;IACxB,mBAAmB;IACnB,IAAI,EAAE,OAAO,CAAC;IACd,kDAAkD;IAClD,KAAK,EAAE,MAAM,CAAC;IACd,gDAAgD;IAChD,GAAG,EAAE,MAAM,CAAC;IACZ,8CAA8C;IAC9C,UAAU,EAAE,MAAM,CAAC;IACnB,iCAAiC;IACjC,MAAM,EAAE,eAAe,CAAC;IACxB,uBAAuB;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,qEAAqE;IACrE,QAAQ,CAAC,EAAE,kBAAkB,CAAC;CAC/B;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,sCAAsC;IACtC,IAAI,EAAE,MAAM,CAAC;IACb,iCAAiC;IACjC,OAAO,EAAE,MAAM,CAAC;IAChB,4DAA4D;IAC5D,IAAI,EAAE,OAAO,CAAC;IACd,mCAAmC;IACnC,QAAQ,CAAC,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,OAAO,CAAC;CACvC;AAED;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC,sDAAsD;IACtD,YAAY,EAAE,GAAG,CAAC,OAAO,CAAC,CAAC;IAC3B,2CAA2
C;IAC3C,iBAAiB,EAAE,GAAG,CAAC,OAAO,CAAC,CAAC;IAChC,yCAAyC;IACzC,eAAe,EAAE,GAAG,CAAC,OAAO,CAAC,CAAC;IAC9B,yFAAyF;IACzF,YAAY,EAAE,OAAO,EAAE,CAAC;IACxB,4DAA4D;IAC5D,oBAAoB,EAAE,GAAG,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;IAC3C,yDAAyD;IACzD,gBAAgB,EAAE,eAAe,EAAE,CAAC;IACpC,iEAAiE;IACjE,cAAc,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC;IAC5B,iDAAiD;IACjD,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,8DAA8D;IAC9D,sBAAsB,EAAE,OAAO,CAAC;IAChC,oDAAoD;IACpD,cAAc,EAAE,OAAO,CAAC;IACxB,yFAAyF;IACzF,qBAAqB,EAAE,OAAO,CAAC;CAChC;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,0CAA0C;IAC1C,UAAU,EAAE,MAAM,CAAC;IACnB,qCAAqC;IACrC,EAAE,EAAE,MAAM,CAAC;IACX,kCAAkC;IAClC,OAAO,EAAE,MAAM,CAAC;CACjB;AAED;;GAEG;AACH,MAAM,WAAW,kBAAkB;IACjC,0CAA0C;IAC1C,YAAY,EAAE,MAAM,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;IACtC,wCAAwC;IACxC,aAAa,EAAE,MAAM,CAAC;IACtB,6BAA6B;IAC7B,YAAY,EAAE,MAAM,CAAC;IACrB,gCAAgC;IAChC,aAAa,EAAE,MAAM,CAAC;IACtB,sCAAsC;IACtC,gBAAgB,EAAE,MAAM,CAAC;IACzB,4CAA4C;IAC5C,cAAc,CAAC,EAAE,OAAO,CAAC;CAC1B;AAED;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC,iDAAiD;IACjD,cAAc,EAAE,MAAM,CAAC;IACvB,mEAAmE;IACnE,QAAQ,EAAE,IAAI,CAAC,cAAc,EAAE,UAAU,CAAC,EAAE,CAAC;IAC7C,yDAAyD;IACzD,MAAM,EAAE,eAAe,CAAC;IACxB,yCAAyC;IACzC,KAAK,EAAE,kBAAkB,CAAC;CAC3B;AAED;;GAEG;AACH,wBAAgB,mBAAmB,IAAI,mBAAmB,CAyCzD;AAED;;GAEG;AACH,wBAAgB,WAAW,CACzB,OAAO,EAAE,OAAO,CAAC,mBAAmB,CAAC,GACpC,mBAAmB,CA+BrB"}
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import { PIIType, DEFAULT_TYPE_PRIORITY } from "./pii-types.js";
|
|
2
|
+
export * from "./pii-types.js";
|
|
3
|
+
/**
 * Source of entity detection.
 *
 * Runtime lookup object for a string enum: it exposes the keys REGEX, NER and
 * HYBRID, and each key holds a string equal to its own name.
 */
export var DetectionSource;
if (!DetectionSource) {
    // First evaluation of the module: start from an empty lookup object.
    DetectionSource = {};
}
// Members are added in the order REGEX, NER, HYBRID.
Object.assign(DetectionSource, {
    REGEX: "REGEX",
    NER: "NER",
    HYBRID: "HYBRID",
});
|
|
12
|
+
/**
 * Creates a default anonymization policy with all types enabled.
 *
 * Every call builds fresh Set/Map/array instances, so the returned policy can
 * be mutated without affecting later calls.
 *
 * @returns A policy object where:
 *  - `enabledTypes` holds every value of `PIIType`;
 *  - `regexEnabledTypes` / `nerEnabledTypes` hold the fixed lists below;
 *  - `typePriority` is a copy of `DEFAULT_TYPE_PRIORITY`;
 *  - `confidenceThresholds` maps PERSON and ORG to 0.7 and every other type to 0.5;
 *  - custom patterns, allowlist and denylist are empty;
 *  - `reuseIdsForRepeatedPII` is false, `enableLeakScan` is true,
 *    `enableSemanticMasking` is false.
 */
export function createDefaultPolicy() {
    const everyType = new Set(Object.values(PIIType));

    // Per-type minimum confidence. PERSON and ORG get the stricter cut-off:
    // higher threshold for NER-detected types (more uncertainty).
    const thresholdByType = new Map();
    everyType.forEach((piiType) => {
        const needsStricterCutoff = piiType === PIIType.PERSON || piiType === PIIType.ORG;
        thresholdByType.set(piiType, needsStricterCutoff ? 0.7 : 0.5);
    });

    // Types enabled for the regex detector.
    const regexTypes = [
        PIIType.EMAIL,
        PIIType.PHONE,
        PIIType.IBAN,
        PIIType.BIC_SWIFT,
        PIIType.CREDIT_CARD,
        PIIType.IP_ADDRESS,
        PIIType.URL,
        PIIType.CASE_ID,
        PIIType.CUSTOMER_ID,
    ];

    // Types enabled for the NER detector.
    const nerTypes = [
        PIIType.PERSON,
        PIIType.ORG,
        PIIType.LOCATION,
        PIIType.ADDRESS,
        PIIType.DATE_OF_BIRTH,
    ];

    return {
        enabledTypes: everyType,
        regexEnabledTypes: new Set(regexTypes),
        nerEnabledTypes: new Set(nerTypes),
        // Copy so callers never mutate the shared default ordering.
        typePriority: Array.from(DEFAULT_TYPE_PRIORITY),
        confidenceThresholds: thresholdByType,
        customIdPatterns: [],
        allowlistTerms: new Set(),
        denylistPatterns: [],
        reuseIdsForRepeatedPII: false,
        enableLeakScan: true,
        enableSemanticMasking: false,
    };
}
|
|
52
|
+
/**
 * Merges a partial policy with defaults.
 *
 * A freshly created default policy supplies every field that `partial` leaves
 * as `undefined` or `null`; explicit values such as `false` are kept.
 * `confidenceThresholds` is the one field merged entry-by-entry: when supplied,
 * its entries are written over a copy of the default thresholds, so types not
 * mentioned keep their default value. All other supplied collections are used
 * as-is (not copied).
 *
 * @param partial Object carrying any subset of the policy fields.
 * @returns A complete policy object with every field populated.
 */
export function mergePolicy(partial) {
    const defaults = createDefaultPolicy();

    // Layer caller-supplied thresholds over a copy of the default map.
    let mergedThresholds = defaults.confidenceThresholds;
    const thresholdOverrides = partial.confidenceThresholds;
    if (thresholdOverrides !== undefined) {
        mergedThresholds = new Map(mergedThresholds);
        for (const [piiType, minConfidence] of thresholdOverrides) {
            mergedThresholds.set(piiType, minConfidence);
        }
    }

    // Caller's value when present, otherwise the default for the same field.
    const resolve = (field) => partial[field] ?? defaults[field];

    return {
        enabledTypes: resolve("enabledTypes"),
        regexEnabledTypes: resolve("regexEnabledTypes"),
        nerEnabledTypes: resolve("nerEnabledTypes"),
        typePriority: resolve("typePriority"),
        confidenceThresholds: mergedThresholds,
        customIdPatterns: resolve("customIdPatterns"),
        allowlistTerms: resolve("allowlistTerms"),
        denylistPatterns: resolve("denylistPatterns"),
        reuseIdsForRepeatedPII: resolve("reuseIdsForRepeatedPII"),
        enableLeakScan: resolve("enableLeakScan"),
        enableSemanticMasking: resolve("enableSemanticMasking"),
    };
}
|
|
80
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/types/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,qBAAqB,EAAE,MAAM,gBAAgB,CAAC;AAEhE,cAAc,gBAAgB,CAAC;AAE/B;;GAEG;AACH,MAAM,CAAN,IAAY,eAIX;AAJD,WAAY,eAAe;IACzB,kCAAe,CAAA;IACf,8BAAW,CAAA;IACX,oCAAiB,CAAA;AACnB,CAAC,EAJW,eAAe,KAAf,eAAe,QAI1B;AAuMD;;GAEG;AACH,MAAM,UAAU,mBAAmB;IACjC,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,OAAO,CAAc,CAAC,CAAC;IAE9D,MAAM,iBAAiB,GAAG,IAAI,GAAG,EAAmB,CAAC;IACrD,KAAK,MAAM,IAAI,IAAI,QAAQ,EAAE,CAAC;QAC5B,6DAA6D;QAC7D,iBAAiB,CAAC,GAAG,CACnB,IAAI,EACJ,IAAI,KAAK,OAAO,CAAC,MAAM,IAAI,IAAI,KAAK,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAC5D,CAAC;IACJ,CAAC;IAED,OAAO;QACL,YAAY,EAAE,QAAQ;QACtB,iBAAiB,EAAE,IAAI,GAAG,CAAC;YACzB,OAAO,CAAC,KAAK;YACb,OAAO,CAAC,KAAK;YACb,OAAO,CAAC,IAAI;YACZ,OAAO,CAAC,SAAS;YACjB,OAAO,CAAC,WAAW;YACnB,OAAO,CAAC,UAAU;YAClB,OAAO,CAAC,GAAG;YACX,OAAO,CAAC,OAAO;YACf,OAAO,CAAC,WAAW;SACpB,CAAC;QACF,eAAe,EAAE,IAAI,GAAG,CAAC;YACvB,OAAO,CAAC,MAAM;YACd,OAAO,CAAC,GAAG;YACX,OAAO,CAAC,QAAQ;YAChB,OAAO,CAAC,OAAO;YACf,OAAO,CAAC,aAAa;SACtB,CAAC;QACF,YAAY,EAAE,CAAC,GAAG,qBAAqB,CAAC;QACxC,oBAAoB,EAAE,iBAAiB;QACvC,gBAAgB,EAAE,EAAE;QACpB,cAAc,EAAE,IAAI,GAAG,EAAE;QACzB,gBAAgB,EAAE,EAAE;QACpB,sBAAsB,EAAE,KAAK;QAC7B,cAAc,EAAE,IAAI;QACpB,qBAAqB,EAAE,KAAK;KAC7B,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,WAAW,CACzB,OAAqC;IAErC,MAAM,aAAa,GAAG,mBAAmB,EAAE,CAAC;IAE5C,sCAAsC;IACtC,IAAI,oBAAoB,GAAG,aAAa,CAAC,oBAAoB,CAAC;IAC9D,IAAI,OAAO,CAAC,oBAAoB,KAAK,SAAS,EAAE,CAAC;QAC/C,oBAAoB,GAAG,IAAI,GAAG,CAAC,aAAa,CAAC,oBAAoB,CAAC,CAAC;QACnE,8BAA8B;QAC9B,KAAK,MAAM,CAAC,IAAI,EAAE,SAAS,CAAC,IAAI,OAAO,CAAC,oBAAoB,EAAE,CAAC;YAC7D,oBAAoB,CAAC,GAAG,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;QAC5C,CAAC;IACH,CAAC;IAED,OAAO;QACL,YAAY,EAAE,OAAO,CAAC,YAAY,IAAI,aAAa,CAAC,YAAY;QAChE,iBAAiB,EACf,OAAO,CAAC,iBAAiB,IAAI,aAAa,CAAC,iBAAiB;QAC9D,eAAe,EAAE,OAAO,CAAC,eAAe,IAAI,aAAa,CAAC,eAAe;QACzE,YAAY,EAAE,OAAO,CAAC,YAAY,IAAI,aAAa,CAAC,YAAY;QAChE,oBAAoB;QACpB,gBAAgB,EACd,OAAO,CAAC,gBAAgB,IAAI,aAAa,
CAAC,gBAAgB;QAC5D,cAAc,EAAE,OAAO,CAAC,cAAc,IAAI,aAAa,CAAC,cAAc;QACtE,gBAAgB,EACd,OAAO,CAAC,gBAAgB,IAAI,aAAa,CAAC,gBAAgB;QAC5D,sBAAsB,EACpB,OAAO,CAAC,sBAAsB,IAAI,aAAa,CAAC,sBAAsB;QACxE,cAAc,EAAE,OAAO,CAAC,cAAc,IAAI,aAAa,CAAC,cAAc;QACtE,qBAAqB,EACnB,OAAO,CAAC,qBAAqB,IAAI,aAAa,CAAC,qBAAqB;KACvE,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PII Type Enumeration
|
|
3
|
+
* Defines all supported PII categories for detection and anonymization. Declared as a string enum: each member's value is the same text as its name (e.g. EMAIL = "EMAIL").
|
|
4
|
+
*/
|
|
5
|
+
export declare enum PIIType {
|
|
6
|
+
PERSON = "PERSON",
|
|
7
|
+
ORG = "ORG",
|
|
8
|
+
LOCATION = "LOCATION",
|
|
9
|
+
ADDRESS = "ADDRESS",
|
|
10
|
+
EMAIL = "EMAIL",
|
|
11
|
+
PHONE = "PHONE",
|
|
12
|
+
URL = "URL",
|
|
13
|
+
IP_ADDRESS = "IP_ADDRESS",
|
|
14
|
+
IBAN = "IBAN",
|
|
15
|
+
BIC_SWIFT = "BIC_SWIFT",
|
|
16
|
+
ACCOUNT_NUMBER = "ACCOUNT_NUMBER",
|
|
17
|
+
CREDIT_CARD = "CREDIT_CARD",
|
|
18
|
+
TAX_ID = "TAX_ID",
|
|
19
|
+
NATIONAL_ID = "NATIONAL_ID",
|
|
20
|
+
DATE_OF_BIRTH = "DATE_OF_BIRTH",
|
|
21
|
+
CASE_ID = "CASE_ID",
|
|
22
|
+
CUSTOMER_ID = "CUSTOMER_ID"
|
|
23
|
+
}
|
|
24
|
+
/**
|
|
25
|
+
* All PII types as a readonly array for iteration
|
|
26
|
+
*/
|
|
27
|
+
export declare const ALL_PII_TYPES: readonly PIIType[];
|
|
28
|
+
/**
|
|
29
|
+
* PII types that are detected via regex (structured PII)
|
|
30
|
+
*/
|
|
31
|
+
export declare const REGEX_PII_TYPES: readonly PIIType[];
|
|
32
|
+
/**
|
|
33
|
+
* PII types that are detected via NER model (soft PII)
|
|
34
|
+
*/
|
|
35
|
+
export declare const NER_PII_TYPES: readonly PIIType[];
|
|
36
|
+
/**
|
|
37
|
+
* Default priority order for resolving overlapping entities
|
|
38
|
+
* Higher index = higher priority, i.e. a type that appears later in this array outranks the types before it
|
|
39
|
+
*/
|
|
40
|
+
export declare const DEFAULT_TYPE_PRIORITY: readonly PIIType[];
|
|
41
|
+
/**
|
|
42
|
+
* Maps NER model labels to PIIType (keys are label strings, values are PIIType members)
|
|
43
|
+
* Common label formats from NER models (B-PER, I-PER, B-ORG, etc.)
|
|
44
|
+
*/
|
|
45
|
+
export declare const NER_LABEL_TO_PII_TYPE: Record<string, PIIType>;
|
|
46
|
+
/**
|
|
47
|
+
* Get PIIType from NER label (handles B-/I- prefixes). Takes the label string and returns a PIIType or null; presumably null means the label has no PII mapping (TODO confirm against the implementation).
|
|
48
|
+
*/
|
|
49
|
+
export declare function getPIITypeFromNERLabel(label: string): PIIType | null;
|
|
50
|
+
//# sourceMappingURL=pii-types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pii-types.d.ts","sourceRoot":"","sources":["../../src/types/pii-types.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,oBAAY,OAAO;IAEjB,MAAM,WAAW;IACjB,GAAG,QAAQ;IACX,QAAQ,aAAa;IACrB,OAAO,YAAY;IAGnB,KAAK,UAAU;IACf,KAAK,UAAU;IACf,GAAG,QAAQ;IACX,UAAU,eAAe;IAGzB,IAAI,SAAS;IACb,SAAS,cAAc;IACvB,cAAc,mBAAmB;IACjC,WAAW,gBAAgB;IAG3B,MAAM,WAAW;IACjB,WAAW,gBAAgB;IAC3B,aAAa,kBAAkB;IAG/B,OAAO,YAAY;IACnB,WAAW,gBAAgB;CAC5B;AAED;;GAEG;AACH,eAAO,MAAM,aAAa,EAAE,SAAS,OAAO,EAAwC,CAAC;AAErF;;GAEG;AACH,eAAO,MAAM,eAAe,EAAE,SAAS,OAAO,EAa7C,CAAC;AAEF;;GAEG;AACH,eAAO,MAAM,aAAa,EAAE,SAAS,OAAO,EAM3C,CAAC;AAEF;;;GAGG;AACH,eAAO,MAAM,qBAAqB,EAAE,SAAS,OAAO,EAqBnD,CAAC;AAEF;;;GAGG;AACH,eAAO,MAAM,qBAAqB,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAWzD,CAAC;AAEF;;GAEG;AACH,wBAAgB,sBAAsB,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,GAAG,IAAI,CASpE"}
|