rehydra 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155) hide show
  1. package/LICENSE +22 -0
  2. package/README.md +615 -0
  3. package/dist/crypto/index.d.ts +6 -0
  4. package/dist/crypto/index.d.ts.map +1 -0
  5. package/dist/crypto/index.js +6 -0
  6. package/dist/crypto/index.js.map +1 -0
  7. package/dist/crypto/pii-map-crypto.d.ts +114 -0
  8. package/dist/crypto/pii-map-crypto.d.ts.map +1 -0
  9. package/dist/crypto/pii-map-crypto.js +228 -0
  10. package/dist/crypto/pii-map-crypto.js.map +1 -0
  11. package/dist/index.d.ts +180 -0
  12. package/dist/index.d.ts.map +1 -0
  13. package/dist/index.js +384 -0
  14. package/dist/index.js.map +1 -0
  15. package/dist/ner/bio-decoder.d.ts +64 -0
  16. package/dist/ner/bio-decoder.d.ts.map +1 -0
  17. package/dist/ner/bio-decoder.js +216 -0
  18. package/dist/ner/bio-decoder.js.map +1 -0
  19. package/dist/ner/index.d.ts +10 -0
  20. package/dist/ner/index.d.ts.map +1 -0
  21. package/dist/ner/index.js +10 -0
  22. package/dist/ner/index.js.map +1 -0
  23. package/dist/ner/model-manager.d.ts +111 -0
  24. package/dist/ner/model-manager.d.ts.map +1 -0
  25. package/dist/ner/model-manager.js +325 -0
  26. package/dist/ner/model-manager.js.map +1 -0
  27. package/dist/ner/ner-model.d.ts +114 -0
  28. package/dist/ner/ner-model.d.ts.map +1 -0
  29. package/dist/ner/ner-model.js +253 -0
  30. package/dist/ner/ner-model.js.map +1 -0
  31. package/dist/ner/onnx-runtime.d.ts +46 -0
  32. package/dist/ner/onnx-runtime.d.ts.map +1 -0
  33. package/dist/ner/onnx-runtime.js +130 -0
  34. package/dist/ner/onnx-runtime.js.map +1 -0
  35. package/dist/ner/tokenizer.d.ts +118 -0
  36. package/dist/ner/tokenizer.d.ts.map +1 -0
  37. package/dist/ner/tokenizer.js +332 -0
  38. package/dist/ner/tokenizer.js.map +1 -0
  39. package/dist/pipeline/index.d.ts +12 -0
  40. package/dist/pipeline/index.d.ts.map +1 -0
  41. package/dist/pipeline/index.js +12 -0
  42. package/dist/pipeline/index.js.map +1 -0
  43. package/dist/pipeline/prenormalize.d.ts +48 -0
  44. package/dist/pipeline/prenormalize.d.ts.map +1 -0
  45. package/dist/pipeline/prenormalize.js +94 -0
  46. package/dist/pipeline/prenormalize.js.map +1 -0
  47. package/dist/pipeline/resolver.d.ts +56 -0
  48. package/dist/pipeline/resolver.d.ts.map +1 -0
  49. package/dist/pipeline/resolver.js +239 -0
  50. package/dist/pipeline/resolver.js.map +1 -0
  51. package/dist/pipeline/semantic-data-loader.d.ts +165 -0
  52. package/dist/pipeline/semantic-data-loader.d.ts.map +1 -0
  53. package/dist/pipeline/semantic-data-loader.js +655 -0
  54. package/dist/pipeline/semantic-data-loader.js.map +1 -0
  55. package/dist/pipeline/semantic-enricher.d.ts +112 -0
  56. package/dist/pipeline/semantic-enricher.d.ts.map +1 -0
  57. package/dist/pipeline/semantic-enricher.js +318 -0
  58. package/dist/pipeline/semantic-enricher.js.map +1 -0
  59. package/dist/pipeline/tagger.d.ts +114 -0
  60. package/dist/pipeline/tagger.d.ts.map +1 -0
  61. package/dist/pipeline/tagger.js +374 -0
  62. package/dist/pipeline/tagger.js.map +1 -0
  63. package/dist/pipeline/title-extractor.d.ts +79 -0
  64. package/dist/pipeline/title-extractor.d.ts.map +1 -0
  65. package/dist/pipeline/title-extractor.js +801 -0
  66. package/dist/pipeline/title-extractor.js.map +1 -0
  67. package/dist/pipeline/validator.d.ts +65 -0
  68. package/dist/pipeline/validator.d.ts.map +1 -0
  69. package/dist/pipeline/validator.js +264 -0
  70. package/dist/pipeline/validator.js.map +1 -0
  71. package/dist/recognizers/base.d.ts +78 -0
  72. package/dist/recognizers/base.d.ts.map +1 -0
  73. package/dist/recognizers/base.js +100 -0
  74. package/dist/recognizers/base.js.map +1 -0
  75. package/dist/recognizers/bic-swift.d.ts +10 -0
  76. package/dist/recognizers/bic-swift.d.ts.map +1 -0
  77. package/dist/recognizers/bic-swift.js +107 -0
  78. package/dist/recognizers/bic-swift.js.map +1 -0
  79. package/dist/recognizers/credit-card.d.ts +32 -0
  80. package/dist/recognizers/credit-card.d.ts.map +1 -0
  81. package/dist/recognizers/credit-card.js +160 -0
  82. package/dist/recognizers/credit-card.js.map +1 -0
  83. package/dist/recognizers/custom-id.d.ts +28 -0
  84. package/dist/recognizers/custom-id.d.ts.map +1 -0
  85. package/dist/recognizers/custom-id.js +116 -0
  86. package/dist/recognizers/custom-id.js.map +1 -0
  87. package/dist/recognizers/email.d.ts +10 -0
  88. package/dist/recognizers/email.d.ts.map +1 -0
  89. package/dist/recognizers/email.js +75 -0
  90. package/dist/recognizers/email.js.map +1 -0
  91. package/dist/recognizers/iban.d.ts +14 -0
  92. package/dist/recognizers/iban.d.ts.map +1 -0
  93. package/dist/recognizers/iban.js +67 -0
  94. package/dist/recognizers/iban.js.map +1 -0
  95. package/dist/recognizers/index.d.ts +20 -0
  96. package/dist/recognizers/index.d.ts.map +1 -0
  97. package/dist/recognizers/index.js +42 -0
  98. package/dist/recognizers/index.js.map +1 -0
  99. package/dist/recognizers/ip-address.d.ts +14 -0
  100. package/dist/recognizers/ip-address.d.ts.map +1 -0
  101. package/dist/recognizers/ip-address.js +183 -0
  102. package/dist/recognizers/ip-address.js.map +1 -0
  103. package/dist/recognizers/phone.d.ts +10 -0
  104. package/dist/recognizers/phone.d.ts.map +1 -0
  105. package/dist/recognizers/phone.js +145 -0
  106. package/dist/recognizers/phone.js.map +1 -0
  107. package/dist/recognizers/registry.d.ts +59 -0
  108. package/dist/recognizers/registry.d.ts.map +1 -0
  109. package/dist/recognizers/registry.js +113 -0
  110. package/dist/recognizers/registry.js.map +1 -0
  111. package/dist/recognizers/url.d.ts +14 -0
  112. package/dist/recognizers/url.d.ts.map +1 -0
  113. package/dist/recognizers/url.js +121 -0
  114. package/dist/recognizers/url.js.map +1 -0
  115. package/dist/types/index.d.ts +197 -0
  116. package/dist/types/index.d.ts.map +1 -0
  117. package/dist/types/index.js +80 -0
  118. package/dist/types/index.js.map +1 -0
  119. package/dist/types/pii-types.d.ts +50 -0
  120. package/dist/types/pii-types.d.ts.map +1 -0
  121. package/dist/types/pii-types.js +114 -0
  122. package/dist/types/pii-types.js.map +1 -0
  123. package/dist/utils/iban-checksum.d.ts +23 -0
  124. package/dist/utils/iban-checksum.d.ts.map +1 -0
  125. package/dist/utils/iban-checksum.js +106 -0
  126. package/dist/utils/iban-checksum.js.map +1 -0
  127. package/dist/utils/index.d.ts +10 -0
  128. package/dist/utils/index.d.ts.map +1 -0
  129. package/dist/utils/index.js +10 -0
  130. package/dist/utils/index.js.map +1 -0
  131. package/dist/utils/luhn.d.ts +17 -0
  132. package/dist/utils/luhn.d.ts.map +1 -0
  133. package/dist/utils/luhn.js +55 -0
  134. package/dist/utils/luhn.js.map +1 -0
  135. package/dist/utils/offsets.d.ts +86 -0
  136. package/dist/utils/offsets.d.ts.map +1 -0
  137. package/dist/utils/offsets.js +124 -0
  138. package/dist/utils/offsets.js.map +1 -0
  139. package/dist/utils/path.d.ts +34 -0
  140. package/dist/utils/path.d.ts.map +1 -0
  141. package/dist/utils/path.js +96 -0
  142. package/dist/utils/path.js.map +1 -0
  143. package/dist/utils/storage-browser.d.ts +51 -0
  144. package/dist/utils/storage-browser.d.ts.map +1 -0
  145. package/dist/utils/storage-browser.js +381 -0
  146. package/dist/utils/storage-browser.js.map +1 -0
  147. package/dist/utils/storage-node.d.ts +43 -0
  148. package/dist/utils/storage-node.d.ts.map +1 -0
  149. package/dist/utils/storage-node.js +93 -0
  150. package/dist/utils/storage-node.js.map +1 -0
  151. package/dist/utils/storage.d.ts +70 -0
  152. package/dist/utils/storage.d.ts.map +1 -0
  153. package/dist/utils/storage.js +69 -0
  154. package/dist/utils/storage.js.map +1 -0
  155. package/package.json +66 -0
@@ -0,0 +1,113 @@
1
+ /**
2
+ * Recognizer Registry
3
+ * Central registry for all PII recognizers
4
+ */
5
+ /**
6
+ * Registry for managing PII recognizers
7
+ */
8
+ export class RecognizerRegistry {
9
+ recognizers = new Map();
10
+ /**
11
+ * Registers a recognizer for a PII type
12
+ */
13
+ register(recognizer) {
14
+ const existing = this.recognizers.get(recognizer.type) ?? [];
15
+ existing.push(recognizer);
16
+ this.recognizers.set(recognizer.type, existing);
17
+ }
18
+ /**
19
+ * Registers multiple recognizers
20
+ */
21
+ registerAll(recognizers) {
22
+ for (const recognizer of recognizers) {
23
+ this.register(recognizer);
24
+ }
25
+ }
26
+ /**
27
+ * Gets all recognizers for a specific type
28
+ */
29
+ getRecognizers(type) {
30
+ return this.recognizers.get(type) ?? [];
31
+ }
32
+ /**
33
+ * Gets all registered recognizers
34
+ */
35
+ getAllRecognizers() {
36
+ const all = [];
37
+ for (const recognizers of this.recognizers.values()) {
38
+ all.push(...recognizers);
39
+ }
40
+ return all;
41
+ }
42
+ /**
43
+ * Gets all registered PII types
44
+ */
45
+ getRegisteredTypes() {
46
+ return Array.from(this.recognizers.keys());
47
+ }
48
+ /**
49
+ * Checks if a recognizer is registered for a type
50
+ */
51
+ hasRecognizer(type) {
52
+ const recognizers = this.recognizers.get(type);
53
+ return recognizers !== undefined && recognizers.length > 0;
54
+ }
55
+ /**
56
+ * Removes all recognizers for a type
57
+ */
58
+ unregister(type) {
59
+ this.recognizers.delete(type);
60
+ }
61
+ /**
62
+ * Clears all recognizers
63
+ */
64
+ clear() {
65
+ this.recognizers.clear();
66
+ }
67
+ /**
68
+ * Runs all enabled recognizers on text and returns matches
69
+ * @param text - Text to analyze
70
+ * @param policy - Anonymization policy to determine which types to detect
71
+ */
72
+ findAll(text, policy) {
73
+ const matches = [];
74
+ for (const [type, recognizers] of this.recognizers) {
75
+ // Skip types not enabled in policy
76
+ if (!policy.enabledTypes.has(type) || !policy.regexEnabledTypes.has(type)) {
77
+ continue;
78
+ }
79
+ // Get confidence threshold for this type
80
+ const threshold = policy.confidenceThresholds.get(type) ?? 0.5;
81
+ for (const recognizer of recognizers) {
82
+ const typeMatches = recognizer.find(text);
83
+ // Filter by confidence threshold
84
+ for (const match of typeMatches) {
85
+ if (match.confidence >= threshold) {
86
+ matches.push(match);
87
+ }
88
+ }
89
+ }
90
+ }
91
+ return matches;
92
+ }
93
+ }
94
/**
 * Shared registry instance; stays null until getGlobalRegistry() is first called.
 */
let globalRegistry = null;
/**
 * Returns the shared recognizer registry, creating it on first use.
 * Every call returns the same instance.
 */
export function getGlobalRegistry() {
    if (globalRegistry !== null) {
        return globalRegistry;
    }
    globalRegistry = new RecognizerRegistry();
    return globalRegistry;
}
107
/**
 * Builds a brand-new registry that is independent of the global singleton
 * (handy for tests).
 */
export function createRegistry() {
    const isolated = new RecognizerRegistry();
    return isolated;
}
113
+ //# sourceMappingURL=registry.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"registry.js","sourceRoot":"","sources":["../../src/recognizers/registry.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAKH;;GAEG;AACH,MAAM,OAAO,kBAAkB;IACrB,WAAW,GAA+B,IAAI,GAAG,EAAE,CAAC;IAE5D;;OAEG;IACH,QAAQ,CAAC,UAAsB;QAC7B,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;QAC7D,QAAQ,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QAC1B,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,UAAU,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;IAClD,CAAC;IAED;;OAEG;IACH,WAAW,CAAC,WAAyB;QACnC,KAAK,MAAM,UAAU,IAAI,WAAW,EAAE,CAAC;YACrC,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC;QAC5B,CAAC;IACH,CAAC;IAED;;OAEG;IACH,cAAc,CAAC,IAAa;QAC1B,OAAO,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;IAC1C,CAAC;IAED;;OAEG;IACH,iBAAiB;QACf,MAAM,GAAG,GAAiB,EAAE,CAAC;QAC7B,KAAK,MAAM,WAAW,IAAI,IAAI,CAAC,WAAW,CAAC,MAAM,EAAE,EAAE,CAAC;YACpD,GAAG,CAAC,IAAI,CAAC,GAAG,WAAW,CAAC,CAAC;QAC3B,CAAC;QACD,OAAO,GAAG,CAAC;IACb,CAAC;IAED;;OAEG;IACH,kBAAkB;QAChB,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC,CAAC;IAC7C,CAAC;IAED;;OAEG;IACH,aAAa,CAAC,IAAa;QACzB,MAAM,WAAW,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QAC/C,OAAO,WAAW,KAAK,SAAS,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC;IAC7D,CAAC;IAED;;OAEG;IACH,UAAU,CAAC,IAAa;QACtB,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IAChC,CAAC;IAED;;OAEG;IACH,KAAK;QACH,IAAI,CAAC,WAAW,CAAC,KAAK,EAAE,CAAC;IAC3B,CAAC;IAED;;;;OAIG;IACH,OAAO,CAAC,IAAY,EAAE,MAA2B;QAC/C,MAAM,OAAO,GAAgB,EAAE,CAAC;QAEhC,KAAK,MAAM,CAAC,IAAI,EAAE,WAAW,CAAC,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;YACnD,mCAAmC;YACnC,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,iBAAiB,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC1E,SAAS;YACX,CAAC;YAED,yCAAyC;YACzC,MAAM,SAAS,GAAG,MAAM,CAAC,oBAAoB,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC;YAE/D,KAAK,MAAM,UAAU,IAAI,WAAW,EAAE,CAAC;gBACrC,MAAM,WAAW,GAAG,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBAE1C,iCAAiC;gBACjC,KAAK,MAAM,KAAK,IAAI,WAAW,EAAE,CAAC;oBAChC,IAAI,KAAK,CAAC,UAAU,IAAI,SAAS,EAAE,CAAC;wBAClC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;oBACtB,CAAC;gBACH,CAAC;YACH,CAA
C;QACH,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;CACF;AAED;;GAEG;AACH,IAAI,cAAc,GAA8B,IAAI,CAAC;AAErD;;GAEG;AACH,MAAM,UAAU,iBAAiB;IAC/B,IAAI,cAAc,KAAK,IAAI,EAAE,CAAC;QAC5B,cAAc,GAAG,IAAI,kBAAkB,EAAE,CAAC;IAC5C,CAAC;IACD,OAAO,cAAc,CAAC;AACxB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,cAAc;IAC5B,OAAO,IAAI,kBAAkB,EAAE,CAAC;AAClC,CAAC"}
@@ -0,0 +1,14 @@
1
+ /**
2
+ * URL Recognizer
3
+ * Detects URLs with various protocols
4
+ */
5
+ import type { Recognizer } from './base.js';
6
+ /**
7
+ * URL recognizer
8
+ */
9
+ export declare const urlRecognizer: Recognizer;
10
+ /**
11
+ * Extracts the domain from a URL
12
+ */
13
+ export declare function extractDomain(url: string): string | null;
14
+ //# sourceMappingURL=url.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"url.d.ts","sourceRoot":"","sources":["../../src/recognizers/url.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,WAAW,CAAC;AAoB5C;;GAEG;AACH,eAAO,MAAM,aAAa,EAAE,UA6D3B,CAAC;AA4BF;;GAEG;AACH,wBAAgB,aAAa,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAgBxD"}
@@ -0,0 +1,121 @@
1
+ /**
2
+ * URL Recognizer
3
+ * Detects URLs with various protocols
4
+ */
5
+ import { PIIType, DetectionSource } from '../types/index.js';
6
+ /**
7
+ * URL pattern - matches URLs that carry an explicit scheme followed by "://"
8
+ * Supports: http, https, ftp and file (mailto: links are matched by MAILTO_PATTERN instead)
9
+ */
10
+ const URL_PATTERN = /\b(?:https?|ftp|file):\/\/[-A-Za-z0-9+&@#/%?=~_|!:,.;]*[-A-Za-z0-9+&@#/%=~_|]/g;
11
+ /**
12
+ * Pattern for URLs written without a scheme that start with "www." at a word boundary
13
+ */
14
+ const WWW_PATTERN = /\bwww\.[-A-Za-z0-9+&@#/%?=~_|!:,.;]*[-A-Za-z0-9+&@#/%=~_|]/g;
15
+ /**
16
+ * Pattern for mailto: URLs ("mailto:" followed by an email-shaped address)
17
+ */
18
+ const MAILTO_PATTERN = /\bmailto:[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g;
19
/**
 * URL recognizer.
 *
 * Scans text with URL_PATTERN, WWW_PATTERN and MAILTO_PATTERN, keeps the
 * candidates accepted by `validate`, and reports each one as a REGEX-sourced
 * span of type PIIType.URL.
 */
export const urlRecognizer = {
    type: PIIType.URL,
    name: 'URL',
    defaultConfidence: 0.92,
    /**
     * Finds URL spans in `text`.
     * @param text - Text to scan
     * @returns Validated matches with overlapping spans collapsed
     */
    find(text) {
        const matches = [];
        // Start:end keys of spans already reported, so no span is emitted twice
        const seen = new Set();
        const patterns = [URL_PATTERN, WWW_PATTERN, MAILTO_PATTERN];
        for (const pattern of patterns) {
            // Scan with a fresh RegExp object rather than the shared module-level one
            const globalPattern = new RegExp(pattern.source, 'g');
            for (const match of text.matchAll(globalPattern)) {
                if (match.index === undefined)
                    continue;
                const url = match[0];
                const key = `${match.index}:${match.index + url.length}`;
                if (seen.has(key))
                    continue;
                if (!this.validate(url))
                    continue;
                seen.add(key);
                matches.push({
                    type: PIIType.URL,
                    start: match.index,
                    end: match.index + url.length,
                    confidence: this.defaultConfidence,
                    source: DetectionSource.REGEX,
                    text: url,
                });
            }
        }
        // Remove overlapping matches (www. might be substring of http://www.)
        return deduplicateOverlapping(matches);
    },
    /**
     * Plausibility check for a URL candidate.
     *
     * A candidate passes when it is at least 5 characters long, contains a dot
     * after the scheme, and ends in a suffix of two or more characters. The
     * suffix is looked for in the host first, so a dot inside the path, query
     * or fragment (e.g. "https://example.com/docs/v1.2/") no longer causes a
     * well-formed URL to be rejected. Candidates whose host has no usable
     * suffix (file:/// paths, dotless hosts) fall back to the previous rule,
     * which inspects the text after the last dot of the whole URL, so nothing
     * that used to validate is rejected now.
     *
     * @param url - Candidate URL text
     * @returns true when the candidate looks like a URL
     */
    validate(url) {
        // Basic length check
        if (url.length < 5)
            return false;
        // Strip the scheme; "mailto:" has no slashes, so it needs its own alternative
        const withoutProtocol = url.replace(/^(?:(?:https?|ftp|file|mailto):\/\/?|mailto:)/, '');
        // Should have at least one dot after the protocol
        if (!withoutProtocol.includes('.'))
            return false;
        // Authority = everything before the first path, query or fragment delimiter
        const authority = withoutProtocol.split(/[/?#]/)[0];
        // Drop userinfo ("user@") and a trailing ":port" to isolate the host name
        const host = authority.slice(authority.lastIndexOf('@') + 1).replace(/:\d*$/, '');
        return endsWithPlausibleTld(host) || endsWithPlausibleTld(withoutProtocol);
    },
    /**
     * Returns the URL with surrounding whitespace removed.
     */
    normalize(url) {
        return url.trim();
    },
};
/**
 * Checks that the text after the last "." in `value`, cut at the first
 * "/", "?" or "#", is at least two characters long.
 */
function endsWithPlausibleTld(value) {
    const lastDot = value.lastIndexOf('.');
    if (lastDot === -1)
        return false;
    const suffix = value.slice(lastDot + 1).split(/[/?#]/)[0];
    return suffix.length >= 2;
}
78
+ /**
79
+ * Remove overlapping matches
80
+ */
81
+ function deduplicateOverlapping(matches) {
82
+ if (matches.length <= 1)
83
+ return matches;
84
+ const sorted = [...matches].sort((a, b) => a.start - b.start);
85
+ const result = [];
86
+ for (const match of sorted) {
87
+ const last = result[result.length - 1];
88
+ if (last !== undefined && match.start < last.end) {
89
+ // Overlapping - keep the longer one
90
+ if (match.end > last.end) {
91
+ result.pop();
92
+ result.push(match);
93
+ }
94
+ }
95
+ else {
96
+ result.push(match);
97
+ }
98
+ }
99
+ return result;
100
+ }
101
/**
 * Returns the hostname of a URL, or null when the text cannot be parsed.
 *
 * Input that starts with "www." or contains no "://" is prefixed with
 * "https://" before it is handed to the URL parser.
 */
export function extractDomain(url) {
    try {
        const withScheme = url.startsWith('www.') ? 'https://' + url : url;
        const parseable = withScheme.includes('://') ? withScheme : 'https://' + withScheme;
        return new URL(parseable).hostname;
    }
    catch {
        return null;
    }
}
121
+ //# sourceMappingURL=url.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"url.js","sourceRoot":"","sources":["../../src/recognizers/url.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,OAAO,EAAa,eAAe,EAAE,MAAM,mBAAmB,CAAC;AAGxE;;;GAGG;AACH,MAAM,WAAW,GACf,gFAAgF,CAAC;AAEnF;;GAEG;AACH,MAAM,WAAW,GAAG,6DAA6D,CAAC;AAElF;;GAEG;AACH,MAAM,cAAc,GAClB,0DAA0D,CAAC;AAE7D;;GAEG;AACH,MAAM,CAAC,MAAM,aAAa,GAAe;IACvC,IAAI,EAAE,OAAO,CAAC,GAAG;IACjB,IAAI,EAAE,KAAK;IACX,iBAAiB,EAAE,IAAI;IAEvB,IAAI,CAAC,IAAY;QACf,MAAM,OAAO,GAAgB,EAAE,CAAC;QAChC,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;QAE/B,MAAM,QAAQ,GAAG,CAAC,WAAW,EAAE,WAAW,EAAE,cAAc,CAAC,CAAC;QAE5D,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;YAC/B,MAAM,aAAa,GAAG,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;YAEtD,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,EAAE,CAAC;gBACjD,IAAI,KAAK,CAAC,KAAK,KAAK,SAAS;oBAAE,SAAS;gBAExC,MAAM,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;gBACrB,MAAM,GAAG,GAAG,GAAG,KAAK,CAAC,KAAK,IAAI,KAAK,CAAC,KAAK,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC;gBAEzD,IAAI,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;oBAAE,SAAS;gBAC5B,IAAI,CAAC,IAAI,CAAC,QAAS,CAAC,GAAG,CAAC;oBAAE,SAAS;gBAEnC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;gBACd,OAAO,CAAC,IAAI,CAAC;oBACX,IAAI,EAAE,OAAO,CAAC,GAAG;oBACjB,KAAK,EAAE,KAAK,CAAC,KAAK;oBAClB,GAAG,EAAE,KAAK,CAAC,KAAK,GAAG,GAAG,CAAC,MAAM;oBAC7B,UAAU,EAAE,IAAI,CAAC,iBAAiB;oBAClC,MAAM,EAAE,eAAe,CAAC,KAAK;oBAC7B,IAAI,EAAE,GAAG;iBACV,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,sEAAsE;QACtE,OAAO,sBAAsB,CAAC,OAAO,CAAC,CAAC;IACzC,CAAC;IAED,QAAQ,CAAC,GAAW;QAClB,qBAAqB;QACrB,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC;YAAE,OAAO,KAAK,CAAC;QAEjC,kDAAkD;QAClD,MAAM,eAAe,GAAG,GAAG,CAAC,OAAO,CAAC,mCAAmC,EAAE,EAAE,CAAC,CAAC;QAC7E,IAAI,CAAC,eAAe,CAAC,QAAQ,CAAC,GAAG,CAAC;YAAE,OAAO,KAAK,CAAC;QAEjD,sCAAsC;QACtC,MAAM,KAAK,GAAG,eAAe,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QACzC,MAAM,GAAG,GAAG,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QACpC,IAAI,GAAG,KAAK,SAAS;YAAE,OAAO,KAAK,CAAC;QACpC,iCAAiC;QACjC,MAAM,QAAQ,GAAG,GAAG,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC;QACvC,IAAI,QAAQ,KAAK,SAAS,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC;YAAE,OAAO,KAAK,CAA
C;QAEhE,OAAO,IAAI,CAAC;IACd,CAAC;IAED,SAAS,CAAC,GAAW;QACnB,OAAO,GAAG,CAAC,IAAI,EAAE,CAAC;IACpB,CAAC;CACF,CAAC;AAEF;;GAEG;AACH,SAAS,sBAAsB,CAAC,OAAoB;IAClD,IAAI,OAAO,CAAC,MAAM,IAAI,CAAC;QAAE,OAAO,OAAO,CAAC;IAExC,MAAM,MAAM,GAAG,CAAC,GAAG,OAAO,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;IAC9D,MAAM,MAAM,GAAgB,EAAE,CAAC;IAE/B,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,MAAM,IAAI,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAEvC,IAAI,IAAI,KAAK,SAAS,IAAI,KAAK,CAAC,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YACjD,oCAAoC;YACpC,IAAI,KAAK,CAAC,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;gBACzB,MAAM,CAAC,GAAG,EAAE,CAAC;gBACb,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACrB,CAAC;QACH,CAAC;aAAM,CAAC;YACN,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACrB,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,aAAa,CAAC,GAAW;IACvC,IAAI,CAAC;QACH,0CAA0C;QAC1C,IAAI,aAAa,GAAG,GAAG,CAAC;QACxB,IAAI,GAAG,CAAC,UAAU,CAAC,MAAM,CAAC,EAAE,CAAC;YAC3B,aAAa,GAAG,UAAU,GAAG,GAAG,CAAC;QACnC,CAAC;QACD,IAAI,CAAC,aAAa,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;YACnC,aAAa,GAAG,UAAU,GAAG,aAAa,CAAC;QAC7C,CAAC;QAED,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,aAAa,CAAC,CAAC;QACtC,OAAO,MAAM,CAAC,QAAQ,CAAC;IACzB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC"}
@@ -0,0 +1,197 @@
1
+ import { PIIType } from "./pii-types.js";
2
+ export * from "./pii-types.js";
3
+ /**
4
+ * Source of entity detection
5
+ */
6
+ export declare enum DetectionSource {
7
+ REGEX = "REGEX",
8
+ NER = "NER",
9
+ HYBRID = "HYBRID"
10
+ }
11
+ /**
12
+ * Gender attribute for PERSON entities
13
+ * Used to preserve grammatical agreement during machine translation
14
+ */
15
+ export type PersonGender = "male" | "female" | "neutral" | "unknown";
16
+ /**
17
+ * Scope attribute for LOCATION entities
18
+ * Helps MT engines select correct prepositions (e.g., "in Berlin" vs "in Germany")
19
+ */
20
+ export type LocationScope = "city" | "country" | "region" | "unknown";
21
+ /**
22
+ * Semantic attributes for PII entities
23
+ * These attributes help preserve linguistic context during translation
24
+ */
25
+ export interface SemanticAttributes {
26
+ /** Gender for PERSON entities */
27
+ gender?: PersonGender;
28
+ /** Geographic scope for LOCATION entities */
29
+ scope?: LocationScope;
30
+ /** Honorific title extracted from PERSON entities (e.g., "Dr.", "Mrs.") */
31
+ title?: string;
32
+ }
33
+ /**
34
+ * Progress callback for semantic data downloads
35
+ */
36
+ export type SemanticDownloadProgressCallback = (progress: {
37
+ file: string;
38
+ bytesDownloaded: number;
39
+ totalBytes: number | null;
40
+ percent: number | null;
41
+ }) => void;
42
+ /**
43
+ * Semantic enrichment configuration
44
+ * Controls automatic downloading and loading of semantic data for MT-friendly PII tags
45
+ */
46
+ export interface SemanticConfig {
47
+ /**
48
+ * Whether to enable semantic masking (adds gender/scope attributes to PII tags)
49
+ * @default false
50
+ */
51
+ enabled: boolean;
52
+ /**
53
+ * Whether to auto-download semantic data if not present
54
+ * Data files include name-gender mappings (~40K names) and location data (~25K cities)
55
+ * Total download size: ~4 MB
56
+ * @default true when enabled is true
57
+ */
58
+ autoDownload?: boolean;
59
+ /**
60
+ * Callback for download progress
61
+ */
62
+ onDownloadProgress?: SemanticDownloadProgressCallback;
63
+ /**
64
+ * Callback for status messages
65
+ */
66
+ onStatus?: (status: string) => void;
67
+ }
68
+ /**
69
+ * A detected PII entity with its position and metadata
70
+ */
71
+ export interface DetectedEntity {
72
+ /** PII category */
73
+ type: PIIType;
74
+ /** Unique identifier within the document (1-based, monotonically increasing) */
75
+ id: number;
76
+ /** Start character offset in original text (0-based, inclusive) */
77
+ start: number;
78
+ /** End character offset in original text (0-based, exclusive) */
79
+ end: number;
80
+ /** Detection confidence score (0.0 to 1.0) */
81
+ confidence: number;
82
+ /** How this entity was detected */
83
+ source: DetectionSource;
84
+ /** Original text (only stored in encrypted pii_map, never logged) */
85
+ original: string;
86
+ /** Semantic attributes for MT-friendly tags (gender, scope, etc.) */
87
+ semantic?: SemanticAttributes;
88
+ }
89
+ /**
90
+ * A span match from a recognizer (before ID assignment)
91
+ */
92
+ export interface SpanMatch {
93
+ /** PII category */
94
+ type: PIIType;
95
+ /** Start character offset (0-based, inclusive) */
96
+ start: number;
97
+ /** End character offset (0-based, exclusive) */
98
+ end: number;
99
+ /** Detection confidence score (0.0 to 1.0) */
100
+ confidence: number;
101
+ /** How this span was detected */
102
+ source: DetectionSource;
103
+ /** The matched text */
104
+ text: string;
105
+ /** Semantic attributes for MT-friendly tags (gender, scope, etc.) */
106
+ semantic?: SemanticAttributes;
107
+ }
108
+ /**
109
+ * Custom ID pattern configuration
110
+ */
111
+ export interface CustomIdPattern {
112
+ /** Pattern name for identification */
113
+ name: string;
114
+ /** Regular expression pattern */
115
+ pattern: RegExp;
116
+ /** PII type to assign (typically CASE_ID or CUSTOMER_ID) */
117
+ type: PIIType;
118
+ /** Optional validation function */
119
+ validate?: (match: string) => boolean;
120
+ }
121
+ /**
122
+ * Anonymization policy configuration
123
+ */
124
+ export interface AnonymizationPolicy {
125
+ /** Set of PII types to detect (both regex and NER) */
126
+ enabledTypes: Set<PIIType>;
127
+ /** Set of PII types to detect via regex */
128
+ regexEnabledTypes: Set<PIIType>;
129
+ /** Set of PII types to detect via NER */
130
+ nerEnabledTypes: Set<PIIType>;
131
+ /** Priority order for resolving overlapping entities (higher index = higher priority) */
132
+ typePriority: PIIType[];
133
+ /** Minimum confidence thresholds per type (default: 0.5) */
134
+ confidenceThresholds: Map<PIIType, number>;
135
+ /** Custom ID patterns for domain-specific identifiers */
136
+ customIdPatterns: CustomIdPattern[];
137
+ /** Terms that should not be treated as PII (case-insensitive) */
138
+ allowlistTerms: Set<string>;
139
+ /** Terms that should always be treated as PII */
140
+ denylistPatterns: RegExp[];
141
+ /** Whether to reuse IDs for identical repeated PII strings */
142
+ reuseIdsForRepeatedPII: boolean;
143
+ /** Whether to run leak scan on anonymized output */
144
+ enableLeakScan: boolean;
145
+ /** Enable semantic attribute enrichment for MT-friendly tags (gender, location scope) */
146
+ enableSemanticMasking: boolean;
147
+ }
148
+ /**
149
+ * Encrypted PII map entry
150
+ */
151
+ export interface EncryptedPIIMap {
152
+ /** AES-256-GCM encrypted data (base64) */
153
+ ciphertext: string;
154
+ /** Initialization vector (base64) */
155
+ iv: string;
156
+ /** Authentication tag (base64) */
157
+ authTag: string;
158
+ }
159
+ /**
160
+ * Statistics about the anonymization process
161
+ */
162
+ export interface AnonymizationStats {
163
+ /** Count of entities detected per type */
164
+ countsByType: Record<PIIType, number>;
165
+ /** Total number of entities detected */
166
+ totalEntities: number;
167
+ /** NER model version used */
168
+ modelVersion: string;
169
+ /** Policy version/identifier */
170
+ policyVersion: string;
171
+ /** Processing time in milliseconds */
172
+ processingTimeMs: number;
173
+ /** Whether leak scan passed (if enabled) */
174
+ leakScanPassed?: boolean;
175
+ }
176
+ /**
177
+ * Result of the anonymization process
178
+ */
179
+ export interface AnonymizationResult {
180
+ /** Text with PII replaced by placeholder tags */
181
+ anonymizedText: string;
182
+ /** List of detected entities (without original text for safety) */
183
+ entities: Omit<DetectedEntity, "original">[];
184
+ /** Encrypted mapping of (type, id) -> original string */
185
+ piiMap: EncryptedPIIMap;
186
+ /** Statistics about the anonymization */
187
+ stats: AnonymizationStats;
188
+ }
189
+ /**
190
+ * Creates a default anonymization policy with all types enabled
191
+ */
192
+ export declare function createDefaultPolicy(): AnonymizationPolicy;
193
+ /**
194
+ * Merges a partial policy with defaults
195
+ */
196
+ export declare function mergePolicy(partial: Partial<AnonymizationPolicy>): AnonymizationPolicy;
197
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/types/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAyB,MAAM,gBAAgB,CAAC;AAEhE,cAAc,gBAAgB,CAAC;AAE/B;;GAEG;AACH,oBAAY,eAAe;IACzB,KAAK,UAAU;IACf,GAAG,QAAQ;IACX,MAAM,WAAW;CAClB;AAMD;;;GAGG;AACH,MAAM,MAAM,YAAY,GAAG,MAAM,GAAG,QAAQ,GAAG,SAAS,GAAG,SAAS,CAAC;AAErE;;;GAGG;AACH,MAAM,MAAM,aAAa,GAAG,MAAM,GAAG,SAAS,GAAG,QAAQ,GAAG,SAAS,CAAC;AAEtE;;;GAGG;AACH,MAAM,WAAW,kBAAkB;IACjC,iCAAiC;IACjC,MAAM,CAAC,EAAE,YAAY,CAAC;IACtB,6CAA6C;IAC7C,KAAK,CAAC,EAAE,aAAa,CAAC;IACtB,2EAA2E;IAC3E,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,MAAM,MAAM,gCAAgC,GAAG,CAAC,QAAQ,EAAE;IACxD,IAAI,EAAE,MAAM,CAAC;IACb,eAAe,EAAE,MAAM,CAAC;IACxB,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;CACxB,KAAK,IAAI,CAAC;AAEX;;;GAGG;AACH,MAAM,WAAW,cAAc;IAC7B;;;OAGG;IACH,OAAO,EAAE,OAAO,CAAC;IAEjB;;;;;OAKG;IACH,YAAY,CAAC,EAAE,OAAO,CAAC;IAEvB;;OAEG;IACH,kBAAkB,CAAC,EAAE,gCAAgC,CAAC;IAEtD;;OAEG;IACH,QAAQ,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,IAAI,CAAC;CACrC;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,mBAAmB;IACnB,IAAI,EAAE,OAAO,CAAC;IACd,gFAAgF;IAChF,EAAE,EAAE,MAAM,CAAC;IACX,mEAAmE;IACnE,KAAK,EAAE,MAAM,CAAC;IACd,iEAAiE;IACjE,GAAG,EAAE,MAAM,CAAC;IACZ,8CAA8C;IAC9C,UAAU,EAAE,MAAM,CAAC;IACnB,mCAAmC;IACnC,MAAM,EAAE,eAAe,CAAC;IACxB,qEAAqE;IACrE,QAAQ,EAAE,MAAM,CAAC;IACjB,qEAAqE;IACrE,QAAQ,CAAC,EAAE,kBAAkB,CAAC;CAC/B;AAED;;GAEG;AACH,MAAM,WAAW,SAAS;IACxB,mBAAmB;IACnB,IAAI,EAAE,OAAO,CAAC;IACd,kDAAkD;IAClD,KAAK,EAAE,MAAM,CAAC;IACd,gDAAgD;IAChD,GAAG,EAAE,MAAM,CAAC;IACZ,8CAA8C;IAC9C,UAAU,EAAE,MAAM,CAAC;IACnB,iCAAiC;IACjC,MAAM,EAAE,eAAe,CAAC;IACxB,uBAAuB;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,qEAAqE;IACrE,QAAQ,CAAC,EAAE,kBAAkB,CAAC;CAC/B;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,sCAAsC;IACtC,IAAI,EAAE,MAAM,CAAC;IACb,iCAAiC;IACjC,OAAO,EAAE,MAAM,CAAC;IAChB,4DAA4D;IAC5D,IAAI,EAAE,OAAO,CAAC;IACd,mCAAmC;IACnC,QAAQ,CAAC,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,OAAO,CAAC;CACvC;AAED;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC,sDAAsD;IACtD,YAAY,EAAE,GAAG,CAAC,OAAO,CAAC,CAAC;IAC3B,2CA
A2C;IAC3C,iBAAiB,EAAE,GAAG,CAAC,OAAO,CAAC,CAAC;IAChC,yCAAyC;IACzC,eAAe,EAAE,GAAG,CAAC,OAAO,CAAC,CAAC;IAC9B,yFAAyF;IACzF,YAAY,EAAE,OAAO,EAAE,CAAC;IACxB,4DAA4D;IAC5D,oBAAoB,EAAE,GAAG,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;IAC3C,yDAAyD;IACzD,gBAAgB,EAAE,eAAe,EAAE,CAAC;IACpC,iEAAiE;IACjE,cAAc,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC;IAC5B,iDAAiD;IACjD,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,8DAA8D;IAC9D,sBAAsB,EAAE,OAAO,CAAC;IAChC,oDAAoD;IACpD,cAAc,EAAE,OAAO,CAAC;IACxB,yFAAyF;IACzF,qBAAqB,EAAE,OAAO,CAAC;CAChC;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,0CAA0C;IAC1C,UAAU,EAAE,MAAM,CAAC;IACnB,qCAAqC;IACrC,EAAE,EAAE,MAAM,CAAC;IACX,kCAAkC;IAClC,OAAO,EAAE,MAAM,CAAC;CACjB;AAED;;GAEG;AACH,MAAM,WAAW,kBAAkB;IACjC,0CAA0C;IAC1C,YAAY,EAAE,MAAM,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;IACtC,wCAAwC;IACxC,aAAa,EAAE,MAAM,CAAC;IACtB,6BAA6B;IAC7B,YAAY,EAAE,MAAM,CAAC;IACrB,gCAAgC;IAChC,aAAa,EAAE,MAAM,CAAC;IACtB,sCAAsC;IACtC,gBAAgB,EAAE,MAAM,CAAC;IACzB,4CAA4C;IAC5C,cAAc,CAAC,EAAE,OAAO,CAAC;CAC1B;AAED;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC,iDAAiD;IACjD,cAAc,EAAE,MAAM,CAAC;IACvB,mEAAmE;IACnE,QAAQ,EAAE,IAAI,CAAC,cAAc,EAAE,UAAU,CAAC,EAAE,CAAC;IAC7C,yDAAyD;IACzD,MAAM,EAAE,eAAe,CAAC;IACxB,yCAAyC;IACzC,KAAK,EAAE,kBAAkB,CAAC;CAC3B;AAED;;GAEG;AACH,wBAAgB,mBAAmB,IAAI,mBAAmB,CAyCzD;AAED;;GAEG;AACH,wBAAgB,WAAW,CACzB,OAAO,EAAE,OAAO,CAAC,mBAAmB,CAAC,GACpC,mBAAmB,CA+BrB"}
@@ -0,0 +1,80 @@
1
+ import { PIIType, DEFAULT_TYPE_PRIORITY } from "./pii-types.js";
2
+ export * from "./pii-types.js";
3
+ /**
4
+ * Source of entity detection
5
+ */
6
+ export var DetectionSource;
7
+ (function (DetectionSource) {
8
+ DetectionSource["REGEX"] = "REGEX";
9
+ DetectionSource["NER"] = "NER";
10
+ DetectionSource["HYBRID"] = "HYBRID";
11
+ })(DetectionSource || (DetectionSource = {}));
12
+ /**
13
+ * Creates a default anonymization policy with all types enabled
14
+ */
15
+ export function createDefaultPolicy() {
16
+ const allTypes = new Set(Object.values(PIIType));
17
+ const defaultThresholds = new Map();
18
+ for (const type of allTypes) {
19
+ // Higher threshold for NER-detected types (more uncertainty)
20
+ defaultThresholds.set(type, type === PIIType.PERSON || type === PIIType.ORG ? 0.7 : 0.5);
21
+ }
22
+ return {
23
+ enabledTypes: allTypes,
24
+ regexEnabledTypes: new Set([
25
+ PIIType.EMAIL,
26
+ PIIType.PHONE,
27
+ PIIType.IBAN,
28
+ PIIType.BIC_SWIFT,
29
+ PIIType.CREDIT_CARD,
30
+ PIIType.IP_ADDRESS,
31
+ PIIType.URL,
32
+ PIIType.CASE_ID,
33
+ PIIType.CUSTOMER_ID,
34
+ ]),
35
+ nerEnabledTypes: new Set([
36
+ PIIType.PERSON,
37
+ PIIType.ORG,
38
+ PIIType.LOCATION,
39
+ PIIType.ADDRESS,
40
+ PIIType.DATE_OF_BIRTH,
41
+ ]),
42
+ typePriority: [...DEFAULT_TYPE_PRIORITY],
43
+ confidenceThresholds: defaultThresholds,
44
+ customIdPatterns: [],
45
+ allowlistTerms: new Set(),
46
+ denylistPatterns: [],
47
+ reuseIdsForRepeatedPII: false,
48
+ enableLeakScan: true,
49
+ enableSemanticMasking: false,
50
+ };
51
+ }
52
+ /**
53
+ * Merges a partial policy with defaults
54
+ */
55
+ export function mergePolicy(partial) {
56
+ const defaultPolicy = createDefaultPolicy();
57
+ // Deep merge confidenceThresholds Map
58
+ let confidenceThresholds = defaultPolicy.confidenceThresholds;
59
+ if (partial.confidenceThresholds !== undefined) {
60
+ confidenceThresholds = new Map(defaultPolicy.confidenceThresholds);
61
+ // Merge in partial thresholds
62
+ for (const [type, threshold] of partial.confidenceThresholds) {
63
+ confidenceThresholds.set(type, threshold);
64
+ }
65
+ }
66
+ return {
67
+ enabledTypes: partial.enabledTypes ?? defaultPolicy.enabledTypes,
68
+ regexEnabledTypes: partial.regexEnabledTypes ?? defaultPolicy.regexEnabledTypes,
69
+ nerEnabledTypes: partial.nerEnabledTypes ?? defaultPolicy.nerEnabledTypes,
70
+ typePriority: partial.typePriority ?? defaultPolicy.typePriority,
71
+ confidenceThresholds,
72
+ customIdPatterns: partial.customIdPatterns ?? defaultPolicy.customIdPatterns,
73
+ allowlistTerms: partial.allowlistTerms ?? defaultPolicy.allowlistTerms,
74
+ denylistPatterns: partial.denylistPatterns ?? defaultPolicy.denylistPatterns,
75
+ reuseIdsForRepeatedPII: partial.reuseIdsForRepeatedPII ?? defaultPolicy.reuseIdsForRepeatedPII,
76
+ enableLeakScan: partial.enableLeakScan ?? defaultPolicy.enableLeakScan,
77
+ enableSemanticMasking: partial.enableSemanticMasking ?? defaultPolicy.enableSemanticMasking,
78
+ };
79
+ }
80
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/types/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,qBAAqB,EAAE,MAAM,gBAAgB,CAAC;AAEhE,cAAc,gBAAgB,CAAC;AAE/B;;GAEG;AACH,MAAM,CAAN,IAAY,eAIX;AAJD,WAAY,eAAe;IACzB,kCAAe,CAAA;IACf,8BAAW,CAAA;IACX,oCAAiB,CAAA;AACnB,CAAC,EAJW,eAAe,KAAf,eAAe,QAI1B;AAuMD;;GAEG;AACH,MAAM,UAAU,mBAAmB;IACjC,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,OAAO,CAAc,CAAC,CAAC;IAE9D,MAAM,iBAAiB,GAAG,IAAI,GAAG,EAAmB,CAAC;IACrD,KAAK,MAAM,IAAI,IAAI,QAAQ,EAAE,CAAC;QAC5B,6DAA6D;QAC7D,iBAAiB,CAAC,GAAG,CACnB,IAAI,EACJ,IAAI,KAAK,OAAO,CAAC,MAAM,IAAI,IAAI,KAAK,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAC5D,CAAC;IACJ,CAAC;IAED,OAAO;QACL,YAAY,EAAE,QAAQ;QACtB,iBAAiB,EAAE,IAAI,GAAG,CAAC;YACzB,OAAO,CAAC,KAAK;YACb,OAAO,CAAC,KAAK;YACb,OAAO,CAAC,IAAI;YACZ,OAAO,CAAC,SAAS;YACjB,OAAO,CAAC,WAAW;YACnB,OAAO,CAAC,UAAU;YAClB,OAAO,CAAC,GAAG;YACX,OAAO,CAAC,OAAO;YACf,OAAO,CAAC,WAAW;SACpB,CAAC;QACF,eAAe,EAAE,IAAI,GAAG,CAAC;YACvB,OAAO,CAAC,MAAM;YACd,OAAO,CAAC,GAAG;YACX,OAAO,CAAC,QAAQ;YAChB,OAAO,CAAC,OAAO;YACf,OAAO,CAAC,aAAa;SACtB,CAAC;QACF,YAAY,EAAE,CAAC,GAAG,qBAAqB,CAAC;QACxC,oBAAoB,EAAE,iBAAiB;QACvC,gBAAgB,EAAE,EAAE;QACpB,cAAc,EAAE,IAAI,GAAG,EAAE;QACzB,gBAAgB,EAAE,EAAE;QACpB,sBAAsB,EAAE,KAAK;QAC7B,cAAc,EAAE,IAAI;QACpB,qBAAqB,EAAE,KAAK;KAC7B,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,WAAW,CACzB,OAAqC;IAErC,MAAM,aAAa,GAAG,mBAAmB,EAAE,CAAC;IAE5C,sCAAsC;IACtC,IAAI,oBAAoB,GAAG,aAAa,CAAC,oBAAoB,CAAC;IAC9D,IAAI,OAAO,CAAC,oBAAoB,KAAK,SAAS,EAAE,CAAC;QAC/C,oBAAoB,GAAG,IAAI,GAAG,CAAC,aAAa,CAAC,oBAAoB,CAAC,CAAC;QACnE,8BAA8B;QAC9B,KAAK,MAAM,CAAC,IAAI,EAAE,SAAS,CAAC,IAAI,OAAO,CAAC,oBAAoB,EAAE,CAAC;YAC7D,oBAAoB,CAAC,GAAG,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;QAC5C,CAAC;IACH,CAAC;IAED,OAAO;QACL,YAAY,EAAE,OAAO,CAAC,YAAY,IAAI,aAAa,CAAC,YAAY;QAChE,iBAAiB,EACf,OAAO,CAAC,iBAAiB,IAAI,aAAa,CAAC,iBAAiB;QAC9D,eAAe,EAAE,OAAO,CAAC,eAAe,IAAI,aAAa,CAAC,eAAe;QACzE,YAAY,EAAE,OAAO,CAAC,YAAY,IAAI,aAAa,CAAC,YAAY;QAChE,oBAAoB;QACpB,gBAAgB,EACd,OAAO,CAAC,gBAAgB,IAAI,aAA
a,CAAC,gBAAgB;QAC5D,cAAc,EAAE,OAAO,CAAC,cAAc,IAAI,aAAa,CAAC,cAAc;QACtE,gBAAgB,EACd,OAAO,CAAC,gBAAgB,IAAI,aAAa,CAAC,gBAAgB;QAC5D,sBAAsB,EACpB,OAAO,CAAC,sBAAsB,IAAI,aAAa,CAAC,sBAAsB;QACxE,cAAc,EAAE,OAAO,CAAC,cAAc,IAAI,aAAa,CAAC,cAAc;QACtE,qBAAqB,EACnB,OAAO,CAAC,qBAAqB,IAAI,aAAa,CAAC,qBAAqB;KACvE,CAAC;AACJ,CAAC"}
@@ -0,0 +1,50 @@
1
+ /**
2
+ * PII Type Enumeration: the string-valued PII categories supported for detection and anonymization.
3
+ * Each member's declared value is spelled exactly like its name (for example, EMAIL = "EMAIL").
4
+ */
5
+ export declare enum PIIType {
6
+ PERSON = "PERSON",
7
+ ORG = "ORG",
8
+ LOCATION = "LOCATION",
9
+ ADDRESS = "ADDRESS",
10
+ EMAIL = "EMAIL",
11
+ PHONE = "PHONE",
12
+ URL = "URL",
13
+ IP_ADDRESS = "IP_ADDRESS",
14
+ IBAN = "IBAN",
15
+ BIC_SWIFT = "BIC_SWIFT",
16
+ ACCOUNT_NUMBER = "ACCOUNT_NUMBER",
17
+ CREDIT_CARD = "CREDIT_CARD",
18
+ TAX_ID = "TAX_ID",
19
+ NATIONAL_ID = "NATIONAL_ID",
20
+ DATE_OF_BIRTH = "DATE_OF_BIRTH",
21
+ CASE_ID = "CASE_ID",
22
+ CUSTOMER_ID = "CUSTOMER_ID"
23
+ }
24
+ /**
25
+ * All PII types collected in a readonly array so callers can iterate over the categories
26
+ */
27
+ export declare const ALL_PII_TYPES: readonly PIIType[];
28
+ /**
29
+ * PII types that are detected via regex (structured PII), exposed as a readonly array
30
+ */
31
+ export declare const REGEX_PII_TYPES: readonly PIIType[];
32
+ /**
33
+ * PII types that are detected via NER model (soft PII), exposed as a readonly array
34
+ */
35
+ export declare const NER_PII_TYPES: readonly PIIType[];
36
+ /**
37
+ * Default priority order for resolving overlapping entities
38
+ * Higher index = higher priority, i.e. a type positioned later in the array outranks the ones before it
39
+ */
40
+ export declare const DEFAULT_TYPE_PRIORITY: readonly PIIType[];
41
+ /**
42
+ * Lookup table from NER model label strings to the corresponding PIIType
43
+ * Covers common label formats from NER models (B-PER, I-PER, B-ORG, etc.); NOTE(review): the exact key set is not visible in this declaration
44
+ */
45
+ export declare const NER_LABEL_TO_PII_TYPE: Record<string, PIIType>;
46
+ /**
47
+ * Get PIIType from NER label (handles B-/I- prefixes); returns null instead of a PIIType in some cases, presumably for labels with no mapping (confirm in the implementation)
48
+ */
49
+ export declare function getPIITypeFromNERLabel(label: string): PIIType | null;
50
+ //# sourceMappingURL=pii-types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"pii-types.d.ts","sourceRoot":"","sources":["../../src/types/pii-types.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,oBAAY,OAAO;IAEjB,MAAM,WAAW;IACjB,GAAG,QAAQ;IACX,QAAQ,aAAa;IACrB,OAAO,YAAY;IAGnB,KAAK,UAAU;IACf,KAAK,UAAU;IACf,GAAG,QAAQ;IACX,UAAU,eAAe;IAGzB,IAAI,SAAS;IACb,SAAS,cAAc;IACvB,cAAc,mBAAmB;IACjC,WAAW,gBAAgB;IAG3B,MAAM,WAAW;IACjB,WAAW,gBAAgB;IAC3B,aAAa,kBAAkB;IAG/B,OAAO,YAAY;IACnB,WAAW,gBAAgB;CAC5B;AAED;;GAEG;AACH,eAAO,MAAM,aAAa,EAAE,SAAS,OAAO,EAAwC,CAAC;AAErF;;GAEG;AACH,eAAO,MAAM,eAAe,EAAE,SAAS,OAAO,EAa7C,CAAC;AAEF;;GAEG;AACH,eAAO,MAAM,aAAa,EAAE,SAAS,OAAO,EAM3C,CAAC;AAEF;;;GAGG;AACH,eAAO,MAAM,qBAAqB,EAAE,SAAS,OAAO,EAqBnD,CAAC;AAEF;;;GAGG;AACH,eAAO,MAAM,qBAAqB,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAWzD,CAAC;AAEF;;GAEG;AACH,wBAAgB,sBAAsB,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,GAAG,IAAI,CASpE"}