@elanlanguages/bridge-anonymization 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +255 -94
- package/dist/crypto/pii-map-crypto.d.ts +50 -36
- package/dist/crypto/pii-map-crypto.d.ts.map +1 -1
- package/dist/crypto/pii-map-crypto.js +133 -68
- package/dist/crypto/pii-map-crypto.js.map +1 -1
- package/dist/index.d.ts +3 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +48 -9
- package/dist/index.js.map +1 -1
- package/dist/ner/model-manager.d.ts +20 -11
- package/dist/ner/model-manager.d.ts.map +1 -1
- package/dist/ner/model-manager.js +147 -76
- package/dist/ner/model-manager.js.map +1 -1
- package/dist/ner/ner-model.d.ts +1 -1
- package/dist/ner/ner-model.d.ts.map +1 -1
- package/dist/ner/ner-model.js +40 -27
- package/dist/ner/ner-model.js.map +1 -1
- package/dist/ner/onnx-runtime.d.ts +6 -5
- package/dist/ner/onnx-runtime.d.ts.map +1 -1
- package/dist/ner/onnx-runtime.js +55 -24
- package/dist/ner/onnx-runtime.js.map +1 -1
- package/dist/ner/tokenizer.d.ts +5 -0
- package/dist/ner/tokenizer.d.ts.map +1 -1
- package/dist/ner/tokenizer.js +15 -2
- package/dist/ner/tokenizer.js.map +1 -1
- package/dist/pipeline/semantic-data-loader.d.ts +24 -16
- package/dist/pipeline/semantic-data-loader.d.ts.map +1 -1
- package/dist/pipeline/semantic-data-loader.js +169 -176
- package/dist/pipeline/semantic-data-loader.js.map +1 -1
- package/dist/pipeline/semantic-enricher.d.ts +12 -2
- package/dist/pipeline/semantic-enricher.d.ts.map +1 -1
- package/dist/pipeline/semantic-enricher.js +85 -35
- package/dist/pipeline/semantic-enricher.js.map +1 -1
- package/dist/utils/index.d.ts +2 -0
- package/dist/utils/index.d.ts.map +1 -1
- package/dist/utils/index.js +2 -0
- package/dist/utils/index.js.map +1 -1
- package/dist/utils/path.d.ts +34 -0
- package/dist/utils/path.d.ts.map +1 -0
- package/dist/utils/path.js +96 -0
- package/dist/utils/path.js.map +1 -0
- package/dist/utils/storage-browser.d.ts +51 -0
- package/dist/utils/storage-browser.d.ts.map +1 -0
- package/dist/utils/storage-browser.js +381 -0
- package/dist/utils/storage-browser.js.map +1 -0
- package/dist/utils/storage-node.d.ts +43 -0
- package/dist/utils/storage-node.d.ts.map +1 -0
- package/dist/utils/storage-node.js +93 -0
- package/dist/utils/storage-node.js.map +1 -0
- package/dist/utils/storage.d.ts +70 -0
- package/dist/utils/storage.d.ts.map +1 -0
- package/dist/utils/storage.js +69 -0
- package/dist/utils/storage.js.map +1 -0
- package/package.json +2 -1
package/README.md
CHANGED
|
@@ -4,23 +4,44 @@
|
|
|
4
4
|

|
|
5
5
|
[Code coverage on Codecov](https://codecov.io/github/elanlanguages/bridge-anonymization)
|
|
6
6
|
|
|
7
|
-
On-device PII anonymization module for high-privacy
|
|
7
|
+
On-device PII anonymization module for high-privacy AI workflows. Detects and replaces Personally Identifiable Information (PII) with placeholder tags while maintaining an encrypted mapping for later rehydration.
|
|
8
|
+
|
|
9
|
+
**Works in Node.js, Bun, and browsers** - zero server-side dependencies required.
|
|
8
10
|
|
|
9
11
|
## Features
|
|
10
12
|
|
|
11
13
|
- **Structured PII Detection**: Regex-based detection for emails, phones, IBANs, credit cards, IPs, URLs
|
|
12
|
-
- **Soft PII Detection**: ONNX-powered NER model for names, organizations, locations (auto-downloads on first use)
|
|
14
|
+
- **Soft PII Detection**: ONNX-powered NER model for names, organizations, locations (the model is downloaded automatically on first use when `autoDownload` is enabled)
|
|
15
|
+
- **Semantic Enrichment**: AI/MT-friendly placeholder tags that carry `gender` (for persons) and `scope` (for locations) attributes, so translations can stay grammatically correct
|
|
13
16
|
- **Secure PII Mapping**: AES-256-GCM encrypted storage of original PII values
|
|
17
|
+
- **Cross-Platform**: Works identically in Node.js, Bun, and browsers
|
|
14
18
|
- **Configurable Policies**: Customizable detection rules, thresholds, and allowlists
|
|
15
19
|
- **Validation & Leak Scanning**: Built-in validation and optional leak detection
|
|
16
20
|
|
|
17
21
|
## Installation
|
|
18
22
|
|
|
23
|
+
### Node.js / Bun
|
|
24
|
+
|
|
19
25
|
```bash
|
|
20
26
|
npm install @elanlanguages/bridge-anonymization
|
|
21
27
|
```
|
|
22
28
|
|
|
23
|
-
|
|
29
|
+
### Browser (with bundler)
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
npm install @elanlanguages/bridge-anonymization onnxruntime-web
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
### Browser (without bundler)
|
|
36
|
+
|
|
37
|
+
```html
|
|
38
|
+
<script type="module">
|
|
39
|
+
// Import directly from your dist folder or CDN
|
|
40
|
+
import { createAnonymizer } from './node_modules/@elanlanguages/bridge-anonymization/dist/index.js';
|
|
41
|
+
|
|
42
|
+
// onnxruntime-web is automatically loaded from CDN when needed
|
|
43
|
+
</script>
|
|
44
|
+
```
|
|
24
45
|
|
|
25
46
|
## Quick Start
|
|
26
47
|
|
|
@@ -49,7 +70,7 @@ import { createAnonymizer } from '@elanlanguages/bridge-anonymization';
|
|
|
49
70
|
const anonymizer = createAnonymizer({
|
|
50
71
|
ner: {
|
|
51
72
|
mode: 'quantized', // or 'standard' for full model (~1.1 GB)
|
|
52
|
-
onStatus: (status) => console.log(status),
|
|
73
|
+
onStatus: (status) => console.log(status),
|
|
53
74
|
}
|
|
54
75
|
});
|
|
55
76
|
|
|
@@ -61,20 +82,37 @@ const result = await anonymizer.anonymize(
|
|
|
61
82
|
|
|
62
83
|
console.log(result.anonymizedText);
|
|
63
84
|
// "Hello <PII type="PERSON" id="1"/> from <PII type="ORG" id="2"/> in <PII type="LOCATION" id="3"/>!"
|
|
85
|
+
|
|
86
|
+
// Clean up when done
|
|
87
|
+
await anonymizer.dispose();
|
|
64
88
|
```
|
|
65
89
|
|
|
66
|
-
###
|
|
90
|
+
### With Semantic Enrichment
|
|
91
|
+
|
|
92
|
+
Add `gender` and location `scope` attributes to the placeholders for better machine translation:
|
|
67
93
|
|
|
68
94
|
```typescript
|
|
69
|
-
import {
|
|
95
|
+
import { createAnonymizer } from '@elanlanguages/bridge-anonymization';
|
|
70
96
|
|
|
71
|
-
const
|
|
72
|
-
|
|
73
|
-
|
|
97
|
+
const anonymizer = createAnonymizer({
|
|
98
|
+
ner: { mode: 'quantized' },
|
|
99
|
+
semantic: {
|
|
100
|
+
enabled: true, // Downloads ~12 MB of semantic data on first use
|
|
101
|
+
onStatus: (status) => console.log(status),
|
|
102
|
+
}
|
|
103
|
+
});
|
|
104
|
+
|
|
105
|
+
await anonymizer.initialize();
|
|
106
|
+
|
|
107
|
+
const result = await anonymizer.anonymize(
|
|
108
|
+
'Hello Maria Schmidt from Berlin!'
|
|
74
109
|
);
|
|
110
|
+
|
|
111
|
+
console.log(result.anonymizedText);
|
|
112
|
+
// "Hello <PII type="PERSON" gender="female" id="1"/> from <PII type="LOCATION" scope="city" id="2"/>!"
|
|
75
113
|
```
|
|
76
114
|
|
|
77
|
-
## Translation Workflow (Anonymize → Translate → Rehydrate)
|
|
115
|
+
## Example: Translation Workflow (Anonymize → Translate → Rehydrate)
|
|
78
116
|
|
|
79
117
|
The full workflow for privacy-preserving translation:
|
|
80
118
|
|
|
@@ -86,13 +124,13 @@ import {
|
|
|
86
124
|
InMemoryKeyProvider
|
|
87
125
|
} from '@elanlanguages/bridge-anonymization';
|
|
88
126
|
|
|
89
|
-
// 1. Create a key provider (
|
|
127
|
+
// 1. Create a key provider (required to decrypt later)
|
|
90
128
|
const keyProvider = new InMemoryKeyProvider();
|
|
91
129
|
|
|
92
130
|
// 2. Create anonymizer with key provider
|
|
93
131
|
const anonymizer = createAnonymizer({
|
|
94
132
|
ner: { mode: 'quantized' },
|
|
95
|
-
keyProvider: keyProvider
|
|
133
|
+
keyProvider: keyProvider
|
|
96
134
|
});
|
|
97
135
|
|
|
98
136
|
await anonymizer.initialize();
|
|
@@ -104,19 +142,22 @@ const result = await anonymizer.anonymize(original);
|
|
|
104
142
|
console.log(result.anonymizedText);
|
|
105
143
|
// "Hello <PII type="PERSON" id="1"/> from <PII type="ORG" id="2"/> in <PII type="LOCATION" id="3"/>!"
|
|
106
144
|
|
|
107
|
-
// 4. Translate (
|
|
108
|
-
const translated = await
|
|
145
|
+
// 4. Translate (or run any other AI workload that preserves the placeholders)
|
|
146
|
+
const translated = await yourTranslationService(result.anonymizedText, { from: 'en', to: 'de' });
|
|
109
147
|
// "Hallo <PII type="PERSON" id="1"/> von <PII type="ORG" id="2"/> in <PII type="LOCATION" id="3"/>!"
|
|
110
148
|
|
|
111
149
|
// 5. Decrypt the PII map using the same key
|
|
112
150
|
const encryptionKey = await keyProvider.getKey();
|
|
113
|
-
const piiMap = decryptPIIMap(result.piiMap, encryptionKey);
|
|
151
|
+
const piiMap = await decryptPIIMap(result.piiMap, encryptionKey);
|
|
114
152
|
|
|
115
153
|
// 6. Rehydrate - replace placeholders with original values
|
|
116
154
|
const rehydrated = rehydrate(translated, piiMap);
|
|
117
155
|
|
|
118
156
|
console.log(rehydrated);
|
|
119
157
|
// "Hallo John Smith von Acme Corp in Berlin!"
|
|
158
|
+
|
|
159
|
+
// 7. Clean up
|
|
160
|
+
await anonymizer.dispose();
|
|
120
161
|
```
|
|
121
162
|
|
|
122
163
|
### Key Points
|
|
@@ -125,23 +166,6 @@ console.log(rehydrated);
|
|
|
125
166
|
- **Placeholders are XML-like** - Most translation services preserve them automatically
|
|
126
167
|
- **PII stays local** - Original values never leave your system during translation
|
|
127
168
|
|
|
128
|
-
### Production Key Management
|
|
129
|
-
|
|
130
|
-
For production, use a proper key provider:
|
|
131
|
-
|
|
132
|
-
```typescript
|
|
133
|
-
import { EnvKeyProvider } from '@elanlanguages/bridge-anonymization';
|
|
134
|
-
|
|
135
|
-
// Generate and store key: openssl rand -base64 32
|
|
136
|
-
// Set environment variable: export PII_ENCRYPTION_KEY=<base64-key>
|
|
137
|
-
|
|
138
|
-
const keyProvider = new EnvKeyProvider('PII_ENCRYPTION_KEY');
|
|
139
|
-
const anonymizer = createAnonymizer({
|
|
140
|
-
ner: { mode: 'quantized' },
|
|
141
|
-
keyProvider
|
|
142
|
-
});
|
|
143
|
-
```
|
|
144
|
-
|
|
145
169
|
## API Reference
|
|
146
170
|
|
|
147
171
|
### Configuration Options
|
|
@@ -152,21 +176,31 @@ import { createAnonymizer, InMemoryKeyProvider } from '@elanlanguages/bridge-ano
|
|
|
152
176
|
const anonymizer = createAnonymizer({
|
|
153
177
|
// NER configuration
|
|
154
178
|
ner: {
|
|
155
|
-
mode: 'quantized',
|
|
156
|
-
autoDownload: true,
|
|
157
|
-
onStatus: (
|
|
158
|
-
onDownloadProgress: (
|
|
179
|
+
mode: 'quantized', // 'standard' | 'quantized' | 'disabled' | 'custom'
|
|
180
|
+
autoDownload: true, // Auto-download model if not present
|
|
181
|
+
onStatus: (status) => {}, // Status messages callback
|
|
182
|
+
onDownloadProgress: (progress) => {
|
|
183
|
+
console.log(`${progress.file}: ${progress.percent}%`);
|
|
184
|
+
},
|
|
159
185
|
|
|
160
186
|
// For 'custom' mode only:
|
|
161
187
|
modelPath: './my-model.onnx',
|
|
162
188
|
vocabPath: './vocab.txt',
|
|
163
189
|
},
|
|
164
190
|
|
|
191
|
+
// Semantic enrichment (adds gender/scope attributes)
|
|
192
|
+
semantic: {
|
|
193
|
+
enabled: true, // Enable MT-friendly attributes
|
|
194
|
+
autoDownload: true, // Auto-download semantic data (~12 MB)
|
|
195
|
+
onStatus: (status) => {},
|
|
196
|
+
onDownloadProgress: (progress) => {},
|
|
197
|
+
},
|
|
198
|
+
|
|
165
199
|
// Encryption key provider
|
|
166
200
|
keyProvider: new InMemoryKeyProvider(),
|
|
167
201
|
|
|
168
|
-
// Custom policy
|
|
169
|
-
defaultPolicy: { /*
|
|
202
|
+
// Custom policy (optional)
|
|
203
|
+
defaultPolicy: { /* see Policy section */ },
|
|
170
204
|
});
|
|
171
205
|
|
|
172
206
|
await anonymizer.initialize();
|
|
@@ -185,7 +219,7 @@ await anonymizer.initialize();
|
|
|
185
219
|
|
|
186
220
|
#### `createAnonymizer(config?)`
|
|
187
221
|
|
|
188
|
-
Creates
|
|
222
|
+
Creates a reusable anonymizer instance:
|
|
189
223
|
|
|
190
224
|
```typescript
|
|
191
225
|
const anonymizer = createAnonymizer({
|
|
@@ -202,6 +236,8 @@ await anonymizer.dispose();
|
|
|
202
236
|
One-off anonymization (regex-only by default):
|
|
203
237
|
|
|
204
238
|
```typescript
|
|
239
|
+
import { anonymize } from '@elanlanguages/bridge-anonymization';
|
|
240
|
+
|
|
205
241
|
const result = await anonymize('Contact test@example.com');
|
|
206
242
|
```
|
|
207
243
|
|
|
@@ -210,6 +246,8 @@ const result = await anonymize('Contact test@example.com');
|
|
|
210
246
|
One-off anonymization with NER:
|
|
211
247
|
|
|
212
248
|
```typescript
|
|
249
|
+
import { anonymizeWithNER } from '@elanlanguages/bridge-anonymization';
|
|
250
|
+
|
|
213
251
|
const result = await anonymizeWithNER(
|
|
214
252
|
'Hello John Smith',
|
|
215
253
|
{ mode: 'quantized' }
|
|
@@ -221,9 +259,34 @@ const result = await anonymizeWithNER(
|
|
|
221
259
|
Fast regex-only anonymization:
|
|
222
260
|
|
|
223
261
|
```typescript
|
|
262
|
+
import { anonymizeRegexOnly } from '@elanlanguages/bridge-anonymization';
|
|
263
|
+
|
|
224
264
|
const result = await anonymizeRegexOnly('Card: 4111111111111111');
|
|
225
265
|
```
|
|
226
266
|
|
|
267
|
+
### Rehydration Functions
|
|
268
|
+
|
|
269
|
+
#### `decryptPIIMap(encryptedMap, key)`
|
|
270
|
+
|
|
271
|
+
Decrypts the PII map for rehydration:
|
|
272
|
+
|
|
273
|
+
```typescript
|
|
274
|
+
import { decryptPIIMap } from '@elanlanguages/bridge-anonymization';
|
|
275
|
+
|
|
276
|
+
const piiMap = await decryptPIIMap(result.piiMap, encryptionKey);
|
|
277
|
+
// Resolves to a Map<string, string>: each key has the form "TYPE:id" (e.g. "PERSON:1") and each value is the original text (e.g. "John Smith")
|
|
278
|
+
```
|
|
279
|
+
|
|
280
|
+
#### `rehydrate(text, piiMap)`
|
|
281
|
+
|
|
282
|
+
Replaces placeholders with original values:
|
|
283
|
+
|
|
284
|
+
```typescript
|
|
285
|
+
import { rehydrate } from '@elanlanguages/bridge-anonymization';
|
|
286
|
+
|
|
287
|
+
const original = rehydrate(translatedText, piiMap);
|
|
288
|
+
```
|
|
289
|
+
|
|
227
290
|
### Result Structure
|
|
228
291
|
|
|
229
292
|
```typescript
|
|
@@ -261,22 +324,22 @@ interface AnonymizationResult {
|
|
|
261
324
|
|
|
262
325
|
## Supported PII Types
|
|
263
326
|
|
|
264
|
-
| Type | Description | Detection
|
|
265
|
-
|
|
266
|
-
| `EMAIL` | Email addresses | Regex |
|
|
267
|
-
| `PHONE` | Phone numbers (international) | Regex |
|
|
268
|
-
| `IBAN` | International Bank Account Numbers | Regex + Checksum |
|
|
269
|
-
| `BIC_SWIFT` | Bank Identifier Codes | Regex |
|
|
270
|
-
| `CREDIT_CARD` | Credit card numbers | Regex + Luhn |
|
|
271
|
-
| `IP_ADDRESS` | IPv4 and IPv6 addresses | Regex |
|
|
272
|
-
| `URL` | Web URLs | Regex |
|
|
273
|
-
| `CASE_ID` | Case/ticket numbers | Regex (configurable) |
|
|
274
|
-
| `CUSTOMER_ID` | Customer identifiers | Regex (configurable) |
|
|
275
|
-
| `PERSON` | Person names | NER |
|
|
276
|
-
| `ORG` | Organization names | NER |
|
|
277
|
-
| `LOCATION` | Location/place names | NER |
|
|
278
|
-
| `ADDRESS` | Physical addresses | NER |
|
|
279
|
-
| `DATE_OF_BIRTH` | Dates of birth | NER |
|
|
327
|
+
| Type | Description | Detection | Semantic Attributes |
|
|
328
|
+
|------|-------------|-----------|---------------------|
|
|
329
|
+
| `EMAIL` | Email addresses | Regex | - |
|
|
330
|
+
| `PHONE` | Phone numbers (international) | Regex | - |
|
|
331
|
+
| `IBAN` | International Bank Account Numbers | Regex + Checksum | - |
|
|
332
|
+
| `BIC_SWIFT` | Bank Identifier Codes | Regex | - |
|
|
333
|
+
| `CREDIT_CARD` | Credit card numbers | Regex + Luhn | - |
|
|
334
|
+
| `IP_ADDRESS` | IPv4 and IPv6 addresses | Regex | - |
|
|
335
|
+
| `URL` | Web URLs | Regex | - |
|
|
336
|
+
| `CASE_ID` | Case/ticket numbers | Regex (configurable) | - |
|
|
337
|
+
| `CUSTOMER_ID` | Customer identifiers | Regex (configurable) | - |
|
|
338
|
+
| `PERSON` | Person names | NER | `gender` (male/female/neutral) |
|
|
339
|
+
| `ORG` | Organization names | NER | - |
|
|
340
|
+
| `LOCATION` | Location/place names | NER | `scope` (city/country/region) |
|
|
341
|
+
| `ADDRESS` | Physical addresses | NER | - |
|
|
342
|
+
| `DATE_OF_BIRTH` | Dates of birth | NER | - |
|
|
280
343
|
|
|
281
344
|
## Configuration
|
|
282
345
|
|
|
@@ -300,6 +363,9 @@ const anonymizer = createAnonymizer({
|
|
|
300
363
|
// Terms to never treat as PII
|
|
301
364
|
allowlistTerms: new Set(['Customer Service', 'Help Desk']),
|
|
302
365
|
|
|
366
|
+
// Enable semantic enrichment (gender/scope)
|
|
367
|
+
enableSemanticMasking: true,
|
|
368
|
+
|
|
303
369
|
// Enable leak scanning on output
|
|
304
370
|
enableLeakScan: true,
|
|
305
371
|
},
|
|
@@ -325,88 +391,175 @@ const anonymizer = createAnonymizer();
|
|
|
325
391
|
anonymizer.getRegistry().register(customRecognizer);
|
|
326
392
|
```
|
|
327
393
|
|
|
328
|
-
## Model
|
|
394
|
+
## Data & Model Storage
|
|
395
|
+
|
|
396
|
+
Models and semantic data are cached locally for offline use.
|
|
397
|
+
|
|
398
|
+
### Node.js Cache Locations
|
|
329
399
|
|
|
330
|
-
|
|
400
|
+
| Data | macOS | Linux | Windows |
|
|
401
|
+
|------|-------|-------|---------|
|
|
402
|
+
| NER Models | `~/Library/Caches/bridge-anonymization/models/` | `~/.cache/bridge-anonymization/models/` | `%LOCALAPPDATA%/bridge-anonymization/models/` |
|
|
403
|
+
| Semantic Data | `~/Library/Caches/bridge-anonymization/semantic-data/` | `~/.cache/bridge-anonymization/semantic-data/` | `%LOCALAPPDATA%/bridge-anonymization/semantic-data/` |
|
|
331
404
|
|
|
332
|
-
|
|
333
|
-
- **macOS**: `~/Library/Caches/bridge-anonymization/models/`
|
|
334
|
-
- **Linux**: `~/.cache/bridge-anonymization/models/`
|
|
335
|
-
- **Windows**: `%LOCALAPPDATA%/bridge-anonymization/models/`
|
|
405
|
+
### Browser Cache
|
|
336
406
|
|
|
337
|
-
|
|
407
|
+
In browsers, data is stored using:
|
|
408
|
+
- **IndexedDB**: For semantic data and smaller files
|
|
409
|
+
- **Origin Private File System (OPFS)**: For large model files (~280 MB)
|
|
410
|
+
|
|
411
|
+
Data persists across page reloads and browser sessions.
|
|
412
|
+
|
|
413
|
+
### Manual Data Management
|
|
338
414
|
|
|
339
415
|
```typescript
|
|
340
416
|
import {
|
|
417
|
+
// Model management
|
|
341
418
|
isModelDownloaded,
|
|
342
419
|
downloadModel,
|
|
343
420
|
clearModelCache,
|
|
344
421
|
listDownloadedModels,
|
|
345
|
-
|
|
422
|
+
|
|
423
|
+
// Semantic data management
|
|
424
|
+
isSemanticDataDownloaded,
|
|
425
|
+
downloadSemanticData,
|
|
426
|
+
clearSemanticDataCache,
|
|
346
427
|
} from '@elanlanguages/bridge-anonymization';
|
|
347
428
|
|
|
348
429
|
// Check if model is downloaded
|
|
349
430
|
const hasModel = await isModelDownloaded('quantized');
|
|
350
431
|
|
|
351
|
-
// Manually download
|
|
432
|
+
// Manually download model with progress
|
|
352
433
|
await downloadModel('quantized', (progress) => {
|
|
353
434
|
console.log(`${progress.file}: ${progress.percent}%`);
|
|
354
435
|
});
|
|
355
436
|
|
|
437
|
+
// Check semantic data
|
|
438
|
+
const hasSemanticData = await isSemanticDataDownloaded();
|
|
439
|
+
|
|
356
440
|
// List downloaded models
|
|
357
441
|
const models = await listDownloadedModels();
|
|
358
442
|
|
|
359
|
-
// Clear
|
|
443
|
+
// Clear caches
|
|
360
444
|
await clearModelCache('quantized'); // or clearModelCache() for all
|
|
445
|
+
await clearSemanticDataCache();
|
|
361
446
|
```
|
|
362
447
|
|
|
363
448
|
## Encryption & Security
|
|
364
449
|
|
|
365
|
-
The PII map is encrypted using AES-256-GCM
|
|
450
|
+
The PII map is encrypted using **AES-256-GCM** via the Web Crypto API (works in both Node.js and browsers).
|
|
451
|
+
|
|
452
|
+
### Key Providers
|
|
366
453
|
|
|
367
454
|
```typescript
|
|
368
|
-
import {
|
|
455
|
+
import {
|
|
456
|
+
InMemoryKeyProvider, // For development/testing
|
|
457
|
+
ConfigKeyProvider, // For production with pre-configured key
|
|
458
|
+
KeyProvider, // Interface for custom implementations
|
|
459
|
+
generateKey,
|
|
460
|
+
} from '@elanlanguages/bridge-anonymization';
|
|
461
|
+
|
|
462
|
+
// Development: In-memory key (generates a random key that is lost when the page is refreshed or the process exits)
|
|
463
|
+
const devKeyProvider = new InMemoryKeyProvider();
|
|
464
|
+
|
|
465
|
+
// Production: Pre-configured key
|
|
466
|
+
// Generate key: openssl rand -base64 32
|
|
467
|
+
const keyBase64 = process.env.PII_ENCRYPTION_KEY; // or read from config
|
|
468
|
+
const prodKeyProvider = new ConfigKeyProvider(keyBase64);
|
|
369
469
|
|
|
470
|
+
// Custom: Implement KeyProvider interface
|
|
370
471
|
class SecureKeyProvider implements KeyProvider {
|
|
371
|
-
async getKey(): Promise<
|
|
372
|
-
// Retrieve from
|
|
472
|
+
async getKey(): Promise<Uint8Array> {
|
|
473
|
+
// Retrieve from secure storage, HSM, keychain, etc.
|
|
373
474
|
return await getKeyFromSecureStorage();
|
|
374
475
|
}
|
|
375
476
|
}
|
|
376
|
-
|
|
377
|
-
const anonymizer = createAnonymizer({
|
|
378
|
-
keyProvider: new SecureKeyProvider(),
|
|
379
|
-
ner: { mode: 'quantized' },
|
|
380
|
-
});
|
|
381
477
|
```
|
|
382
478
|
|
|
383
479
|
### Security Best Practices
|
|
384
480
|
|
|
385
481
|
- **Never log the raw PII map** - Always use encrypted storage
|
|
482
|
+
- **Persist the encryption key securely** - Use platform keystores (iOS Keychain, Android Keystore, etc.)
|
|
386
483
|
- **Rotate keys** - Implement key rotation for long-running applications
|
|
387
|
-
- **Use platform keystores** - iOS Keychain, Android Keystore, or OS credential managers
|
|
388
484
|
- **Enable leak scanning** - Catch any missed PII in output
|
|
389
485
|
|
|
486
|
+
## Browser Usage
|
|
487
|
+
|
|
488
|
+
The library works seamlessly in browsers without any special configuration.
|
|
489
|
+
|
|
490
|
+
### Basic Browser Example
|
|
491
|
+
|
|
492
|
+
```html
|
|
493
|
+
<!DOCTYPE html>
|
|
494
|
+
<html>
|
|
495
|
+
<head>
|
|
496
|
+
<title>PII Anonymization</title>
|
|
497
|
+
</head>
|
|
498
|
+
<body>
|
|
499
|
+
<script type="module">
|
|
500
|
+
import {
|
|
501
|
+
createAnonymizer,
|
|
502
|
+
InMemoryKeyProvider,
|
|
503
|
+
decryptPIIMap,
|
|
504
|
+
rehydrate
|
|
505
|
+
} from './node_modules/@elanlanguages/bridge-anonymization/dist/index.js';
|
|
506
|
+
|
|
507
|
+
async function demo() {
|
|
508
|
+
// Create anonymizer
|
|
509
|
+
const keyProvider = new InMemoryKeyProvider();
|
|
510
|
+
const anonymizer = createAnonymizer({
|
|
511
|
+
ner: {
|
|
512
|
+
mode: 'quantized',
|
|
513
|
+
onStatus: (s) => console.log('NER:', s),
|
|
514
|
+
onDownloadProgress: (p) => console.log(`Download: ${p.percent}%`)
|
|
515
|
+
},
|
|
516
|
+
semantic: { enabled: true },
|
|
517
|
+
keyProvider
|
|
518
|
+
});
|
|
519
|
+
|
|
520
|
+
// Initialize (downloads models on first use)
|
|
521
|
+
await anonymizer.initialize();
|
|
522
|
+
|
|
523
|
+
// Anonymize
|
|
524
|
+
const result = await anonymizer.anonymize(
|
|
525
|
+
'Contact Maria Schmidt at maria@example.com in Berlin.'
|
|
526
|
+
);
|
|
527
|
+
|
|
528
|
+
console.log('Anonymized:', result.anonymizedText);
|
|
529
|
+
// "Contact <PII type="PERSON" gender="female" id="1"/> at <PII type="EMAIL" id="2"/> in <PII type="LOCATION" scope="city" id="3"/>."
|
|
530
|
+
|
|
531
|
+
// Rehydrate
|
|
532
|
+
const key = await keyProvider.getKey();
|
|
533
|
+
const piiMap = await decryptPIIMap(result.piiMap, key);
|
|
534
|
+
const original = rehydrate(result.anonymizedText, piiMap);
|
|
535
|
+
|
|
536
|
+
console.log('Rehydrated:', original);
|
|
537
|
+
|
|
538
|
+
await anonymizer.dispose();
|
|
539
|
+
}
|
|
540
|
+
|
|
541
|
+
demo().catch(console.error);
|
|
542
|
+
</script>
|
|
543
|
+
</body>
|
|
544
|
+
</html>
|
|
545
|
+
```
|
|
546
|
+
|
|
547
|
+
### Browser Notes
|
|
548
|
+
|
|
549
|
+
- **First-use downloads**: The NER model (~280 MB) and semantic data (~12 MB) are fetched the first time they are needed, then served from the local cache
|
|
550
|
+
- **ONNX runtime**: Automatically loaded from CDN if not bundled
|
|
551
|
+
- **Offline support**: After initial download, everything works offline
|
|
552
|
+
- **Storage**: Uses IndexedDB and OPFS - data persists across sessions
|
|
553
|
+
|
|
390
554
|
## Bun Support
|
|
391
555
|
|
|
392
|
-
This library works with [Bun](https://bun.sh). Since `onnxruntime-node` is a native Node.js addon, Bun
|
|
556
|
+
This library works with [Bun](https://bun.sh). Because `onnxruntime-node` is a native Node.js addon, the library runs on `onnxruntime-web` under Bun, so install it alongside the package:
|
|
393
557
|
|
|
394
558
|
```bash
|
|
395
559
|
bun add @elanlanguages/bridge-anonymization onnxruntime-web
|
|
396
560
|
```
|
|
397
561
|
|
|
398
|
-
Usage is identical - the library auto-detects the runtime
|
|
399
|
-
|
|
400
|
-
```typescript
|
|
401
|
-
import { createAnonymizer } from '@elanlanguages/bridge-anonymization';
|
|
402
|
-
|
|
403
|
-
const anonymizer = createAnonymizer({
|
|
404
|
-
ner: { mode: 'quantized' }
|
|
405
|
-
});
|
|
406
|
-
|
|
407
|
-
await anonymizer.initialize();
|
|
408
|
-
const result = await anonymizer.anonymize('Hello John Smith');
|
|
409
|
-
```
|
|
562
|
+
Usage is identical - the library auto-detects the runtime.
|
|
410
563
|
|
|
411
564
|
## Performance
|
|
412
565
|
|
|
@@ -414,12 +567,22 @@ const result = await anonymizer.anonymize('Hello John Smith');
|
|
|
414
567
|
|-----------|-----------------|-------|
|
|
415
568
|
| Regex pass | ~5 ms | All regex recognizers |
|
|
416
569
|
| NER inference | ~100-150 ms | Quantized model |
|
|
570
|
+
| Semantic enrichment | ~1-2 ms | After data loaded |
|
|
417
571
|
| Total pipeline | ~150-200 ms | Full anonymization |
|
|
418
572
|
|
|
419
573
|
| Model | Size | First-Use Download |
|
|
420
574
|
|-------|------|-------------------|
|
|
421
575
|
| Quantized | ~280 MB | ~30s on fast connection |
|
|
422
576
|
| Standard | ~1.1 GB | ~2min on fast connection |
|
|
577
|
+
| Semantic Data | ~12 MB | ~5s on fast connection |
|
|
578
|
+
|
|
579
|
+
## Requirements
|
|
580
|
+
|
|
581
|
+
| Environment | Version | Notes |
|
|
582
|
+
|-------------|---------|-------|
|
|
583
|
+
| Node.js | >= 18.0.0 | Uses native `onnxruntime-node` |
|
|
584
|
+
| Bun | >= 1.0.0 | Requires `onnxruntime-web` |
|
|
585
|
+
| Browsers | Chrome 86+, Firefox 89+, Safari 15.4+, Edge 86+ | Uses OPFS for model storage |
|
|
423
586
|
|
|
424
587
|
## Development
|
|
425
588
|
|
|
@@ -432,11 +595,14 @@ npm test
|
|
|
432
595
|
|
|
433
596
|
# Build
|
|
434
597
|
npm run build
|
|
598
|
+
|
|
599
|
+
# Lint
|
|
600
|
+
npm run lint
|
|
435
601
|
```
|
|
436
602
|
|
|
437
603
|
### Building Custom Models
|
|
438
604
|
|
|
439
|
-
For development or custom models
|
|
605
|
+
For development or custom models:
|
|
440
606
|
|
|
441
607
|
```bash
|
|
442
608
|
# Requires Python 3.8+
|
|
@@ -444,11 +610,6 @@ npm run setup:ner # Standard model
|
|
|
444
610
|
npm run setup:ner:quantized # Quantized model
|
|
445
611
|
```
|
|
446
612
|
|
|
447
|
-
## Requirements
|
|
448
|
-
|
|
449
|
-
- Node.js >= 18.0.0 (ONNX runtime included automatically)
|
|
450
|
-
- Bun >= 1.0.0 (requires `onnxruntime-web`)
|
|
451
|
-
|
|
452
613
|
## License
|
|
453
614
|
|
|
454
615
|
MIT
|