openred 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 OpenRed Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,289 @@
1
+ # openred
2
+
3
+ Fast, zero-dependency PII redaction for AI pipelines. Drop-in support for OpenAI, Anthropic, LangChain, Express, and Fastify.
4
+
5
+ ## Install
6
+
7
+ ```bash
8
+ npm install openred
9
+ ```
10
+
11
+ ## Quick Start
12
+
13
+ ```ts
14
+ import { RedactionPipeline } from 'openred';
15
+
16
+ const pipeline = new RedactionPipeline();
17
+
18
+ const result = pipeline.redact('Email john@example.com, call 555-123-4567');
19
+ console.log(result.text);
20
+ // → "Email [EMAIL_1], call [PHONE_2]"
21
+ console.log(result.stats);
22
+ // → { totalDetected: 2, byType: { EMAIL: 1, PHONE: 1 }, processingTimeMs: 0.12 }
23
+ ```
24
+
25
+ ## What It Detects
26
+
27
+ | Type | Examples | Confidence |
28
+ |------|----------|------------|
29
+ | Email | `john@example.com`, `first.last+tag@domain.co.uk` | 0.95 |
30
+ | Phone | `555-123-4567`, `(555) 123-4567`, `+44 20 7946 0958` | 0.85–0.95 |
31
+ | SSN | `123-45-6789` (validates area/group/serial) | 0.90 |
32
+ | Credit Card | `4111-1111-1111-1111` (Luhn validated) | 0.80–0.95 |
33
+ | IP Address | `203.0.113.1` (skips private/localhost) | 0.90 |
34
+ | URL | `https://example.com/path?q=search` | 0.95 |
35
+ | Date of Birth | `01/15/1990`, `January 15, 1990` (context-aware) | 0.50–0.85 |
36
+ | Address | `123 Main Street` (US street patterns) | 0.60–0.80 |
37
+
38
+ ## Strategies
39
+
40
+ ```ts
41
+ const pipeline = new RedactionPipeline({ strategy: 'placeholder' });
42
+ // "Email [EMAIL_1], call [PHONE_2]" — numbered, deterministic tokens (default)
43
+
44
+ const pipeline = new RedactionPipeline({ strategy: 'category' });
45
+ // "Email [EMAIL], call [PHONE]" — simple type labels
46
+
47
+ const pipeline = new RedactionPipeline({ strategy: 'hash' });
48
+ // "Email [EMAIL:855f96e9], call [PHONE:a3c1d2e4]" — truncated SHA-256 hash
49
+
50
+ const pipeline = new RedactionPipeline({ strategy: 'mask' });
51
+ // "Email j███@e██████.com, call ███-███-4567" — partial reveal
52
+
53
+ // Custom strategy function
54
+ const pipeline = new RedactionPipeline({
55
+ strategy: (match) => `<<${match.type}>>`,
56
+ });
57
+ ```
58
+
59
+ ## Pipeline Configuration
60
+
61
+ ```ts
62
+ import { RedactionPipeline, EmailDetector, PhoneDetector } from 'openred';
63
+
64
+ const pipeline = new RedactionPipeline({
65
+ // Pick specific detectors (default: all 8)
66
+ detectors: [EmailDetector, PhoneDetector],
67
+
68
+ // Redaction strategy (default: 'placeholder')
69
+ strategy: 'mask',
70
+
71
+ // Minimum confidence threshold (0-1, default: 0)
72
+ minConfidence: 0.7,
73
+
74
+ // Values to never redact
75
+ allowList: ['support@company.com'],
76
+
77
+ // How to resolve overlapping detections
78
+ overlapResolution: 'highest-confidence', // 'longest' | 'highest-confidence' | 'first'
79
+
80
+ // Enable vault for round-trip redaction
81
+ vault: true,
82
+ vaultTTL: 60000, // auto-expire entries after 60s
83
+
84
+ // Hooks for logging/audit
85
+ onDetection: (match) => console.log('Found:', match.type, match.value),
86
+ onRedaction: (result) => console.log('Redacted:', result.stats.totalDetected, 'items'),
87
+ });
88
+ ```
89
+
90
+ ### RedactionResult
91
+
92
+ Every call to `pipeline.redact()` returns a rich result object:
93
+
94
+ ```ts
95
+ interface RedactionResult {
96
+ text: string; // redacted output
97
+ matches: PIIMatch[]; // all detected PII with positions and confidence
98
+ tokens: TokenMapping[]; // original → replacement mappings
99
+ stats: {
100
+ totalDetected: number;
101
+ byType: Record<string, number>;
102
+ processingTimeMs: number;
103
+ };
104
+ }
105
+ ```
106
+
107
+ ## Vault (Round-Trip Redaction)
108
+
109
+ Redact PII before sending to an LLM, then restore original values in the response.
110
+
111
+ ```ts
112
+ const pipeline = new RedactionPipeline({ vault: true });
113
+
114
+ // Redact before sending to LLM
115
+ const result = pipeline.redact('My email is john@example.com');
116
+ // → "My email is [EMAIL_1]"
117
+
118
+ // LLM responds using the token
119
+ const llmResponse = 'I will contact you at [EMAIL_1].';
120
+
121
+ // Restore original values
122
+ const vault = pipeline.getVault();
123
+ const restored = vault.restore(llmResponse);
124
+ // → "I will contact you at john@example.com."
125
+
126
+ // Audit
127
+ vault.getEntries();
128
+ // → [{ original: 'john@example.com', replacement: '[EMAIL_1]', type: 'EMAIL' }]
129
+
130
+ // Export/import vault state
131
+ const state = vault.export();
132
+ vault.import(state);
133
+ ```
134
+
135
+ ## LLM Integrations
136
+
137
+ ### OpenAI
138
+
139
+ ```ts
140
+ import OpenAI from 'openai';
141
+ import { wrapOpenAI } from 'openred/integrations/openai';
142
+
143
+ const client = new OpenAI();
144
+ const safe = wrapOpenAI(client);
145
+
146
+ // Messages are redacted before the API call.
147
+ // Responses are automatically de-redacted.
148
+ const response = await safe.chat.completions.create({
149
+ model: 'gpt-4',
150
+ messages: [{ role: 'user', content: 'My email is john@test.com' }],
151
+ });
152
+ // response.choices[0].message.content has PII restored
153
+
154
+ // Access the pipeline for stats/config
155
+ safe.pipeline.getVault().getEntries();
156
+ ```
157
+
158
+ Set `autoRestore: false` to only redact outgoing messages without restoring responses:
159
+
160
+ ```ts
161
+ const safe = wrapOpenAI(client, { autoRestore: false, strategy: 'category' });
162
+ ```
163
+
164
+ ### Anthropic
165
+
166
+ ```ts
167
+ import Anthropic from '@anthropic-ai/sdk';
168
+ import { wrapAnthropic } from 'openred/integrations/anthropic';
169
+
170
+ const client = new Anthropic();
171
+ const safe = wrapAnthropic(client);
172
+
173
+ const response = await safe.messages.create({
174
+ model: 'claude-sonnet-4-5-20250929',
175
+ max_tokens: 1024,
176
+ messages: [{ role: 'user', content: 'My SSN is 123-45-6789' }],
177
+ });
178
+ // PII redacted before API call, restored in response
179
+ ```
180
+
181
+ ### LangChain
182
+
183
+ ```ts
184
+ import { ChatOpenAI } from '@langchain/openai';
185
+ import { wrapLangChain } from 'openred/integrations/langchain';
186
+
187
+ const llm = new ChatOpenAI({ model: 'gpt-4' });
188
+ const safe = wrapLangChain(llm);
189
+
190
+ const response = await safe.invoke('My phone is 555-123-4567');
191
+ // PII redacted before invoke, restored in response
192
+ ```
193
+
194
+ ## Middleware
195
+
196
+ ### Express
197
+
198
+ ```ts
199
+ import express from 'express';
200
+ import { piiMiddleware } from 'openred/middleware/express';
201
+
202
+ const app = express();
203
+ app.use(express.json());
204
+
205
+ // Redact all strings in req.body
206
+ app.use(piiMiddleware({ strategy: 'placeholder' }));
207
+
208
+ // Or target specific fields
209
+ app.use(piiMiddleware({
210
+ fields: ['body.email', 'body.user.name'],
211
+ passVault: true, // attaches vault to req.openredVault
212
+ }));
213
+ ```
214
+
215
+ ### Fastify
216
+
217
+ ```ts
218
+ import Fastify from 'fastify';
219
+ import { piiFastifyHook } from 'openred/middleware/fastify';
220
+
221
+ const app = Fastify();
222
+ app.addHook('preHandler', piiFastifyHook({ strategy: 'category' }));
223
+ ```
224
+
225
+ ## Locale Support
226
+
227
+ Built-in locale packs for region-specific PII patterns:
228
+
229
+ ```ts
230
+ import { RedactionPipeline, enGB, deDE } from 'openred';
231
+
232
+ // UK: detects UK phone (+44), National Insurance Numbers, UK postcodes
233
+ const uk = new RedactionPipeline({ detectors: enGB });
234
+
235
+ // German: detects German phone (+49), Steuer-ID, PLZ (postal codes)
236
+ const de = new RedactionPipeline({ detectors: deDE });
237
+ ```
238
+
239
+ ## Custom Detectors
240
+
241
+ Add your own PII detectors with the plugin interface:
242
+
243
+ ```ts
244
+ import { RedactionPipeline } from 'openred';
245
+ import type { PIIDetector } from 'openred';
246
+
247
+ const EmployeeIDDetector: PIIDetector = {
248
+ name: 'employee-id',
249
+ type: 'EMPLOYEE_ID', // extensible — any string works
250
+ confidence: 'high',
251
+ detect(text) {
252
+ const matches = [];
253
+ const re = /EMP-\d{6}/g;
254
+ let m;
255
+ while ((m = re.exec(text)) !== null) {
256
+ matches.push({
257
+ type: 'EMPLOYEE_ID' as const,
258
+ value: m[0],
259
+ start: m.index,
260
+ end: m.index + m[0].length,
261
+ confidence: 0.95,
262
+ detector: 'employee-id',
263
+ });
264
+ }
265
+ return matches;
266
+ },
267
+ };
268
+
269
+ const pipeline = new RedactionPipeline();
270
+ pipeline.addDetector(EmployeeIDDetector);
271
+
272
+ pipeline.redact('Contact EMP-123456 for details');
273
+ // → { text: 'Contact [EMPLOYEE_ID_1] for details', ... }
274
+ ```
275
+
276
+ ## Limitations
277
+
278
+ This package uses regex-based detection. It handles structured PII patterns well but cannot detect:
279
+
280
+ - **Names** — "Jordan" could be a name or a country
281
+ - **Context-dependent PII** — "My account number is 7483921" vs "there are 7483921 users"
282
+ - **Implicit PII** — "The CEO of Tesla" uniquely identifies a person
283
+ - **Other locales** — PII formats outside the en-US, en-GB, and de-DE locales are not currently supported
284
+
285
+ For ML-powered contextual detection with higher accuracy, a hosted API is planned (coming soon).
286
+
287
+ ## License
288
+
289
+ MIT