redact-ai-stream 1.1.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +5 -0
- package/dist/index.d.ts +11 -3
- package/dist/index.js +88 -39
- package/package.json +4 -1
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Godfrey Lebo
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
CHANGED
|
@@ -5,11 +5,16 @@
|
|
|
5
5
|

|
|
6
6
|

|
|
7
7
|
|
|
8
|
+

|
|
9
|
+
|
|
8
10
|
**Bi-directional PII Redaction for AI Streams**
|
|
9
11
|
|
|
10
12
|
`redact-ai-stream` is a lightweight, specialized Node.js library designed to secure your AI applications. It acts as a middleware layer, automatically redacting Personally Identifiable Information (PII) from data streams *before* they exit your secure boundary (e.g., to OpenAI, Anthropic), and transparently restoring that data in the incoming response stream.
|
|
11
13
|
|
|
14
|
+

|
|
15
|
+
|
|
12
16
|
## Why use this?
|
|
17
|
+
|
|
13
18
|
When building RAG requests or chat interfaces, you often need to send user context to an LLM. However, sending raw email addresses, phone numbers, or credit card details violates privacy regulations (GDPR, CCPA) and security best practices. `redact-ai-stream` solves this by tokenizing sensitive data on the fly.
|
|
14
19
|
|
|
15
20
|
## Features
|
package/dist/index.d.ts
CHANGED
|
@@ -2,13 +2,21 @@ import { Transform } from 'stream';
|
|
|
2
2
|
declare class RedactionSession {
|
|
3
3
|
tokenMap: Map<string, string>;
|
|
4
4
|
constructor();
|
|
5
|
+
private redactBlock;
|
|
5
6
|
/**
|
|
6
|
-
*
|
|
7
|
-
*
|
|
7
|
+
* Returns a Transform stream that accepts strings/buffers, identifies PII,
|
|
8
|
+
* replaces it with reversible tokens, and stores the original mapping.
|
|
9
|
+
*
|
|
10
|
+
* Chunk-boundary safety: a hold-back buffer of up to MAX_PATTERN_LENGTH
|
|
11
|
+
* characters is retained between chunks so that a PII pattern split across
|
|
12
|
+
* chunk boundaries (e.g. ["te", "st@example.com"]) is still detected.
|
|
8
13
|
*/
|
|
9
14
|
redact(): Transform;
|
|
10
15
|
/**
|
|
11
|
-
*
|
|
16
|
+
* Returns a Transform stream that restores original PII from tokens.
|
|
17
|
+
*
|
|
18
|
+
* Tokens are fixed-shape (`<TYPE_<uuidv4>>`) so cross-chunk safety only
|
|
19
|
+
* requires holding back the last ~64 characters between chunks.
|
|
12
20
|
*/
|
|
13
21
|
restore(): Transform;
|
|
14
22
|
}
|
package/dist/index.js
CHANGED
|
@@ -6,13 +6,53 @@ const PATTERNS = {
|
|
|
6
6
|
PHONE: /\b\+?(\d{1,4}?[-. ]?)?(\(?\d{3}\)?[-. ]?)?\d{3}[-. ]?\d{4}\b/g,
|
|
7
7
|
CREDIT_CARD: /\b(?:\d[ -]*?){13,19}\b/g
|
|
8
8
|
};
|
|
9
|
+
// Maximum length any in-flight PII pattern could plausibly reach. Used as the
|
|
10
|
+
// hold-back window when chunks arrive mid-pattern so we never finalise a chunk
|
|
11
|
+
// while a PII match is still being assembled.
|
|
12
|
+
const MAX_PATTERN_LENGTH = 128;
|
|
9
13
|
class RedactionSession {
|
|
10
14
|
constructor() {
|
|
11
15
|
this.tokenMap = new Map();
|
|
12
16
|
}
|
|
17
|
+
redactBlock(text) {
|
|
18
|
+
// Park already-emitted tokens behind NUL sentinels while we run the
|
|
19
|
+
// remaining PII patterns, then splice them back. NUL cannot appear in
|
|
20
|
+
// normal text and breaks the \b / \d / \w boundaries our subsequent
|
|
21
|
+
// phone / credit-card regexes rely on, so parked tokens are inert.
|
|
22
|
+
const PARKED = [];
|
|
23
|
+
const park = (token) => {
|
|
24
|
+
const i = PARKED.push(token) - 1;
|
|
25
|
+
return `\x00${i}\x00`;
|
|
26
|
+
};
|
|
27
|
+
let out = text;
|
|
28
|
+
out = out.replace(PATTERNS.EMAIL, (match) => {
|
|
29
|
+
const token = `<EMAIL_${(0, uuid_1.v4)()}>`;
|
|
30
|
+
this.tokenMap.set(token, match);
|
|
31
|
+
return park(token);
|
|
32
|
+
});
|
|
33
|
+
out = out.replace(PATTERNS.CREDIT_CARD, (match) => {
|
|
34
|
+
if (match.replace(/\D/g, '').length < 13)
|
|
35
|
+
return match;
|
|
36
|
+
const token = `<CC_${(0, uuid_1.v4)()}>`;
|
|
37
|
+
this.tokenMap.set(token, match);
|
|
38
|
+
return park(token);
|
|
39
|
+
});
|
|
40
|
+
out = out.replace(PATTERNS.PHONE, (match) => {
|
|
41
|
+
if (match.replace(/\D/g, '').length < 10)
|
|
42
|
+
return match;
|
|
43
|
+
const token = `<PHONE_${(0, uuid_1.v4)()}>`;
|
|
44
|
+
this.tokenMap.set(token, match);
|
|
45
|
+
return park(token);
|
|
46
|
+
});
|
|
47
|
+
return out.replace(/\x00(\d+)\x00/g, (_, idx) => PARKED[Number(idx)]);
|
|
48
|
+
}
|
|
13
49
|
/**
|
|
14
|
-
*
|
|
15
|
-
*
|
|
50
|
+
* Returns a Transform stream that accepts strings/buffers, identifies PII,
|
|
51
|
+
* replaces it with reversible tokens, and stores the original mapping.
|
|
52
|
+
*
|
|
53
|
+
* Chunk-boundary safety: a hold-back buffer of up to MAX_PATTERN_LENGTH
|
|
54
|
+
* characters is retained between chunks so that a PII pattern split across
|
|
55
|
+
* chunk boundaries (e.g. ["te", "st@example.com"]) is still detected.
|
|
16
56
|
*/
|
|
17
57
|
redact() {
|
|
18
58
|
const session = this;
|
|
@@ -20,58 +60,67 @@ class RedactionSession {
|
|
|
20
60
|
return new stream_1.Transform({
|
|
21
61
|
objectMode: true,
|
|
22
62
|
transform(chunk, encoding, callback) {
|
|
23
|
-
|
|
24
|
-
//
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
const token = `<PHONE_${(0, uuid_1.v4)()}>`;
|
|
43
|
-
session.tokenMap.set(token, match);
|
|
44
|
-
return token;
|
|
45
|
-
});
|
|
46
|
-
this.push(text);
|
|
47
|
-
buffer = '';
|
|
63
|
+
const text = buffer + chunk.toString();
|
|
64
|
+
// Decide how much of `text` is safe to emit now: only the prefix
|
|
65
|
+
// that cannot be extended by the next chunk into a PII match.
|
|
66
|
+
// Hold the tail (up to MAX_PATTERN_LENGTH chars, snapped to the
|
|
67
|
+
// last whitespace boundary when possible) in the buffer.
|
|
68
|
+
let cut = text.length - MAX_PATTERN_LENGTH;
|
|
69
|
+
if (cut < 0)
|
|
70
|
+
cut = 0;
|
|
71
|
+
if (cut > 0) {
|
|
72
|
+
const ws = text.lastIndexOf(' ', cut);
|
|
73
|
+
if (ws > 0)
|
|
74
|
+
cut = ws + 1;
|
|
75
|
+
}
|
|
76
|
+
const head = text.slice(0, cut);
|
|
77
|
+
const tail = text.slice(cut);
|
|
78
|
+
if (head.length > 0) {
|
|
79
|
+
this.push(session.redactBlock(head));
|
|
80
|
+
}
|
|
81
|
+
buffer = tail;
|
|
48
82
|
callback();
|
|
49
83
|
},
|
|
50
84
|
flush(callback) {
|
|
51
|
-
if (buffer) {
|
|
52
|
-
this.push(buffer);
|
|
85
|
+
if (buffer.length > 0) {
|
|
86
|
+
this.push(session.redactBlock(buffer));
|
|
87
|
+
buffer = '';
|
|
53
88
|
}
|
|
54
89
|
callback();
|
|
55
90
|
}
|
|
56
91
|
});
|
|
57
92
|
}
|
|
58
93
|
/**
|
|
59
|
-
*
|
|
94
|
+
* Returns a Transform stream that restores original PII from tokens.
|
|
95
|
+
*
|
|
96
|
+
* Tokens are fixed-shape (`<TYPE_<uuidv4>>`) so cross-chunk safety only
|
|
97
|
+
* requires holding back the last ~64 characters between chunks.
|
|
60
98
|
*/
|
|
61
99
|
restore() {
|
|
62
100
|
const session = this;
|
|
101
|
+
let buffer = '';
|
|
102
|
+
const TOKEN_MAX = 64;
|
|
103
|
+
const tokenPattern = /<(EMAIL|CC|PHONE)_[0-9a-fA-F-]{36}>/g;
|
|
104
|
+
const sub = (s) => s.replace(tokenPattern, (token) => session.tokenMap.get(token) ?? token);
|
|
63
105
|
return new stream_1.Transform({
|
|
64
106
|
objectMode: true,
|
|
65
107
|
transform(chunk, encoding, callback) {
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
108
|
+
const text = buffer + chunk.toString();
|
|
109
|
+
let cut = text.length - TOKEN_MAX;
|
|
110
|
+
if (cut < 0)
|
|
111
|
+
cut = 0;
|
|
112
|
+
const head = text.slice(0, cut);
|
|
113
|
+
const tail = text.slice(cut);
|
|
114
|
+
if (head.length > 0)
|
|
115
|
+
this.push(sub(head));
|
|
116
|
+
buffer = tail;
|
|
117
|
+
callback();
|
|
118
|
+
},
|
|
119
|
+
flush(callback) {
|
|
120
|
+
if (buffer.length > 0) {
|
|
121
|
+
this.push(sub(buffer));
|
|
122
|
+
buffer = '';
|
|
123
|
+
}
|
|
75
124
|
callback();
|
|
76
125
|
}
|
|
77
126
|
});
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "redact-ai-stream",
|
|
3
|
-
"version": "1.1.0",
|
|
3
|
+
"version": "1.3.0",
|
|
4
4
|
"description": "Bi-directional PII redaction stream for AI applications",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"types": "dist/index.d.ts",
|
|
@@ -27,6 +27,9 @@
|
|
|
27
27
|
"type": "git",
|
|
28
28
|
"url": "git+https://github.com/emorilebo/redact-ai-stream.git"
|
|
29
29
|
},
|
|
30
|
+
"engines": {
|
|
31
|
+
"node": ">=18.0.0"
|
|
32
|
+
},
|
|
30
33
|
"dependencies": {
|
|
31
34
|
"uuid": "^9.0.0"
|
|
32
35
|
},
|