redact-ai-stream 1.1.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Godfrey Lebo
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md CHANGED
@@ -5,11 +5,16 @@
5
5
  ![Downloads](https://img.shields.io/npm/dm/redact-ai-stream)
6
6
  ![TypeScript](https://img.shields.io/badge/types-included-blue)
7
7
 
8
+ ![Redact AI Stream Hero](assets/redact_ai_stream_hero_1766409977618.png)
9
+
8
10
  **Bi-directional PII Redaction for AI Streams**
9
11
 
10
12
  `redact-ai-stream` is a lightweight, specialized Node.js library designed to secure your AI applications. It acts as a middleware layer, automatically redacting Personally Identifiable Information (PII) from data streams *before* they exit your secure boundary (e.g., to OpenAI, Anthropic), and transparently restoring that data in the incoming response stream.
11
13
 
14
+ ![How It Works](assets/redact_ai_stream_how_it_works_1766409909844.png)
15
+
12
16
  ## Why use this?
17
+
13
18
  When building RAG requests or chat interfaces, you often need to send user context to an LLM. However, sending raw email addresses, phone numbers, or credit card details violates privacy compliance (GDPR, CCPA) and security best practices. `redact-ai-stream` solves this by tokenizing sensitive data on the fly.
14
19
 
15
20
  ## Features
package/dist/index.d.ts CHANGED
@@ -2,13 +2,21 @@ import { Transform } from 'stream';
2
2
  declare class RedactionSession {
3
3
  tokenMap: Map<string, string>;
4
4
  constructor();
5
+ private redactBlock;
5
6
  /**
6
- * returns a Transform stream that accepts strings/buffers,
7
- * identifies PII, replaces it with tokens, and stores the mapping.
7
+ * Returns a Transform stream that accepts strings/buffers, identifies PII,
8
+ * replaces it with reversible tokens, and stores the original mapping.
9
+ *
10
+ * Chunk-boundary safety: a hold-back buffer of up to MAX_PATTERN_LENGTH
11
+ * characters is retained between chunks so that a PII pattern split across
12
+ * chunk boundaries (e.g. ["te", "st@example.com"]) is still detected.
8
13
  */
9
14
  redact(): Transform;
10
15
  /**
11
- * returns a Transform stream that restores original PII from tokens.
16
+ * Returns a Transform stream that restores original PII from tokens.
17
+ *
18
+ * Tokens are fixed-shape (`<TYPE_<uuidv4>>`) so cross-chunk safety only
19
+ * requires holding back the last ~64 characters between chunks.
12
20
  */
13
21
  restore(): Transform;
14
22
  }
package/dist/index.js CHANGED
@@ -6,13 +6,53 @@ const PATTERNS = {
6
6
  PHONE: /\b\+?(\d{1,4}?[-. ]?)?(\(?\d{3}\)?[-. ]?)?\d{3}[-. ]?\d{4}\b/g,
7
7
  CREDIT_CARD: /\b(?:\d[ -]*?){13,19}\b/g
8
8
  };
9
+ // Maximum length any in-flight PII pattern could plausibly reach. Used as the
10
+ // hold-back window when chunks arrive mid-pattern so we never finalise a chunk
11
+ // while a PII match is still being assembled.
12
+ const MAX_PATTERN_LENGTH = 128;
9
13
  class RedactionSession {
10
14
  constructor() {
11
15
  this.tokenMap = new Map();
12
16
  }
17
+ redactBlock(text) {
18
+ // Park already-emitted tokens behind NUL sentinels while we run the
19
+ // remaining PII patterns, then splice them back. NUL cannot appear in
20
+ // normal text and breaks the \b / \d / \w boundaries our subsequent
21
+ // phone / credit-card regexes rely on, so parked tokens are inert.
22
+ const PARKED = [];
23
+ const park = (token) => {
24
+ const i = PARKED.push(token) - 1;
25
+ return `\x00${i}\x00`;
26
+ };
27
+ let out = text;
28
+ out = out.replace(PATTERNS.EMAIL, (match) => {
29
+ const token = `<EMAIL_${(0, uuid_1.v4)()}>`;
30
+ this.tokenMap.set(token, match);
31
+ return park(token);
32
+ });
33
+ out = out.replace(PATTERNS.CREDIT_CARD, (match) => {
34
+ if (match.replace(/\D/g, '').length < 13)
35
+ return match;
36
+ const token = `<CC_${(0, uuid_1.v4)()}>`;
37
+ this.tokenMap.set(token, match);
38
+ return park(token);
39
+ });
40
+ out = out.replace(PATTERNS.PHONE, (match) => {
41
+ if (match.replace(/\D/g, '').length < 10)
42
+ return match;
43
+ const token = `<PHONE_${(0, uuid_1.v4)()}>`;
44
+ this.tokenMap.set(token, match);
45
+ return park(token);
46
+ });
47
+ return out.replace(/\x00(\d+)\x00/g, (_, idx) => PARKED[Number(idx)]);
48
+ }
13
49
  /**
14
- * returns a Transform stream that accepts strings/buffers,
15
- * identifies PII, replaces it with tokens, and stores the mapping.
50
+ * Returns a Transform stream that accepts strings/buffers, identifies PII,
51
+ * replaces it with reversible tokens, and stores the original mapping.
52
+ *
53
+ * Chunk-boundary safety: a hold-back buffer of up to MAX_PATTERN_LENGTH
54
+ * characters is retained between chunks so that a PII pattern split across
55
+ * chunk boundaries (e.g. ["te", "st@example.com"]) is still detected.
16
56
  */
17
57
  redact() {
18
58
  const session = this;
@@ -20,58 +60,67 @@ class RedactionSession {
20
60
  return new stream_1.Transform({
21
61
  objectMode: true,
22
62
  transform(chunk, encoding, callback) {
23
- let text = buffer + chunk.toString();
24
- // Redact Email
25
- text = text.replace(PATTERNS.EMAIL, (match) => {
26
- const token = `<EMAIL_${(0, uuid_1.v4)()}>`;
27
- session.tokenMap.set(token, match);
28
- return token;
29
- });
30
- // Redact Credit Card
31
- text = text.replace(PATTERNS.CREDIT_CARD, (match) => {
32
- if (match.replace(/\D/g, '').length < 13)
33
- return match;
34
- const token = `<CC_${(0, uuid_1.v4)()}>`;
35
- session.tokenMap.set(token, match);
36
- return token;
37
- });
38
- // Redact Phone
39
- text = text.replace(PATTERNS.PHONE, (match) => {
40
- if (match.replace(/\D/g, '').length < 10)
41
- return match;
42
- const token = `<PHONE_${(0, uuid_1.v4)()}>`;
43
- session.tokenMap.set(token, match);
44
- return token;
45
- });
46
- this.push(text);
47
- buffer = '';
63
+ const text = buffer + chunk.toString();
64
+ // Decide how much of `text` is safe to emit now: only the prefix
65
+ // that cannot be extended by the next chunk into a PII match.
66
+ // Hold the tail (up to MAX_PATTERN_LENGTH chars, snapped to the
67
+ // last whitespace boundary when possible) in the buffer.
68
+ let cut = text.length - MAX_PATTERN_LENGTH;
69
+ if (cut < 0)
70
+ cut = 0;
71
+ if (cut > 0) {
72
+ const ws = text.lastIndexOf(' ', cut);
73
+ if (ws > 0)
74
+ cut = ws + 1;
75
+ }
76
+ const head = text.slice(0, cut);
77
+ const tail = text.slice(cut);
78
+ if (head.length > 0) {
79
+ this.push(session.redactBlock(head));
80
+ }
81
+ buffer = tail;
48
82
  callback();
49
83
  },
50
84
  flush(callback) {
51
- if (buffer) {
52
- this.push(buffer);
85
+ if (buffer.length > 0) {
86
+ this.push(session.redactBlock(buffer));
87
+ buffer = '';
53
88
  }
54
89
  callback();
55
90
  }
56
91
  });
57
92
  }
58
93
  /**
59
- * returns a Transform stream that restores original PII from tokens.
94
+ * Returns a Transform stream that restores original PII from tokens.
95
+ *
96
+ * Tokens are fixed-shape (`<TYPE_<uuidv4>>`) so cross-chunk safety only
97
+ * requires holding back the last ~64 characters between chunks.
60
98
  */
61
99
  restore() {
62
100
  const session = this;
101
+ let buffer = '';
102
+ const TOKEN_MAX = 64;
103
+ const tokenPattern = /<(EMAIL|CC|PHONE)_[0-9a-fA-F-]{36}>/g;
104
+ const sub = (s) => s.replace(tokenPattern, (token) => session.tokenMap.get(token) ?? token);
63
105
  return new stream_1.Transform({
64
106
  objectMode: true,
65
107
  transform(chunk, encoding, callback) {
66
- let text = chunk.toString();
67
- const tokenPattern = /<(EMAIL|CC|PHONE)_[0-9a-fA-F-]{36}>/g;
68
- text = text.replace(tokenPattern, (token) => {
69
- if (session.tokenMap.has(token)) {
70
- return session.tokenMap.get(token);
71
- }
72
- return token;
73
- });
74
- this.push(text);
108
+ const text = buffer + chunk.toString();
109
+ let cut = text.length - TOKEN_MAX;
110
+ if (cut < 0)
111
+ cut = 0;
112
+ const head = text.slice(0, cut);
113
+ const tail = text.slice(cut);
114
+ if (head.length > 0)
115
+ this.push(sub(head));
116
+ buffer = tail;
117
+ callback();
118
+ },
119
+ flush(callback) {
120
+ if (buffer.length > 0) {
121
+ this.push(sub(buffer));
122
+ buffer = '';
123
+ }
75
124
  callback();
76
125
  }
77
126
  });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "redact-ai-stream",
3
- "version": "1.1.0",
3
+ "version": "1.3.0",
4
4
  "description": "Bi-directional PII redaction stream for AI applications",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -27,6 +27,9 @@
27
27
  "type": "git",
28
28
  "url": "git+https://github.com/emorilebo/redact-ai-stream.git"
29
29
  },
30
+ "engines": {
31
+ "node": ">=18.0.0"
32
+ },
30
33
  "dependencies": {
31
34
  "uuid": "^9.0.0"
32
35
  },