rehydra 0.3.3 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. package/README.md +173 -873
  2. package/dist/core/anonymizer.d.ts +9 -1
  3. package/dist/core/anonymizer.d.ts.map +1 -1
  4. package/dist/core/anonymizer.js +29 -7
  5. package/dist/core/anonymizer.js.map +1 -1
  6. package/dist/index.d.ts +2 -0
  7. package/dist/index.d.ts.map +1 -1
  8. package/dist/index.js +4 -0
  9. package/dist/index.js.map +1 -1
  10. package/dist/proxy/index.d.ts +12 -0
  11. package/dist/proxy/index.d.ts.map +1 -0
  12. package/dist/proxy/index.js +11 -0
  13. package/dist/proxy/index.js.map +1 -0
  14. package/dist/proxy/providers/anthropic.d.ts +17 -0
  15. package/dist/proxy/providers/anthropic.d.ts.map +1 -0
  16. package/dist/proxy/providers/anthropic.js +117 -0
  17. package/dist/proxy/providers/anthropic.js.map +1 -0
  18. package/dist/proxy/providers/index.d.ts +19 -0
  19. package/dist/proxy/providers/index.d.ts.map +1 -0
  20. package/dist/proxy/providers/index.js +40 -0
  21. package/dist/proxy/providers/index.js.map +1 -0
  22. package/dist/proxy/providers/openai.d.ts +17 -0
  23. package/dist/proxy/providers/openai.d.ts.map +1 -0
  24. package/dist/proxy/providers/openai.js +92 -0
  25. package/dist/proxy/providers/openai.js.map +1 -0
  26. package/dist/proxy/providers/types.d.ts +29 -0
  27. package/dist/proxy/providers/types.d.ts.map +1 -0
  28. package/dist/proxy/providers/types.js +6 -0
  29. package/dist/proxy/providers/types.js.map +1 -0
  30. package/dist/proxy/proxy-server.d.ts +53 -0
  31. package/dist/proxy/proxy-server.d.ts.map +1 -0
  32. package/dist/proxy/proxy-server.js +146 -0
  33. package/dist/proxy/proxy-server.js.map +1 -0
  34. package/dist/proxy/rehydra-fetch.d.ts +35 -0
  35. package/dist/proxy/rehydra-fetch.d.ts.map +1 -0
  36. package/dist/proxy/rehydra-fetch.js +217 -0
  37. package/dist/proxy/rehydra-fetch.js.map +1 -0
  38. package/dist/proxy/rehydra-proxy.d.ts +40 -0
  39. package/dist/proxy/rehydra-proxy.d.ts.map +1 -0
  40. package/dist/proxy/rehydra-proxy.js +82 -0
  41. package/dist/proxy/rehydra-proxy.js.map +1 -0
  42. package/dist/proxy/sse-parser.d.ts +59 -0
  43. package/dist/proxy/sse-parser.d.ts.map +1 -0
  44. package/dist/proxy/sse-parser.js +112 -0
  45. package/dist/proxy/sse-parser.js.map +1 -0
  46. package/dist/proxy/types.d.ts +49 -0
  47. package/dist/proxy/types.d.ts.map +1 -0
  48. package/dist/proxy/types.js +5 -0
  49. package/dist/proxy/types.js.map +1 -0
  50. package/dist/proxy/wrap-client.d.ts +47 -0
  51. package/dist/proxy/wrap-client.d.ts.map +1 -0
  52. package/dist/proxy/wrap-client.js +70 -0
  53. package/dist/proxy/wrap-client.js.map +1 -0
  54. package/dist/storage/session.d.ts +3 -0
  55. package/dist/storage/session.d.ts.map +1 -1
  56. package/dist/storage/session.js +24 -1
  57. package/dist/storage/session.js.map +1 -1
  58. package/dist/storage/types.d.ts +16 -0
  59. package/dist/storage/types.d.ts.map +1 -1
  60. package/dist/streaming/anonymizer-stream.d.ts +63 -0
  61. package/dist/streaming/anonymizer-stream.d.ts.map +1 -0
  62. package/dist/streaming/anonymizer-stream.js +184 -0
  63. package/dist/streaming/anonymizer-stream.js.map +1 -0
  64. package/dist/streaming/index.d.ts +9 -0
  65. package/dist/streaming/index.d.ts.map +1 -0
  66. package/dist/streaming/index.js +8 -0
  67. package/dist/streaming/index.js.map +1 -0
  68. package/dist/streaming/sentence-buffer.d.ts +78 -0
  69. package/dist/streaming/sentence-buffer.d.ts.map +1 -0
  70. package/dist/streaming/sentence-buffer.js +238 -0
  71. package/dist/streaming/sentence-buffer.js.map +1 -0
  72. package/dist/streaming/stream-factory.d.ts +38 -0
  73. package/dist/streaming/stream-factory.d.ts.map +1 -0
  74. package/dist/streaming/stream-factory.js +69 -0
  75. package/dist/streaming/stream-factory.js.map +1 -0
  76. package/dist/streaming/types.d.ts +121 -0
  77. package/dist/streaming/types.d.ts.map +1 -0
  78. package/dist/streaming/types.js +5 -0
  79. package/dist/streaming/types.js.map +1 -0
  80. package/dist/types/index.d.ts +8 -2
  81. package/dist/types/index.d.ts.map +1 -1
  82. package/dist/types/index.js.map +1 -1
  83. package/package.json +19 -2
@@ -0,0 +1,184 @@
1
+ /**
2
+ * Anonymizer Stream
3
+ * Node.js Transform stream that anonymizes text chunk-by-chunk
4
+ * using sentence buffering for NER context.
5
+ */
6
+ import { Transform } from "node:stream";
7
+ import { SentenceBuffer } from "./sentence-buffer.js";
8
+ import { encryptPIIMap } from "../crypto/index.js";
9
/**
 * Transform stream that anonymizes text passing through it.
 *
 * Incoming chunks are decoded as UTF-8, accumulated in a SentenceBuffer and
 * anonymized whenever the buffer flushes; the anonymized text is pushed
 * downstream as strings. When `sessionId`, `piiStorageProvider` and
 * `keyProvider` are all configured, the encrypted PII map is saved when the
 * stream ends (and periodically while streaming if `saveIntervalMs` is set).
 *
 * @example
 * ```typescript
 * const stream = await createAnonymizerStream({
 *   anonymizer: { ner: { mode: 'quantized' } },
 *   sessionId: 'chat-123',
 *   piiStorageProvider: storage,
 *   keyProvider: keyProvider,
 * });
 *
 * createReadStream('input.txt')
 *   .pipe(stream)
 *   .pipe(createWriteStream('anonymized.txt'));
 * ```
 */
export class AnonymizerStream extends Transform {
  sentenceBuffer;
  keyProvider;
  piiStorageProvider;
  sessionId;
  onChunkCallback;
  onFinishCallback;
  saveIntervalMs;
  lastSaveTime = 0;
  totalEntities = 0;
  startTime = 0;
  chunkCount = 0;
  // Streaming decoder: a multi-byte UTF-8 sequence split across two Buffer
  // chunks is reassembled instead of being turned into U+FFFD. `ignoreBOM`
  // keeps a leading BOM in the text, matching Buffer#toString("utf-8").
  utf8Decoder = new TextDecoder("utf-8", { ignoreBOM: true });

  /**
   * @param anonymizer - Anonymizer handed to the internal SentenceBuffer.
   * @param config - Stream configuration. Fields read here: `streamOptions`,
   *   `keyProvider`, `piiStorageProvider`, `sessionId`, `onChunk`, `onFinish`,
   *   `saveIntervalMs`, `buffer`, `locale`, `policy`.
   * @param initialPiiMap - Optional PII map used to seed the SentenceBuffer.
   */
  constructor(anonymizer, config, initialPiiMap) {
    super({
      ...config.streamOptions,
      decodeStrings: true,
      encoding: "utf-8",
    });
    this.keyProvider = config.keyProvider ?? null;
    this.piiStorageProvider = config.piiStorageProvider ?? null;
    this.sessionId = config.sessionId ?? null;
    this.onChunkCallback = config.onChunk;
    this.onFinishCallback = config.onFinish;
    this.saveIntervalMs = config.saveIntervalMs ?? null;
    this.sentenceBuffer = new SentenceBuffer(anonymizer, config.buffer, {
      keyProvider: this.keyProvider ?? undefined,
      locale: config.locale,
      policy: config.policy,
      initialPiiMap,
    });
  }

  /**
   * Get the cumulative PII map built across all chunks processed so far.
   * It is complete once the stream has finished.
   */
  getPiiMap() {
    return this.sentenceBuffer.getCumulativePiiMap();
  }

  /**
   * Get stream statistics. `totalProcessingTimeMs` is 0 until the first
   * chunk has been received.
   */
  get stats() {
    return {
      totalEntities: this.totalEntities,
      chunksProcessed: this.chunkCount,
      totalProcessingTimeMs: this.startTime > 0 ? performance.now() - this.startTime : 0,
    };
  }

  _transform(chunk, encoding, callback) {
    if (this.startTime === 0) {
      this.startTime = performance.now();
    }
    const text =
      typeof chunk === "string" ? chunk : this.utf8Decoder.decode(chunk, { stream: true });
    // Two-argument then(): if callback() itself throws, it must not be
    // invoked a second time by a trailing catch().
    this.processChunk(text).then(
      () => callback(),
      (err) => callback(err),
    );
  }

  _flush(callback) {
    this.processFlush().then(
      () => callback(),
      (err) => callback(err),
    );
  }

  /**
   * Feed decoded text into the sentence buffer and emit whatever it flushes.
   */
  async processChunk(text) {
    const chunkStart = performance.now();
    const results = await this.sentenceBuffer.append(text);
    this.emitResults(results, chunkStart);
    // Debounced save: save PII map at most once per saveIntervalMs
    if (this.saveIntervalMs !== null && results.length > 0) {
      await this.maybeSaveToStorage();
    }
  }

  /**
   * Push each flush result downstream, update counters and fire `onChunk`.
   * `startedAt` is the performance.now() timestamp the work began at.
   */
  emitResults(results, startedAt) {
    for (const result of results) {
      this.chunkCount++;
      this.totalEntities += result.entities.length;
      this.push(result.anonymizedText);
      this.onChunkCallback?.({
        anonymizedText: result.anonymizedText,
        entities: result.entities,
        totalEntities: this.totalEntities,
        processingTimeMs: performance.now() - startedAt,
      });
    }
  }

  /**
   * Encrypt the cumulative PII map with the configured key provider.
   * Resolves to undefined when there is no key provider or the map is empty.
   */
  async encryptCumulativePiiMap() {
    if (!this.keyProvider) {
      return undefined;
    }
    const piiMap = this.sentenceBuffer.getCumulativePiiMap();
    if (piiMap.size === 0) {
      return undefined;
    }
    const key = await this.keyProvider.getKey();
    return encryptPIIMap(piiMap, key);
  }

  /**
   * Save the encrypted PII map unless storage is not fully configured or the
   * last save happened less than `saveIntervalMs` ago.
   */
  async maybeSaveToStorage() {
    if (this.sessionId === null || this.piiStorageProvider === null || this.keyProvider === null) {
      return;
    }
    const now = performance.now();
    if (now - this.lastSaveTime < (this.saveIntervalMs ?? Infinity)) {
      return;
    }
    const encrypted = await this.encryptCumulativePiiMap();
    if (encrypted === undefined) {
      return;
    }
    await this.piiStorageProvider.save(this.sessionId, encrypted, {
      createdAt: Date.now(),
      entityCounts: this.buildEntityCounts(),
    });
    this.lastSaveTime = now;
  }

  /**
   * Drain the decoder and the sentence buffer, persist the PII map if a
   * session is configured, then fire `onFinish`.
   */
  async processFlush() {
    const flushStart = performance.now();
    // Bytes of an incomplete trailing UTF-8 sequence still held by the decoder.
    const tail = this.utf8Decoder.decode();
    const pending = tail.length > 0 ? await this.sentenceBuffer.append(tail) : [];
    // Flush remaining buffer
    const remaining = await this.sentenceBuffer.flush();
    this.emitResults([...pending, ...remaining], flushStart);

    // Encrypt once and reuse the result for both storage and the finish event.
    const encrypted = await this.encryptCumulativePiiMap();
    // Save PII map to storage if session is configured
    if (encrypted !== undefined && this.sessionId !== null && this.piiStorageProvider !== null) {
      await this.piiStorageProvider.save(this.sessionId, encrypted, {
        createdAt: Date.now(),
        entityCounts: this.buildEntityCounts(),
      });
    }

    // Fire finish callback. A stream that never received data reports 0,
    // consistent with the `stats` getter.
    const totalTime = this.startTime > 0 ? performance.now() - this.startTime : 0;
    this.onFinishCallback?.({
      totalEntities: this.totalEntities,
      piiMap: encrypted,
      totalProcessingTimeMs: totalTime,
    });
  }

  /**
   * Count PII map entries per type, keyed by the part of each map key that
   * precedes the last underscore.
   */
  buildEntityCounts() {
    const counts = {};
    for (const key of this.sentenceBuffer.getCumulativePiiMap().keys()) {
      // Keys are in format "TYPE_ID" e.g. "EMAIL_1", "PERSON_2"
      const underscoreIdx = key.lastIndexOf("_");
      if (underscoreIdx !== -1) {
        const type = key.slice(0, underscoreIdx);
        counts[type] = (counts[type] ?? 0) + 1;
      }
    }
    return counts;
  }
}
184
+ //# sourceMappingURL=anonymizer-stream.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"anonymizer-stream.js","sourceRoot":"","sources":["../../src/streaming/anonymizer-stream.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,SAAS,EAA0B,MAAM,aAAa,CAAC;AAEhE,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AAItD,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAGnD;;;;;;;;;;;;;;;;;;;GAmBG;AACH,MAAM,OAAO,gBAAiB,SAAQ,SAAS;IAC5B,cAAc,CAAiB;IAC/B,WAAW,CAAqB;IAChC,kBAAkB,CAA4B;IAC9C,SAAS,CAAgB;IACzB,eAAe,CAAqC;IACpD,gBAAgB,CAAsC;IAEtD,cAAc,CAAgB;IACvC,YAAY,GAAG,CAAC,CAAC;IAEjB,aAAa,GAAG,CAAC,CAAC;IAClB,SAAS,GAAG,CAAC,CAAC;IACd,UAAU,GAAG,CAAC,CAAC;IAEvB,YACE,UAAuB,EACvB,MAAoB,EACpB,aAAyB;QAEzB,KAAK,CAAC;YACJ,GAAG,MAAM,CAAC,aAAa;YACvB,aAAa,EAAE,IAAI;YACnB,QAAQ,EAAE,OAAO;SAClB,CAAC,CAAC;QAEH,IAAI,CAAC,WAAW,GAAG,MAAM,CAAC,WAAW,IAAI,IAAI,CAAC;QAC9C,IAAI,CAAC,kBAAkB,GAAG,MAAM,CAAC,kBAAkB,IAAI,IAAI,CAAC;QAC5D,IAAI,CAAC,SAAS,GAAG,MAAM,CAAC,SAAS,IAAI,IAAI,CAAC;QAC1C,IAAI,CAAC,eAAe,GAAG,MAAM,CAAC,OAAO,CAAC;QACtC,IAAI,CAAC,gBAAgB,GAAG,MAAM,CAAC,QAAQ,CAAC;QACxC,IAAI,CAAC,cAAc,GAAG,MAAM,CAAC,cAAc,IAAI,IAAI,CAAC;QAEpD,IAAI,CAAC,cAAc,GAAG,IAAI,cAAc,CAAC,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE;YAClE,WAAW,EAAE,IAAI,CAAC,WAAW,IAAI,SAAS;YAC1C,MAAM,EAAE,MAAM,CAAC,MAAM;YACrB,MAAM,EAAE,MAAM,CAAC,MAAM;YACrB,aAAa;SACd,CAAC,CAAC;IACL,CAAC;IAED;;;OAGG;IACH,SAAS;QACP,OAAO,IAAI,CAAC,cAAc,CAAC,mBAAmB,EAAE,CAAC;IACnD,CAAC;IAED;;OAEG;IACH,IAAI,KAAK;QAKP,OAAO;YACL,aAAa,EAAE,IAAI,CAAC,aAAa;YACjC,eAAe,EAAE,IAAI,CAAC,UAAU;YAChC,qBAAqB,EACnB,IAAI,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,GAAG,EAAE,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC;SAC9D,CAAC;IACJ,CAAC;IAEQ,UAAU,CACjB,KAAsB,EACtB,QAAwB,EACxB,QAA2B;QAE3B,IAAI,IAAI,CAAC,SAAS,KAAK,CAAC,EAAE,CAAC;YACzB,IAAI,CAAC,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;QACrC,CAAC;QAED,MAAM,IAAI,GAAG,OAAO,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;QAEzE,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC;aACpB,IAAI,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC;aACtB,KAAK,CAAC,CAAC,GAAU,EAAE,EAAE,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC;IAC1C,CAAC;IAEQ,MAAM,CAAC,QA
A2B;QACzC,IAAI,CAAC,YAAY,EAAE;aAChB,IAAI,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC;aACtB,KAAK,CAAC,CAAC,GAAU,EAAE,EAAE,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC;IAC1C,CAAC;IAEO,KAAK,CAAC,YAAY,CAAC,IAAY;QACrC,MAAM,UAAU,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;QACrC,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,cAAc,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;QAEvD,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;YAC7B,IAAI,CAAC,UAAU,EAAE,CAAC;YAClB,IAAI,CAAC,aAAa,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC;YAC7C,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC;YAEjC,IAAI,CAAC,eAAe,EAAE,CAAC;gBACrB,cAAc,EAAE,MAAM,CAAC,cAAc;gBACrC,QAAQ,EAAE,MAAM,CAAC,QAAQ;gBACzB,aAAa,EAAE,IAAI,CAAC,aAAa;gBACjC,gBAAgB,EAAE,WAAW,CAAC,GAAG,EAAE,GAAG,UAAU;aACjD,CAAC,CAAC;QACL,CAAC;QAED,+DAA+D;QAC/D,IAAI,IAAI,CAAC,cAAc,KAAK,IAAI,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvD,MAAM,IAAI,CAAC,kBAAkB,EAAE,CAAC;QAClC,CAAC;IACH,CAAC;IAEO,KAAK,CAAC,kBAAkB;QAC9B,IAAI,IAAI,CAAC,SAAS,KAAK,IAAI,IAAI,IAAI,CAAC,kBAAkB,KAAK,IAAI,IAAI,IAAI,CAAC,WAAW,KAAK,IAAI,EAAE,CAAC;YAC7F,OAAO;QACT,CAAC;QAED,MAAM,GAAG,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;QAC9B,IAAI,GAAG,GAAG,IAAI,CAAC,YAAY,GAAG,CAAC,IAAI,CAAC,cAAc,IAAI,QAAQ,CAAC,EAAE,CAAC;YAChE,OAAO;QACT,CAAC;QAED,MAAM,MAAM,GAAG,IAAI,CAAC,cAAc,CAAC,mBAAmB,EAAE,CAAC;QACzD,IAAI,MAAM,CAAC,IAAI,KAAK,CAAC;YAAE,OAAO;QAE9B,MAAM,GAAG,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,MAAM,EAAE,CAAC;QAC5C,MAAM,SAAS,GAAG,MAAM,aAAa,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QACnD,MAAM,IAAI,CAAC,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,SAAS,EAAE;YAC5D,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;YACrB,YAAY,EAAE,IAAI,CAAC,iBAAiB,EAAE;SACvC,CAAC,CAAC;QACH,IAAI,CAAC,YAAY,GAAG,GAAG,CAAC;IAC1B,CAAC;IAEO,KAAK,CAAC,YAAY;QACxB,yBAAyB;QACzB,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,cAAc,CAAC,KAAK,EAAE,CAAC;QAElD,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;YAC7B,IAAI,CAAC,UAAU,EAAE,CAAC;YAClB,IAAI,CAAC,aAAa,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC;YAC7C,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC;YAEjC,IAAI,CAAC,eAAe,EAAE,CAAC;gBACrB,cAAc,EAAE,MAAM,CAAC,cAAc;gBACrC,QAAQ,EAAE,MAAM,CAAC,QAAQ;gBACzB,aAAa,EAAE,IAAI,CAAC,aAAa;gBACjC,gBAAgB,EAAE,WAAW,CAAC,G
AAG,EAAE,GAAG,IAAI,CAAC,SAAS;aACrD,CAAC,CAAC;QACL,CAAC;QAED,mDAAmD;QACnD,IAAI,IAAI,CAAC,SAAS,KAAK,IAAI,IAAI,IAAI,CAAC,kBAAkB,KAAK,IAAI,IAAI,IAAI,CAAC,WAAW,KAAK,IAAI,EAAE,CAAC;YAC7F,MAAM,MAAM,GAAG,IAAI,CAAC,cAAc,CAAC,mBAAmB,EAAE,CAAC;YACzD,IAAI,MAAM,CAAC,IAAI,GAAG,CAAC,EAAE,CAAC;gBACpB,MAAM,GAAG,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,MAAM,EAAE,CAAC;gBAC5C,MAAM,SAAS,GAAG,MAAM,aAAa,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;gBACnD,MAAM,IAAI,CAAC,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,SAAS,EAAE;oBAC5D,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;oBACrB,YAAY,EAAE,IAAI,CAAC,iBAAiB,EAAE;iBACvC,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,uBAAuB;QACvB,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,IAAI,CAAC,SAAS,CAAC;QACrD,IAAI,YAAY,CAAC;QACjB,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;YACrB,MAAM,MAAM,GAAG,IAAI,CAAC,cAAc,CAAC,mBAAmB,EAAE,CAAC;YACzD,IAAI,MAAM,CAAC,IAAI,GAAG,CAAC,EAAE,CAAC;gBACpB,MAAM,GAAG,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,MAAM,EAAE,CAAC;gBAC5C,YAAY,GAAG,MAAM,aAAa,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;YAClD,CAAC;QACH,CAAC;QAED,IAAI,CAAC,gBAAgB,EAAE,CAAC;YACtB,aAAa,EAAE,IAAI,CAAC,aAAa;YACjC,MAAM,EAAE,YAAY;YACpB,qBAAqB,EAAE,SAAS;SACjC,CAAC,CAAC;IACL,CAAC;IAEO,iBAAiB;QACvB,MAAM,MAAM,GAA2B,EAAE,CAAC;QAC1C,KAAK,MAAM,GAAG,IAAI,IAAI,CAAC,cAAc,CAAC,mBAAmB,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC;YACnE,0DAA0D;YAC1D,MAAM,aAAa,GAAG,GAAG,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;YAC3C,IAAI,aAAa,KAAK,CAAC,CAAC,EAAE,CAAC;gBACzB,MAAM,IAAI,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,aAAa,CAAC,CAAC;gBACzC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;YACzC,CAAC;QACH,CAAC;QACD,OAAO,MAAM,CAAC;IAChB,CAAC;CACF"}
@@ -0,0 +1,9 @@
1
+ /**
2
+ * Streaming Anonymization Module
3
+ * Node.js/Bun only — not available in browser builds.
4
+ */
5
+ export { AnonymizerStream } from "./anonymizer-stream.js";
6
+ export { createAnonymizerStream } from "./stream-factory.js";
7
+ export { SentenceBuffer, resolveBufferConfig } from "./sentence-buffer.js";
8
+ export type { StreamConfig, SentenceBufferConfig, StreamChunkEvent, StreamFinishEvent, FlushResult, ResolvedBufferConfig, } from "./types.js";
9
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/streaming/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAC;AAC1D,OAAO,EAAE,sBAAsB,EAAE,MAAM,qBAAqB,CAAC;AAC7D,OAAO,EAAE,cAAc,EAAE,mBAAmB,EAAE,MAAM,sBAAsB,CAAC;AAC3E,YAAY,EACV,YAAY,EACZ,oBAAoB,EACpB,gBAAgB,EAChB,iBAAiB,EACjB,WAAW,EACX,oBAAoB,GACrB,MAAM,YAAY,CAAC"}
@@ -0,0 +1,8 @@
1
+ /**
2
+ * Streaming Anonymization Module
3
+ * Node.js/Bun only — not available in browser builds.
4
+ */
5
+ export { AnonymizerStream } from "./anonymizer-stream.js";
6
+ export { createAnonymizerStream } from "./stream-factory.js";
7
+ export { SentenceBuffer, resolveBufferConfig } from "./sentence-buffer.js";
8
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/streaming/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAC;AAC1D,OAAO,EAAE,sBAAsB,EAAE,MAAM,qBAAqB,CAAC;AAC7D,OAAO,EAAE,cAAc,EAAE,mBAAmB,EAAE,MAAM,sBAAsB,CAAC"}
@@ -0,0 +1,78 @@
1
+ /**
2
+ * Sentence Buffer
3
+ * Accumulates incoming text and flushes complete sentences for anonymization,
4
+ * keeping an overlap region for NER context at chunk boundaries.
5
+ */
6
+ import type { AnonymizationPolicy } from "../types/index.js";
7
+ import type { RawPIIMap } from "../pipeline/tagger.js";
8
+ import type { KeyProvider } from "../crypto/index.js";
9
+ import type { IAnonymizer } from "../storage/session.js";
10
+ import type { SentenceBufferConfig, ResolvedBufferConfig, FlushResult } from "./types.js";
11
+ /**
12
+ * Resolves a user-supplied buffer config against the defaults: `lowLatency: true` selects the smaller low-latency defaults, and any explicitly provided field takes precedence.
13
+ */
14
+ export declare function resolveBufferConfig(config?: SentenceBufferConfig): ResolvedBufferConfig;
15
+ /**
16
+ * SentenceBuffer accumulates text and flushes at sentence boundaries,
17
+ * maintaining an overlap region for NER context across boundaries.
18
+ */
19
+ export declare class SentenceBuffer {
20
+ private buffer;
21
+ private overlapSuffix;
22
+ private cumulativePiiMap;
23
+ private totalEntities;
24
+ private readonly config;
25
+ private readonly anonymizer;
26
+ private readonly keyProvider;
27
+ private readonly locale?;
28
+ private readonly policy?;
29
+ constructor(anonymizer: IAnonymizer, config?: SentenceBufferConfig, options?: {
30
+ keyProvider?: KeyProvider;
31
+ locale?: string;
32
+ policy?: Partial<AnonymizationPolicy>;
33
+ initialPiiMap?: RawPIIMap;
34
+ });
35
+ /**
36
+ * Append a chunk of text. Returns flush results if sentence boundaries
37
+ * were found and text was anonymized, or an empty array if the chunk is empty or the buffer is still below minBufferSize or waiting for a boundary.
38
+ */
39
+ append(chunk: string): Promise<FlushResult[]>;
40
+ /**
41
+ * Flush all remaining buffered text as a single final segment (called on stream end); clears the buffer and the overlap.
42
+ */
43
+ flush(): Promise<FlushResult[]>;
44
+ /**
45
+ * Get a copy of the cumulative PII map built across all flushes.
46
+ */
47
+ getCumulativePiiMap(): RawPIIMap;
48
+ /**
49
+ * Get total entity count across all flushes.
50
+ */
51
+ getTotalEntities(): number;
52
+ /**
53
+ * Find and flush all available sentence-delimited segments from the buffer.
54
+ */
55
+ private flushAvailable;
56
+ /**
57
+ * Find the last sentence boundary position in the buffer.
58
+ * Returns the index after the boundary (split point), or -1 if none found.
59
+ */
60
+ private findLastBoundary;
61
+ /**
62
+ * Process a segment of text: prepend overlap, anonymize, extract new output (resolves to null for an empty segment).
63
+ */
64
+ private processSegment;
65
+ /**
66
+ * Extract the new (non-overlap) portion from the anonymized text.
67
+ *
68
+ * The anonymized text covers `overlapSuffix + segment`. We need to find
69
+ * the split point in the anonymized text that corresponds to the original
70
+ * offset where the new content begins.
71
+ *
72
+ * We walk through the original text and anonymized text simultaneously,
73
+ * advancing past PII tags in the anonymized text while tracking the
74
+ * corresponding position in the original text.
75
+ */
76
+ private extractNewOutput;
77
+ }
78
+ //# sourceMappingURL=sentence-buffer.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"sentence-buffer.d.ts","sourceRoot":"","sources":["../../src/streaming/sentence-buffer.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,mBAAmB,CAAC;AAC7D,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,uBAAuB,CAAC;AACvD,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAEtD,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,uBAAuB,CAAC;AACzD,OAAO,KAAK,EACV,oBAAoB,EACpB,oBAAoB,EACpB,WAAW,EACZ,MAAM,YAAY,CAAC;AAkBpB;;GAEG;AACH,wBAAgB,mBAAmB,CACjC,MAAM,CAAC,EAAE,oBAAoB,GAC5B,oBAAoB,CAYtB;AAED;;;GAGG;AACH,qBAAa,cAAc;IACzB,OAAO,CAAC,MAAM,CAAM;IACpB,OAAO,CAAC,aAAa,CAAM;IAC3B,OAAO,CAAC,gBAAgB,CAAwB;IAChD,OAAO,CAAC,aAAa,CAAK;IAE1B,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAuB;IAC9C,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAc;IACzC,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAqB;IACjD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC,CAA+B;gBAGrD,UAAU,EAAE,WAAW,EACvB,MAAM,CAAC,EAAE,oBAAoB,EAC7B,OAAO,CAAC,EAAE;QACR,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,MAAM,CAAC,EAAE,OAAO,CAAC,mBAAmB,CAAC,CAAC;QACtC,aAAa,CAAC,EAAE,SAAS,CAAC;KAC3B;IAaH;;;OAGG;IACG,MAAM,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC;IAYnD;;OAEG;IACG,KAAK,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC;IAWrC;;OAEG;IACH,mBAAmB,IAAI,SAAS;IAIhC;;OAEG;IACH,gBAAgB,IAAI,MAAM;IAI1B;;OAEG;YACW,cAAc;IA8B5B;;;OAGG;IACH,OAAO,CAAC,gBAAgB;IAaxB;;OAEG;YACW,cAAc;IA2D5B;;;;;;;;;;OAUG;IACH,OAAO,CAAC,gBAAgB;CA2DzB"}
@@ -0,0 +1,238 @@
1
+ /**
2
+ * Sentence Buffer
3
+ * Accumulates incoming text and flushes complete sentences for anonymization,
4
+ * keeping an overlap region for NER context at chunk boundaries.
5
+ */
6
+ import { decryptPIIMap } from "../crypto/index.js";
7
// Default split pattern: sentence punctuation followed by whitespace, or a
// run of two or more newlines.
const DEFAULT_SENTENCE_BOUNDARY = /[.!?]\s+|\n{2,}/;

// Baseline settings used when low-latency mode is off.
const DEFAULT_CONFIG = {
  overlapChars: 100,
  maxBufferSize: 8192,
  minBufferSize: 50,
  lowLatency: false,
  sentenceBoundary: DEFAULT_SENTENCE_BOUNDARY,
};

// Size overrides layered on top of DEFAULT_CONFIG in low-latency mode.
const LOW_LATENCY_CONFIG = {
  overlapChars: 50,
  maxBufferSize: 512,
  minBufferSize: 20,
};

/**
 * Merge a (possibly partial or missing) buffer config with the defaults.
 *
 * Only `lowLatency === true` switches the fallback values to the low-latency
 * set. Any field the caller supplies with a non-nullish value wins over the
 * fallback.
 *
 * @param config - Optional user configuration.
 * @returns A config object with every field populated.
 */
export function resolveBufferConfig(config) {
  const wantsLowLatency = config?.lowLatency === true;
  const fallback = wantsLowLatency
    ? Object.assign({}, DEFAULT_CONFIG, LOW_LATENCY_CONFIG, { lowLatency: true })
    : Object.assign({}, DEFAULT_CONFIG);

  const resolved = {};
  const fields = ["overlapChars", "maxBufferSize", "minBufferSize", "lowLatency", "sentenceBoundary"];
  for (const field of fields) {
    resolved[field] = config?.[field] ?? fallback[field];
  }
  return resolved;
}
35
/**
 * SentenceBuffer accumulates text and flushes at sentence boundaries,
 * maintaining an overlap region for NER context across boundaries.
 */
export class SentenceBuffer {
  buffer = "";
  overlapSuffix = "";
  cumulativePiiMap = new Map();
  totalEntities = 0;
  config;
  anonymizer;
  keyProvider;
  locale;
  policy;

  /**
   * @param anonymizer - Object whose `anonymize(text, locale, policy, piiMap)`
   *   is called for every flushed segment.
   * @param config - Optional buffer settings, resolved against the defaults.
   * @param options - Optional `keyProvider`, `locale`, `policy` and
   *   `initialPiiMap` (copied, not referenced).
   */
  constructor(anonymizer, config, options) {
    this.anonymizer = anonymizer;
    this.config = resolveBufferConfig(config);
    this.keyProvider = options?.keyProvider ?? null;
    this.locale = options?.locale;
    this.policy = options?.policy;
    if (options?.initialPiiMap) {
      this.cumulativePiiMap = new Map(options.initialPiiMap);
    }
  }

  /**
   * Append a chunk of text. Returns flush results if sentence boundaries
   * were found and text was anonymized, or an empty array if buffering.
   */
  async append(chunk) {
    if (chunk.length === 0) {
      return [];
    }
    this.buffer += chunk;
    if (this.buffer.length < this.config.minBufferSize) {
      return [];
    }
    return this.flushAvailable();
  }

  /**
   * Flush all remaining buffered text (called on stream end).
   */
  async flush() {
    if (this.buffer.length === 0) {
      return [];
    }
    const result = await this.processSegment(this.buffer, true);
    this.buffer = "";
    this.overlapSuffix = "";
    return result ? [result] : [];
  }

  /**
   * Get a copy of the cumulative PII map built across all flushes.
   */
  getCumulativePiiMap() {
    return new Map(this.cumulativePiiMap);
  }

  /**
   * Get total entity count across all flushes.
   */
  getTotalEntities() {
    return this.totalEntities;
  }

  /**
   * Find and flush all available sentence-delimited segments from the buffer.
   * Without a boundary, text is only flushed once the buffer exceeds
   * `maxBufferSize`.
   */
  async flushAvailable() {
    const results = [];
    while (this.buffer.length >= this.config.minBufferSize) {
      let splitAt = this.findLastBoundary();
      if (splitAt === -1) {
        if (this.buffer.length <= this.config.maxBufferSize) {
          // Wait for more data
          break;
        }
        // Force flush at maxBufferSize
        splitAt = this.forcedSplitPoint();
      }
      const segment = this.buffer.slice(0, splitAt);
      this.buffer = this.buffer.slice(splitAt);
      const result = await this.processSegment(segment, false);
      if (result) {
        results.push(result);
      }
    }
    return results;
  }

  /**
   * Pick the cut position for a forced flush. Always advances by at least
   * one code unit, and backs off by one when the cut would separate a
   * surrogate pair (which would emit a lone surrogate downstream).
   */
  forcedSplitPoint() {
    const limit = Math.max(1, this.config.maxBufferSize);
    const lastUnit = this.buffer.charCodeAt(limit - 1);
    const endsOnHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
    return endsOnHighSurrogate && limit > 1 ? limit - 1 : limit;
  }

  /**
   * Find the last sentence boundary position in the buffer.
   * Returns the index after the boundary (split point), or -1 if none found.
   */
  findLastBoundary() {
    const boundary = this.config.sentenceBoundary;
    // Keep the caller's flags (i, m, s, u, ...); force global scanning and
    // drop sticky, which would stop the scan at the first non-match.
    const scanner = new RegExp(boundary.source, `${boundary.flags.replace(/[gy]/g, "")}g`);
    let splitPoint = -1;
    // matchAll advances past zero-length matches, so patterns such as a
    // lookbehind cannot stall the scan.
    for (const match of this.buffer.matchAll(scanner)) {
      const end = match.index + match[0].length;
      // A split at offset 0 would produce an empty segment; ignore it.
      if (end > 0) {
        splitPoint = end;
      }
    }
    return splitPoint;
  }

  /**
   * Process a segment of text: prepend overlap, anonymize, extract new output.
   *
   * @param segment - Text to anonymize.
   * @param isFinal - When true the overlap suffix is not updated.
   * @returns `{ anonymizedText, entities }` for the new portion (entity
   *   offsets relative to `segment`), or null for an empty segment.
   */
  async processSegment(segment, isFinal) {
    if (segment.length === 0) {
      return null;
    }
    // Build context text with overlap for NER boundary detection
    const contextText = this.overlapSuffix + segment;
    const overlapLength = this.overlapSuffix.length;
    // Anonymize with cumulative PII map for ID consistency
    const result = await this.anonymizer.anonymize(
      contextText,
      this.locale,
      this.policy,
      this.cumulativePiiMap,
    );
    // Decrypt the PII map from the result to merge into cumulative map
    if (result.piiMap && this.keyProvider) {
      const key = await this.keyProvider.getKey();
      const decrypted = await decryptPIIMap(result.piiMap, key);
      for (const [k, v] of decrypted) {
        this.cumulativePiiMap.set(k, v);
      }
    }
    // Extract only the new (non-overlap) portion of the anonymized text
    const newOutput = this.extractNewOutput(
      contextText,
      result.anonymizedText,
      result.entities,
      overlapLength,
    );
    // Update overlap suffix for next segment
    if (!isFinal) {
      this.overlapSuffix = segment.slice(Math.max(0, segment.length - this.config.overlapChars));
    }
    // Keep only entities that start in the new portion, with offsets made
    // relative to the segment (not contextText)
    const adjustedEntities = result.entities
      .filter((e) => e.start >= overlapLength)
      .map((e) => ({
        ...e,
        start: e.start - overlapLength,
        end: e.end - overlapLength,
      }));
    this.totalEntities += adjustedEntities.length;
    return {
      anonymizedText: newOutput,
      entities: adjustedEntities,
    };
  }

  /**
   * Extract the new (non-overlap) portion from the anonymized text.
   *
   * The anonymized text covers `overlapSuffix + segment`. We walk the
   * entities that start inside the overlap, translating original offsets to
   * anonymized offsets by stepping over each replacement tag (located via
   * its closing "/>"), and slice the anonymized text where the overlap ends.
   *
   * An entity that straddles the overlap boundary is treated as part of the
   * overlap: its tag is skipped entirely and output starts right after it.
   *
   * @param originalContext - Original `overlapSuffix + segment` text.
   * @param anonymizedText - Anonymized form of `originalContext`.
   * @param entities - Entities with `start`/`end` offsets into the original.
   * @param overlapLength - Length of the overlap prefix in the original.
   * @returns The anonymized text that corresponds to the new segment.
   */
  extractNewOutput(originalContext, anonymizedText, entities, overlapLength) {
    if (overlapLength === 0) {
      return anonymizedText;
    }
    const sortedEntities = [...entities].sort((a, b) => a.start - b.start);
    let originalPos = 0;
    let anonymizedPos = 0;
    for (const entity of sortedEntities) {
      if (entity.start >= overlapLength) {
        // This entity is fully in the new portion — stop here
        break;
      }
      // Unchanged text between the previous entity and this one
      anonymizedPos += entity.start - originalPos;
      // Tags look like: <PII type="TYPE" id="N"/>
      const tagStart = anonymizedPos;
      const tagEnd = anonymizedText.indexOf("/>", tagStart);
      const tagLength = tagEnd !== -1 ? tagEnd + 2 - tagStart : entity.end - entity.start;
      originalPos = entity.end;
      anonymizedPos = tagStart + tagLength;
    }
    // Advance the remaining gap to the overlap boundary. When the last
    // entity straddles the boundary, originalPos is already past it; clamp
    // to 0 so we never rewind into the middle of that entity's tag.
    anonymizedPos += Math.max(0, overlapLength - originalPos);
    return anonymizedText.slice(anonymizedPos);
  }
}
238
+ //# sourceMappingURL=sentence-buffer.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"sentence-buffer.js","sourceRoot":"","sources":["../../src/streaming/sentence-buffer.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAKH,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAQnD,MAAM,yBAAyB,GAAG,iBAAiB,CAAC;AAEpD,MAAM,cAAc,GAAyB;IAC3C,YAAY,EAAE,GAAG;IACjB,aAAa,EAAE,IAAI;IACnB,aAAa,EAAE,EAAE;IACjB,UAAU,EAAE,KAAK;IACjB,gBAAgB,EAAE,yBAAyB;CAC5C,CAAC;AAEF,MAAM,kBAAkB,GAAkC;IACxD,YAAY,EAAE,EAAE;IAChB,aAAa,EAAE,GAAG;IAClB,aAAa,EAAE,EAAE;CAClB,CAAC;AAEF;;GAEG;AACH,MAAM,UAAU,mBAAmB,CACjC,MAA6B;IAE7B,MAAM,IAAI,GAAG,MAAM,EAAE,UAAU,KAAK,IAAI;QACtC,CAAC,CAAC,EAAE,GAAG,cAAc,EAAE,GAAG,kBAAkB,EAAE,UAAU,EAAE,IAAI,EAAE;QAChE,CAAC,CAAC,EAAE,GAAG,cAAc,EAAE,CAAC;IAE1B,OAAO;QACL,YAAY,EAAE,MAAM,EAAE,YAAY,IAAI,IAAI,CAAC,YAAY;QACvD,aAAa,EAAE,MAAM,EAAE,aAAa,IAAI,IAAI,CAAC,aAAa;QAC1D,aAAa,EAAE,MAAM,EAAE,aAAa,IAAI,IAAI,CAAC,aAAa;QAC1D,UAAU,EAAE,MAAM,EAAE,UAAU,IAAI,IAAI,CAAC,UAAU;QACjD,gBAAgB,EAAE,MAAM,EAAE,gBAAgB,IAAI,IAAI,CAAC,gBAAgB;KACpE,CAAC;AACJ,CAAC;AAED;;;GAGG;AACH,MAAM,OAAO,cAAc;IACjB,MAAM,GAAG,EAAE,CAAC;IACZ,aAAa,GAAG,EAAE,CAAC;IACnB,gBAAgB,GAAc,IAAI,GAAG,EAAE,CAAC;IACxC,aAAa,GAAG,CAAC,CAAC;IAET,MAAM,CAAuB;IAC7B,UAAU,CAAc;IACxB,WAAW,CAAqB;IAChC,MAAM,CAAU;IAChB,MAAM,CAAgC;IAEvD,YACE,UAAuB,EACvB,MAA6B,EAC7B,OAKC;QAED,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;QAC7B,IAAI,CAAC,MAAM,GAAG,mBAAmB,CAAC,MAAM,CAAC,CAAC;QAC1C,IAAI,CAAC,WAAW,GAAG,OAAO,EAAE,WAAW,IAAI,IAAI,CAAC;QAChD,IAAI,CAAC,MAAM,GAAG,OAAO,EAAE,MAAM,CAAC;QAC9B,IAAI,CAAC,MAAM,GAAG,OAAO,EAAE,MAAM,CAAC;QAE9B,IAAI,OAAO,EAAE,aAAa,EAAE,CAAC;YAC3B,IAAI,CAAC,gBAAgB,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC;QACzD,CAAC;IACH,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,MAAM,CAAC,KAAa;QACxB,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,EAAE,CAAC;QAElC,IAAI,CAAC,MAAM,IAAI,KAAK,CAAC;QAErB,IAAI,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,aAAa,EAAE,CAAC;YACnD,OAAO,EAAE,CAAC;QACZ,CAAC;QAED,OAAO,IAAI,CAAC,cAAc,EAAE,CAAC;IAC/B,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,KAAK;QACT,IAAI,IAAI,CAAC,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC7B,OAAO,EAAE,CAAC;QACZ,CAAC;QAED,
MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;QAC5D,IAAI,CAAC,MAAM,GAAG,EAAE,CAAC;QACjB,IAAI,CAAC,aAAa,GAAG,EAAE,CAAC;QACxB,OAAO,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IAChC,CAAC;IAED;;OAEG;IACH,mBAAmB;QACjB,OAAO,IAAI,GAAG,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;IACxC,CAAC;IAED;;OAEG;IACH,gBAAgB;QACd,OAAO,IAAI,CAAC,aAAa,CAAC;IAC5B,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,cAAc;QAC1B,MAAM,OAAO,GAAkB,EAAE,CAAC;QAElC,OAAO,IAAI,CAAC,MAAM,CAAC,MAAM,IAAI,IAAI,CAAC,MAAM,CAAC,aAAa,EAAE,CAAC;YACvD,MAAM,aAAa,GAAG,IAAI,CAAC,gBAAgB,EAAE,CAAC;YAE9C,IAAI,aAAa,KAAK,CAAC,CAAC,EAAE,CAAC;gBACzB,oBAAoB;gBACpB,IAAI,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,aAAa,EAAE,CAAC;oBACnD,+BAA+B;oBAC/B,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC;oBAChE,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC;oBAC3D,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,cAAc,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;oBACzD,IAAI,MAAM;wBAAE,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;gBACnC,CAAC;qBAAM,CAAC;oBACN,qBAAqB;oBACrB,MAAM;gBACR,CAAC;YACH,CAAC;iBAAM,CAAC;gBACN,2BAA2B;gBAC3B,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,aAAa,CAAC,CAAC;gBACpD,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;gBAC/C,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,cAAc,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;gBACzD,IAAI,MAAM;oBAAE,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACnC,CAAC;QACH,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAED;;;OAGG;IACK,gBAAgB;QACtB,MAAM,KAAK,GAAG,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,gBAAgB,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QACnE,IAAI,SAAS,GAAG,CAAC,CAAC,CAAC;QAEnB,IAAI,KAA6B,CAAC;QAClC,OAAO,CAAC,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;YAClD,0CAA0C;YAC1C,SAAS,GAAG,KAAK,CAAC,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;QAC5C,CAAC;QAED,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,cAAc,CAC1B,OAAe,EACf,OAAgB;QAEhB,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,IAAI,CAAC;QAEtC,6DAA6D;QAC7D,MAAM,WAAW,GAAG,IAAI,CAAC
,aAAa,GAAG,OAAO,CAAC;QACjD,MAAM,aAAa,GAAG,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC;QAEhD,uDAAuD;QACvD,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,SAAS,CAC5C,WAAW,EACX,IAAI,CAAC,MAAM,EACX,IAAI,CAAC,MAAM,EACX,IAAI,CAAC,gBAAgB,CACtB,CAAC;QAEF,mEAAmE;QACnE,IAAI,MAAM,CAAC,MAAM,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;YACtC,MAAM,GAAG,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,MAAM,EAAE,CAAC;YAC5C,MAAM,SAAS,GAAG,MAAM,aAAa,CAAC,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;YAC1D,KAAK,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,SAAS,EAAE,CAAC;gBAC/B,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;YAClC,CAAC;QACH,CAAC;QAED,oEAAoE;QACpE,MAAM,SAAS,GAAG,IAAI,CAAC,gBAAgB,CACrC,WAAW,EACX,MAAM,CAAC,cAAc,EACrB,MAAM,CAAC,QAAQ,EACf,aAAa,CACd,CAAC;QAEF,yCAAyC;QACzC,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,IAAI,CAAC,aAAa,GAAG,OAAO,CAAC,KAAK,CAChC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC,CACvD,CAAC;QACJ,CAAC;QAED,mDAAmD;QACnD,MAAM,WAAW,GAAG,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,IAAI,aAAa,CAAC,CAAC;QAC5E,wEAAwE;QACxE,MAAM,gBAAgB,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YAC/C,GAAG,CAAC;YACJ,KAAK,EAAE,CAAC,CAAC,KAAK,GAAG,aAAa;YAC9B,GAAG,EAAE,CAAC,CAAC,GAAG,GAAG,aAAa;SAC3B,CAAC,CAAC,CAAC;QAEJ,IAAI,CAAC,aAAa,IAAI,gBAAgB,CAAC,MAAM,CAAC;QAE9C,OAAO;YACL,cAAc,EAAE,SAAS;YACzB,QAAQ,EAAE,gBAAgB;SAC3B,CAAC;IACJ,CAAC;IAED;;;;;;;;;;OAUG;IACK,gBAAgB,CACtB,eAAuB,EACvB,cAAsB,EACtB,QAAwE,EACxE,aAAqB;QAErB,IAAI,aAAa,KAAK,CAAC,EAAE,CAAC;YACxB,OAAO,cAAc,CAAC;QACxB,CAAC;QAED,6DAA6D;QAC7D,kFAAkF;QAClF,qFAAqF;QAErF,kCAAkC;QAClC,MAAM,cAAc,GAAG,CAAC,GAAG,QAAQ,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;QAEvE,iEAAiE;QACjE,IAAI,WAAW,GAAG,CAAC,CAAC;QACpB,IAAI,aAAa,GAAG,CAAC,CAAC;QAEtB,KAAK,MAAM,MAAM,IAAI,cAAc,EAAE,CAAC;YACpC,IAAI,MAAM,CAAC,KAAK,IAAI,aAAa,EAAE,CAAC;gBAClC,sDAAsD;gBACtD,MAAM;YACR,CAAC;YAED,iCAAiC;YACjC,MAAM,GAAG,GAAG,MAAM,CAAC,KAAK,GAAG,WAAW,CAAC;YACvC,aAAa,IAAI,GAAG,CAAC;YACrB,WAAW,GAAG,MAAM,CAAC,KAAK,CAAC;YAE3B,oCAAoC;YAC
pC,MAAM,kBAAkB,GAAG,MAAM,CAAC,GAAG,GAAG,MAAM,CAAC,KAAK,CAAC;YAErD,uDAAuD;YACvD,4CAA4C;YAC5C,MAAM,QAAQ,GAAG,aAAa,CAAC;YAC/B,MAAM,MAAM,GAAG,cAAc,CAAC,OAAO,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;YACtD,MAAM,SAAS,GACb,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,GAAG,QAAQ,CAAC,CAAC,CAAC,kBAAkB,CAAC;YAE7D,IAAI,MAAM,CAAC,GAAG,IAAI,aAAa,EAAE,CAAC;gBAChC,qDAAqD;gBACrD,WAAW,GAAG,MAAM,CAAC,GAAG,CAAC;gBACzB,aAAa,GAAG,QAAQ,GAAG,SAAS,CAAC;YACvC,CAAC;iBAAM,CAAC;gBACN,sEAAsE;gBACtE,WAAW,GAAG,MAAM,CAAC,GAAG,CAAC;gBACzB,aAAa,GAAG,QAAQ,GAAG,SAAS,CAAC;YACvC,CAAC;QACH,CAAC;QAED,gDAAgD;QAChD,MAAM,YAAY,GAAG,aAAa,GAAG,WAAW,CAAC;QACjD,aAAa,IAAI,YAAY,CAAC;QAE9B,OAAO,cAAc,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;IAC7C,CAAC;CACF"}
@@ -0,0 +1,38 @@
1
+ /**
2
+ * Stream Factory
3
+ * Creates configured AnonymizerStream instances
4
+ */
5
+ import { AnonymizerStream } from "./anonymizer-stream.js";
6
+ import type { StreamConfig } from "./types.js";
7
+ /**
8
+ * Creates a streaming anonymizer that processes text chunk-by-chunk.
+ *
+ * The returned promise resolves once the underlying anonymizer has been
+ * created and initialized. When `buffer.lowLatency` is `true`, the
+ * implementation forces NER to `disabled`. When `sessionId`,
+ * `piiStorageProvider` and `keyProvider` are all supplied, a previously
+ * stored PII map for that session is loaded and decrypted before the
+ * stream is constructed.
9
+ *
10
+ * @example
11
+ * ```typescript
12
+ * // Basic streaming with NER
13
+ * const stream = await createAnonymizerStream({
14
+ * anonymizer: { ner: { mode: 'quantized' } },
15
+ * });
16
+ *
17
+ * createReadStream('input.txt')
18
+ * .pipe(stream)
19
+ * .pipe(createWriteStream('anonymized.txt'));
20
+ * ```
21
+ *
22
+ * @example
23
+ * ```typescript
24
+ * // Low-latency mode for LLM token streams
25
+ * const stream = await createAnonymizerStream({
26
+ * buffer: { lowLatency: true },
27
+ * sessionId: 'chat-123',
28
+ * piiStorageProvider: storage,
29
+ * keyProvider: keyProvider,
30
+ * });
31
+ *
32
+ * llmTokenStream.pipe(stream).on('data', (chunk) => {
33
+ * ws.send(chunk.toString());
34
+ * });
35
+ * ```
+ *
+ * @param config - Optional stream configuration; may be omitted entirely.
+ * @returns A promise that resolves to the configured `AnonymizerStream`.
36
+ */
37
+ export declare function createAnonymizerStream(config?: StreamConfig): Promise<AnonymizerStream>;
38
+ //# sourceMappingURL=stream-factory.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"stream-factory.d.ts","sourceRoot":"","sources":["../../src/streaming/stream-factory.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAQH,OAAO,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAC;AAC1D,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAE/C;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6BG;AACH,wBAAsB,sBAAsB,CAC1C,MAAM,GAAE,YAAiB,GACxB,OAAO,CAAC,gBAAgB,CAAC,CAkC3B"}
@@ -0,0 +1,69 @@
1
+ /**
2
+ * Stream Factory
3
+ * Creates configured AnonymizerStream instances
4
+ */
5
+ import { createAnonymizer, } from "../core/anonymizer.js";
6
+ import { decryptPIIMap } from "../crypto/index.js";
7
+ import { AnonymizerStream } from "./anonymizer-stream.js";
8
/**
 * Creates a streaming anonymizer that processes text chunk-by-chunk.
 *
 * What happens, in order:
 *  1. The anonymizer configuration is derived from `config.anonymizer`.
 *     With `config.buffer.lowLatency === true` NER is forced to
 *     `{ mode: "disabled" }`; a supplied `config.keyProvider` is copied into
 *     the anonymizer configuration so both sides use the same key.
 *  2. The anonymizer is created and its `initialize()` is awaited.
 *  3. If a non-empty `sessionId`, a `piiStorageProvider` and a `keyProvider`
 *     are all present, the stored session is loaded and its PII map is
 *     decrypted. A missing session (`null` or `undefined`) simply means the
 *     stream starts without an initial PII map.
 *
 * @example
 * ```typescript
 * // Basic streaming with NER
 * const stream = await createAnonymizerStream({
 *   anonymizer: { ner: { mode: 'quantized' } },
 * });
 *
 * createReadStream('input.txt')
 *   .pipe(stream)
 *   .pipe(createWriteStream('anonymized.txt'));
 * ```
 *
 * @example
 * ```typescript
 * // Low-latency mode for LLM token streams
 * const stream = await createAnonymizerStream({
 *   buffer: { lowLatency: true },
 *   sessionId: 'chat-123',
 *   piiStorageProvider: storage,
 *   keyProvider: keyProvider,
 * });
 *
 * llmTokenStream.pipe(stream).on('data', (chunk) => {
 *   ws.send(chunk.toString());
 * });
 * ```
 *
 * @param {object} [config={}] - Stream configuration. Fields read here:
 *   `anonymizer`, `buffer.lowLatency`, `keyProvider`, `piiStorageProvider`,
 *   `sessionId`. The object is passed through unchanged to `AnonymizerStream`.
 * @returns {Promise<AnonymizerStream>} The configured stream, built around an
 *   already-initialized anonymizer.
 * @throws Propagates any rejection from anonymizer initialization, the
 *   storage provider, the key provider, or PII-map decryption.
 */
export async function createAnonymizerStream(config = {}) {
    const { keyProvider, piiStorageProvider, sessionId } = config;
    // In low-latency mode, force NER disabled for minimal latency
    let anonymizerConfig = config.anonymizer;
    if (config.buffer?.lowLatency === true) {
        anonymizerConfig = {
            ...anonymizerConfig,
            ner: { mode: "disabled" },
        };
    }
    // Ensure the anonymizer uses the same key provider as the stream
    // so that PII maps encrypted by the anonymizer can be decrypted by the stream.
    if (keyProvider) {
        anonymizerConfig = {
            ...anonymizerConfig,
            keyProvider,
        };
    }
    // Create and initialize the anonymizer
    const anonymizer = createAnonymizer(anonymizerConfig);
    await anonymizer.initialize();
    // Load existing PII map from session storage if available.
    // `null` is treated like `undefined` for every prerequisite so that a
    // nulled-out option disables restoring instead of crashing later on.
    let initialPiiMap;
    const canRestoreSession = sessionId != null &&
        sessionId !== "" &&
        piiStorageProvider != null &&
        keyProvider != null;
    if (canRestoreSession) {
        const existing = await piiStorageProvider.load(sessionId);
        // Providers may signal "no such session" with either null or undefined.
        if (existing != null) {
            const key = await keyProvider.getKey();
            initialPiiMap = await decryptPIIMap(existing.piiMap, key);
        }
    }
    return new AnonymizerStream(anonymizer, config, initialPiiMap);
}
69
+ //# sourceMappingURL=stream-factory.js.map