agentic-qe 1.7.0 → 1.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/skills/sherlock-review/SKILL.md +786 -0
- package/CHANGELOG.md +625 -0
- package/README.md +42 -55
- package/dist/agents/BaseAgent.d.ts +10 -10
- package/dist/agents/BaseAgent.d.ts.map +1 -1
- package/dist/agents/BaseAgent.js +96 -78
- package/dist/agents/BaseAgent.js.map +1 -1
- package/dist/agents/CoverageAnalyzerAgent.js +2 -2
- package/dist/agents/CoverageAnalyzerAgent.js.map +1 -1
- package/dist/agents/LearningAgent.d.ts +2 -2
- package/dist/agents/LearningAgent.d.ts.map +1 -1
- package/dist/agents/LearningAgent.js +4 -4
- package/dist/agents/LearningAgent.js.map +1 -1
- package/dist/agents/TestExecutorAgent.d.ts +41 -2
- package/dist/agents/TestExecutorAgent.d.ts.map +1 -1
- package/dist/agents/TestExecutorAgent.js +314 -64
- package/dist/agents/TestExecutorAgent.js.map +1 -1
- package/dist/agents/examples/batchAnalyze.d.ts +252 -0
- package/dist/agents/examples/batchAnalyze.d.ts.map +1 -0
- package/dist/agents/examples/batchAnalyze.js +259 -0
- package/dist/agents/examples/batchAnalyze.js.map +1 -0
- package/dist/agents/examples/batchGenerate.d.ts +153 -0
- package/dist/agents/examples/batchGenerate.d.ts.map +1 -0
- package/dist/agents/examples/batchGenerate.js +166 -0
- package/dist/agents/examples/batchGenerate.js.map +1 -0
- package/dist/agents/generateWithPII.d.ts +128 -0
- package/dist/agents/generateWithPII.d.ts.map +1 -0
- package/dist/agents/generateWithPII.js +175 -0
- package/dist/agents/generateWithPII.js.map +1 -0
- package/dist/agents/lifecycle/AgentLifecycleManager.d.ts +1 -6
- package/dist/agents/lifecycle/AgentLifecycleManager.d.ts.map +1 -1
- package/dist/agents/lifecycle/AgentLifecycleManager.js +0 -7
- package/dist/agents/lifecycle/AgentLifecycleManager.js.map +1 -1
- package/dist/cli/commands/init.d.ts +6 -3
- package/dist/cli/commands/init.d.ts.map +1 -1
- package/dist/cli/commands/init.js +51 -46
- package/dist/cli/commands/init.js.map +1 -1
- package/dist/cli/commands/learn/index.d.ts +4 -0
- package/dist/cli/commands/learn/index.d.ts.map +1 -1
- package/dist/cli/commands/learn/index.js +57 -0
- package/dist/cli/commands/learn/index.js.map +1 -1
- package/dist/cli/index.js +14 -0
- package/dist/cli/index.js.map +1 -1
- package/dist/core/memory/AdapterConfig.d.ts +108 -0
- package/dist/core/memory/AdapterConfig.d.ts.map +1 -0
- package/dist/core/memory/AdapterConfig.js +189 -0
- package/dist/core/memory/AdapterConfig.js.map +1 -0
- package/dist/core/memory/AdapterFactory.d.ts +72 -0
- package/dist/core/memory/AdapterFactory.d.ts.map +1 -0
- package/dist/core/memory/AdapterFactory.js +152 -0
- package/dist/core/memory/AdapterFactory.js.map +1 -0
- package/dist/core/memory/AgentDBManager.d.ts +28 -5
- package/dist/core/memory/AgentDBManager.d.ts.map +1 -1
- package/dist/core/memory/AgentDBManager.js +99 -73
- package/dist/core/memory/AgentDBManager.js.map +1 -1
- package/dist/core/memory/PatternCache.d.ts +105 -0
- package/dist/core/memory/PatternCache.d.ts.map +1 -0
- package/dist/core/memory/PatternCache.js +183 -0
- package/dist/core/memory/PatternCache.js.map +1 -0
- package/dist/core/memory/RealAgentDBAdapter.d.ts +14 -0
- package/dist/core/memory/RealAgentDBAdapter.d.ts.map +1 -1
- package/dist/core/memory/RealAgentDBAdapter.js +153 -16
- package/dist/core/memory/RealAgentDBAdapter.js.map +1 -1
- package/dist/core/memory/ReasoningBankAdapter.d.ts +4 -0
- package/dist/core/memory/ReasoningBankAdapter.d.ts.map +1 -1
- package/dist/core/memory/ReasoningBankAdapter.js +20 -0
- package/dist/core/memory/ReasoningBankAdapter.js.map +1 -1
- package/dist/core/memory/SwarmMemoryManager.d.ts +8 -0
- package/dist/core/memory/SwarmMemoryManager.d.ts.map +1 -1
- package/dist/core/memory/SwarmMemoryManager.js +33 -0
- package/dist/core/memory/SwarmMemoryManager.js.map +1 -1
- package/dist/core/memory/index.d.ts +6 -0
- package/dist/core/memory/index.d.ts.map +1 -1
- package/dist/core/memory/index.js +12 -1
- package/dist/core/memory/index.js.map +1 -1
- package/dist/core/neural/NeuralTrainer.d.ts +2 -6
- package/dist/core/neural/NeuralTrainer.d.ts.map +1 -1
- package/dist/core/neural/NeuralTrainer.js +7 -25
- package/dist/core/neural/NeuralTrainer.js.map +1 -1
- package/dist/learning/ImprovementLoop.js +2 -2
- package/dist/learning/ImprovementLoop.js.map +1 -1
- package/dist/learning/LearningEngine.d.ts +11 -7
- package/dist/learning/LearningEngine.d.ts.map +1 -1
- package/dist/learning/LearningEngine.js +156 -72
- package/dist/learning/LearningEngine.js.map +1 -1
- package/dist/mcp/handlers/filtered/coverage-analyzer-filtered.d.ts +83 -0
- package/dist/mcp/handlers/filtered/coverage-analyzer-filtered.d.ts.map +1 -0
- package/dist/mcp/handlers/filtered/coverage-analyzer-filtered.js +130 -0
- package/dist/mcp/handlers/filtered/coverage-analyzer-filtered.js.map +1 -0
- package/dist/mcp/handlers/filtered/flaky-detector-filtered.d.ts +58 -0
- package/dist/mcp/handlers/filtered/flaky-detector-filtered.d.ts.map +1 -0
- package/dist/mcp/handlers/filtered/flaky-detector-filtered.js +84 -0
- package/dist/mcp/handlers/filtered/flaky-detector-filtered.js.map +1 -0
- package/dist/mcp/handlers/filtered/index.d.ts +47 -0
- package/dist/mcp/handlers/filtered/index.d.ts.map +1 -0
- package/dist/mcp/handlers/filtered/index.js +63 -0
- package/dist/mcp/handlers/filtered/index.js.map +1 -0
- package/dist/mcp/handlers/filtered/performance-tester-filtered.d.ts +57 -0
- package/dist/mcp/handlers/filtered/performance-tester-filtered.d.ts.map +1 -0
- package/dist/mcp/handlers/filtered/performance-tester-filtered.js +83 -0
- package/dist/mcp/handlers/filtered/performance-tester-filtered.js.map +1 -0
- package/dist/mcp/handlers/filtered/quality-assessor-filtered.d.ts +57 -0
- package/dist/mcp/handlers/filtered/quality-assessor-filtered.d.ts.map +1 -0
- package/dist/mcp/handlers/filtered/quality-assessor-filtered.js +93 -0
- package/dist/mcp/handlers/filtered/quality-assessor-filtered.js.map +1 -0
- package/dist/mcp/handlers/filtered/security-scanner-filtered.d.ts +54 -0
- package/dist/mcp/handlers/filtered/security-scanner-filtered.d.ts.map +1 -0
- package/dist/mcp/handlers/filtered/security-scanner-filtered.js +73 -0
- package/dist/mcp/handlers/filtered/security-scanner-filtered.js.map +1 -0
- package/dist/mcp/handlers/filtered/test-executor-filtered.d.ts +61 -0
- package/dist/mcp/handlers/filtered/test-executor-filtered.d.ts.map +1 -0
- package/dist/mcp/handlers/filtered/test-executor-filtered.js +117 -0
- package/dist/mcp/handlers/filtered/test-executor-filtered.js.map +1 -0
- package/dist/mcp/handlers/phase2/Phase2Tools.js +2 -2
- package/dist/mcp/handlers/phase2/Phase2Tools.js.map +1 -1
- package/dist/scripts/backup-helper.d.ts +64 -0
- package/dist/scripts/backup-helper.d.ts.map +1 -0
- package/dist/scripts/backup-helper.js +251 -0
- package/dist/scripts/backup-helper.js.map +1 -0
- package/dist/scripts/migrate-with-backup.d.ts +15 -0
- package/dist/scripts/migrate-with-backup.d.ts.map +1 -0
- package/dist/scripts/migrate-with-backup.js +194 -0
- package/dist/scripts/migrate-with-backup.js.map +1 -0
- package/dist/security/pii-tokenization.d.ts +216 -0
- package/dist/security/pii-tokenization.d.ts.map +1 -0
- package/dist/security/pii-tokenization.js +325 -0
- package/dist/security/pii-tokenization.js.map +1 -0
- package/dist/utils/EmbeddingGenerator.d.ts +35 -0
- package/dist/utils/EmbeddingGenerator.d.ts.map +1 -0
- package/dist/utils/EmbeddingGenerator.js +72 -0
- package/dist/utils/EmbeddingGenerator.js.map +1 -0
- package/dist/utils/batch-operations.d.ts +215 -0
- package/dist/utils/batch-operations.d.ts.map +1 -0
- package/dist/utils/batch-operations.js +266 -0
- package/dist/utils/batch-operations.js.map +1 -0
- package/dist/utils/filtering.d.ts +180 -0
- package/dist/utils/filtering.d.ts.map +1 -0
- package/dist/utils/filtering.js +288 -0
- package/dist/utils/filtering.js.map +1 -0
- package/dist/utils/prompt-cache-examples.d.ts +111 -0
- package/dist/utils/prompt-cache-examples.d.ts.map +1 -0
- package/dist/utils/prompt-cache-examples.js +416 -0
- package/dist/utils/prompt-cache-examples.js.map +1 -0
- package/dist/utils/prompt-cache.d.ts +305 -0
- package/dist/utils/prompt-cache.d.ts.map +1 -0
- package/dist/utils/prompt-cache.js +448 -0
- package/dist/utils/prompt-cache.js.map +1 -0
- package/package.json +7 -16
- package/dist/mcp/tools/deprecated.d.ts +0 -1390
- package/dist/mcp/tools/deprecated.d.ts.map +0 -1
- package/dist/mcp/tools/deprecated.js +0 -859
- package/dist/mcp/tools/deprecated.js.map +0 -1
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PII Tokenization Layer
|
|
3
|
+
*
|
|
4
|
+
* Provides bidirectional tokenization/detokenization of Personally Identifiable Information (PII)
|
|
5
|
+
* for GDPR and CCPA compliance. Supports email, phone, SSN, credit card, and name detection.
|
|
6
|
+
*
|
|
7
|
+
* @module security/pii-tokenization
|
|
8
|
+
* @compliance GDPR Article 25 (Data Protection by Design), CCPA Section 1798.100
|
|
9
|
+
* @see docs/planning/mcp-improvement-plan-revised.md#CO-2
|
|
10
|
+
*/
|
|
11
|
+
/**
|
|
12
|
+
* Bidirectional mapping for tokenized PII values
|
|
13
|
+
*
|
|
14
|
+
* @compliance GDPR Article 32 - Stores original values temporarily for detokenization,
|
|
15
|
+
* must be cleared after use to prevent data retention issues
|
|
16
|
+
*/
|
|
17
|
+
export interface TokenizationMap {
|
|
18
|
+
/** Email addresses (matched with a simplified RFC 5322 pattern) */
|
|
19
|
+
email: Map<string, string>;
|
|
20
|
+
/** Phone numbers (US numbers in E.164 or common national formats) */
|
|
21
|
+
phone: Map<string, string>;
|
|
22
|
+
/** Social Security Numbers (US format: XXX-XX-XXXX) */
|
|
23
|
+
ssn: Map<string, string>;
|
|
24
|
+
/** Credit card numbers (Luhn algorithm validation recommended) */
|
|
25
|
+
creditCard: Map<string, string>;
|
|
26
|
+
/** Personal names (First Last pattern, basic heuristic) */
|
|
27
|
+
name: Map<string, string>;
|
|
28
|
+
}
|
|
29
|
+
/**
|
|
30
|
+
* Result of tokenization operation with statistics
|
|
31
|
+
*/
|
|
32
|
+
export interface TokenizationResult {
|
|
33
|
+
/** Content with PII replaced by tokens */
|
|
34
|
+
tokenized: string;
|
|
35
|
+
/** Reverse mapping for detokenization (MUST be cleared after use) */
|
|
36
|
+
reverseMap: TokenizationMap;
|
|
37
|
+
/** Total count of PII instances found */
|
|
38
|
+
piiCount: number;
|
|
39
|
+
/** Breakdown by PII type for audit trail */
|
|
40
|
+
piiBreakdown: {
|
|
41
|
+
emails: number;
|
|
42
|
+
phones: number;
|
|
43
|
+
ssns: number;
|
|
44
|
+
creditCards: number;
|
|
45
|
+
names: number;
|
|
46
|
+
};
|
|
47
|
+
}
|
|
48
|
+
/**
|
|
49
|
+
* PIITokenizer - Secure PII detection and tokenization
|
|
50
|
+
*
|
|
51
|
+
* **IMPORTANT COMPLIANCE NOTES:**
|
|
52
|
+
*
|
|
53
|
+
* 1. **GDPR Article 25 (Data Protection by Design)**:
|
|
54
|
+
* - Tokenize PII BEFORE sending to LLM or storing in logs
|
|
55
|
+
* - Clear reverse map after detokenization to minimize data retention
|
|
56
|
+
*
|
|
57
|
+
* 2. **CCPA Section 1798.100 (Consumer Rights)**:
|
|
58
|
+
* - No PII sent to third-party systems (Anthropic API)
|
|
59
|
+
* - Tokenized version stored in databases/logs
|
|
60
|
+
* - Original PII only in final output files (user-controlled)
|
|
61
|
+
*
|
|
62
|
+
* 3. **PCI-DSS Requirement 3.4**:
|
|
63
|
+
* - Credit card numbers masked/tokenized in all non-production systems
|
|
64
|
+
* - No clear-text credit cards in logs or analytics
|
|
65
|
+
*
|
|
66
|
+
* 4. **HIPAA Privacy Rule** (if applicable):
|
|
67
|
+
* - SSN and name combinations constitute PHI
|
|
68
|
+
* - Must be de-identified before processing
|
|
69
|
+
*
|
|
70
|
+
* @example
|
|
71
|
+
* ```typescript
|
|
72
|
+
* const tokenizer = new PIITokenizer();
|
|
73
|
+
*
|
|
74
|
+
* // Tokenize test data
|
|
75
|
+
* const testCode = 'const email = "john.doe@example.com"; const ssn = "123-45-6789";';
|
|
76
|
+
* const { tokenized, reverseMap, piiCount } = tokenizer.tokenize(testCode);
|
|
77
|
+
*
|
|
78
|
+
* console.log(tokenized);
|
|
79
|
+
* // Output: 'const email = "[EMAIL_0]"; const ssn = "[SSN_0]";'
|
|
80
|
+
*
|
|
81
|
+
* // Store tokenized version in database (GDPR compliant)
|
|
82
|
+
* await db.storeTest({ code: tokenized });
|
|
83
|
+
*
|
|
84
|
+
* // Detokenize for file output (user-controlled)
|
|
85
|
+
* const finalCode = tokenizer.detokenize(tokenized, reverseMap);
|
|
86
|
+
* await fs.writeFile('test.ts', finalCode);
|
|
87
|
+
*
|
|
88
|
+
* // IMPORTANT: Clear reverse map after use
|
|
89
|
+
* tokenizer.clear();
|
|
90
|
+
* ```
|
|
91
|
+
*/
|
|
92
|
+
export declare class PIITokenizer {
|
|
93
|
+
/**
|
|
94
|
+
* Reverse mapping for detokenization
|
|
95
|
+
*
|
|
96
|
+
* @private
|
|
97
|
+
* @compliance GDPR Article 32 - Must be cleared after use to prevent data retention
|
|
98
|
+
*/
|
|
99
|
+
private reverseMap;
|
|
100
|
+
/**
|
|
101
|
+
* Regular expression patterns for PII detection
|
|
102
|
+
*
|
|
103
|
+
* @private
|
|
104
|
+
*/
|
|
105
|
+
private readonly patterns;
|
|
106
|
+
/**
|
|
107
|
+
* Tokenize PII in test code, data files, or generated content
|
|
108
|
+
*
|
|
109
|
+
* **COMPLIANCE WORKFLOW:**
|
|
110
|
+
* 1. Detect PII using regex patterns
|
|
111
|
+
* 2. Replace with tokens ([EMAIL_0], [PHONE_1], etc.)
|
|
112
|
+
* 3. Store reverse map for detokenization
|
|
113
|
+
* 4. Return tokenized content safe for LLM processing
|
|
114
|
+
*
|
|
115
|
+
* @param content - Raw content that may contain PII
|
|
116
|
+
* @returns Tokenization result with statistics and reverse map
|
|
117
|
+
*
|
|
118
|
+
* @example
|
|
119
|
+
* ```typescript
|
|
120
|
+
* const tokenizer = new PIITokenizer();
|
|
121
|
+
* const result = tokenizer.tokenize(`
|
|
122
|
+
* const user = {
|
|
123
|
+
* name: "John Doe",
|
|
124
|
+
* email: "john.doe@example.com",
|
|
125
|
+
* phone: "+1-555-123-4567",
|
|
126
|
+
* ssn: "123-45-6789"
|
|
127
|
+
* };
|
|
128
|
+
* `);
|
|
129
|
+
*
|
|
130
|
+
* console.log(result.piiCount); // 4
|
|
131
|
+
* console.log(result.piiBreakdown);
|
|
132
|
+
* // { emails: 1, phones: 1, ssns: 1, creditCards: 0, names: 1 }
|
|
133
|
+
* ```
|
|
134
|
+
*/
|
|
135
|
+
tokenize(content: string): TokenizationResult;
|
|
136
|
+
/**
|
|
137
|
+
* Reverse tokenization to restore original PII
|
|
138
|
+
*
|
|
139
|
+
* **COMPLIANCE WARNING:**
|
|
140
|
+
* - Only use for final file output (user-controlled)
|
|
141
|
+
* - NEVER store detokenized content in logs or databases
|
|
142
|
+
* - Call clear() immediately after use to minimize data retention
|
|
143
|
+
*
|
|
144
|
+
* @param tokenized - Content with PII tokens
|
|
145
|
+
* @param reverseMap - Tokenization map from tokenize() call
|
|
146
|
+
* @returns Original content with PII restored
|
|
147
|
+
*
|
|
148
|
+
* @example
|
|
149
|
+
* ```typescript
|
|
150
|
+
* const tokenizer = new PIITokenizer();
|
|
151
|
+
* const { tokenized, reverseMap } = tokenizer.tokenize('Email: john@example.com');
|
|
152
|
+
*
|
|
153
|
+
* // Store tokenized version (GDPR compliant)
|
|
154
|
+
* await db.storeTest({ code: tokenized });
|
|
155
|
+
*
|
|
156
|
+
* // Restore for file output
|
|
157
|
+
* const finalCode = tokenizer.detokenize(tokenized, reverseMap);
|
|
158
|
+
* await fs.writeFile('test.ts', finalCode);
|
|
159
|
+
*
|
|
160
|
+
* // IMPORTANT: Clear reverse map
|
|
161
|
+
* tokenizer.clear();
|
|
162
|
+
* ```
|
|
163
|
+
*/
|
|
164
|
+
detokenize(tokenized: string, reverseMap: TokenizationMap): string;
|
|
165
|
+
/**
|
|
166
|
+
* Get PII statistics for audit trail
|
|
167
|
+
*
|
|
168
|
+
* **COMPLIANCE USE:**
|
|
169
|
+
* - Generate audit logs showing PII detection
|
|
170
|
+
* - Monitor for unexpected PII in generated content
|
|
171
|
+
* - Track compliance metrics over time
|
|
172
|
+
*
|
|
173
|
+
* @returns Breakdown of detected PII by type
|
|
174
|
+
*
|
|
175
|
+
* @example
|
|
176
|
+
* ```typescript
|
|
177
|
+
* const tokenizer = new PIITokenizer();
|
|
178
|
+
* tokenizer.tokenize('Email: john@example.com, Phone: 555-123-4567');
|
|
179
|
+
*
|
|
180
|
+
* const stats = tokenizer.getStats();
|
|
181
|
+
* console.log(stats);
|
|
182
|
+
* // { emails: 1, phones: 1, ssns: 0, creditCards: 0, names: 0, total: 2 }
|
|
183
|
+
*
|
|
184
|
+
* // Log for audit trail
|
|
185
|
+
* logger.info('PII detected in generated content', stats);
|
|
186
|
+
* ```
|
|
187
|
+
*/
|
|
188
|
+
getStats(): Record<string, number>;
|
|
189
|
+
/**
|
|
190
|
+
* Clear reverse map to minimize data retention
|
|
191
|
+
*
|
|
192
|
+
* **COMPLIANCE REQUIREMENT:**
|
|
193
|
+
* - GDPR Article 5(1)(e) - Storage limitation principle
|
|
194
|
+
* - CCPA Section 1798.105 - Right to deletion
|
|
195
|
+
*
|
|
196
|
+
* MUST be called after detokenization to prevent storing PII longer than necessary.
|
|
197
|
+
*
|
|
198
|
+
* @example
|
|
199
|
+
* ```typescript
|
|
200
|
+
* const tokenizer = new PIITokenizer();
|
|
201
|
+
* const { tokenized, reverseMap } = tokenizer.tokenize(content);
|
|
202
|
+
*
|
|
203
|
+
* // Use tokenized content
|
|
204
|
+
* await processWithLLM(tokenized);
|
|
205
|
+
*
|
|
206
|
+
* // Detokenize for output
|
|
207
|
+
* const finalCode = tokenizer.detokenize(tokenized, reverseMap);
|
|
208
|
+
* await fs.writeFile('output.ts', finalCode);
|
|
209
|
+
*
|
|
210
|
+
* // CRITICAL: Clear reverse map
|
|
211
|
+
* tokenizer.clear(); // GDPR compliance
|
|
212
|
+
* ```
|
|
213
|
+
*/
|
|
214
|
+
clear(): void;
|
|
215
|
+
}
|
|
216
|
+
//# sourceMappingURL=pii-tokenization.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pii-tokenization.d.ts","sourceRoot":"","sources":["../../src/security/pii-tokenization.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH;;;;;GAKG;AACH,MAAM,WAAW,eAAe;IAC9B,2CAA2C;IAC3C,KAAK,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAE3B,sCAAsC;IACtC,KAAK,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAE3B,uDAAuD;IACvD,GAAG,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAEzB,kEAAkE;IAClE,UAAU,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAEhC,2DAA2D;IAC3D,IAAI,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CAC3B;AAED;;GAEG;AACH,MAAM,WAAW,kBAAkB;IACjC,0CAA0C;IAC1C,SAAS,EAAE,MAAM,CAAC;IAElB,qEAAqE;IACrE,UAAU,EAAE,eAAe,CAAC;IAE5B,yCAAyC;IACzC,QAAQ,EAAE,MAAM,CAAC;IAEjB,4CAA4C;IAC5C,YAAY,EAAE;QACZ,MAAM,EAAE,MAAM,CAAC;QACf,MAAM,EAAE,MAAM,CAAC;QACf,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,EAAE,MAAM,CAAC;QACpB,KAAK,EAAE,MAAM,CAAC;KACf,CAAC;CACH;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2CG;AACH,qBAAa,YAAY;IACvB;;;;;OAKG;IACH,OAAO,CAAC,UAAU,CAMhB;IAEF;;;;OAIG;IACH,OAAO,CAAC,QAAQ,CAAC,QAAQ,CA8CvB;IAEF;;;;;;;;;;;;;;;;;;;;;;;;;;;;OA4BG;IACH,QAAQ,CAAC,OAAO,EAAE,MAAM,GAAG,kBAAkB;IAyE7C;;;;;;;;;;;;;;;;;;;;;;;;;;;OA2BG;IACH,UAAU,CAAC,SAAS,EAAE,MAAM,EAAE,UAAU,EAAE,eAAe,GAAG,MAAM;IAclE;;;;;;;;;;;;;;;;;;;;;;OAsBG;IACH,QAAQ,IAAI,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC;IAgBlC;;;;;;;;;;;;;;;;;;;;;;;;OAwBG;IACH,KAAK,IAAI,IAAI;CAOd"}
|
|
@@ -0,0 +1,325 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* PII Tokenization Layer
|
|
4
|
+
*
|
|
5
|
+
* Provides bidirectional tokenization/detokenization of Personally Identifiable Information (PII)
|
|
6
|
+
* for GDPR and CCPA compliance. Supports email, phone, SSN, credit card, and name detection.
|
|
7
|
+
*
|
|
8
|
+
* @module security/pii-tokenization
|
|
9
|
+
* @compliance GDPR Article 25 (Data Protection by Design), CCPA Section 1798.100
|
|
10
|
+
* @see docs/planning/mcp-improvement-plan-revised.md#CO-2
|
|
11
|
+
*/
|
|
12
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
13
|
+
exports.PIITokenizer = void 0;
|
|
14
|
+
/**
 * PIITokenizer - regex-based PII detection with reversible tokenization.
 *
 * Each detected value is replaced by a bracketed token such as `[EMAIL_0]`
 * and the token -> original pair is stored in an in-memory reverse map so the
 * content can later be restored with {@link PIITokenizer#detokenize}.
 *
 * **IMPORTANT COMPLIANCE NOTES:**
 *
 * 1. **GDPR Article 25 (Data Protection by Design)**:
 *    - Tokenize PII BEFORE sending to LLM or storing in logs
 *    - Clear reverse map after detokenization to minimize data retention
 *
 * 2. **CCPA Section 1798.100 (Consumer Rights)**:
 *    - No PII sent to third-party systems (Anthropic API)
 *    - Tokenized version stored in databases/logs
 *    - Original PII only in final output files (user-controlled)
 *
 * 3. **PCI-DSS Requirement 3.4**:
 *    - Credit card numbers masked/tokenized in all non-production systems
 *    - No clear-text credit cards in logs or analytics
 *
 * 4. **HIPAA Privacy Rule** (if applicable):
 *    - SSN and name combinations constitute PHI
 *    - Must be de-identified before processing
 *
 * NOTE: the reverse map is instance state. It accumulates across successive
 * `tokenize()` calls and the object returned as `reverseMap` is that same
 * live instance, so `clear()` also empties any previously returned result.
 *
 * @example
 * ```typescript
 * const tokenizer = new PIITokenizer();
 *
 * const testCode = 'const email = "john.doe@example.com"; const ssn = "123-45-6789";';
 * const { tokenized, reverseMap, piiCount } = tokenizer.tokenize(testCode);
 *
 * console.log(tokenized);
 * // Output: 'const email = "[EMAIL_0]"; const ssn = "[SSN_0]";'
 *
 * // Store tokenized version in database (GDPR compliant)
 * await db.storeTest({ code: tokenized });
 *
 * // Detokenize for file output (user-controlled)
 * const finalCode = tokenizer.detokenize(tokenized, reverseMap);
 * await fs.writeFile('test.ts', finalCode);
 *
 * // IMPORTANT: Clear reverse map after use
 * tokenizer.clear();
 * ```
 */
class PIITokenizer {
    constructor() {
        /**
         * Reverse mapping (token -> original value), one Map per PII category.
         *
         * @private
         * @compliance GDPR Article 32 - Must be cleared after use to prevent data retention
         */
        this.reverseMap = {
            email: new Map(),
            phone: new Map(),
            ssn: new Map(),
            creditCard: new Map(),
            name: new Map(),
        };
        /**
         * Regular expression patterns for PII detection.
         *
         * All patterns carry the `g` flag and are only ever used through
         * `String.prototype.replace`, which resets `lastIndex` on every call.
         *
         * @private
         */
        this.patterns = {
            /**
             * Email pattern (RFC 5322 simplified)
             * Matches: john.doe@example.com, user+tag@domain.co.uk
             *
             * @compliance GDPR Article 4(1) - Email is personal data
             */
            email: /\b[a-z0-9._%+-]+@[a-z0-9.-]+\.[a-z]{2,}\b/gi,
            /**
             * US Phone number pattern (E.164 and common formats)
             * Matches: +1-555-123-4567, (555) 123-4567, 555.123.4567, 5551234567
             *
             * No leading \b because a word boundary fails in front of "(".
             * The trailing (?!\d) stops a match from ending in the middle of
             * a longer digit run.
             *
             * @compliance CCPA - Phone numbers are personal information
             */
            phone: /(?:\+1[-.]?)?[(]?([0-9]{3})[)]?[-.\s]?([0-9]{3})[-.\s]?([0-9]{4})(?!\d)/g,
            /**
             * US Social Security Number (XXX-XX-XXXX)
             * Matches: 123-45-6789
             *
             * @compliance HIPAA Privacy Rule - SSN is Protected Health Information (PHI)
             */
            ssn: /\b\d{3}-\d{2}-\d{4}\b/g,
            /**
             * Credit Card Number (Visa, Mastercard, Amex, Discover)
             * Matches 16-digit numbers grouped 4-4-4-4:
             *   1234-5678-9012-3456, 1234 5678 9012 3456, 1234567890123456
             * and 15-digit American Express numbers grouped 4-6-5:
             *   3782-822463-10005, 3782 822463 10005, 378282246310005
             *
             * The 4-6-5 alternative is required for the documented Amex
             * coverage: without it a separated Amex number is not detected at
             * all and a contiguous one is only half-masked by the phone
             * pattern (its first five digits stay in clear text).
             *
             * @compliance PCI-DSS Requirement 3.4 - Must be masked/tokenized
             */
            creditCard: /\b(?:(?:\d{4}[-\s]?){3}\d{4}|\d{4}[-\s]?\d{6}[-\s]?\d{5})\b/g,
            /**
             * Personal name (basic heuristic: two capitalized words)
             * Matches: John Doe, Mary Jane
             *
             * WARNING: This is a basic pattern and may produce false positives
             * (e.g., class names, constants). tokenize() additionally requires
             * both words to be longer than 2 characters.
             *
             * @compliance GDPR Article 4(1) - Names are personal data
             */
            name: /\b([A-Z][a-z]+)\s+([A-Z][a-z]+)\b/g,
        };
    }
    /**
     * Tokenize PII in test code, data files, or generated content.
     *
     * **COMPLIANCE WORKFLOW:**
     * 1. Detect PII using regex patterns
     * 2. Replace with tokens ([EMAIL_0], [PHONE_1], etc.)
     * 3. Store reverse map for detokenization
     * 4. Return tokenized content safe for LLM processing
     *
     * Token indices never reuse a token that is already in the reverse map
     * or that already appears literally in `content`; otherwise a literal
     * such as "[EMAIL_0]" in the input would be overwritten with real PII by
     * detokenize() and the round trip would not restore the input.
     *
     * @param {string} content - Raw content that may contain PII
     * @returns {{tokenized: string, reverseMap: object, piiCount: number, piiBreakdown: object}}
     *   Tokenized text, the live reverse map of this instance, the number of
     *   PII instances replaced by this call, and that number split by type
     * @throws {TypeError} If `content` is not a string
     *
     * @example
     * ```typescript
     * const tokenizer = new PIITokenizer();
     * const result = tokenizer.tokenize(`
     *   const user = {
     *     name: "John Doe",
     *     email: "john.doe@example.com",
     *     phone: "+1-555-123-4567",
     *     ssn: "123-45-6789"
     *   };
     * `);
     *
     * console.log(result.piiCount); // 4
     * console.log(result.piiBreakdown);
     * // { emails: 1, phones: 1, ssns: 1, creditCards: 0, names: 1 }
     * ```
     */
    tokenize(content) {
        if (typeof content !== 'string') {
            throw new TypeError('PIITokenizer.tokenize() expects a string');
        }
        const breakdown = {
            emails: 0,
            phones: 0,
            ssns: 0,
            creditCards: 0,
            names: 0,
        };
        // Mint the next free token for a category and record its original value.
        const mint = (label, map, original) => {
            let index = map.size;
            let token = `[${label}_${index}]`;
            // Skip tokens that are already taken or that occur verbatim in the input.
            while (map.has(token) || content.includes(token)) {
                index += 1;
                token = `[${label}_${index}]`;
            }
            map.set(token, original);
            return token;
        };
        const { creditCard, ssn, phone, email, name } = this.reverseMap;
        // IMPORTANT: patterns run from most to least specific so that a loose
        // pattern cannot consume part of a value a stricter one would claim:
        //   1. credit cards (15/16 digits)   2. SSNs (XXX-XX-XXXX)
        //   3. phones (10 digits, could match inside a card number)
        //   4. emails   5. names (heuristic, most false positives)
        let tokenized = content.replace(this.patterns.creditCard, (match) => {
            breakdown.creditCards += 1;
            return mint('CC', creditCard, match);
        });
        tokenized = tokenized.replace(this.patterns.ssn, (match) => {
            breakdown.ssns += 1;
            return mint('SSN', ssn, match);
        });
        tokenized = tokenized.replace(this.patterns.phone, (match) => {
            breakdown.phones += 1;
            return mint('PHONE', phone, match);
        });
        tokenized = tokenized.replace(this.patterns.email, (match) => {
            breakdown.emails += 1;
            return mint('EMAIL', email, match);
        });
        tokenized = tokenized.replace(this.patterns.name, (match, first, last) => {
            // Very short word pairs are left untouched to reduce false positives.
            if (first.length <= 2 || last.length <= 2) {
                return match;
            }
            breakdown.names += 1;
            return mint('NAME', name, match);
        });
        const piiCount = breakdown.emails + breakdown.phones + breakdown.ssns +
            breakdown.creditCards + breakdown.names;
        return {
            tokenized,
            reverseMap: this.reverseMap,
            piiCount,
            piiBreakdown: breakdown,
        };
    }
    /**
     * Reverse tokenization to restore original PII.
     *
     * **COMPLIANCE WARNING:**
     * - Only use for final file output (user-controlled)
     * - NEVER store detokenized content in logs or databases
     * - Call clear() immediately after use to minimize data retention
     *
     * @param {string} tokenized - Content with PII tokens
     * @param {object} reverseMap - Tokenization map from a tokenize() call;
     *   a nullish map or nullish category is treated as "nothing to restore"
     * @returns {string} Content with every known token replaced by its original value
     *
     * @example
     * ```typescript
     * const tokenizer = new PIITokenizer();
     * const { tokenized, reverseMap } = tokenizer.tokenize('Email: john@example.com');
     *
     * // Restore for file output
     * const finalCode = tokenizer.detokenize(tokenized, reverseMap);
     * await fs.writeFile('test.ts', finalCode);
     *
     * // IMPORTANT: Clear reverse map
     * tokenizer.clear();
     * ```
     */
    detokenize(tokenized, reverseMap) {
        const categories = reverseMap == null ? [] : Object.values(reverseMap);
        let restored = tokenized;
        for (const map of categories) {
            if (map == null) {
                continue;
            }
            for (const [token, original] of map) {
                // split/join replaces every occurrence and treats both strings
                // literally (no regex or "$" replacement-pattern semantics).
                restored = restored.split(token).join(original);
            }
        }
        return restored;
    }
    /**
     * Get PII statistics for audit trail.
     *
     * Counts are the sizes of the reverse-map categories, i.e. everything
     * tokenized by this instance since construction or the last clear().
     *
     * **COMPLIANCE USE:**
     * - Generate audit logs showing PII detection
     * - Monitor for unexpected PII in generated content
     * - Track compliance metrics over time
     *
     * @returns {Record<string, number>} Per-type counts plus `total`
     *
     * @example
     * ```typescript
     * const tokenizer = new PIITokenizer();
     * tokenizer.tokenize('Email: john@example.com, Phone: 555-123-4567');
     *
     * console.log(tokenizer.getStats());
     * // { emails: 1, phones: 1, ssns: 0, creditCards: 0, names: 0, total: 2 }
     * ```
     */
    getStats() {
        const { email, phone, ssn, creditCard, name } = this.reverseMap;
        return {
            emails: email.size,
            phones: phone.size,
            ssns: ssn.size,
            creditCards: creditCard.size,
            names: name.size,
            total: email.size + phone.size + ssn.size + creditCard.size + name.size,
        };
    }
    /**
     * Clear reverse map to minimize data retention.
     *
     * **COMPLIANCE REQUIREMENT:**
     * - GDPR Article 5(1)(e) - Storage limitation principle
     * - CCPA Section 1798.105 - Right to deletion
     *
     * MUST be called after detokenization to prevent storing PII longer than
     * necessary. The Maps are emptied in place, so a `reverseMap` obtained
     * from an earlier tokenize() call is emptied as well.
     *
     * @example
     * ```typescript
     * const tokenizer = new PIITokenizer();
     * const { tokenized, reverseMap } = tokenizer.tokenize(content);
     *
     * await processWithLLM(tokenized);
     *
     * const finalCode = tokenizer.detokenize(tokenized, reverseMap);
     * await fs.writeFile('output.ts', finalCode);
     *
     * // CRITICAL: Clear reverse map
     * tokenizer.clear(); // GDPR compliance
     * ```
     */
    clear() {
        for (const map of Object.values(this.reverseMap)) {
            map.clear();
        }
    }
}
|
|
324
|
+
exports.PIITokenizer = PIITokenizer;
|
|
325
|
+
//# sourceMappingURL=pii-tokenization.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pii-tokenization.js","sourceRoot":"","sources":["../../src/security/pii-tokenization.ts"],"names":[],"mappings":";AAAA;;;;;;;;;GASG;;;AAgDH;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2CG;AACH,MAAa,YAAY;IAAzB;QACE;;;;;WAKG;QACK,eAAU,GAAoB;YACpC,KAAK,EAAE,IAAI,GAAG,EAAE;YAChB,KAAK,EAAE,IAAI,GAAG,EAAE;YAChB,GAAG,EAAE,IAAI,GAAG,EAAE;YACd,UAAU,EAAE,IAAI,GAAG,EAAE;YACrB,IAAI,EAAE,IAAI,GAAG,EAAE;SAChB,CAAC;QAEF;;;;WAIG;QACc,aAAQ,GAAG;YAC1B;;;;;eAKG;YACH,KAAK,EAAE,6CAA6C;YAEpD;;;;;;;;eAQG;YACH,KAAK,EAAE,0EAA0E;YAEjF;;;;;eAKG;YACH,GAAG,EAAE,wBAAwB;YAE7B;;;;;eAKG;YACH,UAAU,EAAE,8BAA8B;YAE1C;;;;;;;;eAQG;YACH,IAAI,EAAE,oCAAoC;SAC3C,CAAC;IAyNJ,CAAC;IAvNC;;;;;;;;;;;;;;;;;;;;;;;;;;;;OA4BG;IACH,QAAQ,CAAC,OAAe;QACtB,IAAI,SAAS,GAAG,OAAO,CAAC;QACxB,MAAM,SAAS,GAAG;YAChB,MAAM,EAAE,CAAC;YACT,MAAM,EAAE,CAAC;YACT,IAAI,EAAE,CAAC;YACP,WAAW,EAAE,CAAC;YACd,KAAK,EAAE,CAAC;SACT,CAAC;QAEF,6EAA6E;QAC7E,sEAAsE;QACtE,gDAAgD;QAChD,4EAA4E;QAC5E,mBAAmB;QACnB,2DAA2D;QAE3D,iEAAiE;QACjE,SAAS,GAAG,SAAS,CAAC,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,UAAU,EAAE,CAAC,EAAE,EAAE,EAAE;YAC7D,MAAM,KAAK,GAAG,OAAO,IAAI,CAAC,UAAU,CAAC,UAAU,CAAC,IAAI,GAAG,CAAC;YACxD,IAAI,CAAC,UAAU,CAAC,UAAU,CAAC,GAAG,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;YAC1C,SAAS,CAAC,WAAW,EAAE,CAAC;YACxB,OAAO,KAAK,CAAC;QACf,CAAC,CAAC,CAAC;QAEH,gBAAgB;QAChB,SAAS,GAAG,SAAS,CAAC,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,EAAE,CAAC,GAAG,EAAE,EAAE;YACvD,MAAM,KAAK,GAAG,QAAQ,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,GAAG,CAAC;YAClD,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,GAAG,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;YACpC,SAAS,CAAC,IAAI,EAAE,CAAC;YACjB,OAAO,KAAK,CAAC;QACf,CAAC,CAAC,CAAC;QAEH,0DAA0D;QAC1D,SAAS,GAAG,SAAS,CAAC,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,EAAE,CAAC,KAAK,EAAE,EAAE;YAC3D,MAAM,KAAK,GAAG,UAAU,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI,GAAG,CAAC;YACtD,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,GAAG,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;YACxC,SAAS,CAAC,MAAM,EAAE,CAAC;YACnB,OAAO,KAAK,CAAC;QACf,CAAC,CAAC,CAAC;QAEH,kBAAkB;QAClB,SAAS,GAAG,SAAS,CAAC,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,EAAE,CAAC,KAAK,EAAE,EAAE;
YAC3D,MAAM,KAAK,GAAG,UAAU,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI,GAAG,CAAC;YACtD,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,GAAG,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;YACxC,SAAS,CAAC,MAAM,EAAE,CAAC;YACnB,OAAO,KAAK,CAAC;QACf,CAAC,CAAC,CAAC;QAEH,gEAAgE;QAChE,SAAS,GAAG,SAAS,CAAC,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,KAAK,EAAE,KAAa,EAAE,IAAY,EAAE,EAAE;YACvF,2DAA2D;YAC3D,2EAA2E;YAC3E,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACxC,MAAM,KAAK,GAAG,SAAS,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC;gBACpD,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;gBACvC,SAAS,CAAC,KAAK,EAAE,CAAC;gBAClB,OAAO,KAAK,CAAC;YACf,CAAC;YACD,OAAO,KAAK,CAAC;QACf,CAAC,CAAC,CAAC;QAEH,MAAM,QAAQ,GAAG,SAAS,CAAC,MAAM,GAAG,SAAS,CAAC,MAAM,GAAG,SAAS,CAAC,IAAI;YACpD,SAAS,CAAC,WAAW,GAAG,SAAS,CAAC,KAAK,CAAC;QAEzD,OAAO;YACL,SAAS;YACT,UAAU,EAAE,IAAI,CAAC,UAAU;YAC3B,QAAQ;YACR,YAAY,EAAE,SAAS;SACxB,CAAC;IACJ,CAAC;IAED;;;;;;;;;;;;;;;;;;;;;;;;;;;OA2BG;IACH,UAAU,CAAC,SAAiB,EAAE,UAA2B;QACvD,IAAI,WAAW,GAAG,SAAS,CAAC;QAE5B,wBAAwB;QACxB,KAAK,MAAM,CAAC,IAAI,EAAE,GAAG,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,UAAU,CAAC,EAAE,CAAC;YACrD,KAAK,MAAM,CAAC,KAAK,EAAE,QAAQ,CAAC,IAAI,GAAG,CAAC,OAAO,EAAE,EAAE,CAAC;gBAC9C,gEAAgE;gBAChE,WAAW,GAAG,WAAW,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YACxD,CAAC;QACH,CAAC;QAED,OAAO,WAAW,CAAC;IACrB,CAAC;IAED;;;;;;;;;;;;;;;;;;;;;;OAsBG;IACH,QAAQ;QACN,OAAO;YACL,MAAM,EAAE,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI;YAClC,MAAM,EAAE,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI;YAClC,IAAI,EAAE,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI;YAC9B,WAAW,EAAE,IAAI,CAAC,UAAU,CAAC,UAAU,CAAC,IAAI;YAC5C,KAAK,EAAE,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI;YAChC,KAAK,EACH,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI;gBAC1B,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI;gBAC1B,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI;gBACxB,IAAI,CAAC,UAAU,CAAC,UAAU,CAAC,IAAI;gBAC/B,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI;SAC5B,CAAC;IACJ,CAAC;IAED;;;;;;;;;;;;;;;;;;;;;;;;OAwBG;IACH,KAAK;QACH,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;QAC9B,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,
KAAK,EAAE,CAAC;QAC9B,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC;QAC5B,IAAI,CAAC,UAAU,CAAC,UAAU,CAAC,KAAK,EAAE,CAAC;QACnC,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;IAC/B,CAAC;CACF;AA3RD,oCA2RC"}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* EmbeddingGenerator - Consolidated utility for generating embeddings
|
|
3
|
+
*
|
|
4
|
+
* Provides a single source of truth for embedding generation across the codebase.
|
|
5
|
+
* In production, replace with actual embedding model (OpenAI, Cohere, local BERT).
|
|
6
|
+
*/
|
|
7
|
+
/**
|
|
8
|
+
* Generate a simple hash-based embedding from text
|
|
9
|
+
* This is a placeholder implementation for development/testing.
|
|
10
|
+
*
|
|
11
|
+
* In production, replace with:
|
|
12
|
+
* - OpenAI embeddings API (text-embedding-ada-002)
|
|
13
|
+
* - Cohere embeddings
|
|
14
|
+
* - Local transformer models (sentence-transformers)
|
|
15
|
+
* - Custom trained embeddings
|
|
16
|
+
*
|
|
17
|
+
* @param text Text to embed
|
|
18
|
+
* @param dimensions Embedding dimension (default: 384, common for sentence transformers)
|
|
19
|
+
* @returns Normalized embedding vector
|
|
20
|
+
*/
|
|
21
|
+
// `dimensions` may be omitted; the compiled implementation defaults it to 384.
export declare function generateEmbedding(text: string, dimensions?: number): number[];
|
|
22
|
+
/**
|
|
23
|
+
* Check if the current configuration uses a real embedding model
|
|
24
|
+
* (vs. the placeholder hash-based implementation)
|
|
25
|
+
*
|
|
26
|
+
* @returns true if using a real embedding model, false if using placeholder
|
|
27
|
+
*/
|
|
28
|
+
// The compiled implementation returns true when OPENAI_API_KEY, COHERE_API_KEY or
// EMBEDDING_MODEL_PATH is set to a non-empty value in process.env.
export declare function isRealEmbeddingModel(): boolean;
|
|
29
|
+
/**
|
|
30
|
+
* Get the type of embedding model being used
|
|
31
|
+
*
|
|
32
|
+
* @returns Model type string for logging
|
|
33
|
+
*/
|
|
34
|
+
// The compiled implementation checks OPENAI_API_KEY, then COHERE_API_KEY, then
// EMBEDDING_MODEL_PATH, and otherwise returns a "Hash-based placeholder" label.
export declare function getEmbeddingModelType(): string;
|
|
35
|
+
//# sourceMappingURL=EmbeddingGenerator.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"EmbeddingGenerator.d.ts","sourceRoot":"","sources":["../../src/utils/EmbeddingGenerator.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH;;;;;;;;;;;;;GAaG;AACH,wBAAgB,iBAAiB,CAAC,IAAI,EAAE,MAAM,EAAE,UAAU,GAAE,MAAY,GAAG,MAAM,EAAE,CAmBlF;AAED;;;;;GAKG;AACH,wBAAgB,oBAAoB,IAAI,OAAO,CAO9C;AAED;;;;GAIG;AACH,wBAAgB,qBAAqB,IAAI,MAAM,CAW9C"}
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* EmbeddingGenerator - Consolidated utility for generating embeddings
|
|
4
|
+
*
|
|
5
|
+
* Provides a single source of truth for embedding generation across the codebase.
|
|
6
|
+
* In production, replace with actual embedding model (OpenAI, Cohere, local BERT).
|
|
7
|
+
*/
|
|
8
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
9
|
+
exports.generateEmbedding = generateEmbedding;
|
|
10
|
+
exports.isRealEmbeddingModel = isRealEmbeddingModel;
|
|
11
|
+
exports.getEmbeddingModelType = getEmbeddingModelType;
|
|
12
|
+
/**
 * Generate a deterministic, hash-based placeholder embedding from text.
 *
 * Each UTF-16 code unit of `text` adds `sin(code * 0.1) * 0.1` to the slot
 * `(code * (position + 1)) % dimensions`; the vector is then scaled to unit
 * length. This is a placeholder for development/testing only.
 *
 * In production, replace with:
 * - OpenAI embeddings API (text-embedding-ada-002)
 * - Cohere embeddings
 * - Local transformer models (sentence-transformers)
 * - Custom trained embeddings
 *
 * @param {string} text Text to embed
 * @param {number} [dimensions=384] Length of the returned vector; must be a
 *   non-negative integer (384 is common for sentence transformers)
 * @returns {number[]} Vector of length `dimensions`, normalized to unit
 *   length unless every slot is zero (e.g. empty text), in which case the
 *   all-zero vector is returned unchanged
 * @throws {RangeError} If `dimensions` is not a non-negative integer
 */
function generateEmbedding(text, dimensions = 384) {
    // Reject sizes that `new Array()` would either throw on (negative,
    // fractional) or silently misinterpret (null, strings).
    if (!Number.isInteger(dimensions) || dimensions < 0) {
        throw new RangeError(`dimensions must be a non-negative integer, got ${String(dimensions)}`);
    }
    const embedding = new Array(dimensions).fill(0);
    if (dimensions === 0) {
        // `% 0` yields NaN, which would attach a stray "NaN" property to the
        // array instead of filling a slot; there is nothing to fill anyway.
        return embedding;
    }
    // Simple hash-based embedding (for demonstration only)
    for (let i = 0; i < text.length; i++) {
        const charCode = text.charCodeAt(i);
        const index = (charCode * (i + 1)) % dimensions;
        embedding[index] += Math.sin(charCode * 0.1) * 0.1;
    }
    // Normalize to unit vector; skipped for the zero vector to avoid 0/0.
    const magnitude = Math.sqrt(embedding.reduce((sum, val) => sum + val * val, 0));
    if (magnitude > 0) {
        for (let i = 0; i < dimensions; i++) {
            embedding[i] /= magnitude;
        }
    }
    return embedding;
}
|
|
43
|
+
/**
 * Report whether the environment is configured for a real embedding model
 * (as opposed to the hash-based placeholder).
 *
 * Only environment variables are inspected: the result is true when any of
 * OPENAI_API_KEY, COHERE_API_KEY or EMBEDDING_MODEL_PATH holds a non-empty
 * value.
 *
 * @returns true if using a real embedding model, false if using placeholder
 */
function isRealEmbeddingModel() {
    const { OPENAI_API_KEY, COHERE_API_KEY, EMBEDDING_MODEL_PATH } = process.env;
    const configured = OPENAI_API_KEY || COHERE_API_KEY || EMBEDDING_MODEL_PATH;
    return Boolean(configured);
}
|
|
55
|
+
/**
 * Describe which embedding model the environment is configured for.
 *
 * The first non-empty variable wins, checked in this order:
 * OPENAI_API_KEY, COHERE_API_KEY, EMBEDDING_MODEL_PATH. With none set, the
 * placeholder label is returned.
 *
 * @returns Model type string for logging
 */
function getEmbeddingModelType() {
    const {
        OPENAI_API_KEY: openAiKey,
        COHERE_API_KEY: cohereKey,
        EMBEDDING_MODEL_PATH: localModelPath,
    } = process.env;
    if (openAiKey) {
        return 'OpenAI (text-embedding-ada-002)';
    }
    if (cohereKey) {
        return 'Cohere';
    }
    return localModelPath
        ? `Local model (${localModelPath})`
        : 'Hash-based placeholder (NOT FOR PRODUCTION)';
}
|
|
72
|
+
//# sourceMappingURL=EmbeddingGenerator.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"EmbeddingGenerator.js","sourceRoot":"","sources":["../../src/utils/EmbeddingGenerator.ts"],"names":[],"mappings":";AAAA;;;;;GAKG;;AAgBH,8CAmBC;AAQD,oDAOC;AAOD,sDAWC;AAlED;;;;;;;;;;;;;GAaG;AACH,SAAgB,iBAAiB,CAAC,IAAY,EAAE,aAAqB,GAAG;IACtE,MAAM,SAAS,GAAG,IAAI,KAAK,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAEhD,uDAAuD;IACvD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,QAAQ,GAAG,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;QACpC,MAAM,KAAK,GAAG,CAAC,QAAQ,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,UAAU,CAAC;QAChD,SAAS,CAAC,KAAK,CAAC,IAAI,IAAI,CAAC,GAAG,CAAC,QAAQ,GAAG,GAAG,CAAC,GAAG,GAAG,CAAC;IACrD,CAAC;IAED,2BAA2B;IAC3B,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE,CAAC,GAAG,GAAG,GAAG,GAAG,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC;IAChF,IAAI,SAAS,GAAG,CAAC,EAAE,CAAC;QAClB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;YACpC,SAAS,CAAC,CAAC,CAAC,IAAI,SAAS,CAAC;QAC5B,CAAC;IACH,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;;;;GAKG;AACH,SAAgB,oBAAoB;IAClC,+CAA+C;IAC/C,OAAO,CAAC,CAAC,CACP,OAAO,CAAC,GAAG,CAAC,cAAc;QAC1B,OAAO,CAAC,GAAG,CAAC,cAAc;QAC1B,OAAO,CAAC,GAAG,CAAC,oBAAoB,CACjC,CAAC;AACJ,CAAC;AAED;;;;GAIG;AACH,SAAgB,qBAAqB;IACnC,IAAI,OAAO,CAAC,GAAG,CAAC,cAAc,EAAE,CAAC;QAC/B,OAAO,iCAAiC,CAAC;IAC3C,CAAC;IACD,IAAI,OAAO,CAAC,GAAG,CAAC,cAAc,EAAE,CAAC;QAC/B,OAAO,QAAQ,CAAC;IAClB,CAAC;IACD,IAAI,OAAO,CAAC,GAAG,CAAC,oBAAoB,EAAE,CAAC;QACrC,OAAO,gBAAgB,OAAO,CAAC,GAAG,CAAC,oBAAoB,GAAG,CAAC;IAC7D,CAAC;IACD,OAAO,6CAA6C,CAAC;AACvD,CAAC"}
|