agentic-qe 1.7.0 → 1.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152) hide show
  1. package/.claude/skills/sherlock-review/SKILL.md +786 -0
  2. package/CHANGELOG.md +625 -0
  3. package/README.md +42 -55
  4. package/dist/agents/BaseAgent.d.ts +10 -10
  5. package/dist/agents/BaseAgent.d.ts.map +1 -1
  6. package/dist/agents/BaseAgent.js +96 -78
  7. package/dist/agents/BaseAgent.js.map +1 -1
  8. package/dist/agents/CoverageAnalyzerAgent.js +2 -2
  9. package/dist/agents/CoverageAnalyzerAgent.js.map +1 -1
  10. package/dist/agents/LearningAgent.d.ts +2 -2
  11. package/dist/agents/LearningAgent.d.ts.map +1 -1
  12. package/dist/agents/LearningAgent.js +4 -4
  13. package/dist/agents/LearningAgent.js.map +1 -1
  14. package/dist/agents/TestExecutorAgent.d.ts +41 -2
  15. package/dist/agents/TestExecutorAgent.d.ts.map +1 -1
  16. package/dist/agents/TestExecutorAgent.js +314 -64
  17. package/dist/agents/TestExecutorAgent.js.map +1 -1
  18. package/dist/agents/examples/batchAnalyze.d.ts +252 -0
  19. package/dist/agents/examples/batchAnalyze.d.ts.map +1 -0
  20. package/dist/agents/examples/batchAnalyze.js +259 -0
  21. package/dist/agents/examples/batchAnalyze.js.map +1 -0
  22. package/dist/agents/examples/batchGenerate.d.ts +153 -0
  23. package/dist/agents/examples/batchGenerate.d.ts.map +1 -0
  24. package/dist/agents/examples/batchGenerate.js +166 -0
  25. package/dist/agents/examples/batchGenerate.js.map +1 -0
  26. package/dist/agents/generateWithPII.d.ts +128 -0
  27. package/dist/agents/generateWithPII.d.ts.map +1 -0
  28. package/dist/agents/generateWithPII.js +175 -0
  29. package/dist/agents/generateWithPII.js.map +1 -0
  30. package/dist/agents/lifecycle/AgentLifecycleManager.d.ts +1 -6
  31. package/dist/agents/lifecycle/AgentLifecycleManager.d.ts.map +1 -1
  32. package/dist/agents/lifecycle/AgentLifecycleManager.js +0 -7
  33. package/dist/agents/lifecycle/AgentLifecycleManager.js.map +1 -1
  34. package/dist/cli/commands/init.d.ts +6 -3
  35. package/dist/cli/commands/init.d.ts.map +1 -1
  36. package/dist/cli/commands/init.js +51 -46
  37. package/dist/cli/commands/init.js.map +1 -1
  38. package/dist/cli/commands/learn/index.d.ts +4 -0
  39. package/dist/cli/commands/learn/index.d.ts.map +1 -1
  40. package/dist/cli/commands/learn/index.js +57 -0
  41. package/dist/cli/commands/learn/index.js.map +1 -1
  42. package/dist/cli/index.js +14 -0
  43. package/dist/cli/index.js.map +1 -1
  44. package/dist/core/memory/AdapterConfig.d.ts +108 -0
  45. package/dist/core/memory/AdapterConfig.d.ts.map +1 -0
  46. package/dist/core/memory/AdapterConfig.js +189 -0
  47. package/dist/core/memory/AdapterConfig.js.map +1 -0
  48. package/dist/core/memory/AdapterFactory.d.ts +72 -0
  49. package/dist/core/memory/AdapterFactory.d.ts.map +1 -0
  50. package/dist/core/memory/AdapterFactory.js +152 -0
  51. package/dist/core/memory/AdapterFactory.js.map +1 -0
  52. package/dist/core/memory/AgentDBManager.d.ts +28 -5
  53. package/dist/core/memory/AgentDBManager.d.ts.map +1 -1
  54. package/dist/core/memory/AgentDBManager.js +99 -73
  55. package/dist/core/memory/AgentDBManager.js.map +1 -1
  56. package/dist/core/memory/PatternCache.d.ts +105 -0
  57. package/dist/core/memory/PatternCache.d.ts.map +1 -0
  58. package/dist/core/memory/PatternCache.js +183 -0
  59. package/dist/core/memory/PatternCache.js.map +1 -0
  60. package/dist/core/memory/RealAgentDBAdapter.d.ts +14 -0
  61. package/dist/core/memory/RealAgentDBAdapter.d.ts.map +1 -1
  62. package/dist/core/memory/RealAgentDBAdapter.js +153 -16
  63. package/dist/core/memory/RealAgentDBAdapter.js.map +1 -1
  64. package/dist/core/memory/ReasoningBankAdapter.d.ts +4 -0
  65. package/dist/core/memory/ReasoningBankAdapter.d.ts.map +1 -1
  66. package/dist/core/memory/ReasoningBankAdapter.js +20 -0
  67. package/dist/core/memory/ReasoningBankAdapter.js.map +1 -1
  68. package/dist/core/memory/SwarmMemoryManager.d.ts +8 -0
  69. package/dist/core/memory/SwarmMemoryManager.d.ts.map +1 -1
  70. package/dist/core/memory/SwarmMemoryManager.js +33 -0
  71. package/dist/core/memory/SwarmMemoryManager.js.map +1 -1
  72. package/dist/core/memory/index.d.ts +6 -0
  73. package/dist/core/memory/index.d.ts.map +1 -1
  74. package/dist/core/memory/index.js +12 -1
  75. package/dist/core/memory/index.js.map +1 -1
  76. package/dist/core/neural/NeuralTrainer.d.ts +2 -6
  77. package/dist/core/neural/NeuralTrainer.d.ts.map +1 -1
  78. package/dist/core/neural/NeuralTrainer.js +7 -25
  79. package/dist/core/neural/NeuralTrainer.js.map +1 -1
  80. package/dist/learning/ImprovementLoop.js +2 -2
  81. package/dist/learning/ImprovementLoop.js.map +1 -1
  82. package/dist/learning/LearningEngine.d.ts +11 -7
  83. package/dist/learning/LearningEngine.d.ts.map +1 -1
  84. package/dist/learning/LearningEngine.js +156 -72
  85. package/dist/learning/LearningEngine.js.map +1 -1
  86. package/dist/mcp/handlers/filtered/coverage-analyzer-filtered.d.ts +83 -0
  87. package/dist/mcp/handlers/filtered/coverage-analyzer-filtered.d.ts.map +1 -0
  88. package/dist/mcp/handlers/filtered/coverage-analyzer-filtered.js +130 -0
  89. package/dist/mcp/handlers/filtered/coverage-analyzer-filtered.js.map +1 -0
  90. package/dist/mcp/handlers/filtered/flaky-detector-filtered.d.ts +58 -0
  91. package/dist/mcp/handlers/filtered/flaky-detector-filtered.d.ts.map +1 -0
  92. package/dist/mcp/handlers/filtered/flaky-detector-filtered.js +84 -0
  93. package/dist/mcp/handlers/filtered/flaky-detector-filtered.js.map +1 -0
  94. package/dist/mcp/handlers/filtered/index.d.ts +47 -0
  95. package/dist/mcp/handlers/filtered/index.d.ts.map +1 -0
  96. package/dist/mcp/handlers/filtered/index.js +63 -0
  97. package/dist/mcp/handlers/filtered/index.js.map +1 -0
  98. package/dist/mcp/handlers/filtered/performance-tester-filtered.d.ts +57 -0
  99. package/dist/mcp/handlers/filtered/performance-tester-filtered.d.ts.map +1 -0
  100. package/dist/mcp/handlers/filtered/performance-tester-filtered.js +83 -0
  101. package/dist/mcp/handlers/filtered/performance-tester-filtered.js.map +1 -0
  102. package/dist/mcp/handlers/filtered/quality-assessor-filtered.d.ts +57 -0
  103. package/dist/mcp/handlers/filtered/quality-assessor-filtered.d.ts.map +1 -0
  104. package/dist/mcp/handlers/filtered/quality-assessor-filtered.js +93 -0
  105. package/dist/mcp/handlers/filtered/quality-assessor-filtered.js.map +1 -0
  106. package/dist/mcp/handlers/filtered/security-scanner-filtered.d.ts +54 -0
  107. package/dist/mcp/handlers/filtered/security-scanner-filtered.d.ts.map +1 -0
  108. package/dist/mcp/handlers/filtered/security-scanner-filtered.js +73 -0
  109. package/dist/mcp/handlers/filtered/security-scanner-filtered.js.map +1 -0
  110. package/dist/mcp/handlers/filtered/test-executor-filtered.d.ts +61 -0
  111. package/dist/mcp/handlers/filtered/test-executor-filtered.d.ts.map +1 -0
  112. package/dist/mcp/handlers/filtered/test-executor-filtered.js +117 -0
  113. package/dist/mcp/handlers/filtered/test-executor-filtered.js.map +1 -0
  114. package/dist/mcp/handlers/phase2/Phase2Tools.js +2 -2
  115. package/dist/mcp/handlers/phase2/Phase2Tools.js.map +1 -1
  116. package/dist/scripts/backup-helper.d.ts +64 -0
  117. package/dist/scripts/backup-helper.d.ts.map +1 -0
  118. package/dist/scripts/backup-helper.js +251 -0
  119. package/dist/scripts/backup-helper.js.map +1 -0
  120. package/dist/scripts/migrate-with-backup.d.ts +15 -0
  121. package/dist/scripts/migrate-with-backup.d.ts.map +1 -0
  122. package/dist/scripts/migrate-with-backup.js +194 -0
  123. package/dist/scripts/migrate-with-backup.js.map +1 -0
  124. package/dist/security/pii-tokenization.d.ts +216 -0
  125. package/dist/security/pii-tokenization.d.ts.map +1 -0
  126. package/dist/security/pii-tokenization.js +325 -0
  127. package/dist/security/pii-tokenization.js.map +1 -0
  128. package/dist/utils/EmbeddingGenerator.d.ts +35 -0
  129. package/dist/utils/EmbeddingGenerator.d.ts.map +1 -0
  130. package/dist/utils/EmbeddingGenerator.js +72 -0
  131. package/dist/utils/EmbeddingGenerator.js.map +1 -0
  132. package/dist/utils/batch-operations.d.ts +215 -0
  133. package/dist/utils/batch-operations.d.ts.map +1 -0
  134. package/dist/utils/batch-operations.js +266 -0
  135. package/dist/utils/batch-operations.js.map +1 -0
  136. package/dist/utils/filtering.d.ts +180 -0
  137. package/dist/utils/filtering.d.ts.map +1 -0
  138. package/dist/utils/filtering.js +288 -0
  139. package/dist/utils/filtering.js.map +1 -0
  140. package/dist/utils/prompt-cache-examples.d.ts +111 -0
  141. package/dist/utils/prompt-cache-examples.d.ts.map +1 -0
  142. package/dist/utils/prompt-cache-examples.js +416 -0
  143. package/dist/utils/prompt-cache-examples.js.map +1 -0
  144. package/dist/utils/prompt-cache.d.ts +305 -0
  145. package/dist/utils/prompt-cache.d.ts.map +1 -0
  146. package/dist/utils/prompt-cache.js +448 -0
  147. package/dist/utils/prompt-cache.js.map +1 -0
  148. package/package.json +7 -16
  149. package/dist/mcp/tools/deprecated.d.ts +0 -1390
  150. package/dist/mcp/tools/deprecated.d.ts.map +0 -1
  151. package/dist/mcp/tools/deprecated.js +0 -859
  152. package/dist/mcp/tools/deprecated.js.map +0 -1
@@ -0,0 +1,216 @@
1
+ /**
2
+ * PII Tokenization Layer
3
+ *
4
+ * Provides bidirectional tokenization/detokenization of Personally Identifiable Information (PII)
5
+ * for GDPR and CCPA compliance. Supports email, phone, SSN, credit card, and name detection.
6
+ *
7
+ * @module security/pii-tokenization
8
+ * @compliance GDPR Article 25 (Data Protection by Design), CCPA Section 1798.100
9
+ * @see docs/planning/mcp-improvement-plan-revised.md#CO-2
10
+ */
11
+ /**
12
+ * Bidirectional mapping for tokenized PII values
13
+ *
14
+ * @compliance GDPR Article 32 - Stores original values temporarily for detokenization,
15
+ * must be cleared after use to prevent data retention issues
16
+ */
17
+ export interface TokenizationMap {
18
+ /** Email addresses (matched with a simplified RFC 5322-style pattern) */
19
+ email: Map<string, string>;
20
+ /** Phone numbers (common US formats, with or without a +1 prefix) */
21
+ phone: Map<string, string>;
22
+ /** Social Security Numbers (US format: XXX-XX-XXXX) */
23
+ ssn: Map<string, string>;
24
+ /** Credit card numbers (Luhn algorithm validation recommended) */
25
+ creditCard: Map<string, string>;
26
+ /** Personal names (First Last pattern, basic heuristic) */
27
+ name: Map<string, string>;
28
+ }
29
+ /**
30
+ * Result of tokenization operation with statistics
31
+ */
32
+ export interface TokenizationResult {
33
+ /** Content with PII replaced by tokens */
34
+ tokenized: string;
35
+ /** Reverse mapping for detokenization; this is the tokenizer's own live map, so clear() empties it (MUST be cleared after use) */
36
+ reverseMap: TokenizationMap;
37
+ /** Total count of PII instances found */
38
+ piiCount: number;
39
+ /** Breakdown by PII type for audit trail */
40
+ piiBreakdown: {
41
+ emails: number;
42
+ phones: number;
43
+ ssns: number;
44
+ creditCards: number;
45
+ names: number;
46
+ };
47
+ }
48
+ /**
49
+ * PIITokenizer - Secure PII detection and tokenization
50
+ *
51
+ * **IMPORTANT COMPLIANCE NOTES:**
52
+ *
53
+ * 1. **GDPR Article 25 (Data Protection by Design)**:
54
+ * - Tokenize PII BEFORE sending to LLM or storing in logs
55
+ * - Clear reverse map after detokenization to minimize data retention
56
+ *
57
+ * 2. **CCPA Section 1798.100 (Consumer Rights)**:
58
+ * - No PII sent to third-party systems (Anthropic API)
59
+ * - Tokenized version stored in databases/logs
60
+ * - Original PII only in final output files (user-controlled)
61
+ *
62
+ * 3. **PCI-DSS Requirement 3.4**:
63
+ * - Credit card numbers masked/tokenized in all non-production systems
64
+ * - No clear-text credit cards in logs or analytics
65
+ *
66
+ * 4. **HIPAA Privacy Rule** (if applicable):
67
+ * - SSN and name combinations constitute PHI
68
+ * - Must be de-identified before processing
69
+ *
70
+ * @example
71
+ * ```typescript
72
+ * const tokenizer = new PIITokenizer();
73
+ *
74
+ * // Tokenize test data
75
+ * const testCode = 'const email = "john.doe@example.com"; const ssn = "123-45-6789";';
76
+ * const { tokenized, reverseMap, piiCount } = tokenizer.tokenize(testCode);
77
+ *
78
+ * console.log(tokenized);
79
+ * // Output: 'const email = "[EMAIL_0]"; const ssn = "[SSN_0]";'
80
+ *
81
+ * // Store tokenized version in database (GDPR compliant)
82
+ * await db.storeTest({ code: tokenized });
83
+ *
84
+ * // Detokenize for file output (user-controlled)
85
+ * const finalCode = tokenizer.detokenize(tokenized, reverseMap);
86
+ * await fs.writeFile('test.ts', finalCode);
87
+ *
88
+ * // IMPORTANT: Clear reverse map after use
89
+ * tokenizer.clear();
90
+ * ```
91
+ */
92
+ export declare class PIITokenizer {
93
+ /**
94
+ * Reverse mapping for detokenization
95
+ *
96
+ * @private
97
+ * @compliance GDPR Article 32 - Must be cleared after use to prevent data retention
98
+ */
99
+ private reverseMap;
100
+ /**
101
+ * Regular expression patterns for PII detection
102
+ *
103
+ * @private
104
+ */
105
+ private readonly patterns;
106
+ /**
107
+ * Tokenize PII in test code, data files, or generated content
108
+ *
109
+ * **COMPLIANCE WORKFLOW:**
110
+ * 1. Detect PII using regex patterns
111
+ * 2. Replace with tokens ([EMAIL_0], [PHONE_1], etc.)
112
+ * 3. Store reverse map for detokenization
113
+ * 4. Return tokenized content safe for LLM processing
114
+ *
115
+ * @param content - Raw content that may contain PII
116
+ * @returns Tokenization result with statistics and reverse map
117
+ *
118
+ * @example
119
+ * ```typescript
120
+ * const tokenizer = new PIITokenizer();
121
+ * const result = tokenizer.tokenize(`
122
+ * const user = {
123
+ * name: "John Doe",
124
+ * email: "john.doe@example.com",
125
+ * phone: "+1-555-123-4567",
126
+ * ssn: "123-45-6789"
127
+ * };
128
+ * `);
129
+ *
130
+ * console.log(result.piiCount); // 4
131
+ * console.log(result.piiBreakdown);
132
+ * // { emails: 1, phones: 1, ssns: 1, creditCards: 0, names: 1 }
133
+ * ```
134
+ */
135
+ tokenize(content: string): TokenizationResult;
136
+ /**
137
+ * Reverse tokenization to restore original PII
138
+ *
139
+ * **COMPLIANCE WARNING:**
140
+ * - Only use for final file output (user-controlled)
141
+ * - NEVER store detokenized content in logs or databases
142
+ * - Call clear() immediately after use to minimize data retention
143
+ *
144
+ * @param tokenized - Content with PII tokens
145
+ * @param reverseMap - Tokenization map from tokenize() call
146
+ * @returns Original content with PII restored
147
+ *
148
+ * @example
149
+ * ```typescript
150
+ * const tokenizer = new PIITokenizer();
151
+ * const { tokenized, reverseMap } = tokenizer.tokenize('Email: john@example.com');
152
+ *
153
+ * // Store tokenized version (GDPR compliant)
154
+ * await db.storeTest({ code: tokenized });
155
+ *
156
+ * // Restore for file output
157
+ * const finalCode = tokenizer.detokenize(tokenized, reverseMap);
158
+ * await fs.writeFile('test.ts', finalCode);
159
+ *
160
+ * // IMPORTANT: Clear reverse map
161
+ * tokenizer.clear();
162
+ * ```
163
+ */
164
+ detokenize(tokenized: string, reverseMap: TokenizationMap): string;
165
+ /**
166
+ * Get PII statistics for audit trail
167
+ *
168
+ * **COMPLIANCE USE:**
169
+ * - Generate audit logs showing PII detection
170
+ * - Monitor for unexpected PII in generated content
171
+ * - Track compliance metrics over time
172
+ *
173
+ * @returns Breakdown of detected PII by type
174
+ *
175
+ * @example
176
+ * ```typescript
177
+ * const tokenizer = new PIITokenizer();
178
+ * tokenizer.tokenize('Email: john@example.com, Phone: 555-123-4567');
179
+ *
180
+ * const stats = tokenizer.getStats();
181
+ * console.log(stats);
182
+ * // { emails: 1, phones: 1, ssns: 0, creditCards: 0, names: 0, total: 2 }
183
+ *
184
+ * // Log for audit trail
185
+ * logger.info('PII detected in generated content', stats);
186
+ * ```
187
+ */
188
+ getStats(): Record<string, number>;
189
+ /**
190
+ * Clear reverse map to minimize data retention
191
+ *
192
+ * **COMPLIANCE REQUIREMENT:**
193
+ * - GDPR Article 5(1)(e) - Storage limitation principle
194
+ * - CCPA Section 1798.105 - Right to deletion
195
+ *
196
+ * MUST be called after detokenization to prevent storing PII longer than necessary.
197
+ *
198
+ * @example
199
+ * ```typescript
200
+ * const tokenizer = new PIITokenizer();
201
+ * const { tokenized, reverseMap } = tokenizer.tokenize(content);
202
+ *
203
+ * // Use tokenized content
204
+ * await processWithLLM(tokenized);
205
+ *
206
+ * // Detokenize for output
207
+ * const finalCode = tokenizer.detokenize(tokenized, reverseMap);
208
+ * await fs.writeFile('output.ts', finalCode);
209
+ *
210
+ * // CRITICAL: Clear reverse map
211
+ * tokenizer.clear(); // GDPR compliance
212
+ * ```
213
+ */
214
+ clear(): void;
215
+ }
216
+ //# sourceMappingURL=pii-tokenization.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"pii-tokenization.d.ts","sourceRoot":"","sources":["../../src/security/pii-tokenization.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH;;;;;GAKG;AACH,MAAM,WAAW,eAAe;IAC9B,2CAA2C;IAC3C,KAAK,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAE3B,sCAAsC;IACtC,KAAK,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAE3B,uDAAuD;IACvD,GAAG,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAEzB,kEAAkE;IAClE,UAAU,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAEhC,2DAA2D;IAC3D,IAAI,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CAC3B;AAED;;GAEG;AACH,MAAM,WAAW,kBAAkB;IACjC,0CAA0C;IAC1C,SAAS,EAAE,MAAM,CAAC;IAElB,qEAAqE;IACrE,UAAU,EAAE,eAAe,CAAC;IAE5B,yCAAyC;IACzC,QAAQ,EAAE,MAAM,CAAC;IAEjB,4CAA4C;IAC5C,YAAY,EAAE;QACZ,MAAM,EAAE,MAAM,CAAC;QACf,MAAM,EAAE,MAAM,CAAC;QACf,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,EAAE,MAAM,CAAC;QACpB,KAAK,EAAE,MAAM,CAAC;KACf,CAAC;CACH;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2CG;AACH,qBAAa,YAAY;IACvB;;;;;OAKG;IACH,OAAO,CAAC,UAAU,CAMhB;IAEF;;;;OAIG;IACH,OAAO,CAAC,QAAQ,CAAC,QAAQ,CA8CvB;IAEF;;;;;;;;;;;;;;;;;;;;;;;;;;;;OA4BG;IACH,QAAQ,CAAC,OAAO,EAAE,MAAM,GAAG,kBAAkB;IAyE7C;;;;;;;;;;;;;;;;;;;;;;;;;;;OA2BG;IACH,UAAU,CAAC,SAAS,EAAE,MAAM,EAAE,UAAU,EAAE,eAAe,GAAG,MAAM;IAclE;;;;;;;;;;;;;;;;;;;;;;OAsBG;IACH,QAAQ,IAAI,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC;IAgBlC;;;;;;;;;;;;;;;;;;;;;;;;OAwBG;IACH,KAAK,IAAI,IAAI;CAOd"}
@@ -0,0 +1,325 @@
1
+ "use strict";
2
+ /**
3
+ * PII Tokenization Layer
4
+ *
5
+ * Provides bidirectional tokenization/detokenization of Personally Identifiable Information (PII)
6
+ * for GDPR and CCPA compliance. Supports email, phone, SSN, credit card, and name detection.
7
+ *
8
+ * @module security/pii-tokenization
9
+ * @compliance GDPR Article 25 (Data Protection by Design), CCPA Section 1798.100
10
+ * @see docs/planning/mcp-improvement-plan-revised.md#CO-2
11
+ */
12
+ Object.defineProperty(exports, "__esModule", { value: true });
13
+ exports.PIITokenizer = void 0;
14
/**
 * PIITokenizer - regex-based detection and reversible tokenization of PII.
 *
 * Replaces emails, US phone numbers, US Social Security Numbers, credit card
 * numbers and "First Last" style names with placeholder tokens such as
 * `[EMAIL_0]`, and remembers the original values so they can be restored.
 *
 * Intended to keep raw PII out of LLM prompts, logs and databases
 * (GDPR Article 25, CCPA Section 1798.100, PCI-DSS Requirement 3.4).
 * Detection is heuristic: it can miss values or produce false positives, so
 * treat it as a best-effort safeguard rather than a compliance guarantee.
 *
 * The reverse map holds the original PII in memory. Call `clear()` as soon as
 * detokenization is done.
 *
 * @example
 * ```typescript
 * const tokenizer = new PIITokenizer();
 *
 * const testCode = 'const email = "john.doe@example.com"; const ssn = "123-45-6789";';
 * const { tokenized, reverseMap } = tokenizer.tokenize(testCode);
 * // tokenized === 'const email = "[EMAIL_0]"; const ssn = "[SSN_0]";'
 *
 * const finalCode = tokenizer.detokenize(tokenized, reverseMap);
 * tokenizer.clear();
 * ```
 */
class PIITokenizer {
    constructor() {
        /**
         * Token -> original value, one Map per PII type.
         * Holds raw PII until clear() is called.
         */
        this.reverseMap = {
            email: new Map(),
            phone: new Map(),
            ssn: new Map(),
            creditCard: new Map(),
            name: new Map(),
        };
        /**
         * Detection patterns. All are global so that String.prototype.replace
         * visits every occurrence.
         */
        this.patterns = {
            // Simplified email shape, case-insensitive.
            // Matches: john.doe@example.com, user+tag@domain.co.uk
            email: /\b[a-z0-9._%+-]+@[a-z0-9.-]+\.[a-z]{2,}\b/gi,
            // US phone numbers: +1-555-123-4567, (555) 123-4567, 555.123.4567, 5551234567.
            // No \b because a word boundary does not work next to parentheses.
            // Only the trailing edge is guarded, by (?!\d); there is no guard on
            // the leading edge, so the tail of a longer digit run can still match.
            phone: /(?:\+1[-.]?)?[(]?([0-9]{3})[)]?[-.\s]?([0-9]{3})[-.\s]?([0-9]{4})(?!\d)/g,
            // US Social Security Number in dashed form only: 123-45-6789
            ssn: /\b\d{3}-\d{2}-\d{4}\b/g,
            // Card numbers, with optional single dash/whitespace between groups:
            //  - 16 digits grouped 4-4-4-4 (1234-5678-9012-3456, 1234567890123456)
            //  - 15 digits grouped 4-6-5 starting with 34 or 37 (American Express)
            // The second alternative exists because a 15-digit number would
            // otherwise fall through to the phone pattern and be partly exposed.
            // No Luhn check is performed.
            creditCard: /\b(?:(?:\d{4}[-\s]?){3}\d{4}|3[47]\d{2}[-\s]?\d{6}[-\s]?\d{5})\b/g,
            // Two adjacent capitalized words: John Doe, Mary Jane.
            // Basic heuristic; also matches non-name text such as "New User".
            name: /\b([A-Z][a-z]+)\s+([A-Z][a-z]+)\b/g,
        };
    }
    /**
     * Replace detected PII in `content` with tokens and record the originals.
     *
     * Patterns run from most to least specific so that a broader pattern does
     * not consume part of a value that belongs to a narrower one:
     * credit cards, SSNs, phones, emails, then names.
     *
     * Token numbers come from the current size of the per-type map, so they
     * keep increasing across calls on the same instance until clear() is called.
     *
     * @param content - Raw text that may contain PII
     * @returns `tokenized` text, the live `reverseMap` of this instance,
     *          `piiCount` for this call, and the per-type `piiBreakdown`
     *          for this call
     */
    tokenize(content) {
        const breakdown = {
            emails: 0,
            phones: 0,
            ssns: 0,
            creditCards: 0,
            names: 0,
        };
        // One replacement pass: swap each accepted match for `[PREFIX_n]`,
        // remember the original, and bump the matching counter.
        const substitute = (text, pattern, store, prefix, counter, accept) => text.replace(pattern, (match, ...groups) => {
            if (accept && !accept(match, ...groups)) {
                return match;
            }
            const token = `[${prefix}_${store.size}]`;
            store.set(token, match);
            breakdown[counter]++;
            return token;
        });
        let tokenized = content;
        // Credit cards go before phones: a phone pattern can match part of a card number.
        tokenized = substitute(tokenized, this.patterns.creditCard, this.reverseMap.creditCard, 'CC', 'creditCards');
        tokenized = substitute(tokenized, this.patterns.ssn, this.reverseMap.ssn, 'SSN', 'ssns');
        tokenized = substitute(tokenized, this.patterns.phone, this.reverseMap.phone, 'PHONE', 'phones');
        tokenized = substitute(tokenized, this.patterns.email, this.reverseMap.email, 'EMAIL', 'emails');
        // The name pattern already requires two letters per word, so this check
        // only skips pairs that contain a two-letter word (e.g. "Al Bo").
        // Longer pairs such as "New User" are still tokenized.
        tokenized = substitute(tokenized, this.patterns.name, this.reverseMap.name, 'NAME', 'names', (_match, first, last) => first.length > 2 && last.length > 2);
        const piiCount = breakdown.emails + breakdown.phones + breakdown.ssns +
            breakdown.creditCards + breakdown.names;
        return {
            tokenized,
            reverseMap: this.reverseMap,
            piiCount,
            piiBreakdown: breakdown,
        };
    }
    /**
     * Put the original values back in place of their tokens.
     *
     * The result contains raw PII again; call clear() once it has been used.
     *
     * @param tokenized - Text containing tokens produced by tokenize()
     * @param reverseMap - Map object returned by tokenize()
     * @returns Text with every known token replaced by its original value
     */
    detokenize(tokenized, reverseMap) {
        let detokenized = tokenized;
        for (const map of Object.values(reverseMap)) {
            for (const [token, original] of map.entries()) {
                // split/join replaces every occurrence without needing String.prototype.replaceAll
                detokenized = detokenized.split(token).join(original);
            }
        }
        return detokenized;
    }
    /**
     * Count the values currently held in the reverse map, per type and in total.
     *
     * These are cumulative for the instance (since construction or the last
     * clear()), unlike `piiBreakdown`, which covers a single tokenize() call.
     *
     * @returns `{ emails, phones, ssns, creditCards, names, total }`
     */
    getStats() {
        const { email, phone, ssn, creditCard, name } = this.reverseMap;
        return {
            emails: email.size,
            phones: phone.size,
            ssns: ssn.size,
            creditCards: creditCard.size,
            names: name.size,
            total: email.size + phone.size + ssn.size + creditCard.size + name.size,
        };
    }
    /**
     * Empty every per-type map so no original PII stays in memory.
     *
     * The object returned as `reverseMap` by tokenize() is this same set of
     * maps, so it is emptied too. Detokenize before calling this.
     */
    clear() {
        for (const map of Object.values(this.reverseMap)) {
            map.clear();
        }
    }
}
324
+ exports.PIITokenizer = PIITokenizer;
325
+ //# sourceMappingURL=pii-tokenization.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"pii-tokenization.js","sourceRoot":"","sources":["../../src/security/pii-tokenization.ts"],"names":[],"mappings":";AAAA;;;;;;;;;GASG;;;AAgDH;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2CG;AACH,MAAa,YAAY;IAAzB;QACE;;;;;WAKG;QACK,eAAU,GAAoB;YACpC,KAAK,EAAE,IAAI,GAAG,EAAE;YAChB,KAAK,EAAE,IAAI,GAAG,EAAE;YAChB,GAAG,EAAE,IAAI,GAAG,EAAE;YACd,UAAU,EAAE,IAAI,GAAG,EAAE;YACrB,IAAI,EAAE,IAAI,GAAG,EAAE;SAChB,CAAC;QAEF;;;;WAIG;QACc,aAAQ,GAAG;YAC1B;;;;;eAKG;YACH,KAAK,EAAE,6CAA6C;YAEpD;;;;;;;;eAQG;YACH,KAAK,EAAE,0EAA0E;YAEjF;;;;;eAKG;YACH,GAAG,EAAE,wBAAwB;YAE7B;;;;;eAKG;YACH,UAAU,EAAE,8BAA8B;YAE1C;;;;;;;;eAQG;YACH,IAAI,EAAE,oCAAoC;SAC3C,CAAC;IAyNJ,CAAC;IAvNC;;;;;;;;;;;;;;;;;;;;;;;;;;;;OA4BG;IACH,QAAQ,CAAC,OAAe;QACtB,IAAI,SAAS,GAAG,OAAO,CAAC;QACxB,MAAM,SAAS,GAAG;YAChB,MAAM,EAAE,CAAC;YACT,MAAM,EAAE,CAAC;YACT,IAAI,EAAE,CAAC;YACP,WAAW,EAAE,CAAC;YACd,KAAK,EAAE,CAAC;SACT,CAAC;QAEF,6EAA6E;QAC7E,sEAAsE;QACtE,gDAAgD;QAChD,4EAA4E;QAC5E,mBAAmB;QACnB,2DAA2D;QAE3D,iEAAiE;QACjE,SAAS,GAAG,SAAS,CAAC,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,UAAU,EAAE,CAAC,EAAE,EAAE,EAAE;YAC7D,MAAM,KAAK,GAAG,OAAO,IAAI,CAAC,UAAU,CAAC,UAAU,CAAC,IAAI,GAAG,CAAC;YACxD,IAAI,CAAC,UAAU,CAAC,UAAU,CAAC,GAAG,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;YAC1C,SAAS,CAAC,WAAW,EAAE,CAAC;YACxB,OAAO,KAAK,CAAC;QACf,CAAC,CAAC,CAAC;QAEH,gBAAgB;QAChB,SAAS,GAAG,SAAS,CAAC,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,EAAE,CAAC,GAAG,EAAE,EAAE;YACvD,MAAM,KAAK,GAAG,QAAQ,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,GAAG,CAAC;YAClD,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,GAAG,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;YACpC,SAAS,CAAC,IAAI,EAAE,CAAC;YACjB,OAAO,KAAK,CAAC;QACf,CAAC,CAAC,CAAC;QAEH,0DAA0D;QAC1D,SAAS,GAAG,SAAS,CAAC,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,EAAE,CAAC,KAAK,EAAE,EAAE;YAC3D,MAAM,KAAK,GAAG,UAAU,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI,GAAG,CAAC;YACtD,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,GAAG,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;YACxC,SAAS,CAAC,MAAM,EAAE,CAAC;YACnB,OAAO,KAAK,CAAC;QACf,CAAC,CAAC,CAAC;QAEH,kBAAkB;QAClB,SAAS,GAAG,SAAS,CAAC,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,EAAE,CAAC,KAAK,EAAE,EAA
E;YAC3D,MAAM,KAAK,GAAG,UAAU,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI,GAAG,CAAC;YACtD,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,GAAG,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;YACxC,SAAS,CAAC,MAAM,EAAE,CAAC;YACnB,OAAO,KAAK,CAAC;QACf,CAAC,CAAC,CAAC;QAEH,gEAAgE;QAChE,SAAS,GAAG,SAAS,CAAC,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,KAAK,EAAE,KAAa,EAAE,IAAY,EAAE,EAAE;YACvF,2DAA2D;YAC3D,2EAA2E;YAC3E,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACxC,MAAM,KAAK,GAAG,SAAS,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC;gBACpD,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;gBACvC,SAAS,CAAC,KAAK,EAAE,CAAC;gBAClB,OAAO,KAAK,CAAC;YACf,CAAC;YACD,OAAO,KAAK,CAAC;QACf,CAAC,CAAC,CAAC;QAEH,MAAM,QAAQ,GAAG,SAAS,CAAC,MAAM,GAAG,SAAS,CAAC,MAAM,GAAG,SAAS,CAAC,IAAI;YACpD,SAAS,CAAC,WAAW,GAAG,SAAS,CAAC,KAAK,CAAC;QAEzD,OAAO;YACL,SAAS;YACT,UAAU,EAAE,IAAI,CAAC,UAAU;YAC3B,QAAQ;YACR,YAAY,EAAE,SAAS;SACxB,CAAC;IACJ,CAAC;IAED;;;;;;;;;;;;;;;;;;;;;;;;;;;OA2BG;IACH,UAAU,CAAC,SAAiB,EAAE,UAA2B;QACvD,IAAI,WAAW,GAAG,SAAS,CAAC;QAE5B,wBAAwB;QACxB,KAAK,MAAM,CAAC,IAAI,EAAE,GAAG,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,UAAU,CAAC,EAAE,CAAC;YACrD,KAAK,MAAM,CAAC,KAAK,EAAE,QAAQ,CAAC,IAAI,GAAG,CAAC,OAAO,EAAE,EAAE,CAAC;gBAC9C,gEAAgE;gBAChE,WAAW,GAAG,WAAW,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YACxD,CAAC;QACH,CAAC;QAED,OAAO,WAAW,CAAC;IACrB,CAAC;IAED;;;;;;;;;;;;;;;;;;;;;;OAsBG;IACH,QAAQ;QACN,OAAO;YACL,MAAM,EAAE,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI;YAClC,MAAM,EAAE,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI;YAClC,IAAI,EAAE,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI;YAC9B,WAAW,EAAE,IAAI,CAAC,UAAU,CAAC,UAAU,CAAC,IAAI;YAC5C,KAAK,EAAE,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI;YAChC,KAAK,EACH,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI;gBAC1B,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI;gBAC1B,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI;gBACxB,IAAI,CAAC,UAAU,CAAC,UAAU,CAAC,IAAI;gBAC/B,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI;SAC5B,CAAC;IACJ,CAAC;IAED;;;;;;;;;;;;;;;;;;;;;;;;OAwBG;IACH,KAAK;QACH,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;QAC9B,IAAI,CAAC,UAAU,CAAC,KAAK,CAA
C,KAAK,EAAE,CAAC;QAC9B,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC;QAC5B,IAAI,CAAC,UAAU,CAAC,UAAU,CAAC,KAAK,EAAE,CAAC;QACnC,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;IAC/B,CAAC;CACF;AA3RD,oCA2RC"}
@@ -0,0 +1,35 @@
1
+ /**
2
+ * EmbeddingGenerator - Consolidated utility for generating embeddings
3
+ *
4
+ * Provides a single source of truth for embedding generation across the codebase.
5
+ * In production, replace with an actual embedding model (OpenAI, Cohere, local BERT).
6
+ */
7
+ /**
8
+ * Generate a simple hash-based embedding from text
9
+ * This is a placeholder implementation for development/testing.
10
+ *
11
+ * In production, replace with:
12
+ * - OpenAI embeddings API (text-embedding-ada-002)
13
+ * - Cohere embeddings
14
+ * - Local transformer models (sentence-transformers)
15
+ * - Custom trained embeddings
16
+ *
17
+ * @param text Text to embed
18
+ * @param dimensions Embedding dimension (default: 384, common for sentence transformers)
19
+ * @returns Normalized embedding vector
20
+ */
21
+ export declare function generateEmbedding(text: string, dimensions?: number): number[];
22
+ /**
23
+ * Check if the current configuration uses a real embedding model
24
+ * (vs. the placeholder hash-based implementation)
25
+ *
26
+ * @returns true if using a real embedding model, false if using placeholder
27
+ */
28
+ export declare function isRealEmbeddingModel(): boolean;
29
+ /**
30
+ * Get the type of embedding model being used
31
+ *
32
+ * @returns Model type string for logging
33
+ */
34
+ export declare function getEmbeddingModelType(): string;
35
+ //# sourceMappingURL=EmbeddingGenerator.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"EmbeddingGenerator.d.ts","sourceRoot":"","sources":["../../src/utils/EmbeddingGenerator.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH;;;;;;;;;;;;;GAaG;AACH,wBAAgB,iBAAiB,CAAC,IAAI,EAAE,MAAM,EAAE,UAAU,GAAE,MAAY,GAAG,MAAM,EAAE,CAmBlF;AAED;;;;;GAKG;AACH,wBAAgB,oBAAoB,IAAI,OAAO,CAO9C;AAED;;;;GAIG;AACH,wBAAgB,qBAAqB,IAAI,MAAM,CAW9C"}
@@ -0,0 +1,72 @@
1
+ "use strict";
2
+ /**
3
+ * EmbeddingGenerator - Consolidated utility for generating embeddings
4
+ *
5
+ * Provides a single source of truth for embedding generation across the codebase.
6
+ * In production, replace with an actual embedding model (OpenAI, Cohere, local BERT).
7
+ */
8
+ Object.defineProperty(exports, "__esModule", { value: true });
9
+ exports.generateEmbedding = generateEmbedding;
10
+ exports.isRealEmbeddingModel = isRealEmbeddingModel;
11
+ exports.getEmbeddingModelType = getEmbeddingModelType;
12
/**
 * Generate a simple hash-based embedding from text.
 * This is a placeholder implementation for development/testing; it carries
 * no semantic meaning and must not be used where real similarity matters.
 *
 * In production, replace with:
 * - OpenAI embeddings API (text-embedding-ada-002)
 * - Cohere embeddings
 * - Local transformer models (sentence-transformers)
 * - Custom trained embeddings
 *
 * Each UTF-16 code unit of `text` is hashed into one bucket of the vector
 * (bucket = (charCode * (position + 1)) % dimensions) and contributes
 * sin(charCode * 0.1) * 0.1 to it. The result is then scaled to unit length.
 *
 * @param text Text to embed
 * @param dimensions Embedding dimension (default: 384, common for sentence transformers)
 * @returns Normalized embedding vector of length `dimensions`; all zeros when
 *          nothing was accumulated (e.g. empty text), and a plain empty array
 *          when `dimensions` is 0
 */
function generateEmbedding(text, dimensions = 384) {
    const embedding = new Array(dimensions).fill(0);
    // With zero dimensions there are no buckets to fill. Without this guard
    // `% dimensions` evaluates to NaN and `embedding[NaN] += ...` would attach
    // a stray "NaN" property to the returned (otherwise empty) array.
    if (dimensions === 0) {
        return embedding;
    }
    // Simple hash-based embedding (for demonstration only)
    for (let i = 0; i < text.length; i++) {
        const charCode = text.charCodeAt(i);
        const index = (charCode * (i + 1)) % dimensions;
        embedding[index] += Math.sin(charCode * 0.1) * 0.1;
    }
    // Normalize to unit vector; skipped for an all-zero vector to avoid
    // dividing by zero.
    const magnitude = Math.sqrt(embedding.reduce((sum, val) => sum + val * val, 0));
    if (magnitude > 0) {
        for (let i = 0; i < dimensions; i++) {
            embedding[i] /= magnitude;
        }
    }
    return embedding;
}
43
/**
 * Report whether the environment is configured for a real embedding model
 * rather than the hash-based placeholder.
 *
 * A real model is considered configured when at least one of the
 * OPENAI_API_KEY, COHERE_API_KEY or EMBEDDING_MODEL_PATH environment
 * variables holds a non-empty value.
 *
 * @returns true if using a real embedding model, false if using placeholder
 */
function isRealEmbeddingModel() {
    const providerVariables = [
        'OPENAI_API_KEY',
        'COHERE_API_KEY',
        'EMBEDDING_MODEL_PATH',
    ];
    // Any non-empty value counts; unset or empty variables do not.
    return providerVariables.some((name) => Boolean(process.env[name]));
}
55
/**
 * Describe which embedding backend the environment selects.
 *
 * Precedence is OPENAI_API_KEY, then COHERE_API_KEY, then
 * EMBEDDING_MODEL_PATH; when none is set to a non-empty value the
 * hash-based placeholder label is returned.
 *
 * @returns Model type string for logging
 */
function getEmbeddingModelType() {
    const {
        OPENAI_API_KEY: openAiKey,
        COHERE_API_KEY: cohereKey,
        EMBEDDING_MODEL_PATH: localModelPath,
    } = process.env;

    if (openAiKey) {
        return 'OpenAI (text-embedding-ada-002)';
    }
    if (cohereKey) {
        return 'Cohere';
    }
    return localModelPath
        ? `Local model (${localModelPath})`
        : 'Hash-based placeholder (NOT FOR PRODUCTION)';
}
72
+ //# sourceMappingURL=EmbeddingGenerator.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"EmbeddingGenerator.js","sourceRoot":"","sources":["../../src/utils/EmbeddingGenerator.ts"],"names":[],"mappings":";AAAA;;;;;GAKG;;AAgBH,8CAmBC;AAQD,oDAOC;AAOD,sDAWC;AAlED;;;;;;;;;;;;;GAaG;AACH,SAAgB,iBAAiB,CAAC,IAAY,EAAE,aAAqB,GAAG;IACtE,MAAM,SAAS,GAAG,IAAI,KAAK,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAEhD,uDAAuD;IACvD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,QAAQ,GAAG,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;QACpC,MAAM,KAAK,GAAG,CAAC,QAAQ,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,UAAU,CAAC;QAChD,SAAS,CAAC,KAAK,CAAC,IAAI,IAAI,CAAC,GAAG,CAAC,QAAQ,GAAG,GAAG,CAAC,GAAG,GAAG,CAAC;IACrD,CAAC;IAED,2BAA2B;IAC3B,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE,CAAC,GAAG,GAAG,GAAG,GAAG,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC;IAChF,IAAI,SAAS,GAAG,CAAC,EAAE,CAAC;QAClB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;YACpC,SAAS,CAAC,CAAC,CAAC,IAAI,SAAS,CAAC;QAC5B,CAAC;IACH,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;;;;GAKG;AACH,SAAgB,oBAAoB;IAClC,+CAA+C;IAC/C,OAAO,CAAC,CAAC,CACP,OAAO,CAAC,GAAG,CAAC,cAAc;QAC1B,OAAO,CAAC,GAAG,CAAC,cAAc;QAC1B,OAAO,CAAC,GAAG,CAAC,oBAAoB,CACjC,CAAC;AACJ,CAAC;AAED;;;;GAIG;AACH,SAAgB,qBAAqB;IACnC,IAAI,OAAO,CAAC,GAAG,CAAC,cAAc,EAAE,CAAC;QAC/B,OAAO,iCAAiC,CAAC;IAC3C,CAAC;IACD,IAAI,OAAO,CAAC,GAAG,CAAC,cAAc,EAAE,CAAC;QAC/B,OAAO,QAAQ,CAAC;IAClB,CAAC;IACD,IAAI,OAAO,CAAC,GAAG,CAAC,oBAAoB,EAAE,CAAC;QACrC,OAAO,gBAAgB,OAAO,CAAC,GAAG,CAAC,oBAAoB,GAAG,CAAC;IAC7D,CAAC;IACD,OAAO,6CAA6C,CAAC;AACvD,CAAC"}