catalist-support-agent 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140) hide show
  1. package/dist/admin-portal.d.ts +43 -0
  2. package/dist/admin-portal.d.ts.map +1 -0
  3. package/dist/admin-portal.js +166 -0
  4. package/dist/admin-portal.js.map +1 -0
  5. package/dist/analysis/entities.d.ts +73 -0
  6. package/dist/analysis/entities.d.ts.map +1 -0
  7. package/dist/analysis/entities.js +378 -0
  8. package/dist/analysis/entities.js.map +1 -0
  9. package/dist/analysis/index.d.ts +44 -0
  10. package/dist/analysis/index.d.ts.map +1 -0
  11. package/dist/analysis/index.js +243 -0
  12. package/dist/analysis/index.js.map +1 -0
  13. package/dist/analysis/intent.d.ts +49 -0
  14. package/dist/analysis/intent.d.ts.map +1 -0
  15. package/dist/analysis/intent.js +320 -0
  16. package/dist/analysis/intent.js.map +1 -0
  17. package/dist/analysis/sentiment.d.ts +57 -0
  18. package/dist/analysis/sentiment.d.ts.map +1 -0
  19. package/dist/analysis/sentiment.js +351 -0
  20. package/dist/analysis/sentiment.js.map +1 -0
  21. package/dist/brand/compliance.d.ts +122 -0
  22. package/dist/brand/compliance.d.ts.map +1 -0
  23. package/dist/brand/compliance.js +378 -0
  24. package/dist/brand/compliance.js.map +1 -0
  25. package/dist/brand/forbidden-terms.d.ts +99 -0
  26. package/dist/brand/forbidden-terms.d.ts.map +1 -0
  27. package/dist/brand/forbidden-terms.js +265 -0
  28. package/dist/brand/forbidden-terms.js.map +1 -0
  29. package/dist/brand/index.d.ts +10 -0
  30. package/dist/brand/index.d.ts.map +1 -0
  31. package/dist/brand/index.js +12 -0
  32. package/dist/brand/index.js.map +1 -0
  33. package/dist/config.d.ts +325 -0
  34. package/dist/config.d.ts.map +1 -0
  35. package/dist/config.js +492 -0
  36. package/dist/config.js.map +1 -0
  37. package/dist/delivery/index.d.ts +84 -0
  38. package/dist/delivery/index.d.ts.map +1 -0
  39. package/dist/delivery/index.js +435 -0
  40. package/dist/delivery/index.js.map +1 -0
  41. package/dist/embeddings/cache.d.ts +96 -0
  42. package/dist/embeddings/cache.d.ts.map +1 -0
  43. package/dist/embeddings/cache.js +193 -0
  44. package/dist/embeddings/cache.js.map +1 -0
  45. package/dist/embeddings/index.d.ts +152 -0
  46. package/dist/embeddings/index.d.ts.map +1 -0
  47. package/dist/embeddings/index.js +337 -0
  48. package/dist/embeddings/index.js.map +1 -0
  49. package/dist/embeddings/openai-client.d.ts +67 -0
  50. package/dist/embeddings/openai-client.d.ts.map +1 -0
  51. package/dist/embeddings/openai-client.js +190 -0
  52. package/dist/embeddings/openai-client.js.map +1 -0
  53. package/dist/errors.d.ts +302 -0
  54. package/dist/errors.d.ts.map +1 -0
  55. package/dist/errors.js +508 -0
  56. package/dist/errors.js.map +1 -0
  57. package/dist/escalation/index.d.ts +93 -0
  58. package/dist/escalation/index.d.ts.map +1 -0
  59. package/dist/escalation/index.js +436 -0
  60. package/dist/escalation/index.js.map +1 -0
  61. package/dist/extraction/deduplication.d.ts +97 -0
  62. package/dist/extraction/deduplication.d.ts.map +1 -0
  63. package/dist/extraction/deduplication.js +271 -0
  64. package/dist/extraction/deduplication.js.map +1 -0
  65. package/dist/extraction/gmail-extractor.d.ts +160 -0
  66. package/dist/extraction/gmail-extractor.d.ts.map +1 -0
  67. package/dist/extraction/gmail-extractor.js +396 -0
  68. package/dist/extraction/gmail-extractor.js.map +1 -0
  69. package/dist/extraction/gmail-token-manager.d.ts +36 -0
  70. package/dist/extraction/gmail-token-manager.d.ts.map +1 -0
  71. package/dist/extraction/gmail-token-manager.js +146 -0
  72. package/dist/extraction/gmail-token-manager.js.map +1 -0
  73. package/dist/extraction/index.d.ts +13 -0
  74. package/dist/extraction/index.d.ts.map +1 -0
  75. package/dist/extraction/index.js +20 -0
  76. package/dist/extraction/index.js.map +1 -0
  77. package/dist/extraction/pii-handler.d.ts +100 -0
  78. package/dist/extraction/pii-handler.d.ts.map +1 -0
  79. package/dist/extraction/pii-handler.js +295 -0
  80. package/dist/extraction/pii-handler.js.map +1 -0
  81. package/dist/extraction/pipeline.d.ts +94 -0
  82. package/dist/extraction/pipeline.d.ts.map +1 -0
  83. package/dist/extraction/pipeline.js +380 -0
  84. package/dist/extraction/pipeline.js.map +1 -0
  85. package/dist/extraction/quality-filter.d.ts +99 -0
  86. package/dist/extraction/quality-filter.d.ts.map +1 -0
  87. package/dist/extraction/quality-filter.js +370 -0
  88. package/dist/extraction/quality-filter.js.map +1 -0
  89. package/dist/extraction/rate-limiter.d.ts +90 -0
  90. package/dist/extraction/rate-limiter.d.ts.map +1 -0
  91. package/dist/extraction/rate-limiter.js +242 -0
  92. package/dist/extraction/rate-limiter.js.map +1 -0
  93. package/dist/extraction/state-manager.d.ts +126 -0
  94. package/dist/extraction/state-manager.d.ts.map +1 -0
  95. package/dist/extraction/state-manager.js +344 -0
  96. package/dist/extraction/state-manager.js.map +1 -0
  97. package/dist/generation/index.d.ts +75 -0
  98. package/dist/generation/index.d.ts.map +1 -0
  99. package/dist/generation/index.js +641 -0
  100. package/dist/generation/index.js.map +1 -0
  101. package/dist/index.d.ts +96 -0
  102. package/dist/index.d.ts.map +1 -0
  103. package/dist/index.js +233 -0
  104. package/dist/index.js.map +1 -0
  105. package/dist/intake/index.d.ts +15 -0
  106. package/dist/intake/index.d.ts.map +1 -0
  107. package/dist/intake/index.js +19 -0
  108. package/dist/intake/index.js.map +1 -0
  109. package/dist/intake/normalizer.d.ts +163 -0
  110. package/dist/intake/normalizer.d.ts.map +1 -0
  111. package/dist/intake/normalizer.js +309 -0
  112. package/dist/intake/normalizer.js.map +1 -0
  113. package/dist/intake/postmark.d.ts +72 -0
  114. package/dist/intake/postmark.d.ts.map +1 -0
  115. package/dist/intake/postmark.js +276 -0
  116. package/dist/intake/postmark.js.map +1 -0
  117. package/dist/intake/slack.d.ts +106 -0
  118. package/dist/intake/slack.d.ts.map +1 -0
  119. package/dist/intake/slack.js +378 -0
  120. package/dist/intake/slack.js.map +1 -0
  121. package/dist/intake/twilio.d.ts +86 -0
  122. package/dist/intake/twilio.d.ts.map +1 -0
  123. package/dist/intake/twilio.js +283 -0
  124. package/dist/intake/twilio.js.map +1 -0
  125. package/dist/knowledge/index.d.ts +100 -0
  126. package/dist/knowledge/index.d.ts.map +1 -0
  127. package/dist/knowledge/index.js +516 -0
  128. package/dist/knowledge/index.js.map +1 -0
  129. package/dist/knowledge/invoice-resolver.d.ts +62 -0
  130. package/dist/knowledge/invoice-resolver.d.ts.map +1 -0
  131. package/dist/knowledge/invoice-resolver.js +267 -0
  132. package/dist/knowledge/invoice-resolver.js.map +1 -0
  133. package/dist/types.d.ts +535 -0
  134. package/dist/types.d.ts.map +1 -0
  135. package/dist/types.js +48 -0
  136. package/dist/types.js.map +1 -0
  137. package/ga-service-account.json +13 -0
  138. package/gmail-knowledge-migration.sql +149 -0
  139. package/nul +1 -0
  140. package/package.json +55 -0
@@ -0,0 +1,271 @@
1
+ /**
2
+ * Deduplication Module
3
+ *
4
+ * Detects duplicate Q&A pairs using content hashing and optional embedding similarity.
5
+ * Prevents storage of near-identical entries in the knowledge base.
6
+ */
7
+ import { createHash } from 'crypto';
8
+ import { createClient } from '@supabase/supabase-js';
9
+ // =============================================================================
10
+ // Default Configuration
11
+ // =============================================================================
12
/**
 * Baseline deduplication settings.
 *
 * The object is frozen so the shared module-level defaults cannot be mutated
 * through a stray reference. Spreading a frozen object still produces a
 * normal, writable copy, so merging these defaults into a per-instance
 * configuration works as before.
 */
const DEFAULT_CONFIG = Object.freeze({
    similarityThreshold: 0.95, // Very high for near-exact matches
    useEmbeddingSimilarity: false, // Start with hash-only for speed
});
16
+ // =============================================================================
17
+ // Deduplication Service
18
+ // =============================================================================
19
/**
 * Detects duplicate Q&A pairs before they are stored in the knowledge base.
 *
 * A pair is identified by a SHA-256 hash of its normalized question and
 * response text. Lookups consult an in-memory hash cache first and then the
 * `gmail_knowledge_entries` table. Embedding-based matching is wired in but is
 * currently a stub that never reports a match.
 */
export class DeduplicationService {
    /** Effective settings: DEFAULT_CONFIG overlaid with the constructor argument. */
    config;
    /** Supabase client used for every `gmail_knowledge_entries` query. */
    supabase;
    /** In-memory cache of content hashes for batch operations. */
    hashCache = new Set();

    /**
     * @param {object} config - Must provide `supabaseUrl` and
     *   `supabaseServiceRoleKey`; may override `similarityThreshold` and
     *   `useEmbeddingSimilarity`.
     */
    constructor(config) {
        this.config = { ...DEFAULT_CONFIG, ...config };
        this.supabase = createClient(config.supabaseUrl, config.supabaseServiceRoleKey);
    }

    /**
     * Generate the content hash for a Q&A pair.
     *
     * The hash covers the normalized question and response text only; the
     * subject is left out because it varies more between duplicates.
     *
     * @param {{questionText: string, responseText: string}} content
     * @returns {string} Hex-encoded SHA-256 digest.
     */
    generateContentHash(content) {
        const normalizedQuestion = this.normalizeText(content.questionText);
        const normalizedResponse = this.normalizeText(content.responseText);
        // The "Q:...|R:..." layout is part of the stored hash format; changing
        // it would stop new hashes from matching rows that already exist.
        const compositeText = `Q:${normalizedQuestion}|R:${normalizedResponse}`;
        return createHash('sha256').update(compositeText).digest('hex');
    }

    /**
     * Check whether a single Q&A pair is a duplicate.
     *
     * Order of checks: in-memory cache, database hash lookup, then (only when
     * `config.useEmbeddingSimilarity` is set) the embedding similarity check.
     *
     * @param {{questionText: string, responseText: string}} content
     * @returns {Promise<object>} `{ isDuplicate, method, contentHash }`, plus
     *   `duplicateId` for database matches and `similarity` for embedding matches.
     */
    async checkDuplicate(content) {
        const contentHash = this.generateContentHash(content);

        // 1. In-memory cache: no id is known for these hits.
        if (this.hashCache.has(contentHash)) {
            return { isDuplicate: true, method: 'hash', contentHash };
        }

        // 2. Exact hash match in the database.
        const hashMatch = await this.checkHashInDatabase(contentHash);
        if (hashMatch) {
            return {
                isDuplicate: true,
                duplicateId: hashMatch.id,
                method: 'hash',
                contentHash,
            };
        }

        // 3. Optional, more expensive embedding comparison.
        if (this.config.useEmbeddingSimilarity) {
            const embeddingMatch = await this.checkEmbeddingSimilarity(content);
            if (embeddingMatch) {
                return {
                    isDuplicate: true,
                    duplicateId: embeddingMatch.id,
                    similarity: embeddingMatch.similarity,
                    method: 'embedding',
                    contentHash,
                };
            }
        }

        return { isDuplicate: false, method: 'none', contentHash };
    }

    /**
     * Look up a stored entry by content hash.
     *
     * Uses `maybeSingle()` so that "no matching row" is not reported as an
     * error. A real query failure is logged and treated as "no match", which
     * keeps the lookup best-effort.
     *
     * @param {string} contentHash
     * @returns {Promise<{id: *}|null>} The matching row id, or null.
     */
    async checkHashInDatabase(contentHash) {
        const { data, error } = await this.supabase
            .from('gmail_knowledge_entries')
            .select('id')
            .eq('content_hash', contentHash)
            .limit(1)
            .maybeSingle();
        if (error) {
            this.logQueryFailure('hash lookup', error);
            return null;
        }
        return data ? { id: data.id } : null;
    }

    /**
     * Check for similar entries using embedding similarity.
     *
     * Placeholder: always resolves to null. Doing this properly needs an
     * embedding for the candidate, which is better done as a post-processing
     * step.
     *
     * @returns {Promise<null>}
     */
    async checkEmbeddingSimilarity(_content) {
        return null;
    }

    /**
     * Add a hash to the local cache (for batch operations).
     * @param {string} contentHash
     */
    addToCache(contentHash) {
        this.hashCache.add(contentHash);
    }

    /** Clear the local hash cache. */
    clearCache() {
        this.hashCache.clear();
    }

    /**
     * Load existing hashes from the database into the cache.
     *
     * @param {number} [limit=10000] - Maximum number of rows to fetch.
     * @returns {Promise<number>} Number of rows fetched (rows without a hash
     *   are counted but not cached); 0 when the query fails.
     */
    async loadHashCache(limit = 10000) {
        const { data, error } = await this.supabase
            .from('gmail_knowledge_entries')
            .select('content_hash')
            .limit(limit);
        if (error) {
            this.logQueryFailure('hash cache load', error);
            return 0;
        }
        if (!data) {
            return 0;
        }
        for (const entry of data) {
            if (entry.content_hash) {
                this.hashCache.add(entry.content_hash);
            }
        }
        return data.length;
    }

    /**
     * Check a batch of Q&A pairs for duplicates with one database query.
     *
     * The result is keyed by content hash, so items that hash identically
     * share one entry. That entry reflects whether the hash already exists in
     * the database or in the cache. A repeat inside the same batch does not
     * turn an otherwise new hash into a duplicate.
     *
     * @param {Array<{questionText: string, responseText: string}>} contents
     * @returns {Promise<Map<string, object>>} Map from content hash to
     *   `{ isDuplicate, method, contentHash, duplicateId? }`.
     */
    async checkBatchDuplicates(contents) {
        const results = new Map();

        // Hash each item once and collapse in-batch repeats up front.
        const uniqueHashes = [
            ...new Set(Array.from(contents ?? [], (content) => this.generateContentHash(content))),
        ];
        if (uniqueHashes.length === 0) {
            return results; // nothing to look up
        }

        // NOTE(review): all hashes go into a single `in` filter; very large
        // batches may need to be split into several queries.
        const { data: existingEntries, error } = await this.supabase
            .from('gmail_knowledge_entries')
            .select('id, content_hash')
            .in('content_hash', uniqueHashes);
        if (error) {
            // Best-effort: fall back to the cache-only verdict below.
            this.logQueryFailure('batch hash lookup', error);
        }
        const existingIds = new Map((existingEntries || []).map((row) => [row.content_hash, row.id]));

        for (const hash of uniqueHashes) {
            if (existingIds.has(hash)) {
                results.set(hash, {
                    isDuplicate: true,
                    duplicateId: existingIds.get(hash),
                    method: 'hash',
                    contentHash: hash,
                });
            } else if (this.hashCache.has(hash)) {
                results.set(hash, { isDuplicate: true, method: 'hash', contentHash: hash });
            } else {
                results.set(hash, { isDuplicate: false, method: 'none', contentHash: hash });
            }
        }
        return results;
    }

    /**
     * Normalize text for consistent hashing: lowercase, collapse whitespace,
     * strip punctuation, trim, and cap at 5000 characters.
     *
     * `null`/`undefined` is treated as empty text instead of throwing.
     *
     * NOTE(review): without the `u` flag, `\w` matches only ASCII
     * `[A-Za-z0-9_]`, so accented and non-Latin letters are removed along
     * with punctuation. This is kept as-is because changing it would alter
     * the hashes of entries that are already stored.
     *
     * @param {string} text
     * @returns {string}
     */
    normalizeText(text) {
        const raw = text == null ? '' : String(text);
        return raw
            .toLowerCase()
            .replace(/\s+/g, ' ') // Collapse whitespace
            .replace(/[^\w\s]/g, '') // Remove punctuation
            .trim()
            .substring(0, 5000); // Limit length
    }

    /**
     * Get cache statistics.
     * @returns {{size: number}}
     */
    getCacheStats() {
        return { size: this.hashCache.size };
    }

    /**
     * Merge new settings into the current configuration. Only `this.config`
     * is replaced; the Supabase client created in the constructor is not
     * rebuilt.
     *
     * @param {object} config - Partial configuration to overlay.
     */
    updateConfig(config) {
        this.config = { ...this.config, ...config };
    }

    /**
     * Report a failed database query without interrupting the caller.
     *
     * @param {string} operation - Short label for the query that failed.
     * @param {*} error - Error object returned by the Supabase client.
     */
    logQueryFailure(operation, error) {
        const detail = error?.message ?? String(error);
        console.warn(`[DeduplicationService] ${operation} failed: ${detail}`);
    }
}
226
+ // =============================================================================
227
+ // Singleton Instance
228
+ // =============================================================================
229
// Lazily created shared instance; stays null until the first successful lookup.
let deduplicationServiceInstance = null;

/**
 * Return the shared DeduplicationService, building it on first use.
 *
 * The Supabase URL is read from SUPABASE_URL, falling back to
 * NEXT_PUBLIC_SUPABASE_URL; the key comes from SUPABASE_SERVICE_ROLE_KEY.
 *
 * @returns {DeduplicationService} The singleton instance.
 * @throws {Error} When the URL or the service role key is not configured.
 */
export function getDeduplicationService() {
    if (deduplicationServiceInstance) {
        return deduplicationServiceInstance;
    }

    const supabaseServiceRoleKey = process.env.SUPABASE_SERVICE_ROLE_KEY;
    const supabaseUrl = process.env.SUPABASE_URL || process.env.NEXT_PUBLIC_SUPABASE_URL;
    const isConfigured = Boolean(supabaseUrl) && Boolean(supabaseServiceRoleKey);
    if (!isConfigured) {
        throw new Error('SUPABASE_URL and SUPABASE_SERVICE_ROLE_KEY are required');
    }

    const settings = {
        supabaseUrl,
        supabaseServiceRoleKey,
        similarityThreshold: 0.95,
        useEmbeddingSimilarity: false,
    };
    deduplicationServiceInstance = new DeduplicationService(settings);
    return deduplicationServiceInstance;
}
249
/**
 * Drop the cached singleton so the next getDeduplicationService() call
 * constructs a fresh instance. Intended for tests.
 *
 * @returns {void}
 */
export function resetDeduplicationService() {
    deduplicationServiceInstance = null;
}
255
+ // =============================================================================
256
+ // Utility Functions
257
+ // =============================================================================
258
/**
 * Convenience wrapper: hash a Q&A pair using the shared service.
 *
 * @param {{questionText: string, responseText: string}} content
 * @returns {string} The content hash produced by generateContentHash().
 */
export function hashContent(content) {
    const service = getDeduplicationService();
    return service.generateContentHash(content);
}
264
/**
 * Convenience wrapper: run checkDuplicate() on the shared service and report
 * only the boolean verdict.
 *
 * @param {{questionText: string, responseText: string}} content
 * @returns {Promise<boolean>} True when the content is a duplicate.
 */
export async function isDuplicate(content) {
    const { isDuplicate: duplicateFound } = await getDeduplicationService().checkDuplicate(content);
    return duplicateFound;
}
271
+ //# sourceMappingURL=deduplication.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"deduplication.js","sourceRoot":"","sources":["../../src/extraction/deduplication.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AACpC,OAAO,EAAE,YAAY,EAAkB,MAAM,uBAAuB,CAAC;AAgCrE,gFAAgF;AAChF,wBAAwB;AACxB,gFAAgF;AAEhF,MAAM,cAAc,GAAwE;IAC1F,mBAAmB,EAAE,IAAI,EAAE,mCAAmC;IAC9D,sBAAsB,EAAE,KAAK,EAAE,iCAAiC;CACjE,CAAC;AAEF,gFAAgF;AAChF,wBAAwB;AACxB,gFAAgF;AAEhF,MAAM,OAAO,oBAAoB;IACvB,MAAM,CAAsB;IAC5B,QAAQ,CAAiB;IAEjC,yDAAyD;IACjD,SAAS,GAAgB,IAAI,GAAG,EAAE,CAAC;IAE3C,YAAY,MAA2B;QACrC,IAAI,CAAC,MAAM,GAAG,EAAE,GAAG,cAAc,EAAE,GAAG,MAAM,EAAE,CAAC;QAC/C,IAAI,CAAC,QAAQ,GAAG,YAAY,CAAC,MAAM,CAAC,WAAW,EAAE,MAAM,CAAC,sBAAsB,CAAC,CAAC;IAClF,CAAC;IAED;;;;;OAKG;IACH,mBAAmB,CAAC,OAAkB;QACpC,qEAAqE;QACrE,MAAM,kBAAkB,GAAG,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC;QACpE,MAAM,kBAAkB,GAAG,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC;QAEpE,oCAAoC;QACpC,MAAM,aAAa,GAAG,KAAK,kBAAkB,MAAM,kBAAkB,EAAE,CAAC;QAExE,wBAAwB;QACxB,OAAO,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;IAClE,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,cAAc,CAAC,OAAkB;QACrC,MAAM,WAAW,GAAG,IAAI,CAAC,mBAAmB,CAAC,OAAO,CAAC,CAAC;QAEtD,wDAAwD;QACxD,IAAI,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,WAAW,CAAC,EAAE,CAAC;YACpC,OAAO;gBACL,WAAW,EAAE,IAAI;gBACjB,MAAM,EAAE,MAAM;gBACd,WAAW;aACZ,CAAC;QACJ,CAAC;QAED,mCAAmC;QACnC,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,mBAAmB,CAAC,WAAW,CAAC,CAAC;QAC9D,IAAI,SAAS,EAAE,CAAC;YACd,OAAO;gBACL,WAAW,EAAE,IAAI;gBACjB,WAAW,EAAE,SAAS,CAAC,EAAE;gBACzB,MAAM,EAAE,MAAM;gBACd,WAAW;aACZ,CAAC;QACJ,CAAC;QAED,4DAA4D;QAC5D,IAAI,IAAI,CAAC,MAAM,CAAC,sBAAsB,EAAE,CAAC;YACvC,MAAM,cAAc,GAAG,MAAM,IAAI,CAAC,wBAAwB,CAAC,OAAO,CAAC,CAAC;YACpE,IAAI,cAAc,EAAE,CAAC;gBACnB,OAAO;oBACL,WAAW,EAAE,IAAI;oBACjB,WAAW,EAAE,cAAc,CAAC,EAAE;oBAC9B,UAAU,EAAE,cAAc,CAAC,UAAU;oBACrC,MAAM,EAAE,WAAW;oBACnB,WAAW;iBACZ,CAAC;YACJ,CAAC;QACH,CAAC;QAED,kBAAkB;QAClB,OAAO;YACL,WAAW,EAAE,KAAK;YAClB,MAAM,EAAE,MAAM;YACd,WAAW;SACZ,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,mBAAmB,CAC/B,WAAmB;QAEnB,MAAM,EAAE
,IAAI,EAAE,KAAK,EAAE,GAAG,MAAM,IAAI,CAAC,QAAQ;aACxC,IAAI,CAAC,yBAAyB,CAAC;aAC/B,MAAM,CAAC,IAAI,CAAC;aACZ,EAAE,CAAC,cAAc,EAAE,WAAW,CAAC;aAC/B,KAAK,CAAC,CAAC,CAAC;aACR,MAAM,EAAE,CAAC;QAEZ,IAAI,KAAK,IAAI,CAAC,IAAI,EAAE,CAAC;YACnB,OAAO,IAAI,CAAC;QACd,CAAC;QAED,OAAO,EAAE,EAAE,EAAE,IAAI,CAAC,EAAE,EAAE,CAAC;IACzB,CAAC;IAED;;;OAGG;IACK,KAAK,CAAC,wBAAwB,CACpC,QAAmB;QAEnB,mDAAmD;QACnD,+CAA+C;QAC/C,qEAAqE;QAErE,OAAO,IAAI,CAAC;IACd,CAAC;IAED;;OAEG;IACH,UAAU,CAAC,WAAmB;QAC5B,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;IAClC,CAAC;IAED;;OAEG;IACH,UAAU;QACR,IAAI,CAAC,SAAS,CAAC,KAAK,EAAE,CAAC;IACzB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,aAAa,CAAC,QAAgB,KAAK;QACvC,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,MAAM,IAAI,CAAC,QAAQ;aACxC,IAAI,CAAC,yBAAyB,CAAC;aAC/B,MAAM,CAAC,cAAc,CAAC;aACtB,KAAK,CAAC,KAAK,CAAC,CAAC;QAEhB,IAAI,KAAK,IAAI,CAAC,IAAI,EAAE,CAAC;YACnB,OAAO,CAAC,CAAC;QACX,CAAC;QAED,KAAK,MAAM,KAAK,IAAI,IAAI,EAAE,CAAC;YACzB,IAAI,KAAK,CAAC,YAAY,EAAE,CAAC;gBACvB,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC;YACzC,CAAC;QACH,CAAC;QAED,OAAO,IAAI,CAAC,MAAM,CAAC;IACrB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,oBAAoB,CACxB,QAAqB;QAErB,MAAM,OAAO,GAAG,IAAI,GAAG,EAA+B,CAAC;QAEvD,4BAA4B;QAC5B,MAAM,aAAa,GAAG,IAAI,GAAG,EAAqB,CAAC;QACnD,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;YAC/B,MAAM,IAAI,GAAG,IAAI,CAAC,mBAAmB,CAAC,OAAO,CAAC,CAAC;YAC/C,aAAa,CAAC,GAAG,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;QACnC,CAAC;QAED,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,aAAa,CAAC,IAAI,EAAE,CAAC,CAAC;QAEhD,kCAAkC;QAClC,MAAM,EAAE,IAAI,EAAE,eAAe,EAAE,GAAG,MAAM,IAAI,CAAC,QAAQ;aAClD,IAAI,CAAC,yBAAyB,CAAC;aAC/B,MAAM,CAAC,kBAAkB,CAAC;aAC1B,EAAE,CAAC,cAAc,EAAE,MAAM,CAAC,CAAC;QAE9B,MAAM,eAAe,GAAG,IAAI,GAAG,CAC7B,CAAC,eAAe,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,YAAY,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,CAC3D,CAAC;QAEF,uBAAuB;QACvB,MAAM,UAAU,GAAG,IAAI,GAAG,EAAU,CAAC;QAErC,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;YAC/B,MAAM,IAAI,GAAG,IAAI,CAAC,mBAAmB,CAAC,OAAO,CAAC,CAAC;YAE/C,sCAAsC;YACtC,IAAI,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;gBACzB,OAAO,CAAC,
GAAG,CAAC,IAAI,EAAE;oBAChB,WAAW,EAAE,IAAI;oBACjB,MAAM,EAAE,MAAM;oBACd,WAAW,EAAE,IAAI;iBAClB,CAAC,CAAC;gBACH,SAAS;YACX,CAAC;YAED,8BAA8B;YAC9B,MAAM,UAAU,GAAG,eAAe,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;YAC7C,IAAI,UAAU,EAAE,CAAC;gBACf,OAAO,CAAC,GAAG,CAAC,IAAI,EAAE;oBAChB,WAAW,EAAE,IAAI;oBACjB,WAAW,EAAE,UAAU;oBACvB,MAAM,EAAE,MAAM;oBACd,WAAW,EAAE,IAAI;iBAClB,CAAC,CAAC;gBACH,SAAS;YACX,CAAC;YAED,wBAAwB;YACxB,IAAI,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC7B,OAAO,CAAC,GAAG,CAAC,IAAI,EAAE;oBAChB,WAAW,EAAE,IAAI;oBACjB,MAAM,EAAE,MAAM;oBACd,WAAW,EAAE,IAAI;iBAClB,CAAC,CAAC;gBACH,SAAS;YACX,CAAC;YAED,kBAAkB;YAClB,OAAO,CAAC,GAAG,CAAC,IAAI,EAAE;gBAChB,WAAW,EAAE,KAAK;gBAClB,MAAM,EAAE,MAAM;gBACd,WAAW,EAAE,IAAI;aAClB,CAAC,CAAC;YAEH,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACvB,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAED;;OAEG;IACK,aAAa,CAAC,IAAY;QAChC,OAAO,IAAI;aACR,WAAW,EAAE;aACb,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,sBAAsB;aAC3C,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC,qBAAqB;aAC7C,IAAI,EAAE;aACN,SAAS,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC,eAAe;IACxC,CAAC;IAED;;OAEG;IACH,aAAa;QACX,OAAO,EAAE,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC;IACvC,CAAC;IAED;;OAEG;IACH,YAAY,CAAC,MAAoC;QAC/C,IAAI,CAAC,MAAM,GAAG,EAAE,GAAG,IAAI,CAAC,MAAM,EAAE,GAAG,MAAM,EAAE,CAAC;IAC9C,CAAC;CACF;AAED,gFAAgF;AAChF,qBAAqB;AACrB,gFAAgF;AAEhF,IAAI,4BAA4B,GAAgC,IAAI,CAAC;AAErE;;GAEG;AACH,MAAM,UAAU,uBAAuB;IACrC,IAAI,CAAC,4BAA4B,EAAE,CAAC;QAClC,MAAM,WAAW,GAAG,OAAO,CAAC,GAAG,CAAC,YAAY,IAAI,OAAO,CAAC,GAAG,CAAC,wBAAwB,CAAC;QACrF,MAAM,sBAAsB,GAAG,OAAO,CAAC,GAAG,CAAC,yBAAyB,CAAC;QAErE,IAAI,CAAC,WAAW,IAAI,CAAC,sBAAsB,EAAE,CAAC;YAC5C,MAAM,IAAI,KAAK,CAAC,yDAAyD,CAAC,CAAC;QAC7E,CAAC;QAED,4BAA4B,GAAG,IAAI,oBAAoB,CAAC;YACtD,WAAW;YACX,sBAAsB;YACtB,mBAAmB,EAAE,IAAI;YACzB,sBAAsB,EAAE,KAAK;SAC9B,CAAC,CAAC;IACL,CAAC;IAED,OAAO,4BAA4B,CAAC;AACtC,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,yBAAyB;IACvC,4BAA4B,GAAG,IAAI,CAAC;AACtC,CAAC;AAED,gFAAgF;AAChF,oBAAoB;AACpB,gFAAgF;AAEhF;;GAEG;AACH,MAAM,UAAU,WAAW,CAAC,OAAkB;IAC5C,OAAO,uBAAuB,EAAE,CAAC,mBAAmB,CAAC,OAAO,CAAC,CAAC
;AAChE,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,OAAkB;IAClD,MAAM,MAAM,GAAG,MAAM,uBAAuB,EAAE,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC;IACvE,OAAO,MAAM,CAAC,WAAW,CAAC;AAC5B,CAAC"}
@@ -0,0 +1,160 @@
1
+ /**
2
+ * Gmail Extractor Module
3
+ *
4
+ * Extracts Q&A pairs from Gmail threads for the knowledge base.
5
+ * Uses Gmail API to fetch threads and parses them to identify
6
+ * inbound questions and outbound responses.
7
+ */
8
+ import { PIIHandler } from './pii-handler.js';
9
+ import { QualityFilter } from './quality-filter.js';
10
+ import { DeduplicationService } from './deduplication.js';
11
+ import type { IntentCategory } from '../types.js';
12
+ export interface GmailMessage { // Shape of a single Gmail message as consumed by the extractor.
13
+ id: string;
14
+ threadId: string; // id of the thread this message belongs to
15
+ internalDate: string; // NOTE(review): presumably epoch milliseconds as a string, as in the Gmail API — confirm
16
+ snippet: string;
17
+ payload: { // headers plus an optional body and up to two levels of nested parts
18
+ headers: Array<{ // name/value pairs
19
+ name: string;
20
+ value: string;
21
+ }>;
22
+ mimeType?: string;
23
+ body?: {
24
+ data?: string; // NOTE(review): likely base64url-encoded (GmailExtractor declares decodeBase64Url) — confirm
25
+ size?: number;
26
+ };
27
+ parts?: Array<{ // first nesting level
28
+ mimeType?: string;
29
+ body?: {
30
+ data?: string;
31
+ size?: number;
32
+ };
33
+ parts?: Array<{ // second nesting level; no deeper level is declared
34
+ mimeType?: string;
35
+ body?: { // this level declares no size field
36
+ data?: string;
37
+ };
38
+ }>;
39
+ }>;
40
+ };
41
+ labelIds?: string[];
42
+ }
43
+ export interface GmailThread { // A thread: its id, an optional historyId, and its messages.
44
+ id: string;
45
+ historyId?: string;
46
+ messages: GmailMessage[]; // NOTE(review): ordering is not specified by this declaration — confirm with the caller
47
+ }
48
+ export interface ExtractedQAPair { // One question/response pair extracted from a thread, with its metadata.
48
+ gmailThreadId: string;
49
+ gmailMessageId: string;
50
+ questionSubject: string | null; // nullable
51
+ questionText: string;
52
+ responseText: string;
53
+ customerEmail: string;
54
+ responderEmail: string;
55
+ emailReceivedAt: Date;
56
+ emailRespondedAt: Date;
57
+ responseTimeMs: number; // NOTE(review): presumably emailRespondedAt minus emailReceivedAt, in milliseconds — confirm
58
+ intentCategory: IntentCategory | null; // nullable; IntentCategory comes from ../types.js
59
+ qualityScore: number; // NOTE(review): scale/range is not visible here — confirm against QualityFilter
60
+ contentHash: string; // NOTE(review): presumably the DeduplicationService content hash — confirm
61
+ piiRedacted: boolean;
62
+ piiTypesFound: string[];
63
+ resolutionIndicator: string | null; // nullable
64
+ }
66
+ export interface ExtractionResult { // Outcome of extractFromThreads: extracted pairs, skipped items, and counters.
67
+ extracted: ExtractedQAPair[];
68
+ skipped: { // one record per skipped item; messageId is optional
69
+ reason: string;
70
+ threadId: string;
71
+ messageId?: string;
72
+ }[];
73
+ stats: { // numeric counters for the run
74
+ threadsProcessed: number;
75
+ messagesProcessed: number;
76
+ pairsExtracted: number;
77
+ duplicatesSkipped: number;
78
+ qualityFilteredOut: number;
79
+ errors: number;
80
+ };
81
+ }
82
+ export interface GmailExtractorConfig { // Options accepted by the GmailExtractor constructor.
83
+ supabaseUrl: string;
84
+ supabaseServiceRoleKey: string;
85
+ salesEmailAddress: string; // NOTE(review): presumably the company mailbox used to tell inbound from outbound mail — confirm
86
+ preserveEmailDomains?: string[]; // NOTE(review): presumably domains exempt from PII redaction — confirm against PIIHandler
87
+ }
88
+ export declare class GmailExtractor { // Turns Gmail threads into Q&A pairs (extractFromThreads) and stores them (storeExtractedPairs).
89
+ private supabase; // private members are declared without types in this declaration file
90
+ private piiHandler;
91
+ private qualityFilter;
92
+ private deduplicationService;
93
+ private salesEmail;
94
+ constructor(config: GmailExtractorConfig);
95
+ /**
96
+ * Extract Q&A pairs from a list of Gmail threads
97
+ */
98
+ extractFromThreads(threads: GmailThread[]): Promise<ExtractionResult>; // resolves with pairs, skipped items, and stats
99
+ /**
100
+ * Extract Q&A pairs from a single thread
101
+ */
102
+ private extractFromThread;
103
+ /**
104
+ * Get a header value from a Gmail message
105
+ */
106
+ private getHeader;
107
+ /**
108
+ * Extract email address from a From/To header value
109
+ */
110
+ private extractEmail;
111
+ /**
112
+ * Check if an email is from the company
113
+ */
114
+ private isCompanyEmail;
115
+ /**
116
+ * Extract plain text body from a Gmail message
117
+ */
118
+ private extractBody;
119
+ /**
120
+ * Find body part with specified MIME type
121
+ */
122
+ private findBodyPart;
123
+ /**
124
+ * Decode base64url encoded string
125
+ */
126
+ private decodeBase64Url;
127
+ /**
128
+ * Strip HTML tags from string
129
+ */
130
+ private stripHtml;
131
+ /**
132
+ * Clean email body (remove signatures, quoted replies, etc.)
133
+ */
134
+ private cleanEmailBody;
135
+ /**
136
+ * Clean subject line (remove Re:, Fwd:, etc.)
137
+ */
138
+ private cleanSubject;
139
+ /**
140
+ * Store extracted pairs in the database
141
+ */
142
+ storeExtractedPairs(pairs: ExtractedQAPair[], batchId?: string): Promise<{ // batchId is optional; resolves with two counters
143
+ stored: number;
144
+ errors: number;
145
+ }>;
146
+ /**
147
+ * Get deduplication service for external use
148
+ */
149
+ getDeduplicationService(): DeduplicationService;
150
+ /**
151
+ * Get quality filter for external use
152
+ */
153
+ getQualityFilter(): QualityFilter;
154
+ /**
155
+ * Get PII handler for external use
156
+ */
157
+ getPIIHandler(): PIIHandler;
158
+ }
159
+ export { PIIHandler, QualityFilter, DeduplicationService };
160
+ //# sourceMappingURL=gmail-extractor.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"gmail-extractor.d.ts","sourceRoot":"","sources":["../../src/extraction/gmail-extractor.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAGH,OAAO,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAC;AAC9C,OAAO,EAAE,aAAa,EAA4B,MAAM,qBAAqB,CAAC;AAC9E,OAAO,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAC;AAC1D,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAMlD,MAAM,WAAW,YAAY;IAC3B,EAAE,EAAE,MAAM,CAAC;IACX,QAAQ,EAAE,MAAM,CAAC;IACjB,YAAY,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE;QACP,OAAO,EAAE,KAAK,CAAC;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,KAAK,EAAE,MAAM,CAAA;SAAE,CAAC,CAAC;QAChD,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,IAAI,CAAC,EAAE;YAAE,IAAI,CAAC,EAAE,MAAM,CAAC;YAAC,IAAI,CAAC,EAAE,MAAM,CAAA;SAAE,CAAC;QACxC,KAAK,CAAC,EAAE,KAAK,CAAC;YACZ,QAAQ,CAAC,EAAE,MAAM,CAAC;YAClB,IAAI,CAAC,EAAE;gBAAE,IAAI,CAAC,EAAE,MAAM,CAAC;gBAAC,IAAI,CAAC,EAAE,MAAM,CAAA;aAAE,CAAC;YACxC,KAAK,CAAC,EAAE,KAAK,CAAC;gBAAE,QAAQ,CAAC,EAAE,MAAM,CAAC;gBAAC,IAAI,CAAC,EAAE;oBAAE,IAAI,CAAC,EAAE,MAAM,CAAA;iBAAE,CAAA;aAAE,CAAC,CAAC;SAChE,CAAC,CAAC;KACJ,CAAC;IACF,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;CACrB;AAED,MAAM,WAAW,WAAW;IAC1B,EAAE,EAAE,MAAM,CAAC;IACX,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,YAAY,EAAE,CAAC;CAC1B;AAED,MAAM,WAAW,eAAe;IAC9B,aAAa,EAAE,MAAM,CAAC;IACtB,cAAc,EAAE,MAAM,CAAC;IACvB,eAAe,EAAE,MAAM,GAAG,IAAI,CAAC;IAC/B,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;IACrB,aAAa,EAAE,MAAM,CAAC;IACtB,cAAc,EAAE,MAAM,CAAC;IACvB,eAAe,EAAE,IAAI,CAAC;IACtB,gBAAgB,EAAE,IAAI,CAAC;IACvB,cAAc,EAAE,MAAM,CAAC;IACvB,cAAc,EAAE,cAAc,GAAG,IAAI,CAAC;IACtC,YAAY,EAAE,MAAM,CAAC;IACrB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,OAAO,CAAC;IACrB,aAAa,EAAE,MAAM,EAAE,CAAC;IACxB,mBAAmB,EAAE,MAAM,GAAG,IAAI,CAAC;CACpC;AAED,MAAM,WAAW,gBAAgB;IAC/B,SAAS,EAAE,eAAe,EAAE,CAAC;IAC7B,OAAO,EAAE;QACP,MAAM,EAAE,MAAM,CAAC;QACf,QAAQ,EAAE,MAAM,CAAC;QACjB,SAAS,CAAC,EAAE,MAAM,CAAC;KACpB,EAAE,CAAC;IACJ,KAAK,EAAE;QACL,gBAAgB,EAAE,MAAM,CAAC;QACzB,iBAAiB,EAAE,MAAM,CAAC;QAC1B,cAAc,EAAE,MAAM,CAAC;QACvB,iBAAiB,EAAE,MAAM,CAAC;QAC1B,kBAAkB,EAAE,MAAM,CAAC;QAC3B,MAAM,EAAE,MAAM,CAAC;KAChB,CAAC;CACH;AAE
D,MAAM,WAAW,oBAAoB;IACnC,WAAW,EAAE,MAAM,CAAC;IACpB,sBAAsB,EAAE,MAAM,CAAC;IAC/B,iBAAiB,EAAE,MAAM,CAAC;IAC1B,oBAAoB,CAAC,EAAE,MAAM,EAAE,CAAC;CACjC;AAMD,qBAAa,cAAc;IACzB,OAAO,CAAC,QAAQ,CAAiB;IACjC,OAAO,CAAC,UAAU,CAAa;IAC/B,OAAO,CAAC,aAAa,CAAgB;IACrC,OAAO,CAAC,oBAAoB,CAAuB;IACnD,OAAO,CAAC,UAAU,CAAS;gBAEf,MAAM,EAAE,oBAAoB;IAmBxC;;OAEG;IACG,kBAAkB,CAAC,OAAO,EAAE,WAAW,EAAE,GAAG,OAAO,CAAC,gBAAgB,CAAC;IAoF3E;;OAEG;YACW,iBAAiB;IAyG/B;;OAEG;IACH,OAAO,CAAC,SAAS;IAOjB;;OAEG;IACH,OAAO,CAAC,YAAY;IAOpB;;OAEG;IACH,OAAO,CAAC,cAAc;IAQtB;;OAEG;IACH,OAAO,CAAC,WAAW;IAqBnB;;OAEG;IACH,OAAO,CAAC,YAAY;IA6BpB;;OAEG;IACH,OAAO,CAAC,eAAe;IAMvB;;OAEG;IACH,OAAO,CAAC,SAAS;IAejB;;OAEG;IACH,OAAO,CAAC,cAAc;IAmCtB;;OAEG;IACH,OAAO,CAAC,YAAY;IAMpB;;OAEG;IACG,mBAAmB,CACvB,KAAK,EAAE,eAAe,EAAE,EACxB,OAAO,CAAC,EAAE,MAAM,GACf,OAAO,CAAC;QAAE,MAAM,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE,CAAC;IA0C9C;;OAEG;IACH,uBAAuB,IAAI,oBAAoB;IAI/C;;OAEG;IACH,gBAAgB,IAAI,aAAa;IAIjC;;OAEG;IACH,aAAa,IAAI,UAAU;CAG5B;AAMD,OAAO,EAAE,UAAU,EAAE,aAAa,EAAE,oBAAoB,EAAE,CAAC"}