catalist-support-agent 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/admin-portal.d.ts +43 -0
- package/dist/admin-portal.d.ts.map +1 -0
- package/dist/admin-portal.js +166 -0
- package/dist/admin-portal.js.map +1 -0
- package/dist/analysis/entities.d.ts +73 -0
- package/dist/analysis/entities.d.ts.map +1 -0
- package/dist/analysis/entities.js +378 -0
- package/dist/analysis/entities.js.map +1 -0
- package/dist/analysis/index.d.ts +44 -0
- package/dist/analysis/index.d.ts.map +1 -0
- package/dist/analysis/index.js +243 -0
- package/dist/analysis/index.js.map +1 -0
- package/dist/analysis/intent.d.ts +49 -0
- package/dist/analysis/intent.d.ts.map +1 -0
- package/dist/analysis/intent.js +320 -0
- package/dist/analysis/intent.js.map +1 -0
- package/dist/analysis/sentiment.d.ts +57 -0
- package/dist/analysis/sentiment.d.ts.map +1 -0
- package/dist/analysis/sentiment.js +351 -0
- package/dist/analysis/sentiment.js.map +1 -0
- package/dist/brand/compliance.d.ts +122 -0
- package/dist/brand/compliance.d.ts.map +1 -0
- package/dist/brand/compliance.js +378 -0
- package/dist/brand/compliance.js.map +1 -0
- package/dist/brand/forbidden-terms.d.ts +99 -0
- package/dist/brand/forbidden-terms.d.ts.map +1 -0
- package/dist/brand/forbidden-terms.js +265 -0
- package/dist/brand/forbidden-terms.js.map +1 -0
- package/dist/brand/index.d.ts +10 -0
- package/dist/brand/index.d.ts.map +1 -0
- package/dist/brand/index.js +12 -0
- package/dist/brand/index.js.map +1 -0
- package/dist/config.d.ts +325 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +492 -0
- package/dist/config.js.map +1 -0
- package/dist/delivery/index.d.ts +84 -0
- package/dist/delivery/index.d.ts.map +1 -0
- package/dist/delivery/index.js +435 -0
- package/dist/delivery/index.js.map +1 -0
- package/dist/embeddings/cache.d.ts +96 -0
- package/dist/embeddings/cache.d.ts.map +1 -0
- package/dist/embeddings/cache.js +193 -0
- package/dist/embeddings/cache.js.map +1 -0
- package/dist/embeddings/index.d.ts +152 -0
- package/dist/embeddings/index.d.ts.map +1 -0
- package/dist/embeddings/index.js +337 -0
- package/dist/embeddings/index.js.map +1 -0
- package/dist/embeddings/openai-client.d.ts +67 -0
- package/dist/embeddings/openai-client.d.ts.map +1 -0
- package/dist/embeddings/openai-client.js +190 -0
- package/dist/embeddings/openai-client.js.map +1 -0
- package/dist/errors.d.ts +302 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/errors.js +508 -0
- package/dist/errors.js.map +1 -0
- package/dist/escalation/index.d.ts +93 -0
- package/dist/escalation/index.d.ts.map +1 -0
- package/dist/escalation/index.js +436 -0
- package/dist/escalation/index.js.map +1 -0
- package/dist/extraction/deduplication.d.ts +97 -0
- package/dist/extraction/deduplication.d.ts.map +1 -0
- package/dist/extraction/deduplication.js +271 -0
- package/dist/extraction/deduplication.js.map +1 -0
- package/dist/extraction/gmail-extractor.d.ts +160 -0
- package/dist/extraction/gmail-extractor.d.ts.map +1 -0
- package/dist/extraction/gmail-extractor.js +396 -0
- package/dist/extraction/gmail-extractor.js.map +1 -0
- package/dist/extraction/gmail-token-manager.d.ts +36 -0
- package/dist/extraction/gmail-token-manager.d.ts.map +1 -0
- package/dist/extraction/gmail-token-manager.js +146 -0
- package/dist/extraction/gmail-token-manager.js.map +1 -0
- package/dist/extraction/index.d.ts +13 -0
- package/dist/extraction/index.d.ts.map +1 -0
- package/dist/extraction/index.js +20 -0
- package/dist/extraction/index.js.map +1 -0
- package/dist/extraction/pii-handler.d.ts +100 -0
- package/dist/extraction/pii-handler.d.ts.map +1 -0
- package/dist/extraction/pii-handler.js +295 -0
- package/dist/extraction/pii-handler.js.map +1 -0
- package/dist/extraction/pipeline.d.ts +94 -0
- package/dist/extraction/pipeline.d.ts.map +1 -0
- package/dist/extraction/pipeline.js +380 -0
- package/dist/extraction/pipeline.js.map +1 -0
- package/dist/extraction/quality-filter.d.ts +99 -0
- package/dist/extraction/quality-filter.d.ts.map +1 -0
- package/dist/extraction/quality-filter.js +370 -0
- package/dist/extraction/quality-filter.js.map +1 -0
- package/dist/extraction/rate-limiter.d.ts +90 -0
- package/dist/extraction/rate-limiter.d.ts.map +1 -0
- package/dist/extraction/rate-limiter.js +242 -0
- package/dist/extraction/rate-limiter.js.map +1 -0
- package/dist/extraction/state-manager.d.ts +126 -0
- package/dist/extraction/state-manager.d.ts.map +1 -0
- package/dist/extraction/state-manager.js +344 -0
- package/dist/extraction/state-manager.js.map +1 -0
- package/dist/generation/index.d.ts +75 -0
- package/dist/generation/index.d.ts.map +1 -0
- package/dist/generation/index.js +641 -0
- package/dist/generation/index.js.map +1 -0
- package/dist/index.d.ts +96 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +233 -0
- package/dist/index.js.map +1 -0
- package/dist/intake/index.d.ts +15 -0
- package/dist/intake/index.d.ts.map +1 -0
- package/dist/intake/index.js +19 -0
- package/dist/intake/index.js.map +1 -0
- package/dist/intake/normalizer.d.ts +163 -0
- package/dist/intake/normalizer.d.ts.map +1 -0
- package/dist/intake/normalizer.js +309 -0
- package/dist/intake/normalizer.js.map +1 -0
- package/dist/intake/postmark.d.ts +72 -0
- package/dist/intake/postmark.d.ts.map +1 -0
- package/dist/intake/postmark.js +276 -0
- package/dist/intake/postmark.js.map +1 -0
- package/dist/intake/slack.d.ts +106 -0
- package/dist/intake/slack.d.ts.map +1 -0
- package/dist/intake/slack.js +378 -0
- package/dist/intake/slack.js.map +1 -0
- package/dist/intake/twilio.d.ts +86 -0
- package/dist/intake/twilio.d.ts.map +1 -0
- package/dist/intake/twilio.js +283 -0
- package/dist/intake/twilio.js.map +1 -0
- package/dist/knowledge/index.d.ts +100 -0
- package/dist/knowledge/index.d.ts.map +1 -0
- package/dist/knowledge/index.js +516 -0
- package/dist/knowledge/index.js.map +1 -0
- package/dist/knowledge/invoice-resolver.d.ts +62 -0
- package/dist/knowledge/invoice-resolver.d.ts.map +1 -0
- package/dist/knowledge/invoice-resolver.js +267 -0
- package/dist/knowledge/invoice-resolver.js.map +1 -0
- package/dist/types.d.ts +535 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +48 -0
- package/dist/types.js.map +1 -0
- package/ga-service-account.json +13 -0
- package/gmail-knowledge-migration.sql +149 -0
- package/nul +1 -0
- package/package.json +55 -0
|
@@ -0,0 +1,380 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Extraction Pipeline Module
|
|
3
|
+
*
|
|
4
|
+
* Main orchestrator for Gmail knowledge base extraction.
|
|
5
|
+
* Coordinates Gmail API calls, extraction, quality filtering,
|
|
6
|
+
* deduplication, intent classification, and storage.
|
|
7
|
+
*/
|
|
8
|
+
// Note: Supabase client is created by individual services (GmailExtractor, StateManager, etc.)
|
|
9
|
+
import { GmailExtractor, } from './gmail-extractor.js';
|
|
10
|
+
import { StateManager } from './state-manager.js';
|
|
11
|
+
import { gmailRateLimiter } from './rate-limiter.js';
|
|
12
|
+
import { EmbeddingService } from '../embeddings/index.js';
|
|
13
|
+
// Gmail API URL
|
|
14
|
+
// Base URL of the Gmail REST API (v1). Per-user resource paths such as
// `/users/{address}/threads` are appended to it when request URLs are built.
const GMAIL_API_URL = 'https://gmail.googleapis.com/gmail/v1';
|
|
15
|
+
// =============================================================================
|
|
16
|
+
// Extraction Pipeline
|
|
17
|
+
// =============================================================================
|
|
18
|
+
/**
 * Keyword lists used by the heuristic intent classifier, keyed by intent category.
 * Entry order matters: on a tie the category listed first wins, because a later
 * category only replaces the current best when it has strictly more matches.
 */
const INTENT_KEYWORDS = Object.freeze({
    order_status: ['order', 'shipping', 'tracking', 'status', 'delivery', 'shipped', 'arrived'],
    pricing_inquiry: ['price', 'pricing', 'cost', 'quote', 'discount', 'deal'],
    availability_check: ['available', 'stock', 'inventory', 'in stock', 'out of stock'],
    documentation_request: ['invoice', 'receipt', 'document', 'pdf', 'coa', 'certificate'],
    returns_damages: ['return', 'damage', 'damaged', 'refund', 'broken', 'wrong'],
    account_inquiry: ['account', 'login', 'password', 'access', 'profile'],
    product_sourcing: ['find', 'source', 'looking for', 'need', 'product'],
    onboarding_help: ['new', 'start', 'how do i', 'getting started', 'sign up'],
    complaint: ['complaint', 'unhappy', 'terrible', 'worst', 'never'],
    feedback: ['feedback', 'suggestion', 'improve', 'great job', 'thank you'],
    general_question: ['question', 'help', 'information', 'wondering'],
    other: [],
});

/** Label names that are queried with the `in:` operator instead of `label:`. */
const SPECIAL_FOLDERS = Object.freeze(['inbox', 'sent', 'drafts', 'spam', 'trash', 'starred']);

/** Return a shallow copy of `source` without the keys whose value is `undefined`. */
function omitUndefined(source) {
    return Object.fromEntries(Object.entries(source ?? {}).filter(([, value]) => value !== undefined));
}

/** Describe a failed HTTP response; `statusText` is empty over HTTP/2, so fall back to the status code. */
function describeHttpFailure(response) {
    return response.statusText || `HTTP ${response.status}`;
}

/** Normalize any thrown value into an `Error` instance. */
function toError(thrown) {
    return thrown instanceof Error ? thrown : new Error(String(thrown));
}

/**
 * Orchestrates one Gmail knowledge-base extraction run: batch tracking, thread
 * fetching, Q&A extraction, intent classification, storage and embeddings.
 */
export class ExtractionPipeline {
    config;
    gmailExtractor;
    stateManager;
    embeddingService;
    rateLimiter;

    /**
     * @param {object} config - Pipeline options. `supabaseUrl`, `supabaseServiceRoleKey`
     *   and `salesEmailAddress` are forwarded to the collaborating services; the
     *   remaining keys override the defaults listed below.
     */
    constructor(config) {
        this.config = {
            lookbackDays: 180,
            batchSize: 50,
            maxThreads: 1000,
            dryRun: false,
            generateEmbeddings: true,
            classifyIntents: true,
            onProgress: undefined,
            onError: undefined,
            gmailAccessToken: undefined,
            gmailLabels: ['inbox'],
            // Keys explicitly set to `undefined` must not erase a default; otherwise
            // `lookbackDays` yields an invalid date and `gmailLabels.map` throws.
            ...omitUndefined(config),
        };
        const { supabaseUrl, supabaseServiceRoleKey, salesEmailAddress, openaiApiKey } = this.config;
        this.gmailExtractor = new GmailExtractor({
            supabaseUrl,
            supabaseServiceRoleKey,
            salesEmailAddress,
        });
        this.stateManager = new StateManager({
            supabaseUrl,
            supabaseServiceRoleKey,
        });
        // Only create embedding service if OpenAI key provided
        this.embeddingService = openaiApiKey
            ? new EmbeddingService({ openaiApiKey, supabaseUrl, supabaseServiceRoleKey })
            : null;
        this.rateLimiter = gmailRateLimiter;
    }

    /**
     * Run the full extraction pipeline.
     *
     * Never rejects for pipeline failures: the error is recorded in the returned
     * result (`errors` list and `stats.errors`) and, outside dry-run mode, on the batch.
     *
     * @returns {Promise<object>} Result with `batchId`, `stats`, `duration` and `errors`.
     */
    async run() {
        const startTime = Date.now();
        const errors = [];
        const result = {
            batchId: null,
            stats: {
                threadsFound: 0,
                threadsProcessed: 0,
                pairsExtracted: 0,
                pairsStored: 0,
                duplicatesSkipped: 0,
                qualityFilteredOut: 0,
                embeddingsGenerated: 0,
                errors: 0,
            },
            duration: {
                totalMs: 0,
                extractionMs: 0,
                storageMs: 0,
                embeddingsMs: 0,
            },
            errors,
        };
        try {
            // 1. Create batch for tracking
            const dateRangeEnd = new Date();
            const dateRangeStart = new Date();
            dateRangeStart.setDate(dateRangeStart.getDate() - this.config.lookbackDays);
            if (!this.config.dryRun) {
                result.batchId = await this.stateManager.createBatch(this.config.salesEmailAddress, dateRangeStart, dateRangeEnd, { lookbackDays: this.config.lookbackDays, dryRun: this.config.dryRun });
            }
            this.reportProgress('initialization', 0, 1);

            // 2. Fetch Gmail threads
            const extractionStartTime = Date.now();
            let threads = [];
            if (this.config.gmailAccessToken) {
                threads = await this.fetchGmailThreads(dateRangeStart, dateRangeEnd);
                result.stats.threadsFound = threads.length;
            }
            else {
                // Without a token nothing can be fetched; the run continues with zero threads.
                console.warn('No Gmail access token provided, using mock data for testing');
            }
            if (!this.config.dryRun && result.batchId) {
                await this.stateManager.startBatch(result.batchId, threads.length);
            }
            this.reportProgress('threads_fetched', threads.length, threads.length);

            // 3. Extract Q&A pairs from threads
            const extractionResult = await this.gmailExtractor.extractFromThreads(threads);
            result.stats.threadsProcessed = extractionResult.stats.threadsProcessed;
            result.stats.pairsExtracted = extractionResult.stats.pairsExtracted;
            result.stats.duplicatesSkipped = extractionResult.stats.duplicatesSkipped;
            result.stats.qualityFilteredOut = extractionResult.stats.qualityFilteredOut;
            result.stats.errors += extractionResult.stats.errors;
            result.duration.extractionMs = Date.now() - extractionStartTime;
            this.reportProgress('extraction', extractionResult.stats.pairsExtracted, threads.length);

            // 4. Classify intents (if enabled)
            if (this.config.classifyIntents && extractionResult.extracted.length > 0) {
                await this.classifyIntents(extractionResult.extracted);
            }

            // 5. Store extracted pairs
            const storageStartTime = Date.now();
            if (!this.config.dryRun && extractionResult.extracted.length > 0) {
                const storeResult = await this.gmailExtractor.storeExtractedPairs(extractionResult.extracted, result.batchId || undefined);
                result.stats.pairsStored = storeResult.stored;
                result.stats.errors += storeResult.errors;
            }
            else {
                result.stats.pairsStored = 0;
            }
            result.duration.storageMs = Date.now() - storageStartTime;
            this.reportProgress('storage', result.stats.pairsStored, extractionResult.extracted.length);

            // 6. Generate embeddings (if enabled and service available)
            const embeddingsStartTime = Date.now();
            if (this.config.generateEmbeddings &&
                this.embeddingService &&
                !this.config.dryRun &&
                result.stats.pairsStored > 0) {
                try {
                    const embeddingResult = await this.embeddingService.backfillEmbeddings(this.config.batchSize, (processed, total) => {
                        this.reportProgress('embeddings', processed, total);
                    });
                    result.stats.embeddingsGenerated = embeddingResult.processed;
                    result.stats.errors += embeddingResult.errors;
                }
                catch (embeddingError) {
                    // An embedding failure is non-fatal, but it must be counted so the
                    // batch is not reported as cleanly 'completed'.
                    const error = toError(embeddingError);
                    errors.push({
                        stage: 'embeddings',
                        message: error.message,
                    });
                    result.stats.errors++;
                    this.reportError(error, { stage: 'embeddings' });
                }
            }
            result.duration.embeddingsMs = Date.now() - embeddingsStartTime;

            // 7. Complete batch
            if (!this.config.dryRun && result.batchId) {
                await this.stateManager.updateBatchProgress(result.batchId, {
                    threadsProcessed: result.stats.threadsProcessed,
                    entriesCreated: result.stats.pairsStored,
                    entriesSkipped: result.stats.duplicatesSkipped + result.stats.qualityFilteredOut,
                    errorsEncountered: result.stats.errors,
                });
                let batchStatus = 'completed';
                if (result.stats.errors > 0) {
                    batchStatus = result.stats.pairsStored === 0 ? 'failed' : 'partial';
                }
                await this.stateManager.completeBatch(result.batchId, batchStatus);
                // Update extraction state
                await this.stateManager.updateStateSuccess(this.config.salesEmailAddress, {
                    entriesExtracted: result.stats.pairsStored,
                    batchId: result.batchId,
                });
            }

            // Surface at most the first 10 skipped entries in the error list for reporting
            for (const skipped of extractionResult.skipped.slice(0, 10)) {
                errors.push({
                    stage: 'extraction',
                    message: skipped.reason,
                    context: { threadId: skipped.threadId, messageId: skipped.messageId },
                });
            }
            result.duration.totalMs = Date.now() - startTime;
            return result;
        }
        catch (error) {
            const err = toError(error);
            errors.push({
                stage: 'pipeline',
                message: err.message,
            });
            result.stats.errors++;
            this.reportError(err, { stage: 'pipeline' });
            if (!this.config.dryRun && result.batchId) {
                // Best-effort bookkeeping: each step runs independently so that a failing
                // state store neither hides the original error nor makes run() reject.
                const bookkeeping = [
                    () => this.stateManager.addBatchError(result.batchId, err.message),
                    () => this.stateManager.completeBatch(result.batchId, 'failed'),
                    () => this.stateManager.recordError(this.config.salesEmailAddress, err.message),
                ];
                for (const step of bookkeeping) {
                    try {
                        await step();
                    }
                    catch (bookkeepingError) {
                        const failure = toError(bookkeepingError);
                        errors.push({
                            stage: 'pipeline',
                            message: `Failed to record batch failure: ${failure.message}`,
                        });
                        result.stats.errors++;
                        this.reportError(failure, { stage: 'pipeline' });
                    }
                }
            }
            result.duration.totalMs = Date.now() - startTime;
            return result;
        }
    }

    /**
     * Fetch Gmail threads from the API, page by page, until `maxThreads` is reached
     * or no further page token is returned.
     *
     * @param {Date} dateRangeStart - Lower bound, sent as `after:` in epoch seconds.
     * @param {Date} dateRangeEnd - Upper bound, sent as `before:` in epoch seconds.
     * @returns {Promise<object[]>} Full thread payloads; threads whose detail fetch failed are omitted.
     * @throws {Error} When no access token is configured or the list request fails.
     */
    async fetchGmailThreads(dateRangeStart, dateRangeEnd) {
        if (!this.config.gmailAccessToken) {
            throw new Error('Gmail access token is required');
        }
        const threads = [];
        let pageToken;
        // Build query for threads in date range with label filtering
        const afterDate = Math.floor(dateRangeStart.getTime() / 1000);
        const beforeDate = Math.floor(dateRangeEnd.getTime() / 1000);
        const labelParts = this.config.gmailLabels.map((label) => {
            // Handle exclusions (e.g., '-spam')
            if (label.startsWith('-')) {
                return `-label:${label.slice(1)}`;
            }
            // Handle special folders (inbox, sent, etc.)
            const folder = label.toLowerCase();
            if (SPECIAL_FOLDERS.includes(folder)) {
                return `in:${folder}`;
            }
            // Regular labels
            return `label:${label}`;
        });
        const query = `after:${afterDate} before:${beforeDate} ${labelParts.join(' ')}`;
        do {
            // Rate limit Gmail API calls
            await this.rateLimiter.acquire();
            const url = new URL(`${GMAIL_API_URL}/users/${encodeURIComponent(this.config.salesEmailAddress)}/threads`);
            url.searchParams.set('q', query);
            url.searchParams.set('maxResults', '100');
            if (pageToken) {
                url.searchParams.set('pageToken', pageToken);
            }
            const response = await fetch(url.toString(), {
                headers: {
                    Authorization: `Bearer ${this.config.gmailAccessToken}`,
                },
            });
            if (!response.ok) {
                // The error body may not be JSON; fall back to the HTTP status description.
                const errorData = await response.json().catch(() => ({}));
                throw new Error(`Gmail API error: ${errorData.error?.message || describeHttpFailure(response)}`);
            }
            const data = await response.json();
            // Fetch full thread details for each thread
            for (const threadSummary of data.threads || []) {
                if (threads.length >= this.config.maxThreads) {
                    break;
                }
                const thread = await this.fetchGmailThread(threadSummary.id);
                if (thread) {
                    threads.push(thread);
                }
                this.reportProgress('fetching_threads', threads.length, this.config.maxThreads);
            }
            pageToken = data.nextPageToken;
        } while (pageToken && threads.length < this.config.maxThreads);
        return threads;
    }

    /**
     * Fetch a single Gmail thread with full message details.
     *
     * @param {string} threadId - Thread identifier taken from the list response.
     * @returns {Promise<object|null>} Parsed thread payload, or `null` when the request
     *   fails (the failure is logged as a warning rather than thrown).
     */
    async fetchGmailThread(threadId) {
        try {
            await this.rateLimiter.acquire();
            const url = `${GMAIL_API_URL}/users/${encodeURIComponent(this.config.salesEmailAddress)}/threads/${encodeURIComponent(threadId)}?format=full`;
            const response = await fetch(url, {
                headers: {
                    Authorization: `Bearer ${this.config.gmailAccessToken}`,
                },
            });
            if (!response.ok) {
                console.warn(`Failed to fetch thread ${threadId}: ${describeHttpFailure(response)}`);
                return null;
            }
            return await response.json();
        }
        catch (error) {
            console.warn(`Error fetching thread ${threadId}:`, error);
            return null;
        }
    }

    /**
     * Classify intents for extracted pairs using simple keyword matching.
     *
     * Each pair's subject and question text are lower-cased and searched for the
     * keywords of every category; the category with the most hits is written to
     * `pair.intentCategory` ('other' when nothing matches).
     *
     * @param {object[]} pairs - Extracted pairs; mutated in place.
     * @returns {Promise<void>}
     */
    async classifyIntents(pairs) {
        for (const pair of pairs) {
            // Missing fields contribute nothing instead of the literal text "undefined".
            const text = `${pair.questionSubject || ''} ${pair.questionText ?? ''}`.toLowerCase();
            let bestIntent = 'other';
            let maxMatches = 0;
            for (const [intent, keywords] of Object.entries(INTENT_KEYWORDS)) {
                const matches = keywords.filter((kw) => text.includes(kw)).length;
                if (matches > maxMatches) {
                    maxMatches = matches;
                    bestIntent = intent;
                }
            }
            pair.intentCategory = bestIntent;
        }
    }

    /**
     * Forward a progress update to the optional `onProgress` callback.
     */
    reportProgress(stage, progress, total) {
        if (this.config.onProgress) {
            this.config.onProgress(stage, progress, total);
        }
    }

    /**
     * Forward an error to the optional `onError` callback, or log it when none is set.
     */
    reportError(error, context) {
        if (this.config.onError) {
            this.config.onError(error, context);
            return;
        }
        console.error(`Pipeline error [${context?.stage}]:`, error.message);
    }

    /**
     * Get state manager for external use
     */
    getStateManager() {
        return this.stateManager;
    }

    /**
     * Get embedding service for external use (`null` when no OpenAI key was configured)
     */
    getEmbeddingService() {
        return this.embeddingService;
    }
}
|
|
358
|
+
// =============================================================================
|
|
359
|
+
// Factory Function
|
|
360
|
+
// =============================================================================
|
|
361
|
+
/**
|
|
362
|
+
* Create an extraction pipeline with environment configuration
|
|
363
|
+
*/
|
|
364
|
+
/**
 * Create an extraction pipeline from environment configuration.
 *
 * Values come from the environment (`SUPABASE_URL` or `NEXT_PUBLIC_SUPABASE_URL`,
 * `SUPABASE_SERVICE_ROLE_KEY`, `OPENAI_API_KEY`, `GMAIL_SALES_EMAIL`) and can be
 * overridden through `options`. Options whose value is `undefined` are ignored so
 * they cannot erase an environment-derived value, and Supabase credentials passed
 * via `options` satisfy the required-credentials check.
 *
 * @param {object} [options] - Pipeline config overrides.
 * @returns {ExtractionPipeline} A configured pipeline instance.
 * @throws {Error} When no Supabase URL or service-role key is available.
 */
export function createExtractionPipeline(options = {}) {
    const overrides = Object.fromEntries(Object.entries(options ?? {}).filter(([, value]) => value !== undefined));
    const supabaseUrl = overrides.supabaseUrl ||
        process.env.SUPABASE_URL ||
        process.env.NEXT_PUBLIC_SUPABASE_URL;
    const supabaseServiceRoleKey = overrides.supabaseServiceRoleKey || process.env.SUPABASE_SERVICE_ROLE_KEY;
    if (!supabaseUrl || !supabaseServiceRoleKey) {
        throw new Error('SUPABASE_URL and SUPABASE_SERVICE_ROLE_KEY are required');
    }
    return new ExtractionPipeline({
        openaiApiKey: process.env.OPENAI_API_KEY || '',
        salesEmailAddress: process.env.GMAIL_SALES_EMAIL || 'sales@catalistgroup.co',
        ...overrides,
        // Placed last so the validated credentials are the ones the pipeline receives.
        supabaseUrl,
        supabaseServiceRoleKey,
    });
}
|
|
380
|
+
//# sourceMappingURL=pipeline.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pipeline.js","sourceRoot":"","sources":["../../src/extraction/pipeline.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,+FAA+F;AAC/F,OAAO,EACL,cAAc,GAGf,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAClD,OAAO,EAAe,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AAClE,OAAO,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAC;AAyD1D,gBAAgB;AAChB,MAAM,aAAa,GAAG,uCAAuC,CAAC;AAE9D,gFAAgF;AAChF,sBAAsB;AACtB,gFAAgF;AAEhF,MAAM,OAAO,kBAAkB;IACrB,MAAM,CAA2B;IACjC,cAAc,CAAiB;IAC/B,YAAY,CAAe;IAC3B,gBAAgB,CAA0B;IAC1C,WAAW,CAAc;IAEjC,YAAY,MAAsB;QAChC,IAAI,CAAC,MAAM,GAAG;YACZ,YAAY,EAAE,GAAG;YACjB,SAAS,EAAE,EAAE;YACb,UAAU,EAAE,IAAI;YAChB,MAAM,EAAE,KAAK;YACb,kBAAkB,EAAE,IAAI;YACxB,eAAe,EAAE,IAAI;YACrB,UAAU,EAAE,SAAS;YACrB,OAAO,EAAE,SAAS;YAClB,gBAAgB,EAAE,SAAS;YAC3B,WAAW,EAAE,CAAC,OAAO,CAAC;YACtB,GAAG,MAAM;SACkB,CAAC;QAE9B,IAAI,CAAC,cAAc,GAAG,IAAI,cAAc,CAAC;YACvC,WAAW,EAAE,MAAM,CAAC,WAAW;YAC/B,sBAAsB,EAAE,MAAM,CAAC,sBAAsB;YACrD,iBAAiB,EAAE,MAAM,CAAC,iBAAiB;SAC5C,CAAC,CAAC;QAEH,IAAI,CAAC,YAAY,GAAG,IAAI,YAAY,CAAC;YACnC,WAAW,EAAE,MAAM,CAAC,WAAW;YAC/B,sBAAsB,EAAE,MAAM,CAAC,sBAAsB;SACtD,CAAC,CAAC;QAEH,uDAAuD;QACvD,IAAI,MAAM,CAAC,YAAY,EAAE,CAAC;YACxB,IAAI,CAAC,gBAAgB,GAAG,IAAI,gBAAgB,CAAC;gBAC3C,YAAY,EAAE,MAAM,CAAC,YAAY;gBACjC,WAAW,EAAE,MAAM,CAAC,WAAW;gBAC/B,sBAAsB,EAAE,MAAM,CAAC,sBAAsB;aACtD,CAAC,CAAC;QACL,CAAC;aAAM,CAAC;YACN,IAAI,CAAC,gBAAgB,GAAG,IAAI,CAAC;QAC/B,CAAC;QAED,IAAI,CAAC,WAAW,GAAG,gBAAgB,CAAC;IACtC,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,GAAG;QACP,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC7B,MAAM,MAAM,GAA6B,EAAE,CAAC;QAE5C,MAAM,MAAM,GAAmB;YAC7B,OAAO,EAAE,IAAI;YACb,KAAK,EAAE;gBACL,YAAY,EAAE,CAAC;gBACf,gBAAgB,EAAE,CAAC;gBACnB,cAAc,EAAE,CAAC;gBACjB,WAAW,EAAE,CAAC;gBACd,iBAAiB,EAAE,CAAC;gBACpB,kBAAkB,EAAE,CAAC;gBACrB,mBAAmB,EAAE,CAAC;gBACtB,MAAM,EAAE,CAAC;aACV;YACD,QAAQ,EAAE;gBACR,OAAO,EAAE,CAAC;gBACV,YAAY,EAAE,CAAC;gBACf,SAAS,EAAE,CAAC;gBACZ,YAAY,EAAE,CAAC;aAChB;YACD,MAAM;SACP,CAAC;QAEF,IAAI,CAAC;YACH,+BAA+B;YAC/B,MAAM,YAAY,GAAG,IAAI,IAAI,EAAE,CAAC;YAChC,MAAM,cAAc,GAAG,IAAI,IAAI,EAAE,CAAC;Y
AClC,cAAc,CAAC,OAAO,CAAC,cAAc,CAAC,OAAO,EAAE,GAAG,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC;YAE5E,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;gBACxB,MAAM,CAAC,OAAO,GAAG,MAAM,IAAI,CAAC,YAAY,CAAC,WAAW,CAClD,IAAI,CAAC,MAAM,CAAC,iBAAiB,EAC7B,cAAc,EACd,YAAY,EACZ,EAAE,YAAY,EAAE,IAAI,CAAC,MAAM,CAAC,YAAY,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CACvE,CAAC;YACJ,CAAC;YAED,IAAI,CAAC,cAAc,CAAC,gBAAgB,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC;YAE5C,yBAAyB;YACzB,MAAM,mBAAmB,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YACvC,IAAI,OAAO,GAAkB,EAAE,CAAC;YAEhC,IAAI,IAAI,CAAC,MAAM,CAAC,gBAAgB,EAAE,CAAC;gBACjC,OAAO,GAAG,MAAM,IAAI,CAAC,iBAAiB,CAAC,cAAc,EAAE,YAAY,CAAC,CAAC;gBACrE,MAAM,CAAC,KAAK,CAAC,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC;YAC7C,CAAC;iBAAM,CAAC;gBACN,gDAAgD;gBAChD,OAAO,CAAC,IAAI,CAAC,6DAA6D,CAAC,CAAC;gBAC5E,OAAO,GAAG,EAAE,CAAC;YACf,CAAC;YAED,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;gBAC1C,MAAM,IAAI,CAAC,YAAY,CAAC,UAAU,CAAC,MAAM,CAAC,OAAO,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC;YACrE,CAAC;YAED,IAAI,CAAC,cAAc,CAAC,iBAAiB,EAAE,OAAO,CAAC,MAAM,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC;YAEvE,oCAAoC;YACpC,MAAM,gBAAgB,GAAG,MAAM,IAAI,CAAC,cAAc,CAAC,kBAAkB,CAAC,OAAO,CAAC,CAAC;YAE/E,MAAM,CAAC,KAAK,CAAC,gBAAgB,GAAG,gBAAgB,CAAC,KAAK,CAAC,gBAAgB,CAAC;YACxE,MAAM,CAAC,KAAK,CAAC,cAAc,GAAG,gBAAgB,CAAC,KAAK,CAAC,cAAc,CAAC;YACpE,MAAM,CAAC,KAAK,CAAC,iBAAiB,GAAG,gBAAgB,CAAC,KAAK,CAAC,iBAAiB,CAAC;YAC1E,MAAM,CAAC,KAAK,CAAC,kBAAkB,GAAG,gBAAgB,CAAC,KAAK,CAAC,kBAAkB,CAAC;YAC5E,MAAM,CAAC,KAAK,CAAC,MAAM,IAAI,gBAAgB,CAAC,KAAK,CAAC,MAAM,CAAC;YAErD,MAAM,CAAC,QAAQ,CAAC,YAAY,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,mBAAmB,CAAC;YAEhE,IAAI,CAAC,cAAc,CAAC,YAAY,EAAE,gBAAgB,CAAC,KAAK,CAAC,cAAc,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC;YAEzF,mCAAmC;YACnC,IAAI,IAAI,CAAC,MAAM,CAAC,eAAe,IAAI,gBAAgB,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACzE,MAAM,IAAI,CAAC,eAAe,CAAC,gBAAgB,CAAC,SAAS,CAAC,CAAC;YACzD,CAAC;YAED,2BAA2B;YAC3B,MAAM,gBAAgB,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YAEpC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,IAAI,gBAAgB,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACjE,MAAM,WAAW,
GAAG,MAAM,IAAI,CAAC,cAAc,CAAC,mBAAmB,CAC/D,gBAAgB,CAAC,SAAS,EAC1B,MAAM,CAAC,OAAO,IAAI,SAAS,CAC5B,CAAC;gBAEF,MAAM,CAAC,KAAK,CAAC,WAAW,GAAG,WAAW,CAAC,MAAM,CAAC;gBAC9C,MAAM,CAAC,KAAK,CAAC,MAAM,IAAI,WAAW,CAAC,MAAM,CAAC;YAC5C,CAAC;iBAAM,CAAC;gBACN,MAAM,CAAC,KAAK,CAAC,WAAW,GAAG,CAAC,CAAC;YAC/B,CAAC;YAED,MAAM,CAAC,QAAQ,CAAC,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,gBAAgB,CAAC;YAE1D,IAAI,CAAC,cAAc,CAAC,SAAS,EAAE,MAAM,CAAC,KAAK,CAAC,WAAW,EAAE,gBAAgB,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;YAE5F,4DAA4D;YAC5D,MAAM,mBAAmB,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YAEvC,IACE,IAAI,CAAC,MAAM,CAAC,kBAAkB;gBAC9B,IAAI,CAAC,gBAAgB;gBACrB,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM;gBACnB,MAAM,CAAC,KAAK,CAAC,WAAW,GAAG,CAAC,EAC5B,CAAC;gBACD,IAAI,CAAC;oBACH,MAAM,eAAe,GAAG,MAAM,IAAI,CAAC,gBAAgB,CAAC,kBAAkB,CACpE,IAAI,CAAC,MAAM,CAAC,SAAS,EACrB,CAAC,SAAS,EAAE,KAAK,EAAE,EAAE;wBACnB,IAAI,CAAC,cAAc,CAAC,YAAY,EAAE,SAAS,EAAE,KAAK,CAAC,CAAC;oBACtD,CAAC,CACF,CAAC;oBACF,MAAM,CAAC,KAAK,CAAC,mBAAmB,GAAG,eAAe,CAAC,SAAS,CAAC;oBAC7D,MAAM,CAAC,KAAK,CAAC,MAAM,IAAI,eAAe,CAAC,MAAM,CAAC;gBAChD,CAAC;gBAAC,OAAO,cAAc,EAAE,CAAC;oBACxB,MAAM,KAAK,GAAG,cAAc,YAAY,KAAK,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC;oBACnG,MAAM,CAAC,IAAI,CAAC;wBACV,KAAK,EAAE,YAAY;wBACnB,OAAO,EAAE,KAAK,CAAC,OAAO;qBACvB,CAAC,CAAC;oBACH,IAAI,CAAC,WAAW,CAAC,KAAK,EAAE,EAAE,KAAK,EAAE,YAAY,EAAE,CAAC,CAAC;gBACnD,CAAC;YACH,CAAC;YAED,MAAM,CAAC,QAAQ,CAAC,YAAY,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,mBAAmB,CAAC;YAEhE,oBAAoB;YACpB,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;gBAC1C,MAAM,IAAI,CAAC,YAAY,CAAC,mBAAmB,CAAC,MAAM,CAAC,OAAO,EAAE;oBAC1D,gBAAgB,EAAE,MAAM,CAAC,KAAK,CAAC,gBAAgB;oBAC/C,cAAc,EAAE,MAAM,CAAC,KAAK,CAAC,WAAW;oBACxC,cAAc,EAAE,MAAM,CAAC,KAAK,CAAC,iBAAiB,GAAG,MAAM,CAAC,KAAK,CAAC,kBAAkB;oBAChF,iBAAiB,EAAE,MAAM,CAAC,KAAK,CAAC,MAAM;iBACvC,CAAC,CAAC;gBAEH,MAAM,WAAW,GACf,MAAM,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,MAAM,CAAC,KAAK,CAAC,WAAW,KAAK,CAAC;oBACvD,CAAC,CAAC,QAAQ;oBACV,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC;wBACvB,CAAC,CAAC,SAAS;
wBACX,CAAC,CAAC,WAAW,CAAC;gBAEpB,MAAM,IAAI,CAAC,YAAY,CAAC,aAAa,CAAC,MAAM,CAAC,OAAO,EAAE,WAAW,CAAC,CAAC;gBAEnE,0BAA0B;gBAC1B,MAAM,IAAI,CAAC,YAAY,CAAC,kBAAkB,CAAC,IAAI,CAAC,MAAM,CAAC,iBAAiB,EAAE;oBACxE,gBAAgB,EAAE,MAAM,CAAC,KAAK,CAAC,WAAW;oBAC1C,OAAO,EAAE,MAAM,CAAC,OAAO;iBACxB,CAAC,CAAC;YACL,CAAC;YAED,8CAA8C;YAC9C,KAAK,MAAM,OAAO,IAAI,gBAAgB,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC;gBAC5D,MAAM,CAAC,IAAI,CAAC;oBACV,KAAK,EAAE,YAAY;oBACnB,OAAO,EAAE,OAAO,CAAC,MAAM;oBACvB,OAAO,EAAE,EAAE,QAAQ,EAAE,OAAO,CAAC,QAAQ,EAAE,SAAS,EAAE,OAAO,CAAC,SAAS,EAAE;iBACtE,CAAC,CAAC;YACL,CAAC;YAED,MAAM,CAAC,QAAQ,CAAC,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;YAEjD,OAAO,MAAM,CAAC;QAChB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,GAAG,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;YACtE,MAAM,CAAC,IAAI,CAAC;gBACV,KAAK,EAAE,UAAU;gBACjB,OAAO,EAAE,GAAG,CAAC,OAAO;aACrB,CAAC,CAAC;YACH,IAAI,CAAC,WAAW,CAAC,GAAG,EAAE,EAAE,KAAK,EAAE,UAAU,EAAE,CAAC,CAAC;YAE7C,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;gBAC1C,MAAM,IAAI,CAAC,YAAY,CAAC,aAAa,CAAC,MAAM,CAAC,OAAO,EAAE,GAAG,CAAC,OAAO,CAAC,CAAC;gBACnE,MAAM,IAAI,CAAC,YAAY,CAAC,aAAa,CAAC,MAAM,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC;gBAChE,MAAM,IAAI,CAAC,YAAY,CAAC,WAAW,CAAC,IAAI,CAAC,MAAM,CAAC,iBAAiB,EAAE,GAAG,CAAC,OAAO,CAAC,CAAC;YAClF,CAAC;YAED,MAAM,CAAC,QAAQ,CAAC,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;YACjD,MAAM,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC;YAEtB,OAAO,MAAM,CAAC;QAChB,CAAC;IACH,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,iBAAiB,CAC7B,cAAoB,EACpB,YAAkB;QAElB,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,gBAAgB,EAAE,CAAC;YAClC,MAAM,IAAI,KAAK,CAAC,gCAAgC,CAAC,CAAC;QACpD,CAAC;QAED,MAAM,OAAO,GAAkB,EAAE,CAAC;QAClC,IAAI,SAA6B,CAAC;QAElC,6DAA6D;QAC7D,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,cAAc,CAAC,OAAO,EAAE,GAAG,IAAI,CAAC,CAAC;QAC9D,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,OAAO,EAAE,GAAG,IAAI,CAAC,CAAC;QAE7D,0BAA0B;QAC1B,MAAM,UAAU,GAAG,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE;YACvD,oCAAoC;YACpC,IAAI,
KAAK,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC1B,OAAO,UAAU,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC;YACpC,CAAC;YACD,6CAA6C;YAC7C,IAAI,CAAC,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,SAAS,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,EAAE,CAAC;gBAC1F,OAAO,MAAM,KAAK,CAAC,WAAW,EAAE,EAAE,CAAC;YACrC,CAAC;YACD,iBAAiB;YACjB,OAAO,SAAS,KAAK,EAAE,CAAC;QAC1B,CAAC,CAAC,CAAC;QAEH,MAAM,KAAK,GAAG,SAAS,SAAS,WAAW,UAAU,IAAI,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;QAEhF,GAAG,CAAC;YACF,6BAA6B;YAC7B,MAAM,IAAI,CAAC,WAAW,CAAC,OAAO,EAAE,CAAC;YAEjC,MAAM,GAAG,GAAG,IAAI,GAAG,CACjB,GAAG,aAAa,UAAU,kBAAkB,CAAC,IAAI,CAAC,MAAM,CAAC,iBAAiB,CAAC,UAAU,CACtF,CAAC;YACF,GAAG,CAAC,YAAY,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;YACjC,GAAG,CAAC,YAAY,CAAC,GAAG,CAAC,YAAY,EAAE,KAAK,CAAC,CAAC;YAC1C,IAAI,SAAS,EAAE,CAAC;gBACd,GAAG,CAAC,YAAY,CAAC,GAAG,CAAC,WAAW,EAAE,SAAS,CAAC,CAAC;YAC/C,CAAC;YAED,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,QAAQ,EAAE,EAAE;gBAC3C,OAAO,EAAE;oBACP,aAAa,EAAE,UAAU,IAAI,CAAC,MAAM,CAAC,gBAAgB,EAAE;iBACxD;aACF,CAAC,CAAC;YAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;gBACjB,MAAM,SAAS,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;gBAC1D,MAAM,IAAI,KAAK,CACb,oBAAoB,SAAS,CAAC,KAAK,EAAE,OAAO,IAAI,QAAQ,CAAC,UAAU,EAAE,CACtE,CAAC;YACJ,CAAC;YAED,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YAEnC,4CAA4C;YAC5C,KAAK,MAAM,aAAa,IAAI,IAAI,CAAC,OAAO,IAAI,EAAE,EAAE,CAAC;gBAC/C,IAAI,OAAO,CAAC,MAAM,IAAI,IAAI,CAAC,MAAM,CAAC,UAAU,EAAE,CAAC;oBAC7C,MAAM;gBACR,CAAC;gBAED,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,gBAAgB,CAAC,aAAa,CAAC,EAAE,CAAC,CAAC;gBAC7D,IAAI,MAAM,EAAE,CAAC;oBACX,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;gBACvB,CAAC;gBAED,IAAI,CAAC,cAAc,CAAC,kBAAkB,EAAE,OAAO,CAAC,MAAM,EAAE,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC;YAClF,CAAC;YAED,SAAS,GAAG,IAAI,CAAC,aAAa,CAAC;QACjC,CAAC,QAAQ,SAAS,IAAI,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,UAAU,EAAE;QAE/D,OAAO,OAAO,CAAC;IACjB,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,gBAAgB,CAAC,QAAgB;QAC7C,IAAI,CAAC;YACH,MAAM,IAAI,CAAC,WAAW,CAAC,OAAO,EAAE,CAAC;YAEjC,MAAM,GAAG,GAAG,GAA
G,aAAa,UAAU,kBAAkB,CAAC,IAAI,CAAC,MAAM,CAAC,iBAAiB,CAAC,YAAY,QAAQ,cAAc,CAAC;YAE1H,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;gBAChC,OAAO,EAAE;oBACP,aAAa,EAAE,UAAU,IAAI,CAAC,MAAM,CAAC,gBAAgB,EAAE;iBACxD;aACF,CAAC,CAAC;YAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;gBACjB,OAAO,CAAC,IAAI,CAAC,0BAA0B,QAAQ,KAAK,QAAQ,CAAC,UAAU,EAAE,CAAC,CAAC;gBAC3E,OAAO,IAAI,CAAC;YACd,CAAC;YAED,OAAO,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QAC/B,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,IAAI,CAAC,yBAAyB,QAAQ,GAAG,EAAE,KAAK,CAAC,CAAC;YAC1D,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,eAAe,CAAC,KAAwB;QACpD,6CAA6C;QAC7C,2DAA2D;QAE3D,MAAM,cAAc,GAAqC;YACvD,YAAY,EAAE,CAAC,OAAO,EAAE,UAAU,EAAE,UAAU,EAAE,QAAQ,EAAE,UAAU,EAAE,SAAS,EAAE,SAAS,CAAC;YAC3F,eAAe,EAAE,CAAC,OAAO,EAAE,SAAS,EAAE,MAAM,EAAE,OAAO,EAAE,UAAU,EAAE,MAAM,CAAC;YAC1E,kBAAkB,EAAE,CAAC,WAAW,EAAE,OAAO,EAAE,WAAW,EAAE,UAAU,EAAE,cAAc,CAAC;YACnF,qBAAqB,EAAE,CAAC,SAAS,EAAE,SAAS,EAAE,UAAU,EAAE,KAAK,EAAE,KAAK,EAAE,aAAa,CAAC;YACtF,eAAe,EAAE,CAAC,QAAQ,EAAE,QAAQ,EAAE,SAAS,EAAE,QAAQ,EAAE,QAAQ,EAAE,OAAO,CAAC;YAC7E,eAAe,EAAE,CAAC,SAAS,EAAE,OAAO,EAAE,UAAU,EAAE,QAAQ,EAAE,SAAS,CAAC;YACtE,gBAAgB,EAAE,CAAC,MAAM,EAAE,QAAQ,EAAE,aAAa,EAAE,MAAM,EAAE,SAAS,CAAC;YACtE,eAAe,EAAE,CAAC,KAAK,EAAE,OAAO,EAAE,UAAU,EAAE,iBAAiB,EAAE,SAAS,CAAC;YAC3E,SAAS,EAAE,CAAC,WAAW,EAAE,SAAS,EAAE,UAAU,EAAE,OAAO,EAAE,OAAO,CAAC;YACjE,QAAQ,EAAE,CAAC,UAAU,EAAE,YAAY,EAAE,SAAS,EAAE,WAAW,EAAE,WAAW,CAAC;YACzE,gBAAgB,EAAE,CAAC,UAAU,EAAE,MAAM,EAAE,aAAa,EAAE,WAAW,CAAC;YAClE,KAAK,EAAE,EAAE;SACV,CAAC;QAEF,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,MAAM,IAAI,GAAG,CACX,CAAC,IAAI,CAAC,eAAe,IAAI,EAAE,CAAC;gBAC5B,GAAG;gBACH,IAAI,CAAC,YAAY,CAClB,CAAC,WAAW,EAAE,CAAC;YAEhB,IAAI,UAAU,GAAmB,OAAO,CAAC;YACzC,IAAI,UAAU,GAAG,CAAC,CAAC;YAEnB,KAAK,MAAM,CAAC,MAAM,EAAE,QAAQ,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,cAAc,CAAC,EAAE,CAAC;gBAChE,MAAM,OAAO,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC;gBAClE,IAAI,OAAO,GAAG,UAAU,EAAE,CAAC;oBACzB,UAAU,GAAG,OAAO,CAAC;oBACrB,UAAU,GAAG,MA
AwB,CAAC;gBACxC,CAAC;YACH,CAAC;YAED,IAAI,CAAC,cAAc,GAAG,UAAU,CAAC;QACnC,CAAC;IACH,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,KAAa,EAAE,QAAgB,EAAE,KAAa;QACnE,IAAI,IAAI,CAAC,MAAM,CAAC,UAAU,EAAE,CAAC;YAC3B,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,KAAK,EAAE,QAAQ,EAAE,KAAK,CAAC,CAAC;QACjD,CAAC;IACH,CAAC;IAED;;OAEG;IACK,WAAW,CAAC,KAAY,EAAE,OAAgC;QAChE,IAAI,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;YACxB,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;QACtC,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,KAAK,CAAC,mBAAmB,OAAO,CAAC,KAAK,IAAI,EAAE,KAAK,CAAC,OAAO,CAAC,CAAC;QACrE,CAAC;IACH,CAAC;IAED;;OAEG;IACH,eAAe;QACb,OAAO,IAAI,CAAC,YAAY,CAAC;IAC3B,CAAC;IAED;;OAEG;IACH,mBAAmB;QACjB,OAAO,IAAI,CAAC,gBAAgB,CAAC;IAC/B,CAAC;CACF;AAED,gFAAgF;AAChF,mBAAmB;AACnB,gFAAgF;AAEhF;;GAEG;AACH,MAAM,UAAU,wBAAwB,CACtC,UAAmC,EAAE;IAErC,MAAM,WAAW,GAAG,OAAO,CAAC,GAAG,CAAC,YAAY,IAAI,OAAO,CAAC,GAAG,CAAC,wBAAwB,CAAC;IACrF,MAAM,sBAAsB,GAAG,OAAO,CAAC,GAAG,CAAC,yBAAyB,CAAC;IACrE,MAAM,YAAY,GAAG,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC;IAChD,MAAM,UAAU,GAAG,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,wBAAwB,CAAC;IAE7E,IAAI,CAAC,WAAW,IAAI,CAAC,sBAAsB,EAAE,CAAC;QAC5C,MAAM,IAAI,KAAK,CAAC,yDAAyD,CAAC,CAAC;IAC7E,CAAC;IAED,OAAO,IAAI,kBAAkB,CAAC;QAC5B,WAAW;QACX,sBAAsB;QACtB,YAAY,EAAE,YAAY,IAAI,EAAE;QAChC,iBAAiB,EAAE,UAAU;QAC7B,GAAG,OAAO;KACX,CAAC,CAAC;AACL,CAAC"}
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Quality Filter Module
|
|
3
|
+
*
|
|
4
|
+
* Assesses quality of extracted Q&A pairs for inclusion in the knowledge base.
|
|
5
|
+
* Filters based on minimum length requirements, response time, and resolution indicators.
|
|
6
|
+
*/
|
|
7
|
+
export interface QualityAssessment {
|
|
8
|
+
score: number;
|
|
9
|
+
passed: boolean;
|
|
10
|
+
factors: QualityFactor[];
|
|
11
|
+
summary: string;
|
|
12
|
+
}
|
|
13
|
+
export interface QualityFactor {
|
|
14
|
+
name: string;
|
|
15
|
+
score: number;
|
|
16
|
+
weight: number;
|
|
17
|
+
details: string;
|
|
18
|
+
}
|
|
19
|
+
export interface QualityFilterConfig {
|
|
20
|
+
minQuestionLength: number;
|
|
21
|
+
minResponseLength: number;
|
|
22
|
+
maxResponseTimeMs: number;
|
|
23
|
+
minQualityScore: number;
|
|
24
|
+
weights: {
|
|
25
|
+
questionLength: number;
|
|
26
|
+
responseLength: number;
|
|
27
|
+
responseTime: number;
|
|
28
|
+
hasSubject: number;
|
|
29
|
+
hasResolution: number;
|
|
30
|
+
conversationDepth: number;
|
|
31
|
+
};
|
|
32
|
+
}
|
|
33
|
+
export interface QAInput {
|
|
34
|
+
questionSubject?: string;
|
|
35
|
+
questionText: string;
|
|
36
|
+
responseText: string;
|
|
37
|
+
responseTimeMs?: number;
|
|
38
|
+
resolutionIndicator?: string;
|
|
39
|
+
conversationTurnCount?: number;
|
|
40
|
+
}
|
|
41
|
+
export declare class QualityFilter {
|
|
42
|
+
private config;
|
|
43
|
+
constructor(config?: Partial<QualityFilterConfig>);
|
|
44
|
+
/**
|
|
45
|
+
* Assess quality of a Q&A pair
|
|
46
|
+
*/
|
|
47
|
+
assess(input: QAInput): QualityAssessment;
|
|
48
|
+
/**
|
|
49
|
+
* Quick check if Q&A pair meets minimum requirements
|
|
50
|
+
*/
|
|
51
|
+
meetsMinimumRequirements(input: QAInput): boolean;
|
|
52
|
+
/**
|
|
53
|
+
* Batch assess multiple Q&A pairs
|
|
54
|
+
*/
|
|
55
|
+
assessBatch(inputs: QAInput[]): QualityAssessment[];
|
|
56
|
+
/**
|
|
57
|
+
* Filter batch to only passing entries
|
|
58
|
+
*/
|
|
59
|
+
filterPassing(inputs: QAInput[]): {
|
|
60
|
+
input: QAInput;
|
|
61
|
+
assessment: QualityAssessment;
|
|
62
|
+
}[];
|
|
63
|
+
private assessQuestionLength;
|
|
64
|
+
private assessResponseLength;
|
|
65
|
+
private assessResponseTime;
|
|
66
|
+
private assessHasSubject;
|
|
67
|
+
private assessResolution;
|
|
68
|
+
private assessConversationDepth;
|
|
69
|
+
private generateSummary;
|
|
70
|
+
/**
|
|
71
|
+
* Update configuration
|
|
72
|
+
*/
|
|
73
|
+
updateConfig(config: Partial<QualityFilterConfig>): void;
|
|
74
|
+
/**
|
|
75
|
+
* Get current configuration
|
|
76
|
+
*/
|
|
77
|
+
getConfig(): QualityFilterConfig;
|
|
78
|
+
}
|
|
79
|
+
/**
|
|
80
|
+
* Get singleton quality filter instance
|
|
81
|
+
*/
|
|
82
|
+
export declare function getQualityFilter(): QualityFilter;
|
|
83
|
+
/**
|
|
84
|
+
* Reset the singleton (for testing)
|
|
85
|
+
*/
|
|
86
|
+
export declare function resetQualityFilter(): void;
|
|
87
|
+
/**
|
|
88
|
+
* Quick quality assessment using singleton
|
|
89
|
+
*/
|
|
90
|
+
export declare function assessQuality(input: QAInput): QualityAssessment;
|
|
91
|
+
/**
|
|
92
|
+
* Quick check for minimum requirements
|
|
93
|
+
*/
|
|
94
|
+
export declare function meetsMinimumQuality(input: QAInput): boolean;
|
|
95
|
+
/**
|
|
96
|
+
* Infer resolution indicator from text
|
|
97
|
+
*/
|
|
98
|
+
export declare function inferResolutionIndicator(questionText: string, responseText: string): 'resolved' | 'ongoing' | 'unknown';
|
|
99
|
+
//# sourceMappingURL=quality-filter.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"quality-filter.d.ts","sourceRoot":"","sources":["../../src/extraction/quality-filter.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAMH,MAAM,WAAW,iBAAiB;IAChC,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,OAAO,CAAC;IAChB,OAAO,EAAE,aAAa,EAAE,CAAC;IACzB,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,mBAAmB;IAElC,iBAAiB,EAAE,MAAM,CAAC;IAC1B,iBAAiB,EAAE,MAAM,CAAC;IAG1B,iBAAiB,EAAE,MAAM,CAAC;IAG1B,eAAe,EAAE,MAAM,CAAC;IAGxB,OAAO,EAAE;QACP,cAAc,EAAE,MAAM,CAAC;QACvB,cAAc,EAAE,MAAM,CAAC;QACvB,YAAY,EAAE,MAAM,CAAC;QACrB,UAAU,EAAE,MAAM,CAAC;QACnB,aAAa,EAAE,MAAM,CAAC;QACtB,iBAAiB,EAAE,MAAM,CAAC;KAC3B,CAAC;CACH;AAED,MAAM,WAAW,OAAO;IACtB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAC7B,qBAAqB,CAAC,EAAE,MAAM,CAAC;CAChC;AAkED,qBAAa,aAAa;IACxB,OAAO,CAAC,MAAM,CAAsB;gBAExB,MAAM,GAAE,OAAO,CAAC,mBAAmB,CAAM;IAIrD;;OAEG;IACH,MAAM,CAAC,KAAK,EAAE,OAAO,GAAG,iBAAiB;IAgDzC;;OAEG;IACH,wBAAwB,CAAC,KAAK,EAAE,OAAO,GAAG,OAAO;IAOjD;;OAEG;IACH,WAAW,CAAC,MAAM,EAAE,OAAO,EAAE,GAAG,iBAAiB,EAAE;IAInD;;OAEG;IACH,aAAa,CAAC,MAAM,EAAE,OAAO,EAAE,GAAG;QAAE,KAAK,EAAE,OAAO,CAAC;QAAC,UAAU,EAAE,iBAAiB,CAAA;KAAE,EAAE;IAUrF,OAAO,CAAC,oBAAoB;IAuB5B,OAAO,CAAC,oBAAoB;IAuB5B,OAAO,CAAC,kBAAkB;IAyB1B,OAAO,CAAC,gBAAgB;IAYxB,OAAO,CAAC,gBAAgB;IAwDxB,OAAO,CAAC,uBAAuB;IAkC/B,OAAO,CAAC,eAAe;IAkCvB;;OAEG;IACH,YAAY,CAAC,MAAM,EAAE,OAAO,CAAC,mBAAmB,CAAC,GAAG,IAAI;IAIxD;;OAEG;IACH,SAAS,IAAI,mBAAmB;CAGjC;AAQD;;GAEG;AACH,wBAAgB,gBAAgB,IAAI,aAAa,CAKhD;AAED;;GAEG;AACH,wBAAgB,kBAAkB,IAAI,IAAI,CAEzC;AAMD;;GAEG;AACH,wBAAgB,aAAa,CAAC,KAAK,EAAE,OAAO,GAAG,iBAAiB,CAE/D;AAED;;GAEG;AACH,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,OAAO,GAAG,OAAO,CAE3D;AAED;;GAEG;AACH,wBAAgB,wBAAwB,CACtC,YAAY,EAAE,MAAM,EACpB,YAAY,EAAE,MAAM,GACnB,UAAU,GAAG,SAAS,GAAG,SAAS,CAkBpC"}
|