ms365-mcp-server 1.1.15 → 1.1.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +1123 -10
- package/dist/utils/batch-performance-monitor.js +106 -0
- package/dist/utils/batch-test-scenarios.js +277 -0
- package/dist/utils/context-aware-search.js +499 -0
- package/dist/utils/cross-reference-detector.js +352 -0
- package/dist/utils/document-workflow.js +433 -0
- package/dist/utils/enhanced-fuzzy-search.js +514 -0
- package/dist/utils/error-handler.js +337 -0
- package/dist/utils/intelligence-engine.js +71 -0
- package/dist/utils/intelligent-cache.js +379 -0
- package/dist/utils/large-mailbox-search.js +599 -0
- package/dist/utils/ms365-operations.js +799 -219
- package/dist/utils/performance-monitor.js +395 -0
- package/dist/utils/proactive-intelligence.js +390 -0
- package/dist/utils/rate-limiter.js +284 -0
- package/dist/utils/search-batch-pipeline.js +222 -0
- package/dist/utils/thread-reconstruction.js +700 -0
- package/package.json +1 -1
|
@@ -0,0 +1,700 @@
|
|
|
1
|
+
import { logger } from './api.js';
|
|
2
|
+
/**
 * Heuristically rebuilds an email thread (original message, replies and
 * forwards) from a flat list of email objects.
 *
 * Emails are expected to expose `id`, `subject`, `bodyPreview`,
 * `receivedDateTime`, `conversationId`, `from` ({ name, address }) and
 * `toRecipients` / `ccRecipients` (arrays of { name, address }). Missing
 * `subject`, `bodyPreview`, `from` or recipient lists are tolerated and
 * treated as empty.
 */
export class ThreadReconstruction {
    /**
     * @param {object} ms365Operations - Stored on the instance; not used by
     *   any method in this class.
     */
    constructor(ms365Operations) {
        this.ms365Operations = ms365Operations;
    }

    /**
     * Reconstruct the thread that a single email belongs to.
     *
     * @param {object} email - The email to start from.
     * @param {object[]} allEmails - Candidate pool to search for related emails.
     * @param {object} [options] - Overrides for `ThreadReconstruction.DEFAULT_OPTIONS`.
     * @returns {Promise<object>} Thread with `id`, `rootEmail`, `allEmails`
     *   (chronological), `threadTree`, `totalMessages`, `threadSpan`,
     *   `participants` and `summary`.
     */
    async reconstructThread(email, allEmails, options = {}) {
        const opts = { ...ThreadReconstruction.DEFAULT_OPTIONS, ...options };
        logger.log(`Reconstructing thread for email: ${email.subject}`);
        // Sorted here (on a fresh copy) so `allEmails` in the result is chronological.
        const relatedEmails = this.sortByReceivedDate(this.findRelatedEmails(email, allEmails, opts));
        logger.log(`Found ${relatedEmails.length} related emails`);
        const threadTree = this.buildThreadTree(relatedEmails, opts);
        const threadAnalysis = this.analyzeThread(threadTree, opts);
        const reconstructedThread = {
            id: this.generateThreadId(email),
            rootEmail: threadAnalysis.rootEmail,
            allEmails: relatedEmails,
            threadTree,
            totalMessages: relatedEmails.length,
            threadSpan: this.calculateThreadSpan(relatedEmails),
            participants: this.analyzeParticipants(relatedEmails),
            summary: this.generateThreadSummary(threadTree, threadAnalysis, opts)
        };
        logger.log(`Thread reconstruction completed: ${reconstructedThread.totalMessages} messages`);
        return reconstructedThread;
    }

    /**
     * Return a new array of the emails ordered by ascending `receivedDateTime`.
     * The input array is left untouched.
     *
     * @param {object[]} emails
     * @returns {object[]}
     */
    sortByReceivedDate(emails) {
        return [...emails].sort((a, b) => new Date(a.receivedDateTime).getTime() - new Date(b.receivedDateTime).getTime());
    }

    /**
     * Find all emails related to the target email: same conversation, matching
     * subject, and (optionally) matching content; then restrict the result to
     * the configured time window. Each email appears at most once.
     *
     * @param {object} targetEmail
     * @param {object[]} allEmails
     * @param {object} options - Reads `enableAdvancedPatternMatching` and `timeWindowDays`.
     * @returns {object[]} Related emails, target first (if inside the window).
     */
    findRelatedEmails(targetEmail, allEmails, options) {
        const relatedEmails = [targetEmail];
        const processedIds = new Set([targetEmail.id]);
        const collect = (candidates) => {
            for (const candidate of candidates) {
                if (!processedIds.has(candidate.id)) {
                    processedIds.add(candidate.id);
                    relatedEmails.push(candidate);
                }
            }
        };
        const unprocessed = () => allEmails.filter(email => !processedIds.has(email.id));
        // 1. Conversation ID. Skipped when the target has none: otherwise every
        //    email lacking a conversationId would compare equal (undefined === undefined).
        if (targetEmail.conversationId) {
            collect(allEmails.filter(email => email.conversationId === targetEmail.conversationId));
        }
        // 2. Subject similarity and reply/forward prefixes.
        collect(this.findSubjectRelatedEmails(targetEmail, unprocessed(), options));
        // 3. Content patterns (forwarded content detection).
        if (options.enableAdvancedPatternMatching) {
            collect(this.findContentRelatedEmails(targetEmail, unprocessed(), options));
        }
        return this.filterByTimeWindow(relatedEmails, targetEmail, options.timeWindowDays);
    }

    /**
     * Find emails whose prefix-stripped subject matches the target's.
     *
     * @param {object} targetEmail
     * @param {object[]} emails - Candidates.
     * @param {object} options - Currently unused.
     * @returns {object[]} Matching candidates, each listed once.
     */
    findSubjectRelatedEmails(targetEmail, emails, options) {
        const cleanTargetSubject = this.cleanSubject(targetEmail.subject);
        // An empty subject carries no signal; matching on it would group
        // every subject-less email together.
        if (cleanTargetSubject === '') {
            return [];
        }
        return emails.filter(email => this.subjectsMatch(cleanTargetSubject, this.cleanSubject(email.subject)) ||
            this.hasForwardOrReplyPattern(email.subject, targetEmail.subject));
    }

    /**
     * Find emails related to the target by body content.
     *
     * NOTE(review): `hasForwardedContent` is true for any candidate whose
     * preview contains a generic forward marker, regardless of the target —
     * confirm this breadth is intended.
     *
     * @param {object} targetEmail
     * @param {object[]} emails - Candidates.
     * @param {object} options - Currently unused.
     * @returns {object[]} Matching candidates, each listed once.
     */
    findContentRelatedEmails(targetEmail, emails, options) {
        const targetContent = this.extractContentSignatures(targetEmail);
        return emails.filter(email => this.hasForwardedContent(email, targetEmail) ||
            this.calculateContentSimilarity(targetContent, this.extractContentSignatures(email)) > 0.7);
    }

    /**
     * Build the thread tree for a set of related emails.
     *
     * @param {object[]} emails - Non-empty list of related emails (not mutated).
     * @param {object} options - Reads `maxDepth`.
     * @returns {object} Root node of the tree.
     */
    buildThreadTree(emails, options) {
        const sortedEmails = this.sortByReceivedDate(emails);
        const rootEmail = this.findRootEmail(sortedEmails);
        const rootNode = {
            email: rootEmail,
            children: [],
            depth: 0,
            threadType: 'original',
            confidence: 1.0,
            metadata: {
                isRoot: true,
                hasChildren: false,
                chainPosition: 0,
                estimatedOriginalDate: new Date(rootEmail.receivedDateTime)
            }
        };
        this.buildThreadNodes(rootNode, sortedEmails, options);
        return rootNode;
    }

    /**
     * Recursively attach child nodes below `parentNode`.
     *
     * Each email is attached at most once (tracked in `attachedIds`). Without
     * this, a reply would be re-attached under every earlier message it
     * matches and the tree would grow exponentially with thread length.
     *
     * @param {object} parentNode - Node to extend.
     * @param {object[]} remainingEmails - Candidate emails, ideally chronological.
     * @param {object} options - Reads `maxDepth`.
     * @param {Set} [attachedIds] - IDs already placed in the tree; shared
     *   across the recursion. Defaults to a set holding the parent's ID.
     */
    buildThreadNodes(parentNode, remainingEmails, options, attachedIds = new Set([parentNode.email.id])) {
        if (parentNode.depth >= options.maxDepth) {
            return;
        }
        const parentEmail = parentNode.email;
        for (const childEmail of remainingEmails) {
            // Checked per iteration: deeper recursion may have claimed this email already.
            if (attachedIds.has(childEmail.id) || !this.isChildOf(childEmail, parentEmail)) {
                continue;
            }
            attachedIds.add(childEmail.id);
            const childNode = {
                email: childEmail,
                children: [],
                parent: parentNode,
                depth: parentNode.depth + 1,
                threadType: this.determineThreadType(childEmail, parentEmail),
                confidence: this.calculateRelationshipConfidence(childEmail, parentEmail),
                metadata: {
                    isRoot: false,
                    hasChildren: false,
                    chainPosition: parentNode.depth + 1,
                    estimatedOriginalDate: this.estimateOriginalDate(childEmail)
                }
            };
            if (childNode.threadType === 'forward') {
                childNode.metadata.forwardedFrom = parentEmail;
            }
            else if (childNode.threadType === 'reply') {
                childNode.metadata.replyTo = parentEmail;
            }
            parentNode.children.push(childNode);
            parentNode.metadata.hasChildren = true;
            this.buildThreadNodes(childNode, remainingEmails, options, attachedIds);
        }
    }

    /**
     * Pick the root (original) email: the earliest email whose subject has no
     * reply/forward prefix, or the first email of the list if none qualifies.
     *
     * @param {object[]} emails - Chronologically sorted emails.
     * @returns {object|undefined} Root email; undefined for an empty list.
     */
    findRootEmail(emails) {
        const originalEmails = emails.filter(email => !this.hasAnyForwardOrReplyPattern(email.subject));
        if (originalEmails.length > 0) {
            return originalEmails.reduce((earliest, current) => new Date(current.receivedDateTime) < new Date(earliest.receivedDateTime)
                ? current : earliest);
        }
        return emails[0];
    }

    /**
     * Decide whether `childEmail` looks like a reply to / forward of `parentEmail`.
     * A child must be strictly newer than its parent.
     *
     * @param {object} childEmail
     * @param {object} parentEmail
     * @returns {boolean}
     */
    isChildOf(childEmail, parentEmail) {
        if (new Date(childEmail.receivedDateTime) <= new Date(parentEmail.receivedDateTime)) {
            return false;
        }
        if (this.hasForwardOrReplyPattern(childEmail.subject, parentEmail.subject)) {
            return true;
        }
        if (this.hasForwardedContent(childEmail, parentEmail)) {
            return true;
        }
        return this.hasReplyPattern(childEmail.subject) &&
            this.subjectsMatch(this.cleanSubject(childEmail.subject), this.cleanSubject(parentEmail.subject));
    }

    /**
     * Classify an email relative to its parent.
     *
     * @param {object} email
     * @param {object} parentEmail
     * @returns {'nested_forward'|'forward'|'reply'} 'reply' is the fallback.
     */
    determineThreadType(email, parentEmail) {
        const subject = (email.subject || '').toLowerCase();
        const matchesAny = (patterns) => patterns.some(pattern => pattern.test(subject));
        if (matchesAny(ThreadReconstruction.NESTED_FORWARD_PATTERNS)) {
            return 'nested_forward';
        }
        if (matchesAny(ThreadReconstruction.FORWARD_PATTERNS)) {
            return 'forward';
        }
        if (matchesAny(ThreadReconstruction.REPLY_PATTERNS)) {
            return 'reply';
        }
        if (this.hasForwardedContent(email, parentEmail)) {
            return 'forward';
        }
        return 'reply'; // Default assumption
    }

    /**
     * Score (0..1) how likely `email` is a direct follow-up of `parentEmail`.
     * Weights: subject similarity 0.3, time proximity within 30 days 0.2,
     * content similarity 0.3, reply/forward prefix match 0.2.
     *
     * @param {object} email
     * @param {object} parentEmail
     * @returns {number}
     */
    calculateRelationshipConfidence(email, parentEmail) {
        const THIRTY_DAYS_MS = 30 * 24 * 60 * 60 * 1000;
        let confidence = this.calculateSubjectSimilarity(email.subject, parentEmail.subject) * 0.3;
        const timeDiff = Math.abs(new Date(email.receivedDateTime).getTime() -
            new Date(parentEmail.receivedDateTime).getTime());
        confidence += Math.max(0, 1 - (timeDiff / THIRTY_DAYS_MS)) * 0.2;
        confidence += this.calculateContentSimilarity(this.extractContentSignatures(email), this.extractContentSignatures(parentEmail)) * 0.3;
        if (this.hasForwardOrReplyPattern(email.subject, parentEmail.subject)) {
            confidence += 0.2;
        }
        return Math.min(1, confidence);
    }

    /**
     * Lower-case a subject and strip reply/forward markers. Stripping is
     * repeated until nothing changes so stacked prefixes such as
     * "RE: Re: FW: x" reduce to "x".
     *
     * @param {string|null|undefined} subject
     * @returns {string} Cleaned subject; '' for a missing subject.
     */
    cleanSubject(subject) {
        const prefixPatterns = [
            ...ThreadReconstruction.FORWARD_PATTERNS,
            ...ThreadReconstruction.REPLY_PATTERNS
        ];
        let cleaned = (subject || '').toLowerCase().trim();
        let previous;
        // Terminates: every effective replacement shortens the string.
        do {
            previous = cleaned;
            for (const pattern of prefixPatterns) {
                cleaned = cleaned.replace(pattern, '');
            }
            cleaned = cleaned.trim();
        } while (cleaned !== previous);
        return cleaned;
    }

    /**
     * Whether two already-cleaned subjects are equal or more than 80% similar.
     *
     * @param {string} subject1
     * @param {string} subject2
     * @returns {boolean}
     */
    subjectsMatch(subject1, subject2) {
        return subject1 === subject2 ||
            this.calculateStringSimilarity(subject1, subject2) > 0.8;
    }

    /**
     * Whether `subject` carries a reply/forward marker AND matches
     * `parentSubject` once both are cleaned.
     *
     * @param {string} subject
     * @param {string} parentSubject
     * @returns {boolean}
     */
    hasForwardOrReplyPattern(subject, parentSubject) {
        return this.hasAnyForwardOrReplyPattern(subject) &&
            this.subjectsMatch(this.cleanSubject(subject), this.cleanSubject(parentSubject));
    }

    /**
     * Whether the subject matches any forward or reply pattern.
     *
     * @param {string|null|undefined} subject
     * @returns {boolean}
     */
    hasAnyForwardOrReplyPattern(subject) {
        const lowerSubject = (subject || '').toLowerCase();
        return ThreadReconstruction.FORWARD_PATTERNS.some(pattern => pattern.test(lowerSubject)) ||
            ThreadReconstruction.REPLY_PATTERNS.some(pattern => pattern.test(lowerSubject));
    }

    /**
     * Whether the subject matches any reply pattern.
     *
     * @param {string|null|undefined} subject
     * @returns {boolean}
     */
    hasReplyPattern(subject) {
        const lowerSubject = (subject || '').toLowerCase();
        return ThreadReconstruction.REPLY_PATTERNS.some(pattern => pattern.test(lowerSubject));
    }

    /**
     * Whether `email`'s body preview contains a forwarded-message marker, or
     * begins quoting the parent's preview (first 100 chars, only when the
     * parent preview is longer than 50 chars).
     *
     * @param {object} email
     * @param {object} parentEmail
     * @returns {boolean}
     */
    hasForwardedContent(email, parentEmail) {
        const emailContent = (email.bodyPreview || '').toLowerCase();
        const parentContent = (parentEmail.bodyPreview || '').toLowerCase();
        if (ThreadReconstruction.FORWARDED_BODY_PATTERNS.some(pattern => pattern.test(emailContent))) {
            return true;
        }
        return parentContent.length > 50 &&
            emailContent.includes(parentContent.substring(0, 100));
    }

    /**
     * Extract comparable signatures (keywords, phrases, entities) from an
     * email's subject and body preview.
     *
     * @param {object} email
     * @returns {{keywords: string[], phrases: string[], entities: string[]}}
     */
    extractContentSignatures(email) {
        const content = `${email.subject || ''} ${email.bodyPreview || ''}`.toLowerCase();
        return {
            keywords: this.extractKeywords(content),
            phrases: this.extractPhrases(content),
            entities: this.extractEntities(content)
        };
    }

    /**
     * Extract words of 3+ word-characters that are not stop words.
     * The stop-word list is lower-case; callers pass lower-cased content
     * when they want case-insensitive filtering.
     *
     * @param {string|null|undefined} content
     * @returns {string[]}
     */
    extractKeywords(content) {
        const words = (content || '').match(/\b\w{3,}\b/g) || [];
        return words.filter(word => !ThreadReconstruction.STOP_WORDS.has(word));
    }

    /**
     * Extract every 2-word and 3-word sequence of whitespace-separated tokens.
     *
     * @param {string} content
     * @returns {string[]}
     */
    extractPhrases(content) {
        const phrases = [];
        const words = content.split(/\s+/);
        for (let i = 0; i < words.length - 1; i++) {
            phrases.push(`${words[i]} ${words[i + 1]}`);
            if (i < words.length - 2) {
                phrases.push(`${words[i]} ${words[i + 1]} ${words[i + 2]}`);
            }
        }
        return phrases;
    }

    /**
     * Extract email addresses, numeric dates (d/m/y or d-m-y shapes) and
     * numbers with 4+ digits, in that order.
     *
     * @param {string} content
     * @returns {string[]}
     */
    extractEntities(content) {
        const entityPatterns = [
            // TLD class is letters only (a literal '|' used to be accepted here).
            /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g,
            /\b\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b/g,
            /\b\d{4,}\b/g
        ];
        return entityPatterns.flatMap(pattern => content.match(pattern) || []);
    }

    /**
     * Weighted similarity of two content signatures:
     * keywords 0.4, phrases 0.4, entities 0.2.
     *
     * @param {object} content1
     * @param {object} content2
     * @returns {number} Value in 0..1.
     */
    calculateContentSimilarity(content1, content2) {
        const keywordSim = this.calculateArraySimilarity(content1.keywords, content2.keywords);
        const phraseSim = this.calculateArraySimilarity(content1.phrases, content2.phrases);
        const entitySim = this.calculateArraySimilarity(content1.entities, content2.entities);
        return (keywordSim * 0.4 + phraseSim * 0.4 + entitySim * 0.2);
    }

    /**
     * Jaccard similarity of two arrays viewed as sets. Two empty arrays
     * count as identical (1); exactly one empty array gives 0.
     *
     * @param {Array} arr1
     * @param {Array} arr2
     * @returns {number}
     */
    calculateArraySimilarity(arr1, arr2) {
        if (arr1.length === 0 && arr2.length === 0) {
            return 1;
        }
        if (arr1.length === 0 || arr2.length === 0) {
            return 0;
        }
        const set1 = new Set(arr1);
        const set2 = new Set(arr2);
        let shared = 0;
        for (const item of set1) {
            if (set2.has(item)) {
                shared++;
            }
        }
        // |A ∪ B| = |A| + |B| - |A ∩ B|
        return shared / (set1.size + set2.size - shared);
    }

    /**
     * Normalised edit similarity: 1 - distance / length of the longer string.
     *
     * @param {string} str1
     * @param {string} str2
     * @returns {number} Value in 0..1; 1 for equal (including two empty) strings.
     */
    calculateStringSimilarity(str1, str2) {
        if (str1 === str2) {
            return 1;
        }
        const longer = str1.length > str2.length ? str1 : str2;
        const shorter = str1.length > str2.length ? str2 : str1;
        if (longer.length === 0) {
            return 1;
        }
        return (longer.length - this.levenshteinDistance(longer, shorter)) / longer.length;
    }

    /**
     * Levenshtein edit distance, computed with two rolling rows.
     *
     * @param {string} str1
     * @param {string} str2
     * @returns {number}
     */
    levenshteinDistance(str1, str2) {
        let previousRow = Array.from({ length: str1.length + 1 }, (_, i) => i);
        for (let j = 1; j <= str2.length; j++) {
            const currentRow = [j];
            for (let i = 1; i <= str1.length; i++) {
                const substitutionCost = str1[i - 1] === str2[j - 1] ? 0 : 1;
                currentRow[i] = Math.min(currentRow[i - 1] + 1, previousRow[i] + 1, previousRow[i - 1] + substitutionCost);
            }
            previousRow = currentRow;
        }
        return previousRow[str1.length];
    }

    /**
     * Similarity of two raw subjects after prefix cleaning.
     *
     * @param {string} subject1
     * @param {string} subject2
     * @returns {number}
     */
    calculateSubjectSimilarity(subject1, subject2) {
        return this.calculateStringSimilarity(this.cleanSubject(subject1), this.cleanSubject(subject2));
    }

    /**
     * Estimate when the quoted/forwarded original was sent by looking for a
     * "sent:", "date:" or "on" marker followed by a numeric date in the body
     * preview. Falls back to the email's own `receivedDateTime`.
     *
     * NOTE(review): the captured text is handed to `new Date(string)`, whose
     * handling of non-ISO formats is implementation-defined.
     *
     * @param {object} email
     * @returns {Date}
     */
    estimateOriginalDate(email) {
        const datePatterns = [
            /sent:\s*(\d{1,2}[/-]\d{1,2}[/-]\d{2,4})/i,
            /date:\s*(\d{1,2}[/-]\d{1,2}[/-]\d{2,4})/i,
            /on\s*(\d{1,2}[/-]\d{1,2}[/-]\d{2,4})/i,
        ];
        const preview = email.bodyPreview || '';
        for (const pattern of datePatterns) {
            const match = preview.match(pattern);
            if (match) {
                const estimatedDate = new Date(match[1]);
                if (!isNaN(estimatedDate.getTime())) {
                    return estimatedDate;
                }
            }
        }
        return new Date(email.receivedDateTime);
    }

    /**
     * Keep emails received within `days` days before or after the target.
     *
     * @param {object[]} emails
     * @param {object} targetEmail
     * @param {number} days
     * @returns {object[]} New array.
     */
    filterByTimeWindow(emails, targetEmail, days) {
        const windowMs = days * 24 * 60 * 60 * 1000;
        const targetTime = new Date(targetEmail.receivedDateTime).getTime();
        const windowStart = new Date(targetTime - windowMs);
        const windowEnd = new Date(targetTime + windowMs);
        return emails.filter(email => {
            const emailDate = new Date(email.receivedDateTime);
            return emailDate >= windowStart && emailDate <= windowEnd;
        });
    }

    /**
     * Walk the tree and summarise its shape.
     *
     * @param {object} threadTree - Root node.
     * @param {object} options - Currently unused.
     * @returns {object} `rootEmail`, `maxDepth`, `totalNodes`, `forwardCount`,
     *   `replyCount`, `complexity` ('simple' <= 3 nodes, 'moderate' <= 10, else 'complex').
     */
    analyzeThread(threadTree, options) {
        const analysis = {
            rootEmail: threadTree.email,
            maxDepth: 0,
            totalNodes: 0,
            forwardCount: 0,
            replyCount: 0,
            complexity: 'simple'
        };
        this.analyzeNode(threadTree, analysis);
        if (analysis.totalNodes <= 3) {
            analysis.complexity = 'simple';
        }
        else if (analysis.totalNodes <= 10) {
            analysis.complexity = 'moderate';
        }
        else {
            analysis.complexity = 'complex';
        }
        return analysis;
    }

    /**
     * Accumulate counters for `node` and all of its descendants into `analysis`.
     *
     * @param {object} node
     * @param {object} analysis - Mutated in place.
     */
    analyzeNode(node, analysis) {
        analysis.totalNodes++;
        analysis.maxDepth = Math.max(analysis.maxDepth, node.depth);
        if (node.threadType === 'forward' || node.threadType === 'nested_forward') {
            analysis.forwardCount++;
        }
        else if (node.threadType === 'reply') {
            analysis.replyCount++;
        }
        for (const child of node.children) {
            this.analyzeNode(child, analysis);
        }
    }

    /**
     * Earliest and latest received dates and the span between them in whole
     * days (rounded up).
     *
     * @param {object[]} emails - Expected to be non-empty.
     * @returns {{startDate: Date, endDate: Date, durationDays: number}}
     */
    calculateThreadSpan(emails) {
        const DAY_MS = 1000 * 60 * 60 * 24;
        const timestamps = emails.map(email => new Date(email.receivedDateTime).getTime());
        const startDate = new Date(Math.min(...timestamps));
        const endDate = new Date(Math.max(...timestamps));
        const durationDays = Math.ceil((endDate.getTime() - startDate.getTime()) / DAY_MS);
        return { startDate, endDate, durationDays };
    }

    /**
     * Aggregate senders, recipients and CC recipients by lower-cased address.
     * `messageCount` counts messages sent. Entries without an address, and
     * missing `from` / recipient lists, are skipped.
     *
     * @param {object[]} emails
     * @returns {object[]} `{ name, address, messageCount, role }` sorted by
     *   descending messageCount; role is 'mixed' when a person has several roles.
     */
    analyzeParticipants(emails) {
        const participantMap = new Map();
        const register = (person, role) => {
            if (!person || !person.address) {
                return null;
            }
            const key = person.address.toLowerCase();
            let entry = participantMap.get(key);
            if (!entry) {
                entry = { name: person.name, messageCount: 0, roles: new Set() };
                participantMap.set(key, entry);
            }
            entry.roles.add(role);
            return entry;
        };
        for (const email of emails) {
            const sender = register(email.from, 'sender');
            if (sender) {
                sender.messageCount++;
            }
            for (const recipient of email.toRecipients || []) {
                register(recipient, 'recipient');
            }
            for (const ccRecipient of email.ccRecipients || []) {
                register(ccRecipient, 'cc');
            }
        }
        const participants = Array.from(participantMap.entries()).map(([address, data]) => ({
            name: data.name,
            address,
            messageCount: data.messageCount,
            role: data.roles.size > 1 ? 'mixed' :
                data.roles.has('sender') ? 'sender' :
                    data.roles.has('recipient') ? 'recipient' :
                        'cc'
        }));
        return participants.sort((a, b) => b.messageCount - a.messageCount);
    }

    /**
     * Build the human-facing summary of a thread.
     *
     * @param {object} threadTree - Root node.
     * @param {object} analysis - Result of `analyzeThread`.
     * @param {object} options - Currently unused.
     * @returns {object} `subject`, `mainTopic`, `keyParticipants`,
     *   `isForwardChain`, `isReplyChain`, `complexity`.
     */
    generateThreadSummary(threadTree, analysis, options) {
        return {
            subject: this.cleanSubject(threadTree.email.subject),
            mainTopic: this.extractMainTopic(threadTree),
            keyParticipants: this.extractKeyParticipants(threadTree),
            isForwardChain: analysis.forwardCount > 0,
            isReplyChain: analysis.replyCount > 0,
            complexity: analysis.complexity
        };
    }

    /**
     * The three most frequent keywords across all subjects in the tree,
     * comma-separated, or 'General Discussion' when there are none.
     *
     * @param {object} threadTree - Root node.
     * @returns {string}
     */
    extractMainTopic(threadTree) {
        const keywordCounts = new Map();
        for (const subject of this.collectAllSubjects(threadTree)) {
            for (const keyword of this.extractKeywords(subject)) {
                keywordCounts.set(keyword, (keywordCounts.get(keyword) || 0) + 1);
            }
        }
        const topKeywords = Array.from(keywordCounts.entries())
            .sort((a, b) => b[1] - a[1])
            .slice(0, 3)
            .map(([keyword]) => keyword);
        return topKeywords.join(', ') || 'General Discussion';
    }

    /**
     * Up to five distinct participant names, in tree-traversal order.
     *
     * @param {object} threadTree - Root node.
     * @returns {string[]}
     */
    extractKeyParticipants(threadTree) {
        const participants = new Set();
        this.collectAllParticipants(threadTree, participants);
        return Array.from(participants).slice(0, 5);
    }

    /**
     * Collect the subject of `node` and of every descendant (pre-order).
     *
     * @param {object} node
     * @returns {Array} Subjects as stored on the emails (may include null/undefined).
     */
    collectAllSubjects(node) {
        const subjects = [node.email.subject];
        for (const child of node.children) {
            subjects.push(...this.collectAllSubjects(child));
        }
        return subjects;
    }

    /**
     * Add sender and To-recipient names of `node` and its descendants to
     * `participants`. Missing names are skipped.
     *
     * @param {object} node
     * @param {Set<string>} participants - Mutated in place.
     */
    collectAllParticipants(node, participants) {
        const people = [node.email.from, ...(node.email.toRecipients || [])];
        for (const person of people) {
            if (person && person.name) {
                participants.add(person.name);
            }
        }
        for (const child of node.children) {
            this.collectAllParticipants(child, participants);
        }
    }

    /**
     * Generate an ID of the form `thread_<conversationId>_<timestamp>`.
     *
     * @param {object} email
     * @returns {string}
     */
    generateThreadId(email) {
        return `thread_${email.conversationId}_${Date.now()}`;
    }

    /**
     * Reconstruct threads for a batch of emails. Emails already absorbed into
     * an earlier thread are skipped; a failure on one email is logged and does
     * not abort the batch.
     *
     * @param {object[]} emails
     * @param {object} [options] - Passed through to `reconstructThread`.
     * @returns {Promise<Map<string, object>>} Threads keyed by thread ID.
     */
    async batchReconstructThreads(emails, options = {}) {
        const threads = new Map();
        const processedEmails = new Set();
        logger.log(`Batch processing ${emails.length} emails for thread reconstruction`);
        for (const email of emails) {
            if (processedEmails.has(email.id)) {
                continue;
            }
            try {
                const thread = await this.reconstructThread(email, emails, options);
                threads.set(thread.id, thread);
                for (const threadEmail of thread.allEmails) {
                    processedEmails.add(threadEmail.id);
                }
            }
            catch (error) {
                logger.error(`Error reconstructing thread for email ${email.id}:`, error);
            }
        }
        logger.log(`Batch thread reconstruction completed: ${threads.size} threads`);
        return threads;
    }
}
// Defaults merged under caller-supplied options in reconstructThread.
ThreadReconstruction.DEFAULT_OPTIONS = {
    maxDepth: 10,
    timeWindowDays: 365,
    includeForwardChains: true,
    includeReplyChains: true,
    enableAdvancedPatternMatching: true,
    minimumConfidence: 0.6,
    analysisDepth: 'detailed'
};
// Common forwarded email patterns
ThreadReconstruction.FORWARD_PATTERNS = [
    // Standard forward prefixes
    /^(fw|fwd|forward):\s*/i,
    /^re:\s*(fw|fwd|forward):\s*/i,
    // Forwarded message headers
    /---------- forwarded message ----------/i,
    /-----original message-----/i,
    /forwarded message/i,
    /original message/i,
    // Email client specific patterns
    /begin forwarded message/i,
    /forwarded by/i,
    /forwarding.*message/i,
    // International patterns
    /tr:\s*/i, // Turkish (Tr:)
    /fw:\s*/i, // Forward abbreviation
    /weitergeleitet:\s*/i, // German
    /reenvio:\s*/i, // Spanish
    /transféré:\s*/i, // French
];
// Reply patterns
ThreadReconstruction.REPLY_PATTERNS = [
    /^re:\s*/i,
    /^re\[\d+\]:\s*/i,
    /^reply:\s*/i,
    /^response:\s*/i,
    /^answer:\s*/i,
    /^antw:\s*/i, // German
    /^resp:\s*/i, // Spanish
    /^rép:\s*/i, // French
];
// Nested forward patterns (forwards within forwards)
ThreadReconstruction.NESTED_FORWARD_PATTERNS = [
    /^(fw|fwd):\s*(fw|fwd):\s*/i,
    /^re:\s*(fw|fwd):\s*(fw|fwd):\s*/i,
    /^(fw|fwd):\s*re:\s*(fw|fwd):\s*/i,
];
// Markers in a body preview that indicate quoted/forwarded content
ThreadReconstruction.FORWARDED_BODY_PATTERNS = [
    /from:.*sent:/i,
    /forwarded message/i,
    /original message/i,
    /---------- forwarded message/i,
    /-----original message-----/i,
    /begin forwarded message/i,
];
// Words ignored by extractKeywords
ThreadReconstruction.STOP_WORDS = new Set(['the', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by']);
|