ms365-mcp-server 1.1.16 → 1.1.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,514 @@
1
+ import { logger } from './api.js';
2
/** Words ignored when extracting search terms from a query. */
const STOP_WORDS = new Set(['the', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may', 'might', 'can', 'this', 'that', 'these', 'those']);

/** Consonant-to-digit table used by the Soundex encoder. */
const SOUNDEX_CODES = {
    'B': '1', 'F': '1', 'P': '1', 'V': '1',
    'C': '2', 'G': '2', 'J': '2', 'K': '2', 'Q': '2', 'S': '2', 'X': '2', 'Z': '2',
    'D': '3', 'T': '3',
    'L': '4',
    'M': '5', 'N': '5',
    'R': '6'
};

const MS_PER_DAY = 24 * 60 * 60 * 1000;

/** Coerce a possibly missing field to a string; null/undefined become ''. */
const textOf = (value) => (value == null ? '' : String(value));

/** Return the value when it is an array, otherwise an empty array. */
const listOf = (value) => (Array.isArray(value) ? value : []);

/**
 * In-memory email search that combines exact, fuzzy (edit distance),
 * proximity-based "semantic", synonym and phonetic (Soundex) matching.
 *
 * Emails are expected to carry `id`, `subject`, `bodyPreview`,
 * `from: { name, address }`, `toRecipients`, `ccRecipients` and
 * `receivedDateTime`; missing text fields and recipient lists are treated
 * as empty rather than throwing.
 */
export class EnhancedFuzzySearch {
    /**
     * @param {*} ms365Operations - Stored on the instance; no method of this
     *   class reads it.
     */
    constructor(ms365Operations) {
        this.ms365Operations = ms365Operations;
    }

    /**
     * Enhanced fuzzy search with multiple matching strategies.
     *
     * @param {string} query - Raw user query.
     * @param {Array<object>} emails - Emails to search.
     * @param {object} [options] - Overrides merged over DEFAULT_OPTIONS.
     * @returns {Promise<Array<object>>} Best match per email, sorted by
     *   descending score and capped at `maxResults`.
     */
    async search(query, emails, options = {}) {
        const opts = { ...EnhancedFuzzySearch.DEFAULT_OPTIONS, ...options };
        const results = [];
        logger.log(`🔍 Enhanced fuzzy search for query: "${query}"`);
        // Normalize and preprocess query
        const normalizedQuery = this.normalizeQuery(query);
        const queryTerms = this.extractQueryTerms(normalizedQuery);
        const expandedTerms = this.expandQueryWithSynonyms(queryTerms, opts);
        for (const email of emails) {
            const matchResults = this.matchEmail(email, query, queryTerms, expandedTerms, opts);
            if (matchResults.length > 0) {
                // Keep only the highest-scoring strategy for this email
                const bestMatch = matchResults.reduce((best, current) => (current.score > best.score ? current : best));
                results.push(bestMatch);
            }
        }
        // Sort by score and apply limits
        const sortedResults = this.sortAndFilterResults(results, opts);
        logger.log(`🔍 Enhanced fuzzy search found ${sortedResults.length} results`);
        return sortedResults;
    }

    /**
     * Match an email against search criteria using multiple strategies.
     *
     * Each strategy that clears its threshold contributes one result; the
     * raw strategy score is scaled (exact 1.0, fuzzy 0.9, semantic 0.8,
     * synonym 0.7, phonetic 0.6) so stricter strategies rank higher.
     *
     * @param {object} email
     * @param {string} originalQuery - Query as typed, used for exact matching.
     * @param {string[]} queryTerms - Terms from extractQueryTerms.
     * @param {string[]} expandedTerms - Terms plus synonyms.
     * @param {object} options - Merged search options.
     * @returns {Array<object>} Zero or more `{ email, score, matchedFields,
     *   matchType, explanation }` records.
     */
    matchEmail(email, originalQuery, queryTerms, expandedTerms, options) {
        const results = [];
        const emailText = this.extractEmailText(email);
        const joinedTerms = queryTerms.join(' ');
        // 1. Exact matching
        if (this.hasExactMatch(emailText, originalQuery)) {
            results.push({
                email,
                score: 1.0,
                matchedFields: this.getMatchedFields(email, originalQuery),
                matchType: 'exact',
                explanation: `Exact match found for "${originalQuery}"`
            });
        }
        // 2. Fuzzy matching with typo tolerance
        if (options.enableTypoTolerance) {
            const fuzzyScore = this.calculateFuzzyScore(emailText, queryTerms, options.typoTolerance);
            if (fuzzyScore > 0.5) {
                results.push({
                    email,
                    score: fuzzyScore * 0.9, // Slightly lower than exact
                    matchedFields: this.getMatchedFields(email, joinedTerms),
                    matchType: 'fuzzy',
                    explanation: `Fuzzy match with ${Math.round(fuzzyScore * 100)}% similarity`
                });
            }
        }
        // 3. Semantic matching
        if (options.enableSemanticMatching) {
            const semanticScore = this.calculateSemanticScore(emailText, queryTerms, options.semanticThreshold);
            if (semanticScore > options.semanticThreshold) {
                results.push({
                    email,
                    score: semanticScore * 0.8,
                    matchedFields: this.getMatchedFields(email, joinedTerms),
                    matchType: 'semantic',
                    explanation: `Semantic match with ${Math.round(semanticScore * 100)}% relevance`
                });
            }
        }
        // 4. Synonym matching (only when expansion actually added terms)
        if (options.enableSynonyms && expandedTerms.length > queryTerms.length) {
            const synonymScore = this.calculateSynonymScore(emailText, expandedTerms);
            if (synonymScore > 0.6) {
                results.push({
                    email,
                    score: synonymScore * 0.7,
                    matchedFields: this.getMatchedFields(email, expandedTerms.join(' ')),
                    matchType: 'synonym',
                    explanation: `Synonym match found`
                });
            }
        }
        // 5. Phonetic matching
        if (options.enablePhoneticMatching) {
            const phoneticScore = this.calculatePhoneticScore(emailText, queryTerms);
            if (phoneticScore > 0.7) {
                results.push({
                    email,
                    score: phoneticScore * 0.6,
                    matchedFields: this.getMatchedFields(email, joinedTerms),
                    matchType: 'phonetic',
                    explanation: `Phonetic match found`
                });
            }
        }
        return results;
    }

    /**
     * Normalize query for better matching: lower-case, replace punctuation
     * with spaces, collapse whitespace and trim.
     *
     * Letters, combining marks and digits of any script are preserved
     * (along with `_` and `-`), so accented or non-Latin queries survive.
     *
     * @param {string} query
     * @returns {string}
     */
    normalizeQuery(query) {
        return textOf(query)
            .toLowerCase()
            .replace(/[^\p{L}\p{M}\p{N}_\s-]/gu, ' ')
            .replace(/\s+/g, ' ')
            .trim();
    }

    /**
     * Extract meaningful terms from query: whitespace-separated words longer
     * than two characters that are not stop words.
     *
     * @param {string} query - Normalized query.
     * @returns {string[]}
     */
    extractQueryTerms(query) {
        return query
            .split(/\s+/)
            .filter((term) => term.length > 2 && !STOP_WORDS.has(term));
    }

    /**
     * Expand query terms with synonyms.
     *
     * Only own keys of the synonym table are consulted, so terms such as
     * "constructor" or "__proto__" cannot resolve to inherited object
     * members (which are not iterable and would make the spread throw).
     *
     * @param {string[]} terms
     * @param {object} options - Reads `enableSynonyms`.
     * @returns {string[]} `terms` itself when synonyms are disabled,
     *   otherwise a new array of the terms followed by their synonyms.
     */
    expandQueryWithSynonyms(terms, options) {
        if (!options.enableSynonyms) {
            return terms;
        }
        const table = EnhancedFuzzySearch.SYNONYMS || {};
        const expanded = [...terms];
        for (const term of terms) {
            const synonyms = Object.prototype.hasOwnProperty.call(table, term) ? table[term] : undefined;
            if (Array.isArray(synonyms)) {
                expanded.push(...synonyms);
            }
        }
        return expanded;
    }

    /**
     * Extract searchable text from email: subject, body preview, sender and
     * all To/CC recipients, space-joined and lower-cased.
     *
     * Missing fields contribute empty strings instead of throwing or
     * injecting the literal text "undefined".
     *
     * @param {object} email
     * @returns {string}
     */
    extractEmailText(email) {
        const sender = email.from || {};
        const describe = (recipient) => {
            const person = recipient || {};
            return `${textOf(person.name)} ${textOf(person.address)}`;
        };
        return [
            textOf(email.subject),
            textOf(email.bodyPreview),
            textOf(sender.name),
            textOf(sender.address),
            ...listOf(email.toRecipients).map(describe),
            ...listOf(email.ccRecipients).map(describe)
        ].join(' ').toLowerCase();
    }

    /**
     * Check for exact matches: case-insensitive substring test.
     *
     * @param {string} text - Lower-cased email text.
     * @param {string} query
     * @returns {boolean}
     */
    hasExactMatch(text, query) {
        return text.includes(query.toLowerCase());
    }

    /**
     * Calculate fuzzy score with typo tolerance.
     *
     * Sums the best per-term similarity of every term scoring above 0.5 and
     * divides by the total number of terms, so unmatched terms lower the
     * score.
     *
     * @param {string} text
     * @param {string[]} queryTerms
     * @param {number} tolerance - Minimum similarity for a word to count.
     * @returns {number} Score in [0, 1]; 0 when no term matched.
     */
    calculateFuzzyScore(text, queryTerms, tolerance) {
        let totalScore = 0;
        let matchedTerms = 0;
        for (const term of queryTerms) {
            const termScore = this.findBestTermMatch(text, term, tolerance);
            if (termScore > 0.5) {
                totalScore += termScore;
                matchedTerms++;
            }
        }
        return matchedTerms > 0 ? totalScore / queryTerms.length : 0;
    }

    /**
     * Find best match for a term in text.
     *
     * @param {string} text
     * @param {string} term
     * @param {number} tolerance - Similarities below this are ignored.
     * @returns {number} Highest qualifying similarity, or 0.
     */
    findBestTermMatch(text, term, tolerance) {
        let bestScore = 0;
        for (const word of text.split(/\s+/)) {
            const similarity = this.calculateStringSimilarity(word, term);
            if (similarity > bestScore && similarity >= tolerance) {
                bestScore = similarity;
                if (bestScore === 1) {
                    break; // cannot be beaten
                }
            }
        }
        return bestScore;
    }

    /**
     * Calculate semantic relevance score.
     *
     * Every occurrence of a term adds 0.1; every other query term found as
     * an exact word within 10 positions on either side of that occurrence
     * adds 0.2. The total is capped at 1.
     *
     * @param {string} text
     * @param {string[]} queryTerms
     * @param {number} threshold - Accepted for interface compatibility; not
     *   used in the computation.
     * @returns {number}
     */
    calculateSemanticScore(text, queryTerms, threshold) {
        let score = 0;
        const words = text.split(/\s+/);
        const windowSize = 10; // Look at words within 10 positions
        for (const term of queryTerms) {
            for (const pos of this.findTermPositions(words, term)) {
                // Score based on term frequency and context
                score += 0.1;
                // Bonus for terms appearing close to each other
                const windowStart = Math.max(0, pos - windowSize);
                // slice() excludes its end index, so +1 keeps the window symmetric
                const windowEnd = Math.min(words.length, pos + windowSize + 1);
                const windowWords = words.slice(windowStart, windowEnd);
                for (const otherTerm of queryTerms) {
                    if (otherTerm !== term && windowWords.includes(otherTerm)) {
                        score += 0.2;
                    }
                }
            }
        }
        return Math.min(1, score);
    }

    /**
     * Find positions of a term in word array. A word counts when it
     * contains the term or its similarity to the term exceeds 0.8.
     *
     * @param {string[]} words
     * @param {string} term
     * @returns {number[]}
     */
    findTermPositions(words, term) {
        const positions = [];
        words.forEach((word, index) => {
            if (word.includes(term) || this.calculateStringSimilarity(word, term) > 0.8) {
                positions.push(index);
            }
        });
        return positions;
    }

    /**
     * Calculate synonym matching score: the fraction of expanded terms that
     * occur as substrings of the text.
     *
     * @param {string} text
     * @param {string[]} expandedTerms
     * @returns {number}
     */
    calculateSynonymScore(text, expandedTerms) {
        if (expandedTerms.length === 0) {
            return 0;
        }
        const matchCount = expandedTerms.filter((term) => text.includes(term)).length;
        return matchCount / expandedTerms.length;
    }

    /**
     * Calculate phonetic matching score: the fraction of query terms whose
     * Soundex code equals that of at least one word of the text.
     *
     * @param {string} text
     * @param {string[]} queryTerms
     * @returns {number}
     */
    calculatePhoneticScore(text, queryTerms) {
        if (queryTerms.length === 0) {
            return 0;
        }
        // Encode each word once instead of once per query term
        const wordCodes = new Set(text.split(/\s+/).map((word) => this.soundex(word)));
        const hits = queryTerms.filter((term) => wordCodes.has(this.soundex(term))).length;
        return hits / queryTerms.length;
    }

    /**
     * Soundex phonetic algorithm implementation (standard American rules).
     *
     * The first letter is kept; later consonants map to digits. Adjacent
     * letters with the same code collapse into one, including a letter that
     * shares the first letter's code. H and W are transparent, while vowels
     * and any other uncoded character separate repeats. The result is
     * padded with zeros to four characters.
     *
     * @param {string} word
     * @returns {string} Soundex code, or '' for empty input.
     */
    soundex(word) {
        if (!word || word.length === 0) {
            return '';
        }
        const first = word.charAt(0).toUpperCase();
        let soundexCode = first;
        let prevCode = SOUNDEX_CODES[first] || '';
        for (let i = 1; i < word.length && soundexCode.length < 4; i++) {
            const char = word.charAt(i).toUpperCase();
            if (char === 'H' || char === 'W') {
                continue; // does not separate equal codes
            }
            const code = SOUNDEX_CODES[char] || '';
            if (code && code !== prevCode) {
                soundexCode += code;
            }
            prevCode = code; // '' for vowels, which lets the same code repeat
        }
        return soundexCode.padEnd(4, '0');
    }

    /**
     * Get fields that matched the query (case-insensitive substring test).
     *
     * @param {object} email
     * @param {string} query
     * @returns {string[]} Subset of 'subject', 'body', 'from', 'to', 'cc',
     *   in that order.
     */
    getMatchedFields(email, query) {
        const lowerQuery = query.toLowerCase();
        const contains = (value) => textOf(value).toLowerCase().includes(lowerQuery);
        const anyRecipient = (recipients) => listOf(recipients)
            .some((r) => r != null && (contains(r.name) || contains(r.address)));
        const sender = email.from || {};
        const fields = [];
        if (contains(email.subject)) {
            fields.push('subject');
        }
        if (contains(email.bodyPreview)) {
            fields.push('body');
        }
        if (contains(sender.name) || contains(sender.address)) {
            fields.push('from');
        }
        if (anyRecipient(email.toRecipients)) {
            fields.push('to');
        }
        // CC recipients are part of the searched text, so report them too
        if (anyRecipient(email.ccRecipients)) {
            fields.push('cc');
        }
        return fields;
    }

    /**
     * Sort and filter results: de-duplicate, optionally boost recent emails
     * (x1.2 under 7 days old, x1.1 under 30 days), sort by descending score
     * and truncate to `options.maxResults`.
     *
     * The boost is applied in place to the `score` of the result objects.
     *
     * @param {Array<object>} results
     * @param {object} options - Reads `boostRecentEmails` and `maxResults`.
     * @returns {Array<object>}
     */
    sortAndFilterResults(results, options) {
        // Remove duplicates
        const uniqueResults = this.removeDuplicateResults(results);
        // Apply recency boost if enabled
        if (options.boostRecentEmails) {
            for (const result of uniqueResults) {
                const daysOld = this.getDaysOld(result.email.receivedDateTime);
                if (daysOld < 7) {
                    result.score *= 1.2; // 20% boost for recent emails
                }
                else if (daysOld < 30) {
                    result.score *= 1.1; // 10% boost for emails within a month
                }
            }
        }
        // Sort by score (descending)
        uniqueResults.sort((a, b) => b.score - a.score);
        // Apply result limit
        return uniqueResults.slice(0, options.maxResults);
    }

    /**
     * Remove duplicate results, keeping the first result per email id.
     * Results whose email has no id cannot be compared and are all kept.
     *
     * @param {Array<object>} results
     * @returns {Array<object>}
     */
    removeDuplicateResults(results) {
        const seen = new Set();
        const unique = [];
        for (const result of results) {
            const id = result.email.id;
            if (id == null) {
                unique.push(result);
                continue;
            }
            if (!seen.has(id)) {
                seen.add(id);
                unique.push(result);
            }
        }
        return unique;
    }

    /**
     * Get days old for an email: absolute distance from now, rounded up to
     * whole days. An unparseable date yields NaN.
     *
     * @param {string|number|Date} dateTime
     * @returns {number}
     */
    getDaysOld(dateTime) {
        const elapsed = Math.abs(Date.now() - new Date(dateTime).getTime());
        return Math.ceil(elapsed / MS_PER_DAY);
    }

    /**
     * Calculate string similarity as `1 - editDistance / longerLength`.
     *
     * @param {string} str1
     * @param {string} str2
     * @returns {number} 1 for identical strings, lower for more edits.
     */
    calculateStringSimilarity(str1, str2) {
        if (str1 === str2) {
            return 1;
        }
        const longer = str1.length > str2.length ? str1 : str2;
        const shorter = str1.length > str2.length ? str2 : str1;
        if (longer.length === 0) {
            return 1;
        }
        const editDistance = this.levenshteinDistance(longer, shorter);
        return (longer.length - editDistance) / longer.length;
    }

    /**
     * Calculate Levenshtein distance (insert/delete/substitute, each cost 1).
     * Only two rows of the dynamic-programming table are kept at a time.
     *
     * @param {string} str1
     * @param {string} str2
     * @returns {number}
     */
    levenshteinDistance(str1, str2) {
        let previousRow = Array.from({ length: str1.length + 1 }, (_, i) => i);
        for (let j = 1; j <= str2.length; j++) {
            const currentRow = [j];
            for (let i = 1; i <= str1.length; i++) {
                const indicator = str1[i - 1] === str2[j - 1] ? 0 : 1;
                currentRow[i] = Math.min(currentRow[i - 1] + 1, previousRow[i] + 1, previousRow[i - 1] + indicator);
            }
            previousRow = currentRow;
        }
        return previousRow[str1.length];
    }

    /**
     * Apply stemming to a word: the first rule in STEMMING_RULES whose
     * suffix matches replaces that suffix; otherwise the word is returned
     * unchanged.
     *
     * @param {string} word
     * @returns {string}
     */
    stemWord(word) {
        const rule = (EnhancedFuzzySearch.STEMMING_RULES || []).find((candidate) => word.endsWith(candidate.suffix));
        return rule ? word.slice(0, -rule.suffix.length) + rule.replacement : word;
    }

    /**
     * Search with natural language understanding: extracts time and sender
     * hints from the query, pre-filters the emails with them, then runs
     * `search` with the remaining query text.
     *
     * The parsed importance hint is not applied as a filter here.
     *
     * @param {string} query
     * @param {Array<object>} emails
     * @param {object} [options] - Passed through to `search`.
     * @returns {Promise<Array<object>>}
     */
    async naturalLanguageSearch(query, emails, options = {}) {
        // Parse natural language patterns
        const parsedQuery = this.parseNaturalLanguageQuery(query);
        // Apply time-based filters if detected
        let filteredEmails = emails;
        if (parsedQuery.timeFilter) {
            filteredEmails = this.applyTimeFilter(emails, parsedQuery.timeFilter);
        }
        // Apply sender/recipient filters
        if (parsedQuery.senderFilter) {
            const wanted = parsedQuery.senderFilter;
            filteredEmails = filteredEmails.filter((email) => {
                const sender = email.from || {};
                return textOf(sender.name).toLowerCase().includes(wanted) ||
                    textOf(sender.address).toLowerCase().includes(wanted);
            });
        }
        // Run enhanced search on filtered emails
        return await this.search(parsedQuery.cleanQuery, filteredEmails, options);
    }

    /**
     * Parse natural language query.
     *
     * Recognizes one time phrase (first matching pattern wins), one
     * "from <word>" / "by <word>" sender hint and importance keywords,
     * removes them from the query and collapses the whitespace they leave
     * behind so the remainder can still match as a contiguous phrase.
     *
     * @param {string} query
     * @returns {{cleanQuery: string, timeFilter: (string|undefined),
     *   senderFilter: (string|undefined), importanceFilter: (string|undefined)}}
     */
    parseNaturalLanguageQuery(query) {
        let cleanQuery = query.toLowerCase();
        let timeFilter;
        let senderFilter;
        let importanceFilter;
        // Time patterns
        const timePatterns = [
            { pattern: /\b(recent|recently|latest|new)\b/g, filter: 'recent' },
            { pattern: /\b(last week|past week|this week)\b/g, filter: 'last_week' },
            { pattern: /\b(last month|past month|this month)\b/g, filter: 'last_month' },
            { pattern: /\b(few weeks|several weeks|couple weeks)\b/g, filter: 'few_weeks' }
        ];
        for (const { pattern, filter } of timePatterns) {
            // A changed string means the pattern matched; this avoids the
            // stateful lastIndex of .test() on a global regex.
            const stripped = cleanQuery.replace(pattern, '');
            if (stripped !== cleanQuery) {
                timeFilter = filter;
                cleanQuery = stripped.trim();
                break;
            }
        }
        // Sender patterns
        const senderMatch = cleanQuery.match(/\b(from|by)\s+(\w+)/);
        if (senderMatch) {
            senderFilter = senderMatch[2];
            cleanQuery = cleanQuery.replace(senderMatch[0], '').trim();
        }
        // Importance patterns
        const withoutImportance = cleanQuery.replace(/\b(important|urgent|priority|critical)\b/g, '');
        if (withoutImportance !== cleanQuery) {
            importanceFilter = 'high';
            cleanQuery = withoutImportance;
        }
        return {
            cleanQuery: cleanQuery.replace(/\s+/g, ' ').trim(),
            timeFilter: timeFilter,
            senderFilter,
            importanceFilter: importanceFilter
        };
    }

    /**
     * Apply time-based filters: keep emails received within the look-back
     * window of the named filter (recent / last_week: 7 days, few_weeks:
     * 21 days, last_month: 30 days).
     *
     * @param {Array<object>} emails
     * @param {string} timeFilter
     * @returns {Array<object>} The filtered emails, or `emails` itself for
     *   an unrecognized filter name.
     */
    applyTimeFilter(emails, timeFilter) {
        const lookbackDays = new Map([
            ['recent', 7],
            ['last_week', 7],
            ['last_month', 30],
            ['few_weeks', 21] // 3 weeks
        ]);
        const days = lookbackDays.get(timeFilter);
        if (days === undefined) {
            return emails;
        }
        const filterDate = new Date(Date.now() - days * MS_PER_DAY);
        return emails.filter((email) => new Date(email.receivedDateTime) >= filterDate);
    }
}
467
/**
 * Defaults merged under caller-supplied options by `search`.
 *
 * `typoTolerance` is the minimum word similarity for a fuzzy hit and
 * `semanticThreshold` the minimum semantic score; `maxResults` caps the
 * returned list. `enableStemming` is declared here, but no method of the
 * class in this file reads it.
 */
EnhancedFuzzySearch.DEFAULT_OPTIONS = {
    enableSemanticMatching: true,
    enableTypoTolerance: true,
    enableSynonyms: true,
    enableStemming: true,
    enablePhoneticMatching: true,
    typoTolerance: 0.8,
    semanticThreshold: 0.6,
    maxResults: 100,
    boostRecentEmails: true
};
/**
 * Common synonyms for email search, keyed by the lower-case query term they
 * expand. Lookups are one-directional: a synonym does not expand back to
 * its key.
 */
EnhancedFuzzySearch.SYNONYMS = {
    'important': ['urgent', 'critical', 'priority', 'asap', 'high priority'],
    'meeting': ['call', 'conference', 'discussion', 'session', 'appointment'],
    'document': ['file', 'paper', 'report', 'attachment', 'doc'],
    'invoice': ['bill', 'receipt', 'payment', 'charge', 'statement'],
    'order': ['purchase', 'request', 'requisition', 'procurement'],
    'contract': ['agreement', 'deal', 'arrangement', 'pact'],
    'issue': ['problem', 'bug', 'error', 'trouble', 'concern'],
    'update': ['news', 'information', 'status', 'progress', 'report'],
    'deadline': ['due date', 'expiry', 'cutoff', 'timeline'],
    'review': ['evaluation', 'assessment', 'analysis', 'feedback'],
    'approval': ['authorization', 'permission', 'consent', 'clearance'],
    'budget': ['cost', 'expense', 'funding', 'financial'],
    'project': ['initiative', 'task', 'assignment', 'work'],
    'client': ['customer', 'consumer', 'user', 'patron'],
    'vendor': ['supplier', 'provider', 'contractor', 'partner'],
    'government': ['federal', 'state', 'local', 'municipal', 'public', 'official'],
    'tax': ['irs', 'revenue', 'taxation', 'levy', 'duty'],
    'notice': ['notification', 'alert', 'warning', 'announcement'],
    'response': ['reply', 'answer', 'feedback', 'acknowledgment'],
    'request': ['ask', 'inquiry', 'question', 'petition']
};
/**
 * Common stemming rules, tried in order by `stemWord`, which applies the
 * first rule whose suffix matches. Longer suffixes therefore precede the
 * shorter suffixes they end with ('ying' before 'ing', 'ies' before 'es'
 * before 's'). Each suffix is listed once; a repeated entry could never be
 * reached.
 */
EnhancedFuzzySearch.STEMMING_RULES = [
    { suffix: 'ies', replacement: 'y' },
    { suffix: 'ied', replacement: 'y' },
    { suffix: 'ying', replacement: 'y' },
    { suffix: 'ing', replacement: '' },
    { suffix: 'ly', replacement: '' },
    { suffix: 'ed', replacement: '' },
    { suffix: 'es', replacement: '' },
    { suffix: 's', replacement: '' }
];