ms365-mcp-server 1.1.15 → 1.1.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +1123 -10
- package/dist/utils/batch-performance-monitor.js +106 -0
- package/dist/utils/batch-test-scenarios.js +277 -0
- package/dist/utils/context-aware-search.js +499 -0
- package/dist/utils/cross-reference-detector.js +352 -0
- package/dist/utils/document-workflow.js +433 -0
- package/dist/utils/enhanced-fuzzy-search.js +514 -0
- package/dist/utils/error-handler.js +337 -0
- package/dist/utils/intelligence-engine.js +71 -0
- package/dist/utils/intelligent-cache.js +379 -0
- package/dist/utils/large-mailbox-search.js +599 -0
- package/dist/utils/ms365-operations.js +799 -219
- package/dist/utils/performance-monitor.js +395 -0
- package/dist/utils/proactive-intelligence.js +390 -0
- package/dist/utils/rate-limiter.js +284 -0
- package/dist/utils/search-batch-pipeline.js +222 -0
- package/dist/utils/thread-reconstruction.js +700 -0
- package/package.json +1 -1
|
@@ -0,0 +1,433 @@
|
|
|
1
|
+
import { logger } from './api.js';
|
|
2
|
+
import * as fs from 'fs/promises';
|
|
3
|
+
import * as path from 'path';
|
|
4
|
+
import * as crypto from 'crypto';
|
|
5
|
+
/**
 * Saves email attachments into a temp directory, detects password protection,
 * and extracts text content where an extractor exists.
 *
 * PDF/DOCX text extraction and all decryption routines in this class are
 * placeholders: the extractors return a fixed marker string and the decryptors
 * always return null.
 */
export class DocumentWorkflow {
    /**
     * @param {object} [options] - Overrides shallow-merged over
     *   DocumentWorkflow.DEFAULT_OPTIONS.
     */
    constructor(options = {}) {
        this.options = { ...DocumentWorkflow.DEFAULT_OPTIONS, ...options };
    }
    /**
     * Process all attachments from an email.
     *
     * Attachments are handled one at a time; a failure on one attachment is
     * recorded as a 'failed' result and does not stop the others.
     *
     * @param {object} email - Email the attachments belong to (subject is logged).
     * @param {Array<object>|null|undefined} attachments - Attachments to process;
     *   null/undefined is treated as an empty list.
     * @returns {Promise<Array<object>>} One result object per attachment.
     */
    async processEmailAttachments(email, attachments) {
        const pending = attachments ?? [];
        const results = [];
        logger.log(`📎 Processing ${pending.length} attachments from email: ${email?.subject}`);
        // Ensure temp directory exists
        await this.ensureTempDirectory();
        for (const attachment of pending) {
            try {
                results.push(await this.processAttachment(email, attachment));
            }
            catch (error) {
                logger.error(`Error processing attachment ${attachment?.name}:`, error);
                results.push({
                    attachment,
                    status: 'failed',
                    metadata: this.buildMetadata(attachment, 0),
                    errors: [error instanceof Error ? error.message : String(error)]
                });
            }
        }
        const successCount = results.filter(r => r.status === 'success').length;
        logger.log(`📎 Processed ${results.length} attachments, ${successCount} successful`);
        return results;
    }
    /**
     * Build the metadata record attached to every processing result.
     *
     * @param {object|null|undefined} attachment - Source attachment.
     * @param {number} processingTime - Elapsed milliseconds to report.
     * @param {object} [flags] - Overrides for isPasswordProtected,
     *   requiresPassword and extractedText (each defaults to false).
     * @returns {object} Metadata object.
     */
    buildMetadata(attachment, processingTime, flags = {}) {
        return {
            originalName: attachment?.name,
            size: attachment?.size || 0,
            mimeType: attachment?.contentType || 'unknown',
            isPasswordProtected: false,
            requiresPassword: false,
            extractedText: false,
            processingTime,
            ...flags
        };
    }
    /**
     * Process a single attachment.
     *
     * Returns early with status 'too_large' or 'unsupported' before anything is
     * written to disk. Otherwise the attachment is saved to the temp directory;
     * on success the saved file is kept and its path is returned as `filePath`.
     * If processing throws after the file was written, the file is removed and
     * the error is rethrown.
     *
     * @param {object} email - Email context, used to derive password candidates.
     * @param {object} attachment - Attachment with name, size, contentType, contentBytes.
     * @returns {Promise<object>} Result with status, metadata and, when saved,
     *   filePath/textContent.
     */
    async processAttachment(email, attachment) {
        const startTime = Date.now();
        const elapsed = () => Date.now() - startTime;
        // Check file size
        if (attachment.size && attachment.size > this.options.maxFileSize) {
            return {
                attachment,
                status: 'too_large',
                metadata: this.buildMetadata(attachment, elapsed()),
                errors: [`File size ${attachment.size} exceeds maximum ${this.options.maxFileSize}`]
            };
        }
        // Check file type. A missing/non-string name yields an empty extension,
        // which is reported as unsupported instead of throwing from path.extname.
        const attachmentName = typeof attachment.name === 'string' ? attachment.name : '';
        const fileExtension = path.extname(attachmentName).toLowerCase().substring(1);
        if (!this.options.allowedTypes.includes(fileExtension)) {
            return {
                attachment,
                status: 'unsupported',
                metadata: this.buildMetadata(attachment, elapsed()),
                errors: [`File type ${fileExtension} not supported`]
            };
        }
        // Save attachment to temp file
        const tempFilePath = await this.saveAttachmentToTemp(attachment);
        try {
            // Check if password protected
            const isPasswordProtected = await this.isPasswordProtected(tempFilePath, fileExtension);
            let textContent;
            let requiresPassword = false;
            let extractedText = false;
            if (isPasswordProtected) {
                // Try to decrypt with generated candidate passwords
                const decryptResult = await this.attemptDecryption(email, tempFilePath, fileExtension);
                if (decryptResult.success) {
                    textContent = decryptResult.textContent;
                    extractedText = true;
                }
                else {
                    requiresPassword = true;
                }
            }
            else if (this.options.extractTextContent) {
                // Extract text content
                const extracted = await this.extractTextContent(tempFilePath, fileExtension);
                textContent = extracted || undefined;
                extractedText = !!extracted;
            }
            return {
                attachment,
                status: requiresPassword ? 'password_protected' : 'success',
                filePath: tempFilePath,
                textContent,
                metadata: this.buildMetadata(attachment, elapsed(), {
                    isPasswordProtected,
                    requiresPassword,
                    extractedText
                })
            };
        }
        catch (error) {
            // Clean up temp file on error
            await this.cleanupTempFile(tempFilePath);
            throw error;
        }
    }
    /**
     * Reduce an attachment name to a single safe file-name component.
     *
     * Attachment names come from email and are untrusted: directory parts
     * (both `/` and `\`), control characters, characters that are invalid on
     * Windows, and leading dots are removed so the result can never escape the
     * directory it is joined onto.
     *
     * @param {unknown} name - Raw attachment name.
     * @returns {string} Sanitized name, or 'attachment' when nothing usable remains.
     */
    sanitizeFileName(name) {
        const raw = typeof name === 'string' ? name : '';
        const lastSegment = raw.split(/[\\/]/).pop();
        const cleaned = lastSegment
            .replace(/[\x00-\x1f\x7f<>:"|?*]/g, '_')
            .replace(/^\.+/, '');
        return cleaned || 'attachment';
    }
    /**
     * Save attachment to temporary file.
     *
     * The file name is a random UUID followed by the sanitized attachment name.
     * The file is created exclusively ('wx') with owner-only permissions.
     *
     * @param {object} attachment - Attachment whose contentBytes is base64 data.
     * @returns {Promise<string>} Path of the written file inside options.tempDir.
     * @throws {Error} When contentBytes is missing or the write fails.
     */
    async saveAttachmentToTemp(attachment) {
        if (attachment.contentBytes == null) {
            throw new Error(`Attachment ${attachment.name} has no contentBytes to save`);
        }
        const tempFileName = `${crypto.randomUUID()}_${this.sanitizeFileName(attachment.name)}`;
        const tempFilePath = path.join(this.options.tempDir, tempFileName);
        const buffer = Buffer.from(attachment.contentBytes, 'base64');
        await fs.writeFile(tempFilePath, buffer, { mode: 0o600, flag: 'wx' });
        return tempFilePath;
    }
    /**
     * Check if file is password protected.
     *
     * @param {string} filePath - File to inspect.
     * @param {string} fileExtension - Lower-case extension without the dot.
     * @returns {Promise<boolean>} false for extensions without a check and
     *   when the check itself throws.
     */
    async isPasswordProtected(filePath, fileExtension) {
        try {
            switch (fileExtension) {
                case 'pdf':
                    return await this.isPdfPasswordProtected(filePath);
                case 'docx':
                case 'xlsx':
                case 'pptx':
                    return await this.isOfficeDocPasswordProtected(filePath);
                default:
                    return false;
            }
        }
        catch (error) {
            logger.error(`Error checking password protection for ${filePath}:`, error);
            return false;
        }
    }
    /**
     * Check if PDF is password protected.
     *
     * Heuristic: scans the raw bytes for the `/Encrypt` or `/P -` markers.
     *
     * @param {string} filePath - PDF file to inspect.
     * @returns {Promise<boolean>} true when a marker is found; false otherwise
     *   or when the file cannot be read.
     */
    async isPdfPasswordProtected(filePath) {
        try {
            const buffer = await fs.readFile(filePath);
            // Look for encryption markers in PDF
            return buffer.includes('/Encrypt') || buffer.includes('/P -');
        }
        catch (error) {
            logger.error(`Error reading ${filePath} for PDF encryption check:`, error);
            return false;
        }
    }
    /**
     * Check if Office document is password protected.
     *
     * Looks for the `EncryptedPackage` / `EncryptionInfo` stream names. They
     * are searched both as single-byte text and as UTF-16LE, because compound
     * file (CFB) directory entries store names in UTF-16.
     *
     * @param {string} filePath - Office file to inspect.
     * @returns {Promise<boolean>} true when a marker is found; false otherwise
     *   or when the file cannot be read.
     */
    async isOfficeDocPasswordProtected(filePath) {
        try {
            const buffer = await fs.readFile(filePath);
            const markers = ['EncryptedPackage', 'EncryptionInfo'];
            return markers.some(marker => buffer.includes(marker) ||
                buffer.includes(Buffer.from(marker, 'utf16le')));
        }
        catch (error) {
            logger.error(`Error reading ${filePath} for Office encryption check:`, error);
            return false;
        }
    }
    /**
     * Attempt to decrypt password-protected file.
     *
     * Tries each candidate from generatePasswordList in order and stops at the
     * first one that yields non-empty text.
     *
     * @param {object} email - Email context for candidate generation.
     * @param {string} filePath - File to decrypt.
     * @param {string} fileExtension - Lower-case extension without the dot.
     * @returns {Promise<object>} `{ success: true, textContent, usedPassword }`
     *   or `{ success: false }`.
     */
    async attemptDecryption(email, filePath, fileExtension) {
        const passwords = this.generatePasswordList(email);
        for (const password of passwords) {
            try {
                const textContent = await this.decryptAndExtractText(filePath, fileExtension, password);
                if (textContent) {
                    logger.log(`✅ Successfully decrypted ${path.basename(filePath)} with password strategy`);
                    return { success: true, textContent, usedPassword: password };
                }
            }
            catch (error) {
                // Deliberately ignored: a failed candidate is expected, continue to next password
            }
        }
        return { success: false };
    }
    /**
     * Generate password list based on email context.
     *
     * Collects candidates from every entry of DocumentWorkflow.PASSWORD_STRATEGIES.
     * A strategy whose generator throws is logged and skipped so one bad
     * strategy cannot abort attachment processing. Non-string and empty
     * candidates are dropped, duplicates removed (first occurrence wins), and
     * the list is capped at options.passwordAttempts.
     *
     * @param {object} email - Email passed to each strategy generator.
     * @returns {string[]} Ordered, de-duplicated candidate passwords.
     */
    generatePasswordList(email) {
        const candidates = [];
        for (const strategy of DocumentWorkflow.PASSWORD_STRATEGIES) {
            try {
                const produced = strategy.generator ? strategy.generator(email) : strategy.passwords;
                candidates.push(...(produced ?? []));
            }
            catch (error) {
                logger.error(`Password strategy ${strategy.name} failed:`, error);
            }
        }
        // Remove unusable entries and duplicates, then limit attempts
        const usable = candidates.filter(candidate => typeof candidate === 'string' && candidate.length > 0);
        return [...new Set(usable)].slice(0, this.options.passwordAttempts);
    }
    /**
     * Decrypt and extract text from password-protected file.
     *
     * This is a simplified dispatcher; in practice the per-format routines
     * would use libraries like pdf-lib, node-pdftk, or office-specific libraries.
     *
     * @param {string} filePath - File to decrypt.
     * @param {string} fileExtension - Lower-case extension without the dot.
     * @param {string} password - Candidate password.
     * @returns {Promise<string|null>} Extracted text, or null when unsupported
     *   or not decrypted.
     */
    async decryptAndExtractText(filePath, fileExtension, password) {
        switch (fileExtension) {
            case 'pdf':
                return await this.decryptPdf(filePath, password);
            case 'docx':
            case 'xlsx':
            case 'pptx':
                return await this.decryptOfficeDoc(filePath, password);
            default:
                return null;
        }
    }
    /**
     * Decrypt PDF (placeholder implementation).
     *
     * @returns {Promise<null>} Always null: no decryption is performed yet.
     */
    async decryptPdf(filePath, password) {
        // This would use a library like pdf-lib or pdf2pic with password support
        // For now, return null indicating we couldn't decrypt
        return null;
    }
    /**
     * Decrypt Office document (placeholder implementation).
     *
     * @returns {Promise<null>} Always null: no decryption is performed yet.
     */
    async decryptOfficeDoc(filePath, password) {
        // This would use a library that supports password-protected Office documents
        // For now, return null indicating we couldn't decrypt
        return null;
    }
    /**
     * Extract text content from unprotected file.
     *
     * @param {string} filePath - File to read.
     * @param {string} fileExtension - Lower-case extension without the dot.
     * @returns {Promise<string|null>} Text for txt/pdf/docx; null for other
     *   extensions or when extraction throws (the error is logged).
     */
    async extractTextContent(filePath, fileExtension) {
        try {
            switch (fileExtension) {
                case 'txt':
                    return await this.extractTextFromTxt(filePath);
                case 'pdf':
                    return await this.extractTextFromPdf(filePath);
                case 'docx':
                    return await this.extractTextFromDocx(filePath);
                default:
                    return null;
            }
        }
        catch (error) {
            logger.error(`Error extracting text from ${filePath}:`, error);
            return null;
        }
    }
    /**
     * Extract text from TXT file, decoding the bytes as UTF-8.
     *
     * @param {string} filePath - Text file to read.
     * @returns {Promise<string>} File content.
     */
    async extractTextFromTxt(filePath) {
        const buffer = await fs.readFile(filePath);
        return buffer.toString('utf8');
    }
    /**
     * Extract text from PDF (placeholder implementation).
     *
     * @returns {Promise<string>} A marker string naming the file, not real content.
     */
    async extractTextFromPdf(filePath) {
        // This would use a library like pdf-parse or pdf2pic
        // For now, return placeholder
        return `[PDF text content from ${path.basename(filePath)}]`;
    }
    /**
     * Extract text from DOCX (placeholder implementation).
     *
     * @returns {Promise<string>} A marker string naming the file, not real content.
     */
    async extractTextFromDocx(filePath) {
        // This would use a library like mammoth or docx-parser
        // For now, return placeholder
        return `[DOCX text content from ${path.basename(filePath)}]`;
    }
    /**
     * Ensure temp directory exists.
     *
     * Best effort: a creation failure is logged, not thrown, so later writes
     * into the directory will surface the problem per attachment.
     */
    async ensureTempDirectory() {
        try {
            await fs.mkdir(this.options.tempDir, { recursive: true });
        }
        catch (error) {
            logger.error(`Error creating temp directory ${this.options.tempDir}:`, error);
        }
    }
    /**
     * Clean up temporary file. Failures are logged, never thrown.
     *
     * @param {string} filePath - File to delete.
     */
    async cleanupTempFile(filePath) {
        try {
            await fs.unlink(filePath);
        }
        catch (error) {
            logger.error(`Error cleaning up temp file ${filePath}:`, error);
        }
    }
    /**
     * Clean up all temporary files: deletes every entry directly inside
     * options.tempDir. Failures are logged, never thrown.
     */
    async cleanupAllTempFiles() {
        try {
            const files = await fs.readdir(this.options.tempDir);
            for (const file of files) {
                await this.cleanupTempFile(path.join(this.options.tempDir, file));
            }
        }
        catch (error) {
            logger.error(`Error cleaning up temp directory:`, error);
        }
    }
    /**
     * Get processing statistics.
     *
     * @param {Array<object>} results - Results returned by processEmailAttachments.
     * @returns {object} Counts per status, number of results with extracted
     *   text, and the mean metadata.processingTime (0 for an empty list).
     */
    getProcessingStats(results) {
        const countWhere = (predicate) => results.filter(predicate).length;
        const stats = {
            total: results.length,
            successful: countWhere(r => r.status === 'success'),
            passwordProtected: countWhere(r => r.status === 'password_protected'),
            failed: countWhere(r => r.status === 'failed'),
            textExtracted: countWhere(r => r.metadata.extractedText),
            averageProcessingTime: 0
        };
        if (results.length > 0) {
            const totalTime = results.reduce((sum, r) => sum + r.metadata.processingTime, 0);
            stats.averageProcessingTime = totalTime / results.length;
        }
        return stats;
    }
}
|
|
361
|
+
/**
 * Baseline settings for DocumentWorkflow. The constructor shallow-merges the
 * caller's options over this object.
 */
DocumentWorkflow.DEFAULT_OPTIONS = {
    // Directory that receives the saved attachment copies.
    tempDir: '/tmp/ms365-documents',
    // Attachments whose reported size exceeds this many bytes are rejected (50MB).
    maxFileSize: 50 * 1024 ** 2,
    // Lower-case file extensions (without the dot) that may be processed.
    allowedTypes: ['pdf', 'docx', 'xlsx', 'pptx', 'txt', 'rtf'],
    // NOTE(review): not read by any code visible in this file - confirm usage.
    autoDecryptPDFs: true,
    // When true, text is extracted from attachments that are not password protected.
    extractTextContent: true,
    // NOTE(review): not read by any code visible in this file - confirm usage.
    scanForMalware: false,
    // Upper bound on the number of candidate passwords tried per file.
    passwordAttempts: 10
};
|
|
370
|
+
/**
 * Common password strategies.
 *
 * Each entry has a `name`, a static `passwords` list, and optionally a
 * `generator(email)` returning candidate strings derived from the email.
 * Generators tolerate missing or malformed email fields: they return fewer
 * (possibly zero) candidates instead of throwing, and never emit empty strings.
 */
DocumentWorkflow.PASSWORD_STRATEGIES = [
    {
        name: 'common_passwords',
        passwords: ['password', '123456', 'admin', 'user', 'default', 'temp', 'document']
    },
    {
        name: 'email_based',
        passwords: [],
        // Candidates built from the sender's display name and address,
        // optionally suffixed with the current or previous calendar year.
        generator: (email) => {
            const rawName = typeof email?.from?.name === 'string' ? email.from.name.trim() : '';
            const address = typeof email?.from?.address === 'string'
                ? email.from.address.trim().toLowerCase()
                : '';
            const senderName = rawName.toLowerCase().replace(/\s+/g, '');
            const senderFirstName = rawName.split(/\s+/)[0].toLowerCase();
            const [localPart = '', domain = ''] = address.split('@');
            const year = new Date().getFullYear().toString();
            const lastYear = (new Date().getFullYear() - 1).toString();
            // `x && ...` keeps a bare year from being emitted when the base part is empty.
            return [
                senderName,
                senderFirstName,
                domain,
                senderName && `${senderName}${year}`,
                senderFirstName && `${senderFirstName}${year}`,
                senderName && `${senderName}${lastYear}`,
                domain && `${domain}${year}`,
                address,
                localPart
            ].filter(Boolean);
        }
    },
    {
        name: 'date_based',
        passwords: [],
        // Candidates built from the email's received date. All parts are taken
        // in UTC so every format describes the same calendar day regardless of
        // the server's time zone.
        generator: (email) => {
            if (email?.receivedDateTime == null) {
                return [];
            }
            const emailDate = new Date(email.receivedDateTime);
            if (Number.isNaN(emailDate.getTime())) {
                return [];
            }
            const isoDate = emailDate.toISOString().split('T')[0];
            const year = emailDate.getUTCFullYear();
            const month = emailDate.getUTCMonth() + 1;
            const day = emailDate.getUTCDate();
            return [
                isoDate, // YYYY-MM-DD
                isoDate.replace(/-/g, ''), // YYYYMMDD
                `${month}/${day}/${year}`, // M/D/YYYY
                `${day}/${month}/${year}`, // D/M/YYYY
                year.toString(),
                (year - 1).toString()
            ];
        }
    },
    {
        name: 'subject_based',
        passwords: [],
        // Candidates built from subject words longer than three characters,
        // adjacent word pairs, and digit runs found in the subject.
        generator: (email) => {
            const subject = typeof email?.subject === 'string' ? email.subject.toLowerCase() : '';
            const words = subject.split(/\s+/).filter(word => word.length > 3);
            const combinations = [];
            // Add individual words
            combinations.push(...words);
            // Add word combinations
            for (let i = 0; i < words.length - 1; i++) {
                combinations.push(words[i] + words[i + 1]);
            }
            // Add numbers found in subject
            const numbers = subject.match(/\d+/g) || [];
            combinations.push(...numbers);
            return combinations;
        }
    }
];
|