@realtimex/email-automator 2.6.4 → 2.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/api/src/middleware/validation.ts +7 -0
- package/api/src/services/intelligence.ts +232 -7
- package/api/src/services/processor.ts +153 -69
- package/api/src/services/supabase.ts +5 -2
- package/api/src/utils/contentCleaner.ts +80 -66
- package/dist/api/src/middleware/validation.js +7 -0
- package/dist/api/src/services/intelligence.js +193 -2
- package/dist/api/src/services/processor.js +85 -24
- package/dist/api/src/utils/contentCleaner.js +74 -58
- package/dist/assets/index-aTk6SbAd.js +97 -0
- package/dist/assets/index-npWWfPF9.css +1 -0
- package/dist/index.html +2 -2
- package/package.json +1 -1
- package/supabase/migrations/20260119000000_context_aware_rules.sql +44 -0
- package/supabase/migrations/20260119000001_compiled_rule_context.sql +128 -0
- package/supabase/migrations/20260119000002_fix_compiled_context_conditions.sql +137 -0
- package/dist/assets/index-BSHZ3lFn.js +0 -97
- package/dist/assets/index-CRQKk5IW.css +0 -1
|
@@ -4,7 +4,7 @@ import { createLogger } from '../utils/logger.js';
|
|
|
4
4
|
import { config } from '../config/index.js';
|
|
5
5
|
import { getGmailService, GmailMessage } from './gmail.js';
|
|
6
6
|
import { getMicrosoftService, OutlookMessage } from './microsoft.js';
|
|
7
|
-
import { getIntelligenceService, EmailAnalysis } from './intelligence.js';
|
|
7
|
+
import { getIntelligenceService, EmailAnalysis, ContextAwareAnalysis, RuleContext } from './intelligence.js';
|
|
8
8
|
import { getStorageService } from './storage.js';
|
|
9
9
|
import { EmailAccount, Email, Rule, ProcessingLog } from './supabase.js';
|
|
10
10
|
import { EventLogger } from './eventLogger.js';
|
|
@@ -55,8 +55,8 @@ export class EmailProcessorService {
|
|
|
55
55
|
throw new Error('Account not found or access denied');
|
|
56
56
|
}
|
|
57
57
|
|
|
58
|
-
logger.info('Retrieved account settings', {
|
|
59
|
-
accountId: account.id,
|
|
58
|
+
logger.info('Retrieved account settings', {
|
|
59
|
+
accountId: account.id,
|
|
60
60
|
sync_start_date: account.sync_start_date,
|
|
61
61
|
last_sync_checkpoint: account.last_sync_checkpoint
|
|
62
62
|
});
|
|
@@ -104,7 +104,7 @@ export class EmailProcessorService {
|
|
|
104
104
|
await this.runRetentionRules(refreshedAccount, rules || [], settings, result, eventLogger);
|
|
105
105
|
|
|
106
106
|
// Trigger background worker (async) to process the queue
|
|
107
|
-
this.processQueue(userId, settings).catch(err =>
|
|
107
|
+
this.processQueue(userId, settings).catch(err =>
|
|
108
108
|
logger.error('Background worker failed', err)
|
|
109
109
|
);
|
|
110
110
|
|
|
@@ -160,7 +160,7 @@ export class EmailProcessorService {
|
|
|
160
160
|
if (errMsg.includes('Account not found') || errMsg.includes('access denied')) {
|
|
161
161
|
throw error;
|
|
162
162
|
}
|
|
163
|
-
|
|
163
|
+
|
|
164
164
|
// Otherwise, increment error count and return partial results
|
|
165
165
|
result.errors++;
|
|
166
166
|
}
|
|
@@ -199,7 +199,7 @@ export class EmailProcessorService {
|
|
|
199
199
|
const windowSizeMs = 7 * 24 * 60 * 60 * 1000;
|
|
200
200
|
const nowMs = Date.now();
|
|
201
201
|
const tomorrowMs = nowMs + (24 * 60 * 60 * 1000);
|
|
202
|
-
|
|
202
|
+
|
|
203
203
|
let currentStartMs = effectiveStartMs;
|
|
204
204
|
let messages: GmailMessage[] = [];
|
|
205
205
|
let hasMore = false;
|
|
@@ -231,7 +231,7 @@ export class EmailProcessorService {
|
|
|
231
231
|
logger.info('No emails in 7-day window, skipping forward', { start: new Date(currentStartMs).toISOString() });
|
|
232
232
|
currentStartMs = effectiveEndMs;
|
|
233
233
|
attempts++;
|
|
234
|
-
|
|
234
|
+
|
|
235
235
|
if (eventLogger && attempts % 3 === 0) {
|
|
236
236
|
await eventLogger.info('Sync', `Scanning history... reached ${new Date(currentStartMs).toLocaleDateString()}`);
|
|
237
237
|
}
|
|
@@ -262,17 +262,17 @@ export class EmailProcessorService {
|
|
|
262
262
|
|
|
263
263
|
// Update checkpoint once at the end of the batch if we made progress
|
|
264
264
|
if (maxInternalDate > effectiveStartMs) {
|
|
265
|
-
logger.info('Updating Gmail checkpoint', {
|
|
266
|
-
accountId: account.id,
|
|
265
|
+
logger.info('Updating Gmail checkpoint', {
|
|
266
|
+
accountId: account.id,
|
|
267
267
|
oldCheckpoint: account.last_sync_checkpoint,
|
|
268
|
-
newCheckpoint: maxInternalDate.toString()
|
|
268
|
+
newCheckpoint: maxInternalDate.toString()
|
|
269
269
|
});
|
|
270
|
-
|
|
270
|
+
|
|
271
271
|
const { error: updateError } = await this.supabase
|
|
272
272
|
.from('email_accounts')
|
|
273
273
|
.update({ last_sync_checkpoint: maxInternalDate.toString() })
|
|
274
274
|
.eq('id', account.id);
|
|
275
|
-
|
|
275
|
+
|
|
276
276
|
if (updateError) {
|
|
277
277
|
logger.error('Failed to update Gmail checkpoint', updateError);
|
|
278
278
|
}
|
|
@@ -346,12 +346,12 @@ export class EmailProcessorService {
|
|
|
346
346
|
|
|
347
347
|
// Update checkpoint once at the end of the batch if we made progress
|
|
348
348
|
if (latestCheckpoint && latestCheckpoint !== effectiveStartIso) {
|
|
349
|
-
logger.info('Updating Outlook checkpoint', {
|
|
350
|
-
accountId: account.id,
|
|
349
|
+
logger.info('Updating Outlook checkpoint', {
|
|
350
|
+
accountId: account.id,
|
|
351
351
|
oldCheckpoint: account.last_sync_checkpoint,
|
|
352
|
-
newCheckpoint: latestCheckpoint
|
|
352
|
+
newCheckpoint: latestCheckpoint
|
|
353
353
|
});
|
|
354
|
-
|
|
354
|
+
|
|
355
355
|
const { error: updateError } = await this.supabase
|
|
356
356
|
.from('email_accounts')
|
|
357
357
|
.update({ last_sync_checkpoint: latestCheckpoint })
|
|
@@ -387,11 +387,11 @@ export class EmailProcessorService {
|
|
|
387
387
|
if (existing) {
|
|
388
388
|
logger.debug('Message already processed', { messageId: message.id });
|
|
389
389
|
if (eventLogger) await eventLogger.info('Skipped', `Already processed ID: ${message.id}`);
|
|
390
|
-
|
|
390
|
+
|
|
391
391
|
// Still need to return the date for checkpointing even if skipped
|
|
392
|
-
const rawMime = 'raw' in message
|
|
393
|
-
? (account.provider === 'gmail'
|
|
394
|
-
? Buffer.from(message.raw, 'base64').toString('utf-8')
|
|
392
|
+
const rawMime = 'raw' in message
|
|
393
|
+
? (account.provider === 'gmail'
|
|
394
|
+
? Buffer.from(message.raw, 'base64').toString('utf-8')
|
|
395
395
|
: message.raw)
|
|
396
396
|
: '';
|
|
397
397
|
if (rawMime) {
|
|
@@ -400,11 +400,11 @@ export class EmailProcessorService {
|
|
|
400
400
|
}
|
|
401
401
|
return;
|
|
402
402
|
}
|
|
403
|
-
|
|
403
|
+
|
|
404
404
|
// Extract raw content string (Gmail is base64url, Outlook is raw text from $value)
|
|
405
|
-
const rawMime = 'raw' in message
|
|
406
|
-
? (account.provider === 'gmail'
|
|
407
|
-
? Buffer.from(message.raw, 'base64').toString('utf-8')
|
|
405
|
+
const rawMime = 'raw' in message
|
|
406
|
+
? (account.provider === 'gmail'
|
|
407
|
+
? Buffer.from(message.raw, 'base64').toString('utf-8')
|
|
408
408
|
: message.raw)
|
|
409
409
|
: '';
|
|
410
410
|
|
|
@@ -462,7 +462,7 @@ export class EmailProcessorService {
|
|
|
462
462
|
if (eventLogger) await eventLogger.info('Ingested', `Successfully ingested email: ${subject}`, { filePath }, savedEmail.id);
|
|
463
463
|
|
|
464
464
|
result.processed++;
|
|
465
|
-
|
|
465
|
+
|
|
466
466
|
return { date };
|
|
467
467
|
}
|
|
468
468
|
|
|
@@ -522,7 +522,7 @@ export class EmailProcessorService {
|
|
|
522
522
|
.select('processing_status')
|
|
523
523
|
.eq('id', email.id)
|
|
524
524
|
.single();
|
|
525
|
-
|
|
525
|
+
|
|
526
526
|
if (current?.processing_status !== 'pending') {
|
|
527
527
|
if (log) await this.supabase.from('processing_logs').delete().eq('id', log.id);
|
|
528
528
|
return;
|
|
@@ -539,7 +539,7 @@ export class EmailProcessorService {
|
|
|
539
539
|
if (!email.file_path) throw new Error('No file path found for email');
|
|
540
540
|
const rawMime = await this.storageService.readEmail(email.file_path);
|
|
541
541
|
const parsed = await simpleParser(rawMime);
|
|
542
|
-
|
|
542
|
+
|
|
543
543
|
// Extract clean content (prioritize text)
|
|
544
544
|
const cleanContent = parsed.text || parsed.textAsHtml || '';
|
|
545
545
|
|
|
@@ -551,7 +551,68 @@ export class EmailProcessorService {
|
|
|
551
551
|
mailer: parsed.headers.get('x-mailer')?.toString()
|
|
552
552
|
};
|
|
553
553
|
|
|
554
|
-
// 3.
|
|
554
|
+
// 3. Fetch account for action execution
|
|
555
|
+
const { data: account } = await this.supabase
|
|
556
|
+
.from('email_accounts')
|
|
557
|
+
.select('*')
|
|
558
|
+
.eq('id', email.account_id)
|
|
559
|
+
.single();
|
|
560
|
+
|
|
561
|
+
// 4. Fetch pre-compiled rule context (fast path - no loop/formatting)
|
|
562
|
+
// Falls back to building context if not cached
|
|
563
|
+
let compiledContext: string | null = settings?.compiled_rule_context || null;
|
|
564
|
+
|
|
565
|
+
// Fetch rules for action execution (need attachments, instructions)
|
|
566
|
+
const { data: rules } = await this.supabase
|
|
567
|
+
.from('rules')
|
|
568
|
+
.select('*')
|
|
569
|
+
.eq('user_id', userId)
|
|
570
|
+
.eq('is_enabled', true)
|
|
571
|
+
.order('priority', { ascending: false });
|
|
572
|
+
|
|
573
|
+
// Fallback: build context if not pre-compiled
|
|
574
|
+
if (!compiledContext && rules && rules.length > 0) {
|
|
575
|
+
compiledContext = rules.map((r, i) => {
|
|
576
|
+
// Build human-readable condition text
|
|
577
|
+
let conditionText = '';
|
|
578
|
+
if (r.condition) {
|
|
579
|
+
const cond = r.condition as any;
|
|
580
|
+
if (cond.field) {
|
|
581
|
+
conditionText = `When ${cond.field}`;
|
|
582
|
+
if (cond.operator === 'equals') {
|
|
583
|
+
conditionText += ` equals "${cond.value}"`;
|
|
584
|
+
} else if (cond.operator === 'contains') {
|
|
585
|
+
conditionText += ` contains "${cond.value}"`;
|
|
586
|
+
} else if (cond.operator === 'domain_equals') {
|
|
587
|
+
conditionText += ` domain equals "${cond.value}"`;
|
|
588
|
+
} else {
|
|
589
|
+
conditionText += ` ${cond.operator} "${cond.value}"`;
|
|
590
|
+
}
|
|
591
|
+
}
|
|
592
|
+
if (cond.is_useless === true) {
|
|
593
|
+
conditionText += (conditionText ? ' AND ' : 'When ') + 'email is useless/low-value';
|
|
594
|
+
}
|
|
595
|
+
if (cond.ai_priority) {
|
|
596
|
+
conditionText += (conditionText ? ' AND ' : 'When ') + `AI priority is "${cond.ai_priority}"`;
|
|
597
|
+
}
|
|
598
|
+
// Extract older_than_days from condition JSONB
|
|
599
|
+
if (cond.older_than_days) {
|
|
600
|
+
conditionText += (conditionText ? ' AND ' : 'When ') + `email is older than ${cond.older_than_days} days`;
|
|
601
|
+
}
|
|
602
|
+
}
|
|
603
|
+
|
|
604
|
+
return `Rule ${i + 1} [ID: ${r.id}]\n` +
|
|
605
|
+
` Name: ${r.name}\n` +
|
|
606
|
+
(r.description ? ` Description: ${r.description}\n` : '') +
|
|
607
|
+
(r.intent ? ` Intent: ${r.intent}\n` : '') +
|
|
608
|
+
(conditionText ? ` Condition: ${conditionText}\n` : '') +
|
|
609
|
+
` Actions: ${r.actions?.join(', ') || r.action || 'none'}\n` +
|
|
610
|
+
(r.instructions ? ` Draft Instructions: ${r.instructions}\n` : '') +
|
|
611
|
+
'\n';
|
|
612
|
+
}).join('');
|
|
613
|
+
}
|
|
614
|
+
|
|
615
|
+
// 5. Context-Aware Analysis: AI evaluates email against user's rules
|
|
555
616
|
const intelligenceService = getIntelligenceService(
|
|
556
617
|
settings?.llm_model || settings?.llm_base_url || settings?.llm_api_key
|
|
557
618
|
? {
|
|
@@ -562,63 +623,86 @@ export class EmailProcessorService {
|
|
|
562
623
|
: undefined
|
|
563
624
|
);
|
|
564
625
|
|
|
565
|
-
const analysis = await intelligenceService.
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
626
|
+
const analysis = await intelligenceService.analyzeEmailWithRules(
|
|
627
|
+
cleanContent,
|
|
628
|
+
{
|
|
629
|
+
subject: email.subject || '',
|
|
630
|
+
sender: email.sender || '',
|
|
631
|
+
date: email.date || '',
|
|
632
|
+
metadata,
|
|
633
|
+
userPreferences: {
|
|
634
|
+
autoTrashSpam: settings?.auto_trash_spam,
|
|
635
|
+
smartDrafts: settings?.smart_drafts,
|
|
636
|
+
},
|
|
573
637
|
},
|
|
574
|
-
|
|
638
|
+
compiledContext || '', // Pre-compiled context (fast path)
|
|
639
|
+
eventLogger || undefined,
|
|
640
|
+
email.id
|
|
641
|
+
);
|
|
575
642
|
|
|
576
643
|
if (!analysis) {
|
|
577
644
|
throw new Error('AI analysis returned no result');
|
|
578
645
|
}
|
|
579
646
|
|
|
580
|
-
//
|
|
647
|
+
// 6. Update the email record with context-aware results
|
|
581
648
|
await this.supabase
|
|
582
649
|
.from('emails')
|
|
583
650
|
.update({
|
|
584
651
|
category: analysis.category,
|
|
585
|
-
is_useless: analysis.is_useless,
|
|
586
652
|
ai_analysis: analysis as any,
|
|
587
|
-
suggested_actions: analysis.
|
|
588
|
-
suggested_action: analysis.
|
|
653
|
+
suggested_actions: analysis.actions_to_execute || [],
|
|
654
|
+
suggested_action: analysis.actions_to_execute?.[0] || 'none',
|
|
655
|
+
matched_rule_id: analysis.matched_rule.rule_id,
|
|
656
|
+
matched_rule_confidence: analysis.matched_rule.confidence,
|
|
589
657
|
processing_status: 'completed'
|
|
590
658
|
})
|
|
591
659
|
.eq('id', email.id);
|
|
592
660
|
|
|
593
|
-
//
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
.from('email_accounts')
|
|
597
|
-
.select('*')
|
|
598
|
-
.eq('id', email.account_id)
|
|
599
|
-
.single();
|
|
661
|
+
// 7. Execute actions if rule matched with sufficient confidence
|
|
662
|
+
if (account && analysis.matched_rule.rule_id && analysis.matched_rule.confidence >= 0.7) {
|
|
663
|
+
const matchedRule = rules?.find(r => r.id === analysis.matched_rule.rule_id);
|
|
600
664
|
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
665
|
+
if (eventLogger) {
|
|
666
|
+
await eventLogger.info('Rule Matched',
|
|
667
|
+
`"${analysis.matched_rule.rule_name}" matched with ${(analysis.matched_rule.confidence * 100).toFixed(0)}% confidence`,
|
|
668
|
+
{ reasoning: analysis.matched_rule.reasoning },
|
|
669
|
+
email.id
|
|
670
|
+
);
|
|
671
|
+
}
|
|
606
672
|
|
|
607
|
-
|
|
608
|
-
const
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
673
|
+
// Execute each action from the AI's decision
|
|
674
|
+
for (const action of analysis.actions_to_execute) {
|
|
675
|
+
if (action === 'none') continue;
|
|
676
|
+
|
|
677
|
+
// Use AI-generated draft content if available
|
|
678
|
+
const draftContent = action === 'draft' ? analysis.draft_content : undefined;
|
|
679
|
+
|
|
680
|
+
await this.executeAction(
|
|
681
|
+
account,
|
|
682
|
+
email,
|
|
683
|
+
action as any,
|
|
684
|
+
draftContent,
|
|
685
|
+
eventLogger,
|
|
686
|
+
`Rule: ${matchedRule?.name || analysis.matched_rule.rule_name}`,
|
|
687
|
+
matchedRule?.attachments
|
|
688
|
+
);
|
|
689
|
+
}
|
|
690
|
+
} else if (eventLogger && rules && rules.length > 0) {
|
|
691
|
+
await eventLogger.info('No Match',
|
|
692
|
+
analysis.matched_rule.reasoning,
|
|
693
|
+
{ confidence: analysis.matched_rule.confidence },
|
|
694
|
+
email.id
|
|
695
|
+
);
|
|
612
696
|
}
|
|
613
697
|
|
|
614
698
|
// Mark log as success
|
|
615
699
|
if (log) {
|
|
616
700
|
await this.supabase
|
|
617
701
|
.from('processing_logs')
|
|
618
|
-
.update({
|
|
619
|
-
status: 'success',
|
|
702
|
+
.update({
|
|
703
|
+
status: 'success',
|
|
620
704
|
completed_at: new Date().toISOString(),
|
|
621
|
-
emails_processed: 1
|
|
705
|
+
emails_processed: 1
|
|
622
706
|
})
|
|
623
707
|
.eq('id', log.id);
|
|
624
708
|
}
|
|
@@ -626,13 +710,13 @@ export class EmailProcessorService {
|
|
|
626
710
|
} catch (error) {
|
|
627
711
|
logger.error('Failed to process pending email', error, { emailId: email.id });
|
|
628
712
|
if (eventLogger) await eventLogger.error('Processing Failed', error, email.id);
|
|
629
|
-
|
|
713
|
+
|
|
630
714
|
// Mark log as failed
|
|
631
715
|
if (log) {
|
|
632
716
|
await this.supabase
|
|
633
717
|
.from('processing_logs')
|
|
634
|
-
.update({
|
|
635
|
-
status: 'failed',
|
|
718
|
+
.update({
|
|
719
|
+
status: 'failed',
|
|
636
720
|
completed_at: new Date().toISOString(),
|
|
637
721
|
error_message: error instanceof Error ? error.message : String(error)
|
|
638
722
|
})
|
|
@@ -641,7 +725,7 @@ export class EmailProcessorService {
|
|
|
641
725
|
|
|
642
726
|
await this.supabase
|
|
643
727
|
.from('emails')
|
|
644
|
-
.update({
|
|
728
|
+
.update({
|
|
645
729
|
processing_status: 'failed',
|
|
646
730
|
processing_error: error instanceof Error ? error.message : String(error),
|
|
647
731
|
retry_count: (email.retry_count || 0) + 1
|
|
@@ -711,10 +795,10 @@ export class EmailProcessorService {
|
|
|
711
795
|
|
|
712
796
|
private matchesCondition(email: Partial<Email>, analysis: EmailAnalysis, condition: Record<string, unknown>): boolean {
|
|
713
797
|
if (!analysis) return false;
|
|
714
|
-
|
|
798
|
+
|
|
715
799
|
for (const [key, value] of Object.entries(condition)) {
|
|
716
800
|
const val = value as string;
|
|
717
|
-
|
|
801
|
+
|
|
718
802
|
switch (key) {
|
|
719
803
|
case 'sender_email':
|
|
720
804
|
if (email.sender?.toLowerCase() !== val.toLowerCase()) return false;
|
|
@@ -755,7 +839,7 @@ export class EmailProcessorService {
|
|
|
755
839
|
// Handle array membership check (e.g. if condition expects "reply" to be in actions)
|
|
756
840
|
const requiredActions = Array.isArray(value) ? value : [value];
|
|
757
841
|
const actualActions = analysis.suggested_actions || [];
|
|
758
|
-
const hasAllActions = requiredActions.every(req =>
|
|
842
|
+
const hasAllActions = requiredActions.every(req =>
|
|
759
843
|
actualActions.includes(req as any)
|
|
760
844
|
);
|
|
761
845
|
if (!hasAllActions) return false;
|
|
@@ -881,7 +965,7 @@ export class EmailProcessorService {
|
|
|
881
965
|
.eq('id', email.id);
|
|
882
966
|
|
|
883
967
|
logger.debug('Action executed', { emailId: email.id, action });
|
|
884
|
-
|
|
968
|
+
|
|
885
969
|
if (eventLogger) {
|
|
886
970
|
await eventLogger.action('Acted', email.id, action, reason);
|
|
887
971
|
}
|
|
@@ -115,10 +115,13 @@ export interface Rule {
|
|
|
115
115
|
id: string;
|
|
116
116
|
user_id: string;
|
|
117
117
|
name: string;
|
|
118
|
-
|
|
118
|
+
description?: string; // Semantic context for AI matching
|
|
119
|
+
intent?: string; // The intent behind the rule (e.g., "Politely decline sales pitches")
|
|
120
|
+
priority?: number; // Higher = evaluated first by AI
|
|
121
|
+
condition: Record<string, unknown>; // Legacy - kept for backwards compatibility
|
|
119
122
|
action?: 'delete' | 'archive' | 'draft' | 'star' | 'read'; // Legacy single action
|
|
120
123
|
actions?: ('delete' | 'archive' | 'draft' | 'star' | 'read')[]; // New multi-action array
|
|
121
|
-
instructions?: string;
|
|
124
|
+
instructions?: string; // Draft generation instructions
|
|
122
125
|
attachments?: any[];
|
|
123
126
|
is_enabled: boolean;
|
|
124
127
|
created_at: string;
|
|
@@ -1,96 +1,111 @@
|
|
|
1
1
|
export class ContentCleaner {
|
|
2
2
|
/**
|
|
3
3
|
* Cleans email body by removing noise, quoted replies, and footers.
|
|
4
|
-
*
|
|
4
|
+
* optimized for LLM processing.
|
|
5
5
|
*/
|
|
6
6
|
static cleanEmailBody(text: string): string {
|
|
7
7
|
if (!text) return "";
|
|
8
8
|
const originalText = text;
|
|
9
9
|
|
|
10
|
-
//
|
|
11
|
-
|
|
12
|
-
// Structure: <br>, <p> -> Newlines
|
|
13
|
-
text = text.replace(/<br\s*\/?>/gi, '\n');
|
|
14
|
-
text = text.replace(/<\/p>/gi, '\n\n');
|
|
15
|
-
text = text.replace(/<p.*?>/gi, ''); // Open p tags just gone
|
|
16
|
-
|
|
17
|
-
// Structure: Headers <h1>-<h6> -> # Title
|
|
18
|
-
text = text.replace(/<h[1-6].*?>(.*?)<\/h[1-6]>/gsi, (match, p1) => `\n# ${p1}\n`);
|
|
19
|
-
|
|
20
|
-
// Structure: Lists <li> -> - Item
|
|
21
|
-
text = text.replace(/<li.*?>(.*?)<\/li>/gsi, (match, p1) => `\n- ${p1}`);
|
|
22
|
-
text = text.replace(/<ul.*?>/gi, '');
|
|
23
|
-
text = text.replace(/<\/ul>/gi, '\n');
|
|
24
|
-
|
|
25
|
-
// Links: <a href=\"...\">text</a> -> [text](href)
|
|
26
|
-
text = text.replace(/<a\s+(?:[^>]*?\s+)?href=\"([^\"]*)\"[^>]*>(.*?)<\/a>/gsi, (match, href, content) => `[${content}](${href})`);
|
|
27
|
-
|
|
28
|
-
// Images: <img src=\"...\" alt=\"...\"> -> ![alt](src)
|
|
29
|
-
text = text.replace(/<img\s+(?:[^>]*?\s+)?src=\"([^\"]*)\"(?:[^>]*?\s+)?alt=\"([^\"]*)\"[^>]*>/gsi, (match, src, alt) => `![${alt}](${src})`);
|
|
10
|
+
// 1. Detect if content is actually HTML
|
|
11
|
+
const isHtml = /<[a-z][\s\S]*>/i.test(text);
|
|
30
12
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
13
|
+
if (isHtml) {
|
|
14
|
+
// Lightweight HTML -> Markdown Conversion
|
|
15
|
+
// Structure: <br>, <p> -> Newlines
|
|
16
|
+
text = text.replace(/<br\s*\/?>/gi, '\n');
|
|
17
|
+
text = text.replace(/<\/p>/gi, '\n\n');
|
|
18
|
+
text = text.replace(/<p.*?>/gi, '');
|
|
19
|
+
|
|
20
|
+
// Structure: Headers <h1>-<h6> -> # Title
|
|
21
|
+
text = text.replace(/<h[1-6].*?>(.*?)<\/h[1-6]>/gsi, (match, p1) => `\n# ${p1}\n`);
|
|
22
|
+
|
|
23
|
+
// Structure: Lists <li> -> - Item
|
|
24
|
+
text = text.replace(/<li.*?>(.*?)<\/li>/gsi, (match, p1) => `\n- ${p1}`);
|
|
25
|
+
text = text.replace(/<ul.*?>/gi, '');
|
|
26
|
+
text = text.replace(/<\/ul>/gi, '\n');
|
|
27
|
+
|
|
28
|
+
// Links: <a href=\"...\">text</a> -> [text](href)
|
|
29
|
+
text = text.replace(/<a\s+(?:[^>]*?\s+)?href=\"([^\"]*)\"[^>]*>(.*?)<\/a>/gsi, (match, href, content) => `[${content}](${href})`);
|
|
30
|
+
|
|
31
|
+
// Images: <img src=\"...\" alt=\"...\"> -> ![alt](src)
|
|
32
|
+
text = text.replace(/<img\s+(?:[^>]*?\s+)?src=\"([^\"]*)\"(?:[^>]*?\s+)?alt=\"([^\"]*)\"[^>]*>/gsi, (match, src, alt) => `![${alt}](${src})`);
|
|
33
|
+
|
|
34
|
+
// Style/Script removal (strictly remove content)
|
|
35
|
+
text = text.replace(/<script.*?>.*?<\/script>/gsi, '');
|
|
36
|
+
text = text.replace(/<style.*?>.*?<\/style>/gsi, '');
|
|
37
|
+
|
|
38
|
+
// Final Strip of remaining tags
|
|
39
|
+
text = text.replace(/<[^>]+>/g, ' ');
|
|
40
|
+
|
|
41
|
+
// Entity decoding (Basic)
|
|
42
|
+
text = text.replace(/&nbsp;/gi, ' ');
|
|
43
|
+
text = text.replace(/&amp;/gi, '&');
|
|
44
|
+
text = text.replace(/&lt;/gi, '<');
|
|
45
|
+
text = text.replace(/&gt;/gi, '>');
|
|
46
|
+
text = text.replace(/&quot;/gi, '"');
|
|
47
|
+
text = text.replace(/&#39;/gi, "'");
|
|
48
|
+
}
|
|
45
49
|
|
|
46
50
|
const lines = text.split('\n');
|
|
47
51
|
const cleanedLines: string[] = [];
|
|
48
52
|
|
|
49
|
-
//
|
|
50
|
-
const
|
|
53
|
+
// Patterns that usually mark the START of a reply chain or a generic footer
|
|
54
|
+
const truncationPatterns = [
|
|
51
55
|
/^On .* wrote:$/i,
|
|
52
|
-
/^From:
|
|
53
|
-
|
|
54
|
-
/^
|
|
55
|
-
/^
|
|
56
|
+
/^From: .* <.*>$/i,
|
|
57
|
+
/^-----Original Message-----$/i,
|
|
58
|
+
/^________________________________$/i,
|
|
59
|
+
/^Sent from my iPhone$/i,
|
|
60
|
+
/^Sent from my Android$/i,
|
|
61
|
+
/^Get Outlook for/i,
|
|
62
|
+
/^--$/ // Standard signature separator
|
|
56
63
|
];
|
|
57
64
|
|
|
58
|
-
//
|
|
59
|
-
const
|
|
60
|
-
/
|
|
65
|
+
// Patterns for lines that should be stripped but NOT truncate the whole email
|
|
66
|
+
const noisePatterns = [
|
|
67
|
+
/view in browser/i,
|
|
68
|
+
/click here to view/i,
|
|
69
|
+
/legal notice/i,
|
|
70
|
+
/all rights reserved/i,
|
|
61
71
|
/privacy policy/i,
|
|
62
72
|
/terms of service/i,
|
|
63
|
-
/
|
|
64
|
-
/copyright \d{4}/i
|
|
73
|
+
/unsubscribe/i
|
|
65
74
|
];
|
|
66
75
|
|
|
67
76
|
for (let line of lines) {
|
|
68
77
|
let lineStripped = line.trim();
|
|
78
|
+
if (!lineStripped) {
|
|
79
|
+
cleanedLines.push("");
|
|
80
|
+
continue;
|
|
81
|
+
}
|
|
69
82
|
|
|
70
83
|
// 2. Quoted text removal (lines starting with >)
|
|
71
84
|
if (lineStripped.startsWith('>')) {
|
|
72
85
|
continue;
|
|
73
86
|
}
|
|
74
87
|
|
|
75
|
-
// 3.
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
88
|
+
// 3. Truncation check: If we hit a reply header, we stop entirely
|
|
89
|
+
let shouldTruncate = false;
|
|
90
|
+
for (const pattern of truncationPatterns) {
|
|
91
|
+
if (pattern.test(lineStripped)) {
|
|
92
|
+
shouldTruncate = true;
|
|
93
|
+
break;
|
|
94
|
+
}
|
|
79
95
|
}
|
|
96
|
+
if (shouldTruncate) break;
|
|
80
97
|
|
|
81
|
-
// 4.
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
for (const pattern of
|
|
98
|
+
// 4. Noise check: Strip boilerplate lines
|
|
99
|
+
let isNoise = false;
|
|
100
|
+
if (lineStripped.length < 100) {
|
|
101
|
+
for (const pattern of noisePatterns) {
|
|
85
102
|
if (pattern.test(lineStripped)) {
|
|
86
|
-
|
|
103
|
+
isNoise = true;
|
|
87
104
|
break;
|
|
88
105
|
}
|
|
89
106
|
}
|
|
90
|
-
if (isFooter) {
|
|
91
|
-
continue;
|
|
92
|
-
}
|
|
93
107
|
}
|
|
108
|
+
if (isNoise) continue;
|
|
94
109
|
|
|
95
110
|
cleanedLines.push(line);
|
|
96
111
|
}
|
|
@@ -98,21 +113,20 @@ export class ContentCleaner {
|
|
|
98
113
|
// Reassemble
|
|
99
114
|
text = cleanedLines.join('\n');
|
|
100
115
|
|
|
101
|
-
//
|
|
102
|
-
if (!text.trim() || text.length < 10) {
|
|
103
|
-
text = originalText.substring(0, 3000);
|
|
104
|
-
}
|
|
105
|
-
|
|
106
|
-
// Collapse multiple newlines
|
|
116
|
+
// Collapse whitespace
|
|
107
117
|
text = text.replace(/\n{3,}/g, '\n\n');
|
|
118
|
+
text = text.replace(/[ \t]{2,}/g, ' ');
|
|
108
119
|
|
|
120
|
+
// Safety Fallback: If cleaning stripped too much, return original text truncated
|
|
121
|
+
if (text.trim().length < 20 && originalText.trim().length > 20) {
|
|
122
|
+
return originalText.substring(0, 3000).trim();
|
|
123
|
+
}
|
|
124
|
+
|
|
109
125
|
// Sanitize LLM Special Tokens
|
|
110
126
|
text = text.replace(/<\|/g, '< |');
|
|
111
127
|
text = text.replace(/\|>/g, '| >');
|
|
112
128
|
text = text.replace(/\[INST\]/gi, '[ INST ]');
|
|
113
129
|
text = text.replace(/\[\/INST\]/gi, '[ /INST ]');
|
|
114
|
-
text = text.replace(/<s>/gi, '&lt;s&gt;');
|
|
115
|
-
text = text.replace(/<\/s>/gi, '&lt;/s&gt;');
|
|
116
130
|
|
|
117
131
|
return text.trim();
|
|
118
132
|
}
|
|
@@ -82,8 +82,12 @@ export const schemas = {
|
|
|
82
82
|
accessToken: z.string().optional(),
|
|
83
83
|
}),
|
|
84
84
|
// Rule schemas - supports both single action (legacy) and actions array
|
|
85
|
+
// Now includes description and intent for context-aware AI matching
|
|
85
86
|
createRule: z.object({
|
|
86
87
|
name: z.string().min(1).max(100),
|
|
88
|
+
description: z.string().max(500).optional(),
|
|
89
|
+
intent: z.string().max(200).optional(),
|
|
90
|
+
priority: z.number().int().min(0).max(100).optional(),
|
|
87
91
|
condition: z.record(z.unknown()),
|
|
88
92
|
action: z.enum(['delete', 'archive', 'draft', 'star', 'read']).optional(),
|
|
89
93
|
actions: z.array(z.enum(['delete', 'archive', 'draft', 'star', 'read'])).optional(),
|
|
@@ -94,6 +98,9 @@ export const schemas = {
|
|
|
94
98
|
}),
|
|
95
99
|
updateRule: z.object({
|
|
96
100
|
name: z.string().min(1).max(100).optional(),
|
|
101
|
+
description: z.string().max(500).optional(),
|
|
102
|
+
intent: z.string().max(200).optional(),
|
|
103
|
+
priority: z.number().int().min(0).max(100).optional(),
|
|
97
104
|
condition: z.record(z.unknown()).optional(),
|
|
98
105
|
action: z.enum(['delete', 'archive', 'draft', 'star', 'read']).optional(),
|
|
99
106
|
actions: z.array(z.enum(['delete', 'archive', 'draft', 'star', 'read'])).optional(),
|