@realtimex/email-automator 2.6.4 → 2.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,7 +4,7 @@ import { createLogger } from '../utils/logger.js';
4
4
  import { config } from '../config/index.js';
5
5
  import { getGmailService, GmailMessage } from './gmail.js';
6
6
  import { getMicrosoftService, OutlookMessage } from './microsoft.js';
7
- import { getIntelligenceService, EmailAnalysis } from './intelligence.js';
7
+ import { getIntelligenceService, EmailAnalysis, ContextAwareAnalysis, RuleContext } from './intelligence.js';
8
8
  import { getStorageService } from './storage.js';
9
9
  import { EmailAccount, Email, Rule, ProcessingLog } from './supabase.js';
10
10
  import { EventLogger } from './eventLogger.js';
@@ -55,8 +55,8 @@ export class EmailProcessorService {
55
55
  throw new Error('Account not found or access denied');
56
56
  }
57
57
 
58
- logger.info('Retrieved account settings', {
59
- accountId: account.id,
58
+ logger.info('Retrieved account settings', {
59
+ accountId: account.id,
60
60
  sync_start_date: account.sync_start_date,
61
61
  last_sync_checkpoint: account.last_sync_checkpoint
62
62
  });
@@ -104,7 +104,7 @@ export class EmailProcessorService {
104
104
  await this.runRetentionRules(refreshedAccount, rules || [], settings, result, eventLogger);
105
105
 
106
106
  // Trigger background worker (async) to process the queue
107
- this.processQueue(userId, settings).catch(err =>
107
+ this.processQueue(userId, settings).catch(err =>
108
108
  logger.error('Background worker failed', err)
109
109
  );
110
110
 
@@ -160,7 +160,7 @@ export class EmailProcessorService {
160
160
  if (errMsg.includes('Account not found') || errMsg.includes('access denied')) {
161
161
  throw error;
162
162
  }
163
-
163
+
164
164
  // Otherwise, increment error count and return partial results
165
165
  result.errors++;
166
166
  }
@@ -199,7 +199,7 @@ export class EmailProcessorService {
199
199
  const windowSizeMs = 7 * 24 * 60 * 60 * 1000;
200
200
  const nowMs = Date.now();
201
201
  const tomorrowMs = nowMs + (24 * 60 * 60 * 1000);
202
-
202
+
203
203
  let currentStartMs = effectiveStartMs;
204
204
  let messages: GmailMessage[] = [];
205
205
  let hasMore = false;
@@ -231,7 +231,7 @@ export class EmailProcessorService {
231
231
  logger.info('No emails in 7-day window, skipping forward', { start: new Date(currentStartMs).toISOString() });
232
232
  currentStartMs = effectiveEndMs;
233
233
  attempts++;
234
-
234
+
235
235
  if (eventLogger && attempts % 3 === 0) {
236
236
  await eventLogger.info('Sync', `Scanning history... reached ${new Date(currentStartMs).toLocaleDateString()}`);
237
237
  }
@@ -262,17 +262,17 @@ export class EmailProcessorService {
262
262
 
263
263
  // Update checkpoint once at the end of the batch if we made progress
264
264
  if (maxInternalDate > effectiveStartMs) {
265
- logger.info('Updating Gmail checkpoint', {
266
- accountId: account.id,
265
+ logger.info('Updating Gmail checkpoint', {
266
+ accountId: account.id,
267
267
  oldCheckpoint: account.last_sync_checkpoint,
268
- newCheckpoint: maxInternalDate.toString()
268
+ newCheckpoint: maxInternalDate.toString()
269
269
  });
270
-
270
+
271
271
  const { error: updateError } = await this.supabase
272
272
  .from('email_accounts')
273
273
  .update({ last_sync_checkpoint: maxInternalDate.toString() })
274
274
  .eq('id', account.id);
275
-
275
+
276
276
  if (updateError) {
277
277
  logger.error('Failed to update Gmail checkpoint', updateError);
278
278
  }
@@ -346,12 +346,12 @@ export class EmailProcessorService {
346
346
 
347
347
  // Update checkpoint once at the end of the batch if we made progress
348
348
  if (latestCheckpoint && latestCheckpoint !== effectiveStartIso) {
349
- logger.info('Updating Outlook checkpoint', {
350
- accountId: account.id,
349
+ logger.info('Updating Outlook checkpoint', {
350
+ accountId: account.id,
351
351
  oldCheckpoint: account.last_sync_checkpoint,
352
- newCheckpoint: latestCheckpoint
352
+ newCheckpoint: latestCheckpoint
353
353
  });
354
-
354
+
355
355
  const { error: updateError } = await this.supabase
356
356
  .from('email_accounts')
357
357
  .update({ last_sync_checkpoint: latestCheckpoint })
@@ -387,11 +387,11 @@ export class EmailProcessorService {
387
387
  if (existing) {
388
388
  logger.debug('Message already processed', { messageId: message.id });
389
389
  if (eventLogger) await eventLogger.info('Skipped', `Already processed ID: ${message.id}`);
390
-
390
+
391
391
  // Still need to return the date for checkpointing even if skipped
392
- const rawMime = 'raw' in message
393
- ? (account.provider === 'gmail'
394
- ? Buffer.from(message.raw, 'base64').toString('utf-8')
392
+ const rawMime = 'raw' in message
393
+ ? (account.provider === 'gmail'
394
+ ? Buffer.from(message.raw, 'base64').toString('utf-8')
395
395
  : message.raw)
396
396
  : '';
397
397
  if (rawMime) {
@@ -400,11 +400,11 @@ export class EmailProcessorService {
400
400
  }
401
401
  return;
402
402
  }
403
-
403
+
404
404
  // Extract raw content string (Gmail is base64url, Outlook is raw text from $value)
405
- const rawMime = 'raw' in message
406
- ? (account.provider === 'gmail'
407
- ? Buffer.from(message.raw, 'base64').toString('utf-8')
405
+ const rawMime = 'raw' in message
406
+ ? (account.provider === 'gmail'
407
+ ? Buffer.from(message.raw, 'base64').toString('utf-8')
408
408
  : message.raw)
409
409
  : '';
410
410
 
@@ -462,7 +462,7 @@ export class EmailProcessorService {
462
462
  if (eventLogger) await eventLogger.info('Ingested', `Successfully ingested email: ${subject}`, { filePath }, savedEmail.id);
463
463
 
464
464
  result.processed++;
465
-
465
+
466
466
  return { date };
467
467
  }
468
468
 
@@ -522,7 +522,7 @@ export class EmailProcessorService {
522
522
  .select('processing_status')
523
523
  .eq('id', email.id)
524
524
  .single();
525
-
525
+
526
526
  if (current?.processing_status !== 'pending') {
527
527
  if (log) await this.supabase.from('processing_logs').delete().eq('id', log.id);
528
528
  return;
@@ -539,7 +539,7 @@ export class EmailProcessorService {
539
539
  if (!email.file_path) throw new Error('No file path found for email');
540
540
  const rawMime = await this.storageService.readEmail(email.file_path);
541
541
  const parsed = await simpleParser(rawMime);
542
-
542
+
543
543
  // Extract clean content (prioritize text)
544
544
  const cleanContent = parsed.text || parsed.textAsHtml || '';
545
545
 
@@ -551,7 +551,68 @@ export class EmailProcessorService {
551
551
  mailer: parsed.headers.get('x-mailer')?.toString()
552
552
  };
553
553
 
554
- // 3. Analyze with AI
554
+ // 3. Fetch account for action execution
555
+ const { data: account } = await this.supabase
556
+ .from('email_accounts')
557
+ .select('*')
558
+ .eq('id', email.account_id)
559
+ .single();
560
+
561
+ // 4. Fetch pre-compiled rule context (fast path - no loop/formatting)
562
+ // Falls back to building context if not cached
563
+ let compiledContext: string | null = settings?.compiled_rule_context || null;
564
+
565
+ // Fetch rules for action execution (need attachments, instructions)
566
+ const { data: rules } = await this.supabase
567
+ .from('rules')
568
+ .select('*')
569
+ .eq('user_id', userId)
570
+ .eq('is_enabled', true)
571
+ .order('priority', { ascending: false });
572
+
573
+ // Fallback: build context if not pre-compiled
574
+ if (!compiledContext && rules && rules.length > 0) {
575
+ compiledContext = rules.map((r, i) => {
576
+ // Build human-readable condition text
577
+ let conditionText = '';
578
+ if (r.condition) {
579
+ const cond = r.condition as any;
580
+ if (cond.field) {
581
+ conditionText = `When ${cond.field}`;
582
+ if (cond.operator === 'equals') {
583
+ conditionText += ` equals "${cond.value}"`;
584
+ } else if (cond.operator === 'contains') {
585
+ conditionText += ` contains "${cond.value}"`;
586
+ } else if (cond.operator === 'domain_equals') {
587
+ conditionText += ` domain equals "${cond.value}"`;
588
+ } else {
589
+ conditionText += ` ${cond.operator} "${cond.value}"`;
590
+ }
591
+ }
592
+ if (cond.is_useless === true) {
593
+ conditionText += (conditionText ? ' AND ' : 'When ') + 'email is useless/low-value';
594
+ }
595
+ if (cond.ai_priority) {
596
+ conditionText += (conditionText ? ' AND ' : 'When ') + `AI priority is "${cond.ai_priority}"`;
597
+ }
598
+ // Extract older_than_days from condition JSONB
599
+ if (cond.older_than_days) {
600
+ conditionText += (conditionText ? ' AND ' : 'When ') + `email is older than ${cond.older_than_days} days`;
601
+ }
602
+ }
603
+
604
+ return `Rule ${i + 1} [ID: ${r.id}]\n` +
605
+ ` Name: ${r.name}\n` +
606
+ (r.description ? ` Description: ${r.description}\n` : '') +
607
+ (r.intent ? ` Intent: ${r.intent}\n` : '') +
608
+ (conditionText ? ` Condition: ${conditionText}\n` : '') +
609
+ ` Actions: ${r.actions?.join(', ') || r.action || 'none'}\n` +
610
+ (r.instructions ? ` Draft Instructions: ${r.instructions}\n` : '') +
611
+ '\n';
612
+ }).join('');
613
+ }
614
+
615
+ // 5. Context-Aware Analysis: AI evaluates email against user's rules
555
616
  const intelligenceService = getIntelligenceService(
556
617
  settings?.llm_model || settings?.llm_base_url || settings?.llm_api_key
557
618
  ? {
@@ -562,63 +623,86 @@ export class EmailProcessorService {
562
623
  : undefined
563
624
  );
564
625
 
565
- const analysis = await intelligenceService.analyzeEmail(cleanContent, {
566
- subject: email.subject || '',
567
- sender: email.sender || '',
568
- date: email.date || '',
569
- metadata,
570
- userPreferences: {
571
- autoTrashSpam: settings?.auto_trash_spam,
572
- smartDrafts: settings?.smart_drafts,
626
+ const analysis = await intelligenceService.analyzeEmailWithRules(
627
+ cleanContent,
628
+ {
629
+ subject: email.subject || '',
630
+ sender: email.sender || '',
631
+ date: email.date || '',
632
+ metadata,
633
+ userPreferences: {
634
+ autoTrashSpam: settings?.auto_trash_spam,
635
+ smartDrafts: settings?.smart_drafts,
636
+ },
573
637
  },
574
- }, eventLogger || undefined, email.id);
638
+ compiledContext || '', // Pre-compiled context (fast path)
639
+ eventLogger || undefined,
640
+ email.id
641
+ );
575
642
 
576
643
  if (!analysis) {
577
644
  throw new Error('AI analysis returned no result');
578
645
  }
579
646
 
580
- // 4. Update the email record with results
647
+ // 6. Update the email record with context-aware results
581
648
  await this.supabase
582
649
  .from('emails')
583
650
  .update({
584
651
  category: analysis.category,
585
- is_useless: analysis.is_useless,
586
652
  ai_analysis: analysis as any,
587
- suggested_actions: analysis.suggested_actions || [],
588
- suggested_action: analysis.suggested_actions?.[0] || 'none',
653
+ suggested_actions: analysis.actions_to_execute || [],
654
+ suggested_action: analysis.actions_to_execute?.[0] || 'none',
655
+ matched_rule_id: analysis.matched_rule.rule_id,
656
+ matched_rule_confidence: analysis.matched_rule.confidence,
589
657
  processing_status: 'completed'
590
658
  })
591
659
  .eq('id', email.id);
592
660
 
593
- // 5. Execute automation rules
594
- // Fetch account and rules needed for execution
595
- const { data: account } = await this.supabase
596
- .from('email_accounts')
597
- .select('*')
598
- .eq('id', email.account_id)
599
- .single();
661
+ // 7. Execute actions if rule matched with sufficient confidence
662
+ if (account && analysis.matched_rule.rule_id && analysis.matched_rule.confidence >= 0.7) {
663
+ const matchedRule = rules?.find(r => r.id === analysis.matched_rule.rule_id);
600
664
 
601
- const { data: rules } = await this.supabase
602
- .from('rules')
603
- .select('*')
604
- .eq('user_id', userId)
605
- .eq('is_enabled', true);
665
+ if (eventLogger) {
666
+ await eventLogger.info('Rule Matched',
667
+ `"${analysis.matched_rule.rule_name}" matched with ${(analysis.matched_rule.confidence * 100).toFixed(0)}% confidence`,
668
+ { reasoning: analysis.matched_rule.reasoning },
669
+ email.id
670
+ );
671
+ }
606
672
 
607
- if (account && rules) {
608
- const tempResult = { processed: 0, deleted: 0, drafted: 0, errors: 0 };
609
- // Ensure email object for rules has the analysis fields merged in
610
- const emailForRules = { ...email, ...analysis };
611
- await this.executeRules(account, emailForRules as any, analysis, rules, settings, tempResult, eventLogger);
673
+ // Execute each action from the AI's decision
674
+ for (const action of analysis.actions_to_execute) {
675
+ if (action === 'none') continue;
676
+
677
+ // Use AI-generated draft content if available
678
+ const draftContent = action === 'draft' ? analysis.draft_content : undefined;
679
+
680
+ await this.executeAction(
681
+ account,
682
+ email,
683
+ action as any,
684
+ draftContent,
685
+ eventLogger,
686
+ `Rule: ${matchedRule?.name || analysis.matched_rule.rule_name}`,
687
+ matchedRule?.attachments
688
+ );
689
+ }
690
+ } else if (eventLogger && rules && rules.length > 0) {
691
+ await eventLogger.info('No Match',
692
+ analysis.matched_rule.reasoning,
693
+ { confidence: analysis.matched_rule.confidence },
694
+ email.id
695
+ );
612
696
  }
613
697
 
614
698
  // Mark log as success
615
699
  if (log) {
616
700
  await this.supabase
617
701
  .from('processing_logs')
618
- .update({
619
- status: 'success',
702
+ .update({
703
+ status: 'success',
620
704
  completed_at: new Date().toISOString(),
621
- emails_processed: 1
705
+ emails_processed: 1
622
706
  })
623
707
  .eq('id', log.id);
624
708
  }
@@ -626,13 +710,13 @@ export class EmailProcessorService {
626
710
  } catch (error) {
627
711
  logger.error('Failed to process pending email', error, { emailId: email.id });
628
712
  if (eventLogger) await eventLogger.error('Processing Failed', error, email.id);
629
-
713
+
630
714
  // Mark log as failed
631
715
  if (log) {
632
716
  await this.supabase
633
717
  .from('processing_logs')
634
- .update({
635
- status: 'failed',
718
+ .update({
719
+ status: 'failed',
636
720
  completed_at: new Date().toISOString(),
637
721
  error_message: error instanceof Error ? error.message : String(error)
638
722
  })
@@ -641,7 +725,7 @@ export class EmailProcessorService {
641
725
 
642
726
  await this.supabase
643
727
  .from('emails')
644
- .update({
728
+ .update({
645
729
  processing_status: 'failed',
646
730
  processing_error: error instanceof Error ? error.message : String(error),
647
731
  retry_count: (email.retry_count || 0) + 1
@@ -711,10 +795,10 @@ export class EmailProcessorService {
711
795
 
712
796
  private matchesCondition(email: Partial<Email>, analysis: EmailAnalysis, condition: Record<string, unknown>): boolean {
713
797
  if (!analysis) return false;
714
-
798
+
715
799
  for (const [key, value] of Object.entries(condition)) {
716
800
  const val = value as string;
717
-
801
+
718
802
  switch (key) {
719
803
  case 'sender_email':
720
804
  if (email.sender?.toLowerCase() !== val.toLowerCase()) return false;
@@ -755,7 +839,7 @@ export class EmailProcessorService {
755
839
  // Handle array membership check (e.g. if condition expects "reply" to be in actions)
756
840
  const requiredActions = Array.isArray(value) ? value : [value];
757
841
  const actualActions = analysis.suggested_actions || [];
758
- const hasAllActions = requiredActions.every(req =>
842
+ const hasAllActions = requiredActions.every(req =>
759
843
  actualActions.includes(req as any)
760
844
  );
761
845
  if (!hasAllActions) return false;
@@ -881,7 +965,7 @@ export class EmailProcessorService {
881
965
  .eq('id', email.id);
882
966
 
883
967
  logger.debug('Action executed', { emailId: email.id, action });
884
-
968
+
885
969
  if (eventLogger) {
886
970
  await eventLogger.action('Acted', email.id, action, reason);
887
971
  }
@@ -115,10 +115,13 @@ export interface Rule {
115
115
  id: string;
116
116
  user_id: string;
117
117
  name: string;
118
- condition: Record<string, unknown>;
118
+ description?: string; // Semantic context for AI matching
119
+ intent?: string; // The intent behind the rule (e.g., "Politely decline sales pitches")
120
+ priority?: number; // Higher = evaluated first by AI
121
+ condition: Record<string, unknown>; // Legacy - kept for backwards compatibility
119
122
  action?: 'delete' | 'archive' | 'draft' | 'star' | 'read'; // Legacy single action
120
123
  actions?: ('delete' | 'archive' | 'draft' | 'star' | 'read')[]; // New multi-action array
121
- instructions?: string;
124
+ instructions?: string; // Draft generation instructions
122
125
  attachments?: any[];
123
126
  is_enabled: boolean;
124
127
  created_at: string;
@@ -1,96 +1,111 @@
1
1
  export class ContentCleaner {
2
2
  /**
3
3
  * Cleans email body by removing noise, quoted replies, and footers.
4
- * Ported from Python ContentCleaner.
4
+ * optimized for LLM processing.
5
5
  */
6
6
  static cleanEmailBody(text: string): string {
7
7
  if (!text) return "";
8
8
  const originalText = text;
9
9
 
10
- // 0. Lightweight HTML -> Markdown Conversion
11
-
12
- // Structure: <br>, <p> -> Newlines
13
- text = text.replace(/<br\s*\/?>/gi, '\n');
14
- text = text.replace(/<\/p>/gi, '\n\n');
15
- text = text.replace(/<p.*?>/gi, ''); // Open p tags just gone
16
-
17
- // Structure: Headers <h1>-<h6> -> # Title
18
- text = text.replace(/<h[1-6].*?>(.*?)<\/h[1-6]>/gsi, (match, p1) => `\n# ${p1}\n`);
19
-
20
- // Structure: Lists <li> -> - Item
21
- text = text.replace(/<li.*?>(.*?)<\/li>/gsi, (match, p1) => `\n- ${p1}`);
22
- text = text.replace(/<ul.*?>/gi, '');
23
- text = text.replace(/<\/ul>/gi, '\n');
24
-
25
- // Links: <a href=\"...\">text</a> -> [text](href)
26
- text = text.replace(/<a\s+(?:[^>]*?\s+)?href=\"([^\"]*)\"[^>]*>(.*?)<\/a>/gsi, (match, href, content) => `[${content}](${href})`);
27
-
28
- // Images: <img src=\"...\" alt=\"...\"> -> ![alt](src)
29
- text = text.replace(/<img\s+(?:[^>]*?\s+)?src=\"([^\"]*)\"(?:[^>]*?\s+)?alt=\"([^\"]*)\"[^>]*>/gsi, (match, src, alt) => `![${alt}](${src})`);
10
+ // 1. Detect if content is actually HTML
11
+ const isHtml = /<[a-z][\s\S]*>/i.test(text);
30
12
 
31
- // Style/Script removal (strictly remove content)
32
- text = text.replace(/<script.*?>.*?<\/script>/gsi, '');
33
- text = text.replace(/<style.*?>.*?<\/style>/gsi, '');
34
-
35
- // Final Strip of remaining tags
36
- text = text.replace(/<[^>]+>/g, ' ');
37
-
38
- // Entity decoding (Basic)
39
- text = text.replace(/&nbsp;/gi, ' ');
40
- text = text.replace(/&amp;/gi, '&');
41
- text = text.replace(/&lt;/gi, '<');
42
- text = text.replace(/&gt;/gi, '>');
43
- text = text.replace(/&quot;/gi, '"');
44
- text = text.replace(/&#39;/gi, "'");
13
+ if (isHtml) {
14
+ // Lightweight HTML -> Markdown Conversion
15
+ // Structure: <br>, <p> -> Newlines
16
+ text = text.replace(/<br\s*\/?>/gi, '\n');
17
+ text = text.replace(/<\/p>/gi, '\n\n');
18
+ text = text.replace(/<p.*?>/gi, '');
19
+
20
+ // Structure: Headers <h1>-<h6> -> # Title
21
+ text = text.replace(/<h[1-6].*?>(.*?)<\/h[1-6]>/gsi, (match, p1) => `\n# ${p1}\n`);
22
+
23
+ // Structure: Lists <li> -> - Item
24
+ text = text.replace(/<li.*?>(.*?)<\/li>/gsi, (match, p1) => `\n- ${p1}`);
25
+ text = text.replace(/<ul.*?>/gi, '');
26
+ text = text.replace(/<\/ul>/gi, '\n');
27
+
28
+ // Links: <a href=\"...\">text</a> -> [text](href)
29
+ text = text.replace(/<a\s+(?:[^>]*?\s+)?href=\"([^\"]*)\"[^>]*>(.*?)<\/a>/gsi, (match, href, content) => `[${content}](${href})`);
30
+
31
+ // Images: <img src=\"...\" alt=\"...\"> -> ![alt](src)
32
+ text = text.replace(/<img\s+(?:[^>]*?\s+)?src=\"([^\"]*)\"(?:[^>]*?\s+)?alt=\"([^\"]*)\"[^>]*>/gsi, (match, src, alt) => `![${alt}](${src})`);
33
+
34
+ // Style/Script removal (strictly remove content)
35
+ text = text.replace(/<script.*?>.*?<\/script>/gsi, '');
36
+ text = text.replace(/<style.*?>.*?<\/style>/gsi, '');
37
+
38
+ // Final Strip of remaining tags
39
+ text = text.replace(/<[^>]+>/g, ' ');
40
+
41
+ // Entity decoding (Basic)
42
+ text = text.replace(/&nbsp;/gi, ' ');
43
+ text = text.replace(/&amp;/gi, '&');
44
+ text = text.replace(/&lt;/gi, '<');
45
+ text = text.replace(/&gt;/gi, '>');
46
+ text = text.replace(/&quot;/gi, '"');
47
+ text = text.replace(/&#39;/gi, "'");
48
+ }
45
49
 
46
50
  const lines = text.split('\n');
47
51
  const cleanedLines: string[] = [];
48
52
 
49
- // Heuristics for reply headers
50
- const replyHeaderPatterns = [
53
+ // Patterns that usually mark the START of a reply chain or a generic footer
54
+ const truncationPatterns = [
51
55
  /^On .* wrote:$/i,
52
- /^From: .*$/i,
53
- /^Sent: .*$/i,
54
- /^To: .*$/i,
55
- /^Subject: .*$/i
56
+ /^From: .* <.*>$/i,
57
+ /^-----Original Message-----$/i,
58
+ /^________________________________$/i,
59
+ /^Sent from my iPhone$/i,
60
+ /^Sent from my Android$/i,
61
+ /^Get Outlook for/i,
62
+ /^--$/ // Standard signature separator
56
63
  ];
57
64
 
58
- // Heuristics for footers
59
- const footerPatterns = [
60
- /unsubscribe/i,
65
+ // Patterns for lines that should be stripped but NOT truncate the whole email
66
+ const noisePatterns = [
67
+ /view in browser/i,
68
+ /click here to view/i,
69
+ /legal notice/i,
70
+ /all rights reserved/i,
61
71
  /privacy policy/i,
62
72
  /terms of service/i,
63
- /view in browser/i,
64
- /copyright \d{4}/i
73
+ /unsubscribe/i
65
74
  ];
66
75
 
67
76
  for (let line of lines) {
68
77
  let lineStripped = line.trim();
78
+ if (!lineStripped) {
79
+ cleanedLines.push("");
80
+ continue;
81
+ }
69
82
 
70
83
  // 2. Quoted text removal (lines starting with >)
71
84
  if (lineStripped.startsWith('>')) {
72
85
  continue;
73
86
  }
74
87
 
75
- // 3. Check for specific reply separators
76
- // If we hit a reply header, we truncate the rest
77
- if (/^On .* wrote:$/i.test(lineStripped)) {
78
- break;
88
+ // 3. Truncation check: If we hit a reply header, we stop entirely
89
+ let shouldTruncate = false;
90
+ for (const pattern of truncationPatterns) {
91
+ if (pattern.test(lineStripped)) {
92
+ shouldTruncate = true;
93
+ break;
94
+ }
79
95
  }
96
+ if (shouldTruncate) break;
80
97
 
81
- // 4. Footer removal (only on very short lines to avoid stripping body content)
82
- if (lineStripped.length < 60) {
83
- let isFooter = false;
84
- for (const pattern of footerPatterns) {
98
+ // 4. Noise check: Strip boilerplate lines
99
+ let isNoise = false;
100
+ if (lineStripped.length < 100) {
101
+ for (const pattern of noisePatterns) {
85
102
  if (pattern.test(lineStripped)) {
86
- isFooter = true;
103
+ isNoise = true;
87
104
  break;
88
105
  }
89
106
  }
90
- if (isFooter) {
91
- continue;
92
- }
93
107
  }
108
+ if (isNoise) continue;
94
109
 
95
110
  cleanedLines.push(line);
96
111
  }
@@ -98,21 +113,20 @@ export class ContentCleaner {
98
113
  // Reassemble
99
114
  text = cleanedLines.join('\n');
100
115
 
101
- // Safety Fallback: If cleaning stripped everything, return original (truncated)
102
- if (!text.trim() || text.length < 10) {
103
- text = originalText.substring(0, 3000);
104
- }
105
-
106
- // Collapse multiple newlines
116
+ // Collapse whitespace
107
117
  text = text.replace(/\n{3,}/g, '\n\n');
118
+ text = text.replace(/[ \t]{2,}/g, ' ');
108
119
 
120
+ // Safety Fallback: If cleaning stripped too much, return original text truncated
121
+ if (text.trim().length < 20 && originalText.trim().length > 20) {
122
+ return originalText.substring(0, 3000).trim();
123
+ }
124
+
109
125
  // Sanitize LLM Special Tokens
110
126
  text = text.replace(/<\|/g, '< |');
111
127
  text = text.replace(/\|>/g, '| >');
112
128
  text = text.replace(/\[INST\]/gi, '[ INST ]');
113
129
  text = text.replace(/\[\/INST\]/gi, '[ /INST ]');
114
- text = text.replace(/<s>/gi, '&lt;s&gt;');
115
- text = text.replace(/<\/s>/gi, '&lt;/s&gt;');
116
130
 
117
131
  return text.trim();
118
132
  }
@@ -82,8 +82,12 @@ export const schemas = {
82
82
  accessToken: z.string().optional(),
83
83
  }),
84
84
  // Rule schemas - supports both single action (legacy) and actions array
85
+ // Now includes description and intent for context-aware AI matching
85
86
  createRule: z.object({
86
87
  name: z.string().min(1).max(100),
88
+ description: z.string().max(500).optional(),
89
+ intent: z.string().max(200).optional(),
90
+ priority: z.number().int().min(0).max(100).optional(),
87
91
  condition: z.record(z.unknown()),
88
92
  action: z.enum(['delete', 'archive', 'draft', 'star', 'read']).optional(),
89
93
  actions: z.array(z.enum(['delete', 'archive', 'draft', 'star', 'read'])).optional(),
@@ -94,6 +98,9 @@ export const schemas = {
94
98
  }),
95
99
  updateRule: z.object({
96
100
  name: z.string().min(1).max(100).optional(),
101
+ description: z.string().max(500).optional(),
102
+ intent: z.string().max(200).optional(),
103
+ priority: z.number().int().min(0).max(100).optional(),
97
104
  condition: z.record(z.unknown()).optional(),
98
105
  action: z.enum(['delete', 'archive', 'draft', 'star', 'read']).optional(),
99
106
  actions: z.array(z.enum(['delete', 'archive', 'draft', 'star', 'read'])).optional(),