@realtimex/email-automator 2.4.5 → 2.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,9 +6,10 @@ export class ContentCleaner {
6
6
  static cleanEmailBody(text) {
7
7
  if (!text)
8
8
  return "";
9
+ const originalText = text;
9
10
  // 0. Lightweight HTML -> Markdown Conversion
10
11
  // Structure: <br>, <p> -> Newlines
11
- text = text.replace(/<br\s*\/?\?>/gi, '\n');
12
+ text = text.replace(/<br\s*\/?>/gi, '\n');
12
13
  text = text.replace(/<\/p>/gi, '\n\n');
13
14
  text = text.replace(/<p.*?>/gi, ''); // Open p tags just gone
14
15
  // Structure: Headers <h1>-<h6> -> # Title
@@ -58,12 +59,12 @@ export class ContentCleaner {
58
59
  continue;
59
60
  }
60
61
  // 3. Check for specific reply separators
61
- // If we hit a reply header, we truncate the rest (Aggressive strategy per Python code)
62
+ // If we hit a reply header, we truncate the rest
62
63
  if (/^On .* wrote:$/i.test(lineStripped)) {
63
64
  break;
64
65
  }
65
- // 4. Footer removal (simple check on short lines)
66
- if (lineStripped.length < 100) {
66
+ // 4. Footer removal (only on very short lines to avoid stripping body content)
67
+ if (lineStripped.length < 60) {
67
68
  let isFooter = false;
68
69
  for (const pattern of footerPatterns) {
69
70
  if (pattern.test(lineStripped)) {
@@ -79,10 +80,13 @@ export class ContentCleaner {
79
80
  }
80
81
  // Reassemble
81
82
  text = cleanedLines.join('\n');
83
+ // Safety Fallback: If cleaning stripped everything, return original (truncated)
84
+ if (!text.trim() || text.length < 10) {
85
+ text = originalText.substring(0, 3000);
86
+ }
82
87
  // Collapse multiple newlines
83
88
  text = text.replace(/\n{3,}/g, '\n\n');
84
- // Sanitize LLM Special Tokens (Prevent Prompt Injection/Confusion)
85
- // Break sequences like <|channel|>, [INST], <s>
89
+ // Sanitize LLM Special Tokens
86
90
  text = text.replace(/<\|/g, '< |');
87
91
  text = text.replace(/\|>/g, '| >');
88
92
  text = text.replace(/\[INST\]/gi, '[ INST ]');