email-body-parser 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +22 -0
- package/README.md +248 -0
- package/dist/cleaner.d.ts +17 -0
- package/dist/cleaner.d.ts.map +1 -0
- package/dist/cleaner.js +92 -0
- package/dist/cleaner.js.map +1 -0
- package/dist/index.d.ts +26 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +27 -0
- package/dist/index.js.map +1 -0
- package/dist/parser.d.ts +35 -0
- package/dist/parser.d.ts.map +1 -0
- package/dist/parser.js +136 -0
- package/dist/parser.js.map +1 -0
- package/dist/patterns/footers.d.ts +7 -0
- package/dist/patterns/footers.d.ts.map +1 -0
- package/dist/patterns/footers.js +42 -0
- package/dist/patterns/footers.js.map +1 -0
- package/dist/patterns/index.d.ts +4 -0
- package/dist/patterns/index.d.ts.map +1 -0
- package/dist/patterns/index.js +4 -0
- package/dist/patterns/index.js.map +1 -0
- package/dist/patterns/quotes.d.ts +7 -0
- package/dist/patterns/quotes.d.ts.map +1 -0
- package/dist/patterns/quotes.js +42 -0
- package/dist/patterns/quotes.js.map +1 -0
- package/dist/patterns/signatures.d.ts +7 -0
- package/dist/patterns/signatures.d.ts.map +1 -0
- package/dist/patterns/signatures.js +47 -0
- package/dist/patterns/signatures.js.map +1 -0
- package/dist/types.d.ts +38 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/package.json +70 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
22
|
+
|
package/README.md
ADDED
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
# email-body-parser
|
|
2
|
+
|
|
3
|
+
Parse and clean email content - removes quotes, auto-signatures, and mailing list footers while **preserving human signatures**.
|
|
4
|
+
|
|
5
|
+
[npm package](https://www.npmjs.com/package/email-body-parser)
|
|
6
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
7
|
+
|
|
8
|
+
**Maintained by [Pinenlime](https://pinenlime.com)**
|
|
9
|
+
|
|
10
|
+
## Why Another Email Parser?
|
|
11
|
+
|
|
12
|
+
Unlike other email parsing libraries that aggressively remove all signatures, `email-body-parser` follows a conservative philosophy: **only remove things we're 100% sure are not content**.
|
|
13
|
+
|
|
14
|
+
| Feature | Other Libraries | email-body-parser |
|
|
15
|
+
|---------|-----------------|-------------------|
|
|
16
|
+
| Human signatures ("Best, John") | ❌ Removes | ✅ **Keeps** |
|
|
17
|
+
| Mobile auto-signatures | ✅ Removes | ✅ Removes |
|
|
18
|
+
| Quote headers | ✅ Removes | ✅ Removes |
|
|
19
|
+
| Mailing list footers | ❌ Not handled | ✅ **Removes** |
|
|
20
|
+
| Legal disclaimers | ❌ Not handled | ✅ **Removes** |
|
|
21
|
+
| Compressed Outlook headers | ❌ Basic | ✅ **Comprehensive** |
|
|
22
|
+
|
|
23
|
+
## Installation
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
npm install email-body-parser
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
## Usage
|
|
30
|
+
|
|
31
|
+
### Simple API
|
|
32
|
+
|
|
33
|
+
For most use cases, the `cleanEmailContent()` function is all you need:
|
|
34
|
+
|
|
35
|
+
```typescript
|
|
36
|
+
import { cleanEmailContent } from 'email-body-parser';
|
|
37
|
+
|
|
38
|
+
const rawEmail = `Thanks for the update!
|
|
39
|
+
|
|
40
|
+
Best regards,
|
|
41
|
+
John Smith
|
|
42
|
+
Product Manager
|
|
43
|
+
|
|
44
|
+
On Mon, Mar 17, 2025 at 1:29 PM Jane Doe <jane@example.com> wrote:
|
|
45
|
+
> Here's the latest report...
|
|
46
|
+
|
|
47
|
+
Sent from my iPhone`;
|
|
48
|
+
|
|
49
|
+
const cleaned = cleanEmailContent(rawEmail);
|
|
50
|
+
console.log(cleaned);
|
|
51
|
+
// Output:
|
|
52
|
+
// Thanks for the update!
|
|
53
|
+
//
|
|
54
|
+
// Best regards,
|
|
55
|
+
// John Smith
|
|
56
|
+
// Product Manager
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
### Advanced API
|
|
60
|
+
|
|
61
|
+
For more control, use the `EmailBodyParser` class:
|
|
62
|
+
|
|
63
|
+
```typescript
|
|
64
|
+
import EmailBodyParser from 'email-body-parser';
|
|
65
|
+
|
|
66
|
+
const parser = new EmailBodyParser();
|
|
67
|
+
const email = parser.parse(rawEmail);
|
|
68
|
+
|
|
69
|
+
// Get visible content (excludes quotes, auto-signatures)
|
|
70
|
+
console.log(email.getVisibleText());
|
|
71
|
+
|
|
72
|
+
// Get just the quoted portions
|
|
73
|
+
console.log(email.getQuotedText());
|
|
74
|
+
|
|
75
|
+
// Iterate over all fragments
|
|
76
|
+
for (const fragment of email.getFragments()) {
|
|
77
|
+
console.log({
|
|
78
|
+
content: fragment.content,
|
|
79
|
+
isHidden: fragment.isHidden,
|
|
80
|
+
isQuoted: fragment.isQuoted,
|
|
81
|
+
isSignature: fragment.isSignature,
|
|
82
|
+
});
|
|
83
|
+
}
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
### Convenience Methods
|
|
87
|
+
|
|
88
|
+
```typescript
|
|
89
|
+
const parser = new EmailBodyParser();
|
|
90
|
+
|
|
91
|
+
// Get visible text directly
|
|
92
|
+
const visibleText = parser.parseReply(rawEmail);
|
|
93
|
+
|
|
94
|
+
// Get quoted text directly
|
|
95
|
+
const quotedText = parser.parseReplied(rawEmail);
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
## What Gets Removed
|
|
99
|
+
|
|
100
|
+
### Quote Headers
|
|
101
|
+
- Gmail style: `On Mon, Mar 17, 2025 at 1:29 PM John <john@example.com> wrote:`
|
|
102
|
+
- Outlook style: `-----Original Message-----`
|
|
103
|
+
- Forward headers: `From: ... Sent: ... To: ... Subject: ...`
|
|
104
|
+
- Standard quote markers: `> quoted text`
|
|
105
|
+
|
|
106
|
+
### Auto-Generated Signatures
|
|
107
|
+
- Mobile: `Sent from my iPhone`, `Sent from my Android`
|
|
108
|
+
- Apps: `Sent via Superhuman`, `Get Outlook for iOS`
|
|
109
|
+
- Meeting links: `BOOK A MEETING...`
|
|
110
|
+
|
|
111
|
+
### Mailing List Footers
|
|
112
|
+
- Google Groups: `You received this message because...`
|
|
113
|
+
- Unsubscribe links: `Click here to unsubscribe`
|
|
114
|
+
- Marketing footers: `This email was sent to...`
|
|
115
|
+
|
|
116
|
+
### Legal Disclaimers
|
|
117
|
+
- `CONFIDENTIAL: This message contains...`
|
|
118
|
+
- `DISCLAIMER: This email and any files...`
|
|
119
|
+
|
|
120
|
+
## What Gets Preserved
|
|
121
|
+
|
|
122
|
+
**Human signatures are kept** because they provide valuable context:
|
|
123
|
+
- Contact information for follow-ups
|
|
124
|
+
- Job titles help understand urgency
|
|
125
|
+
- Avoids false positives
|
|
126
|
+
|
|
127
|
+
```typescript
|
|
128
|
+
const email = `Please review the attached document.
|
|
129
|
+
|
|
130
|
+
Best regards,
|
|
131
|
+
Sarah Williams
|
|
132
|
+
Senior Financial Analyst
|
|
133
|
+
Direct: (555) 234-5678
|
|
134
|
+
s.williams@example.com`;
|
|
135
|
+
|
|
136
|
+
cleanEmailContent(email);
|
|
137
|
+
// Returns the ENTIRE email - signature is preserved!
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
## API Reference
|
|
141
|
+
|
|
142
|
+
### `cleanEmailContent(content: string): string`
|
|
143
|
+
|
|
144
|
+
Cleans email content by removing quotes, auto-signatures, and mailing list footers.
|
|
145
|
+
|
|
146
|
+
**Parameters:**
|
|
147
|
+
- `content` - The raw email content to clean
|
|
148
|
+
|
|
149
|
+
**Returns:** Cleaned email content with quotes and auto-signatures removed
|
|
150
|
+
|
|
151
|
+
### `EmailBodyParser`
|
|
152
|
+
|
|
153
|
+
#### `constructor(options?: ParserOptions)`
|
|
154
|
+
|
|
155
|
+
**Options:**
|
|
156
|
+
- `keepSignatures` (default: `true`) - Keep human signatures
|
|
157
|
+
- `removeDisclaimers` (default: `true`) - Remove legal disclaimers
|
|
158
|
+
- `removeMailingListFooters` (default: `true`) - Remove mailing list footers
|
|
159
|
+
|
|
160
|
+
#### `parse(text: string): ParsedEmail`
|
|
161
|
+
|
|
162
|
+
Parses email content into fragments.
|
|
163
|
+
|
|
164
|
+
#### `parseReply(text: string): string`
|
|
165
|
+
|
|
166
|
+
Convenience method that returns visible text directly.
|
|
167
|
+
|
|
168
|
+
#### `parseReplied(text: string): string`
|
|
169
|
+
|
|
170
|
+
Convenience method that returns quoted text directly.
|
|
171
|
+
|
|
172
|
+
### `ParsedEmail`
|
|
173
|
+
|
|
174
|
+
#### `getFragments(): EmailFragment[]`
|
|
175
|
+
|
|
176
|
+
Returns all email fragments.
|
|
177
|
+
|
|
178
|
+
#### `getVisibleText(): string`
|
|
179
|
+
|
|
180
|
+
Returns content that is not hidden (excludes quotes, auto-signatures).
|
|
181
|
+
|
|
182
|
+
#### `getQuotedText(): string`
|
|
183
|
+
|
|
184
|
+
Returns only the quoted portions of the email.
|
|
185
|
+
|
|
186
|
+
### `EmailFragment`
|
|
187
|
+
|
|
188
|
+
```typescript
|
|
189
|
+
interface EmailFragment {
|
|
190
|
+
content: string; // The fragment text
|
|
191
|
+
isHidden: boolean; // True if this should be hidden from display
|
|
192
|
+
isSignature: boolean; // True if this is an auto-signature
|
|
193
|
+
isQuoted: boolean; // True if this is quoted content
|
|
194
|
+
}
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
## Customizing Patterns
|
|
198
|
+
|
|
199
|
+
For advanced users, the pattern arrays are exported:
|
|
200
|
+
|
|
201
|
+
```typescript
|
|
202
|
+
import {
|
|
203
|
+
QUOTE_PATTERNS,
|
|
204
|
+
AUTO_SIGNATURE_PATTERNS,
|
|
205
|
+
MAILING_LIST_PATTERNS,
|
|
206
|
+
} from 'email-body-parser';
|
|
207
|
+
|
|
208
|
+
// Each pattern has metadata for debugging
|
|
209
|
+
QUOTE_PATTERNS.forEach(({ pattern, description, example }) => {
|
|
210
|
+
console.log(description, example);
|
|
211
|
+
});
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
## TypeScript Support
|
|
215
|
+
|
|
216
|
+
Full TypeScript support with exported types:
|
|
217
|
+
|
|
218
|
+
```typescript
|
|
219
|
+
import type {
|
|
220
|
+
EmailFragment,
|
|
221
|
+
ParsedEmail,
|
|
222
|
+
PatternDefinition,
|
|
223
|
+
ParserOptions,
|
|
224
|
+
} from 'email-body-parser';
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
## RE2 Support (Optional)
|
|
228
|
+
|
|
229
|
+
For better performance and ReDoS protection, install RE2 as an optional peer dependency:
|
|
230
|
+
|
|
231
|
+
```bash
|
|
232
|
+
npm install re2
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
The library will automatically use RE2 when available.
|
|
236
|
+
|
|
237
|
+
## License
|
|
238
|
+
|
|
239
|
+
MIT License - see [LICENSE](LICENSE) for details.
|
|
240
|
+
|
|
241
|
+
## Author
|
|
242
|
+
|
|
243
|
+
Created and maintained by [Pinenlime](https://pinenlime.com).
|
|
244
|
+
|
|
245
|
+
## Contributing
|
|
246
|
+
|
|
247
|
+
Contributions are welcome! Please feel free to submit issues and pull requests.
|
|
248
|
+
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Email Body Cleaner
|
|
3
|
+
*
|
|
4
|
+
* Strips unwanted content from emails while preserving meaningful text.
|
|
5
|
+
* Targets: quoted replies, device signatures, newsletter footers, disclaimers.
|
|
6
|
+
* Preserves: human sign-offs and contact details within the message body.
|
|
7
|
+
*/
|
|
8
|
+
/**
|
|
9
|
+
* Remove quoted replies, auto-signatures, and newsletter footers from email text.
|
|
10
|
+
* Human sign-offs like "Thanks, John" are intentionally preserved.
|
|
11
|
+
*
|
|
12
|
+
* @param text - Raw email body text
|
|
13
|
+
* @returns Cleaned email content
|
|
14
|
+
*/
|
|
15
|
+
export declare function cleanEmailContent(text: string): string;
|
|
16
|
+
export default cleanEmailContent;
|
|
17
|
+
//# sourceMappingURL=cleaner.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cleaner.d.ts","sourceRoot":"","sources":["../src/cleaner.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AA0BH;;;;;;GAMG;AACH,wBAAgB,iBAAiB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAatD;AAoDD,eAAe,iBAAiB,CAAC"}
|
package/dist/cleaner.js
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Email Body Cleaner
|
|
3
|
+
*
|
|
4
|
+
* Strips unwanted content from emails while preserving meaningful text.
|
|
5
|
+
* Targets: quoted replies, device signatures, newsletter footers, disclaimers.
|
|
6
|
+
* Preserves: human sign-offs and contact details within the message body.
|
|
7
|
+
*/
|
|
8
|
+
import { QUOTE_PATTERNS, AUTO_SIGNATURE_PATTERNS, MAILING_LIST_PATTERNS } from './patterns/index.js';
|
|
9
|
+
/**
 * Inline artifacts to strip (they never influence where the text is truncated).
 *
 * Each row is [pattern, description, example]; rows are expanded into
 * { pattern, description, example } objects. All patterns are global and
 * case-insensitive so a single replace() call removes every occurrence.
 */
const ARTIFACT_PATTERNS = [
    [/\[image:[^\]]*\]/gi, 'Inline image reference', '[image: photo.jpg]'],
    [/\[Image\]/gi, 'Generic image tag', '[Image]'],
    [/\[cid:[^\]]*\]/gi, 'Content-ID reference', '[cid:img001@domain.com]'],
].map(([pattern, description, example]) => ({ pattern, description, example }));
|
|
29
|
+
/**
 * Remove quoted replies, auto-signatures, and newsletter footers from email text.
 * Human sign-offs like "Thanks, John" are intentionally preserved.
 *
 * Processing order:
 *   1. CRLF line endings are normalised to LF (the same normalisation
 *      EmailBodyParser.parse applies), so kept lines never carry a stray "\r".
 *   2. Inline artifacts listed in ARTIFACT_PATTERNS are deleted.
 *   3. The text is cut at the first line flagged by locateTruncationPoint and
 *      trailing blank lines are dropped.
 *
 * @param text - Raw email body text
 * @returns Cleaned email content joined with "\n"; '' when text is empty or missing
 */
export function cleanEmailContent(text) {
    if (!text) {
        return '';
    }
    // Normalise line endings before splitting; otherwise "Hello\r\nSent from my iPhone"
    // would come back as "Hello\r".
    let result = text.replace(/\r\n/g, '\n');
    // Strip inline artifacts first so they cannot hide a line that should match.
    for (const { pattern } of ARTIFACT_PATTERNS) {
        result = result.replace(pattern, '');
    }
    const lines = result.split('\n');
    const truncateAt = locateTruncationPoint(lines);
    return trimTrailingBlanks(lines.slice(0, truncateAt)).join('\n');
}
|
|
48
|
+
/**
 * Find the first line at which quoted or machine-generated content starts.
 *
 * Every line is trimmed and checked, in order, against the quote patterns,
 * the auto-signature patterns and the mailing-list patterns. The index of the
 * first hit is returned as the exclusive end of the content to keep. When a
 * mailing-list footer sits directly below a "--" delimiter line, the delimiter
 * is cut as well. If no line matches, the total number of lines is returned.
 */
function locateTruncationPoint(lines) {
    for (const [position, rawLine] of lines.entries()) {
        const candidate = rawLine?.trim() ?? '';
        // Quoted reply or auto-generated signature: cut exactly here.
        if (matchesAnyPattern(candidate, QUOTE_PATTERNS) ||
            matchesAnyPattern(candidate, AUTO_SIGNATURE_PATTERNS)) {
            return position;
        }
        if (!matchesAnyPattern(candidate, MAILING_LIST_PATTERNS)) {
            continue;
        }
        // Newsletter/mailing list footer: also swallow a preceding "--" delimiter.
        const delimiterAbove = lines[position - 1]?.trim() === '--';
        return delimiterAbove ? position - 1 : position;
    }
    return lines.length;
}
|
|
75
|
+
/**
 * Test if text matches any pattern in the list.
 *
 * RegExp.prototype.test is stateful for patterns carrying the "g" or "y" flag:
 * it starts at pattern.lastIndex and advances it on success. Because the same
 * pattern objects are reused for every line, lastIndex is reset to 0 before
 * each test (so matching always starts at the beginning of the text) and again
 * afterwards (so no position leaks to the next caller of the same pattern).
 *
 * @param text - Line to test
 * @param patterns - Pattern definitions; only the `pattern` property is read
 * @returns true when at least one pattern matches, false otherwise (including for an empty list)
 */
function matchesAnyPattern(text, patterns) {
    return patterns.some(({ pattern }) => {
        pattern.lastIndex = 0;
        const matched = pattern.test(text);
        pattern.lastIndex = 0;
        return matched;
    });
}
|
|
81
|
+
/**
 * Return a copy of `lines` without the blank (empty or whitespace-only)
 * entries at its end. The input array is left untouched.
 */
function trimTrailingBlanks(lines) {
    let keep = lines.length;
    // Walk backwards past every trailing line that is blank once trimmed.
    while (keep > 0 && lines[keep - 1]?.trim() === '') {
        keep -= 1;
    }
    return lines.slice(0, keep);
}
|
|
91
|
+
export default cleanEmailContent;
|
|
92
|
+
//# sourceMappingURL=cleaner.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cleaner.js","sourceRoot":"","sources":["../src/cleaner.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EAAE,cAAc,EAAE,uBAAuB,EAAE,qBAAqB,EAAE,MAAM,qBAAqB,CAAC;AAGrG;;GAEG;AACH,MAAM,iBAAiB,GAAwB;IAC7C;QACE,OAAO,EAAE,oBAAoB;QAC7B,WAAW,EAAE,wBAAwB;QACrC,OAAO,EAAE,oBAAoB;KAC9B;IACD;QACE,OAAO,EAAE,aAAa;QACtB,WAAW,EAAE,mBAAmB;QAChC,OAAO,EAAE,SAAS;KACnB;IACD;QACE,OAAO,EAAE,kBAAkB;QAC3B,WAAW,EAAE,sBAAsB;QACnC,OAAO,EAAE,yBAAyB;KACnC;CACF,CAAC;AAEF;;;;;;GAMG;AACH,MAAM,UAAU,iBAAiB,CAAC,IAAY;IAC5C,IAAI,CAAC,IAAI;QAAE,OAAO,EAAE,CAAC;IAErB,+BAA+B;IAC/B,IAAI,MAAM,GAAG,IAAI,CAAC;IAClB,KAAK,MAAM,EAAE,OAAO,EAAE,IAAI,iBAAiB,EAAE,CAAC;QAC5C,MAAM,GAAG,MAAM,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;IACvC,CAAC;IAED,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACjC,MAAM,UAAU,GAAG,qBAAqB,CAAC,KAAK,CAAC,CAAC;IAEhD,OAAO,kBAAkB,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACnE,CAAC;AAED;;;GAGG;AACH,SAAS,qBAAqB,CAAC,KAAe;IAC5C,KAAK,IAAI,GAAG,GAAG,CAAC,EAAE,GAAG,GAAG,KAAK,CAAC,MAAM,EAAE,GAAG,EAAE,EAAE,CAAC;QAC5C,MAAM,OAAO,GAAG,KAAK,CAAC,GAAG,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;QAEzC,wBAAwB;QACxB,IAAI,iBAAiB,CAAC,OAAO,EAAE,cAAc,CAAC,EAAE,CAAC;YAC/C,OAAO,GAAG,CAAC;QACb,CAAC;QAED,oCAAoC;QACpC,IAAI,iBAAiB,CAAC,OAAO,EAAE,uBAAuB,CAAC,EAAE,CAAC;YACxD,OAAO,GAAG,CAAC;QACb,CAAC;QAED,0CAA0C;QAC1C,IAAI,iBAAiB,CAAC,OAAO,EAAE,qBAAqB,CAAC,EAAE,CAAC;YACtD,2CAA2C;YAC3C,MAAM,QAAQ,GAAG,KAAK,CAAC,GAAG,GAAG,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC;YACxC,IAAI,QAAQ,KAAK,IAAI,EAAE,CAAC;gBACtB,OAAO,GAAG,GAAG,CAAC,CAAC;YACjB,CAAC;YACD,OAAO,GAAG,CAAC;QACb,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC,MAAM,CAAC;AACtB,CAAC;AAED;;GAEG;AACH,SAAS,iBAAiB,CAAC,IAAY,EAAE,QAA6B;IACpE,OAAO,QAAQ,CAAC,IAAI,CAAC,CAAC,EAAE,OAAO,EAAE,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;AAC5D,CAAC;AAED;;GAEG;AACH,SAAS,kBAAkB,CAAC,KAAe;IACzC,MAAM,MAAM,GAAG,CAAC,GAAG,KAAK,CAAC,CAAC;IAC1B,OAAO,MAAM,CAAC,MAAM,GAAG,CAAC,IAAI,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC
;QACrE,MAAM,CAAC,GAAG,EAAE,CAAC;IACf,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,eAAe,iBAAiB,CAAC"}
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* email-body-parser
|
|
3
|
+
*
|
|
4
|
+
* Parse and clean email content - removes quotes, auto-signatures,
|
|
5
|
+
* and mailing list footers while preserving human signatures.
|
|
6
|
+
*
|
|
7
|
+
* @example
|
|
8
|
+
* // Simple API
|
|
9
|
+
* import { cleanEmailContent } from 'email-body-parser';
|
|
10
|
+
* const cleaned = cleanEmailContent(rawEmail);
|
|
11
|
+
*
|
|
12
|
+
* @example
|
|
13
|
+
* // Advanced API
|
|
14
|
+
* import EmailBodyParser from 'email-body-parser';
|
|
15
|
+
* const parser = new EmailBodyParser();
|
|
16
|
+
* const email = parser.parse(rawEmail);
|
|
17
|
+
* console.log(email.getVisibleText());
|
|
18
|
+
*/
|
|
19
|
+
export { cleanEmailContent } from './cleaner.js';
|
|
20
|
+
export { EmailBodyParser } from './parser.js';
|
|
21
|
+
export { default } from './parser.js';
|
|
22
|
+
export { QUOTE_PATTERNS } from './patterns/quotes.js';
|
|
23
|
+
export { AUTO_SIGNATURE_PATTERNS } from './patterns/signatures.js';
|
|
24
|
+
export { MAILING_LIST_PATTERNS } from './patterns/footers.js';
|
|
25
|
+
export type { EmailFragment, ParsedEmail, PatternDefinition, ParserOptions, } from './types.js';
|
|
26
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAGH,OAAO,EAAE,iBAAiB,EAAE,MAAM,cAAc,CAAC;AACjD,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAC9C,OAAO,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAGtC,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACtD,OAAO,EAAE,uBAAuB,EAAE,MAAM,0BAA0B,CAAC;AACnE,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAG9D,YAAY,EACV,aAAa,EACb,WAAW,EACX,iBAAiB,EACjB,aAAa,GACd,MAAM,YAAY,CAAC"}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* email-body-parser
|
|
3
|
+
*
|
|
4
|
+
* Parse and clean email content - removes quotes, auto-signatures,
|
|
5
|
+
* and mailing list footers while preserving human signatures.
|
|
6
|
+
*
|
|
7
|
+
* @example
|
|
8
|
+
* // Simple API
|
|
9
|
+
* import { cleanEmailContent } from 'email-body-parser';
|
|
10
|
+
* const cleaned = cleanEmailContent(rawEmail);
|
|
11
|
+
*
|
|
12
|
+
* @example
|
|
13
|
+
* // Advanced API
|
|
14
|
+
* import EmailBodyParser from 'email-body-parser';
|
|
15
|
+
* const parser = new EmailBodyParser();
|
|
16
|
+
* const email = parser.parse(rawEmail);
|
|
17
|
+
* console.log(email.getVisibleText());
|
|
18
|
+
*/
|
|
19
|
+
// Main exports
|
|
20
|
+
export { cleanEmailContent } from './cleaner.js';
|
|
21
|
+
export { EmailBodyParser } from './parser.js';
|
|
22
|
+
export { default } from './parser.js';
|
|
23
|
+
// Pattern exports (for advanced users who want to customize)
|
|
24
|
+
export { QUOTE_PATTERNS } from './patterns/quotes.js';
|
|
25
|
+
export { AUTO_SIGNATURE_PATTERNS } from './patterns/signatures.js';
|
|
26
|
+
export { MAILING_LIST_PATTERNS } from './patterns/footers.js';
|
|
27
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAEH,eAAe;AACf,OAAO,EAAE,iBAAiB,EAAE,MAAM,cAAc,CAAC;AACjD,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAC9C,OAAO,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAEtC,6DAA6D;AAC7D,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACtD,OAAO,EAAE,uBAAuB,EAAE,MAAM,0BAA0B,CAAC;AACnE,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC"}
|
package/dist/parser.d.ts
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Fragment-based Email Parser
|
|
3
|
+
*
|
|
4
|
+
* Breaks emails into semantic chunks: body content, quoted replies,
|
|
5
|
+
* auto-signatures, and newsletter footers. Designed to preserve
|
|
6
|
+
* human-written sign-offs while filtering automated additions.
|
|
7
|
+
*/
|
|
8
|
+
import type { ParsedEmail, ParserOptions } from './types.js';
|
|
9
|
+
/**
|
|
10
|
+
* Email parser that segments content into typed fragments.
|
|
11
|
+
*
|
|
12
|
+
* Key behavior: Human sign-offs ("Best, John") are treated as body content,
|
|
13
|
+
* while auto-generated signatures ("Sent from iPhone") are filtered.
|
|
14
|
+
*/
|
|
15
|
+
export declare class EmailBodyParser {
|
|
16
|
+
private config;
|
|
17
|
+
constructor(options?: ParserOptions);
|
|
18
|
+
/**
|
|
19
|
+
* Parse email text into categorized fragments
|
|
20
|
+
*/
|
|
21
|
+
parse(input: string): ParsedEmail;
|
|
22
|
+
/**
|
|
23
|
+
* Shorthand: parse and return only visible content
|
|
24
|
+
*/
|
|
25
|
+
parseReply(input: string): string;
|
|
26
|
+
/**
|
|
27
|
+
* Shorthand: parse and return only quoted content
|
|
28
|
+
*/
|
|
29
|
+
parseReplied(input: string): string;
|
|
30
|
+
private stripArtifacts;
|
|
31
|
+
private categorize;
|
|
32
|
+
private buildFragment;
|
|
33
|
+
}
|
|
34
|
+
export default EmailBodyParser;
|
|
35
|
+
//# sourceMappingURL=parser.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parser.d.ts","sourceRoot":"","sources":["../src/parser.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,KAAK,EAAiB,WAAW,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AA4C5E;;;;;GAKG;AACH,qBAAa,eAAe;IAC1B,OAAO,CAAC,MAAM,CAA0B;gBAE5B,OAAO,GAAE,aAAkB;IAQvC;;OAEG;IACH,KAAK,CAAC,KAAK,EAAE,MAAM,GAAG,WAAW;IAkCjC;;OAEG;IACH,UAAU,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM;IAIjC;;OAEG;IACH,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM;IAInC,OAAO,CAAC,cAAc;IAOtB,OAAO,CAAC,UAAU;IAkBlB,OAAO,CAAC,aAAa;CAUtB;AAED,eAAe,eAAe,CAAC"}
|
package/dist/parser.js
ADDED
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Fragment-based Email Parser
|
|
3
|
+
*
|
|
4
|
+
* Breaks emails into semantic chunks: body content, quoted replies,
|
|
5
|
+
* auto-signatures, and newsletter footers. Designed to preserve
|
|
6
|
+
* human-written sign-offs while filtering automated additions.
|
|
7
|
+
*/
|
|
8
|
+
import { QUOTE_PATTERNS, AUTO_SIGNATURE_PATTERNS, MAILING_LIST_PATTERNS } from './patterns/index.js';
|
|
9
|
+
/**
 * Internal fragment representation: one contiguous run of email text plus the
 * three flags describing how it was classified.
 */
class TextFragment {
    content;
    isHidden;
    isSignature;
    isQuoted;
    constructor(content, isHidden, isSignature, isQuoted) {
        Object.assign(this, { content, isHidden, isSignature, isQuoted });
    }
    /** String conversion yields the fragment's content. */
    toString() {
        const { content } = this;
        return content;
    }
}
|
|
25
|
+
/**
 * Parsed email with fragment access methods.
 *
 * Wraps the ordered fragment list produced by the parser and offers two
 * text views over it; both join the selected fragments with "\n" and then
 * strip any run of "~" characters from the very end of the result.
 */
class ParsedEmailImpl {
    fragments;
    constructor(fragments) {
        this.fragments = fragments;
    }
    /** The fragment list exactly as it was passed to the constructor. */
    getFragments() {
        const { fragments } = this;
        return fragments;
    }
    /** Text of every fragment whose isHidden flag is falsy. */
    getVisibleText() {
        const shown = [];
        for (const fragment of this.fragments) {
            if (!fragment.isHidden) {
                shown.push(fragment.content);
            }
        }
        return shown.join('\n').replace(/~+$/, '');
    }
    /** Text of every fragment whose isQuoted flag is truthy. */
    getQuotedText() {
        const quoted = [];
        for (const fragment of this.fragments) {
            if (fragment.isQuoted) {
                quoted.push(fragment.content);
            }
        }
        return quoted.join('\n').replace(/~+$/, '');
    }
}
|
|
49
|
+
/**
 * Email parser that segments content into typed fragments.
 *
 * Key behavior: Human sign-offs ("Best, John") are treated as body content,
 * while auto-generated signatures ("Sent from iPhone") are filtered.
 *
 * Each line is classified on its own as 'quote', 'autosig', 'listfooter' or
 * 'body'; consecutive lines with the same category are merged into one fragment.
 */
export class EmailBodyParser {
    /**
     * Resolved options. Only `removeMailingListFooters` is read by this class;
     * `keepSignatures` and `removeDisclaimers` are stored but not consulted here.
     */
    config;
    /**
     * @param options - Optional flags; each one defaults to true when null/undefined
     */
    constructor(options = {}) {
        this.config = {
            keepSignatures: options.keepSignatures ?? true,
            removeDisclaimers: options.removeDisclaimers ?? true,
            removeMailingListFooters: options.removeMailingListFooters ?? true,
        };
    }
    /**
     * Parse email text into categorized fragments
     *
     * @param input - Raw email body text
     * @returns Parsed email; it holds no fragments when input is empty or missing
     */
    parse(input) {
        if (!input) {
            return new ParsedEmailImpl([]);
        }
        // Normalize line endings, then drop inline image/cid placeholders.
        const text = this.stripArtifacts(input.replace(/\r\n/g, '\n'));
        const segments = [];
        let buffer = [];
        let bufferCategory = 'body';
        for (const line of text.split('\n')) {
            const category = this.categorize(line.trim());
            // A change of category closes the fragment collected so far.
            if (category !== bufferCategory && buffer.length > 0) {
                segments.push(this.buildFragment(buffer.join('\n'), bufferCategory));
                buffer = [];
            }
            bufferCategory = category;
            buffer.push(line);
        }
        if (buffer.length > 0) {
            segments.push(this.buildFragment(buffer.join('\n'), bufferCategory));
        }
        return new ParsedEmailImpl(segments);
    }
    /**
     * Shorthand: parse and return only visible content
     */
    parseReply(input) {
        return this.parse(input).getVisibleText();
    }
    /**
     * Shorthand: parse and return only quoted content
     */
    parseReplied(input) {
        return this.parse(input).getQuotedText();
    }
    /** Delete "[image: ...]", "[Image]" and "[cid:...]" placeholders (case-insensitive). */
    stripArtifacts(text) {
        return text
            .replace(/\[image:[^\]]*\]/gi, '')
            .replace(/\[Image\]/gi, '')
            .replace(/\[cid:[^\]]*\]/gi, '');
    }
    /**
     * Classify one (already trimmed) line.
     *
     * Quote patterns win over auto-signature patterns, which win over
     * mailing-list patterns; the latter are only checked when
     * `config.removeMailingListFooters` is truthy. Anything else is 'body'.
     */
    categorize(line) {
        // pattern.test() is stateful for "g"/"y" regexes, and the same pattern
        // objects are reused for every line. Resetting lastIndex before and after
        // the test makes each check start at the beginning of the line and leaves
        // no position behind for the next line.
        const hits = (patterns) => patterns.some(({ pattern }) => {
            pattern.lastIndex = 0;
            const matched = pattern.test(line);
            pattern.lastIndex = 0;
            return matched;
        });
        if (hits(QUOTE_PATTERNS)) {
            return 'quote';
        }
        if (hits(AUTO_SIGNATURE_PATTERNS)) {
            return 'autosig';
        }
        if (this.config.removeMailingListFooters && hits(MAILING_LIST_PATTERNS)) {
            return 'listfooter';
        }
        return 'body';
    }
    /**
     * Wrap a run of lines in a fragment. Leading and trailing newlines are
     * removed from the text; every category except 'body' is marked hidden.
     */
    buildFragment(text, category) {
        const cleaned = text.replace(/^\n+/, '').replace(/\n+$/, '');
        return new TextFragment(cleaned, category !== 'body', // isHidden
        category === 'autosig', // isSignature
        category === 'quote' // isQuoted
        );
    }
}
|
|
135
|
+
export default EmailBodyParser;
|
|
136
|
+
//# sourceMappingURL=parser.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parser.js","sourceRoot":"","sources":["../src/parser.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAGH,OAAO,EAAE,cAAc,EAAE,uBAAuB,EAAE,qBAAqB,EAAE,MAAM,qBAAqB,CAAC;AAErG,uCAAuC;AACvC,MAAM,YAAY;IAEP;IACA;IACA;IACA;IAJT,YACS,OAAe,EACf,QAAiB,EACjB,WAAoB,EACpB,QAAiB;QAHjB,YAAO,GAAP,OAAO,CAAQ;QACf,aAAQ,GAAR,QAAQ,CAAS;QACjB,gBAAW,GAAX,WAAW,CAAS;QACpB,aAAQ,GAAR,QAAQ,CAAS;IACvB,CAAC;IAEJ,QAAQ;QACN,OAAO,IAAI,CAAC,OAAO,CAAC;IACtB,CAAC;CACF;AAED,gDAAgD;AAChD,MAAM,eAAe;IACA;IAAnB,YAAmB,SAA0B;QAA1B,cAAS,GAAT,SAAS,CAAiB;IAAG,CAAC;IAEjD,YAAY;QACV,OAAO,IAAI,CAAC,SAAS,CAAC;IACxB,CAAC;IAED,cAAc;QACZ,OAAO,IAAI,CAAC,SAAS;aAClB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC;aAC1B,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC;aACrB,IAAI,CAAC,IAAI,CAAC;aACV,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;IACxB,CAAC;IAED,aAAa;QACX,OAAO,IAAI,CAAC,SAAS;aAClB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC;aACzB,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC;aACrB,IAAI,CAAC,IAAI,CAAC;aACV,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;IACxB,CAAC;CACF;AAID;;;;;GAKG;AACH,MAAM,OAAO,eAAe;IAClB,MAAM,CAA0B;IAExC,YAAY,UAAyB,EAAE;QACrC,IAAI,CAAC,MAAM,GAAG;YACZ,cAAc,EAAE,OAAO,CAAC,cAAc,IAAI,IAAI;YAC9C,iBAAiB,EAAE,OAAO,CAAC,iBAAiB,IAAI,IAAI;YACpD,wBAAwB,EAAE,OAAO,CAAC,wBAAwB,IAAI,IAAI;SACnE,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,KAAa;QACjB,IAAI,CAAC,KAAK,EAAE,CAAC;YACX,OAAO,IAAI,eAAe,CAAC,EAAE,CAAC,CAAC;QACjC,CAAC;QAED,sBAAsB;QACtB,IAAI,IAAI,GAAG,KAAK,CAAC,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;QACxC,IAAI,GAAG,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC;QAEjC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAC/B,MAAM,QAAQ,GAAoB,EAAE,CAAC;QAErC,IAAI,MAAM,GAAa,EAAE,CAAC;QAC1B,IAAI,cAAc,GAAiB,MAAM,CAAC;QAE1C,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,MAAM,QAAQ,GAAG,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC;YAE9C,IAAI,QAAQ,KAAK,cAAc,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACrD,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,cAAc,CAAC,CAAC,CAAC;gBACrE
,MAAM,GAAG,EAAE,CAAC;YACd,CAAC;YAED,cAAc,GAAG,QAAQ,CAAC;YAC1B,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACpB,CAAC;QAED,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtB,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,cAAc,CAAC,CAAC,CAAC;QACvE,CAAC;QAED,OAAO,IAAI,eAAe,CAAC,QAAQ,CAAC,CAAC;IACvC,CAAC;IAED;;OAEG;IACH,UAAU,CAAC,KAAa;QACtB,OAAO,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,cAAc,EAAE,CAAC;IAC5C,CAAC;IAED;;OAEG;IACH,YAAY,CAAC,KAAa;QACxB,OAAO,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,aAAa,EAAE,CAAC;IAC3C,CAAC;IAEO,cAAc,CAAC,IAAY;QACjC,OAAO,IAAI;aACR,OAAO,CAAC,oBAAoB,EAAE,EAAE,CAAC;aACjC,OAAO,CAAC,aAAa,EAAE,EAAE,CAAC;aAC1B,OAAO,CAAC,kBAAkB,EAAE,EAAE,CAAC,CAAC;IACrC,CAAC;IAEO,UAAU,CAAC,IAAY;QAC7B,KAAK,MAAM,EAAE,OAAO,EAAE,IAAI,cAAc,EAAE,CAAC;YACzC,IAAI,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC;gBAAE,OAAO,OAAO,CAAC;QACzC,CAAC;QAED,KAAK,MAAM,EAAE,OAAO,EAAE,IAAI,uBAAuB,EAAE,CAAC;YAClD,IAAI,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC;gBAAE,OAAO,SAAS,CAAC;QAC3C,CAAC;QAED,IAAI,IAAI,CAAC,MAAM,CAAC,wBAAwB,EAAE,CAAC;YACzC,KAAK,MAAM,EAAE,OAAO,EAAE,IAAI,qBAAqB,EAAE,CAAC;gBAChD,IAAI,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC;oBAAE,OAAO,YAAY,CAAC;YAC9C,CAAC;QACH,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAEO,aAAa,CAAC,IAAY,EAAE,QAAsB;QACxD,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;QAE7D,OAAO,IAAI,YAAY,CACrB,OAAO,EACP,QAAQ,KAAK,MAAM,EAAO,WAAW;QACrC,QAAQ,KAAK,SAAS,EAAI,gBAAgB;QAC1C,QAAQ,KAAK,OAAO,CAAM,WAAW;SACtC,CAAC;IACJ,CAAC;CACF;AAED,eAAe,eAAe,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"footers.d.ts","sourceRoot":"","sources":["../../src/patterns/footers.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAC;AAErD;;GAEG;AACH,eAAO,MAAM,qBAAqB,EAAE,iBAAiB,EAoCpD,CAAC;AAEF,eAAe,qBAAqB,CAAC"}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Detectors for mailing list and newsletter footers
|
|
3
|
+
*/
|
|
4
|
+
// Line-level detectors for mailing-list / newsletter footer boilerplate.
//
// Every entry has the same three fields:
//   pattern     - case-insensitive RegExp anchored at the start of the line
//   description - short label for the kind of footer line
//   example     - sample footer text of the kind the pattern targets
//
// None of the expressions carries the `g` or `y` flag, so calling `.test()`
// repeatedly on the same entry does not depend on `lastIndex`.
export const MAILING_LIST_PATTERNS = [
  {
    pattern: /^You received this (message|email) because/i,
    description: 'Mailing list attribution',
    example: 'You received this message because you are subscribed...',
  },
  {
    pattern: /^To (unsubscribe|stop receiving)/i,
    description: 'Unsubscribe instruction',
    example: 'To unsubscribe, click here',
  },
  {
    pattern: /^To view this (discussion|thread)/i,
    description: 'Web view link',
    example: 'To view this discussion online...',
  },
  {
    pattern: /^(Manage|Update) your (subscription|preferences)/i,
    description: 'Preferences link',
    example: 'Manage your subscription settings',
  },
  {
    pattern: /^This email was sent to\s/i,
    description: 'Recipient notice',
    example: 'This email was sent to user@example.com',
  },
  {
    pattern: /^(Click|Tap) here to unsubscribe/i,
    description: 'Unsubscribe CTA',
    example: 'Click here to unsubscribe',
  },
  {
    // Accept the typographic apostrophe (U+2019) as well as the ASCII one:
    // mail composers routinely auto-replace "don't" with "don’t", and the
    // ASCII-only form silently missed those footers.
    pattern: /^If you (no longer|don['\u2019]t) (wish|want) to receive/i,
    description: 'Opt-out notice',
    example: "If you no longer wish to receive these emails...",
  },
];
|
|
41
|
+
export default MAILING_LIST_PATTERNS;
|
|
42
|
+
//# sourceMappingURL=footers.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"footers.js","sourceRoot":"","sources":["../../src/patterns/footers.ts"],"names":[],"mappings":"AAEA;;GAEG;AACH,MAAM,CAAC,MAAM,qBAAqB,GAAwB;IACxD;QACE,OAAO,EAAE,6CAA6C;QACtD,WAAW,EAAE,0BAA0B;QACvC,OAAO,EAAE,yDAAyD;KACnE;IACD;QACE,OAAO,EAAE,mCAAmC;QAC5C,WAAW,EAAE,yBAAyB;QACtC,OAAO,EAAE,4BAA4B;KACtC;IACD;QACE,OAAO,EAAE,oCAAoC;QAC7C,WAAW,EAAE,eAAe;QAC5B,OAAO,EAAE,mCAAmC;KAC7C;IACD;QACE,OAAO,EAAE,mDAAmD;QAC5D,WAAW,EAAE,kBAAkB;QAC/B,OAAO,EAAE,mCAAmC;KAC7C;IACD;QACE,OAAO,EAAE,4BAA4B;QACrC,WAAW,EAAE,kBAAkB;QAC/B,OAAO,EAAE,yCAAyC;KACnD;IACD;QACE,OAAO,EAAE,mCAAmC;QAC5C,WAAW,EAAE,iBAAiB;QAC9B,OAAO,EAAE,2BAA2B;KACrC;IACD;QACE,OAAO,EAAE,mDAAmD;QAC5D,WAAW,EAAE,gBAAgB;QAC7B,OAAO,EAAE,kDAAkD;KAC5D;CACF,CAAC;AAEF,eAAe,qBAAqB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/patterns/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,IAAI,cAAc,EAAE,MAAM,aAAa,CAAC;AACxD,OAAO,EAAE,OAAO,IAAI,uBAAuB,EAAE,MAAM,iBAAiB,CAAC;AACrE,OAAO,EAAE,OAAO,IAAI,qBAAqB,EAAE,MAAM,cAAc,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/patterns/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,IAAI,cAAc,EAAE,MAAM,aAAa,CAAC;AACxD,OAAO,EAAE,OAAO,IAAI,uBAAuB,EAAE,MAAM,iBAAiB,CAAC;AACrE,OAAO,EAAE,OAAO,IAAI,qBAAqB,EAAE,MAAM,cAAc,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"quotes.d.ts","sourceRoot":"","sources":["../../src/patterns/quotes.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAC;AAErD;;GAEG;AACH,eAAO,MAAM,cAAc,EAAE,iBAAiB,EAoC7C,CAAC;AAEF,eAAe,cAAc,CAAC"}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Detectors for quoted/forwarded email content
|
|
3
|
+
*/
|
|
4
|
+
// Table of quoted / forwarded-content detectors, one row per rule:
//   [regular expression, short description, sample text]
// Rows are expanded below into { pattern, description, example } records,
// in the same order as they are listed here.
export const QUOTE_PATTERNS = [
  [/^>+/, 'Traditional quote prefix', '> previous message text'],
  [/^On\s.+\swrote:$/i, 'Reply attribution line', 'On March 17, 2025, John Smith wrote:'],
  [/^-+\s*Original Message\s*-+/i, 'Forwarded message header', '--- Original Message ---'],
  [/^From:.+Sent:.+$/i, 'Email metadata header', 'From: sender@mail.com Sent: Monday 3:00 PM'],
  [
    /From:.+Date:.+To:.+Subject:/i,
    'Concatenated email headers',
    'From: a@b.comDate: Jan 1To: c@d.comSubject: Hi',
  ],
  [/^_{8,}$/, 'Underscore divider', '________________'],
  // The sample text for this rule is a placeholder, not a literal match.
  [/^[\u200b-\u200f\u202a-\u202e]/, 'Unicode control character divider', '(invisible characters)'],
].map(([pattern, description, example]) => ({ pattern, description, example }));
|
|
41
|
+
export default QUOTE_PATTERNS;
|
|
42
|
+
//# sourceMappingURL=quotes.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"quotes.js","sourceRoot":"","sources":["../../src/patterns/quotes.ts"],"names":[],"mappings":"AAEA;;GAEG;AACH,MAAM,CAAC,MAAM,cAAc,GAAwB;IACjD;QACE,OAAO,EAAE,KAAK;QACd,WAAW,EAAE,0BAA0B;QACvC,OAAO,EAAE,yBAAyB;KACnC;IACD;QACE,OAAO,EAAE,mBAAmB;QAC5B,WAAW,EAAE,wBAAwB;QACrC,OAAO,EAAE,sCAAsC;KAChD;IACD;QACE,OAAO,EAAE,8BAA8B;QACvC,WAAW,EAAE,0BAA0B;QACvC,OAAO,EAAE,0BAA0B;KACpC;IACD;QACE,OAAO,EAAE,mBAAmB;QAC5B,WAAW,EAAE,uBAAuB;QACpC,OAAO,EAAE,4CAA4C;KACtD;IACD;QACE,OAAO,EAAE,8BAA8B;QACvC,WAAW,EAAE,4BAA4B;QACzC,OAAO,EAAE,gDAAgD;KAC1D;IACD;QACE,OAAO,EAAE,SAAS;QAClB,WAAW,EAAE,oBAAoB;QACjC,OAAO,EAAE,kBAAkB;KAC5B;IACD;QACE,OAAO,EAAE,+BAA+B;QACxC,WAAW,EAAE,mCAAmC;QAChD,OAAO,EAAE,wBAAwB;KAClC;CACF,CAAC;AAEF,eAAe,cAAc,CAAC"}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { PatternDefinition } from '../types.js';
|
|
2
|
+
/**
|
|
3
|
+
* Detectors for auto-generated signatures (NOT human sign-offs)
|
|
4
|
+
*/
|
|
5
|
+
export declare const AUTO_SIGNATURE_PATTERNS: PatternDefinition[];
|
|
6
|
+
export default AUTO_SIGNATURE_PATTERNS;
|
|
7
|
+
//# sourceMappingURL=signatures.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"signatures.d.ts","sourceRoot":"","sources":["../../src/patterns/signatures.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAC;AAErD;;GAEG;AACH,eAAO,MAAM,uBAAuB,EAAE,iBAAiB,EAyCtD,CAAC;AAEF,eAAe,uBAAuB,CAAC"}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Detectors for auto-generated signatures (NOT human sign-offs)
|
|
3
|
+
*/
|
|
4
|
+
// Line-level detectors for machine-generated signature content (client
// taglines, dividers, legal boilerplate) as opposed to human sign-offs.
//
// Every entry has the same three fields:
//   pattern     - RegExp tested against a single line
//   description - short label for the kind of signature line
//   example     - sample text of the kind the pattern targets
//
// None of the expressions carries the `g` or `y` flag, so `.test()` is
// stateless across calls.
export const AUTO_SIGNATURE_PATTERNS = [
  {
    // The conventional signature separator is "-- " (two dashes followed by
    // a space). Tolerate trailing whitespace so the delimiter is recognised
    // whether or not the caller trimmed the line first; a bare "--" still
    // matches, "---" and "-- text" still do not.
    pattern: /^--\s*$/,
    description: 'RFC signature delimiter',
    example: '--',
  },
  {
    pattern: /^Sent from my\s/i,
    description: 'Mobile device tagline',
    example: 'Sent from my iPhone',
  },
  {
    pattern: /^Get Outlook for\s/i,
    description: 'Email client promotion',
    example: 'Get Outlook for Android',
  },
  {
    pattern: /^Sent (via|with)\s/i,
    description: 'Third-party app tagline',
    example: 'Sent via Superhuman',
  },
  {
    // Unanchored and case-insensitive: matches the phrase anywhere in a line.
    pattern: /BOOK A MEETING/i,
    description: 'Calendar booking link',
    example: 'BOOK A MEETING: https://...',
  },
  {
    pattern: /^=+$/,
    description: 'Equals sign divider',
    example: '========',
  },
  {
    pattern: /^(CONFIDENTIAL|DISCLAIMER|NOTICE):/i,
    description: 'Legal notice header',
    example: 'CONFIDENTIAL: This email...',
  },
  {
    // Unanchored: the three keywords must appear in this order on one line.
    pattern: /confidential.*intended.*recipient/i,
    description: 'Legal boilerplate text',
    example: '...confidential and intended solely for the recipient...',
  },
];
|
|
46
|
+
export default AUTO_SIGNATURE_PATTERNS;
|
|
47
|
+
//# sourceMappingURL=signatures.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"signatures.js","sourceRoot":"","sources":["../../src/patterns/signatures.ts"],"names":[],"mappings":"AAEA;;GAEG;AACH,MAAM,CAAC,MAAM,uBAAuB,GAAwB;IAC1D;QACE,OAAO,EAAE,MAAM;QACf,WAAW,EAAE,yBAAyB;QACtC,OAAO,EAAE,IAAI;KACd;IACD;QACE,OAAO,EAAE,kBAAkB;QAC3B,WAAW,EAAE,uBAAuB;QACpC,OAAO,EAAE,qBAAqB;KAC/B;IACD;QACE,OAAO,EAAE,qBAAqB;QAC9B,WAAW,EAAE,wBAAwB;QACrC,OAAO,EAAE,yBAAyB;KACnC;IACD;QACE,OAAO,EAAE,qBAAqB;QAC9B,WAAW,EAAE,yBAAyB;QACtC,OAAO,EAAE,qBAAqB;KAC/B;IACD;QACE,OAAO,EAAE,iBAAiB;QAC1B,WAAW,EAAE,uBAAuB;QACpC,OAAO,EAAE,6BAA6B;KACvC;IACD;QACE,OAAO,EAAE,MAAM;QACf,WAAW,EAAE,qBAAqB;QAClC,OAAO,EAAE,UAAU;KACpB;IACD;QACE,OAAO,EAAE,qCAAqC;QAC9C,WAAW,EAAE,qBAAqB;QAClC,OAAO,EAAE,6BAA6B;KACvC;IACD;QACE,OAAO,EAAE,oCAAoC;QAC7C,WAAW,EAAE,wBAAwB;QACrC,OAAO,EAAE,0DAA0D;KACpE;CACF,CAAC;AAEF,eAAe,uBAAuB,CAAC"}
|
package/dist/types.d.ts
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Email fragment types
|
|
3
|
+
*/
|
|
4
|
+
export interface EmailFragment {
|
|
5
|
+
/** Text of this fragment. */
content: string;
|
|
6
|
+
/** NOTE(review): presumably true when the fragment is left out of the visible text — confirm against the parser. */
isHidden: boolean;
|
|
7
|
+
/** NOTE(review): presumably true when the fragment was classified as a signature — confirm against the parser. */
isSignature: boolean;
|
|
8
|
+
/** NOTE(review): presumably true when the fragment was classified as quoted/forwarded content — confirm against the parser. */
isQuoted: boolean;
|
|
9
|
+
}
|
|
10
|
+
/**
|
|
11
|
+
* Parsed email with fragments
|
|
12
|
+
*/
|
|
13
|
+
export interface ParsedEmail {
|
|
14
|
+
/** The fragments that make up the parsed email. */
fragments: EmailFragment[];
|
|
15
|
+
/** Returns a single string. NOTE(review): presumably built from the fragments that are not hidden — confirm against the implementation. */
getVisibleText(): string;
|
|
16
|
+
/** Returns a single string. NOTE(review): presumably built from the fragments flagged as quoted — confirm against the implementation. */
getQuotedText(): string;
|
|
17
|
+
/** Returns the fragments as an array. NOTE(review): whether this is the same array as `fragments` or a copy is not visible from this declaration. */
getFragments(): EmailFragment[];
|
|
18
|
+
}
|
|
19
|
+
/**
|
|
20
|
+
* Pattern definition with metadata for debugging/documentation
|
|
21
|
+
*/
|
|
22
|
+
export interface PatternDefinition {
|
|
23
|
+
/** Regular expression for the rule; the pattern tables shipped in this package use literals without the `g` flag. */
pattern: RegExp;
|
|
24
|
+
/** Short human-readable label for what the pattern detects, e.g. 'Mailing list attribution'. */
description: string;
|
|
25
|
+
/** Illustrative sample text. It is not guaranteed to match `pattern` literally (one shipped entry uses the placeholder '(invisible characters)'). */
example: string;
|
|
26
|
+
}
|
|
27
|
+
/**
|
|
28
|
+
* Parser options
|
|
29
|
+
*/
|
|
30
|
+
export interface ParserOptions {
|
|
31
|
+
// Every field below is optional. This declaration carries no default values itself;
// the "default: true" notes describe what the code consuming these options is documented to apply.
/** Keep human signatures (default: true) */
|
|
32
|
+
keepSignatures?: boolean;
|
|
33
|
+
/** Remove legal disclaimers (default: true) */
|
|
34
|
+
removeDisclaimers?: boolean;
|
|
35
|
+
/** Remove mailing list footers (default: true) */
|
|
36
|
+
removeMailingListFooters?: boolean;
|
|
37
|
+
}
|
|
38
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,OAAO,CAAC;IAClB,WAAW,EAAE,OAAO,CAAC;IACrB,QAAQ,EAAE,OAAO,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,SAAS,EAAE,aAAa,EAAE,CAAC;IAC3B,cAAc,IAAI,MAAM,CAAC;IACzB,aAAa,IAAI,MAAM,CAAC;IACxB,YAAY,IAAI,aAAa,EAAE,CAAC;CACjC;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,MAAM,CAAC;CACjB;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,4CAA4C;IAC5C,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,+CAA+C;IAC/C,iBAAiB,CAAC,EAAE,OAAO,CAAC;IAC5B,kDAAkD;IAClD,wBAAwB,CAAC,EAAE,OAAO,CAAC;CACpC"}
|
package/dist/types.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":""}
|
package/package.json
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "email-body-parser",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Parse and clean email content - removes quotes, auto-signatures, and mailing list footers while preserving human signatures",
|
|
5
|
+
"main": "dist/index.js",
|
|
6
|
+
"module": "dist/index.js",
|
|
7
|
+
"types": "dist/index.d.ts",
|
|
8
|
+
"type": "module",
|
|
9
|
+
"exports": {
|
|
10
|
+
".": {
|
|
11
|
+
"types": "./dist/index.d.ts",
|
|
12
|
+
"import": "./dist/index.js",
|
|
13
|
+
"require": "./dist/index.cjs"
|
|
14
|
+
}
|
|
15
|
+
},
|
|
16
|
+
"files": [
|
|
17
|
+
"dist"
|
|
18
|
+
],
|
|
19
|
+
"scripts": {
|
|
20
|
+
"build": "tsc",
|
|
21
|
+
"test": "node --experimental-vm-modules node_modules/jest/bin/jest.js",
|
|
22
|
+
"prepublishOnly": "npm run build"
|
|
23
|
+
},
|
|
24
|
+
"keywords": [
|
|
25
|
+
"email",
|
|
26
|
+
"parser",
|
|
27
|
+
"email-parser",
|
|
28
|
+
"email-reply",
|
|
29
|
+
"email-content",
|
|
30
|
+
"email-cleaner",
|
|
31
|
+
"quote-removal",
|
|
32
|
+
"signature-removal"
|
|
33
|
+
],
|
|
34
|
+
"author": {
|
|
35
|
+
"name": "Pinenlime",
|
|
36
|
+
"url": "https://pinenlime.com"
|
|
37
|
+
},
|
|
38
|
+
"license": "MIT",
|
|
39
|
+
"repository": {
|
|
40
|
+
"type": "git",
|
|
41
|
+
"url": "git+https://github.com/Pine-Lime/email-body-parser.git"
|
|
42
|
+
},
|
|
43
|
+
"bugs": {
|
|
44
|
+
"url": "https://github.com/Pine-Lime/email-body-parser/issues"
|
|
45
|
+
},
|
|
46
|
+
"homepage": "https://pinenlime.com",
|
|
47
|
+
"funding": {
|
|
48
|
+
"type": "website",
|
|
49
|
+
"url": "https://pinenlime.com"
|
|
50
|
+
},
|
|
51
|
+
"engines": {
|
|
52
|
+
"node": ">=18.0.0"
|
|
53
|
+
},
|
|
54
|
+
"devDependencies": {
|
|
55
|
+
"@types/jest": "^29.5.12",
|
|
56
|
+
"@types/node": "^20.11.0",
|
|
57
|
+
"jest": "^29.7.0",
|
|
58
|
+
"ts-jest": "^29.1.2",
|
|
59
|
+
"typescript": "^5.3.3"
|
|
60
|
+
},
|
|
61
|
+
"peerDependencies": {
|
|
62
|
+
"re2": "^1.20.0"
|
|
63
|
+
},
|
|
64
|
+
"peerDependenciesMeta": {
|
|
65
|
+
"re2": {
|
|
66
|
+
"optional": true
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
|