equoter 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +24 -0
- package/README.md +157 -0
- package/dist/enums.d.ts +5 -0
- package/dist/enums.d.ts.map +1 -0
- package/dist/html.d.ts +45 -0
- package/dist/html.d.ts.map +1 -0
- package/dist/index.cjs +918 -0
- package/dist/index.d.ts +64 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.mjs +913 -0
- package/dist/internal.d.ts +46 -0
- package/dist/internal.d.ts.map +1 -0
- package/dist/patterns.d.ts +23 -0
- package/dist/patterns.d.ts.map +1 -0
- package/package.json +49 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Wicher Heldring
|
|
4
|
+
|
|
5
|
+
Based on quotequail (https://github.com/closeio/quotequail)
|
|
6
|
+
Copyright (c) 2015 Elastic Inc. (Close.io)
|
|
7
|
+
|
|
8
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
9
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
10
|
+
in the Software without restriction, including without limitation the rights
|
|
11
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
12
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
13
|
+
furnished to do so, subject to the following conditions:
|
|
14
|
+
|
|
15
|
+
The above copyright notice and this permission notice shall be included in all
|
|
16
|
+
copies or substantial portions of the Software.
|
|
17
|
+
|
|
18
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
19
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
20
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
21
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
22
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
23
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
24
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
# equoter
|
|
2
|
+
|
|
3
|
+
> **Note:** This codebase was mostly written by AI (Claude), with human direction and review.
|
|
4
|
+
>
|
|
5
|
+
> There is an additional private test set based on real emails (not included in this repo for privacy reasons).
|
|
6
|
+
|
|
7
|
+
A TypeScript library that identifies and separates quoted text in email messages. Detects replies, forwards, and quoted blocks across multiple email clients and languages.
|
|
8
|
+
|
|
9
|
+
TypeScript port of [quotequail](https://github.com/closeio/quotequail), with additional improvements inspired by [Mailgun's Talon](https://github.com/mailgun/talon).
|
|
10
|
+
|
|
11
|
+
## Features
|
|
12
|
+
|
|
13
|
+
- **Plain text & HTML** — works with both `text/plain` and `text/html` email bodies
|
|
14
|
+
- **Client-specific detection** — fast-path selectors for Gmail (`div.gmail_quote`), Outlook (`#divRplyFwdMsg`, border styles), Outlook Web App (`#OLK_SRC_BODY_SECTION`), Zimbra (`hr[data-marker="__DIVIDER__"]`), and more
|
|
15
|
+
- **Two-phase HTML detection** — tries client-specific CSS selectors first, falls back to line-based pattern matching
|
|
16
|
+
- **Multi-language** — English, Dutch, German, French, Spanish, Russian, Swedish, Portuguese
|
|
17
|
+
- **Email clients** — Gmail, Apple Mail, Outlook (2003–2013, Web, iOS), Thunderbird, Mail.ru, Zimbra, Sparrow
|
|
18
|
+
- **Lightweight** — single runtime dependency ([linkedom](https://github.com/WebReflection/linkedom))
|
|
19
|
+
|
|
20
|
+
## Install
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
pnpm add equoter
|
|
24
|
+
# or
|
|
25
|
+
npm install equoter
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## Usage
|
|
29
|
+
|
|
30
|
+
### `quote(text, options?)`
|
|
31
|
+
|
|
32
|
+
Split a plain text email body into quoted and unquoted parts.
|
|
33
|
+
|
|
34
|
+
```ts
|
|
35
|
+
import { quote } from "equoter";
|
|
36
|
+
|
|
37
|
+
const result = quote(`Hello world.
|
|
38
|
+
|
|
39
|
+
On 2012-10-16 at 17:02, Someone <someone@example.com> wrote:
|
|
40
|
+
|
|
41
|
+
> Some quoted text`);
|
|
42
|
+
|
|
43
|
+
// [
|
|
44
|
+
// [true, "Hello world.\n\nOn 2012-10-16 at 17:02, Someone <someone@example.com> wrote:"],
|
|
45
|
+
// [false, "\n> Some quoted text"]
|
|
46
|
+
// ]
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
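Each tuple is `[shouldExpand, text]` — `true` marks text to show expanded (the new content, plus the "On ... wrote:" intro line unless `quoteIntroLine` is set), `false` marks quoted text that can be collapsed.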
|
|
50
|
+
|
|
51
|
+
**Options:**
|
|
52
|
+
|
|
53
|
+
| Option | Default | Description |
|
|
54
|
+
|--------|---------|-------------|
|
|
55
|
+
| `limit` | `1000` | Auto-quote everything after this many lines if no pattern is found |
|
|
56
|
+
| `quoteIntroLine` | `false` | Include the "On ... wrote:" line in the quoted portion |
|
|
57
|
+
|
|
58
|
+
### `quoteHtml(html, options?)`
|
|
59
|
+
|
|
60
|
+
Same as `quote()` but for HTML email bodies. Uses a two-phase approach:
|
|
61
|
+
|
|
62
|
+
1. **Phase 1** — Try client-specific CSS selectors (Gmail, Outlook, Zimbra, etc.) for fast, reliable detection
|
|
63
|
+
2. **Phase 2** — Fall back to line-based pattern matching
|
|
64
|
+
|
|
65
|
+
```ts
|
|
66
|
+
import { quoteHtml } from "equoter";
|
|
67
|
+
|
|
68
|
+
const result = quoteHtml(`<div>Reply text</div>
|
|
69
|
+
<blockquote>On Jan 1, Someone wrote:<br>Original message</blockquote>`);
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
Accepts the same options as `quote()`. Returns `[shouldExpand, htmlFragment][]`.
|
|
73
|
+
|
|
74
|
+
### `unwrap(text)`
|
|
75
|
+
|
|
76
|
+
Decompose a plain text email into structured parts.
|
|
77
|
+
|
|
78
|
+
```ts
|
|
79
|
+
import { unwrap } from "equoter";
|
|
80
|
+
|
|
81
|
+
const result = unwrap(`Hello
|
|
82
|
+
|
|
83
|
+
---------- Forwarded message ----------
|
|
84
|
+
From: Someone <someone@example.com>
|
|
85
|
+
Date: Fri, Apr 26, 2013 at 8:13 PM
|
|
86
|
+
Subject: Weekend classes
|
|
87
|
+
To: recipient@example.com
|
|
88
|
+
|
|
89
|
+
Learn something new`);
|
|
90
|
+
|
|
91
|
+
// {
|
|
92
|
+
// type: "forward",
|
|
93
|
+
// text_top: "Hello",
|
|
94
|
+
// from: "Someone <someone@example.com>",
|
|
95
|
+
// date: "Fri, Apr 26, 2013 at 8:13 PM",
|
|
96
|
+
// subject: "Weekend classes",
|
|
97
|
+
// to: "recipient@example.com",
|
|
98
|
+
// text: "Learn something new"
|
|
99
|
+
// }
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
Returns `null` if no forwarded/replied/quoted structure is detected.
|
|
103
|
+
|
|
104
|
+
**Return fields:**
|
|
105
|
+
|
|
106
|
+
| Field | Description |
|
|
107
|
+
|-------|-------------|
|
|
108
|
+
| `type` | `"reply"`, `"forward"`, or `"quote"` |
|
|
109
|
+
| `text_top` / `text_bottom` | Text before/after the quoted content |
|
|
110
|
+
| `text` | The unwrapped message body |
|
|
111
|
+
| `from`, `to`, `cc`, `bcc`, `subject`, `date`, `reply-to` | Parsed headers (when present) |
|
|
112
|
+
|
|
113
|
+
### `unwrapHtml(html)`
|
|
114
|
+
|
|
115
|
+
Same as `unwrap()` but for HTML email bodies. Returns `html_top`, `html_bottom`, and `html` in place of `text_top`, `text_bottom`, and `text`.
|
|
116
|
+
|
|
117
|
+
```ts
|
|
118
|
+
import { unwrapHtml } from "equoter";
|
|
119
|
+
|
|
120
|
+
const result = unwrapHtml(htmlEmailString);
|
|
121
|
+
|
|
122
|
+
// {
|
|
123
|
+
// type: "forward",
|
|
124
|
+
// html_top: "<div>Some intro text</div>",
|
|
125
|
+
// from: "Someone <someone@example.com>",
|
|
126
|
+
// subject: "The subject",
|
|
127
|
+
// html: "<div>The forwarded content</div>"
|
|
128
|
+
// }
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
## Supported reply patterns
|
|
132
|
+
|
|
133
|
+
| Language | Pattern |
|
|
134
|
+
|----------|---------|
|
|
135
|
+
| English | `On [date], [name] wrote:` |
|
|
136
|
+
| Dutch | `Op [date] schreef [name]:` / `[name] schreef op [date]:` / `Op [date] heeft [name] het volgende geschreven:` |
|
|
137
|
+
| German | `Am [date] schrieb [name]:` |
|
|
138
|
+
| French | `Le [date] a écrit :` |
|
|
139
|
+
| Spanish | `El [date] escribió:` |
|
|
140
|
+
| Russian | `[name] написал(а):` |
|
|
141
|
+
| Swedish | `Den [date] skrev [name]:` |
|
|
142
|
+
| Portuguese | `Em [date] escreveu:` |
|
|
143
|
+
|
|
144
|
+
## Differences from quotequail
|
|
145
|
+
|
|
146
|
+
- Function names use camelCase: `quote_html` -> `quoteHtml`, `unwrap_html` -> `unwrapHtml`
|
|
147
|
+
- Options are passed as an object: `quote(text, { limit: 500, quoteIntroLine: true })`
|
|
148
|
+
- Uses [linkedom](https://github.com/WebReflection/linkedom) for HTML parsing instead of lxml
|
|
149
|
+
- Two-phase HTML detection with client-specific CSS selectors (inspired by Talon)
|
|
150
|
+
- Additional Dutch reply patterns (`schreef op`, `heeft ... het volgende geschreven:`)
|
|
151
|
+
- Outlook `#divRplyFwdMsg`, Outlook Web App, Zimbra, and Windows Mail detection
|
|
152
|
+
|
|
153
|
+
## License
|
|
154
|
+
|
|
155
|
+
MIT — see [LICENSE](LICENSE).
|
|
156
|
+
|
|
157
|
+
Based on [quotequail](https://github.com/closeio/quotequail) by Elastic Inc. (Close.io), also MIT licensed.
|
package/dist/enums.d.ts
ADDED
|
package/dist/enums.d.ts.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"enums.d.ts","sourceRoot":"","sources":["../src/enums.ts"],"names":[],"mappings":"AAAA,oBAAY,QAAQ;IAClB,KAAK,UAAU;IACf,GAAG,QAAQ;CACZ"}
|
package/dist/html.d.ts
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import { Position } from "./enums.js";
|
|
2
|
+
export type ElementRef = [Element, Position];
|
|
3
|
+
/**
|
|
4
|
+
* Trim a slice tuple so it starts/ends at non-empty lines.
|
|
5
|
+
*/
|
|
6
|
+
export declare function trimSlice(lines: string[], sliceTuple: [number | null, number | null] | null): [number, number] | null;
|
|
7
|
+
/**
|
|
8
|
+
* Remove the outermost blockquote indentation by replacing it with a div.
|
|
9
|
+
*/
|
|
10
|
+
export declare function unindentTree(element: Element): void;
|
|
11
|
+
/**
|
|
12
|
+
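 * Get line info arrays from an element: two `ElementRef` arrays (presumably the start and end refs consumed by `sliceTree`) and the line strings.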
|
|
13
|
+
*/
|
|
14
|
+
export declare function getLineInfo(tree: Element, maxLines?: number | null): [ElementRef[], ElementRef[], string[]];
|
|
15
|
+
/**
|
|
16
|
+
* Parse an HTML string into a DOM tree and return the root element.
|
|
17
|
+
*/
|
|
18
|
+
export declare function getHtmlTree(html: string): Element;
|
|
19
|
+
/**
|
|
20
|
+
* Render an element tree back to HTML, stripping the wrapper div.
|
|
21
|
+
*/
|
|
22
|
+
export declare function renderHtmlTree(tree: Element): string;
|
|
23
|
+
/**
|
|
24
|
+
* Slice the HTML tree at the given range.
|
|
25
|
+
*/
|
|
26
|
+
export declare function sliceTree(tree: Element, startRefs: (ElementRef | null)[], endRefs: (ElementRef | null)[], sliceTuple: [number | null, number | null] | null, htmlCopy?: string): Element;
|
|
27
|
+
/**
|
|
28
|
+
* Try to find quoted content using client-specific HTML selectors.
|
|
29
|
+
* Returns the element that starts the quoted section, or null if none found.
|
|
30
|
+
*
|
|
31
|
+
* Tries all heuristics and returns whichever match appears earliest
|
|
32
|
+
* in the document. This handles cases where e.g. Outlook wraps a Gmail
|
|
33
|
+
* thread — the Outlook separator comes first in the document even though
|
|
34
|
+
* the Gmail class is also present deeper in.
|
|
35
|
+
*
|
|
36
|
+
* Heuristics:
|
|
37
|
+
* - Outlook Web App: #OLK_SRC_BODY_SECTION
|
|
38
|
+
* - Outlook desktop/mobile: #divRplyFwdMsg
|
|
39
|
+
* - Outlook border styles: div with known forward CSS
|
|
40
|
+
* - Zimbra: hr[data-marker="__DIVIDER__"]
|
|
41
|
+
* - Gmail: div.gmail_quote or div.x_gmail_quote
|
|
42
|
+
* - Last non-nested blockquote (not .gmail_quote)
|
|
43
|
+
*/
|
|
44
|
+
export declare function findClientSpecificQuote(tree: Element): Element | null;
|
|
45
|
+
//# sourceMappingURL=html.d.ts.map
|
|
package/dist/html.d.ts.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"html.d.ts","sourceRoot":"","sources":["../src/html.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAGtC,MAAM,MAAM,UAAU,GAAG,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC;AA0H7C;;GAEG;AACH,wBAAgB,SAAS,CACvB,KAAK,EAAE,MAAM,EAAE,EACf,UAAU,EAAE,CAAC,MAAM,GAAG,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC,GAAG,IAAI,GAChD,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,IAAI,CAiBzB;AAED;;GAEG;AACH,wBAAgB,YAAY,CAAC,OAAO,EAAE,OAAO,GAAG,IAAI,CAsBnD;AAiND;;GAEG;AACH,wBAAgB,WAAW,CACzB,IAAI,EAAE,OAAO,EACb,QAAQ,GAAE,MAAM,GAAG,IAAW,GAC7B,CAAC,UAAU,EAAE,EAAE,UAAU,EAAE,EAAE,MAAM,EAAE,CAAC,CAexC;AAED;;GAEG;AACH,wBAAgB,WAAW,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAGjD;AAED;;GAEG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,OAAO,GAAG,MAAM,CAGpD;AAED;;GAEG;AACH,wBAAgB,SAAS,CACvB,IAAI,EAAE,OAAO,EACb,SAAS,EAAE,CAAC,UAAU,GAAG,IAAI,CAAC,EAAE,EAChC,OAAO,EAAE,CAAC,UAAU,GAAG,IAAI,CAAC,EAAE,EAC9B,UAAU,EAAE,CAAC,MAAM,GAAG,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC,GAAG,IAAI,EACjD,QAAQ,CAAC,EAAE,MAAM,GAChB,OAAO,CAsET;AAkCD;;;;;;;;;;;;;;;;GAgBG;AACH,wBAAgB,uBAAuB,CAAC,IAAI,EAAE,OAAO,GAAG,OAAO,GAAG,IAAI,CA8CrE"}
|