equoter 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,24 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Wicher Heldring
4
+
5
+ Based on quotequail (https://github.com/closeio/quotequail)
6
+ Copyright (c) 2015 Elastic Inc. (Close.io)
7
+
8
+ Permission is hereby granted, free of charge, to any person obtaining a copy
9
+ of this software and associated documentation files (the "Software"), to deal
10
+ in the Software without restriction, including without limitation the rights
11
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12
+ copies of the Software, and to permit persons to whom the Software is
13
+ furnished to do so, subject to the following conditions:
14
+
15
+ The above copyright notice and this permission notice shall be included in all
16
+ copies or substantial portions of the Software.
17
+
18
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,157 @@
1
+ # equoter
2
+
3
+ > **Note:** This codebase was mostly written by AI (Claude), with human direction and review.
4
+ >
5
+ > There is an additional private test set based on real emails (not included in this repo for privacy reasons).
6
+
7
+ A TypeScript library that identifies and separates quoted text in email messages. Detects replies, forwards, and quoted blocks across multiple email clients and languages.
8
+
9
+ TypeScript port of [quotequail](https://github.com/closeio/quotequail), with additional improvements inspired by [Mailgun's Talon](https://github.com/mailgun/talon).
10
+
11
+ ## Features
12
+
13
+ - **Plain text & HTML** — works with both `text/plain` and `text/html` email bodies
14
+ - **Client-specific detection** — fast-path selectors for Gmail (`div.gmail_quote`), Outlook (`#divRplyFwdMsg`, border styles), Outlook Web App (`#OLK_SRC_BODY_SECTION`), Zimbra (`hr[data-marker]`), and more
15
+ - **Two-phase HTML detection** — tries client-specific CSS selectors first, falls back to line-based pattern matching
16
+ - **Multi-language** — English, Dutch, German, French, Spanish, Russian, Swedish, Portuguese
17
+ - **Email clients** — Gmail, Apple Mail, Outlook (2003–2013, Web, iOS), Thunderbird, Mail.ru, Zimbra, Sparrow
18
+ - **Lightweight** — single runtime dependency ([linkedom](https://github.com/WebReflection/linkedom))
19
+
20
+ ## Install
21
+
22
+ ```bash
23
+ pnpm add equoter
24
+ # or
25
+ npm install equoter
26
+ ```
27
+
28
+ ## Usage
29
+
30
+ ### `quote(text, options?)`
31
+
32
+ Split a plain text email body into quoted and unquoted parts.
33
+
34
+ ```ts
35
+ import { quote } from "equoter";
36
+
37
+ const result = quote(`Hello world.
38
+
39
+ On 2012-10-16 at 17:02, Someone <someone@example.com> wrote:
40
+
41
+ > Some quoted text`);
42
+
43
+ // [
44
+ // [true, "Hello world.\n\nOn 2012-10-16 at 17:02, Someone <someone@example.com> wrote:"],
45
+ // [false, "\n> Some quoted text"]
46
+ // ]
47
+ ```
48
+
49
+ Each tuple is `[shouldExpand, text]` — `true` means the text is original content that should be shown expanded, `false` means it is quoted content that can be collapsed.
50
+
51
+ **Options:**
52
+
53
+ | Option | Default | Description |
54
+ |--------|---------|-------------|
55
+ | `limit` | `1000` | Auto-quote everything after this many lines if no pattern is found |
56
+ | `quoteIntroLine` | `false` | Include the "On ... wrote:" line in the quoted portion |
57
+
58
+ ### `quoteHtml(html, options?)`
59
+
60
+ Same as `quote()` but for HTML email bodies. Uses a two-phase approach:
61
+
62
+ 1. **Phase 1** — Try client-specific CSS selectors (Gmail, Outlook, Zimbra, etc.) for fast, reliable detection
63
+ 2. **Phase 2** — Fall back to line-based pattern matching when no client-specific selector matches
64
+
65
+ ```ts
66
+ import { quoteHtml } from "equoter";
67
+
68
+ const result = quoteHtml(`<div>Reply text</div>
69
+ <blockquote>On Jan 1, Someone wrote:<br>Original message</blockquote>`);
70
+ ```
71
+
72
+ Accepts the same options as `quote()`. Returns `[shouldExpand, htmlFragment][]`.
73
+
74
+ ### `unwrap(text)`
75
+
76
+ Decompose a plain text email into structured parts.
77
+
78
+ ```ts
79
+ import { unwrap } from "equoter";
80
+
81
+ const result = unwrap(`Hello
82
+
83
+ ---------- Forwarded message ----------
84
+ From: Someone <someone@example.com>
85
+ Date: Fri, Apr 26, 2013 at 8:13 PM
86
+ Subject: Weekend classes
87
+ To: recipient@example.com
88
+
89
+ Learn something new`);
90
+
91
+ // {
92
+ // type: "forward",
93
+ // text_top: "Hello",
94
+ // from: "Someone <someone@example.com>",
95
+ // date: "Fri, Apr 26, 2013 at 8:13 PM",
96
+ // subject: "Weekend classes",
97
+ // to: "recipient@example.com",
98
+ // text: "Learn something new"
99
+ // }
100
+ ```
101
+
102
+ Returns `null` if no forwarded/replied/quoted structure is detected.
103
+
104
+ **Return fields:**
105
+
106
+ | Field | Description |
107
+ |-------|-------------|
108
+ | `type` | `"reply"`, `"forward"`, or `"quote"` |
109
+ | `text_top` / `text_bottom` | Text before/after the quoted content |
110
+ | `text` | The unwrapped message body |
111
+ | `from`, `to`, `cc`, `bcc`, `subject`, `date`, `reply-to` | Parsed headers (when present) |
112
+
113
+ ### `unwrapHtml(html)`
114
+
115
+ Same as `unwrap()` but for HTML email bodies. Returns `html_top`, `html_bottom`, and `html` instead of `text_top`, `text_bottom`, and `text`.
116
+
117
+ ```ts
118
+ import { unwrapHtml } from "equoter";
119
+
120
+ const result = unwrapHtml(htmlEmailString);
121
+
122
+ // {
123
+ // type: "forward",
124
+ // html_top: "<div>Some intro text</div>",
125
+ // from: "Someone <someone@example.com>",
126
+ // subject: "The subject",
127
+ // html: "<div>The forwarded content</div>"
128
+ // }
129
+ ```
130
+
131
+ ## Supported reply patterns
132
+
133
+ | Language | Pattern |
134
+ |----------|---------|
135
+ | English | `On [date], [name] wrote:` |
136
+ | Dutch | `Op [date] schreef [name]:` / `[name] schreef op [date]:` / `Op [date] heeft [name] het volgende geschreven:` |
137
+ | German | `Am [date] schrieb [name]:` |
138
+ | French | `Le [date] a écrit :` |
139
+ | Spanish | `El [date] escribió:` |
140
+ | Russian | `[name] написал(а):` |
141
+ | Swedish | `Den [date] skrev [name]:` |
142
+ | Portuguese | `Em [date] escreveu:` |
143
+
144
+ ## Differences from quotequail
145
+
146
+ - Function names use camelCase: `quote_html` -> `quoteHtml`, `unwrap_html` -> `unwrapHtml`
147
+ - Options are passed as an object: `quote(text, { limit: 500, quoteIntroLine: true })`
148
+ - Uses [linkedom](https://github.com/WebReflection/linkedom) for HTML parsing instead of lxml
149
+ - Two-phase HTML detection with client-specific CSS selectors (inspired by Talon)
150
+ - Additional Dutch reply patterns (`schreef op`, `heeft ... het volgende geschreven:`)
151
+ - Outlook `#divRplyFwdMsg`, Outlook Web App, Zimbra, and Windows Mail detection
152
+
153
+ ## License
154
+
155
+ MIT — see [LICENSE](LICENSE).
156
+
157
+ Based on [quotequail](https://github.com/closeio/quotequail) by Elastic Inc. (Close.io), also MIT licensed.
@@ -0,0 +1,5 @@
1
+ export declare enum Position {
2
+ Begin = "begin",
3
+ End = "end"
4
+ }
5
+ //# sourceMappingURL=enums.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"enums.d.ts","sourceRoot":"","sources":["../src/enums.ts"],"names":[],"mappings":"AAAA,oBAAY,QAAQ;IAClB,KAAK,UAAU;IACf,GAAG,QAAQ;CACZ"}
package/dist/html.d.ts ADDED
@@ -0,0 +1,45 @@
1
+ import { Position } from "./enums.js";
2
+ export type ElementRef = [Element, Position];
3
+ /**
4
+ * Trim a slice tuple so it starts/ends at non-empty lines.
5
+ */
6
+ export declare function trimSlice(lines: string[], sliceTuple: [number | null, number | null] | null): [number, number] | null;
7
+ /**
8
+ * Remove the outermost blockquote indentation by replacing it with a div.
9
+ */
10
+ export declare function unindentTree(element: Element): void;
11
+ /**
12
+ * Get line info arrays from an element.
13
+ */
14
+ export declare function getLineInfo(tree: Element, maxLines?: number | null): [ElementRef[], ElementRef[], string[]];
15
+ /**
16
+ * Parse an HTML string into a DOM tree and return the root element.
17
+ */
18
+ export declare function getHtmlTree(html: string): Element;
19
+ /**
20
+ * Render an element tree back to HTML, stripping the wrapper div.
21
+ */
22
+ export declare function renderHtmlTree(tree: Element): string;
23
+ /**
24
+ * Slice the HTML tree at the given range.
25
+ */
26
+ export declare function sliceTree(tree: Element, startRefs: (ElementRef | null)[], endRefs: (ElementRef | null)[], sliceTuple: [number | null, number | null] | null, htmlCopy?: string): Element;
27
+ /**
28
+ * Try to find quoted content using client-specific HTML selectors.
29
+ * Returns the element that starts the quoted section, or null if none found.
30
+ *
31
+ * Tries all heuristics and returns whichever match appears earliest
32
+ * in the document. This handles cases where e.g. Outlook wraps a Gmail
33
+ * thread — the Outlook separator comes first in the document even though
34
+ * the Gmail class is also present deeper in.
35
+ *
36
+ * Heuristics:
37
+ * - Outlook Web App: #OLK_SRC_BODY_SECTION
38
+ * - Outlook desktop/mobile: #divRplyFwdMsg
39
+ * - Outlook border styles: div with known forward CSS
40
+ * - Zimbra: hr[data-marker="__DIVIDER__"]
41
+ * - Gmail: div.gmail_quote or div.x_gmail_quote
42
+ * - Last non-nested blockquote (not .gmail_quote)
43
+ */
44
+ export declare function findClientSpecificQuote(tree: Element): Element | null;
45
+ //# sourceMappingURL=html.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"html.d.ts","sourceRoot":"","sources":["../src/html.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAGtC,MAAM,MAAM,UAAU,GAAG,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC;AA0H7C;;GAEG;AACH,wBAAgB,SAAS,CACvB,KAAK,EAAE,MAAM,EAAE,EACf,UAAU,EAAE,CAAC,MAAM,GAAG,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC,GAAG,IAAI,GAChD,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,IAAI,CAiBzB;AAED;;GAEG;AACH,wBAAgB,YAAY,CAAC,OAAO,EAAE,OAAO,GAAG,IAAI,CAsBnD;AAiND;;GAEG;AACH,wBAAgB,WAAW,CACzB,IAAI,EAAE,OAAO,EACb,QAAQ,GAAE,MAAM,GAAG,IAAW,GAC7B,CAAC,UAAU,EAAE,EAAE,UAAU,EAAE,EAAE,MAAM,EAAE,CAAC,CAexC;AAED;;GAEG;AACH,wBAAgB,WAAW,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAGjD;AAED;;GAEG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,OAAO,GAAG,MAAM,CAGpD;AAED;;GAEG;AACH,wBAAgB,SAAS,CACvB,IAAI,EAAE,OAAO,EACb,SAAS,EAAE,CAAC,UAAU,GAAG,IAAI,CAAC,EAAE,EAChC,OAAO,EAAE,CAAC,UAAU,GAAG,IAAI,CAAC,EAAE,EAC9B,UAAU,EAAE,CAAC,MAAM,GAAG,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC,GAAG,IAAI,EACjD,QAAQ,CAAC,EAAE,MAAM,GAChB,OAAO,CAsET;AAkCD;;;;;;;;;;;;;;;;GAgBG;AACH,wBAAgB,uBAAuB,CAAC,IAAI,EAAE,OAAO,GAAG,OAAO,GAAG,IAAI,CA8CrE"}