equoter 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs ADDED
@@ -0,0 +1,913 @@
1
+ import { parseHTML } from "linkedom";
2
+ //#region src/enums.ts
3
/**
 * Marker used in element references and tokens: "begin" for the opening
 * edge of an element, "end" for its closing edge.
 */
let Position = {
  Begin: "begin",
  End: "end",
};
8
+ //#endregion
9
+ //#region src/patterns.ts
10
// Regular expressions that recognise the line introducing a quoted reply
// ("On <date>, <user> wrote:" and equivalents in other languages).
// parseReply() reads a two-group match as [date, user]; a single group is
// split into date and user afterwards.
const REPLY_PATTERNS = [
  /^On (.*) wrote:$/, // English
  /^Am (.*) schrieb (.*):$/, // German
  /^Le (.*) a écrit :$/, // French
  /El (.*) escribió:$/, // Spanish (not anchored at the start of the line)
  /^(.*) написал\(а\):$/, // Russian
  /^Den (.*) skrev (.*):$/, // Swedish
  /^Em (.*) escreveu:$/, // Portuguese
  /^Op (.*) schreef (.*):$/, // Dutch
  /^(.*) schreef op (.*):$/, // Dutch, text before "schreef op" comes first
  /^Op (.*) heeft (.*) het volgende geschreven:$/, // Dutch
  /([0-9]{4}\/[0-9]{1,2}\/[0-9]{1,2}) (.* <.*@.*>)$/ // "YYYY/M/D Name <address>" at the end of the line
];
// Splits "<date ending in :MM with optional am/pm>, <rest>" into the date
// part (first group) and the rest (last group).
const REPLY_DATE_SPLIT_REGEX = /^(.*(:[0-9]{2}( [apAP]\.?[mM]\.?)?)), (.*)?$/;
24
// Phrases that mark the start of a forwarded or original message. Each
// entry is interpolated into a RegExp below, so character classes such as
// "[mM]" are regex syntax, not literal text.
const FORWARD_MESSAGES = [
  "Begin forwarded message",
  "Anfang der weitergeleiteten E-Mail",
  "Début du message réexpédié",
  "Inicio del mensaje reenviado",
  "Forwarded [mM]essage",
  "Mensaje reenviado",
  "Vidarebefordrat meddelande",
  "Original [mM]essage",
  "Ursprüngliche Nachricht",
  "Mensaje [oO]riginal",
  "Message transféré",
  "Пересылаемое сообщение",
  "Oorspronkelijk bericht",
  "Doorgestuurd bericht"
];
// Separator line of underscores; also emitted by treeLineGenerator() as a
// synthetic line when an element's style matches FORWARD_STYLES.
const FORWARD_LINE = "________________________________";
// Whole-line matchers: the bare separator, "--- <phrase> ---" with three or
// more dashes on each side, and "<phrase>:".
const FORWARD_PATTERNS = [
  new RegExp(`^${FORWARD_LINE}$`),
  ...FORWARD_MESSAGES.map((p) => new RegExp(`^---+ ?${p} ?---+$`)),
  ...FORWARD_MESSAGES.map((p) => new RegExp(`^${p}:$`))
];
// Inline style values (tested against the "style" attribute) that are
// treated as a forward separator.
const FORWARD_STYLES = [/^border:none;border-top:solid #[0-9a-fA-F]{6} 1\.0pt;padding:3\.0pt 0(in|cm) 0(in|cm) 0(in|cm)$/u, /padding-top:\s*5px;\s*border-top-color:\s*rgb\(229,\s*229,\s*229\);\s*border-top-width:\s*1px;\s*border-top-style:\s*solid/u];
47
// Matches a "Name: value" line. The name may consist of letters, digits,
// "-", "_" and spaces; an optional "*" may precede the name and follow the
// colon. Group 1 is the name, group 2 the raw value.
const HEADER_RE = /^\*?([-\p{L}\p{N}_ ]+):\*?(.*)$/u;
// Lower-cased header label -> canonical header key, expanded from one alias
// list per canonical key.
const HEADER_MAP = Object.fromEntries(
  Object.entries({
    from: ["from", "von", "de", "от кого", "från", "van"],
    to: ["to", "an", "aan", "para", "à", "pour", "кому", "till"],
    cc: ["cc", "kopie", "kopia"],
    bcc: ["bcc", "cco", "blindkopie"],
    "reply-to": ["reply-to", "antwort an", "répondre à", "responder a"],
    date: [
      "date",
      "sent",
      "received",
      "datum",
      "gesendet",
      "enviado el",
      "enviados",
      "fecha",
      "дата",
      "verzonden",
    ],
    subject: [
      "subject",
      "betreff",
      "asunto",
      "objet",
      "sujet",
      "тема",
      "ämne",
      "onderwerp",
    ],
  }).flatMap(([canonical, aliases]) => aliases.map((alias) => [alias, canonical])),
);
92
// Pattern lists keyed by the type reported when one of them matches.
// findPatternOnLine() iterates the entries in this order.
const COMPILED_PATTERN_MAP = {
  reply: REPLY_PATTERNS,
  forward: FORWARD_PATTERNS
};
// Any run of whitespace; treeLineGenerator() collapses each run to one space.
const MULTIPLE_WHITESPACE_RE = /\s+/g;
97
+ //#endregion
98
+ //#region src/html.ts
99
// Lower-case tag names that do not end a text line. treeLineGenerator()
// treats every tag outside this set as a block element.
const INLINE_TAGS = new Set(
  "a b em i strong span font q object bdo sub sup center td th".split(" "),
);
116
/**
 * Tell whether a DOM node is a text node (nodeType 3).
 */
function isText(node) {
  const TEXT_NODE = 3;
  return node.nodeType === TEXT_NODE;
}
119
/**
 * Detach a node from its parent. Does nothing when the node has no parent.
 */
function removeNode(node) {
  const owner = node.parentNode;
  if (owner === null || owner === undefined) return;
  owner.removeChild(node);
}
122
/**
 * Drop everything that follows `element` in the document, walking up
 * through every ancestor and removing the later siblings at each level.
 *
 * @param element - Reference element marking the cut.
 * @param includeElement - When true the element itself survives; when
 *   false it is removed together with what follows it.
 */
function trimTreeAfter(element, includeElement = true) {
  let pivot = element;
  for (let level = 0; ; level += 1) {
    const container = pivot.parentElement;
    if (!container) return;
    // Last node that survives at this level (null: nothing survives).
    let boundary = pivot;
    if (level === 0 && !includeElement) {
      boundary = pivot.previousSibling;
      removeNode(pivot);
    }
    if (boundary) {
      for (let doomed = boundary.nextSibling; doomed; doomed = boundary.nextSibling) {
        removeNode(doomed);
      }
    } else {
      for (let doomed = container.firstChild; doomed; doomed = container.firstChild) {
        removeNode(doomed);
      }
    }
    pivot = container;
  }
}
146
/**
 * Remove the document tree preceding the given element, walking up through
 * every ancestor and removing the earlier siblings at each level.
 *
 * @param element - Reference element marking the cut.
 * @param includeElement - When true the element itself is kept; when false
 *   it is removed together with what precedes it.
 * @param keepHead - When true, `<head>` elements among the preceding
 *   siblings are left in place; every other preceding node is still removed.
 */
function trimTreeBefore(element, includeElement = true, keepHead = true) {
  const isProtectedHead = (node) =>
    keepHead && node.nodeType === 1 && node.tagName?.toLowerCase() === "head";
  let current = element;
  let isFirst = true;
  while (true) {
    const parent = current.parentElement;
    if (!parent) break;
    // Capture the walk start before `current` is possibly detached.
    let node = current.previousSibling;
    if (isFirst && !includeElement) removeNode(current);
    // Walk backwards over all preceding siblings. A protected <head> is
    // skipped rather than ending the walk, so nodes before it go too.
    while (node) {
      const previous = node.previousSibling;
      if (!isProtectedHead(node)) removeNode(node);
      node = previous;
    }
    current = parent;
    isFirst = false;
  }
}
175
/**
 * Tell whether an element adds one level of quote indentation, i.e.
 * whether its tag name is "blockquote" (case-insensitive).
 */
function isIndentationElement(element) {
  const tag = element.tagName;
  if (tag === undefined || tag === null) return false;
  return tag.toLowerCase() === "blockquote";
}
178
/**
 * Shrink a [start, end) line range so that it neither starts nor ends on a
 * blank line. A line counts as blank when it is empty/missing or consists
 * only of ">" once trimmed.
 *
 * @param lines - All lines the range refers to.
 * @param sliceTuple - [start, end]; a null start means 0 and a null end
 *   means lines.length. A falsy tuple yields null.
 * @returns The trimmed [start, end] pair, or null.
 */
function trimSlice(lines, sliceTuple) {
  if (!sliceTuple) return null;
  const isBlank = (line) => !line || line.trim() === ">";
  let [first, last] = sliceTuple;
  first = first === null ? 0 : first;
  last = last === null ? lines.length : last;
  while (first < last && isBlank(lines[first])) first += 1;
  while (last > first && isBlank(lines[last - 1])) last -= 1;
  return [first, last];
}
191
/**
 * Replace the first blockquote found in document order (the given element
 * included) with a plain div that holds the same children. At most one
 * blockquote is replaced.
 */
function unindentTree(element) {
  // whatToShow = 1 restricts the walker to element nodes.
  const walker = element.ownerDocument.createTreeWalker(element, 1);
  for (let candidate = walker.currentNode; candidate; candidate = walker.nextNode()) {
    if (!isIndentationElement(candidate)) continue;
    while (candidate.attributes.length > 0) {
      candidate.removeAttribute(candidate.attributes[0].name);
    }
    const replacement = candidate.ownerDocument.createElement("div");
    while (candidate.firstChild) replacement.appendChild(candidate.firstChild);
    candidate.replaceWith(replacement);
    return;
  }
}
208
/**
 * Walk an element depth-first and yield a flat token stream:
 * an "element" token when the element opens, "text" tokens for text
 * content, the tokens of every child element, an "element" token when the
 * element closes, and finally a "text" token for the text after it.
 *
 * Element tokens carry `indentationLevel`; a blockquote reports the
 * increased level on its opening token and the outer level on its closing
 * token. Nodes without a tagName yield nothing.
 */
function* treeTokenGenerator(el, indentationLevel = 0) {
  if (!el.tagName) return;
  const innerLevel = isIndentationElement(el) ? indentationLevel + 1 : indentationLevel;
  yield {
    type: "element",
    element: el,
    position: Position.Begin,
    indentationLevel: innerLevel,
  };
  const leading = getDirectText(el);
  if (leading) yield { type: "text", text: leading };
  for (const child of Array.from(el.children)) {
    const gap = getTextBefore(child);
    if (gap) yield { type: "text", text: gap };
    yield* treeTokenGenerator(child, innerLevel);
  }
  yield {
    type: "element",
    element: el,
    position: Position.End,
    indentationLevel,
  };
  const trailing = getTailText(el);
  if (trailing) yield { type: "text", text: trailing };
}
247
/**
 * Get the text directly inside an element that precedes its first child
 * element.
 *
 * Nodes that are neither text nor elements (for example comments) are
 * skipped instead of ending the scan. No other tokenizer path visits the
 * text behind such a node, so stopping there would drop it.
 *
 * @param el - Element whose leading text is collected.
 * @returns The concatenated text, or null when there is none.
 */
function getDirectText(el) {
  let text = "";
  for (const child of Array.from(el.childNodes)) {
    // nodeType 1 is an element: leading text ends here.
    if (child.nodeType === 1) break;
    if (isText(child)) text += child.textContent ?? "";
  }
  return text || null;
}
256
/**
 * Text between the previous sibling and this element.
 *
 * Always returns null. treeTokenGenerator() already emits that text, either
 * as the parent's direct text or as the previous sibling's tail text, so
 * returning it here would duplicate it. The function exists only because
 * treeTokenGenerator() calls it.
 *
 * @param el - Element whose preceding text is asked for (unused).
 * @returns Always null.
 */
function getTextBefore(el) {
  return null;
}
269
/**
 * Get the text after the closing tag of this element (tail text), up to
 * the next sibling element.
 *
 * Nodes that are neither text nor elements (for example comments) are
 * skipped instead of ending the scan. No other tokenizer path visits the
 * text behind such a node, so stopping there would drop it.
 *
 * @param el - Element whose tail text is collected.
 * @returns The concatenated text, or null when there is none.
 */
function getTailText(el) {
  let text = "";
  // nodeType 1 is an element: the tail ends at the next sibling element.
  for (let next = el.nextSibling; next && next.nodeType !== 1; next = next.nextSibling) {
    if (isText(next)) text += next.textContent ?? "";
  }
  return text || null;
}
281
/**
 * Turn the token stream of a DOM tree into text lines.
 *
 * Text accumulates until a block element boundary or an opening <br> is
 * reached. The whitespace-collapsed text is then yielded together with the
 * [element, position] references where the line started and ended and the
 * indentation level at its start. An opening block element whose style
 * matches FORWARD_STYLES additionally yields a synthetic FORWARD_LINE.
 *
 * @param el - Root element to walk.
 * @param maxLines - Maximum number of lines to yield, or null for no limit.
 */
function* treeLineGenerator(el, maxLines = null) {
  const collapse = (text) => text.replace(MULTIPLE_WHITESPACE_RE, " ").trim();
  let counter = 1;
  const exhausted = () => maxLines !== null && counter > maxLines;
  if (exhausted()) return;
  let buffer = "";
  let lineStart = null;
  let lineIndent = 0;
  for (const token of treeTokenGenerator(el)) {
    if (token === null) continue;
    if (token.type === "text") {
      buffer += token.text;
      continue;
    }
    if (token.type !== "element") continue;
    const { element, position } = token;
    const tag = element.tagName.toLowerCase();
    const opening = position === Position.Begin;
    const isBreak = tag === "br" && opening;
    const isBlock = !INLINE_TAGS.has(tag);
    const style = element.getAttribute("style");
    const isForward = isBlock && opening && style !== null && FORWARD_STYLES.some((re) => re.test(style));
    // Inline elements never end a line.
    if (!isBlock && !isBreak) continue;
    buffer = collapse(buffer);
    if (buffer || isBreak || isForward) {
      const endRef = [element, position];
      yield {
        startRef: lineStart,
        endRef,
        indentationLevel: lineIndent,
        text: buffer,
      };
      counter += 1;
      if (exhausted()) return;
      buffer = "";
      if (isForward) {
        yield {
          startRef: endRef,
          endRef,
          indentationLevel: lineIndent,
          text: FORWARD_LINE,
        };
        counter += 1;
        if (exhausted()) return;
      }
    }
    if (!buffer) {
      // The next line starts at this boundary.
      lineStart = [element, position];
      lineIndent = token.indentationLevel;
    }
  }
}
332
/**
 * Wrap treeLineGenerator(): yield [startRef, endRef, line] triples where
 * the line is prefixed with one "> " per indentation level. Text that
 * itself begins with ">" is escaped with a leading backslash first.
 */
function* indentedTreeLineGenerator(el, maxLines = null) {
  for (const info of treeLineGenerator(el, maxLines)) {
    const body = info.text.startsWith(">") ? `\\${info.text}` : info.text;
    const prefix = "> ".repeat(info.indentationLevel);
    yield [info.startRef, info.endRef, `${prefix}${body}`];
  }
}
345
/**
 * Collect the indented lines of a tree into three parallel arrays.
 *
 * @returns [startRefs, endRefs, lines]; entry i of each array belongs to
 *   line i.
 */
function getLineInfo(tree, maxLines = null) {
  const triples = Array.from(indentedTreeLineGenerator(tree, maxLines));
  const column = (index) => triples.map((triple) => triple[index]);
  return [column(0), column(1), column(2)];
}
363
/**
 * Parse an HTML string wrapped in a div and return the first div of the
 * parsed document as the root element.
 */
function getHtmlTree(html) {
  const wrapped = `<div>${html}</div>`;
  const parsed = parseHTML(wrapped);
  return parsed.document.querySelector("div");
}
370
/**
 * Serialise a tree back to HTML: the root's inner HTML (without the root
 * tag itself), trimmed of surrounding whitespace.
 */
function renderHtmlTree(tree) {
  const markup = tree.innerHTML;
  return markup.trim();
}
376
/**
 * Slice the HTML tree at the given line range.
 *
 * @param tree - Tree that startRefs/endRefs point into.
 * @param startRefs - Per-line [element, position] reference for the line start.
 * @param endRefs - Per-line [element, position] reference for the line end.
 * @param sliceTuple - [start, end] line range; null entries (or a falsy
 *   tuple) leave that side open.
 * @param htmlCopy - Optional HTML source. When given, a fresh tree is parsed
 *   from it and trimmed instead of `tree`; otherwise `tree` itself is
 *   trimmed in place.
 * @returns The trimmed tree, or a fresh empty tree when the range selects
 *   nothing.
 */
function sliceTree(tree, startRefs, endRefs, sliceTuple, htmlCopy) {
  let startRef = null;
  let endRef = null;
  let sliceStart = null;
  let sliceEnd = null;
  if (sliceTuple) {
    [sliceStart, sliceEnd] = sliceTuple;
    // Range lies completely outside the known lines: nothing to keep.
    if (sliceStart !== null && sliceStart >= startRefs.length || sliceEnd !== null && sliceEnd <= 0) return getHtmlTree("");
    // Bounds at or beyond the edges are the same as an open side.
    if (sliceStart !== null && sliceStart <= 0) sliceStart = null;
    if (sliceEnd !== null && sliceEnd >= startRefs.length) sliceEnd = null;
  }
  if (sliceStart !== null) startRef = startRefs[sliceStart];
  // sliceEnd is exclusive, so the last kept line is sliceEnd - 1.
  if (sliceEnd !== null && sliceEnd < endRefs.length) endRef = endRefs[sliceEnd - 1];
  let newTree;
  if (htmlCopy !== void 0) {
    // Work on a fresh parse and re-locate the reference elements in it by
    // their child-index path from the root.
    newTree = getHtmlTree(htmlCopy);
    if (startRef) {
      const path = getElementPath(tree, startRef[0]);
      const newEl = resolveElementPath(newTree, path);
      if (newEl) startRef = [newEl, startRef[1]];
    }
    if (endRef) {
      const path = getElementPath(tree, endRef[0]);
      const newEl = resolveElementPath(newTree, path);
      if (newEl) endRef = [newEl, endRef[1]];
    }
  } else newTree = tree;
  // A line starting at an element's opening keeps that element; a line
  // ending at an element's closing keeps that element.
  const includeStart = startRef ? startRef[1] === Position.Begin : false;
  const includeEnd = endRef ? endRef[1] === Position.End : false;
  // Both references point at the same element and at least one side
  // excludes it: the result is empty.
  if (startRef && endRef && startRef[0] === endRef[0] && (!includeStart || !includeEnd)) return getHtmlTree("");
  if (startRef) trimTreeBefore(startRef[0], includeStart);
  if (endRef) trimTreeAfter(endRef[0], includeEnd);
  return newTree;
}
413
/**
 * Describe where `target` sits below `root` as a list of child indices,
 * outermost first. Each index is the position among the parent's element
 * children. The walk stops early if an ancestor has no parent element.
 */
function getElementPath(root, target) {
  const indices = [];
  let node = target;
  while (node && node !== root) {
    const container = node.parentElement;
    if (!container) break;
    indices.push(Array.from(container.children).indexOf(node));
    node = container;
  }
  return indices.reverse();
}
428
/**
 * Resolve an element path from root.
 *
 * @param root - Element the path starts at.
 * @param path - Child indices (among element children), outermost first.
 * @returns The element the path leads to, or null when any index does not
 *   address an existing child. This covers out-of-range indices and also
 *   negative indices such as the -1 that `indexOf` reports for "not found".
 */
function resolveElementPath(root, path) {
  let current = root;
  for (const index of path) {
    const children = Array.from(current.children);
    // The `index >= 0` guard rejects negative and NaN indices, which
    // would otherwise read an undefined slot and break the next step.
    const next = index >= 0 ? children[index] : undefined;
    if (next === undefined) return null;
    current = next;
  }
  return current;
}
440
/**
 * Look for markup that specific mail clients put around quoted content and
 * return the element that starts the quoted section, or null.
 *
 * Every heuristic contributes at most one candidate; the candidate that
 * comes first in document order wins.
 *
 * Heuristics, in the order they are evaluated:
 * - #OLK_SRC_BODY_SECTION
 * - #divRplyFwdMsg
 * - first div whose style attribute matches one of FORWARD_STYLES
 * - hr[data-marker="__DIVIDER__"]
 * - div.gmail_quote or div.x_gmail_quote
 * - last blockquote that is neither .gmail_quote nor nested in a blockquote
 */
function findClientSpecificQuote(tree) {
  const found = [];
  const addIfPresent = (el) => {
    if (el) found.push(el);
  };
  addIfPresent(tree.querySelector("#OLK_SRC_BODY_SECTION"));
  addIfPresent(tree.querySelector("#divRplyFwdMsg"));
  const styledDiv = Array.from(tree.querySelectorAll("div[style]")).find((div) => {
    const style = div.getAttribute("style") ?? "";
    return FORWARD_STYLES.some((re) => re.test(style));
  });
  addIfPresent(styledDiv);
  addIfPresent(tree.querySelector("hr[data-marker=\"__DIVIDER__\"]"));
  addIfPresent(tree.querySelector("div.gmail_quote, div.x_gmail_quote"));
  const outerQuotes = tree.querySelectorAll("blockquote:not(.gmail_quote):not(blockquote blockquote)");
  if (outerQuotes.length > 0) found.push(outerQuotes[outerQuotes.length - 1]);
  if (found.length === 0) return null;
  let earliest = found[0];
  for (const candidate of found.slice(1)) {
    // Bit 4 (DOCUMENT_POSITION_FOLLOWING): candidate comes after earliest.
    if (!(earliest.compareDocumentPosition(candidate) & 4)) earliest = candidate;
  }
  return earliest;
}
483
+ //#endregion
484
+ //#region src/internal.ts
485
/**
 * Test whether a reply/forward pattern starts on line `n`, allowing the
 * pattern to span up to `maxWrapLines` joined lines. A leading ">" is
 * stripped from the joined text before matching.
 *
 * @returns [lineNumber, type], where lineNumber is `n` for Position.Begin
 *   and the last joined line for Position.End; null when nothing matches.
 */
function findPatternOnLine(lines, n, maxWrapLines, position) {
  for (const typ of Object.keys(COMPILED_PATTERN_MAP)) {
    for (const regex of COMPILED_PATTERN_MAP[typ]) {
      for (let extra = 0; extra < maxWrapLines; extra += 1) {
        let candidate = joinWrappedLines(lines.slice(n, n + extra + 1));
        if (candidate.startsWith(">")) candidate = candidate.slice(1).trim();
        if (!candidate) break;
        if (!regex.test(candidate.trim())) continue;
        if (position === Position.Begin) return [n, typ];
        if (position === Position.End) return [n + extra, typ];
      }
    }
  }
  return null;
}
501
/**
 * Scan the lines from the top for the first reply/forward pattern and
 * return its line number (begin or end of the pattern, per `position`).
 * If `limit` is set and line `limit - 1` is reached without a match, that
 * line number is returned instead. Returns null when neither happens.
 */
function findQuotePosition(lines, maxWrapLines, limit = null, position = Position.End) {
  let n = 0;
  while (n < lines.length) {
    const hit = findPatternOnLine(lines, n, maxWrapLines, position);
    if (hit) return hit[0];
    const limitReached = limit !== null && n >= limit - 1;
    if (limitReached) return n;
    n += 1;
  }
  return null;
}
512
/**
 * Glue the pieces of a wrapped line back together. Pieces are separated by
 * a single space, unless the text so far ends in an opening bracket or a
 * quote character, in which case the next piece is appended directly.
 */
function joinWrappedLines(lines) {
  const NO_SPACE_AFTER = "<([{\"'";
  const [head, ...rest] = lines;
  return rest.reduce((joined, piece) => {
    const glue = joined && NO_SPACE_AFTER.includes(joined[joined.length - 1]) ? "" : " ";
    return joined + glue + piece;
  }, head);
}
522
/**
 * Extract email headers from the given lines.
 *
 * Blank lines are skipped. A line matching HEADER_RE starts a header; its
 * value is stored only when the lower-cased name is a key of HEADER_MAP.
 * A non-matching line is appended to the current header's value while
 * fewer than `maxWrapLines` lines have been consumed for that header and
 * the header is a known one. The first line that fits neither case ends
 * the scan.
 *
 * @param lines - Lines to scan, starting at the first candidate header.
 * @param maxWrapLines - Maximum number of lines a header value may span.
 * @returns [headers, linesProcessed], where headers maps canonical header
 *   keys to values and linesProcessed is the index just past the last line
 *   that belonged to a header.
 */
function extractHeaders(lines, maxWrapLines) {
  // Own-property check: a plain `in` test also matches keys inherited from
  // Object.prototype ("constructor", "toString", ...). Lines such as
  // "constructor: x" would then be stored as headers.
  const isKnownHeader = (name) =>
    name !== null && Object.prototype.hasOwnProperty.call(HEADER_MAP, name);
  const hdrs = {};
  let headerName = null;
  let extendLines = 0;
  let linesProcessed = 0;
  for (let n = 0; n < lines.length; n++) {
    const line = lines[n];
    if (!line.trim()) {
      // A blank line ends any header continuation.
      headerName = null;
      continue;
    }
    const match = HEADER_RE.exec(line);
    if (match) {
      headerName = match[1].trim().toLowerCase();
      const headerValue = match[2];
      extendLines = 0;
      if (isKnownHeader(headerName)) hdrs[HEADER_MAP[headerName]] = headerValue.trim();
      linesProcessed = n + 1;
    } else {
      extendLines++;
      if (extendLines < maxWrapLines && isKnownHeader(headerName)) {
        const key = HEADER_MAP[headerName];
        hdrs[key] = joinWrappedLines([hdrs[key], line.trim()]);
        linesProcessed = n + 1;
      } else break;
    }
  }
  return [hdrs, linesProcessed];
}
554
/**
 * Parse a reply line ("On DATE, USER wrote:") and return { date, from } or null.
 *
 * A leading ">" is stripped first. Every reply pattern is tried; when
 * several match, the last matching one determines the result. Patterns
 * with two capture groups give date and user directly. Patterns with one
 * group are split with REPLY_DATE_SPLIT_REGEX, falling back to a split at
 * the last comma.
 *
 * @param line - The (already joined) reply intro line.
 * @returns { date, from }, or null when either part could not be found.
 */
function parseReply(line) {
  // Two-group patterns normally capture (date, user). The patterns listed
  // here, identified by their regex source, capture (user, date) instead:
  // "<user> schreef op <date>:".
  const USER_FIRST_SOURCES = new Set(["^(.*) schreef op (.*):$"]);
  if (line.startsWith(">")) line = line.slice(1).trim();
  let date;
  let user;
  for (const pattern of COMPILED_PATTERN_MAP["reply"]) {
    const match = pattern.exec(line);
    if (!match) continue;
    const groups = match.slice(1);
    if (groups.length === 2) {
      [date, user] = USER_FIRST_SOURCES.has(pattern.source) ? [groups[1], groups[0]] : groups;
      continue;
    }
    const splitMatch = REPLY_DATE_SPLIT_REGEX.exec(groups[0]);
    if (splitMatch) {
      const splitGroups = splitMatch.slice(1);
      date = splitGroups[0];
      user = splitGroups[splitGroups.length - 1];
      continue;
    }
    // No recognisable time stamp: split at the last comma instead.
    const splitIdx = groups[0].lastIndexOf(",");
    if (splitIdx !== -1) {
      date = groups[0].slice(0, splitIdx);
      user = groups[0].slice(splitIdx + 1);
    }
  }
  if (date) date = date.trim();
  if (user) user = user.trim();
  if (date && user) return {
    date,
    from: user
  };
  return null;
}
592
/**
 * Locate the first line at which a wrapped (forwarded, replied-to or
 * quoted) message begins. Blank lines are skipped. For every other line
 * the checks run in this order:
 *   1. a reply/forward pattern             -> [n, patternEnd, type]
 *   2. a run of at least minQuotedLines ">" lines (blank lines inside the
 *      run are ignored)                    -> [n, n, "quoted"]
 *   3. a header line followed by at least minHeaderLines known headers
 *                                          -> [n, n, "headers"]
 *
 * @returns [startLine, endLine, type] or null.
 */
function findUnwrapStart(lines, maxWrapLines, minHeaderLines, minQuotedLines) {
  for (let n = 0; n < lines.length; n += 1) {
    const line = lines[n];
    if (line.trim() === "") continue;
    const patternHit = findPatternOnLine(lines, n, maxWrapLines, Position.End);
    if (patternHit) return [n, patternHit[0], patternHit[1]];
    if (line.startsWith(">")) {
      let quotedCount = 1;
      let idx = n + 1;
      while (quotedCount < minQuotedLines && idx < lines.length) {
        const peek = lines[idx];
        idx += 1;
        if (!peek.trim()) continue;
        if (!peek.startsWith(">")) break;
        quotedCount += 1;
      }
      if (quotedCount >= minQuotedLines) return [n, n, "quoted"];
    }
    const looksLikeHeader = HEADER_RE.exec(line) !== null;
    HEADER_RE.lastIndex = 0;
    if (looksLikeHeader) {
      const found = extractHeaders(lines.slice(n), maxWrapLines)[0];
      if (Object.keys(found).length >= minHeaderLines) return [n, n, "headers"];
    }
  }
  return null;
}
641
/**
 * Strip one level of quoting ("> " or ">") from the leading run of quoted
 * lines. Processing stops at the first line that does not start with ">";
 * that line and everything after it is left out of the result.
 */
function unindentLines(lines) {
  const stripped = [];
  for (const line of lines) {
    if (!line.startsWith(">")) break;
    const markerLength = line.startsWith("> ") ? 2 : 1;
    stripped.push(line.slice(markerLength));
  }
  return stripped;
}
651
/**
 * Core unwrap function. Returns the unwrap result tuple or null.
 *
 * The tuple is
 *   [type, topRange, headers, mainRange, bottomRange, needsUnindent]
 * where each range is a [start, end] pair of line indices (null meaning
 * "open") or null, and needsUnindent tells the caller that the main range
 * is still ">"-quoted.
 *
 * @param lines - Message lines.
 * @param maxWrapLines - Maximum number of lines a pattern/header may span.
 * @param minHeaderLines - Minimum number of known headers for a header block.
 * @param minQuotedLines - Minimum number of quoted lines for a quote block.
 * @throws Error when findUnwrapStart reports an unknown type.
 */
function unwrap$1(lines, maxWrapLines, minHeaderLines, minQuotedLines) {
  let headers = {};
  const result = findUnwrapStart(lines, maxWrapLines, minHeaderLines, minQuotedLines);
  if (!result) return null;
  const [start, end, typ] = result;
  // Case 1: an intro line ("On ... wrote:" / "Begin forwarded message:").
  if (typ === "forward" || typ === "reply") {
    const mainType = typ;
    if (typ === "reply") {
      // The intro line itself may carry date and sender.
      const replyHeaders = parseReply(joinWrappedLines(lines.slice(start, end + 1)));
      if (replyHeaders) Object.assign(headers, replyHeaders);
    }
    // Look below the intro for a quoted section or headers; a single
    // quoted line is enough here (minQuotedLines = 1).
    const result2 = findUnwrapStart(lines.slice(end + 1), maxWrapLines, minHeaderLines, 1);
    const start2 = result2 ? result2[0] : 0;
    const typ2 = result2 ? result2[2] : null;
    if (typ2 === "quoted") {
      // Unindent the quoted section and check it for headers.
      const quotedStart = end + 1 + start2;
      const unquoted = unindentLines(lines.slice(quotedStart));
      const restStart = quotedStart + unquoted.length;
      const result3 = findUnwrapStart(unquoted, maxWrapLines, minHeaderLines, minQuotedLines);
      const start3 = result3 ? result3[0] : 0;
      if ((result3 ? result3[2] : null) === "headers") {
        const [hdrs, hdrsLength] = extractHeaders(unquoted.slice(start3), maxWrapLines);
        if (Object.keys(hdrs).length > 0) Object.assign(headers, hdrs);
        // Main text starts after the headers inside the quoted section.
        const rest2Start = quotedStart + start3 + hdrsLength;
        return [
          mainType,
          [0, start],
          headers,
          [rest2Start, restStart],
          [restStart, null],
          true
        ];
      }
      return [
        mainType,
        [0, start],
        headers,
        [quotedStart, restStart],
        [restStart, null],
        true
      ];
    }
    if (typ2 === "headers") {
      // Unquoted headers follow the intro line.
      const [hdrs, hdrsLength] = extractHeaders(lines.slice(start + 1), maxWrapLines);
      if (Object.keys(hdrs).length > 0) Object.assign(headers, hdrs);
      const restStart = start + 1 + hdrsLength;
      return [
        mainType,
        [0, start],
        headers,
        [restStart, null],
        null,
        false
      ];
    }
    // Neither a quoted section nor headers: everything below is the
    // wrapped text.
    return [
      mainType,
      [0, start],
      headers,
      [start + (start2 || 0) + 1, null],
      null,
      false
    ];
  }
  // Case 2: a bare header block is reported as a forward.
  if (typ === "headers") {
    const mainType = "forward";
    const [hdrs, hdrsLength] = extractHeaders(lines.slice(start), maxWrapLines);
    const restStart = start + hdrsLength;
    return [
      mainType,
      [0, start],
      hdrs,
      [restStart, null],
      null,
      false
    ];
  }
  // Case 3: quoted text; headers may sit inside the quoted section.
  if (typ === "quoted") {
    const unquoted = unindentLines(lines.slice(start));
    const restStart = start + unquoted.length;
    const result2 = findUnwrapStart(unquoted, maxWrapLines, minHeaderLines, minQuotedLines);
    const start2 = result2 ? result2[0] : 0;
    if ((result2 ? result2[2] : null) === "headers") {
      const mainType = "forward";
      const [hdrs, hdrsLength] = extractHeaders(unquoted.slice(start2), maxWrapLines);
      const rest2Start = start + hdrsLength;
      return [
        mainType,
        [0, start],
        hdrs,
        [rest2Start, restStart],
        [restStart, null],
        true
      ];
    }
    return [
      "quote",
      [null, start],
      null,
      [start, restStart],
      [restStart, null],
      true
    ];
  }
  throw new Error(`invalid type: ${typ}`);
}
760
+ //#endregion
761
+ //#region src/index.ts
762
/**
 * Split a plain-text email body into a visible part and a quoted part.
 *
 * @param text - Plain text message.
 * @param options.limit - Line limit (default 1000); when no quote intro is
 *   found before it, the split happens at the line where the limit is
 *   reached. Pass null for no limit.
 * @param options.quoteIntroLine - When true, the intro line ("On ...
 *   wrote:" / "Begin forwarded message:") goes into the quoted part
 *   instead of the visible part.
 * @returns A list of [shouldExpand, textSegment] tuples: one tuple when no
 *   split point was found, otherwise the visible and the quoted segment.
 */
function quote(text, options = {}) {
  const limit = options.limit === undefined ? 1e3 : options.limit;
  const quoteIntroLine = options.quoteIntroLine === undefined ? false : options.quoteIntroLine;
  const lines = text.split("\n");
  const position = quoteIntroLine ? Position.Begin : Position.End;
  const found = findQuotePosition(lines, 2, limit, position);
  if (found === null) return [[true, text]];
  const splitIdx = found + (quoteIntroLine ? 0 : 1);
  const visible = lines.slice(0, splitIdx).join("\n");
  const quoted = lines.slice(splitIdx).join("\n");
  return [[true, visible], [false, quoted]];
}
780
/**
 * Like quote(), but takes an HTML message as an argument.
 *
 * Two phases:
 * 1. Look for client-specific quote markup (see findClientSpecificQuote).
 *    If found, the split is placed at the first line belonging to it, or
 *    at a reply/forward intro line found within the three lines before it.
 * 2. Otherwise, or when phase 1 yields no usable split, fall back to
 *    line-based pattern matching over the whole message.
 *
 * @param html - HTML message.
 * @param options.limit - Line limit (default 1000). Pass null to disable
 *   the limit and scan every line.
 * @param options.quoteIntroLine - When true, the intro line goes into the
 *   quoted part instead of the visible part.
 * @returns A list of [shouldExpand, htmlSegment] tuples: one tuple when no
 *   split point was found, otherwise the visible and the quoted segment.
 */
function quoteHtml(html, options = {}) {
  const { limit = 1e3, quoteIntroLine = false } = options;
  const position = quoteIntroLine ? Position.Begin : Position.End;
  // One line more than the limit is collected so the split has something
  // after it. `null + 1` would coerce to 1 and cap the scan at a single
  // line, so a null limit is passed through as "no limit".
  const maxLines = limit === null ? null : limit + 1;
  // The visible part is cut from a fresh parse of `html`; the quoted part
  // is cut from `sourceTree` itself, which is modified in place.
  const splitAt = (sourceTree, startRefs, endRefs, splitIdx) => {
    const startTree = sliceTree(sourceTree, startRefs, endRefs, [0, splitIdx], html);
    const endTree = sliceTree(sourceTree, startRefs, endRefs, [splitIdx, null]);
    return [[true, renderHtmlTree(startTree)], [false, renderHtmlTree(endTree)]];
  };
  const tree = getHtmlTree(html);
  const clientQuote = findClientSpecificQuote(tree);
  if (clientQuote) {
    const [startRefs, endRefs, lines] = getLineInfo(tree, maxLines);
    const quoteLineIdx = findQuoteSplitIndex(clientQuote, startRefs, endRefs);
    if (quoteLineIdx !== null && quoteLineIdx > 0) {
      // An intro line may sit just above the quote element.
      const searchStart = Math.max(0, quoteLineIdx - 3);
      const searchEnd = quoteLineIdx + 1;
      const found = findQuotePosition(lines.slice(searchStart, searchEnd), 1, null, position);
      let splitIdx;
      if (found !== null) {
        const effectiveFound = found + searchStart;
        splitIdx = quoteIntroLine ? effectiveFound : effectiveFound + 1;
      } else splitIdx = quoteLineIdx;
      if (splitIdx > 0 && splitIdx <= lines.length) return splitAt(tree, startRefs, endRefs, splitIdx);
    }
  }
  const tree2 = getHtmlTree(html);
  const [startRefs, endRefs, lines] = getLineInfo(tree2, maxLines);
  const found = findQuotePosition(lines, 1, limit, position);
  if (found === null) return [[true, renderHtmlTree(tree2)]];
  const splitIdx = quoteIntroLine ? found : found + 1;
  return splitAt(tree2, startRefs, endRefs, splitIdx);
}
819
/**
 * Map a client-detected quote element to the index of the first line that
 * belongs to the quoted content.
 *
 * - For an <hr> separator: the first line whose start element comes after
 *   the separator in document order.
 * - For any other element: the first line whose start or end element is
 *   the target itself or is contained in it.
 *
 * @returns The line index, or null when no line qualifies.
 */
function findQuoteSplitIndex(target, startRefs, endRefs) {
  const isSeparator = target.tagName?.toLowerCase() === "hr";
  for (let i = 0; i < startRefs.length; i += 1) {
    const startEl = startRefs[i]?.[0];
    if (isSeparator) {
      if (startEl && isAfterInDocument(target, startEl)) return i;
      continue;
    }
    const endEl = endRefs[i]?.[0];
    const touchesTarget = startEl === target
      || endEl === target
      || target.contains(startEl)
      || target.contains(endEl);
    if (touchesTarget) return i;
  }
  return null;
}
842
/**
 * Tell whether `after` follows `before` in document order without being
 * `before` itself or one of its descendants. The check looks at the later
 * element siblings of `before` and of each of its ancestors.
 */
function isAfterInDocument(before, after) {
  if (before === after || before.contains(after)) return false;
  for (let level = before; level; level = level.parentElement) {
    for (let sibling = level.nextElementSibling; sibling; sibling = sibling.nextElementSibling) {
      if (sibling === after || sibling.contains(after)) return true;
    }
  }
  return false;
}
859
/**
 * Unwrap a plain-text email body that is a forward, a reply, or contains
 * quoted text.
 *
 * @param text - Plain text message.
 * @returns null when nothing was recognised. Otherwise an object with
 *   `type`, plus `text` (the wrapped message), `text_top` and
 *   `text_bottom` (the text around it) when non-empty, plus any extracted
 *   header fields.
 */
function unwrap(text) {
  const lines = text.split("\n");
  const outcome = unwrap$1(lines, 2, 2, 3);
  if (!outcome) return null;
  const [typ, topRange, hdrs, mainRange, bottomRange, needsUnindent] = outcome;
  const pick = (range) => (range ? lines.slice(range[0] ?? 0, range[1] ?? void 0) : []);
  const asText = (segment) => segment.join("\n").trim();
  const topLines = pick(topRange);
  const mainLines = needsUnindent ? unindentLines(pick(mainRange)) : pick(mainRange);
  const bottomLines = pick(bottomRange);
  const result = { type: typ };
  const body = asText(mainLines);
  const above = asText(topLines);
  const below = asText(bottomLines);
  if (body) result.text = body;
  if (above) result.text_top = above;
  if (below) result.text_bottom = below;
  if (hdrs) Object.assign(result, hdrs);
  return result;
}
882
/**
 * HTML counterpart of unwrap().
 *
 * @param html - HTML message.
 * @returns null when nothing was recognised. Otherwise an object with
 *   `type`, plus `html` (the wrapped message), `html_top` and
 *   `html_bottom` (the markup around it) when non-empty, plus any
 *   extracted header fields.
 */
function unwrapHtml(html) {
  const tree = getHtmlTree(html);
  const [startRefs, endRefs, lines] = getLineInfo(tree);
  const outcome = unwrap$1(lines, 1, 2, 1);
  if (!outcome) return null;
  const [typ, topRange, hdrs, mainRange, bottomRange, needsUnindent] = outcome;
  const result = { type: typ };
  const topSlice = trimSlice(lines, topRange);
  const mainSlice = trimSlice(lines, mainRange);
  const bottomSlice = trimSlice(lines, bottomRange);
  // Top and bottom are cut from fresh parses of `html`, so `tree` is
  // still untouched when the main part is cut from it.
  for (const [key, range] of [["html_top", topSlice], ["html_bottom", bottomSlice]]) {
    if (!range) continue;
    const rendered = renderHtmlTree(sliceTree(tree, startRefs, endRefs, range, html));
    if (rendered) result[key] = rendered;
  }
  if (mainSlice) {
    const mainTree = sliceTree(tree, startRefs, endRefs, mainSlice);
    if (needsUnindent) unindentTree(mainTree);
    const rendered = renderHtmlTree(mainTree);
    if (rendered) result.html = rendered;
  }
  if (hdrs) Object.assign(result, hdrs);
  return result;
}
912
+ //#endregion
913
+ export { Position, quote, quoteHtml, unwrap, unwrapHtml };