forwarded_email_parser 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,916 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ForwardedEmailParser
4
+ class EmailParser
5
# Characters that may separate consecutive mailboxes in a recipient list:
#   ","  Apple Mail, Gmail, New Outlook 2019, Thunderbird
#   ";"  Outlook Live / 365, Yahoo Mail
MAILBOXES_SEPARATORS = [",", ";"].freeze
9
+
10
# Keys of REGEXES for which load_regexes also builds a "<key>_line" variant,
# i.e. a regex that additionally captures the whole matched line.
LINE_REGEXES = [
  :separator,
  :original_subject,
  :original_subject_lax,
  :original_to,
  :original_reply_to,
  :original_cc,
  :original_date
].freeze
19
+
20
+ REGEXES = {
21
+ quote_line_break: /^(>+)\s?$/m, # Apple Mail, Missive
22
+ quote: /^(>+)\s?/m, # Apple Mail
23
+ four_spaces: /^(\ {4})\s?/m, # Outlook 2019
24
+ carriage_return: /\r\n/m, # Outlook 2019
25
+ byte_order_mark: /\uFEFF/m, # Outlook 2019
26
+ trailing_non_breaking_space: /\u00A0$/m, # IONOS by 1 & 1
27
+ non_breaking_space: /\u00A0/m, # HubSpot
28
+
29
+ subject: [
30
+ /^Fw:(.*)/, # Outlook Live / 365 (cs, en, hr, hu, sk), Yahoo Mail (all locales)
31
+ /^VS:(.*)/, # Outlook Live / 365 (da), New Outlook 2019 (da)
32
+ /^WG:(.*)/, # Outlook Live / 365 (de), New Outlook 2019 (de)
33
+ /^RV:(.*)/, # Outlook Live / 365 (es), New Outlook 2019 (es)
34
+ /^TR:(.*)/, # Outlook Live / 365 (fr), New Outlook 2019 (fr)
35
+ /^I:(.*)/, # Outlook Live / 365 (it), New Outlook 2019 (it)
36
+ /^FW:(.*)/, # Outlook Live / 365 (nl, pt), New Outlook 2019 (cs, en, hu, nl, pt, ru, sk), Outlook 2019 (all locales)
37
+ /^Vs:(.*)/, # Outlook Live / 365 (no)
38
+ /^PD:(.*)/, # Outlook Live / 365 (pl), New Outlook 2019 (pl)
39
+ /^ENC:(.*)/, # Outlook Live / 365 (pt-br), New Outlook 2019 (pt-br)
40
+ /^Redir.:(.*)/, # Outlook Live / 365 (ro)
41
+ /^VB:(.*)/, # Outlook Live / 365 (sv), New Outlook 2019 (sv)
42
+ /^VL:(.*)/, # New Outlook 2019 (fi)
43
+ /^Videresend:(.*)/, # New Outlook 2019 (no)
44
+ /^İLT:(.*)/, # New Outlook 2019 (tr)
45
+ /^Fwd:(.*)/ # Gmail (all locales), Thunderbird (all locales), Missive (en), MailMate (en)
46
+ ],
47
+
48
+ separator: [
49
+ /^>?\s*Begin forwarded message\s?:/, # Apple Mail (en)
50
+ /^>?\s*Začátek přeposílané zprávy\s?:/, # Apple Mail (cs)
51
+ /^>?\s*Start på videresendt besked\s?:/, # Apple Mail (da)
52
+ /^>?\s*Anfang der weitergeleiteten Nachricht\s?:/, # Apple Mail (de)
53
+ /^>?\s*Inicio del mensaje reenviado\s?:/, # Apple Mail (es)
54
+ /^>?\s*Välitetty viesti alkaa\s?:/, # Apple Mail (fi)
55
+ /^>?\s*Début du message réexpédié\s?:/, # Apple Mail (fr)
56
+ /^>?\s*Début du message transféré\s?:/, # Apple Mail iOS (fr)
57
+ /^>?\s*Započni proslijeđenu poruku\s?:/, # Apple Mail (hr)
58
+ /^>?\s*Továbbított levél kezdete\s?:/, # Apple Mail (hu)
59
+ /^>?\s*Inizio messaggio inoltrato\s?:/, # Apple Mail (it)
60
+ /^>?\s*Begin doorgestuurd bericht\s?:/, # Apple Mail (nl)
61
+ /^>?\s*Videresendt melding\s?:/, # Apple Mail (no)
62
+ /^>?\s*Początek przekazywanej wiadomości\s?:/, # Apple Mail (pl)
63
+ /^>?\s*Início da mensagem reencaminhada\s?:/, # Apple Mail (pt)
64
+ /^>?\s*Início da mensagem encaminhada\s?:/, # Apple Mail (pt-br)
65
+ /^>?\s*Începe mesajul redirecționat\s?:/, # Apple Mail (ro)
66
+ /^>?\s*Начало переадресованного сообщения\s?:/, # Apple Mail (ru)
67
+ /^>?\s*Začiatok preposlanej správy\s?:/, # Apple Mail (sk)
68
+ /^>?\s*Vidarebefordrat mejl\s?:/, # Apple Mail (sv)
69
+ /^>?\s*İleti başlangıcı\s?:/, # Apple Mail (tr)
70
+ /^>?\s*Початок листа, що пересилається\s?:/, # Apple Mail (uk)
71
+ /^\s*-{8,10}\s*Forwarded message\s*-{8,10}\s*/, # Gmail (all locales), Missive (en), HubSpot (en)
72
+ /^\s*_{32}\s*$/, # Outlook Live / 365 (all locales)
73
+ /^\s?Forwarded message:/, # Mailmate
74
+ /^\s?Dne\s?.+,\s?.+\s*[\[|<].+[\]|>]\s?napsal\(a\)\s?:/, # Outlook 2019 (cz)
75
+ /^\s?D.\s?.+\s?skrev\s?".+"\s*[\[|<].+[\]|>]\s?:/, # Outlook 2019 (da)
76
+ /^\s?Am\s?.+\s?schrieb\s?".+"\s*[\[|<].+[\]|>]\s?:/, # Outlook 2019 (de)
77
+ /^\s?On\s?.+,\s?".+"\s*[\[|<].+[\]|>]\s?wrote\s?:/, # Outlook 2019 (en)
78
+ /^\s?El\s?.+,\s?".+"\s*[\[|<].+[\]|>]\s?escribió\s?:/, # Outlook 2019 (es)
79
+ /^\s?Le\s?.+,\s?«.+»\s*[\[|<].+[\]|>]\s?a écrit\s?:/, # Outlook 2019 (fr)
80
+ /^\s?.+\s*[\[|<].+[\]|>]\s?kirjoitti\s?.+\s?:/, # Outlook 2019 (fi)
81
+ /^\s?.+\s?időpontban\s?.+\s*[\[|<|(].+[\]|>|)]\s?ezt írta\s?:/, # Outlook 2019 (hu)
82
+ /^\s?Il giorno\s?.+\s?".+"\s*[\[|<].+[\]|>]\s?ha scritto\s?:/, # Outlook 2019 (it)
83
+ /^\s?Op\s?.+\s?heeft\s?.+\s*[\[|<].+[\]|>]\s?geschreven\s?:/, # Outlook 2019 (nl)
84
+ /^\s?.+\s*[\[|<].+[\]|>]\s?skrev følgende den\s?.+\s?:/, # Outlook 2019 (no)
85
+ /^\s?Dnia\s?.+\s?„.+”\s*[\[|<].+[\]|>]\s?napisał\s?:/, # Outlook 2019 (pl)
86
+ /^\s?Em\s?.+,\s?".+"\s*[\[|<].+[\]|>]\s?escreveu\s?:/, # Outlook 2019 (pt)
87
+ /^\s?.+\s?пользователь\s?".+"\s*[\[|<].+[\]|>]\s?написал\s?:/, # Outlook 2019 (ru)
88
+ /^\s?.+\s?používateľ\s?.+\s*\([\[|<].+[\]|>]\)\s?napísal\s?:/, # Outlook 2019 (sk)
89
+ /^\s?Den\s?.+\s?skrev\s?".+"\s*[\[|<].+[\]|>]\s?följande\s?:/, # Outlook 2019 (sv)
90
+ /^\s?".+"\s*[\[|<].+[\]|>],\s?.+\s?tarihinde şunu yazdı\s?:/, # Outlook 2019 (tr)
91
+ /^\s*-{5,8} Přeposlaná zpráva -{5,8}\s*/, # Yahoo Mail (cs), Thunderbird (cs)
92
+ /^\s*-{5,8} Videresendt meddelelse -{5,8}\s*/, # Yahoo Mail (da), Thunderbird (da)
93
+ /^\s*-{5,10} Weitergeleitete Nachricht -{5,10}\s*/, # Yahoo Mail (de), Thunderbird (de), HubSpot (de)
94
+ /^\s*-{5,8} Forwarded Message -{5,8}\s*/, # Yahoo Mail (en), Thunderbird (en)
95
+ /^\s*-{5,10} Mensaje reenviado -{5,10}\s*/, # Yahoo Mail (es), Thunderbird (es), HubSpot (es)
96
+ /^\s*-{5,10} Edelleenlähetetty viesti -{5,10}\s*/, # Yahoo Mail (fi), HubSpot (fi)
97
+ /^\s*-{5} Message transmis -{5}\s*/, # Yahoo Mail (fr)
98
+ /^\s*-{5,8} Továbbított üzenet -{5,8}\s*/, # Yahoo Mail (hu), Thunderbird (hu)
99
+ /^\s*-{5,10} Messaggio inoltrato -{5,10}\s*/, # Yahoo Mail (it), HubSpot (it)
100
+ /^\s*-{5,10} Doorgestuurd bericht -{5,10}\s*/, # Yahoo Mail (nl), Thunderbird (nl), HubSpot (nl)
101
+ /^\s*-{5,8} Videresendt melding -{5,8}\s*/, # Yahoo Mail (no), Thunderbird (no)
102
+ /^\s*-{5} Przekazana wiadomość -{5}\s*/, # Yahoo Mail (pl)
103
+ /^\s*-{5,8} Mensagem reencaminhada -{5,8}\s*/, # Yahoo Mail (pt), Thunderbird (pt)
104
+ /^\s*-{5,10} Mensagem encaminhada -{5,10}\s*/, # Yahoo Mail (pt-br), Thunderbird (pt-br), HubSpot (pt-br)
105
+ /^\s*-{5,8} Mesaj redirecționat -{5,8}\s*/, # Yahoo Mail (ro)
106
+ /^\s*-{5} Пересылаемое сообщение -{5}\s*/, # Yahoo Mail (ru)
107
+ /^\s*-{5} Preposlaná správa -{5}\s*/, # Yahoo Mail (sk)
108
+ /^\s*-{5,10} Vidarebefordrat meddelande -{5,10}\s*/, # Yahoo Mail (sv), Thunderbird (sv), HubSpot (sv)
109
+ /^\s*-{5} İletilmiş Mesaj -{5}\s*/, # Yahoo Mail (tr)
110
+ /^\s*-{5} Перенаправлене повідомлення -{5}\s*/, # Yahoo Mail (uk)
111
+ %r{^\s*-{8} Välitetty viesti / Fwd.Msg -{8}\s*}m, # Thunderbird (fi)
112
+ /^\s*-{8,10} Message transféré -{8,10}\s*/, # Thunderbird (fr), HubSpot (fr)
113
+ /^\s*-{8} Proslijeđena poruka -{8}\s*/, # Thunderbird (hr)
114
+ /^\s*-{8} Messaggio Inoltrato -{8}\s*/, # Thunderbird (it)
115
+ /^\s*-{3} Treść przekazanej wiadomości -{3}\s*/, # Thunderbird (pl)
116
+ /^\s*-{8} Перенаправленное сообщение -{8}\s*/, # Thunderbird (ru)
117
+ /^\s*-{8} Preposlaná správa --- Forwarded Message -{8}\s*/, # Thunderbird (sk)
118
+ /^\s*-{8} İletilen İleti -{8}\s*/, # Thunderbird (tr)
119
+ /^\s*-{8} Переслане повідомлення -{8}\s*/, # Thunderbird (uk)
120
+ /^\s*-{9,10} メッセージを転送 -{9,10}\s*/, # HubSpot (ja)
121
+ /^\s*-{9,10} Wiadomość przesłana dalej -{9,10}\s*/, # HubSpot (pl)
122
+ /^>?\s*-{10} Original Message -{10}\s*/ # IONOS by 1 & 1 (en)
123
+ ],
124
+
125
+ separator_with_information: [
126
+ /^\s?Dne\s?(?<date>.+),\s?(?<from_name>.+)\s*[\[|<](?<from_address>.+)[\]|>]\s?napsal\(a\)\s?:/, # Outlook 2019 (cz)
127
+ /^\s?D.\s?(?<date>.+)\s?skrev\s?"(?<from_name>.+)"\s*[\[|<](?<from_address>.+)[\]|>]\s?:/, # Outlook 2019 (da)
128
+ /^\s?Am\s?(?<date>.+)\s?schrieb\s?"(?<from_name>.+)"\s*[\[|<](?<from_address>.+)[\]|>]\s?:/, # Outlook 2019 (de)
129
+ /^\s?On\s?(?<date>.+),\s?"(?<from_name>.+)"\s*[\[|<](?<from_address>.+)[\]|>]\s?wrote\s?:/, # Outlook 2019 (en)
130
+ /^\s?El\s?(?<date>.+),\s?"(?<from_name>.+)"\s*[\[|<](?<from_address>.+)[\]|>]\s?escribió\s?:/, # Outlook 2019 (es)
131
+ /^\s?Le\s?(?<date>.+),\s?«(?<from_name>.+)»\s*[\[|<](?<from_address>.+)[\]|>]\s?a écrit\s?:/, # Outlook 2019 (fr)
132
+ /^\s?(?<from_name>.+)\s*[\[|<](?<from_address>.+)[\]|>]\s?kirjoitti\s?(?<date>.+)\s?:/, # Outlook 2019 (fi)
133
+ /^\s?(?<date>.+)\s?időpontban\s?(?<from_name>.+)\s*[\[|<|(](?<from_address>.+)[\]|>|)]\s?ezt írta\s?:/, # Outlook 2019 (hu)
134
+ /^\s?Il giorno\s?(?<date>.+)\s?"(?<from_name>.+)"\s*[\[|<](?<from_address>.+)[\]|>]\s?ha scritto\s?:/, # Outlook 2019 (it)
135
+ /^\s?Op\s?(?<date>.+)\s?heeft\s?(?<from_name>.+)\s*[\[|<](?<from_address>.+)[\]|>]\s?geschreven\s?:/, # Outlook 2019 (nl)
136
+ /^\s?(?<from_name>.+)\s*[\[|<](?<from_address>.+)[\]|>]\s?skrev følgende den\s?(?<date>.+)\s?:/, # Outlook 2019 (no)
137
+ /^\s?Dnia\s?(?<date>.+)\s?„(?<from_name>.+)”\s*[\[|<](?<from_address>.+)[\]|>]\s?napisał\s?:/, # Outlook 2019 (pl)
138
+ /^\s?Em\s?(?<date>.+),\s?"(?<from_name>.+)"\s*[\[|<](?<from_address>.+)[\]|>]\s?escreveu\s?:/, # Outlook 2019 (pt)
139
+ /^\s?(?<date>.+)\s?пользователь\s?"(?<from_name>.+)"\s*[\[|<](?<from_address>.+)[\]|>]\s?написал\s?:/, # Outlook 2019 (ru)
140
+ /^\s?(?<date>.+)\s?používateľ\s?(?<from_name>.+)\s*\([\[|<](?<from_address>.+)[\]|>]\)\s?napísal\s?:/, # Outlook 2019 (sk)
141
+ /^\s?Den\s?(?<date>.+)\s?skrev\s?"(?<from_name>.+)"\s*[\[|<](?<from_address>.+)[\]|>]\s?följande\s?:/, # Outlook 2019 (sv)
142
+ /^\s?"(?<from_name>.+)"\s*[\[|<](?<from_address>.+)[\]|>],\s?(?<date>.+)\s?tarihinde şunu yazdı\s?:/ # Outlook 2019 (tr)
143
+ ],
144
+
145
+ original_subject: [
146
+ /^\*?Subject\s?:\*?(.+)/i, # Apple Mail (en), Gmail (all locales), Outlook Live / 365 (all locales), New Outlook 2019 (en), Thunderbird (da, en), Missive (en), HubSpot (en)
147
+ /^Předmět\s?:(.+)/i, # Apple Mail (cs), New Outlook 2019 (cs), Thunderbird (cs)
148
+ /^Emne\s?:(.+)/i, # Apple Mail (da, no), New Outlook 2019 (da), Thunderbird (no)
149
+ /^Betreff\s?:(.+)/i, # Apple Mail (de), New Outlook 2019 (de), Thunderbird (de), HubSpot (de)
150
+ /^Asunto\s?:(.+)/i, # Apple Mail (es), New Outlook 2019 (es), Thunderbird (es), HubSpot (es)
151
+ /^Aihe\s?:(.+)/i, # Apple Mail (fi), New Outlook 2019 (fi), Thunderbird (fi), HubSpot (fi)
152
+ /^Objet\s?:(.+)/i, # Apple Mail (fr), New Outlook 2019 (fr), HubSpot (fr)
153
+ /^Predmet\s?:(.+)/i, # Apple Mail (hr, sk), New Outlook 2019 (sk), Thunderbird (sk)
154
+ /^Tárgy\s?:(.+)/i, # Apple Mail (hu), New Outlook 2019 (hu), Thunderbird (hu)
155
+ /^Oggetto\s?:(.+)/i, # Apple Mail (it), New Outlook 2019 (it), Thunderbird (it), HubSpot (it)
156
+ /^Onderwerp\s?:(.+)/i, # Apple Mail (nl), New Outlook 2019 (nl), Thunderbird (nl), HubSpot (nl)
157
+ /^Temat\s?:(.+)/i, # Apple Mail (pl), New Outlook 2019 (pl), Thunderbird (pl), HubSpot (pl)
158
+ /^Assunto\s?:(.+)/i, # Apple Mail (pt, pt-br), New Outlook 2019 (pt, pt-br), Thunderbird (pt, pt-br), HubSpot (pt-br)
159
+ /^Subiectul\s?:(.+)/i, # Apple Mail (ro), Thunderbird (ro)
160
+ /^Тема\s?:(.+)/i, # Apple Mail (ru, uk), New Outlook 2019 (ru), Thunderbird (ru, uk)
161
+ /^Ämne\s?:(.+)/i, # Apple Mail (sv), New Outlook 2019 (sv), Thunderbird (sv), HubSpot (sv)
162
+ /^Konu\s?:(.+)/i, # Apple Mail (tr), Thunderbird (tr)
163
+ /^Sujet\s?:(.+)/i, # Thunderbird (fr)
164
+ /^Naslov\s?:(.+)/i, # Thunderbird (hr)
165
+ /^件名:(.+)/i # HubSpot (ja)
166
+ ],
167
+
168
+ original_subject_lax: [
169
+ /Subject\s?:(.+)/i, # Yahoo Mail (en)
170
+ /Emne\s?:(.+)/i, # Yahoo Mail (da, no)
171
+ /Předmět\s?:(.+)/i, # Yahoo Mail (cs)
172
+ /Betreff\s?:(.+)/i, # Yahoo Mail (de)
173
+ /Asunto\s?:(.+)/i, # Yahoo Mail (es)
174
+ /Aihe\s?:(.+)/i, # Yahoo Mail (fi)
175
+ /Objet\s?:(.+)/i, # Yahoo Mail (fr)
176
+ /Tárgy\s?:(.+)/i, # Yahoo Mail (hu)
177
+ /Oggetto\s?:(.+)/i, # Yahoo Mail (it)
178
+ /Onderwerp\s?:(.+)/i, # Yahoo Mail (nl)
179
+ /Assunto\s?:?(.+)/i, # Yahoo Mail (pt, pt-br)
180
+ /Temat\s?:(.+)/i, # Yahoo Mail (pl)
181
+ /Subiect\s?:(.+)/i, # Yahoo Mail (ro)
182
+ /Тема\s?:(.+)/i, # Yahoo Mail (ru, uk)
183
+ /Predmet\s?:(.+)/i, # Yahoo Mail (sk)
184
+ /Ämne\s?:(.+)/i, # Yahoo Mail (sv)
185
+ /Konu\s?:(.+)/i # Yahoo Mail (tr)
186
+ ],
187
+
188
+ original_from: [
189
+ /^(\*?\s*From\s?:\*?(.+))$/, # Apple Mail (en), Outlook Live / 365 (all locales), New Outlook 2019 (en), Thunderbird (da, en), Missive (en), HubSpot (en)
190
+ /^(\s*Od\s?:(.+))$/, # Apple Mail (cs, pl, sk), Gmail (cs, pl, sk), New Outlook 2019 (cs, pl, sk), Thunderbird (cs, sk), HubSpot (pl)
191
+ /^(\s*Fra\s?:(.+))$/, # Apple Mail (da, no), Gmail (da, no), New Outlook 2019 (da), Thunderbird (no)
192
+ /^(\s*Von\s?:(.+))$/, # Apple Mail (de), Gmail (de), New Outlook 2019 (de), Thunderbird (de), HubSpot (de)
193
+ /^(\s*De\s?:(.+))$/, # Apple Mail (es, fr, pt, pt-br), Gmail (es, fr, pt, pt-br), New Outlook 2019 (es, fr, pt, pt-br), Thunderbird (fr, pt, pt-br), HubSpot (es, fr, pt-br)
194
+ /^(\s*Lähettäjä\s?:(.+))$/, # Apple Mail (fi), Gmail (fi), New Outlook 2019 (fi), Thunderbird (fi), HubSpot (fi)
195
+ /^(\s*Šalje\s?:(.+))$/, # Apple Mail (hr), Gmail (hr), Thunderbird (hr)
196
+ /^(\s*Feladó\s?:(.+))$/, # Apple Mail (hu), Gmail (hu), New Outlook 2019 (hu), Thunderbird (hu)
197
+ /^(\s*Da\s?:(.+))$/, # Apple Mail (it), Gmail (it), New Outlook 2019 (it), HubSpot (it)
198
+ /^(\s*Van\s?:(.+))$/, # Apple Mail (nl), Gmail (nl), New Outlook 2019 (nl), Thunderbird (nl), HubSpot (nl)
199
+ /^(\s*Expeditorul\s?:(.+))$/, # Apple Mail (ro)
200
+ /^(\s*Отправитель\s?:(.+))$/, # Apple Mail (ru)
201
+ /^(\s*Från\s?:(.+))$/, # Apple Mail (sv), Gmail (sv), New Outlook 2019 (sv), Thunderbird (sv), HubSpot (sv)
202
+ /^(\s*Kimden\s?:(.+))$/, # Apple Mail (tr), Thunderbird (tr)
203
+ /^(\s*Від кого\s?:(.+))$/, # Apple Mail (uk)
204
+ /^(\s*Saatja\s?:(.+))$/, # Gmail (et)
205
+ /^(\s*De la\s?:(.+))$/, # Gmail (ro)
206
+ /^(\s*Gönderen\s?:(.+))$/, # Gmail (tr)
207
+ /^(\s*От\s?:(.+))$/, # Gmail (ru), New Outlook 2019 (ru), Thunderbird (ru)
208
+ /^(\s*Від\s?:(.+))$/, # Gmail (uk), Thunderbird (uk)
209
+ /^(\s*Mittente\s?:(.+))$/, # Thunderbird (it)
210
+ /^(\s*Nadawca\s?:(.+))$/, # Thunderbird (pl)
211
+ /^(\s*de la\s?:(.+))$/, # Thunderbird (ro)
212
+ /^(\s*送信元:(.+))$/ # HubSpot (ja)
213
+ ],
214
+
215
+ original_from_lax: [
216
+ /(\s*From\s?:(.+?)\s?\n?\s*[\[|<](.+?)[\]|>])/, # Yahoo Mail (en)
217
+ /(\s*Od\s?:(.+?)\s?\n?\s*[\[|<](.+?)[\]|>])/, # Yahoo Mail (cs, pl, sk)
218
+ /(\s*Fra\s?:(.+?)\s?\n?\s*[\[|<](.+?)[\]|>])/, # Yahoo Mail (da, no)
219
+ /(\s*Von\s?:(.+?)\s?\n?\s*[\[|<](.+?)[\]|>])/, # Yahoo Mail (de)
220
+ /(\s*De\s?:(.+?)\s?\n?\s*[\[|<](.+?)[\]|>])/, # Yahoo Mail (es, fr, pt, pt-br)
221
+ /(\s*Lähettäjä\s?:(.+?)\s?\n?\s*[\[|<](.+?)[\]|>])/, # Yahoo Mail (fi)
222
+ /(\s*Feladó\s?:(.+?)\s?\n?\s*[\[|<](.+?)[\]|>])/, # Yahoo Mail (hu)
223
+ /(\s*Da\s?:(.+?)\s?\n?\s*[\[|<](.+?)[\]|>])/, # Yahoo Mail (it)
224
+ /(\s*Van\s?:(.+?)\s?\n?\s*[\[|<](.+?)[\]|>])/, # Yahoo Mail (nl)
225
+ /(\s*De la\s?:(.+?)\s?\n?\s*[\[|<](.+?)[\]|>])/, # Yahoo Mail (ro)
226
+ /(\s*От\s?:(.+?)\s?\n?\s*[\[|<](.+?)[\]|>])/, # Yahoo Mail (ru)
227
+ /(\s*Från\s?:(.+?)\s?\n?\s*[\[|<](.+?)[\]|>])/, # Yahoo Mail (sv)
228
+ /(\s*Kimden\s?:(.+?)\s?\n?\s*[\[|<](.+?)[\]|>])/, # Yahoo Mail (tr)
229
+ /(\s*Від\s?:(.+?)\s?\n?\s*[\[|<](.+?)[\]|>])/ # Yahoo Mail (uk)
230
+ ],
231
+
232
+ original_to: [
233
+ /^\*?\s*To\s?:\*?(.+)$/, # Apple Mail (en), Gmail (all locales), Outlook Live / 365 (all locales), Thunderbird (da, en), Missive (en), HubSpot (en)
234
+ /^\s*Komu\s?:(.+)$/, # Apple Mail (cs), New Outlook 2019 (cs, sk), Thunderbird (cs)
235
+ /^\s*Til\s?:(.+)$/, # Apple Mail (da, no), New Outlook 2019 (da), Thunderbird (no)
236
+ /^\s*An\s?:(.+)$/, # Apple Mail (de), New Outlook 2019 (de), Thunderbird (de), HubSpot (de)
237
+ /^\s*Para\s?:(.+)$/, # Apple Mail (es, pt, pt-br), New Outlook 2019 (es, pt, pt-br), Thunderbird (es, pt, pt-br), HubSpot (pt-br)
238
+ /^\s*Vastaanottaja\s?:(.+)$/, # Apple Mail (fi), New Outlook 2019 (fi), Thunderbird (fi), HubSpot (fi)
239
+ /^\s*À\s?:(.+)$/, # Apple Mail (fr), New Outlook 2019 (fr), HubSpot (fr)
240
+ /^\s*Prima\s?:(.+)$/, # Apple Mail (hr), Thunderbird (hr)
241
+ /^\s*Címzett\s?:(.+)$/, # Apple Mail (hu), New Outlook 2019 (hu), Thunderbird (hu)
242
+ /^\s*A\s?:(.+)$/, # Apple Mail (it), New Outlook 2019 (it), Thunderbird (it), HubSpot (es, it)
243
+ /^\s*Aan\s?:(.+)$/, # Apple Mail (nl), New Outlook 2019 (nl), Thunderbird (nl), HubSpot (nl)
244
+ /^\s*Do\s?:(.+)$/, # Apple Mail (pl), New Outlook 2019 (pl), HubSpot (pl)
245
+ /^\s*Destinatarul\s?:(.+)$/, # Apple Mail (ro)
246
+ /^\s*Кому\s?:(.+)$/, # Apple Mail (ru, uk), New Outlook 2019 (ru), Thunderbird (ru, uk)
247
+ /^\s*Pre\s?:(.+)$/, # Apple Mail (sk), Thunderbird (sk)
248
+ /^\s*Till\s?:(.+)$/, # Apple Mail (sv), New Outlook 2019 (sv), Thunderbird (sv)
249
+ /^\s*Kime\s?:(.+)$/, # Apple Mail (tr), Thunderbird (tr)
250
+ /^\s*Pour\s?:(.+)$/, # Thunderbird (fr)
251
+ /^\s*Adresat\s?:(.+)$/, # Thunderbird (pl)
252
+ /^\s*送信先:(.+)$/ # HubSpot (ja)
253
+ ],
254
+
255
+ original_to_lax: [
256
+ /\s*To\s?:(.+)$/, # Yahoo Mail (en)
257
+ /\s*Komu\s?:(.+)$/, # Yahoo Mail (cs, sk)
258
+ /\s*Til\s?:(.+)$/, # Yahoo Mail (da, no, sv)
259
+ /\s*An\s?:(.+)$/, # Yahoo Mail (de)
260
+ /\s*Para\s?:(.+)$/, # Yahoo Mail (es, pt, pt-br)
261
+ /\s*Vastaanottaja\s?:(.+)$/, # Yahoo Mail (fi)
262
+ /\s*À\s?:(.+)$/, # Yahoo Mail (fr)
263
+ /\s*Címzett\s?:(.+)$/, # Yahoo Mail (hu)
264
+ /\s*A\s?:(.+)$/, # Yahoo Mail (it)
265
+ /\s*Aan\s?:(.+)$/, # Yahoo Mail (nl)
266
+ /\s*Do\s?:(.+)$/, # Yahoo Mail (pl)
267
+ /\s*Către\s?:(.+)$/, # Yahoo Mail (ro), Thunderbird (ro)
268
+ /\s*Кому\s?:(.+)$/, # Yahoo Mail (ru, uk)
269
+ /\s*Till\s?:(.+)$/, # Yahoo Mail (sv)
270
+ /\s*Kime\s?:(.+)$/ # Yahoo Mail (tr)
271
+ ],
272
+
273
+ original_reply_to: [
274
+ /^\s*Reply-To\s?:(.+)$/, # Apple Mail (en)
275
+ /^\s*Odgovori na\s?:(.+)$/, # Apple Mail (hr)
276
+ /^\s*Odpověď na\s?:(.+)$/, # Apple Mail (cs)
277
+ /^\s*Svar til\s?:(.+)$/, # Apple Mail (da)
278
+ /^\s*Antwoord aan\s?:(.+)$/, # Apple Mail (nl)
279
+ /^\s*Vastaus\s?:(.+)$/, # Apple Mail (fi)
280
+ /^\s*Répondre à\s?:(.+)$/, # Apple Mail (fr)
281
+ /^\s*Antwort an\s?:(.+)$/, # Apple Mail (de)
282
+ /^\s*Válaszcím\s?:(.+)$/, # Apple Mail (hu)
283
+ /^\s*Rispondi a\s?:(.+)$/, # Apple Mail (it)
284
+ /^\s*Svar til\s?:(.+)$/, # Apple Mail (no)
285
+ /^\s*Odpowiedź-do\s?:(.+)$/, # Apple Mail (pl)
286
+ /^\s*Responder A\s?:(.+)$/, # Apple Mail (pt)
287
+ /^\s*Responder a\s?:(.+)$/, # Apple Mail (pt-br, es)
288
+ /^\s*Răspuns către\s?:(.+)$/, # Apple Mail (ro)
289
+ /^\s*Ответ-Кому\s?:(.+)$/, # Apple Mail (ru)
290
+ /^\s*Odpovedať-Pre\s?:(.+)$/, # Apple Mail (sk)
291
+ /^\s*Svara till\s?:(.+)$/, # Apple Mail (sv)
292
+ /^\s*Yanıt Adresi\s?:(.+)$/, # Apple Mail (tr)
293
+ /^\s*Кому відповісти\s?:(.+)$/ # Apple Mail (uk)
294
+ ],
295
+
296
+ original_cc: [
297
+ /^\*?\s*Cc\s?:\*?(.+)$/, # Apple Mail (en, da, es, fr, hr, it, pt, pt-br, ro, sk), Gmail (all locales), Outlook Live / 365 (all locales), New Outlook 2019 (da, de, en, fr, it, pt-br), Missive (en), HubSpot (de, en, es, it, nl, pt-br)
298
+ /^\s*CC\s?:(.+)$/, # New Outlook 2019 (es, nl, pt), Thunderbird (da, en, es, fi, hr, hu, it, nl, no, pt, pt-br, ro, tr, uk)
299
+ /^\s*Kopie\s?:(.+)$/, # Apple Mail (cs, de, nl), New Outlook 2019 (cs), Thunderbird (cs)
300
+ /^\s*Kopio\s?:(.+)$/, # Apple Mail (fi), New Outlook 2019 (fi), HubSpot (fi)
301
+ /^\s*Másolat\s?:(.+)$/, # Apple Mail (hu)
302
+ /^\s*Kopi\s?:(.+)$/, # Apple Mail (no)
303
+ /^\s*Dw\s?:(.+)$/, # Apple Mail (pl)
304
+ /^\s*Копия\s?:(.+)$/, # Apple Mail (ru), New Outlook 2019 (ru), Thunderbird (ru)
305
+ /^\s*Kopia\s?:(.+)$/, # Apple Mail (sv), New Outlook 2019 (sv), Thunderbird (pl, sv), HubSpot (sv)
306
+ /^\s*Bilgi\s?:(.+)$/, # Apple Mail (tr)
307
+ /^\s*Копія\s?:(.+)$/, # Apple Mail (uk)
308
+ /^\s*Másolatot kap\s?:(.+)$/, # New Outlook 2019 (hu)
309
+ /^\s*Kópia\s?:(.+)$/, # New Outlook 2019 (sk), Thunderbird (sk)
310
+ /^\s*DW\s?:(.+)$/, # New Outlook 2019 (pl), HubSpot (pl)
311
+ /^\s*Kopie \(CC\)\s?:(.+)$/, # Thunderbird (de)
312
+ /^\s*Copie à\s?:(.+)$/, # Thunderbird (fr)
313
+ /^\s*CC:(.+)$/ # HubSpot (ja)
314
+ ],
315
+
316
+ original_cc_lax: [
317
+ /\s*Cc\s?:(.+)$/, # Yahoo Mail (da, en, it, nl, pt, pt-br, ro, tr)
318
+ /\s*CC\s?:(.+)$/, # Yahoo Mail (de, es)
319
+ /\s*Kopie\s?:(.+)$/, # Yahoo Mail (cs)
320
+ /\s*Kopio\s?:(.+)$/, # Yahoo Mail (fi)
321
+ /\s*Másolat\s?:(.+)$/, # Yahoo Mail (hu)
322
+ /\s*Kopi\s?:(.+)$/, # Yahoo Mail (no)
323
+ /\s*Dw\s?(.+)$/, # Yahoo Mail (pl)
324
+ /\s*Копия\s?:(.+)$/, # Yahoo Mail (ru)
325
+ /\s*Kópia\s?:(.+)$/, # Yahoo Mail (sk)
326
+ /\s*Kopia\s?:(.+)$/, # Yahoo Mail (sv)
327
+ /\s*Копія\s?:(.+)$/ # Yahoo Mail (uk)
328
+ ],
329
+
330
+ original_date: [
331
+ /^\s*Date\s?:(.+)$/, # Apple Mail (en, fr), Gmail (all locales), New Outlook 2019 (en, fr), Thunderbird (da, en, fr), Missive (en), HubSpot (en, fr)
332
+ /^\s*Datum\s?:(.+)$/, # Apple Mail (cs, de, hr, nl, sv), New Outlook 2019 (cs, de, nl, sv), Thunderbird (cs, de, hr, nl, sv), HubSpot (de, nl, sv)
333
+ /^\s*Dato\s?:(.+)$/, # Apple Mail (da, no), New Outlook 2019 (da), Thunderbird (no)
334
+ /^\s*Envoyé\s?:(.+)$/, # New Outlook 2019 (fr)
335
+ /^\s*Fecha\s?:(.+)$/, # Apple Mail (es), New Outlook 2019 (es), Thunderbird (es), HubSpot (es)
336
+ /^\s*Päivämäärä\s?:(.+)$/, # Apple Mail (fi), New Outlook 2019 (fi), HubSpot (fi)
337
+ /^\s*Dátum\s?:(.+)$/, # Apple Mail (hu, sk), New Outlook 2019 (sk), Thunderbird (hu, sk)
338
+ /^\s*Data\s?:(.+)$/, # Apple Mail (it, pl, pt, pt-br), New Outlook 2019 (it, pl, pt, pt-br), Thunderbird (it, pl, pt, pt-br), HubSpot (it, pl, pt-br)
339
+ /^\s*Dată\s?:(.+)$/, # Apple Mail (ro), Thunderbird (ro)
340
+ /^\s*Дата\s?:(.+)$/, # Apple Mail (ru, uk), New Outlook 2019 (ru), Thunderbird (ru, uk)
341
+ /^\s*Tarih\s?:(.+)$/, # Apple Mail (tr), Thunderbird (tr)
342
+ /^\*?\s*Sent\s?:\*?(.+)$/, # Outlook Live / 365 (all locales)
343
+ /^\s*Päiväys\s?:(.+)$/, # Thunderbird (fi)
344
+ /^\s*日付:(.+)$/ # HubSpot (ja)
345
+ ],
346
+
347
+ original_date_lax: [
348
+ /\s*Datum\s?:(.+)$/, # Yahoo Mail (cs)
349
+ /\s*Sendt\s?:(.+)$/, # Yahoo Mail (da, no)
350
+ /\s*Gesendet\s?:(.+)$/, # Yahoo Mail (de)
351
+ /\s*Sent\s?:(.+)$/, # Yahoo Mail (en)
352
+ /\s*Enviado\s?:(.+)$/, # Yahoo Mail (es, pt, pt-br)
353
+ /\s*Envoyé\s?:(.+)$/, # Yahoo Mail (fr)
354
+ /\s*Lähetetty\s?:(.+)$/, # Yahoo Mail (fi)
355
+ /\s*Elküldve\s?:(.+)$/, # Yahoo Mail (hu)
356
+ /\s*Inviato\s?:(.+)$/, # Yahoo Mail (it)
357
+ /\s*Verzonden\s?:(.+)$/, # Yahoo Mail (nl)
358
+ /\s*Wysłano\s?:(.+)$/, # Yahoo Mail (pl)
359
+ /\s*Trimis\s?:(.+)$/, # Yahoo Mail (ro)
360
+ /\s*Отправлено\s?:(.+)$/, # Yahoo Mail (ru)
361
+ /\s*Odoslané\s?:(.+)$/, # Yahoo Mail (sk)
362
+ /\s*Skickat\s?:(.+)$/, # Yahoo Mail (sv)
363
+ /\s*Gönderilen\s?:(.+)$/, # Yahoo Mail (tr)
364
+ /\s*Відправлено\s?:(.+)$/ # Yahoo Mail (uk)
365
+ ],
366
+
367
+ mailbox: [
368
+ /^\s?\n?\s*<.+?<mailto:(.+?)>>/, # "<walter.sheltan@acme.com<mailto:walter.sheltan@acme.com>>"
369
+ /^(.+?)\s?\n?\s*<.+?<mailto:(.+?)>>/, # "Walter Sheltan <walter.sheltan@acme.com<mailto:walter.sheltan@acme.com>>"
370
+ /^(.+?)\s?\n?\s*[\[|<]mailto:(.+?)[\]|>]/, # "Walter Sheltan <mailto:walter.sheltan@acme.com>" or "Walter Sheltan [mailto:walter.sheltan@acme.com]" or "walter.sheltan@acme.com <mailto:walter.sheltan@acme.com>"
371
+ /^'(.+?)'\s?\n?\s*[\[|<](.+?)[\]|>]/, # "'Walter Sheltan' <walter.sheltan@acme.com>" or "'Walter Sheltan' [walter.sheltan@acme.com]" or "'walter.sheltan@acme.com' <walter.sheltan@acme.com>"
372
+ /^"'(.+?)'"\s?\n?\s*[\[|<](.+?)[\]|>]/, # ""'Walter Sheltan'" <walter.sheltan@acme.com>" or ""'Walter Sheltan'" [walter.sheltan@acme.com]" or ""'walter.sheltan@acme.com'" <walter.sheltan@acme.com>"
373
+ /^"(.+?)"\s?\n?\s*[\[|<](.+?)[\]|>]/, # ""Walter Sheltan" <walter.sheltan@acme.com>" or ""Walter Sheltan" [walter.sheltan@acme.com]" or ""walter.sheltan@acme.com" <walter.sheltan@acme.com>"
374
+ /^([^,;]+?)\s?\n?\s*[\[|<](.+?)[\]|>]/, # "Walter Sheltan <walter.sheltan@acme.com>" or "Walter Sheltan [walter.sheltan@acme.com]" or "walter.sheltan@acme.com <walter.sheltan@acme.com>"
375
+ /^(.?)\s?\n?\s*[\[|<](.+?)[\]|>]/, # "<walter.sheltan@acme.com>"
376
+ /^([^\s@]+@[^\s@]+\.[^\s@,]+)/, # "walter.sheltan@acme.com"
377
+ /^([^;].+?)\s?\n?\s*[\[|<](.+?)[\]|>]/ # "Walter, Sheltan <walter.sheltan@acme.com>" or "Walter, Sheltan [walter.sheltan@acme.com]"
378
+ ],
379
+
380
+ mailbox_address: [
381
+ /^(([^\s@]+)@([^\s@]+)\.([^\s@]+))$/
382
+ ]
383
+ }.freeze
384
+
385
# Builds the parser: stores the table returned by load_regexes in @regexes,
# which the parse_* methods read their patterns from.
+ def initialize
386
+ @regexes = load_regexes
387
+ end
388
+
389
+ def parse_subject(subject)
390
+ match = Utils.loop_regexes(@regexes[:subject], subject)
391
+
392
+ if match && match.length > 1
393
+ # Notice: return an empty string if the detected subject is empty
394
+ # (e.g. 'Fwd: ')
395
+ return Utils.trim_string(match[1]).to_s
396
+ end
397
+
398
+ nil
399
+ end
400
+
401
+ def parse_body(body, forwarded = false)
402
+ # Replace carriage return with regular line break
403
+ body = body.gsub(@regexes[:carriage_return], "\n")
404
+
405
+ # Remove Byte Order Mark
406
+ body.gsub!(@regexes[:byte_order_mark], "")
407
+
408
+ # Remove trailing Non-breaking space
409
+ body.gsub!(@regexes[:trailing_non_breaking_space], "")
410
+
411
+ # Replace Non-breaking space with regular space
412
+ body.gsub!(@regexes[:non_breaking_space], " ")
413
+
414
+ # First method: split via the separator (Apple Mail, Gmail,
415
+ # Outlook Live / 365, Outlook 2019, Yahoo Mail, Thunderbird)
416
+ # Notice: use 'line' regex that will capture the line itself, as we may
417
+ # need it to build the original email back (in case of nested emails)
418
+ match = Utils.loop_regexes(@regexes[:separator_line], body, "split")
419
+
420
+ if match && match.length > 2
421
+ # The `split` operation creates a match with 3 substrings:
422
+ # * 0: anything before the line with the separator (i.e. the message)
423
+ # * 1: the line with the separator
424
+ # * 2: anything after the line with the separator (i.e. the body of
425
+ # the original email)
426
+ # Notice: in case of nested emails, there may be several matches
427
+ # against 'separator_line'. In that case, the `split` operation
428
+ # creates a match with (n x 3) substrings. We need to reconciliate
429
+ # those substrings.
430
+ email = Utils.reconciliate_split_match(
431
+ match,
432
+ 3, # min_substrings
433
+ [2] # default_substrings (By default, attach anything after the line with the separator)
434
+ )
435
+
436
+ return {
437
+ body: body,
438
+ message: Utils.trim_string(match[0]),
439
+ email: Utils.trim_string(email)
440
+ }
441
+ end
442
+
443
+ # Attempt second method?
444
+ # Notice: as this second method is more uncertain (we split via the From
445
+ # part, without further verification), we have to be sure we can
446
+ # attempt it. The `forwarded` boolean gives the confirmation that the
447
+ # email was indeed forwarded (detected from the Subject part)
448
+ if forwarded == true
449
+ # Second method: split via the From part (New Outlook 2019,
450
+ # Outlook Live / 365)
451
+ match = Utils.loop_regexes(@regexes[:original_from], body, "split")
452
+
453
+ if match && match.length > 3
454
+ # The `split` operation creates a match with 4 substrings:
455
+ # * 0: anything before the line with the From part (i.e. the
456
+ # message before the original email)
457
+ # * 1: the line with the From part (in the original email)
458
+ # * 2: the From part itself
459
+ # * 3: anything after the line with the From part (i.e.
460
+ # the rest of the original email)
461
+ # Notice: in case of nested emails, there may be several matches
462
+ # against 'original_from'. In that case, the `split` operation
463
+ # creates a match with (n x 4) substrings. We need to reconciliate
464
+ # those substrings.
465
+ email = Utils.reconciliate_split_match(
466
+ match,
467
+ 4, # min_substrings
468
+ [1, 3], # default_substrings (By default, attach the line that contains the From part back to the rest of the original email (exclude the From part itself))
469
+ lambda { |i|
470
+ i % 3 == 2
471
+ } # fn_exclude (When reconciliating other substrings, we want to exclude the From part itself)
472
+ )
473
+
474
+ return {
475
+ body: body,
476
+ message: Utils.trim_string(match[0]),
477
+ email: Utils.trim_string(email)
478
+ }
479
+ end
480
+ end
481
+
482
+ {}
483
+ end
484
+
485
+ def parse_original_email(text, body)
486
+ # Remove Byte Order Mark
487
+ text = text.gsub(@regexes[:byte_order_mark], "")
488
+
489
+ # Remove ">" at the beginning of each line, while keeping line breaks
490
+ text = text.gsub(@regexes[:quote_line_break], "")
491
+
492
+ # Remove ">" at the beginning of other lines
493
+ text = text.gsub(@regexes[:quote], "")
494
+
495
+ # Remove " " at the beginning of lines
496
+ text = text.gsub(@regexes[:four_spaces], "")
497
+
498
+ {
499
+ body: parse_original_body(text),
500
+ from: parse_original_from(text, body),
501
+ to: parse_original_to(text),
502
+ cc: parse_original_cc(text),
503
+ subject: parse_original_subject(text),
504
+ date: parse_original_date(text, body)
505
+ }
506
+ end
507
+
508
+ private
509
+
510
+ def load_regexes
511
+ @regexes = {}
512
+ REGEXES.each do |key, entry|
513
+ key_line = "#{key}_line".to_sym
514
+ if entry.is_a?(Array)
515
+ @regexes[key] = []
516
+ @regexes[key_line] = []
517
+
518
+ entry.each do |regex|
519
+ # Build 'line' alternative?
520
+ if LINE_REGEXES.include?(key)
521
+ regex_line = build_line_regex(regex)
522
+ @regexes[key_line] << regex_line
523
+ end
524
+
525
+ @regexes[key] << Regexp.new(regex)
526
+ end
527
+ else
528
+ regex = entry
529
+
530
+ # Build 'line' alternative?
531
+ if LINE_REGEXES.include?(key)
532
+ regex_line = build_line_regex(regex)
533
+ @regexes[key_line] = regex_line
534
+ end
535
+
536
+ @regexes[key] = Regexp.new(regex)
537
+ end
538
+ end
539
+ @regexes
540
+ end
541
+
542
+ # Builds 'line' alternative regex
543
+ # @param regex [Regexp] The regular expression to build a line regex from
544
+ # @return [Regexp] The 'line' regex
545
+ def build_line_regex(regex)
546
+ # A 'line' regex will capture not only inner groups, but also the line itself
547
+ # Important: `regex` must be a Regexp object, not a string
548
+ source = "(#{regex.source})"
549
+ flags = 0
550
+ flags |= Regexp::IGNORECASE if regex.options & Regexp::IGNORECASE != 0
551
+ flags |= Regexp::MULTILINE if regex.options & Regexp::MULTILINE != 0
552
+ flags |= Regexp::EXTENDED if regex.options & Regexp::EXTENDED != 0
553
+
554
+ Regexp.new(source, flags)
555
+ end
556
+
557
# Extracts the body of the original email out of the forwarded text
# @param text [String] The text to extract the original body from
# @return [String] The extracted body, or the raw text when no header line
#   could be used to locate it
def parse_original_body(text)
  # Exclusion callback handed to `Utils.reconciliate_split_match`, together
  # with a group size of 4 and default substring index 3 (the body)
  excluded_index = ->(i) { i % 3 == 2 }

  # First method: extract the text after the Subject part
  # (Outlook Live / 365) or after the Cc, To or Reply-To part
  # (Apple Mail, Gmail) or Date part (MailMate). A new line must be present.
  # Notice: the 'line' regexes capture the whole header line in addition to
  # the header part, as the line may be needed to build the original body
  # back (in case of nested emails)
  strict_keys = %i[
    original_subject_line
    original_cc_line
    original_to_line
    original_reply_to_line
    original_date_line
  ]

  strict_keys.each do |key|
    # A successful split yields, per matched header line, 4 substrings:
    #  * 0: anything before the header line
    #  * 1: the header line
    #  * 2: the header part itself
    #  * 3: anything after the header line (i.e. the original body)
    # With nested emails there are (n x 4) substrings to reconciliate.
    parts = Utils.loop_regexes(@regexes[key], text, "split")

    # A blank line must separate the header line from the actual body
    body_follows = parts && parts.length > 2 && parts[3]&.start_with?("\n\n")
    next unless body_follows

    return Utils.trim_string(
      Utils.reconciliate_split_match(parts, 4, [3], excluded_index)
    )
  end

  # Second method: extract the text after the Subject part
  # (New Outlook 2019, Yahoo Mail). No new line is required here, so both
  # the strict and the lax Subject 'line' regexes are tried together.
  subject_regexes =
    @regexes[:original_subject_line] + @regexes[:original_subject_lax_line]
  lax_parts = Utils.loop_regexes(subject_regexes, text, "split")

  if lax_parts && lax_parts.length > 3
    # Same 4-substring layout as above, reconciliated the same way
    return Utils.trim_string(
      Utils.reconciliate_split_match(lax_parts, 4, [3], excluded_index)
    )
  end

  # Third method: return the raw text, as there is no original information
  # embedded (no Cc, To, Subject, etc.) (Outlook 2019)
  text
end
642
+
643
# Parses mailbox(es)
# @private
# @param regexes [Array<Regexp>] Array of regular expressions to match mailboxes
# @param text [String] The text to parse
# @param force_array [Boolean, Hash] Whether to force the return value to be
#   an array. A `{ force_array: <bool> }` Hash is also accepted, since that is
#   what a `force_array: true` call delivers to this positional parameter.
# @return [Array<Hash>, Hash, nil] The parsed mailbox(es) or nil if not found
def parse_mailbox(regexes, text, force_array = false)
  # Unwrap the keyword-style Hash so that the flag's real value is honored
  # (a Hash is always truthy, even for `force_array: false`)
  force_array = force_array.fetch(:force_array, false) if force_array.is_a?(Hash)

  mailboxes = []
  match = Utils.loop_regexes(regexes, text)

  if match&.length&.positive?
    mailboxes_line = Utils.trim_string(match[-1])

    # An empty String is truthy in Ruby, so exhaustion of the line must be
    # tested explicitly (nil or empty) rather than by truthiness
    until mailboxes_line.nil? || mailboxes_line.empty?
      mailbox_match = Utils.loop_regexes(@regexes[:mailbox], mailboxes_line)

      # Neither address nor name could be isolated: keep the remainder as a
      # single mailbox and stop
      unless mailbox_match&.length&.positive?
        mailboxes << prepare_mailbox(mailboxes_line, nil)
        break
      end

      # Address and name available? Otherwise only the address was captured
      if mailbox_match.length == 3
        name = mailbox_match[1]
        address = mailbox_match[2]
      else
        name = nil
        address = mailbox_match[1]
      end

      mailboxes << prepare_mailbox(address, name)

      # Remove matched mailbox from mailboxes line
      mailboxes_line = Utils.trim_string(
        mailboxes_line.sub(mailbox_match[0], "")
      )
      next if mailboxes_line.nil? || mailboxes_line.empty?

      # Remove leading mailboxes separator
      if MAILBOXES_SEPARATORS.include?(mailboxes_line[0])
        mailboxes_line = Utils.trim_string(mailboxes_line[1..])
      end
    end
  end

  # Multiple mailboxes are always returned as an array; a single one (or
  # none) is returned as an array only when forced
  return mailboxes if force_array || mailboxes.length > 1

  # Single mailbox, or nil when no mailbox was found
  mailboxes.first
end
708
+
709
# Parses the author (From)
# @private
# @param text [String] The text searched for a From part
# @param body [String] The text searched for a separator carrying the author
# @return [Hash] The parsed author
def parse_original_from(text, body)
  # First method: extract the author via the From part (Apple Mail, Gmail,
  # Outlook Live / 365, New Outlook 2019, Thunderbird)
  author = parse_mailbox(@regexes[:original_from], text)

  case author
  when Hash
    # Author found?
    return author if author[:address] || author[:name]
  when Array
    # Multiple authors found? Keep the first one
    return author.first if author[0][:address] || author[0][:name]
  end

  # Second method: extract the author via the separator (Outlook 2019)
  separator_match = Utils.loop_regexes(
    @regexes[:separator_with_information],
    body
  )

  if separator_match.is_a?(MatchData) && separator_match.length == 4
    # Notice: the order of parts may change depending on the localization,
    # hence the use of named captures
    return prepare_mailbox(
      separator_match[:from_address],
      separator_match[:from_name]
    )
  end

  # Third method: extract the author via the From part, using lax regexes
  # (Yahoo Mail)
  lax_match = Utils.loop_regexes(@regexes[:original_from_lax], text)

  # In the lax match, capture 2 is used as the name and capture 3 as the
  # address
  return prepare_mailbox(lax_match[3], lax_match[2]) if lax_match && lax_match.length > 1

  # Nothing found: build a mailbox from empty information
  prepare_mailbox(nil, nil)
end
753
+
754
# Parses the subject part
# @private
# @param text [String] The text searched for a Subject part
# @return [String, nil] The parsed subject or nil if not found
def parse_original_subject(text)
  # Regex sets are tried in order, and the first one that matches wins:
  #  * original_subject: the regular Subject part (Apple Mail, Gmail,
  #    Outlook Live / 365, New Outlook 2019, Thunderbird)
  #  * original_subject_lax: the lax Subject part (Yahoo Mail)
  %i[original_subject original_subject_lax].each do |key|
    found = Utils.loop_regexes(@regexes[key], text)

    return Utils.trim_string(found[1]) if found&.length&.positive?
  end

  nil
end
773
+
774
# Parses the primary recipient(s) (To)
# @private
# @param text [String] The text searched for a To part
# @return [Array<Hash>] The parsed primary recipient(s)
def parse_original_to(text)
  # First method: extract the primary recipient(s) via the To part
  # (Apple Mail, Gmail, Outlook Live / 365, New Outlook 2019, Thunderbird)
  strict_recipients = parse_mailbox(
    @regexes[:original_to],
    text,
    force_array: true
  )

  # Recipient(s) found?
  return strict_recipients if strict_recipients.is_a?(Array) && strict_recipients.any?

  # Second method: the Subject, Date and Cc parts are stuck to the To part,
  # remove them one after the other (in that order) before attempting a new
  # extract, using lax regexes (Yahoo Mail)
  stuck_parts = %i[original_subject_lax original_date_lax original_cc_lax]
  clean_text = stuck_parts.reduce(text) do |remaining, key|
    Utils.loop_regexes(@regexes[key], remaining, "replace")
  end

  parse_mailbox(
    @regexes[:original_to_lax],
    clean_text,
    force_array: true
  )
end
817
+
818
# Parses the carbon-copy recipient(s) (Cc)
# @private
# @param text [String] The text searched for a Cc part
# @return [Array<Hash>] The parsed carbon-copy recipient(s)
def parse_original_cc(text)
  # First method: extract the carbon-copy recipient(s) via the Cc part
  # (Apple Mail, Gmail, Outlook Live / 365, New Outlook 2019, Thunderbird)
  strict_recipients = parse_mailbox(
    @regexes[:original_cc],
    text,
    force_array: true
  )

  # Recipient(s) found?
  return strict_recipients if strict_recipients.is_a?(Array) && strict_recipients.any?

  # Second method: the Subject and Date parts are stuck to the Cc part,
  # remove them one after the other (in that order) before attempting a new
  # extract, using lax regexes (Yahoo Mail)
  stuck_parts = %i[original_subject_lax original_date_lax]
  clean_text = stuck_parts.reduce(text) do |remaining, key|
    Utils.loop_regexes(@regexes[key], remaining, "replace")
  end

  parse_mailbox(
    @regexes[:original_cc_lax],
    clean_text,
    force_array: true
  )
end
851
+
852
# Parses the date part
# @private
# @param text [String] The text searched for a Date part
# @param body [String] The text searched for a separator carrying the date
# @return [String, nil] The parsed date or nil if not found
def parse_original_date(text, body)
  # First method: extract the date via the Date part (Apple Mail, Gmail,
  # Outlook Live / 365, New Outlook 2019, Thunderbird)
  date_match = Utils.loop_regexes(@regexes[:original_date], text)
  return Utils.trim_string(date_match[1]) if date_match&.length&.positive?

  # Second method: extract the date via the separator (Outlook 2019)
  separator_match = Utils.loop_regexes(
    @regexes[:separator_with_information],
    body
  )

  if separator_match.is_a?(MatchData) && separator_match.length == 4
    # Notice: the order of parts may change depending on the localization,
    # hence the use of named captures
    return Utils.trim_string(separator_match[:date])
  end

  # Third method: the Subject part is stuck to the Date part, remove it
  # before attempting a new extract, using lax regexes (Yahoo Mail)
  without_subject = Utils.loop_regexes(
    @regexes[:original_subject_lax],
    text,
    "replace"
  )
  lax_match = Utils.loop_regexes(@regexes[:original_date_lax], without_subject)

  lax_match&.length&.positive? ? Utils.trim_string(lax_match[1]) : nil
end
882
+
883
# Prepares mailbox
# @private
# @param address [String, nil] The raw address (some clients put the name here)
# @param name [String, nil] The raw name
# @return [Hash] The prepared mailbox, with `:address` and `:name` keys whose
#   values are either a non-empty String or nil
def prepare_mailbox(address, name)
  address = Utils.trim_string(address)
  name = Utils.trim_string(name)

  # Make sure mailbox address is valid
  mailbox_address_match = Utils.loop_regexes(
    @regexes[:mailbox_address],
    address
  )

  # Invalid mailbox address? Some clients only include the name.
  # A missing match is detected the same way as everywhere else in this
  # class (nil or empty result), so both shapes of "no match" are covered.
  unless mailbox_address_match&.length&.positive?
    name = address
    address = nil
  end

  # Normalize blank values to nil. `address` may already be nil at this
  # point, so it must be nil-checked before calling `empty?` on it.
  address = nil if address.nil? || address.empty?
  name = nil if name.nil? || name.empty?

  {
    address: address,

    # Some clients fill the name with the address
    # ("bessie.berry@acme.com <bessie.berry@acme.com>")
    name: address == name ? nil : name
  }
end
915
+ end
916
+ end