forwarded_email_parser 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.byebug_history +223 -0
- data/.rubocop.yml +8 -0
- data/CHANGELOG.md +5 -0
- data/CODE_OF_CONDUCT.md +3 -0
- data/LICENSE.txt +21 -0
- data/README.md +43 -0
- data/Rakefile +12 -0
- data/lib/forwarded_email_parser/email_parser.rb +916 -0
- data/lib/forwarded_email_parser/parsed_email.rb +20 -0
- data/lib/forwarded_email_parser/parser.rb +36 -0
- data/lib/forwarded_email_parser/utils.rb +87 -0
- data/lib/forwarded_email_parser/version.rb +5 -0
- data/lib/forwarded_email_parser.rb +11 -0
- data/sig/email_forward_parser.rbs +4 -0
- metadata +61 -0
@@ -0,0 +1,916 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module ForwardedEmailParser
|
4
|
+
class EmailParser
|
5
|
+
# Mailbox separator characters, with the mail clients noted as using each:
#   "," - Apple Mail, Gmail, New Outlook 2019, Thunderbird
#   ";" - Outlook Live / 365, Yahoo Mail
MAILBOXES_SEPARATORS = [",", ";"].freeze
|
9
|
+
|
10
|
+
# Keys of REGEXES for which load_regexes additionally builds a "<key>_line"
# variant (the same pattern wrapped in one extra capture group).
LINE_REGEXES = %i[
  separator original_subject original_subject_lax
  original_to original_reply_to original_cc original_date
].freeze
|
19
|
+
|
20
|
+
REGEXES = {
|
21
|
+
quote_line_break: /^(>+)\s?$/m, # Apple Mail, Missive
|
22
|
+
quote: /^(>+)\s?/m, # Apple Mail
|
23
|
+
four_spaces: /^(\ {4})\s?/m, # Outlook 2019
|
24
|
+
carriage_return: /\r\n/m, # Outlook 2019
|
25
|
+
byte_order_mark: /\uFEFF/m, # Outlook 2019
|
26
|
+
trailing_non_breaking_space: /\u00A0$/m, # IONOS by 1 & 1
|
27
|
+
non_breaking_space: /\u00A0/m, # HubSpot
|
28
|
+
|
29
|
+
subject: [
|
30
|
+
/^Fw:(.*)/, # Outlook Live / 365 (cs, en, hr, hu, sk), Yahoo Mail (all locales)
|
31
|
+
/^VS:(.*)/, # Outlook Live / 365 (da), New Outlook 2019 (da)
|
32
|
+
/^WG:(.*)/, # Outlook Live / 365 (de), New Outlook 2019 (de)
|
33
|
+
/^RV:(.*)/, # Outlook Live / 365 (es), New Outlook 2019 (es)
|
34
|
+
/^TR:(.*)/, # Outlook Live / 365 (fr), New Outlook 2019 (fr)
|
35
|
+
/^I:(.*)/, # Outlook Live / 365 (it), New Outlook 2019 (it)
|
36
|
+
/^FW:(.*)/, # Outlook Live / 365 (nl, pt), New Outlook 2019 (cs, en, hu, nl, pt, ru, sk), Outlook 2019 (all locales)
|
37
|
+
/^Vs:(.*)/, # Outlook Live / 365 (no)
|
38
|
+
/^PD:(.*)/, # Outlook Live / 365 (pl), New Outlook 2019 (pl)
|
39
|
+
/^ENC:(.*)/, # Outlook Live / 365 (pt-br), New Outlook 2019 (pt-br)
|
40
|
+
/^Redir.:(.*)/, # Outlook Live / 365 (ro)
|
41
|
+
/^VB:(.*)/, # Outlook Live / 365 (sv), New Outlook 2019 (sv)
|
42
|
+
/^VL:(.*)/, # New Outlook 2019 (fi)
|
43
|
+
/^Videresend:(.*)/, # New Outlook 2019 (no)
|
44
|
+
/^İLT:(.*)/, # New Outlook 2019 (tr)
|
45
|
+
/^Fwd:(.*)/ # Gmail (all locales), Thunderbird (all locales), Missive (en), MailMate (en)
|
46
|
+
],
|
47
|
+
|
48
|
+
separator: [
|
49
|
+
/^>?\s*Begin forwarded message\s?:/, # Apple Mail (en)
|
50
|
+
/^>?\s*Začátek přeposílané zprávy\s?:/, # Apple Mail (cs)
|
51
|
+
/^>?\s*Start på videresendt besked\s?:/, # Apple Mail (da)
|
52
|
+
/^>?\s*Anfang der weitergeleiteten Nachricht\s?:/, # Apple Mail (de)
|
53
|
+
/^>?\s*Inicio del mensaje reenviado\s?:/, # Apple Mail (es)
|
54
|
+
/^>?\s*Välitetty viesti alkaa\s?:/, # Apple Mail (fi)
|
55
|
+
/^>?\s*Début du message réexpédié\s?:/, # Apple Mail (fr)
|
56
|
+
/^>?\s*Début du message transféré\s?:/, # Apple Mail iOS (fr)
|
57
|
+
/^>?\s*Započni proslijeđenu poruku\s?:/, # Apple Mail (hr)
|
58
|
+
/^>?\s*Továbbított levél kezdete\s?:/, # Apple Mail (hu)
|
59
|
+
/^>?\s*Inizio messaggio inoltrato\s?:/, # Apple Mail (it)
|
60
|
+
/^>?\s*Begin doorgestuurd bericht\s?:/, # Apple Mail (nl)
|
61
|
+
/^>?\s*Videresendt melding\s?:/, # Apple Mail (no)
|
62
|
+
/^>?\s*Początek przekazywanej wiadomości\s?:/, # Apple Mail (pl)
|
63
|
+
/^>?\s*Início da mensagem reencaminhada\s?:/, # Apple Mail (pt)
|
64
|
+
/^>?\s*Início da mensagem encaminhada\s?:/, # Apple Mail (pt-br)
|
65
|
+
/^>?\s*Începe mesajul redirecționat\s?:/, # Apple Mail (ro)
|
66
|
+
/^>?\s*Начало переадресованного сообщения\s?:/, # Apple Mail (ru)
|
67
|
+
/^>?\s*Začiatok preposlanej správy\s?:/, # Apple Mail (sk)
|
68
|
+
/^>?\s*Vidarebefordrat mejl\s?:/, # Apple Mail (sv)
|
69
|
+
/^>?\s*İleti başlangıcı\s?:/, # Apple Mail (tr)
|
70
|
+
/^>?\s*Початок листа, що пересилається\s?:/, # Apple Mail (uk)
|
71
|
+
/^\s*-{8,10}\s*Forwarded message\s*-{8,10}\s*/, # Gmail (all locales), Missive (en), HubSpot (en)
|
72
|
+
/^\s*_{32}\s*$/, # Outlook Live / 365 (all locales)
|
73
|
+
/^\s?Forwarded message:/, # Mailmate
|
74
|
+
/^\s?Dne\s?.+,\s?.+\s*[\[|<].+[\]|>]\s?napsal\(a\)\s?:/, # Outlook 2019 (cz)
|
75
|
+
/^\s?D.\s?.+\s?skrev\s?".+"\s*[\[|<].+[\]|>]\s?:/, # Outlook 2019 (da)
|
76
|
+
/^\s?Am\s?.+\s?schrieb\s?".+"\s*[\[|<].+[\]|>]\s?:/, # Outlook 2019 (de)
|
77
|
+
/^\s?On\s?.+,\s?".+"\s*[\[|<].+[\]|>]\s?wrote\s?:/, # Outlook 2019 (en)
|
78
|
+
/^\s?El\s?.+,\s?".+"\s*[\[|<].+[\]|>]\s?escribió\s?:/, # Outlook 2019 (es)
|
79
|
+
/^\s?Le\s?.+,\s?«.+»\s*[\[|<].+[\]|>]\s?a écrit\s?:/, # Outlook 2019 (fr)
|
80
|
+
/^\s?.+\s*[\[|<].+[\]|>]\s?kirjoitti\s?.+\s?:/, # Outlook 2019 (fi)
|
81
|
+
/^\s?.+\s?időpontban\s?.+\s*[\[|<|(].+[\]|>|)]\s?ezt írta\s?:/, # Outlook 2019 (hu)
|
82
|
+
/^\s?Il giorno\s?.+\s?".+"\s*[\[|<].+[\]|>]\s?ha scritto\s?:/, # Outlook 2019 (it)
|
83
|
+
/^\s?Op\s?.+\s?heeft\s?.+\s*[\[|<].+[\]|>]\s?geschreven\s?:/, # Outlook 2019 (nl)
|
84
|
+
/^\s?.+\s*[\[|<].+[\]|>]\s?skrev følgende den\s?.+\s?:/, # Outlook 2019 (no)
|
85
|
+
/^\s?Dnia\s?.+\s?„.+”\s*[\[|<].+[\]|>]\s?napisał\s?:/, # Outlook 2019 (pl)
|
86
|
+
/^\s?Em\s?.+,\s?".+"\s*[\[|<].+[\]|>]\s?escreveu\s?:/, # Outlook 2019 (pt)
|
87
|
+
/^\s?.+\s?пользователь\s?".+"\s*[\[|<].+[\]|>]\s?написал\s?:/, # Outlook 2019 (ru)
|
88
|
+
/^\s?.+\s?používateľ\s?.+\s*\([\[|<].+[\]|>]\)\s?napísal\s?:/, # Outlook 2019 (sk)
|
89
|
+
/^\s?Den\s?.+\s?skrev\s?".+"\s*[\[|<].+[\]|>]\s?följande\s?:/, # Outlook 2019 (sv)
|
90
|
+
/^\s?".+"\s*[\[|<].+[\]|>],\s?.+\s?tarihinde şunu yazdı\s?:/, # Outlook 2019 (tr)
|
91
|
+
/^\s*-{5,8} Přeposlaná zpráva -{5,8}\s*/, # Yahoo Mail (cs), Thunderbird (cs)
|
92
|
+
/^\s*-{5,8} Videresendt meddelelse -{5,8}\s*/, # Yahoo Mail (da), Thunderbird (da)
|
93
|
+
/^\s*-{5,10} Weitergeleitete Nachricht -{5,10}\s*/, # Yahoo Mail (de), Thunderbird (de), HubSpot (de)
|
94
|
+
/^\s*-{5,8} Forwarded Message -{5,8}\s*/, # Yahoo Mail (en), Thunderbird (en)
|
95
|
+
/^\s*-{5,10} Mensaje reenviado -{5,10}\s*/, # Yahoo Mail (es), Thunderbird (es), HubSpot (es)
|
96
|
+
/^\s*-{5,10} Edelleenlähetetty viesti -{5,10}\s*/, # Yahoo Mail (fi), HubSpot (fi)
|
97
|
+
/^\s*-{5} Message transmis -{5}\s*/, # Yahoo Mail (fr)
|
98
|
+
/^\s*-{5,8} Továbbított üzenet -{5,8}\s*/, # Yahoo Mail (hu), Thunderbird (hu)
|
99
|
+
/^\s*-{5,10} Messaggio inoltrato -{5,10}\s*/, # Yahoo Mail (it), HubSpot (it)
|
100
|
+
/^\s*-{5,10} Doorgestuurd bericht -{5,10}\s*/, # Yahoo Mail (nl), Thunderbird (nl), HubSpot (nl)
|
101
|
+
/^\s*-{5,8} Videresendt melding -{5,8}\s*/, # Yahoo Mail (no), Thunderbird (no)
|
102
|
+
/^\s*-{5} Przekazana wiadomość -{5}\s*/, # Yahoo Mail (pl)
|
103
|
+
/^\s*-{5,8} Mensagem reencaminhada -{5,8}\s*/, # Yahoo Mail (pt), Thunderbird (pt)
|
104
|
+
/^\s*-{5,10} Mensagem encaminhada -{5,10}\s*/, # Yahoo Mail (pt-br), Thunderbird (pt-br), HubSpot (pt-br)
|
105
|
+
/^\s*-{5,8} Mesaj redirecționat -{5,8}\s*/, # Yahoo Mail (ro)
|
106
|
+
/^\s*-{5} Пересылаемое сообщение -{5}\s*/, # Yahoo Mail (ru)
|
107
|
+
/^\s*-{5} Preposlaná správa -{5}\s*/, # Yahoo Mail (sk)
|
108
|
+
/^\s*-{5,10} Vidarebefordrat meddelande -{5,10}\s*/, # Yahoo Mail (sv), Thunderbird (sv), HubSpot (sv)
|
109
|
+
/^\s*-{5} İletilmiş Mesaj -{5}\s*/, # Yahoo Mail (tr)
|
110
|
+
/^\s*-{5} Перенаправлене повідомлення -{5}\s*/, # Yahoo Mail (uk)
|
111
|
+
%r{^\s*-{8} Välitetty viesti / Fwd.Msg -{8}\s*}m, # Thunderbird (fi)
|
112
|
+
/^\s*-{8,10} Message transféré -{8,10}\s*/, # Thunderbird (fr), HubSpot (fr)
|
113
|
+
/^\s*-{8} Proslijeđena poruka -{8}\s*/, # Thunderbird (hr)
|
114
|
+
/^\s*-{8} Messaggio Inoltrato -{8}\s*/, # Thunderbird (it)
|
115
|
+
/^\s*-{3} Treść przekazanej wiadomości -{3}\s*/, # Thunderbird (pl)
|
116
|
+
/^\s*-{8} Перенаправленное сообщение -{8}\s*/, # Thunderbird (ru)
|
117
|
+
/^\s*-{8} Preposlaná správa --- Forwarded Message -{8}\s*/, # Thunderbird (sk)
|
118
|
+
/^\s*-{8} İletilen İleti -{8}\s*/, # Thunderbird (tr)
|
119
|
+
/^\s*-{8} Переслане повідомлення -{8}\s*/, # Thunderbird (uk)
|
120
|
+
/^\s*-{9,10} メッセージを転送 -{9,10}\s*/, # HubSpot (ja)
|
121
|
+
/^\s*-{9,10} Wiadomość przesłana dalej -{9,10}\s*/, # HubSpot (pl)
|
122
|
+
/^>?\s*-{10} Original Message -{10}\s*/ # IONOS by 1 & 1 (en)
|
123
|
+
],
|
124
|
+
|
125
|
+
separator_with_information: [
|
126
|
+
/^\s?Dne\s?(?<date>.+),\s?(?<from_name>.+)\s*[\[|<](?<from_address>.+)[\]|>]\s?napsal\(a\)\s?:/, # Outlook 2019 (cz)
|
127
|
+
/^\s?D.\s?(?<date>.+)\s?skrev\s?"(?<from_name>.+)"\s*[\[|<](?<from_address>.+)[\]|>]\s?:/, # Outlook 2019 (da)
|
128
|
+
/^\s?Am\s?(?<date>.+)\s?schrieb\s?"(?<from_name>.+)"\s*[\[|<](?<from_address>.+)[\]|>]\s?:/, # Outlook 2019 (de)
|
129
|
+
/^\s?On\s?(?<date>.+),\s?"(?<from_name>.+)"\s*[\[|<](?<from_address>.+)[\]|>]\s?wrote\s?:/, # Outlook 2019 (en)
|
130
|
+
/^\s?El\s?(?<date>.+),\s?"(?<from_name>.+)"\s*[\[|<](?<from_address>.+)[\]|>]\s?escribió\s?:/, # Outlook 2019 (es)
|
131
|
+
/^\s?Le\s?(?<date>.+),\s?«(?<from_name>.+)»\s*[\[|<](?<from_address>.+)[\]|>]\s?a écrit\s?:/, # Outlook 2019 (fr)
|
132
|
+
/^\s?(?<from_name>.+)\s*[\[|<](?<from_address>.+)[\]|>]\s?kirjoitti\s?(?<date>.+)\s?:/, # Outlook 2019 (fi)
|
133
|
+
/^\s?(?<date>.+)\s?időpontban\s?(?<from_name>.+)\s*[\[|<|(](?<from_address>.+)[\]|>|)]\s?ezt írta\s?:/, # Outlook 2019 (hu)
|
134
|
+
/^\s?Il giorno\s?(?<date>.+)\s?"(?<from_name>.+)"\s*[\[|<](?<from_address>.+)[\]|>]\s?ha scritto\s?:/, # Outlook 2019 (it)
|
135
|
+
/^\s?Op\s?(?<date>.+)\s?heeft\s?(?<from_name>.+)\s*[\[|<](?<from_address>.+)[\]|>]\s?geschreven\s?:/, # Outlook 2019 (nl)
|
136
|
+
/^\s?(?<from_name>.+)\s*[\[|<](?<from_address>.+)[\]|>]\s?skrev følgende den\s?(?<date>.+)\s?:/, # Outlook 2019 (no)
|
137
|
+
/^\s?Dnia\s?(?<date>.+)\s?„(?<from_name>.+)”\s*[\[|<](?<from_address>.+)[\]|>]\s?napisał\s?:/, # Outlook 2019 (pl)
|
138
|
+
/^\s?Em\s?(?<date>.+),\s?"(?<from_name>.+)"\s*[\[|<](?<from_address>.+)[\]|>]\s?escreveu\s?:/, # Outlook 2019 (pt)
|
139
|
+
/^\s?(?<date>.+)\s?пользователь\s?"(?<from_name>.+)"\s*[\[|<](?<from_address>.+)[\]|>]\s?написал\s?:/, # Outlook 2019 (ru)
|
140
|
+
/^\s?(?<date>.+)\s?používateľ\s?(?<from_name>.+)\s*\([\[|<](?<from_address>.+)[\]|>]\)\s?napísal\s?:/, # Outlook 2019 (sk)
|
141
|
+
/^\s?Den\s?(?<date>.+)\s?skrev\s?"(?<from_name>.+)"\s*[\[|<](?<from_address>.+)[\]|>]\s?följande\s?:/, # Outlook 2019 (sv)
|
142
|
+
/^\s?"(?<from_name>.+)"\s*[\[|<](?<from_address>.+)[\]|>],\s?(?<date>.+)\s?tarihinde şunu yazdı\s?:/ # Outlook 2019 (tr)
|
143
|
+
],
|
144
|
+
|
145
|
+
original_subject: [
|
146
|
+
/^\*?Subject\s?:\*?(.+)/i, # Apple Mail (en), Gmail (all locales), Outlook Live / 365 (all locales), New Outlook 2019 (en), Thunderbird (da, en), Missive (en), HubSpot (en)
|
147
|
+
/^Předmět\s?:(.+)/i, # Apple Mail (cs), New Outlook 2019 (cs), Thunderbird (cs)
|
148
|
+
/^Emne\s?:(.+)/i, # Apple Mail (da, no), New Outlook 2019 (da), Thunderbird (no)
|
149
|
+
/^Betreff\s?:(.+)/i, # Apple Mail (de), New Outlook 2019 (de), Thunderbird (de), HubSpot (de)
|
150
|
+
/^Asunto\s?:(.+)/i, # Apple Mail (es), New Outlook 2019 (es), Thunderbird (es), HubSpot (es)
|
151
|
+
/^Aihe\s?:(.+)/i, # Apple Mail (fi), New Outlook 2019 (fi), Thunderbird (fi), HubSpot (fi)
|
152
|
+
/^Objet\s?:(.+)/i, # Apple Mail (fr), New Outlook 2019 (fr), HubSpot (fr)
|
153
|
+
/^Predmet\s?:(.+)/i, # Apple Mail (hr, sk), New Outlook 2019 (sk), Thunderbird (sk)
|
154
|
+
/^Tárgy\s?:(.+)/i, # Apple Mail (hu), New Outlook 2019 (hu), Thunderbird (hu)
|
155
|
+
/^Oggetto\s?:(.+)/i, # Apple Mail (it), New Outlook 2019 (it), Thunderbird (it), HubSpot (it)
|
156
|
+
/^Onderwerp\s?:(.+)/i, # Apple Mail (nl), New Outlook 2019 (nl), Thunderbird (nl), HubSpot (nl)
|
157
|
+
/^Temat\s?:(.+)/i, # Apple Mail (pl), New Outlook 2019 (pl), Thunderbird (pl), HubSpot (pl)
|
158
|
+
/^Assunto\s?:(.+)/i, # Apple Mail (pt, pt-br), New Outlook 2019 (pt, pt-br), Thunderbird (pt, pt-br), HubSpot (pt-br)
|
159
|
+
/^Subiectul\s?:(.+)/i, # Apple Mail (ro), Thunderbird (ro)
|
160
|
+
/^Тема\s?:(.+)/i, # Apple Mail (ru, uk), New Outlook 2019 (ru), Thunderbird (ru, uk)
|
161
|
+
/^Ämne\s?:(.+)/i, # Apple Mail (sv), New Outlook 2019 (sv), Thunderbird (sv), HubSpot (sv)
|
162
|
+
/^Konu\s?:(.+)/i, # Apple Mail (tr), Thunderbird (tr)
|
163
|
+
/^Sujet\s?:(.+)/i, # Thunderbird (fr)
|
164
|
+
/^Naslov\s?:(.+)/i, # Thunderbird (hr)
|
165
|
+
/^件名:(.+)/i # HubSpot (ja)
|
166
|
+
],
|
167
|
+
|
168
|
+
original_subject_lax: [
|
169
|
+
/Subject\s?:(.+)/i, # Yahoo Mail (en)
|
170
|
+
/Emne\s?:(.+)/i, # Yahoo Mail (da, no)
|
171
|
+
/Předmět\s?:(.+)/i, # Yahoo Mail (cs)
|
172
|
+
/Betreff\s?:(.+)/i, # Yahoo Mail (de)
|
173
|
+
/Asunto\s?:(.+)/i, # Yahoo Mail (es)
|
174
|
+
/Aihe\s?:(.+)/i, # Yahoo Mail (fi)
|
175
|
+
/Objet\s?:(.+)/i, # Yahoo Mail (fr)
|
176
|
+
/Tárgy\s?:(.+)/i, # Yahoo Mail (hu)
|
177
|
+
/Oggetto\s?:(.+)/i, # Yahoo Mail (it)
|
178
|
+
/Onderwerp\s?:(.+)/i, # Yahoo Mail (nl)
|
179
|
+
/Assunto\s?:?(.+)/i, # Yahoo Mail (pt, pt-br)
|
180
|
+
/Temat\s?:(.+)/i, # Yahoo Mail (pl)
|
181
|
+
/Subiect\s?:(.+)/i, # Yahoo Mail (ro)
|
182
|
+
/Тема\s?:(.+)/i, # Yahoo Mail (ru, uk)
|
183
|
+
/Predmet\s?:(.+)/i, # Yahoo Mail (sk)
|
184
|
+
/Ämne\s?:(.+)/i, # Yahoo Mail (sv)
|
185
|
+
/Konu\s?:(.+)/i # Yahoo Mail (tr)
|
186
|
+
],
|
187
|
+
|
188
|
+
original_from: [
|
189
|
+
/^(\*?\s*From\s?:\*?(.+))$/, # Apple Mail (en), Outlook Live / 365 (all locales), New Outlook 2019 (en), Thunderbird (da, en), Missive (en), HubSpot (en)
|
190
|
+
/^(\s*Od\s?:(.+))$/, # Apple Mail (cs, pl, sk), Gmail (cs, pl, sk), New Outlook 2019 (cs, pl, sk), Thunderbird (cs, sk), HubSpot (pl)
|
191
|
+
/^(\s*Fra\s?:(.+))$/, # Apple Mail (da, no), Gmail (da, no), New Outlook 2019 (da), Thunderbird (no)
|
192
|
+
/^(\s*Von\s?:(.+))$/, # Apple Mail (de), Gmail (de), New Outlook 2019 (de), Thunderbird (de), HubSpot (de)
|
193
|
+
/^(\s*De\s?:(.+))$/, # Apple Mail (es, fr, pt, pt-br), Gmail (es, fr, pt, pt-br), New Outlook 2019 (es, fr, pt, pt-br), Thunderbird (fr, pt, pt-br), HubSpot (es, fr, pt-br)
|
194
|
+
/^(\s*Lähettäjä\s?:(.+))$/, # Apple Mail (fi), Gmail (fi), New Outlook 2019 (fi), Thunderbird (fi), HubSpot (fi)
|
195
|
+
/^(\s*Šalje\s?:(.+))$/, # Apple Mail (hr), Gmail (hr), Thunderbird (hr)
|
196
|
+
/^(\s*Feladó\s?:(.+))$/, # Apple Mail (hu), Gmail (hu), New Outlook 2019 (hu), Thunderbird (hu)
|
197
|
+
/^(\s*Da\s?:(.+))$/, # Apple Mail (it), Gmail (it), New Outlook 2019 (it), HubSpot (it)
|
198
|
+
/^(\s*Van\s?:(.+))$/, # Apple Mail (nl), Gmail (nl), New Outlook 2019 (nl), Thunderbird (nl), HubSpot (nl)
|
199
|
+
/^(\s*Expeditorul\s?:(.+))$/, # Apple Mail (ro)
|
200
|
+
/^(\s*Отправитель\s?:(.+))$/, # Apple Mail (ru)
|
201
|
+
/^(\s*Från\s?:(.+))$/, # Apple Mail (sv), Gmail (sv), New Outlook 2019 (sv), Thunderbird (sv), HubSpot (sv)
|
202
|
+
/^(\s*Kimden\s?:(.+))$/, # Apple Mail (tr), Thunderbird (tr)
|
203
|
+
/^(\s*Від кого\s?:(.+))$/, # Apple Mail (uk)
|
204
|
+
/^(\s*Saatja\s?:(.+))$/, # Gmail (et)
|
205
|
+
/^(\s*De la\s?:(.+))$/, # Gmail (ro)
|
206
|
+
/^(\s*Gönderen\s?:(.+))$/, # Gmail (tr)
|
207
|
+
/^(\s*От\s?:(.+))$/, # Gmail (ru), New Outlook 2019 (ru), Thunderbird (ru)
|
208
|
+
/^(\s*Від\s?:(.+))$/, # Gmail (uk), Thunderbird (uk)
|
209
|
+
/^(\s*Mittente\s?:(.+))$/, # Thunderbird (it)
|
210
|
+
/^(\s*Nadawca\s?:(.+))$/, # Thunderbird (pl)
|
211
|
+
/^(\s*de la\s?:(.+))$/, # Thunderbird (ro)
|
212
|
+
/^(\s*送信元:(.+))$/ # HubSpot (ja)
|
213
|
+
],
|
214
|
+
|
215
|
+
original_from_lax: [
|
216
|
+
/(\s*From\s?:(.+?)\s?\n?\s*[\[|<](.+?)[\]|>])/, # Yahoo Mail (en)
|
217
|
+
/(\s*Od\s?:(.+?)\s?\n?\s*[\[|<](.+?)[\]|>])/, # Yahoo Mail (cs, pl, sk)
|
218
|
+
/(\s*Fra\s?:(.+?)\s?\n?\s*[\[|<](.+?)[\]|>])/, # Yahoo Mail (da, no)
|
219
|
+
/(\s*Von\s?:(.+?)\s?\n?\s*[\[|<](.+?)[\]|>])/, # Yahoo Mail (de)
|
220
|
+
/(\s*De\s?:(.+?)\s?\n?\s*[\[|<](.+?)[\]|>])/, # Yahoo Mail (es, fr, pt, pt-br)
|
221
|
+
/(\s*Lähettäjä\s?:(.+?)\s?\n?\s*[\[|<](.+?)[\]|>])/, # Yahoo Mail (fi)
|
222
|
+
/(\s*Feladó\s?:(.+?)\s?\n?\s*[\[|<](.+?)[\]|>])/, # Yahoo Mail (hu)
|
223
|
+
/(\s*Da\s?:(.+?)\s?\n?\s*[\[|<](.+?)[\]|>])/, # Yahoo Mail (it)
|
224
|
+
/(\s*Van\s?:(.+?)\s?\n?\s*[\[|<](.+?)[\]|>])/, # Yahoo Mail (nl)
|
225
|
+
/(\s*De la\s?:(.+?)\s?\n?\s*[\[|<](.+?)[\]|>])/, # Yahoo Mail (ro)
|
226
|
+
/(\s*От\s?:(.+?)\s?\n?\s*[\[|<](.+?)[\]|>])/, # Yahoo Mail (ru)
|
227
|
+
/(\s*Från\s?:(.+?)\s?\n?\s*[\[|<](.+?)[\]|>])/, # Yahoo Mail (sv)
|
228
|
+
/(\s*Kimden\s?:(.+?)\s?\n?\s*[\[|<](.+?)[\]|>])/, # Yahoo Mail (tr)
|
229
|
+
/(\s*Від\s?:(.+?)\s?\n?\s*[\[|<](.+?)[\]|>])/ # Yahoo Mail (uk)
|
230
|
+
],
|
231
|
+
|
232
|
+
original_to: [
|
233
|
+
/^\*?\s*To\s?:\*?(.+)$/, # Apple Mail (en), Gmail (all locales), Outlook Live / 365 (all locales), Thunderbird (da, en), Missive (en), HubSpot (en)
|
234
|
+
/^\s*Komu\s?:(.+)$/, # Apple Mail (cs), New Outlook 2019 (cs, sk), Thunderbird (cs)
|
235
|
+
/^\s*Til\s?:(.+)$/, # Apple Mail (da, no), New Outlook 2019 (da), Thunderbird (no)
|
236
|
+
/^\s*An\s?:(.+)$/, # Apple Mail (de), New Outlook 2019 (de), Thunderbird (de), HubSpot (de)
|
237
|
+
/^\s*Para\s?:(.+)$/, # Apple Mail (es, pt, pt-br), New Outlook 2019 (es, pt, pt-br), Thunderbird (es, pt, pt-br), HubSpot (pt-br)
|
238
|
+
/^\s*Vastaanottaja\s?:(.+)$/, # Apple Mail (fi), New Outlook 2019 (fi), Thunderbird (fi), HubSpot (fi)
|
239
|
+
/^\s*À\s?:(.+)$/, # Apple Mail (fr), New Outlook 2019 (fr), HubSpot (fr)
|
240
|
+
/^\s*Prima\s?:(.+)$/, # Apple Mail (hr), Thunderbird (hr)
|
241
|
+
/^\s*Címzett\s?:(.+)$/, # Apple Mail (hu), New Outlook 2019 (hu), Thunderbird (hu)
|
242
|
+
/^\s*A\s?:(.+)$/, # Apple Mail (it), New Outlook 2019 (it), Thunderbird (it), HubSpot (es, it)
|
243
|
+
/^\s*Aan\s?:(.+)$/, # Apple Mail (nl), New Outlook 2019 (nl), Thunderbird (nl), HubSpot (nl)
|
244
|
+
/^\s*Do\s?:(.+)$/, # Apple Mail (pl), New Outlook 2019 (pl), HubSpot (pl)
|
245
|
+
/^\s*Destinatarul\s?:(.+)$/, # Apple Mail (ro)
|
246
|
+
/^\s*Кому\s?:(.+)$/, # Apple Mail (ru, uk), New Outlook 2019 (ru), Thunderbird (ru, uk)
|
247
|
+
/^\s*Pre\s?:(.+)$/, # Apple Mail (sk), Thunderbird (sk)
|
248
|
+
/^\s*Till\s?:(.+)$/, # Apple Mail (sv), New Outlook 2019 (sv), Thunderbird (sv)
|
249
|
+
/^\s*Kime\s?:(.+)$/, # Apple Mail (tr), Thunderbird (tr)
|
250
|
+
/^\s*Pour\s?:(.+)$/, # Thunderbird (fr)
|
251
|
+
/^\s*Adresat\s?:(.+)$/, # Thunderbird (pl)
|
252
|
+
/^\s*送信先:(.+)$/ # HubSpot (ja)
|
253
|
+
],
|
254
|
+
|
255
|
+
original_to_lax: [
|
256
|
+
/\s*To\s?:(.+)$/, # Yahoo Mail (en)
|
257
|
+
/\s*Komu\s?:(.+)$/, # Yahoo Mail (cs, sk)
|
258
|
+
/\s*Til\s?:(.+)$/, # Yahoo Mail (da, no, sv)
|
259
|
+
/\s*An\s?:(.+)$/, # Yahoo Mail (de)
|
260
|
+
/\s*Para\s?:(.+)$/, # Yahoo Mail (es, pt, pt-br)
|
261
|
+
/\s*Vastaanottaja\s?:(.+)$/, # Yahoo Mail (fi)
|
262
|
+
/\s*À\s?:(.+)$/, # Yahoo Mail (fr)
|
263
|
+
/\s*Címzett\s?:(.+)$/, # Yahoo Mail (hu)
|
264
|
+
/\s*A\s?:(.+)$/, # Yahoo Mail (it)
|
265
|
+
/\s*Aan\s?:(.+)$/, # Yahoo Mail (nl)
|
266
|
+
/\s*Do\s?:(.+)$/, # Yahoo Mail (pl)
|
267
|
+
/\s*Către\s?:(.+)$/, # Yahoo Mail (ro), Thunderbird (ro)
|
268
|
+
/\s*Кому\s?:(.+)$/, # Yahoo Mail (ru, uk)
|
269
|
+
/\s*Till\s?:(.+)$/, # Yahoo Mail (sv)
|
270
|
+
/\s*Kime\s?:(.+)$/ # Yahoo Mail (tr)
|
271
|
+
],
|
272
|
+
|
273
|
+
original_reply_to: [
|
274
|
+
/^\s*Reply-To\s?:(.+)$/, # Apple Mail (en)
|
275
|
+
/^\s*Odgovori na\s?:(.+)$/, # Apple Mail (hr)
|
276
|
+
/^\s*Odpověď na\s?:(.+)$/, # Apple Mail (cs)
|
277
|
+
/^\s*Svar til\s?:(.+)$/, # Apple Mail (da)
|
278
|
+
/^\s*Antwoord aan\s?:(.+)$/, # Apple Mail (nl)
|
279
|
+
/^\s*Vastaus\s?:(.+)$/, # Apple Mail (fi)
|
280
|
+
/^\s*Répondre à\s?:(.+)$/, # Apple Mail (fr)
|
281
|
+
/^\s*Antwort an\s?:(.+)$/, # Apple Mail (de)
|
282
|
+
/^\s*Válaszcím\s?:(.+)$/, # Apple Mail (hu)
|
283
|
+
/^\s*Rispondi a\s?:(.+)$/, # Apple Mail (it)
|
284
|
+
/^\s*Svar til\s?:(.+)$/, # Apple Mail (no)
|
285
|
+
/^\s*Odpowiedź-do\s?:(.+)$/, # Apple Mail (pl)
|
286
|
+
/^\s*Responder A\s?:(.+)$/, # Apple Mail (pt)
|
287
|
+
/^\s*Responder a\s?:(.+)$/, # Apple Mail (pt-br, es)
|
288
|
+
/^\s*Răspuns către\s?:(.+)$/, # Apple Mail (ro)
|
289
|
+
/^\s*Ответ-Кому\s?:(.+)$/, # Apple Mail (ru)
|
290
|
+
/^\s*Odpovedať-Pre\s?:(.+)$/, # Apple Mail (sk)
|
291
|
+
/^\s*Svara till\s?:(.+)$/, # Apple Mail (sv)
|
292
|
+
/^\s*Yanıt Adresi\s?:(.+)$/, # Apple Mail (tr)
|
293
|
+
/^\s*Кому відповісти\s?:(.+)$/ # Apple Mail (uk)
|
294
|
+
],
|
295
|
+
|
296
|
+
original_cc: [
|
297
|
+
/^\*?\s*Cc\s?:\*?(.+)$/, # Apple Mail (en, da, es, fr, hr, it, pt, pt-br, ro, sk), Gmail (all locales), Outlook Live / 365 (all locales), New Outlook 2019 (da, de, en, fr, it, pt-br), Missive (en), HubSpot (de, en, es, it, nl, pt-br)
|
298
|
+
/^\s*CC\s?:(.+)$/, # New Outlook 2019 (es, nl, pt), Thunderbird (da, en, es, fi, hr, hu, it, nl, no, pt, pt-br, ro, tr, uk)
|
299
|
+
/^\s*Kopie\s?:(.+)$/, # Apple Mail (cs, de, nl), New Outlook 2019 (cs), Thunderbird (cs)
|
300
|
+
/^\s*Kopio\s?:(.+)$/, # Apple Mail (fi), New Outlook 2019 (fi), HubSpot (fi)
|
301
|
+
/^\s*Másolat\s?:(.+)$/, # Apple Mail (hu)
|
302
|
+
/^\s*Kopi\s?:(.+)$/, # Apple Mail (no)
|
303
|
+
/^\s*Dw\s?:(.+)$/, # Apple Mail (pl)
|
304
|
+
/^\s*Копия\s?:(.+)$/, # Apple Mail (ru), New Outlook 2019 (ru), Thunderbird (ru)
|
305
|
+
/^\s*Kopia\s?:(.+)$/, # Apple Mail (sv), New Outlook 2019 (sv), Thunderbird (pl, sv), HubSpot (sv)
|
306
|
+
/^\s*Bilgi\s?:(.+)$/, # Apple Mail (tr)
|
307
|
+
/^\s*Копія\s?:(.+)$/, # Apple Mail (uk),
|
308
|
+
/^\s*Másolatot kap\s?:(.+)$/, # New Outlook 2019 (hu)
|
309
|
+
/^\s*Kópia\s?:(.+)$/, # New Outlook 2019 (sk), Thunderbird (sk)
|
310
|
+
/^\s*DW\s?:(.+)$/, # New Outlook 2019 (pl), HubSpot (pl)
|
311
|
+
/^\s*Kopie \(CC\)\s?:(.+)$/, # Thunderbird (de)
|
312
|
+
/^\s*Copie à\s?:(.+)$/, # Thunderbird (fr)
|
313
|
+
/^\s*CC:(.+)$/ # HubSpot (ja)
|
314
|
+
],
|
315
|
+
|
316
|
+
original_cc_lax: [
|
317
|
+
/\s*Cc\s?:(.+)$/, # Yahoo Mail (da, en, it, nl, pt, pt-br, ro, tr)
|
318
|
+
/\s*CC\s?:(.+)$/, # Yahoo Mail (de, es)
|
319
|
+
/\s*Kopie\s?:(.+)$/, # Yahoo Mail (cs)
|
320
|
+
/\s*Kopio\s?:(.+)$/, # Yahoo Mail (fi)
|
321
|
+
/\s*Másolat\s?:(.+)$/, # Yahoo Mail (hu)
|
322
|
+
/\s*Kopi\s?:(.+)$/, # Yahoo Mail (no)
|
323
|
+
/\s*Dw\s?(.+)$/, # Yahoo Mail (pl)
|
324
|
+
/\s*Копия\s?:(.+)$/, # Yahoo Mail (ru)
|
325
|
+
/\s*Kópia\s?:(.+)$/, # Yahoo Mail (sk)
|
326
|
+
/\s*Kopia\s?:(.+)$/, # Yahoo Mail (sv)
|
327
|
+
/\s*Копія\s?:(.+)$/ # Yahoo Mail (uk)
|
328
|
+
],
|
329
|
+
|
330
|
+
original_date: [
|
331
|
+
/^\s*Date\s?:(.+)$/, # Apple Mail (en, fr), Gmail (all locales), New Outlook 2019 (en, fr), Thunderbird (da, en, fr), Missive (en), HubSpot (en, fr)
|
332
|
+
/^\s*Datum\s?:(.+)$/, # Apple Mail (cs, de, hr, nl, sv), New Outlook 2019 (cs, de, nl, sv), Thunderbird (cs, de, hr, nl, sv), HubSpot (de, nl, sv)
|
333
|
+
/^\s*Dato\s?:(.+)$/, # Apple Mail (da, no), New Outlook 2019 (da), Thunderbird (no)
|
334
|
+
/^\s*Envoyé\s?:(.+)$/, # New Outlook 2019 (fr)
|
335
|
+
/^\s*Fecha\s?:(.+)$/, # Apple Mail (es), New Outlook 2019 (es), Thunderbird (es), HubSpot (es)
|
336
|
+
/^\s*Päivämäärä\s?:(.+)$/, # Apple Mail (fi), New Outlook 2019 (fi), HubSpot (fi)
|
337
|
+
/^\s*Dátum\s?:(.+)$/, # Apple Mail (hu, sk), New Outlook 2019 (sk), Thunderbird (hu, sk)
|
338
|
+
/^\s*Data\s?:(.+)$/, # Apple Mail (it, pl, pt, pt-br), New Outlook 2019 (it, pl, pt, pt-br), Thunderbird (it, pl, pt, pt-br), HubSpot (it, pl, pt-br)
|
339
|
+
/^\s*Dată\s?:(.+)$/, # Apple Mail (ro), Thunderbird (ro)
|
340
|
+
/^\s*Дата\s?:(.+)$/, # Apple Mail (ru, uk), New Outlook 2019 (ru), Thunderbird (ru, uk)
|
341
|
+
/^\s*Tarih\s?:(.+)$/, # Apple Mail (tr), Thunderbird (tr)
|
342
|
+
/^\*?\s*Sent\s?:\*?(.+)$/, # Outlook Live / 365 (all locales)
|
343
|
+
/^\s*Päiväys\s?:(.+)$/, # Thunderbird (fi)
|
344
|
+
/^\s*日付:(.+)$/ # HubSpot (ja)
|
345
|
+
],
|
346
|
+
|
347
|
+
original_date_lax: [
|
348
|
+
/\s*Datum\s?:(.+)$/, # Yahoo Mail (cs)
|
349
|
+
/\s*Sendt\s?:(.+)$/, # Yahoo Mail (da, no)
|
350
|
+
/\s*Gesendet\s?:(.+)$/, # Yahoo Mail (de)
|
351
|
+
/\s*Sent\s?:(.+)$/, # Yahoo Mail (en)
|
352
|
+
/\s*Enviado\s?:(.+)$/, # Yahoo Mail (es, pt, pt-br)
|
353
|
+
/\s*Envoyé\s?:(.+)$/, # Yahoo Mail (fr)
|
354
|
+
/\s*Lähetetty\s?:(.+)$/, # Yahoo Mail (fi)
|
355
|
+
/\s*Elküldve\s?:(.+)$/, # Yahoo Mail (hu)
|
356
|
+
/\s*Inviato\s?:(.+)$/, # Yahoo Mail (it)
|
357
|
+
/\s*Verzonden\s?:(.+)$/, # Yahoo Mail (nl)
|
358
|
+
/\s*Wysłano\s?:(.+)$/, # Yahoo Mail (pl)
|
359
|
+
/\s*Trimis\s?:(.+)$/, # Yahoo Mail (ro)
|
360
|
+
/\s*Отправлено\s?:(.+)$/, # Yahoo Mail (ru)
|
361
|
+
/\s*Odoslané\s?:(.+)$/, # Yahoo Mail (sk)
|
362
|
+
/\s*Skickat\s?:(.+)$/, # Yahoo Mail (sv)
|
363
|
+
/\s*Gönderilen\s?:(.+)$/, # Yahoo Mail (tr)
|
364
|
+
/\s*Відправлено\s?:(.+)$/ # Yahoo Mail (uk)
|
365
|
+
],
|
366
|
+
|
367
|
+
mailbox: [
|
368
|
+
/^\s?\n?\s*<.+?<mailto:(.+?)>>/, # "<walter.sheltan@acme.com<mailto:walter.sheltan@acme.com>>"
|
369
|
+
/^(.+?)\s?\n?\s*<.+?<mailto:(.+?)>>/, # "Walter Sheltan <walter.sheltan@acme.com<mailto:walter.sheltan@acme.com>>"
|
370
|
+
/^(.+?)\s?\n?\s*[\[|<]mailto:(.+?)[\]|>]/, # "Walter Sheltan <mailto:walter.sheltan@acme.com>" or "Walter Sheltan [mailto:walter.sheltan@acme.com]" or "walter.sheltan@acme.com <mailto:walter.sheltan@acme.com>"
|
371
|
+
/^'(.+?)'\s?\n?\s*[\[|<](.+?)[\]|>]/, # "'Walter Sheltan' <walter.sheltan@acme.com>" or "'Walter Sheltan' [walter.sheltan@acme.com]" or "'walter.sheltan@acme.com' <walter.sheltan@acme.com>"
|
372
|
+
/^"'(.+?)'"\s?\n?\s*[\[|<](.+?)[\]|>]/, # ""'Walter Sheltan'" <walter.sheltan@acme.com>" or ""'Walter Sheltan'" [walter.sheltan@acme.com]" or ""'walter.sheltan@acme.com'" <walter.sheltan@acme.com>"
|
373
|
+
/^"(.+?)"\s?\n?\s*[\[|<](.+?)[\]|>]/, # ""Walter Sheltan" <walter.sheltan@acme.com>" or ""Walter Sheltan" [walter.sheltan@acme.com]" or ""walter.sheltan@acme.com" <walter.sheltan@acme.com>"
|
374
|
+
/^([^,;]+?)\s?\n?\s*[\[|<](.+?)[\]|>]/, # "Walter Sheltan <walter.sheltan@acme.com>" or "Walter Sheltan [walter.sheltan@acme.com]" or "walter.sheltan@acme.com <walter.sheltan@acme.com>"
|
375
|
+
/^(.?)\s?\n?\s*[\[|<](.+?)[\]|>]/, # "<walter.sheltan@acme.com>"
|
376
|
+
/^([^\s@]+@[^\s@]+\.[^\s@,]+)/, # "walter.sheltan@acme.com"
|
377
|
+
/^([^;].+?)\s?\n?\s*[\[|<](.+?)[\]|>]/ # "Walter, Sheltan <walter.sheltan@acme.com>" or "Walter, Sheltan [walter.sheltan@acme.com]"
|
378
|
+
],
|
379
|
+
|
380
|
+
mailbox_address: [
|
381
|
+
/^(([^\s@]+)@([^\s@]+)\.([^\s@]+))$/
|
382
|
+
]
|
383
|
+
}.freeze
|
384
|
+
|
385
|
+
def initialize
  # Build the regex table once per instance. load_regexes returns the
  # populated hash (it also stores it in @regexes itself).
  compiled = load_regexes
  @regexes = compiled
end
|
388
|
+
|
389
|
+
# Extracts the subject that follows a known "forwarded" prefix.
# @param subject [String] The subject line to inspect
# @return [String, nil] The trimmed text captured after the prefix (possibly
#   an empty string), or nil when no usable match was found
def parse_subject(subject)
  found = Utils.loop_regexes(@regexes[:subject], subject)

  # No match, or a match without the capture group: not a forwarded subject
  return nil unless found && found.length > 1

  # Notice: a prefix with nothing after it (e.g. 'Fwd: ') still counts, so
  # the result is coerced to a String rather than left as nil
  Utils.trim_string(found[1]).to_s
end
|
400
|
+
|
401
|
+
# Splits a message body into the forwarder's own message and the original
# (forwarded) email.
# @param body [String] The raw body text
# @param forwarded [Boolean] Pass true when the subject already showed that
#   the email was forwarded; only then is the From-based fallback attempted
# @return [Hash] { body:, message:, email: } when a split was found,
#   otherwise an empty hash
def parse_body(body, forwarded = false)
  # Normalise the text first: CRLF becomes LF, byte order marks and trailing
  # non-breaking spaces are removed, and any remaining non-breaking space
  # becomes a regular space (in that order)
  body = body
         .gsub(@regexes[:carriage_return], "\n")
         .gsub(@regexes[:byte_order_mark], "")
         .gsub(@regexes[:trailing_non_breaking_space], "")
         .gsub(@regexes[:non_breaking_space], " ")

  # First method: split on the separator line (Apple Mail, Gmail,
  # Outlook Live / 365, Outlook 2019, Yahoo Mail, Thunderbird).
  # The 'line' variant of the regex captures the separator line itself, as
  # it may be needed to rebuild the original email (nested forwards).
  parts = Utils.loop_regexes(@regexes[:separator_line], body, "split")

  if parts && parts.length > 2
    # One separator yields 3 substrings: the text before the separator line
    # (the message), the separator line, and the text after it (the
    # original email). Nested forwards yield further groups of substrings,
    # which are stitched back together here; from the first group only the
    # text after the separator (index 2) is kept.
    original = Utils.reconciliate_split_match(
      parts,
      3,  # min_substrings
      [2] # default_substrings
    )

    return {
      body: body,
      message: Utils.trim_string(parts[0]),
      email: Utils.trim_string(original)
    }
  end

  # The second method splits on the From part without further verification,
  # so it is only attempted when the caller passed a literal `true`
  return {} unless forwarded == true

  # Second method: split on the From part (New Outlook 2019,
  # Outlook Live / 365)
  parts = Utils.loop_regexes(@regexes[:original_from], body, "split")
  return {} unless parts && parts.length > 3

  # One From match yields 4 substrings: the text before the From line, the
  # From line, the From part itself, and the rest of the original email.
  # From the first group, the From line (1) and the rest (3) are kept; in
  # later substrings the From part itself is skipped.
  skip_from_part = ->(index) { index % 3 == 2 }

  original = Utils.reconciliate_split_match(
    parts,
    4,      # min_substrings
    [1, 3], # default_substrings
    skip_from_part # fn_exclude
  )

  {
    body: body,
    message: Utils.trim_string(parts[0]),
    email: Utils.trim_string(original)
  }
end
|
484
|
+
|
485
|
+
# Parses the individual parts of the original (forwarded) email.
# @param text [String] The text of the original email
# @param body [String] The body it was taken from; handed on unchanged to
#   the From and Date parsers
# @return [Hash] The parsed :body, :from, :to, :cc, :subject and :date
def parse_original_email(text, body)
  # Strip, in this order: byte order marks, ">" quote markers on otherwise
  # empty lines (the line break is kept), ">" quote markers at the start of
  # other lines, and a four-space indent at the start of lines
  cleanup_steps = %i[byte_order_mark quote_line_break quote four_spaces]
  text = cleanup_steps.reduce(text) do |current, step|
    current.gsub(@regexes[step], "")
  end

  {
    body: parse_original_body(text),
    from: parse_original_from(text, body),
    to: parse_original_to(text),
    cc: parse_original_cc(text),
    subject: parse_original_subject(text),
    date: parse_original_date(text, body)
  }
end
|
507
|
+
|
508
|
+
private
|
509
|
+
|
510
|
+
# Compiles REGEXES into @regexes and, for every key listed in LINE_REGEXES,
# also stores a 'line' alternative under "<key>_line".
# Array entries always get a "<key>_line" array (empty when the key is not
# a line regex); single entries only get "<key>_line" when they are one.
# @return [Hash] The populated @regexes hash
def load_regexes
  @regexes = {}

  REGEXES.each do |key, entry|
    line_key = :"#{key}_line"
    wants_line = LINE_REGEXES.include?(key)

    if entry.is_a?(Array)
      @regexes[key] = entry.map { |pattern| Regexp.new(pattern) }
      @regexes[line_key] = wants_line ? entry.map { |pattern| build_line_regex(pattern) } : []
    else
      @regexes[line_key] = build_line_regex(entry) if wants_line
      @regexes[key] = Regexp.new(entry)
    end
  end

  @regexes
end
|
541
|
+
|
542
|
+
# Builds 'line' alternative regex
|
543
|
+
# @param regex [Regexp] The regular expression to build a line regex from
|
544
|
+
# @return [Regexp] The 'line' regex
|
545
|
+
def build_line_regex(regex)
  # Wrapping the whole pattern in one extra group makes capture 1 the full
  # matched text, while the inner groups shift by one position.
  # `regex` has to be a Regexp (not a String): #source and #options are used.
  wrapped_source = "(#{regex.source})"

  # Only the i / m / x options are carried over to the new regex.
  carried_options = Regexp::IGNORECASE | Regexp::MULTILINE | Regexp::EXTENDED

  Regexp.new(wrapped_source, regex.options & carried_options)
end
|
556
|
+
|
557
|
+
# Extracts the body of the original email.
# @param text [String] The cleaned text of the original email
# @return [String] The trimmed body, or `text` itself when no header line
#   could be used to locate the body
def parse_original_body(text)
  # A split against a 'line' regex yields groups of substrings: what comes
  # before the header line, the header line, the header value, and what
  # comes after. Nested emails produce several such groups, hence the
  # reconciliation; indexes with i % 3 == 2 (the header values) are
  # excluded from it, and substring 3 is the default body.
  skip_header_value = ->(index) { index % 3 == 2 }
  rebuild_body = lambda do |parts|
    Utils.trim_string(
      Utils.reconciliate_split_match(parts, 4, [3], skip_header_value)
    )
  end

  # First method: take what follows the Subject, Cc, To, Reply-To or Date
  # line, tried in that order, provided an empty line separates the header
  # from the body.
  header_line_regexes = %i[
    original_subject_line
    original_cc_line
    original_to_line
    original_reply_to_line
    original_date_line
  ].map { |key| @regexes[key] }

  header_line_regexes.each do |line_regexes|
    parts = Utils.loop_regexes(line_regexes, text, "split")
    separated = parts && parts.length > 2 && parts[3]&.start_with?("\n\n")

    return rebuild_body.call(parts) if separated
  end

  # Second method: take what follows the Subject line (strict, then lax
  # regexes) without requiring an empty line after it.
  subject_line_regexes =
    @regexes[:original_subject_line] + @regexes[:original_subject_lax_line]
  parts = Utils.loop_regexes(subject_line_regexes, text, "split")

  return rebuild_body.call(parts) if parts && parts.length > 3

  # Third method: no usable header line, hand the text back untouched.
  text
end
|
642
|
+
|
643
|
+
# Parses mailbox(es)
|
644
|
+
# @private
|
645
|
+
# @param regexes [Array<Regexp>] Array of regular expressions to match mailboxes
|
646
|
+
# @param text [String] The text to parse
|
647
|
+
# @param force_array [Boolean] Whether to force the return value to be an array
|
648
|
+
# @return [Array<Hash>, Hash, nil] The parsed mailbox(es) or nil if not found
|
649
|
+
def parse_mailbox(regexes, text, force_array_positional = false, force_array: force_array_positional)
  # `force_array` is accepted both as the historical third positional
  # argument and as a keyword. Callers in this class pass
  # `force_array: true`; with a positional-only parameter that arrives as a
  # Hash, which is always truthy, so `force_array: false` could never work.
  not_found = force_array ? [] : nil

  match = Utils.loop_regexes(regexes, text)
  return not_found unless match&.length&.positive?

  # The last capture holds the raw list of mailboxes
  remaining = Utils.trim_string(match[-1])
  mailboxes = []

  # An empty String is truthy in Ruby, so the remainder is tested
  # explicitly: an exhausted (or empty) line must not be turned into an
  # extra mailbox without address and name.
  until remaining.nil? || remaining.empty?
    mailbox_match = Utils.loop_regexes(@regexes[:mailbox], remaining)

    # No structured mailbox left: keep what remains as a single mailbox
    unless mailbox_match&.length&.positive?
      mailboxes << prepare_mailbox(remaining, nil)
      break
    end

    # Three entries: full match, name, address. Otherwise: address only.
    if mailbox_match.length == 3
      name = mailbox_match[1]
      address = mailbox_match[2]
    else
      name = nil
      address = mailbox_match[1]
    end

    mailboxes << prepare_mailbox(address, name)

    # Remove the matched mailbox from the line...
    remaining = Utils.trim_string(remaining.sub(mailbox_match[0], ""))
    next if remaining.nil? || remaining.empty?

    # ...then one leading mailboxes separator, if any
    remaining = Utils.trim_string(remaining[1..]) if MAILBOXES_SEPARATORS.include?(remaining[0])
  end

  return not_found if mailboxes.empty?

  # Several mailboxes are always returned as an array; a single one is
  # only wrapped in an array on request
  return mailboxes if force_array || mailboxes.length > 1

  mailboxes.first
end
|
708
|
+
|
709
|
+
# Parses the author (From)
|
710
|
+
# @private
|
711
|
+
# @param text [String]
|
712
|
+
# @param body [String]
|
713
|
+
# @return [Hash] The parsed author
|
714
|
+
def parse_original_from(text, body)
  # First method: read the From part with the strict regexes. The result
  # is a single mailbox (Hash) or several (Array); with several, the first
  # one is the author.
  author = parse_mailbox(@regexes[:original_from], text)

  case author
  when Hash
    return author if author[:address] || author[:name]
  when Array
    return author.first if author[0][:address] || author[0][:name]
  end

  # Second method: read the author from the separator. Named captures are
  # used because the order of the parts varies with the localization.
  separator = Utils.loop_regexes(@regexes[:separator_with_information], body)
  separator_usable = separator && separator.length == 4 && separator.is_a?(MatchData)

  return prepare_mailbox(separator[:from_address], separator[:from_name]) if separator_usable

  # Third method: read the From part with the lax regexes
  # (capture 2 is the name, capture 3 the address)
  lax = Utils.loop_regexes(@regexes[:original_from_lax], text)

  return prepare_mailbox(lax[3], lax[2]) if lax && lax.length > 1

  # Nothing found: build a mailbox from nil values
  prepare_mailbox(nil, nil)
end
|
753
|
+
|
754
|
+
# Parses the subject part
|
755
|
+
# @private
|
756
|
+
# @param text [String]
|
757
|
+
# @return [String, nil] The parsed subject or nil if not found
|
758
|
+
def parse_original_subject(text)
  # Try the strict Subject regexes first, then the lax ones; the first
  # set that matches provides the subject (capture 1, trimmed).
  %i[original_subject original_subject_lax].each do |regexes_key|
    found = Utils.loop_regexes(@regexes[regexes_key], text)

    return Utils.trim_string(found[1]) if found&.length&.positive?
  end

  nil
end
|
773
|
+
|
774
|
+
# Parses the primary recipient(s) (To)
|
775
|
+
# @private
|
776
|
+
# @param text [String]
|
777
|
+
# @return [Array<Hash>] The parsed primary recipient(s)
|
778
|
+
def parse_original_to(text)
  # First method: read the To part with the strict regexes
  strict = parse_mailbox(@regexes[:original_to], text, force_array: true)

  return strict if strict.is_a?(Array) && strict.any?

  # Second method: the Subject, Date and Cc parts may be stuck to the To
  # part. Remove them one after the other, in that order, then read the
  # To part with the lax regexes.
  stuck_parts = %i[original_subject_lax original_date_lax original_cc_lax]
  to_only = stuck_parts.reduce(text) do |current, regexes_key|
    Utils.loop_regexes(@regexes[regexes_key], current, "replace")
  end

  parse_mailbox(@regexes[:original_to_lax], to_only, force_array: true)
end
|
817
|
+
|
818
|
+
# Parses the carbon-copy recipient(s) (Cc)
# @param text [String]
# @return [Array<Hash>] The parsed carbon-copy recipient(s)
def parse_original_cc(text)
  # First method: read the Cc part with the strict regexes
  strict = parse_mailbox(@regexes[:original_cc], text, force_array: true)

  return strict if strict.is_a?(Array) && strict.any?

  # Second method: the Subject and Date parts may be stuck to the Cc part.
  # Remove them one after the other, in that order, then read the Cc part
  # with the lax regexes.
  stuck_parts = %i[original_subject_lax original_date_lax]
  cc_only = stuck_parts.reduce(text) do |current, regexes_key|
    Utils.loop_regexes(@regexes[regexes_key], current, "replace")
  end

  parse_mailbox(@regexes[:original_cc_lax], cc_only, force_array: true)
end
|
851
|
+
|
852
|
+
# Parses the date part
# @param text [String]
# @param body [String] Text searched with the 'separator_with_information' regexes
# @return [String, nil] The parsed date or nil if not found
def parse_original_date(text, body)
  # First method: read the Date part with the strict regexes
  date_part = Utils.loop_regexes(@regexes[:original_date], text)

  return Utils.trim_string(date_part[1]) if date_part&.length&.positive?

  # Second method: read the date from the separator. A named capture is
  # used because the order of the parts varies with the localization.
  separator = Utils.loop_regexes(@regexes[:separator_with_information], body)
  separator_usable = separator && separator.length == 4 && separator.is_a?(MatchData)

  return Utils.trim_string(separator[:date]) if separator_usable

  # Third method: the Subject part may be stuck to the Date part. Remove
  # it, then read the Date part with the lax regexes.
  without_subject = Utils.loop_regexes(@regexes[:original_subject_lax], text, "replace")
  lax_date_part = Utils.loop_regexes(@regexes[:original_date_lax], without_subject)

  lax_date_part&.length&.positive? ? Utils.trim_string(lax_date_part[1]) : nil
end
|
882
|
+
|
883
|
+
# Prepares mailbox
|
884
|
+
# @private
|
885
|
+
# @param address [String]
|
886
|
+
# @param name [String]
|
887
|
+
# @return [Hash] The prepared mailbox
|
888
|
+
def prepare_mailbox(address, name)
  address = Utils.trim_string(address)
  name = Utils.trim_string(name)

  # Make sure mailbox address is valid
  mailbox_address_match = Utils.loop_regexes(
    @regexes[:mailbox_address],
    address
  )

  # Invalid mailbox address? Some clients only include the name, so the
  # value found in the address position becomes the name.
  if mailbox_address_match.nil?
    name = address
    address = nil
  end

  # Blank values become nil. `address` may already be nil at this point
  # (see above), so the checks must be nil-safe: calling `empty?` on it
  # unconditionally raises NoMethodError for name-only mailboxes.
  address = nil if address.nil? || address.empty?
  name = nil if name.nil? || name.empty?

  {
    address: address,

    # Some clients fill the name with the address
    # ("bessie.berry@acme.com <bessie.berry@acme.com>")
    name: address == name ? nil : name
  }
end
|
915
|
+
end
|
916
|
+
end
|