email_reply_trimmer 0.1.6 → 0.1.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/email_reply_trimmer.rb +57 -22
- data/lib/email_reply_trimmer/delimiter_matcher.rb +1 -1
- data/lib/email_reply_trimmer/email_header_matcher.rb +25 -13
- data/lib/email_reply_trimmer/embedded_email_matcher.rb +51 -25
- data/lib/email_reply_trimmer/signature_matcher.rb +16 -9
- data/test/elided/email_headers_5.txt +23 -0
- data/test/elided/embedded_ception.txt +3 -3
- data/test/elided/embedded_email_12.txt +2 -2
- data/test/elided/embedded_email_13.txt +9 -0
- data/test/elided/embedded_email_14.txt +11 -0
- data/test/elided/embedded_email_15.txt +4 -0
- data/test/elided/embedded_email_16.txt +4 -0
- data/test/elided/embedded_email_17.txt +2 -0
- data/test/elided/embedded_email_18.txt +1 -0
- data/test/elided/embedded_email_19.txt +0 -0
- data/test/elided/embedded_email_chinese.txt +4 -0
- data/test/elided/embedded_email_german_4.txt +15 -0
- data/test/elided/embedded_email_german_5.txt +20 -0
- data/test/elided/embedded_email_german_6.txt +8 -0
- data/test/elided/embedded_email_norwegian.txt +9 -0
- data/test/elided/embedded_email_quote_text.txt +5 -0
- data/test/elided/{embedded_email_russian.txt → embedded_email_russian_1.txt} +0 -0
- data/test/elided/embedded_email_russian_2.txt +23 -0
- data/test/elided/embedded_email_swedish.txt +8 -0
- data/test/elided/signatures.txt +2 -0
- data/test/emails/email_headers_5.txt +37 -0
- data/test/emails/embedded_email_1.txt +1 -1
- data/test/emails/embedded_email_13.txt +14 -0
- data/test/emails/embedded_email_14.txt +16 -0
- data/test/emails/embedded_email_15.txt +9 -0
- data/test/emails/embedded_email_16.txt +16 -0
- data/test/emails/embedded_email_17.txt +38 -0
- data/test/emails/embedded_email_18.txt +7 -0
- data/test/emails/embedded_email_19.txt +13 -0
- data/test/emails/embedded_email_4.txt +13 -13
- data/test/emails/embedded_email_7.txt +4 -4
- data/test/emails/embedded_email_chinese.txt +7 -0
- data/test/emails/embedded_email_german_4.txt +18 -0
- data/test/emails/embedded_email_german_5.txt +23 -0
- data/test/emails/embedded_email_german_6.txt +14 -0
- data/test/emails/embedded_email_norwegian.txt +11 -0
- data/test/emails/{embedded_email_russian.txt → embedded_email_russian_1.txt} +0 -0
- data/test/emails/embedded_email_russian_2.txt +26 -0
- data/test/emails/embedded_email_swedish.txt +20 -0
- data/test/emails/signatures.txt +2 -0
- data/test/test_email_reply_trimmer.rb +2 -2
- data/test/trimmed/email_headers_5.txt +11 -0
- data/test/trimmed/embedded_email_13.txt +3 -0
- data/test/trimmed/embedded_email_14.txt +3 -0
- data/test/trimmed/embedded_email_15.txt +3 -0
- data/test/trimmed/embedded_email_16.txt +11 -0
- data/test/trimmed/embedded_email_17.txt +35 -0
- data/test/trimmed/embedded_email_18.txt +5 -0
- data/test/trimmed/embedded_email_19.txt +13 -0
- data/test/trimmed/embedded_email_chinese.txt +2 -0
- data/test/trimmed/embedded_email_german_4.txt +1 -0
- data/test/trimmed/embedded_email_german_5.txt +1 -0
- data/test/trimmed/embedded_email_german_6.txt +4 -0
- data/test/trimmed/embedded_email_norwegian.txt +1 -0
- data/test/trimmed/embedded_email_quote_text.txt +0 -5
- data/test/trimmed/{embedded_email_russian.txt → embedded_email_russian_1.txt} +0 -0
- data/test/trimmed/embedded_email_russian_2.txt +1 -0
- data/test/trimmed/embedded_email_swedish.txt +9 -0
- metadata +51 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ae957d1af8aa2d31792525c2eeb9e87ee6343813
|
4
|
+
data.tar.gz: 0c40d729c66e243f8ac86a59bc861909abbeb786
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: afe3ca86183de852123dcc52e1ef2303ba947253c35ca41605377582cfb7b21a7c8fae7b6a9240e2fcab7cc14984f545e7a34c814f6a1742f8817a6c45760f3e
|
7
|
+
data.tar.gz: c0a3aafb8b37d12bb5751ca89fa99dfe5a4b07db23597227b102eca3939dfdb95685cd19629c3d818011a5b350fec68db9224748578b98a93096e1bb3dc2201d
|
data/lib/email_reply_trimmer.rb
CHANGED
@@ -6,7 +6,7 @@ require_relative "email_reply_trimmer/email_header_matcher"
|
|
6
6
|
require_relative "email_reply_trimmer/quote_matcher"
|
7
7
|
|
8
8
|
class EmailReplyTrimmer
|
9
|
-
VERSION = "0.1.6"
|
9
|
+
VERSION = "0.1.7"
|
10
10
|
|
11
11
|
DELIMITER = "d"
|
12
12
|
EMBEDDED = "b"
|
@@ -27,15 +27,10 @@ class EmailReplyTrimmer
|
|
27
27
|
end
|
28
28
|
|
29
29
|
def self.trim(text, split=false)
|
30
|
-
return if text.nil? || text =~ /\A[[:space:]]*\
|
30
|
+
return if text.nil? || text =~ /\A[[:space:]]*\z/m
|
31
31
|
|
32
|
-
#
|
33
|
-
|
34
|
-
|
35
|
-
# fix embedded email markers that might span over multiple lines
|
36
|
-
EmbeddedEmailMatcher::ON_DATE_SOMEONE_WROTE_REGEXES.each do |r|
|
37
|
-
text.gsub!(r) { |m| m.gsub(/\n[[:space:]>\-]*/, " ") }
|
38
|
-
end
|
32
|
+
# do some cleanup
|
33
|
+
preprocess!(text)
|
39
34
|
|
40
35
|
# from now on, we'll work on a line-by-line basis
|
41
36
|
lines = text.split("\n")
|
@@ -59,8 +54,8 @@ class EmailReplyTrimmer
|
|
59
54
|
end
|
60
55
|
|
61
56
|
# when the reply is at the end of the email
|
62
|
-
if pattern =~ /^b+
|
63
|
-
index = pattern =~ /t
|
57
|
+
if pattern =~ /^(b[^t]+)*b[bqeh]+t[et]*$/
|
58
|
+
index = pattern =~ /t[et]*$/
|
64
59
|
pattern = ""
|
65
60
|
lines = lines[index..-1]
|
66
61
|
end
|
@@ -75,12 +70,20 @@ class EmailReplyTrimmer
|
|
75
70
|
|
76
71
|
# if there is an embedded email marker, followed by a huge quote
|
77
72
|
# then take everything up to that marker
|
78
|
-
if pattern =~ /te*b[eqbh]*[te]
|
73
|
+
if pattern =~ /te*b[eqbh]*([te]*)$/ && $1.count("t") < 7
|
79
74
|
index = pattern =~ /te*b[eqbh]*[te]*$/
|
80
75
|
pattern = pattern[0..index]
|
81
76
|
lines = lines[0..index]
|
82
77
|
end
|
83
78
|
|
79
|
+
# if there is some text before a huge quote ending the email,
|
80
|
+
# then remove the quote
|
81
|
+
if pattern =~ /te*[qbe]+$/
|
82
|
+
index = pattern =~ /te*[qbe]+$/
|
83
|
+
pattern = pattern[0..index]
|
84
|
+
lines = lines[0..index]
|
85
|
+
end
|
86
|
+
|
84
87
|
# if there still are some embedded email markers, just remove them
|
85
88
|
while pattern =~ /b/
|
86
89
|
index = pattern =~ /b/
|
@@ -95,8 +98,8 @@ class EmailReplyTrimmer
|
|
95
98
|
size.times.each { |s| pattern[index + s] = EMAIL_HEADER }
|
96
99
|
end
|
97
100
|
|
98
|
-
# if there are at least 3 consecutive email headers,
|
99
|
-
# these headers
|
101
|
+
# if there are at least 3 consecutive email headers,
|
102
|
+
# take everything up to these headers
|
100
103
|
if pattern =~ /t[eq]*h{3,}/
|
101
104
|
index = pattern =~ /t[eq]*h{3,}/
|
102
105
|
pattern = pattern[0..index]
|
@@ -128,15 +131,10 @@ class EmailReplyTrimmer
|
|
128
131
|
end
|
129
132
|
|
130
133
|
def self.extract_embedded_email(text)
|
131
|
-
return if text.nil? || text =~ /\A[[:space:]]*\
|
132
|
-
|
133
|
-
# normalize line endings
|
134
|
-
text.gsub!("\r\n", "\n")
|
134
|
+
return if text.nil? || text =~ /\A[[:space:]]*\z/m
|
135
135
|
|
136
|
-
#
|
137
|
-
|
138
|
-
text.gsub!(r) { |m| m.gsub(/\n[[:space:]>\-]*/, " ") }
|
139
|
-
end
|
136
|
+
# do some cleanup
|
137
|
+
preprocess!(text)
|
140
138
|
|
141
139
|
# from now on, we'll work on a line-by-line basis
|
142
140
|
lines = text.split("\n")
|
@@ -153,6 +151,43 @@ class EmailReplyTrimmer
|
|
153
151
|
|
154
152
|
private
|
155
153
|
|
154
|
+
def self.preprocess!(text)
|
155
|
+
# normalize line endings
|
156
|
+
text.gsub!("\r\n", "\n")
|
157
|
+
|
158
|
+
# remove PGP markers
|
159
|
+
text.gsub!(/\A-----BEGIN PGP SIGNED MESSAGE-----\n(?:Hash: \w+)?\s+/i, "")
|
160
|
+
text.gsub!(/^-----BEGIN PGP SIGNATURE-----$[\s\S]+^-----END PGP SIGNATURE-----/, "")
|
161
|
+
|
162
|
+
# remove unsubscribe links
|
163
|
+
text.gsub!(/^Unsubscribe: .+@.+(\n.+http:.+)?\s*\z/i, "")
|
164
|
+
|
165
|
+
# remove alias-style quote markers
|
166
|
+
text.gsub!(/^.*>{5} "[^"\n]+" == .+ writes:/, "")
|
167
|
+
|
168
|
+
# change enclosed-style quotes format
|
169
|
+
text.gsub!(/^>>> ?(.+) ?>>>$\n([\s\S]+?)\n^<<< ?\1 ?<<<$/) { $2.gsub(/^/, "> ") }
|
170
|
+
text.gsub!(/^>{4,}[[:blank:]]*$\n([\s\S]+?)\n^<{4,}[[:blank:]]*$/) { $1.gsub(/^/, "> ") }
|
171
|
+
|
172
|
+
# fix all quotes formats
|
173
|
+
text.gsub!(/^((?:[[:blank:]]*[[:alpha:]]*[>|])+)/) { $1.gsub(/([[:alpha:]]+>|\|)/, ">") }
|
174
|
+
|
175
|
+
# fix embedded email markers that might span over multiple lines
|
176
|
+
(
|
177
|
+
EmbeddedEmailMatcher::ON_DATE_SOMEONE_WROTE_REGEXES +
|
178
|
+
EmbeddedEmailMatcher::SOMEONE_WROTE_ON_DATE_REGEXES +
|
179
|
+
EmbeddedEmailMatcher::DATE_SOMEONE_WROTE_REGEXES +
|
180
|
+
[EmbeddedEmailMatcher::DATE_SOMEONE_EMAIL_REGEX]
|
181
|
+
).each do |r|
|
182
|
+
text.gsub!(r) do |m|
|
183
|
+
m.count("\n") > 4 ? m : m.gsub(/\n+[[:space:]]*/, " ")
|
184
|
+
end
|
185
|
+
end
|
186
|
+
|
187
|
+
# remove leading/trailing whitespace
|
188
|
+
text.strip!
|
189
|
+
end
|
190
|
+
|
156
191
|
def self.compute_elided(text, lines)
|
157
192
|
elided = []
|
158
193
|
|
@@ -1,12 +1,12 @@
|
|
1
1
|
class EmailHeaderMatcher
|
2
2
|
|
3
|
-
EMAIL_HEADERS_WITH_DATE_MARKERS
|
4
|
-
#
|
3
|
+
EMAIL_HEADERS_WITH_DATE_MARKERS ||= [
|
4
|
+
# Norwegian
|
5
5
|
["Sendt"],
|
6
6
|
# English
|
7
|
-
["Sent"],
|
7
|
+
["Sent", "Date"],
|
8
8
|
# French
|
9
|
-
["Date"],
|
9
|
+
["Date", "Le"],
|
10
10
|
# German
|
11
11
|
["Gesendet"],
|
12
12
|
# Portuguese
|
@@ -17,19 +17,25 @@ class EmailHeaderMatcher
|
|
17
17
|
["Fecha"],
|
18
18
|
# Italian
|
19
19
|
["Data"],
|
20
|
+
# Dutch
|
21
|
+
["Datum"],
|
22
|
+
# Swedish
|
23
|
+
["Skickat"],
|
24
|
+
# Chinese
|
25
|
+
["发送时间"],
|
20
26
|
]
|
21
27
|
|
22
|
-
EMAIL_HEADERS_WITH_DATE_REGEXES
|
23
|
-
/^[[:blank:]
|
28
|
+
EMAIL_HEADERS_WITH_DATE_REGEXES ||= EMAIL_HEADERS_WITH_DATE_MARKERS.map do |header|
|
29
|
+
/^[[:blank:]*]*(?:#{header.join("|")})[[:blank:]*]*:.*\d+/
|
24
30
|
end
|
25
31
|
|
26
|
-
EMAIL_HEADERS_WITH_TEXT_MARKERS
|
27
|
-
#
|
32
|
+
EMAIL_HEADERS_WITH_TEXT_MARKERS ||= [
|
33
|
+
# Norwegian
|
28
34
|
["Fra", "Til", "Emne"],
|
29
35
|
# English
|
30
36
|
["From", "To", "Cc", "Reply-To", "Subject"],
|
31
37
|
# French
|
32
|
-
["De", "À", "Répondre à", "Objet"],
|
38
|
+
["De", "Expéditeur", "À", "Destinataire", "Répondre à", "Objet"],
|
33
39
|
# German
|
34
40
|
["Von", "An", "Betreff"],
|
35
41
|
# Portuguese
|
@@ -37,14 +43,20 @@ class EmailHeaderMatcher
|
|
37
43
|
# Spanish
|
38
44
|
["De", "Para", "Asunto"],
|
39
45
|
# Italian
|
40
|
-
["Da", "Risposta", "A", "Oggetto"]
|
46
|
+
["Da", "Risposta", "A", "Oggetto"],
|
47
|
+
# Dutch
|
48
|
+
["Van", "Beantwoorden - Aan", "Aan", "Onderwerp"],
|
49
|
+
# Swedish
|
50
|
+
["Från", "Till", "Ämne"],
|
51
|
+
# Chinese
|
52
|
+
["发件人", "收件人", "主题"],
|
41
53
|
]
|
42
54
|
|
43
|
-
EMAIL_HEADERS_WITH_TEXT_REGEXES
|
44
|
-
/^[[:blank:]
|
55
|
+
EMAIL_HEADERS_WITH_TEXT_REGEXES ||= EMAIL_HEADERS_WITH_TEXT_MARKERS.map do |header|
|
56
|
+
/^[[:blank:]*]*(?:#{header.join("|")})[[:blank:]*]*:.*[[:word:]]+/i
|
45
57
|
end
|
46
58
|
|
47
|
-
EMAIL_HEADER_REGEXES
|
59
|
+
EMAIL_HEADER_REGEXES ||= [
|
48
60
|
EMAIL_HEADERS_WITH_DATE_REGEXES,
|
49
61
|
EMAIL_HEADERS_WITH_TEXT_REGEXES,
|
50
62
|
].flatten
|
@@ -10,38 +10,43 @@ class EmbeddedEmailMatcher
|
|
10
10
|
# Dnia 14 lip 2015 o godz. 00:25 Michael Downey <info@discourse.org> napisał(a):
|
11
11
|
# Em seg, 27 de jul de 2015 17:13, Neil Lalonde <info@discourse.org> escreveu:
|
12
12
|
# El jueves, 21 de noviembre de 2013, codinghorror escribió:
|
13
|
-
#
|
14
|
-
|
13
|
+
# At 6/16/2016 08:32 PM, you wrote:
|
14
|
+
ON_DATE_SOMEONE_WROTE_REGEXES ||= [
|
15
|
+
# Chinese
|
16
|
+
/^[[:blank:]<>-]*在 ((?!\b(在|写道)\b)[\s\S])+?写道[[:blank:].:>-]*$/i,
|
15
17
|
# Dutch
|
16
|
-
[
|
18
|
+
/^[[:blank:]<>-]*Op ((?!\b(Op|het\svolgende\sgeschreven|schreef)\b)[\s\S])+?(het\svolgende\sgeschreven|schreef[^:]+)[[:blank:].:>-]*$/i,
|
17
19
|
# English
|
18
|
-
[
|
20
|
+
/^[[:blank:]<>-]*In message ((?!\b(In message|writes)\b)[\s\S])+?writes[[:blank:].:>-]*$/i,
|
21
|
+
/^[[:blank:]<>-]*(On|At) ((?!\b(On|wrote|writes|says|said)\b)[\s\S])+?(wrote|writes|says|said)[[:blank:].:>-]*$/i,
|
19
22
|
# French
|
20
|
-
[
|
23
|
+
/^[[:blank:]<>-]*Le ((?!\b(Le|nous\sa\sdit|a\s+écrit)\b)[\s\S])+?(nous\sa\sdit|a\s+écrit)[[:blank:].:>-]*$/i,
|
24
|
+
# German
|
25
|
+
/^[[:blank:]<>-]*Am ((?!\b(Am|schrieben\sSie)\b)[\s\S])+?schrieben\sSie[[:blank:].:>-]*$/i,
|
26
|
+
/^[[:blank:]<>-]*Am ((?!\b(Am|geschrieben)\b)[\s\S])+?(geschrieben|schrieb[^:]+)[[:blank:].:>-]*$/i,
|
21
27
|
# Italian
|
22
|
-
[
|
28
|
+
/^[[:blank:]<>-]*Il ((?!\b(Il|ha\sscritto)\b)[\s\S])+?ha\sscritto[[:blank:].:>-]*$/i,
|
23
29
|
# Polish
|
24
|
-
[
|
30
|
+
/^[[:blank:]<>-]*(Dnia|Dňa) ((?!\b(Dnia|Dňa|napisał)\b)[\s\S])+?napisał(\(a\))?[[:blank:].:>-]*$/i,
|
25
31
|
# Portuguese
|
26
|
-
[
|
32
|
+
/^[[:blank:]<>-]*Em ((?!\b(Em|escreveu)\b)[\s\S])+?escreveu[[:blank:].:>-]*$/i,
|
27
33
|
# Spanish
|
28
|
-
[
|
29
|
-
# German
|
30
|
-
["Am", "schrieb"],
|
34
|
+
/^[[:blank:]<>-]*El ((?!\b(El|escribió)\b)[\s\S])+?escribió[[:blank:].:>-]*$/i,
|
31
35
|
]
|
32
36
|
|
33
|
-
ON_DATE_SOMEONE_WROTE_REGEXES = ON_DATE_SOMEONE_WROTE_MARKERS.map do |on, wrote|
|
34
|
-
wrote.gsub!(/ +/, "[[:space:]]+") # the "wrote" part might span over multiple lines
|
35
|
-
/^([[:blank:]>\-]*#{on}\s(?:(?!#{on}\s|#{wrote}:?)[\s\S])*#{wrote}:?[[:blank:]\-]*)$/m
|
36
|
-
end
|
37
|
-
|
38
37
|
# Op 10 dec. 2015 18:35 schreef "Arpit Jalan" <info@discourse.org>:
|
39
38
|
# Am 18.09.2013 um 16:24 schrieb codinghorror <info@discourse.org>:
|
39
|
+
# Den 15. jun. 2016 kl. 20.42 skrev Jeff Atwood <info@discourse.org>:
|
40
|
+
# søn. 30. apr. 2017 kl. 00.26 skrev David Taylor <meta@discoursemail.com>:
|
40
41
|
ON_DATE_WROTE_SOMEONE_MARKERS = [
|
41
42
|
# Dutch
|
42
43
|
["Op", "schreef"],
|
43
44
|
# German
|
44
45
|
["Am", "schrieb"],
|
46
|
+
# Norwegian
|
47
|
+
["Den", "skrev"],
|
48
|
+
# Norwegian (date prefixed with the abbreviated weekday)
|
49
|
+
["søn\.", "skrev"],
|
45
50
|
]
|
46
51
|
|
47
52
|
ON_DATE_WROTE_SOMEONE_REGEXES = ON_DATE_WROTE_SOMEONE_MARKERS.map do |on, wrote|
|
@@ -52,20 +57,35 @@ class EmbeddedEmailMatcher
|
|
52
57
|
DATE_SOMEONE_WROTE_MARKERS = [
|
53
58
|
# Russian
|
54
59
|
["пользователь", "написал"],
|
60
|
+
# Polish
|
61
|
+
["", "napisał\\(a\\)"],
|
62
|
+
# Ukrainian
|
63
|
+
["", "пише"],
|
55
64
|
]
|
56
65
|
|
57
66
|
DATE_SOMEONE_WROTE_REGEXES = DATE_SOMEONE_WROTE_MARKERS.map do |user, wrote|
|
58
|
-
|
67
|
+
user.size == 0 ?
|
68
|
+
/^.*\d{4}.*?(?:(?!#{wrote})[\s\S])*#{wrote}:/ :
|
69
|
+
/^.*\d{4}.*?#{user}.*?(?:(?!#{wrote})[\s\S])*#{wrote}:/
|
59
70
|
end
|
60
71
|
|
72
|
+
# Max Mustermann <try_discourse@discoursemail.com> schrieb am Fr., 28. Apr. 2017 um 11:53 Uhr:
|
73
|
+
SOMEONE_WROTE_ON_DATE_REGEXES ||= [
|
74
|
+
# English
|
75
|
+
/^.+\bwrote\b[[:space:]]+\bon\b.+[^:]+:/,
|
76
|
+
# German
|
77
|
+
/^.+\bschrieb\b[[:space:]]+\bam\b.+[^:]+:/,
|
78
|
+
]
|
79
|
+
|
61
80
|
# 2016-03-03 17:21 GMT+01:00 Some One
|
62
81
|
ISO_DATE_SOMEONE_REGEX = /^[[:blank:]>]*20\d\d-\d\d-\d\d \d\d:\d\d GMT\+\d\d:\d\d [\w[:blank:]]+$/
|
63
82
|
|
83
|
+
|
64
84
|
# 2015-10-18 0:17 GMT+03:00 Matt Palmer <info@discourse.org>:
|
65
85
|
# 2013/10/2 camilohollanda <info@discourse.org>
|
66
86
|
# вт, 5 янв. 2016 г. в 23:39, Erlend Sogge Heggen <info@discourse.org>:
|
67
87
|
# ср, 1 апр. 2015, 18:29, Denis Didkovsky <info@discourse.org>:
|
68
|
-
DATE_SOMEONE_EMAIL_REGEX =
|
88
|
+
DATE_SOMEONE_EMAIL_REGEX = /^.*\d{4}.+\s?<[^@<>]+@[^@<>.]+\.[^@<>]+>:?$/
|
69
89
|
|
70
90
|
# codinghorror via Discourse Meta wrote:
|
71
91
|
# codinghorror via Discourse Meta <info@discourse.org> schrieb:
|
@@ -77,11 +97,12 @@ class EmbeddedEmailMatcher
|
|
77
97
|
]
|
78
98
|
|
79
99
|
SOMEONE_VIA_SOMETHING_WROTE_REGEXES = SOMEONE_VIA_SOMETHING_WROTE_MARKERS.map do |wrote|
|
80
|
-
|
100
|
+
/^.+ via .+ #{wrote}:?[[:blank:]]*$/
|
81
101
|
end
|
82
102
|
|
83
103
|
# Some One <info@discourse.org> wrote:
|
84
|
-
|
104
|
+
# Gavin Sinclair (gsinclair@soyabean.com.au) wrote:
|
105
|
+
SOMEONE_EMAIL_WROTE_REGEX = /^.+\b[\w.+-]+@[\w.-]+\.\w{2,}\b.+wrote:?$/
|
85
106
|
|
86
107
|
# Posted by mpalmer on 01/21/2016
|
87
108
|
POSTED_BY_SOMEONE_ON_DATE_REGEX = /^[[:blank:]>]*Posted by .+ on \d{2}\/\d{2}\/\d{4}$/i
|
@@ -92,17 +113,21 @@ class EmbeddedEmailMatcher
|
|
92
113
|
# ----- Original Message -----
|
93
114
|
# -----Original Message-----
|
94
115
|
# *----- Original Message -----*
|
116
|
+
# ----- Reply message -----
|
117
|
+
# ------------------ 原始邮件 ------------------
|
95
118
|
FORWARDED_EMAIL_REGEXES = [
|
96
119
|
# English
|
97
120
|
/^[[:blank:]>]*Begin forwarded message:/i,
|
98
|
-
/^[[:blank:]
|
99
|
-
/^[[:blank:]>\*]*-{2,}[[:blank:]]*(Forwarded|Original) Message[[:blank:]]*-{2,}/i,
|
121
|
+
/^[[:blank:]>*]*-{2,}[[:blank:]]*(Forwarded|Original|Reply) Message[[:blank:]]*-{2,}/i,
|
100
122
|
# French
|
101
|
-
/^[[:blank:]
|
123
|
+
/^[[:blank:]>]*Début du message transféré :/i,
|
124
|
+
/^[[:blank:]>*]*-{2,}[[:blank:]]*Message transféré[[:blank:]]*-{2,}/i,
|
102
125
|
# German
|
103
|
-
/^[[:blank:]
|
126
|
+
/^[[:blank:]>*]*-{2,}[[:blank:]]*Ursprüngliche Nachricht[[:blank:]]*-{2,}/i,
|
104
127
|
# Spanish
|
105
|
-
/^[[:blank:]
|
128
|
+
/^[[:blank:]>*]*-{2,}[[:blank:]]*Mensaje original[[:blank:]]*-{2,}/i,
|
129
|
+
# Chinese
|
130
|
+
/^[[:blank:]>*]*-{2,}[[:blank:]]*原始邮件[[:blank:]]*-{2,}/i,
|
106
131
|
]
|
107
132
|
|
108
133
|
EMBEDDED_REGEXES = [
|
@@ -110,6 +135,7 @@ class EmbeddedEmailMatcher
|
|
110
135
|
ON_DATE_WROTE_SOMEONE_REGEXES,
|
111
136
|
DATE_SOMEONE_WROTE_REGEXES,
|
112
137
|
DATE_SOMEONE_EMAIL_REGEX,
|
138
|
+
SOMEONE_WROTE_ON_DATE_REGEXES,
|
113
139
|
ISO_DATE_SOMEONE_REGEX,
|
114
140
|
SOMEONE_VIA_SOMETHING_WROTE_REGEXES,
|
115
141
|
SOMEONE_EMAIL_WROTE_REGEX,
|
@@ -13,20 +13,27 @@ class SignatureMatcher
|
|
13
13
|
# (sent from a phone)
|
14
14
|
# (Sent from mobile device)
|
15
15
|
# 從我的 iPhone 傳送
|
16
|
-
SIGNATURE_REGEXES
|
16
|
+
SIGNATURE_REGEXES ||= [
|
17
17
|
# Chinese
|
18
|
-
/^[[:blank:]
|
18
|
+
/^[[:blank:]]*從我的 iPhone 傳送/i,
|
19
19
|
# English
|
20
|
-
/^[[:blank:]
|
21
|
-
/^[[:blank:]
|
22
|
-
/^[[:blank:]
|
20
|
+
/^[[:blank:]]*[[:word:]]+ from mobile/i,
|
21
|
+
/^[[:blank:]]*[\(<]*Sent (from|via|with|by) .+[\)>]*/i,
|
22
|
+
/^[[:blank:]]*From my .{1,20}/i,
|
23
|
+
/^[[:blank:]]*Get Outlook for iOS/i,
|
23
24
|
# French
|
24
|
-
/^[[:blank:]
|
25
|
+
/^[[:blank:]]*Envoyé depuis (mon|Yahoo Mail)/i,
|
25
26
|
# German
|
26
|
-
/^[[:blank:]
|
27
|
-
/^[[:blank:]
|
27
|
+
/^[[:blank:]]*Von meinem .+ gesendet/i,
|
28
|
+
/^[[:blank:]]*Diese Nachricht wurde von .+ gesendet/i,
|
29
|
+
# Italian
|
30
|
+
/^[[:blank:]]*Inviato da /i,
|
31
|
+
# Norwegian
|
32
|
+
/^[[:blank:]]*Sendt fra min /i,
|
33
|
+
# Portuguese
|
34
|
+
/^[[:blank:]]*Enviado do meu /i,
|
28
35
|
# Spanish
|
29
|
-
/^[[:blank:]
|
36
|
+
/^[[:blank:]]*Enviado desde mi /i,
|
30
37
|
]
|
31
38
|
|
32
39
|
def self.match?(line)
|
@@ -0,0 +1,23 @@
|
|
1
|
+
From: Erlend Sogge Heggen <meta@discoursemail.com>
|
2
|
+
Reply-To: Erlend Sogge Heggen <meta+abcd@discoursemail.com>
|
3
|
+
Date: Wednesday, 5 April 2017 at 17:01
|
4
|
+
To: Jef <jef@bar.com>
|
5
|
+
Subject: [Discourse Meta] [PM] Discourse for Communities of Practice, educational organisation
|
6
|
+
|
7
|
+
|
8
|
+
erlend_sh<https://meta.discourse.org/u/erlend_sh> Erlend Sogge Heggen<https://meta.discourse.org/u/erlend_sh> Team
|
9
|
+
April 5
|
10
|
+
|
11
|
+
|
12
|
+
|
13
|
+
Hi Jef,
|
14
|
+
|
15
|
+
Is your University a legally recognised educational institution? Otherwise I'm afraid you're not eligible for this discount.
|
16
|
+
|
17
|
+
Sincerely,
|
18
|
+
|
19
|
+
Erlend
|
20
|
+
|
21
|
+
|
22
|
+
|
23
|
+
This email message and any attachments may contain confidential information and may be privileged. If you are not the intended recipient or otherwise not authorized to receive this message, you are prohibited to use, copy, disclose or take any action based on this email or any information contained herein. If you are not the intended recipient, please advise the sender immediately by replying to this email and permanently delete this message and any attachments from your system.
|
@@ -2,7 +2,7 @@ On Mon, Feb 1, 2016 at 6:32 PM, Jeff Atwood <info@discourse.org> wrote:
|
|
2
2
|
|
3
3
|
> This is Jeff's reply.
|
4
4
|
>
|
5
|
-
> On Mon, Feb 1, 2016 at 7:50 AM, Some One <foo@bar.com wrote:
|
5
|
+
> On Mon, Feb 1, 2016 at 7:50 AM, Some One <foo@bar.com > > wrote:
|
6
6
|
>
|
7
7
|
>> Great!
|
8
8
|
>>
|
@@ -14,7 +14,7 @@ On Mon, Feb 1, 2016 at 6:32 PM, Jeff Atwood <info@discourse.org> wrote:
|
|
14
14
|
>>
|
15
15
|
>>> WAT?
|
16
16
|
>>>
|
17
|
-
>>> On Wed, Jan 27, 2016 at 10:48 PM, Some One < foo@bar.com> wrote:
|
17
|
+
>>> On Wed, Jan 27, 2016 at 10:48 PM, Some One < >>> foo@bar.com> wrote:
|
18
18
|
>>>
|
19
19
|
>>>> Hi Team,
|
20
20
|
>>>>
|
@@ -22,7 +22,7 @@ On Mon, Feb 1, 2016 at 6:32 PM, Jeff Atwood <info@discourse.org> wrote:
|
|
22
22
|
>>>>
|
23
23
|
>>>> Some One
|
24
24
|
>>>>
|
25
|
-
>>>> On Wed, Jan 27, 2016 at 10:10 AM Discourse Team <team@discourse.org> wrote:
|
25
|
+
>>>> On Wed, Jan 27, 2016 at 10:10 AM Discourse Team <team@discourse.org> >>>> wrote:
|
26
26
|
>>>>
|
27
27
|
>>>>> Hello :waves_hand:
|
28
28
|
>>>>>
|