email_reply_trimmer 0.1.6 → 0.1.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (65)
  1. checksums.yaml +4 -4
  2. data/lib/email_reply_trimmer.rb +57 -22
  3. data/lib/email_reply_trimmer/delimiter_matcher.rb +1 -1
  4. data/lib/email_reply_trimmer/email_header_matcher.rb +25 -13
  5. data/lib/email_reply_trimmer/embedded_email_matcher.rb +51 -25
  6. data/lib/email_reply_trimmer/signature_matcher.rb +16 -9
  7. data/test/elided/email_headers_5.txt +23 -0
  8. data/test/elided/embedded_ception.txt +3 -3
  9. data/test/elided/embedded_email_12.txt +2 -2
  10. data/test/elided/embedded_email_13.txt +9 -0
  11. data/test/elided/embedded_email_14.txt +11 -0
  12. data/test/elided/embedded_email_15.txt +4 -0
  13. data/test/elided/embedded_email_16.txt +4 -0
  14. data/test/elided/embedded_email_17.txt +2 -0
  15. data/test/elided/embedded_email_18.txt +1 -0
  16. data/test/elided/embedded_email_19.txt +0 -0
  17. data/test/elided/embedded_email_chinese.txt +4 -0
  18. data/test/elided/embedded_email_german_4.txt +15 -0
  19. data/test/elided/embedded_email_german_5.txt +20 -0
  20. data/test/elided/embedded_email_german_6.txt +8 -0
  21. data/test/elided/embedded_email_norwegian.txt +9 -0
  22. data/test/elided/embedded_email_quote_text.txt +5 -0
  23. data/test/elided/{embedded_email_russian.txt → embedded_email_russian_1.txt} +0 -0
  24. data/test/elided/embedded_email_russian_2.txt +23 -0
  25. data/test/elided/embedded_email_swedish.txt +8 -0
  26. data/test/elided/signatures.txt +2 -0
  27. data/test/emails/email_headers_5.txt +37 -0
  28. data/test/emails/embedded_email_1.txt +1 -1
  29. data/test/emails/embedded_email_13.txt +14 -0
  30. data/test/emails/embedded_email_14.txt +16 -0
  31. data/test/emails/embedded_email_15.txt +9 -0
  32. data/test/emails/embedded_email_16.txt +16 -0
  33. data/test/emails/embedded_email_17.txt +38 -0
  34. data/test/emails/embedded_email_18.txt +7 -0
  35. data/test/emails/embedded_email_19.txt +13 -0
  36. data/test/emails/embedded_email_4.txt +13 -13
  37. data/test/emails/embedded_email_7.txt +4 -4
  38. data/test/emails/embedded_email_chinese.txt +7 -0
  39. data/test/emails/embedded_email_german_4.txt +18 -0
  40. data/test/emails/embedded_email_german_5.txt +23 -0
  41. data/test/emails/embedded_email_german_6.txt +14 -0
  42. data/test/emails/embedded_email_norwegian.txt +11 -0
  43. data/test/emails/{embedded_email_russian.txt → embedded_email_russian_1.txt} +0 -0
  44. data/test/emails/embedded_email_russian_2.txt +26 -0
  45. data/test/emails/embedded_email_swedish.txt +20 -0
  46. data/test/emails/signatures.txt +2 -0
  47. data/test/test_email_reply_trimmer.rb +2 -2
  48. data/test/trimmed/email_headers_5.txt +11 -0
  49. data/test/trimmed/embedded_email_13.txt +3 -0
  50. data/test/trimmed/embedded_email_14.txt +3 -0
  51. data/test/trimmed/embedded_email_15.txt +3 -0
  52. data/test/trimmed/embedded_email_16.txt +11 -0
  53. data/test/trimmed/embedded_email_17.txt +35 -0
  54. data/test/trimmed/embedded_email_18.txt +5 -0
  55. data/test/trimmed/embedded_email_19.txt +13 -0
  56. data/test/trimmed/embedded_email_chinese.txt +2 -0
  57. data/test/trimmed/embedded_email_german_4.txt +1 -0
  58. data/test/trimmed/embedded_email_german_5.txt +1 -0
  59. data/test/trimmed/embedded_email_german_6.txt +4 -0
  60. data/test/trimmed/embedded_email_norwegian.txt +1 -0
  61. data/test/trimmed/embedded_email_quote_text.txt +0 -5
  62. data/test/trimmed/{embedded_email_russian.txt → embedded_email_russian_1.txt} +0 -0
  63. data/test/trimmed/embedded_email_russian_2.txt +1 -0
  64. data/test/trimmed/embedded_email_swedish.txt +9 -0
  65. metadata +51 -6
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c6eabb8ce3f3327f3abe63f3ee2fe147fb161d96
4
- data.tar.gz: 7a07c2267ef47f4607a2ae2f05feac8c0a2584c1
3
+ metadata.gz: ae957d1af8aa2d31792525c2eeb9e87ee6343813
4
+ data.tar.gz: 0c40d729c66e243f8ac86a59bc861909abbeb786
5
5
  SHA512:
6
- metadata.gz: dba90e1fdc0b0a4f7032f9c2d1e9aabf575a390ee4cf39618037b9a820de8d7abad6338623b203211ef349dc3568b2e55440721dc89fa8ac144eda878cf0e463
7
- data.tar.gz: 19be9d9b0496d31e81f7f7adb296d273aeea84130a8243d669857de751748a4e923584a14d4c8f0a90f338428e7dd219fa4212fddce38575b44ec58a598c5455
6
+ metadata.gz: afe3ca86183de852123dcc52e1ef2303ba947253c35ca41605377582cfb7b21a7c8fae7b6a9240e2fcab7cc14984f545e7a34c814f6a1742f8817a6c45760f3e
7
+ data.tar.gz: c0a3aafb8b37d12bb5751ca89fa99dfe5a4b07db23597227b102eca3939dfdb95685cd19629c3d818011a5b350fec68db9224748578b98a93096e1bb3dc2201d
@@ -6,7 +6,7 @@ require_relative "email_reply_trimmer/email_header_matcher"
6
6
  require_relative "email_reply_trimmer/quote_matcher"
7
7
 
8
8
  class EmailReplyTrimmer
9
- VERSION = "0.1.6"
9
+ VERSION = "0.1.7"
10
10
 
11
11
  DELIMITER = "d"
12
12
  EMBEDDED = "b"
@@ -27,15 +27,10 @@ class EmailReplyTrimmer
27
27
  end
28
28
 
29
29
  def self.trim(text, split=false)
30
- return if text.nil? || text =~ /\A[[:space:]]*\Z/m
30
+ return if text.nil? || text =~ /\A[[:space:]]*\z/m
31
31
 
32
- # normalize line endings
33
- text.gsub!("\r\n", "\n")
34
-
35
- # fix embedded email markers that might span over multiple lines
36
- EmbeddedEmailMatcher::ON_DATE_SOMEONE_WROTE_REGEXES.each do |r|
37
- text.gsub!(r) { |m| m.gsub(/\n[[:space:]>\-]*/, " ") }
38
- end
32
+ # do some cleanup
33
+ preprocess!(text)
39
34
 
40
35
  # from now on, we'll work on a line-by-line basis
41
36
  lines = text.split("\n")
@@ -59,8 +54,8 @@ class EmailReplyTrimmer
59
54
  end
60
55
 
61
56
  # when the reply is at the end of the email
62
- if pattern =~ /^b+q+[eq]*t[te]*$/
63
- index = pattern =~ /t/
57
+ if pattern =~ /^(b[^t]+)*b[bqeh]+t[et]*$/
58
+ index = pattern =~ /t[et]*$/
64
59
  pattern = ""
65
60
  lines = lines[index..-1]
66
61
  end
@@ -75,12 +70,20 @@ class EmailReplyTrimmer
75
70
 
76
71
  # if there is an embedded email marker, followed by a huge quote
77
72
  # then take everything up to that marker
78
- if pattern =~ /te*b[eqbh]*[te]*$/
73
+ if pattern =~ /te*b[eqbh]*([te]*)$/ && $1.count("t") < 7
79
74
  index = pattern =~ /te*b[eqbh]*[te]*$/
80
75
  pattern = pattern[0..index]
81
76
  lines = lines[0..index]
82
77
  end
83
78
 
79
+ # if there is some text before a huge quote ending the email,
80
+ # then remove the quote
81
+ if pattern =~ /te*[qbe]+$/
82
+ index = pattern =~ /te*[qbe]+$/
83
+ pattern = pattern[0..index]
84
+ lines = lines[0..index]
85
+ end
86
+
84
87
  # if there still are some embedded email markers, just remove them
85
88
  while pattern =~ /b/
86
89
  index = pattern =~ /b/
@@ -95,8 +98,8 @@ class EmailReplyTrimmer
95
98
  size.times.each { |s| pattern[index + s] = EMAIL_HEADER }
96
99
  end
97
100
 
98
- # if there are at least 3 consecutive email headers, take everything up to
99
- # these headers
101
+ # if there are at least 3 consecutive email headers,
102
+ # take everything up to these headers
100
103
  if pattern =~ /t[eq]*h{3,}/
101
104
  index = pattern =~ /t[eq]*h{3,}/
102
105
  pattern = pattern[0..index]
@@ -128,15 +131,10 @@ class EmailReplyTrimmer
128
131
  end
129
132
 
130
133
  def self.extract_embedded_email(text)
131
- return if text.nil? || text =~ /\A[[:space:]]*\Z/m
132
-
133
- # normalize line endings
134
- text.gsub!("\r\n", "\n")
134
+ return if text.nil? || text =~ /\A[[:space:]]*\z/m
135
135
 
136
- # fix embedded email markers that might span over multiple lines
137
- EmbeddedEmailMatcher::ON_DATE_SOMEONE_WROTE_REGEXES.each do |r|
138
- text.gsub!(r) { |m| m.gsub(/\n[[:space:]>\-]*/, " ") }
139
- end
136
+ # do some cleanup
137
+ preprocess!(text)
140
138
 
141
139
  # from now on, we'll work on a line-by-line basis
142
140
  lines = text.split("\n")
@@ -153,6 +151,43 @@ class EmailReplyTrimmer
153
151
 
154
152
  private
155
153
 
154
+ def self.preprocess!(text)
155
+ # normalize line endings
156
+ text.gsub!("\r\n", "\n")
157
+
158
+ # remove PGP markers
159
+ text.gsub!(/\A-----BEGIN PGP SIGNED MESSAGE-----\n(?:Hash: \w+)?\s+/i, "")
160
+ text.gsub!(/^-----BEGIN PGP SIGNATURE-----$[\s\S]+^-----END PGP SIGNATURE-----/, "")
161
+
162
+ # remove unsubscribe links
163
+ text.gsub!(/^Unsubscribe: .+@.+(\n.+http:.+)?\s*\z/i, "")
164
+
165
+ # remove alias-style quotes marker
166
+ text.gsub!(/^.*>{5} "[^"\n]+" == .+ writes:/, "")
167
+
168
+ # change enclosed-style quotes format
169
+ text.gsub!(/^>>> ?(.+) ?>>>$\n([\s\S]+?)\n^<<< ?\1 ?<<<$/) { $2.gsub(/^/, "> ") }
170
+ text.gsub!(/^>{4,}[[:blank:]]*$\n([\s\S]+?)\n^<{4,}[[:blank:]]*$/) { $1.gsub(/^/, "> ") }
171
+
172
+ # fix all quotes formats
173
+ text.gsub!(/^((?:[[:blank:]]*[[:alpha:]]*[>|])+)/) { $1.gsub(/([[:alpha:]]+>|\|)/, ">") }
174
+
175
+ # fix embedded email markers that might span over multiple lines
176
+ (
177
+ EmbeddedEmailMatcher::ON_DATE_SOMEONE_WROTE_REGEXES +
178
+ EmbeddedEmailMatcher::SOMEONE_WROTE_ON_DATE_REGEXES +
179
+ EmbeddedEmailMatcher::DATE_SOMEONE_WROTE_REGEXES +
180
+ [EmbeddedEmailMatcher::DATE_SOMEONE_EMAIL_REGEX]
181
+ ).each do |r|
182
+ text.gsub!(r) do |m|
183
+ m.count("\n") > 4 ? m : m.gsub(/\n+[[:space:]]*/, " ")
184
+ end
185
+ end
186
+
187
+ # remove leading/trailing whitespaces
188
+ text.strip!
189
+ end
190
+
156
191
  def self.compute_elided(text, lines)
157
192
  elided = []
158
193
 
@@ -1,6 +1,6 @@
1
1
  class DelimiterMatcher
2
2
 
3
- DELIMITER_CHARACTERS ||= "-_,=+~#*ᐧ"
3
+ DELIMITER_CHARACTERS ||= "-_,=+~#*ᐧ—"
4
4
  DELIMITER_REGEX ||= /^[[:blank:]]*[#{Regexp.escape(DELIMITER_CHARACTERS)}]+[[:blank:]]*$/
5
5
 
6
6
  def self.match?(line)
@@ -1,12 +1,12 @@
1
1
  class EmailHeaderMatcher
2
2
 
3
- EMAIL_HEADERS_WITH_DATE_MARKERS = [
4
- # Dutch
3
+ EMAIL_HEADERS_WITH_DATE_MARKERS ||= [
4
+ # Norwegian
5
5
  ["Sendt"],
6
6
  # English
7
- ["Sent"],
7
+ ["Sent", "Date"],
8
8
  # French
9
- ["Date"],
9
+ ["Date", "Le"],
10
10
  # German
11
11
  ["Gesendet"],
12
12
  # Portuguese
@@ -17,19 +17,25 @@ class EmailHeaderMatcher
17
17
  ["Fecha"],
18
18
  # Italian
19
19
  ["Data"],
20
+ # Dutch
21
+ ["Datum"],
22
+ # Swedish
23
+ ["Skickat"],
24
+ # Chinese
25
+ ["发送时间"],
20
26
  ]
21
27
 
22
- EMAIL_HEADERS_WITH_DATE_REGEXES = EMAIL_HEADERS_WITH_DATE_MARKERS.map do |header|
23
- /^[[:blank:]>\*]*(?:#{header.join("|")})[[:blank:]\*]*:.*\d+/
28
+ EMAIL_HEADERS_WITH_DATE_REGEXES ||= EMAIL_HEADERS_WITH_DATE_MARKERS.map do |header|
29
+ /^[[:blank:]*]*(?:#{header.join("|")})[[:blank:]*]*:.*\d+/
24
30
  end
25
31
 
26
- EMAIL_HEADERS_WITH_TEXT_MARKERS = [
27
- # Dutch
32
+ EMAIL_HEADERS_WITH_TEXT_MARKERS ||= [
33
+ # Norwegian
28
34
  ["Fra", "Til", "Emne"],
29
35
  # English
30
36
  ["From", "To", "Cc", "Reply-To", "Subject"],
31
37
  # French
32
- ["De", "À", "Répondre à", "Objet"],
38
+ ["De", "Expéditeur", "À", "Destinataire", "Répondre à", "Objet"],
33
39
  # German
34
40
  ["Von", "An", "Betreff"],
35
41
  # Portuguese
@@ -37,14 +43,20 @@ class EmailHeaderMatcher
37
43
  # Spanish
38
44
  ["De", "Para", "Asunto"],
39
45
  # Italian
40
- ["Da", "Risposta", "A", "Oggetto"]
46
+ ["Da", "Risposta", "A", "Oggetto"],
47
+ # Dutch
48
+ ["Van", "Beantwoorden - Aan", "Aan", "Onderwerp"],
49
+ # Swedish
50
+ ["Från", "Till", "Ämne"],
51
+ # Chinese
52
+ ["发件人", "收件人", "主题"],
41
53
  ]
42
54
 
43
- EMAIL_HEADERS_WITH_TEXT_REGEXES = EMAIL_HEADERS_WITH_TEXT_MARKERS.map do |header|
44
- /^[[:blank:]>\*]*(?:#{header.join("|")})[[:blank:]\*]*:.*[[:word:]]+/
55
+ EMAIL_HEADERS_WITH_TEXT_REGEXES ||= EMAIL_HEADERS_WITH_TEXT_MARKERS.map do |header|
56
+ /^[[:blank:]*]*(?:#{header.join("|")})[[:blank:]*]*:.*[[:word:]]+/i
45
57
  end
46
58
 
47
- EMAIL_HEADER_REGEXES = [
59
+ EMAIL_HEADER_REGEXES ||= [
48
60
  EMAIL_HEADERS_WITH_DATE_REGEXES,
49
61
  EMAIL_HEADERS_WITH_TEXT_REGEXES,
50
62
  ].flatten
@@ -10,38 +10,43 @@ class EmbeddedEmailMatcher
10
10
  # Dnia 14 lip 2015 o godz. 00:25 Michael Downey <info@discourse.org> napisał(a):
11
11
  # Em seg, 27 de jul de 2015 17:13, Neil Lalonde <info@discourse.org> escreveu:
12
12
  # El jueves, 21 de noviembre de 2013, codinghorror escribió:
13
- # Am 03.02.2016 3:35 nachm. schrieb Max Mustermann <mail@example.com>:
14
- ON_DATE_SOMEONE_WROTE_MARKERS = [
13
+ # At 6/16/2016 08:32 PM, you wrote:
14
+ ON_DATE_SOMEONE_WROTE_REGEXES ||= [
15
+ # Chinese
16
+ /^[[:blank:]<>-]*在 ((?!\b(在|写道)\b)[\s\S])+?写道[[:blank:].:>-]*$/i,
15
17
  # Dutch
16
- ["Op","het volgende geschreven"],
18
+ /^[[:blank:]<>-]*Op ((?!\b(Op|het\svolgende\sgeschreven|schreef)\b)[\s\S])+?(het\svolgende\sgeschreven|schreef[^:]+)[[:blank:].:>-]*$/i,
17
19
  # English
18
- ["On", "wrote"],
20
+ /^[[:blank:]<>-]*In message ((?!\b(In message|writes)\b)[\s\S])+?writes[[:blank:].:>-]*$/i,
21
+ /^[[:blank:]<>-]*(On|At) ((?!\b(On|wrote|writes|says|said)\b)[\s\S])+?(wrote|writes|says|said)[[:blank:].:>-]*$/i,
19
22
  # French
20
- ["Le", "a écrit "],
23
+ /^[[:blank:]<>-]*Le ((?!\b(Le|nous\sa\sdit|a\s+écrit)\b)[\s\S])+?(nous\sa\sdit|a\s+écrit)[[:blank:].:>-]*$/i,
24
+ # German
25
+ /^[[:blank:]<>-]*Am ((?!\b(Am|schrieben\sSie)\b)[\s\S])+?schrieben\sSie[[:blank:].:>-]*$/i,
26
+ /^[[:blank:]<>-]*Am ((?!\b(Am|geschrieben)\b)[\s\S])+?(geschrieben|schrieb[^:]+)[[:blank:].:>-]*$/i,
21
27
  # Italian
22
- ["Il", "ha scritto"],
28
+ /^[[:blank:]<>-]*Il ((?!\b(Il|ha\sscritto)\b)[\s\S])+?ha\sscritto[[:blank:].:>-]*$/i,
23
29
  # Polish
24
- ["Dnia", "napisał\\(a\\)"],
30
+ /^[[:blank:]<>-]*(Dnia|Dňa) ((?!\b(Dnia|Dňa|napisał)\b)[\s\S])+?napisał(\(a\))?[[:blank:].:>-]*$/i,
25
31
  # Portuguese
26
- ["Em", "escreveu"],
32
+ /^[[:blank:]<>-]*Em ((?!\b(Em|escreveu)\b)[\s\S])+?escreveu[[:blank:].:>-]*$/i,
27
33
  # Spanish
28
- ["El", "escribió"],
29
- # German
30
- ["Am", "schrieb"],
34
+ /^[[:blank:]<>-]*El ((?!\b(El|escribió)\b)[\s\S])+?escribió[[:blank:].:>-]*$/i,
31
35
  ]
32
36
 
33
- ON_DATE_SOMEONE_WROTE_REGEXES = ON_DATE_SOMEONE_WROTE_MARKERS.map do |on, wrote|
34
- wrote.gsub!(/ +/, "[[:space:]]+") # the "wrote" part might span over multiple lines
35
- /^([[:blank:]>\-]*#{on}\s(?:(?!#{on}\s|#{wrote}:?)[\s\S])*#{wrote}:?[[:blank:]\-]*)$/m
36
- end
37
-
38
37
  # Op 10 dec. 2015 18:35 schreef "Arpit Jalan" <info@discourse.org>:
39
38
  # Am 18.09.2013 um 16:24 schrieb codinghorror <info@discourse.org>:
39
+ # Den 15. jun. 2016 kl. 20.42 skrev Jeff Atwood <info@discourse.org>:
40
+ # søn. 30. apr. 2017 kl. 00.26 skrev David Taylor <meta@discoursemail.com>:
40
41
  ON_DATE_WROTE_SOMEONE_MARKERS = [
41
42
  # Dutch
42
43
  ["Op", "schreef"],
43
44
  # German
44
45
  ["Am", "schrieb"],
46
+ # Norwegian
47
+ ["Den", "skrev"],
48
+ # Dutch
49
+ ["søn\.", "skrev"],
45
50
  ]
46
51
 
47
52
  ON_DATE_WROTE_SOMEONE_REGEXES = ON_DATE_WROTE_SOMEONE_MARKERS.map do |on, wrote|
@@ -52,20 +57,35 @@ class EmbeddedEmailMatcher
52
57
  DATE_SOMEONE_WROTE_MARKERS = [
53
58
  # Russian
54
59
  ["пользователь", "написал"],
60
+ # Polish
61
+ ["", "napisał\\(a\\)"],
62
+ # Ukrainian
63
+ ["", "пише"],
55
64
  ]
56
65
 
57
66
  DATE_SOMEONE_WROTE_REGEXES = DATE_SOMEONE_WROTE_MARKERS.map do |user, wrote|
58
- /.+#{user}.+#{wrote}:/
67
+ user.size == 0 ?
68
+ /^.*\d{4}.*?(?:(?!#{wrote})[\s\S])*#{wrote}:/ :
69
+ /^.*\d{4}.*?#{user}.*?(?:(?!#{wrote})[\s\S])*#{wrote}:/
59
70
  end
60
71
 
72
+ # Max Mustermann <try_discourse@discoursemail.com> schrieb am Fr., 28. Apr. 2017 um 11:53 Uhr:
73
+ SOMEONE_WROTE_ON_DATE_REGEXES ||= [
74
+ # English
75
+ /^.+\bwrote\b[[:space:]]+\bon\b.+[^:]+:/,
76
+ # German
77
+ /^.+\bschrieb\b[[:space:]]+\bam\b.+[^:]+:/,
78
+ ]
79
+
61
80
  # 2016-03-03 17:21 GMT+01:00 Some One
62
81
  ISO_DATE_SOMEONE_REGEX = /^[[:blank:]>]*20\d\d-\d\d-\d\d \d\d:\d\d GMT\+\d\d:\d\d [\w[:blank:]]+$/
63
82
 
83
+
64
84
  # 2015-10-18 0:17 GMT+03:00 Matt Palmer <info@discourse.org>:
65
85
  # 2013/10/2 camilohollanda <info@discourse.org>
66
86
  # вт, 5 янв. 2016 г. в 23:39, Erlend Sogge Heggen <info@discourse.org>:
67
87
  # ср, 1 апр. 2015, 18:29, Denis Didkovsky <info@discourse.org>:
68
- DATE_SOMEONE_EMAIL_REGEX = /^[[:blank:]>]*.*\d{4}.+<[^@<>]+@[^@<>.]+\.[^@<>]+>:?$/
88
+ DATE_SOMEONE_EMAIL_REGEX = /^.*\d{4}.+\s?<[^@<>]+@[^@<>.]+\.[^@<>]+>:?$/
69
89
 
70
90
  # codinghorror via Discourse Meta wrote:
71
91
  # codinghorror via Discourse Meta <info@discourse.org> schrieb:
@@ -77,11 +97,12 @@ class EmbeddedEmailMatcher
77
97
  ]
78
98
 
79
99
  SOMEONE_VIA_SOMETHING_WROTE_REGEXES = SOMEONE_VIA_SOMETHING_WROTE_MARKERS.map do |wrote|
80
- /^[[:blank:]>]*.+ via .+ #{wrote}:?[[:blank:]]*$/
100
+ /^.+ via .+ #{wrote}:?[[:blank:]]*$/
81
101
  end
82
102
 
83
103
  # Some One <info@discourse.org> wrote:
84
- SOMEONE_EMAIL_WROTE_REGEX = /^[[:blank:]>]*.+ <.+@.+\..+> wrote:?/
104
+ # Gavin Sinclair (gsinclair@soyabean.com.au) wrote:
105
+ SOMEONE_EMAIL_WROTE_REGEX = /^.+\b[\w.+-]+@[\w.-]+\.\w{2,}\b.+wrote:?$/
85
106
 
86
107
  # Posted by mpalmer on 01/21/2016
87
108
  POSTED_BY_SOMEONE_ON_DATE_REGEX = /^[[:blank:]>]*Posted by .+ on \d{2}\/\d{2}\/\d{4}$/i
@@ -92,17 +113,21 @@ class EmbeddedEmailMatcher
92
113
  # ----- Original Message -----
93
114
  # -----Original Message-----
94
115
  # *----- Original Message -----*
116
+ # ----- Reply message -----
117
+ # ------------------ 原始邮件 ------------------
95
118
  FORWARDED_EMAIL_REGEXES = [
96
119
  # English
97
120
  /^[[:blank:]>]*Begin forwarded message:/i,
98
- /^[[:blank:]>]*Reply message/i,
99
- /^[[:blank:]>\*]*-{2,}[[:blank:]]*(Forwarded|Original) Message[[:blank:]]*-{2,}/i,
121
+ /^[[:blank:]>*]*-{2,}[[:blank:]]*(Forwarded|Original|Reply) Message[[:blank:]]*-{2,}/i,
100
122
  # French
101
- /^[[:blank:]>\*]*-{2,}[[:blank:]]*Message transféré[[:blank:]]*-{2,}/i,
123
+ /^[[:blank:]>]*Début du message transféré :/i,
124
+ /^[[:blank:]>*]*-{2,}[[:blank:]]*Message transféré[[:blank:]]*-{2,}/i,
102
125
  # German
103
- /^[[:blank:]>\*]*-{2,}[[:blank:]]*Ursprüngliche Nachricht[[:blank:]]*-{2,}/i,
126
+ /^[[:blank:]>*]*-{2,}[[:blank:]]*Ursprüngliche Nachricht[[:blank:]]*-{2,}/i,
104
127
  # Spanish
105
- /^[[:blank:]>\*]*-{2,}[[:blank:]]*Mensaje original[[:blank:]]*-{2,}/i,
128
+ /^[[:blank:]>*]*-{2,}[[:blank:]]*Mensaje original[[:blank:]]*-{2,}/i,
129
+ # Chinese
130
+ /^[[:blank:]>*]*-{2,}[[:blank:]]*原始邮件[[:blank:]]*-{2,}/i,
106
131
  ]
107
132
 
108
133
  EMBEDDED_REGEXES = [
@@ -110,6 +135,7 @@ class EmbeddedEmailMatcher
110
135
  ON_DATE_WROTE_SOMEONE_REGEXES,
111
136
  DATE_SOMEONE_WROTE_REGEXES,
112
137
  DATE_SOMEONE_EMAIL_REGEX,
138
+ SOMEONE_WROTE_ON_DATE_REGEXES,
113
139
  ISO_DATE_SOMEONE_REGEX,
114
140
  SOMEONE_VIA_SOMETHING_WROTE_REGEXES,
115
141
  SOMEONE_EMAIL_WROTE_REGEX,
@@ -13,20 +13,27 @@ class SignatureMatcher
13
13
  # (sent from a phone)
14
14
  # (Sent from mobile device)
15
15
  # 從我的 iPhone 傳送
16
- SIGNATURE_REGEXES = [
16
+ SIGNATURE_REGEXES ||= [
17
17
  # Chinese
18
- /^[[:blank:]>]*從我的 iPhone 傳送/i,
18
+ /^[[:blank:]]*從我的 iPhone 傳送/i,
19
19
  # English
20
- /^[[:blank:]>]*[[:word:]]+ from mobile/i,
21
- /^[[:blank:]>]*[\(<]*sent (?:from|via|with|by) .+[\)>]*/i,
22
- /^[[:blank:]>]*from my .{1,20}/i, # don't match too much
20
+ /^[[:blank:]]*[[:word:]]+ from mobile/i,
21
+ /^[[:blank:]]*[\(<]*Sent (from|via|with|by) .+[\)>]*/i,
22
+ /^[[:blank:]]*From my .{1,20}/i,
23
+ /^[[:blank:]]*Get Outlook for iOS/i,
23
24
  # French
24
- /^[[:blank:]>]*Envoyé depuis mon .+/i,
25
+ /^[[:blank:]]*Envoyé depuis (mon|Yahoo Mail)/i,
25
26
  # German
26
- /^[[:blank:]>]*Von meinem .+ gesendet/i,
27
- /^[[:blank:]>]*Diese Nachricht wurde von .+ gesendet/i,
27
+ /^[[:blank:]]*Von meinem .+ gesendet/i,
28
+ /^[[:blank:]]*Diese Nachricht wurde von .+ gesendet/i,
29
+ # Italian
30
+ /^[[:blank:]]*Inviato da /i,
31
+ # Norwegian
32
+ /^[[:blank:]]*Sendt fra min /i,
33
+ # Portuguese
34
+ /^[[:blank:]]*Enviado do meu /i,
28
35
  # Spanish
29
- /^[[:blank:]>]*Enviado desde mi .+/i,
36
+ /^[[:blank:]]*Enviado desde mi /i,
30
37
  ]
31
38
 
32
39
  def self.match?(line)
@@ -0,0 +1,23 @@
1
+ From: Erlend Sogge Heggen <meta@discoursemail.com>
2
+ Reply-To: Erlend Sogge Heggen <meta+abcd@discoursemail.com>
3
+ Date: Wednesday, 5 April 2017 at 17:01
4
+ To: Jef <jef@bar.com>
5
+ Subject: [Discourse Meta] [PM] Discourse for Communities of Practice, educational organisation
6
+
7
+
8
+ erlend_sh<https://meta.discourse.org/u/erlend_sh> Erlend Sogge Heggen<https://meta.discourse.org/u/erlend_sh> Team
9
+ April 5
10
+
11
+
12
+
13
+ Hi Jef,
14
+
15
+ Is your University a legally recognised educational institution? Otherwise I'm afraid you're not eligible for this discount.
16
+
17
+ Sincerely,
18
+
19
+ Erlend
20
+
21
+
22
+
23
+ This email message and any attachments may contain confidential information and may be privileged. If you are not the intended recipient or otherwise not authorized to receive this message, you are prohibited to use, copy, disclose or take any action based on this email or any information contained herein. If you are not the intended recipient, please advise the sender immediately by replying to this email and permanently delete this message and any attachments from your system.
@@ -2,7 +2,7 @@ On Mon, Feb 1, 2016 at 6:32 PM, Jeff Atwood <info@discourse.org> wrote:
2
2
 
3
3
  > This is Jeff's reply.
4
4
  >
5
- > On Mon, Feb 1, 2016 at 7:50 AM, Some One <foo@bar.com wrote:
5
+ > On Mon, Feb 1, 2016 at 7:50 AM, Some One <foo@bar.com > > wrote:
6
6
  >
7
7
  >> Great!
8
8
  >>
@@ -14,7 +14,7 @@ On Mon, Feb 1, 2016 at 6:32 PM, Jeff Atwood <info@discourse.org> wrote:
14
14
  >>
15
15
  >>> WAT?
16
16
  >>>
17
- >>> On Wed, Jan 27, 2016 at 10:48 PM, Some One < foo@bar.com> wrote:
17
+ >>> On Wed, Jan 27, 2016 at 10:48 PM, Some One < >>> foo@bar.com> wrote:
18
18
  >>>
19
19
  >>>> Hi Team,
20
20
  >>>>
@@ -22,7 +22,7 @@ On Mon, Feb 1, 2016 at 6:32 PM, Jeff Atwood <info@discourse.org> wrote:
22
22
  >>>>
23
23
  >>>> Some One
24
24
  >>>>
25
- >>>> On Wed, Jan 27, 2016 at 10:10 AM Discourse Team <team@discourse.org> wrote:
25
+ >>>> On Wed, Jan 27, 2016 at 10:10 AM Discourse Team <team@discourse.org> >>>> wrote:
26
26
  >>>>
27
27
  >>>>> Hello :waves_hand:
28
28
  >>>>>