email_reply_trimmer 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/email_reply_trimmer.rb +27 -3
- data/lib/email_reply_trimmer/delimiter_matcher.rb +1 -1
- data/lib/email_reply_trimmer/email_header_matcher.rb +6 -2
- data/lib/email_reply_trimmer/embedded_email_matcher.rb +16 -14
- data/lib/email_reply_trimmer/empty_line_matcher.rb +1 -1
- data/lib/email_reply_trimmer/quote_matcher.rb +1 -1
- data/lib/email_reply_trimmer/signature_matcher.rb +8 -10
- data/test/before/email_headers_1.txt +1 -0
- data/test/before/email_headers_2.txt +1 -0
- data/test/before/email_headers_3.txt +1 -0
- data/test/before/email_headers_4.txt +1 -0
- data/test/before/embedded_email_10.txt +25 -0
- data/test/before/embedded_email_german_3.txt +1 -0
- data/test/before/embedded_email_spanish_2.txt +1 -0
- data/test/before/forwarded_message.txt +0 -0
- data/test/elided/email_headers_4.txt +15 -0
- data/test/emails/email_headers_4.txt +17 -0
- data/test/embedded/email_headers_1.txt +12 -0
- data/test/embedded/email_headers_2.txt +8 -0
- data/test/embedded/email_headers_3.txt +16 -0
- data/test/embedded/email_headers_4.txt +15 -0
- data/test/embedded/embedded_email_10.txt +15 -0
- data/test/embedded/embedded_email_german_3.txt +6 -0
- data/test/embedded/embedded_email_spanish_2.txt +9 -0
- data/test/embedded/forwarded_message.txt +8 -0
- data/test/test_email_reply_trimmer.rb +30 -0
- data/test/trimmed/email_headers_4.txt +1 -0
- metadata +21 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fa55f9a39546e85874c282caab367fdf9da3fa9a
|
4
|
+
data.tar.gz: 5be88ed9382527d944029b9ef4a41077ac820e29
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bf79309444ad1b47c679b475d8b943f8e7ad580d06b06305d9584336c803e84254fe019d269c6f5fb34660138263ac1028ac10ae2f647c40f934bc1fe937c040
|
7
|
+
data.tar.gz: 5d5adaa21d0aaf5e492002bcf1c10a1c9f4b5bebd9e73a8ef7200e8eb9daaa01595b4dcbedcc8122fb7f637608d59ca8029b758b22ffd2915a02794c2c89dabe
|
data/lib/email_reply_trimmer.rb
CHANGED
@@ -6,7 +6,7 @@ require_relative "email_reply_trimmer/email_header_matcher"
|
|
6
6
|
require_relative "email_reply_trimmer/quote_matcher"
|
7
7
|
|
8
8
|
class EmailReplyTrimmer
|
9
|
-
VERSION = "0.1.4"
|
9
|
+
VERSION = "0.1.5"
|
10
10
|
|
11
11
|
DELIMITER = "d"
|
12
12
|
EMBEDDED = "b"
|
@@ -27,10 +27,10 @@ class EmailReplyTrimmer
|
|
27
27
|
end
|
28
28
|
|
29
29
|
def self.trim(text, split=false)
|
30
|
-
return
|
30
|
+
return if text.nil? || text =~ /\A[[:space:]]*\Z/m
|
31
31
|
|
32
32
|
# normalize line endings
|
33
|
-
text.gsub!(
|
33
|
+
text.gsub!("\r\n", "\n")
|
34
34
|
|
35
35
|
# fix embedded email markers that might span over multiple lines
|
36
36
|
EmbeddedEmailMatcher::ON_DATE_SOMEONE_WROTE_REGEXES.each do |r|
|
@@ -120,6 +120,30 @@ class EmailReplyTrimmer
|
|
120
120
|
end
|
121
121
|
end
|
122
122
|
|
123
|
+
def self.extract_embedded_email(text)
|
124
|
+
return if text.nil? || text =~ /\A[[:space:]]*\Z/m
|
125
|
+
|
126
|
+
# normalize line endings
|
127
|
+
text.gsub!("\r\n", "\n")
|
128
|
+
|
129
|
+
# fix embedded email markers that might span over multiple lines
|
130
|
+
EmbeddedEmailMatcher::ON_DATE_SOMEONE_WROTE_REGEXES.each do |r|
|
131
|
+
text.gsub!(r) { |m| m.gsub(/\n[[:space:]>\-]*/, " ") }
|
132
|
+
end
|
133
|
+
|
134
|
+
# from now on, we'll work on a line-by-line basis
|
135
|
+
lines = text.split("\n")
|
136
|
+
|
137
|
+
# identify content of each lines
|
138
|
+
pattern = lines.map { |l| identify_line_content(l) }.join
|
139
|
+
|
140
|
+
if index = pattern =~ /(?:h[eqd]*?){3,}[tq]/
|
141
|
+
embedded = lines[index..-1].join("\n").strip
|
142
|
+
before = lines[0...(pattern[0...index] =~ /e*(b[eqd]*|b*[ed]*)$/)].join("\n").strip
|
143
|
+
return [embedded, before]
|
144
|
+
end
|
145
|
+
end
|
146
|
+
|
123
147
|
private
|
124
148
|
|
125
149
|
def self.compute_elided(text, lines)
|
@@ -1,7 +1,7 @@
|
|
1
1
|
class DelimiterMatcher
|
2
2
|
|
3
3
|
DELIMITER_CHARACTERS ||= "-_,=+~#*ᐧ"
|
4
|
-
DELIMITER_REGEX ||= /^[[:
|
4
|
+
DELIMITER_REGEX ||= /^[[:blank:]>]*[#{Regexp.escape(DELIMITER_CHARACTERS)}]+[[:blank:]]*$/
|
5
5
|
|
6
6
|
def self.match?(line)
|
7
7
|
line =~ DELIMITER_REGEX
|
@@ -15,10 +15,12 @@ class EmailHeaderMatcher
|
|
15
15
|
["Enviado"],
|
16
16
|
# Spanish (Mexican)
|
17
17
|
["Fecha"],
|
18
|
+
# Italian
|
19
|
+
["Data"],
|
18
20
|
]
|
19
21
|
|
20
22
|
EMAIL_HEADERS_WITH_DATE_REGEXES = EMAIL_HEADERS_WITH_DATE_MARKERS.map do |header|
|
21
|
-
/^[[:
|
23
|
+
/^[[:blank:]>\*]*(?:#{header.join("|")})[[:blank:]\*]*:.*\d+/
|
22
24
|
end
|
23
25
|
|
24
26
|
EMAIL_HEADERS_WITH_TEXT_MARKERS = [
|
@@ -34,10 +36,12 @@ class EmailHeaderMatcher
|
|
34
36
|
["De", "Para", "Assunto"],
|
35
37
|
# Spanish
|
36
38
|
["De", "Para", "Asunto"],
|
39
|
+
# Italian
|
40
|
+
["Da", "Risposta", "A", "Oggetto"]
|
37
41
|
]
|
38
42
|
|
39
43
|
EMAIL_HEADERS_WITH_TEXT_REGEXES = EMAIL_HEADERS_WITH_TEXT_MARKERS.map do |header|
|
40
|
-
/^[[:
|
44
|
+
/^[[:blank:]>\*]*(?:#{header.join("|")})[[:blank:]\*]*:.*[[:word:]]+/
|
41
45
|
end
|
42
46
|
|
43
47
|
EMAIL_HEADER_REGEXES = [
|
@@ -31,8 +31,8 @@ class EmbeddedEmailMatcher
|
|
31
31
|
]
|
32
32
|
|
33
33
|
ON_DATE_SOMEONE_WROTE_REGEXES = ON_DATE_SOMEONE_WROTE_MARKERS.map do |on, wrote|
|
34
|
-
wrote.gsub!(
|
35
|
-
/^([
|
34
|
+
wrote.gsub!(/ +/, "[[:space:]]+") # the "wrote" part might span over multiple lines
|
35
|
+
/^([[:blank:]>\-]*#{on}\s(?:(?!#{on}\s|#{wrote}:?)[\s\S])*#{wrote}:?[[:blank:]\-]*)$/m
|
36
36
|
end
|
37
37
|
|
38
38
|
# Op 10 dec. 2015 18:35 schreef "Arpit Jalan" <info@discourse.org>:
|
@@ -45,7 +45,7 @@ class EmbeddedEmailMatcher
|
|
45
45
|
]
|
46
46
|
|
47
47
|
ON_DATE_WROTE_SOMEONE_REGEXES = ON_DATE_WROTE_SOMEONE_MARKERS.map do |on, wrote|
|
48
|
-
/^[[:
|
48
|
+
/^[[:blank:]>]*#{on}\s.+\s#{wrote}\s[^:]+:/
|
49
49
|
end
|
50
50
|
|
51
51
|
# суббота, 14 марта 2015 г. пользователь etewiah написал:
|
@@ -55,17 +55,17 @@ class EmbeddedEmailMatcher
|
|
55
55
|
]
|
56
56
|
|
57
57
|
DATE_SOMEONE_WROTE_REGEXES = DATE_SOMEONE_WROTE_MARKERS.map do |user, wrote|
|
58
|
-
|
58
|
+
/.+#{user}.+#{wrote}:/
|
59
59
|
end
|
60
60
|
|
61
61
|
# 2016-03-03 17:21 GMT+01:00 Some One
|
62
|
-
ISO_DATE_SOMEONE_REGEX = /^[[:
|
62
|
+
ISO_DATE_SOMEONE_REGEX = /^[[:blank:]>]*20\d\d-\d\d-\d\d \d\d:\d\d GMT\+\d\d:\d\d [\w[:blank:]]+$/
|
63
63
|
|
64
64
|
# 2015-10-18 0:17 GMT+03:00 Matt Palmer <info@discourse.org>:
|
65
65
|
# 2013/10/2 camilohollanda <info@discourse.org>
|
66
66
|
# вт, 5 янв. 2016 г. в 23:39, Erlend Sogge Heggen <info@discourse.org>:
|
67
67
|
# ср, 1 апр. 2015, 18:29, Denis Didkovsky <info@discourse.org>:
|
68
|
-
DATE_SOMEONE_EMAIL_REGEX = /^[[:
|
68
|
+
DATE_SOMEONE_EMAIL_REGEX = /^[[:blank:]>]*.*\d{4}.+<[^@<>]+@[^@<>.]+\.[^@<>]+>:?$/
|
69
69
|
|
70
70
|
# codinghorror via Discourse Meta wrote:
|
71
71
|
# codinghorror via Discourse Meta <info@discourse.org> schrieb:
|
@@ -77,14 +77,14 @@ class EmbeddedEmailMatcher
|
|
77
77
|
]
|
78
78
|
|
79
79
|
SOMEONE_VIA_SOMETHING_WROTE_REGEXES = SOMEONE_VIA_SOMETHING_WROTE_MARKERS.map do |wrote|
|
80
|
-
/^[[:
|
80
|
+
/^[[:blank:]>]*.+ via .+ #{wrote}:?[[:blank:]]*$/
|
81
81
|
end
|
82
82
|
|
83
83
|
# Some One <info@discourse.org> wrote:
|
84
|
-
SOMEONE_EMAIL_WROTE_REGEX = /^[[:
|
84
|
+
SOMEONE_EMAIL_WROTE_REGEX = /^[[:blank:]>]*.+ <.+@.+\..+> wrote:?/
|
85
85
|
|
86
86
|
# Posted by mpalmer on 01/21/2016
|
87
|
-
POSTED_BY_SOMEONE_ON_DATE_REGEX = /^[[:
|
87
|
+
POSTED_BY_SOMEONE_ON_DATE_REGEX = /^[[:blank:]>]*Posted by .+ on \d{2}\/\d{2}\/\d{4}$/i
|
88
88
|
|
89
89
|
# Begin forwarded message:
|
90
90
|
# Reply Message
|
@@ -94,13 +94,15 @@ class EmbeddedEmailMatcher
|
|
94
94
|
# *----- Original Message -----*
|
95
95
|
FORWARDED_EMAIL_REGEXES = [
|
96
96
|
# English
|
97
|
-
/^[[:
|
98
|
-
/^[[:
|
99
|
-
/^[[:
|
97
|
+
/^[[:blank:]>]*Begin forwarded message:/i,
|
98
|
+
/^[[:blank:]>]*Reply message/i,
|
99
|
+
/^[[:blank:]>\*]*-{2,}[[:blank:]]*(Forwarded|Original) Message[[:blank:]]*-{2,}/i,
|
100
|
+
# French
|
101
|
+
/^[[:blank:]>\*]*-{2,}[[:blank:]]*Message transféré[[:blank:]]*-{2,}/i,
|
100
102
|
# German
|
101
|
-
/^[[:
|
103
|
+
/^[[:blank:]>\*]*-{2,}[[:blank:]]*Ursprüngliche Nachricht[[:blank:]]*-{2,}/i,
|
102
104
|
# Spanish
|
103
|
-
/^[[:
|
105
|
+
/^[[:blank:]>\*]*-{2,}[[:blank:]]*Mensaje original[[:blank:]]*-{2,}/i,
|
104
106
|
]
|
105
107
|
|
106
108
|
EMBEDDED_REGEXES = [
|
@@ -15,20 +15,18 @@ class SignatureMatcher
|
|
15
15
|
# 從我的 iPhone 傳送
|
16
16
|
SIGNATURE_REGEXES = [
|
17
17
|
# Chinese
|
18
|
-
/^[[:
|
18
|
+
/^[[:blank:]>]*從我的 iPhone 傳送/i,
|
19
19
|
# English
|
20
|
-
/^[[:
|
21
|
-
/^[[:
|
22
|
-
/^[[:
|
23
|
-
/^[[:space:]]*<<sent (?:from|via|with|by) .+>>/i,
|
24
|
-
/^[[:space:]]*from my .{1,20}/i, # don't match too much
|
20
|
+
/^[[:blank:]>]*[[:word:]]+ from mobile/i,
|
21
|
+
/^[[:blank:]>]*[\(<]*sent (?:from|via|with|by) .+[\)>]*/i,
|
22
|
+
/^[[:blank:]>]*from my .{1,20}/i, # don't match too much
|
25
23
|
# French
|
26
|
-
/^[[:
|
24
|
+
/^[[:blank:]>]*Envoyé depuis mon .+/i,
|
27
25
|
# German
|
28
|
-
/^[[:
|
29
|
-
/^[[:
|
26
|
+
/^[[:blank:]>]*Von meinem .+ gesendet/i,
|
27
|
+
/^[[:blank:]>]*Diese Nachricht wurde von .+ gesendet/i,
|
30
28
|
# Spanish
|
31
|
-
/^[[:
|
29
|
+
/^[[:blank:]>]*Enviado desde mi .+/i,
|
32
30
|
]
|
33
31
|
|
34
32
|
def self.match?(line)
|
@@ -0,0 +1 @@
|
|
1
|
+
This is a reply from Outlook!
|
@@ -0,0 +1 @@
|
|
1
|
+
This is a reply from Outlook!
|
@@ -0,0 +1 @@
|
|
1
|
+
This is the actual reply.
|
@@ -0,0 +1 @@
|
|
1
|
+
test
|
@@ -0,0 +1,25 @@
|
|
1
|
+
Thank you.
|
2
|
+
|
3
|
+
Sent from Outlook Mobile<https://foo.bar>
|
4
|
+
|
5
|
+
|
6
|
+
|
7
|
+
|
8
|
+
On Sun, Feb 7, 2016 at 12:12 AM -0800, "Arpit Jalan" <arpit.jalan@discourse.org<mailto:arpit.jalan@discourse.org>> wrote:
|
9
|
+
|
10
|
+
Hi Some,
|
11
|
+
|
12
|
+
https://meta.discourse.org is now running on latest Discourse version!
|
13
|
+
|
14
|
+
Regards,
|
15
|
+
Arpit
|
16
|
+
|
17
|
+
On Fri, Feb 5, 2016 at 10:43 AM Arpit Jalan <arpit.jalan@discourse.org<mailto:arpit.jalan@discourse.org>> wrote:
|
18
|
+
Okay, sure!
|
19
|
+
|
20
|
+
Arpit
|
21
|
+
On Fri, 5 Feb 2016 at 10:42, Some One <foo@bar.com<mailto:foo@bar.com>> wrote:
|
22
|
+
Arpit,
|
23
|
+
Yes that sounds good.
|
24
|
+
|
25
|
+
Sent from Outlook Mobile<https://foo.bar>
|
@@ -0,0 +1 @@
|
|
1
|
+
Gruß Discourse
|
@@ -0,0 +1 @@
|
|
1
|
+
Igual que siempre (inclusive ahora), sin nada raro :/
|
File without changes
|
@@ -0,0 +1,15 @@
|
|
1
|
+
Da: Sally54721
|
2
|
+
Risposta: Testy McTesterson / Test
|
3
|
+
Data: giovedì 8 ottobre 2015 15:26
|
4
|
+
A: Testy McTesterson
|
5
|
+
Oggetto: Test | Issue (#3)
|
6
|
+
|
7
|
+
[@example](http://example.com/u/example)
|
8
|
+
|
9
|
+
—
|
10
|
+
Reply to this email directly or [view it on GitLab](http://git.example.com/example/Test/issues/3). {"@context":"[http://schema.org","@type":"EmailMessage","action":{"@type":"ViewAction","name":"View](http://schema.org%22,%22@type%22:%22EmailMessage%22,%22action%22:%7B%22@type%22:%22ViewAction%22,%22name%22:%22View) Issue","url":"[http://git.example.com/example/Test/issues/3"}](http://git.example.com/example/Test/issues/3%22%7D)} You're receiving this notification because you are a member of the Testy McTesterson / Test project team.
|
11
|
+
|
12
|
+
--
|
13
|
+
Questo messaggio e' stato analizzato con Libra ESVA ed e' risultato non infetto.
|
14
|
+
[Clicca qui per segnalarlo come spam.](http://esva.example.com/cgi-bin/learn-msg.cgi?id=1234567890.ABCDEF)
|
15
|
+
[Clicca qui per metterlo in blacklist](http://esva.example.com/cgi-bin/learn-msg.cgi?blacklist=1&id=1234567890.ABCDEF)
|
@@ -0,0 +1,17 @@
|
|
1
|
+
test
|
2
|
+
|
3
|
+
Da: Sally54721
|
4
|
+
Risposta: Testy McTesterson / Test
|
5
|
+
Data: giovedì 8 ottobre 2015 15:26
|
6
|
+
A: Testy McTesterson
|
7
|
+
Oggetto: Test | Issue (#3)
|
8
|
+
|
9
|
+
[@example](http://example.com/u/example)
|
10
|
+
|
11
|
+
—
|
12
|
+
Reply to this email directly or [view it on GitLab](http://git.example.com/example/Test/issues/3). {"@context":"[http://schema.org","@type":"EmailMessage","action":{"@type":"ViewAction","name":"View](http://schema.org%22,%22@type%22:%22EmailMessage%22,%22action%22:%7B%22@type%22:%22ViewAction%22,%22name%22:%22View) Issue","url":"[http://git.example.com/example/Test/issues/3"}](http://git.example.com/example/Test/issues/3%22%7D)} You're receiving this notification because you are a member of the Testy McTesterson / Test project team.
|
13
|
+
|
14
|
+
--
|
15
|
+
Questo messaggio e' stato analizzato con Libra ESVA ed e' risultato non infetto.
|
16
|
+
[Clicca qui per segnalarlo come spam.](http://esva.example.com/cgi-bin/learn-msg.cgi?id=1234567890.ABCDEF)
|
17
|
+
[Clicca qui per metterlo in blacklist](http://esva.example.com/cgi-bin/learn-msg.cgi?blacklist=1&id=1234567890.ABCDEF)
|
@@ -0,0 +1,16 @@
|
|
1
|
+
From: Some One <discuss@foo.bar<mailto:discuss@foo.bar>>
|
2
|
+
Reply-To: "For.bar" <reply+275e18486b01289e3250bebe85ef6496@members.foo.bar<mailto:reply+275e18486b01289e3250bebe85ef6496@members.foo.bar>>
|
3
|
+
Date: Monday, February 8, 2016 11:44 AM
|
4
|
+
To: Discourse <discourse@discourse.org<mailto:discourse@discourse.org>>
|
5
|
+
Subject: VIS
|
6
|
+
|
7
|
+
|
8
|
+
Here's an email with some very important stuff.
|
9
|
+
|
10
|
+
|
11
|
+
________________________________
|
12
|
+
Reply here<http://foo.bar> or hit reply from your inbox to help members by sharing your ideas.
|
13
|
+
Mute this topic<http://42.wat> to stop getting updates, we'll send you the next one.
|
14
|
+
|
15
|
+
|
16
|
+
DO NOT FORWARD THIS EMAIL!
|
@@ -0,0 +1,15 @@
|
|
1
|
+
Da: Sally54721
|
2
|
+
Risposta: Testy McTesterson / Test
|
3
|
+
Data: giovedì 8 ottobre 2015 15:26
|
4
|
+
A: Testy McTesterson
|
5
|
+
Oggetto: Test | Issue (#3)
|
6
|
+
|
7
|
+
[@example](http://example.com/u/example)
|
8
|
+
|
9
|
+
—
|
10
|
+
Reply to this email directly or [view it on GitLab](http://git.example.com/example/Test/issues/3). {"@context":"[http://schema.org","@type":"EmailMessage","action":{"@type":"ViewAction","name":"View](http://schema.org%22,%22@type%22:%22EmailMessage%22,%22action%22:%7B%22@type%22:%22ViewAction%22,%22name%22:%22View) Issue","url":"[http://git.example.com/example/Test/issues/3"}](http://git.example.com/example/Test/issues/3%22%7D)} You're receiving this notification because you are a member of the Testy McTesterson / Test project team.
|
11
|
+
|
12
|
+
--
|
13
|
+
Questo messaggio e' stato analizzato con Libra ESVA ed e' risultato non infetto.
|
14
|
+
[Clicca qui per segnalarlo come spam.](http://esva.example.com/cgi-bin/learn-msg.cgi?id=1234567890.ABCDEF)
|
15
|
+
[Clicca qui per metterlo in blacklist](http://esva.example.com/cgi-bin/learn-msg.cgi?blacklist=1&id=1234567890.ABCDEF)
|
@@ -0,0 +1,15 @@
|
|
1
|
+
From: Arpit Jalan <arpit.jalan@discourse.org<mailto:arpit.jalan@discourse.org>>
|
2
|
+
Sent: Thursday, February 4, 2016 10:05 AM
|
3
|
+
Subject: Meta Discourse update
|
4
|
+
To: Some One <foo@bar.com<mailto:foo@bar.com>>, Discourse Team <team@discourse.org<mailto:team@discourse.org>>
|
5
|
+
|
6
|
+
|
7
|
+
|
8
|
+
Hi Some One,
|
9
|
+
|
10
|
+
Time to update meta to the latest Discourse version!
|
11
|
+
|
12
|
+
Do you want me to take care of it?
|
13
|
+
|
14
|
+
Regards,
|
15
|
+
Arpit
|
@@ -0,0 +1,9 @@
|
|
1
|
+
De: "Miguel" <foo@bar.es>
|
2
|
+
Enviado: =E2=80=8E16/=E2=80=8E02/=E2=80=8E2016 14:53
|
3
|
+
Para: "discourse" <discourse@discourse.org>
|
4
|
+
Asunto: [MP]Parser del email
|
5
|
+
|
6
|
+
Visita el tema o responde a este email para publicar.
|
7
|
+
Para no recibir m=C3=A1s notificaciones de este tema en particular, haz cli=
|
8
|
+
c aqu=C3=AD. Para darte de baja de estos emails, cambia tus preferencias
|
9
|
+
=
|
@@ -9,6 +9,9 @@ class TestEmailReplyTrimmer < Minitest::Test
|
|
9
9
|
|
10
10
|
def test_all_emails_have_a_matching_reply
|
11
11
|
assert_equal(EMAILS, TRIMMED, "Files in /emails and /trimmed folders should match 1-to-1.")
|
12
|
+
end
|
13
|
+
|
14
|
+
def test_all_emails_have_a_matching_elided
|
12
15
|
assert_equal(EMAILS, ELIDED, "Files in /emails and /elided folders should match 1-to-1.")
|
13
16
|
end
|
14
17
|
|
@@ -24,6 +27,21 @@ class TestEmailReplyTrimmer < Minitest::Test
|
|
24
27
|
end
|
25
28
|
end
|
26
29
|
|
30
|
+
EMBEDDED_EMAILS = %w{
|
31
|
+
email_headers_1 email_headers_2 email_headers_3 email_headers_4
|
32
|
+
embedded_email_10 embedded_email_german_3 embedded_email_spanish_2
|
33
|
+
forwarded_message
|
34
|
+
}
|
35
|
+
|
36
|
+
EMBEDDED_EMAILS.each do |name|
|
37
|
+
filename = "#{name}.txt"
|
38
|
+
define_method("test_embedded_extraction_for_#{name}") do
|
39
|
+
e, b = extract_embedded_email(filename)
|
40
|
+
assert_equal(e, embedded(filename), "[EMBEDDED] EMAIL: #{filename}")
|
41
|
+
assert_equal(b, before(filename), "[BEFORE] EMAIL: #{filename}")
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
27
45
|
def trim(filename)
|
28
46
|
EmailReplyTrimmer.trim(email(filename))
|
29
47
|
end
|
@@ -32,6 +50,10 @@ class TestEmailReplyTrimmer < Minitest::Test
|
|
32
50
|
EmailReplyTrimmer.trim(email(filename), true)[1]
|
33
51
|
end
|
34
52
|
|
53
|
+
def extract_embedded_email(filename)
|
54
|
+
EmailReplyTrimmer.extract_embedded_email(email(filename))
|
55
|
+
end
|
56
|
+
|
35
57
|
def email(filename)
|
36
58
|
File.read("test/emails/#{filename}").strip
|
37
59
|
end
|
@@ -44,4 +66,12 @@ class TestEmailReplyTrimmer < Minitest::Test
|
|
44
66
|
File.read("test/elided/#{filename}").strip
|
45
67
|
end
|
46
68
|
|
69
|
+
def embedded(filename)
|
70
|
+
File.read("test/embedded/#{filename}").strip
|
71
|
+
end
|
72
|
+
|
73
|
+
def before(filename)
|
74
|
+
File.read("test/before/#{filename}").strip
|
75
|
+
end
|
76
|
+
|
47
77
|
end
|
@@ -0,0 +1 @@
|
|
1
|
+
test
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: email_reply_trimmer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Régis Hanol
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-11-16 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: EmailReplyTrimmer is a small library to trim replies from plain text
|
14
14
|
email.
|
@@ -29,11 +29,20 @@ files:
|
|
29
29
|
- lib/email_reply_trimmer/empty_line_matcher.rb
|
30
30
|
- lib/email_reply_trimmer/quote_matcher.rb
|
31
31
|
- lib/email_reply_trimmer/signature_matcher.rb
|
32
|
+
- test/before/email_headers_1.txt
|
33
|
+
- test/before/email_headers_2.txt
|
34
|
+
- test/before/email_headers_3.txt
|
35
|
+
- test/before/email_headers_4.txt
|
36
|
+
- test/before/embedded_email_10.txt
|
37
|
+
- test/before/embedded_email_german_3.txt
|
38
|
+
- test/before/embedded_email_spanish_2.txt
|
39
|
+
- test/before/forwarded_message.txt
|
32
40
|
- test/elided/delimiters.txt
|
33
41
|
- test/elided/dual_embedded.txt
|
34
42
|
- test/elided/email_headers_1.txt
|
35
43
|
- test/elided/email_headers_2.txt
|
36
44
|
- test/elided/email_headers_3.txt
|
45
|
+
- test/elided/email_headers_4.txt
|
37
46
|
- test/elided/embedded_ception.txt
|
38
47
|
- test/elided/embedded_email_1.txt
|
39
48
|
- test/elided/embedded_email_10.txt
|
@@ -74,6 +83,7 @@ files:
|
|
74
83
|
- test/emails/email_headers_1.txt
|
75
84
|
- test/emails/email_headers_2.txt
|
76
85
|
- test/emails/email_headers_3.txt
|
86
|
+
- test/emails/email_headers_4.txt
|
77
87
|
- test/emails/embedded_ception.txt
|
78
88
|
- test/emails/embedded_email_1.txt
|
79
89
|
- test/emails/embedded_email_10.txt
|
@@ -109,12 +119,21 @@ files:
|
|
109
119
|
- test/emails/strip.txt
|
110
120
|
- test/emails/text_only.txt
|
111
121
|
- test/emails/usenet.txt
|
122
|
+
- test/embedded/email_headers_1.txt
|
123
|
+
- test/embedded/email_headers_2.txt
|
124
|
+
- test/embedded/email_headers_3.txt
|
125
|
+
- test/embedded/email_headers_4.txt
|
126
|
+
- test/embedded/embedded_email_10.txt
|
127
|
+
- test/embedded/embedded_email_german_3.txt
|
128
|
+
- test/embedded/embedded_email_spanish_2.txt
|
129
|
+
- test/embedded/forwarded_message.txt
|
112
130
|
- test/test_email_reply_trimmer.rb
|
113
131
|
- test/trimmed/delimiters.txt
|
114
132
|
- test/trimmed/dual_embedded.txt
|
115
133
|
- test/trimmed/email_headers_1.txt
|
116
134
|
- test/trimmed/email_headers_2.txt
|
117
135
|
- test/trimmed/email_headers_3.txt
|
136
|
+
- test/trimmed/email_headers_4.txt
|
118
137
|
- test/trimmed/embedded_ception.txt
|
119
138
|
- test/trimmed/embedded_email_1.txt
|
120
139
|
- test/trimmed/embedded_email_10.txt
|