email_reply_trimmer 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/email_reply_trimmer/delimiter_matcher.rb +2 -2
- data/lib/email_reply_trimmer.rb +42 -22
- data/test/elided/delimiters.txt +10 -0
- data/test/elided/email_headers_1.txt +2 -0
- data/test/elided/email_headers_2.txt +1 -0
- data/test/elided/email_headers_3.txt +1 -0
- data/test/elided/embedded_ception.txt +1 -0
- data/test/elided/embedded_email_10.txt +7 -0
- data/test/elided/embedded_email_7.txt +10 -0
- data/test/elided/embedded_email_german_1.txt +3 -0
- data/test/elided/embedded_email_italian.txt +1 -0
- data/test/elided/embedded_email_polish.txt +3 -0
- data/test/elided/embedded_email_quote_text.txt +1 -0
- data/test/elided/embedded_email_spanish_2.txt +1 -0
- data/test/elided/forwarded_message.txt +6 -0
- data/test/elided/signatures.txt +26 -0
- data/test/elided/usenet.txt +7 -0
- data/test/emails/usenet.txt +9 -0
- data/test/test_email_reply_trimmer.rb +1 -1
- data/test/trimmed/embedded_email_7.txt +0 -9
- data/test/trimmed/embedded_email_polish.txt +0 -2
- data/test/trimmed/usenet.txt +1 -0
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 61d47c5b31f114be7a83ff4ee618716f9a448726
|
4
|
+
data.tar.gz: 3654868aad0175023394df9c5a26fe5fd70f9ad5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 62bb96a32309ea18a3d777fc52cfa900e46c6b62e65c0a010027b01938e39ab5cb0c4f2c68b10dd1308ca2c57bbd7e2df00cc18dfc15c1c4713cdff468387ad9
|
7
|
+
data.tar.gz: 69d4a9a6868904d4e8cebdff0031e9fcba389e227eb6be2068d54e480b6ef8f09de1f52d276adc5599c0c3deec72faa68d3f96abd11b06b5ca92e57d04e581b3
|
@@ -1,7 +1,7 @@
|
|
1
1
|
class DelimiterMatcher
|
2
2
|
|
3
|
-
DELIMITER_CHARACTERS ||=
|
4
|
-
DELIMITER_REGEX ||= /^[[:space:]]*[#{Regexp.escape(DELIMITER_CHARACTERS
|
3
|
+
DELIMITER_CHARACTERS ||= "-_,=+~#*ᐧ"
|
4
|
+
DELIMITER_REGEX ||= /^[[:space:]]*[#{Regexp.escape(DELIMITER_CHARACTERS)}]+[[:space:]]*$/
|
5
5
|
|
6
6
|
def self.match?(line)
|
7
7
|
line =~ DELIMITER_REGEX
|
data/lib/email_reply_trimmer.rb
CHANGED
@@ -6,7 +6,7 @@ require_relative "email_reply_trimmer/email_header_matcher"
|
|
6
6
|
require_relative "email_reply_trimmer/quote_matcher"
|
7
7
|
|
8
8
|
class EmailReplyTrimmer
|
9
|
-
VERSION = "0.1.3"
|
9
|
+
VERSION = "0.1.4"
|
10
10
|
|
11
11
|
DELIMITER = "d"
|
12
12
|
EMBEDDED = "b"
|
@@ -17,12 +17,12 @@ class EmailReplyTrimmer
|
|
17
17
|
TEXT = "t"
|
18
18
|
|
19
19
|
def self.identify_line_content(line)
|
20
|
-
return EMPTY if EmptyLineMatcher.match?
|
21
|
-
return DELIMITER if DelimiterMatcher.match?
|
22
|
-
return SIGNATURE if SignatureMatcher.match?
|
23
|
-
return EMBEDDED if EmbeddedEmailMatcher.match?
|
24
|
-
return EMAIL_HEADER if EmailHeaderMatcher.match?
|
25
|
-
return QUOTE if QuoteMatcher.match?
|
20
|
+
return EMPTY if EmptyLineMatcher.match? line
|
21
|
+
return DELIMITER if DelimiterMatcher.match? line
|
22
|
+
return SIGNATURE if SignatureMatcher.match? line
|
23
|
+
return EMBEDDED if EmbeddedEmailMatcher.match? line
|
24
|
+
return EMAIL_HEADER if EmailHeaderMatcher.match? line
|
25
|
+
return QUOTE if QuoteMatcher.match? line
|
26
26
|
return TEXT
|
27
27
|
end
|
28
28
|
|
@@ -34,22 +34,26 @@ class EmailReplyTrimmer
|
|
34
34
|
|
35
35
|
# fix embedded email markers that might span over multiple lines
|
36
36
|
EmbeddedEmailMatcher::ON_DATE_SOMEONE_WROTE_REGEXES.each do |r|
|
37
|
-
|
38
|
-
text.gsub!(r) { |m| m.gsub(/\n[[:space:]>\-]*/, " ") }
|
39
|
-
end
|
37
|
+
text.gsub!(r) { |m| m.gsub(/\n[[:space:]>\-]*/, " ") }
|
40
38
|
end
|
41
39
|
|
42
|
-
removed = []
|
43
|
-
|
44
40
|
# from now on, we'll work on a line-by-line basis
|
45
41
|
lines = text.split("\n")
|
42
|
+
lines_dup = lines.dup
|
46
43
|
|
47
44
|
# identify content of each lines
|
48
45
|
pattern = lines.map { |l| identify_line_content(l) }.join
|
49
46
|
|
50
|
-
# remove
|
51
|
-
|
52
|
-
index = pattern =~ /
|
47
|
+
# remove everything after the first delimiter
|
48
|
+
if pattern =~ /d/
|
49
|
+
index = pattern =~ /d/
|
50
|
+
pattern = pattern[0...index]
|
51
|
+
lines = lines[0...index]
|
52
|
+
end
|
53
|
+
|
54
|
+
# remove all mobile signatures
|
55
|
+
while pattern =~ /s/
|
56
|
+
index = pattern =~ /s/
|
53
57
|
pattern.slice!(index)
|
54
58
|
lines.slice!(index)
|
55
59
|
end
|
@@ -58,7 +62,6 @@ class EmailReplyTrimmer
|
|
58
62
|
# then take everything up to that marker
|
59
63
|
if pattern =~ /te*b[^q]*$/
|
60
64
|
index = pattern =~ /te*b[^q]*$/
|
61
|
-
removed = lines[(index + 1)..-1]
|
62
65
|
pattern = pattern[0..index]
|
63
66
|
lines = lines[0..index]
|
64
67
|
end
|
@@ -67,7 +70,6 @@ class EmailReplyTrimmer
|
|
67
70
|
# then take everything up to that marker
|
68
71
|
if pattern =~ /te*b[eqbh]*[te]*$/
|
69
72
|
index = pattern =~ /te*b[eqbh]*[te]*$/
|
70
|
-
removed = lines[(index + 1)..-1]
|
71
73
|
pattern = pattern[0..index]
|
72
74
|
lines = lines[0..index]
|
73
75
|
end
|
@@ -75,8 +77,8 @@ class EmailReplyTrimmer
|
|
75
77
|
# if there still are some embedded email markers, just remove them
|
76
78
|
while pattern =~ /b/
|
77
79
|
index = pattern =~ /b/
|
78
|
-
pattern
|
79
|
-
lines
|
80
|
+
pattern.slice!(index)
|
81
|
+
lines.slice!(index)
|
80
82
|
end
|
81
83
|
|
82
84
|
# fix email headers when they span over multiple lines
|
@@ -90,7 +92,6 @@ class EmailReplyTrimmer
|
|
90
92
|
# these headers
|
91
93
|
if pattern =~ /t[eq]*h{3,}/
|
92
94
|
index = pattern =~ /t[eq]*h{3,}/
|
93
|
-
removed = lines[(index + 1)..-1]
|
94
95
|
pattern = pattern[0..index]
|
95
96
|
lines = lines[0..index]
|
96
97
|
end
|
@@ -111,13 +112,32 @@ class EmailReplyTrimmer
|
|
111
112
|
|
112
113
|
# results
|
113
114
|
trimmed = lines.join("\n").strip
|
114
|
-
elided = removed.join("\n").strip
|
115
115
|
|
116
116
|
if split
|
117
|
-
[trimmed,
|
117
|
+
[trimmed, compute_elided(lines_dup, lines)]
|
118
118
|
else
|
119
119
|
trimmed
|
120
120
|
end
|
121
121
|
end
|
122
122
|
|
123
|
+
private
|
124
|
+
|
125
|
+
def self.compute_elided(text, lines)
|
126
|
+
elided = []
|
127
|
+
|
128
|
+
t = 0
|
129
|
+
l = 0
|
130
|
+
|
131
|
+
while t < text.size
|
132
|
+
while l < lines.size && text[t] == lines[l]
|
133
|
+
t += 1
|
134
|
+
l += 1
|
135
|
+
end
|
136
|
+
elided << text[t]
|
137
|
+
t += 1
|
138
|
+
end
|
139
|
+
|
140
|
+
elided.join("\n").strip
|
141
|
+
end
|
142
|
+
|
123
143
|
end
|
data/test/elided/delimiters.txt
CHANGED
@@ -8,6 +8,7 @@ Subject: VIS
|
|
8
8
|
Here's an email with some very important stuff.
|
9
9
|
|
10
10
|
|
11
|
+
________________________________
|
11
12
|
Reply here<http://foo.bar> or hit reply from your inbox to help members by sharing your ideas.
|
12
13
|
Mute this topic<http://42.wat> to stop getting updates, we'll send you the next one.
|
13
14
|
|
@@ -1,3 +1,8 @@
|
|
1
|
+
Sent from Outlook Mobile<https://foo.bar>
|
2
|
+
|
3
|
+
|
4
|
+
|
5
|
+
|
1
6
|
On Sun, Feb 7, 2016 at 12:12 AM -0800, "Arpit Jalan" <arpit.jalan@discourse.org<mailto:arpit.jalan@discourse.org>> wrote:
|
2
7
|
|
3
8
|
Hi Some,
|
@@ -15,7 +20,9 @@ On Fri, 5 Feb 2016 at 10:42, Some One <foo@bar.com<mailto:foo@bar.com>> wrote:
|
|
15
20
|
Arpit,
|
16
21
|
Yes that sounds good.
|
17
22
|
|
23
|
+
Sent from Outlook Mobile<https://foo.bar>
|
18
24
|
|
25
|
+
_____________________________
|
19
26
|
From: Arpit Jalan <arpit.jalan@discourse.org<mailto:arpit.jalan@discourse.org>>
|
20
27
|
Sent: Thursday, February 4, 2016 10:05 AM
|
21
28
|
Subject: Meta Discourse update
|
@@ -17,3 +17,6 @@ codinghorror via Discourse Meta <info@discourse.org> schrieb:
|
|
17
17
|
>
|
18
18
|
>To unsubscribe from these emails, visit your [user
|
19
19
|
>preferences](http://meta.discourse.org/user_preferences).
|
20
|
+
|
21
|
+
--
|
22
|
+
Diese Nachricht wurde von meinem Android-Mobiltelefon mit K-9 Mail gesendet.
|
@@ -0,0 +1 @@
|
|
1
|
+
On Mon, Aug 19, 2013 at 2:36 AM, SomeOne via Discourse Meta < info@discourse.org> wrote:
|
data/test/elided/signatures.txt
CHANGED
@@ -0,0 +1,26 @@
|
|
1
|
+
Envoyé depuis mon iPhone
|
2
|
+
|
3
|
+
Von meinem Mobilgerät gesendet
|
4
|
+
Diese Nachricht wurde von meinem Android-Mobiltelefon mit K-9 Mail gesendet.
|
5
|
+
|
6
|
+
Someone from mobile
|
7
|
+
From My Iphone 6
|
8
|
+
Sent via mobile
|
9
|
+
Sent with Airmail
|
10
|
+
Sent from Windows Mail
|
11
|
+
Sent from Mailbox
|
12
|
+
Sent from Mailbox for iPad
|
13
|
+
Sent from Yahoo Mail on Android
|
14
|
+
Sent from my TI-85
|
15
|
+
Sent from my iPhone
|
16
|
+
Sent from my iPod
|
17
|
+
Sent from my Alcatel Flash2
|
18
|
+
Sent from my mobile device
|
19
|
+
Sent from my cell, please excuse any typos.
|
20
|
+
Sent from my Samsung Galaxy s5 Octacore device
|
21
|
+
Sent from my HTC M8 Android phone. Please excuse typoze
|
22
|
+
Sent from my Windows 8 PC <http://windows.microsoft.com/consumer-preview>
|
23
|
+
<<sent by galaxy>>
|
24
|
+
(sent from a phone)
|
25
|
+
(Sent from mobile device)
|
26
|
+
從我的 iPhone 傳送
|
@@ -1,7 +1,7 @@
|
|
1
1
|
require "minitest/autorun"
|
2
2
|
require "email_reply_trimmer"
|
3
3
|
|
4
|
-
class TestEmailReplyTrimmer < Minitest::
|
4
|
+
class TestEmailReplyTrimmer < Minitest::Test
|
5
5
|
|
6
6
|
EMAILS = Dir["test/emails/*.txt"].map { |path| File.basename(path) }
|
7
7
|
TRIMMED = Dir["test/trimmed/*.txt"].map { |path| File.basename(path) }
|
@@ -1,5 +1,4 @@
|
|
1
1
|
This is a line before the embedded email.
|
2
|
-
|
3
2
|
> Hello
|
4
3
|
>
|
5
4
|
> This is the embedded email.
|
@@ -9,11 +8,3 @@ This is some text
|
|
9
8
|
after the
|
10
9
|
|
11
10
|
embedded email.
|
12
|
-
|
13
|
-
>
|
14
|
-
> This is another part of the embedded email.
|
15
|
-
>
|
16
|
-
>
|
17
|
-
|
18
|
-
|
19
|
-
And here's my signature.
|
@@ -0,0 +1 @@
|
|
1
|
+
Mal sehen was hier mit der Signatur passiert!
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: email_reply_trimmer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Régis Hanol
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-10-12 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: EmailReplyTrimmer is a small library to trim replies from plain text
|
14
14
|
email.
|
@@ -68,6 +68,7 @@ files:
|
|
68
68
|
- test/elided/signatures.txt
|
69
69
|
- test/elided/strip.txt
|
70
70
|
- test/elided/text_only.txt
|
71
|
+
- test/elided/usenet.txt
|
71
72
|
- test/emails/delimiters.txt
|
72
73
|
- test/emails/dual_embedded.txt
|
73
74
|
- test/emails/email_headers_1.txt
|
@@ -107,6 +108,7 @@ files:
|
|
107
108
|
- test/emails/signatures.txt
|
108
109
|
- test/emails/strip.txt
|
109
110
|
- test/emails/text_only.txt
|
111
|
+
- test/emails/usenet.txt
|
110
112
|
- test/test_email_reply_trimmer.rb
|
111
113
|
- test/trimmed/delimiters.txt
|
112
114
|
- test/trimmed/dual_embedded.txt
|
@@ -147,6 +149,7 @@ files:
|
|
147
149
|
- test/trimmed/signatures.txt
|
148
150
|
- test/trimmed/strip.txt
|
149
151
|
- test/trimmed/text_only.txt
|
152
|
+
- test/trimmed/usenet.txt
|
150
153
|
homepage: https://github.com/discourse/email_reply_trimmer
|
151
154
|
licenses:
|
152
155
|
- MIT
|