email_reply_trimmer 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/email_reply_trimmer/delimiter_matcher.rb +2 -2
- data/lib/email_reply_trimmer.rb +42 -22
- data/test/elided/delimiters.txt +10 -0
- data/test/elided/email_headers_1.txt +2 -0
- data/test/elided/email_headers_2.txt +1 -0
- data/test/elided/email_headers_3.txt +1 -0
- data/test/elided/embedded_ception.txt +1 -0
- data/test/elided/embedded_email_10.txt +7 -0
- data/test/elided/embedded_email_7.txt +10 -0
- data/test/elided/embedded_email_german_1.txt +3 -0
- data/test/elided/embedded_email_italian.txt +1 -0
- data/test/elided/embedded_email_polish.txt +3 -0
- data/test/elided/embedded_email_quote_text.txt +1 -0
- data/test/elided/embedded_email_spanish_2.txt +1 -0
- data/test/elided/forwarded_message.txt +6 -0
- data/test/elided/signatures.txt +26 -0
- data/test/elided/usenet.txt +7 -0
- data/test/emails/usenet.txt +9 -0
- data/test/test_email_reply_trimmer.rb +1 -1
- data/test/trimmed/embedded_email_7.txt +0 -9
- data/test/trimmed/embedded_email_polish.txt +0 -2
- data/test/trimmed/usenet.txt +1 -0
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 61d47c5b31f114be7a83ff4ee618716f9a448726
|
4
|
+
data.tar.gz: 3654868aad0175023394df9c5a26fe5fd70f9ad5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 62bb96a32309ea18a3d777fc52cfa900e46c6b62e65c0a010027b01938e39ab5cb0c4f2c68b10dd1308ca2c57bbd7e2df00cc18dfc15c1c4713cdff468387ad9
|
7
|
+
data.tar.gz: 69d4a9a6868904d4e8cebdff0031e9fcba389e227eb6be2068d54e480b6ef8f09de1f52d276adc5599c0c3deec72faa68d3f96abd11b06b5ca92e57d04e581b3
|
data/lib/email_reply_trimmer/delimiter_matcher.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
class DelimiterMatcher
|
2
2
|
|
3
|
-
DELIMITER_CHARACTERS ||=
|
4
|
-
DELIMITER_REGEX ||= /^[[:space:]]*[#{Regexp.escape(DELIMITER_CHARACTERS
|
3
|
+
DELIMITER_CHARACTERS ||= "-_,=+~#*ᐧ"
|
4
|
+
DELIMITER_REGEX ||= /^[[:space:]]*[#{Regexp.escape(DELIMITER_CHARACTERS)}]+[[:space:]]*$/
|
5
5
|
|
6
6
|
def self.match?(line)
|
7
7
|
line =~ DELIMITER_REGEX
|
data/lib/email_reply_trimmer.rb
CHANGED
@@ -6,7 +6,7 @@ require_relative "email_reply_trimmer/email_header_matcher"
|
|
6
6
|
require_relative "email_reply_trimmer/quote_matcher"
|
7
7
|
|
8
8
|
class EmailReplyTrimmer
|
9
|
-
VERSION = "0.1.3"
|
9
|
+
VERSION = "0.1.4"
|
10
10
|
|
11
11
|
DELIMITER = "d"
|
12
12
|
EMBEDDED = "b"
|
@@ -17,12 +17,12 @@ class EmailReplyTrimmer
|
|
17
17
|
TEXT = "t"
|
18
18
|
|
19
19
|
def self.identify_line_content(line)
|
20
|
-
return EMPTY if EmptyLineMatcher.match?
|
21
|
-
return DELIMITER if DelimiterMatcher.match?
|
22
|
-
return SIGNATURE if SignatureMatcher.match?
|
23
|
-
return EMBEDDED if EmbeddedEmailMatcher.match?
|
24
|
-
return EMAIL_HEADER if EmailHeaderMatcher.match?
|
25
|
-
return QUOTE if QuoteMatcher.match?
|
20
|
+
return EMPTY if EmptyLineMatcher.match? line
|
21
|
+
return DELIMITER if DelimiterMatcher.match? line
|
22
|
+
return SIGNATURE if SignatureMatcher.match? line
|
23
|
+
return EMBEDDED if EmbeddedEmailMatcher.match? line
|
24
|
+
return EMAIL_HEADER if EmailHeaderMatcher.match? line
|
25
|
+
return QUOTE if QuoteMatcher.match? line
|
26
26
|
return TEXT
|
27
27
|
end
|
28
28
|
|
@@ -34,22 +34,26 @@ class EmailReplyTrimmer
|
|
34
34
|
|
35
35
|
# fix embedded email markers that might span over multiple lines
|
36
36
|
EmbeddedEmailMatcher::ON_DATE_SOMEONE_WROTE_REGEXES.each do |r|
|
37
|
-
|
38
|
-
text.gsub!(r) { |m| m.gsub(/\n[[:space:]>\-]*/, " ") }
|
39
|
-
end
|
37
|
+
text.gsub!(r) { |m| m.gsub(/\n[[:space:]>\-]*/, " ") }
|
40
38
|
end
|
41
39
|
|
42
|
-
removed = []
|
43
|
-
|
44
40
|
# from now on, we'll work on a line-by-line basis
|
45
41
|
lines = text.split("\n")
|
42
|
+
lines_dup = lines.dup
|
46
43
|
|
47
44
|
# identify content of each lines
|
48
45
|
pattern = lines.map { |l| identify_line_content(l) }.join
|
49
46
|
|
50
|
-
# remove
|
51
|
-
|
52
|
-
index = pattern =~ /
|
47
|
+
# remove everything after the first delimiter
|
48
|
+
if pattern =~ /d/
|
49
|
+
index = pattern =~ /d/
|
50
|
+
pattern = pattern[0...index]
|
51
|
+
lines = lines[0...index]
|
52
|
+
end
|
53
|
+
|
54
|
+
# remove all mobile signatures
|
55
|
+
while pattern =~ /s/
|
56
|
+
index = pattern =~ /s/
|
53
57
|
pattern.slice!(index)
|
54
58
|
lines.slice!(index)
|
55
59
|
end
|
@@ -58,7 +62,6 @@ class EmailReplyTrimmer
|
|
58
62
|
# then take everything up to that marker
|
59
63
|
if pattern =~ /te*b[^q]*$/
|
60
64
|
index = pattern =~ /te*b[^q]*$/
|
61
|
-
removed = lines[(index + 1)..-1]
|
62
65
|
pattern = pattern[0..index]
|
63
66
|
lines = lines[0..index]
|
64
67
|
end
|
@@ -67,7 +70,6 @@ class EmailReplyTrimmer
|
|
67
70
|
# then take everything up to that marker
|
68
71
|
if pattern =~ /te*b[eqbh]*[te]*$/
|
69
72
|
index = pattern =~ /te*b[eqbh]*[te]*$/
|
70
|
-
removed = lines[(index + 1)..-1]
|
71
73
|
pattern = pattern[0..index]
|
72
74
|
lines = lines[0..index]
|
73
75
|
end
|
@@ -75,8 +77,8 @@ class EmailReplyTrimmer
|
|
75
77
|
# if there still are some embedded email markers, just remove them
|
76
78
|
while pattern =~ /b/
|
77
79
|
index = pattern =~ /b/
|
78
|
-
pattern
|
79
|
-
lines
|
80
|
+
pattern.slice!(index)
|
81
|
+
lines.slice!(index)
|
80
82
|
end
|
81
83
|
|
82
84
|
# fix email headers when they span over multiple lines
|
@@ -90,7 +92,6 @@ class EmailReplyTrimmer
|
|
90
92
|
# these headers
|
91
93
|
if pattern =~ /t[eq]*h{3,}/
|
92
94
|
index = pattern =~ /t[eq]*h{3,}/
|
93
|
-
removed = lines[(index + 1)..-1]
|
94
95
|
pattern = pattern[0..index]
|
95
96
|
lines = lines[0..index]
|
96
97
|
end
|
@@ -111,13 +112,32 @@ class EmailReplyTrimmer
|
|
111
112
|
|
112
113
|
# results
|
113
114
|
trimmed = lines.join("\n").strip
|
114
|
-
elided = removed.join("\n").strip
|
115
115
|
|
116
116
|
if split
|
117
|
-
[trimmed,
|
117
|
+
[trimmed, compute_elided(lines_dup, lines)]
|
118
118
|
else
|
119
119
|
trimmed
|
120
120
|
end
|
121
121
|
end
|
122
122
|
|
123
|
+
private
|
124
|
+
|
125
|
+
def self.compute_elided(text, lines)
|
126
|
+
elided = []
|
127
|
+
|
128
|
+
t = 0
|
129
|
+
l = 0
|
130
|
+
|
131
|
+
while t < text.size
|
132
|
+
while l < lines.size && text[t] == lines[l]
|
133
|
+
t += 1
|
134
|
+
l += 1
|
135
|
+
end
|
136
|
+
elided << text[t]
|
137
|
+
t += 1
|
138
|
+
end
|
139
|
+
|
140
|
+
elided.join("\n").strip
|
141
|
+
end
|
142
|
+
|
123
143
|
end
|
data/test/elided/delimiters.txt
CHANGED
@@ -8,6 +8,7 @@ Subject: VIS
|
|
8
8
|
Here's an email with some very important stuff.
|
9
9
|
|
10
10
|
|
11
|
+
________________________________
|
11
12
|
Reply here<http://foo.bar> or hit reply from your inbox to help members by sharing your ideas.
|
12
13
|
Mute this topic<http://42.wat> to stop getting updates, we'll send you the next one.
|
13
14
|
|
@@ -1,3 +1,8 @@
|
|
1
|
+
Sent from Outlook Mobile<https://foo.bar>
|
2
|
+
|
3
|
+
|
4
|
+
|
5
|
+
|
1
6
|
On Sun, Feb 7, 2016 at 12:12 AM -0800, "Arpit Jalan" <arpit.jalan@discourse.org<mailto:arpit.jalan@discourse.org>> wrote:
|
2
7
|
|
3
8
|
Hi Some,
|
@@ -15,7 +20,9 @@ On Fri, 5 Feb 2016 at 10:42, Some One <foo@bar.com<mailto:foo@bar.com>> wrote:
|
|
15
20
|
Arpit,
|
16
21
|
Yes that sounds good.
|
17
22
|
|
23
|
+
Sent from Outlook Mobile<https://foo.bar>
|
18
24
|
|
25
|
+
_____________________________
|
19
26
|
From: Arpit Jalan <arpit.jalan@discourse.org<mailto:arpit.jalan@discourse.org>>
|
20
27
|
Sent: Thursday, February 4, 2016 10:05 AM
|
21
28
|
Subject: Meta Discourse update
|
@@ -17,3 +17,6 @@ codinghorror via Discourse Meta <info@discourse.org> schrieb:
|
|
17
17
|
>
|
18
18
|
>To unsubscribe from these emails, visit your [user
|
19
19
|
>preferences](http://meta.discourse.org/user_preferences).
|
20
|
+
|
21
|
+
--
|
22
|
+
Diese Nachricht wurde von meinem Android-Mobiltelefon mit K-9 Mail gesendet.
|
@@ -0,0 +1 @@
|
|
1
|
+
On Mon, Aug 19, 2013 at 2:36 AM, SomeOne via Discourse Meta < info@discourse.org> wrote:
|
data/test/elided/signatures.txt
CHANGED
@@ -0,0 +1,26 @@
|
|
1
|
+
Envoyé depuis mon iPhone
|
2
|
+
|
3
|
+
Von meinem Mobilgerät gesendet
|
4
|
+
Diese Nachricht wurde von meinem Android-Mobiltelefon mit K-9 Mail gesendet.
|
5
|
+
|
6
|
+
Someone from mobile
|
7
|
+
From My Iphone 6
|
8
|
+
Sent via mobile
|
9
|
+
Sent with Airmail
|
10
|
+
Sent from Windows Mail
|
11
|
+
Sent from Mailbox
|
12
|
+
Sent from Mailbox for iPad
|
13
|
+
Sent from Yahoo Mail on Android
|
14
|
+
Sent from my TI-85
|
15
|
+
Sent from my iPhone
|
16
|
+
Sent from my iPod
|
17
|
+
Sent from my Alcatel Flash2
|
18
|
+
Sent from my mobile device
|
19
|
+
Sent from my cell, please excuse any typos.
|
20
|
+
Sent from my Samsung Galaxy s5 Octacore device
|
21
|
+
Sent from my HTC M8 Android phone. Please excuse typoze
|
22
|
+
Sent from my Windows 8 PC <http://windows.microsoft.com/consumer-preview>
|
23
|
+
<<sent by galaxy>>
|
24
|
+
(sent from a phone)
|
25
|
+
(Sent from mobile device)
|
26
|
+
從我的 iPhone 傳送
|
data/test/test_email_reply_trimmer.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require "minitest/autorun"
|
2
2
|
require "email_reply_trimmer"
|
3
3
|
|
4
|
-
class TestEmailReplyTrimmer < Minitest::
|
4
|
+
class TestEmailReplyTrimmer < Minitest::Test
|
5
5
|
|
6
6
|
EMAILS = Dir["test/emails/*.txt"].map { |path| File.basename(path) }
|
7
7
|
TRIMMED = Dir["test/trimmed/*.txt"].map { |path| File.basename(path) }
|
@@ -1,5 +1,4 @@
|
|
1
1
|
This is a line before the embedded email.
|
2
|
-
|
3
2
|
> Hello
|
4
3
|
>
|
5
4
|
> This is the embedded email.
|
@@ -9,11 +8,3 @@ This is some text
|
|
9
8
|
after the
|
10
9
|
|
11
10
|
embedded email.
|
12
|
-
|
13
|
-
>
|
14
|
-
> This is another part of the embedded email.
|
15
|
-
>
|
16
|
-
>
|
17
|
-
|
18
|
-
|
19
|
-
And here's my signature.
|
@@ -0,0 +1 @@
|
|
1
|
+
Mal sehen was hier mit der Signatur passiert!
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: email_reply_trimmer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Régis Hanol
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-10-12 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: EmailReplyTrimmer is a small library to trim replies from plain text
|
14
14
|
email.
|
@@ -68,6 +68,7 @@ files:
|
|
68
68
|
- test/elided/signatures.txt
|
69
69
|
- test/elided/strip.txt
|
70
70
|
- test/elided/text_only.txt
|
71
|
+
- test/elided/usenet.txt
|
71
72
|
- test/emails/delimiters.txt
|
72
73
|
- test/emails/dual_embedded.txt
|
73
74
|
- test/emails/email_headers_1.txt
|
@@ -107,6 +108,7 @@ files:
|
|
107
108
|
- test/emails/signatures.txt
|
108
109
|
- test/emails/strip.txt
|
109
110
|
- test/emails/text_only.txt
|
111
|
+
- test/emails/usenet.txt
|
110
112
|
- test/test_email_reply_trimmer.rb
|
111
113
|
- test/trimmed/delimiters.txt
|
112
114
|
- test/trimmed/dual_embedded.txt
|
@@ -147,6 +149,7 @@ files:
|
|
147
149
|
- test/trimmed/signatures.txt
|
148
150
|
- test/trimmed/strip.txt
|
149
151
|
- test/trimmed/text_only.txt
|
152
|
+
- test/trimmed/usenet.txt
|
150
153
|
homepage: https://github.com/discourse/email_reply_trimmer
|
151
154
|
licenses:
|
152
155
|
- MIT
|