email_reply_parser 0.5.9 → 0.5.11
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/README.md +1 -1
- data/email_reply_parser.gemspec +8 -36
- data/lib/email_reply_parser.rb +6 -12
- data/test/email_reply_parser_test.rb +45 -28
- data/test/emails/email_2_3.txt +10 -0
- data/test/emails/email_long_quote.txt +16 -0
- metadata +10 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 19456eb75469b983f8db4a0807f6691213264309223f37d02c7fcf44748e2d7c
|
4
|
+
data.tar.gz: 0a67068492fd8c2fe7221420e383f69f7ad0f6e7f435882830d8553e6f917bcf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e6b065c74ce3eec383d570824c3c22a0ff9fc1cbc17a99cbbac230a92a0eb9c90adcc96f6288891a676ff65da1a791d11b7c1a3cce9a22bacb3752c6fab6abf0
|
7
|
+
data.tar.gz: 72e65bff3a0850354268451e89ea352a8ab59c5bb8fe1c27ff7558df740ecd233915daaf5e35830a4c9c07617bc5ce6634a0c50aa5d3bd3464821c3328824e7e
|
data/README.md
CHANGED
data/email_reply_parser.gemspec
CHANGED
@@ -1,60 +1,33 @@
|
|
1
1
|
$LOAD_PATH.unshift '.'
|
2
2
|
require 'lib/email_reply_parser'
|
3
3
|
|
4
|
-
## This is the rakegem gemspec template. Make sure you read and understand
|
5
|
-
## all of the comments. Some sections require modification, and others can
|
6
|
-
## be deleted if you don't need them. Once you understand the contents of
|
7
|
-
## this file, feel free to delete any comments that begin with two hash marks.
|
8
|
-
## You can find comprehensive Gem::Specification documentation, at
|
9
|
-
## http://docs.rubygems.org/read/chapter/20
|
10
4
|
Gem::Specification.new do |s|
|
11
5
|
s.specification_version = 2 if s.respond_to? :specification_version=
|
12
6
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
13
7
|
s.rubygems_version = '1.3.5'
|
8
|
+
s.license = 'MIT'
|
14
9
|
|
15
|
-
## Leave these as is they will be modified for you by the rake gemspec task.
|
16
|
-
## If your rubyforge_project name is different, then edit it and comment out
|
17
|
-
## the sub! line in the Rakefile
|
18
10
|
s.name = 'email_reply_parser'
|
19
11
|
s.version = EmailReplyParser::VERSION
|
20
12
|
s.date = Time.now.strftime('%Y-%m-%d')
|
21
|
-
s.rubyforge_project = 'email_reply_parser'
|
22
13
|
|
23
|
-
|
24
|
-
|
25
|
-
s.
|
26
|
-
|
14
|
+
s.summary = "EmailReplyParser is a small library to parse plain text " \
|
15
|
+
"email content."
|
16
|
+
s.description = "EmailReplyParser is a small library to parse plain text " \
|
17
|
+
"email content. This is what GitHub uses to display comments " \
|
18
|
+
"that were created from email replies."
|
27
19
|
|
28
|
-
## List the primary authors. If there are a bunch of authors, it's probably
|
29
|
-
## better to set the email to an email list or something. If you don't have
|
30
|
-
## a custom homepage, consider using your GitHub URL or the like.
|
31
20
|
s.authors = ["Rick Olson"]
|
32
21
|
s.email = 'technoweenie@gmail.com'
|
33
22
|
s.homepage = 'http://github.com/github/email_reply_parser'
|
34
23
|
|
35
|
-
## This gets added to the $LOAD_PATH so that 'lib/NAME.rb' can be required as
|
36
|
-
## require 'NAME.rb' or'/lib/NAME/file.rb' can be as require 'NAME/file.rb'
|
37
24
|
s.require_paths = %w[lib]
|
38
25
|
|
39
|
-
## This sections is only necessary if you have C extensions.
|
40
|
-
#s.require_paths << 'ext'
|
41
|
-
#s.extensions = %w[ext/extconf.rb]
|
42
|
-
|
43
|
-
## If your gem includes any executables, list them here.
|
44
|
-
#s.executables = ["name"]
|
45
|
-
#s.default_executable = 'name'
|
46
|
-
|
47
|
-
## Specify any RDoc options here. You'll want to add your README and
|
48
|
-
## LICENSE files to the extra_rdoc_files list.
|
49
26
|
s.rdoc_options = ["--charset=UTF-8"]
|
50
27
|
s.extra_rdoc_files = %w[README.md LICENSE]
|
51
28
|
|
52
|
-
## List your runtime dependencies here. Runtime dependencies are those
|
53
|
-
## that are needed for an end user to actually USE your code.
|
54
29
|
#s.add_dependency('DEPNAME', [">= 1.1.0", "< 2.0.0"])
|
55
30
|
|
56
|
-
## List your development dependencies here. Development dependencies are
|
57
|
-
## those that are only needed during development
|
58
31
|
#s.add_development_dependency('DEVDEPNAME', [">= 1.1.0", "< 2.0.0"])
|
59
32
|
|
60
33
|
## Leave this section as-is. It will be automatically generated from the
|
@@ -81,9 +54,11 @@ Gem::Specification.new do |s|
|
|
81
54
|
test/emails/email_1_8.txt
|
82
55
|
test/emails/email_2_1.txt
|
83
56
|
test/emails/email_2_2.txt
|
57
|
+
test/emails/email_2_3.txt
|
84
58
|
test/emails/email_BlackBerry.txt
|
85
59
|
test/emails/email_bullets.txt
|
86
60
|
test/emails/email_iPhone.txt
|
61
|
+
test/emails/email_long_quote.txt
|
87
62
|
test/emails/email_multi_word_sent_from_my_mobile_device.txt
|
88
63
|
test/emails/email_one_is_not_on.txt
|
89
64
|
test/emails/email_sent_from_my_not_signature.txt
|
@@ -93,8 +68,5 @@ Gem::Specification.new do |s|
|
|
93
68
|
]
|
94
69
|
# = MANIFEST =
|
95
70
|
|
96
|
-
## Test files will be grabbed from the file list. Make sure the path glob
|
97
|
-
## matches what you actually use.
|
98
71
|
s.test_files = s.files.select { |path| path =~ /^test\/.*_test\.rb/ }
|
99
72
|
end
|
100
|
-
|
data/lib/email_reply_parser.rb
CHANGED
@@ -30,7 +30,7 @@ require 'strscan'
|
|
30
30
|
#
|
31
31
|
# [mail]: https://github.com/mikel/mail
|
32
32
|
class EmailReplyParser
|
33
|
-
VERSION = "0.5.9"
|
33
|
+
VERSION = "0.5.11"
|
34
34
|
|
35
35
|
# Public: Splits an email body into a list of Fragments.
|
36
36
|
#
|
@@ -132,14 +132,8 @@ class EmailReplyParser
|
|
132
132
|
|
133
133
|
private
|
134
134
|
EMPTY = "".freeze
|
135
|
-
SIGNATURE = '(?m)(--\s*$|__\s*$|\w-$)|(^(\w+\s
|
136
|
-
|
137
|
-
begin
|
138
|
-
require 're2'
|
139
|
-
SIG_REGEX = RE2::Regexp.new(SIGNATURE)
|
140
|
-
rescue LoadError
|
141
|
-
SIG_REGEX = Regexp.new(SIGNATURE)
|
142
|
-
end
|
135
|
+
SIGNATURE = '(?m)(--\s*$|__\s*$|\w-$)|(^(\w+\s+){1,3}ym morf tneS$)'
|
136
|
+
SIG_REGEX = Regexp.new(SIGNATURE)
|
143
137
|
|
144
138
|
### Line-by-Line Parsing
|
145
139
|
|
@@ -153,9 +147,9 @@ class EmailReplyParser
|
|
153
147
|
line.chomp!("\n")
|
154
148
|
line.lstrip! unless SIG_REGEX.match(line)
|
155
149
|
|
156
|
-
# We're looking for leading `>`
|
150
|
+
# We're looking for a leading `>` to see if this line is part of a
|
157
151
|
# quoted Fragment.
|
158
|
-
is_quoted = !!(line =~ /(
|
152
|
+
is_quoted = !!(line =~ /(>)$/)
|
159
153
|
|
160
154
|
# Mark the current Fragment as a signature if the current line is empty
|
161
155
|
# and the Fragment starts with a common signature indicator.
|
@@ -188,7 +182,7 @@ class EmailReplyParser
|
|
188
182
|
#
|
189
183
|
# Returns true if the line is a valid header, or false.
|
190
184
|
def quote_header?(line)
|
191
|
-
line =~ /^:etorw.*nO$/
|
185
|
+
line =~ /^:etorw.*nO$/ || line =~ /^.*:(morF|tneS|oT|tcejbuS)$/
|
192
186
|
end
|
193
187
|
|
194
188
|
# Builds the fragment string and reverses it, after all lines have been
|
data/test/email_reply_parser_test.rb
CHANGED
@@ -2,6 +2,7 @@ require 'rubygems'
|
|
2
2
|
require 'test/unit'
|
3
3
|
require 'pathname'
|
4
4
|
require 'pp'
|
5
|
+
require 'timeout'
|
5
6
|
|
6
7
|
dir = Pathname.new File.expand_path(File.dirname(__FILE__))
|
7
8
|
require dir + '..' + 'lib' + 'email_reply_parser'
|
@@ -50,10 +51,10 @@ I am currently using the Java HTTP API.\n", reply.fragments[0].to_s
|
|
50
51
|
assert_equal [false, true, false, false, true],
|
51
52
|
reply.fragments.map { |f| f.signature? }
|
52
53
|
|
53
|
-
assert_match
|
54
|
-
assert_match
|
55
|
-
assert_match
|
56
|
-
assert_match
|
54
|
+
assert_match(/^Oh thanks.\n\nHaving/, reply.fragments[0].to_s)
|
55
|
+
assert_match(/^-A/, reply.fragments[1].to_s)
|
56
|
+
assert_match(/^On [^\:]+\:/, reply.fragments[2].to_s)
|
57
|
+
assert_match(/^_/, reply.fragments[4].to_s)
|
57
58
|
end
|
58
59
|
|
59
60
|
def test_reads_bottom_post
|
@@ -68,10 +69,10 @@ I am currently using the Java HTTP API.\n", reply.fragments[0].to_s
|
|
68
69
|
reply.fragments.map { |f| f.hidden? }
|
69
70
|
|
70
71
|
assert_equal "Hi,", reply.fragments[0].to_s
|
71
|
-
assert_match
|
72
|
-
assert_match
|
73
|
-
assert_match
|
74
|
-
assert_match
|
72
|
+
assert_match(/^On [^\:]+\:/, reply.fragments[1].to_s)
|
73
|
+
assert_match(/^You can list/, reply.fragments[2].to_s)
|
74
|
+
assert_match(/^> /, reply.fragments[3].to_s)
|
75
|
+
assert_match(/^_/, reply.fragments[5].to_s)
|
75
76
|
end
|
76
77
|
|
77
78
|
def test_reads_inline_replies
|
@@ -85,11 +86,11 @@ I am currently using the Java HTTP API.\n", reply.fragments[0].to_s
|
|
85
86
|
assert_equal [false, false, false, false, true, true, true],
|
86
87
|
reply.fragments.map { |f| f.hidden? }
|
87
88
|
|
88
|
-
assert_match
|
89
|
-
assert_match
|
89
|
+
assert_match(/^On [^\:]+\:/, reply.fragments[0].to_s)
|
90
|
+
assert_match(/^I will reply/, reply.fragments[1].to_s)
|
90
91
|
assert_match "okay?", reply.fragments[2].to_s
|
91
|
-
assert_match
|
92
|
-
assert_match
|
92
|
+
assert_match(/^and under this./, reply.fragments[3].to_s)
|
93
|
+
assert_match(/inline/, reply.fragments[4].to_s)
|
93
94
|
assert_equal "\n", reply.fragments[5].to_s
|
94
95
|
assert_equal "--\nHey there, this is my signature\n", reply.fragments[6].to_s
|
95
96
|
end
|
@@ -97,9 +98,9 @@ I am currently using the Java HTTP API.\n", reply.fragments[0].to_s
|
|
97
98
|
def test_recognizes_date_string_above_quote
|
98
99
|
reply = email :email_1_4
|
99
100
|
|
100
|
-
assert_match
|
101
|
-
assert_match
|
102
|
-
assert_match
|
101
|
+
assert_match(/^Awesome/, reply.fragments[0].to_s)
|
102
|
+
assert_match(/^On/, reply.fragments[1].to_s)
|
103
|
+
assert_match(/Loader/, reply.fragments[1].to_s)
|
103
104
|
end
|
104
105
|
|
105
106
|
def test_a_complex_body_with_only_one_fragment
|
@@ -115,23 +116,28 @@ I am currently using the Java HTTP API.\n", reply.fragments[0].to_s
|
|
115
116
|
assert_equal [false, false], reply.fragments.map { |f| f.quoted? }
|
116
117
|
assert_equal [false, true], reply.fragments.map { |f| f.signature? }
|
117
118
|
assert_equal [false, true], reply.fragments.map { |f| f.hidden? }
|
118
|
-
assert_match
|
119
|
+
assert_match(/^-- \nrick/, reply.fragments[1].to_s)
|
119
120
|
end
|
120
121
|
|
121
122
|
def test_deals_with_multiline_reply_headers
|
122
123
|
reply = email :email_1_6
|
123
124
|
|
124
|
-
assert_match
|
125
|
-
assert_match
|
126
|
-
assert_match
|
125
|
+
assert_match(/^I get/, reply.fragments[0].to_s)
|
126
|
+
assert_match(/^On/, reply.fragments[1].to_s)
|
127
|
+
assert_match(/Was this/, reply.fragments[1].to_s)
|
127
128
|
end
|
128
129
|
|
129
130
|
def test_deals_with_windows_line_endings
|
130
131
|
reply = email :email_1_7
|
131
132
|
|
132
|
-
assert_match
|
133
|
-
assert_match
|
134
|
-
assert_match
|
133
|
+
assert_match(/:\+1:/, reply.fragments[0].to_s)
|
134
|
+
assert_match(/^On/, reply.fragments[1].to_s)
|
135
|
+
assert_match(/Steps 0-2/, reply.fragments[1].to_s)
|
136
|
+
end
|
137
|
+
|
138
|
+
def test_handles_non_ascii_characters
|
139
|
+
non_ascii_body = "Here’s a test."
|
140
|
+
assert_equal non_ascii_body, EmailReplyParser.parse_reply(non_ascii_body)
|
135
141
|
end
|
136
142
|
|
137
143
|
def test_does_not_modify_input_string
|
@@ -155,6 +161,11 @@ I am currently using the Java HTTP API.\n", reply.fragments[0].to_s
|
|
155
161
|
assert_equal "Outlook with a reply directly above line", EmailReplyParser.parse_reply(body)
|
156
162
|
end
|
157
163
|
|
164
|
+
def test_parse_out_just_top_for_outlook_with_no_line
|
165
|
+
body = IO.read EMAIL_FIXTURE_PATH.join("email_2_3.txt").to_s
|
166
|
+
assert_equal "Outlook with a reply directly above line", EmailReplyParser.parse_reply(body)
|
167
|
+
end
|
168
|
+
|
158
169
|
def test_parse_out_sent_from_iPhone
|
159
170
|
body = IO.read EMAIL_FIXTURE_PATH.join("email_iPhone.txt").to_s
|
160
171
|
assert_equal "Here is another email", EmailReplyParser.parse_reply(body)
|
@@ -188,14 +199,14 @@ I am currently using the Java HTTP API.\n", reply.fragments[0].to_s
|
|
188
199
|
|
189
200
|
def test_one_is_not_on
|
190
201
|
reply = email("email_one_is_not_on")
|
191
|
-
assert_match
|
192
|
-
assert_match
|
202
|
+
assert_match(/One outstanding question/, reply.fragments[0].to_s)
|
203
|
+
assert_match(/^On Oct 1, 2012/, reply.fragments[1].to_s)
|
193
204
|
end
|
194
205
|
|
195
206
|
def test_mulitple_on
|
196
207
|
reply = email("greedy_on")
|
197
|
-
assert_match
|
198
|
-
assert_match
|
208
|
+
assert_match(/^On your remote host/, reply.fragments[0].to_s)
|
209
|
+
assert_match(/^On 9 Jan 2014/, reply.fragments[1].to_s)
|
199
210
|
assert_equal [false, true, false], reply.fragments.map { |f| f.quoted? }
|
200
211
|
assert_equal [false, false, false], reply.fragments.map { |f| f.signature? }
|
201
212
|
assert_equal [false, true, true], reply.fragments.map { |f| f.hidden? }
|
@@ -203,8 +214,8 @@ I am currently using the Java HTTP API.\n", reply.fragments[0].to_s
|
|
203
214
|
|
204
215
|
def test_pathological_emails
|
205
216
|
t0 = Time.now
|
206
|
-
|
207
|
-
assert (Time.now - t0) < 1, "Took too long, upgrade to re2 gem."
|
217
|
+
email("pathological")
|
218
|
+
assert (Time.now - t0) < 1, "Took too long, upgrade to re2 gem. See https://rubygems.org/gems/re2"
|
208
219
|
end
|
209
220
|
|
210
221
|
def test_doesnt_remove_signature_delimiter_in_mid_line
|
@@ -212,6 +223,12 @@ I am currently using the Java HTTP API.\n", reply.fragments[0].to_s
|
|
212
223
|
assert_equal 1, reply.fragments.size
|
213
224
|
end
|
214
225
|
|
226
|
+
def test_long_quote_processing_completes
|
227
|
+
reply = Timeout.timeout(1) { email(:email_long_quote) }
|
228
|
+
|
229
|
+
assert_equal 5, reply.fragments.size
|
230
|
+
end
|
231
|
+
|
215
232
|
def email(name)
|
216
233
|
body = IO.read EMAIL_FIXTURE_PATH.join("#{name}.txt").to_s
|
217
234
|
EmailReplyParser.read body
|
data/test/emails/email_2_3.txt
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
Outlook with a reply directly above line
|
2
|
+
|
3
|
+
From: CRM Comments [crm-comment@example.com]
|
4
|
+
Sent: Friday, 23 March 2012 5:08 p.m.
|
5
|
+
To: John S. Greene
|
6
|
+
Subject: [contact:106] John Greene
|
7
|
+
|
8
|
+
> A new comment has been added to the Contact named 'John Greene':
|
9
|
+
>
|
10
|
+
> I am replying to a comment.
|