email_reply_parser 0.5.9 → 0.5.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/README.md +1 -1
- data/email_reply_parser.gemspec +8 -36
- data/lib/email_reply_parser.rb +6 -12
- data/test/email_reply_parser_test.rb +45 -28
- data/test/emails/email_2_3.txt +10 -0
- data/test/emails/email_long_quote.txt +16 -0
- metadata +10 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 19456eb75469b983f8db4a0807f6691213264309223f37d02c7fcf44748e2d7c
|
4
|
+
data.tar.gz: 0a67068492fd8c2fe7221420e383f69f7ad0f6e7f435882830d8553e6f917bcf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e6b065c74ce3eec383d570824c3c22a0ff9fc1cbc17a99cbbac230a92a0eb9c90adcc96f6288891a676ff65da1a791d11b7c1a3cce9a22bacb3752c6fab6abf0
|
7
|
+
data.tar.gz: 72e65bff3a0850354268451e89ea352a8ab59c5bb8fe1c27ff7558df740ecd233915daaf5e35830a4c9c07617bc5ce6634a0c50aa5d3bd3464821c3328824e7e
|
data/README.md
CHANGED
data/email_reply_parser.gemspec
CHANGED
@@ -1,60 +1,33 @@
|
|
1
1
|
$LOAD_PATH.unshift '.'
|
2
2
|
require 'lib/email_reply_parser'
|
3
3
|
|
4
|
-
## This is the rakegem gemspec template. Make sure you read and understand
|
5
|
-
## all of the comments. Some sections require modification, and others can
|
6
|
-
## be deleted if you don't need them. Once you understand the contents of
|
7
|
-
## this file, feel free to delete any comments that begin with two hash marks.
|
8
|
-
## You can find comprehensive Gem::Specification documentation, at
|
9
|
-
## http://docs.rubygems.org/read/chapter/20
|
10
4
|
Gem::Specification.new do |s|
|
11
5
|
s.specification_version = 2 if s.respond_to? :specification_version=
|
12
6
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
13
7
|
s.rubygems_version = '1.3.5'
|
8
|
+
s.license = 'MIT'
|
14
9
|
|
15
|
-
## Leave these as is they will be modified for you by the rake gemspec task.
|
16
|
-
## If your rubyforge_project name is different, then edit it and comment out
|
17
|
-
## the sub! line in the Rakefile
|
18
10
|
s.name = 'email_reply_parser'
|
19
11
|
s.version = EmailReplyParser::VERSION
|
20
12
|
s.date = Time.now.strftime('%Y-%m-%d')
|
21
|
-
s.rubyforge_project = 'email_reply_parser'
|
22
13
|
|
23
|
-
|
24
|
-
|
25
|
-
s.
|
26
|
-
|
14
|
+
s.summary = "EmailReplyParser is a small library to parse plain text " \
|
15
|
+
"email content."
|
16
|
+
s.description = "EmailReplyParser is a small library to parse plain text " \
|
17
|
+
"email content. This is what GitHub uses to display comments " \
|
18
|
+
"that were created from email replies."
|
27
19
|
|
28
|
-
## List the primary authors. If there are a bunch of authors, it's probably
|
29
|
-
## better to set the email to an email list or something. If you don't have
|
30
|
-
## a custom homepage, consider using your GitHub URL or the like.
|
31
20
|
s.authors = ["Rick Olson"]
|
32
21
|
s.email = 'technoweenie@gmail.com'
|
33
22
|
s.homepage = 'http://github.com/github/email_reply_parser'
|
34
23
|
|
35
|
-
## This gets added to the $LOAD_PATH so that 'lib/NAME.rb' can be required as
|
36
|
-
## require 'NAME.rb' or'/lib/NAME/file.rb' can be as require 'NAME/file.rb'
|
37
24
|
s.require_paths = %w[lib]
|
38
25
|
|
39
|
-
## This sections is only necessary if you have C extensions.
|
40
|
-
#s.require_paths << 'ext'
|
41
|
-
#s.extensions = %w[ext/extconf.rb]
|
42
|
-
|
43
|
-
## If your gem includes any executables, list them here.
|
44
|
-
#s.executables = ["name"]
|
45
|
-
#s.default_executable = 'name'
|
46
|
-
|
47
|
-
## Specify any RDoc options here. You'll want to add your README and
|
48
|
-
## LICENSE files to the extra_rdoc_files list.
|
49
26
|
s.rdoc_options = ["--charset=UTF-8"]
|
50
27
|
s.extra_rdoc_files = %w[README.md LICENSE]
|
51
28
|
|
52
|
-
## List your runtime dependencies here. Runtime dependencies are those
|
53
|
-
## that are needed for an end user to actually USE your code.
|
54
29
|
#s.add_dependency('DEPNAME', [">= 1.1.0", "< 2.0.0"])
|
55
30
|
|
56
|
-
## List your development dependencies here. Development dependencies are
|
57
|
-
## those that are only needed during development
|
58
31
|
#s.add_development_dependency('DEVDEPNAME', [">= 1.1.0", "< 2.0.0"])
|
59
32
|
|
60
33
|
## Leave this section as-is. It will be automatically generated from the
|
@@ -81,9 +54,11 @@ Gem::Specification.new do |s|
|
|
81
54
|
test/emails/email_1_8.txt
|
82
55
|
test/emails/email_2_1.txt
|
83
56
|
test/emails/email_2_2.txt
|
57
|
+
test/emails/email_2_3.txt
|
84
58
|
test/emails/email_BlackBerry.txt
|
85
59
|
test/emails/email_bullets.txt
|
86
60
|
test/emails/email_iPhone.txt
|
61
|
+
test/emails/email_long_quote.txt
|
87
62
|
test/emails/email_multi_word_sent_from_my_mobile_device.txt
|
88
63
|
test/emails/email_one_is_not_on.txt
|
89
64
|
test/emails/email_sent_from_my_not_signature.txt
|
@@ -93,8 +68,5 @@ Gem::Specification.new do |s|
|
|
93
68
|
]
|
94
69
|
# = MANIFEST =
|
95
70
|
|
96
|
-
## Test files will be grabbed from the file list. Make sure the path glob
|
97
|
-
## matches what you actually use.
|
98
71
|
s.test_files = s.files.select { |path| path =~ /^test\/.*_test\.rb/ }
|
99
72
|
end
|
100
|
-
|
data/lib/email_reply_parser.rb
CHANGED
@@ -30,7 +30,7 @@ require 'strscan'
|
|
30
30
|
#
|
31
31
|
# [mail]: https://github.com/mikel/mail
|
32
32
|
class EmailReplyParser
|
33
|
-
VERSION = "0.5.9"
|
33
|
+
VERSION = "0.5.11"
|
34
34
|
|
35
35
|
# Public: Splits an email body into a list of Fragments.
|
36
36
|
#
|
@@ -132,14 +132,8 @@ class EmailReplyParser
|
|
132
132
|
|
133
133
|
private
|
134
134
|
EMPTY = "".freeze
|
135
|
-
SIGNATURE = '(?m)(--\s*$|__\s*$|\w-$)|(^(\w+\s
|
136
|
-
|
137
|
-
begin
|
138
|
-
require 're2'
|
139
|
-
SIG_REGEX = RE2::Regexp.new(SIGNATURE)
|
140
|
-
rescue LoadError
|
141
|
-
SIG_REGEX = Regexp.new(SIGNATURE)
|
142
|
-
end
|
135
|
+
SIGNATURE = '(?m)(--\s*$|__\s*$|\w-$)|(^(\w+\s+){1,3}ym morf tneS$)'
|
136
|
+
SIG_REGEX = Regexp.new(SIGNATURE)
|
143
137
|
|
144
138
|
### Line-by-Line Parsing
|
145
139
|
|
@@ -153,9 +147,9 @@ class EmailReplyParser
|
|
153
147
|
line.chomp!("\n")
|
154
148
|
line.lstrip! unless SIG_REGEX.match(line)
|
155
149
|
|
156
|
-
# We're looking for leading `>`
|
150
|
+
# We're looking for a leading `>` to see if this line is part of a
|
157
151
|
# quoted Fragment.
|
158
|
-
is_quoted = !!(line =~ /(
|
152
|
+
is_quoted = !!(line =~ /(>)$/)
|
159
153
|
|
160
154
|
# Mark the current Fragment as a signature if the current line is empty
|
161
155
|
# and the Fragment starts with a common signature indicator.
|
@@ -188,7 +182,7 @@ class EmailReplyParser
|
|
188
182
|
#
|
189
183
|
# Returns true if the line is a valid header, or false.
|
190
184
|
def quote_header?(line)
|
191
|
-
line =~ /^:etorw.*nO$/
|
185
|
+
line =~ /^:etorw.*nO$/ || line =~ /^.*:(morF|tneS|oT|tcejbuS)$/
|
192
186
|
end
|
193
187
|
|
194
188
|
# Builds the fragment string and reverses it, after all lines have been
|
data/test/email_reply_parser_test.rb
CHANGED
@@ -2,6 +2,7 @@ require 'rubygems'
|
|
2
2
|
require 'test/unit'
|
3
3
|
require 'pathname'
|
4
4
|
require 'pp'
|
5
|
+
require 'timeout'
|
5
6
|
|
6
7
|
dir = Pathname.new File.expand_path(File.dirname(__FILE__))
|
7
8
|
require dir + '..' + 'lib' + 'email_reply_parser'
|
@@ -50,10 +51,10 @@ I am currently using the Java HTTP API.\n", reply.fragments[0].to_s
|
|
50
51
|
assert_equal [false, true, false, false, true],
|
51
52
|
reply.fragments.map { |f| f.signature? }
|
52
53
|
|
53
|
-
assert_match
|
54
|
-
assert_match
|
55
|
-
assert_match
|
56
|
-
assert_match
|
54
|
+
assert_match(/^Oh thanks.\n\nHaving/, reply.fragments[0].to_s)
|
55
|
+
assert_match(/^-A/, reply.fragments[1].to_s)
|
56
|
+
assert_match(/^On [^\:]+\:/, reply.fragments[2].to_s)
|
57
|
+
assert_match(/^_/, reply.fragments[4].to_s)
|
57
58
|
end
|
58
59
|
|
59
60
|
def test_reads_bottom_post
|
@@ -68,10 +69,10 @@ I am currently using the Java HTTP API.\n", reply.fragments[0].to_s
|
|
68
69
|
reply.fragments.map { |f| f.hidden? }
|
69
70
|
|
70
71
|
assert_equal "Hi,", reply.fragments[0].to_s
|
71
|
-
assert_match
|
72
|
-
assert_match
|
73
|
-
assert_match
|
74
|
-
assert_match
|
72
|
+
assert_match(/^On [^\:]+\:/, reply.fragments[1].to_s)
|
73
|
+
assert_match(/^You can list/, reply.fragments[2].to_s)
|
74
|
+
assert_match(/^> /, reply.fragments[3].to_s)
|
75
|
+
assert_match(/^_/, reply.fragments[5].to_s)
|
75
76
|
end
|
76
77
|
|
77
78
|
def test_reads_inline_replies
|
@@ -85,11 +86,11 @@ I am currently using the Java HTTP API.\n", reply.fragments[0].to_s
|
|
85
86
|
assert_equal [false, false, false, false, true, true, true],
|
86
87
|
reply.fragments.map { |f| f.hidden? }
|
87
88
|
|
88
|
-
assert_match
|
89
|
-
assert_match
|
89
|
+
assert_match(/^On [^\:]+\:/, reply.fragments[0].to_s)
|
90
|
+
assert_match(/^I will reply/, reply.fragments[1].to_s)
|
90
91
|
assert_match "okay?", reply.fragments[2].to_s
|
91
|
-
assert_match
|
92
|
-
assert_match
|
92
|
+
assert_match(/^and under this./, reply.fragments[3].to_s)
|
93
|
+
assert_match(/inline/, reply.fragments[4].to_s)
|
93
94
|
assert_equal "\n", reply.fragments[5].to_s
|
94
95
|
assert_equal "--\nHey there, this is my signature\n", reply.fragments[6].to_s
|
95
96
|
end
|
@@ -97,9 +98,9 @@ I am currently using the Java HTTP API.\n", reply.fragments[0].to_s
|
|
97
98
|
def test_recognizes_date_string_above_quote
|
98
99
|
reply = email :email_1_4
|
99
100
|
|
100
|
-
assert_match
|
101
|
-
assert_match
|
102
|
-
assert_match
|
101
|
+
assert_match(/^Awesome/, reply.fragments[0].to_s)
|
102
|
+
assert_match(/^On/, reply.fragments[1].to_s)
|
103
|
+
assert_match(/Loader/, reply.fragments[1].to_s)
|
103
104
|
end
|
104
105
|
|
105
106
|
def test_a_complex_body_with_only_one_fragment
|
@@ -115,23 +116,28 @@ I am currently using the Java HTTP API.\n", reply.fragments[0].to_s
|
|
115
116
|
assert_equal [false, false], reply.fragments.map { |f| f.quoted? }
|
116
117
|
assert_equal [false, true], reply.fragments.map { |f| f.signature? }
|
117
118
|
assert_equal [false, true], reply.fragments.map { |f| f.hidden? }
|
118
|
-
assert_match
|
119
|
+
assert_match(/^-- \nrick/, reply.fragments[1].to_s)
|
119
120
|
end
|
120
121
|
|
121
122
|
def test_deals_with_multiline_reply_headers
|
122
123
|
reply = email :email_1_6
|
123
124
|
|
124
|
-
assert_match
|
125
|
-
assert_match
|
126
|
-
assert_match
|
125
|
+
assert_match(/^I get/, reply.fragments[0].to_s)
|
126
|
+
assert_match(/^On/, reply.fragments[1].to_s)
|
127
|
+
assert_match(/Was this/, reply.fragments[1].to_s)
|
127
128
|
end
|
128
129
|
|
129
130
|
def test_deals_with_windows_line_endings
|
130
131
|
reply = email :email_1_7
|
131
132
|
|
132
|
-
assert_match
|
133
|
-
assert_match
|
134
|
-
assert_match
|
133
|
+
assert_match(/:\+1:/, reply.fragments[0].to_s)
|
134
|
+
assert_match(/^On/, reply.fragments[1].to_s)
|
135
|
+
assert_match(/Steps 0-2/, reply.fragments[1].to_s)
|
136
|
+
end
|
137
|
+
|
138
|
+
def test_handles_non_ascii_characters
|
139
|
+
non_ascii_body = "Here’s a test."
|
140
|
+
assert_equal non_ascii_body, EmailReplyParser.parse_reply(non_ascii_body)
|
135
141
|
end
|
136
142
|
|
137
143
|
def test_does_not_modify_input_string
|
@@ -155,6 +161,11 @@ I am currently using the Java HTTP API.\n", reply.fragments[0].to_s
|
|
155
161
|
assert_equal "Outlook with a reply directly above line", EmailReplyParser.parse_reply(body)
|
156
162
|
end
|
157
163
|
|
164
|
+
def test_parse_out_just_top_for_outlook_with_no_line
|
165
|
+
body = IO.read EMAIL_FIXTURE_PATH.join("email_2_3.txt").to_s
|
166
|
+
assert_equal "Outlook with a reply directly above line", EmailReplyParser.parse_reply(body)
|
167
|
+
end
|
168
|
+
|
158
169
|
def test_parse_out_sent_from_iPhone
|
159
170
|
body = IO.read EMAIL_FIXTURE_PATH.join("email_iPhone.txt").to_s
|
160
171
|
assert_equal "Here is another email", EmailReplyParser.parse_reply(body)
|
@@ -188,14 +199,14 @@ I am currently using the Java HTTP API.\n", reply.fragments[0].to_s
|
|
188
199
|
|
189
200
|
def test_one_is_not_on
|
190
201
|
reply = email("email_one_is_not_on")
|
191
|
-
assert_match
|
192
|
-
assert_match
|
202
|
+
assert_match(/One outstanding question/, reply.fragments[0].to_s)
|
203
|
+
assert_match(/^On Oct 1, 2012/, reply.fragments[1].to_s)
|
193
204
|
end
|
194
205
|
|
195
206
|
def test_mulitple_on
|
196
207
|
reply = email("greedy_on")
|
197
|
-
assert_match
|
198
|
-
assert_match
|
208
|
+
assert_match(/^On your remote host/, reply.fragments[0].to_s)
|
209
|
+
assert_match(/^On 9 Jan 2014/, reply.fragments[1].to_s)
|
199
210
|
assert_equal [false, true, false], reply.fragments.map { |f| f.quoted? }
|
200
211
|
assert_equal [false, false, false], reply.fragments.map { |f| f.signature? }
|
201
212
|
assert_equal [false, true, true], reply.fragments.map { |f| f.hidden? }
|
@@ -203,8 +214,8 @@ I am currently using the Java HTTP API.\n", reply.fragments[0].to_s
|
|
203
214
|
|
204
215
|
def test_pathological_emails
|
205
216
|
t0 = Time.now
|
206
|
-
|
207
|
-
assert (Time.now - t0) < 1, "Took too long, upgrade to re2 gem."
|
217
|
+
email("pathological")
|
218
|
+
assert (Time.now - t0) < 1, "Took too long, upgrade to re2 gem. See https://rubygems.org/gems/re2"
|
208
219
|
end
|
209
220
|
|
210
221
|
def test_doesnt_remove_signature_delimiter_in_mid_line
|
@@ -212,6 +223,12 @@ I am currently using the Java HTTP API.\n", reply.fragments[0].to_s
|
|
212
223
|
assert_equal 1, reply.fragments.size
|
213
224
|
end
|
214
225
|
|
226
|
+
def test_long_quote_processing_completes
|
227
|
+
reply = Timeout.timeout(1) { email(:email_long_quote) }
|
228
|
+
|
229
|
+
assert_equal 5, reply.fragments.size
|
230
|
+
end
|
231
|
+
|
215
232
|
def email(name)
|
216
233
|
body = IO.read EMAIL_FIXTURE_PATH.join("#{name}.txt").to_s
|
217
234
|
EmailReplyParser.read body
|
data/test/emails/email_2_3.txt
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
Outlook with a reply directly above line
|
2
|
+
|
3
|
+
From: CRM Comments [crm-comment@example.com]
|
4
|
+
Sent: Friday, 23 March 2012 5:08 p.m.
|
5
|
+
To: John S. Greene
|
6
|
+
Subject: [contact:106] John Greene
|
7
|
+
|
8
|
+
> A new comment has been added to the Contact named 'John Greene':
|
9
|
+
>
|
10
|
+
> I am replying to a comment.
|