email_reply_parser 0.5.5 → 0.5.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +3 -3
- data/email_reply_parser.gemspec +2 -0
- data/lib/email_reply_parser.rb +9 -3
- data/test/email_reply_parser_test.rb +10 -0
- data/test/emails/email_2_2.txt +10 -0
- data/test/emails/email_sig_delimiter_in_middle_of_line.txt +7 -0
- metadata +10 -6
- checksums.yaml +0 -7
data/README.md
CHANGED
@@ -7,9 +7,9 @@ This is what GitHub uses to display comments that were created from
|
|
7
7
|
email replies. This code is being open sourced in an effort to
|
8
8
|
crowdsource the quality of our email representation.
|
9
9
|
|
10
|
-
See
|
10
|
+
See the [Ruby docs][rubydocs] for more information.
|
11
11
|
|
12
|
-
[
|
12
|
+
[rubydocs]: http://rubydoc.info/gems/email_reply_parser/
|
13
13
|
|
14
14
|
##Usage
|
15
15
|
|
@@ -65,7 +65,7 @@ multiple lines. GMail breaks up any lines over 80 characters for you.
|
|
65
65
|
wrote:
|
66
66
|
> blah
|
67
67
|
|
68
|
-
Not to mention that we're
|
68
|
+
Not to mention that we're searching for "on" and "wrote". It won't work
|
69
69
|
with other languages.
|
70
70
|
|
71
71
|
Possible solution: Remove "reply@reply.github.com" lines...
|
data/email_reply_parser.gemspec
CHANGED
@@ -77,12 +77,14 @@ Gem::Specification.new do |s|
|
|
77
77
|
test/emails/email_1_6.txt
|
78
78
|
test/emails/email_1_7.txt
|
79
79
|
test/emails/email_2_1.txt
|
80
|
+
test/emails/email_2_2.txt
|
80
81
|
test/emails/email_BlackBerry.txt
|
81
82
|
test/emails/email_bullets.txt
|
82
83
|
test/emails/email_iPhone.txt
|
83
84
|
test/emails/email_multi_word_sent_from_my_mobile_device.txt
|
84
85
|
test/emails/email_one_is_not_on.txt
|
85
86
|
test/emails/email_sent_from_my_not_signature.txt
|
87
|
+
test/emails/email_sig_delimiter_in_middle_of_line.txt
|
86
88
|
test/emails/pathological.txt
|
87
89
|
]
|
88
90
|
# = MANIFEST =
|
data/lib/email_reply_parser.rb
CHANGED
@@ -30,7 +30,7 @@ require 'strscan'
|
|
30
30
|
#
|
31
31
|
# [mail]: https://github.com/mikel/mail
|
32
32
|
class EmailReplyParser
|
33
|
-
VERSION = "0.5.5"
|
33
|
+
VERSION = "0.5.6"
|
34
34
|
|
35
35
|
# Public: Splits an email body into a list of Fragments.
|
36
36
|
#
|
@@ -89,6 +89,12 @@ class EmailReplyParser
|
|
89
89
|
text.gsub! $1, $1.gsub("\n", " ")
|
90
90
|
end
|
91
91
|
|
92
|
+
# Some users may reply directly above a line of underscores.
|
93
|
+
# In order to ensure that these fragments are split correctly,
|
94
|
+
# make sure that all lines of underscores are preceded by
|
95
|
+
# at least two newline characters.
|
96
|
+
text.gsub!(/([^\n])(?=\n_{7}_+)$/m, "\\1\n")
|
97
|
+
|
92
98
|
# The text is reversed initially due to the way we check for hidden
|
93
99
|
# fragments.
|
94
100
|
text = text.reverse
|
@@ -127,7 +133,7 @@ class EmailReplyParser
|
|
127
133
|
|
128
134
|
private
|
129
135
|
EMPTY = "".freeze
|
130
|
-
SIGNATURE = '(?m)(
|
136
|
+
SIGNATURE = '(?m)(--\s*$|__\s*$|\w-$)|(^(\w+\s*){1,3} ym morf tneS$)'
|
131
137
|
|
132
138
|
begin
|
133
139
|
require 're2'
|
@@ -146,7 +152,7 @@ class EmailReplyParser
|
|
146
152
|
# Returns nothing.
|
147
153
|
def scan_line(line)
|
148
154
|
line.chomp!("\n")
|
149
|
-
line.lstrip! unless SIG_REGEX.match(line)
|
155
|
+
line.lstrip! unless SIG_REGEX.match(line)
|
150
156
|
|
151
157
|
# We're looking for leading `>`'s to see if this line is part of a
|
152
158
|
# quoted Fragment.
|
data/test/email_reply_parser_test.rb
CHANGED
@@ -119,6 +119,11 @@ I am currently using the Java HTTP API.\n", reply.fragments[0].to_s
|
|
119
119
|
assert_equal "Outlook with a reply", EmailReplyParser.parse_reply(body)
|
120
120
|
end
|
121
121
|
|
122
|
+
def test_parse_out_just_top_for_outlook_with_reply_directly_above_line
|
123
|
+
body = IO.read EMAIL_FIXTURE_PATH.join("email_2_2.txt").to_s
|
124
|
+
assert_equal "Outlook with a reply directly above line", EmailReplyParser.parse_reply(body)
|
125
|
+
end
|
126
|
+
|
122
127
|
def test_parse_out_sent_from_iPhone
|
123
128
|
body = IO.read EMAIL_FIXTURE_PATH.join("email_iPhone.txt").to_s
|
124
129
|
assert_equal "Here is another email", EmailReplyParser.parse_reply(body)
|
@@ -162,6 +167,11 @@ I am currently using the Java HTTP API.\n", reply.fragments[0].to_s
|
|
162
167
|
assert (Time.now - t0) < 1, "Took too long, upgrade to re2 gem."
|
163
168
|
end
|
164
169
|
|
170
|
+
def test_doesnt_remove_signature_delimiter_in_mid_line
|
171
|
+
reply = email(:email_sig_delimiter_in_middle_of_line)
|
172
|
+
assert_equal 1, reply.fragments.size
|
173
|
+
end
|
174
|
+
|
165
175
|
def email(name)
|
166
176
|
body = IO.read EMAIL_FIXTURE_PATH.join("#{name}.txt").to_s
|
167
177
|
EmailReplyParser.read body
|
data/test/emails/email_2_2.txt
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
Outlook with a reply directly above line
|
2
|
+
________________________________________
|
3
|
+
From: CRM Comments [crm-comment@example.com]
|
4
|
+
Sent: Friday, 23 March 2012 5:08 p.m.
|
5
|
+
To: John S. Greene
|
6
|
+
Subject: [contact:106] John Greene
|
7
|
+
|
8
|
+
A new comment has been added to the Contact named 'John Greene':
|
9
|
+
|
10
|
+
I am replying to a comment.
|
metadata
CHANGED
@@ -1,14 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: email_reply_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.5
|
4
|
+
version: 0.5.6
|
5
|
+
prerelease:
|
5
6
|
platform: ruby
|
6
7
|
authors:
|
7
8
|
- Rick Olson
|
8
9
|
autorequire:
|
9
10
|
bindir: bin
|
10
11
|
cert_chain: []
|
11
|
-
date:
|
12
|
+
date: 2014-05-30 00:00:00.000000000 Z
|
12
13
|
dependencies: []
|
13
14
|
description: Long description. Maybe copied from the README.
|
14
15
|
email: technoweenie@gmail.com
|
@@ -33,34 +34,37 @@ files:
|
|
33
34
|
- test/emails/email_1_6.txt
|
34
35
|
- test/emails/email_1_7.txt
|
35
36
|
- test/emails/email_2_1.txt
|
37
|
+
- test/emails/email_2_2.txt
|
36
38
|
- test/emails/email_BlackBerry.txt
|
37
39
|
- test/emails/email_bullets.txt
|
38
40
|
- test/emails/email_iPhone.txt
|
39
41
|
- test/emails/email_multi_word_sent_from_my_mobile_device.txt
|
40
42
|
- test/emails/email_one_is_not_on.txt
|
41
43
|
- test/emails/email_sent_from_my_not_signature.txt
|
44
|
+
- test/emails/email_sig_delimiter_in_middle_of_line.txt
|
42
45
|
- test/emails/pathological.txt
|
43
46
|
homepage: http://github.com/github/email_reply_parser
|
44
47
|
licenses: []
|
45
|
-
metadata: {}
|
46
48
|
post_install_message:
|
47
49
|
rdoc_options:
|
48
50
|
- --charset=UTF-8
|
49
51
|
require_paths:
|
50
52
|
- lib
|
51
53
|
required_ruby_version: !ruby/object:Gem::Requirement
|
54
|
+
none: false
|
52
55
|
requirements:
|
53
|
-
- - '>='
|
56
|
+
- - ! '>='
|
54
57
|
- !ruby/object:Gem::Version
|
55
58
|
version: '0'
|
56
59
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
60
|
+
none: false
|
57
61
|
requirements:
|
58
|
-
- - '>='
|
62
|
+
- - ! '>='
|
59
63
|
- !ruby/object:Gem::Version
|
60
64
|
version: '0'
|
61
65
|
requirements: []
|
62
66
|
rubyforge_project: email_reply_parser
|
63
|
-
rubygems_version:
|
67
|
+
rubygems_version: 1.8.23
|
64
68
|
signing_key:
|
65
69
|
specification_version: 2
|
66
70
|
summary: Short description used in Gem listings.
|
checksums.yaml
DELETED
@@ -1,7 +0,0 @@
|
|
1
|
-
---
|
2
|
-
SHA1:
|
3
|
-
metadata.gz: 7ac6a5b1b44a4c6b58ec03ad6bca58d2bbfca5ab
|
4
|
-
data.tar.gz: a4c485aea71c6fdc5b221c80d7b37189ee7185ff
|
5
|
-
SHA512:
|
6
|
-
metadata.gz: 8409fbf234b0c3c8f63ba6cae894f9301b588ca065ce69e802278ef6083f31599698d61f83555e2cd4194abfad50c223358fb5994ccc4315f08a6bb5e879b5eb
|
7
|
-
data.tar.gz: 86c7e2e2ea27d775dc530503687e9ef116d97d612736ec13e08c4077eb631e5e76179a3d739ab6a6e07a8c41d1520ea35cb435c750c2fefd8b17392601914e7d
|