email_reply_parser 0.5.5 → 0.5.6
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +3 -3
- data/email_reply_parser.gemspec +2 -0
- data/lib/email_reply_parser.rb +9 -3
- data/test/email_reply_parser_test.rb +10 -0
- data/test/emails/email_2_2.txt +10 -0
- data/test/emails/email_sig_delimiter_in_middle_of_line.txt +7 -0
- metadata +10 -6
- checksums.yaml +0 -7
data/README.md
CHANGED
@@ -7,9 +7,9 @@ This is what GitHub uses to display comments that were created from
|
|
7
7
|
email replies. This code is being open sourced in an effort to
|
8
8
|
crowdsource the quality of our email representation.
|
9
9
|
|
10
|
-
See
|
10
|
+
See the [Ruby docs][rubydocs] for more information.
|
11
11
|
|
12
|
-
[
|
12
|
+
[rubydocs]: http://rubydoc.info/gems/email_reply_parser/
|
13
13
|
|
14
14
|
##Usage
|
15
15
|
|
@@ -65,7 +65,7 @@ multiple lines. GMail breaks up any lines over 80 characters for you.
|
|
65
65
|
wrote:
|
66
66
|
> blah
|
67
67
|
|
68
|
-
Not to mention that we're
|
68
|
+
Not to mention that we're searching for "on" and "wrote". It won't work
|
69
69
|
with other languages.
|
70
70
|
|
71
71
|
Possible solution: Remove "reply@reply.github.com" lines...
|
data/email_reply_parser.gemspec
CHANGED
@@ -77,12 +77,14 @@ Gem::Specification.new do |s|
|
|
77
77
|
test/emails/email_1_6.txt
|
78
78
|
test/emails/email_1_7.txt
|
79
79
|
test/emails/email_2_1.txt
|
80
|
+
test/emails/email_2_2.txt
|
80
81
|
test/emails/email_BlackBerry.txt
|
81
82
|
test/emails/email_bullets.txt
|
82
83
|
test/emails/email_iPhone.txt
|
83
84
|
test/emails/email_multi_word_sent_from_my_mobile_device.txt
|
84
85
|
test/emails/email_one_is_not_on.txt
|
85
86
|
test/emails/email_sent_from_my_not_signature.txt
|
87
|
+
test/emails/email_sig_delimiter_in_middle_of_line.txt
|
86
88
|
test/emails/pathological.txt
|
87
89
|
]
|
88
90
|
# = MANIFEST =
|
data/lib/email_reply_parser.rb
CHANGED
@@ -30,7 +30,7 @@ require 'strscan'
|
|
30
30
|
#
|
31
31
|
# [mail]: https://github.com/mikel/mail
|
32
32
|
class EmailReplyParser
|
33
|
-
VERSION = "0.5.5"
|
33
|
+
VERSION = "0.5.6"
|
34
34
|
|
35
35
|
# Public: Splits an email body into a list of Fragments.
|
36
36
|
#
|
@@ -89,6 +89,12 @@ class EmailReplyParser
|
|
89
89
|
text.gsub! $1, $1.gsub("\n", " ")
|
90
90
|
end
|
91
91
|
|
92
|
+
# Some users may reply directly above a line of underscores.
|
93
|
+
# In order to ensure that these fragments are split correctly,
|
94
|
+
# make sure that all lines of underscores are preceded by
|
95
|
+
# at least two newline characters.
|
96
|
+
text.gsub!(/([^\n])(?=\n_{7}_+)$/m, "\\1\n")
|
97
|
+
|
92
98
|
# The text is reversed initially due to the way we check for hidden
|
93
99
|
# fragments.
|
94
100
|
text = text.reverse
|
@@ -127,7 +133,7 @@ class EmailReplyParser
|
|
127
133
|
|
128
134
|
private
|
129
135
|
EMPTY = "".freeze
|
130
|
-
SIGNATURE = '(?m)(
|
136
|
+
SIGNATURE = '(?m)(--\s*$|__\s*$|\w-$)|(^(\w+\s*){1,3} ym morf tneS$)'
|
131
137
|
|
132
138
|
begin
|
133
139
|
require 're2'
|
@@ -146,7 +152,7 @@ class EmailReplyParser
|
|
146
152
|
# Returns nothing.
|
147
153
|
def scan_line(line)
|
148
154
|
line.chomp!("\n")
|
149
|
-
line.lstrip! unless SIG_REGEX.match(line)
|
155
|
+
line.lstrip! unless SIG_REGEX.match(line)
|
150
156
|
|
151
157
|
# We're looking for leading `>`'s to see if this line is part of a
|
152
158
|
# quoted Fragment.
|
data/test/email_reply_parser_test.rb
CHANGED
@@ -119,6 +119,11 @@ I am currently using the Java HTTP API.\n", reply.fragments[0].to_s
|
|
119
119
|
assert_equal "Outlook with a reply", EmailReplyParser.parse_reply(body)
|
120
120
|
end
|
121
121
|
|
122
|
+
def test_parse_out_just_top_for_outlook_with_reply_directly_above_line
|
123
|
+
body = IO.read EMAIL_FIXTURE_PATH.join("email_2_2.txt").to_s
|
124
|
+
assert_equal "Outlook with a reply directly above line", EmailReplyParser.parse_reply(body)
|
125
|
+
end
|
126
|
+
|
122
127
|
def test_parse_out_sent_from_iPhone
|
123
128
|
body = IO.read EMAIL_FIXTURE_PATH.join("email_iPhone.txt").to_s
|
124
129
|
assert_equal "Here is another email", EmailReplyParser.parse_reply(body)
|
@@ -162,6 +167,11 @@ I am currently using the Java HTTP API.\n", reply.fragments[0].to_s
|
|
162
167
|
assert (Time.now - t0) < 1, "Took too long, upgrade to re2 gem."
|
163
168
|
end
|
164
169
|
|
170
|
+
def test_doesnt_remove_signature_delimiter_in_mid_line
|
171
|
+
reply = email(:email_sig_delimiter_in_middle_of_line)
|
172
|
+
assert_equal 1, reply.fragments.size
|
173
|
+
end
|
174
|
+
|
165
175
|
def email(name)
|
166
176
|
body = IO.read EMAIL_FIXTURE_PATH.join("#{name}.txt").to_s
|
167
177
|
EmailReplyParser.read body
|
@@ -0,0 +1,10 @@
|
|
1
|
+
Outlook with a reply directly above line
|
2
|
+
________________________________________
|
3
|
+
From: CRM Comments [crm-comment@example.com]
|
4
|
+
Sent: Friday, 23 March 2012 5:08 p.m.
|
5
|
+
To: John S. Greene
|
6
|
+
Subject: [contact:106] John Greene
|
7
|
+
|
8
|
+
A new comment has been added to the Contact named 'John Greene':
|
9
|
+
|
10
|
+
I am replying to a comment.
|
metadata
CHANGED
@@ -1,14 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: email_reply_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.5
|
4
|
+
version: 0.5.6
|
5
|
+
prerelease:
|
5
6
|
platform: ruby
|
6
7
|
authors:
|
7
8
|
- Rick Olson
|
8
9
|
autorequire:
|
9
10
|
bindir: bin
|
10
11
|
cert_chain: []
|
11
|
-
date:
|
12
|
+
date: 2014-05-30 00:00:00.000000000 Z
|
12
13
|
dependencies: []
|
13
14
|
description: Long description. Maybe copied from the README.
|
14
15
|
email: technoweenie@gmail.com
|
@@ -33,34 +34,37 @@ files:
|
|
33
34
|
- test/emails/email_1_6.txt
|
34
35
|
- test/emails/email_1_7.txt
|
35
36
|
- test/emails/email_2_1.txt
|
37
|
+
- test/emails/email_2_2.txt
|
36
38
|
- test/emails/email_BlackBerry.txt
|
37
39
|
- test/emails/email_bullets.txt
|
38
40
|
- test/emails/email_iPhone.txt
|
39
41
|
- test/emails/email_multi_word_sent_from_my_mobile_device.txt
|
40
42
|
- test/emails/email_one_is_not_on.txt
|
41
43
|
- test/emails/email_sent_from_my_not_signature.txt
|
44
|
+
- test/emails/email_sig_delimiter_in_middle_of_line.txt
|
42
45
|
- test/emails/pathological.txt
|
43
46
|
homepage: http://github.com/github/email_reply_parser
|
44
47
|
licenses: []
|
45
|
-
metadata: {}
|
46
48
|
post_install_message:
|
47
49
|
rdoc_options:
|
48
50
|
- --charset=UTF-8
|
49
51
|
require_paths:
|
50
52
|
- lib
|
51
53
|
required_ruby_version: !ruby/object:Gem::Requirement
|
54
|
+
none: false
|
52
55
|
requirements:
|
53
|
-
- - '>='
|
56
|
+
- - ! '>='
|
54
57
|
- !ruby/object:Gem::Version
|
55
58
|
version: '0'
|
56
59
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
60
|
+
none: false
|
57
61
|
requirements:
|
58
|
-
- - '>='
|
62
|
+
- - ! '>='
|
59
63
|
- !ruby/object:Gem::Version
|
60
64
|
version: '0'
|
61
65
|
requirements: []
|
62
66
|
rubyforge_project: email_reply_parser
|
63
|
-
rubygems_version:
|
67
|
+
rubygems_version: 1.8.23
|
64
68
|
signing_key:
|
65
69
|
specification_version: 2
|
66
70
|
summary: Short description used in Gem listings.
|
checksums.yaml
DELETED
@@ -1,7 +0,0 @@
|
|
1
|
-
---
|
2
|
-
SHA1:
|
3
|
-
metadata.gz: 7ac6a5b1b44a4c6b58ec03ad6bca58d2bbfca5ab
|
4
|
-
data.tar.gz: a4c485aea71c6fdc5b221c80d7b37189ee7185ff
|
5
|
-
SHA512:
|
6
|
-
metadata.gz: 8409fbf234b0c3c8f63ba6cae894f9301b588ca065ce69e802278ef6083f31599698d61f83555e2cd4194abfad50c223358fb5994ccc4315f08a6bb5e879b5eb
|
7
|
-
data.tar.gz: 86c7e2e2ea27d775dc530503687e9ef116d97d612736ec13e08c4077eb631e5e76179a3d739ab6a6e07a8c41d1520ea35cb435c750c2fefd8b17392601914e7d
|