email_reply_parser 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/email_reply_parser.gemspec +7 -1
- data/lib/email_reply_parser.rb +21 -4
- data/test/email_reply_parser_test.rb +41 -0
- data/test/emails/email_2_1.txt +25 -0
- data/test/emails/email_BlackBerry.txt +3 -0
- data/test/emails/email_bullets.txt +22 -0
- data/test/emails/email_iPhone.txt +3 -0
- data/test/emails/email_multi_word_sent_from_my_mobile_device.txt +3 -0
- data/test/emails/email_sent_from_my_not_signature.txt +3 -0
- metadata +7 -1
data/email_reply_parser.gemspec
CHANGED
@@ -13,7 +13,7 @@ Gem::Specification.new do |s|
|
|
13
13
|
## If your rubyforge_project name is different, then edit it and comment out
|
14
14
|
## the sub! line in the Rakefile
|
15
15
|
s.name = 'email_reply_parser'
|
16
|
-
s.version = '0.4.0'
|
16
|
+
s.version = '0.5.0'
|
17
17
|
s.date = '2012-03-01'
|
18
18
|
s.rubyforge_project = 'email_reply_parser'
|
19
19
|
|
@@ -72,6 +72,12 @@ Gem::Specification.new do |s|
|
|
72
72
|
test/emails/email_1_4.txt
|
73
73
|
test/emails/email_1_5.txt
|
74
74
|
test/emails/email_1_6.txt
|
75
|
+
test/emails/email_2_1.txt
|
76
|
+
test/emails/email_BlackBerry.txt
|
77
|
+
test/emails/email_bullets.txt
|
78
|
+
test/emails/email_iPhone.txt
|
79
|
+
test/emails/email_multi_word_sent_from_my_mobile_device.txt
|
80
|
+
test/emails/email_sent_from_my_not_signature.txt
|
75
81
|
]
|
76
82
|
# = MANIFEST =
|
77
83
|
|
data/lib/email_reply_parser.rb
CHANGED
@@ -30,9 +30,9 @@ require 'strscan'
|
|
30
30
|
#
|
31
31
|
# [mail]: https://github.com/mikel/mail
|
32
32
|
class EmailReplyParser
|
33
|
-
VERSION = "0.4.0"
|
33
|
+
VERSION = "0.5.0"
|
34
34
|
|
35
|
-
# Splits an email body into a list of Fragments.
|
35
|
+
# Public: Splits an email body into a list of Fragments.
|
36
36
|
#
|
37
37
|
# text - A String email body.
|
38
38
|
#
|
@@ -41,6 +41,15 @@ class EmailReplyParser
|
|
41
41
|
Email.new.read(text)
|
42
42
|
end
|
43
43
|
|
44
|
+
# Public: Get the text of the visible portions of the given email body.
|
45
|
+
#
|
46
|
+
# text - A String email body.
|
47
|
+
#
|
48
|
+
# Returns a String.
|
49
|
+
def self.parse_reply(text)
|
50
|
+
self.read(text).visible_text
|
51
|
+
end
|
52
|
+
|
44
53
|
### Emails
|
45
54
|
|
46
55
|
# An Email instance represents a parsed body String.
|
@@ -52,6 +61,13 @@ class EmailReplyParser
|
|
52
61
|
@fragments = []
|
53
62
|
end
|
54
63
|
|
64
|
+
# Public: Gets the combined text of the visible fragments of the email body.
|
65
|
+
#
|
66
|
+
# Returns a String.
|
67
|
+
def visible_text
|
68
|
+
fragments.select{|f| !f.hidden?}.map{|f| f.to_s}.join("\n").rstrip
|
69
|
+
end
|
70
|
+
|
55
71
|
# Splits the given text into a list of Fragments. This is roughly done by
|
56
72
|
# reversing the text and parsing from the bottom to the top. This way we
|
57
73
|
# can check for 'On <date>, <author> wrote:' lines above quoted blocks.
|
@@ -105,8 +121,8 @@ class EmailReplyParser
|
|
105
121
|
|
106
122
|
private
|
107
123
|
EMPTY = "".freeze
|
108
|
-
SIG_REGEX = /(
|
109
|
-
|
124
|
+
SIG_REGEX = /(--|__|\w-$)|(^(\w+\s*){1,3} #{"Sent from my".reverse}$)/
|
125
|
+
|
110
126
|
### Line-by-Line Parsing
|
111
127
|
|
112
128
|
# Scans the given line of text and figures out which fragment it belongs
|
@@ -240,3 +256,4 @@ class EmailReplyParser
|
|
240
256
|
end
|
241
257
|
end
|
242
258
|
end
|
259
|
+
|
@@ -101,6 +101,47 @@ I am currently using the Java HTTP API.\n", reply.fragments[0].to_s
|
|
101
101
|
assert_equal "The Quick Brown Fox Jumps Over The Lazy Dog", original
|
102
102
|
end
|
103
103
|
|
104
|
+
def test_returns_only_the_visible_fragments_as_a_string
|
105
|
+
reply = email(:email_2_1)
|
106
|
+
assert_equal reply.fragments.select{|r| !r.hidden?}.map{|r| r.to_s}.join("\n").rstrip, reply.visible_text
|
107
|
+
end
|
108
|
+
|
109
|
+
def test_parse_out_just_top_for_outlook_reply
|
110
|
+
body = IO.read EMAIL_FIXTURE_PATH.join("email_2_1.txt").to_s
|
111
|
+
assert_equal "Outlook with a reply", EmailReplyParser.parse_reply(body)
|
112
|
+
end
|
113
|
+
|
114
|
+
def test_parse_out_sent_from_iPhone
|
115
|
+
body = IO.read EMAIL_FIXTURE_PATH.join("email_iPhone.txt").to_s
|
116
|
+
assert_equal "Here is another email", EmailReplyParser.parse_reply(body)
|
117
|
+
end
|
118
|
+
|
119
|
+
def test_parse_out_sent_from_BlackBerry
|
120
|
+
body = IO.read EMAIL_FIXTURE_PATH.join("email_BlackBerry.txt").to_s
|
121
|
+
assert_equal "Here is another email", EmailReplyParser.parse_reply(body)
|
122
|
+
end
|
123
|
+
|
124
|
+
def test_parse_out_send_from_multiword_mobile_device
|
125
|
+
body = IO.read EMAIL_FIXTURE_PATH.join("email_multi_word_sent_from_my_mobile_device.txt").to_s
|
126
|
+
assert_equal "Here is another email", EmailReplyParser.parse_reply(body)
|
127
|
+
end
|
128
|
+
|
129
|
+
def test_do_not_parse_out_send_from_in_regular_sentence
|
130
|
+
body = IO.read EMAIL_FIXTURE_PATH.join("email_sent_from_my_not_signature.txt").to_s
|
131
|
+
assert_equal "Here is another email\n\nSent from my desk, is much easier then my mobile phone.", EmailReplyParser.parse_reply(body)
|
132
|
+
end
|
133
|
+
|
134
|
+
def test_retains_bullets
|
135
|
+
body = IO.read EMAIL_FIXTURE_PATH.join("email_bullets.txt").to_s
|
136
|
+
assert_equal "test 2 this should list second\n\nand have spaces\n\nand retain this formatting\n\n\n - how about bullets\n - and another",
|
137
|
+
EmailReplyParser.parse_reply(body)
|
138
|
+
end
|
139
|
+
|
140
|
+
def test_parse_reply
|
141
|
+
body = IO.read EMAIL_FIXTURE_PATH.join("email_1_2.txt").to_s
|
142
|
+
assert_equal EmailReplyParser.read(body).visible_text, EmailReplyParser.parse_reply(body)
|
143
|
+
end
|
144
|
+
|
104
145
|
def email(name)
|
105
146
|
body = IO.read EMAIL_FIXTURE_PATH.join("#{name}.txt").to_s
|
106
147
|
EmailReplyParser.read body
|
@@ -0,0 +1,25 @@
|
|
1
|
+
Outlook with a reply
|
2
|
+
|
3
|
+
|
4
|
+
------------------------------
|
5
|
+
|
6
|
+
*From:* Google Apps Sync Team [mailto:mail-noreply@google.com]
|
7
|
+
*Sent:* Thursday, February 09, 2012 1:36 PM
|
8
|
+
*To:* jow@xxxx.com
|
9
|
+
*Subject:* Google Apps Sync was updated!
|
10
|
+
|
11
|
+
|
12
|
+
|
13
|
+
Dear Google Apps Sync user,
|
14
|
+
|
15
|
+
Google Apps Sync for Microsoft Outlook® was recently updated. Your computer
|
16
|
+
now has the latest version (version 2.5). This release includes bug fixes
|
17
|
+
to improve product reliability. For more information about these and other
|
18
|
+
changes, please see the help article here:
|
19
|
+
|
20
|
+
http://www.google.com/support/a/bin/answer.py?answer=153463
|
21
|
+
|
22
|
+
Sincerely,
|
23
|
+
|
24
|
+
The Google Apps Sync Team.
|
25
|
+
|
@@ -0,0 +1,22 @@
|
|
1
|
+
test 2 this should list second
|
2
|
+
|
3
|
+
and have spaces
|
4
|
+
|
5
|
+
and retain this formatting
|
6
|
+
|
7
|
+
|
8
|
+
- how about bullets
|
9
|
+
- and another
|
10
|
+
|
11
|
+
|
12
|
+
On Fri, Feb 24, 2012 at 10:19 AM, <examples@email.goalengine.com> wrote:
|
13
|
+
|
14
|
+
> Give us an example of how you applied what they learned to achieve
|
15
|
+
> something in your organization
|
16
|
+
|
17
|
+
|
18
|
+
|
19
|
+
|
20
|
+
--
|
21
|
+
|
22
|
+
*Joe Smith | Director, Product Management*
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: email_reply_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.0
|
4
|
+
version: 0.5.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -32,6 +32,12 @@ files:
|
|
32
32
|
- test/emails/email_1_4.txt
|
33
33
|
- test/emails/email_1_5.txt
|
34
34
|
- test/emails/email_1_6.txt
|
35
|
+
- test/emails/email_2_1.txt
|
36
|
+
- test/emails/email_BlackBerry.txt
|
37
|
+
- test/emails/email_bullets.txt
|
38
|
+
- test/emails/email_iPhone.txt
|
39
|
+
- test/emails/email_multi_word_sent_from_my_mobile_device.txt
|
40
|
+
- test/emails/email_sent_from_my_not_signature.txt
|
35
41
|
homepage: http://github.com/github/email_reply_parser
|
36
42
|
licenses: []
|
37
43
|
post_install_message:
|