email_reply_parser 0.4.0 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/email_reply_parser.gemspec +7 -1
- data/lib/email_reply_parser.rb +21 -4
- data/test/email_reply_parser_test.rb +41 -0
- data/test/emails/email_2_1.txt +25 -0
- data/test/emails/email_BlackBerry.txt +3 -0
- data/test/emails/email_bullets.txt +22 -0
- data/test/emails/email_iPhone.txt +3 -0
- data/test/emails/email_multi_word_sent_from_my_mobile_device.txt +3 -0
- data/test/emails/email_sent_from_my_not_signature.txt +3 -0
- metadata +7 -1
data/email_reply_parser.gemspec
CHANGED
@@ -13,7 +13,7 @@ Gem::Specification.new do |s|
|
|
13
13
|
## If your rubyforge_project name is different, then edit it and comment out
|
14
14
|
## the sub! line in the Rakefile
|
15
15
|
s.name = 'email_reply_parser'
|
16
|
-
s.version = '0.4.0'
|
16
|
+
s.version = '0.5.0'
|
17
17
|
s.date = '2012-03-01'
|
18
18
|
s.rubyforge_project = 'email_reply_parser'
|
19
19
|
|
@@ -72,6 +72,12 @@ Gem::Specification.new do |s|
|
|
72
72
|
test/emails/email_1_4.txt
|
73
73
|
test/emails/email_1_5.txt
|
74
74
|
test/emails/email_1_6.txt
|
75
|
+
test/emails/email_2_1.txt
|
76
|
+
test/emails/email_BlackBerry.txt
|
77
|
+
test/emails/email_bullets.txt
|
78
|
+
test/emails/email_iPhone.txt
|
79
|
+
test/emails/email_multi_word_sent_from_my_mobile_device.txt
|
80
|
+
test/emails/email_sent_from_my_not_signature.txt
|
75
81
|
]
|
76
82
|
# = MANIFEST =
|
77
83
|
|
data/lib/email_reply_parser.rb
CHANGED
@@ -30,9 +30,9 @@ require 'strscan'
|
|
30
30
|
#
|
31
31
|
# [mail]: https://github.com/mikel/mail
|
32
32
|
class EmailReplyParser
|
33
|
-
VERSION = "0.4.0"
|
33
|
+
VERSION = "0.5.0"
|
34
34
|
|
35
|
-
# Splits an email body into a list of Fragments.
|
35
|
+
# Public: Splits an email body into a list of Fragments.
|
36
36
|
#
|
37
37
|
# text - A String email body.
|
38
38
|
#
|
@@ -41,6 +41,15 @@ class EmailReplyParser
|
|
41
41
|
Email.new.read(text)
|
42
42
|
end
|
43
43
|
|
44
|
+
# Public: Get the text of the visible portions of the given email body.
|
45
|
+
#
|
46
|
+
# text - A String email body.
|
47
|
+
#
|
48
|
+
# Returns a String.
|
49
|
+
def self.parse_reply(text)
|
50
|
+
self.read(text).visible_text
|
51
|
+
end
|
52
|
+
|
44
53
|
### Emails
|
45
54
|
|
46
55
|
# An Email instance represents a parsed body String.
|
@@ -52,6 +61,13 @@ class EmailReplyParser
|
|
52
61
|
@fragments = []
|
53
62
|
end
|
54
63
|
|
64
|
+
# Public: Gets the combined text of the visible fragments of the email body.
|
65
|
+
#
|
66
|
+
# Returns a String.
|
67
|
+
def visible_text
|
68
|
+
fragments.select{|f| !f.hidden?}.map{|f| f.to_s}.join("\n").rstrip
|
69
|
+
end
|
70
|
+
|
55
71
|
# Splits the given text into a list of Fragments. This is roughly done by
|
56
72
|
# reversing the text and parsing from the bottom to the top. This way we
|
57
73
|
# can check for 'On <date>, <author> wrote:' lines above quoted blocks.
|
@@ -105,8 +121,8 @@ class EmailReplyParser
|
|
105
121
|
|
106
122
|
private
|
107
123
|
EMPTY = "".freeze
|
108
|
-
SIG_REGEX = /(
|
109
|
-
|
124
|
+
SIG_REGEX = /(--|__|\w-$)|(^(\w+\s*){1,3} #{"Sent from my".reverse}$)/
|
125
|
+
|
110
126
|
### Line-by-Line Parsing
|
111
127
|
|
112
128
|
# Scans the given line of text and figures out which fragment it belongs
|
@@ -240,3 +256,4 @@ class EmailReplyParser
|
|
240
256
|
end
|
241
257
|
end
|
242
258
|
end
|
259
|
+
|
@@ -101,6 +101,47 @@ I am currently using the Java HTTP API.\n", reply.fragments[0].to_s
|
|
101
101
|
assert_equal "The Quick Brown Fox Jumps Over The Lazy Dog", original
|
102
102
|
end
|
103
103
|
|
104
|
+
def test_returns_only_the_visible_fragments_as_a_string
|
105
|
+
reply = email(:email_2_1)
|
106
|
+
assert_equal reply.fragments.select{|r| !r.hidden?}.map{|r| r.to_s}.join("\n").rstrip, reply.visible_text
|
107
|
+
end
|
108
|
+
|
109
|
+
def test_parse_out_just_top_for_outlook_reply
|
110
|
+
body = IO.read EMAIL_FIXTURE_PATH.join("email_2_1.txt").to_s
|
111
|
+
assert_equal "Outlook with a reply", EmailReplyParser.parse_reply(body)
|
112
|
+
end
|
113
|
+
|
114
|
+
def test_parse_out_sent_from_iPhone
|
115
|
+
body = IO.read EMAIL_FIXTURE_PATH.join("email_iPhone.txt").to_s
|
116
|
+
assert_equal "Here is another email", EmailReplyParser.parse_reply(body)
|
117
|
+
end
|
118
|
+
|
119
|
+
def test_parse_out_sent_from_BlackBerry
|
120
|
+
body = IO.read EMAIL_FIXTURE_PATH.join("email_BlackBerry.txt").to_s
|
121
|
+
assert_equal "Here is another email", EmailReplyParser.parse_reply(body)
|
122
|
+
end
|
123
|
+
|
124
|
+
def test_parse_out_send_from_multiword_mobile_device
|
125
|
+
body = IO.read EMAIL_FIXTURE_PATH.join("email_multi_word_sent_from_my_mobile_device.txt").to_s
|
126
|
+
assert_equal "Here is another email", EmailReplyParser.parse_reply(body)
|
127
|
+
end
|
128
|
+
|
129
|
+
def test_do_not_parse_out_send_from_in_regular_sentence
|
130
|
+
body = IO.read EMAIL_FIXTURE_PATH.join("email_sent_from_my_not_signature.txt").to_s
|
131
|
+
assert_equal "Here is another email\n\nSent from my desk, is much easier then my mobile phone.", EmailReplyParser.parse_reply(body)
|
132
|
+
end
|
133
|
+
|
134
|
+
def test_retains_bullets
|
135
|
+
body = IO.read EMAIL_FIXTURE_PATH.join("email_bullets.txt").to_s
|
136
|
+
assert_equal "test 2 this should list second\n\nand have spaces\n\nand retain this formatting\n\n\n - how about bullets\n - and another",
|
137
|
+
EmailReplyParser.parse_reply(body)
|
138
|
+
end
|
139
|
+
|
140
|
+
def test_parse_reply
|
141
|
+
body = IO.read EMAIL_FIXTURE_PATH.join("email_1_2.txt").to_s
|
142
|
+
assert_equal EmailReplyParser.read(body).visible_text, EmailReplyParser.parse_reply(body)
|
143
|
+
end
|
144
|
+
|
104
145
|
def email(name)
|
105
146
|
body = IO.read EMAIL_FIXTURE_PATH.join("#{name}.txt").to_s
|
106
147
|
EmailReplyParser.read body
|
@@ -0,0 +1,25 @@
|
|
1
|
+
Outlook with a reply
|
2
|
+
|
3
|
+
|
4
|
+
------------------------------
|
5
|
+
|
6
|
+
*From:* Google Apps Sync Team [mailto:mail-noreply@google.com]
|
7
|
+
*Sent:* Thursday, February 09, 2012 1:36 PM
|
8
|
+
*To:* jow@xxxx.com
|
9
|
+
*Subject:* Google Apps Sync was updated!
|
10
|
+
|
11
|
+
|
12
|
+
|
13
|
+
Dear Google Apps Sync user,
|
14
|
+
|
15
|
+
Google Apps Sync for Microsoft Outlook® was recently updated. Your computer
|
16
|
+
now has the latest version (version 2.5). This release includes bug fixes
|
17
|
+
to improve product reliability. For more information about these and other
|
18
|
+
changes, please see the help article here:
|
19
|
+
|
20
|
+
http://www.google.com/support/a/bin/answer.py?answer=153463
|
21
|
+
|
22
|
+
Sincerely,
|
23
|
+
|
24
|
+
The Google Apps Sync Team.
|
25
|
+
|
@@ -0,0 +1,22 @@
|
|
1
|
+
test 2 this should list second
|
2
|
+
|
3
|
+
and have spaces
|
4
|
+
|
5
|
+
and retain this formatting
|
6
|
+
|
7
|
+
|
8
|
+
- how about bullets
|
9
|
+
- and another
|
10
|
+
|
11
|
+
|
12
|
+
On Fri, Feb 24, 2012 at 10:19 AM, <examples@email.goalengine.com> wrote:
|
13
|
+
|
14
|
+
> Give us an example of how you applied what they learned to achieve
|
15
|
+
> something in your organization
|
16
|
+
|
17
|
+
|
18
|
+
|
19
|
+
|
20
|
+
--
|
21
|
+
|
22
|
+
*Joe Smith | Director, Product Management*
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: email_reply_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.0
|
4
|
+
version: 0.5.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -32,6 +32,12 @@ files:
|
|
32
32
|
- test/emails/email_1_4.txt
|
33
33
|
- test/emails/email_1_5.txt
|
34
34
|
- test/emails/email_1_6.txt
|
35
|
+
- test/emails/email_2_1.txt
|
36
|
+
- test/emails/email_BlackBerry.txt
|
37
|
+
- test/emails/email_bullets.txt
|
38
|
+
- test/emails/email_iPhone.txt
|
39
|
+
- test/emails/email_multi_word_sent_from_my_mobile_device.txt
|
40
|
+
- test/emails/email_sent_from_my_not_signature.txt
|
35
41
|
homepage: http://github.com/github/email_reply_parser
|
36
42
|
licenses: []
|
37
43
|
post_install_message:
|