email_reply_parser 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,7 +13,7 @@ Gem::Specification.new do |s|
13
13
  ## If your rubyforge_project name is different, then edit it and comment out
14
14
  ## the sub! line in the Rakefile
15
15
  s.name = 'email_reply_parser'
16
- s.version = '0.4.0'
16
+ s.version = '0.5.0'
17
17
  s.date = '2012-03-01'
18
18
  s.rubyforge_project = 'email_reply_parser'
19
19
 
@@ -72,6 +72,12 @@ Gem::Specification.new do |s|
72
72
  test/emails/email_1_4.txt
73
73
  test/emails/email_1_5.txt
74
74
  test/emails/email_1_6.txt
75
+ test/emails/email_2_1.txt
76
+ test/emails/email_BlackBerry.txt
77
+ test/emails/email_bullets.txt
78
+ test/emails/email_iPhone.txt
79
+ test/emails/email_multi_word_sent_from_my_mobile_device.txt
80
+ test/emails/email_sent_from_my_not_signature.txt
75
81
  ]
76
82
  # = MANIFEST =
77
83
 
@@ -30,9 +30,9 @@ require 'strscan'
30
30
  #
31
31
  # [mail]: https://github.com/mikel/mail
32
32
  class EmailReplyParser
33
- VERSION = "0.4.0"
33
+ VERSION = "0.5.0"
34
34
 
35
- # Splits an email body into a list of Fragments.
35
+ # Public: Splits an email body into a list of Fragments.
36
36
  #
37
37
  # text - A String email body.
38
38
  #
@@ -41,6 +41,15 @@ class EmailReplyParser
41
41
  Email.new.read(text)
42
42
  end
43
43
 
44
# Public: Get the text of the visible portions of the given email body.
#
# Shorthand for parsing the body with `read` and asking the result for
# its `visible_text`.
#
# text - A String email body.
#
# Returns a String.
def self.parse_reply(text)
  read(text).visible_text
end
52
+
44
53
  ### Emails
45
54
 
46
55
  # An Email instance represents a parsed body String.
@@ -52,6 +61,13 @@ class EmailReplyParser
52
61
  @fragments = []
53
62
  end
54
63
 
64
# Public: Gets the combined text of the visible fragments of the email body.
#
# Fragments whose `hidden?` is truthy are dropped. The remaining fragments
# are converted with `to_s`, joined with newlines, and trailing whitespace
# is stripped from the joined result.
#
# Returns a String.
def visible_text
  fragments.reject(&:hidden?).map(&:to_s).join("\n").rstrip
end
70
+
55
71
  # Splits the given text into a list of Fragments. This is roughly done by
56
72
  # reversing the text and parsing from the bottom to the top. This way we
57
73
  # can check for 'On <date>, <author> wrote:' lines above quoted blocks.
@@ -105,8 +121,8 @@ class EmailReplyParser
105
121
 
106
122
  private
107
123
  EMPTY = "".freeze
108
- SIG_REGEX = /(\s--|__|\w-)$/
109
-
124
+ SIG_REGEX = /(--|__|\w-$)|(^(\w+\s*){1,3} #{"Sent from my".reverse}$)/
125
+
110
126
  ### Line-by-Line Parsing
111
127
 
112
128
  # Scans the given line of text and figures out which fragment it belongs
@@ -240,3 +256,4 @@ class EmailReplyParser
240
256
  end
241
257
  end
242
258
  end
259
+
@@ -101,6 +101,47 @@ I am currently using the Java HTTP API.\n", reply.fragments[0].to_s
101
101
  assert_equal "The Quick Brown Fox Jumps Over The Lazy Dog", original
102
102
  end
103
103
 
104
# Email#visible_text must equal a by-hand join of every fragment that is
# not hidden, with trailing whitespace stripped.
def test_returns_only_the_visible_fragments_as_a_string
  reply = email(:email_2_1)
  shown = reply.fragments.reject { |fragment| fragment.hidden? }
  expected = shown.map { |fragment| fragment.to_s }.join("\n").rstrip
  assert_equal expected, reply.visible_text
end
108
+
109
# The email_2_1 fixture should reduce to its opening line only.
def test_parse_out_just_top_for_outlook_reply
  fixture = EMAIL_FIXTURE_PATH.join("email_2_1.txt")
  body = IO.read(fixture.to_s)
  assert_equal("Outlook with a reply", EmailReplyParser.parse_reply(body))
end
113
+
114
# A trailing "Sent from my iPhone" line must not appear in the parsed reply.
def test_parse_out_sent_from_iPhone
  fixture = EMAIL_FIXTURE_PATH.join("email_iPhone.txt")
  body = IO.read(fixture.to_s)
  assert_equal("Here is another email", EmailReplyParser.parse_reply(body))
end
118
+
119
# A trailing "Sent from my BlackBerry" line must not appear in the parsed reply.
def test_parse_out_sent_from_BlackBerry
  fixture = EMAIL_FIXTURE_PATH.join("email_BlackBerry.txt")
  body = IO.read(fixture.to_s)
  assert_equal("Here is another email", EmailReplyParser.parse_reply(body))
end
123
+
124
# A "Sent from my ..." line naming a multi-word device must also be removed.
def test_parse_out_send_from_multiword_mobile_device
  fixture = EMAIL_FIXTURE_PATH.join("email_multi_word_sent_from_my_mobile_device.txt")
  body = IO.read(fixture.to_s)
  assert_equal("Here is another email", EmailReplyParser.parse_reply(body))
end
128
+
129
# An ordinary sentence that merely begins with "Sent from my" must be kept
# in the parsed reply.
def test_do_not_parse_out_send_from_in_regular_sentence
  fixture = EMAIL_FIXTURE_PATH.join("email_sent_from_my_not_signature.txt")
  body = IO.read(fixture.to_s)
  expected = "Here is another email\n\nSent from my desk, is much easier then my mobile phone."
  assert_equal(expected, EmailReplyParser.parse_reply(body))
end
133
+
134
# Blank lines and "-" bullet lines in the reply must come through unchanged.
def test_retains_bullets
  fixture = EMAIL_FIXTURE_PATH.join("email_bullets.txt")
  body = IO.read(fixture.to_s)
  expected = "test 2 this should list second\n\nand have spaces\n\nand retain this formatting\n\n\n - how about bullets\n - and another"
  assert_equal(expected, EmailReplyParser.parse_reply(body))
end
139
+
140
# parse_reply must return the same String as read(body).visible_text.
def test_parse_reply
  fixture = EMAIL_FIXTURE_PATH.join("email_1_2.txt")
  body = IO.read(fixture.to_s)
  expected = EmailReplyParser.read(body).visible_text
  assert_equal(expected, EmailReplyParser.parse_reply(body))
end
144
+
104
145
  def email(name)
105
146
  body = IO.read EMAIL_FIXTURE_PATH.join("#{name}.txt").to_s
106
147
  EmailReplyParser.read body
@@ -0,0 +1,25 @@
1
+ Outlook with a reply
2
+
3
+
4
+ ------------------------------
5
+
6
+ *From:* Google Apps Sync Team [mailto:mail-noreply@google.com]
7
+ *Sent:* Thursday, February 09, 2012 1:36 PM
8
+ *To:* jow@xxxx.com
9
+ *Subject:* Google Apps Sync was updated!
10
+
11
+
12
+
13
+ Dear Google Apps Sync user,
14
+
15
+ Google Apps Sync for Microsoft Outlook® was recently updated. Your computer
16
+ now has the latest version (version 2.5). This release includes bug fixes
17
+ to improve product reliability. For more information about these and other
18
+ changes, please see the help article here:
19
+
20
+ http://www.google.com/support/a/bin/answer.py?answer=153463
21
+
22
+ Sincerely,
23
+
24
+ The Google Apps Sync Team.
25
+
@@ -0,0 +1,3 @@
1
+ Here is another email
2
+
3
+ Sent from my BlackBerry
@@ -0,0 +1,22 @@
1
+ test 2 this should list second
2
+
3
+ and have spaces
4
+
5
+ and retain this formatting
6
+
7
+
8
+ - how about bullets
9
+ - and another
10
+
11
+
12
+ On Fri, Feb 24, 2012 at 10:19 AM, <examples@email.goalengine.com> wrote:
13
+
14
+ > Give us an example of how you applied what they learned to achieve
15
+ > something in your organization
16
+
17
+
18
+
19
+
20
+ --
21
+
22
+ *Joe Smith | Director, Product Management*
@@ -0,0 +1,3 @@
1
+ Here is another email
2
+
3
+ Sent from my iPhone
@@ -0,0 +1,3 @@
1
+ Here is another email
2
+
3
+ Sent from my Verizon Wireless BlackBerry
@@ -0,0 +1,3 @@
1
+ Here is another email
2
+
3
+ Sent from my desk, is much easier then my mobile phone.
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: email_reply_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -32,6 +32,12 @@ files:
32
32
  - test/emails/email_1_4.txt
33
33
  - test/emails/email_1_5.txt
34
34
  - test/emails/email_1_6.txt
35
+ - test/emails/email_2_1.txt
36
+ - test/emails/email_BlackBerry.txt
37
+ - test/emails/email_bullets.txt
38
+ - test/emails/email_iPhone.txt
39
+ - test/emails/email_multi_word_sent_from_my_mobile_device.txt
40
+ - test/emails/email_sent_from_my_not_signature.txt
35
41
  homepage: http://github.com/github/email_reply_parser
36
42
  licenses: []
37
43
  post_install_message: