email_reply_parser 0.4.0 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -13,7 +13,7 @@ Gem::Specification.new do |s|
13
13
  ## If your rubyforge_project name is different, then edit it and comment out
14
14
  ## the sub! line in the Rakefile
15
15
  s.name = 'email_reply_parser'
16
- s.version = '0.4.0'
16
+ s.version = '0.5.0'
17
17
  s.date = '2012-03-01'
18
18
  s.rubyforge_project = 'email_reply_parser'
19
19
 
@@ -72,6 +72,12 @@ Gem::Specification.new do |s|
72
72
  test/emails/email_1_4.txt
73
73
  test/emails/email_1_5.txt
74
74
  test/emails/email_1_6.txt
75
+ test/emails/email_2_1.txt
76
+ test/emails/email_BlackBerry.txt
77
+ test/emails/email_bullets.txt
78
+ test/emails/email_iPhone.txt
79
+ test/emails/email_multi_word_sent_from_my_mobile_device.txt
80
+ test/emails/email_sent_from_my_not_signature.txt
75
81
  ]
76
82
  # = MANIFEST =
77
83
 
@@ -30,9 +30,9 @@ require 'strscan'
30
30
  #
31
31
  # [mail]: https://github.com/mikel/mail
32
32
  class EmailReplyParser
33
- VERSION = "0.4.0"
33
+ VERSION = "0.5.0"
34
34
 
35
- # Splits an email body into a list of Fragments.
35
+ # Public: Splits an email body into a list of Fragments.
36
36
  #
37
37
  # text - A String email body.
38
38
  #
@@ -41,6 +41,15 @@ class EmailReplyParser
41
41
  Email.new.read(text)
42
42
  end
43
43
 
44
+ # Public: Get the text of the visible portions of the given email body.
45
+ #
46
+ # text - A String email body.
47
+ #
48
+ # Returns a String.
49
+ def self.parse_reply(text)
50
+ self.read(text).visible_text
51
+ end
52
+
44
53
  ### Emails
45
54
 
46
55
  # An Email instance represents a parsed body String.
@@ -52,6 +61,13 @@ class EmailReplyParser
52
61
  @fragments = []
53
62
  end
54
63
 
64
+ # Public: Gets the combined text of the visible fragments of the email body.
65
+ #
66
+ # Returns a String.
67
+ def visible_text
68
+ fragments.select{|f| !f.hidden?}.map{|f| f.to_s}.join("\n").rstrip
69
+ end
70
+
55
71
  # Splits the given text into a list of Fragments. This is roughly done by
56
72
  # reversing the text and parsing from the bottom to the top. This way we
57
73
  # can check for 'On <date>, <author> wrote:' lines above quoted blocks.
@@ -105,8 +121,8 @@ class EmailReplyParser
105
121
 
106
122
  private
107
123
  EMPTY = "".freeze
108
- SIG_REGEX = /(\s--|__|\w-)$/
109
-
124
+ SIG_REGEX = /(--|__|\w-$)|(^(\w+\s*){1,3} #{"Sent from my".reverse}$)/
125
+
110
126
  ### Line-by-Line Parsing
111
127
 
112
128
  # Scans the given line of text and figures out which fragment it belongs
@@ -240,3 +256,4 @@ class EmailReplyParser
240
256
  end
241
257
  end
242
258
  end
259
+
@@ -101,6 +101,47 @@ I am currently using the Java HTTP API.\n", reply.fragments[0].to_s
101
101
  assert_equal "The Quick Brown Fox Jumps Over The Lazy Dog", original
102
102
  end
103
103
 
104
+ def test_returns_only_the_visible_fragments_as_a_string
105
+ reply = email(:email_2_1)
106
+ assert_equal reply.fragments.select{|r| !r.hidden?}.map{|r| r.to_s}.join("\n").rstrip, reply.visible_text
107
+ end
108
+
109
+ def test_parse_out_just_top_for_outlook_reply
110
+ body = IO.read EMAIL_FIXTURE_PATH.join("email_2_1.txt").to_s
111
+ assert_equal "Outlook with a reply", EmailReplyParser.parse_reply(body)
112
+ end
113
+
114
+ def test_parse_out_sent_from_iPhone
115
+ body = IO.read EMAIL_FIXTURE_PATH.join("email_iPhone.txt").to_s
116
+ assert_equal "Here is another email", EmailReplyParser.parse_reply(body)
117
+ end
118
+
119
+ def test_parse_out_sent_from_BlackBerry
120
+ body = IO.read EMAIL_FIXTURE_PATH.join("email_BlackBerry.txt").to_s
121
+ assert_equal "Here is another email", EmailReplyParser.parse_reply(body)
122
+ end
123
+
124
+ def test_parse_out_send_from_multiword_mobile_device
125
+ body = IO.read EMAIL_FIXTURE_PATH.join("email_multi_word_sent_from_my_mobile_device.txt").to_s
126
+ assert_equal "Here is another email", EmailReplyParser.parse_reply(body)
127
+ end
128
+
129
+ def test_do_not_parse_out_send_from_in_regular_sentence
130
+ body = IO.read EMAIL_FIXTURE_PATH.join("email_sent_from_my_not_signature.txt").to_s
131
+ assert_equal "Here is another email\n\nSent from my desk, is much easier then my mobile phone.", EmailReplyParser.parse_reply(body)
132
+ end
133
+
134
+ def test_retains_bullets
135
+ body = IO.read EMAIL_FIXTURE_PATH.join("email_bullets.txt").to_s
136
+ assert_equal "test 2 this should list second\n\nand have spaces\n\nand retain this formatting\n\n\n - how about bullets\n - and another",
137
+ EmailReplyParser.parse_reply(body)
138
+ end
139
+
140
+ def test_parse_reply
141
+ body = IO.read EMAIL_FIXTURE_PATH.join("email_1_2.txt").to_s
142
+ assert_equal EmailReplyParser.read(body).visible_text, EmailReplyParser.parse_reply(body)
143
+ end
144
+
104
145
  def email(name)
105
146
  body = IO.read EMAIL_FIXTURE_PATH.join("#{name}.txt").to_s
106
147
  EmailReplyParser.read body
@@ -0,0 +1,25 @@
1
+ Outlook with a reply
2
+
3
+
4
+ ------------------------------
5
+
6
+ *From:* Google Apps Sync Team [mailto:mail-noreply@google.com]
7
+ *Sent:* Thursday, February 09, 2012 1:36 PM
8
+ *To:* jow@xxxx.com
9
+ *Subject:* Google Apps Sync was updated!
10
+
11
+
12
+
13
+ Dear Google Apps Sync user,
14
+
15
+ Google Apps Sync for Microsoft Outlook® was recently updated. Your computer
16
+ now has the latest version (version 2.5). This release includes bug fixes
17
+ to improve product reliability. For more information about these and other
18
+ changes, please see the help article here:
19
+
20
+ http://www.google.com/support/a/bin/answer.py?answer=153463
21
+
22
+ Sincerely,
23
+
24
+ The Google Apps Sync Team.
25
+
@@ -0,0 +1,3 @@
1
+ Here is another email
2
+
3
+ Sent from my BlackBerry
@@ -0,0 +1,22 @@
1
+ test 2 this should list second
2
+
3
+ and have spaces
4
+
5
+ and retain this formatting
6
+
7
+
8
+ - how about bullets
9
+ - and another
10
+
11
+
12
+ On Fri, Feb 24, 2012 at 10:19 AM, <examples@email.goalengine.com> wrote:
13
+
14
+ > Give us an example of how you applied what they learned to achieve
15
+ > something in your organization
16
+
17
+
18
+
19
+
20
+ --
21
+
22
+ *Joe Smith | Director, Product Management*
@@ -0,0 +1,3 @@
1
+ Here is another email
2
+
3
+ Sent from my iPhone
@@ -0,0 +1,3 @@
1
+ Here is another email
2
+
3
+ Sent from my Verizon Wireless BlackBerry
@@ -0,0 +1,3 @@
1
+ Here is another email
2
+
3
+ Sent from my desk, is much easier then my mobile phone.
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: email_reply_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -32,6 +32,12 @@ files:
32
32
  - test/emails/email_1_4.txt
33
33
  - test/emails/email_1_5.txt
34
34
  - test/emails/email_1_6.txt
35
+ - test/emails/email_2_1.txt
36
+ - test/emails/email_BlackBerry.txt
37
+ - test/emails/email_bullets.txt
38
+ - test/emails/email_iPhone.txt
39
+ - test/emails/email_multi_word_sent_from_my_mobile_device.txt
40
+ - test/emails/email_sent_from_my_not_signature.txt
35
41
  homepage: http://github.com/github/email_reply_parser
36
42
  licenses: []
37
43
  post_install_message: