email_reply_parser-discourse 0.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (52) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +22 -0
  3. data/README.md +68 -0
  4. data/Rakefile +135 -0
  5. data/email_reply_parser.gemspec +122 -0
  6. data/lib/email_reply_parser.rb +456 -0
  7. data/test/email_reply_parser_test.rb +431 -0
  8. data/test/emails/correct_sig.txt +4 -0
  9. data/test/emails/email_1_1.txt +13 -0
  10. data/test/emails/email_1_2.txt +51 -0
  11. data/test/emails/email_1_3.txt +55 -0
  12. data/test/emails/email_1_4.txt +5 -0
  13. data/test/emails/email_1_5.txt +15 -0
  14. data/test/emails/email_1_6.txt +15 -0
  15. data/test/emails/email_1_7.txt +12 -0
  16. data/test/emails/email_1_8.txt +6 -0
  17. data/test/emails/email_1_9.txt +9 -0
  18. data/test/emails/email_2_1.txt +25 -0
  19. data/test/emails/email_2_2.txt +10 -0
  20. data/test/emails/email_2_3.txt +14 -0
  21. data/test/emails/email_2_4.txt +14 -0
  22. data/test/emails/email_2_5.txt +15 -0
  23. data/test/emails/email_2_6.txt +11 -0
  24. data/test/emails/email_2_7.txt +5 -0
  25. data/test/emails/email_2_8.txt +4 -0
  26. data/test/emails/email_2_9.txt +9 -0
  27. data/test/emails/email_2nd_paragraph_starting_with_on.txt +12 -0
  28. data/test/emails/email_BlackBerry.txt +3 -0
  29. data/test/emails/email_bullets.txt +22 -0
  30. data/test/emails/email_from_address_in_quote_header.txt +12 -0
  31. data/test/emails/email_from_name_in_quote_header.txt +12 -0
  32. data/test/emails/email_hyphens.txt +5 -0
  33. data/test/emails/email_iPhone.txt +3 -0
  34. data/test/emails/email_mentions_own_email_address.txt +6 -0
  35. data/test/emails/email_mentions_own_name.txt +6 -0
  36. data/test/emails/email_multi_word_sent_from_my_mobile_device.txt +3 -0
  37. data/test/emails/email_multiline_quote_header_es_mx.txt +8 -0
  38. data/test/emails/email_multiline_quote_header_fr.txt +8 -0
  39. data/test/emails/email_multiline_quote_header_from_first.txt +11 -0
  40. data/test/emails/email_multiline_quote_header_from_replyto_date_to_subject.txt +12 -0
  41. data/test/emails/email_multiline_quote_header_from_to_date_subject.txt +11 -0
  42. data/test/emails/email_multiline_quote_header_none.txt +11 -0
  43. data/test/emails/email_multiline_quote_header_pt_br.txt +8 -0
  44. data/test/emails/email_multiline_quote_header_with_asterisks.txt +21 -0
  45. data/test/emails/email_multiline_quote_header_with_cc.txt +9 -0
  46. data/test/emails/email_multiline_quote_header_with_multiline_headers.txt +14 -0
  47. data/test/emails/email_no_signature_deliminator.txt +7 -0
  48. data/test/emails/email_no_signature_deliminator_adds_a_middle_initial.txt +7 -0
  49. data/test/emails/email_one_is_not_on.txt +10 -0
  50. data/test/emails/email_sent_from_my_not_signature.txt +3 -0
  51. data/test/emails/email_was_showing_as_nothing_visible.txt +13 -0
  52. metadata +96 -0
@@ -0,0 +1,431 @@
1
# encoding: UTF-8
require 'rubygems'
require 'test/unit'
require 'pathname'
require 'pp'

# Directory containing this test file; the library under test and the
# fixture emails are both located relative to it.
dir = Pathname.new File.expand_path(File.dirname(__FILE__))
require dir + '..' + 'lib' + 'email_reply_parser'

# Directory holding the "<name>.txt" email bodies read by the tests below.
EMAIL_FIXTURE_PATH = dir + 'emails'

# Exercises EmailReplyParser against the fixture emails in EMAIL_FIXTURE_PATH
# and against a handful of inline bodies.
#
# Three entry points are used throughout:
# * EmailReplyParser.read(body)               -> object exposing #fragments and #visible_text
# * EmailReplyParser.parse_reply(body[, from]) -> String
# * EmailReplyParser::Email.new               -> used to reach private helpers via #send
class EmailReplyParserTest < Test::Unit::TestCase
  # Expected reply for the fixtures whose signature carries no delimiter.
  NO_DELIMITER_REPLY = "I don't like putting any delimiator in my signature because I think that is cool.\n\nReally it is.".freeze

  # Expected reply shared by the quote-header fixtures.
  CULTURES_REPLY = "I have gained valuable experience from working with students from other cultures. They bring a significantly different perspective to the work we do. I have also had the opportunity to practice making myself very clear in discussion, so that everyone understands. I've also seen how different our culture is to them, in their reactions to what I think is a normal approach to assignments, and to life in general.".freeze

  def test_does_not_modify_input_string
    original = "The Quick Brown Fox Jumps Over The Lazy Dog"
    EmailReplyParser.read original
    assert_equal "The Quick Brown Fox Jumps Over The Lazy Dog", original
  end

  def test_reads_simple_body
    reply = email(:email_1_1)
    assert_equal 3, reply.fragments.size

    assert(reply.fragments.none? { |f| f.quoted? })
    assert_equal [false, true, true],
      reply.fragments.map { |f| f.signature? }
    assert_equal [false, true, true],
      reply.fragments.map { |f| f.hidden? }

    # Written with explicit "\n" so that source indentation can never leak
    # into the expected value.
    expected = "Hi folks\n" \
               "\n" \
               "What is the best way to clear a Riak bucket of all key, values after\n" \
               "running a test?\n" \
               "I am currently using the Java HTTP API.\n"
    assert_equal expected, reply.fragments[0].to_s

    assert_equal "-Abhishek Kona\n\n", reply.fragments[1].to_s
  end

  def test_reads_top_post
    reply = email(:email_1_3)
    assert_equal 5, reply.fragments.size

    assert_equal [false, false, true, false, false],
      reply.fragments.map { |f| f.quoted? }
    assert_equal [false, true, true, true, true],
      reply.fragments.map { |f| f.hidden? }
    assert_equal [false, true, false, false, true],
      reply.fragments.map { |f| f.signature? }

    assert_match(/^Oh thanks.\n\nHaving/, reply.fragments[0].to_s)
    assert_match(/^-A/, reply.fragments[1].to_s)
    assert_match(/^On [^\:]+\:/, reply.fragments[2].to_s)
    assert_match(/^_/, reply.fragments[4].to_s)
  end

  def test_reads_bottom_post
    reply = email(:email_1_2)
    assert_equal 6, reply.fragments.size

    assert_equal [false, true, false, true, false, false],
      reply.fragments.map { |f| f.quoted? }
    assert_equal [false, false, false, false, false, true],
      reply.fragments.map { |f| f.signature? }
    assert_equal [false, false, false, true, true, true],
      reply.fragments.map { |f| f.hidden? }

    assert_equal "Hi,", reply.fragments[0].to_s
    assert_match(/^On [^\:]+\:/, reply.fragments[1].to_s)
    assert_match(/^You can list/, reply.fragments[2].to_s)
    assert_match(/^> /, reply.fragments[3].to_s)
    assert_match(/^_/, reply.fragments[5].to_s)
  end

  def test_recognizes_date_string_above_quote
    reply = email :email_1_4

    assert_match(/^Awesome/, reply.fragments[0].to_s)
    assert_match(/^On/, reply.fragments[1].to_s)
    assert_match(/Loader/, reply.fragments[1].to_s)
  end

  def test_a_complex_body_with_only_one_fragment
    reply = email :email_1_5

    assert_equal 1, reply.fragments.size
  end

  def test_reads_email_with_correct_signature
    reply = email :correct_sig

    assert_equal 2, reply.fragments.size
    assert_equal [false, false], reply.fragments.map { |f| f.quoted? }
    assert_equal [false, true], reply.fragments.map { |f| f.signature? }
    assert_equal [false, true], reply.fragments.map { |f| f.hidden? }
    assert_match(/^--\nrick/, reply.fragments[1].to_s)
  end

  def test_reads_email_containing_hyphens
    reply = email :email_hyphens
    assert_equal 1, reply.fragments.size
    body = reply.fragments[0].to_s
    assert_match(/^Keep in mind/, body)
    assert_match(/their physical exam.$/, body)
  end

  # Each body contains stray hyphens/underscores in the text plus a trailing
  # signature; every one must split into a visible fragment followed by a
  # hidden one.
  def test_arbitrary_hypens_and_underscores
    bodies = [
      "here __and__ now.\n\n---\nSandro",
      "--okay\n\n-Sandro",
      "__okay\n\n-Sandro",
      "--1337\n\n-Sandro",
      "__1337\n\n-Sandro",
      "data -- __ foo\n\n-Sandro"
    ]

    bodies.each do |body|
      reply = EmailReplyParser.read body
      # The message identifies which body failed; it does not affect the result.
      assert_equal 2, reply.fragments.size, "body: #{body.inspect}"
      assert_equal [false, true], reply.fragments.map { |f| f.hidden? },
        "body: #{body.inspect}"
    end
  end

  def test_email_body_is_signature
    reply = EmailReplyParser.parse_reply "-- \nLes Hill\nleshill@gmail.com"
    assert_equal "", reply

    reply = EmailReplyParser.parse_reply "From: abc\nTo: Les Hill\nDate: 31/01/2013\nSubject: foo"
    assert_equal "", reply

    reply = EmailReplyParser.parse_reply "On Fri, Feb 24, 2012 at 10:19 AM, <boris@example.com> wrote:\n\n> hello"
    assert_equal "", reply
  end

  def test_deals_with_multiline_reply_headers
    reply = email :email_1_6

    assert_match(/^I get/, reply.fragments[0].to_s)
    assert_match(/^On/, reply.fragments[1].to_s)
    assert_match(/Was this/, reply.fragments[1].to_s)
  end

  def test_deals_with_windows_line_endings
    reply = email :email_1_7

    assert_match(/:\+1:/, reply.fragments[0].to_s)
    assert_match(/^On/, reply.fragments[1].to_s)
    assert_match(/Steps 0-2/, reply.fragments[1].to_s)
  end

  def test_parse_out_date_name_email_header
    assert_equal "Hello", visible_text(:email_1_8)
  end

  def test_signature_prefixed_by_whitespace
    assert_equal "Foo", visible_text(:email_1_9)
  end

  # visible_text must equal the non-hidden fragments joined by newlines, with
  # trailing whitespace stripped.
  def test_returns_only_the_visible_fragments_as_a_string
    reply = email(:email_2_1)
    expected = reply.fragments.reject(&:hidden?).map(&:to_s).join("\n").rstrip
    assert_equal expected, reply.visible_text
  end

  def test_parse_out_just_top_for_outlook_reply
    assert_equal "Outlook with a reply", visible_text(:email_2_1)
  end

  def test_parse_out_just_top_for_outlook_with_reply_directly_above_line
    assert_equal "Outlook with a reply directly above line", visible_text(:email_2_2)
  end

  def test_parse_out_just_top_for_windows_8_mail
    assert_equal "This one is from Windows 8 Mail (preview).", visible_text(:email_2_3)
  end

  def test_parse_out_just_top_for_outlook_2007
    assert_equal "Here's one from Outlook 2007.", visible_text(:email_2_4)
  end

  def test_parse_out_just_top_for_more_outlook_2013
    assert_equal "One from Outlook 2013?", visible_text(:email_2_5)
  end

  def test_parse_out_just_top_for_hotmail_reply
    assert_equal "Reply from the hottest mail.", visible_text(:email_2_6)
  end

  def test_parse_original_message
    assert_equal "Foo", visible_text(:email_2_7)
  end

  def test_parse_weird_signature
    assert_equal "Hello", visible_text(:email_2_8)
  end

  def test_parse_weird_signature_by_name
    body = fixture(:email_2_9)
    expected_body = "Hello"
    assert_equal expected_body, EmailReplyParser.parse_reply(body, "Rick Olson <rick.olson@example.com>")
  end

  def test_parse_out_sent_from_iPhone
    assert_equal "Here is another email", visible_text(:email_iPhone)
  end

  def test_parse_out_sent_from_BlackBerry
    assert_equal "Here is another email", visible_text(:email_BlackBerry)
  end

  def test_parse_out_send_from_multiword_mobile_device
    assert_equal "Here is another email", visible_text(:email_multi_word_sent_from_my_mobile_device)
  end

  def test_do_not_parse_out_send_from_in_regular_sentence
    expected = "Here is another email\n\nSent from my desk, is much easier then my mobile phone."
    assert_equal expected, visible_text(:email_sent_from_my_not_signature)
  end

  def test_retains_bullets
    # NOTE(review): the whitespace before each "-" must match the
    # email_bullets fixture exactly -- confirm against the fixture file.
    expected = "test 2 this should list second\n\nand have spaces\n\nand retain this formatting\n\n\n - how about bullets\n - and another"
    assert_equal expected, visible_text(:email_bullets)
  end

  # parse_reply(body) must agree with read(body).visible_text.
  def test_parse_reply
    body = fixture(:email_1_2)
    assert_equal EmailReplyParser.read(body).visible_text, EmailReplyParser.parse_reply(body)
  end

  def test_parse_out_signature_using_from_name
    body = fixture(:email_no_signature_deliminator)
    assert_equal NO_DELIMITER_REPLY, EmailReplyParser.parse_reply(body, "Jim Smith <john.smith@gmail.com>")
  end

  def test_parse_out_signature_using_from_name_different_case
    body = fixture(:email_no_signature_deliminator)
    assert_equal NO_DELIMITER_REPLY, EmailReplyParser.parse_reply(body, "jim smith <john.smith@gmail.com>")
  end

  def test_parse_out_signature_using_from_name_last_then_first
    body = fixture(:email_no_signature_deliminator)
    assert_equal NO_DELIMITER_REPLY, EmailReplyParser.parse_reply(body, '"Smith, Jim" <john.smith@gmail.com>')
  end

  def test_parse_out_signature_using_from_name_when_middle_initial_is_in_signature
    body = fixture(:email_no_signature_deliminator_adds_a_middle_initial)
    assert_equal NO_DELIMITER_REPLY, EmailReplyParser.parse_reply(body, "Jim Smith <john.smith@gmail.com>")
  end

  def test_that_a_sentence_with_my_name_in_it_does_not_become_a_signature
    body = fixture(:email_mentions_own_name)
    expected_body = "Hi,\n\nMy name is Jim Smith and I had a question.\n\nWhat do you do?"
    assert_equal expected_body, EmailReplyParser.parse_reply(body, "Jim Smith <john.smith@gmail.com>")
  end

  def test_simple_email_with_reply
    body = fixture(:email_was_showing_as_nothing_visible)
    # NOTE(review): assumes the wrapped lines carry no leading or trailing
    # whitespace -- confirm against the fixture file.
    expected_body = "On Friday, one achievement I had was learning a new technology that allows us\n" \
                    "to keep UI elements and events separated from the software on the\n" \
                    "server side, which should allow for more flexible UI code and\n" \
                    "decreased chances of code becoming a swarm of angry hornets. I've\n" \
                    "been transparent about the initial increased development time while\n" \
                    "learning the technology."

    assert_equal expected_body, EmailReplyParser.parse_reply(body)
  end

  def test_2nd_paragraph_starts_with_on
    body = fixture(:email_2nd_paragraph_starting_with_on)
    expected_body = "This emails tests that multiline header fix isn't catching things it shouldn't.\n" \
                    "\n" \
                    "On friday when I tried it didn't work as expect.\n" \
                    "\n" \
                    "This line would have been considered part of the header line."
    assert_equal expected_body, EmailReplyParser.parse_reply(body)
  end

  def test_from_email_in_quote_header
    body = fixture(:email_from_address_in_quote_header)
    assert_equal CULTURES_REPLY, EmailReplyParser.parse_reply(body, "shelly@example.com")
  end

  def test_do_not_make_any_line_with_from_address_quote_heading
    body = fixture(:email_mentions_own_email_address)
    expected_body = "Hi,\n\nMy email is john.smith@gmail.com and I had a question.\n\nWhat do you do?"
    assert_equal expected_body, EmailReplyParser.parse_reply(body, "Jim Smith <john.smith@gmail.com>")
  end

  def test_from_name_in_quote_header
    body = fixture(:email_from_name_in_quote_header)
    assert_equal CULTURES_REPLY, EmailReplyParser.parse_reply(body, "Smith, Shelly <shelly@example.com>")
  end

  def test_multiline_quote_header_from_first
    body = fixture(:email_multiline_quote_header_from_first)
    assert_equal CULTURES_REPLY, EmailReplyParser.parse_reply(body, "Smith, Shelly <shelly@example.com>")
  end

  def test_multiline_quote_header_none
    text = visible_text(:email_multiline_quote_header_none)
    assert_match(/Foo/, text)
    assert_match(/THE END!/, text)
  end

  def test_multiline_quote_header_from_to_date_subject
    assert_equal "Foo!", visible_text(:email_multiline_quote_header_from_to_date_subject)
  end

  def test_multiline_quote_header_from_replyto_date_to_subject
    assert_equal "Foo!", visible_text(:email_multiline_quote_header_from_replyto_date_to_subject)
  end

  def test_multiline_quote_header_pt_br
    assert_equal "Foo-pt-br", visible_text(:email_multiline_quote_header_pt_br)
  end

  def test_multiline_quote_header_es_mx
    assert_equal "Foo!", visible_text(:email_multiline_quote_header_es_mx)
  end

  def test_multiline_quote_header_fr
    assert_equal "Foo!", visible_text(:email_multiline_quote_header_fr)
  end

  def test_multiline_quote_header_with_cc
    assert_equal "Foo", visible_text(:email_multiline_quote_header_with_cc)
  end

  def test_multiline_quote_header_with_multiline_headers
    assert_equal "Foo", visible_text(:email_multiline_quote_header_with_multiline_headers)
  end

  def test_multiline_quote_header_with_asterisks
    assert_equal "Outlook with a reply", visible_text(:email_multiline_quote_header_with_asterisks)
  end

  # The tests below reach non-public helpers on EmailReplyParser::Email via
  # #send. The local is called `parser` so it does not shadow the private
  # #email helper defined at the bottom of this class.

  def test_parsing_name_from_address
    address = "Bob Jones <bob@gmail.com>"
    parser = EmailReplyParser::Email.new
    assert_equal "Bob Jones", parser.send(:parse_name_from_address, address)
  end

  def test_parsing_name_from_address_with_double_quotes
    address = "\"Bob Jones\" <bob@gmail.com>"
    parser = EmailReplyParser::Email.new
    assert_equal "Bob Jones", parser.send(:parse_name_from_address, address)
  end

  def test_parsing_name_from_address_with_single_quotes
    address = "'Bob Jones' <bob@gmail.com>"
    parser = EmailReplyParser::Email.new
    assert_equal "Bob Jones", parser.send(:parse_name_from_address, address)
  end

  def test_parsing_name_from_address_with_no_name
    address = "bob@gmail.com"
    parser = EmailReplyParser::Email.new
    assert_equal "", parser.send(:parse_name_from_address, address)
  end

  def test_parsing_email_from_address_with_name
    address = "\"Bob Jones\" <bob@gmail.com>"
    parser = EmailReplyParser::Email.new
    assert_equal "bob@gmail.com", parser.send(:parse_email_from_address, address)
  end

  def test_parsing_email_from_address_without_name
    address = "bob@gmail.com"
    parser = EmailReplyParser::Email.new
    assert_equal "bob@gmail.com", parser.send(:parse_email_from_address, address)
  end

  def test_one_is_not_on
    reply = email("email_one_is_not_on")
    assert_match(/One outstanding question/, reply.fragments[0].to_s)
    assert_match(/^On Oct 1, 2012/, reply.fragments[1].to_s)
  end

  def test_normalize_name_first_last
    parser = EmailReplyParser::Email.new
    name = "John Smith"
    assert_equal name, parser.send(:normalize_name, name)
  end

  def test_normalize_name_last_first
    parser = EmailReplyParser::Email.new
    name = "Smith, John"
    assert_equal "John Smith", parser.send(:normalize_name, name)
  end

  def test_normalize_name_first_last_and_qualification
    parser = EmailReplyParser::Email.new
    name = "John Smith, MD"
    assert_equal "John Smith", parser.send(:normalize_name, name)
  end

  def test_parse_nil_body
    body = nil
    assert_equal "", EmailReplyParser.parse_reply(body)
  end

  def test_parse_empty_body
    body = ""
    assert_equal "", EmailReplyParser.parse_reply(body)
  end

  private

  # Returns the raw text of the fixture "<name>.txt" under EMAIL_FIXTURE_PATH.
  # +name+ may be a Symbol or a String. File.read is used rather than IO.read
  # so the path is only ever opened as a file.
  def fixture(name)
    File.read(EMAIL_FIXTURE_PATH.join("#{name}.txt").to_s)
  end

  # Reads the named fixture and returns the result of EmailReplyParser.read.
  def email(name)
    EmailReplyParser.read(fixture(name))
  end

  # Shortcut for the visible_text of the named fixture.
  def visible_text(name)
    email(name).visible_text
  end
end
@@ -0,0 +1,4 @@
1
+ this is an email with a correct -- signature.
2
+
3
+ --
4
+ rick
@@ -0,0 +1,13 @@
1
+ Hi folks
2
+
3
+ What is the best way to clear a Riak bucket of all key, values after
4
+ running a test?
5
+ I am currently using the Java HTTP API.
6
+
7
+ -Abhishek Kona
8
+
9
+
10
+ _______________________________________________
11
+ riak-users mailing list
12
+ riak-users@lists.basho.com
13
+ http://lists.basho.com/mailman/listinfo/riak-users_lists.basho.com