lp_email_reply_parser 0.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (51) hide show
  1. data/LICENSE +22 -0
  2. data/README.md +68 -0
  3. data/Rakefile +135 -0
  4. data/lib/email_reply_parser.rb +448 -0
  5. data/lp_email_reply_parser.gemspec +77 -0
  6. data/test/email_reply_parser_test.rb +431 -0
  7. data/test/emails/correct_sig.txt +4 -0
  8. data/test/emails/email_1_1.txt +13 -0
  9. data/test/emails/email_1_2.txt +51 -0
  10. data/test/emails/email_1_3.txt +55 -0
  11. data/test/emails/email_1_4.txt +5 -0
  12. data/test/emails/email_1_5.txt +15 -0
  13. data/test/emails/email_1_6.txt +15 -0
  14. data/test/emails/email_1_7.txt +12 -0
  15. data/test/emails/email_1_8.txt +6 -0
  16. data/test/emails/email_1_9.txt +9 -0
  17. data/test/emails/email_2_1.txt +25 -0
  18. data/test/emails/email_2_2.txt +10 -0
  19. data/test/emails/email_2_3.txt +14 -0
  20. data/test/emails/email_2_4.txt +14 -0
  21. data/test/emails/email_2_5.txt +15 -0
  22. data/test/emails/email_2_6.txt +11 -0
  23. data/test/emails/email_2_7.txt +5 -0
  24. data/test/emails/email_2_8.txt +4 -0
  25. data/test/emails/email_2_9.txt +9 -0
  26. data/test/emails/email_2nd_paragraph_starting_with_on.txt +12 -0
  27. data/test/emails/email_BlackBerry.txt +3 -0
  28. data/test/emails/email_bullets.txt +22 -0
  29. data/test/emails/email_from_address_in_quote_header.txt +12 -0
  30. data/test/emails/email_from_name_in_quote_header.txt +12 -0
  31. data/test/emails/email_hyphens.txt +5 -0
  32. data/test/emails/email_iPhone.txt +3 -0
  33. data/test/emails/email_mentions_own_email_address.txt +6 -0
  34. data/test/emails/email_mentions_own_name.txt +6 -0
  35. data/test/emails/email_multi_word_sent_from_my_mobile_device.txt +3 -0
  36. data/test/emails/email_multiline_quote_header_es_mx.txt +8 -0
  37. data/test/emails/email_multiline_quote_header_fr.txt +8 -0
  38. data/test/emails/email_multiline_quote_header_from_first.txt +11 -0
  39. data/test/emails/email_multiline_quote_header_from_replyto_date_to_subject.txt +12 -0
  40. data/test/emails/email_multiline_quote_header_from_to_date_subject.txt +11 -0
  41. data/test/emails/email_multiline_quote_header_none.txt +11 -0
  42. data/test/emails/email_multiline_quote_header_pt_br.txt +8 -0
  43. data/test/emails/email_multiline_quote_header_with_asterisks.txt +21 -0
  44. data/test/emails/email_multiline_quote_header_with_cc.txt +9 -0
  45. data/test/emails/email_multiline_quote_header_with_multiline_headers.txt +14 -0
  46. data/test/emails/email_no_signature_deliminator.txt +7 -0
  47. data/test/emails/email_no_signature_deliminator_adds_a_middle_initial.txt +7 -0
  48. data/test/emails/email_one_is_not_on.txt +10 -0
  49. data/test/emails/email_sent_from_my_not_signature.txt +3 -0
  50. data/test/emails/email_was_showing_as_nothing_visible.txt +13 -0
  51. metadata +143 -0
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ The MIT License
2
+
3
+ Copyright (c) GitHub
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
22
+
data/README.md ADDED
@@ -0,0 +1,68 @@
1
+ # Email Reply Parser
2
+
3
+ [![Build Status](https://secure.travis-ci.org/lawrencepit/email_reply_parser.png?branch=master)](http://travis-ci.org/lawrencepit/email_reply_parser)
4
+ [![Code Climate](https://codeclimate.com/badge.png)](https://codeclimate.com/github/lawrencepit/email_reply_parser)
5
+ [![Gem Version](https://fury-badge.herokuapp.com/rb/email_reply_parser.png)](http://badge.fury.io/rb/email_reply_parser)
6
+
7
+ EmailReplyParser is a small library to parse plain text email content.
8
+
9
+ This is what GitHub uses to display comments that were created from
10
+ email replies. This code is being open sourced in an effort to
11
+ crowdsource the quality of our email representation.
12
+
13
+ ## Usage
14
+
15
+ To parse reply body:
16
+
17
+ `parsed_body = EmailReplyParser.parse_reply(email_body, from_address)`
18
+
19
+ Argument `from_address` is optional. If included, the parser will attempt to strip signatures based on the name in the from address (useful when the signature doesn't have a standard delimiter).
20
+
21
+ ## Installation
22
+
23
+ Get it from [GitHub][github] or `gem install email_reply_parser`. Run `rake` to run the tests.
24
+
25
+ [github]: https://github.com/github/email_reply_parser
26
+
27
+ ## Contribute
28
+
29
+ If you'd like to hack on EmailReplyParser, start by forking the repo on GitHub:
30
+
31
+ https://github.com/github/email_reply_parser
32
+
33
+ The best way to get your changes merged back into core is as follows:
34
+
35
+ * Clone down your fork
36
+ * Create a thoughtfully named topic branch to contain your change
37
+ * Hack away
38
+ * Add tests and make sure everything still passes by running rake
39
+ * If you are adding new functionality, document it in the README
40
+ * Do not change the version number, I will do that on my end
41
+ * If necessary, rebase your commits into logical chunks, without errors
42
+ * Push the branch up to GitHub
43
+ * Send a pull request to the `github/email_reply_parser` project.
44
+
45
+ ## Known Issues
46
+
47
+ ### Quoted Headers
48
+
49
+ Quoted headers like these currently don't work with other languages:
50
+
51
+ On <date>, <author> wrote:
52
+
53
+ > blah
54
+
55
+ ### Weird Signatures
56
+
57
+ Not everyone follows this convention:
58
+
59
+ Hello
60
+
61
+ Saludos!!!!!!!!!!!!!!
62
+ Galactic President Superstar Mc Awesomeville
63
+ GitHub
64
+
65
+ **********************DISCLAIMER***********************************
66
+ * Note: blah blah blah *
67
+ **********************DISCLAIMER***********************************
68
+
data/Rakefile ADDED
@@ -0,0 +1,135 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+ require 'date'
4
+
5
+ #############################################################################
6
+ #
7
+ # Helper functions
8
+ #
9
+ #############################################################################
10
+
11
# Returns the gem name, taken from the first `*.gemspec` file found in the
# current directory. The result is memoized in @name.
#
# The name is the file's basename without the `.gemspec` extension, so a
# dotted gem name such as `foo.bar.gemspec` yields "foo.bar".
#
# Raises RuntimeError when no gemspec file is present.
def name
  @name ||= begin
    spec_path = Dir['*.gemspec'].first
    raise "No .gemspec file found in #{Dir.pwd}" unless spec_path
    File.basename(spec_path, '.gemspec')
  end
end
14
+
15
# Returns the version string declared by the `VERSION = "..."` constant in
# `lib/<name>.rb`.
#
# The first line that assigns VERSION is located, then the text between its
# first and last quote characters is extracted.
def version
  library_source = File.read("lib/#{name}.rb")
  version_line = library_source[/^\s*VERSION\s*=\s*.*/]
  quoted = version_line.match(/.*VERSION\s*=\s*['"](.*)['"]/)
  quoted[1]
end
19
+
20
# Returns today's date as a `YYYY-MM-DD` string.
def date
  today = Date.today
  today.iso8601
end
23
+
24
# Returns the RubyForge project name, which is simply the value of `name`.
def rubyforge_project
  name
end
27
+
28
# Returns the gemspec file name: `name` followed by `.gemspec`.
def gemspec_file
  [name, 'gemspec'].join('.')
end
31
+
32
# Returns the built gem's file name, `<name>-<version>.gem`.
def gem_file
  base = [name, version].join('-')
  "#{base}.gem"
end
35
+
36
# Rewrites, in place, the first `.<header_name> = '...'` assignment found in
# +head+ so that its quoted value becomes the result of calling the helper
# method named +header_name+.
#
# head        - String holding the gemspec header section (mutated).
# header_name - Symbol naming both the gemspec attribute and the helper
#               method that supplies its new value.
#
# Returns +head+ when a substitution was made, or nil when nothing matched.
def replace_header(head, header_name)
  assignment = /(\.#{header_name}\s*= ').*'/
  head.sub!(assignment) do
    prefix = Regexp.last_match(1)
    "#{prefix}#{send(header_name)}'"
  end
end
39
+
40
+ #############################################################################
41
+ #
42
+ # Standard tasks
43
+ #
44
+ #############################################################################
45
+
46
# Running `rake` with no arguments runs the test suite.
task :default => :test

require 'rake/testtask'

# Defines the `test` task: every `test/*_test.rb` file is run with `lib` and
# `test` added to the load path, in verbose mode.
Rake::TestTask.new(:test) do |t|
  t.libs.push('lib', 'test')
  t.pattern = 'test/*_test.rb'
  t.verbose = true
end
54
+
55
desc "Open an irb session preloaded with this library"
task :console do
  # The old `-rubygems` switch (really `-r ubygems`) is gone: the ubygems.rb
  # shim was removed in Ruby 2.5, and RubyGems is loaded by default since 1.9.
  sh "irb -r ./lib/#{name}.rb"
end
59
+
60
+ #############################################################################
61
+ #
62
+ # Custom tasks (add your own tasks here)
63
+ #
64
+ #############################################################################
65
+
66
+
67
+
68
+ #############################################################################
69
+ #
70
+ # Packaging tasks
71
+ #
72
+ #############################################################################
73
+
74
desc "Create tag v#{version} and build and push #{gem_file} to Rubygems"
# Releases the gem: commits, tags, pushes the branch and tag to origin, then
# pushes the built gem. Aborts unless the current git branch is master.
task :release => :build do
  on_master = `git branch` =~ /^\* master$/
  if !on_master
    puts "You must be on the master branch to release!"
    exit!
  end

  tag = "v#{version}"
  sh "git commit --allow-empty -a -m 'Release #{version}'"
  sh "git tag #{tag}"
  sh "git push origin master"
  sh "git push origin #{tag}"
  sh "gem push pkg/#{gem_file}"
end
86
+
87
desc "Build #{gem_file} into the pkg directory"
# Regenerates the gemspec (via the :gemspec prerequisite), builds the gem,
# and moves the resulting file into ./pkg.
task :build => :gemspec do
  [
    "mkdir -p pkg",
    "gem build #{gemspec_file}",
    "mv #{gem_file} pkg"
  ].each { |command| sh command }
end
93
+
94
desc "Generate #{gemspec_file}"
# Rewrites the gemspec in place: refreshes the name/version/date headers and
# replaces the manifest section with the current `git ls-files` listing.
task :gemspec => :validate do
  marker = " # = MANIFEST =\n"

  # The gemspec is split into three parts around the manifest markers; the
  # middle part (the old manifest) is discarded and rebuilt below.
  head, _old_manifest, tail = File.read(gemspec_file).split(marker)

  # Drop :rubyforge_project from this list if your rubyforge_project has a
  # different name.
  [:name, :version, :date, :rubyforge_project].each do |field|
    replace_header(head, field)
  end

  # File list comes from git, minus dotfiles and rdoc/pkg output.
  tracked = `git ls-files`.split("\n").sort
  kept = tracked.reject { |file| file =~ /^\./ }
  kept = kept.reject { |file| file =~ /^(rdoc|pkg)/ }
  files = kept.map { |file| " #{file}" }.join("\n")

  # Piece the file back together and write it out.
  manifest = " s.files = %w[\n#{files}\n ]\n"
  spec = [head, manifest, tail].join(marker)
  File.open(gemspec_file, 'w') { |io| io.write(spec) }
  puts "Updated #{gemspec_file}"
end
122
+
123
desc "Validate #{gemspec_file}"
# Checks the project layout before a gemspec is generated: `lib` may only
# hold `<name>.rb` and a `<name>` directory, and no `VERSION*` file may sit
# at the project root. Exits immediately on the first violation.
task :validate do
  allowed = ["lib/#{name}.rb", "lib/#{name}"]
  strays = Dir['lib/*'] - allowed
  if !strays.empty?
    puts "Directory `lib` should only contain a `#{name}.rb` file and `#{name}` dir."
    exit!
  end

  version_files = Dir['VERSION*']
  if !version_files.empty?
    puts "A `VERSION` file at root level violates Gem best practices."
    exit!
  end
end
135
+
@@ -0,0 +1,448 @@
1
+ require 'strscan'
2
+
3
# EmailReplyParser is a small library to parse plain text email content. The
# goal is to identify which fragments are quoted, part of a signature, or
# original body content. We want to support both top and bottom posters, so
# no simple "REPLY ABOVE HERE" content is used.
#
# Beyond RFC 5322 (which is handled by the [Ruby mail gem][mail]), there aren't
# any real standards for how emails are created. This attempts to parse out
# common conventions for things like replies:
#
#     this is some text
#
#     On <date>, <author> wrote:
#     > blah blah
#     > blah blah
#
# ... and signatures:
#
#     this is some text
#
#     --
#     Bob
#     http://homepage.com/~bob
#
# Each of these are parsed into Fragment objects.
#
# EmailReplyParser also attempts to figure out which of these blocks should
# be hidden from users.
#
# [mail]: https://github.com/mikel/mail
class EmailReplyParser
  VERSION = "0.6"

  # Public: Splits an email body into a list of Fragments.
  #
  # text         - A String email body.
  # from_address - from address of the email (optional, nil is accepted)
  #
  # Returns an Email instance.
  def self.read(text, from_address = "")
    Email.new.read(text, from_address)
  end

  # Public: Get the text of the visible portions of the given email body.
  #
  # text         - A String email body (anything else is converted with to_s).
  # from_address - from address of the email (optional, nil is accepted)
  #
  # Returns a String.
  def self.parse_reply(text, from_address = "")
    read(text.to_s, from_address).visible_text
  end

  ### Emails

  # An Email instance represents a parsed body String.
  class Email
    # Emails have an Array of Fragments.
    attr_reader :fragments

    EMPTY = "".freeze

    COMMON_REPLY_HEADER_REGEXES = [
      /^On(.+)wrote:$/nm,
      /\A\d{4}\/\d{1,2}\/\d{1,2}\s+.{1,80}\s<[^@]+@[^@]+>\Z/,
    ]

    # Line optionally starts with whitespace, contains two or more hyphens or
    # underscores, and ends with optional whitespace.
    # Example: '---' or '___' or '---   '
    MULTI_LINE_SIGNATURE_REGEX = /^\s*[-_]{2,}\s*$/

    # Line optionally starts with whitespace, followed by one hyphen, followed
    # by a word character.
    # Example: '-Sandro'
    ONE_LINE_SIGNATURE_REGEX = /^\s*-\w/

    # A line made only of whitespace, underscores and hyphens, optionally
    # wrapped around the words "Original Message".
    ORIGINAL_MESSAGE_SIGNATURE_REGEX = /^[\s_-]+(Original Message)?[\s_-]+$/

    # Line starts with "Sent from my", followed by one to three words, and
    # optionally ends with something in angle brackets.
    # Example: "Sent from my iPhone 3G"
    SENT_FROM_REGEX = /^Sent from my (\s*\w+){1,3}(\s*<.*>)?$/

    SIGNATURE_REGEX = Regexp.new(Regexp.union(MULTI_LINE_SIGNATURE_REGEX, ONE_LINE_SIGNATURE_REGEX, ORIGINAL_MESSAGE_SIGNATURE_REGEX, SENT_FROM_REGEX).source, Regexp::NOENCODING)

    # TODO: refactor out in a i18n.yml file
    # Supports English, French, Es-Mexican, Pt-Brazilian
    # Maps a downcased label to its label-group
    QUOTE_HEADER_LABELS = {
      :from     => ["From", "De"],
      :to       => ["To", "Para", "A"],
      :cc       => ["CC"],
      :reply_to => ["Reply-To"],
      :date     => ["Date", "Sent", "Enviado", "Enviada em", "Fecha"],
      :subject  => ["Subject", "Assunto", "Asunto", "Objet"]
    }.each_with_object({}) { |(group, labels), lookup|
      labels.each { |label| lookup[label.downcase] = group }
    }

    # Public: Gets the combined text of the visible fragments of the email body.
    #
    # Returns a String.
    def visible_text
      fragments.reject(&:hidden?).map(&:to_s).join("\n").rstrip
    end

    # Splits the given text into a list of Fragments. This is roughly done by
    # reversing the text and parsing from the bottom to the top. This way we
    # can check for 'On <date>, <author> wrote:' lines above quoted blocks.
    #
    # text         - A String email body (converted with to_s, so nil is
    #                treated as an empty body).
    # from_address - from address of the email (optional; converted with
    #                to_s, so nil means "no sender information").
    #
    # Returns this same Email instance.
    def read(text, from_address = "")
      @fragments = []

      # parse out the from name if one exists and save for use later
      from_address = from_address.to_s
      @from_name_raw = parse_raw_name_from_address(from_address)
      @from_name_normalized = normalize_name(@from_name_raw)
      @from_email = parse_email_from_address(from_address)

      text = normalize_text(text.to_s)

      # The text is reversed initially due to the way we check for hidden
      # fragments.
      text = text.reverse

      # This determines if any 'visible' Fragment has been found. Once any
      # visible Fragment is found, stop looking for hidden ones.
      @found_visible = false

      # This instance variable points to the current Fragment. If the matched
      # line fits, it should be added to this Fragment. Otherwise, finish it
      # and start a new Fragment.
      @fragment = nil

      # Use the StringScanner to pull out each line of the email content.
      @scanner = StringScanner.new(text)
      while (line = @scanner.scan_until(/\n/n))
        scan_line(line)
      end

      # Be sure to parse the last line of the email.
      if (last_line = @scanner.rest.to_s).size > 0
        scan_line(last_line, true)
      end

      # Finish up the final fragment. Finishing a fragment will detect any
      # attributes (hidden, signature, reply), and join each line into a
      # string.
      finish_fragment

      @scanner = @fragment = nil

      self
    end

    private

    # normalize text so it is easier to parse
    #
    # text - text to normalize
    #
    # Returns a String
    def normalize_text(text)
      # in 1.9 we want to operate on the raw bytes
      text = text.dup.force_encoding('binary') if text.respond_to?(:force_encoding)

      # Normalize line endings.
      text.gsub!("\r\n", "\n")

      # Check for multi-line reply headers. Some clients break up
      # the "On DATE, NAME <EMAIL> wrote:" line into multiple lines.
      if (header = text.match(/^(On\s(.+)wrote:)$/m))
        # Remove all new lines from the reply header, as long as it holds no
        # double newline; if it does, we have grabbed something that is not
        # actually a reply header.
        text.gsub!(header[1], header[1].gsub("\n", " ")) unless header[1] =~ /\n\n/
      end

      # Some users may reply directly above a line of underscores.
      # In order to ensure that these fragments are split correctly,
      # make sure that all lines of underscores are preceded by
      # at least two newline characters.
      text.gsub!(/([^\n])(?=\n_{7}_+)$/m, "\\1\n")

      text
    end

    # Parse a person's name from an e-mail address
    #
    # address - from address, e.g. 'Doe, John <john@example.com>'.
    #
    # Returns a String.
    def parse_name_from_address(address)
      normalize_name(parse_raw_name_from_address(address))
    end

    # Extracts the display name that precedes the `<email>` part of an
    # address, without normalizing it.
    #
    # Returns the stripped name, or an empty String when there is none.
    def parse_raw_name_from_address(address)
      match = address.match(/^["']*([\w\s,]+)["']*\s*</)
      match ? match[1].strip.to_s : EMPTY
    end

    # Extracts the bare email from an address.
    #
    # Returns the text between angle brackets, or the address itself when it
    # has no angle brackets.
    def parse_email_from_address(address)
      match = address.match(/<(.*)>/)
      match ? match[1] : address
    end

    # Normalize a name to First Last
    #
    # name - name to normalize.
    #
    # Returns a String.
    def normalize_name(name)
      name.include?(',') ? make_name_first_then_last(name) : name
    end

    # Turns a comma-separated name into "First Last".
    #
    # When the part before the first comma already contains a space it is
    # taken to be a full name (e.g. "John Doe, PhD") and returned as is.
    # A name with nothing after the comma (e.g. "Doe,") yields just the
    # stripped part before it instead of raising.
    #
    # Returns a String.
    def make_name_first_then_last(name)
      pieces = name.split(',')
      family = pieces[0].to_s
      return family if family.include?(" ")

      given = pieces[1]
      return family.strip if given.nil?

      given.strip + " " + family.strip
    end

    ### Line-by-Line Parsing

    # Scans the given line of text and determines which fragment it belongs to.
    #
    # line - A String line, still reversed as it came from the scanner.
    # last - true when this is the final (top-most) line of the email.
    def scan_line(line, last = false)
      line.chomp!("\n")
      line.reverse!
      line.rstrip!

      # Mark the current Fragment as a signature if the current line is empty
      # and the Fragment starts with a common signature indicator.
      # Mark the current Fragment as a quote if the current line is empty
      # and the Fragment starts with a multiline quote header.
      scan_signature_or_quote if @fragment && line == EMPTY

      # We're looking for leading `>`'s to see if this line is part of a
      # quoted Fragment.
      is_quoted = !!(line =~ /^>+/n)

      # Note that a common reply header also counts as part of the quoted
      # Fragment, even though it doesn't start with `>`.
      unless @fragment &&
          ((@fragment.quoted? == is_quoted) ||
           (@fragment.quoted? && (line_is_reply_header?(line) || line == EMPTY)))
        finish_fragment
        @fragment = Fragment.new
        @fragment.quoted = is_quoted
      end

      @fragment.add_line(line)
      scan_signature_or_quote if last
    end

    # Finishes the current fragment as a signature or as a quote when its
    # top-most line looks like a signature, or its current block starts with
    # a multiline quote header. Otherwise leaves the fragment open.
    def scan_signature_or_quote
      if signature_line?(@fragment.lines.first)
        @fragment.signature = true
        finish_fragment
      elsif multiline_quote_header_in_fragment?
        @fragment.quoted = true
        finish_fragment
      end
    end

    # Returns +true+ if the current block in the current fragment has
    # a multiline quote header, +false+ otherwise.
    #
    # The quote header we're looking for is mainly generated by Outlook
    # clients. It's considered a quote header if the first 4 folded lines
    # have one of the following forms:
    #
    #     label: some text
    #     *label:* some text
    #
    # where a line like this:
    #
    #     label: some text
    #       possibly indented text that belongs to the previous line
    #
    # is folded into:
    #
    #     label: some text possibly indented text that belongs to the previous line
    #
    # and where label is a value from +QUOTE_HEADER_LABELS+ that appears
    # only once in the first 4 lines and where each group of a label
    # is represented at most once.
    def multiline_quote_header_in_fragment?
      folding = false
      label_groups = []
      @fragment.current_block.split("\n").each do |line|
        label = nil
        if line =~ /\A\s*\*?([^:]+):(\s|\*)/
          label = QUOTE_HEADER_LABELS[Regexp.last_match(1).downcase]
        end

        if label
          return false if label_groups.include?(label)
          # A fourth distinct label group confirms the header.
          return true if label_groups.length == 3
          label_groups << label
          folding = true
        elsif !folding
          # Anything before the first known label rules the block out; after
          # it, unknown lines are treated as folded continuation lines.
          return false
        end
      end
      false
    end

    # Detects if a given line is the beginning of a signature
    #
    # line - A String line of text from the email.
    #
    # Returns a truthy value if the line is the beginning of a signature, or
    # a falsy one otherwise.
    def signature_line?(line)
      line =~ SIGNATURE_REGEX || line_is_signature_name?(line)
    end

    # Detects if a given line is a common reply header.
    #
    # line - A String line of text from the email.
    #
    # Returns true if the line is a valid header, or false.
    def line_is_reply_header?(line)
      COMMON_REPLY_HEADER_REGEXES.any? { |regex| line =~ regex }
    end

    # Detects if the @from name is a big part of a given line and therefore the beginning of a signature
    #
    # line - A String line of text from the email.
    #
    # Returns a truthy value if the normalized from name matches the line and
    # makes up more than a quarter of its length, or a falsy one otherwise.
    def line_is_signature_name?(line)
      regexp = generate_regexp_for_name
      @from_name_normalized != "" && (line =~ regexp) && ((@from_name_normalized.size.to_f / line.size) > 0.25)
    end

    # Generates a case-insensitive regexp which allows for additional words
    # or initials between the parts of the from name. Each name part is
    # escaped so it is always matched literally.
    def generate_regexp_for_name
      name_parts = @from_name_normalized.split(" ").map { |part| Regexp.escape(part) }
      separator = '[\w.\s]*'
      Regexp.new(name_parts.join(separator), Regexp::IGNORECASE)
    end

    # Builds the fragment string, after all lines have been added.
    # It also checks to see if this Fragment is hidden. The hidden
    # Fragment check reads from the bottom to the top.
    #
    # Any quoted Fragments or signature Fragments are marked hidden if they
    # are below any visible Fragments. Visible Fragments are expected to
    # contain original content by the author. If they are below a quoted
    # Fragment, then the Fragment should be visible to give context to the
    # reply.
    #
    #     some original text (visible)
    #
    #     > do you have any two's? (quoted, visible)
    #
    #     Go fish! (visible)
    #
    #     > --
    #     > Player 1 (quoted, hidden)
    #
    #     --
    #     Player 2 (signature, hidden)
    #
    def finish_fragment
      if @fragment
        @fragment.finish
        if !@found_visible
          if @fragment.quoted? || @fragment.signature? ||
              @fragment.reply_header? || @fragment.to_s.strip == EMPTY
            @fragment.hidden = true
          else
            @found_visible = true
          end
        end
        # Lines are scanned bottom-up, so prepend to keep document order.
        @fragments.unshift(@fragment)
      end
      @fragment = nil
    end
  end

  # Represents a group of paragraphs in the email sharing common attributes.
  # Paragraphs should get their own fragment if they are a quoted area or a
  # signature.
  class Fragment < Struct.new(:quoted, :signature, :reply_header, :hidden)
    # Array of string lines that make up the content of this fragment, in
    # document order. Becomes nil once the fragment is finished.
    attr_reader :lines

    # This is reserved for the joined String that is built when this Fragment
    # is finished.
    attr_reader :content

    def initialize
      self.quoted = self.signature = self.reply_header = self.hidden = false
      @lines = []
      @current_block = []
      @content = nil
    end

    alias quoted? quoted
    alias signature? signature
    alias reply_header? reply_header
    alias hidden? hidden

    # Prepends a line to this fragment (lines arrive bottom-up). An empty
    # line resets the current block; any other line is prepended to it.
    #
    # line - A String line, or nil (ignored).
    def add_line(line)
      return unless line
      @lines.unshift(line)
      if line == ""
        @current_block.clear
      else
        @current_block.unshift(line)
      end
    end

    # The lines added since the most recent empty line, joined by newlines.
    #
    # Returns a String.
    def current_block
      @current_block.join("\n")
    end

    # Builds the string content by joining the lines, then releases the
    # line buffers.
    def finish
      @content = @lines.join("\n")
      @lines = @current_block = nil
    end

    # Returns the fragment text: the joined lines while the fragment is still
    # open, or the stored content once it is finished.
    def to_s
      @lines ? @lines.join("\n") : @content
    end

    def inspect
      "#{super.inspect} : #{to_s.inspect}"
    end
  end
end
448
+