lp_email_reply_parser 0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. data/LICENSE +22 -0
  2. data/README.md +68 -0
  3. data/Rakefile +135 -0
  4. data/lib/email_reply_parser.rb +448 -0
  5. data/lp_email_reply_parser.gemspec +77 -0
  6. data/test/email_reply_parser_test.rb +431 -0
  7. data/test/emails/correct_sig.txt +4 -0
  8. data/test/emails/email_1_1.txt +13 -0
  9. data/test/emails/email_1_2.txt +51 -0
  10. data/test/emails/email_1_3.txt +55 -0
  11. data/test/emails/email_1_4.txt +5 -0
  12. data/test/emails/email_1_5.txt +15 -0
  13. data/test/emails/email_1_6.txt +15 -0
  14. data/test/emails/email_1_7.txt +12 -0
  15. data/test/emails/email_1_8.txt +6 -0
  16. data/test/emails/email_1_9.txt +9 -0
  17. data/test/emails/email_2_1.txt +25 -0
  18. data/test/emails/email_2_2.txt +10 -0
  19. data/test/emails/email_2_3.txt +14 -0
  20. data/test/emails/email_2_4.txt +14 -0
  21. data/test/emails/email_2_5.txt +15 -0
  22. data/test/emails/email_2_6.txt +11 -0
  23. data/test/emails/email_2_7.txt +5 -0
  24. data/test/emails/email_2_8.txt +4 -0
  25. data/test/emails/email_2_9.txt +9 -0
  26. data/test/emails/email_2nd_paragraph_starting_with_on.txt +12 -0
  27. data/test/emails/email_BlackBerry.txt +3 -0
  28. data/test/emails/email_bullets.txt +22 -0
  29. data/test/emails/email_from_address_in_quote_header.txt +12 -0
  30. data/test/emails/email_from_name_in_quote_header.txt +12 -0
  31. data/test/emails/email_hyphens.txt +5 -0
  32. data/test/emails/email_iPhone.txt +3 -0
  33. data/test/emails/email_mentions_own_email_address.txt +6 -0
  34. data/test/emails/email_mentions_own_name.txt +6 -0
  35. data/test/emails/email_multi_word_sent_from_my_mobile_device.txt +3 -0
  36. data/test/emails/email_multiline_quote_header_es_mx.txt +8 -0
  37. data/test/emails/email_multiline_quote_header_fr.txt +8 -0
  38. data/test/emails/email_multiline_quote_header_from_first.txt +11 -0
  39. data/test/emails/email_multiline_quote_header_from_replyto_date_to_subject.txt +12 -0
  40. data/test/emails/email_multiline_quote_header_from_to_date_subject.txt +11 -0
  41. data/test/emails/email_multiline_quote_header_none.txt +11 -0
  42. data/test/emails/email_multiline_quote_header_pt_br.txt +8 -0
  43. data/test/emails/email_multiline_quote_header_with_asterisks.txt +21 -0
  44. data/test/emails/email_multiline_quote_header_with_cc.txt +9 -0
  45. data/test/emails/email_multiline_quote_header_with_multiline_headers.txt +14 -0
  46. data/test/emails/email_no_signature_deliminator.txt +7 -0
  47. data/test/emails/email_no_signature_deliminator_adds_a_middle_initial.txt +7 -0
  48. data/test/emails/email_one_is_not_on.txt +10 -0
  49. data/test/emails/email_sent_from_my_not_signature.txt +3 -0
  50. data/test/emails/email_was_showing_as_nothing_visible.txt +13 -0
  51. metadata +143 -0
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ The MIT License
2
+
3
+ Copyright (c) GitHub
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
22
+
data/README.md ADDED
@@ -0,0 +1,68 @@
1
+ # Email Reply Parser
2
+
3
+ [![Build Status](https://secure.travis-ci.org/lawrencepit/email_reply_parser.png?branch=master)](http://travis-ci.org/lawrencepit/email_reply_parser)
4
+ [![Code Climate](https://codeclimate.com/badge.png)](https://codeclimate.com/github/lawrencepit/email_reply_parser)
5
+ [![Gem Version](https://fury-badge.herokuapp.com/rb/email_reply_parser.png)](http://badge.fury.io/rb/email_reply_parser)
6
+
7
+ EmailReplyParser is a small library to parse plain text email content.
8
+
9
+ This is what GitHub uses to display comments that were created from
10
+ email replies. This code is being open sourced in an effort to
11
+ crowdsource the quality of our email representation.
12
+
13
+ ## Usage
14
+
15
+ To parse reply body:
16
+
17
+ `parsed_body = EmailReplyParser.parse_reply(email_body, from_address)`
18
+
19
+ Argument `from_address` is optional. If included, the parser will attempt to strip signatures based on the name in the from address (for signatures that don't have a standard delimiter).
20
+
21
+ ## Installation
22
+
23
+ Get it from [GitHub][github] or `gem install email_reply_parser`. Run `rake` to run the tests.
24
+
25
+ [github]: https://github.com/github/email_reply_parser
26
+
27
+ ## Contribute
28
+
29
+ If you'd like to hack on EmailReplyParser, start by forking the repo on GitHub:
30
+
31
+ https://github.com/github/email_reply_parser
32
+
33
+ The best way to get your changes merged back into core is as follows:
34
+
35
+ * Clone down your fork
36
+ * Create a thoughtfully named topic branch to contain your change
37
+ * Hack away
38
+ * Add tests and make sure everything still passes by running rake
39
+ * If you are adding new functionality, document it in the README
40
+ * Do not change the version number, I will do that on my end
41
+ * If necessary, rebase your commits into logical chunks, without errors
42
+ * Push the branch up to GitHub
43
+ * Send a pull request to the `github/email_reply_parser` project.
44
+
45
+ ## Known Issues
46
+
47
+ ### Quoted Headers
48
+
49
+ Quoted headers like these currently don't work with other languages:
50
+
51
+ On <date>, <author> wrote:
52
+
53
+ > blah
54
+
55
+ ### Weird Signatures
56
+
57
+ Not everyone follows this convention:
58
+
59
+ Hello
60
+
61
+ Saludos!!!!!!!!!!!!!!
62
+ Galactic President Superstar Mc Awesomeville
63
+ GitHub
64
+
65
+ **********************DISCLAIMER***********************************
66
+ * Note: blah blah blah *
67
+ **********************DISCLAIMER***********************************
68
+
data/Rakefile ADDED
@@ -0,0 +1,135 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+ require 'date'
4
+
5
+ #############################################################################
6
+ #
7
+ # Helper functions
8
+ #
9
+ #############################################################################
10
+
11
+ def name
12
+ @name ||= Dir['*.gemspec'].first.split('.').first
13
+ end
14
+
15
+ def version
16
+ line = File.read("lib/#{name}.rb")[/^\s*VERSION\s*=\s*.*/]
17
+ line.match(/.*VERSION\s*=\s*['"](.*)['"]/)[1]
18
+ end
19
+
20
+ def date
21
+ Date.today.to_s
22
+ end
23
+
24
+ def rubyforge_project
25
+ name
26
+ end
27
+
28
+ def gemspec_file
29
+ "#{name}.gemspec"
30
+ end
31
+
32
+ def gem_file
33
+ "#{name}-#{version}.gem"
34
+ end
35
+
36
+ def replace_header(head, header_name)
37
+ head.sub!(/(\.#{header_name}\s*= ').*'/) { "#{$1}#{send(header_name)}'"}
38
+ end
39
+
40
+ #############################################################################
41
+ #
42
+ # Standard tasks
43
+ #
44
+ #############################################################################
45
+
46
+ task :default => :test
47
+
48
+ require 'rake/testtask'
49
+ Rake::TestTask.new(:test) do |test|
50
+ test.libs << 'lib' << 'test'
51
+ test.pattern = 'test/*_test.rb'
52
+ test.verbose = true
53
+ end
54
+
55
# Opens an interactive irb session with the library already required.
#
# The legacy `-rubygems` switch (shorthand for `-r ubygems`) is deliberately
# not passed: RubyGems is loaded automatically on Ruby 1.9+, and the
# `ubygems.rb` shim the switch relied on no longer ships with current Rubies,
# so passing it makes irb fail to start.
desc "Open an irb session preloaded with this library"
task :console do
  sh "irb -r ./lib/#{name}.rb"
end
59
+
60
+ #############################################################################
61
+ #
62
+ # Custom tasks (add your own tasks here)
63
+ #
64
+ #############################################################################
65
+
66
+
67
+
68
+ #############################################################################
69
+ #
70
+ # Packaging tasks
71
+ #
72
+ #############################################################################
73
+
74
+ desc "Create tag v#{version} and build and push #{gem_file} to Rubygems"
75
+ task :release => :build do
76
+ unless `git branch` =~ /^\* master$/
77
+ puts "You must be on the master branch to release!"
78
+ exit!
79
+ end
80
+ sh "git commit --allow-empty -a -m 'Release #{version}'"
81
+ sh "git tag v#{version}"
82
+ sh "git push origin master"
83
+ sh "git push origin v#{version}"
84
+ sh "gem push pkg/#{name}-#{version}.gem"
85
+ end
86
+
87
+ desc "Build #{gem_file} into the pkg directory"
88
+ task :build => :gemspec do
89
+ sh "mkdir -p pkg"
90
+ sh "gem build #{gemspec_file}"
91
+ sh "mv #{gem_file} pkg"
92
+ end
93
+
94
+ desc "Generate #{gemspec_file}"
95
+ task :gemspec => :validate do
96
+ # read spec file and split out manifest section
97
+ spec = File.read(gemspec_file)
98
+ head, manifest, tail = spec.split(" # = MANIFEST =\n")
99
+
100
+ # replace name version and date
101
+ replace_header(head, :name)
102
+ replace_header(head, :version)
103
+ replace_header(head, :date)
104
+ #comment this out if your rubyforge_project has a different name
105
+ replace_header(head, :rubyforge_project)
106
+
107
+ # determine file list from git ls-files
108
+ files = `git ls-files`.
109
+ split("\n").
110
+ sort.
111
+ reject { |file| file =~ /^\./ }.
112
+ reject { |file| file =~ /^(rdoc|pkg)/ }.
113
+ map { |file| " #{file}" }.
114
+ join("\n")
115
+
116
+ # piece file back together and write
117
+ manifest = " s.files = %w[\n#{files}\n ]\n"
118
+ spec = [head, manifest, tail].join(" # = MANIFEST =\n")
119
+ File.open(gemspec_file, 'w') { |io| io.write(spec) }
120
+ puts "Updated #{gemspec_file}"
121
+ end
122
+
123
+ desc "Validate #{gemspec_file}"
124
+ task :validate do
125
+ libfiles = Dir['lib/*'] - ["lib/#{name}.rb", "lib/#{name}"]
126
+ unless libfiles.empty?
127
+ puts "Directory `lib` should only contain a `#{name}.rb` file and `#{name}` dir."
128
+ exit!
129
+ end
130
+ unless Dir['VERSION*'].empty?
131
+ puts "A `VERSION` file at root level violates Gem best practices."
132
+ exit!
133
+ end
134
+ end
135
+
@@ -0,0 +1,448 @@
1
+ require 'strscan'
2
+
3
+ # EmailReplyParser is a small library to parse plain text email content. The
4
+ # goal is to identify which fragments are quoted, part of a signature, or
5
+ # original body content. We want to support both top and bottom posters, so
6
+ # no simple "REPLY ABOVE HERE" content is used.
7
+ #
8
+ # Beyond RFC 5322 (which is handled by the [Ruby mail gem][mail]), there aren't
9
+ # any real standards for how emails are created. This attempts to parse out
10
+ # common conventions for things like replies:
11
+ #
12
+ # this is some text
13
+ #
14
+ # On <date>, <author> wrote:
15
+ # > blah blah
16
+ # > blah blah
17
+ #
18
+ # ... and signatures:
19
+ #
20
+ # this is some text
21
+ #
22
+ # --
23
+ # Bob
24
+ # http://homepage.com/~bob
25
+ #
26
+ # Each of these are parsed into Fragment objects.
27
+ #
28
+ # EmailReplyParser also attempts to figure out which of these blocks should
29
+ # be hidden from users.
30
+ #
31
+ # [mail]: https://github.com/mikel/mail
32
+ class EmailReplyParser
33
+ VERSION = "0.6"
34
+
35
+ # Public: Splits an email body into a list of Fragments.
36
+ #
37
+ # text - A String email body.
38
+ # from_address - from address of the email (optional)
39
+ #
40
+ # Returns an Email instance.
41
+ def self.read(text, from_address = "")
42
+ Email.new.read(text, from_address)
43
+ end
44
+
45
+ # Public: Get the text of the visible portions of the given email body.
46
+ #
47
+ # text - A String email body.
48
+ # from_address - from address of the email (optional)
49
+ #
50
+ # Returns a String.
51
+ def self.parse_reply(text, from_address = "")
52
+ self.read(text.to_s, from_address).visible_text
53
+ end
54
+
55
+ ### Emails
56
+
57
+ # An Email instance represents a parsed body String.
58
+ class Email
59
+ # Emails have an Array of Fragments.
60
+ attr_reader :fragments
61
+
62
+ # Public: Gets the combined text of the visible fragments of the email body.
63
+ #
64
+ # Returns a String.
65
+ def visible_text
66
+ fragments.select{|f| !f.hidden?}.map{|f| f.to_s}.join("\n").rstrip
67
+ end
68
+
69
+ # Splits the given text into a list of Fragments. This is roughly done by
70
+ # reversing the text and parsing from the bottom to the top. This way we
71
+ # can check for 'On <date>, <author> wrote:' lines above quoted blocks.
72
+ #
73
+ # text - A String email body.
74
+ # from_address - from address of the email (optional)
75
+ #
76
+ # Returns this same Email instance.
77
+ def read(text, from_address = "")
78
+ @fragments = []
79
+
80
+ # parse out the from name if one exists and save for use later
81
+ @from_name_raw = parse_raw_name_from_address(from_address)
82
+ @from_name_normalized = normalize_name(@from_name_raw)
83
+ @from_email = parse_email_from_address(from_address)
84
+
85
+ text = normalize_text(text)
86
+
87
+ # The text is reversed initially due to the way we check for hidden
88
+ # fragments.
89
+ text = text.reverse
90
+
91
+ # This determines if any 'visible' Fragment has been found. Once any
92
+ # visible Fragment is found, stop looking for hidden ones.
93
+ @found_visible = false
94
+
95
+ # This instance variable points to the current Fragment. If the matched
96
+ # line fits, it should be added to this Fragment. Otherwise, finish it
97
+ # and start a new Fragment.
98
+ @fragment = nil
99
+
100
+ # Use the StringScanner to pull out each line of the email content.
101
+ @scanner = StringScanner.new(text)
102
+ while line = @scanner.scan_until(/\n/n)
103
+ scan_line(line)
104
+ end
105
+
106
+ # Be sure to parse the last line of the email.
107
+ if (last_line = @scanner.rest.to_s).size > 0
108
+ scan_line(last_line, true)
109
+ end
110
+
111
+ # Finish up the final fragment. Finishing a fragment will detect any
112
+ # attributes (hidden, signature, reply), and join each line into a
113
+ # string.
114
+ finish_fragment
115
+
116
+ @scanner = @fragment = nil
117
+
118
+ self
119
+ end
120
+
121
+ private
122
+ EMPTY = "".freeze
123
+
124
+ COMMON_REPLY_HEADER_REGEXES = [
125
+ /^On(.+)wrote:$/nm,
126
+ /\A\d{4}\/\d{1,2}\/\d{1,2}\s+.{1,80}\s<[^@]+@[^@]+>\Z/,
127
+ ]
128
+
129
+ # Line optionally starts with whitespace, contains two or more hyphens or
130
+ # underscores, and ends with optional whitespace.
131
+ # Example: '---' or '___' or '--- '
132
+ MULTI_LINE_SIGNATURE_REGEX = /^\s*[-_]{2,}\s*$/
133
+
134
+ # Line optionally starts with whitespace, followed by one hyphen, followed by a word character
135
+ # Example: '-Sandro'
136
+ ONE_LINE_SIGNATURE_REGEX = /^\s*-\w/
137
+
138
+ ORIGINAL_MESSAGE_SIGNATURE_REGEX = /^[\s_-]+(Original Message)?[\s_-]+$/
139
+
140
+ # "Sent from my", followed by one to three words and an optional trailing <...> part.
141
+ # Example: "Sent from my iPhone 3G"
142
+ SENT_FROM_REGEX = /^Sent from my (\s*\w+){1,3}(\s*<.*>)?$/
143
+
144
+ SIGNATURE_REGEX = Regexp.new(Regexp.union(MULTI_LINE_SIGNATURE_REGEX, ONE_LINE_SIGNATURE_REGEX, ORIGINAL_MESSAGE_SIGNATURE_REGEX, SENT_FROM_REGEX).source, Regexp::NOENCODING)
145
+
146
+ # TODO: refactor out in a i18n.yml file
147
+ # Supports English, French, Es-Mexican, Pt-Brazilian
148
+ # Maps a label to a label-group
149
+ QUOTE_HEADER_LABELS = Hash[*{
150
+ :from => ["From", "De"],
151
+ :to => ["To", "Para", "A"],
152
+ :cc => ["CC"],
153
+ :reply_to => ["Reply-To"],
154
+ :date => ["Date", "Sent", "Enviado", "Enviada em", "Fecha"],
155
+ :subject => ["Subject", "Assunto", "Asunto", "Objet"]
156
+ }.map {|group, labels| labels.map {|label| [label.downcase, group]}}.flatten]
157
+
158
+ # normalize text so it is easier to parse
159
+ #
160
+ # text - text to normalize
161
+ #
162
+ # Returns a String
163
+ def normalize_text(text)
164
+ # in 1.9 we want to operate on the raw bytes
165
+ text = text.dup.force_encoding('binary') if text.respond_to?(:force_encoding)
166
+
167
+ # Normalize line endings.
168
+ text.gsub!("\r\n", "\n")
169
+
170
+ # Check for multi-line reply headers. Some clients break up
171
+ # the "On DATE, NAME <EMAIL> wrote:" line into multiple lines.
172
+ if match = text.match(/^(On\s(.+)wrote:)$/m)
173
+ # Remove all newlines from the reply header, as long as it does not contain a double newline;
174
+ # if it does, then we have grabbed something that is not actually a reply header.
175
+ text.gsub! match[1], match[1].gsub("\n", " ") unless match[1] =~ /\n\n/
176
+ end
177
+
178
+ # Some users may reply directly above a line of underscores.
179
+ # In order to ensure that these fragments are split correctly,
180
+ # make sure that all lines of underscores are preceded by
181
+ # at least two newline characters.
182
+ text.gsub!(/([^\n])(?=\n_{7}_+)$/m, "\\1\n")
183
+
184
+ text
185
+ end
186
+
187
+ # Parse a person's name from an e-mail address
188
+ #
189
+ # address - A String from address, e.g. "Jane Doe <jane@example.com>".
190
+ #
191
+ # Returns a String.
192
+ def parse_name_from_address(address)
193
+ normalize_name(parse_raw_name_from_address(address))
194
+ end
195
+
196
# Extracts the display name that precedes the <email> part of an address.
#
# address - A String such as '"Doe, Jane" <jane@example.com>'. nil is treated
#           like an empty String.
#
# Returns the name with surrounding quotes and whitespace removed, or EMPTY
# when the address has no leading display name.
def parse_raw_name_from_address(address)
  match = address.to_s.match(/^["']*([\w\s,]+)["']*\s*</)
  match ? match[1].strip : EMPTY
end
200
+
201
# Extracts the bare email address from a from address.
#
# address - A String such as 'Jane Doe <jane@example.com>' or a bare
#           'jane@example.com'. nil is treated like an empty String.
#
# Returns the text between the angle brackets when present, otherwise the
# address itself.
def parse_email_from_address(address)
  address = address.to_s
  match = address.match(/<(.*)>/)
  match ? match[1] : address
end
205
+
206
+ # Normalize a name to First Last
207
+ #
208
+ # name - name to normalize.
209
+ #
210
+ # Returns a String.
211
+ def normalize_name(name)
212
+ if name.include?(',')
213
+ make_name_first_then_last(name)
214
+ else
215
+ name
216
+ end
217
+ end
218
+
219
# Reorders a "Last, First" name into "First Last".
#
# When the part before the first comma already contains a space (for example
# "John Smith, Jr"), that part is taken to be the full name and is returned
# unchanged. Only the first two comma-separated parts are considered.
#
# name - A String name containing a comma.
#
# Returns a String. A missing or blank part (as in "Smith," or ",") is
# skipped instead of raising.
def make_name_first_then_last(name)
  last_part, first_part = name.split(',')
  last_part = last_part.to_s
  return last_part if last_part.include?(" ")

  [first_part.to_s.strip, last_part.strip].reject(&:empty?).join(" ")
end
227
+
228
+ ### Line-by-Line Parsing
229
+
230
+ # Scans the given line of text and determines which fragment it belongs to.
231
+ def scan_line(line, last = false)
232
+ line.chomp!("\n")
233
+ line.reverse!
234
+ line.rstrip!
235
+
236
+ # Mark the current Fragment as a signature if the current line is empty
237
+ # and the Fragment starts with a common signature indicator.
238
+ # Mark the current Fragment as a quote if the current line is empty
239
+ # and the Fragment starts with a multiline quote header.
240
+ scan_signature_or_quote if @fragment && line == EMPTY
241
+
242
+ # We're looking for leading `>`'s to see if this line is part of a
243
+ # quoted Fragment.
244
+ is_quoted = !!(line =~ /^>+/n)
245
+
246
+ # Note that a common reply header also counts as part of the quoted
247
+ # Fragment, even though it doesn't start with `>`.
248
+ unless @fragment &&
249
+ ((@fragment.quoted? == is_quoted) ||
250
+ (@fragment.quoted? && (line_is_reply_header?(line) || line == EMPTY)))
251
+ finish_fragment
252
+ @fragment = Fragment.new
253
+ @fragment.quoted = is_quoted
254
+ end
255
+
256
+ @fragment.add_line(line)
257
+ scan_signature_or_quote if last
258
+ end
259
+
260
+ def scan_signature_or_quote
261
+ if signature_line?(@fragment.lines.first)
262
+ @fragment.signature = true
263
+ finish_fragment
264
+ elsif multiline_quote_header_in_fragment?
265
+ @fragment.quoted = true
266
+ finish_fragment
267
+ end
268
+ end
269
+
270
+ # Returns +true+ if the current block in the current fragment has
271
+ # a multiline quote header, +false+ otherwise.
272
+ #
273
+ # The quote header we're looking for is mainly generated by Outlook
274
+ # clients. It's considered a quote header if the first 4 folded lines
275
+ # have one of the following forms:
276
+ #
277
+ # label: some text
278
+ # *label:* some text
279
+ #
280
+ # where a line like this:
281
+ #
282
+ # label: some text
283
+ # possibly indented text that belongs to the previous line
284
+ #
285
+ # is folded into:
286
+ #
287
+ # label: some text possibly indented text that belongs to the previous line
288
+ #
289
+ # and where label is a value from +QUOTE_HEADER_LABELS+ that appears
290
+ # only once in the first 4 lines and where each group of a label
291
+ # is represented at most once.
292
+ def multiline_quote_header_in_fragment?
293
+ folding = false
294
+ label_groups = []
295
+ @fragment.current_block.split("\n").each do |line|
296
+ if line =~ /\A\s*\*?([^:]+):(\s|\*)/
297
+ label = QUOTE_HEADER_LABELS[$1.downcase]
298
+ if label
299
+ return false if label_groups.include?(label)
300
+ return true if label_groups.length == 3
301
+ label_groups << label
302
+ folding = true
303
+ elsif !folding
304
+ return false
305
+ end
306
+ elsif !folding
307
+ return false
308
+ else
309
+ folding = true
310
+ end
311
+ end
312
+ return false
313
+ end
314
+
315
+ # Detects if a given line is the beginning of a signature
316
+ #
317
+ # line - A String line of text from the email.
318
+ #
319
+ # Returns true if the line is the beginning of a signature, or false.
320
+ def signature_line?(line)
321
+ line =~ SIGNATURE_REGEX || line_is_signature_name?(line)
322
+ end
323
+
324
+ # Detects if a given line is a common reply header.
325
+ #
326
+ # line - A String line of text from the email.
327
+ #
328
+ # Returns true if the line is a valid header, or false.
329
+ def line_is_reply_header?(line)
330
+ COMMON_REPLY_HEADER_REGEXES.each do |regex|
331
+ return true if line =~ regex
332
+ end
333
+ false
334
+ end
335
+
336
# Detects whether the sender's name makes up a large part of the given line,
# which suggests the line is the beginning of a signature.
#
# line - A String line of text from the email.
#
# Returns true if the line matches the sender's name and the name accounts
# for more than a quarter of the line's length, otherwise false.
def line_is_signature_name?(line)
  # Without a sender name there is nothing to match, so skip building the
  # regexp entirely.
  return false if @from_name_normalized.to_s.empty?
  return false unless line =~ generate_regexp_for_name

  (@from_name_normalized.size.to_f / line.size) > 0.25
end
345
+
346
# Builds a case-insensitive regexp for the sender's name that allows
# additional words or initials (word characters, dots and whitespace) between
# the parts of the name, so "Jane Doe" also matches "Jane Q. Doe".
#
# Returns a Regexp.
def generate_regexp_for_name
  separator = '[\w.\s]*'
  # Escape each part so the name is always matched literally.
  name_parts = @from_name_normalized.split(" ").map { |part| Regexp.escape(part) }
  Regexp.new(name_parts.join(separator), Regexp::IGNORECASE)
end
352
+
353
+ # Builds the fragment string, after all lines have been added.
354
+ # It also checks to see if this Fragment is hidden. The hidden
355
+ # Fragment check reads from the bottom to the top.
356
+ #
357
+ # Any quoted Fragments or signature Fragments are marked hidden if they
358
+ # are below any visible Fragments. Visible Fragments are expected to
359
+ # contain original content by the author. If they are below a quoted
360
+ # Fragment, then the Fragment should be visible to give context to the
361
+ # reply.
362
+ #
363
+ # some original text (visible)
364
+ #
365
+ # > do you have any two's? (quoted, visible)
366
+ #
367
+ # Go fish! (visible)
368
+ #
369
+ # > --
370
+ # > Player 1 (quoted, hidden)
371
+ #
372
+ # --
373
+ # Player 2 (signature, hidden)
374
+ #
375
+ def finish_fragment
376
+ if @fragment
377
+ @fragment.finish
378
+ if !@found_visible
379
+ if @fragment.quoted? || @fragment.signature? ||
380
+ @fragment.reply_header? || @fragment.to_s.strip == EMPTY
381
+ @fragment.hidden = true
382
+ else
383
+ @found_visible = true
384
+ end
385
+ end
386
+ @fragments.insert(0, @fragment)
387
+ end
388
+ @fragment = nil
389
+ end
390
+ end
391
+
392
# Represents a group of paragraphs in the email sharing common attributes.
# Paragraphs should get their own fragment if they are a quoted area or a
# signature.
#
# Every line passed to #add_line is prepended, so a caller that feeds lines
# from the bottom of the email upwards ends up with #lines and #to_s in
# reading order.
class Fragment < Struct.new(:quoted, :signature, :reply_header, :hidden)
  # Array of String lines that make up the content of this fragment, or nil
  # once the fragment has been finished.
  attr_reader :lines

  # The joined String that is built when this Fragment is finished; nil
  # before that.
  attr_reader :content

  # Starts an empty fragment with every flag (quoted, signature,
  # reply_header, hidden) set to false.
  def initialize
    self.quoted = self.signature = self.reply_header = self.hidden = false
    @lines = []
    @current_block = []
    @content = nil
  end

  alias quoted? quoted
  alias signature? signature
  alias reply_header? reply_header
  alias hidden? hidden

  # Prepends a line to the fragment. An empty line starts a new block, so the
  # current block is reset; any other line is prepended to the current block.
  #
  # line - A String line of text, or nil (ignored).
  #
  # Returns nothing meaningful.
  def add_line(line)
    return unless line
    @lines.unshift(line)
    if line == ""
      @current_block.clear
    else
      @current_block.unshift(line)
    end
  end

  # The lines added since the most recent empty line, i.e. the topmost
  # paragraph of the fragment collected so far.
  #
  # Returns the block's lines joined with newlines as a String; an empty
  # String when the block is empty or the fragment has been finished.
  def current_block
    @current_block ? @current_block.join("\n") : ""
  end

  # Builds the string content by joining the lines, then releases the line
  # buffers. After this, #lines is nil and #to_s returns #content.
  def finish
    @content = @lines.join("\n")
    @lines = @current_block = nil
  end

  # Returns the fragment's text as a String: the joined lines while the
  # fragment is being built, the stored content once it is finished.
  def to_s
    @lines ? @lines.join("\n") : @content
  end

  # Returns the Struct's inspect output followed by the fragment's text.
  def inspect
    "#{super.inspect} : #{to_s.inspect}"
  end
end
447
+ end
448
+