discourse_email_parser 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 628e0332e75b83a5206356998c27c4195f6f09b1
4
+ data.tar.gz: b985069be7090e9bd87f2944a80fcc54e5ba2ae8
5
+ SHA512:
6
+ metadata.gz: 472e57c5da6d459041e1a8a5f46f7c5ea2f153fb7dc742d049ad1b163713a2288d1e8d491f1657c05122e82ca826ee21481fa40c18a99fc5b71a8411a54ff061
7
+ data.tar.gz: 62aa7ce4d6827917f0000661ef2482da7000eec122d6c742f682dfab8e9ade254c20725592ae3e0c2d2278d8dc22ecd0c0b0b9eb943eac68ac37466ae0b21fb8
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ The MIT License
2
+
3
+ Copyright (c) GitHub
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
22
+
data/README.md ADDED
@@ -0,0 +1,15 @@
1
+ # Discourse Email Parser
2
+
3
+ DiscourseEmailParser is a small library to parse plain text email content.
4
+
5
+ ## Usage
6
+
7
+ To parse reply body:
8
+
9
+ `parsed_body = DiscourseEmailParser.parse_reply(email_body)`
10
+
11
+ ## Installation
12
+
13
+ Get it from [GitHub][github]. Run `rake` to run the tests.
14
+
15
+ [github]: https://github.com/discourse/discourse_email_parser
data/Rakefile ADDED
@@ -0,0 +1,130 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+ require 'date'
4
+
5
+ #############################################################################
6
+ #
7
+ # Helper functions
8
+ #
9
+ #############################################################################
10
+
11
# Returns the gem name: the file name of the first `*.gemspec` found in the
# current directory, cut at its first dot. The value is memoized in @name.
def name
  @name ||= begin
    first_gemspec = Dir.glob('*.gemspec').first
    first_gemspec.split('.').first
  end
end
14
+
15
# Returns the version String assigned to the VERSION constant inside
# lib/<name>.rb (the text between the quotes on that line).
def version
  library_source = File.read("lib/#{name}.rb")
  assignment = library_source[/^\s*VERSION\s*=\s*.*/]
  assignment.match(/.*VERSION\s*=\s*['"](.*)['"]/)[1]
end
19
+
20
# Returns today's date rendered with Date#to_s.
def date
  today = Date.today
  today.to_s
end
23
+
24
# Returns the gemspec file name for this gem: "<name>.gemspec".
def gemspec_file
  [name, 'gemspec'].join('.')
end
27
+
28
# Returns the file name of the built gem: "<name>-<version>.gem".
def gem_file
  format('%s-%s.gem', name, version)
end
31
+
32
# Rewrites, in place, the first `.<header_name> = '...'` assignment found in
# +head+ so that its quoted value becomes the result of calling the helper
# method named +header_name+.
#
# head        - A String holding gemspec source; it is mutated.
# header_name - A Symbol naming both the attribute and the helper to call.
#
# Returns +head+ when a substitution was made, or nil when nothing matched
# (the helper is not called in that case).
def replace_header(head, header_name)
  assignment = /(\.#{header_name}\s*= ').*'/
  head.sub!(assignment) do
    prefix = Regexp.last_match(1)
    "#{prefix}#{send(header_name)}'"
  end
end
35
+
36
+ #############################################################################
37
+ #
38
+ # Standard tasks
39
+ #
40
+ #############################################################################
41
+
42
# Running `rake` without arguments runs the test suite.
task :default => :test

require 'rake/testtask'

# Defines the :test task: every file matching test/**/*_test.rb is run with
# `lib` and `test` appended to the load path, in verbose mode.
Rake::TestTask.new(:test) do |t|
  t.libs.push('lib', 'test')
  t.pattern = 'test/**/*_test.rb'
  t.verbose = true
end
50
+
51
desc "Open an irb session preloaded with this library"
task :console do
  # The old `-rubygems` switch is really `-r ubygems`; that shim file is no
  # longer shipped by current RubyGems, so passing it makes irb abort with a
  # LoadError. RubyGems is loaded automatically, so only the library itself
  # needs to be required.
  sh "irb -r ./lib/#{name}.rb"
end
55
+
56
+ #############################################################################
57
+ #
58
+ # Custom tasks (add your own tasks here)
59
+ #
60
+ #############################################################################
61
+
62
+
63
+
64
+ #############################################################################
65
+ #
66
+ # Packaging tasks
67
+ #
68
+ #############################################################################
69
+
70
desc "Create tag v#{version} and build and push #{gem_file} to Rubygems"
# Builds the gem, then commits, tags, pushes the branch and the tag, and
# finally pushes the packaged gem. Refuses to run unless `git branch` reports
# master as the current branch. Any failing command stops the sequence,
# because `sh` raises on a non-zero exit status.
task :release => :build do
  on_master = `git branch` =~ /^\* master$/
  if !on_master
    puts "You must be on the master branch to release!"
    exit!
  end

  [
    "git commit --allow-empty -a -m 'Release #{version}'",
    "git tag v#{version}",
    "git push origin master",
    "git push origin v#{version}",
    "gem push pkg/#{name}-#{version}.gem"
  ].each { |command| sh command }
end
82
+
83
desc "Build #{gem_file} into the pkg directory"
# Regenerates the gemspec (via the :gemspec prerequisite), builds the gem in
# the current directory and moves the result into pkg/.
task :build => :gemspec do
  [
    "mkdir -p pkg",
    "gem build #{gemspec_file}",
    "mv #{gem_file} pkg"
  ].each { |command| sh command }
end
89
+
90
desc "Generate #{gemspec_file}"
# Rewrites the gemspec in place: refreshes the name/version/date header
# assignments and regenerates the `s.files` manifest from `git ls-files`.
#
# The gemspec must contain exactly two manifest marker lines; the text
# between them is replaced. Without both markers the task aborts before
# touching the file, because splitting and re-joining on a missing marker
# would append the manifest after the end of the specification and leave a
# broken gemspec behind.
task :gemspec => :validate do
  marker = " # = MANIFEST =\n"

  # read spec file and split out manifest section (limit -1 keeps a trailing
  # empty piece so the marker count can be checked reliably)
  spec = File.read(gemspec_file)
  parts = spec.split(marker, -1)
  unless parts.size == 3
    puts "#{gemspec_file} must contain exactly two `# = MANIFEST =` marker lines."
    exit!
  end
  head, _old_manifest, tail = parts

  # replace name version and date
  replace_header(head, :name)
  replace_header(head, :version)
  replace_header(head, :date)
  # Only refresh rubyforge_project when a helper of that name exists;
  # otherwise a gemspec that sets it would make replace_header raise
  # NoMethodError.
  replace_header(head, :rubyforge_project) if respond_to?(:rubyforge_project, true)

  # determine file list from git ls-files
  files = `git ls-files`.
    split("\n").
    sort.
    reject { |file| file =~ /^\./ }.
    reject { |file| file =~ /^(rdoc|pkg)/ }.
    map { |file| " #{file}" }.
    join("\n")

  # piece file back together and write
  manifest = " s.files = %w[\n#{files}\n ]\n"
  spec = [head, manifest, tail].join(marker)
  File.open(gemspec_file, 'w') { |io| io.write(spec) }
  puts "Updated #{gemspec_file}"
end
118
+
119
desc "Validate #{gemspec_file}"
# Checks the project layout before packaging: `lib` may contain only
# `<name>.rb` and a `<name>` directory, and no file whose name starts with
# VERSION may exist at the project root. Prints a message and exits
# immediately on the first violation.
task :validate do
  allowed = ["lib/#{name}.rb", "lib/#{name}"]
  stray = Dir['lib/*'].reject { |path| allowed.include?(path) }
  if stray.any?
    puts "Directory `lib` should only contain a `#{name}.rb` file and `#{name}` dir."
    exit!
  end

  version_files = Dir['VERSION*']
  if version_files.any?
    puts "A `VERSION` file at root level violates Gem best practices."
    exit!
  end
end
@@ -0,0 +1,51 @@
1
# Load the library relative to this file to read its VERSION constant.
# Requiring 'lib/...' through a '.' entry on $LOAD_PATH only works when the
# current directory happens to be the gem root, and it leaves '.' on the
# load path of whatever process evaluates this specification.
require File.expand_path('../lib/discourse_email_parser', __FILE__)

Gem::Specification.new do |s|
  s.name = 'discourse_email_parser'
  s.version = DiscourseEmailParser::VERSION
  s.date = Time.now.strftime('%Y-%m-%d')

  s.summary = "Small library to parse plain text email content."
  s.description = "DiscourseEmailParser is a small library to parse plain text email content."

  s.authors = ["Rick Olson", "Arpit Jalan"]
  s.email = 'arpit.jalan@discourse.org'
  s.homepage = 'http://github.com/discourse/discourse_email_parser'
  s.license = 'MIT'

  s.require_paths = %w[lib]

  # Every file shipped in the gem, including the test fixtures.
  s.files = %w[
    LICENSE
    README.md
    Rakefile
    discourse_email_parser.gemspec
    lib/discourse_email_parser.rb
    script/release
    script/test
    test/discourse_email_parser_test.rb
    test/emails/correct_sig.txt
    test/emails/email_1_1.txt
    test/emails/email_1_2.txt
    test/emails/email_1_3.txt
    test/emails/email_1_4.txt
    test/emails/email_1_5.txt
    test/emails/email_1_6.txt
    test/emails/email_1_7.txt
    test/emails/email_1_8.txt
    test/emails/email_2_1.txt
    test/emails/email_2_2.txt
    test/emails/email_BlackBerry.txt
    test/emails/email_bullets.txt
    test/emails/email_iPhone.txt
    test/emails/email_multi_word_sent_from_my_mobile_device.txt
    test/emails/email_one_is_not_on.txt
    test/emails/email_sent_from_my_not_signature.txt
    test/emails/email_sig_delimiter_in_middle_of_line.txt
    test/emails/greedy_on.txt
    test/emails/pathological.txt
  ]

  # Test files are the entries of s.files under test/ ending in _test.rb.
  s.test_files = s.files.select { |path| path =~ /^test\/.*_test\.rb/ }
end
@@ -0,0 +1,285 @@
1
+ require 'strscan'
2
+
3
+ # https://github.com/github/email_reply_parser/blob/master/lib/email_reply_parser.rb
4
+ #
5
+ # DiscourseEmailParser is a small library to parse plain text email content. The
6
+ # goal is to identify which fragments are quoted, part of a signature, or
7
+ # original body content. We want to support both top and bottom posters, so
8
+ # no simple "REPLY ABOVE HERE" content is used.
9
+ #
10
+ # Beyond RFC 5322 (which is handled by the [Ruby mail gem][mail]), there aren't
11
+ # any real standards for how emails are created. This attempts to parse out
12
+ # common conventions for things like replies:
13
+ #
14
+ # this is some text
15
+ #
16
+ # On <date>, <author> wrote:
17
+ # > blah blah
18
+ # > blah blah
19
+ #
20
+ # ... and signatures:
21
+ #
22
+ # this is some text
23
+ #
24
+ # --
25
+ # Bob
26
+ # http://homepage.com/~bob
27
+ #
28
+ # Each of these is parsed into a Fragment object.
29
+ #
30
+ # DiscourseEmailParser also attempts to figure out which of these blocks should
31
+ # be hidden from users.
32
+ #
33
+ # [mail]: https://github.com/mikel/mail
34
+ class DiscourseEmailParser
35
+ VERSION = "0.6.0"
36
+
37
+ # Public: Splits an email body into a list of Fragments.
38
+ #
39
+ # text - A String email body.
40
+ #
41
+ # Returns an Email instance.
42
+ def self.read(text)
43
+ Email.new.read(text)
44
+ end
45
+
46
+ # Public: Get the text of the visible portions of the given email body.
47
+ #
48
+ # text - A String email body.
49
+ #
50
+ # Returns a String.
51
+ def self.parse_reply(text)
52
+ self.read(text).visible_text
53
+ end
54
+
55
+ ### Emails
56
+
57
+ # An Email instance represents a parsed body String.
58
+ class Email
59
+ # Emails have an Array of Fragments.
60
+ attr_reader :fragments
61
+
62
+ def initialize
63
+ @fragments = []
64
+ end
65
+
66
+ # Public: Gets the combined text of the visible fragments of the email body.
67
+ #
68
+ # Returns a String.
69
+ def visible_text
70
+ fragments.select{|f| !f.hidden?}.map{|f| f.to_s}.join("\n").rstrip
71
+ end
72
+
73
+ # Splits the given text into a list of Fragments. This is roughly done by
74
+ # reversing the text and parsing from the bottom to the top. This way we
75
+ # can check for 'On <date>, <author> wrote:' lines above quoted blocks.
76
+ #
77
+ # text - A String email body.
78
+ #
79
+ # Returns this same Email instance.
80
+ def read(text)
81
+ # in 1.9 we want to operate on the raw bytes
82
+ text = text.dup.force_encoding('binary') if text.respond_to?(:force_encoding)
83
+
84
+ # Normalize line endings.
85
+ text.gsub!("\r\n", "\n")
86
+
87
+ # Check for multi-line reply headers. Some clients break up
88
+ # the "On DATE, NAME <EMAIL> wrote:" line into multiple lines.
89
+ if text =~ /^(?!On.*On\s.+?wrote:)(On\s(.+?)wrote:)$/nm
90
+ # Remove all new lines from the reply header.
91
+ text.gsub! $1, $1.gsub("\n", " ")
92
+ end
93
+
94
+ # Check for "---- Original Message ----"
95
+ # and strip email content after that part
96
+ if text =~ /^([\s_-]+Original (?i)message?[\s_-]+$.*)/nm
97
+ text.gsub!($1, "")
98
+ end
99
+
100
+ # Some users may reply directly above a line of underscores.
101
+ # In order to ensure that these fragments are split correctly,
102
+ # make sure that all lines of underscores are preceded by
103
+ # at least two newline characters.
104
+ text.gsub!(/([^\n])(?=\n_{7}_+)$/m, "\\1\n")
105
+
106
+ # The text is reversed initially due to the way we check for hidden
107
+ # fragments.
108
+ text = text.reverse
109
+
110
+ # This determines if any 'visible' Fragment has been found. Once any
111
+ # visible Fragment is found, stop looking for hidden ones.
112
+ @found_visible = false
113
+
114
+ # This instance variable points to the current Fragment. If the matched
115
+ # line fits, it should be added to this Fragment. Otherwise, finish it
116
+ # and start a new Fragment.
117
+ @fragment = nil
118
+
119
+ # Use the StringScanner to pull out each line of the email content.
120
+ @scanner = StringScanner.new(text)
121
+ while line = @scanner.scan_until(/\n/n)
122
+ scan_line(line)
123
+ end
124
+
125
+ # Be sure to parse the last line of the email.
126
+ if (last_line = @scanner.rest.to_s).size > 0
127
+ scan_line(last_line)
128
+ end
129
+
130
+ # Finish up the final fragment. Finishing a fragment will detect any
131
+ # attributes (hidden, signature, reply), and join each line into a
132
+ # string.
133
+ finish_fragment
134
+
135
+ @scanner = @fragment = nil
136
+
137
+ # Now that parsing is done, reverse the order.
138
+ @fragments.reverse!
139
+ self
140
+ end
141
+
142
+ private
143
+ EMPTY = "".freeze
144
+ SIGNATURE = '(?m)(--\s*$|__\s*$)|(^(\w+\s*){1,3} ym morf tneS$)'
145
+
146
+ begin
147
+ require 're2'
148
+ SIG_REGEX = RE2::Regexp.new(SIGNATURE)
149
+ rescue LoadError
150
+ SIG_REGEX = Regexp.new(SIGNATURE)
151
+ end
152
+
153
+ ### Line-by-Line Parsing
154
+
155
+ # Scans the given line of text and figures out which fragment it belongs
156
+ # to.
157
+ #
158
+ # line - A String line of text from the email.
159
+ #
160
+ # Returns nothing.
161
+ def scan_line(line)
162
+ line.chomp!("\n")
163
+ line.lstrip! unless SIG_REGEX.match(line)
164
+
165
+ # We're looking for leading `>`'s to see if this line is part of a
166
+ # quoted Fragment.
167
+ is_quoted = !!(line =~ /(>+)$/n)
168
+
169
+ # Mark the current Fragment as a signature if the current line is empty
170
+ # and the Fragment starts with a common signature indicator.
171
+ if @fragment && line == EMPTY
172
+ if SIG_REGEX.match @fragment.lines.last
173
+ @fragment.signature = true
174
+ finish_fragment
175
+ end
176
+ end
177
+
178
+ # If the line matches the current fragment, add it. Note that a common
179
+ # reply header also counts as part of the quoted Fragment, even though
180
+ # it doesn't start with `>`.
181
+ if @fragment &&
182
+ ((@fragment.quoted? == is_quoted) ||
183
+ (@fragment.quoted? && (quote_header?(line) || line == EMPTY)))
184
+ @fragment.lines << line
185
+
186
+ # Otherwise, finish the fragment and start a new one.
187
+ else
188
+ finish_fragment
189
+ @fragment = Fragment.new(is_quoted, line)
190
+ end
191
+ end
192
+
193
+ # Detects if a given line is a header above a quoted area. It is only
194
+ # checked for lines preceding quoted regions.
195
+ #
196
+ # line - A String line of text from the email.
197
+ #
198
+ # Returns true if the line is a valid header, or false.
199
+ def quote_header?(line)
200
+ line =~ /^:etorw.*nO$/n
201
+ end
202
+
203
+ # Builds the fragment string and reverses it, after all lines have been
204
+ # added. It also checks to see if this Fragment is hidden. The hidden
205
+ # Fragment check reads from the bottom to the top.
206
+ #
207
+ # Any quoted Fragments or signature Fragments are marked hidden if they
208
+ # are below any visible Fragments. Visible Fragments are expected to
209
+ # contain original content by the author. If they are below a quoted
210
+ # Fragment, then the Fragment should be visible to give context to the
211
+ # reply.
212
+ #
213
+ # some original text (visible)
214
+ #
215
+ # > do you have any two's? (quoted, visible)
216
+ #
217
+ # Go fish! (visible)
218
+ #
219
+ # > --
220
+ # > Player 1 (quoted, hidden)
221
+ #
222
+ # --
223
+ # Player 2 (signature, hidden)
224
+ #
225
+ def finish_fragment
226
+ if @fragment
227
+ @fragment.finish
228
+ if !@found_visible
229
+ if @fragment.quoted? || @fragment.signature? ||
230
+ @fragment.to_s.strip == EMPTY
231
+ @fragment.hidden = true
232
+ else
233
+ @found_visible = true
234
+ end
235
+ end
236
+ @fragments << @fragment
237
+ end
238
+ @fragment = nil
239
+ end
240
+ end
241
+
242
+ ### Fragments
243
+
244
+ # Represents a group of paragraphs in the email sharing common attributes.
245
+ # Paragraphs should get their own fragment if they are a quoted area or a
246
+ # signature.
247
+ class Fragment < Struct.new(:quoted, :signature, :hidden)
248
+ # This is an Array of String lines of content. Since the content is
249
+ # reversed, this array is backwards, and contains reversed strings.
250
+ attr_reader :lines,
251
+
252
+ # This is reserved for the joined String that is build when this Fragment
253
+ # is finished.
254
+ :content
255
+
256
+ def initialize(quoted, first_line)
257
+ self.signature = self.hidden = false
258
+ self.quoted = quoted
259
+ @lines = [first_line]
260
+ @content = nil
261
+ @lines.compact!
262
+ end
263
+
264
+ alias quoted? quoted
265
+ alias signature? signature
266
+ alias hidden? hidden
267
+
268
+ # Builds the string content by joining the lines and reversing them.
269
+ #
270
+ # Returns nothing.
271
+ def finish
272
+ @content = @lines.join("\n")
273
+ @lines = nil
274
+ @content.reverse!
275
+ end
276
+
277
+ def to_s
278
+ @content
279
+ end
280
+
281
+ def inspect
282
+ to_s.inspect
283
+ end
284
+ end
285
+ end
data/script/release ADDED
@@ -0,0 +1,2 @@
1
+ #!/bin/sh
2
+ rake release
data/script/test ADDED
@@ -0,0 +1,2 @@
1
+ #!/bin/sh
2
+ rake test
@@ -0,0 +1,202 @@
1
+ require 'rubygems'
2
+ require 'test/unit'
3
+ require 'pathname'
4
+ require 'pp'
5
+
6
+ dir = Pathname.new File.expand_path(File.dirname(__FILE__))
7
+ require dir + '..' + 'lib' + 'discourse_email_parser'
8
+
9
+ EMAIL_FIXTURE_PATH = dir + 'emails'
10
+
11
# Exercises DiscourseEmailParser against the fixture emails in test/emails.
# Fixtures are loaded through the #fixture and #email helpers at the bottom.
# assert_match calls are parenthesised so the regexp literal is not reported
# as an ambiguous first argument when the suite runs with warnings enabled.
class DiscourseEmailParserTest < Test::Unit::TestCase
  def test_reads_simple_body
    reply = email(:email_1_1)
    assert_equal 3, reply.fragments.size

    assert reply.fragments.none? { |f| f.quoted? }
    assert_equal [false, true, true],
      reply.fragments.map { |f| f.signature? }
    assert_equal [false, true, true],
      reply.fragments.map { |f| f.hidden? }

    assert_equal "Hi folks\n\n" \
      "What is the best way to clear a Riak bucket of all key, values after\n" \
      "running a test?\n" \
      "I am currently using the Java HTTP API.\n", reply.fragments[0].to_s

    assert_equal "--Abhishek Kona\n\n", reply.fragments[1].to_s
  end

  def test_reads_top_post
    reply = email(:email_1_3)
    assert_equal 5, reply.fragments.size

    assert_equal [false, false, true, false, false],
      reply.fragments.map { |f| f.quoted? }
    assert_equal [false, true, true, true, true],
      reply.fragments.map { |f| f.hidden? }
    assert_equal [false, true, false, false, true],
      reply.fragments.map { |f| f.signature? }

    assert_match(/^Oh thanks.\n\nHaving/, reply.fragments[0].to_s)
    assert_match(/^--A/, reply.fragments[1].to_s)
    assert_match(/^On [^\:]+\:/, reply.fragments[2].to_s)
    assert_match(/^_/, reply.fragments[4].to_s)
  end

  def test_reads_bottom_post
    reply = email(:email_1_2)
    assert_equal 6, reply.fragments.size

    assert_equal [false, true, false, true, false, false],
      reply.fragments.map { |f| f.quoted? }
    assert_equal [false, false, false, false, false, true],
      reply.fragments.map { |f| f.signature? }
    assert_equal [false, false, false, true, true, true],
      reply.fragments.map { |f| f.hidden? }

    assert_equal "Hi,", reply.fragments[0].to_s
    assert_match(/^On [^\:]+\:/, reply.fragments[1].to_s)
    assert_match(/^You can list/, reply.fragments[2].to_s)
    assert_match(/^> /, reply.fragments[3].to_s)
    assert_match(/^_/, reply.fragments[5].to_s)
  end

  def test_reads_inline_replies
    reply = email(:email_1_8)
    assert_equal 7, reply.fragments.size

    assert_equal [true, false, true, false, true, false, false],
      reply.fragments.map { |f| f.quoted? }
    assert_equal [false, false, false, false, false, false, true],
      reply.fragments.map { |f| f.signature? }
    assert_equal [false, false, false, false, true, true, true],
      reply.fragments.map { |f| f.hidden? }

    assert_match(/^On [^\:]+\:/, reply.fragments[0].to_s)
    assert_match(/^I will reply/, reply.fragments[1].to_s)
    assert_match("okay?", reply.fragments[2].to_s)
    assert_match(/^and under this./, reply.fragments[3].to_s)
    assert_match(/inline/, reply.fragments[4].to_s)
    assert_equal "\n", reply.fragments[5].to_s
    assert_equal "--\nHey there, this is my signature\n", reply.fragments[6].to_s
  end

  def test_recognizes_date_string_above_quote
    reply = email :email_1_4

    assert_match(/^Awesome/, reply.fragments[0].to_s)
    assert_match(/^On/, reply.fragments[1].to_s)
    assert_match(/Loader/, reply.fragments[1].to_s)
  end

  def test_a_complex_body_with_only_one_fragment
    reply = email :email_1_5

    assert_equal 1, reply.fragments.size
  end

  def test_reads_email_with_correct_signature
    reply = email :correct_sig

    assert_equal 2, reply.fragments.size
    assert_equal [false, false], reply.fragments.map { |f| f.quoted? }
    assert_equal [false, true], reply.fragments.map { |f| f.signature? }
    assert_equal [false, true], reply.fragments.map { |f| f.hidden? }
    assert_match(/^-- \nrick/, reply.fragments[1].to_s)
  end

  def test_deals_with_multiline_reply_headers
    reply = email :email_1_6

    assert_match(/^I get/, reply.fragments[0].to_s)
    assert_match(/^On/, reply.fragments[1].to_s)
    assert_match(/Was this/, reply.fragments[1].to_s)
  end

  def test_deals_with_windows_line_endings
    reply = email :email_1_7

    assert_match(/:\+1:/, reply.fragments[0].to_s)
    assert_match(/^On/, reply.fragments[1].to_s)
    assert_match(/Steps 0-2/, reply.fragments[1].to_s)
  end

  def test_does_not_modify_input_string
    original = "The Quick Brown Fox Jumps Over The Lazy Dog"
    DiscourseEmailParser.read original
    assert_equal "The Quick Brown Fox Jumps Over The Lazy Dog", original
  end

  def test_returns_only_the_visible_fragments_as_a_string
    reply = email(:email_2_1)
    assert_equal reply.fragments.select{|r| !r.hidden?}.map{|r| r.to_s}.join("\n").rstrip, reply.visible_text
  end

  def test_parse_out_just_top_for_outlook_reply
    assert_equal "Outlook with a reply",
      DiscourseEmailParser.parse_reply(fixture("email_2_1"))
  end

  def test_parse_out_just_top_for_outlook_with_reply_directly_above_line
    assert_equal "Outlook with a reply directly above line",
      DiscourseEmailParser.parse_reply(fixture("email_2_2"))
  end

  def test_parse_out_sent_from_iPhone
    assert_equal "Here is another email",
      DiscourseEmailParser.parse_reply(fixture("email_iPhone"))
  end

  def test_parse_out_sent_from_BlackBerry
    assert_equal "Here is another email",
      DiscourseEmailParser.parse_reply(fixture("email_BlackBerry"))
  end

  def test_parse_out_send_from_multiword_mobile_device
    assert_equal "Here is another email",
      DiscourseEmailParser.parse_reply(fixture("email_multi_word_sent_from_my_mobile_device"))
  end

  def test_do_not_parse_out_send_from_in_regular_sentence
    assert_equal "Here is another email\n\nSent from my desk, is much easier then my mobile phone.",
      DiscourseEmailParser.parse_reply(fixture("email_sent_from_my_not_signature"))
  end

  def test_retains_bullets
    assert_equal "test 2 this should list second\n\nand have spaces\n\nand retain this formatting\n\n\n - how about bullets\n - and another",
      DiscourseEmailParser.parse_reply(fixture("email_bullets"))
  end

  def test_parse_reply
    body = fixture("email_1_2")
    assert_equal DiscourseEmailParser.read(body).visible_text, DiscourseEmailParser.parse_reply(body)
  end

  def test_one_is_not_on
    reply = email("email_one_is_not_on")
    assert_match(/One outstanding question/, reply.fragments[0].to_s)
    assert_match(/^On Oct 1, 2012/, reply.fragments[1].to_s)
  end

  def test_multiple_on
    reply = email("greedy_on")
    assert_match(/^On your remote host/, reply.fragments[0].to_s)
    assert_match(/^On 9 Jan 2014/, reply.fragments[1].to_s)
    assert_equal [false, true, false], reply.fragments.map { |f| f.quoted? }
    assert_equal [false, false, false], reply.fragments.map { |f| f.signature? }
    assert_equal [false, true, true], reply.fragments.map { |f| f.hidden? }
  end

  def test_doesnt_remove_signature_delimiter_in_mid_line
    reply = email(:email_sig_delimiter_in_middle_of_line)
    assert_equal 1, reply.fragments.size
  end

  # Reads the raw text of the fixture test/emails/<name>.txt.
  #
  # name - A String or Symbol fixture name without the .txt extension.
  #
  # Returns a String.
  def fixture(name)
    IO.read EMAIL_FIXTURE_PATH.join("#{name}.txt").to_s
  end

  # Parses the named fixture.
  #
  # name - A String or Symbol fixture name without the .txt extension.
  #
  # Returns a DiscourseEmailParser::Email.
  def email(name)
    DiscourseEmailParser.read fixture(name)
  end
end
@@ -0,0 +1,4 @@
1
+ this is an email with a correct -- signature.
2
+
3
+ --
4
+ rick
@@ -0,0 +1,13 @@
1
+ Hi folks
2
+
3
+ What is the best way to clear a Riak bucket of all key, values after
4
+ running a test?
5
+ I am currently using the Java HTTP API.
6
+
7
+ --Abhishek Kona
8
+
9
+
10
+ _______________________________________________
11
+ riak-users mailing list
12
+ riak-users@lists.basho.com
13
+ http://lists.basho.com/mailman/listinfo/riak-users_lists.basho.com
@@ -0,0 +1,51 @@
1
+ Hi,
2
+ On Tue, 2011-03-01 at 18:02 +0530, Abhishek Kona wrote:
3
+ > Hi folks
4
+ >
5
+ > What is the best way to clear a Riak bucket of all key, values after
6
+ > running a test?
7
+ > I am currently using the Java HTTP API.
8
+
9
+ You can list the keys for the bucket and call delete for each. Or if you
10
+ put the keys (and kept track of them in your test) you can delete them
11
+ one at a time (without incurring the cost of calling list first.)
12
+
13
+ Something like:
14
+
15
+ String bucket = "my_bucket";
16
+ BucketResponse bucketResponse = riakClient.listBucket(bucket);
17
+ RiakBucketInfo bucketInfo = bucketResponse.getBucketInfo();
18
+
19
+ for(String key : bucketInfo.getKeys()) {
20
+ riakClient.delete(bucket, key);
21
+ }
22
+
23
+
24
+ would do it.
25
+
26
+ See also
27
+
28
+ http://wiki.basho.com/REST-API.html#Bucket-operations
29
+
30
+ which says
31
+
32
+ "At the moment there is no straightforward way to delete an entire
33
+ Bucket. There is, however, an open ticket for the feature. To delete all
34
+ the keys in a bucket, you’ll need to delete them all individually."
35
+
36
+ >
37
+ > -- Abhishek Kona
38
+ >
39
+ >
40
+ > _______________________________________________
41
+ > riak-users mailing list
42
+ > riak-users@lists.basho.com
43
+ > http://lists.basho.com/mailman/listinfo/riak-users_lists.basho.com
44
+
45
+
46
+
47
+
48
+ _______________________________________________
49
+ riak-users mailing list
50
+ riak-users@lists.basho.com
51
+ http://lists.basho.com/mailman/listinfo/riak-users_lists.basho.com
@@ -0,0 +1,55 @@
1
+ Oh thanks.
2
+
3
+ Having the function would be great.
4
+
5
+ --Abhishek Kona
6
+
7
+ On 01/03/11 7:07 PM, Russell Brown wrote:
8
+ > Hi,
9
+ > On Tue, 2011-03-01 at 18:02 +0530, Abhishek Kona wrote:
10
+ >> Hi folks
11
+ >>
12
+ >> What is the best way to clear a Riak bucket of all key, values after
13
+ >> running a test?
14
+ >> I am currently using the Java HTTP API.
15
+ > You can list the keys for the bucket and call delete for each. Or if you
16
+ > put the keys (and kept track of them in your test) you can delete them
17
+ > one at a time (without incurring the cost of calling list first.)
18
+ >
19
+ > Something like:
20
+ >
21
+ > String bucket = "my_bucket";
22
+ > BucketResponse bucketResponse = riakClient.listBucket(bucket);
23
+ > RiakBucketInfo bucketInfo = bucketResponse.getBucketInfo();
24
+ >
25
+ > for(String key : bucketInfo.getKeys()) {
26
+ > riakClient.delete(bucket, key);
27
+ > }
28
+ >
29
+ >
30
+ > would do it.
31
+ >
32
+ > See also
33
+ >
34
+ > http://wiki.basho.com/REST-API.html#Bucket-operations
35
+ >
36
+ > which says
37
+ >
38
+ > "At the moment there is no straightforward way to delete an entire
39
+ > Bucket. There is, however, an open ticket for the feature. To delete all
40
+ > the keys in a bucket, you’ll need to delete them all individually."
41
+ >
42
+ >> --Abhishek Kona
43
+ >>
44
+ >>
45
+ >> _______________________________________________
46
+ >> riak-users mailing list
47
+ >> riak-users@lists.basho.com
48
+ >> http://lists.basho.com/mailman/listinfo/riak-users_lists.basho.com
49
+ >
50
+
51
+
52
+ _______________________________________________
53
+ riak-users mailing list
54
+ riak-users@lists.basho.com
55
+ http://lists.basho.com/mailman/listinfo/riak-users_lists.basho.com
@@ -0,0 +1,8 @@
1
+ Awesome! I haven't had another problem with it.
2
+
3
+ On Aug 22, 2011, at 7:37 PM, defunkt<reply@reply.github.com> wrote:
4
+
5
+
6
+
7
+
8
+ > Loader seems to be working well.
@@ -0,0 +1,15 @@
1
+ One: Here's what I've got.
2
+
3
+ - This would be the first bullet point that wraps to the second line
4
+ to the next
5
+ - This is the second bullet point and it doesn't wrap
6
+ - This is the third bullet point and I'm having trouble coming up with enough
7
+ to say
8
+ - This is the fourth bullet point
9
+
10
+ Two:
11
+ - Here is another bullet point
12
+ - And another one
13
+
14
+ This is a paragraph that talks about a bunch of stuff. It goes on and on
15
+ for a while.
@@ -0,0 +1,15 @@
1
+ I get proper rendering as well.
2
+
3
+ Sent from a magnificent torch of pixels
4
+
5
+ On Dec 16, 2011, at 12:47 PM, Corey Donohoe
6
+ <reply@reply.github.com>
7
+ wrote:
8
+
9
+ > Was this caching related or fixed already? I get proper rendering here.
10
+ >
11
+ > ![](https://img.skitch.com/20111216-m9munqjsy112yqap5cjee5wr6c.jpg)
12
+ >
13
+ > ---
14
+ > Reply to this email directly or view it on GitHub:
15
+ > https://github.com/github/github/issues/2278#issuecomment-3182418
@@ -0,0 +1,12 @@
1
+ :+1:
2
+
3
+ On Tue, Sep 25, 2012 at 8:59 AM, Chris Wanstrath
4
+ <notifications@github.com>wrote:
5
+
6
+ > Steps 0-2 are in prod. Gonna let them sit for a bit then start cleaning up
7
+ > the old code with 3 & 4.
8
+ >
9
+ >
10
+ > Reply to this email directly or view it on GitHub.
11
+ >
12
+ >
@@ -0,0 +1,37 @@
1
+ On Tue, Apr 29, 2014 at 4:22 PM, Example Dev <sugar@example.com>wrote:
2
+
3
+ > okay. Well, here's some stuff I can write.
4
+ >
5
+ > And if I write a 2 second line you and maybe reply under this?
6
+ >
7
+ > Or if you didn't really feel like it, you could reply under this line.Or
8
+ > if you didn't really feel like it, you could reply under this line. Or if
9
+ > you didn't really feel like it, you could reply under this line. Or if you
10
+ > didn't really feel like it, you could reply under this line.
11
+ >
12
+
13
+ I will reply under this one
14
+
15
+ >
16
+ > okay?
17
+ >
18
+
19
+ and under this.
20
+
21
+ >
22
+ > -- Tim
23
+ >
24
+ > On Tue, April 29, 2014 at 4:21 PM, Tim Haines <tmhaines@example.com> wrote:
25
+ > > hi there
26
+ > >
27
+ > > After you reply to this I'm going to send you some inline responses.
28
+ > >
29
+ > > --
30
+ > > Hey there, this is my signature
31
+ >
32
+ >
33
+ >
34
+
35
+
36
+ --
37
+ Hey there, this is my signature
@@ -0,0 +1,25 @@
1
+ Outlook with a reply
2
+
3
+
4
+ ------------------------------
5
+
6
+ *From:* Google Apps Sync Team [mailto:mail-noreply@google.com]
7
+ *Sent:* Thursday, February 09, 2012 1:36 PM
8
+ *To:* jow@xxxx.com
9
+ *Subject:* Google Apps Sync was updated!
10
+
11
+
12
+
13
+ Dear Google Apps Sync user,
14
+
15
+ Google Apps Sync for Microsoft Outlook® was recently updated. Your computer
16
+ now has the latest version (version 2.5). This release includes bug fixes
17
+ to improve product reliability. For more information about these and other
18
+ changes, please see the help article here:
19
+
20
+ http://www.google.com/support/a/bin/answer.py?answer=153463
21
+
22
+ Sincerely,
23
+
24
+ The Google Apps Sync Team.
25
+
@@ -0,0 +1,10 @@
1
+ Outlook with a reply directly above line
2
+ ________________________________________
3
+ From: CRM Comments [crm-comment@example.com]
4
+ Sent: Friday, 23 March 2012 5:08 p.m.
5
+ To: John S. Greene
6
+ Subject: [contact:106] John Greene
7
+
8
+ A new comment has been added to the Contact named 'John Greene':
9
+
10
+ I am replying to a comment.
@@ -0,0 +1,3 @@
1
+ Here is another email
2
+
3
+ Sent from my BlackBerry
@@ -0,0 +1,22 @@
1
+ test 2 this should list second
2
+
3
+ and have spaces
4
+
5
+ and retain this formatting
6
+
7
+
8
+ - how about bullets
9
+ - and another
10
+
11
+
12
+ On Fri, Feb 24, 2012 at 10:19 AM, <examples@email.goalengine.com> wrote:
13
+
14
+ > Give us an example of how you applied what they learned to achieve
15
+ > something in your organization
16
+
17
+
18
+
19
+
20
+ --
21
+
22
+ *Joe Smith | Director, Product Management*
@@ -0,0 +1,3 @@
1
+ Here is another email
2
+
3
+ Sent from my iPhone
@@ -0,0 +1,3 @@
1
+ Here is another email
2
+
3
+ Sent from my Verizon Wireless BlackBerry
@@ -0,0 +1,10 @@
1
+ Thank, this is really helpful.
2
+
3
+ One outstanding question I had:
4
+
5
+ Locally (on development), when I run...
6
+
7
+ On Oct 1, 2012, at 11:55 PM, Dave Tapley wrote:
8
+
9
+ > The good news is that I've found a much better query for lastLocation.
10
+ >
@@ -0,0 +1,3 @@
1
+ Here is another email
2
+
3
+ Sent from my desk, is much easier then my mobile phone.
@@ -0,0 +1,7 @@
1
+ Hi there!
2
+
3
+ Stuff happened.
4
+
5
+ And here is a fix -- this is not a signature.
6
+
7
+ kthxbai
@@ -0,0 +1,16 @@
1
+ On your remote host you can run:
2
+
3
+ telnet 127.0.0.1 52698
4
+
5
+ This should connect to TextMate (on your Mac, via the tunnel). If that
6
+ fails, the tunnel is not working.
7
+
8
+ On 9 Jan 2014, at 2:47, George Plymale wrote:
9
+
10
+ > I am having an odd issue wherein suddenly port forwarding stopped
11
+ > working in a particular scenario for me. By default I have ssh set to
12
+ > use the following config (my ~/.ssh/config file):
13
+ > […]
14
+ > ---
15
+ > Reply to this email directly or view it on GitHub:
16
+ > https://github.com/textmate/rmate/issues/29
@@ -0,0 +1,20 @@
1
+ I think you're onto something. I will try to fix the problem as soon as I
2
+ get back to a computer.
3
+ On Dec 8, 2013 2:10 PM, "John Sullivan" <notifications@github.com> wrote:
4
+
5
+ > I think your code is shortening the reference sequence you return to be
6
+ > the same size as the query sequence, and we end up losing data. Here's some
7
+ > debugging output from me putzing around...
8
+ >
9
+ > name: gi|253409428|ref|GQ227366.1| Influenza A virus (A/pika/Qinghai/BI/2007(H5N1)) segment 1 polymerase PB2 (PB2) gene, complete cds
10
+ > score: 39.0
11
+ >
12
+ > organism.sequence: ATGGAGAGAATAAAGGAATTAAGAGATCTAATGTCACAGTCCCGCACTCGCGAGATACTAACAAAGACCACTGTGGACCATATGGCCATAATCAAGAAATACACATCAGGAAGACAAGAGAAGAACCCTGCTCTCAGAATGAAATGGATGATGGCAATGAAATATCCAATCACAGCGGACAAGAGAATAATAGAGATGATTCCTGAAAGGAATGAACAAGGACAGACACTCTGGAGCAAGACAAATGATGCTGGATCGGACAGGGTGATGGTGTCTCCCCTAGCTGTAACTTGGTGGAATAGGAATGGGCCGACGACAAGTACAGTTCATTATCCAAAGGTTTACAAAACATACTTTGAGAAGGTTGAAAGGTTAAAACATGGAACCTTCGGTCCCGTTCATTTCCGAAACCAAGTTAAAATACGCCGCCGAGTTGATACAAATCCTGGCCATGCAGATCTCAGTGCTAAAGAAGCACAAGATGTCATCATGGAGGTCGTTTTCCCAAATGAAGTGGGAGCTAGAATATTGACTTCAGAGTCACAGTTGACAATAACGAAAGAGAAAAAAGAAGAGCTCCAAGATTGTAAGATTGCTCCCTTAATGGTTGCATACATGTTGGAAAGGGAACTGGTCCGCAAAACCAGATTCCTACCAGTAGCAGGCGGAACAAGCAGTGTGTACATTGAGGTATTGCATTTGACTCAAGGAACCTGCTGGGCACAGATGTACACTCCAGGCGGAGAAGTAAGAAATGACGATGTTGACCAGAGTTTGATCATTGCTGCCAGAAACATTGTTAGGAGAGCAACGGTATCAGCGGATCCACTGGCATCACTGCTGGAGATGTGTCACAGCACACAAATTGGTGGGATAAGGATGGTGGACATCCTTAGGCAAACTCCAACTGAGGAACAAGCTGTGGATATATGCAAAGCAGCAATGGGTCTGAGGATTAGTTCATCCTTTAGCTTTGGAG
13
+ > GCTTCACTTTCAAAAGAACAAGTGGATCATCCGCCACGAAGGAAGAGGAAGTGCTTACAGGCAACCTCCAAACATTGAAAATAAGAGTACATGAGGGGTATGAGGAGTTCACAATGGTTGGGCAGAGGGCAACAGCTATCCTGAGGAAAGCAACTAGAAGGCTGATTCAGTTGATAGTAAGTGGAAGAAACGAACAATCAATCGCTGAGGCAATCATTGTAGCAATGGTGTTCTCACAGGAGGATCGCATGATAAAAGCAGTCCGAGGCGATCTGAATTTCGTAAACAGAGCAAACCAAAGATTAAACCCCATGCATCAACTCCTGAGACATTTTCAAAAGGACGCAAAAGTGCTATTTCAGAATTGGGGAACTGAGCCAATTGATAATGTCATGGGGATGATCGGAATATTACCTGACATGACTCCCAGCACAGAAACGTCACTGAGAGGAGTGAGAGTTAGTAAAATGGGAGTAGATGAGTATTCCAGCACTGAGAGAGTAGTTGTAAGCATTGACCGCTTCTTAAGGGTTCGAGACCAGCGGGGGAACGTACTCTTATCTCCCGAAGAGGTCAGCGAAACCCAGGGAACAGAGAAGTTGACAATAACATATTCATCATCAATGATGTGGGAAATCAACGGTCCTGAGTCAGTGCTTGTTAACACTTACCAATGGATCATTAGAAACTGGGAGACCGTGAAAATTCAGTGGTCTCAGGACCCCACGATGTTGTACAATAAGATGGAGTTTGAACCGTTCCAATCCTTGGTACCTAAAGCTGCCAGAGGTCAATACAGTGGATTTGTGAGAACATTATTCCAACAAATGCGTGACGTACTGGGGACATTTGATACTGTCCAGATAATAAAGCTGCTACCATTTGCAGCAGCCCCACCGAAGCAGAGCAGAATGCAGTTTTCTTCTCTAACTGTGAATGTGAGAGGCTCAGGAATGAGAATACTCATAAGGGGCAATTCCCCTGTGTTCAACTACAA
14
+ > TAAGGCAACCCAAAGACTTACCGTTCTTGGAAAGGACGCAGGTGCATTAACAGAGGATCCAGATGAGGGGACAGCCGGAGTGGAATCTGCAGTACTGAGGGGGTTCCTAATTCTAGGCAAGGAGGACAAAAGATATGGACCAGCATTGAGCATCAATGAACTGAGCAATCTTGCAAAAGGGGAGAAAGCTAATGTGCTGATAGGGCAAGGAGACGTGGTGTTGGTAATGAAACGGAAACGGGACTCTAGCATACTTACTGACAGCCAGACAGCGACCAAAAGAATTCGGATGGCCATCAATTAGTGTCGAATTGTTTAAAAACGACCTTGTTTCTACT
15
+ > reference_alignment: ________________________________________________
16
+ >
17
+ > query: AGCGAAAGCAGGTCAAATATATTCAATATGGAGAGAATAAAAGAATTAAG
18
+ >
19
+ > query_alignment: GCGAAAGCAGGTCAAATATATTCAATATGGAGAGAATAAAAGAATTAAG
20
+ >
metadata ADDED
@@ -0,0 +1,74 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: discourse_email_parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.6.0
5
+ platform: ruby
6
+ authors:
7
+ - Rick Olson
8
+ - Arpit Jalan
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2015-12-11 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: DiscourseEmailParser is a small library to parse plain text email content.
15
+ email: arpit.jalan@discourse.org
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - LICENSE
21
+ - README.md
22
+ - Rakefile
23
+ - discourse_email_parser.gemspec
24
+ - lib/discourse_email_parser.rb
25
+ - script/release
26
+ - script/test
27
+ - test/discourse_email_parser_test.rb
28
+ - test/emails/correct_sig.txt
29
+ - test/emails/email_1_1.txt
30
+ - test/emails/email_1_2.txt
31
+ - test/emails/email_1_3.txt
32
+ - test/emails/email_1_4.txt
33
+ - test/emails/email_1_5.txt
34
+ - test/emails/email_1_6.txt
35
+ - test/emails/email_1_7.txt
36
+ - test/emails/email_1_8.txt
37
+ - test/emails/email_2_1.txt
38
+ - test/emails/email_2_2.txt
39
+ - test/emails/email_BlackBerry.txt
40
+ - test/emails/email_bullets.txt
41
+ - test/emails/email_iPhone.txt
42
+ - test/emails/email_multi_word_sent_from_my_mobile_device.txt
43
+ - test/emails/email_one_is_not_on.txt
44
+ - test/emails/email_sent_from_my_not_signature.txt
45
+ - test/emails/email_sig_delimiter_in_middle_of_line.txt
46
+ - test/emails/greedy_on.txt
47
+ - test/emails/pathological.txt
48
+ homepage: http://github.com/discourse/discourse_email_parser
49
+ licenses:
50
+ - MIT
51
+ metadata: {}
52
+ post_install_message:
53
+ rdoc_options: []
54
+ require_paths:
55
+ - lib
56
+ required_ruby_version: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ required_rubygems_version: !ruby/object:Gem::Requirement
62
+ requirements:
63
+ - - ">="
64
+ - !ruby/object:Gem::Version
65
+ version: '0'
66
+ requirements: []
67
+ rubyforge_project:
68
+ rubygems_version: 2.4.8
69
+ signing_key:
70
+ specification_version: 4
71
+ summary: Small library to parse plain text email content.
72
+ test_files:
73
+ - test/discourse_email_parser_test.rb
74
+ has_rdoc: