email_reply_parser 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +22 -0
- data/README.md +94 -0
- data/Rakefile +135 -0
- data/email_reply_parser.gemspec +78 -0
- data/lib/email_reply_parser.rb +234 -0
- data/test/email_reply_parser_test.rb +70 -0
- data/test/emails/email_1_1.txt +13 -0
- data/test/emails/email_1_2.txt +51 -0
- data/test/emails/email_1_3.txt +55 -0
- metadata +76 -0
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
The MIT License
|
2
|
+
|
3
|
+
Copyright (c) GitHub
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
22
|
+
|
data/README.md
ADDED
@@ -0,0 +1,94 @@
|
|
1
|
+
# Email Reply Parser
|
2
|
+
|
3
|
+
EmailReplyParser is a small library to parse plain text email content.
|
4
|
+
See the rocco-documented source code for specifics on how it works.
|
5
|
+
|
6
|
+
This is what GitHub uses to display comments that were created from
|
7
|
+
email replies. This code is being open sourced in an effort to
|
8
|
+
crowdsource the quality of our email representation.
|
9
|
+
|
10
|
+
See more at http://help.github.com/code/email_reply_parser/
|
11
|
+
|
12
|
+
## Installation
|
13
|
+
|
14
|
+
Get it from [GitHub][github] or `gem install email_reply_parser`. Run `rake` to run the tests.
|
15
|
+
|
16
|
+
[github]: https://github.com/github/email_reply_parser
|
17
|
+
|
18
|
+
## Contribute
|
19
|
+
|
20
|
+
If you'd like to hack on EmailReplyParser, start by forking the repo on GitHub:
|
21
|
+
|
22
|
+
https://github.com/github/email_reply_parser
|
23
|
+
|
24
|
+
The best way to get your changes merged back into core is as follows:
|
25
|
+
|
26
|
+
* Clone down your fork
|
27
|
+
* Create a thoughtfully named topic branch to contain your change
|
28
|
+
* Hack away
|
29
|
+
* Add tests and make sure everything still passes by running rake
|
30
|
+
* If you are adding new functionality, document it in the README
|
31
|
+
* Do not change the version number, I will do that on my end
|
32
|
+
* If necessary, rebase your commits into logical chunks, without errors
|
33
|
+
* Push the branch up to GitHub
|
34
|
+
* Send a pull request to the `github/email_reply_parser` project.
|
35
|
+
|
36
|
+
## Known Issues
|
37
|
+
|
38
|
+
### Quoted Headers
|
39
|
+
|
40
|
+
Quoted headers aren't picked up if there's an extra line break:
|
41
|
+
|
42
|
+
On <date>, <author> wrote:
|
43
|
+
|
44
|
+
> blah
|
45
|
+
|
46
|
+
Also, they're not picked up if the email client breaks it up into
|
47
|
+
multiple lines. GMail breaks up any lines over 80 characters for you.
|
48
|
+
|
49
|
+
On <date>, <author>
|
50
|
+
wrote:
|
51
|
+
> blah
|
52
|
+
|
53
|
+
Not to mention that we're searching for "on" and "wrote". It won't work
|
54
|
+
with other languages.
|
55
|
+
|
56
|
+
Possible solution: Remove "reply@reply.github.com" lines...
|
57
|
+
|
58
|
+
### Weird Signatures
|
59
|
+
|
60
|
+
Lines starting with `-` or `_` sometimes mark the beginning of
|
61
|
+
signatures:
|
62
|
+
|
63
|
+
Hello
|
64
|
+
|
65
|
+
--
|
66
|
+
Rick
|
67
|
+
|
68
|
+
Not everyone follows this convention:
|
69
|
+
|
70
|
+
Hello
|
71
|
+
|
72
|
+
Mr Rick Olson
|
73
|
+
Galactic President Superstar Mc Awesomeville
|
74
|
+
GitHub
|
75
|
+
|
76
|
+
**********************DISCLAIMER***********************************
|
77
|
+
* Note: blah blah blah *
|
78
|
+
**********************DISCLAIMER***********************************
|
79
|
+
|
80
|
+
|
81
|
+
|
82
|
+
### Strange Quoting
|
83
|
+
|
84
|
+
Apparently, prefixing lines with `>` isn't universal either:
|
85
|
+
|
86
|
+
Hello
|
87
|
+
|
88
|
+
--
|
89
|
+
Rick
|
90
|
+
|
91
|
+
________________________________________
|
92
|
+
From: Bob [reply@reply.github.com]
|
93
|
+
Sent: Monday, March 14, 2011 6:16 PM
|
94
|
+
To: Rick
|
data/Rakefile
ADDED
@@ -0,0 +1,135 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
require 'date'
|
4
|
+
|
5
|
+
#############################################################################
|
6
|
+
#
|
7
|
+
# Helper functions
|
8
|
+
#
|
9
|
+
#############################################################################
|
10
|
+
|
11
|
+
def name
|
12
|
+
@name ||= Dir['*.gemspec'].first.split('.').first
|
13
|
+
end
|
14
|
+
|
15
|
+
def version
|
16
|
+
line = File.read("lib/#{name}.rb")[/^\s*VERSION\s*=\s*.*/]
|
17
|
+
line.match(/.*VERSION\s*=\s*['"](.*)['"]/)[1]
|
18
|
+
end
|
19
|
+
|
20
|
+
def date
|
21
|
+
Date.today.to_s
|
22
|
+
end
|
23
|
+
|
24
|
+
def rubyforge_project
|
25
|
+
name
|
26
|
+
end
|
27
|
+
|
28
|
+
def gemspec_file
|
29
|
+
"#{name}.gemspec"
|
30
|
+
end
|
31
|
+
|
32
|
+
def gem_file
|
33
|
+
"#{name}-#{version}.gem"
|
34
|
+
end
|
35
|
+
|
36
|
+
def replace_header(head, header_name)
|
37
|
+
head.sub!(/(\.#{header_name}\s*= ').*'/) { "#{$1}#{send(header_name)}'"}
|
38
|
+
end
|
39
|
+
|
40
|
+
#############################################################################
|
41
|
+
#
|
42
|
+
# Standard tasks
|
43
|
+
#
|
44
|
+
#############################################################################
|
45
|
+
|
46
|
+
task :default => :test
|
47
|
+
|
48
|
+
require 'rake/testtask'
|
49
|
+
Rake::TestTask.new(:test) do |test|
|
50
|
+
test.libs << 'lib' << 'test'
|
51
|
+
test.pattern = 'test/**/*_test.rb'
|
52
|
+
test.verbose = true
|
53
|
+
end
|
54
|
+
|
55
|
+
desc "Open an irb session preloaded with this library"
|
56
|
+
task :console do
|
57
|
+
sh "irb -rubygems -r ./lib/#{name}.rb"
|
58
|
+
end
|
59
|
+
|
60
|
+
#############################################################################
|
61
|
+
#
|
62
|
+
# Custom tasks (add your own tasks here)
|
63
|
+
#
|
64
|
+
#############################################################################
|
65
|
+
|
66
|
+
|
67
|
+
|
68
|
+
#############################################################################
|
69
|
+
#
|
70
|
+
# Packaging tasks
|
71
|
+
#
|
72
|
+
#############################################################################
|
73
|
+
|
74
|
+
desc "Create tag v#{version} and build and push #{gem_file} to Rubygems"
|
75
|
+
task :release => :build do
|
76
|
+
unless `git branch` =~ /^\* master$/
|
77
|
+
puts "You must be on the master branch to release!"
|
78
|
+
exit!
|
79
|
+
end
|
80
|
+
sh "git commit --allow-empty -a -m 'Release #{version}'"
|
81
|
+
sh "git tag v#{version}"
|
82
|
+
sh "git push origin master"
|
83
|
+
sh "git push origin v#{version}"
|
84
|
+
sh "gem push pkg/#{name}-#{version}.gem"
|
85
|
+
end
|
86
|
+
|
87
|
+
desc "Build #{gem_file} into the pkg directory"
|
88
|
+
task :build => :gemspec do
|
89
|
+
sh "mkdir -p pkg"
|
90
|
+
sh "gem build #{gemspec_file}"
|
91
|
+
sh "mv #{gem_file} pkg"
|
92
|
+
end
|
93
|
+
|
94
|
+
desc "Generate #{gemspec_file}"
|
95
|
+
task :gemspec => :validate do
|
96
|
+
# read spec file and split out manifest section
|
97
|
+
spec = File.read(gemspec_file)
|
98
|
+
head, manifest, tail = spec.split(" # = MANIFEST =\n")
|
99
|
+
|
100
|
+
# replace name version and date
|
101
|
+
replace_header(head, :name)
|
102
|
+
replace_header(head, :version)
|
103
|
+
replace_header(head, :date)
|
104
|
+
# Comment this out if your rubyforge_project has a different name.
|
105
|
+
replace_header(head, :rubyforge_project)
|
106
|
+
|
107
|
+
# determine file list from git ls-files
|
108
|
+
files = `git ls-files`.
|
109
|
+
split("\n").
|
110
|
+
sort.
|
111
|
+
reject { |file| file =~ /^\./ }.
|
112
|
+
reject { |file| file =~ /^(rdoc|pkg)/ }.
|
113
|
+
map { |file| " #{file}" }.
|
114
|
+
join("\n")
|
115
|
+
|
116
|
+
# piece file back together and write
|
117
|
+
manifest = " s.files = %w[\n#{files}\n ]\n"
|
118
|
+
spec = [head, manifest, tail].join(" # = MANIFEST =\n")
|
119
|
+
File.open(gemspec_file, 'w') { |io| io.write(spec) }
|
120
|
+
puts "Updated #{gemspec_file}"
|
121
|
+
end
|
122
|
+
|
123
|
+
desc "Validate #{gemspec_file}"
|
124
|
+
task :validate do
|
125
|
+
libfiles = Dir['lib/*'] - ["lib/#{name}.rb", "lib/#{name}"]
|
126
|
+
unless libfiles.empty?
|
127
|
+
puts "Directory `lib` should only contain a `#{name}.rb` file and `#{name}` dir."
|
128
|
+
exit!
|
129
|
+
end
|
130
|
+
unless Dir['VERSION*'].empty?
|
131
|
+
puts "A `VERSION` file at root level violates Gem best practices."
|
132
|
+
exit!
|
133
|
+
end
|
134
|
+
end
|
135
|
+
|
@@ -0,0 +1,78 @@
|
|
1
|
+
## This is the rakegem gemspec template. Make sure you read and understand
|
2
|
+
## all of the comments. Some sections require modification, and others can
|
3
|
+
## be deleted if you don't need them. Once you understand the contents of
|
4
|
+
## this file, feel free to delete any comments that begin with two hash marks.
|
5
|
+
## You can find comprehensive Gem::Specification documentation, at
|
6
|
+
## http://docs.rubygems.org/read/chapter/20
|
7
|
+
Gem::Specification.new do |s|
|
8
|
+
s.specification_version = 2 if s.respond_to? :specification_version=
|
9
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
10
|
+
s.rubygems_version = '1.3.5'
|
11
|
+
|
12
|
+
## Leave these as is; they will be modified for you by the rake gemspec task.
|
13
|
+
## If your rubyforge_project name is different, then edit it and comment out
|
14
|
+
## the sub! line in the Rakefile
|
15
|
+
s.name = 'email_reply_parser'
|
16
|
+
s.version = '0.2.0'
|
17
|
+
s.date = '2011-03-17'
|
18
|
+
s.rubyforge_project = 'email_reply_parser'
|
19
|
+
|
20
|
+
## Make sure your summary is short. The description may be as long
|
21
|
+
## as you like.
|
22
|
+
s.summary = "Short description used in Gem listings."
|
23
|
+
s.description = "Long description. Maybe copied from the README."
|
24
|
+
|
25
|
+
## List the primary authors. If there are a bunch of authors, it's probably
|
26
|
+
## better to set the email to an email list or something. If you don't have
|
27
|
+
## a custom homepage, consider using your GitHub URL or the like.
|
28
|
+
s.authors = ["Rick Olson"]
|
29
|
+
s.email = 'technoweenie@gmail.com'
|
30
|
+
s.homepage = 'http://github.com/github/email_reply_parser'
|
31
|
+
|
32
|
+
## This gets added to the $LOAD_PATH so that 'lib/NAME.rb' can be required as
|
33
|
+
## require 'NAME.rb' or 'lib/NAME/file.rb' can be required as require 'NAME/file.rb'
|
34
|
+
s.require_paths = %w[lib]
|
35
|
+
|
36
|
+
## This section is only necessary if you have C extensions.
|
37
|
+
#s.require_paths << 'ext'
|
38
|
+
#s.extensions = %w[ext/extconf.rb]
|
39
|
+
|
40
|
+
## If your gem includes any executables, list them here.
|
41
|
+
#s.executables = ["name"]
|
42
|
+
#s.default_executable = 'name'
|
43
|
+
|
44
|
+
## Specify any RDoc options here. You'll want to add your README and
|
45
|
+
## LICENSE files to the extra_rdoc_files list.
|
46
|
+
s.rdoc_options = ["--charset=UTF-8"]
|
47
|
+
s.extra_rdoc_files = %w[README.md LICENSE]
|
48
|
+
|
49
|
+
## List your runtime dependencies here. Runtime dependencies are those
|
50
|
+
## that are needed for an end user to actually USE your code.
|
51
|
+
#s.add_dependency('DEPNAME', [">= 1.1.0", "< 2.0.0"])
|
52
|
+
|
53
|
+
## List your development dependencies here. Development dependencies are
|
54
|
+
## those that are only needed during development
|
55
|
+
#s.add_development_dependency('DEVDEPNAME', [">= 1.1.0", "< 2.0.0"])
|
56
|
+
|
57
|
+
## Leave this section as-is. It will be automatically generated from the
|
58
|
+
## contents of your Git repository via the gemspec task. DO NOT REMOVE
|
59
|
+
## THE MANIFEST COMMENTS, they are used as delimiters by the task.
|
60
|
+
# = MANIFEST =
|
61
|
+
s.files = %w[
|
62
|
+
LICENSE
|
63
|
+
README.md
|
64
|
+
Rakefile
|
65
|
+
email_reply_parser.gemspec
|
66
|
+
lib/email_reply_parser.rb
|
67
|
+
test/email_reply_parser_test.rb
|
68
|
+
test/emails/email_1_1.txt
|
69
|
+
test/emails/email_1_2.txt
|
70
|
+
test/emails/email_1_3.txt
|
71
|
+
]
|
72
|
+
# = MANIFEST =
|
73
|
+
|
74
|
+
## Test files will be grabbed from the file list. Make sure the path glob
|
75
|
+
## matches what you actually use.
|
76
|
+
s.test_files = s.files.select { |path| path =~ /^test\/.*_test\.rb/ }
|
77
|
+
end
|
78
|
+
|
@@ -0,0 +1,234 @@
|
|
1
|
+
require 'strscan'
|
2
|
+
|
3
|
+
# EmailReplyParser is a small library to parse plain text email content. The
|
4
|
+
# goal is to identify which fragments are quoted, part of a signature, or
|
5
|
+
# original body content. We want to support both top and bottom posters, so
|
6
|
+
# no simple "REPLY ABOVE HERE" content is used.
|
7
|
+
#
|
8
|
+
# Beyond RFC 5322 (which is handled by the [Ruby mail gem][mail]), there aren't
|
9
|
+
# any real standards for how emails are created. This attempts to parse out
|
10
|
+
# common conventions for things like replies:
|
11
|
+
#
|
12
|
+
# this is some text
|
13
|
+
#
|
14
|
+
# On <date>, <author> wrote:
|
15
|
+
# > blah blah
|
16
|
+
# > blah blah
|
17
|
+
#
|
18
|
+
# ... and signatures:
|
19
|
+
#
|
20
|
+
# this is some text
|
21
|
+
#
|
22
|
+
# --
|
23
|
+
# Bob
|
24
|
+
# http://homepage.com/~bob
|
25
|
+
#
|
26
|
+
# Each of these is parsed into a Fragment object.
|
27
|
+
#
|
28
|
+
# EmailReplyParser also attempts to figure out which of these blocks should
|
29
|
+
# be hidden from users.
|
30
|
+
#
|
31
|
+
# [mail]: https://github.com/mikel/mail
|
32
|
+
class EmailReplyParser
|
33
|
+
VERSION = "0.2.0"
|
34
|
+
|
35
|
+
# Splits an email body into a list of Fragments.
|
36
|
+
#
|
37
|
+
# text - A String email body.
|
38
|
+
#
|
39
|
+
# Returns an Email instance.
|
40
|
+
def self.read(text)
|
41
|
+
Email.new.read(text)
|
42
|
+
end
|
43
|
+
|
44
|
+
### Emails
|
45
|
+
|
46
|
+
# An Email instance represents a parsed body String.
|
47
|
+
class Email
|
48
|
+
# Emails have an Array of Fragments.
|
49
|
+
attr_reader :fragments
|
50
|
+
|
51
|
+
def initialize
|
52
|
+
@fragments = []
|
53
|
+
end
|
54
|
+
|
55
|
+
# Splits the given text into a list of Fragments. This is roughly done by
|
56
|
+
# reversing the text and parsing from the bottom to the top. This way we
|
57
|
+
# can check for 'On <date>, <author> wrote:' lines above quoted blocks.
|
58
|
+
#
|
59
|
+
# text - A String email body.
|
60
|
+
#
|
61
|
+
# Returns this same Email instance.
|
62
|
+
def read(text)
|
63
|
+
# The text is reversed initially due to the way we check for hidden
|
64
|
+
# fragments.
|
65
|
+
text.reverse!
|
66
|
+
|
67
|
+
# This determines if any 'visible' Fragment has been found. Once any
|
68
|
+
# visible Fragment is found, stop looking for hidden ones.
|
69
|
+
@found_visible = false
|
70
|
+
|
71
|
+
# This instance variable points to the current Fragment. If the matched
|
72
|
+
# line fits, it should be added to this Fragment. Otherwise, finish it
|
73
|
+
# and start a new Fragment.
|
74
|
+
@fragment = nil
|
75
|
+
|
76
|
+
# Use the StringScanner to pull out each line of the email content.
|
77
|
+
@scanner = StringScanner.new(text)
|
78
|
+
while line = @scanner.scan_until(/\n/)
|
79
|
+
scan_line(line)
|
80
|
+
end
|
81
|
+
|
82
|
+
# Be sure to parse the last line of the email.
|
83
|
+
if (last_line = @scanner.rest.to_s).size > 0
|
84
|
+
scan_line(last_line)
|
85
|
+
end
|
86
|
+
|
87
|
+
# Finish up the final fragment. Finishing a fragment will detect any
|
88
|
+
# attributes (hidden, signature, reply), and join each line into a
|
89
|
+
# string.
|
90
|
+
finish_fragment
|
91
|
+
|
92
|
+
@scanner = @fragment = nil
|
93
|
+
|
94
|
+
# Now that parsing is done, reverse the order.
|
95
|
+
@fragments.reverse!
|
96
|
+
self
|
97
|
+
end
|
98
|
+
|
99
|
+
private
|
100
|
+
EMPTY = "".freeze
|
101
|
+
|
102
|
+
### Line-by-Line Parsing
|
103
|
+
|
104
|
+
# Scans the given line of text and figures out which fragment it belongs
|
105
|
+
# to.
|
106
|
+
#
|
107
|
+
# line - A String line of text from the email.
|
108
|
+
#
|
109
|
+
# Returns nothing.
|
110
|
+
def scan_line(line)
|
111
|
+
line.chomp!("\n")
|
112
|
+
line.lstrip!
|
113
|
+
|
114
|
+
# We're looking for leading `>`'s to see if this line is part of a
|
115
|
+
# quoted Fragment.
|
116
|
+
line_levels = line =~ /(>+)$/ ? $1.size : 0
|
117
|
+
|
118
|
+
# Mark the current Fragment as a signature if the current line is empty
|
119
|
+
# and the Fragment starts with a common signature indicator.
|
120
|
+
if @fragment && line == EMPTY
|
121
|
+
if @fragment.lines.last =~ /[\-\_]$/
|
122
|
+
@fragment.signature = true
|
123
|
+
finish_fragment
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
# If the line matches the current fragment, add it. Note that a common
|
128
|
+
# reply header also counts as part of the quoted Fragment, even though
|
129
|
+
# it doesn't start with `>`.
|
130
|
+
if @fragment &&
|
131
|
+
((@fragment.quoted? != line_levels.zero?) ||
|
132
|
+
(@fragment.quoted? && quote_header?(line)))
|
133
|
+
@fragment.lines << line
|
134
|
+
|
135
|
+
# Otherwise, finish the fragment and start a new one.
|
136
|
+
else
|
137
|
+
finish_fragment
|
138
|
+
@fragment = Fragment.new(!line_levels.zero?, line)
|
139
|
+
end
|
140
|
+
end
|
141
|
+
|
142
|
+
# Detects if a given line is a header above a quoted area. It is only
|
143
|
+
# checked for lines preceding quoted regions.
|
144
|
+
#
|
145
|
+
# line - A String line of text from the email.
|
146
|
+
#
|
147
|
+
# Returns true if the line is a valid header, or false.
|
148
|
+
def quote_header?(line)
|
149
|
+
line =~ /^:etorw.*nO$/
|
150
|
+
end
|
151
|
+
|
152
|
+
# Builds the fragment string and reverses it, after all lines have been
|
153
|
+
# added. It also checks to see if this Fragment is hidden. The hidden
|
154
|
+
# Fragment check reads from the bottom to the top.
|
155
|
+
#
|
156
|
+
# Any quoted Fragments or signature Fragments are marked hidden if they
|
157
|
+
# are below any visible Fragments. Visible Fragments are expected to
|
158
|
+
# contain original content by the author. If they are below a quoted
|
159
|
+
# Fragment, then the Fragment should be visible to give context to the
|
160
|
+
# reply.
|
161
|
+
#
|
162
|
+
# some original text (visible)
|
163
|
+
#
|
164
|
+
# > do you have any two's? (quoted, visible)
|
165
|
+
#
|
166
|
+
# Go fish! (visible)
|
167
|
+
#
|
168
|
+
# > --
|
169
|
+
# > Player 1 (quoted, hidden)
|
170
|
+
#
|
171
|
+
# --
|
172
|
+
# Player 2 (signature, hidden)
|
173
|
+
#
|
174
|
+
def finish_fragment
|
175
|
+
if @fragment
|
176
|
+
@fragment.finish
|
177
|
+
if !@found_visible
|
178
|
+
if @fragment.quoted? || @fragment.signature? ||
|
179
|
+
@fragment.to_s.strip == EMPTY
|
180
|
+
@fragment.hidden = true
|
181
|
+
else
|
182
|
+
@found_visible = true
|
183
|
+
end
|
184
|
+
end
|
185
|
+
@fragments << @fragment
|
186
|
+
end
|
187
|
+
@fragment = nil
|
188
|
+
end
|
189
|
+
end
|
190
|
+
|
191
|
+
### Fragments
|
192
|
+
|
193
|
+
# Represents a group of paragraphs in the email sharing common attributes.
|
194
|
+
# Paragraphs should get their own fragment if they are a quoted area or a
|
195
|
+
# signature.
|
196
|
+
class Fragment < Struct.new(:quoted, :signature, :hidden)
|
197
|
+
# This is an Array of String lines of content. Since the content is
|
198
|
+
# reversed, this array is backwards, and contains reversed strings.
|
199
|
+
attr_reader :lines,
|
200
|
+
|
201
|
+
# This is reserved for the joined String that is built when this Fragment
|
202
|
+
# is finished.
|
203
|
+
:content
|
204
|
+
|
205
|
+
def initialize(quoted, first_line)
|
206
|
+
self.signature = self.hidden = false
|
207
|
+
self.quoted = quoted
|
208
|
+
@lines = [first_line]
|
209
|
+
@content = nil
|
210
|
+
@lines.compact!
|
211
|
+
end
|
212
|
+
|
213
|
+
alias quoted? quoted
|
214
|
+
alias signature? signature
|
215
|
+
alias hidden? hidden
|
216
|
+
|
217
|
+
# Builds the string content by joining the lines and reversing them.
|
218
|
+
#
|
219
|
+
# Returns nothing.
|
220
|
+
def finish
|
221
|
+
@content = @lines.join("\n")
|
222
|
+
@lines = nil
|
223
|
+
@content.reverse!
|
224
|
+
end
|
225
|
+
|
226
|
+
def to_s
|
227
|
+
@content
|
228
|
+
end
|
229
|
+
|
230
|
+
def inspect
|
231
|
+
to_s.inspect
|
232
|
+
end
|
233
|
+
end
|
234
|
+
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'test/unit'
|
3
|
+
require 'pathname'
|
4
|
+
require 'pp'
|
5
|
+
|
6
|
+
dir = Pathname.new File.expand_path(File.dirname(__FILE__))
|
7
|
+
require dir + '..' + 'lib' + 'email_reply_parser'
|
8
|
+
|
9
|
+
EMAIL_FIXTURE_PATH = dir + 'emails'
|
10
|
+
|
11
|
+
class EmailReplyParserTest < Test::Unit::TestCase
|
12
|
+
def test_reads_simple_body
|
13
|
+
reply = email(:email_1_1)
|
14
|
+
assert_equal 3, reply.fragments.size
|
15
|
+
|
16
|
+
assert reply.fragments.none? { |f| f.quoted? }
|
17
|
+
assert_equal [false, true, true],
|
18
|
+
reply.fragments.map { |f| f.signature? }
|
19
|
+
assert_equal [false, true, true],
|
20
|
+
reply.fragments.map { |f| f.hidden? }
|
21
|
+
|
22
|
+
assert_equal "Hi folks
|
23
|
+
|
24
|
+
What is the best way to clear a Riak bucket of all key, values after
|
25
|
+
running a test?
|
26
|
+
I am currently using the Java HTTP API.\n", reply.fragments[0].to_s
|
27
|
+
|
28
|
+
assert_equal "-Abhishek Kona\n\n", reply.fragments[1].to_s
|
29
|
+
end
|
30
|
+
|
31
|
+
def test_reads_top_post
|
32
|
+
reply = email(:email_1_3)
|
33
|
+
assert_equal 5, reply.fragments.size
|
34
|
+
|
35
|
+
assert_equal [false, false, true, false, false],
|
36
|
+
reply.fragments.map { |f| f.quoted? }
|
37
|
+
assert_equal [false, true, true, true, true],
|
38
|
+
reply.fragments.map { |f| f.hidden? }
|
39
|
+
assert_equal [false, true, false, false, true],
|
40
|
+
reply.fragments.map { |f| f.signature? }
|
41
|
+
|
42
|
+
assert_match /^Oh thanks.\n\nHaving/, reply.fragments[0].to_s
|
43
|
+
assert_match /^-A/, reply.fragments[1].to_s
|
44
|
+
assert_match /^On [^\:]+\:/, reply.fragments[2].to_s
|
45
|
+
assert_match /^_/, reply.fragments[4].to_s
|
46
|
+
end
|
47
|
+
|
48
|
+
def test_reads_bottom_post
|
49
|
+
reply = email(:email_1_2)
|
50
|
+
assert_equal 6, reply.fragments.size
|
51
|
+
|
52
|
+
assert_equal [false, true, false, true, false, false],
|
53
|
+
reply.fragments.map { |f| f.quoted? }
|
54
|
+
assert_equal [false, false, false, false, false, true],
|
55
|
+
reply.fragments.map { |f| f.signature? }
|
56
|
+
assert_equal [false, false, false, true, true, true],
|
57
|
+
reply.fragments.map { |f| f.hidden? }
|
58
|
+
|
59
|
+
assert_equal "Hi,", reply.fragments[0].to_s
|
60
|
+
assert_match /^On [^\:]+\:/, reply.fragments[1].to_s
|
61
|
+
assert_match /^You can list/, reply.fragments[2].to_s
|
62
|
+
assert_match /^> /, reply.fragments[3].to_s
|
63
|
+
assert_match /^_/, reply.fragments[5].to_s
|
64
|
+
end
|
65
|
+
|
66
|
+
def email(name)
|
67
|
+
body = IO.read EMAIL_FIXTURE_PATH.join("#{name}.txt").to_s
|
68
|
+
EmailReplyParser.read body
|
69
|
+
end
|
70
|
+
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
Hi folks
|
2
|
+
|
3
|
+
What is the best way to clear a Riak bucket of all key, values after
|
4
|
+
running a test?
|
5
|
+
I am currently using the Java HTTP API.
|
6
|
+
|
7
|
+
-Abhishek Kona
|
8
|
+
|
9
|
+
|
10
|
+
_______________________________________________
|
11
|
+
riak-users mailing list
|
12
|
+
riak-users@lists.basho.com
|
13
|
+
http://lists.basho.com/mailman/listinfo/riak-users_lists.basho.com
|
@@ -0,0 +1,51 @@
|
|
1
|
+
Hi,
|
2
|
+
On Tue, 2011-03-01 at 18:02 +0530, Abhishek Kona wrote:
|
3
|
+
> Hi folks
|
4
|
+
>
|
5
|
+
> What is the best way to clear a Riak bucket of all key, values after
|
6
|
+
> running a test?
|
7
|
+
> I am currently using the Java HTTP API.
|
8
|
+
|
9
|
+
You can list the keys for the bucket and call delete for each. Or if you
|
10
|
+
put the keys (and kept track of them in your test) you can delete them
|
11
|
+
one at a time (without incurring the cost of calling list first.)
|
12
|
+
|
13
|
+
Something like:
|
14
|
+
|
15
|
+
String bucket = "my_bucket";
|
16
|
+
BucketResponse bucketResponse = riakClient.listBucket(bucket);
|
17
|
+
RiakBucketInfo bucketInfo = bucketResponse.getBucketInfo();
|
18
|
+
|
19
|
+
for(String key : bucketInfo.getKeys()) {
|
20
|
+
riakClient.delete(bucket, key);
|
21
|
+
}
|
22
|
+
|
23
|
+
|
24
|
+
would do it.
|
25
|
+
|
26
|
+
See also
|
27
|
+
|
28
|
+
http://wiki.basho.com/REST-API.html#Bucket-operations
|
29
|
+
|
30
|
+
which says
|
31
|
+
|
32
|
+
"At the moment there is no straightforward way to delete an entire
|
33
|
+
Bucket. There is, however, an open ticket for the feature. To delete all
|
34
|
+
the keys in a bucket, you’ll need to delete them all individually."
|
35
|
+
|
36
|
+
>
|
37
|
+
> -Abhishek Kona
|
38
|
+
>
|
39
|
+
>
|
40
|
+
> _______________________________________________
|
41
|
+
> riak-users mailing list
|
42
|
+
> riak-users@lists.basho.com
|
43
|
+
> http://lists.basho.com/mailman/listinfo/riak-users_lists.basho.com
|
44
|
+
|
45
|
+
|
46
|
+
|
47
|
+
|
48
|
+
_______________________________________________
|
49
|
+
riak-users mailing list
|
50
|
+
riak-users@lists.basho.com
|
51
|
+
http://lists.basho.com/mailman/listinfo/riak-users_lists.basho.com
|
@@ -0,0 +1,55 @@
|
|
1
|
+
Oh thanks.
|
2
|
+
|
3
|
+
Having the function would be great.
|
4
|
+
|
5
|
+
-Abhishek Kona
|
6
|
+
|
7
|
+
On 01/03/11 7:07 PM, Russell Brown wrote:
|
8
|
+
> Hi,
|
9
|
+
> On Tue, 2011-03-01 at 18:02 +0530, Abhishek Kona wrote:
|
10
|
+
>> Hi folks
|
11
|
+
>>
|
12
|
+
>> What is the best way to clear a Riak bucket of all key, values after
|
13
|
+
>> running a test?
|
14
|
+
>> I am currently using the Java HTTP API.
|
15
|
+
> You can list the keys for the bucket and call delete for each. Or if you
|
16
|
+
> put the keys (and kept track of them in your test) you can delete them
|
17
|
+
> one at a time (without incurring the cost of calling list first.)
|
18
|
+
>
|
19
|
+
> Something like:
|
20
|
+
>
|
21
|
+
> String bucket = "my_bucket";
|
22
|
+
> BucketResponse bucketResponse = riakClient.listBucket(bucket);
|
23
|
+
> RiakBucketInfo bucketInfo = bucketResponse.getBucketInfo();
|
24
|
+
>
|
25
|
+
> for(String key : bucketInfo.getKeys()) {
|
26
|
+
> riakClient.delete(bucket, key);
|
27
|
+
> }
|
28
|
+
>
|
29
|
+
>
|
30
|
+
> would do it.
|
31
|
+
>
|
32
|
+
> See also
|
33
|
+
>
|
34
|
+
> http://wiki.basho.com/REST-API.html#Bucket-operations
|
35
|
+
>
|
36
|
+
> which says
|
37
|
+
>
|
38
|
+
> "At the moment there is no straightforward way to delete an entire
|
39
|
+
> Bucket. There is, however, an open ticket for the feature. To delete all
|
40
|
+
> the keys in a bucket, you’ll need to delete them all individually."
|
41
|
+
>
|
42
|
+
>> -Abhishek Kona
|
43
|
+
>>
|
44
|
+
>>
|
45
|
+
>> _______________________________________________
|
46
|
+
>> riak-users mailing list
|
47
|
+
>> riak-users@lists.basho.com
|
48
|
+
>> http://lists.basho.com/mailman/listinfo/riak-users_lists.basho.com
|
49
|
+
>
|
50
|
+
|
51
|
+
|
52
|
+
_______________________________________________
|
53
|
+
riak-users mailing list
|
54
|
+
riak-users@lists.basho.com
|
55
|
+
http://lists.basho.com/mailman/listinfo/riak-users_lists.basho.com
|
metadata
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: email_reply_parser
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 23
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 2
|
9
|
+
- 0
|
10
|
+
version: 0.2.0
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Rick Olson
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2011-03-17 00:00:00 -07:00
|
19
|
+
default_executable:
|
20
|
+
dependencies: []
|
21
|
+
|
22
|
+
description: Long description. Maybe copied from the README.
|
23
|
+
email: technoweenie@gmail.com
|
24
|
+
executables: []
|
25
|
+
|
26
|
+
extensions: []
|
27
|
+
|
28
|
+
extra_rdoc_files:
|
29
|
+
- README.md
|
30
|
+
- LICENSE
|
31
|
+
files:
|
32
|
+
- LICENSE
|
33
|
+
- README.md
|
34
|
+
- Rakefile
|
35
|
+
- email_reply_parser.gemspec
|
36
|
+
- lib/email_reply_parser.rb
|
37
|
+
- test/email_reply_parser_test.rb
|
38
|
+
- test/emails/email_1_1.txt
|
39
|
+
- test/emails/email_1_2.txt
|
40
|
+
- test/emails/email_1_3.txt
|
41
|
+
has_rdoc: true
|
42
|
+
homepage: http://github.com/github/email_reply_parser
|
43
|
+
licenses: []
|
44
|
+
|
45
|
+
post_install_message:
|
46
|
+
rdoc_options:
|
47
|
+
- --charset=UTF-8
|
48
|
+
require_paths:
|
49
|
+
- lib
|
50
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
51
|
+
none: false
|
52
|
+
requirements:
|
53
|
+
- - ">="
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
hash: 3
|
56
|
+
segments:
|
57
|
+
- 0
|
58
|
+
version: "0"
|
59
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
60
|
+
none: false
|
61
|
+
requirements:
|
62
|
+
- - ">="
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
hash: 3
|
65
|
+
segments:
|
66
|
+
- 0
|
67
|
+
version: "0"
|
68
|
+
requirements: []
|
69
|
+
|
70
|
+
rubyforge_project: email_reply_parser
|
71
|
+
rubygems_version: 1.3.7
|
72
|
+
signing_key:
|
73
|
+
specification_version: 2
|
74
|
+
summary: Short description used in Gem listings.
|
75
|
+
test_files:
|
76
|
+
- test/email_reply_parser_test.rb
|