mail_extract 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- data/.travis.yml +4 -0
- data/Gemfile +3 -0
- data/lib/mail_extract.rb +2 -2
- data/lib/mail_extract/line.rb +13 -8
- data/lib/mail_extract/parser.rb +26 -5
- data/lib/mail_extract/version.rb +1 -1
- data/mail_extract.gemspec +1 -0
- data/spec/fixtures/iphone.txt +3 -0
- data/spec/fixtures/iphone_with_quotes.txt +18 -0
- data/spec/line_spec.rb +6 -1
- data/spec/parser_spec.rb +13 -3
- data/spec/spec_helper.rb +4 -0
- metadata +61 -27
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/lib/mail_extract.rb
CHANGED
data/lib/mail_extract/line.rb
CHANGED
@@ -1,17 +1,20 @@
|
|
1
1
|
module MailExtract
|
2
2
|
class Line
|
3
|
-
attr_reader :body, :type
|
3
|
+
attr_reader :body, :type, :subtype
|
4
4
|
|
5
5
|
PATTERNS = {
|
6
|
-
/^[>]+\s?/
|
7
|
-
/^--/
|
8
|
-
/^-- /
|
9
|
-
/^[_]{2,}\n?/
|
10
|
-
/^[-]{2,}\n?/
|
6
|
+
/^[>]+\s?/ => :quote,
|
7
|
+
/^--/ => :signature,
|
8
|
+
/^-- / => :signature,
|
9
|
+
/^[_]{2,}\n?/ => :signature,
|
10
|
+
/^[-]{2,}\n?/ => :signature,
|
11
|
+
/^sent from my (iphone|ipad)/i => :signature
|
11
12
|
}
|
12
13
|
|
13
14
|
def initialize(str)
|
14
|
-
@body
|
15
|
+
@body = str
|
16
|
+
@subtype = :none
|
17
|
+
|
15
18
|
detect_type(str)
|
16
19
|
end
|
17
20
|
|
@@ -38,11 +41,13 @@ module MailExtract
|
|
38
41
|
def detect_type(line)
|
39
42
|
# Detects the start line of quote text
|
40
43
|
if line.strip =~ /^On\s/i && line =~ /at [\d:]+/ || line.strip =~ />? wrote:\z/
|
41
|
-
@type
|
44
|
+
@type = :quote
|
45
|
+
@subtype = :start
|
42
46
|
return
|
43
47
|
end
|
44
48
|
|
45
49
|
@type = :text
|
50
|
+
|
46
51
|
PATTERNS.each_pair do |p,t|
|
47
52
|
if line =~ p
|
48
53
|
@type = t
|
data/lib/mail_extract/parser.rb
CHANGED
@@ -6,14 +6,20 @@ module MailExtract
|
|
6
6
|
|
7
7
|
# Initialize a new MailExtract::Parser object
|
8
8
|
#
|
9
|
-
# text
|
9
|
+
# text - Email message body
|
10
|
+
# options - Parsing options
|
10
11
|
#
|
11
|
-
|
12
|
+
# Parsing options include:
|
13
|
+
# :only_head - Skip the rest of the message after quote start (default: false)
|
14
|
+
#
|
15
|
+
def initialize(text, options={})
|
12
16
|
@lines = []
|
13
17
|
@text = text.strip
|
14
18
|
@body = ""
|
15
19
|
@last_type = :text
|
16
20
|
@type = :text
|
21
|
+
@options = options
|
22
|
+
|
17
23
|
parse
|
18
24
|
end
|
19
25
|
|
@@ -22,13 +28,25 @@ module MailExtract
|
|
22
28
|
# Process email message body
|
23
29
|
#
|
24
30
|
def parse
|
31
|
+
break_after_quote = @options[:only_head] || false
|
25
32
|
scanner = StringScanner.new(@text)
|
33
|
+
|
34
|
+
# Process until message end
|
26
35
|
while str = scanner.scan_until(/\n/)
|
27
|
-
parse_line(str)
|
36
|
+
line = parse_line(str)
|
37
|
+
|
38
|
+
if break_after_quote
|
39
|
+
break if line.quote? && line.subtype == :start
|
40
|
+
end
|
28
41
|
end
|
29
|
-
|
30
|
-
|
42
|
+
|
43
|
+
# Process the rest (if any)
|
44
|
+
if !break_after_quote && @last_type != :quote
|
45
|
+
if (last_line = scanner.rest.to_s).size > 0
|
46
|
+
parse_line(last_line)
|
47
|
+
end
|
31
48
|
end
|
49
|
+
|
32
50
|
@body = @lines.join("\n").strip
|
33
51
|
end
|
34
52
|
|
@@ -36,6 +54,7 @@ module MailExtract
|
|
36
54
|
#
|
37
55
|
def parse_line(str)
|
38
56
|
line = MailExtract::Line.new(str)
|
57
|
+
|
39
58
|
if line.quote?
|
40
59
|
if @last_type == :text ; @type = :quote ; end
|
41
60
|
elsif line.text?
|
@@ -47,6 +66,8 @@ module MailExtract
|
|
47
66
|
end
|
48
67
|
@last_type = line.type
|
49
68
|
@lines << line.body.strip if @type == :text
|
69
|
+
|
70
|
+
line
|
50
71
|
end
|
51
72
|
end
|
52
73
|
end
|
data/lib/mail_extract/version.rb
CHANGED
data/mail_extract.gemspec
CHANGED
@@ -0,0 +1,18 @@
|
|
1
|
+
|
2
|
+
Primary reply content
|
3
|
+
|
4
|
+
--
|
5
|
+
I take full responsibility for any typos and refuse to blame them on my
|
6
|
+
cellphone
|
7
|
+
|
8
|
+
On Aug 12, 2011, at 6:00 AM, Robot <noreply@foobar.com> wrote:
|
9
|
+
|
10
|
+
Hi Dude,
|
11
|
+
|
12
|
+
Some text goes here, with no quotes
|
13
|
+
Blah blah blah
|
14
|
+
Blah!
|
15
|
+
Blah!!!!
|
16
|
+
|
17
|
+
Thanks,
|
18
|
+
Whatever
|
data/spec/line_spec.rb
CHANGED
@@ -7,12 +7,15 @@ describe 'MailExtract::Line' do
|
|
7
7
|
|
8
8
|
it 'detects quote start by date' do
|
9
9
|
line('On Tue, 2011-03-01 at 18:02 +0530, somebody wrote:').type.should == :quote
|
10
|
+
line('On Tue, 2011-03-01 at 18:02 +0530, somebody wrote:').subtype.should == :start
|
10
11
|
line('On 2011-03-01 at 18:02 somebody wrote').type.should == :quote
|
12
|
+
line('On 2011-03-01 at 18:02 somebody wrote').subtype.should == :start
|
11
13
|
line('On some day somebody wrote').type.should == :text
|
12
14
|
end
|
13
15
|
|
14
16
|
it 'detects quote' do
|
15
17
|
line('> this is a quote').type.should == :quote
|
18
|
+
line('> this is a quote').subtype.should == :none
|
16
19
|
line('> >> this is a quote').type.should == :quote
|
17
20
|
end
|
18
21
|
|
@@ -20,7 +23,9 @@ describe 'MailExtract::Line' do
|
|
20
23
|
lines = [
|
21
24
|
"--\nUsername",
|
22
25
|
"-- \nUsername",
|
23
|
-
"_______\nSome text"
|
26
|
+
"_______\nSome text",
|
27
|
+
"Sent from my iPhone",
|
28
|
+
"Sent from my iPad"
|
24
29
|
]
|
25
30
|
|
26
31
|
lines.each do |l|
|
data/spec/parser_spec.rb
CHANGED
@@ -2,17 +2,27 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
describe 'MailExtract::Parser' do
|
4
4
|
it 'parses an email' do
|
5
|
-
body =
|
5
|
+
body = parse_fixture('simple.txt')
|
6
6
|
body.should == result_fixture('simple.txt')
|
7
7
|
end
|
8
8
|
|
9
9
|
it 'parses an email with quotes' do
|
10
|
-
body =
|
10
|
+
body = parse_fixture('simple_with_quotes.txt')
|
11
11
|
body.should == result_fixture('simple_with_quotes.txt')
|
12
12
|
end
|
13
13
|
|
14
14
|
it 'parses a reply email with broken authored line' do
|
15
|
-
body =
|
15
|
+
body = parse_fixture('reply_with_quotes.txt')
|
16
16
|
body.should == 'This is a first line of the message'
|
17
17
|
end
|
18
|
+
|
19
|
+
it 'parses a message send via iphone' do
|
20
|
+
body = parse_fixture('iphone.txt')
|
21
|
+
body.should == 'This is a shit i sent from my iphone'
|
22
|
+
end
|
23
|
+
|
24
|
+
it 'parses a reply sent via iphone' do
|
25
|
+
body = MailExtract.new(fixture('iphone_with_quotes.txt'), :only_head => true).body
|
26
|
+
body.should == 'Primary reply content'
|
27
|
+
end
|
18
28
|
end
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,35 +1,53 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: mail_extract
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
3
|
+
version: !ruby/object:Gem::Version
|
5
4
|
prerelease:
|
5
|
+
version: 0.1.3
|
6
6
|
platform: ruby
|
7
|
-
authors:
|
7
|
+
authors:
|
8
8
|
- Dan Sosedoff
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
|
13
|
-
|
14
|
-
|
12
|
+
|
13
|
+
date: 2011-08-12 00:00:00 -05:00
|
14
|
+
default_executable:
|
15
|
+
dependencies:
|
16
|
+
- !ruby/object:Gem::Dependency
|
17
|
+
name: rake
|
18
|
+
prerelease: false
|
19
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
20
|
+
none: false
|
21
|
+
requirements:
|
22
|
+
- - ">="
|
23
|
+
- !ruby/object:Gem::Version
|
24
|
+
version: "0"
|
25
|
+
type: :development
|
26
|
+
version_requirements: *id001
|
27
|
+
- !ruby/object:Gem::Dependency
|
15
28
|
name: rspec
|
16
|
-
|
29
|
+
prerelease: false
|
30
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
17
31
|
none: false
|
18
|
-
requirements:
|
32
|
+
requirements:
|
19
33
|
- - ~>
|
20
|
-
- !ruby/object:Gem::Version
|
21
|
-
version:
|
34
|
+
- !ruby/object:Gem::Version
|
35
|
+
version: "2.6"
|
22
36
|
type: :development
|
23
|
-
|
24
|
-
version_requirements: *72663040
|
37
|
+
version_requirements: *id002
|
25
38
|
description: Email body parser that strips out all quotes and signatures.
|
26
39
|
email: dan.sosedoff@gmail.com
|
27
40
|
executables: []
|
41
|
+
|
28
42
|
extensions: []
|
43
|
+
|
29
44
|
extra_rdoc_files: []
|
30
|
-
|
45
|
+
|
46
|
+
files:
|
31
47
|
- .gitignore
|
32
48
|
- .rspec
|
49
|
+
- .travis.yml
|
50
|
+
- Gemfile
|
33
51
|
- README.md
|
34
52
|
- Rakefile
|
35
53
|
- lib/mail_extract.rb
|
@@ -37,6 +55,8 @@ files:
|
|
37
55
|
- lib/mail_extract/parser.rb
|
38
56
|
- lib/mail_extract/version.rb
|
39
57
|
- mail_extract.gemspec
|
58
|
+
- spec/fixtures/iphone.txt
|
59
|
+
- spec/fixtures/iphone_with_quotes.txt
|
40
60
|
- spec/fixtures/reply_with_quotes.txt
|
41
61
|
- spec/fixtures/result_simple.txt
|
42
62
|
- spec/fixtures/result_simple_with_quotes.txt
|
@@ -45,28 +65,42 @@ files:
|
|
45
65
|
- spec/line_spec.rb
|
46
66
|
- spec/parser_spec.rb
|
47
67
|
- spec/spec_helper.rb
|
68
|
+
has_rdoc: true
|
48
69
|
homepage: https://github.com/sosedoff/mail_extract
|
49
70
|
licenses: []
|
71
|
+
|
50
72
|
post_install_message:
|
51
73
|
rdoc_options: []
|
52
|
-
|
74
|
+
|
75
|
+
require_paths:
|
53
76
|
- lib
|
54
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
77
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
55
78
|
none: false
|
56
|
-
requirements:
|
57
|
-
- -
|
58
|
-
- !ruby/object:Gem::Version
|
59
|
-
version:
|
60
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: "0"
|
83
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
61
84
|
none: false
|
62
|
-
requirements:
|
63
|
-
- -
|
64
|
-
- !ruby/object:Gem::Version
|
65
|
-
version:
|
85
|
+
requirements:
|
86
|
+
- - ">="
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
version: "0"
|
66
89
|
requirements: []
|
90
|
+
|
67
91
|
rubyforge_project:
|
68
|
-
rubygems_version: 1.
|
92
|
+
rubygems_version: 1.6.2
|
69
93
|
signing_key:
|
70
94
|
specification_version: 3
|
71
95
|
summary: Extracts email message body
|
72
|
-
test_files:
|
96
|
+
test_files:
|
97
|
+
- spec/fixtures/iphone.txt
|
98
|
+
- spec/fixtures/iphone_with_quotes.txt
|
99
|
+
- spec/fixtures/reply_with_quotes.txt
|
100
|
+
- spec/fixtures/result_simple.txt
|
101
|
+
- spec/fixtures/result_simple_with_quotes.txt
|
102
|
+
- spec/fixtures/simple.txt
|
103
|
+
- spec/fixtures/simple_with_quotes.txt
|
104
|
+
- spec/line_spec.rb
|
105
|
+
- spec/parser_spec.rb
|
106
|
+
- spec/spec_helper.rb
|