mail_extract 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.travis.yml +4 -0
- data/Gemfile +3 -0
- data/lib/mail_extract.rb +2 -2
- data/lib/mail_extract/line.rb +13 -8
- data/lib/mail_extract/parser.rb +26 -5
- data/lib/mail_extract/version.rb +1 -1
- data/mail_extract.gemspec +1 -0
- data/spec/fixtures/iphone.txt +3 -0
- data/spec/fixtures/iphone_with_quotes.txt +18 -0
- data/spec/line_spec.rb +6 -1
- data/spec/parser_spec.rb +13 -3
- data/spec/spec_helper.rb +4 -0
- metadata +61 -27
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/lib/mail_extract.rb
CHANGED
data/lib/mail_extract/line.rb
CHANGED
@@ -1,17 +1,20 @@
|
|
1
1
|
module MailExtract
|
2
2
|
class Line
|
3
|
-
attr_reader :body, :type
|
3
|
+
attr_reader :body, :type, :subtype
|
4
4
|
|
5
5
|
PATTERNS = {
|
6
|
-
/^[>]+\s?/
|
7
|
-
/^--/
|
8
|
-
/^-- /
|
9
|
-
/^[_]{2,}\n?/
|
10
|
-
/^[-]{2,}\n?/
|
6
|
+
/^[>]+\s?/ => :quote,
|
7
|
+
/^--/ => :signature,
|
8
|
+
/^-- / => :signature,
|
9
|
+
/^[_]{2,}\n?/ => :signature,
|
10
|
+
/^[-]{2,}\n?/ => :signature,
|
11
|
+
/^sent from my (iphone|ipad)/i => :signature
|
11
12
|
}
|
12
13
|
|
13
14
|
def initialize(str)
|
14
|
-
@body
|
15
|
+
@body = str
|
16
|
+
@subtype = :none
|
17
|
+
|
15
18
|
detect_type(str)
|
16
19
|
end
|
17
20
|
|
@@ -38,11 +41,13 @@ module MailExtract
|
|
38
41
|
def detect_type(line)
|
39
42
|
# Detects the start line of quote text
|
40
43
|
if line.strip =~ /^On\s/i && line =~ /at [\d:]+/ || line.strip =~ />? wrote:\z/
|
41
|
-
@type
|
44
|
+
@type = :quote
|
45
|
+
@subtype = :start
|
42
46
|
return
|
43
47
|
end
|
44
48
|
|
45
49
|
@type = :text
|
50
|
+
|
46
51
|
PATTERNS.each_pair do |p,t|
|
47
52
|
if line =~ p
|
48
53
|
@type = t
|
data/lib/mail_extract/parser.rb
CHANGED
@@ -6,14 +6,20 @@ module MailExtract
|
|
6
6
|
|
7
7
|
# Initialize a new MailExtract::Parser object
|
8
8
|
#
|
9
|
-
# text
|
9
|
+
# text - Email message body
|
10
|
+
# options - Parsing options
|
10
11
|
#
|
11
|
-
|
12
|
+
# Parsing options include:
|
13
|
+
# :only_head - Skip the rest of the message after quote start (default: false)
|
14
|
+
#
|
15
|
+
def initialize(text, options={})
|
12
16
|
@lines = []
|
13
17
|
@text = text.strip
|
14
18
|
@body = ""
|
15
19
|
@last_type = :text
|
16
20
|
@type = :text
|
21
|
+
@options = options
|
22
|
+
|
17
23
|
parse
|
18
24
|
end
|
19
25
|
|
@@ -22,13 +28,25 @@ module MailExtract
|
|
22
28
|
# Process email message body
|
23
29
|
#
|
24
30
|
def parse
|
31
|
+
break_after_quote = @options[:only_head] || false
|
25
32
|
scanner = StringScanner.new(@text)
|
33
|
+
|
34
|
+
# Process until message end
|
26
35
|
while str = scanner.scan_until(/\n/)
|
27
|
-
parse_line(str)
|
36
|
+
line = parse_line(str)
|
37
|
+
|
38
|
+
if break_after_quote
|
39
|
+
break if line.quote? && line.subtype == :start
|
40
|
+
end
|
28
41
|
end
|
29
|
-
|
30
|
-
|
42
|
+
|
43
|
+
# Process the rest (if any)
|
44
|
+
if !break_after_quote && @last_type != :quote
|
45
|
+
if (last_line = scanner.rest.to_s).size > 0
|
46
|
+
parse_line(last_line)
|
47
|
+
end
|
31
48
|
end
|
49
|
+
|
32
50
|
@body = @lines.join("\n").strip
|
33
51
|
end
|
34
52
|
|
@@ -36,6 +54,7 @@ module MailExtract
|
|
36
54
|
#
|
37
55
|
def parse_line(str)
|
38
56
|
line = MailExtract::Line.new(str)
|
57
|
+
|
39
58
|
if line.quote?
|
40
59
|
if @last_type == :text ; @type = :quote ; end
|
41
60
|
elsif line.text?
|
@@ -47,6 +66,8 @@ module MailExtract
|
|
47
66
|
end
|
48
67
|
@last_type = line.type
|
49
68
|
@lines << line.body.strip if @type == :text
|
69
|
+
|
70
|
+
line
|
50
71
|
end
|
51
72
|
end
|
52
73
|
end
|
data/lib/mail_extract/version.rb
CHANGED
data/mail_extract.gemspec
CHANGED
@@ -0,0 +1,18 @@
|
|
1
|
+
|
2
|
+
Primary reply content
|
3
|
+
|
4
|
+
--
|
5
|
+
I take full responsibility for any typos and refuse to blame them on my
|
6
|
+
cellphone
|
7
|
+
|
8
|
+
On Aug 12, 2011, at 6:00 AM, Robot <noreply@foobar.com> wrote:
|
9
|
+
|
10
|
+
Hi Dude,
|
11
|
+
|
12
|
+
Some text goes here, with no quotes
|
13
|
+
Blah blah blah
|
14
|
+
Blah!
|
15
|
+
Blah!!!!
|
16
|
+
|
17
|
+
Thanks,
|
18
|
+
Whatever
|
data/spec/line_spec.rb
CHANGED
@@ -7,12 +7,15 @@ describe 'MailExtract::Line' do
|
|
7
7
|
|
8
8
|
it 'detects quote start by date' do
|
9
9
|
line('On Tue, 2011-03-01 at 18:02 +0530, somebody wrote:').type.should == :quote
|
10
|
+
line('On Tue, 2011-03-01 at 18:02 +0530, somebody wrote:').subtype.should == :start
|
10
11
|
line('On 2011-03-01 at 18:02 somebody wrote').type.should == :quote
|
12
|
+
line('On 2011-03-01 at 18:02 somebody wrote').subtype.should == :start
|
11
13
|
line('On some day somebody wrote').type.should == :text
|
12
14
|
end
|
13
15
|
|
14
16
|
it 'detects quote' do
|
15
17
|
line('> this is a quote').type.should == :quote
|
18
|
+
line('> this is a quote').subtype.should == :none
|
16
19
|
line('> >> this is a quote').type.should == :quote
|
17
20
|
end
|
18
21
|
|
@@ -20,7 +23,9 @@ describe 'MailExtract::Line' do
|
|
20
23
|
lines = [
|
21
24
|
"--\nUsername",
|
22
25
|
"-- \nUsername",
|
23
|
-
"_______\nSome text"
|
26
|
+
"_______\nSome text",
|
27
|
+
"Sent from my iPhone",
|
28
|
+
"Sent from my iPad"
|
24
29
|
]
|
25
30
|
|
26
31
|
lines.each do |l|
|
data/spec/parser_spec.rb
CHANGED
@@ -2,17 +2,27 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
describe 'MailExtract::Parser' do
|
4
4
|
it 'parses an email' do
|
5
|
-
body =
|
5
|
+
body = parse_fixture('simple.txt')
|
6
6
|
body.should == result_fixture('simple.txt')
|
7
7
|
end
|
8
8
|
|
9
9
|
it 'parses an email with quotes' do
|
10
|
-
body =
|
10
|
+
body = parse_fixture('simple_with_quotes.txt')
|
11
11
|
body.should == result_fixture('simple_with_quotes.txt')
|
12
12
|
end
|
13
13
|
|
14
14
|
it 'parses a reply email with broken authored line' do
|
15
|
-
body =
|
15
|
+
body = parse_fixture('reply_with_quotes.txt')
|
16
16
|
body.should == 'This is a first line of the message'
|
17
17
|
end
|
18
|
+
|
19
|
+
it 'parses a message send via iphone' do
|
20
|
+
body = parse_fixture('iphone.txt')
|
21
|
+
body.should == 'This is a shit i sent from my iphone'
|
22
|
+
end
|
23
|
+
|
24
|
+
it 'parses a reply sent via iphone' do
|
25
|
+
body = MailExtract.new(fixture('iphone_with_quotes.txt'), :only_head => true).body
|
26
|
+
body.should == 'Primary reply content'
|
27
|
+
end
|
18
28
|
end
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,35 +1,53 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: mail_extract
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
3
|
+
version: !ruby/object:Gem::Version
|
5
4
|
prerelease:
|
5
|
+
version: 0.1.3
|
6
6
|
platform: ruby
|
7
|
-
authors:
|
7
|
+
authors:
|
8
8
|
- Dan Sosedoff
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
|
13
|
-
|
14
|
-
|
12
|
+
|
13
|
+
date: 2011-08-12 00:00:00 -05:00
|
14
|
+
default_executable:
|
15
|
+
dependencies:
|
16
|
+
- !ruby/object:Gem::Dependency
|
17
|
+
name: rake
|
18
|
+
prerelease: false
|
19
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
20
|
+
none: false
|
21
|
+
requirements:
|
22
|
+
- - ">="
|
23
|
+
- !ruby/object:Gem::Version
|
24
|
+
version: "0"
|
25
|
+
type: :development
|
26
|
+
version_requirements: *id001
|
27
|
+
- !ruby/object:Gem::Dependency
|
15
28
|
name: rspec
|
16
|
-
|
29
|
+
prerelease: false
|
30
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
17
31
|
none: false
|
18
|
-
requirements:
|
32
|
+
requirements:
|
19
33
|
- - ~>
|
20
|
-
- !ruby/object:Gem::Version
|
21
|
-
version:
|
34
|
+
- !ruby/object:Gem::Version
|
35
|
+
version: "2.6"
|
22
36
|
type: :development
|
23
|
-
|
24
|
-
version_requirements: *72663040
|
37
|
+
version_requirements: *id002
|
25
38
|
description: Email body parser that strips out all quotes and signatures.
|
26
39
|
email: dan.sosedoff@gmail.com
|
27
40
|
executables: []
|
41
|
+
|
28
42
|
extensions: []
|
43
|
+
|
29
44
|
extra_rdoc_files: []
|
30
|
-
|
45
|
+
|
46
|
+
files:
|
31
47
|
- .gitignore
|
32
48
|
- .rspec
|
49
|
+
- .travis.yml
|
50
|
+
- Gemfile
|
33
51
|
- README.md
|
34
52
|
- Rakefile
|
35
53
|
- lib/mail_extract.rb
|
@@ -37,6 +55,8 @@ files:
|
|
37
55
|
- lib/mail_extract/parser.rb
|
38
56
|
- lib/mail_extract/version.rb
|
39
57
|
- mail_extract.gemspec
|
58
|
+
- spec/fixtures/iphone.txt
|
59
|
+
- spec/fixtures/iphone_with_quotes.txt
|
40
60
|
- spec/fixtures/reply_with_quotes.txt
|
41
61
|
- spec/fixtures/result_simple.txt
|
42
62
|
- spec/fixtures/result_simple_with_quotes.txt
|
@@ -45,28 +65,42 @@ files:
|
|
45
65
|
- spec/line_spec.rb
|
46
66
|
- spec/parser_spec.rb
|
47
67
|
- spec/spec_helper.rb
|
68
|
+
has_rdoc: true
|
48
69
|
homepage: https://github.com/sosedoff/mail_extract
|
49
70
|
licenses: []
|
71
|
+
|
50
72
|
post_install_message:
|
51
73
|
rdoc_options: []
|
52
|
-
|
74
|
+
|
75
|
+
require_paths:
|
53
76
|
- lib
|
54
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
77
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
55
78
|
none: false
|
56
|
-
requirements:
|
57
|
-
- -
|
58
|
-
- !ruby/object:Gem::Version
|
59
|
-
version:
|
60
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: "0"
|
83
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
61
84
|
none: false
|
62
|
-
requirements:
|
63
|
-
- -
|
64
|
-
- !ruby/object:Gem::Version
|
65
|
-
version:
|
85
|
+
requirements:
|
86
|
+
- - ">="
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
version: "0"
|
66
89
|
requirements: []
|
90
|
+
|
67
91
|
rubyforge_project:
|
68
|
-
rubygems_version: 1.
|
92
|
+
rubygems_version: 1.6.2
|
69
93
|
signing_key:
|
70
94
|
specification_version: 3
|
71
95
|
summary: Extracts email message body
|
72
|
-
test_files:
|
96
|
+
test_files:
|
97
|
+
- spec/fixtures/iphone.txt
|
98
|
+
- spec/fixtures/iphone_with_quotes.txt
|
99
|
+
- spec/fixtures/reply_with_quotes.txt
|
100
|
+
- spec/fixtures/result_simple.txt
|
101
|
+
- spec/fixtures/result_simple_with_quotes.txt
|
102
|
+
- spec/fixtures/simple.txt
|
103
|
+
- spec/fixtures/simple_with_quotes.txt
|
104
|
+
- spec/line_spec.rb
|
105
|
+
- spec/parser_spec.rb
|
106
|
+
- spec/spec_helper.rb
|