mail_extract 0.1.2 → 0.1.3

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
data/.travis.yml ADDED
@@ -0,0 +1,4 @@
1
+ rvm:
2
+ - 1.8.7
3
+ - 1.9.2
4
+ - ree
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source 'http://rubygems.org'
2
+
3
+ gemspec
data/lib/mail_extract.rb CHANGED
@@ -7,8 +7,8 @@ module MailExtract
7
7
  #
8
8
  # @return [MailExtract::Parser]
9
9
  #
10
- def new(body)
11
- MailExtract::Parser.new(body)
10
+ def new(body, options={})
11
+ MailExtract::Parser.new(body, options)
12
12
  end
13
13
  end
14
14
  end
@@ -1,17 +1,20 @@
1
1
  module MailExtract
2
2
  class Line
3
- attr_reader :body, :type
3
+ attr_reader :body, :type, :subtype
4
4
 
5
5
  PATTERNS = {
6
- /^[>]+\s?/ => :quote,
7
- /^--/ => :signature,
8
- /^-- / => :signature,
9
- /^[_]{2,}\n?/ => :signature,
10
- /^[-]{2,}\n?/ => :signature
6
+ /^[>]+\s?/ => :quote,
7
+ /^--/ => :signature,
8
+ /^-- / => :signature,
9
+ /^[_]{2,}\n?/ => :signature,
10
+ /^[-]{2,}\n?/ => :signature,
11
+ /^sent from my (iphone|ipad)/i => :signature
11
12
  }
12
13
 
13
14
  def initialize(str)
14
- @body = str
15
+ @body = str
16
+ @subtype = :none
17
+
15
18
  detect_type(str)
16
19
  end
17
20
 
@@ -38,11 +41,13 @@ module MailExtract
38
41
  def detect_type(line)
39
42
  # Detects the start line of quote text
40
43
  if line.strip =~ /^On\s/i && line =~ /at [\d:]+/ || line.strip =~ />? wrote:\z/
41
- @type = :quote
44
+ @type = :quote
45
+ @subtype = :start
42
46
  return
43
47
  end
44
48
 
45
49
  @type = :text
50
+
46
51
  PATTERNS.each_pair do |p,t|
47
52
  if line =~ p
48
53
  @type = t
@@ -6,14 +6,20 @@ module MailExtract
6
6
 
7
7
  # Initialize a new MailExtract::Parser object
8
8
  #
9
- # text - Email message body
9
+ # text - Email message body
10
+ # options - Parsing options
10
11
  #
11
- def initialize(text)
12
+ # Parsing options include:
13
+ # :only_head - Skip the rest of the message after quote start (default: false)
14
+ #
15
+ def initialize(text, options={})
12
16
  @lines = []
13
17
  @text = text.strip
14
18
  @body = ""
15
19
  @last_type = :text
16
20
  @type = :text
21
+ @options = options
22
+
17
23
  parse
18
24
  end
19
25
 
@@ -22,13 +28,25 @@ module MailExtract
22
28
  # Process email message body
23
29
  #
24
30
  def parse
31
+ break_after_quote = @options[:only_head] || false
25
32
  scanner = StringScanner.new(@text)
33
+
34
+ # Process until message end
26
35
  while str = scanner.scan_until(/\n/)
27
- parse_line(str)
36
+ line = parse_line(str)
37
+
38
+ if break_after_quote
39
+ break if line.quote? && line.subtype == :start
40
+ end
28
41
  end
29
- if (last_line = scanner.rest.to_s).size > 0
30
- parse_line(last_line)
42
+
43
+ # Process the rest (if any)
44
+ if !break_after_quote && @last_type != :quote
45
+ if (last_line = scanner.rest.to_s).size > 0
46
+ parse_line(last_line)
47
+ end
31
48
  end
49
+
32
50
  @body = @lines.join("\n").strip
33
51
  end
34
52
 
@@ -36,6 +54,7 @@ module MailExtract
36
54
  #
37
55
  def parse_line(str)
38
56
  line = MailExtract::Line.new(str)
57
+
39
58
  if line.quote?
40
59
  if @last_type == :text ; @type = :quote ; end
41
60
  elsif line.text?
@@ -47,6 +66,8 @@ module MailExtract
47
66
  end
48
67
  @last_type = line.type
49
68
  @lines << line.body.strip if @type == :text
69
+
70
+ line
50
71
  end
51
72
  end
52
73
  end
@@ -1,5 +1,5 @@
1
1
  module MailExtract
2
2
  unless defined? ::MailExtract::VERSION
3
- VERSION = "0.1.2".freeze
3
+ VERSION = "0.1.3".freeze
4
4
  end
5
5
  end
data/mail_extract.gemspec CHANGED
@@ -15,5 +15,6 @@ Gem::Specification.new do |gem|
15
15
  gem.executables = `git ls-files -- bin/*`.split("\n").map{|f| File.basename(f)}
16
16
  gem.require_paths = ['lib']
17
17
 
18
+ gem.add_development_dependency 'rake'
18
19
  gem.add_development_dependency 'rspec', '~> 2.6'
19
20
  end
@@ -0,0 +1,3 @@
1
+ This is a shit i sent from my iphone
2
+
3
+ Sent from my iPhone
@@ -0,0 +1,18 @@
1
+
2
+ Primary reply content
3
+
4
+ --
5
+ I take full responsibility for any typos and refuse to blame them on my
6
+ cellphone
7
+
8
+ On Aug 12, 2011, at 6:00 AM, Robot <noreply@foobar.com> wrote:
9
+
10
+ Hi Dude,
11
+
12
+ Some text goes here, with no quotes
13
+ Blah blah blah
14
+ Blah!
15
+ Blah!!!!
16
+
17
+ Thanks,
18
+ Whatever
data/spec/line_spec.rb CHANGED
@@ -7,12 +7,15 @@ describe 'MailExtract::Line' do
7
7
 
8
8
  it 'detects quote start by date' do
9
9
  line('On Tue, 2011-03-01 at 18:02 +0530, somebody wrote:').type.should == :quote
10
+ line('On Tue, 2011-03-01 at 18:02 +0530, somebody wrote:').subtype.should == :start
10
11
  line('On 2011-03-01 at 18:02 somebody wrote').type.should == :quote
12
+ line('On 2011-03-01 at 18:02 somebody wrote').subtype.should == :start
11
13
  line('On some day somebody wrote').type.should == :text
12
14
  end
13
15
 
14
16
  it 'detects quote' do
15
17
  line('> this is a quote').type.should == :quote
18
+ line('> this is a quote').subtype.should == :none
16
19
  line('> >> this is a quote').type.should == :quote
17
20
  end
18
21
 
@@ -20,7 +23,9 @@ describe 'MailExtract::Line' do
20
23
  lines = [
21
24
  "--\nUsername",
22
25
  "-- \nUsername",
23
- "_______\nSome text"
26
+ "_______\nSome text",
27
+ "Sent from my iPhone",
28
+ "Sent from my iPad"
24
29
  ]
25
30
 
26
31
  lines.each do |l|
data/spec/parser_spec.rb CHANGED
@@ -2,17 +2,27 @@ require 'spec_helper'
2
2
 
3
3
  describe 'MailExtract::Parser' do
4
4
  it 'parses an email' do
5
- body = MailExtract.new(fixture('simple.txt')).body
5
+ body = parse_fixture('simple.txt')
6
6
  body.should == result_fixture('simple.txt')
7
7
  end
8
8
 
9
9
  it 'parses an email with quotes' do
10
- body = MailExtract.new(fixture('simple_with_quotes.txt')).body
10
+ body = parse_fixture('simple_with_quotes.txt')
11
11
  body.should == result_fixture('simple_with_quotes.txt')
12
12
  end
13
13
 
14
14
  it 'parses a reply email with broken authored line' do
15
- body = MailExtract.new(fixture('reply_with_quotes.txt')).body
15
+ body = parse_fixture('reply_with_quotes.txt')
16
16
  body.should == 'This is a first line of the message'
17
17
  end
18
+
19
+ it 'parses a message send via iphone' do
20
+ body = parse_fixture('iphone.txt')
21
+ body.should == 'This is a shit i sent from my iphone'
22
+ end
23
+
24
+ it 'parses a reply sent via iphone' do
25
+ body = MailExtract.new(fixture('iphone_with_quotes.txt'), :only_head => true).body
26
+ body.should == 'Primary reply content'
27
+ end
18
28
  end
data/spec/spec_helper.rb CHANGED
@@ -15,3 +15,7 @@ end
15
15
  def result_fixture(file)
16
16
  fixture("result_#{file}")
17
17
  end
18
+
19
+ def parse_fixture(file)
20
+ MailExtract.new(fixture(file)).body
21
+ end
metadata CHANGED
@@ -1,35 +1,53 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: mail_extract
3
- version: !ruby/object:Gem::Version
4
- version: 0.1.2
3
+ version: !ruby/object:Gem::Version
5
4
  prerelease:
5
+ version: 0.1.3
6
6
  platform: ruby
7
- authors:
7
+ authors:
8
8
  - Dan Sosedoff
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-08-11 00:00:00.000000000Z
13
- dependencies:
14
- - !ruby/object:Gem::Dependency
12
+
13
+ date: 2011-08-12 00:00:00 -05:00
14
+ default_executable:
15
+ dependencies:
16
+ - !ruby/object:Gem::Dependency
17
+ name: rake
18
+ prerelease: false
19
+ requirement: &id001 !ruby/object:Gem::Requirement
20
+ none: false
21
+ requirements:
22
+ - - ">="
23
+ - !ruby/object:Gem::Version
24
+ version: "0"
25
+ type: :development
26
+ version_requirements: *id001
27
+ - !ruby/object:Gem::Dependency
15
28
  name: rspec
16
- requirement: &72663040 !ruby/object:Gem::Requirement
29
+ prerelease: false
30
+ requirement: &id002 !ruby/object:Gem::Requirement
17
31
  none: false
18
- requirements:
32
+ requirements:
19
33
  - - ~>
20
- - !ruby/object:Gem::Version
21
- version: '2.6'
34
+ - !ruby/object:Gem::Version
35
+ version: "2.6"
22
36
  type: :development
23
- prerelease: false
24
- version_requirements: *72663040
37
+ version_requirements: *id002
25
38
  description: Email body parser that strips out all quotes and signatures.
26
39
  email: dan.sosedoff@gmail.com
27
40
  executables: []
41
+
28
42
  extensions: []
43
+
29
44
  extra_rdoc_files: []
30
- files:
45
+
46
+ files:
31
47
  - .gitignore
32
48
  - .rspec
49
+ - .travis.yml
50
+ - Gemfile
33
51
  - README.md
34
52
  - Rakefile
35
53
  - lib/mail_extract.rb
@@ -37,6 +55,8 @@ files:
37
55
  - lib/mail_extract/parser.rb
38
56
  - lib/mail_extract/version.rb
39
57
  - mail_extract.gemspec
58
+ - spec/fixtures/iphone.txt
59
+ - spec/fixtures/iphone_with_quotes.txt
40
60
  - spec/fixtures/reply_with_quotes.txt
41
61
  - spec/fixtures/result_simple.txt
42
62
  - spec/fixtures/result_simple_with_quotes.txt
@@ -45,28 +65,42 @@ files:
45
65
  - spec/line_spec.rb
46
66
  - spec/parser_spec.rb
47
67
  - spec/spec_helper.rb
68
+ has_rdoc: true
48
69
  homepage: https://github.com/sosedoff/mail_extract
49
70
  licenses: []
71
+
50
72
  post_install_message:
51
73
  rdoc_options: []
52
- require_paths:
74
+
75
+ require_paths:
53
76
  - lib
54
- required_ruby_version: !ruby/object:Gem::Requirement
77
+ required_ruby_version: !ruby/object:Gem::Requirement
55
78
  none: false
56
- requirements:
57
- - - ! '>='
58
- - !ruby/object:Gem::Version
59
- version: '0'
60
- required_rubygems_version: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: "0"
83
+ required_rubygems_version: !ruby/object:Gem::Requirement
61
84
  none: false
62
- requirements:
63
- - - ! '>='
64
- - !ruby/object:Gem::Version
65
- version: '0'
85
+ requirements:
86
+ - - ">="
87
+ - !ruby/object:Gem::Version
88
+ version: "0"
66
89
  requirements: []
90
+
67
91
  rubyforge_project:
68
- rubygems_version: 1.8.5
92
+ rubygems_version: 1.6.2
69
93
  signing_key:
70
94
  specification_version: 3
71
95
  summary: Extracts email message body
72
- test_files: []
96
+ test_files:
97
+ - spec/fixtures/iphone.txt
98
+ - spec/fixtures/iphone_with_quotes.txt
99
+ - spec/fixtures/reply_with_quotes.txt
100
+ - spec/fixtures/result_simple.txt
101
+ - spec/fixtures/result_simple_with_quotes.txt
102
+ - spec/fixtures/simple.txt
103
+ - spec/fixtures/simple_with_quotes.txt
104
+ - spec/line_spec.rb
105
+ - spec/parser_spec.rb
106
+ - spec/spec_helper.rb