mail_extract 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
data/.travis.yml ADDED
@@ -0,0 +1,4 @@
1
+ rvm:
2
+ - 1.8.7
3
+ - 1.9.2
4
+ - ree
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source 'http://rubygems.org'
2
+
3
+ gemspec
data/lib/mail_extract.rb CHANGED
@@ -7,8 +7,8 @@ module MailExtract
7
7
  #
8
8
  # @return [MailExtract::Parser]
9
9
  #
10
- def new(body)
11
- MailExtract::Parser.new(body)
10
+ def new(body, options={})
11
+ MailExtract::Parser.new(body, options)
12
12
  end
13
13
  end
14
14
  end
@@ -1,17 +1,20 @@
1
1
  module MailExtract
2
2
  class Line
3
- attr_reader :body, :type
3
+ attr_reader :body, :type, :subtype
4
4
 
5
5
  PATTERNS = {
6
- /^[>]+\s?/ => :quote,
7
- /^--/ => :signature,
8
- /^-- / => :signature,
9
- /^[_]{2,}\n?/ => :signature,
10
- /^[-]{2,}\n?/ => :signature
6
+ /^[>]+\s?/ => :quote,
7
+ /^--/ => :signature,
8
+ /^-- / => :signature,
9
+ /^[_]{2,}\n?/ => :signature,
10
+ /^[-]{2,}\n?/ => :signature,
11
+ /^sent from my (iphone|ipad)/i => :signature
11
12
  }
12
13
 
13
14
  def initialize(str)
14
- @body = str
15
+ @body = str
16
+ @subtype = :none
17
+
15
18
  detect_type(str)
16
19
  end
17
20
 
@@ -38,11 +41,13 @@ module MailExtract
38
41
  def detect_type(line)
39
42
  # Detects the start line of quote text
40
43
  if line.strip =~ /^On\s/i && line =~ /at [\d:]+/ || line.strip =~ />? wrote:\z/
41
- @type = :quote
44
+ @type = :quote
45
+ @subtype = :start
42
46
  return
43
47
  end
44
48
 
45
49
  @type = :text
50
+
46
51
  PATTERNS.each_pair do |p,t|
47
52
  if line =~ p
48
53
  @type = t
@@ -6,14 +6,20 @@ module MailExtract
6
6
 
7
7
  # Initialize a new MailExtract::Parser object
8
8
  #
9
- # text - Email message body
9
+ # text - Email message body
10
+ # options - Parsing options
10
11
  #
11
- def initialize(text)
12
+ # Parsing options include:
13
+ # :only_head - Skip the rest of the message after quote start (default: false)
14
+ #
15
+ def initialize(text, options={})
12
16
  @lines = []
13
17
  @text = text.strip
14
18
  @body = ""
15
19
  @last_type = :text
16
20
  @type = :text
21
+ @options = options
22
+
17
23
  parse
18
24
  end
19
25
 
@@ -22,13 +28,25 @@ module MailExtract
22
28
  # Process email message body
23
29
  #
24
30
  def parse
31
+ break_after_quote = @options[:only_head] || false
25
32
  scanner = StringScanner.new(@text)
33
+
34
+ # Process until message end
26
35
  while str = scanner.scan_until(/\n/)
27
- parse_line(str)
36
+ line = parse_line(str)
37
+
38
+ if break_after_quote
39
+ break if line.quote? && line.subtype == :start
40
+ end
28
41
  end
29
- if (last_line = scanner.rest.to_s).size > 0
30
- parse_line(last_line)
42
+
43
+ # Process the rest (if any)
44
+ if !break_after_quote && @last_type != :quote
45
+ if (last_line = scanner.rest.to_s).size > 0
46
+ parse_line(last_line)
47
+ end
31
48
  end
49
+
32
50
  @body = @lines.join("\n").strip
33
51
  end
34
52
 
@@ -36,6 +54,7 @@ module MailExtract
36
54
  #
37
55
  def parse_line(str)
38
56
  line = MailExtract::Line.new(str)
57
+
39
58
  if line.quote?
40
59
  if @last_type == :text ; @type = :quote ; end
41
60
  elsif line.text?
@@ -47,6 +66,8 @@ module MailExtract
47
66
  end
48
67
  @last_type = line.type
49
68
  @lines << line.body.strip if @type == :text
69
+
70
+ line
50
71
  end
51
72
  end
52
73
  end
@@ -1,5 +1,5 @@
1
1
  module MailExtract
2
2
  unless defined? ::MailExtract::VERSION
3
- VERSION = "0.1.2".freeze
3
+ VERSION = "0.1.3".freeze
4
4
  end
5
5
  end
data/mail_extract.gemspec CHANGED
@@ -15,5 +15,6 @@ Gem::Specification.new do |gem|
15
15
  gem.executables = `git ls-files -- bin/*`.split("\n").map{|f| File.basename(f)}
16
16
  gem.require_paths = ['lib']
17
17
 
18
+ gem.add_development_dependency 'rake'
18
19
  gem.add_development_dependency 'rspec', '~> 2.6'
19
20
  end
@@ -0,0 +1,3 @@
1
+ This is a shit i sent from my iphone
2
+
3
+ Sent from my iPhone
@@ -0,0 +1,18 @@
1
+
2
+ Primary reply content
3
+
4
+ --
5
+ I take full responsibility for any typos and refuse to blame them on my
6
+ cellphone
7
+
8
+ On Aug 12, 2011, at 6:00 AM, Robot <noreply@foobar.com> wrote:
9
+
10
+ Hi Dude,
11
+
12
+ Some text goes here, with no quotes
13
+ Blah blah blah
14
+ Blah!
15
+ Blah!!!!
16
+
17
+ Thanks,
18
+ Whatever
data/spec/line_spec.rb CHANGED
@@ -7,12 +7,15 @@ describe 'MailExtract::Line' do
7
7
 
8
8
  it 'detects quote start by date' do
9
9
  line('On Tue, 2011-03-01 at 18:02 +0530, somebody wrote:').type.should == :quote
10
+ line('On Tue, 2011-03-01 at 18:02 +0530, somebody wrote:').subtype.should == :start
10
11
  line('On 2011-03-01 at 18:02 somebody wrote').type.should == :quote
12
+ line('On 2011-03-01 at 18:02 somebody wrote').subtype.should == :start
11
13
  line('On some day somebody wrote').type.should == :text
12
14
  end
13
15
 
14
16
  it 'detects quote' do
15
17
  line('> this is a quote').type.should == :quote
18
+ line('> this is a quote').subtype.should == :none
16
19
  line('> >> this is a quote').type.should == :quote
17
20
  end
18
21
 
@@ -20,7 +23,9 @@ describe 'MailExtract::Line' do
20
23
  lines = [
21
24
  "--\nUsername",
22
25
  "-- \nUsername",
23
- "_______\nSome text"
26
+ "_______\nSome text",
27
+ "Sent from my iPhone",
28
+ "Sent from my iPad"
24
29
  ]
25
30
 
26
31
  lines.each do |l|
data/spec/parser_spec.rb CHANGED
@@ -2,17 +2,27 @@ require 'spec_helper'
2
2
 
3
3
  describe 'MailExtract::Parser' do
4
4
  it 'parses an email' do
5
- body = MailExtract.new(fixture('simple.txt')).body
5
+ body = parse_fixture('simple.txt')
6
6
  body.should == result_fixture('simple.txt')
7
7
  end
8
8
 
9
9
  it 'parses an email with quotes' do
10
- body = MailExtract.new(fixture('simple_with_quotes.txt')).body
10
+ body = parse_fixture('simple_with_quotes.txt')
11
11
  body.should == result_fixture('simple_with_quotes.txt')
12
12
  end
13
13
 
14
14
  it 'parses a reply email with broken authored line' do
15
- body = MailExtract.new(fixture('reply_with_quotes.txt')).body
15
+ body = parse_fixture('reply_with_quotes.txt')
16
16
  body.should == 'This is a first line of the message'
17
17
  end
18
+
19
+ it 'parses a message send via iphone' do
20
+ body = parse_fixture('iphone.txt')
21
+ body.should == 'This is a shit i sent from my iphone'
22
+ end
23
+
24
+ it 'parses a reply sent via iphone' do
25
+ body = MailExtract.new(fixture('iphone_with_quotes.txt'), :only_head => true).body
26
+ body.should == 'Primary reply content'
27
+ end
18
28
  end
data/spec/spec_helper.rb CHANGED
@@ -15,3 +15,7 @@ end
15
15
  def result_fixture(file)
16
16
  fixture("result_#{file}")
17
17
  end
18
+
19
+ def parse_fixture(file)
20
+ MailExtract.new(fixture(file)).body
21
+ end
metadata CHANGED
@@ -1,35 +1,53 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: mail_extract
3
- version: !ruby/object:Gem::Version
4
- version: 0.1.2
3
+ version: !ruby/object:Gem::Version
5
4
  prerelease:
5
+ version: 0.1.3
6
6
  platform: ruby
7
- authors:
7
+ authors:
8
8
  - Dan Sosedoff
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-08-11 00:00:00.000000000Z
13
- dependencies:
14
- - !ruby/object:Gem::Dependency
12
+
13
+ date: 2011-08-12 00:00:00 -05:00
14
+ default_executable:
15
+ dependencies:
16
+ - !ruby/object:Gem::Dependency
17
+ name: rake
18
+ prerelease: false
19
+ requirement: &id001 !ruby/object:Gem::Requirement
20
+ none: false
21
+ requirements:
22
+ - - ">="
23
+ - !ruby/object:Gem::Version
24
+ version: "0"
25
+ type: :development
26
+ version_requirements: *id001
27
+ - !ruby/object:Gem::Dependency
15
28
  name: rspec
16
- requirement: &72663040 !ruby/object:Gem::Requirement
29
+ prerelease: false
30
+ requirement: &id002 !ruby/object:Gem::Requirement
17
31
  none: false
18
- requirements:
32
+ requirements:
19
33
  - - ~>
20
- - !ruby/object:Gem::Version
21
- version: '2.6'
34
+ - !ruby/object:Gem::Version
35
+ version: "2.6"
22
36
  type: :development
23
- prerelease: false
24
- version_requirements: *72663040
37
+ version_requirements: *id002
25
38
  description: Email body parser that strips out all quotes and signatures.
26
39
  email: dan.sosedoff@gmail.com
27
40
  executables: []
41
+
28
42
  extensions: []
43
+
29
44
  extra_rdoc_files: []
30
- files:
45
+
46
+ files:
31
47
  - .gitignore
32
48
  - .rspec
49
+ - .travis.yml
50
+ - Gemfile
33
51
  - README.md
34
52
  - Rakefile
35
53
  - lib/mail_extract.rb
@@ -37,6 +55,8 @@ files:
37
55
  - lib/mail_extract/parser.rb
38
56
  - lib/mail_extract/version.rb
39
57
  - mail_extract.gemspec
58
+ - spec/fixtures/iphone.txt
59
+ - spec/fixtures/iphone_with_quotes.txt
40
60
  - spec/fixtures/reply_with_quotes.txt
41
61
  - spec/fixtures/result_simple.txt
42
62
  - spec/fixtures/result_simple_with_quotes.txt
@@ -45,28 +65,42 @@ files:
45
65
  - spec/line_spec.rb
46
66
  - spec/parser_spec.rb
47
67
  - spec/spec_helper.rb
68
+ has_rdoc: true
48
69
  homepage: https://github.com/sosedoff/mail_extract
49
70
  licenses: []
71
+
50
72
  post_install_message:
51
73
  rdoc_options: []
52
- require_paths:
74
+
75
+ require_paths:
53
76
  - lib
54
- required_ruby_version: !ruby/object:Gem::Requirement
77
+ required_ruby_version: !ruby/object:Gem::Requirement
55
78
  none: false
56
- requirements:
57
- - - ! '>='
58
- - !ruby/object:Gem::Version
59
- version: '0'
60
- required_rubygems_version: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: "0"
83
+ required_rubygems_version: !ruby/object:Gem::Requirement
61
84
  none: false
62
- requirements:
63
- - - ! '>='
64
- - !ruby/object:Gem::Version
65
- version: '0'
85
+ requirements:
86
+ - - ">="
87
+ - !ruby/object:Gem::Version
88
+ version: "0"
66
89
  requirements: []
90
+
67
91
  rubyforge_project:
68
- rubygems_version: 1.8.5
92
+ rubygems_version: 1.6.2
69
93
  signing_key:
70
94
  specification_version: 3
71
95
  summary: Extracts email message body
72
- test_files: []
96
+ test_files:
97
+ - spec/fixtures/iphone.txt
98
+ - spec/fixtures/iphone_with_quotes.txt
99
+ - spec/fixtures/reply_with_quotes.txt
100
+ - spec/fixtures/result_simple.txt
101
+ - spec/fixtures/result_simple_with_quotes.txt
102
+ - spec/fixtures/simple.txt
103
+ - spec/fixtures/simple_with_quotes.txt
104
+ - spec/line_spec.rb
105
+ - spec/parser_spec.rb
106
+ - spec/spec_helper.rb