mail_parser 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,17 @@
1
+ require "gmail"
2
+
3
# Fetches Amazon order-confirmation mails from a Gmail inbox and stores each
# decoded body as an HTML sample file for the specs.
#
# The Gmail username and password are read from the first two command-line
# arguments.

# Absolute path of the samples directory, resolved relative to this file so
# the script writes to the same place regardless of the working directory.
samples_dir = File.expand_path(File.join(File.dirname(__FILE__), "../spec/samples"))

username = ARGV[0]
password = ARGV[1]

Gmail.connect username, password do |gmail|
  mails = gmail.inbox.find(from: "auto-confirm@amazon.com")

  mails.each_with_index do |mail, index|
    # One file per message: amazon_order0.html, amazon_order1.html, ...
    File.open(File.join(samples_dir, "amazon_order#{index}.html"), "w") do |f|
      # Dump this iteration's message, not the whole `mails` collection.
      f.puts(mail.body.decoded)
    end
  end
end
@@ -0,0 +1,3 @@
1
+ require "patterns/pattern"
2
+ require "patterns/amazon_pattern"
3
+ require "mail_parser/mail_parser"
@@ -0,0 +1,72 @@
1
# Line-oriented extractor that pulls named fields out of a mail body using
# regular expressions.
#
# The body is split into lines and scanned forward with a cursor (+index+);
# every extracted value is stored in the +data+ hash under the requested
# field name. Extraction rules are supplied either as a block passed to
# #parse or through an injected pattern object that exposes a callable via
# +block+.
class MailParser
  attr_accessor :body, :data, :index, :pattern, :stop

  # @param pattern [#block, nil] optional object whose +block+ is called with
  #   the parser at the end of every #parse
  def initialize(pattern = nil)
    @pattern = pattern
  end

  # Removes HTML tags from +html+ while keeping the text between them.
  #
  # Each tag is matched individually (`<` up to the next `>`), so the text
  # between an opening and a closing tag on the same line survives.
  #
  # @param html [String]
  # @return [String] +html+ without its tags
  def truncate_html(html)
    html.gsub(/<[^>]*>/, "")
  end

  # Resets the parser state for +body+ and runs the extraction rules.
  #
  # @param body [String] raw mail text
  # @param options [Array<Symbol>] pass +:html+ to strip tags before parsing
  # @yield [self] optional block containing #match / #parse_until calls
  # @return [Object, nil] the result of the injected pattern's block, or nil
  #   when no pattern (or no pattern block) is set
  def parse(body, *options, &block)
    body = truncate_html(body) if options.include?(:html)
    @body = body.split("\n")
    @index = 0
    @data = {}

    yield self if block_given?

    # Call the injected pattern, if any.
    @pattern.block.call(self) unless @pattern.nil? || @pattern.block.nil?
  end

  # Scans forward from the cursor and stores the first capture group of the
  # first matching line in +data[field]+.
  #
  # With a single pattern, that pattern is the one whose capture is stored.
  # With two patterns, the first one only "arms" the search: once a line
  # matches it, that line and the following ones are tested against the
  # second pattern, whose first capture group is stored.
  #
  # Every inspected line advances the cursor, including the matching one.
  # When +stop+ is set and a line matches it, +:halt+ is thrown before that
  # line is consumed.
  #
  # @param field [Object] key under which the captured text is stored
  # @param patterns [Array<Regexp>] one or two patterns as described above
  # @return [nil]
  def match(field, *patterns)
    trigger = patterns[0]
    if patterns.length == 2
      armed = false
      target = patterns[1]
    else
      armed = true
      target = trigger
    end

    # The slice is nil when the cursor was moved past the end of the body.
    (@body[@index..-1] || []).each do |line|
      throw :halt if @stop && line =~ @stop

      armed = true if !armed && line =~ trigger

      if armed && line =~ target
        @data[field] = target.match(line)[1]
        @index += 1
        return
      end

      @index += 1
    end
    nil
  end

  # Repeatedly runs the given block to collect a list of records, stopping
  # when the current line matches +pattern+ or the body is exhausted.
  #
  # Each pass starts with an empty +data+ hash; non-empty results are
  # appended to a list that is finally stored under +field+ in the data
  # that existed before the call.
  #
  # @param field [Object] key under which the collected records are stored
  # @param pattern [Regexp] line pattern that ends the collection
  # @yield [self] block containing the #match calls for a single record
  # @return [Hash] the merged data hash
  def parse_until(field, pattern)
    outer_data = @data.clone
    collections = []

    loop do
      started_at = @index
      @data = {}
      @stop = pattern
      catch(:halt) do
        yield self if block_given?
      end
      @stop = nil
      collections << @data unless @data.empty?

      break if @index >= @body.length || @body[@index] =~ pattern
      # A pass that consumed no line can never make progress; bail out
      # instead of spinning forever (e.g. when no block was given).
      break if @index == started_at
    end

    @data = outer_data.merge(field => collections)
  end

  alias match_until parse_until
end
@@ -0,0 +1,14 @@
1
# Extraction rules for Amazon order-confirmation mails.
#
# +block+ holds a lambda that receives a parser and records the shipping
# cost, the order total and a list of items (name and price each).
class AmazonPattern
  attr_accessor :block

  def initialize
    # Dollar amount such as "$5.55"; the digits are captured without the "$".
    dollar_amount = /\$(\d+\.\d+)/

    @block = lambda do |m|
      m.match :shipping, /Shipping & Handling/, dollar_amount
      m.match :total_price, /Total for this Order/, dollar_amount

      # Collect items until a line containing ten consecutive asterisks.
      m.match_until :items, /\*{10}/ do |item|
        # The name is the quoted text on or after a line containing a digit.
        item.match :name, /\d+/, /"(.*)"/
        item.match :price, dollar_amount
      end
    end
  end
end
@@ -0,0 +1,9 @@
1
# Minimal pattern object: its +block+ asks the parser for the first run of
# digits and stores it under +:price+.
class Pattern
  attr_accessor :block

  def initialize
    @block = ->(parser) { parser.match(:price, /(\d+)/) }
  end
end
@@ -0,0 +1,15 @@
1
+ require "spec_helper.rb"
2
+
3
# Checks that AmazonPattern, injected into a MailParser, extracts the
# shipping cost, order total and item list from the sample order mail.
describe AmazonPattern do
  before do
    path = File.join(File.dirname(__FILE__), "samples/amazon_order.html")
    # File.read closes the file itself; File.open(path).read would leave the
    # handle open until it is garbage collected.
    @mail = File.read(path)
    @parser = MailParser.new(AmazonPattern.new)
  end

  it "can inject to parser" do
    @parser.parse @mail
    @parser.data.should ==
      {:shipping => "3.99", :total_price=>"9.54", :items=>[{:name=>"The Curious Cook: More Kitchen Science and Lore", :price=>"5.55"}]}
  end
end
@@ -0,0 +1,84 @@
1
+ require "spec_helper.rb"
2
+
3
# Behavioural specs for MailParser: single and two-pattern matching, HTML tag
# stripping, repeated records via parse_until, and the Amazon sample mail.
describe MailParser do
  before do
    @parser = MailParser.new
  end

  it "should able to parse" do
    @parser.parse "Test :: 1" do |m|
      m.match :value, /Test/, /(\d)/
    end

    @parser.data[:value].should == "1"
  end

  it "can trim html tags" do
    # After stripping, no tag is left for the patterns to match.
    @parser.parse "<html> <head> Test :: $12</head> </html>", :html do |m|
      m.match :tag, /\<.*\>/, /\<\/(.*)\>/
    end

    @parser.data[:tag].should be_nil
  end

  it "can match multiple items" do
    @parser.parse "test :1 \n test :2 \n END", :html do |m|
      m.parse_until :items, /END/ do |item|
        item.match :price, /test/, /(\d+)/
      end
    end

    @parser.data.should == {:items => [{price: "1"},{price: "2"}]}
  end

  it "can stop at curtain line" do
    @parser.parse "test :1 \n END \n test :2" do |m|
      m.parse_until :items, /END/ do |item|
        item.match :price, /test/, /(\d+)/
      end
    end

    @parser.data.should == {:items => [{price: "1"}]}
  end

  it "can match single line" do
    @parser.parse "Test :1" do |m|
      m.match :number , /:(\d)/
    end

    @parser.data.should == {:number => "1"}
  end

  describe "Amazon order" do
    before do
      path = File.join(File.dirname(__FILE__), "samples/amazon_order.html")
      # File.read closes the file itself; File.open(path).read would leave
      # the handle open until it is garbage collected.
      @mail = File.read(path)
    end

    it "can parse Amazon order mail" do
      @parser.parse @mail do |m|
        m.match :email, /E-mail/,/(\w+@\w+\.\w+)/
      end

      @parser.data.should == {email: "test@example.com"}
    end

    it "can extracts items" do
      @parser.parse @mail do |m|
        m.parse_until :items , /Need to give a gift?/ do |item|
          item.match :name, /\d\s"(.*)"/
          item.match :price, /\$(\d+\.*\d*)/
        end
      end

      @parser.data.should == {:items => [
        {:name => "The Curious Cook: More Kitchen Science and Lore", :price => "5.55"}]}
    end
  end
end
@@ -0,0 +1,11 @@
1
+ require "spec_helper.rb"
2
+
3
# Checks that a Pattern handed to MailParser.new is applied by #parse.
describe Pattern do
  it "can inject to parser" do
    injected = MailParser.new(Pattern.new)
    injected.parse("TEST :: 12")

    injected.data.should == {:price => "12"}
  end
end
@@ -0,0 +1,14 @@
1
+ # This file was generated by the `rspec --init` command. Conventionally, all
2
+ # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
3
+ # Require this file using `require "spec_helper.rb"` to ensure that it is only
4
+ # loaded once.
5
+ #
6
+ # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
7
+
8
+ require "./lib/mail_parser"
9
+
10
# Suite-wide RSpec settings.
RSpec.configure do |rspec|
  # Restrict the run to examples tagged :focus ...
  rspec.filter_run :focus
  # ... with run_all_when_everything_filtered enabled alongside the filter.
  rspec.run_all_when_everything_filtered = true
  # NOTE(review): presumably lets a bare symbol such as :focus act as
  # metadata `focus: true` - confirm against the RSpec version in use.
  rspec.treat_symbols_as_metadata_keys_with_true_values = true
end
metadata ADDED
@@ -0,0 +1,73 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: mail_parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Jimmy Chao
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-03-20 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: mail_parser
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ description: Parser for parsing information by regex in mail
31
+ email: ''
32
+ executables: []
33
+ extensions: []
34
+ extra_rdoc_files: []
35
+ files:
36
+ - lib/mail_loader.rb
37
+ - lib/mail_parser.rb
38
+ - lib/mail_parser/mail_parser.rb
39
+ - lib/patterns/amazon_pattern.rb
40
+ - lib/patterns/pattern.rb
41
+ - spec/amazon_pattern_spec.rb
42
+ - spec/mail_parser_spec.rb
43
+ - spec/pattern_spec.rb
44
+ - spec/spec_helper.rb
45
+ homepage: http://github.com/Rafe/mail_parser
46
+ licenses: []
47
+ post_install_message:
48
+ rdoc_options: []
49
+ require_paths:
50
+ - lib
51
+ required_ruby_version: !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ! '>='
55
+ - !ruby/object:Gem::Version
56
+ version: '0'
57
+ required_rubygems_version: !ruby/object:Gem::Requirement
58
+ none: false
59
+ requirements:
60
+ - - ! '>='
61
+ - !ruby/object:Gem::Version
62
+ version: '0'
63
+ requirements: []
64
+ rubyforge_project:
65
+ rubygems_version: 1.8.19
66
+ signing_key:
67
+ specification_version: 3
68
+ summary: Parser for parsing information by regex in mail
69
+ test_files:
70
+ - spec/amazon_pattern_spec.rb
71
+ - spec/mail_parser_spec.rb
72
+ - spec/pattern_spec.rb
73
+ - spec/spec_helper.rb