mail_parser 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,17 @@
1
+ require "gmail"
2
+
3
# Sample loader script.
#
# Usage: ruby mail_loader.rb USERNAME PASSWORD
#
# Connects to Gmail with the given credentials, finds every inbox message
# sent from auto-confirm@amazon.com and writes each decoded body to
# spec/samples/amazon_order<N>.html, where N is the message's position in
# the search result.

# Absolute path of the samples directory, resolved relative to this file so
# the script works no matter which directory it is started from.
path = File.expand_path(File.join(File.dirname(__FILE__), "../spec/samples"))

username = ARGV[0]
password = ARGV[1]
abort "usage: #{$0} USERNAME PASSWORD" unless username && password

Gmail.connect username, password do |gmail|
  mails = gmail.inbox.find(from: "auto-confirm@amazon.com")

  mails.each_with_index do |mail, index|
    File.open(File.join(path, "amazon_order#{index}.html"), "w") do |f|
      # Write the body of the current message; the collection itself has no
      # body to decode.
      f.puts(mail.body.decoded)
    end
  end
end
@@ -0,0 +1,3 @@
1
+ require "patterns/pattern"
2
+ require "patterns/amazon_pattern"
3
+ require "mail_parser/mail_parser"
@@ -0,0 +1,72 @@
1
# Line-oriented field extractor.
#
# A body of text is split into lines and scanned top to bottom with regular
# expressions; every successful #match stores the first capture group of the
# matching line in #data under the given field name. Extraction rules can be
# supplied as a block to #parse and/or as an injected pattern object (any
# object whose #block returns a callable that receives the parser).
#
# State exposed through accessors:
#   body    - Array of lines currently being scanned
#   data    - Hash of extracted values
#   index   - position of the next line to be scanned
#   pattern - the injected pattern object (may be nil)
#   stop    - Regexp that aborts the current #parse_until iteration, or nil
class MailParser
  attr_accessor :body, :data, :index, :pattern, :stop

  # pattern - optional object responding to #block; the returned callable is
  #           invoked with the parser at the end of every #parse.
  def initialize(pattern=nil)
    @pattern = pattern
  end

  # Removes markup tags from +html+ and returns the remaining text.
  #
  # Each tag is matched individually (from "<" to the nearest ">"), so text
  # sitting between two tags on the same line is preserved.
  def truncate_html(html)
    html.gsub(/<[^>]*>/, "")
  end

  # Resets the parser state and scans +body+.
  #
  # body    - String to scan; it is split on "\n".
  # options - pass :html to strip tags from +body+ before scanning.
  #
  # The given block (if any) is yielded the parser first, then the injected
  # pattern's block (if any) is called with the parser.
  #
  # Returns the Hash of extracted data (also available through #data).
  def parse(body, *options, &block)
    body = truncate_html(body) if options.include?(:html)
    @body = body.split("\n")
    @index = 0
    @data = {}

    yield self if block_given?

    # Call the injected pattern.
    injected = @pattern && @pattern.block
    injected.call(self) if injected

    @data
  end

  # Scans forward from the current line and stores the first capture group
  # of the first matching line in data[field].
  #
  # With one pattern, the first line matching it is used. With two patterns,
  # the first acts as a trigger: lines are skipped until one matches it, and
  # from that line on (that line included) the second pattern is searched.
  #
  # The scan position is left just after the matching line, or at the end of
  # the body when nothing matched. While a stop pattern is active (inside
  # #parse_until) reaching a line that matches it throws :halt and leaves
  # the position on that line.
  #
  # Returns the captured String, or nil when nothing matched.
  def match(field, *patterns)
    trigger = patterns[0]
    if patterns.length == 2
      armed = false
      target = patterns[1]
    else
      armed = true
      target = trigger
    end

    while @index < @body.length
      line = @body[@index]
      throw :halt if @stop && line =~ @stop

      armed = true if !armed && line =~ trigger
      found = armed ? target.match(line) : nil
      @index += 1
      if found
        @data[field] = found[1]
        return found[1]
      end
    end
    nil
  end

  # Repeatedly runs the block to collect a list of records.
  #
  # Every iteration starts with an empty data hash, yields the parser and
  # appends the hash to the list if the block stored anything in it.
  # Iteration ends when the next line matches +pattern+, when the body is
  # exhausted, or when an iteration consumed no line at all (which would
  # otherwise repeat forever).
  #
  # Afterwards #data is the data collected before the call plus
  # field => [records...]. The stop pattern that was active before the call
  # is restored even if the block raises, so nested calls do not disable the
  # outer stop pattern.
  #
  # Returns the resulting data Hash.
  def parse_until(field, pattern)
    outer_data = @data.clone
    outer_stop = @stop
    collections = []
    begin
      loop do
        started_at = @index
        @data = {}
        @stop = pattern
        catch(:halt) do
          yield self if block_given?
        end
        collections << @data unless @data.empty?

        break if @index >= @body.length || @body[@index] =~ pattern
        # No line was consumed: another pass would see identical input.
        break if @index == started_at
      end
    ensure
      @stop = outer_stop
    end
    @data = outer_data.merge(field => collections)
  end

  alias :match_until :parse_until
end
@@ -0,0 +1,14 @@
1
# Extraction rules for Amazon order-confirmation mails.
#
# #block holds a lambda that takes a parser (an object responding to #match
# and #match_until) and asks it for:
#   :shipping    - amount on/after the "Shipping & Handling" line
#   :total_price - amount on/after the "Total for this Order" line
#   :items       - list of {:name, :price} records, collected until a line
#                  containing ten asterisks is reached
class AmazonPattern
  # Dollar amount such as "$5.55" or "$1,234.56". Group 1 is the amount
  # without the "$" sign; thousands separators are kept as written.
  PRICE = /\$(\d[\d,]*\.\d+)/

  attr_accessor :block

  def initialize
    @block = lambda do |m|
      m.match :shipping, /Shipping & Handling/, PRICE
      m.match :total_price, /Total for this Order/, PRICE

      m.match_until :items, /\*{10}/ do |item|
        # The item name is the quoted text on the first line, at or after a
        # line containing a digit.
        item.match :name, /\d+/, /"(.*)"/
        item.match :price, PRICE
      end
    end
  end
end
@@ -0,0 +1,9 @@
1
# Minimal example of an injectable pattern object.
# #block holds a lambda that receives a parser and asks it to store, under
# :price, the first capture group of /(\d+)/ (a run of digits).
+ class Pattern
2
+ attr_accessor :block
3
+
4
# Builds the lambda once per instance; callers read it through #block.
+ def initialize
5
+ @block = lambda do |m|
6
+ m.match :price , /(\d+)/
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,15 @@
1
+ require "spec_helper.rb"
2
+
3
# Checks that an AmazonPattern injected into MailParser extracts the
# shipping cost, the order total and the item list from the sample mail
# stored in spec/samples/amazon_order.html.
describe AmazonPattern do
  before do
    path = File.join(File.dirname(__FILE__), "samples/amazon_order.html")
    # File.read closes the file again; File.open(path).read left it open.
    @mail = File.read(path)
    @parser = MailParser.new(AmazonPattern.new)
  end

  it "can inject to parser" do
    @parser.parse @mail
    @parser.data.should == {
      :shipping => "3.99",
      :total_price => "9.54",
      :items => [
        {:name => "The Curious Cook: More Kitchen Science and Lore", :price => "5.55"}
      ]
    }
  end
end
@@ -0,0 +1,84 @@
1
+ require "spec_helper.rb"
2
+
3
# Behavioural specs for MailParser: single and two-pattern matching, HTML
# tag stripping, repeated record extraction with parse_until, and two
# end-to-end examples against the sample Amazon order mail.
describe MailParser do
  before do
    @parser = MailParser.new
  end

  it "should able to parse" do
    @parser.parse "Test :: 1" do |m|
      m.match :value, /Test/, /(\d)/
    end

    @parser.data[:value].should == "1"
  end

  it "can trim html tags" do
    # With :html the tags are removed before scanning, so a rule that looks
    # for a tag must not find anything.
    @parser.parse "<html> <head> Test :: $12</head> </html>", :html do |m|
      m.match :tag, /\<.*\>/, /\<\/(.*)\>/
    end

    @parser.data[:tag].should be_nil
  end

  it "can match multiple items" do
    @parser.parse "test :1 \n test :2 \n END", :html do |m|
      m.parse_until :items, /END/ do |item|
        item.match :price, /test/, /(\d+)/
      end
    end

    @parser.data.should == {:items => [{price: "1"}, {price: "2"}]}
  end

  it "can stop at curtain line" do
    # The record after the END line must not be collected.
    @parser.parse "test :1 \n END \n test :2" do |m|
      m.parse_until :items, /END/ do |item|
        item.match :price, /test/, /(\d+)/
      end
    end

    @parser.data.should == {:items => [{price: "1"}]}
  end

  it "can match single line" do
    @parser.parse "Test :1" do |m|
      m.match :number, /:(\d)/
    end

    @parser.data.should == {:number => "1"}
  end

  describe "Amazon order" do
    before do
      path = File.join(File.dirname(__FILE__), "samples/amazon_order.html")
      # File.read closes the file again; File.open(path).read left it open.
      @mail = File.read(path)
    end

    it "can parse Amazon order mail" do
      @parser.parse @mail do |m|
        m.match :email, /E-mail/, /(\w+@\w+\.\w+)/
      end

      @parser.data.should == {email: "test@example.com"}
    end

    it "can extracts items" do
      @parser.parse @mail do |m|
        # NOTE(review): the unescaped "?" makes the final "t" optional
        # instead of matching a literal question mark; escape it if the
        # literal character is intended (check against the sample file).
        m.parse_until :items, /Need to give a gift?/ do |item|
          item.match :name, /\d\s"(.*)"/
          item.match :price, /\$(\d+\.*\d*)/
        end
      end

      @parser.data.should == {:items => [
        {:name => "The Curious Cook: More Kitchen Science and Lore", :price => "5.55"}]}
    end
  end
end
@@ -0,0 +1,11 @@
1
+ require "spec_helper.rb"
2
+
3
# Verifies that a Pattern passed to MailParser.new is applied by #parse even
# when #parse is called without a block: the digits of the input end up
# under :price.
+ describe Pattern do
4
+ it "can inject to parser" do
5
+ parser = MailParser.new(Pattern.new)
6
+
7
# No block is given here; only the injected pattern supplies rules.
+ parser.parse "TEST :: 12"
8
+
9
+ parser.data.should == {:price => "12"}
10
+ end
11
+ end
@@ -0,0 +1,14 @@
1
+ # This file was generated by the `rspec --init` command. Conventionally, all
2
+ # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
3
+ # Require this file using `require "spec_helper.rb"` to ensure that it is only
4
+ # loaded once.
5
+ #
6
+ # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
7
+
8
+ require "./lib/mail_parser"
9
+
10
# Global RSpec settings for this suite (the defaults written by
# `rspec --init`). The notes below follow the RSpec 2 configuration
# documentation -- confirm against the installed RSpec version.
+ RSpec.configure do |config|
11
# Presumably lets a bare symbol such as :focus act as metadata `:focus => true`.
+ config.treat_symbols_as_metadata_keys_with_true_values = true
12
# Presumably runs the whole suite when the filter below matches no example.
+ config.run_all_when_everything_filtered = true
13
# Restricts the run to examples tagged with :focus.
+ config.filter_run :focus
14
+ end
metadata ADDED
@@ -0,0 +1,73 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: mail_parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Jimmy Chao
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-03-20 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: mail_parser
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ description: Parser for parsing information by regex in mail
31
+ email: ''
32
+ executables: []
33
+ extensions: []
34
+ extra_rdoc_files: []
35
+ files:
36
+ - lib/mail_loader.rb
37
+ - lib/mail_parser.rb
38
+ - lib/mail_parser/mail_parser.rb
39
+ - lib/patterns/amazon_pattern.rb
40
+ - lib/patterns/pattern.rb
41
+ - spec/amazon_pattern_spec.rb
42
+ - spec/mail_parser_spec.rb
43
+ - spec/pattern_spec.rb
44
+ - spec/spec_helper.rb
45
+ homepage: http://github.com/Rafe/mail_parser
46
+ licenses: []
47
+ post_install_message:
48
+ rdoc_options: []
49
+ require_paths:
50
+ - lib
51
+ required_ruby_version: !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ! '>='
55
+ - !ruby/object:Gem::Version
56
+ version: '0'
57
+ required_rubygems_version: !ruby/object:Gem::Requirement
58
+ none: false
59
+ requirements:
60
+ - - ! '>='
61
+ - !ruby/object:Gem::Version
62
+ version: '0'
63
+ requirements: []
64
+ rubyforge_project:
65
+ rubygems_version: 1.8.19
66
+ signing_key:
67
+ specification_version: 3
68
+ summary: Parser for parsing information by regex in mail
69
+ test_files:
70
+ - spec/amazon_pattern_spec.rb
71
+ - spec/mail_parser_spec.rb
72
+ - spec/pattern_spec.rb
73
+ - spec/spec_helper.rb