mail_parser 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/mail_loader.rb +17 -0
- data/lib/mail_parser.rb +3 -0
- data/lib/mail_parser/mail_parser.rb +72 -0
- data/lib/patterns/amazon_pattern.rb +14 -0
- data/lib/patterns/pattern.rb +9 -0
- data/spec/amazon_pattern_spec.rb +15 -0
- data/spec/mail_parser_spec.rb +84 -0
- data/spec/pattern_spec.rb +11 -0
- data/spec/spec_helper.rb +14 -0
- metadata +73 -0
data/lib/mail_loader.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
require "gmail"
|
2
|
+
|
3
|
+
# Directory that receives the downloaded sample mails. It is resolved relative
# to this file so the script behaves the same from any working directory.
path = File.expand_path(File.join(File.dirname(__FILE__), "../spec/samples"))

# Gmail credentials come from the command line: <username> <password>.
username = ARGV[0]
password = ARGV[1]

Gmail.connect username, password do |gmail|
  # Every inbox message sent by Amazon's order-confirmation address.
  mails = gmail.inbox.find(from: "auto-confirm@amazon.com")

  mails.each_with_index do |mail, index|
    # One sample file per message: amazon_order0.html, amazon_order1.html, ...
    File.open(File.join(path, "amazon_order#{index}.html"), "w") do |f|
      # Write the body of the *current* message; the previous code called
      # `body` on the whole `mails` collection.
      f.puts(mail.body.decoded)
    end
  end
end
|
data/lib/mail_parser.rb
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
# Line-oriented extractor that pulls named fields out of a mail body with
# regular expressions.
#
# Call #parse with the body text and either a block or an injected pattern
# object (anything responding to #block). Inside, #match and #parse_until
# consume the body from top to bottom and fill #data.
class MailParser
  attr_accessor :body, :data, :index, :pattern, :stop

  # pattern - optional object whose #block is called with the parser at the
  #           end of every #parse.
  def initialize(pattern = nil)
    @pattern = pattern
  end

  # Returns +html+ with markup tags removed.
  #
  # The quantifier is non-greedy so each tag is removed on its own; a greedy
  # /<.*>/ would also delete all text between the first "<" and the last ">"
  # of a line.
  def truncate_html(html)
    html.gsub(/<.*?>/, "")
  end

  # Splits +body+ into lines, resets the cursor and the collected data, then
  # yields the parser to the given block and finally to the injected pattern.
  #
  # body    - String mail body.
  # options - pass :html to strip tags before splitting.
  def parse(body, *options, &block)
    body = truncate_html(body) if options.include?(:html)
    @body = body.split("\n")
    @index = 0
    @data = {}

    yield self if block_given?

    # call the injected pattern
    @pattern.block.call(self) if @pattern && @pattern.block
  end

  # Scans forward from the current line and stores capture group 1 of the
  # first matching line under +field+.
  #
  # With one pattern, the first line matching it is used. With two patterns,
  # the first acts as a trigger: scanning for the second pattern only starts
  # at the line where the trigger matches (that same line is eligible).
  #
  # The cursor is advanced past every line consumed. If a line matches #stop,
  # :halt is thrown before that line is consumed (caught by #parse_until).
  def match(field, *patterns)
    pattern = patterns[0]
    if patterns.length == 2
      invoked = false
      target_pattern = patterns[1]
    else
      invoked = true
      target_pattern = pattern
    end

    (@body[@index..-1] || []).each do |line|
      throw :halt if @stop && line =~ @stop

      invoked = true if !invoked && line =~ pattern

      if invoked && line =~ target_pattern
        @data[field] = target_pattern.match(line)[1]
        @index += 1
        return
      end
      @index += 1
    end
    nil
  end

  # Runs the block repeatedly, collecting the fields matched in each pass
  # into a Hash, until the current line matches +pattern+ or the body is
  # exhausted. The non-empty hashes are stored as an Array under +field+,
  # merged into the data gathered before the call.
  def parse_until(field, pattern)
    data = @data.clone
    collections = []

    loop do
      @data = {}
      @stop = pattern
      start_index = @index
      begin
        catch(:halt) do
          yield self if block_given?
        end
      ensure
        # Never leave the stop pattern armed, even if the block raises.
        @stop = nil
      end
      collections << @data unless @data.empty?

      break if @index >= @body.length || @body[@index] =~ pattern
      # No line was consumed (no block, or a block that never matches):
      # another pass would be identical, so stop instead of spinning forever.
      break if @index == start_index
    end

    @data = data.merge({field => collections})
  end

  alias :match_until :parse_until
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
# Reusable extraction recipe for Amazon order-confirmation mails.
#
# #block holds a lambda that takes a parser (an object responding to #match
# and #match_until) and registers the fields to extract.
class AmazonPattern
  attr_accessor :block

  def initialize
    @block = lambda do |m|
      # Order-level amounts: the label line acts as the trigger, the dollar
      # amount is the captured value.
      m.match :shipping, /Shipping & Handling/, /\$(\d+\.\d+)/
      m.match :total_price, /Total for this Order/, /\$(\d+\.\d+)/

      # Collect items until a line containing ten asterisks. The inner block
      # parameter has its own name so it no longer shadows the outer `m`.
      m.match_until :items, /\*{10}/ do |item|
        item.match :name, /\d+/, /"(.*)"/
        item.match :price, /\$(\d+\.\d+)/
      end
    end
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require "spec_helper.rb"
|
2
|
+
|
3
|
+
# Checks that an AmazonPattern injected into MailParser extracts the order
# fields from the sample confirmation mail.
describe AmazonPattern do
  before do
    path = File.join(File.dirname(__FILE__), "samples/amazon_order.html")
    # File.read closes the file itself; File.open(path).read left the handle
    # open until garbage collection.
    @mail = File.read(path)
    @parser = MailParser.new(AmazonPattern.new)
  end

  it "can inject to parser" do
    expected = {
      :shipping => "3.99",
      :total_price => "9.54",
      :items => [{:name => "The Curious Cook: More Kitchen Science and Lore", :price => "5.55"}]
    }

    @parser.parse @mail
    @parser.data.should == expected
  end
end
|
@@ -0,0 +1,84 @@
|
|
1
|
+
require "spec_helper.rb"
|
2
|
+
|
3
|
+
# Behavioural specs for MailParser: single-field matching, HTML stripping,
# repeated item extraction and stopping at a terminator line.
describe MailParser do
  before do
    @parser = MailParser.new
  end

  it "should able to parse" do
    @parser.parse "Test :: 1" do |m|
      m.match :value, /Test/, /(\d)/
    end

    @parser.data[:value].should == "1"
  end

  it "can trim html tags" do
    # With :html the tags are removed before matching, so no tag can be found.
    @parser.parse "<html> <head> Test :: $12</head> </html>", :html do |m|
      m.match :tag, /\<.*\>/, /\<\/(.*)\>/
    end

    @parser.data[:tag].should be_nil
  end

  it "can match multiple items" do
    @parser.parse "test :1 \n test :2 \n END", :html do |m|
      # Inner block parameter gets its own name to avoid shadowing `m`.
      m.parse_until :items, /END/ do |item|
        item.match :price, /test/, /(\d+)/
      end
    end

    @parser.data.should == {:items => [{price: "1"}, {price: "2"}]}
  end

  it "can stop at curtain line" do
    @parser.parse "test :1 \n END \n test :2" do |m|
      m.parse_until :items, /END/ do |item|
        item.match :price, /test/, /(\d+)/
      end
    end

    @parser.data.should == {:items => [{price: "1"}]}
  end

  it "can match single line" do
    @parser.parse "Test :1" do |m|
      m.match :number, /:(\d)/
    end

    @parser.data.should == {:number => "1"}
  end

  describe "Amazon order" do
    before do
      path = File.join(File.dirname(__FILE__), "samples/amazon_order.html")
      # File.read closes the file itself; File.open(path).read leaked the handle.
      @mail = File.read(path)
    end

    it "can parse Amazon order mail" do
      @parser.parse @mail do |m|
        m.match :email, /E-mail/, /(\w+@\w+\.\w+)/
      end

      @parser.data.should == {email: "test@example.com"}
    end

    it "can extracts items" do
      @parser.parse @mail do |m|
        # NOTE(review): the unescaped "?" makes the final "t" optional rather
        # than matching a literal question mark - confirm against the sample.
        m.parse_until :items, /Need to give a gift?/ do |item|
          item.match :name, /\d\s"(.*)"/
          item.match :price, /\$(\d+\.*\d*)/
        end
      end

      @parser.data.should == {:items => [
        {:name => "The Curious Cook: More Kitchen Science and Lore", :price => "5.55"}]}
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# This file was generated by the `rspec --init` command. Conventionally, all
|
2
|
+
# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
|
3
|
+
# Require this file using `require "spec_helper.rb"` to ensure that it is only
|
4
|
+
# loaded once.
|
5
|
+
#
|
6
|
+
# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
|
7
|
+
|
8
|
+
require "./lib/mail_parser"
|
9
|
+
|
10
|
+
# Suite-wide RSpec settings: bare symbols are treated as `symbol => true`
# metadata, the run is filtered on :focus, and the whole suite runs when that
# filter leaves nothing to execute.
RSpec.configure do |rspec|
  rspec.treat_symbols_as_metadata_keys_with_true_values = true
  rspec.run_all_when_everything_filtered = true
  rspec.filter_run :focus
end
|
metadata
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: mail_parser
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Jimmy Chao
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-03-20 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: mail_parser
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
description: Parser for parsing information by regex in mail
|
31
|
+
email: ''
|
32
|
+
executables: []
|
33
|
+
extensions: []
|
34
|
+
extra_rdoc_files: []
|
35
|
+
files:
|
36
|
+
- lib/mail_loader.rb
|
37
|
+
- lib/mail_parser.rb
|
38
|
+
- lib/mail_parser/mail_parser.rb
|
39
|
+
- lib/patterns/amazon_pattern.rb
|
40
|
+
- lib/patterns/pattern.rb
|
41
|
+
- spec/amazon_pattern_spec.rb
|
42
|
+
- spec/mail_parser_spec.rb
|
43
|
+
- spec/pattern_spec.rb
|
44
|
+
- spec/spec_helper.rb
|
45
|
+
homepage: http://github.com/Rafe/mail_parser
|
46
|
+
licenses: []
|
47
|
+
post_install_message:
|
48
|
+
rdoc_options: []
|
49
|
+
require_paths:
|
50
|
+
- lib
|
51
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
52
|
+
none: false
|
53
|
+
requirements:
|
54
|
+
- - ! '>='
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: '0'
|
57
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
58
|
+
none: false
|
59
|
+
requirements:
|
60
|
+
- - ! '>='
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '0'
|
63
|
+
requirements: []
|
64
|
+
rubyforge_project:
|
65
|
+
rubygems_version: 1.8.19
|
66
|
+
signing_key:
|
67
|
+
specification_version: 3
|
68
|
+
summary: Parser for parsing information by regex in mail
|
69
|
+
test_files:
|
70
|
+
- spec/amazon_pattern_spec.rb
|
71
|
+
- spec/mail_parser_spec.rb
|
72
|
+
- spec/pattern_spec.rb
|
73
|
+
- spec/spec_helper.rb
|