mail_parser 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/mail_loader.rb +17 -0
- data/lib/mail_parser.rb +3 -0
- data/lib/mail_parser/mail_parser.rb +72 -0
- data/lib/patterns/amazon_pattern.rb +14 -0
- data/lib/patterns/pattern.rb +9 -0
- data/spec/amazon_pattern_spec.rb +15 -0
- data/spec/mail_parser_spec.rb +84 -0
- data/spec/pattern_spec.rb +11 -0
- data/spec/spec_helper.rb +14 -0
- metadata +73 -0
data/lib/mail_loader.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
require "gmail"
|
2
|
+
|
3
|
+
# Downloads Amazon order-confirmation mails from a Gmail inbox and stores each
# body as an HTML sample file for the specs.
#
# Usage: ruby mail_loader.rb <username> <password>

# Directory the samples are written to, resolved relative to this file so the
# script does not depend on the current working directory.
path = File.expand_path(File.join(File.dirname(__FILE__), "../spec/samples"))

username = ARGV[0]
password = ARGV[1]

Gmail.connect username, password do |gmail|
  mails = gmail.inbox.find(from: "auto-confirm@amazon.com")

  mails.each_with_index do |mail, index|
    File.open(File.join(path, "amazon_order#{index}.html"), "w") do |f|
      # Write the body of the mail currently being iterated; `mails` is the
      # whole result set and has no single body.
      f.puts(mail.body.decoded)
    end
  end
end
|
data/lib/mail_parser/mail_parser.rb
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
# Line-oriented extractor that pulls named fields out of a mail body with
# regular expressions.
#
# The parser keeps a cursor (+index+) into the lines of the body. Every #match
# call scans forward from that cursor and leaves it just past the line it
# matched, so fields have to be requested in the order they occur in the mail.
class MailParser
  attr_accessor :body, :data, :index, :pattern, :stop

  # pattern - optional object responding to #block; when that block is not nil
  #           it is called with the parser at the end of every #parse.
  def initialize(pattern = nil)
    @pattern = pattern
  end

  # Returns +html+ with its tags removed.
  #
  # Tags are matched one at a time (/<[^>]*>/). A greedy /<.*>/ would also
  # delete all text between the first and the last tag on a line.
  def truncate_html(html)
    html.gsub(/<[^>]*>/, "")
  end

  # Splits +body+ into lines, resets the cursor and the collected data, yields
  # the parser to the given block (if any) and finally runs the injected
  # pattern's block (if any).
  #
  # body    - String containing the mail text.
  # options - pass :html to strip HTML tags from the body first.
  def parse(body, *options, &block)
    body = truncate_html(body) if options.include?(:html)
    @body = body.split("\n")
    @index = 0
    @data = {}

    yield self if block_given?

    # Call the injected pattern.
    @pattern.block.call(self) unless @pattern.nil? || @pattern.block.nil?
  end

  # Scans forward from the cursor and stores the first capture group of the
  # first matching line in data[field].
  #
  # field    - key under which the captured text is stored.
  # patterns - either one Regexp (the line must match it), or two Regexps: a
  #            trigger that has to be seen first and a target that is applied
  #            to the trigger line and every line after it.
  #
  # When a stop pattern is active and a line matches it, :halt is thrown and
  # the cursor stays on that line. Returns nil.
  def match(field, *patterns)
    trigger = patterns[0]
    if patterns.length == 2
      triggered = false
      target = patterns[1]
    else
      triggered = true
      target = trigger
    end

    while @index < @body.length
      line = @body[@index]
      # Do not advance here: parse_until recognises the end of its section by
      # finding the cursor on the stop line.
      throw :halt if @stop && line =~ @stop

      triggered = true if !triggered && line =~ trigger

      if triggered && line =~ target
        @data[field] = Regexp.last_match(1)
        @index += 1
        return
      end
      @index += 1
    end
    nil
  end

  # Runs the given block repeatedly, collecting the fields matched during each
  # run into its own hash, until the cursor reaches a line matching +pattern+
  # or the end of the body. The non-empty hashes are stored as an array in
  # data[field]; data gathered before the call is kept.
  #
  # The loop also ends when a run did not move the cursor (for example when no
  # block is given), because repeating it could never make progress.
  # The stop pattern that was active before the call is restored afterwards,
  # so a nested call does not disable the stop of the enclosing one.
  def parse_until(field, pattern)
    outer_data = @data.clone
    outer_stop = @stop
    collections = []

    begin
      loop do
        started_at = @index
        @data = {}
        @stop = pattern
        catch(:halt) do
          yield self if block_given?
        end
        @stop = outer_stop
        collections << @data unless @data.empty?

        break if @body[@index] =~ pattern || @index >= @body.length
        break if @index == started_at
      end
    ensure
      # Also restore the stop pattern when the block raised.
      @stop = outer_stop
    end

    @data = outer_data.merge(field => collections)
  end

  alias match_until parse_until
end
|
data/lib/patterns/amazon_pattern.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# Extraction recipe for Amazon order-confirmation mails.
#
# The recipe is exposed as +block+, a lambda that receives a parser and drives
# its +match+ / +match_until+ calls.
class AmazonPattern
  # A dollar amount such as "$3.99"; the capture group holds the number.
  PRICE = /\$(\d+\.\d+)/

  attr_accessor :block

  def initialize
    @block = lambda do |m|
      m.match :shipping, /Shipping & Handling/, PRICE
      m.match :total_price, /Total for this Order/, PRICE

      # Collect name/price pairs until a line with ten consecutive asterisks.
      # The inner parameter has its own name so it does not shadow +m+.
      m.match_until :items, /\*{10}/ do |item|
        item.match :name, /\d+/, /"(.*)"/
        item.match :price, PRICE
      end
    end
  end
end
|
data/spec/amazon_pattern_spec.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
require "spec_helper.rb"
|
2
|
+
|
3
|
+
# Checks that an AmazonPattern handed to MailParser.new is applied by #parse.
describe AmazonPattern do
  before do
    path = File.join(File.dirname(__FILE__), "samples/amazon_order.html")
    # File.read closes the file itself; File.open(path).read left it open.
    @mail = File.read(path)
    @parser = MailParser.new(AmazonPattern.new)
  end

  it "can inject to parser" do
    @parser.parse @mail
    @parser.data.should == {
      :shipping => "3.99",
      :total_price => "9.54",
      :items => [{ :name => "The Curious Cook: More Kitchen Science and Lore", :price => "5.55" }]
    }
  end
end
|
data/spec/mail_parser_spec.rb
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
require "spec_helper.rb"
|
2
|
+
|
3
|
+
# Behaviour of MailParser#parse, #match and #parse_until on inline strings and
# on the Amazon sample mail.
describe MailParser do
  before do
    @parser = MailParser.new
  end

  it "should able to parse" do
    @parser.parse "Test :: 1" do |m|
      m.match :value, /Test/, /(\d)/
    end

    @parser.data[:value].should == "1"
  end

  it "can trim html tags" do
    @parser.parse "<html> <head> Test :: $12</head> </html>", :html do |m|
      m.match :tag, /\<.*\>/, /\<\/(.*)\>/
    end

    @parser.data[:tag].should be_nil
  end

  it "can match multiple items" do
    @parser.parse "test :1 \n test :2 \n END", :html do |m|
      # The inner parameter has its own name so it does not shadow +m+.
      m.parse_until :items, /END/ do |item|
        item.match :price, /test/, /(\d+)/
      end
    end

    @parser.data.should == {:items => [{price: "1"},{price: "2"}]}
  end

  it "can stop at curtain line" do
    @parser.parse "test :1 \n END \n test :2" do |m|
      m.parse_until :items, /END/ do |item|
        item.match :price, /test/, /(\d+)/
      end
    end

    @parser.data.should == {:items => [{price: "1"}]}
  end

  it "can match single line" do
    @parser.parse "Test :1" do |m|
      m.match :number , /:(\d)/
    end

    @parser.data.should == {:number => "1"}
  end

  describe "Amazon order" do
    before do
      path = File.join(File.dirname(__FILE__),"samples/amazon_order.html")
      # File.read closes the file itself; File.open(path).read left it open.
      @mail = File.read(path)
    end

    it "can parse Amazon order mail" do
      @parser.parse @mail do |m|
        m.match :email, /E-mail/,/(\w+@\w+\.\w+)/
      end

      @parser.data.should == {email: "test@example.com"}
    end

    it "can extracts items" do
      @parser.parse @mail do |m|
        # NOTE(review): the unescaped "?" makes the final "t" optional instead
        # of matching a literal question mark; presumably /gift\?/ was meant -
        # confirm against the sample mail before changing it.
        m.parse_until :items , /Need to give a gift?/ do |item|
          item.match :name, /\d\s"(.*)"/
          item.match :price, /\$(\d+\.*\d*)/
        end
      end

      @parser.data.should == {:items => [
        {:name => "The Curious Cook: More Kitchen Science and Lore", :price => "5.55"}]}
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# This file was generated by the `rspec --init` command. Conventionally, all
|
2
|
+
# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
|
3
|
+
# Require this file using `require "spec_helper.rb"` to ensure that it is only
|
4
|
+
# loaded once.
|
5
|
+
#
|
6
|
+
# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
|
7
|
+
|
8
|
+
# Load the library relative to this file so the specs do not depend on the
# directory rspec is started from ("./lib/..." only resolved from the project
# root).
require_relative "../lib/mail_parser"

RSpec.configure do |config|
  config.treat_symbols_as_metadata_keys_with_true_values = true
  config.run_all_when_everything_filtered = true
  config.filter_run :focus
end
|
metadata
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: mail_parser
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Jimmy Chao
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-03-20 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: mail_parser
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
description: Parser for parsing information by regex in mail
|
31
|
+
email: ''
|
32
|
+
executables: []
|
33
|
+
extensions: []
|
34
|
+
extra_rdoc_files: []
|
35
|
+
files:
|
36
|
+
- lib/mail_loader.rb
|
37
|
+
- lib/mail_parser.rb
|
38
|
+
- lib/mail_parser/mail_parser.rb
|
39
|
+
- lib/patterns/amazon_pattern.rb
|
40
|
+
- lib/patterns/pattern.rb
|
41
|
+
- spec/amazon_pattern_spec.rb
|
42
|
+
- spec/mail_parser_spec.rb
|
43
|
+
- spec/pattern_spec.rb
|
44
|
+
- spec/spec_helper.rb
|
45
|
+
homepage: http://github.com/Rafe/mail_parser
|
46
|
+
licenses: []
|
47
|
+
post_install_message:
|
48
|
+
rdoc_options: []
|
49
|
+
require_paths:
|
50
|
+
- lib
|
51
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
52
|
+
none: false
|
53
|
+
requirements:
|
54
|
+
- - ! '>='
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: '0'
|
57
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
58
|
+
none: false
|
59
|
+
requirements:
|
60
|
+
- - ! '>='
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '0'
|
63
|
+
requirements: []
|
64
|
+
rubyforge_project:
|
65
|
+
rubygems_version: 1.8.19
|
66
|
+
signing_key:
|
67
|
+
specification_version: 3
|
68
|
+
summary: Parser for parsing information by regex in mail
|
69
|
+
test_files:
|
70
|
+
- spec/amazon_pattern_spec.rb
|
71
|
+
- spec/mail_parser_spec.rb
|
72
|
+
- spec/pattern_spec.rb
|
73
|
+
- spec/spec_helper.rb
|