amazon_order 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/.rspec +2 -0
- data/.travis.yml +5 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +154 -0
- data/Rakefile +6 -0
- data/amazon_order.gemspec +31 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/lib/amazon_order.rb +11 -0
- data/lib/amazon_order/client.rb +108 -0
- data/lib/amazon_order/parser.rb +27 -0
- data/lib/amazon_order/parsers/base.rb +43 -0
- data/lib/amazon_order/parsers/order.rb +52 -0
- data/lib/amazon_order/parsers/product.rb +28 -0
- data/lib/amazon_order/version.rb +3 -0
- data/lib/amazon_order/writer.rb +45 -0
- metadata +160 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: e3a6bf0fa3f7452a0d5902ad1f18573a91533e3b
|
4
|
+
data.tar.gz: 20b21eb7ee25fb98487583bb8369f371acef22bc
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 56ee5c0283f97849f9cc461b780ceab6cbc4e468fd039066cef4f1ba3968b60b1cc5e34192c100eb8f8ec786435aa5e9d3b0620d2e0009d516fba0a53b00be81
|
7
|
+
data.tar.gz: 3679036cfa689b836654f8c57c4bcd093fecdc3ca4a17a900f49457ba424d71bb99eb36aa53bd3d2fc0b14c3fb01c2a8b6525ca92e71568ccc4262a6cf5c9b9f
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2017 Kazuho Yamaguchi
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,154 @@
|
|
1
|
+
# AmazonOrder
|
2
|
+
|
3
|
+
[Gem Version](https://badge.fury.io/rb/amazon_order)
|
4
|
+
[Build Status](https://travis-ci.org/kyamaguchi/amazon_order)
|
5
|
+
|
6
|
+
Scrape Amazon order information from the Amazon website.
|
7
|
+
|
8
|
+
##### Fetch Amazon Orders information
|
9
|
+
|
10
|
+

|
11
|
+
|
12
|
+
##### Load orders information
|
13
|
+
|
14
|
+

|
15
|
+
|
16
|
+
Recorded with [Recordit](http://recordit.co/)
|
17
|
+
|
18
|
+
## Installation
|
19
|
+
|
20
|
+
Add this line to your application's Gemfile:
|
21
|
+
|
22
|
+
```ruby
|
23
|
+
gem 'amazon_order'
|
24
|
+
```
|
25
|
+
|
26
|
+
And then execute:
|
27
|
+
|
28
|
+
$ bundle
|
29
|
+
|
30
|
+
Or install it yourself as:
|
31
|
+
|
32
|
+
$ gem install amazon_order
|
33
|
+
|
34
|
+
## Usage
|
35
|
+
|
36
|
+
### Setup
|
37
|
+
|
38
|
+
[chromedriver](https://sites.google.com/a/chromium.org/chromedriver/downloads) is required. Please [download chromedriver](http://chromedriver.storage.googleapis.com/index.html) and update chromedriver regularly.
|
39
|
+
|
40
|
+
Create _.env_ following the instructions of https://github.com/kyamaguchi/amazon_auth
|
41
|
+
|
42
|
+
```
|
43
|
+
amazon_auth
|
44
|
+
|
45
|
+
vi .env
|
46
|
+
```
|
47
|
+
|
48
|
+
And `Dotenv.load` or `gem 'dotenv-rails'` may be required when you use this in your app.
|
49
|
+
|
50
|
+
### Run
|
51
|
+
|
52
|
+
In console
|
53
|
+
|
54
|
+
```ruby
|
55
|
+
require 'amazon_order'
|
56
|
+
client = AmazonOrder::Client.new(verbose: true, limit: 10)
|
57
|
+
client.fetch_amazon_orders
|
58
|
+
# Fetch orders of specified year
|
59
|
+
client.fetch_orders_for_year(year: 2016)
|
60
|
+
|
61
|
+
# Fetch all pages of specified year
|
62
|
+
client = AmazonOrder::Client.new(limit: nil)
|
63
|
+
client.sign_in
|
64
|
+
client.go_to_amazon_order_page
|
65
|
+
client.fetch_orders_for_year(year: 2015)
|
66
|
+
```
|
67
|
+
|
68
|
+
Downloaded pages will be stored in the `orders` directory by default (configurable with the `base_dir` option).
|
69
|
+
You can reset by moving that directory.
|
70
|
+
|
71
|
+
Once `fetch_amazon_orders` succeeds, you can load orders information of downloaded pages anytime.
|
72
|
+
(You don't need to launch a browser and fetch the pages every time.)
|
73
|
+
|
74
|
+
```ruby
|
75
|
+
orders = client.load_amazon_orders;nil
|
76
|
+
orders.size
|
77
|
+
|
78
|
+
# Sum of order_total
|
79
|
+
orders.map(&:order_total).sum
|
80
|
+
|
81
|
+
# Products
|
82
|
+
products = orders.map(&:products).flatten;nil
|
83
|
+
products.size
|
84
|
+
|
85
|
+
# Sum of order_total by year
|
86
|
+
orders.group_by{|o| o.order_placed.strftime('%Y') }.sort_by{|year,_| year }.map{|year,records| puts [year, records.map(&:order_total).sum].inspect };nil
|
87
|
+
```
|
88
|
+
|
89
|
+
Example of data
|
90
|
+
|
91
|
+
```ruby
|
92
|
+
console> pp orders.first.to_hash
|
93
|
+
{"order_placed"=>Wed, 25 Aug 2010,
|
94
|
+
"order_number"=>"503-5746373-6335034",
|
95
|
+
"order_total"=>2940.0,
|
96
|
+
"shipment_status"=>nil,
|
97
|
+
"shipment_note"=>nil,
|
98
|
+
"order_details_path"=>
|
99
|
+
"/gp/your-account/order-details/...",
|
100
|
+
"all_products_displayed"=>false,
|
101
|
+
:products=>
|
102
|
+
[{"title"=>"メタプログラミングRuby",
|
103
|
+
...
|
104
|
+
}
|
105
|
+
|
106
|
+
console> pp products.first.to_hash
|
107
|
+
{"title"=>"メタプログラミングRuby",
|
108
|
+
"path"=>"/gp/product/4048687158/...",
|
109
|
+
"content"=>"Paolo Perrotta, 角征典...",
|
110
|
+
"image_url"=>
|
111
|
+
"https://images-fe.ssl-images-amazon.com/images/I/51TODrMIEnL.jpg"}
|
112
|
+
```
|
113
|
+
|
114
|
+
#### Options
|
115
|
+
|
116
|
+
Limit fetching with number of pages: `client = AmazonOrder::Client.new(limit: 5)`
|
117
|
+
`limit: nil` for no limit. default is 5
|
118
|
+
|
119
|
+
Set year range: `client = AmazonOrder::Client.new(year_from: 2012, year_to: 2013)`
|
120
|
+
default is Time.current.year
|
121
|
+
|
122
|
+
##### Options of amazon_auth gem
|
123
|
+
|
124
|
+
Firefox: `driver: :firefox`
|
125
|
+
|
126
|
+
Output debug log: `debug: true`
|
127
|
+
|
128
|
+
## Notice
|
129
|
+
|
130
|
+
This may not work well with amazon.com because I don't have enough sample order pages from that site.
|
131
|
+
(amazon.co.jp will be OK)
|
132
|
+
|
133
|
+
## Development
|
134
|
+
|
135
|
+
```ruby
|
136
|
+
client = AmazonOrder::Client.new(debug: true)
|
137
|
+
```
|
138
|
+
|
139
|
+
### Testing
|
140
|
+
|
141
|
+
Test parsing of all your orders pages
|
142
|
+
|
143
|
+
```
|
144
|
+
ORDERS_DIR=/path/to/testapp/orders rspec spec/amazon_order/parser_spec.rb
|
145
|
+
```
|
146
|
+
|
147
|
+
## Contributing
|
148
|
+
|
149
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/kyamaguchi/amazon_order.
|
150
|
+
|
151
|
+
|
152
|
+
## License
|
153
|
+
|
154
|
+
The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
|
data/Rakefile
ADDED
data/amazon_order.gemspec
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# coding: utf-8
# Gem specification for amazon_order.
# Puts the gem's own lib directory on the load path so the version constant
# can be required before the gem is installed.
lib_path = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_path) unless $LOAD_PATH.include?(lib_path)
require 'amazon_order/version'

Gem::Specification.new do |spec|
  # The same sentence is used for both the summary and the description.
  tagline = "Scrape information of amazon orders"

  spec.name          = "amazon_order"
  spec.version       = AmazonOrder::VERSION
  spec.authors       = ["Kazuho Yamaguchi"]
  spec.email         = ["kzh.yap@gmail.com"]

  spec.summary       = tagline
  spec.description   = tagline
  spec.homepage      = "https://github.com/kyamaguchi/amazon_order"
  spec.license       = "MIT"

  # Package every git-tracked file except those under test/, spec/ or features/.
  excluded_dirs = %r{^(test|spec|features)/}
  tracked_files = `git ls-files -z`.split("\x0")
  spec.files         = tracked_files.reject { |path| path.match(excluded_dirs) }
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  spec.add_runtime_dependency "amazon_auth", "~> 0.3.3"

  # Development dependencies with a pinned version range.
  { "bundler" => "~> 1.14", "rake" => "~> 10.0", "rspec" => "~> 3.0" }.each do |gem_name, requirement|
    spec.add_development_dependency gem_name, requirement
  end

  # Development dependencies without a version constraint.
  %w[byebug pry-rescue pry-stack_explorer].each do |gem_name|
    spec.add_development_dependency gem_name
  end
end
|
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "amazon_order"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
IRB.start(__FILE__)
|
data/bin/setup
ADDED
data/lib/amazon_order.rb
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
require 'amazon_auth'
|
2
|
+
require "amazon_order/version"
|
3
|
+
require "amazon_order/client"
|
4
|
+
require "amazon_order/parsers/base"
|
5
|
+
require "amazon_order/parsers/order"
|
6
|
+
require "amazon_order/parsers/product"
|
7
|
+
require "amazon_order/parser"
|
8
|
+
require "amazon_order/writer"
|
9
|
+
|
10
|
+
module AmazonOrder
|
11
|
+
end
|
data/lib/amazon_order/client.rb
ADDED
@@ -0,0 +1,108 @@
|
|
1
|
+
module AmazonOrder
  # Drives a browser session (obtained from AmazonAuth::Client) through the
  # order-history pages, saves every visited page as an HTML file under the
  # base directory, and loads orders back from those saved files.
  #
  # NOTE(review): +log+, +links_for+, +doc+ and +wait_for_selector+ are not
  # defined in this class; presumably they come from the AmazonAuth extensions
  # mixed in below -- confirm against the amazon_auth gem. +Time.current+ and
  # +try!+ look like ActiveSupport extensions -- confirm they are loaded.
  class Client
    include AmazonAuth::CommonExtension

    attr_accessor :options
    # The reader for +session+ is defined explicitly below (lazy lookup).
    attr_writer :session

    # @param options [Hash] keys read by this class: :base_dir (default
    #   'orders'), :year_from, :year_to and :limit. The whole hash is also
    #   handed to AmazonAuth::Client.new.
    def initialize(options = {})
      @options = options
      @base_dir = @options.fetch(:base_dir, 'orders')
      @client = AmazonAuth::Client.new(@options)
      extend(AmazonAuth::SessionExtension)
    end

    # @return first year to fetch; defaults to the current year
    def year_from
      options.fetch(:year_from, Time.current.year)
    end

    # @return last year to fetch; defaults to the current year
    def year_to
      options.fetch(:year_to, Time.current.year)
    end

    # @return [Integer, nil] highest page number to fetch per year (default 5);
    #   nil disables the limit
    def limit
      options.fetch(:limit, 5)
    end

    # @return the session of the wrapped AmazonAuth client, looked up once
    def session
      @session ||= @client.session
    end

    # Signs in, opens the order-history page and saves the pages of every year
    # from +year_to+ down to +year_from+.
    def fetch_amazon_orders
      sign_in
      go_to_amazon_order_page
      year_to.to_i.downto(year_from.to_i) do |year|
        fetch_orders_for_year(year: year)
      end
    end

    # Parses every saved page under the base directory.
    #
    # @return [Array] orders, newest fetch first, with only the first
    #   occurrence of each order number kept
    def load_amazon_orders
      orders = Dir.glob("#{@base_dir}/*html").flat_map do |filepath|
        log "Loading #{filepath}"
        AmazonOrder::Parser.new(filepath).orders
      end
      orders.sort_by { |o| -o.fetched_at.to_i }.uniq(&:order_number)
    end

    def sign_in
      @client.sign_in
    end

    # Visits the first link on the current page whose URL contains
    # "/order-history".
    #
    # @raise [RuntimeError] when no such link is found (previously +nil+ was
    #   passed on to +session.visit+)
    def go_to_amazon_order_page
      link = links_for('a').find { |href| href =~ %r{/order-history} }
      raise 'Link to the order history page was not found' if link.nil?
      session.visit link
    end

    # Saves the order pages of one year, following the pagination links.
    #
    # The limit is checked before moving on to the next page, so +limit: 1+
    # saves exactly one page (the check used to run only after the second page
    # had already been visited and saved).
    #
    # @param options [Hash] :year selects the year (default: current year)
    def fetch_orders_for_year(options = {})
      year = options.fetch(:year, Time.current.year)
      return unless switch_year(year)

      # The first page tolerates a missing pagination node (page text is nil).
      page = current_page_node.try!(:text)
      save_page_for(year, page)

      until page_limit_reached?(page)
        node = next_page_node
        break if node.nil?
        session.visit node.attr('href')
        page = current_page_node.text
        save_page_for(year, page)
      end
      nil
    end

    # Selects +year+ in the order-filter dropdown unless it is already selected.
    #
    # @return [Boolean] true when the requested year is (now) selected, false
    #   when the year is not offered or any error occurred (the error message
    #   and backtrace are printed)
    def switch_year(year)
      return true if year.to_i == selected_year
      session.first('.order-filter-dropdown .a-dropdown-prompt').click
      option = session.all('.a-popover-wrapper .a-dropdown-link').find do |e|
        e.text.gsub(/\D+/, '').to_i == year.to_i
      end
      return false if option.nil?
      option.click
      # Fixed pause after clicking -- presumably to let the page reload; confirm.
      sleep 2
      log "Year:#{year} -> #{number_of_orders}"
      true
    rescue => e
      puts "#{e.message}\n#{e.backtrace.join("\n")}"
      false
    end

    # Saves the current page as
    # "order-<year>-p<page>-<YYYYmmddHHMMSS>.html" under the base directory.
    def save_page_for(year, page)
      log "Saving year:#{year} page:#{page}"
      timestamp = Time.current.strftime('%Y%m%d%H%M%S')
      path = ['order', year.to_s, "p#{page}", timestamp].join('-') + '.html'
      session.save_page(File.join(@base_dir, path))
    end

    # @return [Integer] digits of the value of the selected "#orderFilter" option
    def selected_year
      wait_for_selector('#orderFilter')
      selected = doc.css('#orderFilter option').find { |o| !o.attr('selected').nil? }
      selected.attr('value').gsub(/\D+/, '').to_i
    end

    # @return [String] stripped text of "#controlsContainer .num-orders"
    def number_of_orders
      doc.css('#controlsContainer .num-orders').text.strip
    end

    # @return the link node of the selected pagination item, or nil
    def current_page_node
      wait_for_selector('.a-pagination .a-selected')
      doc.css('.a-pagination .a-selected a').first
    end

    # @return the link node of the first regular pagination item that follows
    #   the selected one, or nil when there is none
    def next_page_node
      wait_for_selector('.a-pagination .a-selected')
      doc.css('.a-pagination .a-selected ~ .a-normal').css('a').first
    end

    private

    # True when a limit is set and +page+ (page-number text, possibly nil) has
    # reached it; nil converts to 0 and therefore never reaches a positive limit.
    def page_limit_reached?(page)
      !limit.nil? && limit <= page.to_i
    end
  end
end
|
data/lib/amazon_order/parser.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# Time.strptime lives in the "time" standard library; require it explicitly
# instead of relying on another gem having loaded it.
require 'time'

module AmazonOrder
  # Reads one saved order-history HTML file and exposes the orders found in it.
  class Parser
    # A non-digit followed by 14 digits (YYYYmmddHHMMSS) in the file name.
    FILENAME_TIMESTAMP = /\D(\d{14})/

    attr_accessor :fetched_at

    # @param filepath [String] path of the saved HTML page
    # @param options [Hash] accepted for interface compatibility; not read here
    #
    # +fetched_at+ is taken from the timestamp embedded in the file name; when
    # the name carries no usable timestamp, the file's ctime is used instead.
    def initialize(filepath, options = {})
      @filepath = filepath
      @fetched_at = timestamp_from_filename || File.ctime(@filepath)
    end

    # @return [Array<AmazonOrder::Parsers::Order>] one parser object per
    #   ".order" node of the document, each tagged with +fetched_at+
    def orders
      @orders ||= doc.css('.order').map do |e|
        AmazonOrder::Parsers::Order.new(e, fetched_at: fetched_at)
      end
    end

    # @return the Nokogiri HTML document built from +body+ (memoized)
    def doc
      @doc ||= Nokogiri::HTML(body)
    end

    # @return [String] raw file contents (memoized)
    def body
      @body ||= File.read(@filepath)
    end

    private

    # @return [Time, nil] timestamp parsed from the file name, or nil when the
    #   name has no 14-digit run or the digits do not form a valid timestamp
    def timestamp_from_filename
      m = File.basename(@filepath).match(FILENAME_TIMESTAMP)
      return nil if m.nil?
      Time.strptime(m[1], '%Y%m%d%H%M%S')
    rescue ArgumentError
      nil
    end
  end
end
|
data/lib/amazon_order/parsers/base.rb
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
# Date is part of the "date" standard library; require it explicitly.
require 'date'

module AmazonOrder
  module Parsers
    # Common behaviour of the node parsers. A subclass defines an ATTRIBUTES
    # constant (list of method names) and one reader method per attribute.
    class Base
      # Date written as "<year>年<month>月<day>日".
      JAPANESE_DATE = /\A(?<year>\d{4})年(?<month>\d{1,2})月(?<day>\d{1,2})日\z/

      attr_accessor :fetched_at

      # @param node the node the subclass readers query
      # @param options [Hash] :fetched_at is stored and exposed via +fetched_at+
      def initialize(node, options = {})
        @node = node
        @fetched_at = options[:fetched_at]
      end

      def inspect
        "#<#{self.class.name}:#{object_id} #{to_hash}>"
      end

      # Builds a hash keyed by the entries of the subclass's ATTRIBUTES.
      #
      # @yield [Hash] the hash, so that a caller can add further entries
      # @return [Hash]
      def to_hash
        hash = {}
        self.class::ATTRIBUTES.each { |f| hash[f] = send(f) }
        yield(hash) if block_given?
        hash
      end

      # @return [Array] attribute values in ATTRIBUTES order
      def values
        self.class::ATTRIBUTES.map { |a| send(a) }
      end

      # Parses a date written either in the Japanese "2010年8月25日" form or in
      # any form Date.parse understands.
      #
      # The Japanese form is matched first, so it no longer depends on
      # Date.parse raising for it.
      #
      # @param date_text [String]
      # @return [Date]
      # @raise [ArgumentError] when the text is in neither form (previously a
      #   NoMethodError on a nil match escaped)
      def parse_date(date_text)
        text = date_text.to_s.strip
        m = text.match(JAPANESE_DATE)
        return Date.new(m[:year].to_i, m[:month].to_i, m[:day].to_i) if m
        Date.parse(text)
      end

      # Reduces the file name of an image URL to "<first segment>.<last
      # segment>", dropping the dot-separated segments in between
      # (e.g. "51TODrMIEnL._SL160_.jpg" becomes "51TODrMIEnL.jpg").
      #
      # File names with fewer than three dot-separated segments are returned
      # unchanged (a name without any dot used to be doubled, "abc" ->
      # "abc.abc"); nil and empty input are returned as they are.
      #
      # @param url [String, nil]
      # @return [String, nil]
      def get_original_image_url(url)
        return url if url.nil? || url.empty?
        directory, separator, filename = url.rpartition('/')
        segments = filename.split('.')
        filename = "#{segments.first}.#{segments.last}" if segments.size > 2
        "#{directory}#{separator}#{filename}"
      end
    end
  end
end
|
data/lib/amazon_order/parsers/order.rb
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
module AmazonOrder
  module Parsers
    # Reads the fields of a single order out of the node passed to the
    # constructor. All selectors below are CSS queries against that node.
    class Order < Base
      ATTRIBUTES = %w[
        order_placed order_number order_total
        shipment_status shipment_note
        order_details_path
        all_products_displayed
      ].freeze

      # @return [Date] parsed ".value" text of the first left-hand info column
      def order_placed
        @_order_placed ||= parse_date(info_columns[0].css('.value').text.strip)
      end

      # @return [String] ".value" text of the first right-hand info row
      def order_number
        @_order_number ||= info_rows[0].css('.value').text.strip
      end

      # @return [Float] ".value" text of the second left-hand info column with
      #   everything except digits and dots removed
      def order_total
        @_order_total ||= info_columns[1].css('.value').text.strip.gsub(/[^\d\.]/, '').to_f
      end

      # @return [String, nil] text of the first row of the shipment header, or
      #   nil when there is none
      def shipment_status
        # class names like "shipment-is-delivered" in '.shipment' node may be useful
        @_shipment_status ||= shipment_row_text(0)
      end

      # @return [String, nil] text of the second row of the shipment header, or
      #   nil when the header is absent or has only one row (the one-row case
      #   used to raise NoMethodError)
      def shipment_note
        @_shipment_note ||= shipment_row_text(1)
      end

      # @return [String] href of the first "a.a-link-normal" in the second
      #   right-hand info row
      def order_details_path
        @_order_details_path ||= info_rows[1].css('a.a-link-normal')[0].attr('href')
      end

      # @return [Boolean] whether the last product row contains an
      #   ".a-link-emphasis" node; false when there are no product rows
      #
      # NOTE(review): the meaning of that link is not visible here -- confirm
      # that its presence really means "all products are displayed".
      def all_products_displayed
        # defined? instead of ||= so that a false result is memoized too.
        return @_all_products_displayed if defined?(@_all_products_displayed)
        last_row = product_rows.last
        @_all_products_displayed = !last_row.nil? && !last_row.css('.a-link-emphasis').empty?
      end

      # @return [Array<AmazonOrder::Parsers::Product>] one parser per
      #   ".a-fixed-left-grid" node of the first product row; empty when there
      #   is no product row (this used to raise NoMethodError)
      def products
        @_products ||= begin
          first_row = product_rows[0]
          grids = first_row ? first_row.css('.a-fixed-left-grid') : []
          grids.map { |e| AmazonOrder::Parsers::Product.new(e, fetched_at: fetched_at) }
        end
      end

      # Same as Base#to_hash plus a :products entry holding the product hashes.
      def to_hash
        super do |hash|
          hash.merge!(products: products.map(&:to_hash))
        end
      end

      private

      # Columns on the left side of the order-info box.
      def info_columns
        @node.css('.order-info .a-col-left .a-column')
      end

      # Rows on the right side of the order-info box.
      def info_rows
        @node.css('.order-info .a-col-right .a-row')
      end

      # Rows of the boxes that follow the order-info box.
      def product_rows
        @node.css('.a-box.order-info ~ .a-box .a-col-left .a-row')
      end

      # Stripped text of the row at +index+ inside the shipment header; nil when
      # that row does not exist.
      def shipment_row_text(index)
        row = @node.css('.shipment .shipment-top-row .a-row')[index]
        row ? row.text.strip : nil
      end
    end
  end
end
|
data/lib/amazon_order/parsers/product.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
module AmazonOrder
  module Parsers
    # Reads the fields of a single product out of the node passed to the
    # constructor.
    class Product < Base
      ATTRIBUTES = %w[
        title
        path
        content
        image_url
      ].freeze

      # @return [String] stripped text of the first right-hand row
      def title
        @_title ||= detail_rows[0].text.strip
      end

      # @return [String, nil] href of the first link inside the right-hand rows,
      #   or nil when there is no link
      #
      # The missing-link case is handled explicitly; the former inline
      # "rescue nil" also swallowed every unrelated error.
      def path
        @_path ||= begin
          link = @node.css('.a-col-right .a-row a')[0]
          link ? link.attr('href') : nil
        end
      end

      # @return [String] text of all right-hand rows after the first, joined,
      #   with whitespace runs collapsed to single spaces
      def content
        @_content ||= detail_rows[1..-1].map(&:text).join.gsub(/\s+/, ' ').strip
      end

      # @return [String] src of the first left-hand image, passed through
      #   get_original_image_url
      def image_url
        @_image_url ||= get_original_image_url(@node.css('.a-col-left img')[0].attr('src'))
      end

      private

      # Rows of the right-hand column.
      def detail_rows
        @node.css('.a-col-right .a-row')
      end
    end
  end
end
|
data/lib/amazon_order/writer.rb
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
# FileUtils.mkdir_p is used below; require it explicitly instead of relying on
# another library having loaded it.
require 'fileutils'

module AmazonOrder
  # Collects order and product rows from the saved HTML pages of a directory
  # and writes them out as CSV files.
  class Writer
    RESOURCES = %w[orders products].freeze

    # @param base_dir [String] directory that holds the saved "*html" pages
    # @param options [Hash] :output_dir is where CSV files go (default 'tmp')
    def initialize(base_dir, options = {})
      @base_dir = base_dir
      @output_dir = options.fetch(:output_dir, 'tmp')
    end

    # @return [Array<Array>] one array of attribute values per order
    #   (despite the name, nothing is printed)
    def print_orders
      data['orders']
    end

    # @return [Array<Array>] one array of attribute values per product
    #   (despite the name, nothing is printed)
    def print_products
      data['products']
    end

    # The method was originally published under this misspelled name; keep it
    # working for existing callers.
    alias_method :print_produts, :print_products

    # Writes "<output_dir>/<resource><YYYYmmddHHMMSS>.csv" for each resource
    # that has rows, creating the output directory first.
    #
    # @return [Array<String, nil>] for 'orders' and 'products' in that order:
    #   the path written, or nil when the resource had no rows
    def generate_csv
      require 'csv'
      FileUtils.mkdir_p(@output_dir)
      RESOURCES.map do |resource|
        rows = data[resource]
        # rows is always an Array here, so empty? is all that needs checking.
        next if rows.empty?
        csv_file = "#{@output_dir}/#{resource}#{Time.current.strftime('%Y%m%d%H%M%S')}.csv"
        puts " Writing #{csv_file}"
        CSV.open(csv_file, 'wb') do |csv|
          rows.each { |r| csv << r }
        end
        csv_file
      end
    end

    private

    # Parses every "*html" file under the base directory once and memoizes the
    # resulting rows.
    #
    # NOTE(review): unlike Client#load_amazon_orders, orders that appear in
    # several saved pages are not de-duplicated here -- confirm whether
    # duplicate CSV rows are intended.
    #
    # @return [Hash{String => Array<Array>}] keys 'orders' and 'products'
    def data
      @_data ||= begin
        orders = Dir.glob("#{@base_dir}/*html").flat_map do |filepath|
          puts " Parsing #{filepath}"
          AmazonOrder::Parser.new(filepath).orders
        end
        {
          'orders' => orders.map(&:values),
          'products' => orders.flat_map(&:products).map(&:values)
        }
      end
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,160 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: amazon_order
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Kazuho Yamaguchi
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2017-07-05 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: amazon_auth
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 0.3.3
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.3.3
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: bundler
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.14'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.14'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '10.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '10.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rspec
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '3.0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '3.0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: byebug
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: pry-rescue
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: pry-stack_explorer
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
111
|
+
description: Scrape information of amazon orders
|
112
|
+
email:
|
113
|
+
- kzh.yap@gmail.com
|
114
|
+
executables: []
|
115
|
+
extensions: []
|
116
|
+
extra_rdoc_files: []
|
117
|
+
files:
|
118
|
+
- ".gitignore"
|
119
|
+
- ".rspec"
|
120
|
+
- ".travis.yml"
|
121
|
+
- Gemfile
|
122
|
+
- LICENSE.txt
|
123
|
+
- README.md
|
124
|
+
- Rakefile
|
125
|
+
- amazon_order.gemspec
|
126
|
+
- bin/console
|
127
|
+
- bin/setup
|
128
|
+
- lib/amazon_order.rb
|
129
|
+
- lib/amazon_order/client.rb
|
130
|
+
- lib/amazon_order/parser.rb
|
131
|
+
- lib/amazon_order/parsers/base.rb
|
132
|
+
- lib/amazon_order/parsers/order.rb
|
133
|
+
- lib/amazon_order/parsers/product.rb
|
134
|
+
- lib/amazon_order/version.rb
|
135
|
+
- lib/amazon_order/writer.rb
|
136
|
+
homepage: https://github.com/kyamaguchi/amazon_order
|
137
|
+
licenses:
|
138
|
+
- MIT
|
139
|
+
metadata: {}
|
140
|
+
post_install_message:
|
141
|
+
rdoc_options: []
|
142
|
+
require_paths:
|
143
|
+
- lib
|
144
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
145
|
+
requirements:
|
146
|
+
- - ">="
|
147
|
+
- !ruby/object:Gem::Version
|
148
|
+
version: '0'
|
149
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
150
|
+
requirements:
|
151
|
+
- - ">="
|
152
|
+
- !ruby/object:Gem::Version
|
153
|
+
version: '0'
|
154
|
+
requirements: []
|
155
|
+
rubyforge_project:
|
156
|
+
rubygems_version: 2.6.11
|
157
|
+
signing_key:
|
158
|
+
specification_version: 4
|
159
|
+
summary: Scrape information of amazon orders
|
160
|
+
test_files: []
|