amazon_order 0.1.0 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.travis.yml +4 -2
- data/README.md +18 -7
- data/amazon_order.gemspec +4 -4
- data/lib/amazon_order.rb +1 -0
- data/lib/amazon_order/client.rb +28 -4
- data/lib/amazon_order/parsers/base.rb +1 -0
- data/lib/amazon_order/parsers/order.rb +31 -13
- data/lib/amazon_order/parsers/product.rb +4 -1
- data/lib/amazon_order/parsers/shipment.rb +38 -0
- data/lib/amazon_order/version.rb +1 -1
- data/lib/amazon_order/writer.rb +15 -5
- metadata +18 -18
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: ed9bb5a826d4c1d9248638828f0fab21219b2dccba0306e5e3c512801a2ce60e
|
4
|
+
data.tar.gz: dd2bcce492a9218a499bc63a3778f3c9d5a1f3baa3f4a018e4fbb416f84c69d4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9e0f7903e9248d6e83a75e2bcf7810f2376d607453b8d05fe57c10deaeff21246fffb4d7c6c07e213ddf0aa67480be748c7253f6b0abdd181401d3467780853d
|
7
|
+
data.tar.gz: b556a0c0596b71e14d552da3e60aa6e8faf76c35e3a6e1f07469ffcfbe3798e74ad6cecbd76d67b1f9018993ebdc140174291f7b4f582e97f1206f552ee2c5a5
|
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -37,11 +37,14 @@ Or install it yourself as:
|
|
37
37
|
|
38
38
|
[chromedriver](https://sites.google.com/a/chromium.org/chromedriver/downloads) is required. Please [download chromedriver](http://chromedriver.storage.googleapis.com/index.html) and update chromedriver regularly.
|
39
39
|
|
40
|
-
Create
|
40
|
+
Create credentials following the instructions of https://github.com/kyamaguchi/amazon_auth
|
41
|
+
Use `envchain` or _.env_
|
41
42
|
|
42
43
|
```
|
43
44
|
amazon_auth
|
44
45
|
|
46
|
+
envchain amazon ...
|
47
|
+
# OR
|
45
48
|
vi .env
|
46
49
|
```
|
47
50
|
|
@@ -53,7 +56,7 @@ In console
|
|
53
56
|
|
54
57
|
```ruby
|
55
58
|
require 'amazon_order'
|
56
|
-
client = AmazonOrder::Client.new(verbose: true, limit: 10)
|
59
|
+
client = AmazonOrder::Client.new(keep_cookie: true, verbose: true, limit: 10)
|
57
60
|
client.fetch_amazon_orders
|
58
61
|
# Fetch orders of specified year
|
59
62
|
client.fetch_orders_for_year(year: 2016)
|
@@ -65,8 +68,8 @@ client.go_to_amazon_order_page
|
|
65
68
|
client.fetch_orders_for_year(year: 2015)
|
66
69
|
```
|
67
70
|
|
68
|
-
Downloaded pages will be stored into `
|
69
|
-
|
71
|
+
Downloaded pages will be stored into `tmp/orders` directory.
|
72
|
+
`tmp` comes from `Capybara.save_path`.
|
70
73
|
|
71
74
|
Once `fetch_amazon_orders` succeeds, you can load orders information of downloaded pages anytime.
|
72
75
|
(You don't need to fetch pages with launching browser every time.)
|
@@ -86,7 +89,7 @@ products.size
|
|
86
89
|
orders.group_by{|o| o.order_placed.strftime('%Y') }.sort_by{|year,_| year }.map{|year,records| puts [year, records.map(&:order_total).sum].inspect };nil
|
87
90
|
```
|
88
91
|
|
89
|
-
Example of data
|
92
|
+
#### Example of data
|
90
93
|
|
91
94
|
```ruby
|
92
95
|
console> pp orders.first.to_hash
|
@@ -111,16 +114,24 @@ console> pp products.first.to_hash
|
|
111
114
|
"https://images-fe.ssl-images-amazon.com/images/I/51TODrMIEnL.jpg"}
|
112
115
|
```
|
113
116
|
|
117
|
+
#### Export csv
|
118
|
+
|
119
|
+
```ruby
|
120
|
+
client.generate_csv
|
121
|
+
```
|
122
|
+
|
114
123
|
#### Options
|
115
124
|
|
116
125
|
Limit fetching with number of pages: `client = AmazonOrder::Client.new(limit: 5)`
|
117
|
-
`limit: nil` for no limit. default is 5
|
126
|
+
(`limit: nil` for no limit. default is 5)
|
118
127
|
|
119
128
|
Set year range: `client = AmazonOrder::Client.new(year_from: 2012, year_to: 2013)`
|
120
|
-
default is Time.current.year
|
129
|
+
(default is Time.current.year)
|
121
130
|
|
122
131
|
##### Options of amazon_auth gem
|
123
132
|
|
133
|
+
Keep cookies(keep signin): `keep_cookie: true`
|
134
|
+
|
124
135
|
Firefox: `driver: :firefox`
|
125
136
|
|
126
137
|
Output debug log: `debug: true`
|
data/amazon_order.gemspec
CHANGED
@@ -21,10 +21,10 @@ Gem::Specification.new do |spec|
|
|
21
21
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
22
22
|
spec.require_paths = ["lib"]
|
23
23
|
|
24
|
-
spec.add_runtime_dependency "amazon_auth", "~> 0.
|
25
|
-
spec.add_development_dependency "bundler"
|
26
|
-
spec.add_development_dependency "rake"
|
27
|
-
spec.add_development_dependency "rspec"
|
24
|
+
spec.add_runtime_dependency "amazon_auth", "~> 0.4.0"
|
25
|
+
spec.add_development_dependency "bundler"
|
26
|
+
spec.add_development_dependency "rake"
|
27
|
+
spec.add_development_dependency "rspec"
|
28
28
|
spec.add_development_dependency "byebug"
|
29
29
|
spec.add_development_dependency "pry-rescue"
|
30
30
|
spec.add_development_dependency "pry-stack_explorer"
|
data/lib/amazon_order.rb
CHANGED
@@ -3,6 +3,7 @@ require "amazon_order/version"
|
|
3
3
|
require "amazon_order/client"
|
4
4
|
require "amazon_order/parsers/base"
|
5
5
|
require "amazon_order/parsers/order"
|
6
|
+
require "amazon_order/parsers/shipment"
|
6
7
|
require "amazon_order/parsers/product"
|
7
8
|
require "amazon_order/parser"
|
8
9
|
require "amazon_order/writer"
|
data/lib/amazon_order/client.rb
CHANGED
@@ -6,11 +6,14 @@ module AmazonOrder
|
|
6
6
|
|
7
7
|
def initialize(options = {})
|
8
8
|
@options = options
|
9
|
-
@base_dir = @options.fetch(:base_dir, 'orders')
|
10
9
|
@client = AmazonAuth::Client.new(@options)
|
11
10
|
extend(AmazonAuth::SessionExtension)
|
12
11
|
end
|
13
12
|
|
13
|
+
def base_dir
|
14
|
+
options.fetch(:base_dir, 'orders')
|
15
|
+
end
|
16
|
+
|
14
17
|
def year_from
|
15
18
|
options.fetch(:year_from, Time.current.year)
|
16
19
|
end
|
@@ -37,7 +40,7 @@ module AmazonOrder
|
|
37
40
|
|
38
41
|
def load_amazon_orders
|
39
42
|
orders = []
|
40
|
-
Dir.glob(
|
43
|
+
Dir.glob(file_glob_pattern).each do |filepath|
|
41
44
|
log "Loading #{filepath}"
|
42
45
|
parser = AmazonOrder::Parser.new(filepath)
|
43
46
|
orders += parser.orders
|
@@ -45,13 +48,34 @@ module AmazonOrder
|
|
45
48
|
orders.sort_by{|o| -o.fetched_at.to_i }.uniq(&:order_number)
|
46
49
|
end
|
47
50
|
|
51
|
+
def file_glob_pattern
|
52
|
+
File.join(Capybara.save_path, base_dir, '*html')
|
53
|
+
end
|
54
|
+
|
55
|
+
def generate_csv
|
56
|
+
writer.generate_csv
|
57
|
+
end
|
58
|
+
|
59
|
+
def writer
|
60
|
+
@_writer ||= AmazonOrder::Writer.new(file_glob_pattern)
|
61
|
+
end
|
62
|
+
|
48
63
|
def sign_in
|
49
64
|
@client.sign_in
|
50
65
|
end
|
51
66
|
|
52
67
|
def go_to_amazon_order_page
|
68
|
+
if doc.css('.cvf-account-switcher').present?
|
69
|
+
log "Account switcher page was displayed"
|
70
|
+
session.first('.cvf-account-switcher-profile-details').click
|
71
|
+
wait_for_selector('#nav-main') # Wait for page loading
|
72
|
+
end
|
53
73
|
link = links_for('a').find{|link| link =~ %r{/order-history} }
|
54
|
-
|
74
|
+
if link.present?
|
75
|
+
session.visit link
|
76
|
+
else
|
77
|
+
log "Link for order history wasn't found in #{session.current_url}"
|
78
|
+
end
|
55
79
|
end
|
56
80
|
|
57
81
|
def fetch_orders_for_year(options = {})
|
@@ -83,7 +107,7 @@ module AmazonOrder
|
|
83
107
|
def save_page_for(year, page)
|
84
108
|
log "Saving year:#{year} page:#{page}"
|
85
109
|
path = ['order', year.to_s, "p#{page}", Time.current.strftime('%Y%m%d%H%M%S')].join('-') + '.html'
|
86
|
-
session.save_page(File.join(@base_dir, path))
|
110
|
+
session.save_page(File.join(base_dir, path))
|
87
111
|
end
|
88
112
|
|
89
113
|
def selected_year
|
@@ -3,7 +3,6 @@ module AmazonOrder
|
|
3
3
|
class Order < Base
|
4
4
|
ATTRIBUTES = %w[
|
5
5
|
order_placed order_number order_total
|
6
|
-
shipment_status shipment_note
|
7
6
|
order_details_path
|
8
7
|
all_products_displayed
|
9
8
|
]
|
@@ -17,36 +16,55 @@ module AmazonOrder
|
|
17
16
|
end
|
18
17
|
|
19
18
|
def order_total
|
20
|
-
@_order_total ||= @node.css('.order-info .a-col-left .a-column')[1].css('.value').text.strip.gsub(/[^\d\.]/,'').to_f
|
19
|
+
@_order_total ||= @node.css('.order-info .a-col-left .a-column')[1].css('.value').text.strip.gsub(/[^\d\.]/, '').to_f
|
21
20
|
end
|
22
21
|
|
23
|
-
def shipment_status
|
24
|
-
|
25
|
-
@_shipment_status ||= @node.css('.shipment .shipment-top-row').present? ? @node.css('.shipment .shipment-top-row .a-row')[0].text.strip : nil
|
22
|
+
def order_details_path
|
23
|
+
@_order_details_path ||= @node.css('.order-info .a-col-right .a-row')[1].css('a.a-link-normal')[0].attr('href')
|
26
24
|
end
|
27
25
|
|
28
|
-
def
|
29
|
-
|
26
|
+
def order_type
|
27
|
+
if @node.css('[id^=Leave-Service-Feedback]').present?
|
28
|
+
return :service_order
|
29
|
+
elsif @node.css('.shipment').present?
|
30
|
+
:shipment_order
|
31
|
+
else
|
32
|
+
:digital_order
|
33
|
+
end
|
30
34
|
end
|
31
35
|
|
32
|
-
def
|
33
|
-
@
|
36
|
+
def shipments
|
37
|
+
@_shipments ||= @node.css('.shipment')
|
38
|
+
.map do |shipment|
|
39
|
+
AmazonOrder::Parsers::Shipment.new(shipment,
|
40
|
+
containing_object: self,
|
41
|
+
fetched_at: fetched_at)
|
42
|
+
end
|
34
43
|
end
|
35
44
|
|
36
|
-
def
|
37
|
-
@
|
45
|
+
def products
|
46
|
+
@products ||= shipment_products + digital_products
|
38
47
|
end
|
39
48
|
|
40
|
-
def
|
41
|
-
@
|
49
|
+
def shipment_products
|
50
|
+
@shipment_products ||= shipments.flat_map(&:products)
|
51
|
+
end
|
52
|
+
|
53
|
+
def digital_products
|
54
|
+
@_products ||= @node.css('.a-box:not(.shipment) .a-fixed-left-grid').map { |e| AmazonOrder::Parsers::Product.new(e, fetched_at: fetched_at) }
|
42
55
|
end
|
43
56
|
|
57
|
+
# might be broken now that orders have multiple shipments
|
58
|
+
def all_products_displayed
|
59
|
+
@_all_products_displayed ||= @node.css('.a-box.order-info ~ .a-box .a-col-left .a-row').last.css('.a-link-emphasis').present?
|
60
|
+
end
|
44
61
|
|
45
62
|
def to_hash
|
46
63
|
super do |hash|
|
47
64
|
hash.merge!(products: products.map(&:to_hash))
|
48
65
|
end
|
49
66
|
end
|
67
|
+
|
50
68
|
end
|
51
69
|
end
|
52
70
|
end
|
@@ -21,7 +21,10 @@ module AmazonOrder
|
|
21
21
|
end
|
22
22
|
|
23
23
|
def image_url
|
24
|
-
@_image_url ||=
|
24
|
+
@_image_url ||= begin
|
25
|
+
img = @node.css('.a-col-left img')[0]
|
26
|
+
get_original_image_url(img.attr('data-a-hires').presence || img.attr('src'))
|
27
|
+
end
|
25
28
|
end
|
26
29
|
end
|
27
30
|
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module AmazonOrder
|
2
|
+
module Parsers
|
3
|
+
class Shipment < Base
|
4
|
+
ATTRIBUTES = %w[
|
5
|
+
shipment_status
|
6
|
+
shipment_note
|
7
|
+
]
|
8
|
+
|
9
|
+
# TODO shipment_date
|
10
|
+
|
11
|
+
def order
|
12
|
+
@containing_object
|
13
|
+
end
|
14
|
+
|
15
|
+
def shipment_status
|
16
|
+
# class names like "shipment-is-delivered" in '.shipment' node may be useful
|
17
|
+
@_shipment_status ||= @node.css('.shipment-top-row').present? ? @node.css('.shipment .shipment-top-row .a-row')[0].text.strip : nil
|
18
|
+
end
|
19
|
+
|
20
|
+
def shipment_note
|
21
|
+
@_shipment_note ||= case order.order_type
|
22
|
+
when :shipment_order
|
23
|
+
@node.css('.shipment-top-row').present? ? @node.css('.shipment .shipment-top-row .a-row')[1].text.strip : nil
|
24
|
+
when :service_order
|
25
|
+
nil
|
26
|
+
when :digital_order
|
27
|
+
nil
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
|
32
|
+
def products
|
33
|
+
@_products ||= @node.css('.a-fixed-left-grid').map { |e| AmazonOrder::Parsers::Product.new(e, fetched_at: fetched_at) }
|
34
|
+
end
|
35
|
+
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
data/lib/amazon_order/version.rb
CHANGED
data/lib/amazon_order/writer.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
module AmazonOrder
|
2
2
|
class Writer
|
3
|
-
def initialize(
|
4
|
-
@
|
3
|
+
def initialize(file_glob_pattern, options = {})
|
4
|
+
@file_glob_pattern = file_glob_pattern
|
5
5
|
@output_dir = options.fetch(:output_dir, 'tmp')
|
6
6
|
end
|
7
7
|
|
@@ -9,7 +9,7 @@ module AmazonOrder
|
|
9
9
|
data['orders']
|
10
10
|
end
|
11
11
|
|
12
|
-
def
|
12
|
+
def print_products
|
13
13
|
data['products']
|
14
14
|
end
|
15
15
|
|
@@ -21,7 +21,8 @@ module AmazonOrder
|
|
21
21
|
csv_file = "#{@output_dir}/#{resource}#{Time.current.strftime('%Y%m%d%H%M%S')}.csv"
|
22
22
|
puts " Writing #{csv_file}"
|
23
23
|
CSV.open(csv_file, 'wb') do |csv|
|
24
|
-
|
24
|
+
csv << attributes_for(resource)
|
25
|
+
data[resource].each { |r| csv << r }
|
25
26
|
end
|
26
27
|
csv_file
|
27
28
|
end
|
@@ -32,7 +33,7 @@ module AmazonOrder
|
|
32
33
|
def data
|
33
34
|
@_data ||= begin
|
34
35
|
data = {'orders' => [], 'products' => []}
|
35
|
-
Dir.glob(
|
36
|
+
Dir.glob(@file_glob_pattern).each do |filepath|
|
36
37
|
puts " Parsing #{filepath}"
|
37
38
|
parser = AmazonOrder::Parser.new(filepath)
|
38
39
|
data['orders'] += parser.orders.map(&:values)
|
@@ -41,5 +42,14 @@ module AmazonOrder
|
|
41
42
|
data
|
42
43
|
end
|
43
44
|
end
|
45
|
+
|
46
|
+
def attributes_for(resource)
|
47
|
+
case resource
|
48
|
+
when 'orders'
|
49
|
+
AmazonOrder::Parsers::Order::ATTRIBUTES
|
50
|
+
when 'products'
|
51
|
+
AmazonOrder::Parsers::Product::ATTRIBUTES
|
52
|
+
end
|
53
|
+
end
|
44
54
|
end
|
45
55
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: amazon_order
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kazuho Yamaguchi
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-05-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: amazon_auth
|
@@ -16,56 +16,56 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 0.
|
19
|
+
version: 0.4.0
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 0.
|
26
|
+
version: 0.4.0
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: bundler
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- - "
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- - "
|
38
|
+
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
40
|
+
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: rake
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- - "
|
45
|
+
- - ">="
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '
|
47
|
+
version: '0'
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- - "
|
52
|
+
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '
|
54
|
+
version: '0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: rspec
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- - "
|
59
|
+
- - ">="
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: '
|
61
|
+
version: '0'
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
|
-
- - "
|
66
|
+
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version: '
|
68
|
+
version: '0'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: byebug
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -131,6 +131,7 @@ files:
|
|
131
131
|
- lib/amazon_order/parsers/base.rb
|
132
132
|
- lib/amazon_order/parsers/order.rb
|
133
133
|
- lib/amazon_order/parsers/product.rb
|
134
|
+
- lib/amazon_order/parsers/shipment.rb
|
134
135
|
- lib/amazon_order/version.rb
|
135
136
|
- lib/amazon_order/writer.rb
|
136
137
|
homepage: https://github.com/kyamaguchi/amazon_order
|
@@ -152,8 +153,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
152
153
|
- !ruby/object:Gem::Version
|
153
154
|
version: '0'
|
154
155
|
requirements: []
|
155
|
-
|
156
|
-
rubygems_version: 2.6.11
|
156
|
+
rubygems_version: 3.0.8
|
157
157
|
signing_key:
|
158
158
|
specification_version: 4
|
159
159
|
summary: Scrape information of amazon orders
|