compactor 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +2 -0
- data/.travis.yml +9 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +47 -0
- data/LICENSE +22 -0
- data/README.md +47 -0
- data/Rakefile +48 -0
- data/compactor.gemspec +33 -0
- data/lib/compactor/extensions.rb +23 -0
- data/lib/compactor/scraped_row.rb +81 -0
- data/lib/compactor/scraper.rb +369 -0
- data/lib/compactor/version.rb +3 -0
- data/lib/compactor.rb +5 -0
- data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_bad_login/raise_error.yml +535 -0
- data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/find_reports/multiple_pages.yml +11382 -0
- data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/find_reports/no_reports.yml +777 -0
- data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/find_reports/no_reports_to_request.yml +1804 -0
- data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/find_reports/reports_to_request.yml +13482 -0
- data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/get_balance.yml +1050 -0
- data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/get_orders.yml +822 -0
- data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/get_orders_big.yml +4223 -0
- data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/get_orders_logging.yml +820 -0
- data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/get_orders_with_po_box.yml +793 -0
- data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/shipping_address_not_starting_with_number.yml +800 -0
- data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/with_multiple_marketplaces/find_reports/reports_to_request.yml +2948 -0
- data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/with_multiple_marketplaces/get_marketplaces.yml +842 -0
- data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/with_single_marketplaces/get_marketplaces.yml +877 -0
- data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_locked_account/raise_error.yml +1033 -0
- data/test/mechanize_extensions_test.rb +16 -0
- data/test/scraped_row_test.rb +9 -0
- data/test/scraper_test.rb +189 -0
- data/test/test_helper.rb +18 -0
- metadata +205 -0
data/.gitignore
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
compactor (0.1.2)
|
5
|
+
mechanize (= 2.4)
|
6
|
+
|
7
|
+
GEM
|
8
|
+
remote: http://rubygems.org/
|
9
|
+
specs:
|
10
|
+
domain_name (0.5.4)
|
11
|
+
unf (~> 0.0.3)
|
12
|
+
fakeweb (1.3.0)
|
13
|
+
mechanize (2.4)
|
14
|
+
domain_name (~> 0.5, >= 0.5.1)
|
15
|
+
mime-types (~> 1.17, >= 1.17.2)
|
16
|
+
net-http-digest_auth (~> 1.1, >= 1.1.1)
|
17
|
+
net-http-persistent (~> 2.5, >= 2.5.2)
|
18
|
+
nokogiri (~> 1.4)
|
19
|
+
ntlm-http (~> 0.1, >= 0.1.1)
|
20
|
+
webrobots (~> 0.0, >= 0.0.9)
|
21
|
+
metaclass (0.0.1)
|
22
|
+
mime-types (1.19)
|
23
|
+
mocha (0.12.1)
|
24
|
+
metaclass (~> 0.0.1)
|
25
|
+
net-http-digest_auth (1.2.1)
|
26
|
+
net-http-persistent (2.8)
|
27
|
+
nokogiri (1.5.5)
|
28
|
+
ntlm-http (0.1.1)
|
29
|
+
rake (0.9.2.2)
|
30
|
+
rcov (0.9.11)
|
31
|
+
unf (0.0.5)
|
32
|
+
unf_ext
|
33
|
+
unf_ext (0.0.5)
|
34
|
+
vcr (2.0.1)
|
35
|
+
webrobots (0.0.13)
|
36
|
+
|
37
|
+
PLATFORMS
|
38
|
+
java
|
39
|
+
ruby
|
40
|
+
|
41
|
+
DEPENDENCIES
|
42
|
+
compactor!
|
43
|
+
fakeweb
|
44
|
+
mocha (= 0.12.1)
|
45
|
+
rake
|
46
|
+
rcov (= 0.9.11)
|
47
|
+
vcr (~> 2.0.0)
|
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2012 Julio Santos
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
# Compactor
|
2
|
+
|
3
|
+
[Build status on Travis CI](http://travis-ci.org/julio/caterpillar)
|
4
|
+
|
5
|
+
Scrape Amazon Seller Central
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
Add this line to your application's Gemfile:
|
10
|
+
|
11
|
+
gem 'compactor'
|
12
|
+
|
13
|
+
And then execute:
|
14
|
+
|
15
|
+
$ bundle
|
16
|
+
|
17
|
+
Or install it yourself as:
|
18
|
+
|
19
|
+
$ gem install compactor
|
20
|
+
|
21
|
+
## Usage
|
22
|
+
|
23
|
+
```
|
24
|
+
rake test:coverage
|
25
|
+
```
|
26
|
+
|
27
|
+
```
|
28
|
+
more soon
|
29
|
+
```
|
30
|
+
|
31
|
+
## Contributing
|
32
|
+
|
33
|
+
1. Fork it
|
34
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
35
|
+
3. Commit your changes (`git commit -am 'Added some feature'`)
|
36
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
37
|
+
5. Create new Pull Request
|
38
|
+
|
39
|
+
## Contributors
|
40
|
+
|
41
|
+
* Trae Robrock ( https://github.com/trobrock )
|
42
|
+
* Julio Santos ( https://github.com/julio )
|
43
|
+
|
44
|
+
## To-do
|
45
|
+
|
46
|
+
- Refactor
|
47
|
+
- Document
|
data/Rakefile
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
require 'rake'
|
2
|
+
require 'rake/testtask'
|
3
|
+
|
4
|
+
# Running `rake` with no arguments runs the unit tests.
desc 'Default: run unit tests.'
task(:default => :test)

# `rake test` puts lib/ on the load path and runs every *_test.rb under test/.
desc 'Test the compactor gem'
Rake::TestTask.new(:test) { |test_task|
  test_task.libs.push('lib')
  test_task.pattern = 'test/**/*_test.rb'
  test_task.verbose = true
}
|
13
|
+
|
14
|
+
# Runs the given test files under rcov and prints a text coverage report.
#
# files - Array of test file paths. When empty, a notice is printed and
#         nothing is run.
#
# Returns nil when no files were given, otherwise whatever `sh` returns.
def run_coverage(files)
  # "coverage" is presumably rcov's report directory; rm_f does not remove
  # directories, so a recursive removal is needed to clear stale output.
  rm_rf "coverage"
  rm_f "coverage.data"

  if files.empty?
    puts "No files were specified for testing"
    return
  end

  files = files.join(" ")

  # Keep library and runtime internals out of the report; the patterns
  # differ per platform.
  exclude =
    case RUBY_PLATFORM
    when /darwin/
      '--exclude "gems/*" --exclude "Library/Frameworks/*"'
    when /java/
      '--exclude "rubygems/*,jruby/*,parser*,gemspec*,_DELEGATION*,__FORWARDABLE__,erb,eval*,recognize_optimized*,yaml,yaml/*,fcntl"'
    else
      '--exclude "rubygems/*"'
    end

  rcov_bin = RUBY_PLATFORM =~ /java/ ? "jruby -S bundle exec rcov" : "bundle exec rcov"
  rcov = "#{rcov_bin} --rails -Ilib:test --sort coverage --text-report #{exclude}"
  puts
  puts
  puts "Running tests..."
  cmd = "#{rcov} #{files}"
  puts cmd
  sh cmd
end
|
42
|
+
|
43
|
+
# `rake test:coverage` feeds every test file under test/ to run_coverage.
namespace(:test) do
  desc "Measures test coverage"
  task(:coverage) { run_coverage Dir.glob("test/**/*_test.rb") }
end
|
data/compactor.gemspec
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "compactor/version"
|
4
|
+
|
5
|
+
# Gem specification for compactor. File lists come from `git ls-files`, so
# the gem must be built from inside a git checkout.
Gem::Specification.new do |spec|
  tracked_files = `git ls-files`.split("\n")
  tracked_tests = `git ls-files -- {test,spec,features}/*`.split("\n")
  tracked_bins  = `git ls-files -- bin/*`.split("\n")

  spec.name        = "compactor"
  spec.version     = Compactor::VERSION
  spec.authors     = ["Julio Santos"]
  spec.email       = ["julio@morgane.com"]
  spec.homepage    = ""
  spec.summary     = "Scrape Amazon Seller Central"
  spec.description = "Scrape Amazon Seller Central"

  spec.rubyforge_project = "compactor"

  spec.files         = tracked_files
  spec.test_files    = tracked_tests
  spec.executables   = tracked_bins.map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  spec.add_runtime_dependency "mechanize", "2.4"

  # Extra runtime dependencies are added only when this file is evaluated
  # under JRuby.
  if RUBY_PLATFORM == "java"
    spec.add_runtime_dependency "jruby-openssl", '0.7.3'
    spec.add_runtime_dependency "nokogiri", "1.5.0.beta.2"
  end

  [
    ["rake"],
    ["mocha", "0.12.1"],
    ["vcr", "~>2.0.0"],
    ["fakeweb"],
    ["rcov", "0.9.11"]
  ].each { |dependency| spec.add_development_dependency(*dependency) }
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
class Object
  # True when the receiver answers empty? with true; objects that do not
  # respond to empty? are blank only when they are nil or false.
  def blank?
    return empty? if respond_to?(:empty?)

    !self
  end
end
|
4
|
+
|
5
|
+
module Nokogiri
  # Raised by Node#search! when a selector matches nothing.
  class MissingElement < ::StandardError
  end

  module XML
    class Node
      # Same as #search, but raises MissingElement instead of handing back
      # a blank result.
      def search!(selector)
        matches = search(selector)
        raise MissingElement.new("No elements for [#{selector}]") if matches.blank?

        matches
      end
    end
  end
end
|
20
|
+
|
21
|
+
# Lets callers use page.search! directly; the call is forwarded to the
# page's parser.
class Mechanize::Page
  def_delegator(:parser, :search!)
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
module Compactor
  module Amazon
    # Wraps one table-row node together with the Mechanize agent that is
    # used for any follow-up requests about that row.
    class ScrapedRow
      def initialize(node, mechanize)
        @node, @mechanize = node, mechanize
      end

      # True when the last cell holds at least one download button.
      def can_download_report?
        report_buttons.size > 0
      end

      # Download buttons of the last cell, each wrapped as a Mechanize link
      # built from the button's parent node.
      def report_buttons
        last_cell.search(".secondarySmallButton").map do |button|
          Mechanize::Page::Link.new(button.parent, @mechanize, @mechanize.page)
        end
      end

      # Fetches the report behind the first download button.
      #
      # Returns [type, body]; the type comes from ReportScraper.report_type
      # applied to the button label.
      def download_report
        button_node = report_buttons[0].node
        report_url  = button_node["href"]
        label       = button_node.search(".button_label").text
        report_kind = ReportScraper.report_type(label)

        [report_kind, @mechanize.get(report_url).body]
      end

      # Looks up this row again on the agent's current page, matching on
      # the date range text. Returns a new ScrapedRow, or nil if none match.
      def reload
        table_rows.each do |tr|
          candidate = ScrapedRow.new(tr, @mechanize)
          return candidate if candidate.date_range == date_range
        end

        nil
      end

      # Posts the id of the row's regenerate button to the redrive endpoint.
      def request_report
        regenerate_button = last_cell.search(".regenerateButton")[0]
        group_id = regenerate_button['id']

        @mechanize.post("/gp/payments-account/redrive.html", "groupId" => group_id)
      end

      # False when the last div of the last cell carries one of the
      # "still running" markers and offers no regenerate button.
      def ready?
        status_div  = last_cell.search("div")[-1]
        status_text = status_div.text

        still_pending = ["(Processing)", "(Open)", "In Progress"].any? do |marker|
          status_text.include?(marker) && status_div.search(".regenerateButton").blank?
        end
        !still_pending
      end

      # Memoized numeric value of the second-to-last cell.
      def deposit_amount
        @deposit_amount ||= fetch_deposit_amount
      end

      # Memoized text of the link in the first cell.
      def date_range
        @date_range ||= @node.search("td:first-child a").text
      end

      private

      # Strips everything but digits and dots from the second-to-last cell;
      # yields 0.0 when the row has no such cell.
      def fetch_deposit_amount
        amount_cell = @node.search("td")[-2]
        return 0.0 unless amount_cell

        amount_cell.text.gsub(/[^0-9\.]/, '').to_f
      end

      def table_rows
        @mechanize.page.search("tr")
      end

      def last_cell
        @last_cell ||= @node.search("td")[-1]
      end
    end
  end
end
|
@@ -0,0 +1,369 @@
|
|
1
|
+
module Compactor
|
2
|
+
module Amazon
|
3
|
+
# Raised by the address parser when the scraped lines cannot be split
# into street / city / state / postal code.
class AddressParseFailure < StandardError
end

# Raised when Seller Central reports a bad login.
class AuthenticationError < StandardError
end

# Raised when the account page shows the "limited access" alert.
class LockedAccountError < StandardError
end

class MissingRow < StandardError
end

# Raised when no usable marketplace is left after filtering.
class NoMarketplacesError < StandardError
end

# Raised when a form submission is answered with HTTP 403.
class NotProAccountError < StandardError
end

# Raised for a download button whose label maps to no known report type.
class UnknownReportType < StandardError
end

# Polling attempts before giving up. The pause before attempt n + 1 is
# n ** 2 seconds, so 0..15 adds up to roughly 20 minutes.
ATTEMPTS_BEFORE_GIVING_UP = 15
MARKETPLACE_HOMEPAGE      = "https://sellercentral.amazon.com/gp/homepage.html"
AMAZON_COM_MARKETPLACE_ID = 'ATVPDKIKX0DER'
|
14
|
+
|
15
|
+
class ReportScraper
|
16
|
+
# Builds the Mechanize agent, picks a non-default user agent and logs in
# to Seller Central.
#
# Raises AuthenticationError or LockedAccountError via
# login_to_seller_central when the login is rejected.
def initialize(email, password, merchant_id)
  @merchant_id = merchant_id

  @mechanize = Mechanize.new.tap do |browser|
    browser.max_file_buffer = 4 * 1024 * 1024
    browser.max_history     = 2

    http = browser.agent.http
    # NOTE(review): certificate verification is switched off here, so the
    # login below is sent without validating the server certificate.
    http.verify_mode        = OpenSSL::SSL::VERIFY_NONE
    http.reuse_ssl_sessions = false
  end

  randomize_user_agent!
  login_to_seller_central(email, password)
end
|
28
|
+
|
29
|
+
# Submits the form, translating an HTTP 403 response into
# NotProAccountError. Any other response-code error is re-raised untouched.
def self.submit_form!(form)
  begin
    form.submit
  rescue Mechanize::ResponseCodeError => error
    forbidden = error.message.include?("403 => Net::HTTPForbidden")
    raise unless forbidden # not a 403: hand the original error back

    raise ::Compactor::Amazon::NotProAccountError
  end
end
|
35
|
+
|
36
|
+
# True unless the current page shows the error box with the
# "email/password combination" message.
def self.authorized_user?
  error_box = message_box_error
  return true if error_box.empty?

  !error_box.text.include?('There was an error with your email/password combination.')
end
|
40
|
+
|
41
|
+
# Walks through the MWS registration pages with the given credentials and
# reads the resulting identifiers off the final page.
#
# credentials - Hash with :email, :password, :developer_name and
#               :dev_account_id.
#
# Returns [merchant_id, marketplace_id].
# Raises AuthenticationError when the login is rejected, and
# NotProAccountError (via submit_form!) on an HTTP 403.
def self.merchant_identification(credentials={})
  # The agent is kept in @agent because message_box_error reads it.
  @agent = Mechanize.new
  http = @agent.agent.http
  # NOTE(review): certificate verification is switched off for this agent.
  http.verify_mode = OpenSSL::SSL::VERIFY_NONE
  http.reuse_ssl_sessions = false

  # Step 1: sign in.
  @agent.get 'https://sellercentral.amazon.com/gp/mws/registration/register.html'
  login_form = @agent.page.forms.first
  login_form.email = credentials[:email]
  login_form.password = credentials[:password]
  submit_form! login_form

  raise Compactor::Amazon::AuthenticationError unless authorized_user?

  # Step 2: name the developer being authorized.
  developer_form = @agent.page.forms.first
  developer_form.developerName = credentials[:developer_name]
  developer_form.devMWSAccountId = credentials[:dev_account_id]
  developer_form.radiobutton_with(:value => 'devAuthorization').checked = true
  developer_form.submit

  # Step 3: tick both checkboxes.
  agreement_form = @agent.page.forms.first
  ['agreeCheckBox', 'understandCheckBox'].each do |checkbox_name|
    agreement_form.checkbox_with(:name => checkbox_name).checked = true
  end
  agreement_form.submit

  # Step 4: submit the next form without changes, then read the key table.
  @agent.page.forms.first.submit
  merchant_id, marketplace_id = [2, 3].map do |row_number|
    @agent.page.parser.xpath("//table[@class=\"registration-key-table\"]/tr[#{row_number}]/td[1]").text
  end

  [merchant_id, marketplace_id]
end
|
70
|
+
|
71
|
+
# Lists the usable marketplaces along with their balance.
#
# Returns an Array of [account_name, marketplace_id, balance]; each
# marketplace is selected before its balance is read.
# Raises NoMarketplacesError when nothing is left after filtering.
def marketplaces
  available = filter_marketplaces(get_marketplaces)
  raise NoMarketplacesError if available.empty?

  available.map do |account_name, marketplace_id|
    select_marketplace(marketplace_id)

    [account_name, marketplace_id, get_balance]
  end
end
|
82
|
+
|
83
|
+
# Loads the Seller Central homepage and lists the marketplaces it offers.
#
# Returns an Array of [name, marketplace_id] pairs:
# * one pair per <option> when a #marketplaceSelect element is present;
# * otherwise a single [name, nil] pair taken from #market_switch;
# * an empty Array when neither element is on the page.
def get_marketplaces
  @mechanize.get MARKETPLACE_HOMEPAGE
  page = @mechanize.page

  marketplace_selector = page.search("#marketplaceSelect").first
  if marketplace_selector
    return marketplace_selector.search("option").map do |option|
      [option.text, option["value"]]
    end
  end

  # search returns a (possibly empty) node set, never nil, so emptiness
  # has to be tested explicitly; a bare truthiness check always passes.
  marketplace_name = page.search("#market_switch")
  return [] if marketplace_name.empty?

  [[marketplace_name.text.strip, nil]]
end
|
104
|
+
|
105
|
+
# Switches the session to the given marketplace by requesting the
# preferences URL with the URL-escaped marketplace id.
def select_marketplace(marketplace_id)
  escaped_id = CGI.escape(marketplace_id)
  prefs_url  = "https://sellercentral.amazon.com/gp/utilities/set-rainier-prefs.html?ie=UTF8&&marketplaceID=#{escaped_id}"

  @mechanize.get(prefs_url)
end
|
109
|
+
|
110
|
+
# Reads the deposit amount of the first settlement row that is not ready.
#
# Returns 0.0 when the page has no results or every row is ready.
def get_balance
  go_to_past_settlements('', '')
  return 0.0 if page_has_no_results?

  pending_row = report_rows.find { |row| !row.ready? }
  pending_row.nil? ? 0.0 : pending_row.deposit_amount
end
|
117
|
+
|
118
|
+
# Downloads the settlement reports for the given date range.
#
# from, to - anything whose to_s Date.parse accepts (see parse_dates).
#
# Returns the Hash built by get_reports.
def reports(from, to)
  start_date, end_date = parse_dates(from, to)
  go_to_past_settlements(start_date, end_date)

  get_reports
end
|
124
|
+
|
125
|
+
# Scrapes buyer name and shipping address for each order id.
#
# order_ids - Enumerable of order ids.
#
# Returns a Hash of order_id => Hash. The inner Hash holds "BuyerName"
# and "ShippingAddress" when they could be scraped. Scraping is best
# effort: on a failure, whatever was collected so far for that order is
# kept (possibly an empty Hash).
def get_orders(order_ids)
  orders_hash = {}
  order_ids.each do |order_id|
    order = {}
    @mechanize.get order_detail_url(order_id)

    begin
      # Get the buyer name
      tr = @mechanize.page.search!("//tr[@class='list-row']/td[@class='data-display-field'][text()=\"Contact Buyer:\"]").first.parent
      td = tr.search!("td[2]")
      order["BuyerName"] = td.text.strip

      # Get the shipping address
      td = @mechanize.page.search!("//tr[@class='list-row']/td[@class='data-display-field']/strong[text()='Shipping Address:']").first.parent
      addr_lines = td.children.map(&:text).reject { |l| l.blank? || l =~ /^Shipping Address/ }
      order["ShippingAddress"] = parse_address_lines!(addr_lines)
    rescue StandardError
      # Deliberately ignored: a missing element or unparsable address must
      # not abort the remaining orders. Only StandardError is rescued, so
      # interrupts and fatal errors still propagate.
    end

    orders_hash[order_id] = order
  end
  orders_hash
end
|
146
|
+
|
147
|
+
private
|
148
|
+
|
149
|
+
# Nodes with the .messageboxerror class on the class-level agent's
# current page.
def self.message_box_error
  document = @agent.page.parser
  document.css(".messageboxerror")
end
|
152
|
+
|
153
|
+
# Sleeps for the square of the attempt number, in seconds, so that
# consecutive polls are spaced further and further apart.
def slowdown_like_a_human(count)
  pause_seconds = count ** 2
  sleep pause_seconds
end
|
156
|
+
|
157
|
+
# Reduces the scraped marketplace list to the two supported entries.
#
# marketplaces - Array of [name, marketplace_id] pairs.
#
# Returns an Array with, in this order and only when found:
# * ['Amazon Seller Account', AMAZON_COM_MARKETPLACE_ID] for a
#   'www.amazon.com' entry whose id is nil or the amazon.com id;
# * ['Checkout By Amazon', id] for a 'Your Checkout Website' entry that
#   has an id.
def filter_marketplaces(marketplaces)
  seller_entry = marketplaces.find do |label, id|
    label == 'www.amazon.com' && (id.nil? || id == AMAZON_COM_MARKETPLACE_ID)
  end
  checkout_entry = marketplaces.find do |label, id|
    label == 'Your Checkout Website' && !id.nil?
  end

  filtered = []
  filtered << ['Amazon Seller Account', AMAZON_COM_MARKETPLACE_ID] if seller_entry
  filtered << ['Checkout By Amazon', checkout_entry[1]] if checkout_entry
  filtered
end
|
172
|
+
|
173
|
+
# Builds the order-details URL for one order.
#
# order_id - the order id; it is converted with to_s and URL-escaped, like
#            the other URL builders in this class, so that reserved
#            characters cannot alter the query string.
#
# Returns the URL as a String.
def order_detail_url(order_id)
  "https://sellercentral.amazon.com/gp/orders-v2/details?ie=UTF8&orderID=#{CGI.escape(order_id.to_s)}"
end
|
176
|
+
|
177
|
+
# Splits the scraped shipping-address lines into their parts.
#
# addr_lines - Array of Strings. The first line is assumed to be the
#              buyer's name and is skipped, and "Phone:" lines are
#              dropped. The last remaining line must look like
#              "City, ST 12345"; the lines before it form the street.
#
# Returns a Hash with 'street' (lines joined by a newline), 'city',
# 'state' and 'postalcode'.
# Raises AddressParseFailure when no address line is left or the last
# line has no comma-separated remainder.
def parse_address_lines!(addr_lines)
  nbsp = "\302\240" # the two bytes of a UTF-8 non-breaking space
  addr_lines = addr_lines.map { |line| line.gsub(nbsp, " ") }
  # Assume the first line is the name of the buyer, so skip it. Slicing an
  # empty array from index 1 yields nil, hence the fallback to [].
  addr_lines = (addr_lines[1..-1] || []).reject { |l| l =~ /^Phone:/ }

  raise AddressParseFailure if addr_lines.empty?

  citystate_line = addr_lines.pop
  city, remainder = citystate_line.split(/,\s*/)

  raise AddressParseFailure if remainder.nil?

  state, postalcode = remainder.split(/\s+/)

  {
    # Double quotes are required: '\n' would be a literal backslash and "n".
    'street'     => addr_lines.join("\n"),
    'city'       => city,
    'state'      => state,
    'postalcode' => postalcode
  }
end
|
199
|
+
|
200
|
+
# Pick a random user agent that isn't Mechanize.
#
# AGENT_ALIASES is keyed by alias name; the User-Agent string is the
# mapped value, so the chosen alias is looked up before being assigned.
# Assigning the alias name itself would make that name the User-Agent.
def randomize_user_agent!
  alias_names = Mechanize::AGENT_ALIASES.keys.reject { |name| name == "Mechanize" }

  # Array#choice is tried first, otherwise Array#sample is used.
  chosen = alias_names.respond_to?(:choice) ? alias_names.choice : alias_names.sample

  @mechanize.user_agent = Mechanize::AGENT_ALIASES[chosen]
end
|
207
|
+
|
208
|
+
# Opens the past-settlements page for the given date range.
#
# from, to - Strings (may be empty); both are URL-escaped before being
#            placed in the query string.
def go_to_past_settlements(from, to)
  start_date = CGI.escape(from)
  end_date   = CGI.escape(to)

  @mechanize.get "https://sellercentral.amazon.com/gp/payments-account/past-settlements.html?endDate=#{end_date}&startDate=#{start_date}&pageSize=Ten"
end
|
214
|
+
|
215
|
+
# Collects the reports of the current page and of every following page.
#
# Returns a Hash of report type => flat Array of report bodies.
def get_reports
  collected = {}

  more_pages = true
  while more_pages
    get_reports_in_page.each do |report_type, report_streams|
      (collected[report_type] ||= []) << report_streams
    end

    # pages_to_parse clicks "Next" when there is one, and is false otherwise.
    more_pages = pages_to_parse
  end

  # Each page contributed one nested array per type; merge them in place.
  collected.each_value { |streams| streams.flatten! }
end
|
229
|
+
|
230
|
+
# True for the label of the XML download button.
def self.xml_report?(report_identifier)
  "Download XML" == report_identifier
end

# True for the label of the flat-file download button.
def self.text_v1_report?(report_identifier)
  "Download Flat File" == report_identifier
end

# True for the label of the flat-file V2 download button.
def self.text_v2_report?(report_identifier)
  "Download Flat File V2" == report_identifier
end

# Maps a download-button label to a report type.
#
# Returns :xml, :tsv or :tsv2.
# Raises Compactor::Amazon::UnknownReportType for any other label.
def self.report_type(report_identifier)
  if xml_report?(report_identifier)
    :xml
  elsif text_v1_report?(report_identifier)
    :tsv
  elsif text_v2_report?(report_identifier)
    :tsv2
  else
    fail Compactor::Amazon::UnknownReportType
  end
end
|
250
|
+
|
251
|
+
# Runs the block up to three times, stopping as soon as the page no
# longer reports "no results".
def rescue_empty_results(&block)
  3.times do
    block.call
    results_found = !page_has_no_results?
    break if results_found
  end
end
|
257
|
+
|
258
|
+
# Tells whether polling for requested reports should stop.
#
# reports_to_watch - rows still being waited on (unused; kept so the
#                    signature stays the same for existing callers).
# reports          - reports downloaded so far (unused, same reason).
# count            - number of polling attempts made so far.
#
# Returns true once count exceeds ATTEMPTS_BEFORE_GIVING_UP, else false.
def timeout_fetching_reports(reports_to_watch, reports, count)
  # No totals are computed here: they were never used, and summing the
  # sizes of an empty reports hash gives nil, which raised a TypeError
  # exactly when the timeout was reached with nothing downloaded.
  count > ATTEMPTS_BEFORE_GIVING_UP
end
|
269
|
+
|
270
|
+
# Downloads the report offered by a row and files it under its type in
# the reports hash. The download runs inside @mechanize.transact so that
# the current page stays the current page.
def add_to_collection(reports, row)
  @mechanize.transact do
    kind, body = row.download_report
    (reports[kind] ||= []) << body
  end
end
|
280
|
+
|
281
|
+
# Polls the current page until every watched row has been downloaded or
# has disappeared, or until the attempts run out.
#
# reports_to_watch - Array of rows whose report was requested; rows are
#                    removed from it in place as they are resolved.
# reports          - Hash the downloaded reports are added to.
# count            - attempt number to start from.
#
# Returns nil.
def get_reports_to_watch(reports_to_watch, reports, count=0)
  until reports_to_watch.empty? || timeout_fetching_reports(reports_to_watch, reports, count)
    # Reload the current page, retrying while it shows no results.
    rescue_empty_results { @mechanize.get @mechanize.page.uri }

    reports_to_watch.reject! do |watched|
      fresh_row = watched.reload
      if fresh_row.nil?
        true # the row is gone from the page: stop watching it
      elsif fresh_row.can_download_report?
        # The truthy result of add_to_collection drops the row as well.
        add_to_collection(reports, fresh_row)
      end
    end

    slowdown_like_a_human(count)
    count += 1
  end

  nil
end
|
297
|
+
|
298
|
+
# Moves on to the next result page when there is one.
#
# Returns false when the page has no "Next" link, otherwise the result of
# clicking it.
def pages_to_parse
  next_link = @mechanize.page.links_with(:text => "Next").first
  next_link.nil? ? false : next_link.click
end
|
304
|
+
|
305
|
+
# Wraps the data rows of the second table under #content-main-entities.
#
# Only rows whose class attribute is exactly "list-row-even" or
# "list-row-odd" are kept.
#
# Returns an Array of ScrapedRow.
def report_rows
  row_classes = ["list-row-even", "list-row-odd"]
  settlement_table = @mechanize.page.search!("#content-main-entities > table")[1]

  data_rows = settlement_table.search("tr[class]").select do |tr|
    row_classes.include?(tr["class"])
  end
  data_rows.map { |tr| ScrapedRow.new(tr, @mechanize) }
end
|
313
|
+
|
314
|
+
# True when the .data-display area of the current page contains the text
# "No results found". search! raises when the area is missing altogether.
def page_has_no_results?
  display_text = @mechanize.page.search!(".data-display").text
  display_text.include?("No results found")
end
|
317
|
+
|
318
|
+
# Gathers every report available from the current page.
#
# Rows with a download button are downloaded right away. Rows that are
# ready but have no download button get their report requested and are
# then polled until they can be downloaded.
#
# Returns a Hash of report type => Array of report bodies (empty when
# the page has no results).
def get_reports_in_page
  downloaded = {}
  return downloaded if page_has_no_results?

  pending_rows = []
  report_rows.each do |row|
    if row.can_download_report?
      add_to_collection(downloaded, row)
      next
    end
    next unless row.ready?

    # transact keeps the current page current while the request is posted.
    @mechanize.transact do
      row.request_report
      pending_rows << row
    end
  end

  get_reports_to_watch(pending_rows, downloaded)

  downloaded
end
|
339
|
+
|
340
|
+
# Normalizes both range boundaries to "mm/dd/yy" strings.
#
# from, to - anything whose to_s Date.parse accepts.
#
# Returns [from, to] as formatted Strings.
def parse_dates(from, to)
  [from, to].map { |boundary| Date.parse(boundary.to_s).strftime("%m/%d/%y") }
end
|
346
|
+
|
347
|
+
# Fills in and submits the first form of the Seller Central homepage.
#
# Raises Compactor::Amazon::AuthenticationError when the resulting page
# looks like a bad login, and Compactor::Amazon::LockedAccountError when
# it shows the locked-account alert. The bad-login check comes first.
def login_to_seller_central(email, password)
  @mechanize.get MARKETPLACE_HOMEPAGE

  login_form = @mechanize.page.forms.first
  login_form.email    = email
  login_form.password = password
  login_form.submit

  if bad_login?
    raise Compactor::Amazon::AuthenticationError
  elsif locked_account?
    raise Compactor::Amazon::LockedAccountError
  end
end
|
357
|
+
|
358
|
+
# True when the page shows an error message box, or when its .tiny text
# says the user is not an authorized Seller Central user.
def bad_login?
  document = @mechanize.page.parser
  return true unless document.css(".messageboxerror").blank?

  document.css('.tiny').text.include?('Sorry, you are not an authorized Seller Central user')
end
|
362
|
+
|
363
|
+
# Truthy when the page's alert box mentions limited access to the
# seller account.
def locked_account?
  limited_access_notice = "limited access to your seller account"
  alerts = @mechanize.page.search(".messageboxalert")

  alerts && alerts.text.include?(limited_access_notice)
end
|
367
|
+
end
|
368
|
+
end
|
369
|
+
end
|