compactor 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +2 -0
- data/.travis.yml +9 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +47 -0
- data/LICENSE +22 -0
- data/README.md +47 -0
- data/Rakefile +48 -0
- data/compactor.gemspec +33 -0
- data/lib/compactor/extensions.rb +23 -0
- data/lib/compactor/scraped_row.rb +81 -0
- data/lib/compactor/scraper.rb +369 -0
- data/lib/compactor/version.rb +3 -0
- data/lib/compactor.rb +5 -0
- data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_bad_login/raise_error.yml +535 -0
- data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/find_reports/multiple_pages.yml +11382 -0
- data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/find_reports/no_reports.yml +777 -0
- data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/find_reports/no_reports_to_request.yml +1804 -0
- data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/find_reports/reports_to_request.yml +13482 -0
- data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/get_balance.yml +1050 -0
- data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/get_orders.yml +822 -0
- data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/get_orders_big.yml +4223 -0
- data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/get_orders_logging.yml +820 -0
- data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/get_orders_with_po_box.yml +793 -0
- data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/shipping_address_not_starting_with_number.yml +800 -0
- data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/with_multiple_marketplaces/find_reports/reports_to_request.yml +2948 -0
- data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/with_multiple_marketplaces/get_marketplaces.yml +842 -0
- data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/with_single_marketplaces/get_marketplaces.yml +877 -0
- data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_locked_account/raise_error.yml +1033 -0
- data/test/mechanize_extensions_test.rb +16 -0
- data/test/scraped_row_test.rb +9 -0
- data/test/scraper_test.rb +189 -0
- data/test/test_helper.rb +18 -0
- metadata +205 -0
data/.gitignore
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
compactor (0.1.2)
|
5
|
+
mechanize (= 2.4)
|
6
|
+
|
7
|
+
GEM
|
8
|
+
remote: http://rubygems.org/
|
9
|
+
specs:
|
10
|
+
domain_name (0.5.4)
|
11
|
+
unf (~> 0.0.3)
|
12
|
+
fakeweb (1.3.0)
|
13
|
+
mechanize (2.4)
|
14
|
+
domain_name (~> 0.5, >= 0.5.1)
|
15
|
+
mime-types (~> 1.17, >= 1.17.2)
|
16
|
+
net-http-digest_auth (~> 1.1, >= 1.1.1)
|
17
|
+
net-http-persistent (~> 2.5, >= 2.5.2)
|
18
|
+
nokogiri (~> 1.4)
|
19
|
+
ntlm-http (~> 0.1, >= 0.1.1)
|
20
|
+
webrobots (~> 0.0, >= 0.0.9)
|
21
|
+
metaclass (0.0.1)
|
22
|
+
mime-types (1.19)
|
23
|
+
mocha (0.12.1)
|
24
|
+
metaclass (~> 0.0.1)
|
25
|
+
net-http-digest_auth (1.2.1)
|
26
|
+
net-http-persistent (2.8)
|
27
|
+
nokogiri (1.5.5)
|
28
|
+
ntlm-http (0.1.1)
|
29
|
+
rake (0.9.2.2)
|
30
|
+
rcov (0.9.11)
|
31
|
+
unf (0.0.5)
|
32
|
+
unf_ext
|
33
|
+
unf_ext (0.0.5)
|
34
|
+
vcr (2.0.1)
|
35
|
+
webrobots (0.0.13)
|
36
|
+
|
37
|
+
PLATFORMS
|
38
|
+
java
|
39
|
+
ruby
|
40
|
+
|
41
|
+
DEPENDENCIES
|
42
|
+
compactor!
|
43
|
+
fakeweb
|
44
|
+
mocha (= 0.12.1)
|
45
|
+
rake
|
46
|
+
rcov (= 0.9.11)
|
47
|
+
vcr (~> 2.0.0)
|
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2012 Julio Santos
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
# Compactor
|
2
|
+
|
3
|
+
[![Build Status](https://secure.travis-ci.org/julio/caterpillar.png)](http://travis-ci.org/julio/caterpillar)
|
4
|
+
|
5
|
+
Scrape Amazon Seller Central
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
Add this line to your application's Gemfile:
|
10
|
+
|
11
|
+
gem 'compactor'
|
12
|
+
|
13
|
+
And then execute:
|
14
|
+
|
15
|
+
$ bundle
|
16
|
+
|
17
|
+
Or install it yourself as:
|
18
|
+
|
19
|
+
$ gem install compactor
|
20
|
+
|
21
|
+
## Usage
|
22
|
+
|
23
|
+
```
|
24
|
+
rake test:coverage
|
25
|
+
```
|
26
|
+
|
27
|
+
```
|
28
|
+
more soon
|
29
|
+
```
|
30
|
+
|
31
|
+
## Contributing
|
32
|
+
|
33
|
+
1. Fork it
|
34
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
35
|
+
3. Commit your changes (`git commit -am 'Added some feature'`)
|
36
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
37
|
+
5. Create new Pull Request
|
38
|
+
|
39
|
+
## Contributors
|
40
|
+
|
41
|
+
* Trae Robrock ( https://github.com/trobrock )
|
42
|
+
* Julio Santos ( https://github.com/julio )
|
43
|
+
|
44
|
+
## To-do
|
45
|
+
|
46
|
+
- Refactor
|
47
|
+
- Document
|
data/Rakefile
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
require 'rake'
|
2
|
+
require 'rake/testtask'
|
3
|
+
|
4
|
+
desc 'Default: run unit tests.'
|
5
|
+
task :default => :test
|
6
|
+
|
7
|
+
desc 'Test the compactor gem'
|
8
|
+
Rake::TestTask.new(:test) do |t|
|
9
|
+
t.libs << 'lib'
|
10
|
+
t.pattern = 'test/**/*_test.rb'
|
11
|
+
t.verbose = true
|
12
|
+
end
|
13
|
+
|
14
|
+
# Runs the given test files under rcov and prints a text coverage report.
#
# Any previous coverage output is removed first. "coverage" is removed
# recursively because it is a directory when rcov has written a report;
# a plain rm_f silently leaves directories in place.
#
# files - Array of test file paths. When empty, a notice is printed and
#         nothing is run.
def run_coverage(files)
  rm_rf "coverage"
  rm_f "coverage.data"

  if files.empty?
    puts "No files were specified for testing"
    return
  end

  files = files.join(" ")

  # Keep third-party/runtime code out of the report; patterns differ by platform.
  if RUBY_PLATFORM =~ /darwin/
    exclude = '--exclude "gems/*" --exclude "Library/Frameworks/*"'
  elsif RUBY_PLATFORM =~ /java/
    exclude = '--exclude "rubygems/*,jruby/*,parser*,gemspec*,_DELEGATION*,__FORWARDABLE__,erb,eval*,recognize_optimized*,yaml,yaml/*,fcntl"'
  else
    exclude = '--exclude "rubygems/*"'
  end

  rcov_bin = RUBY_PLATFORM =~ /java/ ? "jruby -S bundle exec rcov" : "bundle exec rcov"
  rcov = "#{rcov_bin} --rails -Ilib:test --sort coverage --text-report #{exclude}"
  puts
  puts
  puts "Running tests..."
  cmd = "#{rcov} #{files}"
  puts cmd
  sh cmd
end
|
42
|
+
|
43
|
+
namespace :test do
|
44
|
+
desc "Measures test coverage"
|
45
|
+
task :coverage do
|
46
|
+
run_coverage Dir["test/**/*_test.rb"]
|
47
|
+
end
|
48
|
+
end
|
data/compactor.gemspec
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "compactor/version"
|
4
|
+
|
5
|
+
Gem::Specification.new do |gem|
|
6
|
+
gem.name = "compactor"
|
7
|
+
gem.version = Compactor::VERSION
|
8
|
+
gem.authors = ["Julio Santos"]
|
9
|
+
gem.email = ["julio@morgane.com"]
|
10
|
+
gem.homepage = ""
|
11
|
+
gem.summary = "Scrape Amazon Seller Central"
|
12
|
+
gem.description = "Scrape Amazon Seller Central"
|
13
|
+
|
14
|
+
gem.rubyforge_project = "compactor"
|
15
|
+
|
16
|
+
gem.files = `git ls-files`.split("\n")
|
17
|
+
gem.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
18
|
+
gem.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
19
|
+
gem.require_paths = ["lib"]
|
20
|
+
|
21
|
+
gem.add_runtime_dependency "mechanize", "2.4"
|
22
|
+
|
23
|
+
if RUBY_PLATFORM == "java"
|
24
|
+
gem.add_runtime_dependency "jruby-openssl", '0.7.3'
|
25
|
+
gem.add_runtime_dependency "nokogiri", "1.5.0.beta.2"
|
26
|
+
end
|
27
|
+
|
28
|
+
gem.add_development_dependency "rake"
|
29
|
+
gem.add_development_dependency "mocha", "0.12.1"
|
30
|
+
gem.add_development_dependency "vcr", "~>2.0.0"
|
31
|
+
gem.add_development_dependency "fakeweb"
|
32
|
+
gem.add_development_dependency "rcov", "0.9.11"
|
33
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
class Object
  # True when the receiver is "blank": objects that respond to #empty? are
  # blank when empty; everything else is blank only when falsy (nil/false).
  # Note that a whitespace-only String is NOT blank under this definition.
  def blank?
    return empty? if respond_to?(:empty?)

    !self
  end
end
|
4
|
+
|
5
|
+
module Nokogiri
  # Raised by Node#search! when a selector matches nothing.
  class MissingElement < ::StandardError
  end

  module XML
    class Node
      # Strict variant of #search.
      #
      # selector - the selector handed straight to #search.
      #
      # Returns whatever #search returned.
      # Raises MissingElement when that result is blank.
      def search!(selector)
        matches = search(selector)
        raise MissingElement, "No elements for [#{selector}]" if matches.blank?

        matches
      end
    end
  end
end
|
20
|
+
|
21
|
+
class Mechanize::Page
  # Expose the strict search! on pages by forwarding it to the page's parser.
  def_delegators :parser, :search!
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
module Compactor
  module Amazon
    # Wraps one table row (a node responding to #search) together with the
    # Mechanize agent that fetched it, and answers questions about the
    # report that row represents.
    class ScrapedRow
      # node      - the row element.
      # mechanize - the agent used for follow-up requests.
      def initialize(node, mechanize)
        @node, @mechanize = node, mechanize
      end

      # True when the row's last cell contains at least one download button.
      def can_download_report?
        report_buttons.any?
      end

      # Builds a Mechanize link for the parent of every ".secondarySmallButton"
      # element found in the last cell.
      def report_buttons
        buttons = last_cell.search(".secondarySmallButton")
        buttons.map do |button|
          Mechanize::Page::Link.new(button.parent, @mechanize, @mechanize.page)
        end
      end

      # Downloads the report behind the first download button.
      #
      # Returns [type, body]: type comes from ReportScraper.report_type applied
      # to the button label, body is the response body of the button's href.
      def download_report
        button_node = report_buttons[0].node
        label       = button_node.search(".button_label").text
        type        = ReportScraper.report_type(label)

        [type, @mechanize.get(button_node["href"]).body]
      end

      # Looks this row up again in the agent's current page, matching on
      # date_range. Returns a fresh ScrapedRow, or nil when no row matches.
      def reload
        table_rows.each do |raw_row|
          candidate = ScrapedRow.new(raw_row, @mechanize)
          return candidate if candidate.date_range == date_range
        end

        nil
      end

      # Posts the id of the first ".regenerateButton" in the last cell to the
      # redrive endpoint.
      def request_report
        regenerate = last_cell.search(".regenerateButton")[0]

        @mechanize.post("/gp/payments-account/redrive.html",
                        "groupId" => regenerate['id'])
      end

      # False when the last <div> of the last cell mentions one of the
      # in-progress markers and holds no regenerate button; true otherwise.
      def ready?
        status_div  = last_cell.search("div")[-1]
        status_text = status_div.text

        ["(Processing)", "(Open)", "In Progress"].none? do |marker|
          status_text.include?(marker) &&
            status_div.search(".regenerateButton").blank?
        end
      end

      # Memoized deposit amount (Float) parsed from the second-to-last cell.
      def deposit_amount
        @deposit_amount ||= fetch_deposit_amount
      end

      # Memoized text of the link in the row's first cell.
      def date_range
        return @date_range if @date_range

        @date_range = @node.search("td:first-child a").text
      end

      private

      # Strips everything but digits and dots from the second-to-last cell
      # and converts it to a Float; 0.0 when that cell does not exist.
      def fetch_deposit_amount
        cell = @node.search("td")[-2]
        return 0.0 unless cell

        cell.text.gsub(/[^0-9\.]/, '').to_f
      end

      # Every <tr> on the agent's current page.
      def table_rows
        @mechanize.page.search("tr")
      end

      # Memoized last <td> of the row.
      def last_cell
        @last_cell ||= @node.search("td")[-1]
      end
    end
  end
end
|
@@ -0,0 +1,369 @@
|
|
1
|
+
module Compactor
|
2
|
+
module Amazon
|
3
|
+
# Raised by parse_address_lines! when a shipping address cannot be split.
class AddressParseFailure < StandardError
end

# Raised when a login is rejected.
class AuthenticationError < StandardError
end

# Raised when the post-login page reports limited account access.
class LockedAccountError < StandardError
end

class MissingRow < StandardError
end

# Raised when no supported marketplace is found for the account.
class NoMarketplacesError < StandardError
end

# Raised when a form submission is answered with HTTP 403.
class NotProAccountError < StandardError
end

# Raised for a report button label that maps to no known report type.
class UnknownReportType < StandardError
end

# Polling attempts allowed while waiting for requested reports. The wait
# between attempts is count ** 2 seconds, so the total is roughly 20 minutes.
ATTEMPTS_BEFORE_GIVING_UP = 15 # give up after 20 minutes
MARKETPLACE_HOMEPAGE      = "https://sellercentral.amazon.com/gp/homepage.html"
AMAZON_COM_MARKETPLACE_ID = 'ATVPDKIKX0DER'
|
14
|
+
|
15
|
+
class ReportScraper
|
16
|
+
# Creates a configured Mechanize agent and logs in to Seller Central.
#
# email, password - credentials passed to login_to_seller_central.
# merchant_id     - stored on the instance.
#
# NOTE(review): certificate verification is switched off (VERIFY_NONE) for a
# session that submits credentials - confirm this is still required.
def initialize(email, password, merchant_id)
  @merchant_id = merchant_id

  @mechanize = Mechanize.new.tap do |agent|
    agent.max_file_buffer = 4 * 1024 * 1024
    agent.max_history     = 2

    http = agent.agent.http
    http.verify_mode        = OpenSSL::SSL::VERIFY_NONE
    http.reuse_ssl_sessions = false
  end

  randomize_user_agent!
  login_to_seller_central(email, password)
end
|
28
|
+
|
29
|
+
# Submits +form+ and returns the result of the submission.
#
# A Mechanize::ResponseCodeError whose message mentions a 403 is translated
# into NotProAccountError; any other response-code error is re-raised as is.
def self.submit_form!(form)
  begin
    form.submit
  rescue Mechanize::ResponseCodeError => error
    forbidden = error.message.include?("403 => Net::HTTPForbidden")
    raise ::Compactor::Amazon::NotProAccountError if forbidden

    raise # any other error just re-raise it
  end
end
|
35
|
+
|
36
|
+
# True unless the page's error message box reports a bad email/password
# combination. An absent (empty) error box counts as authorized.
def self.authorized_user?
  errors = message_box_error
  return true if errors.empty?

  !errors.text.include?('There was an error with your email/password combination.')
end
|
40
|
+
|
41
|
+
# Walks the MWS registration flow and reads the resulting identifiers.
#
# credentials - Hash with :email, :password, :developer_name and
#               :dev_account_id.
#
# Returns [merchant_id, marketplace_id] as read from the
# "registration-key-table" on the final page.
# Raises AuthenticationError when the login step is rejected.
#
# NOTE(review): certificate verification is disabled for this session even
# though it submits credentials - confirm this is intentional.
def self.merchant_identification(credentials={})
  @agent = Mechanize.new
  http = @agent.agent.http
  http.verify_mode        = OpenSSL::SSL::VERIFY_NONE
  http.reuse_ssl_sessions = false
  @agent.get 'https://sellercentral.amazon.com/gp/mws/registration/register.html'

  # Step 1: log in.
  login_form = @agent.page.forms.first
  login_form.email    = credentials[:email]
  login_form.password = credentials[:password]
  submit_form! login_form

  raise Compactor::Amazon::AuthenticationError unless authorized_user?

  # Step 2: identify the developer being authorized.
  developer_form = @agent.page.forms.first
  developer_form.developerName   = credentials[:developer_name]
  developer_form.devMWSAccountId = credentials[:dev_account_id]
  developer_form.radiobutton_with(:value => 'devAuthorization').checked = true
  developer_form.submit

  # Step 3: tick both agreement checkboxes.
  agreement_form = @agent.page.forms.first
  %w[agreeCheckBox understandCheckBox].each do |box_name|
    agreement_form.checkbox_with(:name => box_name).checked = true
  end
  agreement_form.submit

  # Step 4: submit the first form of the following page, then read the keys.
  @agent.page.forms.first.submit
  doc = @agent.page.parser
  merchant_id    = doc.xpath('//table[@class="registration-key-table"]/tr[2]/td[1]').text
  marketplace_id = doc.xpath('//table[@class="registration-key-table"]/tr[3]/td[1]').text

  [merchant_id, marketplace_id]
end
|
70
|
+
|
71
|
+
# Lists the supported marketplaces of the logged-in account with balances.
#
# Returns an Array of [account_name, marketplace_id, balance]; each
# marketplace is selected before its balance is fetched.
# Raises NoMarketplacesError when filtering leaves nothing.
def marketplaces
  available = filter_marketplaces(get_marketplaces)
  raise NoMarketplacesError if available.empty?

  available.map do |account_name, marketplace_id|
    select_marketplace(marketplace_id)

    [account_name, marketplace_id, get_balance]
  end
end
|
82
|
+
|
83
|
+
# Reads the marketplaces offered on the Seller Central homepage.
#
# Returns an Array of [name, marketplace_id] pairs:
# * one pair per <option> when a "#marketplaceSelect" element exists;
# * a single [name, nil] pair when only "#market_switch" exists;
# * [] when neither element is present.
def get_marketplaces
  @mechanize.get MARKETPLACE_HOMEPAGE
  page = @mechanize.page

  selector = page.search("#marketplaceSelect").first
  if selector
    return selector.search("option").map do |option|
      [ option.text, option["value"] ]
    end
  end

  # search returns a (possibly empty) collection, which is always truthy,
  # so emptiness has to be checked explicitly before using its text.
  marketplace_name = page.search("#market_switch")
  return [] if marketplace_name.empty?

  [ [ marketplace_name.text.strip, nil ] ]
end
|
104
|
+
|
105
|
+
# Switches the session to the given marketplace by requesting the
# preferences URL with the (URL-escaped) marketplace id.
def select_marketplace(marketplace_id)
  escaped_id = CGI.escape(marketplace_id)
  url = "https://sellercentral.amazon.com/gp/utilities/set-rainier-prefs.html?ie=UTF8&&marketplaceID=#{escaped_id}"
  @mechanize.get url
end
|
109
|
+
|
110
|
+
# Deposit amount of the first settlement row that is not ready yet.
#
# Returns 0.0 when the settlements page has no results or every row is ready.
def get_balance
  go_to_past_settlements('', '')
  return 0.0 if page_has_no_results?

  pending_row = report_rows.find { |row| !row.ready? }
  pending_row.nil? ? 0.0 : pending_row.deposit_amount
end
|
117
|
+
|
118
|
+
# Fetches every report for settlements between +from+ and +to+.
#
# Both bounds are normalized by parse_dates, the settlements listing for
# that range is opened, and the result of get_reports is returned.
def reports(from, to)
  start_date, end_date = parse_dates(from, to)
  go_to_past_settlements(start_date, end_date)

  get_reports
end
|
124
|
+
|
125
|
+
# Scrapes buyer name and shipping address for each order id.
#
# order_ids - Enumerable of order ids.
#
# Returns a Hash of order_id => Hash with "BuyerName" and "ShippingAddress".
# Scraping is best effort: when an element is missing or the address cannot
# be parsed, the entry keeps whatever was collected before the failure
# (possibly nothing). Only StandardError is absorbed, so Interrupt,
# SystemExit and similar still propagate.
def get_orders(order_ids)
  orders_hash = {}
  order_ids.each do |order_id|
    order = {}
    @mechanize.get order_detail_url(order_id)

    begin
      # Get the buyer name
      tr = @mechanize.page.search!("//tr[@class='list-row']/td[@class='data-display-field'][text()=\"Contact Buyer:\"]").first.parent
      td = tr.search!("td[2]")
      order["BuyerName"] = td.text.strip

      # Get the shipping address
      td = @mechanize.page.search!("//tr[@class='list-row']/td[@class='data-display-field']/strong[text()='Shipping Address:']").first.parent
      addr_lines = td.children.map(&:text).reject { |l| l.blank? || l =~ /^Shipping Address/ }
      order["ShippingAddress"] = parse_address_lines!(addr_lines)
    rescue StandardError
      # Deliberately ignored: a partially filled order is still returned.
    end

    orders_hash[order_id] = order
  end
  orders_hash
end
|
146
|
+
|
147
|
+
private
|
148
|
+
|
149
|
+
# Elements with class "messageboxerror" on the class-level agent's current
# page. (Defined with `def self.`, so the preceding `private` keyword does
# not apply to it.)
def self.message_box_error
  current_page = @agent.page
  current_page.parser.css(".messageboxerror")
end
|
152
|
+
|
153
|
+
# Sleeps for count squared seconds, so waits grow with each attempt.
def slowdown_like_a_human(count)
  pause = count ** 2
  sleep pause
end
|
156
|
+
|
157
|
+
# Reduces scraped [name, marketplace_id] pairs to the supported ones.
#
# * 'www.amazon.com' (id nil or the amazon.com id) becomes
#   ['Amazon Seller Account', AMAZON_COM_MARKETPLACE_ID].
# * 'Your Checkout Website' with a non-nil id becomes
#   ['Checkout By Amazon', id].
#
# Returns the matching pairs in that order; [] when nothing matches.
def filter_marketplaces(marketplaces)
  filtered = []

  sells_on_amazon = marketplaces.any? do |name, id|
    name == 'www.amazon.com' && ( id.nil? || id == AMAZON_COM_MARKETPLACE_ID )
  end
  filtered << [ 'Amazon Seller Account', AMAZON_COM_MARKETPLACE_ID ] if sells_on_amazon

  checkout = marketplaces.find do |name, id|
    name == 'Your Checkout Website' && !id.nil?
  end
  filtered << [ 'Checkout By Amazon', checkout[1] ] if checkout

  filtered
end
|
172
|
+
|
173
|
+
# URL of the Seller Central detail page for +order_id+.
def order_detail_url(order_id)
  base = "https://sellercentral.amazon.com/gp/orders-v2/details?ie=UTF8&orderID="
  "#{base}#{order_id}"
end
|
176
|
+
|
177
|
+
# Splits scraped shipping-address lines into their components.
#
# addr_lines - Array of Strings. The first line is assumed to be the buyer's
#              name and is skipped; "Phone:" lines are dropped; the last
#              remaining line must look like "City, ST 12345".
#
# Returns a Hash with 'street' (remaining lines joined by a newline),
# 'city', 'state' and 'postalcode'.
# Raises AddressParseFailure when no address lines remain (including empty
# input) or the last line has no comma-separated state part.
def parse_address_lines!(addr_lines)
  nbsp = "\302\240"
  cleaned = addr_lines.map { |line| line.gsub(nbsp, " ") }

  # Assume the first line is the name of the buyer, so skip it.
  # Array#[1..-1] is nil for an empty array, hence the fallback.
  remaining = (cleaned[1..-1] || []).reject { |l| l =~ /^Phone:/ }

  raise AddressParseFailure if remaining.empty?

  citystate_line = remaining.pop
  city, remainder = citystate_line.split(/,\s*/)

  raise AddressParseFailure if remainder.nil?

  state, postalcode = remainder.split(/\s+/)

  {
    # Double quotes: a real newline, not the two characters backslash + n.
    'street'     => remaining.join("\n"),
    'city'       => city,
    'state'      => state,
    'postalcode' => postalcode
  }
end
|
199
|
+
|
200
|
+
# Pick a random user agent that isn't Mechanize.
#
# The candidates are the keys of Mechanize::AGENT_ALIASES, i.e. alias names
# rather than User-Agent strings, so the choice is applied through
# user_agent_alias=. Assigning it to user_agent= would send the alias name
# itself as the header value.
def randomize_user_agent!
  agents = Mechanize::AGENT_ALIASES.keys.reject { |k| k == "Mechanize" }

  # Array#choice only exists on Ruby 1.8.7; later versions use #sample.
  @mechanize.user_agent_alias =
    agents.respond_to?(:choice) ? agents.choice : agents.sample
end
|
207
|
+
|
208
|
+
# Opens the past-settlements listing for the given date range.
#
# from, to - Strings placed (URL-escaped) into the startDate and endDate
#            query parameters; empty strings are allowed.
def go_to_past_settlements(from, to)
  start_date = CGI.escape(from)
  end_date   = CGI.escape(to)

  @mechanize.get "https://sellercentral.amazon.com/gp/payments-account/past-settlements.html?endDate=#{end_date}&startDate=#{start_date}&pageSize=Ten"
end
|
214
|
+
|
215
|
+
# Collects the reports from every page of the current settlements listing.
#
# The current page is always processed; further pages are processed for as
# long as pages_to_parse manages to advance.
#
# Returns a Hash of report type => flat Array of report bodies.
def get_reports
  reports = {}

  loop do
    get_reports_in_page.each do |report_type, report_streams|
      reports[report_type] ||= []
      reports[report_type] << report_streams
    end

    break unless pages_to_parse
  end

  # Each page contributed one array per type; merge them into a flat list.
  reports.each { |_type, streams| streams.flatten! }
end
|
229
|
+
|
230
|
+
# Download-button labels mapped to the report type they stand for.
REPORT_TYPES_BY_LABEL = {
  "Download XML"          => :xml,
  "Download Flat File"    => :tsv,
  "Download Flat File V2" => :tsv2
}.freeze

# True when the label is that of the XML report download.
def self.xml_report?(report_identifier)
  REPORT_TYPES_BY_LABEL[report_identifier] == :xml
end

# True when the label is that of the flat file (v1) download.
def self.text_v1_report?(report_identifier)
  REPORT_TYPES_BY_LABEL[report_identifier] == :tsv
end

# True when the label is that of the flat file V2 download.
def self.text_v2_report?(report_identifier)
  REPORT_TYPES_BY_LABEL[report_identifier] == :tsv2
end

# Maps a download-button label to :xml, :tsv or :tsv2.
#
# Raises Compactor::Amazon::UnknownReportType for any other label.
def self.report_type(report_identifier)
  type = REPORT_TYPES_BY_LABEL[report_identifier]
  raise Compactor::Amazon::UnknownReportType unless type

  type
end
|
250
|
+
|
251
|
+
# Runs the block up to three times, stopping as soon as the page no longer
# reports "no results" after a run.
def rescue_empty_results(&block)
  3.times do
    yield
    next if page_has_no_results?

    break
  end
end
|
257
|
+
|
258
|
+
# Tells whether polling for requested reports should stop.
#
# reports_to_watch, reports - accepted for interface compatibility; they do
#                             not influence the result.
# count                     - number of polling attempts made so far.
#
# Returns true once count exceeds ATTEMPTS_BEFORE_GIVING_UP, else false.
def timeout_fetching_reports(reports_to_watch, reports, count)
  count > ATTEMPTS_BEFORE_GIVING_UP
end
|
269
|
+
|
270
|
+
# Downloads the report of +row+ and files it under its type in +reports+.
# The download happens inside @mechanize.transact so that the current page
# stays the current page.
#
# Returns the array the report was appended to.
def add_to_collection(reports, row)
  @mechanize.transact do
    type, body = row.download_report
    bucket = (reports[type] ||= [])
    bucket << body
  end
end
|
280
|
+
|
281
|
+
# Polls the current page until every watched row has produced a report or
# timeout_fetching_reports says to stop.
#
# On each round the page is re-fetched, every watched row is reloaded, and
# rows that vanished or whose report was downloaded (into +reports+) are
# dropped from +reports_to_watch+. Rounds are separated by a growing pause.
#
# reports_to_watch - Array of rows; mutated in place.
# reports          - Hash of type => bodies; mutated in place.
# count            - attempt number to start from.
def get_reports_to_watch(reports_to_watch, reports, count=0)
  until reports_to_watch.empty? || timeout_fetching_reports(reports_to_watch, reports, count)
    rescue_empty_results { @mechanize.get @mechanize.page.uri }

    reports_to_watch.reject! do |watched|
      fresh = watched.reload
      fresh.nil? || (fresh.can_download_report? && add_to_collection(reports, fresh))
    end

    slowdown_like_a_human(count)
    count += 1
  end
end
|
297
|
+
|
298
|
+
# Advances to the next page of results when there is one.
#
# Returns false when the current page has no link with the text "Next";
# otherwise the result of clicking the first such link.
def pages_to_parse
  next_link = @mechanize.page.links_with(:text => "Next").first
  next_link.nil? ? false : next_link.click
end
|
304
|
+
|
305
|
+
# Wraps the data rows of the current page in ScrapedRow objects.
#
# The rows come from the second table matched by
# "#content-main-entities > table"; only rows whose class attribute is
# exactly "list-row-even" or "list-row-odd" are kept.
def report_rows
  row_classes = ["list-row-even","list-row-odd"]

  listing = @mechanize.page.search!("#content-main-entities > table")[1]
  data_rows = listing.search("tr[class]").select do |tr|
    row_classes.include? tr["class"]
  end

  data_rows.map { |tr| ScrapedRow.new(tr, @mechanize) }
end
|
313
|
+
|
314
|
+
# True when the ".data-display" content of the current page contains
# "No results found".
def page_has_no_results?
  display_text = @mechanize.page.search!(".data-display").text
  display_text.include?("No results found")
end
|
317
|
+
|
318
|
+
# Collects the reports available on the current page.
#
# Rows with a download button are downloaded right away. Rows that are
# ready but have no button get a report requested and are then polled via
# get_reports_to_watch. Other rows are skipped.
#
# Returns a Hash of report type => Array of report bodies ({} when the
# page shows no results).
def get_reports_in_page
  collected = {}
  return collected if page_has_no_results?

  pending = []
  report_rows.each do |row|
    if row.can_download_report?
      add_to_collection(collected, row)
      next
    end
    next unless row.ready?

    # transact keeps the listing as the current page after the request.
    @mechanize.transact do
      row.request_report
      pending << row
    end
  end

  get_reports_to_watch(pending, collected)

  collected
end
|
339
|
+
|
340
|
+
# Normalizes both range bounds to "MM/DD/YY" strings.
#
# from, to - anything whose #to_s Date.parse understands.
#
# Returns [from, to] as formatted Strings.
def parse_dates(from, to)
  [from, to].map do |bound|
    Date.parse(bound.to_s).strftime("%m/%d/%y")
  end
end
|
346
|
+
|
347
|
+
# Logs in through the first form of the Seller Central homepage.
#
# Raises AuthenticationError when the resulting page indicates a bad login,
# or LockedAccountError when it indicates a locked account.
def login_to_seller_central(email, password)
  @mechanize.get MARKETPLACE_HOMEPAGE

  login_form = @mechanize.page.forms.first
  login_form.email    = email
  login_form.password = password
  login_form.submit

  if bad_login?
    raise Compactor::Amazon::AuthenticationError
  elsif locked_account?
    raise Compactor::Amazon::LockedAccountError
  end
end
|
357
|
+
|
358
|
+
# True when the current page shows an error message box, or its ".tiny"
# text says the user is not an authorized Seller Central user.
def bad_login?
  doc = @mechanize.page.parser
  return true unless doc.css(".messageboxerror").blank?

  doc.css('.tiny').text.include?('Sorry, you are not an authorized Seller Central user')
end
|
362
|
+
|
363
|
+
# True when the current page's ".messageboxalert" text mentions limited
# access to the seller account.
def locked_account?
  alert_box = @mechanize.page.search(".messageboxalert")
  return alert_box unless alert_box

  alert_box.text.include?("limited access to your seller account")
end
|
367
|
+
end
|
368
|
+
end
|
369
|
+
end
|