compactor 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (33) hide show
  1. data/.gitignore +2 -0
  2. data/.travis.yml +9 -0
  3. data/Gemfile +3 -0
  4. data/Gemfile.lock +47 -0
  5. data/LICENSE +22 -0
  6. data/README.md +47 -0
  7. data/Rakefile +48 -0
  8. data/compactor.gemspec +33 -0
  9. data/lib/compactor/extensions.rb +23 -0
  10. data/lib/compactor/scraped_row.rb +81 -0
  11. data/lib/compactor/scraper.rb +369 -0
  12. data/lib/compactor/version.rb +3 -0
  13. data/lib/compactor.rb +5 -0
  14. data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_bad_login/raise_error.yml +535 -0
  15. data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/find_reports/multiple_pages.yml +11382 -0
  16. data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/find_reports/no_reports.yml +777 -0
  17. data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/find_reports/no_reports_to_request.yml +1804 -0
  18. data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/find_reports/reports_to_request.yml +13482 -0
  19. data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/get_balance.yml +1050 -0
  20. data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/get_orders.yml +822 -0
  21. data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/get_orders_big.yml +4223 -0
  22. data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/get_orders_logging.yml +820 -0
  23. data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/get_orders_with_po_box.yml +793 -0
  24. data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/shipping_address_not_starting_with_number.yml +800 -0
  25. data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/with_multiple_marketplaces/find_reports/reports_to_request.yml +2948 -0
  26. data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/with_multiple_marketplaces/get_marketplaces.yml +842 -0
  27. data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/with_single_marketplaces/get_marketplaces.yml +877 -0
  28. data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_locked_account/raise_error.yml +1033 -0
  29. data/test/mechanize_extensions_test.rb +16 -0
  30. data/test/scraped_row_test.rb +9 -0
  31. data/test/scraper_test.rb +189 -0
  32. data/test/test_helper.rb +18 -0
  33. metadata +205 -0
data/.gitignore ADDED
@@ -0,0 +1,2 @@
1
+ coverage
2
+
data/.travis.yml ADDED
@@ -0,0 +1,9 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.8.7
4
+ - 1.9.2
5
+ - 1.9.3
6
+ - jruby-18mode # JRuby in 1.8 mode
7
+ - jruby-19mode # JRuby in 1.9 mode
8
+ - rbx-18mode
9
+ - rbx-19mode
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source "http://rubygems.org"
2
+
3
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,47 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ compactor (0.1.2)
5
+ mechanize (= 2.4)
6
+
7
+ GEM
8
+ remote: http://rubygems.org/
9
+ specs:
10
+ domain_name (0.5.4)
11
+ unf (~> 0.0.3)
12
+ fakeweb (1.3.0)
13
+ mechanize (2.4)
14
+ domain_name (~> 0.5, >= 0.5.1)
15
+ mime-types (~> 1.17, >= 1.17.2)
16
+ net-http-digest_auth (~> 1.1, >= 1.1.1)
17
+ net-http-persistent (~> 2.5, >= 2.5.2)
18
+ nokogiri (~> 1.4)
19
+ ntlm-http (~> 0.1, >= 0.1.1)
20
+ webrobots (~> 0.0, >= 0.0.9)
21
+ metaclass (0.0.1)
22
+ mime-types (1.19)
23
+ mocha (0.12.1)
24
+ metaclass (~> 0.0.1)
25
+ net-http-digest_auth (1.2.1)
26
+ net-http-persistent (2.8)
27
+ nokogiri (1.5.5)
28
+ ntlm-http (0.1.1)
29
+ rake (0.9.2.2)
30
+ rcov (0.9.11)
31
+ unf (0.0.5)
32
+ unf_ext
33
+ unf_ext (0.0.5)
34
+ vcr (2.0.1)
35
+ webrobots (0.0.13)
36
+
37
+ PLATFORMS
38
+ java
39
+ ruby
40
+
41
+ DEPENDENCIES
42
+ compactor!
43
+ fakeweb
44
+ mocha (= 0.12.1)
45
+ rake
46
+ rcov (= 0.9.11)
47
+ vcr (~> 2.0.0)
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Julio Santos
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,47 @@
1
+ # Compactor
2
+
3
+ [![Build Status](https://secure.travis-ci.org/julio/caterpillar.png)](http://travis-ci.org/julio/caterpillar)
4
+
5
+ Scrape Amazon Seller Central
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ gem 'compactor'
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install compactor
20
+
21
+ ## Usage
22
+
23
+ ```
24
+ rake test:coverage
25
+ ```
26
+
27
+ ```
28
+ more soon
29
+ ```
30
+
31
+ ## Contributing
32
+
33
+ 1. Fork it
34
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
35
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
36
+ 4. Push to the branch (`git push origin my-new-feature`)
37
+ 5. Create new Pull Request
38
+
39
+ ## Contributors
40
+
41
+ * Trae Robrock ( https://github.com/trobrock )
42
+ * Julio Santos ( https://github.com/julio )
43
+
44
+ ## To-do
45
+
46
+ - Refactor
47
+ - Document
data/Rakefile ADDED
@@ -0,0 +1,48 @@
1
require 'rake'
require 'rake/testtask'

desc 'Default: run unit tests.'
task :default => :test

desc 'Test the compactor gem'
Rake::TestTask.new(:test) do |t|
  t.libs << 'lib'
  t.pattern = 'test/**/*_test.rb'
  t.verbose = true
end

# Runs the given test files under rcov and prints a text coverage report.
#
# files - Array of test file paths. When it is empty a notice is printed and
#         rcov is not invoked.
#
# Any previous coverage output is deleted first. The rcov command line is
# echoed before it is executed through `sh`.
def run_coverage(files)
  # "coverage" is removed recursively: rm_f silently leaves directories in
  # place, so a stale report directory would otherwise survive between runs.
  rm_rf "coverage"
  rm_f "coverage.data"

  if files.empty?
    puts "No files were specified for testing"
    return
  end

  files = files.join(" ")

  # Paths excluded from the coverage report depend on the platform.
  if RUBY_PLATFORM =~ /darwin/
    exclude = '--exclude "gems/*" --exclude "Library/Frameworks/*"'
  elsif RUBY_PLATFORM =~ /java/
    exclude = '--exclude "rubygems/*,jruby/*,parser*,gemspec*,_DELEGATION*,__FORWARDABLE__,erb,eval*,recognize_optimized*,yaml,yaml/*,fcntl"'
  else
    exclude = '--exclude "rubygems/*"'
  end

  rcov_bin = RUBY_PLATFORM =~ /java/ ? "jruby -S bundle exec rcov" : "bundle exec rcov"
  rcov = "#{rcov_bin} --rails -Ilib:test --sort coverage --text-report #{exclude}"
  puts
  puts
  puts "Running tests..."
  cmd = "#{rcov} #{files}"
  puts cmd
  sh cmd
end

namespace :test do
  desc "Measures test coverage"
  task :coverage do
    run_coverage Dir["test/**/*_test.rb"]
  end
end
data/compactor.gemspec ADDED
@@ -0,0 +1,33 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for compactor. The version is read from
# lib/compactor/version.rb, so lib/ is put on the load path first.
$LOAD_PATH.push File.expand_path("../lib", __FILE__)
require "compactor/version"

Gem::Specification.new do |spec|
  spec.name        = "compactor"
  spec.version     = Compactor::VERSION
  spec.authors     = ["Julio Santos"]
  spec.email       = ["julio@morgane.com"]
  spec.homepage    = ""
  spec.summary     = "Scrape Amazon Seller Central"
  spec.description = "Scrape Amazon Seller Central"

  spec.rubyforge_project = "compactor"

  # All file lists are taken from git's index.
  spec.files         = `git ls-files`.split("\n")
  spec.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.executables   = `git ls-files -- bin/*`.split("\n").map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  spec.add_runtime_dependency "mechanize", "2.4"

  # Extra runtime dependencies that are only declared when the spec is
  # evaluated on the "java" platform.
  if RUBY_PLATFORM == "java"
    spec.add_runtime_dependency "jruby-openssl", '0.7.3'
    spec.add_runtime_dependency "nokogiri", "1.5.0.beta.2"
  end

  spec.add_development_dependency "rake"
  spec.add_development_dependency "mocha", "0.12.1"
  spec.add_development_dependency "vcr", "~>2.0.0"
  spec.add_development_dependency "fakeweb"
  spec.add_development_dependency "rcov", "0.9.11"
end
@@ -0,0 +1,23 @@
1
class Object
  # Tells whether the receiver is "blank".
  #
  # Objects that respond to empty? are blank exactly when they are empty;
  # every other object is blank only when it is falsy (nil or false).
  def blank?
    return empty? if respond_to?(:empty?)

    !self
  end
end
4
+
5
module Nokogiri
  # Raised by Nokogiri::XML::Node#search! when a selector matches nothing.
  class MissingElement < ::StandardError; end

  module XML
    class Node
      # Strict counterpart of #search.
      #
      # selector - the selector handed straight to #search.
      #
      # Returns whatever #search returned when that result is not blank.
      # Raises Nokogiri::MissingElement, naming the selector, otherwise.
      def search!(selector)
        matches = search(selector)
        return matches unless matches.blank?

        raise MissingElement, "No elements for [#{selector}]"
      end
    end
  end
end
20
+
21
class Mechanize::Page
  # Forwards search! (arguments and block included) to the page's parser, so
  # the raising lookup can be called on the page itself.
  def search!(*args, &block)
    parser.search!(*args, &block)
  end
end
@@ -0,0 +1,81 @@
1
module Compactor
  module Amazon
    # Wraps one table row of a settlements listing together with the
    # Mechanize agent that loaded it, and exposes the actions and values the
    # scraper needs from that row.
    class ScrapedRow
      # node      - the row element; it must respond to search(selector).
      # mechanize - the agent used for follow-up requests; its current page
      #             is consulted by #reload and #report_buttons.
      def initialize(node, mechanize)
        @node      = node
        @mechanize = mechanize
      end

      # True when the row's last cell holds at least one download button.
      def can_download_report?
        !report_buttons.empty?
      end

      # Returns a Mechanize::Page::Link for the parent element of every
      # ".secondarySmallButton" found in the row's last cell.
      def report_buttons
        last_cell.search(".secondarySmallButton").map do |ele|
          Mechanize::Page::Link.new(ele.parent, @mechanize, @mechanize.page)
        end
      end

      # Downloads the report behind the first download button.
      #
      # Returns [type, body] where type comes from ReportScraper.report_type
      # applied to the button label, and body is the response body.
      # Raises whatever ReportScraper.report_type raises for an unknown label.
      def download_report
        # Look the buttons up once; every call to report_buttons searches the
        # cell again and builds new Link objects.
        button            = report_buttons[0]
        report_url        = button.node["href"]
        report_identifier = button.node.search(".button_label").text
        type              = ReportScraper.report_type(report_identifier)
        response_body     = @mechanize.get(report_url).body

        [type, response_body]
      end

      # Finds this row again on the agent's current page.
      #
      # Returns a new ScrapedRow for the first <tr> whose date range equals
      # this row's date range, or nil when there is none.
      def reload
        table_rows.each do |raw_row|
          candidate = ScrapedRow.new(raw_row, @mechanize)
          return candidate if candidate.date_range == date_range
        end

        nil
      end

      # Asks for the report to be generated by posting the id of the row's
      # ".regenerateButton" as "groupId".
      def request_report
        button    = last_cell.search(".regenerateButton")[0]
        button_id = button['id']

        @mechanize.post("/gp/payments-account/redrive.html", {
          "groupId" => button_id
        })
      end

      # Returns false when the last <div> of the last cell mentions one of
      # the in-progress markers and contains no ".regenerateButton";
      # returns true otherwise.
      def ready?
        div  = last_cell.search("div")[-1]
        text = div.text

        ignorable_periods = ["(Processing)", "(Open)", "In Progress"]
        still_pending = ignorable_periods.any? do |ignore_text|
          text.include?(ignore_text) && div.search(".regenerateButton").blank?
        end

        !still_pending
      end

      # Deposit amount of the row as a Float (memoized).
      def deposit_amount
        @deposit_amount ||= fetch_deposit_amount
      end

      # Text of the link in the row's first cell (memoized).
      def date_range
        @date_range ||= @node.search("td:first-child a").text
      end

      private

      # Reads the second-to-last cell, keeps only digits and dots, and
      # converts the remainder to a Float. Returns 0.0 when the cell is
      # missing.
      def fetch_deposit_amount
        deposit_cell = @node.search("td")[-2]
        deposit_cell ? deposit_cell.text.gsub(/[^0-9\.]/, '').to_f : 0.0
      end

      # Every <tr> on the agent's current page.
      def table_rows
        @mechanize.page.search("tr")
      end

      # Last <td> of the row (memoized).
      def last_cell
        @last_cell ||= @node.search("td")[-1]
      end
    end
  end
end
@@ -0,0 +1,369 @@
1
module Compactor
  module Amazon
    class AddressParseFailure < StandardError; end
    class AuthenticationError < StandardError; end
    class LockedAccountError  < StandardError; end
    class MissingRow          < StandardError; end
    class NoMarketplacesError < StandardError; end
    class NotProAccountError  < StandardError; end
    class UnknownReportType   < StandardError; end

    # Polling rounds allowed while waiting for requested reports. Each round
    # sleeps count ** 2 seconds (see slowdown_like_a_human), so rounds 0..15
    # add up to roughly 20 minutes.
    ATTEMPTS_BEFORE_GIVING_UP = 15 # give up after 20 minutes
    MARKETPLACE_HOMEPAGE      = "https://sellercentral.amazon.com/gp/homepage.html"
    AMAZON_COM_MARKETPLACE_ID = 'ATVPDKIKX0DER'

    # Drives a Mechanize session against Seller Central to list marketplaces,
    # read balances, download settlement reports and read order details.
    class ReportScraper
      # Creates the Mechanize agent and logs in.
      #
      # email, password - Seller Central credentials.
      # merchant_id     - stored on the instance.
      #
      # Raises AuthenticationError or LockedAccountError when the page shown
      # after login reports one of those conditions.
      def initialize(email, password, merchant_id)
        @merchant_id = merchant_id

        @mechanize = Mechanize.new
        @mechanize.max_file_buffer = 4 * 1024 * 1024
        @mechanize.max_history     = 2
        # NOTE(review): certificate verification is switched off for a session
        # that submits credentials; confirm this is still required.
        @mechanize.agent.http.verify_mode        = OpenSSL::SSL::VERIFY_NONE
        @mechanize.agent.http.reuse_ssl_sessions = false

        randomize_user_agent!
        login_to_seller_central email, password
      end

      # Submits the form, translating a 403 response into NotProAccountError.
      # Any other Mechanize::ResponseCodeError is re-raised unchanged.
      def self.submit_form!(form)
        form.submit
      rescue Mechanize::ResponseCodeError => e
        raise ::Compactor::Amazon::NotProAccountError if e.message.include?("403 => Net::HTTPForbidden")
        raise # any other error just re-raise it
      end

      # True unless the current registration page shows the
      # "email/password combination" error in its message box.
      def self.authorized_user?
        message_box_error.empty? ||
          !message_box_error.text.include?('There was an error with your email/password combination.')
      end

      # Walks through the MWS registration forms.
      #
      # credentials - Hash with :email, :password, :developer_name and
      #               :dev_account_id.
      #
      # Returns [merchant_id, marketplace_id] as read from the final page.
      # Raises AuthenticationError when the login step is rejected and
      # NotProAccountError when the login form answers with a 403.
      def self.merchant_identification(credentials={})
        @agent = Mechanize.new
        @agent.agent.http.verify_mode = OpenSSL::SSL::VERIFY_NONE
        @agent.agent.http.reuse_ssl_sessions = false
        @agent.get 'https://sellercentral.amazon.com/gp/mws/registration/register.html'
        form = @agent.page.forms.first
        form.email    = credentials[:email]
        form.password = credentials[:password]
        submit_form! form

        raise Compactor::Amazon::AuthenticationError unless authorized_user?

        form = @agent.page.forms.first
        form.developerName   = credentials[:developer_name]
        form.devMWSAccountId = credentials[:dev_account_id]
        form.radiobutton_with(:value => 'devAuthorization').checked=true
        form.submit

        form = @agent.page.forms.first
        form.checkbox_with(:name => 'agreeCheckBox').checked=true
        form.checkbox_with(:name => 'understandCheckBox').checked=true
        form.submit

        @agent.page.forms.first.submit
        merchant_id    = @agent.page.parser.xpath('//table[@class="registration-key-table"]/tr[2]/td[1]').text
        marketplace_id = @agent.page.parser.xpath('//table[@class="registration-key-table"]/tr[3]/td[1]').text

        [merchant_id, marketplace_id]
      end

      # Message-box error nodes of the registration agent's current page.
      # Defined above `private` on purpose: `private` has no effect on
      # `def self.` methods, so this has always been publicly callable.
      def self.message_box_error
        @agent.page.parser.css(".messageboxerror")
      end

      def self.xml_report?(report_identifier)
        report_identifier == "Download XML"
      end

      def self.text_v1_report?(report_identifier)
        report_identifier == "Download Flat File"
      end

      def self.text_v2_report?(report_identifier)
        report_identifier == "Download Flat File V2"
      end

      # Maps a download-button label to :xml, :tsv or :tsv2.
      # Raises UnknownReportType for any other label.
      # Make this into a hash instead
      def self.report_type(report_identifier)
        return :xml  if xml_report?(report_identifier)
        return :tsv  if text_v1_report?(report_identifier)
        return :tsv2 if text_v2_report?(report_identifier)

        fail Compactor::Amazon::UnknownReportType
      end

      # Returns [account_name, marketplace_id, balance] for every supported
      # marketplace, selecting each one in turn to read its balance.
      # Raises NoMarketplacesError when none is supported.
      def marketplaces
        marketplaces = filter_marketplaces(get_marketplaces)
        raise NoMarketplacesError if marketplaces.empty?

        marketplaces.map do |account_name, marketplace_id|
          select_marketplace(marketplace_id)
          balance = get_balance

          [ account_name, marketplace_id, balance ]
        end
      end

      # Loads the homepage and lists the marketplaces it offers.
      #
      # Returns [name, marketplace_id] pairs taken from the options of
      # "#marketplaceSelect" when that element exists; otherwise a single
      # [name, nil] pair built from "#market_switch"; otherwise [].
      def get_marketplaces
        @mechanize.get MARKETPLACE_HOMEPAGE

        marketplace_selector = @mechanize.page.search("#marketplaceSelect").first
        if marketplace_selector
          return marketplace_selector.search("option").map do |ele|
            [ ele.text, ele["value"] ]
          end
        end

        # A search result is truthy even when nothing matched, so test for
        # emptiness explicitly.
        marketplace_name = @mechanize.page.search("#market_switch")
        return [] if marketplace_name.empty?

        [ [ marketplace_name.text.strip, nil ] ]
      end

      # Switches the session to the given marketplace.
      def select_marketplace(marketplace_id)
        marketplace_id = CGI.escape(marketplace_id)
        @mechanize.get "https://sellercentral.amazon.com/gp/utilities/set-rainier-prefs.html?ie=UTF8&&marketplaceID=#{marketplace_id}"
      end

      # Returns the deposit amount of the first settlement row that is not
      # ready yet, or 0.0 when the page has no results or no such row.
      def get_balance
        go_to_past_settlements('', '')
        return 0.0 if page_has_no_results?
        open_row = report_rows.detect { |row| !row.ready? }
        return 0.0 if open_row.nil?
        open_row.deposit_amount
      end

      # Downloads the settlement reports between two dates.
      #
      # from, to - anything whose to_s Date.parse understands.
      #
      # Returns a Hash of report type => Array of report bodies.
      def reports(from, to)
        from, to = parse_dates(from, to)
        go_to_past_settlements(from, to)

        get_reports
      end

      # Reads buyer name and shipping address for each order id.
      #
      # Returns a Hash of order id => Hash that may contain "BuyerName" and
      # "ShippingAddress". Extraction is best effort: when the page cannot be
      # parsed the order's hash keeps whatever was collected so far.
      def get_orders(order_ids)
        orders_hash = {}
        order_ids.each do |order_id|
          order = {}
          @mechanize.get order_detail_url(order_id)

          # Get the buyer name
          begin
            tr = @mechanize.page.search!("//tr[@class='list-row']/td[@class='data-display-field'][text()=\"Contact Buyer:\"]").first.parent
            td = tr.search!("td[2]")
            order["BuyerName"] = td.text.strip
            td = @mechanize.page.search!("//tr[@class='list-row']/td[@class='data-display-field']/strong[text()='Shipping Address:']").first.parent
            addr_lines = td.children.map(&:text).reject { |l| l.blank? || l =~ /^Shipping Address/ }
            order["ShippingAddress"] = parse_address_lines!(addr_lines)
          rescue StandardError
            # Deliberately ignored: parse failures leave the order partial.
            # StandardError (not Exception) so interrupts and exit requests
            # are not swallowed.
          end

          orders_hash[order_id] = order
        end
        orders_hash
      end

      private

      # Sleeps count ** 2 seconds, so later polling rounds wait longer.
      def slowdown_like_a_human(count)
        sleep count ** 2
      end

      # Reduces [name, marketplace_id] pairs to the supported marketplaces,
      # renamed: the www.amazon.com entry (always reported with
      # AMAZON_COM_MARKETPLACE_ID) and the checkout website entry.
      def filter_marketplaces(marketplaces)
        results = []

        amazon_com = marketplaces.detect do |n, m_id|
          n == 'www.amazon.com' && ( m_id.nil? || m_id == AMAZON_COM_MARKETPLACE_ID )
        end
        results << [ 'Amazon Seller Account', AMAZON_COM_MARKETPLACE_ID ] if amazon_com

        checkout = marketplaces.detect do |n, m_id|
          n == 'Your Checkout Website' && !m_id.nil?
        end
        results << [ 'Checkout By Amazon', checkout[1] ] if checkout

        results
      end

      def order_detail_url(order_id)
        "https://sellercentral.amazon.com/gp/orders-v2/details?ie=UTF8&orderID=#{order_id}"
      end

      # Turns the text lines of a shipping-address cell into a Hash with
      # 'street', 'city', 'state' and 'postalcode'.
      #
      # Raises AddressParseFailure when no address lines remain or when the
      # last line has no "city, rest" shape.
      def parse_address_lines!(addr_lines)
        nbsp = "\302\240"
        lines = addr_lines.map { |line| line.gsub(nbsp, " ") }
        # Assume the first line is the name of the buyer, so skip it.
        # An empty list sliced with [1..-1] is nil, hence the fallback.
        lines = (lines[1..-1] || []).reject { |l| l =~ /^Phone:/ }

        raise AddressParseFailure if lines.empty?

        citystate_line = lines.pop
        city, remainder = citystate_line.split(/,\s*/)

        raise AddressParseFailure if remainder.nil?

        state, postalcode = remainder.split(/\s+/)

        {
          # Double quotes: '\n' in single quotes is a backslash followed by
          # "n", not a line break.
          'street'     => lines.join("\n"),
          'city'       => city,
          'state'      => state,
          'postalcode' => postalcode
        }
      end

      # Pick a random user agent that isn't Mechanize
      def randomize_user_agent!
        agents = Mechanize::AGENT_ALIASES.keys.reject{ |k| k == "Mechanize" }
        chosen = agents.respond_to?(:choice) ? agents.choice : agents.sample

        # The keys are alias names; user_agent_alias= resolves the alias to
        # the real User-Agent string, whereas user_agent= would send the
        # alias name itself.
        @mechanize.user_agent_alias = chosen
      end

      def go_to_past_settlements(from, to)
        from = CGI.escape(from)
        to   = CGI.escape(to)

        @mechanize.get "https://sellercentral.amazon.com/gp/payments-account/past-settlements.html?endDate=#{to}&startDate=#{from}&pageSize=Ten"
      end

      # Collects the reports of the current page and of every following page
      # reachable through a "Next" link.
      #
      # Returns a Hash of report type => Array of report bodies.
      def get_reports
        reports = {}

        loop do
          get_reports_in_page.each do |report_type, report_streams|
            (reports[report_type] ||= []).concat(report_streams)
          end

          break unless pages_to_parse
        end

        reports
      end

      # Yields up to three times, stopping as soon as the page has results.
      def rescue_empty_results
        3.times do
          yield
          break unless page_has_no_results?
        end
      end

      # True once more than ATTEMPTS_BEFORE_GIVING_UP polling rounds have run.
      # reports_to_watch and reports are accepted for call compatibility only.
      def timeout_fetching_reports(reports_to_watch, reports, count)
        count > ATTEMPTS_BEFORE_GIVING_UP
      end

      # Find the report to download from a row, and add it
      # to a collection of reports. Do this while ensuring
      # that the current page stays the current page.
      def add_to_collection(reports, row)
        @mechanize.transact do
          report_type, report = row.download_report
          reports[report_type] ||= []
          reports[report_type] << report
        end
      end

      # Polls the current page until every requested report has been
      # downloaded or has vanished from the page, or until the attempts run
      # out. Rows are removed from reports_to_watch as they are handled and
      # downloads are added to reports.
      def get_reports_to_watch(reports_to_watch, reports, count=0)
        return if reports_to_watch.empty? || timeout_fetching_reports(reports_to_watch, reports, count)

        rescue_empty_results { @mechanize.get @mechanize.page.uri }
        reports_to_watch.reject! do |row|
          fresh_row = row.reload
          if fresh_row.nil?
            true
          elsif fresh_row.can_download_report?
            add_to_collection(reports, fresh_row)
          end
        end

        slowdown_like_a_human(count)
        get_reports_to_watch(reports_to_watch, reports, count+1)
      end

      # Follows the "Next" link when there is one.
      # Returns false when there is no such link.
      def pages_to_parse
        next_button = @mechanize.page.links_with(:text => "Next")[0]
        return false if next_button.nil?

        next_button.click
      end

      # ScrapedRow objects for the odd/even list rows of the second table
      # under "#content-main-entities".
      def report_rows
        tables = @mechanize.page.search!("#content-main-entities > table")
        rows = tables[1].search("tr[class]").select do |ele|
          ["list-row-even","list-row-odd"].include? ele["class"]
        end

        rows.map { |raw_row| ScrapedRow.new(raw_row, @mechanize) }
      end

      def page_has_no_results?
        @mechanize.page.search!(".data-display").text.include? "No results found"
      end

      # Downloads what is available on the current page, requests the reports
      # of rows that are ready but not generated yet, then waits for those.
      #
      # Returns a Hash of report type => Array of report bodies.
      def get_reports_in_page
        reports_to_watch = []
        reports = {}

        return reports if page_has_no_results?

        report_rows.each do |row|
          if row.can_download_report?
            add_to_collection(reports, row)
          elsif row.ready?
            @mechanize.transact do
              row.request_report
              reports_to_watch << row
            end
          end
        end

        get_reports_to_watch(reports_to_watch, reports)

        reports
      end

      # Formats both dates as mm/dd/yy strings.
      def parse_dates(from, to)
        from = Date.parse(from.to_s).strftime("%m/%d/%y")
        to   = Date.parse(to.to_s).strftime("%m/%d/%y")

        [from, to]
      end

      def login_to_seller_central(email, password)
        @mechanize.get MARKETPLACE_HOMEPAGE
        form = @mechanize.page.forms.first
        form.email    = email
        form.password = password
        form.submit

        raise Compactor::Amazon::AuthenticationError if bad_login?
        raise Compactor::Amazon::LockedAccountError  if locked_account?
      end

      def bad_login?
        !@mechanize.page.parser.css(".messageboxerror").blank? ||
          @mechanize.page.parser.css('.tiny').text.include?('Sorry, you are not an authorized Seller Central user')
      end

      def locked_account?
        alert_box = @mechanize.page.search(".messageboxalert")
        alert_box && alert_box.text.include?("limited access to your seller account")
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module Compactor
  # Version of the gem; read by compactor.gemspec. Frozen so the shared
  # string cannot be modified in place.
  VERSION = "0.1.2".freeze
end