compactor 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. data/.gitignore +2 -0
  2. data/.travis.yml +9 -0
  3. data/Gemfile +3 -0
  4. data/Gemfile.lock +47 -0
  5. data/LICENSE +22 -0
  6. data/README.md +47 -0
  7. data/Rakefile +48 -0
  8. data/compactor.gemspec +33 -0
  9. data/lib/compactor/extensions.rb +23 -0
  10. data/lib/compactor/scraped_row.rb +81 -0
  11. data/lib/compactor/scraper.rb +369 -0
  12. data/lib/compactor/version.rb +3 -0
  13. data/lib/compactor.rb +5 -0
  14. data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_bad_login/raise_error.yml +535 -0
  15. data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/find_reports/multiple_pages.yml +11382 -0
  16. data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/find_reports/no_reports.yml +777 -0
  17. data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/find_reports/no_reports_to_request.yml +1804 -0
  18. data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/find_reports/reports_to_request.yml +13482 -0
  19. data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/get_balance.yml +1050 -0
  20. data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/get_orders.yml +822 -0
  21. data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/get_orders_big.yml +4223 -0
  22. data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/get_orders_logging.yml +820 -0
  23. data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/get_orders_with_po_box.yml +793 -0
  24. data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/shipping_address_not_starting_with_number.yml +800 -0
  25. data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/with_multiple_marketplaces/find_reports/reports_to_request.yml +2948 -0
  26. data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/with_multiple_marketplaces/get_marketplaces.yml +842 -0
  27. data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/with_single_marketplaces/get_marketplaces.yml +877 -0
  28. data/test/fixtures/vcr_cassettes/AmazonReportScraper/with_locked_account/raise_error.yml +1033 -0
  29. data/test/mechanize_extensions_test.rb +16 -0
  30. data/test/scraped_row_test.rb +9 -0
  31. data/test/scraper_test.rb +189 -0
  32. data/test/test_helper.rb +18 -0
  33. metadata +205 -0
data/.gitignore ADDED
@@ -0,0 +1,2 @@
1
+ coverage
2
+
data/.travis.yml ADDED
@@ -0,0 +1,9 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.8.7
4
+ - 1.9.2
5
+ - 1.9.3
6
+ - jruby-18mode # JRuby in 1.8 mode
7
+ - jruby-19mode # JRuby in 1.9 mode
8
+ - rbx-18mode
9
+ - rbx-19mode
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
# Fetch gems over HTTPS rather than plain HTTP so downloads cannot be
# tampered with in transit.
source "https://rubygems.org"

# Runtime and development dependencies are declared in compactor.gemspec.
gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,47 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ compactor (0.1.2)
5
+ mechanize (= 2.4)
6
+
7
+ GEM
8
+ remote: http://rubygems.org/
9
+ specs:
10
+ domain_name (0.5.4)
11
+ unf (~> 0.0.3)
12
+ fakeweb (1.3.0)
13
+ mechanize (2.4)
14
+ domain_name (~> 0.5, >= 0.5.1)
15
+ mime-types (~> 1.17, >= 1.17.2)
16
+ net-http-digest_auth (~> 1.1, >= 1.1.1)
17
+ net-http-persistent (~> 2.5, >= 2.5.2)
18
+ nokogiri (~> 1.4)
19
+ ntlm-http (~> 0.1, >= 0.1.1)
20
+ webrobots (~> 0.0, >= 0.0.9)
21
+ metaclass (0.0.1)
22
+ mime-types (1.19)
23
+ mocha (0.12.1)
24
+ metaclass (~> 0.0.1)
25
+ net-http-digest_auth (1.2.1)
26
+ net-http-persistent (2.8)
27
+ nokogiri (1.5.5)
28
+ ntlm-http (0.1.1)
29
+ rake (0.9.2.2)
30
+ rcov (0.9.11)
31
+ unf (0.0.5)
32
+ unf_ext
33
+ unf_ext (0.0.5)
34
+ vcr (2.0.1)
35
+ webrobots (0.0.13)
36
+
37
+ PLATFORMS
38
+ java
39
+ ruby
40
+
41
+ DEPENDENCIES
42
+ compactor!
43
+ fakeweb
44
+ mocha (= 0.12.1)
45
+ rake
46
+ rcov (= 0.9.11)
47
+ vcr (~> 2.0.0)
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Julio Santos
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,47 @@
1
+ # Compactor
2
+
3
+ [![Build Status](https://secure.travis-ci.org/julio/caterpillar.png)](http://travis-ci.org/julio/caterpillar)
4
+
5
+ Scrape Amazon Seller Central
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ gem 'compactor'
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install compactor
20
+
21
+ ## Usage
22
+
23
+ ```
24
+ rake test:coverage
25
+ ```
26
+
27
+ ```
28
+ more soon
29
+ ```
30
+
31
+ ## Contributing
32
+
33
+ 1. Fork it
34
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
35
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
36
+ 4. Push to the branch (`git push origin my-new-feature`)
37
+ 5. Create new Pull Request
38
+
39
+ ## Contributors
40
+
41
+ * Trae Robrock ( https://github.com/trobrock )
42
+ * Julio Santos ( https://github.com/julio )
43
+
44
+ ## To-do
45
+
46
+ - Refactor
47
+ - Document
data/Rakefile ADDED
@@ -0,0 +1,48 @@
1
+ require 'rake'
2
+ require 'rake/testtask'
3
+
4
+ desc 'Default: run unit tests.'
5
+ task :default => :test
6
+
7
+ desc 'Test the compactor gem'
8
# `rake test`: runs every file matching test/**/*_test.rb with lib/ added
# to the load path, echoing the command being run.
Rake::TestTask.new(:test) do |test_task|
  test_task.verbose = true
  test_task.pattern = 'test/**/*_test.rb'
  test_task.libs.push('lib')
end
13
+
14
# Runs the given test files under rcov and prints a text coverage report.
#
# files - Array of test file paths. When empty, a notice is printed and
#         nothing is run.
#
# Output left over from a previous run is removed first. "coverage" is
# removed with rm_rf because rm_f silently leaves a directory in place
# (presumably "coverage" is rcov's report directory -- it is also listed in
# .gitignore).
def run_coverage(files)
  rm_rf "coverage"
  rm_f "coverage.data"

  if files.empty?
    puts "No files were specified for testing"
    return
  end

  files = files.join(" ")

  # The set of paths excluded from the report depends on the platform.
  if RUBY_PLATFORM =~ /darwin/
    exclude = '--exclude "gems/*" --exclude "Library/Frameworks/*"'
  elsif RUBY_PLATFORM =~ /java/
    exclude = '--exclude "rubygems/*,jruby/*,parser*,gemspec*,_DELEGATION*,__FORWARDABLE__,erb,eval*,recognize_optimized*,yaml,yaml/*,fcntl"'
  else
    exclude = '--exclude "rubygems/*"'
  end

  # On JRuby the command is launched through `jruby -S`.
  rcov_bin = RUBY_PLATFORM =~ /java/ ? "jruby -S bundle exec rcov" : "bundle exec rcov"
  rcov = "#{rcov_bin} --rails -Ilib:test --sort coverage --text-report #{exclude}"
  puts
  puts
  puts "Running tests..."
  cmd = "#{rcov} #{files}"
  puts cmd
  sh cmd
end
42
+
43
namespace :test do
  # `rake test:coverage`: hands every test/**/*_test.rb file to run_coverage.
  desc "Measures test coverage"
  task(:coverage) { run_coverage Dir.glob("test/**/*_test.rb") }
end
data/compactor.gemspec ADDED
@@ -0,0 +1,33 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for compactor.

# Put lib/ on the load path so the version file can be required below.
$LOAD_PATH.push File.expand_path("../lib", __FILE__)
require "compactor/version"

Gem::Specification.new do |spec|
  spec.name        = "compactor"
  spec.version     = Compactor::VERSION
  spec.authors     = ["Julio Santos"]
  spec.email       = ["julio@morgane.com"]
  spec.homepage    = ""
  spec.summary     = "Scrape Amazon Seller Central"
  spec.description = "Scrape Amazon Seller Central"

  spec.rubyforge_project = "compactor"

  # The file lists are taken from git, so the spec must be evaluated inside
  # a git checkout.
  spec.files         = `git ls-files`.split("\n")
  spec.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.executables   = `git ls-files -- bin/*`.split("\n").map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  spec.add_runtime_dependency "mechanize", "2.4"

  # These extra runtime dependencies are only declared when the spec is
  # evaluated on a "java" platform.
  if RUBY_PLATFORM == "java"
    spec.add_runtime_dependency "jruby-openssl", '0.7.3'
    spec.add_runtime_dependency "nokogiri", "1.5.0.beta.2"
  end

  spec.add_development_dependency "rake"
  spec.add_development_dependency "mocha", "0.12.1"
  spec.add_development_dependency "vcr", "~>2.0.0"
  spec.add_development_dependency "fakeweb"
  spec.add_development_dependency "rcov", "0.9.11"
end
@@ -0,0 +1,23 @@
1
class Object
  # True when the receiver reports itself empty (for anything that responds
  # to #empty?), otherwise true only for falsy receivers (nil and false).
  def blank?
    return empty? if respond_to?(:empty?)

    !self
  end
end
4
+
5
module Nokogiri
  # Raised by Node#search! when a selector matches nothing.
  class MissingElement < ::StandardError; end

  module XML
    class Node
      # Strict variant of #search.
      #
      # selector - the selector handed straight to #search.
      #
      # Returns whatever #search returned when that result is not blank.
      # Raises MissingElement, naming the selector, when it is blank.
      def search!(selector)
        matches = search(selector)
        return matches unless matches.blank?

        raise MissingElement, "No elements for [#{selector}]"
      end
    end
  end
end
20
+
21
# Lets a page be queried with search! directly: the call is forwarded to
# the object returned by the page's #parser.
class Mechanize::Page
  def_delegator :parser, :search!
end
@@ -0,0 +1,81 @@
1
module Compactor
  module Amazon
    # Wraps one table row node of the settlements listing together with the
    # Mechanize agent that fetched the page it belongs to.
    class ScrapedRow
      # node      - the row node; only #search is called on it here.
      # mechanize - the agent used for follow-up requests and to reach the
      #             current page.
      def initialize(node, mechanize)
        @node      = node
        @mechanize = mechanize
      end

      # True when the row's last cell holds at least one download button.
      def can_download_report?
        !report_buttons.empty?
      end

      # Builds a Mechanize link for the parent element of every
      # ".secondarySmallButton" found in the row's last cell.
      def report_buttons
        buttons = last_cell.search(".secondarySmallButton")
        buttons.map do |button|
          Mechanize::Page::Link.new(button.parent, @mechanize, @mechanize.page)
        end
      end

      # Downloads the report behind the row's first download button.
      #
      # Returns [type, body]: type comes from ReportScraper.report_type
      # applied to the button label, body is the raw response body.
      def download_report
        report_url       = report_buttons[0].node["href"]
        button_label     = report_buttons[0].node.search(".button_label").text
        type             = ReportScraper.report_type(button_label)

        [type, @mechanize.get(report_url).body]
      end

      # Looks this row up again on the agent's current page by comparing
      # date ranges. Returns a new ScrapedRow, or nil when no row matches.
      def reload
        candidates = table_rows.map { |tr| ScrapedRow.new(tr, @mechanize) }
        candidates.detect { |candidate| candidate.date_range == date_range }
      end

      # Posts the id of the row's first ".regenerateButton" to the redrive
      # endpoint as "groupId".
      def request_report
        regenerate_button = last_cell.search(".regenerateButton")[0]

        @mechanize.post("/gp/payments-account/redrive.html",
                        "groupId" => regenerate_button['id'])
      end

      # False when the last div of the last cell mentions one of the
      # in-progress markers and that div has no ".regenerateButton";
      # true otherwise.
      def ready?
        status_div  = last_cell.search("div")[-1]
        status_text = status_div.text

        still_pending = ["(Processing)", "(Open)", "In Progress"].any? do |marker|
          status_text.include?(marker) && status_div.search(".regenerateButton").blank?
        end

        !still_pending
      end

      # Deposit amount as a Float, computed once and then cached.
      def deposit_amount
        @deposit_amount ||= fetch_deposit_amount
      end

      # Text of the link in the row's first cell, cached after first use.
      def date_range
        @date_range ||= @node.search("td:first-child a").text
      end

      private

      # Reads the second-to-last cell, keeps only digits and dots and
      # converts the rest to a Float; 0.0 when there is no such cell.
      # NOTE(review): a leading minus sign is stripped as well, so a negative
      # amount would come back positive -- confirm that is intended.
      def fetch_deposit_amount
        deposit_cell = @node.search("td")[-2]
        return 0.0 unless deposit_cell

        deposit_cell.text.gsub(/[^0-9\.]/, '').to_f
      end

      # Every "tr" on the agent's current page.
      def table_rows
        @mechanize.page.search("tr")
      end

      # Last "td" of the row, cached after first use.
      def last_cell
        @last_cell ||= @node.search("td")[-1]
      end
    end
  end
end
@@ -0,0 +1,369 @@
1
module Compactor
  module Amazon
    class AddressParseFailure < StandardError; end
    class AuthenticationError < StandardError; end
    class LockedAccountError < StandardError; end
    class MissingRow < StandardError; end
    class NoMarketplacesError < StandardError; end
    class NotProAccountError < StandardError; end
    class UnknownReportType < StandardError; end

    # Polling for requested reports stops once the attempt counter exceeds
    # this value. Attempt number `count` sleeps count ** 2 seconds, so
    # attempts 0..15 add up to roughly 20 minutes.
    ATTEMPTS_BEFORE_GIVING_UP = 15
    MARKETPLACE_HOMEPAGE = "https://sellercentral.amazon.com/gp/homepage.html"
    AMAZON_COM_MARKETPLACE_ID = 'ATVPDKIKX0DER'

    # Logs into Seller Central with Mechanize and scrapes marketplaces,
    # balances, settlement reports and order details from its pages.
    class ReportScraper
      # Creates the agent and logs in immediately.
      #
      # Raises AuthenticationError or LockedAccountError when the login page
      # reports a problem (see #login_to_seller_central).
      def initialize(email, password, merchant_id)
        @merchant_id = merchant_id

        @mechanize = Mechanize.new
        @mechanize.max_file_buffer = 4 * 1024 * 1024
        @mechanize.max_history = 2
        # NOTE(review): certificate verification is switched off for a
        # session that submits credentials -- confirm this is still needed.
        @mechanize.agent.http.verify_mode = OpenSSL::SSL::VERIFY_NONE
        @mechanize.agent.http.reuse_ssl_sessions = false

        randomize_user_agent!
        login_to_seller_central email, password
      end

      # --- Class-level helpers -------------------------------------------
      # All `def self.` methods are public: a bare `private` only affects
      # instance methods, so they are kept together up here.

      # Submits the form, translating an HTTP 403 response into
      # NotProAccountError. Any other response-code error is re-raised.
      def self.submit_form!(form)
        form.submit
      rescue Mechanize::ResponseCodeError => e
        raise ::Compactor::Amazon::NotProAccountError if e.message.include?("403 => Net::HTTPForbidden")
        raise # any other error just re-raise it
      end

      # False only when the current page of the class-level agent shows the
      # bad email/password message in its error box.
      def self.authorized_user?
        message_box_error.empty? ||
          !message_box_error.text.include?('There was an error with your email/password combination.')
      end

      # Walks through the MWS registration forms with the given credentials
      # (:email, :password, :developer_name, :dev_account_id).
      #
      # Returns [merchant_id, marketplace_id] read from the final page.
      # Raises AuthenticationError when the login step is rejected.
      def self.merchant_identification(credentials={})
        @agent = Mechanize.new
        @agent.agent.http.verify_mode = OpenSSL::SSL::VERIFY_NONE
        @agent.agent.http.reuse_ssl_sessions = false
        @agent.get 'https://sellercentral.amazon.com/gp/mws/registration/register.html'
        form = @agent.page.forms.first
        form.email = credentials[:email]
        form.password = credentials[:password]
        submit_form! form

        raise Compactor::Amazon::AuthenticationError unless authorized_user?

        form = @agent.page.forms.first
        form.developerName = credentials[:developer_name]
        form.devMWSAccountId = credentials[:dev_account_id]
        form.radiobutton_with(:value => 'devAuthorization').checked=true
        form.submit

        form = @agent.page.forms.first
        form.checkbox_with(:name => 'agreeCheckBox').checked=true
        form.checkbox_with(:name => 'understandCheckBox').checked=true
        form.submit

        @agent.page.forms.first.submit
        merchant_id = @agent.page.parser.xpath('//table[@class="registration-key-table"]/tr[2]/td[1]').text
        marketplace_id = @agent.page.parser.xpath('//table[@class="registration-key-table"]/tr[3]/td[1]').text

        [merchant_id, marketplace_id]
      end

      # Error box nodes on the current page of the class-level agent.
      def self.message_box_error
        @agent.page.parser.css(".messageboxerror")
      end

      def self.xml_report?(report_identifier)
        report_identifier == "Download XML"
      end

      def self.text_v1_report?(report_identifier)
        report_identifier == "Download Flat File"
      end

      def self.text_v2_report?(report_identifier)
        report_identifier == "Download Flat File V2"
      end

      # Maps a download button label to :xml, :tsv or :tsv2.
      # Raises UnknownReportType for any other label.
      def self.report_type(report_identifier)
        return :xml if xml_report?(report_identifier)
        return :tsv if text_v1_report?(report_identifier)
        return :tsv2 if text_v2_report?(report_identifier)

        fail Compactor::Amazon::UnknownReportType
      end

      # --- Public instance API -------------------------------------------

      # Returns [account_name, marketplace_id, balance] for every supported
      # marketplace, selecting each marketplace before reading its balance.
      # Raises NoMarketplacesError when none is supported.
      def marketplaces
        marketplaces = filter_marketplaces(get_marketplaces)
        raise NoMarketplacesError if marketplaces.empty?

        marketplaces.map do |account_name, marketplace_id|
          select_marketplace(marketplace_id)
          balance = get_balance

          [ account_name, marketplace_id, balance ]
        end
      end

      # Reads the marketplaces offered on the homepage.
      #
      # Returns [name, marketplace_id] pairs from the "#marketplaceSelect"
      # options when that selector exists; otherwise a single
      # [name, nil] pair from "#market_switch"; otherwise [].
      def get_marketplaces
        @mechanize.get MARKETPLACE_HOMEPAGE

        marketplace_selector = @mechanize.page.search("#marketplaceSelect").first
        if marketplace_selector
          return marketplace_selector.search("option").map do |ele|
            [ ele.text, ele["value"] ]
          end
        end

        # search always returns a (truthy) node set, so emptiness has to be
        # checked explicitly for the fallback below to be reachable.
        marketplace_name = @mechanize.page.search("#market_switch")
        return [] if marketplace_name.empty?

        [ [ marketplace_name.text.strip, nil ] ]
      end

      # Switches the session to the given marketplace.
      def select_marketplace(marketplace_id)
        marketplace_id = CGI.escape(marketplace_id)
        @mechanize.get "https://sellercentral.amazon.com/gp/utilities/set-rainier-prefs.html?ie=UTF8&&marketplaceID=#{marketplace_id}"
      end

      # Deposit amount of the first settlement row that is not ready yet,
      # or 0.0 when the page has no results or every row is ready.
      def get_balance
        go_to_past_settlements('', '')
        return 0.0 if page_has_no_results?
        open_row = report_rows.detect { |row| !row.ready? }
        return 0.0 if open_row.nil?
        open_row.deposit_amount
      end

      # Downloads every report for settlements between from and to.
      # Returns a hash of report type => list of report bodies.
      def reports(from, to)
        from, to = parse_dates(from, to)
        go_to_past_settlements(from, to)

        get_reports
      end

      # Fetches the detail page of each order and extracts the buyer name
      # and shipping address.
      #
      # Returns a hash of order_id => { "BuyerName" => ..., "ShippingAddress" => ... }.
      # Extraction is best effort: an order whose page cannot be parsed
      # keeps whatever fields were read before the failure.
      def get_orders(order_ids)
        orders_hash = {}
        order_ids.each do |order_id|
          order = {}
          @mechanize.get order_detail_url(order_id)

          # Get the buyer name
          begin
            tr = @mechanize.page.search!("//tr[@class='list-row']/td[@class='data-display-field'][text()=\"Contact Buyer:\"]").first.parent
            td = tr.search!("td[2]")
            order["BuyerName"] = td.text.strip
            td = @mechanize.page.search!("//tr[@class='list-row']/td[@class='data-display-field']/strong[text()='Shipping Address:']").first.parent
            addr_lines = td.children.map(&:text).reject { |l| l.blank? || l =~ /^Shipping Address/ }
            order["ShippingAddress"] = parse_address_lines!(addr_lines)
          rescue StandardError
            # Deliberately ignored (see above). Only StandardError is
            # swallowed so interrupts and exit requests still propagate.
          end

          orders_hash[order_id] = order
        end
        orders_hash
      end

      private

      # Sleeps count ** 2 seconds between polling attempts.
      def slowdown_like_a_human(count)
        sleep count ** 2
      end

      # Reduces scraped [name, marketplace_id] pairs to the supported ones:
      # the 'www.amazon.com' entry (reported under the fixed amazon.com id)
      # and the 'Your Checkout Website' entry when it carries an id.
      def filter_marketplaces(marketplaces)
        results = []

        amazon_com = marketplaces.detect do |n, m_id|
          n == 'www.amazon.com' && ( m_id.nil? || m_id == AMAZON_COM_MARKETPLACE_ID )
        end
        results << [ 'Amazon Seller Account', AMAZON_COM_MARKETPLACE_ID ] if amazon_com

        checkout = marketplaces.detect do |n, m_id|
          n == 'Your Checkout Website' && !m_id.nil?
        end
        results << [ 'Checkout By Amazon', checkout[1] ] if checkout

        results
      end

      # URL of the order detail page. The id is escaped so that characters
      # such as "&" or spaces cannot alter the query string.
      def order_detail_url(order_id)
        "https://sellercentral.amazon.com/gp/orders-v2/details?ie=UTF8&orderID=#{CGI.escape(order_id.to_s)}"
      end

      # Splits scraped address lines into street / city / state / postalcode.
      #
      # The first line is assumed to be the buyer's name and is skipped, as
      # are "Phone:" lines. The last remaining line must look like
      # "City, ST 12345".
      #
      # Raises AddressParseFailure when nothing is left to parse or the
      # last line has no comma.
      def parse_address_lines!(addr_lines)
        nbsp = "\302\240"
        addr_lines = addr_lines.map { |line| line.gsub(nbsp, " ") }
        # Assume the first line is the name of the buyer, so skip it.
        # ([][1..-1] is nil, hence the fallback to an empty list.)
        addr_lines = (addr_lines[1..-1] || []).reject { |l| l =~ /^Phone:/ }

        raise AddressParseFailure if addr_lines.empty?

        citystate_line = addr_lines.pop
        city, remainder = citystate_line.split(/,\s*/)

        raise AddressParseFailure if remainder.nil?

        state, postalcode = remainder.split(/\s+/)

        {
          # NOTE(review): '\n' is single-quoted, so street lines are joined
          # with a literal backslash + "n", not a newline. Kept as is in
          # case consumers rely on it -- confirm the intent.
          'street'     => addr_lines.join('\n'),
          'city'       => city,
          'state'      => state,
          'postalcode' => postalcode
        }
      end

      # Pick a random user agent that isn't Mechanize
      # NOTE(review): this assigns the alias *key* itself as the User-Agent
      # value; confirm whether user_agent_alias= was intended.
      def randomize_user_agent!
        agents = Mechanize::AGENT_ALIASES.keys.reject{ |k| k == "Mechanize" }

        # Array#choice on rubies that still have it, Array#sample otherwise.
        @mechanize.user_agent =
          agents.respond_to?(:choice) ? agents.choice : agents.sample
      end

      # Opens the past-settlements listing for the given date strings.
      def go_to_past_settlements(from, to)
        from = CGI.escape(from)
        to   = CGI.escape(to)

        @mechanize.get "https://sellercentral.amazon.com/gp/payments-account/past-settlements.html?endDate=#{to}&startDate=#{from}&pageSize=Ten"
      end

      # Collects the reports of the current page and of every page reachable
      # through "Next". Returns a hash of type => flat list of reports.
      def get_reports
        reports = {}
        loop do
          get_reports_in_page.each do |report_type, report_streams|
            reports[report_type] ||= []
            reports[report_type] << report_streams
          end

          break unless pages_to_parse
        end

        reports.each { |_type, streams| streams.flatten! }
      end

      # Runs the block up to three times, stopping as soon as the page no
      # longer says there are no results.
      def rescue_empty_results(&block)
        3.times do
          yield
          break unless page_has_no_results?
        end
      end

      # True once count has exceeded ATTEMPTS_BEFORE_GIVING_UP.
      # reports_to_watch and reports are accepted for interface
      # compatibility only; the bookkeeping previously computed from them
      # was never used and raised TypeError when reports was empty.
      def timeout_fetching_reports(reports_to_watch, reports, count)
        count > ATTEMPTS_BEFORE_GIVING_UP
      end

      # Find the report to download from a row, and add it
      # to a collection of reports. Do this while ensuring
      # that the current page stays the current page.
      def add_to_collection(reports, row)
        @mechanize.transact do
          report_type, report = row.download_report
          reports[report_type] ||= []
          reports[report_type] << report
        end
      end

      # Polls the current page until every watched row has been downloaded
      # (or has disappeared), or until the attempt limit is exceeded.
      # Downloaded reports are added to reports; finished rows are removed
      # from reports_to_watch in place.
      def get_reports_to_watch(reports_to_watch, reports, count=0)
        return if reports_to_watch.empty? || timeout_fetching_reports(reports_to_watch, reports, count)

        rescue_empty_results { @mechanize.get @mechanize.page.uri }
        reports_to_watch.reject! do |row|
          fresh_row = row.reload
          if fresh_row.nil?
            true # the row is gone from the page, stop watching it
          elsif fresh_row.can_download_report?
            add_to_collection(reports, fresh_row)
            true
          else
            false
          end
        end

        slowdown_like_a_human(count)
        get_reports_to_watch(reports_to_watch, reports, count+1)
      end

      # Clicks the "Next" link when there is one. Returns false otherwise.
      def pages_to_parse
        next_button = @mechanize.page.links_with(:text => "Next")[0]
        return false if next_button.nil?

        next_button.click
      end

      # ScrapedRow wrappers for the odd/even list rows of the second table
      # under "#content-main-entities".
      def report_rows
        tables = @mechanize.page.search!("#content-main-entities > table")
        rows = tables[1].search("tr[class]").select do |ele|
          ["list-row-even","list-row-odd"].include? ele["class"]
        end

        rows.map { |raw_row| ScrapedRow.new(raw_row, @mechanize) }
      end

      def page_has_no_results?
        @mechanize.page.search!(".data-display").text.include? "No results found"
      end

      # Downloads what is available on the current page, requests the
      # reports of rows that are ready but not yet downloadable, then polls
      # for those. Returns a hash of type => list of reports.
      def get_reports_in_page
        reports_to_watch = []
        reports = {}

        return reports if page_has_no_results?

        report_rows.each do |row|
          if row.can_download_report?
            add_to_collection(reports, row)
          elsif row.ready?
            @mechanize.transact do
              row.request_report
              reports_to_watch << row
            end
          end
        end

        get_reports_to_watch(reports_to_watch, reports)

        reports
      end

      # Normalises both dates to "mm/dd/yy" strings.
      def parse_dates(from, to)
        from = Date.parse(from.to_s).strftime("%m/%d/%y")
        to   = Date.parse(to.to_s).strftime("%m/%d/%y")

        [from, to]
      end

      # Submits the login form found on the homepage.
      # Raises AuthenticationError or LockedAccountError depending on what
      # the resulting page shows.
      def login_to_seller_central(email, password)
        @mechanize.get MARKETPLACE_HOMEPAGE
        form = @mechanize.page.forms.first
        form.email    = email
        form.password = password
        form.submit

        raise Compactor::Amazon::AuthenticationError if bad_login?
        raise Compactor::Amazon::LockedAccountError  if locked_account?
      end

      def bad_login?
        !@mechanize.page.parser.css(".messageboxerror").blank? ||
          @mechanize.page.parser.css('.tiny').text.include?('Sorry, you are not an authorized Seller Central user')
      end

      def locked_account?
        alert_box = @mechanize.page.search(".messageboxalert")
        alert_box && alert_box.text.include?("limited access to your seller account")
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module Compactor
  # Version of the gem; read by compactor.gemspec. Frozen so the shared
  # constant cannot be modified in place.
  VERSION = "0.1.2".freeze
end