compactor 0.3.5 → 0.3.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile.lock +1 -1
- data/lib/compactor/scraper.rb +32 -20
- data/lib/compactor/version.rb +1 -1
- data/test/scraper_test.rb +48 -1
- metadata +3 -3
data/Gemfile.lock
CHANGED
data/lib/compactor/scraper.rb
CHANGED
@@ -2,17 +2,18 @@
|
|
2
2
|
|
3
3
|
module Compactor
|
4
4
|
module Amazon
|
5
|
-
class AddressParseFailure
|
6
|
-
class AuthenticationError
|
7
|
-
class LockedAccountError
|
8
|
-
class
|
9
|
-
class
|
10
|
-
class
|
11
|
-
class
|
12
|
-
class
|
13
|
-
class
|
14
|
-
class
|
15
|
-
class
|
5
|
+
class AddressParseFailure < StandardError; end
|
6
|
+
class AuthenticationError < StandardError; end
|
7
|
+
class LockedAccountError < StandardError; end
|
8
|
+
class LoginFormNotFoundError < StandardError; end
|
9
|
+
class MissingReportButtons < StandardError; end
|
10
|
+
class MissingRow < StandardError; end
|
11
|
+
class MissingXmlReport < StandardError; end
|
12
|
+
class NoMarketplacesError < StandardError; end
|
13
|
+
class NotProAccountError < StandardError; end
|
14
|
+
class ReportLoadingTimeout < StandardError; end
|
15
|
+
class ReportTotalsMismatch < StandardError; end
|
16
|
+
class UnknownReportType < StandardError; end
|
16
17
|
|
17
18
|
ATTEMPTS_BEFORE_GIVING_UP = 15 # give up after 20 minutes
|
18
19
|
MARKETPLACE_HOMEPAGE = "https://sellercentral.amazon.com/gp/homepage.html"
|
@@ -20,7 +21,7 @@ module Compactor
|
|
20
21
|
|
21
22
|
class ReportScraper
|
22
23
|
def initialize(user_credentials={})
|
23
|
-
@mechanize =
|
24
|
+
@mechanize = agent
|
24
25
|
@mechanize.max_file_buffer = 4 * 1024 * 1024
|
25
26
|
@mechanize.max_history = 2
|
26
27
|
@mechanize.agent.http.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
@@ -103,11 +104,11 @@ module Compactor
|
|
103
104
|
def payee_details(order_id)
|
104
105
|
@mechanize.get order_detail_url(order_id)
|
105
106
|
order = {}
|
106
|
-
order["BuyerName"]
|
107
|
+
order["BuyerName"] = buyer_name
|
107
108
|
order["ShippingAddress"] = shipping_address
|
108
|
-
|
109
109
|
order
|
110
110
|
rescue => e
|
111
|
+
nil
|
111
112
|
end
|
112
113
|
|
113
114
|
def get_orders(order_ids)
|
@@ -120,6 +121,10 @@ module Compactor
|
|
120
121
|
|
121
122
|
private
|
122
123
|
|
124
|
+
def agent
|
125
|
+
Mechanize.new
|
126
|
+
end
|
127
|
+
|
123
128
|
def slowdown_like_a_human(count)
|
124
129
|
sleep count ** 2
|
125
130
|
end
|
@@ -219,7 +224,7 @@ module Compactor
|
|
219
224
|
end
|
220
225
|
|
221
226
|
# 6 attempts make it wait at most a minute, or close enough to it
|
222
|
-
def wait_for_element(attempts=
|
227
|
+
def wait_for_element(attempts=default_number_of_attempts, &block)
|
223
228
|
attempts.times do |attempt|
|
224
229
|
element = yield
|
225
230
|
return element unless element.blank?
|
@@ -229,6 +234,10 @@ module Compactor
|
|
229
234
|
nil # no element found
|
230
235
|
end
|
231
236
|
|
237
|
+
def default_number_of_attempts
|
238
|
+
6
|
239
|
+
end
|
240
|
+
|
232
241
|
def rescue_empty_results(&block)
|
233
242
|
3.times do
|
234
243
|
yield
|
@@ -285,8 +294,7 @@ module Compactor
|
|
285
294
|
end
|
286
295
|
|
287
296
|
def page_has_no_results?
|
288
|
-
data_display_element =
|
289
|
-
wait_for_element { @mechanize.page.search(".data-display") }
|
297
|
+
data_display_element = @mechanize.page.search(".data-display")
|
290
298
|
|
291
299
|
fail ReportLoadingTimeout if data_display_element.blank?
|
292
300
|
|
@@ -319,8 +327,12 @@ module Compactor
|
|
319
327
|
end
|
320
328
|
|
321
329
|
def login_to_seller_central(email, password)
|
322
|
-
|
323
|
-
|
330
|
+
form = wait_for_element do
|
331
|
+
@mechanize.get MARKETPLACE_HOMEPAGE
|
332
|
+
@mechanize.page.forms.first
|
333
|
+
end
|
334
|
+
raise Compactor::Amazon::LoginFormNotFoundError if form.blank?
|
335
|
+
|
324
336
|
form.email = email
|
325
337
|
form.password = password
|
326
338
|
form.submit
|
@@ -340,4 +352,4 @@ module Compactor
|
|
340
352
|
end
|
341
353
|
end
|
342
354
|
end
|
343
|
-
end
|
355
|
+
end
|
data/lib/compactor/version.rb
CHANGED
data/test/scraper_test.rb
CHANGED
@@ -5,7 +5,7 @@ class ScraperTest < Test::Unit::TestCase
|
|
5
5
|
def setup
|
6
6
|
Compactor::Amazon::XmlParser.any_instance.stubs(:valid?).returns(true)
|
7
7
|
end
|
8
|
-
|
8
|
+
|
9
9
|
def test_should_not_find_elements_that_do_not_exist
|
10
10
|
VCR.use_cassette("AmazonReportScraper/with_good_login/find_reports/reports_to_request") do
|
11
11
|
scraper = Compactor::Amazon::ReportScraper.new(:email => "far@far.away", :password => "test")
|
@@ -28,6 +28,28 @@ class ScraperTest < Test::Unit::TestCase
|
|
28
28
|
end
|
29
29
|
end
|
30
30
|
|
31
|
+
def test_should_raise_error_if_cannot_find_login_form
|
32
|
+
Mechanize::Page.any_instance.stubs(:forms).returns([])
|
33
|
+
Compactor::Amazon::ReportScraper.any_instance.stubs(:wait_for_element).returns(nil)
|
34
|
+
VCR.use_cassette("AmazonReportScraper/with_good_login/find_reports/reports_to_request") do
|
35
|
+
assert_raises Compactor::Amazon::LoginFormNotFoundError do
|
36
|
+
Compactor::Amazon::ReportScraper.new(:email => "far@far.away", :password => "test")
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
def test_should_still_get_forms_if_first_time_fails
|
42
|
+
Compactor::Amazon::ReportScraper.any_instance.stubs(:agent).returns(MockMechanize.new)
|
43
|
+
Compactor::Amazon::ReportScraper.any_instance.stubs(:default_number_of_attempts).returns(2)
|
44
|
+
Compactor::Amazon::ReportScraper.any_instance.stubs(:bad_login?).returns(false)
|
45
|
+
Compactor::Amazon::ReportScraper.any_instance.stubs(:locked_account?).returns(false)
|
46
|
+
VCR.use_cassette("AmazonReportScraper/with_good_login/find_reports/reports_to_request") do
|
47
|
+
assert_nothing_raised do
|
48
|
+
Compactor::Amazon::ReportScraper.new(:email => "far@far.away", :password => "test")
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
31
53
|
def test_should_raise_error_with_bad_login
|
32
54
|
VCR.use_cassette("AmazonReportScraper/with_bad_login/raise_error") do
|
33
55
|
assert_raises Compactor::Amazon::AuthenticationError do
|
@@ -209,4 +231,29 @@ class ScraperTest < Test::Unit::TestCase
|
|
209
231
|
end
|
210
232
|
end
|
211
233
|
end
|
234
|
+
|
235
|
+
class MockMechanize < Mechanize
|
236
|
+
def get(url)
|
237
|
+
def get(url)
|
238
|
+
@page = MockMechanizePageWithForms.new # other times
|
239
|
+
end
|
240
|
+
@page = MockMechanizePageWithNoForms.new # first time
|
241
|
+
end
|
242
|
+
|
243
|
+
def page
|
244
|
+
@page
|
245
|
+
end
|
246
|
+
|
247
|
+
class MockMechanizePageWithForms
|
248
|
+
def forms
|
249
|
+
[OpenStruct.new(:email => nil, :password => nil, :submit => nil), OpenStruct.new(:email => nil, :password => nil, :submit => nil)]
|
250
|
+
end
|
251
|
+
end
|
252
|
+
|
253
|
+
class MockMechanizePageWithNoForms
|
254
|
+
def forms
|
255
|
+
[]
|
256
|
+
end
|
257
|
+
end
|
258
|
+
end
|
212
259
|
end
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 3
|
8
|
-
-
|
9
|
-
version: 0.3.
|
8
|
+
- 8
|
9
|
+
version: 0.3.8
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Julio Santos
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2013-
|
17
|
+
date: 2013-04-15 00:00:00 -07:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|