compactor 0.3.5 → 0.3.8
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile.lock +1 -1
- data/lib/compactor/scraper.rb +32 -20
- data/lib/compactor/version.rb +1 -1
- data/test/scraper_test.rb +48 -1
- metadata +3 -3
data/Gemfile.lock
CHANGED
data/lib/compactor/scraper.rb
CHANGED
@@ -2,17 +2,18 @@
|
|
2
2
|
|
3
3
|
module Compactor
|
4
4
|
module Amazon
|
5
|
-
class AddressParseFailure
|
6
|
-
class AuthenticationError
|
7
|
-
class LockedAccountError
|
8
|
-
class
|
9
|
-
class
|
10
|
-
class
|
11
|
-
class
|
12
|
-
class
|
13
|
-
class
|
14
|
-
class
|
15
|
-
class
|
5
|
+
class AddressParseFailure < StandardError; end
|
6
|
+
class AuthenticationError < StandardError; end
|
7
|
+
class LockedAccountError < StandardError; end
|
8
|
+
class LoginFormNotFoundError < StandardError; end
|
9
|
+
class MissingReportButtons < StandardError; end
|
10
|
+
class MissingRow < StandardError; end
|
11
|
+
class MissingXmlReport < StandardError; end
|
12
|
+
class NoMarketplacesError < StandardError; end
|
13
|
+
class NotProAccountError < StandardError; end
|
14
|
+
class ReportLoadingTimeout < StandardError; end
|
15
|
+
class ReportTotalsMismatch < StandardError; end
|
16
|
+
class UnknownReportType < StandardError; end
|
16
17
|
|
17
18
|
ATTEMPTS_BEFORE_GIVING_UP = 15 # give up after 20 minutes
|
18
19
|
MARKETPLACE_HOMEPAGE = "https://sellercentral.amazon.com/gp/homepage.html"
|
@@ -20,7 +21,7 @@ module Compactor
|
|
20
21
|
|
21
22
|
class ReportScraper
|
22
23
|
def initialize(user_credentials={})
|
23
|
-
@mechanize =
|
24
|
+
@mechanize = agent
|
24
25
|
@mechanize.max_file_buffer = 4 * 1024 * 1024
|
25
26
|
@mechanize.max_history = 2
|
26
27
|
@mechanize.agent.http.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
@@ -103,11 +104,11 @@ module Compactor
|
|
103
104
|
def payee_details(order_id)
|
104
105
|
@mechanize.get order_detail_url(order_id)
|
105
106
|
order = {}
|
106
|
-
order["BuyerName"]
|
107
|
+
order["BuyerName"] = buyer_name
|
107
108
|
order["ShippingAddress"] = shipping_address
|
108
|
-
|
109
109
|
order
|
110
110
|
rescue => e
|
111
|
+
nil
|
111
112
|
end
|
112
113
|
|
113
114
|
def get_orders(order_ids)
|
@@ -120,6 +121,10 @@ module Compactor
|
|
120
121
|
|
121
122
|
private
|
122
123
|
|
124
|
+
def agent
|
125
|
+
Mechanize.new
|
126
|
+
end
|
127
|
+
|
123
128
|
def slowdown_like_a_human(count)
|
124
129
|
sleep count ** 2
|
125
130
|
end
|
@@ -219,7 +224,7 @@ module Compactor
|
|
219
224
|
end
|
220
225
|
|
221
226
|
# 6 attempts make it wait at most a minute, or close enough to it
|
222
|
-
def wait_for_element(attempts=
|
227
|
+
def wait_for_element(attempts=default_number_of_attempts, &block)
|
223
228
|
attempts.times do |attempt|
|
224
229
|
element = yield
|
225
230
|
return element unless element.blank?
|
@@ -229,6 +234,10 @@ module Compactor
|
|
229
234
|
nil # no element found
|
230
235
|
end
|
231
236
|
|
237
|
+
def default_number_of_attempts
|
238
|
+
6
|
239
|
+
end
|
240
|
+
|
232
241
|
def rescue_empty_results(&block)
|
233
242
|
3.times do
|
234
243
|
yield
|
@@ -285,8 +294,7 @@ module Compactor
|
|
285
294
|
end
|
286
295
|
|
287
296
|
def page_has_no_results?
|
288
|
-
data_display_element =
|
289
|
-
wait_for_element { @mechanize.page.search(".data-display") }
|
297
|
+
data_display_element = @mechanize.page.search(".data-display")
|
290
298
|
|
291
299
|
fail ReportLoadingTimeout if data_display_element.blank?
|
292
300
|
|
@@ -319,8 +327,12 @@ module Compactor
|
|
319
327
|
end
|
320
328
|
|
321
329
|
def login_to_seller_central(email, password)
|
322
|
-
|
323
|
-
|
330
|
+
form = wait_for_element do
|
331
|
+
@mechanize.get MARKETPLACE_HOMEPAGE
|
332
|
+
@mechanize.page.forms.first
|
333
|
+
end
|
334
|
+
raise Compactor::Amazon::LoginFormNotFoundError if form.blank?
|
335
|
+
|
324
336
|
form.email = email
|
325
337
|
form.password = password
|
326
338
|
form.submit
|
@@ -340,4 +352,4 @@ module Compactor
|
|
340
352
|
end
|
341
353
|
end
|
342
354
|
end
|
343
|
-
end
|
355
|
+
end
|
data/lib/compactor/version.rb
CHANGED
data/test/scraper_test.rb
CHANGED
@@ -5,7 +5,7 @@ class ScraperTest < Test::Unit::TestCase
|
|
5
5
|
def setup
|
6
6
|
Compactor::Amazon::XmlParser.any_instance.stubs(:valid?).returns(true)
|
7
7
|
end
|
8
|
-
|
8
|
+
|
9
9
|
def test_should_not_find_elements_that_do_not_exist
|
10
10
|
VCR.use_cassette("AmazonReportScraper/with_good_login/find_reports/reports_to_request") do
|
11
11
|
scraper = Compactor::Amazon::ReportScraper.new(:email => "far@far.away", :password => "test")
|
@@ -28,6 +28,28 @@ class ScraperTest < Test::Unit::TestCase
|
|
28
28
|
end
|
29
29
|
end
|
30
30
|
|
31
|
+
def test_should_raise_error_if_cannot_find_login_form
|
32
|
+
Mechanize::Page.any_instance.stubs(:forms).returns([])
|
33
|
+
Compactor::Amazon::ReportScraper.any_instance.stubs(:wait_for_element).returns(nil)
|
34
|
+
VCR.use_cassette("AmazonReportScraper/with_good_login/find_reports/reports_to_request") do
|
35
|
+
assert_raises Compactor::Amazon::LoginFormNotFoundError do
|
36
|
+
Compactor::Amazon::ReportScraper.new(:email => "far@far.away", :password => "test")
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
def test_should_still_get_forms_if_first_time_fails
|
42
|
+
Compactor::Amazon::ReportScraper.any_instance.stubs(:agent).returns(MockMechanize.new)
|
43
|
+
Compactor::Amazon::ReportScraper.any_instance.stubs(:default_number_of_attempts).returns(2)
|
44
|
+
Compactor::Amazon::ReportScraper.any_instance.stubs(:bad_login?).returns(false)
|
45
|
+
Compactor::Amazon::ReportScraper.any_instance.stubs(:locked_account?).returns(false)
|
46
|
+
VCR.use_cassette("AmazonReportScraper/with_good_login/find_reports/reports_to_request") do
|
47
|
+
assert_nothing_raised do
|
48
|
+
Compactor::Amazon::ReportScraper.new(:email => "far@far.away", :password => "test")
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
31
53
|
def test_should_raise_error_with_bad_login
|
32
54
|
VCR.use_cassette("AmazonReportScraper/with_bad_login/raise_error") do
|
33
55
|
assert_raises Compactor::Amazon::AuthenticationError do
|
@@ -209,4 +231,29 @@ class ScraperTest < Test::Unit::TestCase
|
|
209
231
|
end
|
210
232
|
end
|
211
233
|
end
|
234
|
+
|
235
|
+
class MockMechanize < Mechanize
|
236
|
+
def get(url)
|
237
|
+
def get(url)
|
238
|
+
@page = MockMechanizePageWithForms.new # other times
|
239
|
+
end
|
240
|
+
@page = MockMechanizePageWithNoForms.new # first time
|
241
|
+
end
|
242
|
+
|
243
|
+
def page
|
244
|
+
@page
|
245
|
+
end
|
246
|
+
|
247
|
+
class MockMechanizePageWithForms
|
248
|
+
def forms
|
249
|
+
[OpenStruct.new(:email => nil, :password => nil, :submit => nil), OpenStruct.new(:email => nil, :password => nil, :submit => nil)]
|
250
|
+
end
|
251
|
+
end
|
252
|
+
|
253
|
+
class MockMechanizePageWithNoForms
|
254
|
+
def forms
|
255
|
+
[]
|
256
|
+
end
|
257
|
+
end
|
258
|
+
end
|
212
259
|
end
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 3
|
8
|
-
- 5
|
9
|
-
version: 0.3.5
|
8
|
+
- 8
|
9
|
+
version: 0.3.8
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Julio Santos
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2013-
|
17
|
+
date: 2013-04-15 00:00:00 -07:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|