compactor 0.2.3 → 0.2.4
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +2 -0
- data/Gemfile.lock +3 -10
- data/README.md +3 -1
- data/compactor-0.2.3.gem +0 -0
- data/lib/compactor/scraper.rb +20 -2
- data/lib/compactor/version.rb +1 -1
- data/test/scraper_test.rb +28 -6
- metadata +6 -4
data/.gitignore
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,20 +1,15 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
compactor (0.2.3)
|
5
|
-
jruby-openssl (= 0.7.3)
|
4
|
+
compactor (0.2.4)
|
6
5
|
mechanize (= 2.4)
|
7
|
-
nokogiri (>= 1.5.0, < 1.5.3)
|
8
6
|
|
9
7
|
GEM
|
10
8
|
remote: http://rubygems.org/
|
11
9
|
specs:
|
12
|
-
|
13
|
-
domain_name (0.5.7)
|
10
|
+
domain_name (0.5.4)
|
14
11
|
unf (~> 0.0.3)
|
15
12
|
fakeweb (1.3.0)
|
16
|
-
jruby-openssl (0.7.3)
|
17
|
-
bouncy-castle-java
|
18
13
|
mechanize (2.4)
|
19
14
|
domain_name (~> 0.5, >= 0.5.1)
|
20
15
|
mime-types (~> 1.17, >= 1.17.2)
|
@@ -29,15 +24,13 @@ GEM
|
|
29
24
|
metaclass (~> 0.0.1)
|
30
25
|
net-http-digest_auth (1.2.1)
|
31
26
|
net-http-persistent (2.8)
|
32
|
-
nokogiri (1.5.2)
|
33
|
-
nokogiri (1.5.2-java)
|
27
|
+
nokogiri (1.5.6)
|
34
28
|
ntlm-http (0.1.1)
|
35
29
|
rake (10.0.2)
|
36
30
|
rcov (0.9.11)
|
37
31
|
rcov (0.9.11-java)
|
38
32
|
unf (0.0.5)
|
39
33
|
unf_ext
|
40
|
-
unf (0.0.5-java)
|
41
34
|
unf_ext (0.0.5)
|
42
35
|
vcr (2.0.1)
|
43
36
|
webrobots (0.0.13)
|
data/README.md
CHANGED
@@ -2,6 +2,8 @@
|
|
2
2
|
|
3
3
|
[![Build Status](https://secure.travis-ci.org/julio/compactor.png)](http://travis-ci.org/julio/compactor)
|
4
4
|
|
5
|
+
[![Gem Version](https://badge.fury.io/rb/compactor.png)](http://badge.fury.io/rb/compactor)
|
6
|
+
|
5
7
|
Scrape Amazon Seller Central
|
6
8
|
|
7
9
|
## Installation
|
@@ -70,4 +72,4 @@ scrape "me@there.com", "secret", DateTime.parse("1/1/2012"), DateTime.now
|
|
70
72
|
## To-do
|
71
73
|
|
72
74
|
- Refactor
|
73
|
-
- 100% coverage
|
75
|
+
- 100% coverage
|
data/compactor-0.2.3.gem
ADDED
Binary file
|
data/lib/compactor/scraper.rb
CHANGED
@@ -11,6 +11,7 @@ module Compactor
|
|
11
11
|
class UnknownReportType < StandardError; end
|
12
12
|
class MissingXmlReport < StandardError; end
|
13
13
|
class MissingReportButtons < StandardError; end
|
14
|
+
class ReportLoadingTimeout < StandardError; end
|
14
15
|
|
15
16
|
ATTEMPTS_BEFORE_GIVING_UP = 15 # give up after 20 minutes
|
16
17
|
MARKETPLACE_HOMEPAGE = "https://sellercentral.amazon.com/gp/homepage.html"
|
@@ -205,6 +206,17 @@ module Compactor
|
|
205
206
|
fail Compactor::Amazon::UnknownReportType
|
206
207
|
end
|
207
208
|
|
209
|
+
# 6 attempts make it wait at most a minute, or close enough to it
|
210
|
+
def wait_for_element(attempts=6, &block)
|
211
|
+
attempts.times do |attempt|
|
212
|
+
element = yield
|
213
|
+
return element unless element.blank?
|
214
|
+
sleep 2**attempt # => 1 sec, 2 secs, 4, 8, 16, 32, etc
|
215
|
+
end
|
216
|
+
|
217
|
+
nil # no element found
|
218
|
+
end
|
219
|
+
|
208
220
|
def rescue_empty_results(&block)
|
209
221
|
3.times do
|
210
222
|
yield
|
@@ -236,7 +248,8 @@ module Compactor
|
|
236
248
|
end
|
237
249
|
|
238
250
|
def get_reports_to_watch(reports_to_watch, reports, count=0)
|
239
|
-
return if reports_to_watch.empty? ||
|
251
|
+
return if reports_to_watch.empty? ||
|
252
|
+
timeout_fetching_reports(reports_to_watch, reports, count)
|
240
253
|
|
241
254
|
rescue_empty_results { @mechanize.get @mechanize.page.uri }
|
242
255
|
reports_to_watch.reject! do |row|
|
@@ -269,7 +282,12 @@ module Compactor
|
|
269
282
|
end
|
270
283
|
|
271
284
|
def page_has_no_results?
|
272
|
-
|
285
|
+
data_display_element =
|
286
|
+
wait_for_element { @mechanize.page.search(".data-display") }
|
287
|
+
|
288
|
+
fail ReportLoadingTimeout if data_display_element.blank?
|
289
|
+
|
290
|
+
data_display_element.text.include? "No results found"
|
273
291
|
end
|
274
292
|
|
275
293
|
def get_reports_in_page
|
data/lib/compactor/version.rb
CHANGED
data/test/scraper_test.rb
CHANGED
@@ -2,6 +2,28 @@ require File.dirname(__FILE__) + '/test_helper'
|
|
2
2
|
require File.dirname(__FILE__) + '/../lib/compactor'
|
3
3
|
|
4
4
|
class ScraperTest < Test::Unit::TestCase
|
5
|
+
def test_should_not_find_elements_that_do_not_exist
|
6
|
+
VCR.use_cassette("AmazonReportScraper/with_good_login/find_reports/reports_to_request") do
|
7
|
+
scraper = Compactor::Amazon::ReportScraper.new(:email => "far@far.away", :password => "test")
|
8
|
+
mechanize = scraper.instance_variable_get("@mechanize")
|
9
|
+
element = scraper.send(:wait_for_element, 1) do
|
10
|
+
mechanize.page.search(".something-that-does-not-exist")
|
11
|
+
end
|
12
|
+
assert_nil element
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
def test_should_find_elements_that_do_exist
|
17
|
+
VCR.use_cassette("AmazonReportScraper/with_good_login/find_reports/reports_to_request") do
|
18
|
+
scraper = Compactor::Amazon::ReportScraper.new(:email => "far@far.away", :password => "test")
|
19
|
+
mechanize = scraper.instance_variable_get("@mechanize")
|
20
|
+
element = scraper.send(:wait_for_element, 1) do
|
21
|
+
mechanize.page.forms
|
22
|
+
end
|
23
|
+
assert Mechanize::Form === element[0]
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
5
27
|
def test_should_raise_error_with_bad_login
|
6
28
|
VCR.use_cassette("AmazonReportScraper/with_bad_login/raise_error") do
|
7
29
|
assert_raises Compactor::Amazon::AuthenticationError do
|
@@ -9,7 +31,7 @@ class ScraperTest < Test::Unit::TestCase
|
|
9
31
|
end
|
10
32
|
end
|
11
33
|
end
|
12
|
-
|
34
|
+
|
13
35
|
def test_should_be_xml_if_button_label_is_Download_XML
|
14
36
|
assert_equal :xml, Compactor::Amazon::ReportScraper.report_type("Download XML")
|
15
37
|
end
|
@@ -27,7 +49,7 @@ class ScraperTest < Test::Unit::TestCase
|
|
27
49
|
Compactor::Amazon::ReportScraper.report_type("Download PDF")
|
28
50
|
end
|
29
51
|
end
|
30
|
-
|
52
|
+
|
31
53
|
def test_should_be_able_to_get_buyer_name_and_shipping_address_for_orders
|
32
54
|
VCR.use_cassette("AmazonReportScraper/with_good_login/get_orders") do
|
33
55
|
scraper = Compactor::Amazon::ReportScraper.new(:email => "far@far.away", :password => "test")
|
@@ -46,7 +68,7 @@ class ScraperTest < Test::Unit::TestCase
|
|
46
68
|
}, orders)
|
47
69
|
end
|
48
70
|
end
|
49
|
-
|
71
|
+
|
50
72
|
def test_should_support_addresses_where_the_street_address_line_does_not_start_with_a_number
|
51
73
|
VCR.use_cassette("AmazonReportScraper/with_good_login/shipping_address_not_starting_with_number") do
|
52
74
|
scraper = Compactor::Amazon::ReportScraper.new(:email => "far@far.away", :password => "test")
|
@@ -65,7 +87,7 @@ class ScraperTest < Test::Unit::TestCase
|
|
65
87
|
}, orders)
|
66
88
|
end
|
67
89
|
end
|
68
|
-
|
90
|
+
|
69
91
|
def test_should_handle_large_reports
|
70
92
|
VCR.use_cassette("AmazonReportScraper/with_good_login/get_orders_big") do
|
71
93
|
scraper = Compactor::Amazon::ReportScraper.new(:email => "far@far.away", :password => "test")
|
@@ -93,7 +115,7 @@ class ScraperTest < Test::Unit::TestCase
|
|
93
115
|
assert_equal( true, reports.any? { |type, reports| !reports.empty? } )
|
94
116
|
end
|
95
117
|
end
|
96
|
-
|
118
|
+
|
97
119
|
def test_should_find_reports_in_more_than_on_page
|
98
120
|
VCR.use_cassette("AmazonReportScraper/with_good_login/find_reports/multiple_pages") do
|
99
121
|
scraper = Compactor::Amazon::ReportScraper.new(:email => "far@far.away", :password => "test")
|
@@ -102,7 +124,7 @@ class ScraperTest < Test::Unit::TestCase
|
|
102
124
|
assert_equal( true, reports.any? { |type, reports| !reports.empty? } )
|
103
125
|
end
|
104
126
|
end
|
105
|
-
|
127
|
+
|
106
128
|
def test_should_find_no_reports_if_not_in_date_range
|
107
129
|
VCR.use_cassette("AmazonReportScraper/with_good_login/find_reports/no_reports") do
|
108
130
|
scraper = Compactor::Amazon::ReportScraper.new(:email => "far@far.away", :password => "test")
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: compactor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 31
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 2
|
9
|
-
- 3
|
10
|
-
version: 0.2.3
|
9
|
+
- 4
|
10
|
+
version: 0.2.4
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Julio Santos
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2013-
|
18
|
+
date: 2013-02-14 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
name: mechanize
|
@@ -125,6 +125,7 @@ files:
|
|
125
125
|
- LICENSE
|
126
126
|
- README.md
|
127
127
|
- Rakefile
|
128
|
+
- compactor-0.2.3.gem
|
128
129
|
- compactor.gemspec
|
129
130
|
- lib/compactor.rb
|
130
131
|
- lib/compactor/extensions.rb
|
@@ -205,3 +206,4 @@ test_files:
|
|
205
206
|
- test/scraped_row_test.rb
|
206
207
|
- test/scraper_test.rb
|
207
208
|
- test/test_helper.rb
|
209
|
+
has_rdoc:
|