compactor 0.2.3 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +2 -0
- data/Gemfile.lock +3 -10
- data/README.md +3 -1
- data/compactor-0.2.3.gem +0 -0
- data/lib/compactor/scraper.rb +20 -2
- data/lib/compactor/version.rb +1 -1
- data/test/scraper_test.rb +28 -6
- metadata +6 -4
data/.gitignore
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,20 +1,15 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
compactor (0.2.3)
|
5
|
-
jruby-openssl (= 0.7.3)
|
4
|
+
compactor (0.2.4)
|
6
5
|
mechanize (= 2.4)
|
7
|
-
nokogiri (>= 1.5.0, < 1.5.3)
|
8
6
|
|
9
7
|
GEM
|
10
8
|
remote: http://rubygems.org/
|
11
9
|
specs:
|
12
|
-
|
13
|
-
domain_name (0.5.7)
|
10
|
+
domain_name (0.5.4)
|
14
11
|
unf (~> 0.0.3)
|
15
12
|
fakeweb (1.3.0)
|
16
|
-
jruby-openssl (0.7.3)
|
17
|
-
bouncy-castle-java
|
18
13
|
mechanize (2.4)
|
19
14
|
domain_name (~> 0.5, >= 0.5.1)
|
20
15
|
mime-types (~> 1.17, >= 1.17.2)
|
@@ -29,15 +24,13 @@ GEM
|
|
29
24
|
metaclass (~> 0.0.1)
|
30
25
|
net-http-digest_auth (1.2.1)
|
31
26
|
net-http-persistent (2.8)
|
32
|
-
nokogiri (1.5.2)
|
33
|
-
nokogiri (1.5.2-java)
|
27
|
+
nokogiri (1.5.6)
|
34
28
|
ntlm-http (0.1.1)
|
35
29
|
rake (10.0.2)
|
36
30
|
rcov (0.9.11)
|
37
31
|
rcov (0.9.11-java)
|
38
32
|
unf (0.0.5)
|
39
33
|
unf_ext
|
40
|
-
unf (0.0.5-java)
|
41
34
|
unf_ext (0.0.5)
|
42
35
|
vcr (2.0.1)
|
43
36
|
webrobots (0.0.13)
|
data/README.md
CHANGED
@@ -2,6 +2,8 @@
|
|
2
2
|
|
3
3
|
[](http://travis-ci.org/julio/compactor)
|
4
4
|
|
5
|
+
[](http://badge.fury.io/rb/compactor)
|
6
|
+
|
5
7
|
Scrape Amazon Seller Central
|
6
8
|
|
7
9
|
## Installation
|
@@ -70,4 +72,4 @@ scrape "me@there.com", "secret", DateTime.parse("1/1/2012"), DateTime.now
|
|
70
72
|
## To-do
|
71
73
|
|
72
74
|
- Refactor
|
73
|
-
- 100% coverage
|
75
|
+
- 100% coverage
|
data/compactor-0.2.3.gem
ADDED
Binary file
|
data/lib/compactor/scraper.rb
CHANGED
@@ -11,6 +11,7 @@ module Compactor
|
|
11
11
|
class UnknownReportType < StandardError; end
|
12
12
|
class MissingXmlReport < StandardError; end
|
13
13
|
class MissingReportButtons < StandardError; end
|
14
|
+
class ReportLoadingTimeout < StandardError; end
|
14
15
|
|
15
16
|
ATTEMPTS_BEFORE_GIVING_UP = 15 # give up after 20 minutes
|
16
17
|
MARKETPLACE_HOMEPAGE = "https://sellercentral.amazon.com/gp/homepage.html"
|
@@ -205,6 +206,17 @@ module Compactor
|
|
205
206
|
fail Compactor::Amazon::UnknownReportType
|
206
207
|
end
|
207
208
|
|
209
|
+
# 6 attempts make it wait at most a minute, or close enough to it
|
210
|
+
def wait_for_element(attempts=6, &block)
|
211
|
+
attempts.times do |attempt|
|
212
|
+
element = yield
|
213
|
+
return element unless element.blank?
|
214
|
+
sleep 2**attempt # => 1 sec, 2 secs, 4, 8, 16, 32, etc
|
215
|
+
end
|
216
|
+
|
217
|
+
nil # no element found
|
218
|
+
end
|
219
|
+
|
208
220
|
def rescue_empty_results(&block)
|
209
221
|
3.times do
|
210
222
|
yield
|
@@ -236,7 +248,8 @@ module Compactor
|
|
236
248
|
end
|
237
249
|
|
238
250
|
def get_reports_to_watch(reports_to_watch, reports, count=0)
|
239
|
-
return if reports_to_watch.empty? ||
|
251
|
+
return if reports_to_watch.empty? ||
|
252
|
+
timeout_fetching_reports(reports_to_watch, reports, count)
|
240
253
|
|
241
254
|
rescue_empty_results { @mechanize.get @mechanize.page.uri }
|
242
255
|
reports_to_watch.reject! do |row|
|
@@ -269,7 +282,12 @@ module Compactor
|
|
269
282
|
end
|
270
283
|
|
271
284
|
def page_has_no_results?
|
272
|
-
|
285
|
+
data_display_element =
|
286
|
+
wait_for_element { @mechanize.page.search(".data-display") }
|
287
|
+
|
288
|
+
fail ReportLoadingTimeout if data_display_element.blank?
|
289
|
+
|
290
|
+
data_display_element.text.include? "No results found"
|
273
291
|
end
|
274
292
|
|
275
293
|
def get_reports_in_page
|
data/lib/compactor/version.rb
CHANGED
data/test/scraper_test.rb
CHANGED
@@ -2,6 +2,28 @@ require File.dirname(__FILE__) + '/test_helper'
|
|
2
2
|
require File.dirname(__FILE__) + '/../lib/compactor'
|
3
3
|
|
4
4
|
class ScraperTest < Test::Unit::TestCase
|
5
|
+
def test_should_not_find_elements_that_do_not_exist
|
6
|
+
VCR.use_cassette("AmazonReportScraper/with_good_login/find_reports/reports_to_request") do
|
7
|
+
scraper = Compactor::Amazon::ReportScraper.new(:email => "far@far.away", :password => "test")
|
8
|
+
mechanize = scraper.instance_variable_get("@mechanize")
|
9
|
+
element = scraper.send(:wait_for_element, 1) do
|
10
|
+
mechanize.page.search(".something-that-does-not-exist")
|
11
|
+
end
|
12
|
+
assert_nil element
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
def test_should_find_elements_that_do_exist
|
17
|
+
VCR.use_cassette("AmazonReportScraper/with_good_login/find_reports/reports_to_request") do
|
18
|
+
scraper = Compactor::Amazon::ReportScraper.new(:email => "far@far.away", :password => "test")
|
19
|
+
mechanize = scraper.instance_variable_get("@mechanize")
|
20
|
+
element = scraper.send(:wait_for_element, 1) do
|
21
|
+
mechanize.page.forms
|
22
|
+
end
|
23
|
+
assert Mechanize::Form === element[0]
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
5
27
|
def test_should_raise_error_with_bad_login
|
6
28
|
VCR.use_cassette("AmazonReportScraper/with_bad_login/raise_error") do
|
7
29
|
assert_raises Compactor::Amazon::AuthenticationError do
|
@@ -9,7 +31,7 @@ class ScraperTest < Test::Unit::TestCase
|
|
9
31
|
end
|
10
32
|
end
|
11
33
|
end
|
12
|
-
|
34
|
+
|
13
35
|
def test_should_be_xml_if_button_label_is_Download_XML
|
14
36
|
assert_equal :xml, Compactor::Amazon::ReportScraper.report_type("Download XML")
|
15
37
|
end
|
@@ -27,7 +49,7 @@ class ScraperTest < Test::Unit::TestCase
|
|
27
49
|
Compactor::Amazon::ReportScraper.report_type("Download PDF")
|
28
50
|
end
|
29
51
|
end
|
30
|
-
|
52
|
+
|
31
53
|
def test_should_be_able_to_get_buyer_name_and_shipping_address_for_orders
|
32
54
|
VCR.use_cassette("AmazonReportScraper/with_good_login/get_orders") do
|
33
55
|
scraper = Compactor::Amazon::ReportScraper.new(:email => "far@far.away", :password => "test")
|
@@ -46,7 +68,7 @@ class ScraperTest < Test::Unit::TestCase
|
|
46
68
|
}, orders)
|
47
69
|
end
|
48
70
|
end
|
49
|
-
|
71
|
+
|
50
72
|
def test_should_support_addresses_where_the_street_address_line_does_not_start_with_a_number
|
51
73
|
VCR.use_cassette("AmazonReportScraper/with_good_login/shipping_address_not_starting_with_number") do
|
52
74
|
scraper = Compactor::Amazon::ReportScraper.new(:email => "far@far.away", :password => "test")
|
@@ -65,7 +87,7 @@ class ScraperTest < Test::Unit::TestCase
|
|
65
87
|
}, orders)
|
66
88
|
end
|
67
89
|
end
|
68
|
-
|
90
|
+
|
69
91
|
def test_should_handle_large_reports
|
70
92
|
VCR.use_cassette("AmazonReportScraper/with_good_login/get_orders_big") do
|
71
93
|
scraper = Compactor::Amazon::ReportScraper.new(:email => "far@far.away", :password => "test")
|
@@ -93,7 +115,7 @@ class ScraperTest < Test::Unit::TestCase
|
|
93
115
|
assert_equal( true, reports.any? { |type, reports| !reports.empty? } )
|
94
116
|
end
|
95
117
|
end
|
96
|
-
|
118
|
+
|
97
119
|
def test_should_find_reports_in_more_than_on_page
|
98
120
|
VCR.use_cassette("AmazonReportScraper/with_good_login/find_reports/multiple_pages") do
|
99
121
|
scraper = Compactor::Amazon::ReportScraper.new(:email => "far@far.away", :password => "test")
|
@@ -102,7 +124,7 @@ class ScraperTest < Test::Unit::TestCase
|
|
102
124
|
assert_equal( true, reports.any? { |type, reports| !reports.empty? } )
|
103
125
|
end
|
104
126
|
end
|
105
|
-
|
127
|
+
|
106
128
|
def test_should_find_no_reports_if_not_in_date_range
|
107
129
|
VCR.use_cassette("AmazonReportScraper/with_good_login/find_reports/no_reports") do
|
108
130
|
scraper = Compactor::Amazon::ReportScraper.new(:email => "far@far.away", :password => "test")
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: compactor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 31
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 2
|
9
|
-
- 3
|
10
|
-
version: 0.2.3
|
9
|
+
- 4
|
10
|
+
version: 0.2.4
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Julio Santos
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2013-
|
18
|
+
date: 2013-02-14 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
name: mechanize
|
@@ -125,6 +125,7 @@ files:
|
|
125
125
|
- LICENSE
|
126
126
|
- README.md
|
127
127
|
- Rakefile
|
128
|
+
- compactor-0.2.3.gem
|
128
129
|
- compactor.gemspec
|
129
130
|
- lib/compactor.rb
|
130
131
|
- lib/compactor/extensions.rb
|
@@ -205,3 +206,4 @@ test_files:
|
|
205
206
|
- test/scraped_row_test.rb
|
206
207
|
- test/scraper_test.rb
|
207
208
|
- test/test_helper.rb
|
209
|
+
has_rdoc:
|