compactor 0.2.3 → 0.2.4

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore CHANGED
@@ -1,2 +1,4 @@
1
1
  coverage
2
+ .bundle/
3
+ vendor/
2
4
 
data/Gemfile.lock CHANGED
@@ -1,20 +1,15 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- compactor (0.2.3)
5
- jruby-openssl (= 0.7.3)
4
+ compactor (0.2.4)
6
5
  mechanize (= 2.4)
7
- nokogiri (>= 1.5.0, < 1.5.3)
8
6
 
9
7
  GEM
10
8
  remote: http://rubygems.org/
11
9
  specs:
12
- bouncy-castle-java (1.5.0146.1)
13
- domain_name (0.5.7)
10
+ domain_name (0.5.4)
14
11
  unf (~> 0.0.3)
15
12
  fakeweb (1.3.0)
16
- jruby-openssl (0.7.3)
17
- bouncy-castle-java
18
13
  mechanize (2.4)
19
14
  domain_name (~> 0.5, >= 0.5.1)
20
15
  mime-types (~> 1.17, >= 1.17.2)
@@ -29,15 +24,13 @@ GEM
29
24
  metaclass (~> 0.0.1)
30
25
  net-http-digest_auth (1.2.1)
31
26
  net-http-persistent (2.8)
32
- nokogiri (1.5.2)
33
- nokogiri (1.5.2-java)
27
+ nokogiri (1.5.6)
34
28
  ntlm-http (0.1.1)
35
29
  rake (10.0.2)
36
30
  rcov (0.9.11)
37
31
  rcov (0.9.11-java)
38
32
  unf (0.0.5)
39
33
  unf_ext
40
- unf (0.0.5-java)
41
34
  unf_ext (0.0.5)
42
35
  vcr (2.0.1)
43
36
  webrobots (0.0.13)
data/README.md CHANGED
@@ -2,6 +2,8 @@
2
2
 
3
3
  [![Build Status](https://secure.travis-ci.org/julio/compactor.png)](http://travis-ci.org/julio/compactor)
4
4
 
5
+ [![Gem Version](https://badge.fury.io/rb/compactor.png)](http://badge.fury.io/rb/compactor)
6
+
5
7
  Scrape Amazon Seller Central
6
8
 
7
9
  ## Installation
@@ -70,4 +72,4 @@ scrape "me@there.com", "secret", DateTime.parse("1/1/2012"), DateTime.now
70
72
  ## To-do
71
73
 
72
74
  - Refactor
73
- - 100% coverage
75
+ - 100% coverage
Binary file
@@ -11,6 +11,7 @@ module Compactor
11
11
  class UnknownReportType < StandardError; end
12
12
  class MissingXmlReport < StandardError; end
13
13
  class MissingReportButtons < StandardError; end
14
+ class ReportLoadingTimeout < StandardError; end
14
15
 
15
16
  ATTEMPTS_BEFORE_GIVING_UP = 15 # give up after 20 minutes
16
17
  MARKETPLACE_HOMEPAGE = "https://sellercentral.amazon.com/gp/homepage.html"
@@ -205,6 +206,17 @@ module Compactor
205
206
  fail Compactor::Amazon::UnknownReportType
206
207
  end
207
208
 
209
+ # 6 attempts make it wait at most a minute, or close enough to it
210
+ def wait_for_element(attempts=6, &block)
211
+ attempts.times do |attempt|
212
+ element = yield
213
+ return element unless element.blank?
214
+ sleep 2**attempt # => 1 sec, 2 secs, 4, 8, 16, 32, etc
215
+ end
216
+
217
+ nil # no element found
218
+ end
219
+
208
220
  def rescue_empty_results(&block)
209
221
  3.times do
210
222
  yield
@@ -236,7 +248,8 @@ module Compactor
236
248
  end
237
249
 
238
250
  def get_reports_to_watch(reports_to_watch, reports, count=0)
239
- return if reports_to_watch.empty? || timeout_fetching_reports(reports_to_watch, reports, count)
251
+ return if reports_to_watch.empty? ||
252
+ timeout_fetching_reports(reports_to_watch, reports, count)
240
253
 
241
254
  rescue_empty_results { @mechanize.get @mechanize.page.uri }
242
255
  reports_to_watch.reject! do |row|
@@ -269,7 +282,12 @@ module Compactor
269
282
  end
270
283
 
271
284
  def page_has_no_results?
272
- @mechanize.page.search!(".data-display").text.include? "No results found"
285
+ data_display_element =
286
+ wait_for_element { @mechanize.page.search(".data-display") }
287
+
288
+ fail ReportLoadingTimeout if data_display_element.blank?
289
+
290
+ data_display_element.text.include? "No results found"
273
291
  end
274
292
 
275
293
  def get_reports_in_page
@@ -1,3 +1,3 @@
1
1
  module Compactor
2
- VERSION = "0.2.3"
2
+ VERSION = "0.2.4"
3
3
  end
data/test/scraper_test.rb CHANGED
@@ -2,6 +2,28 @@ require File.dirname(__FILE__) + '/test_helper'
2
2
  require File.dirname(__FILE__) + '/../lib/compactor'
3
3
 
4
4
  class ScraperTest < Test::Unit::TestCase
5
+ def test_should_not_find_elements_that_do_not_exist
6
+ VCR.use_cassette("AmazonReportScraper/with_good_login/find_reports/reports_to_request") do
7
+ scraper = Compactor::Amazon::ReportScraper.new(:email => "far@far.away", :password => "test")
8
+ mechanize = scraper.instance_variable_get("@mechanize")
9
+ element = scraper.send(:wait_for_element, 1) do
10
+ mechanize.page.search(".something-that-does-not-exist")
11
+ end
12
+ assert_nil element
13
+ end
14
+ end
15
+
16
+ def test_should_find_elements_that_do_exist
17
+ VCR.use_cassette("AmazonReportScraper/with_good_login/find_reports/reports_to_request") do
18
+ scraper = Compactor::Amazon::ReportScraper.new(:email => "far@far.away", :password => "test")
19
+ mechanize = scraper.instance_variable_get("@mechanize")
20
+ element = scraper.send(:wait_for_element, 1) do
21
+ mechanize.page.forms
22
+ end
23
+ assert Mechanize::Form === element[0]
24
+ end
25
+ end
26
+
5
27
  def test_should_raise_error_with_bad_login
6
28
  VCR.use_cassette("AmazonReportScraper/with_bad_login/raise_error") do
7
29
  assert_raises Compactor::Amazon::AuthenticationError do
@@ -9,7 +31,7 @@ class ScraperTest < Test::Unit::TestCase
9
31
  end
10
32
  end
11
33
  end
12
-
34
+
13
35
  def test_should_be_xml_if_button_label_is_Download_XML
14
36
  assert_equal :xml, Compactor::Amazon::ReportScraper.report_type("Download XML")
15
37
  end
@@ -27,7 +49,7 @@ class ScraperTest < Test::Unit::TestCase
27
49
  Compactor::Amazon::ReportScraper.report_type("Download PDF")
28
50
  end
29
51
  end
30
-
52
+
31
53
  def test_should_be_able_to_get_buyer_name_and_shipping_address_for_orders
32
54
  VCR.use_cassette("AmazonReportScraper/with_good_login/get_orders") do
33
55
  scraper = Compactor::Amazon::ReportScraper.new(:email => "far@far.away", :password => "test")
@@ -46,7 +68,7 @@ class ScraperTest < Test::Unit::TestCase
46
68
  }, orders)
47
69
  end
48
70
  end
49
-
71
+
50
72
  def test_should_support_addresses_where_the_street_address_line_does_not_start_with_a_number
51
73
  VCR.use_cassette("AmazonReportScraper/with_good_login/shipping_address_not_starting_with_number") do
52
74
  scraper = Compactor::Amazon::ReportScraper.new(:email => "far@far.away", :password => "test")
@@ -65,7 +87,7 @@ class ScraperTest < Test::Unit::TestCase
65
87
  }, orders)
66
88
  end
67
89
  end
68
-
90
+
69
91
  def test_should_handle_large_reports
70
92
  VCR.use_cassette("AmazonReportScraper/with_good_login/get_orders_big") do
71
93
  scraper = Compactor::Amazon::ReportScraper.new(:email => "far@far.away", :password => "test")
@@ -93,7 +115,7 @@ class ScraperTest < Test::Unit::TestCase
93
115
  assert_equal( true, reports.any? { |type, reports| !reports.empty? } )
94
116
  end
95
117
  end
96
-
118
+
97
119
  def test_should_find_reports_in_more_than_on_page
98
120
  VCR.use_cassette("AmazonReportScraper/with_good_login/find_reports/multiple_pages") do
99
121
  scraper = Compactor::Amazon::ReportScraper.new(:email => "far@far.away", :password => "test")
@@ -102,7 +124,7 @@ class ScraperTest < Test::Unit::TestCase
102
124
  assert_equal( true, reports.any? { |type, reports| !reports.empty? } )
103
125
  end
104
126
  end
105
-
127
+
106
128
  def test_should_find_no_reports_if_not_in_date_range
107
129
  VCR.use_cassette("AmazonReportScraper/with_good_login/find_reports/no_reports") do
108
130
  scraper = Compactor::Amazon::ReportScraper.new(:email => "far@far.away", :password => "test")
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: compactor
3
3
  version: !ruby/object:Gem::Version
4
- hash: 17
4
+ hash: 31
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 2
9
- - 3
10
- version: 0.2.3
9
+ - 4
10
+ version: 0.2.4
11
11
  platform: ruby
12
12
  authors:
13
13
  - Julio Santos
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2013-01-12 00:00:00 Z
18
+ date: 2013-02-14 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  name: mechanize
@@ -125,6 +125,7 @@ files:
125
125
  - LICENSE
126
126
  - README.md
127
127
  - Rakefile
128
+ - compactor-0.2.3.gem
128
129
  - compactor.gemspec
129
130
  - lib/compactor.rb
130
131
  - lib/compactor/extensions.rb
@@ -205,3 +206,4 @@ test_files:
205
206
  - test/scraped_row_test.rb
206
207
  - test/scraper_test.rb
207
208
  - test/test_helper.rb
209
+ has_rdoc: