compactor 0.1.6 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile.lock CHANGED
@@ -1,15 +1,20 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- compactor (0.1.6)
4
+ compactor (0.2.3)
5
+ jruby-openssl (= 0.7.3)
5
6
  mechanize (= 2.4)
7
+ nokogiri (>= 1.5.0, < 1.5.3)
6
8
 
7
9
  GEM
8
10
  remote: http://rubygems.org/
9
11
  specs:
10
- domain_name (0.5.4)
12
+ bouncy-castle-java (1.5.0146.1)
13
+ domain_name (0.5.7)
11
14
  unf (~> 0.0.3)
12
15
  fakeweb (1.3.0)
16
+ jruby-openssl (0.7.3)
17
+ bouncy-castle-java
13
18
  mechanize (2.4)
14
19
  domain_name (~> 0.5, >= 0.5.1)
15
20
  mime-types (~> 1.17, >= 1.17.2)
@@ -24,12 +29,15 @@ GEM
24
29
  metaclass (~> 0.0.1)
25
30
  net-http-digest_auth (1.2.1)
26
31
  net-http-persistent (2.8)
27
- nokogiri (1.5.5)
32
+ nokogiri (1.5.2)
33
+ nokogiri (1.5.2-java)
28
34
  ntlm-http (0.1.1)
29
- rake (0.9.2.2)
35
+ rake (10.0.2)
30
36
  rcov (0.9.11)
37
+ rcov (0.9.11-java)
31
38
  unf (0.0.5)
32
39
  unf_ext
40
+ unf (0.0.5-java)
33
41
  unf_ext (0.0.5)
34
42
  vcr (2.0.1)
35
43
  webrobots (0.0.13)
data/README.md CHANGED
@@ -24,8 +24,34 @@ Or install it yourself as:
24
24
  rake test:coverage
25
25
  ```
26
26
 
27
- ```
28
- more soon
27
+ ```ruby
28
+ def scrape(email, password, from, to)
29
+ scraper = Compactor::Amazon::ReportScraper.new(:email => email, :password => password)
30
+ marketplaces = scraper.marketplaces
31
+
32
+ original_from = from
33
+ original_to = to
34
+
35
+ marketplaces.each do |marketplace|
36
+ scraper.select_marketplace marketplace[1]
37
+
38
+ from = original_from
39
+ to = original_to
40
+
41
+ puts "Marketplace: #{marketplace[1]}"
42
+ while from < to
43
+ begin
44
+ reports_by_type = scraper.reports(from, to)
45
+ puts "There are #{reports_by_type.size} reports between #{from.to_date} and #{to.to_date}"
46
+ rescue Exception => e
47
+ puts "ERROR: #{e.message} - USER: #{email}"
48
+ end
49
+ from += 1.week
50
+ end
51
+ end
52
+ end
53
+
54
+ scrape "me@there.com", "secret", DateTime.parse("1/1/2012"), DateTime.now
29
55
  ```
30
56
 
31
57
  ## Contributing
@@ -44,4 +70,4 @@ more soon
44
70
  ## To-do
45
71
 
46
72
  - Refactor
47
- - document
73
+ - 100% coverage
data/compactor.gemspec CHANGED
@@ -22,7 +22,7 @@ Gem::Specification.new do |gem|
22
22
 
23
23
  if RUBY_PLATFORM == "java"
24
24
  gem.add_runtime_dependency "jruby-openssl", '0.7.3'
25
- gem.add_runtime_dependency "nokogiri", "1.5.0.beta.2"
25
+ gem.add_runtime_dependency "nokogiri", ">= 1.5.0", "< 1.5.3"
26
26
  end
27
27
 
28
28
  gem.add_development_dependency "rake"
@@ -2,6 +2,16 @@ class Object
2
2
  def blank?; respond_to?(:empty?) ? empty? : !self; end
3
3
  end
4
4
 
5
+ class Date
6
+ def self.parse_to_us_format(date)
7
+ if date.is_a? String
8
+ date_format = date['-'] ? "%Y-%m-%d" : "%m/%d/%Y"
9
+ date = Date.strptime(date, date_format)
10
+ end
11
+ date.strftime("%m/%d/%y")
12
+ end
13
+ end
14
+
5
15
  module Nokogiri
6
16
  class MissingElement < ::StandardError; end
7
17
 
@@ -7,7 +7,7 @@ module Compactor
7
7
  end
8
8
 
9
9
  def can_download_report?
10
- !report_buttons.empty?
10
+ !report_buttons.blank?
11
11
  end
12
12
 
13
13
  def report_buttons
@@ -17,8 +17,10 @@ module Compactor
17
17
  end
18
18
 
19
19
  def download_report
20
- report_url = report_buttons[0].node["href"]
21
- report_identifier = report_buttons[0].node.search(".button_label").text
20
+ buttons = report_buttons
21
+ button_index = index_of_button(buttons)
22
+ report_url = buttons[button_index].node["href"]
23
+ report_identifier = buttons[button_index].node.search(".button_label").text
22
24
  type = ReportScraper.report_type(report_identifier)
23
25
  response_body = @mechanize.get(report_url).body
24
26
 
@@ -43,18 +45,25 @@ module Compactor
43
45
  })
44
46
  end
45
47
 
46
- def ready?
47
- div = last_cell.search("div")[-1]
48
- text = div.text
48
+ # A settlement period (row) is considered ready to be parsed
49
+ # if it is not processing, open, or in progress, and the "regenerate"
50
+ # button is not present. This means that all that is left is 1 or more
51
+ # buttons to get the actual reports.
52
+ def requestable_report?
53
+ !last_cell.search(".regenerateButton").empty?
54
+ end
55
+
56
+ def not_settled_report?
57
+ text = last_div.text
49
58
 
50
- ignorable_periods = ["(Processing)", "(Open)", "In Progress"]
51
- !ignorable_periods.any? { |ignore_text| text.include?(ignore_text) &&
52
- div.search(".regenerateButton").blank? }
59
+ # Is the report not settled yet? (in pending-like state)
60
+ ["(Processing)", "(Open)", "In Progress"].any? do |report_state|
61
+ text.include?(report_state)
62
+ end
53
63
  end
54
64
 
55
65
  def deposit_amount
56
66
  @deposit_amount = fetch_deposit_amount if !@deposit_amount
57
-
58
67
  @deposit_amount
59
68
  end
60
69
 
@@ -64,6 +73,10 @@ module Compactor
64
73
 
65
74
  private
66
75
 
76
+ def last_div
77
+ last_cell.search("div")[-1]
78
+ end
79
+
67
80
  def fetch_deposit_amount
68
81
  deposit_cell = @node.search("td")[-2]
69
82
  deposit_cell ? deposit_cell.text.gsub(/[^0-9\.]/, '').to_f : 0.0
@@ -76,6 +89,19 @@ module Compactor
76
89
  def last_cell
77
90
  @last_cell ||= @node.search("td")[-1]
78
91
  end
92
+
93
+ def index_of_button(buttons)
94
+ raise MissingReportButtons if buttons.blank? # no buttons at all!
95
+
96
+ buttons.each_with_index do |button, index|
97
+ # XML is preferred
98
+ return index if button.node.search(".button_label").text == "Download XML"
99
+ end
100
+
101
+ # No XML, look for another type of report, use the first one, whatever
102
+ # the type
103
+ 0
104
+ end
79
105
  end
80
106
  end
81
107
  end
@@ -1,12 +1,16 @@
1
+ # encoding: utf-8 # make the regexes ruby 1.9 friendly
2
+
1
3
  module Compactor
2
4
  module Amazon
3
- class AddressParseFailure < StandardError; end
4
- class AuthenticationError < StandardError; end
5
- class LockedAccountError < StandardError; end
6
- class MissingRow < StandardError; end
7
- class NoMarketplacesError < StandardError; end
8
- class NotProAccountError < StandardError; end
9
- class UnknownReportType < StandardError; end
5
+ class AddressParseFailure < StandardError; end
6
+ class AuthenticationError < StandardError; end
7
+ class LockedAccountError < StandardError; end
8
+ class MissingRow < StandardError; end
9
+ class NoMarketplacesError < StandardError; end
10
+ class NotProAccountError < StandardError; end
11
+ class UnknownReportType < StandardError; end
12
+ class MissingXmlReport < StandardError; end
13
+ class MissingReportButtons < StandardError; end
10
14
 
11
15
  ATTEMPTS_BEFORE_GIVING_UP = 15 # give up after 20 minutes
12
16
  MARKETPLACE_HOMEPAGE = "https://sellercentral.amazon.com/gp/homepage.html"
@@ -50,7 +54,7 @@ module Compactor
50
54
  return result
51
55
  end
52
56
 
53
- marketplace_name = @mechanize.page.search("#market_switch")
57
+ marketplace_name = @mechanize.page.search("#market_switch .merch-site-span")
54
58
  if marketplace_name
55
59
  return [ [ marketplace_name.text.strip, nil ] ]
56
60
  end
@@ -66,15 +70,14 @@ module Compactor
66
70
  def get_balance
67
71
  go_to_past_settlements('', '')
68
72
  return 0.0 if page_has_no_results?
69
- open_row = report_rows.detect { |row| !row.ready? }
70
- return 0.0 if open_row.nil?
71
- open_row.deposit_amount
73
+ open_row = report_rows.detect { |row| row.not_settled_report? }
74
+
75
+ open_row.nil? ? 0.0 : open_row.deposit_amount
72
76
  end
73
77
 
74
78
  def reports(from, to)
75
79
  from, to = parse_dates(from, to)
76
80
  go_to_past_settlements(from, to)
77
-
78
81
  get_reports
79
82
  end
80
83
 
@@ -88,8 +91,10 @@ module Compactor
88
91
  begin
89
92
  tr = @mechanize.page.search!("//tr[@class='list-row']/td[@class='data-display-field'][text()=\"Contact Buyer:\"]").first.parent
90
93
  td = tr.search!("td[2]")
94
+
91
95
  order["BuyerName"] = td.text.strip
92
96
  td = @mechanize.page.search!("//tr[@class='list-row']/td[@class='data-display-field']/strong[text()='Shipping Address:']").first.parent
97
+
93
98
  addr_lines = td.children.map(&:text).reject { |l| l.blank? || l =~ /^Shipping Address/ }
94
99
  order["ShippingAddress"] = parse_address_lines!(addr_lines)
95
100
  rescue Exception => e
@@ -115,7 +120,7 @@ module Compactor
115
120
  results << [ 'Amazon Seller Account', AMAZON_COM_MARKETPLACE_ID ] if name
116
121
 
117
122
  name, marketplace_id = marketplaces.detect do |n, m_id|
118
- n == 'Your Checkout Website' && !m_id.nil?
123
+ (n == 'Your Checkout Website' || n == "Checkout by Amazon (Production View)") && !m_id.nil?
119
124
  end
120
125
  results << [ 'Checkout By Amazon', marketplace_id ] if name
121
126
 
@@ -174,10 +179,8 @@ module Compactor
174
179
  reports[report_type] ||= []
175
180
  reports[report_type] << report_streams
176
181
  end
177
-
178
182
  page_num += 1
179
183
  end while pages_to_parse
180
-
181
184
  reports.each { |type, streams| streams.flatten! }
182
185
  end
183
186
 
@@ -272,13 +275,12 @@ module Compactor
272
275
  def get_reports_in_page
273
276
  reports_to_watch = []
274
277
  reports = {}
275
-
276
278
  return reports if page_has_no_results?
277
279
 
278
280
  report_rows.each do |row|
279
281
  if row.can_download_report?
280
282
  add_to_collection(reports, row)
281
- elsif row.ready?
283
+ elsif row.requestable_report?
282
284
  @mechanize.transact do
283
285
  row.request_report
284
286
  reports_to_watch << row
@@ -292,10 +294,7 @@ module Compactor
292
294
  end
293
295
 
294
296
  def parse_dates(from, to)
295
- from = Date.parse(from.to_s).strftime("%m/%d/%y")
296
- to = Date.parse(to.to_s).strftime("%m/%d/%y")
297
-
298
- [from, to]
297
+ [ Date.parse_to_us_format(from.to_s), Date.parse_to_us_format(to.to_s) ]
299
298
  end
300
299
 
301
300
  def login_to_seller_central(email, password)
@@ -1,3 +1,3 @@
1
1
  module Compactor
2
- VERSION = "0.1.6"
3
- end
2
+ VERSION = "0.2.3"
3
+ end
@@ -0,0 +1,15 @@
1
+ require File.dirname(__FILE__) + '/test_helper'
2
+
3
+ class DateExtensionsTest < Test::Unit::TestCase
4
+ def test_parse_strings_separated_by_dashes
5
+ assert_equal "01/01/12", Date.parse_to_us_format("2012-1-1")
6
+ end
7
+
8
+ def test_parse_strings_separated_by_slashes
9
+ assert_equal "12/31/11", Date.parse_to_us_format("12/31/2011")
10
+ end
11
+
12
+ def test_convert_dates_to_strings
13
+ assert_equal "01/01/12", Date.parse_to_us_format(Date.parse("2012-1-1"))
14
+ end
15
+ end
data/test/test_helper.rb CHANGED
@@ -1,4 +1,5 @@
1
- require "rubygems"
1
+ require "bundler/setup"
2
+
2
3
  require "test/unit"
3
4
  require "vcr"
4
5
  require "mechanize"
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: compactor
3
3
  version: !ruby/object:Gem::Version
4
- hash: 23
4
+ hash: 17
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
- - 1
9
- - 6
10
- version: 0.1.6
8
+ - 2
9
+ - 3
10
+ version: 0.2.3
11
11
  platform: ruby
12
12
  authors:
13
13
  - Julio Santos
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2012-11-05 00:00:00 Z
18
+ date: 2013-01-12 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  name: mechanize
@@ -125,13 +125,13 @@ files:
125
125
  - LICENSE
126
126
  - README.md
127
127
  - Rakefile
128
- - compactor-0.1.2.gem
129
128
  - compactor.gemspec
130
129
  - lib/compactor.rb
131
130
  - lib/compactor/extensions.rb
132
131
  - lib/compactor/scraped_row.rb
133
132
  - lib/compactor/scraper.rb
134
133
  - lib/compactor/version.rb
134
+ - test/date_extensions_test.rb
135
135
  - test/fixtures/vcr_cassettes/AmazonReportScraper/with_bad_login/raise_error.yml
136
136
  - test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/find_reports/multiple_pages.yml
137
137
  - test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/find_reports/no_reports.yml
@@ -185,6 +185,7 @@ signing_key:
185
185
  specification_version: 3
186
186
  summary: Scrape Amazon Seller Central
187
187
  test_files:
188
+ - test/date_extensions_test.rb
188
189
  - test/fixtures/vcr_cassettes/AmazonReportScraper/with_bad_login/raise_error.yml
189
190
  - test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/find_reports/multiple_pages.yml
190
191
  - test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/find_reports/no_reports.yml
data/compactor-0.1.2.gem DELETED
Binary file