compactor 0.1.6 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile.lock CHANGED
@@ -1,15 +1,20 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- compactor (0.1.6)
4
+ compactor (0.2.3)
5
+ jruby-openssl (= 0.7.3)
5
6
  mechanize (= 2.4)
7
+ nokogiri (>= 1.5.0, < 1.5.3)
6
8
 
7
9
  GEM
8
10
  remote: http://rubygems.org/
9
11
  specs:
10
- domain_name (0.5.4)
12
+ bouncy-castle-java (1.5.0146.1)
13
+ domain_name (0.5.7)
11
14
  unf (~> 0.0.3)
12
15
  fakeweb (1.3.0)
16
+ jruby-openssl (0.7.3)
17
+ bouncy-castle-java
13
18
  mechanize (2.4)
14
19
  domain_name (~> 0.5, >= 0.5.1)
15
20
  mime-types (~> 1.17, >= 1.17.2)
@@ -24,12 +29,15 @@ GEM
24
29
  metaclass (~> 0.0.1)
25
30
  net-http-digest_auth (1.2.1)
26
31
  net-http-persistent (2.8)
27
- nokogiri (1.5.5)
32
+ nokogiri (1.5.2)
33
+ nokogiri (1.5.2-java)
28
34
  ntlm-http (0.1.1)
29
- rake (0.9.2.2)
35
+ rake (10.0.2)
30
36
  rcov (0.9.11)
37
+ rcov (0.9.11-java)
31
38
  unf (0.0.5)
32
39
  unf_ext
40
+ unf (0.0.5-java)
33
41
  unf_ext (0.0.5)
34
42
  vcr (2.0.1)
35
43
  webrobots (0.0.13)
data/README.md CHANGED
@@ -24,8 +24,34 @@ Or install it yourself as:
24
24
  rake test:coverage
25
25
  ```
26
26
 
27
- ```
28
- more soon
27
+ ```ruby
28
+ def scrape(email, password, from, to)
29
+ scraper = Compactor::Amazon::ReportScraper.new(:email => email, :password => password)
30
+ marketplaces = scraper.marketplaces
31
+
32
+ original_from = from
33
+ original_to = to
34
+
35
+ marketplaces.each do |marketplace|
36
+ scraper.select_marketplace marketplace[1]
37
+
38
+ from = original_from
39
+ to = original_to
40
+
41
+ puts "Marketplace: #{marketplace[1]}"
42
+ while from < to
43
+ begin
44
+ reports_by_type = scraper.reports(from, to)
45
+ puts "There are #{reports_by_type.size} reports between #{from.to_date} and #{to.to_date}"
46
+ rescue Exception => e
47
+ puts "ERROR: #{e.message} - USER: #{email}"
48
+ end
49
+ from += 1.week
50
+ end
51
+ end
52
+ end
53
+
54
+ scrape "me@there.com", "secret", DateTime.parse("1/1/2012"), DateTime.now
29
55
  ```
30
56
 
31
57
  ## Contributing
@@ -44,4 +70,4 @@ more soon
44
70
  ## To-do
45
71
 
46
72
  - Refactor
47
- - document
73
+ - 100% coverage
data/compactor.gemspec CHANGED
@@ -22,7 +22,7 @@ Gem::Specification.new do |gem|
22
22
 
23
23
  if RUBY_PLATFORM == "java"
24
24
  gem.add_runtime_dependency "jruby-openssl", '0.7.3'
25
- gem.add_runtime_dependency "nokogiri", "1.5.0.beta.2"
25
+ gem.add_runtime_dependency "nokogiri", ">= 1.5.0", "< 1.5.3"
26
26
  end
27
27
 
28
28
  gem.add_development_dependency "rake"
@@ -2,6 +2,16 @@ class Object
2
2
  def blank?; respond_to?(:empty?) ? empty? : !self; end
3
3
  end
4
4
 
5
+ class Date
6
+ def self.parse_to_us_format(date)
7
+ if date.is_a? String
8
+ date_format = date['-'] ? "%Y-%m-%d" : "%m/%d/%Y"
9
+ date = Date.strptime(date, date_format)
10
+ end
11
+ date.strftime("%m/%d/%y")
12
+ end
13
+ end
14
+
5
15
  module Nokogiri
6
16
  class MissingElement < ::StandardError; end
7
17
 
@@ -7,7 +7,7 @@ module Compactor
7
7
  end
8
8
 
9
9
  def can_download_report?
10
- !report_buttons.empty?
10
+ !report_buttons.blank?
11
11
  end
12
12
 
13
13
  def report_buttons
@@ -17,8 +17,10 @@ module Compactor
17
17
  end
18
18
 
19
19
  def download_report
20
- report_url = report_buttons[0].node["href"]
21
- report_identifier = report_buttons[0].node.search(".button_label").text
20
+ buttons = report_buttons
21
+ button_index = index_of_button(buttons)
22
+ report_url = buttons[button_index].node["href"]
23
+ report_identifier = buttons[button_index].node.search(".button_label").text
22
24
  type = ReportScraper.report_type(report_identifier)
23
25
  response_body = @mechanize.get(report_url).body
24
26
 
@@ -43,18 +45,25 @@ module Compactor
43
45
  })
44
46
  end
45
47
 
46
- def ready?
47
- div = last_cell.search("div")[-1]
48
- text = div.text
48
+ # A settlement period (row) is considered ready to be parsed
49
+ # if it's not processing, open or in progress. Also the "regenerate"
50
+ # button is not present. This means that all that is left is 1 or more
51
+ # buttons to get the actual reports
52
+ def requestable_report?
53
+ !last_cell.search(".regenerateButton").empty?
54
+ end
55
+
56
+ def not_settled_report?
57
+ text = last_div.text
49
58
 
50
- ignorable_periods = ["(Processing)", "(Open)", "In Progress"]
51
- !ignorable_periods.any? { |ignore_text| text.include?(ignore_text) &&
52
- div.search(".regenerateButton").blank? }
59
+ # Is the report not settled yet? (in pending-like state)
60
+ ["(Processing)", "(Open)", "In Progress"].any? do |report_state|
61
+ text.include?(report_state)
62
+ end
53
63
  end
54
64
 
55
65
  def deposit_amount
56
66
  @deposit_amount = fetch_deposit_amount if !@deposit_amount
57
-
58
67
  @deposit_amount
59
68
  end
60
69
 
@@ -64,6 +73,10 @@ module Compactor
64
73
 
65
74
  private
66
75
 
76
+ def last_div
77
+ last_cell.search("div")[-1]
78
+ end
79
+
67
80
  def fetch_deposit_amount
68
81
  deposit_cell = @node.search("td")[-2]
69
82
  deposit_cell ? deposit_cell.text.gsub(/[^0-9\.]/, '').to_f : 0.0
@@ -76,6 +89,19 @@ module Compactor
76
89
  def last_cell
77
90
  @last_cell ||= @node.search("td")[-1]
78
91
  end
92
+
93
+ def index_of_button(buttons)
94
+ raise MissingReportButtons if buttons.blank? # no buttons at all!
95
+
96
+ buttons.each_with_index do |button, index|
97
+ # XML is preferred
98
+ return index if button.node.search(".button_label").text == "Download XML"
99
+ end
100
+
101
+ # No XML, look for another type of report, use the first one, whatever
102
+ # the type
103
+ 0
104
+ end
79
105
  end
80
106
  end
81
107
  end
@@ -1,12 +1,16 @@
1
+ # encoding: utf-8 # make the regexes ruby 1.9 friendly
2
+
1
3
  module Compactor
2
4
  module Amazon
3
- class AddressParseFailure < StandardError; end
4
- class AuthenticationError < StandardError; end
5
- class LockedAccountError < StandardError; end
6
- class MissingRow < StandardError; end
7
- class NoMarketplacesError < StandardError; end
8
- class NotProAccountError < StandardError; end
9
- class UnknownReportType < StandardError; end
5
+ class AddressParseFailure < StandardError; end
6
+ class AuthenticationError < StandardError; end
7
+ class LockedAccountError < StandardError; end
8
+ class MissingRow < StandardError; end
9
+ class NoMarketplacesError < StandardError; end
10
+ class NotProAccountError < StandardError; end
11
+ class UnknownReportType < StandardError; end
12
+ class MissingXmlReport < StandardError; end
13
+ class MissingReportButtons < StandardError; end
10
14
 
11
15
  ATTEMPTS_BEFORE_GIVING_UP = 15 # give up after 20 minutes
12
16
  MARKETPLACE_HOMEPAGE = "https://sellercentral.amazon.com/gp/homepage.html"
@@ -50,7 +54,7 @@ module Compactor
50
54
  return result
51
55
  end
52
56
 
53
- marketplace_name = @mechanize.page.search("#market_switch")
57
+ marketplace_name = @mechanize.page.search("#market_switch .merch-site-span")
54
58
  if marketplace_name
55
59
  return [ [ marketplace_name.text.strip, nil ] ]
56
60
  end
@@ -66,15 +70,14 @@ module Compactor
66
70
  def get_balance
67
71
  go_to_past_settlements('', '')
68
72
  return 0.0 if page_has_no_results?
69
- open_row = report_rows.detect { |row| !row.ready? }
70
- return 0.0 if open_row.nil?
71
- open_row.deposit_amount
73
+ open_row = report_rows.detect { |row| row.not_settled_report? }
74
+
75
+ open_row.nil? ? 0.0 : open_row.deposit_amount
72
76
  end
73
77
 
74
78
  def reports(from, to)
75
79
  from, to = parse_dates(from, to)
76
80
  go_to_past_settlements(from, to)
77
-
78
81
  get_reports
79
82
  end
80
83
 
@@ -88,8 +91,10 @@ module Compactor
88
91
  begin
89
92
  tr = @mechanize.page.search!("//tr[@class='list-row']/td[@class='data-display-field'][text()=\"Contact Buyer:\"]").first.parent
90
93
  td = tr.search!("td[2]")
94
+
91
95
  order["BuyerName"] = td.text.strip
92
96
  td = @mechanize.page.search!("//tr[@class='list-row']/td[@class='data-display-field']/strong[text()='Shipping Address:']").first.parent
97
+
93
98
  addr_lines = td.children.map(&:text).reject { |l| l.blank? || l =~ /^Shipping Address/ }
94
99
  order["ShippingAddress"] = parse_address_lines!(addr_lines)
95
100
  rescue Exception => e
@@ -115,7 +120,7 @@ module Compactor
115
120
  results << [ 'Amazon Seller Account', AMAZON_COM_MARKETPLACE_ID ] if name
116
121
 
117
122
  name, marketplace_id = marketplaces.detect do |n, m_id|
118
- n == 'Your Checkout Website' && !m_id.nil?
123
+ (n == 'Your Checkout Website' || n == "Checkout by Amazon (Production View)") && !m_id.nil?
119
124
  end
120
125
  results << [ 'Checkout By Amazon', marketplace_id ] if name
121
126
 
@@ -174,10 +179,8 @@ module Compactor
174
179
  reports[report_type] ||= []
175
180
  reports[report_type] << report_streams
176
181
  end
177
-
178
182
  page_num += 1
179
183
  end while pages_to_parse
180
-
181
184
  reports.each { |type, streams| streams.flatten! }
182
185
  end
183
186
 
@@ -272,13 +275,12 @@ module Compactor
272
275
  def get_reports_in_page
273
276
  reports_to_watch = []
274
277
  reports = {}
275
-
276
278
  return reports if page_has_no_results?
277
279
 
278
280
  report_rows.each do |row|
279
281
  if row.can_download_report?
280
282
  add_to_collection(reports, row)
281
- elsif row.ready?
283
+ elsif row.requestable_report?
282
284
  @mechanize.transact do
283
285
  row.request_report
284
286
  reports_to_watch << row
@@ -292,10 +294,7 @@ module Compactor
292
294
  end
293
295
 
294
296
  def parse_dates(from, to)
295
- from = Date.parse(from.to_s).strftime("%m/%d/%y")
296
- to = Date.parse(to.to_s).strftime("%m/%d/%y")
297
-
298
- [from, to]
297
+ [ Date.parse_to_us_format(from.to_s), Date.parse_to_us_format(to.to_s) ]
299
298
  end
300
299
 
301
300
  def login_to_seller_central(email, password)
@@ -1,3 +1,3 @@
1
1
  module Compactor
2
- VERSION = "0.1.6"
3
- end
2
+ VERSION = "0.2.3"
3
+ end
@@ -0,0 +1,15 @@
1
+ require File.dirname(__FILE__) + '/test_helper'
2
+
3
+ class DateExtensionsTest < Test::Unit::TestCase
4
+ def test_parse_strings_separated_by_dashes
5
+ assert_equal "01/01/12", Date.parse_to_us_format("2012-1-1")
6
+ end
7
+
8
+ def test_parse_strings_separated_by_slashes
9
+ assert_equal "12/31/11", Date.parse_to_us_format("12/31/2011")
10
+ end
11
+
12
+ def test_convert_dates_to_strings
13
+ assert_equal "01/01/12", Date.parse_to_us_format(Date.parse("2012-1-1"))
14
+ end
15
+ end
data/test/test_helper.rb CHANGED
@@ -1,4 +1,5 @@
1
- require "rubygems"
1
+ require "bundler/setup"
2
+
2
3
  require "test/unit"
3
4
  require "vcr"
4
5
  require "mechanize"
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: compactor
3
3
  version: !ruby/object:Gem::Version
4
- hash: 23
4
+ hash: 17
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
- - 1
9
- - 6
10
- version: 0.1.6
8
+ - 2
9
+ - 3
10
+ version: 0.2.3
11
11
  platform: ruby
12
12
  authors:
13
13
  - Julio Santos
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2012-11-05 00:00:00 Z
18
+ date: 2013-01-12 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  name: mechanize
@@ -125,13 +125,13 @@ files:
125
125
  - LICENSE
126
126
  - README.md
127
127
  - Rakefile
128
- - compactor-0.1.2.gem
129
128
  - compactor.gemspec
130
129
  - lib/compactor.rb
131
130
  - lib/compactor/extensions.rb
132
131
  - lib/compactor/scraped_row.rb
133
132
  - lib/compactor/scraper.rb
134
133
  - lib/compactor/version.rb
134
+ - test/date_extensions_test.rb
135
135
  - test/fixtures/vcr_cassettes/AmazonReportScraper/with_bad_login/raise_error.yml
136
136
  - test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/find_reports/multiple_pages.yml
137
137
  - test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/find_reports/no_reports.yml
@@ -185,6 +185,7 @@ signing_key:
185
185
  specification_version: 3
186
186
  summary: Scrape Amazon Seller Central
187
187
  test_files:
188
+ - test/date_extensions_test.rb
188
189
  - test/fixtures/vcr_cassettes/AmazonReportScraper/with_bad_login/raise_error.yml
189
190
  - test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/find_reports/multiple_pages.yml
190
191
  - test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/find_reports/no_reports.yml
data/compactor-0.1.2.gem DELETED
Binary file