compactor 0.1.6 → 0.2.3
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile.lock +12 -4
- data/README.md +29 -3
- data/compactor.gemspec +1 -1
- data/lib/compactor/extensions.rb +10 -0
- data/lib/compactor/scraped_row.rb +36 -10
- data/lib/compactor/scraper.rb +20 -21
- data/lib/compactor/version.rb +2 -2
- data/test/date_extensions_test.rb +15 -0
- data/test/test_helper.rb +2 -1
- metadata +7 -6
- data/compactor-0.1.2.gem +0 -0
data/Gemfile.lock
CHANGED
@@ -1,15 +1,20 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
compactor (0.1.6)
|
4
|
+
compactor (0.2.3)
|
5
|
+
jruby-openssl (= 0.7.3)
|
5
6
|
mechanize (= 2.4)
|
7
|
+
nokogiri (>= 1.5.0, < 1.5.3)
|
6
8
|
|
7
9
|
GEM
|
8
10
|
remote: http://rubygems.org/
|
9
11
|
specs:
|
10
|
-
|
12
|
+
bouncy-castle-java (1.5.0146.1)
|
13
|
+
domain_name (0.5.7)
|
11
14
|
unf (~> 0.0.3)
|
12
15
|
fakeweb (1.3.0)
|
16
|
+
jruby-openssl (0.7.3)
|
17
|
+
bouncy-castle-java
|
13
18
|
mechanize (2.4)
|
14
19
|
domain_name (~> 0.5, >= 0.5.1)
|
15
20
|
mime-types (~> 1.17, >= 1.17.2)
|
@@ -24,12 +29,15 @@ GEM
|
|
24
29
|
metaclass (~> 0.0.1)
|
25
30
|
net-http-digest_auth (1.2.1)
|
26
31
|
net-http-persistent (2.8)
|
27
|
-
nokogiri (1.5.
|
32
|
+
nokogiri (1.5.2)
|
33
|
+
nokogiri (1.5.2-java)
|
28
34
|
ntlm-http (0.1.1)
|
29
|
-
rake (0.
|
35
|
+
rake (10.0.2)
|
30
36
|
rcov (0.9.11)
|
37
|
+
rcov (0.9.11-java)
|
31
38
|
unf (0.0.5)
|
32
39
|
unf_ext
|
40
|
+
unf (0.0.5-java)
|
33
41
|
unf_ext (0.0.5)
|
34
42
|
vcr (2.0.1)
|
35
43
|
webrobots (0.0.13)
|
data/README.md
CHANGED
@@ -24,8 +24,34 @@ Or install it yourself as:
|
|
24
24
|
rake test:coverage
|
25
25
|
```
|
26
26
|
|
27
|
-
```
|
28
|
-
|
27
|
+
```ruby
|
28
|
+
def scrape(email, password, from, to)
|
29
|
+
scraper = Compactor::Amazon::ReportScraper.new(:email => email, :password => password)
|
30
|
+
marketplaces = scraper.marketplaces
|
31
|
+
|
32
|
+
original_from = from
|
33
|
+
original_to = to
|
34
|
+
|
35
|
+
marketplaces.each do |marketplace|
|
36
|
+
scraper.select_marketplace marketplace[1]
|
37
|
+
|
38
|
+
from = original_from
|
39
|
+
to = original_to
|
40
|
+
|
41
|
+
puts "Marketplace: #{marketplace[1]}"
|
42
|
+
while from < to
|
43
|
+
begin
|
44
|
+
reports_by_type = scraper.reports(from, to)
|
45
|
+
puts "There are #{reports_by_type.size} reports between #{from.to_date} and #{to.to_date}"
|
46
|
+
rescue Exception => e
|
47
|
+
puts "ERROR: #{e.message} - USER: #{email}"
|
48
|
+
end
|
49
|
+
from += 1.week
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
scrape "me@there.com", "secret", DateTime.parse("1/1/2012"), DateTime.now
|
29
55
|
```
|
30
56
|
|
31
57
|
## Contributing
|
@@ -44,4 +70,4 @@ more soon
|
|
44
70
|
## To-do
|
45
71
|
|
46
72
|
- Refactor
|
47
|
-
-
|
73
|
+
- 100% coverage
|
data/compactor.gemspec
CHANGED
@@ -22,7 +22,7 @@ Gem::Specification.new do |gem|
|
|
22
22
|
|
23
23
|
if RUBY_PLATFORM == "java"
|
24
24
|
gem.add_runtime_dependency "jruby-openssl", '0.7.3'
|
25
|
-
gem.add_runtime_dependency "nokogiri", "1.5.0.
|
25
|
+
gem.add_runtime_dependency "nokogiri", ">= 1.5.0", "< 1.5.3"
|
26
26
|
end
|
27
27
|
|
28
28
|
gem.add_development_dependency "rake"
|
data/lib/compactor/extensions.rb
CHANGED
@@ -2,6 +2,16 @@ class Object
|
|
2
2
|
def blank?; respond_to?(:empty?) ? empty? : !self; end
|
3
3
|
end
|
4
4
|
|
5
|
+
class Date
|
6
|
+
def self.parse_to_us_format(date)
|
7
|
+
if date.is_a? String
|
8
|
+
date_format = date['-'] ? "%Y-%m-%d" : "%m/%d/%Y"
|
9
|
+
date = Date.strptime(date, date_format)
|
10
|
+
end
|
11
|
+
date.strftime("%m/%d/%y")
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
5
15
|
module Nokogiri
|
6
16
|
class MissingElement < ::StandardError; end
|
7
17
|
|
@@ -7,7 +7,7 @@ module Compactor
|
|
7
7
|
end
|
8
8
|
|
9
9
|
def can_download_report?
|
10
|
-
!report_buttons.
|
10
|
+
!report_buttons.blank?
|
11
11
|
end
|
12
12
|
|
13
13
|
def report_buttons
|
@@ -17,8 +17,10 @@ module Compactor
|
|
17
17
|
end
|
18
18
|
|
19
19
|
def download_report
|
20
|
-
|
21
|
-
|
20
|
+
buttons = report_buttons
|
21
|
+
button_index = index_of_button(buttons)
|
22
|
+
report_url = buttons[button_index].node["href"]
|
23
|
+
report_identifier = buttons[button_index].node.search(".button_label").text
|
22
24
|
type = ReportScraper.report_type(report_identifier)
|
23
25
|
response_body = @mechanize.get(report_url).body
|
24
26
|
|
@@ -43,18 +45,25 @@ module Compactor
|
|
43
45
|
})
|
44
46
|
end
|
45
47
|
|
46
|
-
|
47
|
-
|
48
|
-
|
48
|
+
# A settlement period (row) is considered ready to be parsed
|
49
|
+
# if it's not processing, open or in progress. Also the "regenerate"
|
50
|
+
# button is not present. This means that all that is left is 1 or more
|
51
|
+
# buttons to get the actual reports
|
52
|
+
def requestable_report?
|
53
|
+
!last_cell.search(".regenerateButton").empty?
|
54
|
+
end
|
55
|
+
|
56
|
+
def not_settled_report?
|
57
|
+
text = last_div.text
|
49
58
|
|
50
|
-
|
51
|
-
|
52
|
-
|
59
|
+
# Is the report not settled yet? (in pending-like state)
|
60
|
+
["(Processing)", "(Open)", "In Progress"].any? do |report_state|
|
61
|
+
text.include?(report_state)
|
62
|
+
end
|
53
63
|
end
|
54
64
|
|
55
65
|
def deposit_amount
|
56
66
|
@deposit_amount = fetch_deposit_amount if !@deposit_amount
|
57
|
-
|
58
67
|
@deposit_amount
|
59
68
|
end
|
60
69
|
|
@@ -64,6 +73,10 @@ module Compactor
|
|
64
73
|
|
65
74
|
private
|
66
75
|
|
76
|
+
def last_div
|
77
|
+
last_cell.search("div")[-1]
|
78
|
+
end
|
79
|
+
|
67
80
|
def fetch_deposit_amount
|
68
81
|
deposit_cell = @node.search("td")[-2]
|
69
82
|
deposit_cell ? deposit_cell.text.gsub(/[^0-9\.]/, '').to_f : 0.0
|
@@ -76,6 +89,19 @@ module Compactor
|
|
76
89
|
def last_cell
|
77
90
|
@last_cell ||= @node.search("td")[-1]
|
78
91
|
end
|
92
|
+
|
93
|
+
def index_of_button(buttons)
|
94
|
+
raise MissingReportButtons if buttons.blank? # no buttons at all!
|
95
|
+
|
96
|
+
buttons.each_with_index do |button, index|
|
97
|
+
# XML is preferred
|
98
|
+
return index if button.node.search(".button_label").text == "Download XML"
|
99
|
+
end
|
100
|
+
|
101
|
+
# No XML, look for another type of report, use the first one, whatever
|
102
|
+
# the type
|
103
|
+
0
|
104
|
+
end
|
79
105
|
end
|
80
106
|
end
|
81
107
|
end
|
data/lib/compactor/scraper.rb
CHANGED
@@ -1,12 +1,16 @@
|
|
1
|
+
# encoding: utf-8 # make the regexes ruby 1.9 friendly
|
2
|
+
|
1
3
|
module Compactor
|
2
4
|
module Amazon
|
3
|
-
class AddressParseFailure
|
4
|
-
class AuthenticationError
|
5
|
-
class LockedAccountError
|
6
|
-
class MissingRow
|
7
|
-
class NoMarketplacesError
|
8
|
-
class NotProAccountError
|
9
|
-
class UnknownReportType
|
5
|
+
class AddressParseFailure < StandardError; end
|
6
|
+
class AuthenticationError < StandardError; end
|
7
|
+
class LockedAccountError < StandardError; end
|
8
|
+
class MissingRow < StandardError; end
|
9
|
+
class NoMarketplacesError < StandardError; end
|
10
|
+
class NotProAccountError < StandardError; end
|
11
|
+
class UnknownReportType < StandardError; end
|
12
|
+
class MissingXmlReport < StandardError; end
|
13
|
+
class MissingReportButtons < StandardError; end
|
10
14
|
|
11
15
|
ATTEMPTS_BEFORE_GIVING_UP = 15 # give up after 20 minutes
|
12
16
|
MARKETPLACE_HOMEPAGE = "https://sellercentral.amazon.com/gp/homepage.html"
|
@@ -50,7 +54,7 @@ module Compactor
|
|
50
54
|
return result
|
51
55
|
end
|
52
56
|
|
53
|
-
marketplace_name = @mechanize.page.search("#market_switch")
|
57
|
+
marketplace_name = @mechanize.page.search("#market_switch .merch-site-span")
|
54
58
|
if marketplace_name
|
55
59
|
return [ [ marketplace_name.text.strip, nil ] ]
|
56
60
|
end
|
@@ -66,15 +70,14 @@ module Compactor
|
|
66
70
|
def get_balance
|
67
71
|
go_to_past_settlements('', '')
|
68
72
|
return 0.0 if page_has_no_results?
|
69
|
-
open_row = report_rows.detect { |row|
|
70
|
-
|
71
|
-
open_row.deposit_amount
|
73
|
+
open_row = report_rows.detect { |row| row.not_settled_report? }
|
74
|
+
|
75
|
+
open_row.nil? ? 0.0 : open_row.deposit_amount
|
72
76
|
end
|
73
77
|
|
74
78
|
def reports(from, to)
|
75
79
|
from, to = parse_dates(from, to)
|
76
80
|
go_to_past_settlements(from, to)
|
77
|
-
|
78
81
|
get_reports
|
79
82
|
end
|
80
83
|
|
@@ -88,8 +91,10 @@ module Compactor
|
|
88
91
|
begin
|
89
92
|
tr = @mechanize.page.search!("//tr[@class='list-row']/td[@class='data-display-field'][text()=\"Contact Buyer:\"]").first.parent
|
90
93
|
td = tr.search!("td[2]")
|
94
|
+
|
91
95
|
order["BuyerName"] = td.text.strip
|
92
96
|
td = @mechanize.page.search!("//tr[@class='list-row']/td[@class='data-display-field']/strong[text()='Shipping Address:']").first.parent
|
97
|
+
|
93
98
|
addr_lines = td.children.map(&:text).reject { |l| l.blank? || l =~ /^Shipping Address/ }
|
94
99
|
order["ShippingAddress"] = parse_address_lines!(addr_lines)
|
95
100
|
rescue Exception => e
|
@@ -115,7 +120,7 @@ module Compactor
|
|
115
120
|
results << [ 'Amazon Seller Account', AMAZON_COM_MARKETPLACE_ID ] if name
|
116
121
|
|
117
122
|
name, marketplace_id = marketplaces.detect do |n, m_id|
|
118
|
-
|
123
|
+
(n == 'Your Checkout Website' || n == "Checkout by Amazon (Production View)") && !m_id.nil?
|
119
124
|
end
|
120
125
|
results << [ 'Checkout By Amazon', marketplace_id ] if name
|
121
126
|
|
@@ -174,10 +179,8 @@ module Compactor
|
|
174
179
|
reports[report_type] ||= []
|
175
180
|
reports[report_type] << report_streams
|
176
181
|
end
|
177
|
-
|
178
182
|
page_num += 1
|
179
183
|
end while pages_to_parse
|
180
|
-
|
181
184
|
reports.each { |type, streams| streams.flatten! }
|
182
185
|
end
|
183
186
|
|
@@ -272,13 +275,12 @@ module Compactor
|
|
272
275
|
def get_reports_in_page
|
273
276
|
reports_to_watch = []
|
274
277
|
reports = {}
|
275
|
-
|
276
278
|
return reports if page_has_no_results?
|
277
279
|
|
278
280
|
report_rows.each do |row|
|
279
281
|
if row.can_download_report?
|
280
282
|
add_to_collection(reports, row)
|
281
|
-
elsif row.
|
283
|
+
elsif row.requestable_report?
|
282
284
|
@mechanize.transact do
|
283
285
|
row.request_report
|
284
286
|
reports_to_watch << row
|
@@ -292,10 +294,7 @@ module Compactor
|
|
292
294
|
end
|
293
295
|
|
294
296
|
def parse_dates(from, to)
|
295
|
-
|
296
|
-
to = Date.parse(to.to_s).strftime("%m/%d/%y")
|
297
|
-
|
298
|
-
[from, to]
|
297
|
+
[ Date.parse_to_us_format(from.to_s), Date.parse_to_us_format(to.to_s) ]
|
299
298
|
end
|
300
299
|
|
301
300
|
def login_to_seller_central(email, password)
|
data/lib/compactor/version.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
1
|
module Compactor
|
2
|
-
VERSION = "0.1.6"
|
3
|
-
end
|
2
|
+
VERSION = "0.2.3"
|
3
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/test_helper'
|
2
|
+
|
3
|
+
class DateExtensionsTest < Test::Unit::TestCase
|
4
|
+
def test_parse_strings_separated_by_dashes
|
5
|
+
assert_equal "01/01/12", Date.parse_to_us_format("2012-1-1")
|
6
|
+
end
|
7
|
+
|
8
|
+
def test_parse_strings_separated_by_slashes
|
9
|
+
assert_equal "12/31/11", Date.parse_to_us_format("12/31/2011")
|
10
|
+
end
|
11
|
+
|
12
|
+
def test_convert_dates_to_strings
|
13
|
+
assert_equal "01/01/12", Date.parse_to_us_format(Date.parse("2012-1-1"))
|
14
|
+
end
|
15
|
+
end
|
data/test/test_helper.rb
CHANGED
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: compactor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 17
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
-
|
9
|
-
-
|
10
|
-
version: 0.1.6
|
8
|
+
- 2
|
9
|
+
- 3
|
10
|
+
version: 0.2.3
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Julio Santos
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date:
|
18
|
+
date: 2013-01-12 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
name: mechanize
|
@@ -125,13 +125,13 @@ files:
|
|
125
125
|
- LICENSE
|
126
126
|
- README.md
|
127
127
|
- Rakefile
|
128
|
-
- compactor-0.1.2.gem
|
129
128
|
- compactor.gemspec
|
130
129
|
- lib/compactor.rb
|
131
130
|
- lib/compactor/extensions.rb
|
132
131
|
- lib/compactor/scraped_row.rb
|
133
132
|
- lib/compactor/scraper.rb
|
134
133
|
- lib/compactor/version.rb
|
134
|
+
- test/date_extensions_test.rb
|
135
135
|
- test/fixtures/vcr_cassettes/AmazonReportScraper/with_bad_login/raise_error.yml
|
136
136
|
- test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/find_reports/multiple_pages.yml
|
137
137
|
- test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/find_reports/no_reports.yml
|
@@ -185,6 +185,7 @@ signing_key:
|
|
185
185
|
specification_version: 3
|
186
186
|
summary: Scrape Amazon Seller Central
|
187
187
|
test_files:
|
188
|
+
- test/date_extensions_test.rb
|
188
189
|
- test/fixtures/vcr_cassettes/AmazonReportScraper/with_bad_login/raise_error.yml
|
189
190
|
- test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/find_reports/multiple_pages.yml
|
190
191
|
- test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/find_reports/no_reports.yml
|
data/compactor-0.1.2.gem
DELETED
Binary file
|