compactor 0.1.6 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile.lock +12 -4
- data/README.md +29 -3
- data/compactor.gemspec +1 -1
- data/lib/compactor/extensions.rb +10 -0
- data/lib/compactor/scraped_row.rb +36 -10
- data/lib/compactor/scraper.rb +20 -21
- data/lib/compactor/version.rb +2 -2
- data/test/date_extensions_test.rb +15 -0
- data/test/test_helper.rb +2 -1
- metadata +7 -6
- data/compactor-0.1.2.gem +0 -0
data/Gemfile.lock
CHANGED
@@ -1,15 +1,20 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
compactor (0.
|
4
|
+
compactor (0.2.3)
|
5
|
+
jruby-openssl (= 0.7.3)
|
5
6
|
mechanize (= 2.4)
|
7
|
+
nokogiri (>= 1.5.0, < 1.5.3)
|
6
8
|
|
7
9
|
GEM
|
8
10
|
remote: http://rubygems.org/
|
9
11
|
specs:
|
10
|
-
|
12
|
+
bouncy-castle-java (1.5.0146.1)
|
13
|
+
domain_name (0.5.7)
|
11
14
|
unf (~> 0.0.3)
|
12
15
|
fakeweb (1.3.0)
|
16
|
+
jruby-openssl (0.7.3)
|
17
|
+
bouncy-castle-java
|
13
18
|
mechanize (2.4)
|
14
19
|
domain_name (~> 0.5, >= 0.5.1)
|
15
20
|
mime-types (~> 1.17, >= 1.17.2)
|
@@ -24,12 +29,15 @@ GEM
|
|
24
29
|
metaclass (~> 0.0.1)
|
25
30
|
net-http-digest_auth (1.2.1)
|
26
31
|
net-http-persistent (2.8)
|
27
|
-
nokogiri (1.5.
|
32
|
+
nokogiri (1.5.2)
|
33
|
+
nokogiri (1.5.2-java)
|
28
34
|
ntlm-http (0.1.1)
|
29
|
-
rake (0.
|
35
|
+
rake (10.0.2)
|
30
36
|
rcov (0.9.11)
|
37
|
+
rcov (0.9.11-java)
|
31
38
|
unf (0.0.5)
|
32
39
|
unf_ext
|
40
|
+
unf (0.0.5-java)
|
33
41
|
unf_ext (0.0.5)
|
34
42
|
vcr (2.0.1)
|
35
43
|
webrobots (0.0.13)
|
data/README.md
CHANGED
@@ -24,8 +24,34 @@ Or install it yourself as:
|
|
24
24
|
rake test:coverage
|
25
25
|
```
|
26
26
|
|
27
|
-
```
|
28
|
-
|
27
|
+
```ruby
|
28
|
+
def scrape(email, password, from, to)
|
29
|
+
scraper = Compactor::Amazon::ReportScraper.new(:email => email, :password => password)
|
30
|
+
marketplaces = scraper.marketplaces
|
31
|
+
|
32
|
+
original_from = from
|
33
|
+
original_to = to
|
34
|
+
|
35
|
+
marketplaces.each do |marketplace|
|
36
|
+
scraper.select_marketplace marketplace[1]
|
37
|
+
|
38
|
+
from = original_from
|
39
|
+
to = original_to
|
40
|
+
|
41
|
+
puts "Marketplace: #{marketplace[1]}"
|
42
|
+
while from < to
|
43
|
+
begin
|
44
|
+
reports_by_type = scraper.reports(from, to)
|
45
|
+
puts "There are #{reports_by_type.size} reports between #{from.to_date} and #{to.to_date}"
|
46
|
+
rescue Exception => e
|
47
|
+
puts "ERROR: #{e.message} - USER: #{email}"
|
48
|
+
end
|
49
|
+
from += 1.week
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
scrape "me@there.com", "secret", DateTime.parse("1/1/2012"), DateTime.now
|
29
55
|
```
|
30
56
|
|
31
57
|
## Contributing
|
@@ -44,4 +70,4 @@ more soon
|
|
44
70
|
## To-do
|
45
71
|
|
46
72
|
- Refactor
|
47
|
-
-
|
73
|
+
- 100% coverage
|
data/compactor.gemspec
CHANGED
@@ -22,7 +22,7 @@ Gem::Specification.new do |gem|
|
|
22
22
|
|
23
23
|
if RUBY_PLATFORM == "java"
|
24
24
|
gem.add_runtime_dependency "jruby-openssl", '0.7.3'
|
25
|
-
gem.add_runtime_dependency "nokogiri", "1.5.0.
|
25
|
+
gem.add_runtime_dependency "nokogiri", ">= 1.5.0", "< 1.5.3"
|
26
26
|
end
|
27
27
|
|
28
28
|
gem.add_development_dependency "rake"
|
data/lib/compactor/extensions.rb
CHANGED
@@ -2,6 +2,16 @@ class Object
|
|
2
2
|
def blank?; respond_to?(:empty?) ? empty? : !self; end
|
3
3
|
end
|
4
4
|
|
5
|
+
class Date
|
6
|
+
def self.parse_to_us_format(date)
|
7
|
+
if date.is_a? String
|
8
|
+
date_format = date['-'] ? "%Y-%m-%d" : "%m/%d/%Y"
|
9
|
+
date = Date.strptime(date, date_format)
|
10
|
+
end
|
11
|
+
date.strftime("%m/%d/%y")
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
5
15
|
module Nokogiri
|
6
16
|
class MissingElement < ::StandardError; end
|
7
17
|
|
@@ -7,7 +7,7 @@ module Compactor
|
|
7
7
|
end
|
8
8
|
|
9
9
|
def can_download_report?
|
10
|
-
!report_buttons.
|
10
|
+
!report_buttons.blank?
|
11
11
|
end
|
12
12
|
|
13
13
|
def report_buttons
|
@@ -17,8 +17,10 @@ module Compactor
|
|
17
17
|
end
|
18
18
|
|
19
19
|
def download_report
|
20
|
-
|
21
|
-
|
20
|
+
buttons = report_buttons
|
21
|
+
button_index = index_of_button(buttons)
|
22
|
+
report_url = buttons[button_index].node["href"]
|
23
|
+
report_identifier = buttons[button_index].node.search(".button_label").text
|
22
24
|
type = ReportScraper.report_type(report_identifier)
|
23
25
|
response_body = @mechanize.get(report_url).body
|
24
26
|
|
@@ -43,18 +45,25 @@ module Compactor
|
|
43
45
|
})
|
44
46
|
end
|
45
47
|
|
46
|
-
|
47
|
-
|
48
|
-
|
48
|
+
# A settlement period (row) is considered ready to be parsed
|
49
|
+
# if it's not processing, open or in progress. Also the "regenerate"
|
50
|
+
# button is not present. This means that all that is left is 1 or more
|
51
|
+
# buttons to get the actual reports
|
52
|
+
def requestable_report?
|
53
|
+
!last_cell.search(".regenerateButton").empty?
|
54
|
+
end
|
55
|
+
|
56
|
+
def not_settled_report?
|
57
|
+
text = last_div.text
|
49
58
|
|
50
|
-
|
51
|
-
|
52
|
-
|
59
|
+
# Is the report not settled yet? (in pending-like state)
|
60
|
+
["(Processing)", "(Open)", "In Progress"].any? do |report_state|
|
61
|
+
text.include?(report_state)
|
62
|
+
end
|
53
63
|
end
|
54
64
|
|
55
65
|
def deposit_amount
|
56
66
|
@deposit_amount = fetch_deposit_amount if !@deposit_amount
|
57
|
-
|
58
67
|
@deposit_amount
|
59
68
|
end
|
60
69
|
|
@@ -64,6 +73,10 @@ module Compactor
|
|
64
73
|
|
65
74
|
private
|
66
75
|
|
76
|
+
def last_div
|
77
|
+
last_cell.search("div")[-1]
|
78
|
+
end
|
79
|
+
|
67
80
|
def fetch_deposit_amount
|
68
81
|
deposit_cell = @node.search("td")[-2]
|
69
82
|
deposit_cell ? deposit_cell.text.gsub(/[^0-9\.]/, '').to_f : 0.0
|
@@ -76,6 +89,19 @@ module Compactor
|
|
76
89
|
def last_cell
|
77
90
|
@last_cell ||= @node.search("td")[-1]
|
78
91
|
end
|
92
|
+
|
93
|
+
def index_of_button(buttons)
|
94
|
+
raise MissingReportButtons if buttons.blank? # no buttons at all!
|
95
|
+
|
96
|
+
buttons.each_with_index do |button, index|
|
97
|
+
# XML is preferred
|
98
|
+
return index if button.node.search(".button_label").text == "Download XML"
|
99
|
+
end
|
100
|
+
|
101
|
+
# No XML, look for another type of report, use the first one, whatever
|
102
|
+
# the type
|
103
|
+
0
|
104
|
+
end
|
79
105
|
end
|
80
106
|
end
|
81
107
|
end
|
data/lib/compactor/scraper.rb
CHANGED
@@ -1,12 +1,16 @@
|
|
1
|
+
# encoding: utf-8 # make the regexes ruby 1.9 friendly
|
2
|
+
|
1
3
|
module Compactor
|
2
4
|
module Amazon
|
3
|
-
class AddressParseFailure
|
4
|
-
class AuthenticationError
|
5
|
-
class LockedAccountError
|
6
|
-
class MissingRow
|
7
|
-
class NoMarketplacesError
|
8
|
-
class NotProAccountError
|
9
|
-
class UnknownReportType
|
5
|
+
class AddressParseFailure < StandardError; end
|
6
|
+
class AuthenticationError < StandardError; end
|
7
|
+
class LockedAccountError < StandardError; end
|
8
|
+
class MissingRow < StandardError; end
|
9
|
+
class NoMarketplacesError < StandardError; end
|
10
|
+
class NotProAccountError < StandardError; end
|
11
|
+
class UnknownReportType < StandardError; end
|
12
|
+
class MissingXmlReport < StandardError; end
|
13
|
+
class MissingReportButtons < StandardError; end
|
10
14
|
|
11
15
|
ATTEMPTS_BEFORE_GIVING_UP = 15 # give up after 20 minutes
|
12
16
|
MARKETPLACE_HOMEPAGE = "https://sellercentral.amazon.com/gp/homepage.html"
|
@@ -50,7 +54,7 @@ module Compactor
|
|
50
54
|
return result
|
51
55
|
end
|
52
56
|
|
53
|
-
marketplace_name = @mechanize.page.search("#market_switch")
|
57
|
+
marketplace_name = @mechanize.page.search("#market_switch .merch-site-span")
|
54
58
|
if marketplace_name
|
55
59
|
return [ [ marketplace_name.text.strip, nil ] ]
|
56
60
|
end
|
@@ -66,15 +70,14 @@ module Compactor
|
|
66
70
|
def get_balance
|
67
71
|
go_to_past_settlements('', '')
|
68
72
|
return 0.0 if page_has_no_results?
|
69
|
-
open_row = report_rows.detect { |row|
|
70
|
-
|
71
|
-
open_row.deposit_amount
|
73
|
+
open_row = report_rows.detect { |row| row.not_settled_report? }
|
74
|
+
|
75
|
+
open_row.nil? ? 0.0 : open_row.deposit_amount
|
72
76
|
end
|
73
77
|
|
74
78
|
def reports(from, to)
|
75
79
|
from, to = parse_dates(from, to)
|
76
80
|
go_to_past_settlements(from, to)
|
77
|
-
|
78
81
|
get_reports
|
79
82
|
end
|
80
83
|
|
@@ -88,8 +91,10 @@ module Compactor
|
|
88
91
|
begin
|
89
92
|
tr = @mechanize.page.search!("//tr[@class='list-row']/td[@class='data-display-field'][text()=\"Contact Buyer:\"]").first.parent
|
90
93
|
td = tr.search!("td[2]")
|
94
|
+
|
91
95
|
order["BuyerName"] = td.text.strip
|
92
96
|
td = @mechanize.page.search!("//tr[@class='list-row']/td[@class='data-display-field']/strong[text()='Shipping Address:']").first.parent
|
97
|
+
|
93
98
|
addr_lines = td.children.map(&:text).reject { |l| l.blank? || l =~ /^Shipping Address/ }
|
94
99
|
order["ShippingAddress"] = parse_address_lines!(addr_lines)
|
95
100
|
rescue Exception => e
|
@@ -115,7 +120,7 @@ module Compactor
|
|
115
120
|
results << [ 'Amazon Seller Account', AMAZON_COM_MARKETPLACE_ID ] if name
|
116
121
|
|
117
122
|
name, marketplace_id = marketplaces.detect do |n, m_id|
|
118
|
-
|
123
|
+
(n == 'Your Checkout Website' || n == "Checkout by Amazon (Production View)") && !m_id.nil?
|
119
124
|
end
|
120
125
|
results << [ 'Checkout By Amazon', marketplace_id ] if name
|
121
126
|
|
@@ -174,10 +179,8 @@ module Compactor
|
|
174
179
|
reports[report_type] ||= []
|
175
180
|
reports[report_type] << report_streams
|
176
181
|
end
|
177
|
-
|
178
182
|
page_num += 1
|
179
183
|
end while pages_to_parse
|
180
|
-
|
181
184
|
reports.each { |type, streams| streams.flatten! }
|
182
185
|
end
|
183
186
|
|
@@ -272,13 +275,12 @@ module Compactor
|
|
272
275
|
def get_reports_in_page
|
273
276
|
reports_to_watch = []
|
274
277
|
reports = {}
|
275
|
-
|
276
278
|
return reports if page_has_no_results?
|
277
279
|
|
278
280
|
report_rows.each do |row|
|
279
281
|
if row.can_download_report?
|
280
282
|
add_to_collection(reports, row)
|
281
|
-
elsif row.
|
283
|
+
elsif row.requestable_report?
|
282
284
|
@mechanize.transact do
|
283
285
|
row.request_report
|
284
286
|
reports_to_watch << row
|
@@ -292,10 +294,7 @@ module Compactor
|
|
292
294
|
end
|
293
295
|
|
294
296
|
def parse_dates(from, to)
|
295
|
-
|
296
|
-
to = Date.parse(to.to_s).strftime("%m/%d/%y")
|
297
|
-
|
298
|
-
[from, to]
|
297
|
+
[ Date.parse_to_us_format(from.to_s), Date.parse_to_us_format(to.to_s) ]
|
299
298
|
end
|
300
299
|
|
301
300
|
def login_to_seller_central(email, password)
|
data/lib/compactor/version.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
1
|
module Compactor
|
2
|
-
VERSION = "0.1.6"
|
3
|
-
end
|
2
|
+
VERSION = "0.2.3"
|
3
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/test_helper'
|
2
|
+
|
3
|
+
class DateExtensionsTest < Test::Unit::TestCase
|
4
|
+
def test_parse_strings_separated_by_dashes
|
5
|
+
assert_equal "01/01/12", Date.parse_to_us_format("2012-1-1")
|
6
|
+
end
|
7
|
+
|
8
|
+
def test_parse_strings_separated_by_slashes
|
9
|
+
assert_equal "12/31/11", Date.parse_to_us_format("12/31/2011")
|
10
|
+
end
|
11
|
+
|
12
|
+
def test_convert_dates_to_strings
|
13
|
+
assert_equal "01/01/12", Date.parse_to_us_format(Date.parse("2012-1-1"))
|
14
|
+
end
|
15
|
+
end
|
data/test/test_helper.rb
CHANGED
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: compactor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 17
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
-
|
9
|
-
-
|
10
|
-
version: 0.1.6
|
8
|
+
- 2
|
9
|
+
- 3
|
10
|
+
version: 0.2.3
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Julio Santos
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date:
|
18
|
+
date: 2013-01-12 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
name: mechanize
|
@@ -125,13 +125,13 @@ files:
|
|
125
125
|
- LICENSE
|
126
126
|
- README.md
|
127
127
|
- Rakefile
|
128
|
-
- compactor-0.1.2.gem
|
129
128
|
- compactor.gemspec
|
130
129
|
- lib/compactor.rb
|
131
130
|
- lib/compactor/extensions.rb
|
132
131
|
- lib/compactor/scraped_row.rb
|
133
132
|
- lib/compactor/scraper.rb
|
134
133
|
- lib/compactor/version.rb
|
134
|
+
- test/date_extensions_test.rb
|
135
135
|
- test/fixtures/vcr_cassettes/AmazonReportScraper/with_bad_login/raise_error.yml
|
136
136
|
- test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/find_reports/multiple_pages.yml
|
137
137
|
- test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/find_reports/no_reports.yml
|
@@ -185,6 +185,7 @@ signing_key:
|
|
185
185
|
specification_version: 3
|
186
186
|
summary: Scrape Amazon Seller Central
|
187
187
|
test_files:
|
188
|
+
- test/date_extensions_test.rb
|
188
189
|
- test/fixtures/vcr_cassettes/AmazonReportScraper/with_bad_login/raise_error.yml
|
189
190
|
- test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/find_reports/multiple_pages.yml
|
190
191
|
- test/fixtures/vcr_cassettes/AmazonReportScraper/with_good_login/find_reports/no_reports.yml
|
data/compactor-0.1.2.gem
DELETED
Binary file
|