bazaar_sources 0.2.1.1.1.4
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +7 -0
- data/init.rb +1 -0
- data/lib/api_helpers/amazon.rb +606 -0
- data/lib/api_helpers/api_helper.rb +9 -0
- data/lib/api_helpers/external_url.rb +127 -0
- data/lib/api_helpers/httparty_nokogiri_parser.rb +14 -0
- data/lib/api_helpers/reseller_ratings_api.rb +174 -0
- data/lib/api_helpers/shopping.rb +224 -0
- data/lib/api_helpers/shopping_bulk_api.rb +514 -0
- data/lib/api_helpers/shopzilla_api.rb +230 -0
- data/lib/bazaar_sources.rb +35 -0
- data/lib/sources/amazon_source.rb +94 -0
- data/lib/sources/buy_source.rb +34 -0
- data/lib/sources/ebay_source.rb +35 -0
- data/lib/sources/epinions_source.rb +80 -0
- data/lib/sources/google_source.rb +119 -0
- data/lib/sources/price_grabber_source.rb +94 -0
- data/lib/sources/reseller_ratings_source.rb +47 -0
- data/lib/sources/shopping_source.rb +136 -0
- data/lib/sources/shopzilla_source.rb +108 -0
- data/lib/sources/simple_sources.yml +71 -0
- data/lib/sources/source.rb +242 -0
- metadata +137 -0
data/README.rdoc
ADDED
data/init.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'bazaar_sources'
|
@@ -0,0 +1,606 @@
|
|
1
|
+
require 'base64'
require 'bigdecimal'
require 'cgi'
require 'digest/md5'
require 'digest/sha2'
require 'net/http'
require 'open-uri'
require 'openssl'

require 'hpricot'
|
5
|
+
|
6
|
+
module Amazon
|
7
|
+
# Error carrying the ASIN whose lookup failed; the offer-listing scraper
# raises it when Amazon answers with a 404.
class AsinNotFoundError < StandardError
  # The ASIN handed to the constructor.
  attr_reader :asin

  # message - text forwarded to StandardError
  # asin    - the ASIN the failing request was made for
  def initialize(message, asin)
    @asin = asin
    super(message)
  end
end
|
14
|
+
|
15
|
+
# Error carrying the ASIN whose lookup failed; the offer-listing scraper
# raises it when the HTTP layer reports a Net::HTTPFatalError.
class AsinFatalError < StandardError
  # The ASIN handed to the constructor.
  attr_reader :asin

  # message - text forwarded to StandardError
  # asin    - the ASIN the failing request was made for
  def initialize(message, asin)
    @asin = asin
    super(message)
  end
end
|
22
|
+
|
23
|
+
class ProductAdvertising
|
24
|
+
# Returns the associate tag that is appended to generated Amazon links.
# The value comes from the AMAZON_ASSOCIATE_TAG constant, which is defined
# outside this file.
def associate_tag
  AMAZON_ASSOCIATE_TAG
end
|
27
|
+
|
28
|
+
# Builds the URL of a seller's "at a glance" page.
#
# seller_id - value placed verbatim into the +seller+ query parameter
def at_a_glance_url(seller_id)
  glance_page = 'http://www.amazon.com/gp/help/seller/at-a-glance.html'
  "#{glance_page}?seller=#{seller_id}"
end
|
31
|
+
|
32
|
+
# Builds the tagged product-page URL for one merchant's offer.
#
# asin          - product ASIN, inserted into the URL path
# merchant_type - 'seller' selects the +seller+ query parameter; any other
#                 value selects +m+
# merchant_id   - value of that query parameter
def offer_url(asin, merchant_type, merchant_id)
  id_param = merchant_type == 'seller' ? 'seller' : 'm'
  "http://www.amazon.com/exec/obidos/ASIN/#{asin}/?#{id_param}=#{merchant_id}&tag=#{associate_tag}"
end
|
35
|
+
|
36
|
+
# Builds the URL of the offer listing page for +asin+, restricted to
# offers in new condition (condition=new).
def offer_listing_url(asin)
  listing_root = 'http://www.amazon.com/gp/offer-listing'
  "#{listing_root}/#{asin}?condition=new"
end
|
39
|
+
|
40
|
+
# Builds a tagged redirect URL that lands on the accessories section of the
# product page for +asin+. The destination is URL-escaped and passed in the
# +location+ parameter of Amazon's redirect endpoint.
def accessories_url(asin)
  destination = CGI::escape("http://www.amazon.com/dp/accessories/#{asin}/#accessories")
  "http://www.amazon.com/gp/redirect.html?ie=UTF8&tag=#{associate_tag}&linkCode=ur2&camp=1789&creative=9325&location=#{destination}"
end
|
44
|
+
|
45
|
+
# Returns the offers for +asin+ as a hash keyed by merchant code.
#
# asin                    - product ASIN
# featured_merchants_only - when true, only featured-merchant offers are
#                           requested from the scraper
def find_offers_by_asin(asin, featured_merchants_only=false)
  # The API-based variant (find_offers_by_asin_via_api) is deliberately not
  # called here; offers come from scraping the offer listing page.
  offers_by_merchant = scrape_offer_listing_page_to_hash(asin, featured_merchants_only)
  offers_by_merchant
end
|
49
|
+
|
50
|
+
# Issues an ItemLookup request with the 'Reviews' response group for +asin+
# (whitespace-stripped) and returns whatever make_amazon_api_request_raw
# returns, without parsing it.
def find_product_review_info_by_asin_raw(asin)
  make_amazon_api_request_raw('Operation'     => 'ItemLookup',
                              'ResponseGroup' => 'Reviews',
                              'ItemId'        => asin.strip,
                              'IdType'        => 'ASIN')
end
|
57
|
+
|
58
|
+
# Looks up one product with an ItemLookup request ('Medium' response group)
# and flattens the parsed response into a plain hash.
#
# asin - product ASIN; surrounding whitespace is stripped for the request.
#
# Returns a hash with the keys :asin, :name, :list_price, :model, :mpn,
# :upc, :manufacturer, :small_image, :medium_image, :large_image, :features
# and :editorial_reviews.
#   * :list_price is a Float (0.0 when the response carries no list price).
#   * each image entry is nil when the response has no such image, otherwise
#     a hash with :url, :width and :height (all strings).
#   * :editorial_reviews maps each review's Source to its Content.
def find_product_by_asin(asin)
  request = {'Operation' => 'ItemLookup',
             'ResponseGroup' => 'Medium',
             'ItemId' => asin.strip,
             'IdType' => 'ASIN'}
  res = make_amazon_api_request request

  item = res / 'Items' / 'Item'
  item_attributes = item / 'ItemAttributes'

  # The amount is divided by 100 (presumably it is expressed in cents).
  # Both branches yield a Float so callers always see the same type.
  list_price_amount = (item_attributes / 'ListPrice' / 'Amount').inner_html
  list_price = if list_price_amount.nil? || list_price_amount.empty?
                 0.0
               else
                 list_price_amount.to_f / 100.0
               end

  # One entry per EditorialReview element. Iterating the EditorialReviews
  # container instead would merge the sources/contents of all reviews into
  # a single concatenated key/value pair.
  editorial_reviews = (item / 'EditorialReviews' / 'EditorialReview').inject({}) do |reviews, review|
    reviews[(review / 'Source').inner_html] = (review / 'Content').inner_html
    reviews
  end

  # Returns the URL/width/height of the named image element, or nil when
  # the response does not contain that element.
  image_details = lambda do |tag|
    image = item.at(tag)
    if image.nil?
      nil
    else
      {:url => (image / 'URL').inner_html,
       :width => (image / 'Width').inner_html,
       :height => (image / 'Height').inner_html}
    end
  end

  {:asin => (item / 'ASIN').inner_html,
   :name => (item_attributes / 'Title').inner_html,
   :list_price => list_price,
   :model => (item_attributes / 'Model').inner_html,
   :mpn => (item_attributes / 'MPN').inner_html,
   :upc => (item_attributes / 'UPC').inner_html,
   :manufacturer => (item_attributes / 'Manufacturer').inner_html,
   :small_image => image_details.call('SmallImage'),
   :medium_image => image_details.call('MediumImage'),
   :large_image => image_details.call('LargeImage'),
   :features => (item_attributes / 'Feature').collect { |feature| feature.inner_html }, # specifications
   :editorial_reviews => editorial_reviews}
end
|
120
|
+
|
121
|
+
# Runs an ItemSearch over all search indexes for +search_terms+ and returns
# an array of hashes with :asin, :name and :small_image_url ('' when the
# item has no SmallImage element). An item whose extraction raises is left
# out of the result.
def item_search(search_terms)
  res = make_amazon_api_request('Operation' => 'ItemSearch',
                                'Keywords' => search_terms,
                                'SearchIndex' => 'All',
                                'ResponseGroup' => 'Images,ItemAttributes')

  (res / 'Items' / 'Item').inject([]) do |products, item|
    begin
      thumbnail = item.at('SmallImage')
      thumbnail_url = thumbnail.nil? ? '' : (thumbnail / 'URL').inner_html
      products << {
        :asin => (item / 'ASIN').inner_html,
        :name => (item / 'ItemAttributes' / 'Title').inner_html,
        :small_image_url => thumbnail_url
      }
    rescue
      # best effort: skip items that cannot be read
    end
    products
  end
end
|
147
|
+
|
148
|
+
# Collects what is known about a seller from a SellerLookup request and,
# best effort, from the scraped "at a glance" page.
#
# Returns a hash with :seller_id, :merchant_name, :glance_page_url,
# :average_feedback_rating (Float, 0.0 when absent), :total_feedback
# (Integer, 0 when absent), :logo_url and :homepage. The merchant name is
# taken from SellerName, then Nickname, then the scraped page, and finally
# falls back to a generic label that contains the seller id.
def seller_lookup(seller_id)
  res = make_amazon_api_request('Operation' => 'SellerLookup',
                                'SellerId' => seller_id)

  seller_field = lambda { |field| res.at("/SellerLookupResponse/Sellers/Seller/#{field}") }

  name_node = seller_field.call('SellerName') || seller_field.call('Nickname')
  merchant_name = name_node.nil? ? nil : name_node.inner_text

  logo_url = nil
  homepage = nil
  begin
    details = scrape_at_a_glance_page(seller_id)
    logo_url = details[:logo_url]
    merchant_name = details[:merchant_name] if merchant_name.nil? || merchant_name.empty?
    homepage = details[:homepage]
  rescue
    # scraping is optional; keep whatever was gathered so far
  end

  merchant_name = "Amazon merchant (#{seller_id})" if merchant_name.nil? || merchant_name.empty?

  glance_node = seller_field.call('GlancePage')
  rating_node = seller_field.call('AverageFeedbackRating')
  feedback_node = seller_field.call('TotalFeedback')

  { :seller_id => seller_id,
    :merchant_name => merchant_name,
    :glance_page_url => glance_node.nil? ? nil : glance_node.inner_text,
    :average_feedback_rating => rating_node.nil? ? 0.0 : rating_node.inner_text.to_f,
    :total_feedback => feedback_node.nil? ? 0 : feedback_node.inner_text.to_i,
    :logo_url => logo_url,
    :homepage => homepage }
end
|
189
|
+
|
190
|
+
private
|
191
|
+
|
192
|
+
# Collects the new-condition offers for an ASIN through ItemLookup
# ('Large,OfferFull'), walking every offer page the response announces.
#
# asin                    - product ASIN; a stripped copy is used, the
#                           caller's string is left untouched
# featured_merchants_only - true requests MerchantId 'Featured', otherwise
#                           'All'
#
# Returns a hash keyed by merchant/seller id. When the same id shows up more
# than once only the cheapest offer is kept. Each value is a hash with
# :merchant_code, :merchant_name, :merchant_logo_url, :cpc, :price
# (BigDecimal), :shipping, :offer_url, :offer_tier and :merchant_type.
def find_offers_by_asin_via_api(asin, featured_merchants_only=false)
  asin = asin.strip
  request = {'Operation' => 'ItemLookup',
             'ResponseGroup' => 'Large,OfferFull',
             'ItemId' => asin,
             'IdType' => 'ASIN',
             'MerchantId' => featured_merchants_only ? 'Featured' : 'All',
             'Condition' => 'New',
             'OfferPage' => 1}
  req = make_amazon_api_request request
  offers = {}

  total_offer_pages = (req / 'Items' / 'Offers' / 'TotalOfferPages').inner_html.to_i

  # enumerate through all the offer pages
  1.upto(total_offer_pages) do |page|
    # page 1 was already fetched above; only later pages need a new request
    if page != 1
      request['OfferPage'] = page
      req = make_amazon_api_request request
    end

    # loop through all the offers
    (req / 'Items' / 'Offers' / 'Offer').each do |offer|
      # find either the seller id or the merchant id
      id = (offer / 'Merchant' / 'MerchantId').inner_html
      if id.nil? || id.empty?
        id = (offer / 'Seller' / 'SellerId').inner_html
        name = (offer / 'Seller' / 'Nickname').inner_html
        type = 'seller'
      else
        name = (offer / 'Merchant' / 'Name').inner_html
        type = 'merchant'
      end

      listing = offer / 'OfferListing'

      # sometimes we get a SalePrice; most of the time we just get Price
      price_node = listing / 'SalePrice'
      price_node = listing / 'Price' unless price_node.size > 0
      unformatted_price = (price_node / 'Amount').inner_html
      formatted_price = (price_node / 'FormattedPrice').inner_html

      if formatted_price == 'Too low to display'
        offer_listing_id = (listing / 'OfferListingId').inner_html
        unformatted_price, formatted_price = reveal_too_low_to_display_price_from_offer_listing_id(offer_listing_id)
      end

      # The search result is an (always truthy) element list, so presence has
      # to be tested by size. A missing Quantity means "unknown" (nil), not 0.
      quantity_node = listing / 'Quantity'
      quantity = quantity_node.size > 0 ? quantity_node.inner_html.to_i : nil
      next unless quantity.nil? || quantity > 0

      # Build the price as a BigDecimal straight away; going through a Float
      # (amount * 0.01) can leak binary rounding noise into the decimal.
      price = if !unformatted_price.nil? && !unformatted_price.empty?
                BigDecimal(unformatted_price.to_i.to_s) / 100 # convert 21995 to 219.95
              elsif !formatted_price.nil? && !formatted_price.empty?
                # sometimes we only get a formatted price and no amount
                BigDecimal(formatted_price.gsub(/[$,]/, '').to_f.to_s)
              else
                BigDecimal('0.0') # should never get here.
              end

      # do we already have it in the offers hash?
      # if so, we only want a lower price to override the entry.
      if !offers[id] || offers[id][:price] > price
        offers[id] = { :merchant_code => id,
                       :merchant_name => CGI::unescapeHTML(name),
                       :merchant_logo_url => nil,
                       :cpc => nil,
                       :price => price,
                       :shipping => nil,
                       :offer_url => offer_url(asin, type, id),
                       :offer_tier => type == 'seller' ? 2 : 1,
                       :merchant_type => type }
      end
    end
  end
  offers
end
|
279
|
+
|
280
|
+
# Re-keys the scraped offers for +asin+ by their :merchant_code. If several
# offers share a merchant code, the one appearing last in the scraped list
# wins.
def scrape_offer_listing_page_to_hash(asin, featured_merchants_only=false)
  scrape_offer_listing_page(asin, featured_merchants_only).inject({}) do |by_merchant, offer|
    by_merchant[offer[:merchant_code]] = offer
    by_merchant
  end
end
|
288
|
+
|
289
|
+
# Fetches the offer listing page for +asin+ and returns the parsed offers as
# an array (see parse_offer_listing_rows for the shape of each entry).
#
# asin                    - product ASIN
# featured_merchants_only - when true, only tables whose text contains
#                           'Featured Merchants' are parsed; otherwise
#                           tables whose text contains 'New' are parsed too
#
# Returns [] when the page has no 'div.resultsset' element.
#
# Raises Amazon::AsinNotFoundError when the fetch fails with a 404 and
# Amazon::AsinFatalError when it fails with a Net::HTTPFatalError; other
# Net::HTTPServerException errors are re-raised unchanged.
def scrape_offer_listing_page(asin, featured_merchants_only=false)
  begin
    url = offer_listing_url(asin)
    doc = scrape_page(url, Source.amazon_source.offer_ttl_seconds / 2, 'offer-listing')
  rescue Net::HTTPServerException => ex
    raise Amazon::AsinNotFoundError.new(ex.message, asin) if ex.message =~ /^404/
    raise
  rescue Net::HTTPFatalError => ex
    raise Amazon::AsinFatalError.new(ex.message, asin)
  end

  offers = []
  offers_box_element = doc.at('div.resultsset')
  # No results container on the page: report "no offers" rather than
  # failing with a NoMethodError on nil.
  return offers if offers_box_element.nil?

  offers_box_element.search('table').each do |offer_type_header_table|
    inner_text = offer_type_header_table.inner_text
    if inner_text.include?('Featured Merchants')
      featured_offer_rows = offer_type_header_table.search('tbody.result/tr')
      offers += parse_offer_listing_rows(asin, featured_offer_rows, true)
    elsif !featured_merchants_only && inner_text.include?('New')
      other_offer_rows = offer_type_header_table.search('tbody.result/tr')
      # offset the indexes so they continue after the offers found so far
      offers += parse_offer_listing_rows(asin, other_offer_rows, false, offers.length)
    end
  end
  offers
end
|
334
|
+
|
335
|
+
# Turns the <tr> rows of one offer table into offer hashes.
#
# asin               - product ASIN (used for the offer URL and log output)
# offer_listing_rows - row elements taken from the offer listing page
# featured_merchants - true when the rows come from the featured-merchants
#                      table; yields :offer_tier 1, otherwise 3
# offer_index_offset - added to each row index to form :original_index
#
# A row is skipped when no price can be determined, when it has no seller
# information cell, when no seller id can be found, or when its availability
# text marks it as not in stock.
#
# Returns an array of hashes with :original_index, :merchant_code,
# :merchant_name (nil when the row shows neither a seller link nor a logo),
# :merchant_logo_url, :cpc, :price (BigDecimal), :shipping (BigDecimal or
# nil), :offer_url, :offer_tier, :merchant_rating, :num_merchant_reviews and
# :merchant_type.
def parse_offer_listing_rows(asin, offer_listing_rows, featured_merchants, offer_index_offset=0)
  seller_id_pattern = /seller=([^&#]+)/
  # Links that may carry the seller id, tried in this order.
  seller_link_selectors = ["div.rating/a",
                           "div.availability/a[text() *= 'shipping rates']",
                           "div.rating//a[text() = 'Seller Profile']"]

  offers = []
  offer_listing_rows.each_with_index do |row, offer_index|
    # Offer Listing ID
    offer_listing_id = nil
    offer_listing_tag = row.at("td.readytobuy/form/input[@name *= 'offering-id.']")
    unless offer_listing_tag.nil?
      offer_listing_id = offer_listing_tag.attributes['name'].sub('offering-id.', '')
    end

    # Price
    price = nil
    price_element = row.at("span.price")
    price = price_to_f(price_element.inner_text) unless price_element.nil?
    add_to_cart_span = row.at("td/span[text() *= 'Add to cart to see price.']") ||
                       row.at("td/span[text() *= 'Price not displayed.']")
    if add_to_cart_span && !offer_listing_id.nil? && !offer_listing_id.empty?
      # The helper returns [amount, formatted price]; the formatted price is
      # the element that gets parsed here.
      price = price_to_f(reveal_too_low_to_display_price_from_offer_listing_id(offer_listing_id)[1])
    end
    if price.nil?
      puts "Failed to find offer price while scraping the offer listing page; ASIN: #{asin}. Skipping."
      next
    end

    # Shipping
    shipping = nil
    shipping_element = row.at("div.shipping_block/span.price_shipping")
    if shipping_element.nil?
      # no explicit shipping price: a "supersaver" marker counts as 0.0
      shipping = 0.0 unless row.at("span.supersaver").nil?
    else
      shipping = price_to_f(shipping_element.inner_text)
    end

    # Rows without a seller information cell never produce an offer.
    seller_info = row.at("td[/ul.sellerInformation]")
    next if seller_info.nil?

    # Number of merchant reviews
    num_merchant_reviews = nil
    rating_block = seller_info.at("div.rating")
    unless rating_block.nil?
      review_count = rating_block.inner_text[/\((\d+) ratings\)/, 1]
      num_merchant_reviews = review_count.to_i unless review_count.nil?
    end

    # Merchant rating; the image name encodes the stars (e.g. "4-5" is
    # turned into 4.5 and scaled by 20).
    merchant_rating = nil
    rating_img = seller_info.at("div.rating/img")
    unless rating_img.nil?
      stars = rating_img.attributes['src'].to_s[/stars\-([\d\-]+)/, 1]
      merchant_rating = (stars.sub(/\-/, '.').to_f * 20).to_i unless stars.nil?
    end

    # Seller ID: first candidate link whose href carries a seller parameter.
    # A link without that parameter is ignored instead of raising.
    seller_id = nil
    seller_link_selectors.each do |selector|
      break unless seller_id.nil?
      link = seller_info.at(selector)
      seller_id = link.attributes['href'].to_s[seller_id_pattern, 1] unless link.nil?
    end
    if seller_id.nil?
      puts "Failed to find seller_id while scraping the offer listing page; ASIN: #{asin}, seller info: #{seller_info.inner_html}"
      next
    end

    # Seller's Name & logo URL
    merchant_type = 'merchant'
    name = nil
    logo_url = nil
    seller_label_link = seller_info.at('div.seller/a')
    if seller_label_link.nil?
      seller_logo_img = seller_info.at('a/img') || seller_info.at('img')
      unless seller_logo_img.nil?
        name = safe_strip(seller_logo_img.attributes['alt'])
        logo_url = seller_logo_img.attributes['src']
      end
    else
      name = safe_strip(seller_label_link.inner_text)
      merchant_type = 'seller'
    end

    # Availability
    in_stock = true
    availability_element = seller_info.at("div.availability")
    unless availability_element.nil?
      availability_info = availability_element.inner_text
      if availability_info.match(/out of stock/i)
        in_stock = false
      elsif availability_info.match(/Usually ships within .+ days/i)
        in_stock = true
      elsif availability_info.match(/Usually ships within .+ months/i)
        in_stock = false
      elsif availability_info.match(/In Stock/i)
        in_stock = true
      end
    end
    next unless in_stock

    offers << { :original_index => offer_index + offer_index_offset,
                :merchant_code => seller_id,
                :merchant_name => name.nil? ? nil : CGI::unescapeHTML(name),
                :merchant_logo_url => logo_url,
                :cpc => Source.amazon_source.cpc,
                :price => BigDecimal(price.to_s),
                :shipping => shipping.nil? ? nil : BigDecimal(shipping.to_s),
                :offer_url => offer_url(asin, merchant_type, seller_id),
                :offer_tier => featured_merchants ? 1 : 3,
                :merchant_rating => merchant_rating,
                :num_merchant_reviews => num_merchant_reviews,
                :merchant_type => merchant_type }
  end
  offers
end
|
465
|
+
|
466
|
+
# reveal a too low to display price by adding it to the cart
|
467
|
+
# returns the amount (in pennies) and the formatted price
|
468
|
+
def reveal_too_low_to_display_price_from_offer_listing_id(offer_listing_id)
  # A CartCreate request holding one unit of the offer; the price is read
  # from the cart's SubTotal element.
  cart_response = make_amazon_api_request('Operation' => 'CartCreate',
                                          'AssociateTag' => AMAZON_ASSOCIATE_TAG,
                                          'Item.1.OfferListingId' => offer_listing_id,
                                          'Item.1.Quantity' => 1)
  subtotal = cart_response / 'Cart' / 'CartItems' / 'SubTotal'
  # result order: [amount, formatted price]
  [(subtotal / 'Amount').inner_html, (subtotal / 'FormattedPrice').inner_html]
end
|
478
|
+
|
479
|
+
# Scrapes a seller's "at a glance" page.
#
# Returns a hash with :merchant_name, :logo_url and :homepage; each value is
# nil when the corresponding element is not found on the page.
def scrape_at_a_glance_page(seller_id)
  url = at_a_glance_url(seller_id)
  # 600 seconds, i.e. ten minutes, written as a plain Integer so this method
  # does not depend on ActiveSupport's Integer#minutes being loaded.
  doc = scrape_page(url, 10 * 60, 'seller')

  merchant_name = nil
  merchant_logo_url = nil
  merchant_description_box_element = doc.at('//table//tr//td//h1[@class = "sans"]/strong/../..')
  unless merchant_description_box_element.nil?
    name_element = merchant_description_box_element.at('//h1/strong')
    merchant_name = name_element.inner_text.strip unless name_element.nil?

    logo_element = merchant_description_box_element.at('//img')
    merchant_logo_url = logo_element.attributes['src'] unless logo_element.nil?
  end

  # the homepage is the link whose text equals its own href
  homepage_link = doc.at('//tr[@class = "tiny"]/td/a[@target = "_blank" and @href = text()]')
  homepage = homepage_link.nil? ? nil : homepage_link.inner_text

  { :merchant_name => merchant_name,
    :logo_url => merchant_logo_url,
    :homepage => homepage }
end
|
499
|
+
|
500
|
+
# Downloads +url+ and returns the body parsed by Hpricot.
#
# cache_ttl and context_name are accepted but not used by this
# implementation.
def scrape_page(url, cache_ttl, context_name=nil)
  # shoot off the request and parse whatever comes back
  Hpricot(do_api_request(url))
end
|
505
|
+
|
506
|
+
# Returns the object bound to the CACHE constant, or nil when no such
# constant is visible from here. A found cache is memoized in @cache; a nil
# result is looked up again on the next call.
def cache
  # defined? performs the same constant lookup as the former
  # eval('CACHE') rescue nil, without eval and without swallowing
  # unrelated errors.
  @cache ||= (defined?(CACHE) ? CACHE : nil)
end
|
509
|
+
|
510
|
+
# make any API request given a hash of querystring parameters
|
511
|
+
def make_amazon_api_request(user_params)
  # nil (or false) from the raw request is passed through as nil; anything
  # else is parsed as XML
  xml = make_amazon_api_request_raw(user_params)
  return nil unless xml
  Hpricot.XML(xml)
end
|
515
|
+
|
516
|
+
# make API request, but don't process through Hpricot so the caller can
|
517
|
+
# process (with, say, Nokogiri)
|
518
|
+
def make_amazon_api_request_raw(user_params)
  # user supplied parameters win over the defaults
  params = {'Service' => 'AWSECommerceService',
            'Version' => '2007-07-16',
            'AWSAccessKeyId' => AMAZON_ACCESS_KEY_ID}.merge(user_params)

  # Canonical query string (pairs sorted by name). It only serves as input
  # for the cache key; the request URL itself is built by sign_url.
  query_string = params.sort { |a, b| a[0] <=> b[0] }.collect do |name, value|
    "#{name}=#{CGI::escape(CGI::unescape(value.to_s))}"
  end.join('&')
  key = "amazon-api-#{Digest::MD5.hexdigest(query_string)}-v2"

  # serve from the cache when one is configured and holds this call
  store = cache
  cached = store ? store.get(key) : nil
  return cached if cached

  # nope.. gotta get a new one.
  url = sign_url('ecs.amazonaws.com', '/onca/xml', params)
  fresh = do_api_request(url)
  store.set(key, fresh, Source.amazon_source.offer_ttl_seconds) if store
  fresh
end
|
541
|
+
|
542
|
+
# create the Net::HTTP object to actually do the request
|
543
|
+
# Performs a GET for +url+ and returns the response body.
#
# url         - URL string (parsed with URI.safe_parse, defined elsewhere)
# retry_num   - number of attempts already spent (used by the recursion)
# max_retries - attempt budget shared by timeouts, redirects and 503s
#
# Timeouts and 503 responses are retried (a 503 after a two second sleep),
# redirects are followed; each of them consumes one attempt.
#
# Raises StandardError once the attempt budget is used up or when the
# response is of an unexpected kind, and whatever resp.error! raises for
# other client/server error responses.
def do_api_request(url, retry_num=0, max_retries=10)
  if retry_num >= max_retries
    raise StandardError, "Failed to get Amazon URL with after #{max_retries} tries for url: #{url.inspect}"
  end

  #puts "Amazon API request URL: #{url}"
  req_url = URI.safe_parse(url)
  http = Net::HTTP.new(req_url.host, 80)
  http.read_timeout = 5 # 5 second timeout
  resp = nil
  begin
    http.start do |web|
      resp = web.get("#{req_url.path}?#{req_url.query}")
    end
  rescue Timeout::Error
    # Timed out, try again. The retry's result has to be returned from
    # here; otherwise control falls through to the case below with
    # resp == nil and raises even though the retry succeeded.
    return do_api_request(url, retry_num + 1, max_retries)
  end

  case resp
  when Net::HTTPSuccess
    resp.body
  when Net::HTTPRedirection
    do_api_request(resp['location'], retry_num + 1, max_retries)
  when Net::HTTPServiceUnavailable
    puts "GOT Net::HTTPServiceUnavailable FROM AMAZON; SLEEPING AND TRYING IN TWO SECONDS. RETRY NUM #{retry_num}."
    sleep(2)
    do_api_request(url, retry_num + 1, max_retries)
  when Net::HTTPClientError, Net::HTTPServerError
    puts "GOT #{resp.class.name} FROM AMAZON."
    resp.error!
  else
    raise StandardError, "Failed to get Amazon URL with unknown error: #{resp.inspect} For url: #{url.inspect}"
  end
end
|
582
|
+
|
583
|
+
# nil-tolerant String#strip: nil stays nil, anything else is stripped.
def safe_strip(value)
  return nil if value.nil?
  value.strip
end
|
586
|
+
|
587
|
+
# Extracts the first number from a price text such as "$1,299.99" or
# "+ $5.99" and returns it as a Float.
#
# value - price text (String) or nil
#
# Returns nil for nil, for an empty string and for text without digits.
def price_to_f(value)
  return nil if value.nil? || value.empty?

  # Match the first numeric token (thousands separators allowed) in the
  # untouched text. Stripping every non-digit first would glue separate
  # numbers together ("$12.99 + $3.99" would become "12.993.99").
  number = value[/\d[\d,]*(?:\.\d+)?|\.\d+/]
  number.nil? ? nil : number.delete(',').to_f
end
|
591
|
+
|
592
|
+
# Builds a signed GET URL (HMAC-SHA256, keyed with AMAZON_SECRET_ACCESS_KEY)
# for the given request parameters.
#
# host   - host name, e.g. 'ecs.amazonaws.com'
# path   - request path, e.g. '/onca/xml'
# params - hash of query parameters; it is not modified (the Timestamp is
#          added to a copy)
#
# Returns "http://<host><path>?<sorted, encoded params>&Signature=<sig>".
def sign_url(host, path, params)
  signed_params = params.merge('Timestamp' => Time.now.utc.strftime('%Y-%m-%dT%H:%M:%SZ'))

  # Name-sorted pairs; values are unescaped first so that already escaped
  # input is not escaped twice.
  params_string = signed_params.sort { |a, b| a[0] <=> b[0] }.collect do |name, value|
    "#{name}=#{CGI::escape(CGI::unescape(value.to_s))}"
  end.join('&')
  # CGI::escape is form encoding; the signature needs RFC 3986 style
  # encoding: space as %20, '*' escaped, '~' left alone.
  params_string = params_string.gsub('+', '%20').gsub('*', '%2A').gsub('%7E', '~')

  query = "GET\n#{host}\n#{path}\n#{params_string}"

  # OpenSSL::HMAC replaces Digest::HMAC, which is gone from Ruby >= 2.2.
  hmac = OpenSSL::HMAC.digest(OpenSSL::Digest.new('sha256'), AMAZON_SECRET_ACCESS_KEY, query)
  # 'm0' is Base64 without line breaks
  signature = CGI::escape([hmac].pack('m0'))
  "http://#{host}#{path}?#{params_string}&Signature=#{signature}"
end
|
605
|
+
end
|
606
|
+
end
|