bazaar_sources 0.2.1.1.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.rdoc ADDED
@@ -0,0 +1,7 @@
1
+ = Bazaar Sources
2
+
3
+ * http://gems.digitaladvisor.com
4
+
5
+ == DESCRIPTION:
6
+
7
+ Bazaar Sources is...
data/init.rb ADDED
@@ -0,0 +1 @@
1
+ require 'bazaar_sources'
@@ -0,0 +1,606 @@
1
require 'hpricot'
require 'open-uri'
require 'cgi'
require 'digest/sha2'
require 'openssl'
5
+
6
+ module Amazon
7
# Error carrying the ASIN that could not be found. The offer-listing
# scraper raises it when the page request fails with a 404.
class AsinNotFoundError < StandardError
  # message - human readable description, forwarded to StandardError.
  # asin    - the ASIN the failed request was made for.
  def initialize(message, asin)
    @asin = asin
    super(message)
  end

  # The ASIN the failed request was made for.
  def asin
    @asin
  end
end
14
+
15
# Error carrying the ASIN whose request failed fatally. The offer-listing
# scraper raises it when the page request ends in a Net::HTTPFatalError.
class AsinFatalError < StandardError
  # message - human readable description, forwarded to StandardError.
  # asin    - the ASIN the failed request was made for.
  def initialize(message, asin)
    @asin = asin
    super(message)
  end

  # The ASIN the failed request was made for.
  def asin
    @asin
  end
end
22
+
23
+ class ProductAdvertising
24
# Returns the associate tag used when building affiliate links.
# The value comes from the AMAZON_ASSOCIATE_TAG constant, which is not
# defined in this file and is presumably supplied by the host application.
def associate_tag
  tag = AMAZON_ASSOCIATE_TAG
  tag
end
27
+
28
# Builds the URL of a seller's "at a glance" help page.
#
# seller_id - the seller identifier, interpolated as-is (not URL-escaped).
def at_a_glance_url(seller_id)
  glance_page = 'http://www.amazon.com/gp/help/seller/at-a-glance.html'
  "#{glance_page}?seller=#{seller_id}"
end
31
+
32
# Builds the affiliate link to one merchant's offer for a product.
#
# asin          - the product ASIN.
# merchant_type - 'seller' selects the "seller" query parameter; any other
#                 value selects "m".
# merchant_id   - the seller or merchant identifier.
#
# Values are interpolated as-is (not URL-escaped).
def offer_url(asin, merchant_type, merchant_id)
  id_param = merchant_type == 'seller' ? 'seller' : 'm'
  "http://www.amazon.com/exec/obidos/ASIN/#{asin}/?#{id_param}=#{merchant_id}&tag=#{associate_tag}"
end
35
+
36
# Builds the URL of the offer listing page for a product, restricted to
# offers in "new" condition.
#
# asin - the product ASIN, interpolated as-is (not URL-escaped).
def offer_listing_url(asin)
  listing_root = 'http://www.amazon.com/gp/offer-listing'
  "#{listing_root}/#{asin}?condition=new"
end
39
+
40
# Builds an affiliate redirect link that lands on the accessories section
# of a product page.
#
# asin - the product ASIN.
#
# The destination page URL is CGI-escaped and passed as the "location"
# parameter of the redirect URL, together with the associate tag.
def accessories_url(asin)
  destination = "http://www.amazon.com/dp/accessories/#{asin}/#accessories"
  query = "ie=UTF8&tag=#{associate_tag}&linkCode=ur2&camp=1789&creative=9325&location=#{CGI::escape(destination)}"
  "http://www.amazon.com/gp/redirect.html?#{query}"
end
44
+
45
# Returns the offers for a product as a hash keyed by merchant code.
#
# asin                    - the product ASIN.
# featured_merchants_only - when true, only featured-merchant offers are
#                           requested.
#
# Offers are obtained by scraping the offer listing page; the API-based
# variant (find_offers_by_asin_via_api) is kept in this class but not used.
def find_offers_by_asin(asin, featured_merchants_only=false)
  offers_by_merchant = scrape_offer_listing_page_to_hash(asin, featured_merchants_only)
  offers_by_merchant
end
49
+
50
# Issues an ItemLookup request with the "Reviews" response group and
# returns whatever make_amazon_api_request_raw returns (the unparsed
# response), so the caller can parse it with the library of its choice.
#
# asin - the product ASIN; surrounding whitespace is stripped.
def find_product_review_info_by_asin_raw(asin)
  lookup = {}
  lookup['Operation'] = 'ItemLookup'
  lookup['ResponseGroup'] = 'Reviews'
  lookup['ItemId'] = asin.strip
  lookup['IdType'] = 'ASIN'
  make_amazon_api_request_raw(lookup)
end
57
+
58
# Looks up one product with an ItemLookup request (response group
# "Medium") and flattens the parsed response into a plain hash.
#
# asin - the product ASIN; surrounding whitespace is stripped.
#
# Returns a hash with the keys
#   :asin, :name, :model, :mpn, :upc, :manufacturer - strings taken from the
#       response ('' when the element is absent)
#   :list_price  - ListPrice/Amount divided by 100.0, or 0 when absent
#   :small_image, :medium_image, :large_image - {:url, :width, :height}
#       (strings) or nil when the response has no such image
#   :features    - array with the content of every Feature element
#   :editorial_reviews - hash mapping each review's Source to its Content
def find_product_by_asin(asin)
  request = {'Operation' => 'ItemLookup',
             'ResponseGroup' => 'Medium',
             'ItemId' => asin.strip,
             'IdType' => 'ASIN'}
  res = make_amazon_api_request request

  item = res / 'Items' / 'Item'
  item_attributes = item / 'ItemAttributes'

  # The amount is expressed in hundredths of the currency unit.
  list_price_amount = (item_attributes / 'ListPrice' / 'Amount').inner_html
  if list_price_amount.nil? || list_price_amount.empty?
    list_price = 0
  else
    list_price = list_price_amount.to_f / 100.0
  end

  # One hash entry per EditorialReview element. Iterating the enclosing
  # EditorialReviews element instead would glue all sources together into
  # a single key and all contents into a single value.
  editorial_reviews = {}
  (item / 'EditorialReviews' / 'EditorialReview').each do |review|
    editorial_reviews[(review / 'Source').inner_html] = (review / 'Content').inner_html
  end

  {:asin => (item / 'ASIN').inner_html,
   :name => (item_attributes / 'Title').inner_html,
   :list_price => list_price,
   :model => (item_attributes / 'Model').inner_html,
   :mpn => (item_attributes / 'MPN').inner_html,
   :upc => (item_attributes / 'UPC').inner_html,
   :manufacturer => (item_attributes / 'Manufacturer').inner_html,
   :small_image => product_image_info(item, 'SmallImage'),
   :medium_image => product_image_info(item, 'MediumImage'),
   :large_image => product_image_info(item, 'LargeImage'),
   :features => (item_attributes / 'Feature').collect { |feature| feature.inner_html }, # specifications
   :editorial_reviews => editorial_reviews}
end

# Extracts URL, width and height of the first image element named
# image_tag below item; returns nil when there is no such element.
def product_image_info(item, image_tag)
  image = item.at(image_tag)
  return nil if image.nil?

  {:url => (image / 'URL').inner_html,
   :width => (image / 'Width').inner_html,
   :height => (image / 'Height').inner_html}
end
private :product_image_info
120
+
121
# Runs an ItemSearch over all search indexes and returns a lightweight
# summary of every item in the response.
#
# search_terms - the keywords to search for.
#
# Returns an array of hashes with :asin, :name and :small_image_url
# ('' when the item has no SmallImage element). An item whose extraction
# raises is silently left out of the result.
def item_search(search_terms)
  response = make_amazon_api_request('Operation' => 'ItemSearch',
                                     'Keywords' => search_terms,
                                     'SearchIndex' => 'All',
                                     'ResponseGroup' => 'Images,ItemAttributes')

  (response / 'Items' / 'Item').inject([]) do |found, item|
    begin
      thumbnail = item.at('SmallImage')
      thumbnail_url = thumbnail.nil? ? '' : (thumbnail / 'URL').inner_html
      found << {
        :asin => (item / 'ASIN').inner_html,
        :name => (item / 'ItemAttributes' / 'Title').inner_html,
        :small_image_url => thumbnail_url
      }
    rescue
      # best effort: skip items that cannot be read
    end
    found
  end
end
147
+
148
# Collects what is known about a seller from a SellerLookup request plus a
# best-effort scrape of the seller's "at a glance" page.
#
# seller_id - the seller identifier.
#
# Returns a hash with :seller_id, :merchant_name, :glance_page_url,
# :average_feedback_rating (Float, 0.0 when absent), :total_feedback
# (Integer, 0 when absent), :logo_url and :homepage. The merchant name is
# taken from SellerName, then Nickname, then the scraped page, and finally
# falls back to "Amazon merchant (<seller_id>)". Any error raised while
# scraping is swallowed; fields not yet filled in at that point stay nil.
def seller_lookup(seller_id)
  res = make_amazon_api_request('Operation' => 'SellerLookup', 'SellerId' => seller_id)
  seller_path = '/SellerLookupResponse/Sellers/Seller'

  name_element = res.at("#{seller_path}/SellerName")
  name_element = res.at("#{seller_path}/Nickname") if name_element.nil?
  merchant_name = name_element.nil? ? nil : name_element.inner_text

  logo_url = nil
  homepage = nil
  begin
    scraped = scrape_at_a_glance_page(seller_id)
    logo_url = scraped[:logo_url]
    if merchant_name.nil? || merchant_name.empty?
      merchant_name = scraped[:merchant_name]
    end
    homepage = scraped[:homepage]
  rescue
    # the scraped details are optional
  end

  unless merchant_name && !merchant_name.empty?
    merchant_name = "Amazon merchant (#{seller_id})"
  end

  glance_element = res.at("#{seller_path}/GlancePage")
  rating_element = res.at("#{seller_path}/AverageFeedbackRating")
  feedback_element = res.at("#{seller_path}/TotalFeedback")

  { :seller_id => seller_id,
    :merchant_name => merchant_name,
    :glance_page_url => (glance_element.nil? ? nil : glance_element.inner_text),
    :average_feedback_rating => (rating_element.nil? ? 0.0 : rating_element.inner_text.to_f),
    :total_feedback => (feedback_element.nil? ? 0 : feedback_element.inner_text.to_i),
    :logo_url => logo_url,
    :homepage => homepage }
end
189
+
190
+ private
191
+
192
# Collects the "New" condition offers for a product through ItemLookup
# requests (response groups "Large,OfferFull"), walking every offer page.
#
# asin                    - the product ASIN; surrounding whitespace is
#                           ignored. The caller's string is not modified.
# featured_merchants_only - when true the request uses MerchantId
#                           'Featured', otherwise 'All'.
#
# Returns a hash keyed by merchant/seller id. Each value holds
# :merchant_code, :merchant_name, :merchant_logo_url (nil), :cpc (nil),
# :price (BigDecimal), :shipping (nil), :offer_url, :offer_tier (1 for
# merchants, 2 for sellers) and :merchant_type ('merchant' or 'seller').
# When the same id shows up more than once the lowest price wins. Offers
# that report a Quantity of zero or less are skipped.
def find_offers_by_asin_via_api(asin, featured_merchants_only=false)
  # Non-destructive strip: strip! would alter (or fail on a frozen) argument.
  asin = asin.strip
  request = {'Operation' => 'ItemLookup',
             'ResponseGroup' => 'Large,OfferFull',
             'ItemId' => asin,
             'IdType' => 'ASIN',
             'MerchantId' => featured_merchants_only ? 'Featured' : 'All',
             'Condition' => 'New',
             'OfferPage' => 1}
  response = make_amazon_api_request request
  offers = {}

  total_offer_pages = (response / 'Items' / 'Offers' / 'TotalOfferPages').inner_html.to_i

  1.upto(total_offer_pages) do |page|
    # Page 1 was already fetched above; only later pages need a new request.
    if page != 1
      request['OfferPage'] = page
      response = make_amazon_api_request request
    end

    (response / 'Items' / 'Offers' / 'Offer').each do |offer|
      listing = offer / 'OfferListing'

      # An offer is identified either by a merchant id or by a seller id.
      id = (offer / 'Merchant' / 'MerchantId').inner_html
      if id.nil? || id.empty?
        id = (offer / 'Seller' / 'SellerId').inner_html
        name = (offer / 'Seller' / 'Nickname').inner_html
        type = 'seller'
      else
        name = (offer / 'Merchant' / 'Name').inner_html
        type = 'merchant'
      end

      # Sometimes there is a SalePrice; most of the time just Price.
      sale_price = listing / 'SalePrice'
      price_node = sale_price.size > 0 ? sale_price : listing / 'Price'
      unformatted_price = (price_node / 'Amount').inner_html
      formatted_price = (price_node / 'FormattedPrice').inner_html
      if formatted_price == 'Too low to display'
        offer_listing_id = (listing / 'OfferListingId').inner_html
        unformatted_price, formatted_price = reveal_too_low_to_display_price_from_offer_listing_id(offer_listing_id)
      end

      # A search result is always truthy, so presence has to be checked by
      # size; a missing Quantity must stay nil rather than become 0.
      quantity_nodes = listing / 'Quantity'
      quantity = quantity_nodes.size > 0 ? quantity_nodes.inner_html.to_i : nil

      if !unformatted_price.nil? && !unformatted_price.empty?
        # Amount is in hundredths (21995 => 219.95); decimal arithmetic
        # avoids the error of multiplying by the float 0.01.
        price = BigDecimal(unformatted_price.to_i.to_s) / 100
      elsif !formatted_price.nil? && !formatted_price.empty?
        # sometimes only a formatted price is available
        price = BigDecimal(formatted_price.gsub(/[$,]/, '').to_f.to_s)
      else
        price = BigDecimal('0') # should never get here.
      end

      next unless quantity.nil? || quantity > 0

      # Only a lower price may replace an entry that is already present.
      next unless !offers[id] || offers[id][:price] > price

      offers[id] = { :merchant_code => id,
                     :merchant_name => CGI::unescapeHTML(name),
                     :merchant_logo_url => nil,
                     :cpc => nil,
                     :price => price,
                     :shipping => nil,
                     :offer_url => offer_url(asin, type, id),
                     :offer_tier => type == 'seller' ? 2 : 1,
                     :merchant_type => type }
    end
  end
  offers
end
279
+
280
# Scrapes the offer listing page and indexes the resulting offers by their
# :merchant_code. When several offers share a merchant code, the one that
# appears last in the scraped list is kept.
#
# asin                    - the product ASIN.
# featured_merchants_only - passed through to scrape_offer_listing_page.
def scrape_offer_listing_page_to_hash(asin, featured_merchants_only=false)
  scraped = scrape_offer_listing_page(asin, featured_merchants_only)
  scraped.inject({}) do |by_merchant, offer|
    by_merchant[offer[:merchant_code]] = offer
    by_merchant
  end
end
288
+
289
# Scrapes the offer listing page of a product and returns its offers.
#
# asin                    - the product ASIN.
# featured_merchants_only - when true, only the table whose text contains
#                           'Featured Merchants' is parsed; otherwise
#                           tables whose text contains 'New' are parsed too.
#
# Returns an array of offer hashes as built by parse_offer_listing_rows;
# the array is empty when the page has no 'div.resultsset' container.
#
# Raises Amazon::AsinNotFoundError when the page request fails with a
# Net::HTTPServerException whose message starts with 404 (other such
# errors are re-raised unchanged), and Amazon::AsinFatalError when it
# fails with a Net::HTTPFatalError.
def scrape_offer_listing_page(asin, featured_merchants_only=false)
  begin
    url = offer_listing_url(asin)
    doc = scrape_page(url, Source.amazon_source.offer_ttl_seconds / 2, 'offer-listing')
  rescue Net::HTTPServerException => ex
    raise Amazon::AsinNotFoundError.new(ex.message, asin) if ex.message =~ /^404/
    raise
  rescue Net::HTTPFatalError => ex
    raise Amazon::AsinFatalError.new(ex.message, asin)
  end

  offers = []
  offers_box_element = doc.at('div.resultsset')
  # No results container (e.g. a page without offers): nothing to parse.
  return offers if offers_box_element.nil?

  offers_box_element.search('table').each do |offer_type_header_table|
    inner_text = offer_type_header_table.inner_text
    if inner_text.include?('Featured Merchants')
      featured_offer_rows = offer_type_header_table.search('tbody.result/tr')
      offers += parse_offer_listing_rows(asin, featured_offer_rows, true)
    elsif !featured_merchants_only && inner_text.include?('New')
      other_offer_rows = offer_type_header_table.search('tbody.result/tr')
      # offers.length keeps :original_index running across tables
      offers += parse_offer_listing_rows(asin, other_offer_rows, false, offers.length)
    end
  end
  offers
end
334
+
335
# Converts the rows of one offer table of the offer listing page into
# offer hashes.
#
# asin               - ASIN the rows belong to (used in URLs and log lines).
# offer_listing_rows - the row elements of the table.
# featured_merchants - true when the rows come from the featured-merchant
#                      table; yields :offer_tier 1, otherwise 3.
# offer_index_offset - added to a row's position to form :original_index.
#
# Returns an array of hashes with :original_index, :merchant_code,
# :merchant_name, :merchant_logo_url, :cpc, :price, :shipping,
# :offer_url, :offer_tier, :merchant_rating, :num_merchant_reviews and
# :merchant_type. A row is skipped when it has no price, no seller
# information block, no recognizable seller id, or is judged out of stock.
# Skipped rows without price or seller id are reported with puts.
def parse_offer_listing_rows(asin, offer_listing_rows, featured_merchants, offer_index_offset=0)
  offers = []
  offer_listing_rows.each_with_index do |row, offer_index|
    # Offer Listing ID
    offer_listing_id = nil
    offer_listing_tag = row.at("td.readytobuy/form/input[@name *= 'offering-id.']")
    unless offer_listing_tag.nil?
      offer_listing_id = offer_listing_tag.attributes['name'].sub('offering-id.', '')
    end

    # Price
    price = nil
    price_element = row.at("span.price")
    price = price_to_f(price_element.inner_text) unless price_element.nil?
    add_to_cart_span = row.at("td/span[text() *= 'Add to cart to see price.']")
    add_to_cart_span = row.at("td/span[text() *= 'Price not displayed.']") if add_to_cart_span.nil?
    if add_to_cart_span && !offer_listing_id.nil? && !offer_listing_id.empty?
      # The helper returns [amount, formatted price]; the formatted price
      # is used. Plain indexing avoids ActiveSupport's Array#second.
      price = price_to_f(reveal_too_low_to_display_price_from_offer_listing_id(offer_listing_id)[1])
    end
    if price.nil?
      puts "Failed to find offer price while scraping the offer listing page; ASIN: #{asin}. Skipping."
      next
    end

    # Shipping: an explicit shipping price, else free when the row is
    # marked as super saver, else unknown (nil).
    shipping = nil
    shipping_element = row.at("div.shipping_block/span.price_shipping")
    if shipping_element.nil?
      shipping = 0.0 unless row.at("span.supersaver").nil?
    else
      shipping = price_to_f(shipping_element.inner_text)
    end

    seller_info = row.at("td[/ul.sellerInformation]")
    next if seller_info.nil?

    # Number of merchant reviews
    num_merchant_reviews = nil
    rating_block = seller_info.at("div.rating")
    if !rating_block.nil? && rating_block.inner_text =~ /\((\d+) ratings\)/
      num_merchant_reviews = $1.to_i
    end

    # Merchant rating: the star image name encodes e.g. "4-5" for 4.5
    # stars, which is scaled to 0..100.
    merchant_rating = nil
    rating_img = seller_info.at("div.rating/img")
    unless rating_img.nil?
      stars = rating_img.attributes['src'].to_s.match(/stars\-([\d\-]+)/)
      merchant_rating = (stars[1].sub(/\-/, '.').to_f * 20).to_i unless stars.nil?
    end

    # Seller ID: try the rating link, then the shipping rates link, then
    # the seller profile link.
    seller_id = seller_id_from_link(seller_info.at("div.rating/a")) ||
                seller_id_from_link(seller_info.at("div.availability/a[text() *= 'shipping rates']")) ||
                seller_id_from_link(seller_info.at("div.rating//a[text() = 'Seller Profile']"))
    if seller_id.nil?
      puts "Failed to find seller_id while scraping the offer listing page; ASIN: #{asin}, seller info: #{seller_info.inner_html}"
      next
    end

    # Seller's Name & logo URL: a text link marks a 'seller'; otherwise
    # the logo image's alt text is the name of a 'merchant'.
    merchant_type = 'merchant'
    name = nil
    logo_url = nil
    seller_label_link = seller_info.at('div.seller/a')
    if seller_label_link.nil?
      seller_logo_img = seller_info.at('a/img')
      seller_logo_img = seller_info.at('img') if seller_logo_img.nil?
      unless seller_logo_img.nil?
        name = safe_strip(seller_logo_img.attributes['alt'])
        logo_url = seller_logo_img.attributes['src']
      end
    else
      name = safe_strip(seller_label_link.inner_text)
      merchant_type = 'seller'
    end

    # Availability: in stock unless the text says otherwise.
    in_stock = true
    availability_element = seller_info.at("div.availability")
    unless availability_element.nil?
      availability_info = availability_element.inner_text
      if availability_info.match(/out of stock/i)
        in_stock = false
      elsif availability_info.match(/Usually ships within .+ days/i)
        in_stock = true
      elsif availability_info.match(/Usually ships within .+ months/i)
        in_stock = false
      elsif availability_info.match(/In Stock/i)
        in_stock = true
      end
    end
    next unless in_stock

    offers << { :original_index => offer_index + offer_index_offset,
                :merchant_code => seller_id,
                # name stays nil when neither a label link nor a logo was found
                :merchant_name => name.nil? ? nil : CGI::unescapeHTML(name),
                :merchant_logo_url => logo_url,
                :cpc => Source.amazon_source.cpc,
                :price => BigDecimal(price.to_s),
                :shipping => shipping.nil? ? nil : BigDecimal(shipping.to_s),
                :offer_url => offer_url(asin, merchant_type, seller_id),
                :offer_tier => featured_merchants ? 1 : 3,
                :merchant_rating => merchant_rating,
                :num_merchant_reviews => num_merchant_reviews,
                :merchant_type => merchant_type }
  end
  offers
end

# Pulls the value of the "seller" query parameter out of a link element's
# href; returns nil when the link is nil or carries no such parameter.
def seller_id_from_link(link)
  return nil if link.nil?

  found = link.attributes['href'].to_s.match(/seller=([^&#]+)/)
  found.nil? ? nil : found[1]
end
465
+
466
# Reveals a price that is withheld as "too low to display" by creating a
# cart (CartCreate) containing one unit of the offer and reading the
# cart's subtotal.
#
# offer_listing_id - the OfferListingId of the offer to price.
#
# Returns a two-element array: the subtotal Amount (in pennies, as the
# string found in the response) followed by the FormattedPrice.
def reveal_too_low_to_display_price_from_offer_listing_id(offer_listing_id)
  cart_response = make_amazon_api_request('Operation' => 'CartCreate',
                                          'AssociateTag' => AMAZON_ASSOCIATE_TAG,
                                          'Item.1.OfferListingId' => offer_listing_id,
                                          'Item.1.Quantity' => 1)
  subtotal = cart_response / 'Cart' / 'CartItems' / 'SubTotal'
  formatted = (subtotal / 'FormattedPrice').inner_html
  amount = (subtotal / 'Amount').inner_html
  [amount, formatted]
end
478
+
479
# Scrapes a seller's "at a glance" page for display details.
#
# seller_id - the seller identifier.
#
# Returns a hash with :merchant_name (stripped text of the heading),
# :logo_url (src of the image in the description box) and :homepage (text
# of the external link whose href equals its text). Each value is nil when
# the corresponding element is not found on the page.
def scrape_at_a_glance_page(seller_id)
  page = scrape_page(at_a_glance_url(seller_id), 10.minutes, 'seller')

  merchant_name = nil
  merchant_logo_url = nil
  description_box = page.at('//table//tr//td//h1[@class = "sans"]/strong/../..')
  if !description_box.nil?
    heading = description_box.at('//h1/strong')
    merchant_name = heading.inner_text.strip if !heading.nil?

    logo = description_box.at('//img')
    merchant_logo_url = logo.attributes['src'] if !logo.nil?
  end

  homepage_link = page.at('//tr[@class = "tiny"]/td/a[@target = "_blank" and @href = text()]')
  homepage = homepage_link.nil? ? nil : homepage_link.inner_text

  { :merchant_name => merchant_name,
    :logo_url => merchant_logo_url,
    :homepage => homepage }
end
499
+
500
# Fetches a page and parses it with Hpricot.
#
# url          - the address to fetch.
# cache_ttl    - accepted, but not used by this implementation.
# context_name - accepted, but not used by this implementation.
#
# Returns the Hpricot document built from the response body.
def scrape_page(url, cache_ttl, context_name=nil)
  page_body = do_api_request(url)
  Hpricot(page_body)
end
505
+
506
# Returns the cache store used for API responses, or nil when caching is
# unavailable.
#
# The store is whatever the CACHE constant refers to; it is looked up
# lazily so that this class also works in applications that do not define
# it. A found store is memoized in @cache; while CACHE is undefined the
# lookup is repeated on every call.
#
# NOTE(review): defined? does not trigger const_missing hooks (e.g.
# autoloaders), unlike the former eval('CACHE'); confirm CACHE is assigned
# eagerly by the host application.
def cache
  @cache ||= (defined?(CACHE) ? CACHE : nil)
end
509
+
510
# Performs an API request described by a hash of query string parameters
# and parses the response as XML with Hpricot.
#
# user_params - request parameters, forwarded to make_amazon_api_request_raw.
#
# Returns the parsed document, or nil when the raw request produced
# nothing (nil or false).
def make_amazon_api_request(user_params)
  raw_xml = make_amazon_api_request_raw(user_params)
  return nil unless raw_xml

  Hpricot.XML(raw_xml)
end
515
+
516
# Performs an API request and returns the response body without parsing
# it, so the caller can process it with the library of its choice (say,
# Nokogiri).
#
# user_params - request parameters; merged over the fixed Service, Version
#               and AWSAccessKeyId parameters.
#
# When a cache is available the response is looked up under a key derived
# from the sorted query string, and a freshly fetched response is stored
# under that key with Source.amazon_source.offer_ttl_seconds as its TTL.
def make_amazon_api_request_raw(user_params)
  defaults = {'Service' => 'AWSECommerceService',
              'Version' => '2007-07-16',
              'AWSAccessKeyId' => AMAZON_ACCESS_KEY_ID}
  params = defaults.merge(user_params)

  # Hash order is not relied upon: parameters are sorted by name before
  # the query string (and hence the cache key) is built.
  pairs = params.sort_by { |name, _value| name }
  query_string = pairs.map { |name, value| "#{name}=#{CGI::escape(CGI::unescape(value.to_s))}" }.join('&')

  key = "amazon-api-#{Digest::MD5.hexdigest(query_string)}-v2"
  cached = cache ? cache.get(key) : nil
  return cached if cached

  # Nothing cached: sign the request and fetch a fresh response.
  signed_url = sign_url('ecs.amazonaws.com', '/onca/xml', params)
  fresh = do_api_request(signed_url)
  cache.set(key, fresh, Source.amazon_source.offer_ttl_seconds) if cache
  fresh
end
541
+
542
# Performs an HTTP GET (port 80, 5 second read timeout) and returns the
# response body.
#
# url         - the address to fetch.
# retry_num   - number of attempts already used; callers normally omit it.
# max_retries - maximum number of attempts.
#
# A timeout, a redirect (followed to its Location header) and a 503
# response (after sleeping two seconds) each use up one attempt and lead
# to a new request.
#
# Raises StandardError once retry_num reaches max_retries or when the
# response is of an unexpected kind; other client and server error
# responses are raised through Net::HTTPResponse#error!.
def do_api_request(url, retry_num=0, max_retries=10)
  if retry_num >= max_retries
    raise StandardError, "Failed to get Amazon URL with after #{max_retries} tries for url: #{url.inspect}"
  end

  req_url = URI.safe_parse(url)
  http = Net::HTTP.new(req_url.host, 80)
  http.read_timeout = 5 # 5 second timeout
  resp = nil
  begin
    http.start do |web|
      resp = web.get("#{req_url.path}?#{req_url.query}")
    end
  rescue Timeout::Error
    # Timed out: hand back the outcome of the retry. Without the return
    # the retry's result would be dropped and the nil response below
    # would be reported as an unknown error.
    return do_api_request(url, retry_num + 1, max_retries)
  end

  case resp
  when Net::HTTPSuccess
    resp.body
  when Net::HTTPRedirection
    do_api_request(resp['location'], retry_num + 1, max_retries)
  when Net::HTTPServiceUnavailable
    puts "GOT Net::HTTPServiceUnavailable FROM AMAZON; SLEEPING AND TRYING IN TWO SECONDS. RETRY NUM #{retry_num}."
    sleep(2)
    do_api_request(url, retry_num + 1, max_retries)
  when Net::HTTPClientError, Net::HTTPServerError
    puts "GOT #{resp.class.name} FROM AMAZON."
    resp.error!
  else
    raise StandardError, "Failed to get Amazon URL with unknown error: #{resp.inspect} For url: #{url.inspect}"
  end
end
582
+
583
# Strips surrounding whitespace from value; nil is passed through as nil.
def safe_strip(value)
  return nil if value.nil?

  value.strip
end
586
+
587
# Extracts a price from display text such as "$1,299.99" or
# "+ $4.99 shipping".
#
# value - the text to parse; non-strings are converted with to_s.
#
# Every character other than digits and dots is removed, then the first
# number in what remains is converted to a Float.
#
# Returns the Float, or nil when value is nil, empty or holds no digits.
def price_to_f(value)
  text = value.to_s
  return nil if text.empty?

  number = text.gsub(/[^\d\.]/, '').match(/(\d*\.?\d+)/)
  number.nil? ? nil : number[1].to_f
end
591
+
592
# Builds a signed request URL (HMAC-SHA256 over the canonical request,
# keyed with AMAZON_SECRET_ACCESS_KEY).
#
# host   - the API host name, e.g. 'ecs.amazonaws.com'.
# path   - the request path, e.g. '/onca/xml'.
# params - hash of query parameters. NOTE: a 'Timestamp' entry holding the
#          CGI-escaped current UTC time is added to this hash in place.
#
# The parameters are sorted by name and escaped ('+' is rewritten to
# '%20'); the string "GET\n<host>\n<path>\n<query>" is signed, and the
# Base64 signature is appended, escaped, as the Signature parameter.
#
# Returns the complete http:// URL as a string.
def sign_url(host, path, params)
  timestamp = CGI::escape(Time.now.utc.strftime('%Y-%m-%dT%H:%M:%SZ'))
  params['Timestamp'] = timestamp
  params_string = params.sort { |a, b| a[0] <=> b[0] }.collect { |x| "#{x[0]}=#{CGI::escape(CGI::unescape(x[1].to_s))}" }.join('&')
  params_string.gsub!('+', '%20')

  query = "GET\n#{host}\n#{path}\n#{params_string}"

  # OpenSSL::HMAC replaces Digest::HMAC, which is no longer part of the
  # standard library.
  hmac = OpenSSL::HMAC.digest(OpenSSL::Digest.new('sha256'), AMAZON_SECRET_ACCESS_KEY, query)
  # 'm0' is Base64 without line feeds; no Base64 library is needed.
  base64_hmac = [hmac].pack('m0')
  signature = CGI::escape(base64_hmac)
  "http://#{host}#{path}?#{params_string}&Signature=#{signature}"
end
605
+ end
606
+ end
@@ -0,0 +1,9 @@
1
# Lenient conversions for string values taken from API responses.
module ApiHelper
  # Converts string_value to an Integer after stripping whitespace.
  # Returns nil for nil input and whenever the conversion raises (for
  # example when the value does not respond to strip). Text without
  # leading digits follows String#to_i and yields 0.
  def to_i_or_nil(string_value)
    return nil if string_value.nil?

    string_value.strip.to_i
  rescue
    nil
  end

  # Converts string_value to a BigDecimal after stripping whitespace.
  # Returns nil for nil input and whenever the conversion raises.
  def to_d_or_nil(string_value)
    return nil if string_value.nil?

    BigDecimal(string_value.strip)
  rescue
    nil
  end
end