bazaar_sources 0.2.1.1.1.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,514 @@
1
+ module ShoppingBulkAPI
2
+ require 'hpricot'
3
+ require 'open-uri'
4
+ require 'cgi'
5
+
6
# Enumerates the lookup modes accepted in search_hash[:search_type] by
# batch_search_v3 and do_search_v3.
class SearchType
  # Look up products directly by their Shopping product IDs.
  SHOPPING_PRODUCT_ID = 'SHOPPING_PRODUCT_ID'.freeze
  # Look up our own products (they are mapped to Shopping IDs internally).
  PRODUCT = 'PRODUCT'.freeze
  # Search by one or more keywords.
  KEYWORDS = 'KEYWORDS'.freeze
end
11
+
12
# Fetches the whole Shopping category tree and flattens it into a list.
#
# Issues a CategoryTree request for category 0 with all descendants and hands
# the <category id="0"> element of the response to parse_category.
#
# Returns the array built by parse_category, or an empty array when the
# response does not contain the root category element.
def self.get_all_categories
  params = {
    'categoryId' => 0,
    'showAllDescendants' => true
  }
  result = make_v3_request(:CategoryTree, params)
  root = result.at('category[@id="0"]')
  if root.nil?
    # Nothing to walk; previously this crashed inside parse_category.
    puts "NO ROOT CATEGORY IN CategoryTree RESPONSE!"
    return []
  end
  parse_category(root)
end
20
+
21
# Recursively flattens a category element and all of its descendants.
#
# category  - element exposing attributes['id'], at('name') and
#             at('categories'), as used below.
# parent_id - integer id of the enclosing category (nil directly under root).
#
# Returns an array of hashes with the keys :banned, :id, :name, :parent_id
# and :end_point. :end_point is true only for categories that have no
# <categories> child. The root category (id 0) is a pure container and is
# never added to the list, whether or not it has children.
def self.parse_category(category, parent_id=nil)
  id = category.attributes['id'].to_i
  is_root = (id == 0)
  if is_root
    # main category, does not count as a parent and should not be added
    id = nil
    name = nil
  else
    name = category.at('name').innerText
  end

  sub_categories = category.at('categories')
  categories = []

  unless is_root
    categories << {
      :banned => banned_categories.include?(name),
      :id => id,
      :name => name,
      :parent_id => parent_id,
      # parents are listed too, but flagged so callers can skip them
      :end_point => sub_categories ? false : true
    }
  end

  if sub_categories
    (sub_categories / '> category').each do |sub_category|
      categories.concat(parse_category(sub_category, id))
    end
  end

  categories
end
51
+
52
# Batch lookup of products / offers.
#
# search_hash must contain :search_type (one of the SearchType constants):
#   * SearchType::PRODUCT             - pass our products in :products
#   * SearchType::SHOPPING_PRODUCT_ID - pass Shopping ids in :shopping_product_ids
#   * SearchType::KEYWORDS            - pass an array of strings in :keywords
# Optional: :batch_lookup (items per request, default 20).
#
# The hash is modified in place: :batch_lookup, :get_extra_product_info,
# :batch_items and (for PRODUCT searches) :product_ids_hash / :product_ids
# are written to it.
#
# Items reported as missed are retried once in batches. For PRODUCT searches
# anything still missing is then looked up one at a time; ids that cannot be
# resolved get their not-found count incremented, and ids that Shopping
# reports under a different id are updated in ProductSource.
#
# Returns [all_offers, all_product_infos].
# Raises ArgumentError for an unknown :search_type.
def self.batch_search_v3(search_hash, sandbox=false)
  search_hash[:batch_lookup] ||= 20

  case search_hash[:search_type]
  when SearchType::SHOPPING_PRODUCT_ID
    items = search_hash[:shopping_product_ids]
    search_hash[:get_extra_product_info] ||= false
  when SearchType::PRODUCT
    # shopping product id => our product id, used to translate results back
    search_hash[:product_ids_hash] = shopping_id_map_v3(search_hash[:products])
    # just the shopping product ids
    search_hash[:product_ids] = search_hash[:product_ids_hash].keys
    items = search_hash[:product_ids]
    search_hash[:get_extra_product_info] ||= false
  when SearchType::KEYWORDS
    items = search_hash[:keywords]
    search_hash[:get_extra_product_info] = true # force extra info, how else will we get the name/etc. ?!
  else
    raise ArgumentError, "Invalid :search_type specified: #{search_hash[:search_type].inspect}"
  end

  all_offers = default_offers
  all_product_infos = default_product_infos(search_hash[:get_extra_product_info])

  # first pass over everything, then one more batched pass over the misses
  missed_ids = search_in_batches_v3(search_hash, items || [], sandbox, all_offers, all_product_infos)
  second_misses = search_in_batches_v3(search_hash, missed_ids, sandbox, all_offers, all_product_infos)

  # only care to look up one-by-one if we're going to do something with the data
  # (for product lookups, then, to hide or update shopping ids)
  if !second_misses.empty? && search_hash[:search_type] == SearchType::PRODUCT
    products_to_hide = []
    second_misses.each do |product_id|
      our_product_id = search_hash[:product_ids_hash][product_id]
      search_hash[:batch_items] = [product_id]
      final_miss, offers, product_infos = single_batch_search_v3(search_hash, sandbox)
      all_product_infos.update(product_infos)
      all_offers.update(offers)

      unless final_miss.empty?
        puts "****** COULDN'T LOOK UP INFO FOR #{our_product_id} ( SHOPPING ID #{product_id}) !! Adding to hide queue..."
        products_to_hide << product_id
        next
      end

      new_shopping_id = product_infos[our_product_id][:reported_product_id]
      # Shopping answered with the id we already store (the earlier misses were
      # transient) or with no id at all: nothing to update, and looking the id
      # up would only find this product's own source row.
      next if new_shopping_id.nil? || new_shopping_id.to_s == product_id.to_s

      # shopping gave us a product ID back that doesn't match our shopping product ID! gotta update!
      if ps = ProductSource.find_by_source_name_and_source_id(ProductSource::Name::SHOPPING, new_shopping_id)
        puts "SHOPPING PRODUCT ID ALREADY EXISTS AT #{ps.product_id} -- HIDING DUPLICATE #{our_product_id}"
        products_to_hide << product_id
      else
        puts "UPDATING SOURCE ID: FROM #{product_id.inspect} TO #{new_shopping_id.inspect} FOR product id##{our_product_id}"
        # bind values instead of interpolating: the new id comes from the remote API
        ProductSource.update_all(
          ["source_id = ?", new_shopping_id],
          ["source_id = ? AND product_id = ?", product_id, our_product_id]
        )
      end
    end

    puts "HIDING PRODUCTS: #{products_to_hide.inspect}"
    products_to_hide.each do |shopping_product_id|
      ProductSource.increment_not_found_count(ProductSource::Name::SHOPPING, shopping_product_id)
    end
  end

  # return all that jazz
  [all_offers, all_product_infos]
end

# Helper for batch_search_v3: maps each product's first non-questionable
# Shopping source id to the product's own id. Products without such a source
# are skipped; a duplicate Shopping id is reported and the later product wins.
def self.shopping_id_map_v3(products)
  id_map = {}
  products.each do |product|
    source = product.shopping_ids.detect { |product_source| !product_source.questionable? }
    next if source.nil?
    shopping_id = source.source_id
    if id_map.has_key?(shopping_id)
      puts "DUPLICATE KEY FOR #{shopping_id} !! #{id_map[shopping_id].inspect} VS #{product.id}"
    end
    id_map[shopping_id] = product.id
  end
  id_map
end

# Helper for batch_search_v3: looks up items in slices of
# search_hash[:batch_lookup], merges every slice's results into all_offers and
# all_product_infos, and returns the accumulated misses.
def self.search_in_batches_v3(search_hash, items, sandbox, all_offers, all_product_infos)
  misses = []
  items.each_slice(search_hash[:batch_lookup]) do |batch_items|
    search_hash[:batch_items] = batch_items
    batch_misses, offers, product_infos = single_batch_search_v3(search_hash, sandbox)
    all_product_infos.update(product_infos)
    all_offers.update(offers)
    misses.concat(batch_misses)
  end
  misses
end
157
+
158
# Runs one batch through do_search_v3 and, for SearchType::PRODUCT searches,
# re-keys the returned offers and product infos from Shopping product ids to
# our own product ids using search_hash[:product_ids_hash].
#
# This is a helper for batch_search_v3 and shouldn't be called directly; for
# looking up a whole lot of product offers use batch_search_v3.
#
# Returns [misses, offers, product_infos]. Misses stay keyed by Shopping id.
def self.single_batch_search_v3(search_hash, sandbox=false)
  misses, offers, product_infos = do_search_v3(search_hash, sandbox)

  # only translate ids if they initially gave us a set of products
  return [misses, offers, product_infos] unless search_hash[:search_type] == SearchType::PRODUCT

  id_map = search_hash[:product_ids_hash]
  search_hash[:batch_items].each do |shopping_id|
    our_id = id_map[shopping_id]
    # if OUR product id is the same as shopping's, there is nothing to move
    next if our_id == shopping_id
    puts "******** NIL FOR #{shopping_id}" if our_id.nil?

    [offers, product_infos].each do |results|
      # For ids missing from the results this stores the hash's default value
      # under our id, exactly as before.
      results[our_id] = results[shopping_id]
      results.delete(shopping_id)
    end
  end

  [misses, offers, product_infos]
end
182
+
183
# Builds the empty offers container: a new Hash whose lookups for unknown
# product ids yield an empty array instead of nil.
#
# The default array is one shared object and is never stored in the hash, so
# treat it as read-only: appending to offers[unknown_id] would change what
# every other missing key returns. (A block default is deliberately not used
# because hashes with a default proc cannot be marshalled.)
def self.default_offers
  Hash.new([])
end
186
+
187
# Builds the empty product-info container: a new Hash whose lookups for
# unknown product ids yield a hash of nil-valued fields, so callers can read
# fields without nil checks ("smart defaults for error handling").
#
# get_extra_product_info - when truthy the default also carries the extended
#                          fields (:images, :manufacturer, :name,
#                          :description, :review_url, :review_count, :rating).
#
# The three CPC / reported-id fields are always present, even if the caller
# did not ask for extra info. The default hash is a single shared object that
# is not stored on lookup; treat it as read-only.
def self.default_product_infos(get_extra_product_info)
  defaults = {
    :avg_secondary_cpcs => nil,
    :primary_cpc => nil,
    :reported_product_id => nil
  }

  if get_extra_product_info
    defaults.update(
      :images => {},
      :manufacturer => nil,
      :name => nil,
      :description => nil,
      :review_url => nil,
      :review_count => nil,
      :rating => nil
    )
  end

  Hash.new(defaults)
end
212
+
213
# Performs ONE GeneralSearch request and parses the response.
#
# search_hash keys read here: :search_type, :batch_items (id searches; nils
# are removed from it in place), :keywords and :num_items (keyword searches),
# :tracking_id and :get_extra_product_info.
#
# Returns [misses, offers, product_infos]:
#   misses        - requested ids that did not come back (id searches only)
#   offers        - product id => in-stock offers, cheapest (price + shipping) first
#   product_infos - product id => info hash; always has :reported_product_id
# When the response carries error exceptions, offers and product_infos are
# returned empty (misses are still filled in).
#
# Raises ArgumentError for an unknown :search_type.
def self.do_search_v3(search_hash, sandbox=false)
  offers = default_offers
  product_infos = default_product_infos(search_hash[:get_extra_product_info])
  misses = []

  search_type = search_hash[:search_type]
  id_lookup = (search_type == SearchType::PRODUCT || search_type == SearchType::SHOPPING_PRODUCT_ID)

  if id_lookup
    search_hash[:batch_items].compact! # remove nils
    if search_hash[:batch_items].empty?
      # nothing to look for! return blanks
      puts "NO PRODUCT ID PASSED!"
      return [misses, offers, product_infos]
    end
  end

  result = make_v3_request(:GeneralSearch, search_params_v3(search_hash), sandbox)
  misses = missed_batch_items_v3(search_hash[:batch_items], result) if id_lookup

  errors = result.search('exception[@type=error]')
  if errors.length > 0
    report_search_errors_v3(search_hash, errors, id_lookup)
    # return blanks
    return [misses, offers, product_infos]
  end

  (result / 'product').each do |product|
    reported_id = product.attributes['id']
    product_id = reported_id

    if id_lookup && !search_hash[:batch_items].include?(product_id)
      # They gave us back an ID that we didn't ask for. With several ids in the
      # batch we skip it (it is already counted in the misses). With a single
      # id we know it is the product we were looking for, so keep it under the
      # id we asked for; theirs is kept in :reported_product_id.
      next unless search_hash[:batch_items].length == 1
      product_id = search_hash[:batch_items].first
    end

    product_infos[product_id] = { :reported_product_id => reported_id }
    if search_hash[:get_extra_product_info]
      product_infos[product_id].update(extra_product_info_v3(product))
    end
    offers[product_id] = sorted_offers_v3(product)
  end

  [misses, offers, product_infos]
end

# Helper for do_search_v3: builds the GeneralSearch parameters for the
# search type. Keywords are CGI-escaped here.
def self.search_params_v3(search_hash)
  case search_hash[:search_type]
  when SearchType::PRODUCT, SearchType::SHOPPING_PRODUCT_ID
    {
      'productId' => search_hash[:batch_items],
      'showProductOffers' => true,
      'trackingId' => search_hash[:tracking_id],
      'numItems' => 20
    }
  when SearchType::KEYWORDS
    num_items = search_hash[:num_items]
    num_items = 1 if num_items.nil? || num_items.to_s.empty?
    {
      'keyword' => Array(search_hash[:keywords].collect { |keyword| CGI::escape(keyword) }),
      'showProductOffers' => true,
      'trackingId' => search_hash[:tracking_id],
      'numOffersPerProduct' => 20,
      'pageNumber' => 1,
      'numItems' => num_items
    }
  else
    raise ArgumentError, "Invalid :search_type specified: #{search_hash[:search_type].inspect}"
  end
end

# Helper for do_search_v3: works out which requested ids are missing from the
# response of an id search.
def self.missed_batch_items_v3(batch_items, result)
  # no products at all: probably an error happened, everything is a miss
  return batch_items unless result.at('product')
  # if we're looking up one ID, it doesn't matter if the ID they returned doesn't match ours
  return [] if batch_items.length == 1
  batch_items - (result / 'product').collect { |product| product.attributes['id'] }
end

# Helper for do_search_v3: prints the API errors and reports them through
# HoptoadNotifier. The single "Could not find ProductIDs <id>" error of a
# one-id lookup is expected for invalid ids and is ignored.
def self.report_search_errors_v3(search_hash, errors, id_lookup)
  batch_items = search_hash[:batch_items]
  if id_lookup && batch_items.length == 1 && errors.length == 1 &&
     errors.first.at('message').innerText == "Could not find ProductIDs #{batch_items.first}"
    return
  end

  messages = errors.collect { |error| error.at('message').innerText }
  puts "*** ERROR *** Could not look up offers by product ids:"
  messages.each { |message| puts " - #{message}" }

  error_params = {}
  messages.each_with_index { |message, index| error_params["Error ##{index}"] = message }
  HoptoadNotifier.notify(
    :error_class => "ShoppingOfferError",
    :error_message => %{
      We got error(s) while trying to get the shopping offers! #{batch_items.inspect}
    },
    :request => { :params => error_params }
  )
end

# Helper for do_search_v3: inner text of element.at(path), or nil when the
# element or the node is missing.
def self.node_text_v3(element, path)
  node = element && element.at(path)
  node && node.innerText
end

# Helper for do_search_v3: extracts the optional product details (name,
# review data, description cut to 255 characters, three smallest available
# images ordered by area).
def self.extra_product_info_v3(product)
  description = node_text_v3(product, 'fullDescription')
  if description.nil? || description.empty?
    description = node_text_v3(product, 'shortDescription')
  end
  description = (description.nil? || description.empty?) ? '' : description[0...255]

  images = (product / 'images' / 'image[@available="true"]').collect { |image|
    {
      :width => image.attributes['width'].to_i,
      :height => image.attributes['height'].to_i,
      :url => node_text_v3(image, 'sourceURL')
    }
  }.sort_by { |image| image[:width] * image[:height] }

  {
    :name => node_text_v3(product, 'name'),
    :review_url => node_text_v3(product, 'reviewURL'),
    :review_count => node_text_v3(product, 'rating/reviewCount'),
    :rating => node_text_v3(product, 'rating/rating'),
    :description => description,
    :images => {
      :small_image => images[0],
      :medium_image => images[1],
      :large_image => images[2]
    }
  }
end

# Helper for do_search_v3: parses a <store> element into a hash with :name,
# :trusted, :id, :authorized_reseller, :logo (nil unless available) and
# :rating (:number normalized via normalize_merchant_rating, :count, :url).
def self.parse_store_v3(store)
  logo = nil
  logo_node = store.at('logo')
  if logo_node && logo_node.attributes['available'] == "true"
    logo = {
      :width => logo_node.attributes['width'],
      :height => logo_node.attributes['height'],
      :url => node_text_v3(logo_node, 'sourceURL')
    }
  end

  rating_node = store.at('ratingInfo')
  rating_text = node_text_v3(rating_node, 'rating')

  {
    :name => node_text_v3(store, 'name'),
    :trusted => store.attributes['trusted'] == "true",
    :id => store.attributes['id'].to_i,
    :authorized_reseller => store.attributes['authorizedReseller'] == "true",
    :logo => logo,
    :rating => {
      :number => rating_text.nil? ? nil : normalize_merchant_rating(rating_text.to_f),
      :count => node_text_v3(rating_node, 'reviewCount').to_i,
      :url => node_text_v3(rating_node, 'reviewURL')
    }
  }
end

# Helper for do_search_v3: collects a product's in-stock offers (one per
# store id, later offers of the same store win) and returns them as an array
# sorted by price plus shipping. Unknown prices / shipping count as 0 for
# sorting only.
def self.sorted_offers_v3(product)
  offers_node = product.at('offers')
  return [] if offers_node.nil?

  by_store = {}
  (offers_node / 'offer').each do |offer|
    stock_status = node_text_v3(offer, 'stockStatus')
    next if stock_status == 'out-of-stock' || stock_status == 'back-order'

    store = parse_store_v3(offer.at('store'))

    shipping_node = offer.at('shippingCost')
    shipping = nil
    # checkSite means the merchant does not publish the shipping cost
    if shipping_node && shipping_node.attributes['checkSite'] != "true"
      shipping = to_d_or_nil(shipping_node.innerText)
    end

    cpc_text = node_text_v3(offer, 'cpc')

    by_store[store[:id]] = {
      :merchant_code => store[:id].to_s,
      :merchant_name => store[:name],
      :merchant_logo_url => store[:logo].nil? ? nil : store[:logo][:url],
      :cpc => cpc_text.nil? ? nil : (cpc_text.to_f * 100).to_i,
      :price => to_d_or_nil(node_text_v3(offer, 'basePrice')),
      :shipping => shipping,
      :offer_url => node_text_v3(offer, 'offerURL'),
      :offer_tier => 1,
      :merchant_rating => store[:rating][:number],
      :num_merchant_reviews => store[:rating][:count]
    }
  end

  # return an array, don't care about the hash. was used for dup checking.
  by_store.values.sort_by { |offer| (offer[:price] || 0) + (offer[:shipping] || 0) }
end
400
+
401
# Scales a merchant rating by 20 and rounds it to the nearest integer
# (e.g. 4.5 becomes 90). Returns nil when given nil.
def self.normalize_merchant_rating(merchant_rating)
  return nil if merchant_rating.nil?
  (merchant_rating * 20.0).round
end
404
+
405
# Gets any specification attribute for a Shopping product id
# (for instance, 'Screen Size').
#
# Looks the product up with find_by_product_id_v3 and flattens the values of
# its :specifications hash into a [name, value, name, value, ...] list.
# Only the name positions are compared, so a specification VALUE that happens
# to equal the requested attribute name can no longer be mistaken for it.
#
# Returns the value that follows the first matching name, or nil when the
# product, its specifications or the attribute cannot be found.
def self.get_attribute_from_shopping_id_v3(shopping_id, attribute)
  product_info = find_by_product_id_v3(shopping_id)
  specifications = product_info && product_info[:specifications]
  return nil if specifications.nil?

  values = specifications.values.flatten
  # step over the names only (even positions); the value sits right after
  0.step(values.length - 2, 2) do |index|
    return values[index + 1] if values[index] == attribute
  end
  nil
end
414
+
415
# Indexes image elements by their dimensions.
#
# images_element - enumerable of elements exposing attributes['width'],
#                  attributes['height'], attributes['available'] and
#                  at('sourceURL').
#
# Returns a hash mapping "WIDTHxHEIGHT" to [available?, url]. The url is nil
# when an image has no sourceURL node. Images sharing the same dimensions
# overwrite each other, the last one wins.
def self.parse_images_v3(images_element)
  images = {}
  images_element.each do |image|
    dimensions = "#{image.attributes['width']}x#{image.attributes['height']}"
    source = image.at('sourceURL')
    images[dimensions] = [image.attributes['available'] == 'true', source && source.innerText]
  end
  images
end
421
+
422
# Asks GeneralSearch for the terms related to a keyword.
#
# keyword - a raw (unescaped) search string, or an array of them. It is
#           CGI-escaped here, the same way do_search_v3 escapes its keywords,
#           so spaces and '&' no longer corrupt the query string.
# sandbox - passed through to make_v3_request.
#
# Returns an array with the text of every 'relatedTerms > term' node.
def self.find_related_terms_v3(keyword, sandbox=false)
  escaped = if keyword.is_a?(Array)
    keyword.collect { |word| CGI::escape(word.to_s) }
  else
    CGI::escape(keyword.to_s)
  end
  result = make_v3_request(:GeneralSearch, {'keyword' => escaped}, sandbox)
  (result / 'relatedTerms > term').collect { |term| term.innerText }
end
426
+
427
+ private
428
+
429
# Builds the Shopping.com publisher API (v3.0 REST) URL for an action,
# fetches it with do_api_request and parses the body with Hpricot.XML.
#
# action      - API action name, e.g. :GeneralSearch or :CategoryTree.
# user_params - hash of query parameters; array values are expanded into
#               repeated "key=value" pairs. Values are NOT escaped here.
#               Entries whose value is nil are ignored, so a caller passing
#               'trackingId' => nil keeps the default tracking id instead of
#               sending an empty one.
# sandbox     - use the sandbox host instead of the production host.
#
# Returns the parsed Hpricot document.
#
# NOTE(review): the tracking id and API key are hard-coded credentials;
# consider moving them to configuration.
# NOTE: response caching used to wrap this call and is currently disabled;
# parameters are still sorted so the URL is stable for a given input.
def self.make_v3_request(action, user_params, sandbox=false)
  defaults = {
    'trackingId' => '8039097',
    'apiKey' => '21e3f349-c5f4-4783-8354-6ff75371ae22'
  }
  supplied = (user_params || {}).reject { |_key, value| value.nil? }
  # merge in the user params, then sort for a deterministic URL
  params = defaults.merge(supplied).sort

  query_string = params.collect { |key, value|
    if value.is_a?(Array)
      value.collect { |item| "#{key}=#{item}" }.join('&')
    else
      "#{key}=#{value}"
    end
  }.join('&')

  host = sandbox ? 'sandbox.api.shopping.com' : 'publisher.api.shopping.com'
  url = "http://#{host}/publisher/3.0/rest/#{action}?#{query_string}"
  Hpricot.XML(do_api_request(url))
end
461
+
462
# Performs the HTTP GET for an API URL, retrying on transient failures.
#
# url         - full request URL (parsed with URI.safe_parse).
# retry_num   - attempts already used; callers normally leave this at 0.
# max_retries - total number of attempts allowed.
#
# Every attempt, successful or not, counts once against max_retries:
#   * timeout / broken pipe / connection reset -> retried immediately
#   * HTTP 500 or no response                  -> retried after 0.5 seconds
#   * HTTP 503                                 -> retried after 2 seconds
# Previously a timeout triggered two nested retries (one whose result was
# thrown away plus the "no response" one), multiplying the requests sent.
#
# Returns the response body for success and redirection responses
# (redirects are not followed).
# Raises StandardError when the attempts are used up or for any other
# response class.
def self.do_api_request(url, retry_num=0, max_retries=4)
  while retry_num < max_retries
    req_url = URI.safe_parse(url)
    http = Net::HTTP.new(req_url.host, req_url.port)
    http.read_timeout = 5 # 5 second timeout
    resp = nil

    begin
      http.start do |web|
        resp = web.get("#{req_url.path}?#{req_url.query}")
      end
    rescue Timeout::Error, Errno::EPIPE, Errno::ECONNRESET
      puts "Timeout, broken pipe, or connection reset. Trying again."
      retry_num += 1
      next
    end

    case resp
    when Net::HTTPSuccess, Net::HTTPRedirection
      return resp.body
    when Net::HTTPInternalServerError
      puts "GOT Net::HTTPInternalServerError FROM Shopping; SLEEPING AND TRYING IN 0.5 SECONDS. RETRY NUM #{retry_num}."
      sleep(0.5)
    when Net::HTTPServiceUnavailable
      puts "GOT Net::HTTPServiceUnavailable FROM Shopping; SLEEPING AND TRYING IN TWO SECONDS. RETRY NUM #{retry_num}."
      sleep(2)
    when nil
      puts "GOT nil FROM Shopping; SLEEPING AND TRYING IN 0.5 SECONDS. RETRY NUM #{retry_num}."
      sleep(0.5)
    else
      raise StandardError, "Failed to get Shopping URL with unknown error: #{resp.inspect} For url: #{url.inspect}"
    end

    retry_num += 1
  end

  raise StandardError, "Failed to get Shopping URL with after #{max_retries} tries for url: #{url.inspect}"
end
506
+
507
# Converts a string to an integer after stripping surrounding whitespace.
#
# Returns nil for nil and for anything that cannot be stripped (i.e. is not
# string-like). Non-numeric strings follow String#to_i and yield 0.
def self.to_i_or_nil(value)
  return nil unless value.respond_to?(:strip)
  value.strip.to_i
end
510
+
511
# Converts a string to a BigDecimal after stripping surrounding whitespace.
#
# Returns nil for nil, for anything that cannot be stripped (i.e. is not
# string-like) and for text that BigDecimal rejects.
def self.to_d_or_nil(value)
  return nil unless value.respond_to?(:strip)
  begin
    BigDecimal(value.strip)
  rescue ArgumentError, TypeError
    nil
  end
end
514
+ end