bazaar_sources 0.2.1.1.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +7 -0
- data/init.rb +1 -0
- data/lib/api_helpers/amazon.rb +606 -0
- data/lib/api_helpers/api_helper.rb +9 -0
- data/lib/api_helpers/external_url.rb +127 -0
- data/lib/api_helpers/httparty_nokogiri_parser.rb +14 -0
- data/lib/api_helpers/reseller_ratings_api.rb +174 -0
- data/lib/api_helpers/shopping.rb +224 -0
- data/lib/api_helpers/shopping_bulk_api.rb +514 -0
- data/lib/api_helpers/shopzilla_api.rb +230 -0
- data/lib/bazaar_sources.rb +35 -0
- data/lib/sources/amazon_source.rb +94 -0
- data/lib/sources/buy_source.rb +34 -0
- data/lib/sources/ebay_source.rb +35 -0
- data/lib/sources/epinions_source.rb +80 -0
- data/lib/sources/google_source.rb +119 -0
- data/lib/sources/price_grabber_source.rb +94 -0
- data/lib/sources/reseller_ratings_source.rb +47 -0
- data/lib/sources/shopping_source.rb +136 -0
- data/lib/sources/shopzilla_source.rb +108 -0
- data/lib/sources/simple_sources.yml +71 -0
- data/lib/sources/source.rb +242 -0
- metadata +137 -0
@@ -0,0 +1,230 @@
|
|
1
|
+
require 'hpricot'
|
2
|
+
require 'open-uri'
|
3
|
+
require 'timeout'
|
4
|
+
|
5
|
+
class ShopzillaAPI
|
6
|
+
@@default_api_call_timeout = 25
|
7
|
+
# Replaces the class-wide fallback timeout that make_api_request hands to the
# timeout call whenever an instance has no api_call_timeout of its own.
#
# obj - the new fallback value; it is stored as given, without validation.
def self.default_api_call_timeout=(obj)
  @@default_api_call_timeout = obj
end
|
10
|
+
attr_accessor :api_call_timeout
|
11
|
+
|
12
|
+
# Creates a client whose log messages are written to STDERR.
def initialize
  @logger = Logger.new(STDERR)
end
|
15
|
+
|
16
|
+
# Find all offers for a product given the (shopzilla) product_id.
#
# product_id - the Shopzilla product identifier to look up.
#
# Offers are keyed by merchant id while collecting, so a later offer from the
# same merchant replaces an earlier one. Only offers whose condition looks new
# and whose stock status is not "out" are kept; rejected offers are skipped
# before any of their price data is parsed.
#
# Returns an Array of offer Hashes (empty when the request produced no result).
def find_offers_by_product_id(product_id)
  result = make_offer_service_request(product_id)
  return [] if result.nil?

  offers = {}
  (result / 'Products/Product/Offers/Offer').each do |offer|
    condition = safe_inner_text(offer.at('condition'))
    stock = safe_inner_text(offer.at('stock'))
    next unless is_likely_new_condition?(condition) && is_likely_in_stock?(stock)

    merchant_id = offer.attributes['merchantId']
    rating_elem = offer.at('MerchantRating')
    # Stays nil when the offer carries no MerchantRating element.
    merchant_rating = normalize_merchant_rating(rating_elem.attributes['value'].to_f) unless rating_elem.nil?

    offers[merchant_id] = { :merchant_code => merchant_id,
                            :merchant_name => safe_unescape_html(safe_inner_text(offer.at('merchantName'))),
                            # Shared helper keeps the logo URL format in one place.
                            :merchant_logo_url => logo_url(merchant_id),
                            :cpc => nil,
                            :price => get_price_from_node(offer.at('price')),
                            :shipping => get_price_from_node(offer.at('shipAmount')),
                            :offer_url => safe_unescape_html(safe_inner_text(offer.at('url'))),
                            :offer_tier => 1,
                            :merchant_rating => merchant_rating,
                            :num_merchant_reviews => nil }
  end
  offers.values
end
|
48
|
+
|
49
|
+
# Scales a merchant rating by ten and rounds it to a whole number.
#
# merchant_rating - a numeric rating, or nil.
#
# Returns the scaled, rounded Integer, or nil when given nil.
def normalize_merchant_rating(merchant_rating)
  return nil if merchant_rating.nil?

  scaled = merchant_rating * 10.0
  scaled.round
end
|
52
|
+
|
53
|
+
# Makes a separate API call to get merchant detail info and returns a hash
# that is aligned with the merchant_source model.
#
# merchant_id - the Shopzilla merchant identifier.
#
# Returns nil when the API request produced no result. Otherwise a Hash with
# :source, :code, :name and :homepage, plus :logo_url (only when the logo URL
# could be verified), :merchant_rating (when a rating element exists, or 0
# when reading it raised) and :num_merchant_reviews (only when rated).
def merchant_source_detail(merchant_id)
  response = make_merchant_service_request(merchant_id)
  return nil if response.nil?

  merchant = response / ("Merchants/Merchant[@id=#{merchant_id}]")
  detail = { :source => 'shopzilla',
             :code => merchant_id.to_s,
             :name => safe_inner_text(merchant.at('name')) }

  candidate_logo = logo_url(merchant_id)
  detail[:logo_url] = candidate_logo if verify_logo_url(candidate_logo)

  # The rating element is absent for unrated merchants.
  rated = false
  begin
    overall = merchant.at('/Rating/Overall')
    unless overall.nil?
      rated = true
      detail[:merchant_rating] = normalize_merchant_rating(overall.attributes['value'].to_f)
    end
  rescue
    detail[:merchant_rating] = 0
  end

  # The merchant URL is an escaped bizrate redirect whose "t" parameter holds
  # the real destination, e.g.
  #   http://www.bizrate.com/rd?t=http%3A%2F%2Fwww.tigerdirect.com%2Findex.asp%3FSRCCODE%3DBIZRATE&mid=23939
  # When the destination is itself a doubleclick redirect, the host of the
  # last embedded URL is used. Any parsing failure leaves the homepage nil.
  begin
    redirect = CGI::unescape(merchant.at('url').inner_text)
    target = redirect.match(/(\?|&)t=(.+)/)[2]
    host = if target.index('doubleclick').nil?
             target.match(/https?:\/\/(.+?)(\/|&|\?|$)/)[1]
           else
             target.match(/.+https?:\/\/(.+?)(\/|&|\?|$)/)[1]
           end
    detail[:homepage] = "http://#{host}/"
  rescue
    detail[:homepage] = nil
  end

  # The review count is only reported for rated merchants.
  if rated
    survey_count = safe_inner_text(merchant.at('Details/surveyCount'))
    detail[:num_merchant_reviews] = (survey_count.nil? || survey_count.empty?) ? 0 : survey_count.to_i
  end
  detail
end
|
106
|
+
|
107
|
+
# Builds the bizrate-hosted logo image URL for a merchant.
#
# merchant_id - the merchant identifier placed in the file name.
#
# Returns the URL String; nothing checks that the image exists.
def logo_url(merchant_id)
  format('http://img.bizrate.com/merchant/%s.gif', merchant_id)
end
|
110
|
+
|
111
|
+
# Returns the merchant's logo URL only if verify_logo_url accepts it.
#
# merchant_id - the merchant identifier.
#
# Returns the URL String, or nil when verification fails.
def verified_logo_url(merchant_id)
  candidate = logo_url(merchant_id)
  return candidate if verify_logo_url(candidate)

  nil
end
|
115
|
+
|
116
|
+
# -----------------------------------------------------------------------------------------------
|
117
|
+
private
|
118
|
+
# -----------------------------------------------------------------------------------------------
|
119
|
+
|
120
|
+
# Requests the offers of a single product from the 'product' service.
#
# product_id - product identifier; converted to a String and stripped.
#
# Returns whatever make_api_request returns.
def make_offer_service_request(product_id)
  make_api_request('product',
                   { 'productId' => product_id.to_s.strip,
                     'offersOnly' => 'true',
                     'biddedOnly' => 'true',
                     'resultsOffers' => '100',
                     'zipCode' => '64141' })
end
|
128
|
+
|
129
|
+
# Requests a single product from the 'product' service.
#
# product_id - product identifier; converted to a String and stripped.
#
# Returns whatever make_api_request returns.
def make_product_service_request(product_id)
  make_api_request('product', { 'productId' => product_id.to_s.strip })
end
|
133
|
+
|
134
|
+
# Requests expanded details of a single merchant from the 'merchant' service.
#
# merchant_id - merchant identifier; converted to a String and stripped.
#
# Returns whatever make_api_request returns.
def make_merchant_service_request(merchant_id)
  make_api_request('merchant',
                   { 'merchantId' => merchant_id.to_s.strip,
                     'expandDetails' => 'true' })
end
|
139
|
+
|
140
|
+
# Queries the 'brands' service for a keyword within a category.
#
# category_id - category identifier, passed through unchanged.
# keyword     - search keyword String; surrounding whitespace is stripped.
#
# Returns whatever make_api_request returns.
def make_brand_service_request(category_id, keyword)
  make_api_request('brands',
                   { 'categoryId' => category_id,
                     'keyword' => keyword.strip })
end
|
145
|
+
|
146
|
+
# Queries the 'taxonomy' service for a keyword within a category.
#
# category_id - category identifier, passed through unchanged.
# keyword     - search keyword String; surrounding whitespace is stripped.
#
# Returns whatever make_api_request returns.
def make_taxonomy_service_request(category_id, keyword)
  make_api_request('taxonomy',
                   { 'categoryId' => category_id,
                     'keyword' => keyword.strip })
end
|
151
|
+
|
152
|
+
# Makes any API request given a hash of querystring parameter/values. The
# generic parameters (API key, publisher id, placement id) are supplied here.
#
# service        - name of the catalog service appended to the base URL.
# service_params - Hash of query-string names to values; an Array value is
#                  emitted as one name=value pair per element.
#
# Values are percent-encoded so characters such as spaces or "&" in a keyword
# cannot corrupt the query string. Responses are cached through CACHE under a
# key derived from the query string, so identical requests skip the HTTP call.
#
# Returns the response parsed with Hpricot.XML, or nil when the call timed out
# or failed (both cases are logged as warnings).
def make_api_request(service, service_params)
  defaults = { 'apiKey' => SHOPZILLA_API_KEY,
               'publisherId' => 3973,
               'placementId' => 1 }
  # Caller-supplied parameters win; sorting makes the cache key stable.
  params = defaults.merge(service_params).sort

  query_string = params.map do |name, value|
    values = value.is_a?(Array) ? value : [value]
    values.map { |v| "#{name}=#{CGI.escape(v.to_s)}" }.join('&')
  end.join('&')

  # NOTE(review): the key prefix says "shopping-api" although this is the
  # Shopzilla client -- confirm it cannot collide with another helper's keys.
  key = "shopping-api-#{Digest::MD5.hexdigest(query_string)}-v3"
  result = CACHE.get(key)
  unless result
    url = "http://catalog.bizrate.com/services/catalog/v1/us/#{service}?#{query_string}"
    begin
      result = Timeout.timeout(api_call_timeout || @@default_api_call_timeout) do
        # URI#open is provided by open-uri; the block form closes the stream.
        URI.parse(url).open { |io| io.read }
      end
      CACHE.set(key, result, Source.shopzilla_source.offer_ttl_seconds)
    rescue Timeout::Error
      @logger.warn "Shopzilla API call timed out: #{url}"
      result = nil
    rescue StandardError => ex
      @logger.warn "Shopzilla API call failed (#{ex.message}): #{url}"
      result = nil
    end
  end

  result ? Hpricot.XML(result) : nil
end
|
199
|
+
|
200
|
+
# Checks that a merchant logo URL can actually be opened.
#
# logo_url - the URL String to probe.
#
# Returns true when the URL opens without raising, false otherwise. Failures
# are reported through the instance logger (or Kernel#warn when none is set).
def verify_logo_url(logo_url)
  # URI#open is provided by open-uri; the empty block closes the stream.
  URI.parse(logo_url).open('rb') { |_io| }
  true
rescue StandardError => ex
  message = "Not using bad merchant logo URL #{logo_url}: #{ex.message}"
  @logger ? @logger.warn(message) : warn(message)
  false
end
|
209
|
+
|
210
|
+
# Nil-tolerant wrapper around an element's inner_text.
#
# element - an object responding to inner_text, or nil.
#
# Returns the element's inner text, or nil when the element is nil.
def safe_inner_text(element)
  element.inner_text unless element.nil?
end
|
213
|
+
|
214
|
+
# Nil-tolerant HTML entity decoding.
#
# text - a String that may contain HTML entities, or nil.
#
# Returns the decoded String, or nil when given nil.
def safe_unescape_html(text)
  return nil if text.nil?

  CGI::unescapeHTML(text)
end
|
217
|
+
|
218
|
+
# Decides whether an offer's condition text describes a new item.
#
# condition_text - the condition String, or nil.
#
# Returns true when the text is missing, empty, or (case-insensitively)
# "NEW" or "OEM"; false otherwise.
def is_likely_new_condition?(condition_text)
  return true if condition_text.nil? || condition_text.empty?

  %w[NEW OEM].include?(condition_text.upcase)
end
|
221
|
+
|
222
|
+
# Decides whether an offer's stock text indicates availability.
#
# stock_text - the stock String, or nil.
#
# Returns false only when the text is (case-insensitively) "OUT"; missing or
# empty text counts as in stock.
def is_likely_in_stock?(stock_text)
  return true if stock_text.nil? || stock_text.empty?

  stock_text.upcase != 'OUT'
end
|
225
|
+
|
226
|
+
# Reads a price from an element's 'integral' attribute.
#
# element - an element exposing an attributes Hash, or nil when the offer has
#           no such element (for example a missing shipAmount).
#
# Returns the attribute value divided by 100 as a decimal, or nil when the
# element or its 'integral' attribute is missing or empty.
def get_price_from_node(element)
  # Offers may omit the element entirely; treat that like a missing price
  # rather than letting one incomplete offer abort the whole parse.
  return nil if element.nil?

  price = element.attributes['integral']
  return nil if price.nil? || price.empty?

  (price.to_i / 100.0).to_d
end
|
230
|
+
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'sources/source'
|
2
|
+
require 'sources/amazon_source'
|
3
|
+
require 'sources/buy_source'
|
4
|
+
require 'sources/ebay_source'
|
5
|
+
require 'sources/epinions_source'
|
6
|
+
require 'sources/google_source'
|
7
|
+
require 'sources/price_grabber_source'
|
8
|
+
require 'sources/reseller_ratings_source'
|
9
|
+
require 'sources/shopping_source'
|
10
|
+
require 'sources/shopzilla_source'
|
11
|
+
|
12
|
+
# Namespace holding the library's version string.
module BazaarSources
  # Frozen so the shared constant cannot be modified in place.
  VERSION = '0.1.8'.freeze
end
|
15
|
+
|
16
|
+
module URI
  # Characters that are percent-encoded before a URL is handed to URI.parse.
  CHARACTERS_DISLIKED_BY_PARSE = '^<>`| '.freeze

  # Parses a URL after percent-encoding the characters listed in
  # CHARACTERS_DISLIKED_BY_PARSE.
  #
  # The escaping is done here instead of through URI.escape, which newer Ruby
  # versions no longer provide; each offending character is replaced by the
  # upper-case %XX form of its bytes, as URI.escape did.
  #
  # url - the URL String to parse.
  #
  # Returns the parsed URI object. Raises whatever URI.parse raises for input
  # that is still invalid after escaping.
  def self.safe_parse(url)
    unsafe = Regexp.new("[#{Regexp.quote(CHARACTERS_DISLIKED_BY_PARSE)}]")
    escaped = url.gsub(unsafe) do |chars|
      chars.each_byte.map { |byte| format('%%%02X', byte) }.join
    end
    URI.parse(escaped)
  end
end
|
24
|
+
|
25
|
+
module Hpricot
  module Traverse
    # Concatenates the inner text of every child, tagging each piece as UTF-8
    # before joining.
    #
    # Returns the joined String, or "" when the receiver has no children
    # method or its children are nil.
    def inner_text
      return "" unless respond_to?(:children) && children

      children.map { |child| child.inner_text.force_encoding('UTF-8') }.join
    end
  end
end
|
@@ -0,0 +1,94 @@
|
|
1
|
+
require 'api_helpers/amazon'
|
2
|
+
|
3
|
+
class AmazonSource < Source
|
4
|
+
AMAZON_MERCHANT_PERMALINK = 'amazon'
|
5
|
+
|
6
|
+
# Passes Amazon's fixed configuration (name, homepage, offer/rating flags,
# fetch delay and product-code format) to the Source initializer.
def initialize
  super(name: 'Amazon',
        homepage: 'http://www.amazon.com/',
        cpc: 12,
        offer_enabled: true,
        offer_ttl_seconds: 3600,
        use_for_merchant_ratings: true,
        offer_affiliate: true,
        supports_lifetime_ratings: false,
        batch_fetch_delay: 2,
        product_code_regexp: /^[a-zA-Z0-9]{10}$/,
        product_code_examples: ['B000HEC7BO', 'B002YP45EQ'],
        product_page_link_erb: "http://www.amazon.com/gp/product/<%= product_code %>")
end
|
20
|
+
|
21
|
+
# Lazily creates the Amazon::ProductAdvertising client and reuses it on
# later calls.
def api
  @api = Amazon::ProductAdvertising.new unless @api
  @api
end
|
24
|
+
|
25
|
+
# Asks the API client for the "at a glance" URL of a merchant.
#
# merchant_source_code - the merchant code handed to the client.
#
# Returns whatever the client's at_a_glance_url returns.
def url_for_merchant_source_page(merchant_source_code)
  client = api
  client.at_a_glance_url(merchant_source_code)
end
|
28
|
+
|
29
|
+
# Looks up a seller through the API client and maps the result onto merchant
# source attributes.
#
# merchant_source_page_url - either a URL containing a "seller=A..." query
#                            parameter, or the bare seller code itself.
#
# Returns a Hash with :source, :code, :name, :merchant_rating (the average
# feedback rating times 20, or nil when the lookup has no average),
# :num_merchant_reviews, :logo_url and :homepage.
def fetch_merchant_source(merchant_source_page_url)
  seller = merchant_source_page_url.match(/(\?|&)seller=(A.+?)(&.*|$)/)
  # Without a seller parameter the argument is used as the code as-is.
  amazon_merchant_code = seller ? seller[2] : merchant_source_page_url

  delay_fetch
  properties = api.seller_lookup(amazon_merchant_code)
  # NOTE(review): presumably absent for sellers without feedback -- confirm
  # against Amazon::ProductAdvertising#seller_lookup.
  average = properties[:average_feedback_rating]

  { :source => self,
    :code => properties[:seller_id],
    :name => properties[:merchant_name],
    :merchant_rating => average.nil? ? nil : average * 20.0,
    :num_merchant_reviews => properties[:total_feedback],
    :logo_url => properties[:logo_url],
    :homepage => properties[:homepage] }
end
|
45
|
+
|
46
|
+
# Finds the offer with the lowest price-plus-shipping total for a product.
#
# product_code              - the product code whose offers are fetched.
# min_num_offers_to_qualify - optional minimum number of fetched offers; with
#                             fewer offers the result is nil.
#
# Offers lacking a price or a shipping amount are ignored. On equal totals
# the earliest offer wins.
#
# Returns the best offer, or nil when none qualifies.
def fetch_best_offer(product_code, min_num_offers_to_qualify=nil)
  delay_fetch
  offers = fetch_offers(product_code)
  return nil if !min_num_offers_to_qualify.nil? && offers.length < min_num_offers_to_qualify

  comparable = offers.reject { |offer| offer.price.nil? || offer.shipping.nil? }
  comparable.min_by { |offer| offer.price + offer.shipping }
end
|
61
|
+
|
62
|
+
# Reports the total price of the best offer, requiring at least three offers.
#
# product_code - the product code to price.
#
# Returns the best offer's total_price, or nil when there is no best offer.
def fetch_street_price(product_code)
  best_offer = fetch_best_offer(product_code, 3)
  return nil if best_offer.nil?

  best_offer.total_price
end
|
66
|
+
|
67
|
+
# Swaps the associate tag in an offer URL for the alternate tag.
#
# The tag is matched literally: it is not compiled into a regular expression,
# so characters such as "." in the tag cannot match unrelated text, and the
# replacement is inserted verbatim.
#
# offer_url - the offer URL String.
#
# Returns a new String with every occurrence of AMAZON_ASSOCIATE_TAG replaced
# by AMAZON_ASSOCIATE_TAG_ALT.
def self.nullify_offer_url(offer_url)
  offer_url.gsub(AMAZON_ASSOCIATE_TAG) { AMAZON_ASSOCIATE_TAG_ALT }
end
|
70
|
+
|
71
|
+
# Instance-level convenience that forwards to AmazonSource.nullify_offer_url.
#
# offer_url - the offer URL String.
#
# Returns whatever the class-level method returns.
def nullify_offer_url(offer_url)
  AmazonSource.nullify_offer_url(offer_url)
end
|
74
|
+
|
75
|
+
# Tells whether the given merchant is Amazon itself.
#
# merchant - an object responding to permalink, or nil.
#
# Returns true when the merchant's permalink equals AMAZON_MERCHANT_PERMALINK,
# false otherwise (including for nil).
def offer_affiliate_for_merchant?(merchant)
  return false if merchant.nil?

  merchant.permalink == AMAZON_MERCHANT_PERMALINK
end
|
78
|
+
|
79
|
+
# Returns the deal URL, passed through nullify_offer_url when requested.
#
# deal_url - the deal URL String.
# nullify  - when truthy, the URL is run through nullify_offer_url.
#
# Returns the (possibly rewritten) URL.
def affiliate_wrap_deal_url(deal_url, nullify=false)
  return deal_url unless nullify

  nullify_offer_url(deal_url)
end
|
82
|
+
|
83
|
+
# Fetches all offers for a product and translates API errors into Source
# errors.
#
# product_code - the ASIN handed to the API client.
#
# Returns the values of the Hash produced by find_offers_by_asin.
#
# Raises Source::ProductNotFoundError for Amazon::AsinNotFoundError,
# Source::ProductFatalError for Amazon::AsinFatalError, and
# Source::GeneralError for any other StandardError. Each new message is the
# original message followed by " w/ <product_code>"; it is built as a new
# String so the original exception's message is never modified.
def fetch_offers(product_code)
  api.find_offers_by_asin(product_code).values
rescue Amazon::AsinNotFoundError => ex
  raise Source::ProductNotFoundError.new("#{ex.message} w/ #{product_code}", keyname, product_code)
rescue Amazon::AsinFatalError => ex
  raise Source::ProductFatalError.new("#{ex.message} w/ #{product_code}", keyname, product_code)
rescue => ex
  raise Source::GeneralError.new("#{ex.message} w/ #{product_code}", keyname)
end
|
94
|
+
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
class BuySource < Source
|
2
|
+
BUY_MERCHANT_PERMALINK = 'buy-com'
|
3
|
+
BUY_AFFILIATE_PID = '3332520'
|
4
|
+
BUY_AFFILIATE_URL_PREFIX = "http://affiliate.buy.com/gateway.aspx?adid=17662&pid=#{BUY_AFFILIATE_PID}&aid=10391416&sURL="
|
5
|
+
|
6
|
+
# Passes Buy.com's fixed configuration (name, homepage, offer/rating flags,
# fetch delay and product-code format) to the Source initializer.
def initialize
  super(name: 'Buy.com',
        homepage: 'http://www.buy.com/',
        cpc: 7,
        offer_enabled: false,
        offer_ttl_seconds: 86400,
        use_for_merchant_ratings: false,
        offer_affiliate: true,
        supports_lifetime_ratings: false,
        batch_fetch_delay: 2,
        product_code_regexp: /^\d{9}$/,
        product_code_examples: ['208464207', '211986388'],
        product_page_link_erb: "http://www.buy.com/retail/product.asp?sku=<%= product_code %>")
end
|
20
|
+
|
21
|
+
# Strips the affiliate PID out of an offer URL.
#
# offer_url - the offer URL String.
#
# Returns a new String with every occurrence of BUY_AFFILIATE_PID removed.
def nullify_offer_url(offer_url)
  offer_url.gsub(BUY_AFFILIATE_PID, '')
end
|
24
|
+
|
25
|
+
# Tells whether the given merchant is Buy.com itself.
#
# merchant - an object responding to permalink, or nil.
#
# Returns true when the merchant's permalink equals BUY_MERCHANT_PERMALINK,
# false otherwise (including for nil).
def offer_affiliate_for_merchant?(merchant)
  return false if merchant.nil?

  merchant.permalink == BUY_MERCHANT_PERMALINK
end
|
28
|
+
|
29
|
+
# Wraps a deal URL in the Buy.com affiliate gateway URL.
#
# deal_url - the destination URL; it is CGI-escaped and appended to
#            BUY_AFFILIATE_URL_PREFIX.
# nullify  - when truthy, the wrapped URL is run through nullify_offer_url.
#
# Returns the wrapped (and possibly nullified) URL String.
def affiliate_wrap_deal_url(deal_url, nullify=false)
  wrapped = "#{BUY_AFFILIATE_URL_PREFIX}#{CGI::escape(deal_url)}"
  return wrapped unless nullify

  nullify_offer_url(wrapped)
end
|
34
|
+
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
class EbaySource < Source
|
2
|
+
EBAY_MERCHANT_PERMALINK = 'ebay'
|
3
|
+
EBAY_AFFILIATE_PID = '711-53200-19255-0'
|
4
|
+
EBAY_DEFAULT_CAMPAIGN_ID = '5336205246'
|
5
|
+
EBAY_ADMIN_CAMPAIGN_ID = '5336210401'
|
6
|
+
EBAY_AFFILIATE_URL_PREFIX = "http://rover.ebay.com/rover/1/#{EBAY_AFFILIATE_PID}/1?type=4&campid=#{EBAY_DEFAULT_CAMPAIGN_ID}&toolid=10001&customid=&mpre="
|
7
|
+
|
8
|
+
# Passes eBay's fixed configuration (name, homepage, offer/rating flags and
# fetch delay; no product-code format) to the Source initializer.
def initialize
  super(name: 'eBay.com',
        homepage: 'http://www.ebay.com/',
        cpc: 10,
        offer_enabled: false,
        offer_ttl_seconds: 86400,
        use_for_merchant_ratings: false,
        offer_affiliate: true,
        supports_lifetime_ratings: false,
        batch_fetch_delay: 2,
        product_code_regexp: nil,
        product_code_examples: [])
end
|
21
|
+
|
22
|
+
# Swaps the default campaign id in an offer URL for the admin campaign id.
#
# offer_url - the offer URL String.
#
# Returns a new String with every occurrence of EBAY_DEFAULT_CAMPAIGN_ID
# replaced by EBAY_ADMIN_CAMPAIGN_ID.
def nullify_offer_url(offer_url)
  offer_url.gsub(EBAY_DEFAULT_CAMPAIGN_ID, EBAY_ADMIN_CAMPAIGN_ID)
end
|
25
|
+
|
26
|
+
# Tells whether the given merchant is eBay itself.
#
# merchant - an object responding to permalink, or nil.
#
# Returns true when the merchant's permalink equals EBAY_MERCHANT_PERMALINK,
# false otherwise (including for nil).
def offer_affiliate_for_merchant?(merchant)
  return false if merchant.nil?

  merchant.permalink == EBAY_MERCHANT_PERMALINK
end
|
29
|
+
|
30
|
+
# Wraps a deal URL in the eBay affiliate redirect URL.
#
# deal_url - the destination URL; it is CGI-escaped and appended to
#            EBAY_AFFILIATE_URL_PREFIX.
# nullify  - when truthy, the wrapped URL is run through nullify_offer_url.
#
# Returns the wrapped (and possibly nullified) URL String.
def affiliate_wrap_deal_url(deal_url, nullify=false)
  wrapped = "#{EBAY_AFFILIATE_URL_PREFIX}#{CGI::escape(deal_url)}"
  return wrapped unless nullify

  nullify_offer_url(wrapped)
end
|
35
|
+
end
|
@@ -0,0 +1,80 @@
|
|
1
|
+
require 'ostruct'
|
2
|
+
|
3
|
+
class EpinionsSource < Source
|
4
|
+
# Passes Epinions' fixed configuration (name, homepage, offer/rating flags,
# fetch delay and product-code format) to the Source initializer.
def initialize
  super(name: 'Epinions',
        homepage: 'http://www.epinions.com/',
        cpc: 0,
        offer_enabled: false,
        offer_ttl_seconds: 0,
        use_for_merchant_ratings: true,
        offer_affiliate: false,
        supports_lifetime_ratings: false,
        batch_fetch_delay: 1,
        product_code_regexp: /^\d{4,10}$/,
        product_code_examples: ['44393573', '37469715'])
end
|
17
|
+
|
18
|
+
# Builds the Epinions page URL for a merchant code.
#
# merchant_source_code - the merchant code appended to the site root.
#
# Returns the URL String.
def url_for_merchant_source_page(merchant_source_code)
  'http://www.epinions.com/' + merchant_source_code.to_s
end
|
21
|
+
|
22
|
+
# Extracts the merchant code (the first path segment) from an Epinions URL.
#
# merchant_source_page_url - the page URL String.
#
# Returns the code String, or nil when the URL is not an epinions.com URL
# (instead of raising on the failed match).
def code_from_merchant_source_page_url(merchant_source_page_url)
  found = merchant_source_page_url.match(/epinions\.com\/([^\/\?#]*)/)
  found && found[1]
end
|
25
|
+
|
26
|
+
# Scrapes an Epinions merchant page into a merchant source record.
#
# merchant_source_page_url - the merchant page URL; a trailing "/display_~..."
#                            part is ignored. The caller's String is not
#                            modified.
#
# Returns an OpenStruct with :source and :code always set, and :name,
# :logo_url, :merchant_rating (page rating times 20), :num_merchant_reviews
# and :homepage set only when the corresponding page element is found and
# parses.
def fetch_merchant_source(merchant_source_page_url)
  # Work on a copy so a frozen or shared argument is left alone.
  page_url = merchant_source_page_url.gsub(/\/display_~.*$/, '')
  delay_fetch
  # URI#open is provided by open-uri; the block form closes the stream.
  doc = URI.parse(page_url).open { |page| Hpricot(page) }

  merchant_source = OpenStruct.new
  merchant_source.source = self

  # merchant name
  element = doc.at('h1[@class = "title"]')
  merchant_source.name = element.inner_text.strip unless element.nil?

  # merchant logo (the "-resizedNNN" marker is dropped from the image URL)
  element = doc.at('img[@name = "product_image"]')
  merchant_source.logo_url = element.attributes['src'].gsub(/-resized\d+/, '') unless element.nil?

  # merchant code
  merchant_source.code = code_from_merchant_source_page_url(page_url)

  # merchant rating
  element = doc.at('span[text() *= "Overall store rating:"]/../img')
  element = doc.at('span[text() *= "Overall service rating:"]/../img') if element.nil?
  unless element.nil?
    # String#[] yields nil on a failed match instead of raising.
    merchant_rating = element.attributes['alt'].to_s[/Store Rating: ((\d|,)*\.?\d)/, 1]
    merchant_source.merchant_rating = merchant_rating.to_f * 20.0 unless merchant_rating.nil?
  end

  # Num Merchant Reviews (the count may contain thousands separators)
  element = doc.at('span[@class = "sgr"]')
  unless element.nil?
    num_merchant_reviews = element.inner_text[/Reviewed by ((?:\d|,)+) customer/, 1]
    merchant_source.num_merchant_reviews = num_merchant_reviews.delete(',').to_i unless num_merchant_reviews.nil? || num_merchant_reviews.empty?
  end

  # Homepage
  element = doc.at('span[text() = "Web Site"]../../td[2]/span/a')
  merchant_source.homepage = element.inner_text.strip.downcase unless element.nil?

  merchant_source
end
|
76
|
+
|
77
|
+
# Formats a merchant's stored rating on a five-point scale.
#
# merchant_source - an object responding to get_merchant_rating.
#
# Returns a String such as "4.5/5.0" (the stored rating divided by 20, shown
# with one decimal place).
def format_rating(merchant_source)
  stars = merchant_source.get_merchant_rating.to_f / 20.0
  format('%01.1f/5.0', stars)
end
|
80
|
+
end
|
@@ -0,0 +1,119 @@
|
|
1
|
+
require 'uri'
|
2
|
+
require 'ostruct'
|
3
|
+
require 'open-uri'
|
4
|
+
require 'hpricot'
|
5
|
+
|
6
|
+
class GoogleSource < Source
|
7
|
+
# Passes Google Shopping's fixed configuration (name, homepage, offer/rating
# flags and fetch delay; no product-code format) to the Source initializer.
def initialize
  super(name: 'Google Shopping',
        homepage: 'http://www.google.com/products',
        cpc: 0,
        offer_enabled: false,
        offer_ttl_seconds: 0,
        use_for_merchant_ratings: true,
        offer_affiliate: false,
        supports_lifetime_ratings: false,
        batch_fetch_delay: 3,
        product_code_regexp: nil,
        product_code_examples: [])
end
|
20
|
+
|
21
|
+
# Builds the Google Shopping reviews URL for a merchant code.
#
# merchant_source_code - the merchant code placed in the "cid" parameter.
#
# Returns the URL String.
def url_for_merchant_source_page(merchant_source_code)
  'http://www.google.com/products/reviews?sort=1&cid=' + merchant_source_code.to_s
end
|
24
|
+
|
25
|
+
# Extracts the merchant code (the "cid" parameter) from a Google URL.
#
# merchant_source_page_url - the page URL String.
#
# Returns the code String, or nil when the URL does not match.
def code_from_merchant_source_page_url(merchant_source_page_url)
  matched = merchant_source_page_url.match(/google.com.*cid=(.+?)($|&.*)/)
  matched ? matched[1] : nil
end
|
29
|
+
|
30
|
+
# Scrapes a Google Shopping merchant reviews page into a merchant source
# record.
#
# merchant_source_page_url - the reviews page URL.
#
# Returns an OpenStruct with :source and :code always set, and :name,
# :merchant_rating (page rating times 20) and :num_merchant_reviews set only
# when the corresponding page element is found and parses. A page without an
# "Average rating" box yields a record without rating data instead of raising.
def fetch_merchant_source(merchant_source_page_url)
  delay_fetch
  # URI#open is provided by open-uri; the block form closes the stream.
  doc = URI.parse(merchant_source_page_url).open { |page| Hpricot(page) }

  merchant_source = OpenStruct.new
  merchant_source.source = self

  # merchant code
  merchant_source.code = code_from_merchant_source_page_url(merchant_source_page_url)

  # merchant name
  element = doc.at('//table//tr/td//font[@size = "+1"]')
  merchant_source.name = element.inner_text.strip unless element.nil?

  rating_box_element = doc.at('//table//tr//td//b[text() = "Average rating"]/..')
  return merchant_source if rating_box_element.nil?

  # merchant rating
  element = rating_box_element.at('font[@size = "+3"]')
  unless element.nil?
    # String#[] yields nil on a failed match instead of raising.
    merchant_rating = element.inner_text[/\s*(.*?)\s*\/.*?/, 1]
    merchant_source.merchant_rating = merchant_rating.to_f * 20.0 unless merchant_rating.nil?
  end

  # Num Merchant Reviews
  element = rating_box_element.at('font[@size = "-1"]')
  unless element.nil?
    num_merchant_reviews = element.inner_text[/((\d|,)+)/, 1]
    merchant_source.num_merchant_reviews = num_merchant_reviews.delete(',').to_i unless num_merchant_reviews.nil? || num_merchant_reviews.empty?
  end

  merchant_source
end
|
66
|
+
|
67
|
+
# Formats a merchant's stored rating on a five-point scale.
#
# merchant_source - an object responding to get_merchant_rating.
#
# Returns a String such as "4.5/5.0" (the stored rating divided by 20, shown
# with one decimal place).
def format_rating(merchant_source)
  stars = merchant_source.get_merchant_rating.to_f / 20.0
  format('%01.1f/5.0', stars)
end
|
70
|
+
|
71
|
+
# Reads a Google product page's seller table and adds a Google merchant
# source to every known merchant that does not have one yet.
#
# google_merchant_list_url - URL of a product page containing the
#   "#ps-sellers-table" element, e.g.
#   "http://www.google.com/products/catalog?q=Projectors&btnG=Search+Products&show=dd&cid=8852330310663509594&sa=N&start=0#ps-sellers"
#
# The first seller column is skipped. A seller is considered only when both a
# domain (from the seller link) and a CID (from the rating link in the
# following cell) were found; rows missing either are reported and skipped
# instead of raising. Merchants are looked up by domain, and a mapping is
# added only when exactly one merchant matches and it has no Google source.
# Progress is printed to standard output.
#
# Returns the number of new mappings added.
def self.grab_new_mappings(google_merchant_list_url)
  # URI#open is provided by open-uri; the block form closes the stream.
  doc = URI.parse(google_merchant_list_url).open { |body| Hpricot.XML(body) }

  google_sellers = []
  sellers_table = (doc / '#ps-sellers-table')
  sellers_table.search('td.ps-seller-col').each_with_index do |sellers_column, i|
    next if i == 0

    name = nil
    domain = nil
    code = nil

    link = sellers_column.at('a')
    unless link.nil?
      name = link.inner_text.strip
      puts "Seller: #{name}"
      href_match = link.attributes['href'].to_s.match(/\?q=http:\/\/(.+)\//)
      if href_match
        domain = Merchant.parse_url_for_domain(href_match[1])
        puts "Domain: #{domain}"
      end
    end

    # The rating link is expected in the cell after the seller column.
    rating_cell = sellers_column.next_sibling
    rating_link = rating_cell.nil? ? nil : rating_cell.at('a')
    unless rating_link.nil?
      cid_match = rating_link.attributes['href'].to_s.match(/.*&cid=(.+)&.*/)
      code = cid_match[1] if cid_match
    end
    puts "CID: #{code}"
    puts '-----------------------------------------------------'
    google_sellers << {:name => name, :code => code, :domain => domain} unless domain.nil? || domain.empty? || code.nil? || code.empty?
  end

  new_mappings_count = 0
  google_source = GoogleSource.first
  google_sellers.each do |seller|
    merchants = Merchant.find(:all, :conditions => {:domain => seller[:domain]})
    if merchants.length > 1
      puts "More than one merchant found for domain: #{seller[:domain]}"
    elsif merchants.length == 1
      merchant = merchants.first
      if merchant.merchant_source(google_source).nil?
        url = google_source.url_for_merchant_source_page(seller[:code])
        new_merchant_source = google_source.fetch_merchant_source(url)
        merchant.merchant_sources << new_merchant_source
        merchant.update_from_sources
        merchant.save!
        new_mappings_count += 1
      end
    end
  end
  puts "Google sellers found: #{google_sellers.length}"
  puts "New mappings added: #{new_mappings_count}"
  new_mappings_count
end
|
119
|
+
end
|