bazaar_sources 0.2.1.1.1.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,230 @@
1
require 'cgi'
require 'digest/md5'
require 'logger'
require 'open-uri'
require 'timeout'

require 'hpricot'
4
+
5
# Client for the Shopzilla/Bizrate catalog web service.
#
# Responses are fetched over HTTP, cached through the application-wide CACHE
# object and parsed with Hpricot. SHOPZILLA_API_KEY, CACHE and Source are
# expected to be defined by the host application.
class ShopzillaAPI
  @@default_api_call_timeout = 25

  # Sets the timeout (in seconds) used by instances that have no
  # api_call_timeout of their own.
  def self.default_api_call_timeout=(obj)
    @@default_api_call_timeout = obj
  end

  # Per-instance timeout override (seconds); nil means "use the class default".
  attr_accessor :api_call_timeout

  def initialize
    @logger = Logger.new(STDERR)
  end

  # Find all offers for a product given the (shopzilla) product_id.
  #
  # Only offers whose condition looks new and which are not flagged as out of
  # stock are kept, and at most one offer per merchant id is returned (a later
  # offer from the same merchant replaces an earlier one).
  #
  # Returns an Array of offer hashes; empty when the API call failed.
  def find_offers_by_product_id(product_id)
    result = make_offer_service_request(product_id)
    return [] if result.nil?

    offers = {}
    (result / 'Products/Product/Offers/Offer').each do |offer|
      condition = safe_inner_text(offer.at('condition'))
      stock = safe_inner_text(offer.at('stock'))
      next unless is_likely_new_condition?(condition) && is_likely_in_stock?(stock)

      merchant_id = offer.attributes['merchantId']
      rating_elem = offer.at('MerchantRating')
      merchant_rating = rating_elem.nil? ? nil : normalize_merchant_rating(rating_elem.attributes['value'].to_f)

      offers[merchant_id] = { :merchant_code => merchant_id,
                              :merchant_name => safe_unescape_html(safe_inner_text(offer.at('merchantName'))),
                              :merchant_logo_url => logo_url(merchant_id),
                              :cpc => nil,
                              :price => get_price_from_node(offer.at('price')),
                              :shipping => get_price_from_node(offer.at('shipAmount')),
                              :offer_url => safe_unescape_html(safe_inner_text(offer.at('url'))),
                              :offer_tier => 1,
                              :merchant_rating => merchant_rating,
                              :num_merchant_reviews => nil }
    end
    offers.values
  end

  # Scales a rating by ten and rounds it to an integer; nil stays nil.
  def normalize_merchant_rating(merchant_rating)
    merchant_rating.nil? ? nil : (merchant_rating * 10.0).round
  end

  # This method makes a separate API call to get merchant detail info. It
  # returns a hash that is aligned with the merchant_source model, or nil when
  # the API call failed.
  def merchant_source_detail(merchant_id)
    result = make_merchant_service_request(merchant_id)
    return nil if result.nil?

    merchant_element = result / "Merchants/Merchant[@id=#{merchant_id}]"
    merchant_source = {}
    merchant_source[:source] = 'shopzilla'
    merchant_source[:code] = merchant_id.to_s
    merchant_source[:name] = safe_inner_text(merchant_element.at('name'))

    # :logo_url is only set when the image can actually be fetched.
    verified_logo = verified_logo_url(merchant_id)
    merchant_source[:logo_url] = verified_logo unless verified_logo.nil?

    # rating will not exist if unrated (although "unrated" will -- doh!)
    rated = false
    begin
      rating_elem = merchant_element.at('/Rating/Overall')
      unless rating_elem.nil?
        rated = true
        merchant_source[:merchant_rating] = normalize_merchant_rating(rating_elem.attributes['value'].to_f)
      end
    rescue StandardError
      merchant_source[:merchant_rating] = 0
    end

    merchant_source[:homepage] = extract_homepage(merchant_element)

    # The review count is only reported for rated merchants.
    if rated
      survey_count = safe_inner_text(merchant_element.at('Details/surveyCount'))
      merchant_source[:num_merchant_reviews] = survey_count.to_i # nil and "" both yield 0
    end
    merchant_source
  end

  # URL of the merchant logo image for the given merchant id.
  def logo_url(merchant_id)
    "http://img.bizrate.com/merchant/#{merchant_id}.gif"
  end

  # Same as logo_url, but returns nil when the image cannot be opened.
  def verified_logo_url(merchant_id)
    candidate = logo_url(merchant_id)
    verify_logo_url(candidate) ? candidate : nil
  end

  # -----------------------------------------------------------------------------------------------
  private
  # -----------------------------------------------------------------------------------------------

  # Derives the merchant homepage from the redirect URL in the merchant record.
  #
  # The URL provided is an escaped redirect through bizrate, for example
  #   http://www.bizrate.com/rd?t=http%3A%2F%2Fwww.tigerdirect.com%2Findex.asp%3FSRCCODE%3DBIZRATE&mid=23939&...
  #   http://www.bizrate.com/rd?t=http%3A%2F%2Fad.doubleclick.net%2Fclk%3B...%3Fhttp%3A%2F%2Fwww.officedepot.com&mid=814&...
  # The real target is in the "t" parameter; for doubleclick links the last
  # embedded URL is the one we want. There are many ways this can fail, so any
  # error yields nil.
  def extract_homepage(merchant_element)
    redir_url = CGI::unescape(merchant_element.at('url').inner_text)
    t_param_value = redir_url.match(/(\?|&)t=(.+)/)[2]
    host_pattern = if t_param_value.index('doubleclick').nil?
                     /https?:\/\/(.+?)(\/|&|\?|$)/
                   else
                     /.+https?:\/\/(.+?)(\/|&|\?|$)/
                   end
    "http://#{t_param_value.match(host_pattern)[1]}/"
  rescue StandardError
    nil
  end

  def make_offer_service_request(product_id)
    make_api_request('product',
                     'productId' => product_id.to_s.strip,
                     'offersOnly' => 'true',
                     'biddedOnly' => 'true',
                     'resultsOffers' => '100',
                     'zipCode' => '64141')
  end

  def make_product_service_request(product_id)
    make_api_request('product', 'productId' => product_id.to_s.strip)
  end

  def make_merchant_service_request(merchant_id)
    make_api_request('merchant',
                     'merchantId' => merchant_id.to_s.strip,
                     'expandDetails' => 'true')
  end

  def make_brand_service_request(category_id, keyword)
    make_api_request('brands',
                     'categoryId' => category_id,
                     'keyword' => keyword.strip)
  end

  def make_taxonomy_service_request(category_id, keyword)
    make_api_request('taxonomy',
                     'categoryId' => category_id,
                     'keyword' => keyword.strip)
  end

  # Make any API request given a hash of querystring parameter/values. Generic
  # parameters (API key, publisher and placement ids) are supplied here and can
  # be overridden by service_params.
  #
  # Returns the parsed Hpricot XML document, or nil when the call failed.
  def make_api_request(service, service_params)
    params = { 'apiKey' => SHOPZILLA_API_KEY,
               'publisherId' => 3973,
               'placementId' => 1 }.merge(service_params)
    query_string = build_query_string(params)

    # do we already have a cached version of this API call?
    key = "shopping-api-#{Digest::MD5.hexdigest(query_string)}-v3"
    result = CACHE.get(key)
    result = fetch_and_cache(service, query_string, key) unless result
    result ? Hpricot.XML(result) : nil
  end

  # Builds a querystring with parameters sorted by name. Array values are
  # expanded into one name=value pair per element; names and values are
  # URL-escaped so characters such as spaces or "&" cannot corrupt the request.
  def build_query_string(params)
    params.sort.collect do |name, value|
      values = value.is_a?(Array) ? value : [value]
      values.collect { |v| "#{CGI.escape(name.to_s)}=#{CGI.escape(v.to_s)}" }.join('&')
    end.join('&')
  end

  # Fetches the response body for the given service call and stores it in the
  # cache. Returns the body, or nil (after logging a warning) on timeout or any
  # other error.
  #
  # NOTE(review): relies on open-uri's Kernel#open; from Ruby 3.0 on this needs
  # URI.open instead.
  def fetch_and_cache(service, query_string, key)
    url = "http://catalog.bizrate.com/services/catalog/v1/us/#{service}?#{query_string}"
    begin
      body = Timeout.timeout(api_call_timeout || @@default_api_call_timeout) do
        open(url) { |io| io.read } # block form closes the handle
      end
      CACHE.set(key, body, Source.shopzilla_source.offer_ttl_seconds)
      body
    rescue Timeout::Error
      @logger.warn "Shopzilla API call timed out: #{url}"
      nil
    rescue StandardError => ex
      @logger.warn "Shopzilla API call failed (#{ex.message}): #{url}"
      nil
    end
  end

  # True when the logo URL can be opened, false (with a logged warning) otherwise.
  def verify_logo_url(logo_url)
    open(logo_url, 'rb').close
    true
  rescue Timeout::Error, StandardError => ex
    @logger.warn "Not using bad merchant logo URL #{logo_url}: #{ex.message}"
    false
  end

  def safe_inner_text(element)
    element.nil? ? nil : element.inner_text
  end

  def safe_unescape_html(text)
    text.nil? ? nil : CGI::unescapeHTML(text)
  end

  # A missing or empty condition is treated as new; otherwise only NEW and OEM
  # (case-insensitive) qualify.
  def is_likely_new_condition?(condition_text)
    return true if condition_text.nil? || condition_text.empty?

    %w[NEW OEM].include?(condition_text.upcase)
  end

  # Everything except an explicit OUT (case-insensitive) counts as in stock.
  def is_likely_in_stock?(stock_text)
    stock_text.nil? || stock_text.upcase != 'OUT'
  end

  # Reads the 'integral' attribute (an amount in hundredths) and converts it to
  # a decimal amount. Returns nil when the node or the attribute is missing or
  # empty.
  def get_price_from_node(element)
    return nil if element.nil?

    price = element.attributes['integral']
    (price.nil? || price.empty?) ? nil : (price.to_i / 100.0).to_d
  end
end
@@ -0,0 +1,35 @@
1
+ require 'sources/source'
2
+ require 'sources/amazon_source'
3
+ require 'sources/buy_source'
4
+ require 'sources/ebay_source'
5
+ require 'sources/epinions_source'
6
+ require 'sources/google_source'
7
+ require 'sources/price_grabber_source'
8
+ require 'sources/reseller_ratings_source'
9
+ require 'sources/shopping_source'
10
+ require 'sources/shopzilla_source'
11
+
12
# Namespace carrying the gem's version string.
module BazaarSources
  # Frozen so the version cannot be modified in place at runtime.
  VERSION = '0.1.8'.freeze
end
15
+
16
# Extension of the standard URI module with a more forgiving parser.
module URI
  # Characters that make URI.parse raise and are therefore percent-encoded
  # before parsing.
  CHARACTERS_DISLIKED_BY_PARSE = '^<>`| '

  # Parses url after percent-encoding every character listed in
  # CHARACTERS_DISLIKED_BY_PARSE.
  #
  # The encoding is done with a plain gsub rather than URI.escape, which is
  # obsolete and no longer available in Ruby 3.0+. All listed characters are
  # single-byte ASCII, so their code point is their byte value.
  #
  # Returns the URI object produced by URI.parse; raises whatever URI.parse
  # raises for URLs that are still invalid after escaping.
  def self.safe_parse(url)
    unsafe = /[#{Regexp.escape(CHARACTERS_DISLIKED_BY_PARSE)}]/
    escaped = url.gsub(unsafe) { |char| format('%%%02X', char.ord) }
    URI.parse(escaped)
  end
end
24
+
25
# Override of Hpricot::Traverse#inner_text that tags the text of every child
# as UTF-8 before joining it.
module Hpricot
  module Traverse
    # Returns the concatenated inner text of all children, or "" when the
    # receiver has no children.
    #
    # force_encoding re-labels each child's string in place; it does not
    # transcode the bytes.
    def inner_text
      return "" unless respond_to?(:children) && children

      children.map { |child| child.inner_text.force_encoding('UTF-8') }.join
    end
  end
end
@@ -0,0 +1,94 @@
1
+ require 'api_helpers/amazon'
2
+
3
# Source backed by the Amazon product advertising API helper. Used for offers
# and for merchant (seller) ratings.
class AmazonSource < Source
  AMAZON_MERCHANT_PERMALINK = 'amazon'

  def initialize
    super(:name => 'Amazon',
          :homepage => 'http://www.amazon.com/',
          :cpc => 12,
          :offer_enabled => true,
          :offer_ttl_seconds => 3600,
          :use_for_merchant_ratings => true,
          :offer_affiliate => true,
          :supports_lifetime_ratings => false,
          :batch_fetch_delay => 2,
          :product_code_regexp => /^[a-zA-Z0-9]{10}$/,
          :product_code_examples => ['B000HEC7BO', 'B002YP45EQ'],
          :product_page_link_erb => "http://www.amazon.com/gp/product/<%= product_code %>")
  end

  # Lazily created API client.
  def api
    @api ||= Amazon::ProductAdvertising.new
  end

  def url_for_merchant_source_page(merchant_source_code)
    api.at_a_glance_url(merchant_source_code)
  end

  # Looks up a seller and returns a hash of merchant-source attributes.
  #
  # merchant_source_page_url may be a seller page URL (the code is then taken
  # from its "seller" query parameter) or the bare seller code itself.
  def fetch_merchant_source(merchant_source_page_url)
    seller_param = merchant_source_page_url[/(\?|&)seller=(A.+?)(&.*|$)/, 2]
    amazon_merchant_code = seller_param || merchant_source_page_url
    delay_fetch
    properties = api.seller_lookup(amazon_merchant_code)

    # The average is multiplied by 20; a seller without an average rating gets
    # nil instead of raising on nil * 20.0.
    average_rating = properties[:average_feedback_rating]

    { :source => self,
      :code => properties[:seller_id],
      :name => properties[:merchant_name],
      :merchant_rating => average_rating.nil? ? nil : average_rating * 20.0,
      :num_merchant_reviews => properties[:total_feedback],
      :logo_url => properties[:logo_url],
      :homepage => properties[:homepage] }
  end

  # Returns the offer with the lowest price + shipping, ignoring offers that
  # lack either value. Returns nil when there is no such offer, or when fewer
  # than min_num_offers_to_qualify offers were found. On a tie the earliest
  # offer wins.
  def fetch_best_offer(product_code, min_num_offers_to_qualify=nil)
    delay_fetch
    offers = fetch_offers(product_code)
    return nil if !min_num_offers_to_qualify.nil? && offers.length < min_num_offers_to_qualify

    best_offer = nil
    offers.each do |offer|
      next if offer.price.nil? || offer.shipping.nil?

      if best_offer.nil? || (offer.price + offer.shipping) < (best_offer.price + best_offer.shipping)
        best_offer = offer
      end
    end
    best_offer
  end

  # Total price of the best offer, requiring at least three offers.
  def fetch_street_price(product_code)
    best_offer = fetch_best_offer(product_code, 3)
    best_offer.nil? ? nil : best_offer.total_price
  end

  # Swaps the regular associate tag for the alternate one. The tag is matched
  # literally (not as a regular expression).
  def self.nullify_offer_url(offer_url)
    offer_url.gsub(AMAZON_ASSOCIATE_TAG, AMAZON_ASSOCIATE_TAG_ALT)
  end

  def nullify_offer_url(offer_url)
    AmazonSource.nullify_offer_url(offer_url)
  end

  def offer_affiliate_for_merchant?(merchant)
    !merchant.nil? && merchant.permalink == AMAZON_MERCHANT_PERMALINK
  end

  # Amazon deal URLs need no wrapping; optionally swaps the associate tag.
  def affiliate_wrap_deal_url(deal_url, nullify=false)
    nullify ? nullify_offer_url(deal_url) : deal_url
  end

  # Fetches all offers for an ASIN, translating API errors into Source errors.
  # The product code is appended to the message via interpolation so the
  # original exception's message string is never mutated.
  def fetch_offers(product_code)
    api.find_offers_by_asin(product_code).values
  rescue Amazon::AsinNotFoundError => ex
    raise Source::ProductNotFoundError.new("#{ex.message} w/ #{product_code}", keyname, product_code)
  rescue Amazon::AsinFatalError => ex
    raise Source::ProductFatalError.new("#{ex.message} w/ #{product_code}", keyname, product_code)
  rescue => ex
    raise Source::GeneralError.new("#{ex.message} w/ #{product_code}", keyname)
  end
end
@@ -0,0 +1,34 @@
1
# Source definition for Buy.com, including its affiliate link handling.
class BuySource < Source
  BUY_MERCHANT_PERMALINK = 'buy-com'
  BUY_AFFILIATE_PID = '3332520'
  BUY_AFFILIATE_URL_PREFIX = "http://affiliate.buy.com/gateway.aspx?adid=17662&pid=#{BUY_AFFILIATE_PID}&aid=10391416&sURL="

  def initialize
    super(:name => 'Buy.com',
          :homepage => 'http://www.buy.com/',
          :cpc => 7,
          :offer_enabled => false,
          :offer_ttl_seconds => 86400,
          :use_for_merchant_ratings => false,
          :offer_affiliate => true,
          :supports_lifetime_ratings => false,
          :batch_fetch_delay => 2,
          :product_code_regexp => /^\d{9}$/,
          :product_code_examples => ['208464207', '211986388'],
          :product_page_link_erb => "http://www.buy.com/retail/product.asp?sku=<%= product_code %>")
  end

  # Removes every occurrence of the affiliate pid from the URL.
  def nullify_offer_url(offer_url)
    pid_pattern = /#{BUY_AFFILIATE_PID}/
    offer_url.gsub(pid_pattern, '')
  end

  # True only for the merchant whose permalink is the Buy.com one.
  def offer_affiliate_for_merchant?(merchant)
    return false if merchant.nil?

    merchant.permalink == BUY_MERCHANT_PERMALINK
  end

  # Prefixes the escaped deal URL with the affiliate gateway URL; with
  # nullify set, the affiliate pid is stripped from the result.
  def affiliate_wrap_deal_url(deal_url, nullify=false)
    wrapped_url = "#{BUY_AFFILIATE_URL_PREFIX}#{CGI::escape(deal_url)}"
    return wrapped_url unless nullify

    nullify_offer_url(wrapped_url)
  end
end
@@ -0,0 +1,35 @@
1
# Source definition for eBay, including its affiliate (rover) link handling.
class EbaySource < Source
  EBAY_MERCHANT_PERMALINK = 'ebay'
  EBAY_AFFILIATE_PID = '711-53200-19255-0'
  EBAY_DEFAULT_CAMPAIGN_ID = '5336205246'
  EBAY_ADMIN_CAMPAIGN_ID = '5336210401'
  EBAY_AFFILIATE_URL_PREFIX = "http://rover.ebay.com/rover/1/#{EBAY_AFFILIATE_PID}/1?type=4&campid=#{EBAY_DEFAULT_CAMPAIGN_ID}&toolid=10001&customid=&mpre="

  def initialize
    super(:name => 'eBay.com',
          :homepage => 'http://www.ebay.com/',
          :cpc => 10,
          :offer_enabled => false,
          :offer_ttl_seconds => 86400,
          :use_for_merchant_ratings => false,
          :offer_affiliate => true,
          :supports_lifetime_ratings => false,
          :batch_fetch_delay => 2,
          :product_code_regexp => nil,
          :product_code_examples => [])
  end

  # Replaces the default campaign id with the admin campaign id.
  def nullify_offer_url(offer_url)
    campaign_pattern = /#{EBAY_DEFAULT_CAMPAIGN_ID}/
    offer_url.gsub(campaign_pattern, EBAY_ADMIN_CAMPAIGN_ID)
  end

  # True only for the merchant whose permalink is the eBay one.
  def offer_affiliate_for_merchant?(merchant)
    return false if merchant.nil?

    merchant.permalink == EBAY_MERCHANT_PERMALINK
  end

  # Prefixes the escaped deal URL with the rover affiliate URL; with nullify
  # set, the campaign id is swapped for the admin one.
  def affiliate_wrap_deal_url(deal_url, nullify=false)
    wrapped_url = "#{EBAY_AFFILIATE_URL_PREFIX}#{CGI::escape(deal_url)}"
    return wrapped_url unless nullify

    nullify_offer_url(wrapped_url)
  end
end
@@ -0,0 +1,80 @@
1
+ require 'ostruct'
2
+
3
# Source that scrapes merchant information (name, logo, rating, review count,
# homepage) from Epinions store pages.
class EpinionsSource < Source
  def initialize
    super(:name => 'Epinions',
          :homepage => 'http://www.epinions.com/',
          :cpc => 0,
          :offer_enabled => false,
          :offer_ttl_seconds => 0,
          :use_for_merchant_ratings => true,
          :offer_affiliate => false,
          :supports_lifetime_ratings => false,
          :batch_fetch_delay => 1,
          :product_code_regexp => /^\d{4,10}$/,
          :product_code_examples => ['44393573', '37469715'])
  end

  def url_for_merchant_source_page(merchant_source_code)
    "http://www.epinions.com/#{merchant_source_code}"
  end

  # Extracts the first path segment after the epinions.com host.
  # Returns nil when the URL is not an epinions.com URL.
  def code_from_merchant_source_page_url(merchant_source_page_url)
    merchant_source_page_url[/epinions\.com\/([^\/\?#]*)/, 1]
  end

  # Fetches the merchant page and scrapes it into an OpenStruct. Fields whose
  # markup cannot be found or parsed are simply left unset.
  #
  # NOTE(review): relies on open-uri's Kernel#open, which also treats a string
  # starting with "|" as a command and needs URI.open from Ruby 3.0 on; make
  # sure only trusted URLs reach this method.
  def fetch_merchant_source(merchant_source_page_url)
    # Work on a copy so the caller's string is not modified.
    page_url = merchant_source_page_url.gsub(/\/display_~.*$/, '')
    delay_fetch
    doc = open(page_url) { |io| Hpricot(io) } # block form closes the handle

    merchant_source = OpenStruct.new
    merchant_source.source = self

    # merchant name
    element = doc.at('h1[@class = "title"]')
    merchant_source.name = element.inner_text.strip unless element.nil?

    # merchant logo
    element = doc.at('img[@name = "product_image"]')
    unless element.nil?
      src = element.attributes['src']
      merchant_source.logo_url = src.gsub(/-resized\d+/, '') unless src.nil?
    end

    # merchant code
    merchant_source.code = code_from_merchant_source_page_url(page_url)

    # merchant rating
    element = doc.at('span[text() *= "Overall store rating:"]/../img')
    element = doc.at('span[text() *= "Overall service rating:"]/../img') if element.nil?
    unless element.nil?
      merchant_rating = element.attributes['alt'].to_s[/Store Rating: ((\d|,)*\.?\d)/, 1]
      merchant_source.merchant_rating = merchant_rating.to_f * 20.0 unless merchant_rating.nil?
    end

    # Num Merchant Reviews (the count may contain thousands separators)
    element = doc.at('span[@class = "sgr"]')
    unless element.nil?
      num_merchant_reviews = element.inner_text[/Reviewed by ((?:\d|,)+) customer/, 1]
      unless num_merchant_reviews.nil? || num_merchant_reviews.empty?
        merchant_source.num_merchant_reviews = num_merchant_reviews.delete(',').to_i
      end
    end

    # Homepage
    # NOTE(review): unlike the rating selectors above there is no "/" between
    # the predicate and ".."; confirm this path matches as intended.
    element = doc.at('span[text() = "Web Site"]../../td[2]/span/a')
    merchant_source.homepage = element.inner_text.strip.downcase unless element.nil?

    merchant_source
  end

  # Formats the stored rating, divided by 20, as "x.y/5.0".
  def format_rating(merchant_source)
    '%01.1f/5.0' % (merchant_source.get_merchant_rating.to_f / 20.0)
  end
end
@@ -0,0 +1,119 @@
1
+ require 'uri'
2
+ require 'ostruct'
3
+ require 'open-uri'
4
+ require 'hpricot'
5
+
6
# Source that scrapes merchant ratings from Google Shopping review pages.
class GoogleSource < Source
  def initialize
    super(:name => 'Google Shopping',
          :homepage => 'http://www.google.com/products',
          :cpc => 0,
          :offer_enabled => false,
          :offer_ttl_seconds => 0,
          :use_for_merchant_ratings => true,
          :offer_affiliate => false,
          :supports_lifetime_ratings => false,
          :batch_fetch_delay => 3,
          :product_code_regexp => nil,
          :product_code_examples => [])
  end

  def url_for_merchant_source_page(merchant_source_code)
    "http://www.google.com/products/reviews?sort=1&cid=#{merchant_source_code}"
  end

  # Extracts the value of the cid parameter from a google.com URL.
  # Returns nil when the URL does not match.
  def code_from_merchant_source_page_url(merchant_source_page_url)
    merchant_source_page_url[/google\.com.*cid=(.+?)($|&.*)/, 1]
  end

  # Fetches the merchant review page and scrapes it into an OpenStruct. Fields
  # whose markup cannot be found or parsed are simply left unset.
  #
  # NOTE(review): relies on open-uri's Kernel#open, which also treats a string
  # starting with "|" as a command and needs URI.open from Ruby 3.0 on; make
  # sure only trusted URLs reach this method.
  def fetch_merchant_source(merchant_source_page_url)
    delay_fetch
    doc = open(merchant_source_page_url) { |io| Hpricot(io) } # block form closes the handle

    merchant_source = OpenStruct.new
    merchant_source.source = self

    # merchant code
    merchant_source.code = code_from_merchant_source_page_url(merchant_source_page_url)

    # merchant name
    element = doc.at('//table//tr/td//font[@size = "+1"]')
    merchant_source.name = element.inner_text.strip unless element.nil?

    # Rating and review count both live in the "Average rating" box; when the
    # box is missing neither field is set.
    rating_box_element = doc.at('//table//tr//td//b[text() = "Average rating"]/..')
    return merchant_source if rating_box_element.nil?

    # merchant rating
    element = rating_box_element.at('font[@size = "+3"]')
    unless element.nil?
      merchant_rating = element.inner_text[/\s*(.*?)\s*\/.*?/, 1]
      merchant_source.merchant_rating = merchant_rating.to_f * 20.0 unless merchant_rating.nil?
    end

    # Num Merchant Reviews
    element = rating_box_element.at('font[@size = "-1"]')
    unless element.nil?
      num_merchant_reviews = element.inner_text[/((\d|,)+)/, 1]
      unless num_merchant_reviews.nil? || num_merchant_reviews.empty?
        merchant_source.num_merchant_reviews = num_merchant_reviews.delete(',').to_i
      end
    end

    merchant_source
  end

  # Formats the stored rating, divided by 20, as "x.y/5.0".
  def format_rating(merchant_source)
    '%01.1f/5.0' % (merchant_source.get_merchant_rating.to_f / 20.0)
  end

  # Scrapes a Google seller list page and, for every seller whose domain
  # matches exactly one Merchant without a Google merchant source yet, fetches
  # and attaches that source. Progress is printed to stdout.
  #
  # Returns the number of new mappings added.
  def self.grab_new_mappings(google_merchant_list_url)
    #google_merchant_list_url = "http://www.google.com/products/catalog?q=Projectors&btnG=Search+Products&show=dd&cid=8852330310663509594&sa=N&start=0#ps-sellers"
    doc = open(google_merchant_list_url) { |io| Hpricot.XML(io) }

    google_sellers = []
    sellers_table = (doc / '#ps-sellers-table')
    sellers_table.search('td.ps-seller-col').each_with_index do |sellers_column, i|
      next if i == 0 # the first matching cell is skipped

      name = nil
      domain = nil
      code = nil

      link = sellers_column.at('a')
      unless link.nil?
        name = link.inner_text.strip
        puts "Seller: #{name}"
        target = link.attributes['href'].to_s[/\?q=http:\/\/(.+)\//, 1]
        unless target.nil?
          domain = Merchant.parse_url_for_domain(target)
          puts "Domain: #{domain}"
        end
      end

      # The rating link sits in the cell following the seller cell, if any.
      rating_cell = sellers_column.next_sibling
      rating_link = rating_cell.nil? ? nil : rating_cell.at('a')
      code = rating_link.attributes['href'].to_s[/.*&cid=(.+)&.*/, 1] unless rating_link.nil?
      puts "CID: #{code}"
      puts '-----------------------------------------------------'
      google_sellers << {:name => name, :code => code, :domain => domain} unless domain.nil? || domain.empty? || code.nil? || code.empty?
    end

    new_mappings_count = 0
    google_source = GoogleSource.first
    google_sellers.each do |seller|
      merchants = Merchant.find(:all, :conditions => {:domain => seller[:domain]})
      if merchants.length > 1
        puts "More than one merchant found for domain: #{seller[:domain]}"
      elsif merchants.length == 1
        merchant = merchants.first
        if merchant.merchant_source(google_source).nil?
          url = google_source.url_for_merchant_source_page(seller[:code])
          new_merchant_source = google_source.fetch_merchant_source(url)
          merchant.merchant_sources << new_merchant_source
          merchant.update_from_sources
          merchant.save!
          new_mappings_count += 1
        end
      end
    end
    puts "Google sellers found: #{google_sellers.length}"
    puts "New mappings added: #{new_mappings_count}"
    new_mappings_count
  end
end