bazaar_sources 0.2.1.1.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,230 @@
1
+ require 'hpricot'
2
+ require 'open-uri'
3
+ require 'timeout'
4
+
5
# Client for the Shopzilla (Bizrate) catalog web service.
#
# Depends on names defined outside this class: SHOPZILLA_API_KEY, CACHE (an object
# responding to +get+ and +set+) and Source.shopzilla_source.
class ShopzillaAPI
  # Timeout handed to Timeout.timeout when an instance has no api_call_timeout of its own.
  @@default_api_call_timeout = 25

  # Replaces the class-wide fallback timeout.
  def self.default_api_call_timeout=(obj)
    @@default_api_call_timeout = obj
  end

  # Per-instance timeout; when nil the class-wide default is used.
  attr_accessor :api_call_timeout

  def initialize
    @logger = Logger.new(STDERR)
  end

  # Find all offers for a product given the (shopzilla) product_id.
  #
  # Returns an array of offer hashes, at most one per merchant id (a later offer from the
  # same merchant replaces an earlier one). Only offers that look new and in stock are kept.
  # Returns an empty array when the API request produced no result.
  def find_offers_by_product_id(product_id)
    result = make_offer_service_request(product_id)
    offers = {}
    unless result.nil?
      (result / 'Products/Product/Offers/Offer').each do |offer|
        merchant_id = offer.attributes['merchantId']
        merchant_name = safe_inner_text(offer.at('merchantName'))
        merchant_rating_elem = offer.at('MerchantRating')
        merchant_rating = normalize_merchant_rating(merchant_rating_elem.attributes['value'].to_f) unless merchant_rating_elem.nil?
        price = get_price_from_node(offer.at('price'))
        shipping = get_price_from_node(offer.at('shipAmount'))
        url = safe_inner_text(offer.at('url'))
        condition = safe_inner_text(offer.at('condition'))
        stock = safe_inner_text(offer.at('stock'))
        next unless is_likely_new_condition?(condition) && is_likely_in_stock?(stock)

        offers[merchant_id] = { :merchant_code => merchant_id,
                                :merchant_name => safe_unescape_html(merchant_name),
                                :merchant_logo_url => logo_url(merchant_id),
                                :cpc => nil,
                                :price => price,
                                :shipping => shipping,
                                :offer_url => safe_unescape_html(url),
                                :offer_tier => 1,
                                :merchant_rating => merchant_rating,
                                :num_merchant_reviews => nil }
      end
    end
    offers.values
  end

  # Multiplies a rating by ten and rounds it to an integer; nil stays nil.
  def normalize_merchant_rating(merchant_rating)
    merchant_rating.nil? ? nil : (merchant_rating * 10.0).round
  end

  # This method makes a separate API call to get merchant detail info. It returns a hash that is aligned with
  # the merchant_source model, or nil when the API request produced no result.
  def merchant_source_detail(merchant_id)
    result = make_merchant_service_request(merchant_id)
    return nil if result.nil?
    merchant_element = result / ("Merchants/Merchant[@id=#{merchant_id}]")
    merchant_source = {}
    merchant_source[:source] = 'shopzilla'
    merchant_source[:code] = merchant_id.to_s
    merchant_source[:name] = safe_inner_text(merchant_element.at('name'))
    logo = logo_url(merchant_id)
    merchant_source[:logo_url] = logo if verify_logo_url(logo)

    # rating will not exist if unrated (although "unrated" will -- doh!)
    rated = false
    begin
      rating_elem = merchant_element.at('/Rating/Overall')
      unless rating_elem.nil?
        rated = true
        merchant_source[:merchant_rating] = normalize_merchant_rating(rating_elem.attributes['value'].to_f)
      end
    rescue
      merchant_source[:merchant_rating] = 0
    end
    # There are about 38 ways this could fail, so rescue any baddies
    begin
      # URL provided is an entity escaped url to shopzilla/bizrate for example
      # http://www.bizrate.com/rd?t=http%3A%2F%2Fwww.pcnation.com%2Fasp%2Findex.asp%3Faffid%3D308&mid=31427&cat_id=&prod_id=350513557&oid=&pos=1&b_id=18&rf=af1&af_id=3973&af_creative_id=6&af_assettype_id=10&af_placement_id=1
      # http://www.bizrate.com/rd?t=http%3A%2F%2Fad.doubleclick.net%2Fclk%3B23623113%3B12119329%3Bs%3Fhttp%3A%2F%2Fwww.staples.com%2Fwebapp%2Fwcs%2Fstores%2Fservlet%2Fhome%3FstoreId%3D10001%26langId%3D-1%26cm_mmc%3Donline_bizrate-_-search-_-staples_brand-_-staples.com&mid=370&cat_id=&prod_id=&oid=&pos=1&b_id=18&rf=af1&af_id=3973&af_creative_id=6&af_assettype_id=10&af_placement_id=1
      # http://www.bizrate.com/rd?t=http%3A%2F%2Fwww.tigerdirect.com%2Findex.asp%3FSRCCODE%3DBIZRATE&mid=23939&cat_id=&prod_id=&oid=&pos=1&b_id=18&rf=af1&af_id=3973&af_creative_id=6&af_assettype_id=10&af_placement_id=1
      # http://www.bizrate.com/rd?t=http%3A%2F%2Fad.doubleclick.net%2Fclk%3BNEW_1%3B6928611%3Ba%3Fhttp%3A%2F%2Fwww.officedepot.com&mid=814&cat_id=&prod_id=&oid=&pos=1&b_id=18&rf=af1&af_id=3973&af_creative_id=6&af_assettype_id=10&af_placement_id=1
      redir_url = CGI::unescape(merchant_element.at('url').inner_text)
      # the query string will contain a value at the "t" parameter
      t_param_value = redir_url.match(/(\?|&)t=(.+)/)[2]
      if t_param_value.index('doubleclick').nil?
        homepage = t_param_value.match(/https?:\/\/(.+?)(\/|&|\?|$)/)[1]
      else
        # doubleclick redirects wrap the merchant URL, so skip ahead to the embedded one
        homepage = t_param_value.match(/.+https?:\/\/(.+?)(\/|&|\?|$)/)[1]
      end
      merchant_source[:homepage] = "http://#{homepage}/"
    rescue
      merchant_source[:homepage] = nil
    end
    # now, we just need the number of reviews
    if rated
      num_merchant_reviews = safe_inner_text(merchant_element.at('Details/surveyCount'))
      num_merchant_reviews = (num_merchant_reviews.nil? || num_merchant_reviews.empty?) ? 0 : num_merchant_reviews.to_i
      merchant_source[:num_merchant_reviews] = num_merchant_reviews
    end
    merchant_source
  end

  # URL of the merchant logo image for the given merchant id (not checked for existence).
  def logo_url(merchant_id)
    "http://img.bizrate.com/merchant/#{merchant_id}.gif"
  end

  # Same as logo_url, but returns nil when the image cannot be opened.
  def verified_logo_url(merchant_id)
    logo = logo_url(merchant_id)
    verify_logo_url(logo) ? logo : nil
  end

  # -----------------------------------------------------------------------------------------------
  private
  # -----------------------------------------------------------------------------------------------

  def make_offer_service_request(product_id)
    params = {'productId' => product_id.to_s.strip,
              'offersOnly' => 'true',
              'biddedOnly' => 'true',
              'resultsOffers' => '100',
              'zipCode' => '64141'}
    make_api_request('product', params)
  end

  def make_product_service_request(product_id)
    params = {'productId' => product_id.to_s.strip}
    make_api_request('product', params)
  end

  def make_merchant_service_request(merchant_id)
    params = {'merchantId' => merchant_id.to_s.strip,
              'expandDetails' => 'true'}
    make_api_request('merchant', params)
  end

  def make_brand_service_request(category_id, keyword)
    params = {'categoryId' => category_id,
              'keyword' => keyword.strip}
    make_api_request('brands', params)
  end

  def make_taxonomy_service_request(category_id, keyword)
    params = {'categoryId' => category_id,
              'keyword' => keyword.strip}
    make_api_request('taxonomy', params)
  end

  # make any API request given a hash of querystring parameter/values. Generic parameters will be supplied.
  #
  # Returns the response parsed with Hpricot.XML, or nil when the call timed out or failed.
  # Successful response bodies are stored in CACHE under a key derived from the query string.
  def make_api_request(service, service_params)
    params = {'apiKey' => SHOPZILLA_API_KEY,
              'publisherId' => 3973,
              'placementId' => 1}
    params = params.merge(service_params) # merge in the user params
    query_string = build_query_string(params)

    # do we already have a cached version of this API call?
    key = "shopping-api-#{Digest::MD5.hexdigest(query_string)}-v3"
    result = CACHE.get(key)
    unless result # nope.. gotta get a new one.
      url = "http://catalog.bizrate.com/services/catalog/v1/us/#{service}?#{query_string}"
      begin
        result = Timeout.timeout(api_call_timeout || @@default_api_call_timeout) do
          # block form so the underlying IO is closed once the body has been read
          open(url) { |io| io.read }
        end

        CACHE.set(key, result, Source.shopzilla_source.offer_ttl_seconds)
      rescue Timeout::Error
        @logger.warn "Shopzilla API call timed out: #{url}"
        result = nil
      rescue StandardError => ex
        @logger.warn "Shopzilla API call failed (#{ex.message}): #{url}"
        result = nil
      end
    end
    result ? Hpricot.XML(result) : nil
  end

  # Builds a query string from a parameter hash: pairs are sorted by name, array values are
  # expanded into one pair per element, and every value is form-encoded so that characters
  # such as spaces or '&' cannot corrupt the request URL.
  def build_query_string(params)
    params.sort.collect do |name, value|
      values = value.is_a?(Array) ? value : [value]
      values.collect { |v| "#{name}=#{URI.encode_www_form_component(v.to_s)}" }.join('&')
    end.join('&')
  end

  # True when the logo URL can be opened; any failure is logged and reported as false.
  def verify_logo_url(logo_url)
    open(logo_url, 'rb').close
    true
  rescue StandardError => ex
    @logger.warn "Not using bad merchant logo URL #{logo_url}: #{ex.message}"
    false
  end

  def safe_inner_text(element)
    element.nil? ? nil : element.inner_text
  end

  def safe_unescape_html(text)
    text.nil? ? nil : CGI::unescapeHTML(text)
  end

  # Missing or blank condition text is treated as new; otherwise only NEW and OEM qualify.
  def is_likely_new_condition?(condition_text)
    condition_text.nil? || condition_text.empty? || condition_text.upcase == 'NEW' || condition_text.upcase == 'OEM'
  end

  # Everything except an explicit OUT (case-insensitive) is treated as in stock.
  def is_likely_in_stock?(stock_text)
    stock_text.nil? || stock_text.empty? || stock_text.upcase != 'OUT'
  end

  # Reads the 'integral' attribute of a price node, divides it by 100 and converts it with to_d.
  # Returns nil when the node itself or its 'integral' attribute is missing or blank.
  def get_price_from_node(element)
    return nil if element.nil?
    price = element.attributes['integral']
    (price.nil? || price.empty?) ? nil : (price.to_i / 100.0).to_d
  end
end
@@ -0,0 +1,35 @@
1
+ require 'sources/source'
2
+ require 'sources/amazon_source'
3
+ require 'sources/buy_source'
4
+ require 'sources/ebay_source'
5
+ require 'sources/epinions_source'
6
+ require 'sources/google_source'
7
+ require 'sources/price_grabber_source'
8
+ require 'sources/reseller_ratings_source'
9
+ require 'sources/shopping_source'
10
+ require 'sources/shopzilla_source'
11
+
12
# Namespace holding the gem's version string.
module BazaarSources
  # Frozen so the shared constant cannot be modified in place by callers.
  VERSION = '0.1.8'.freeze
end
15
+
16
module URI
  # Characters that are percent-encoded before the URL is handed to URI.parse.
  CHARACTERS_DISLIKED_BY_PARSE = '^<>`| '

  # Parses +url+ after percent-encoding every character listed in
  # CHARACTERS_DISLIKED_BY_PARSE, and returns the resulting URI object.
  #
  # The encoding is done locally instead of through URI.escape, which is
  # obsolete and no longer exists in Ruby 3.0 and later.
  def self.safe_parse(url)
    disliked = Regexp.new("[#{Regexp.quote(CHARACTERS_DISLIKED_BY_PARSE)}]")
    escaped = url.gsub(disliked) do |char|
      char.each_byte.map { |byte| format('%%%02X', byte) }.join
    end
    URI.parse(escaped)
  end
end
24
+
25
module Hpricot
  module Traverse
    # Concatenates the inner text of every child, tagging each piece as UTF-8
    # before joining. Yields an empty string when the receiver has no
    # +children+ method or its children are nil/false.
    def inner_text
      return "" unless respond_to?(:children)

      kids = children
      return "" unless kids

      pieces = kids.map do |child|
        child.inner_text.force_encoding('UTF-8')
      end
      pieces.join
    end
  end
end
@@ -0,0 +1,94 @@
1
+ require 'api_helpers/amazon'
2
+
3
# Source backed by the Amazon product advertising API helper (Amazon::ProductAdvertising).
#
# Depends on names defined outside this class: Source, AMAZON_ASSOCIATE_TAG and
# AMAZON_ASSOCIATE_TAG_ALT.
class AmazonSource < Source
  AMAZON_MERCHANT_PERMALINK = 'amazon'

  def initialize
    super(:name => 'Amazon',
          :homepage => 'http://www.amazon.com/',
          :cpc => 12,
          :offer_enabled => true,
          :offer_ttl_seconds => 3600,
          :use_for_merchant_ratings => true,
          :offer_affiliate => true,
          :supports_lifetime_ratings => false,
          :batch_fetch_delay => 2,
          :product_code_regexp => /^[a-zA-Z0-9]{10}$/,
          :product_code_examples => ['B000HEC7BO', 'B002YP45EQ'],
          :product_page_link_erb => "http://www.amazon.com/gp/product/<%= product_code %>")
  end

  # Lazily created API helper, shared by all calls on this instance.
  def api
    @api ||= Amazon::ProductAdvertising.new
  end

  def url_for_merchant_source_page(merchant_source_code)
    api.at_a_glance_url(merchant_source_code)
  end

  # Looks up a seller and returns a hash of merchant-source attributes.
  #
  # +merchant_source_page_url+ may be a page URL carrying a "seller=A..." query parameter,
  # in which case that parameter's value is used as the seller code; otherwise the argument
  # itself is passed to the seller lookup.
  def fetch_merchant_source(merchant_source_page_url)
    seller_match = merchant_source_page_url.match(/(\?|&)seller=(A.+?)(&.*|$)/)
    amazon_merchant_code = seller_match ? seller_match[2] : merchant_source_page_url
    delay_fetch
    properties = api.seller_lookup(amazon_merchant_code)

    { :source => self,
      :code => properties[:seller_id],
      :name => properties[:merchant_name],
      :merchant_rating => properties[:average_feedback_rating] * 20.0,
      :num_merchant_reviews => properties[:total_feedback],
      :logo_url => properties[:logo_url],
      :homepage => properties[:homepage] }
  end

  # Returns the offer with the lowest price + shipping, ignoring offers that lack either
  # value. Returns nil when there are fewer than +min_num_offers_to_qualify+ offers
  # (when that argument is given) or when no offer has both values.
  def fetch_best_offer(product_code, min_num_offers_to_qualify=nil)
    delay_fetch
    offers = fetch_offers(product_code)
    if !min_num_offers_to_qualify.nil? && offers.length < min_num_offers_to_qualify
      return nil
    end
    offers.inject(nil) do |best_offer, offer|
      unless offer.price.nil? || offer.shipping.nil?
        if best_offer.nil? || (offer.price + offer.shipping) < (best_offer.price + best_offer.shipping)
          best_offer = offer
        end
      end
      best_offer
    end
  end

  # Total price of the best offer, requiring at least three offers; nil otherwise.
  def fetch_street_price(product_code)
    best_offer = fetch_best_offer(product_code, 3)
    best_offer.nil? ? nil : best_offer.total_price
  end

  # Swaps the associate tag in +offer_url+ for the alternate tag.
  # The tag is matched as plain text so regexp metacharacters in it cannot widen the match.
  def self.nullify_offer_url(offer_url)
    offer_url.gsub(AMAZON_ASSOCIATE_TAG.to_s, AMAZON_ASSOCIATE_TAG_ALT)
  end

  def nullify_offer_url(offer_url)
    AmazonSource.nullify_offer_url(offer_url)
  end

  def offer_affiliate_for_merchant?(merchant)
    !merchant.nil? && merchant.permalink == AMAZON_MERCHANT_PERMALINK
  end

  def affiliate_wrap_deal_url(deal_url, nullify=false)
    nullify ? nullify_offer_url(deal_url) : deal_url
  end

  # Fetches all offers for an ASIN, translating API helper errors into Source errors.
  # The new message is built as a fresh string so the rescued exception's own message
  # is never modified in place.
  def fetch_offers(product_code)
    begin
      api.find_offers_by_asin(product_code).values
    rescue Amazon::AsinNotFoundError => ex
      raise Source::ProductNotFoundError.new("#{ex.message} w/ #{product_code}", keyname, product_code)
    rescue Amazon::AsinFatalError => ex
      raise Source::ProductFatalError.new("#{ex.message} w/ #{product_code}", keyname, product_code)
    rescue => ex
      raise Source::GeneralError.new("#{ex.message} w/ #{product_code}", keyname)
    end
  end
end
@@ -0,0 +1,34 @@
1
# Source definition for Buy.com, including construction of affiliate gateway links.
class BuySource < Source
  BUY_MERCHANT_PERMALINK = 'buy-com'
  BUY_AFFILIATE_PID = '3332520'
  BUY_AFFILIATE_URL_PREFIX = "http://affiliate.buy.com/gateway.aspx?adid=17662&pid=#{BUY_AFFILIATE_PID}&aid=10391416&sURL="

  def initialize
    super(:name => 'Buy.com',
          :homepage => 'http://www.buy.com/',
          :cpc => 7,
          :offer_enabled => false,
          :offer_ttl_seconds => 86400,
          :use_for_merchant_ratings => false,
          :offer_affiliate => true,
          :supports_lifetime_ratings => false,
          :batch_fetch_delay => 2,
          :product_code_regexp => /^\d{9}$/,
          :product_code_examples => ['208464207', '211986388'],
          :product_page_link_erb => "http://www.buy.com/retail/product.asp?sku=<%= product_code %>")
  end

  # Removes every occurrence of the affiliate PID from the URL.
  def nullify_offer_url(offer_url)
    pid_pattern = /#{BUY_AFFILIATE_PID}/
    offer_url.gsub(pid_pattern, '')
  end

  # True only for a non-nil merchant whose permalink is the Buy.com permalink.
  def offer_affiliate_for_merchant?(merchant)
    return false if merchant.nil?

    merchant.permalink == BUY_MERCHANT_PERMALINK
  end

  # Prefixes the escaped deal URL with the affiliate gateway URL; when
  # +nullify+ is truthy the affiliate PID is stripped from the result.
  def affiliate_wrap_deal_url(deal_url, nullify=false)
    wrapped = BUY_AFFILIATE_URL_PREFIX + CGI::escape(deal_url)
    return wrapped unless nullify

    nullify_offer_url(wrapped)
  end
end
@@ -0,0 +1,35 @@
1
# Source definition for eBay, including construction of rover affiliate links.
class EbaySource < Source
  EBAY_MERCHANT_PERMALINK = 'ebay'
  EBAY_AFFILIATE_PID = '711-53200-19255-0'
  EBAY_DEFAULT_CAMPAIGN_ID = '5336205246'
  EBAY_ADMIN_CAMPAIGN_ID = '5336210401'
  EBAY_AFFILIATE_URL_PREFIX = "http://rover.ebay.com/rover/1/#{EBAY_AFFILIATE_PID}/1?type=4&campid=#{EBAY_DEFAULT_CAMPAIGN_ID}&toolid=10001&customid=&mpre="

  def initialize
    super(:name => 'eBay.com',
          :homepage => 'http://www.ebay.com/',
          :cpc => 10,
          :offer_enabled => false,
          :offer_ttl_seconds => 86400,
          :use_for_merchant_ratings => false,
          :offer_affiliate => true,
          :supports_lifetime_ratings => false,
          :batch_fetch_delay => 2,
          :product_code_regexp => nil,
          :product_code_examples => [])
  end

  # Replaces every occurrence of the default campaign id with the admin campaign id.
  def nullify_offer_url(offer_url)
    campaign_pattern = /#{EBAY_DEFAULT_CAMPAIGN_ID}/
    offer_url.gsub(campaign_pattern, EBAY_ADMIN_CAMPAIGN_ID)
  end

  # True only for a non-nil merchant whose permalink is the eBay permalink.
  def offer_affiliate_for_merchant?(merchant)
    return false if merchant.nil?

    merchant.permalink == EBAY_MERCHANT_PERMALINK
  end

  # Prefixes the escaped deal URL with the rover affiliate URL; when
  # +nullify+ is truthy the campaign id is swapped for the admin one.
  def affiliate_wrap_deal_url(deal_url, nullify=false)
    wrapped = EBAY_AFFILIATE_URL_PREFIX + CGI::escape(deal_url)
    return wrapped unless nullify

    nullify_offer_url(wrapped)
  end
end
@@ -0,0 +1,80 @@
1
+ require 'ostruct'
2
+
3
# Source that reads merchant information by scraping Epinions store pages.
class EpinionsSource < Source
  def initialize
    super(:name => 'Epinions',
          :homepage => 'http://www.epinions.com/',
          :cpc => 0,
          :offer_enabled => false,
          :offer_ttl_seconds => 0,
          :use_for_merchant_ratings => true,
          :offer_affiliate => false,
          :supports_lifetime_ratings => false,
          :batch_fetch_delay => 1,
          :product_code_regexp => /^\d{4,10}$/,
          :product_code_examples => ['44393573', '37469715'])
  end

  def url_for_merchant_source_page(merchant_source_code)
    "http://www.epinions.com/#{merchant_source_code}"
  end

  # Extracts the first path segment after "epinions.com/" from the URL.
  # Returns nil when the URL does not contain "epinions.com/".
  def code_from_merchant_source_page_url(merchant_source_page_url)
    merchant_source_page_url[/epinions\.com\/([^\/\?#]*)/, 1]
  end

  # Fetches the given store page and returns an OpenStruct with whichever of source, name,
  # logo_url, code, merchant_rating, num_merchant_reviews and homepage could be extracted.
  # Fields whose page element or pattern is missing are simply left unset.
  def fetch_merchant_source(merchant_source_page_url)
    # Strip the "/display_~..." suffix on a copy so the caller's string is left untouched
    # (and frozen strings are accepted).
    page_url = merchant_source_page_url.gsub(/\/display_~.*$/, '')
    delay_fetch
    doc = Hpricot(open(page_url))

    merchant_source = OpenStruct.new
    merchant_source.source = self

    # merchant name
    element = doc.at('h1[@class = "title"]')
    unless element.nil?
      name = element.inner_text.strip
      merchant_source.name = name
    end

    # merchant logo
    element = doc.at('img[@name = "product_image"]')
    unless element.nil?
      logo_url = element.attributes['src']
      # non-destructive gsub: do not rewrite the attribute inside the parsed document
      merchant_source.logo_url = logo_url.gsub(/-resized\d+/, '') unless logo_url.nil?
    end

    # merchant code
    merchant_source.code = code_from_merchant_source_page_url(page_url)

    # merchant rating
    element = doc.at('span[text() *= "Overall store rating:"]/../img')
    element = doc.at('span[text() *= "Overall service rating:"]/../img') if element.nil?
    unless element.nil?
      # String#[] with a capture index yields nil (instead of raising) when nothing matches
      merchant_rating = element.attributes['alt'].to_s[/Store Rating: ((\d|,)*\.?\d)/, 1]
      merchant_source.merchant_rating = merchant_rating.to_f * 20.0 unless merchant_rating.nil?
    end

    # Num Merchant Reviews
    element = doc.at('span[@class = "sgr"]')
    unless element.nil?
      # accept comma-grouped counts such as "1,234"; the commas are removed below
      num_merchant_reviews = element.inner_text[/Reviewed by ([\d,]+) customer/, 1]
      merchant_source.num_merchant_reviews = num_merchant_reviews.delete(',').to_i unless num_merchant_reviews.nil? || num_merchant_reviews.empty?
    end

    # Homepage
    element = doc.at('span[text() = "Web Site"]../../td[2]/span/a')
    unless element.nil?
      homepage = element.inner_text.strip.downcase
      merchant_source.homepage = homepage
    end

    merchant_source
  end

  # Formats the merchant rating divided by 20 with one decimal place, e.g. "4.5/5.0".
  def format_rating(merchant_source)
    '%01.1f/5.0' % (merchant_source.get_merchant_rating.to_f / 20.0)
  end
end
@@ -0,0 +1,119 @@
1
+ require 'uri'
2
+ require 'ostruct'
3
+ require 'open-uri'
4
+ require 'hpricot'
5
+
6
# Source that reads merchant ratings by scraping Google Shopping review pages.
class GoogleSource < Source
  def initialize
    super(:name => 'Google Shopping',
          :homepage => 'http://www.google.com/products',
          :cpc => 0,
          :offer_enabled => false,
          :offer_ttl_seconds => 0,
          :use_for_merchant_ratings => true,
          :offer_affiliate => false,
          :supports_lifetime_ratings => false,
          :batch_fetch_delay => 3,
          :product_code_regexp => nil,
          :product_code_examples => [])
  end

  def url_for_merchant_source_page(merchant_source_code)
    "http://www.google.com/products/reviews?sort=1&cid=#{merchant_source_code}"
  end

  # Extracts the value of the "cid" parameter from a google.com URL; nil when absent.
  def code_from_merchant_source_page_url(merchant_source_page_url)
    merchant_source_page_url[/google\.com.*cid=(.+?)($|&.*)/, 1]
  end

  # Fetches the given reviews page and returns an OpenStruct with whichever of source, code,
  # name, merchant_rating and num_merchant_reviews could be extracted. Fields whose page
  # element or pattern is missing are simply left unset.
  def fetch_merchant_source(merchant_source_page_url)
    delay_fetch
    doc = Hpricot(open(merchant_source_page_url))

    merchant_source = OpenStruct.new
    merchant_source.source = self

    # merchant code
    code = code_from_merchant_source_page_url(merchant_source_page_url)
    merchant_source.code = code

    # merchant name
    element = doc.at('//table//tr/td//font[@size = "+1"]')
    unless element.nil?
      name = element.inner_text.strip
      merchant_source.name = name
    end

    rating_box_element = doc.at('//table//tr//td//b[text() = "Average rating"]/..')
    # without the rating box there is nothing more to extract from the page
    return merchant_source if rating_box_element.nil?

    # merchant rating
    element = rating_box_element.at('font[@size = "+3"]')
    unless element.nil?
      # String#[] with a capture index yields nil (instead of raising) when nothing matches
      merchant_rating = element.inner_text[/\s*(.*?)\s*\/.*?/, 1]
      merchant_source.merchant_rating = merchant_rating.to_f * 20.0 unless merchant_rating.nil?
    end

    # Num Merchant Reviews
    element = rating_box_element.at('font[@size = "-1"]')
    unless element.nil?
      num_merchant_reviews = element.inner_text[/((\d|,)+)/, 1]
      merchant_source.num_merchant_reviews = num_merchant_reviews.delete(',').to_i unless num_merchant_reviews.nil? || num_merchant_reviews.empty?
    end

    merchant_source
  end

  # Formats the merchant rating divided by 20 with one decimal place, e.g. "4.5/5.0".
  def format_rating(merchant_source)
    '%01.1f/5.0' % (merchant_source.get_merchant_rating.to_f / 20.0)
  end

  # Reads a Google product page listing sellers, and for every seller whose domain matches
  # exactly one Merchant that has no Google merchant source yet, fetches and attaches one.
  # Progress is printed with puts. Returns the number of merchant sources added.
  def self.grab_new_mappings(google_merchant_list_url)
    #google_merchant_list_url = "http://www.google.com/products/catalog?q=Projectors&btnG=Search+Products&show=dd&cid=8852330310663509594&sa=N&start=0#ps-sellers"
    body = open(google_merchant_list_url)
    doc = Hpricot.XML(body)

    google_sellers = []
    sellers_table = (doc / '#ps-sellers-table')
    sellers_table.search('td.ps-seller-col').each_with_index do |sellers_column, i|
      next if i == 0
      # puts "Seller's column: #{sellers_column}"
      name = domain = code = nil
      link = sellers_column.at('a')
      unless link.nil?
        name = link.inner_text.strip
        puts "Seller: #{name}"
        href_match = link.attributes['href'].to_s.match(/\?q=http:\/\/(.+)\//)
        if href_match
          domain = Merchant.parse_url_for_domain(href_match[1])
          puts "Domain: #{domain}"
        end
      end
      # the rating link lives in the cell following the seller cell, which may be absent
      rating_cell = sellers_column.next_sibling
      rating_link = rating_cell.nil? ? nil : rating_cell.at('a')
      code = rating_link.attributes['href'].to_s[/.*&cid=(.+)&.*/, 1] unless rating_link.nil?
      puts "CID: #{code}"
      puts '-----------------------------------------------------'
      google_sellers << {:name => name, :code => code, :domain => domain} unless domain.nil? || domain.empty? || code.nil? || code.empty?
    end

    new_mappings_count = 0
    google_source = GoogleSource.first
    google_sellers.each do |seller|
      merchants = Merchant.find(:all, :conditions => {:domain => seller[:domain]})
      if merchants.length > 1
        puts "More than one merchant found for domain: #{seller[:domain]}"
      elsif merchants.length == 1
        merchant = merchants.first
        if merchant.merchant_source(google_source).nil?
          url = google_source.url_for_merchant_source_page(seller[:code])
          new_merchant_source = google_source.fetch_merchant_source(url)
          merchant.merchant_sources << new_merchant_source
          merchant.update_from_sources
          merchant.save!
          new_mappings_count += 1
        end
      end
    end
    puts "Google sellers found: #{google_sellers.length}"
    puts "New mappings added: #{new_mappings_count}"
    new_mappings_count
  end
end