bazaar_sources 0.2.1.1.1.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,127 @@
1
# Helpers for fetching external web pages over HTTP(S), following redirects,
# and reducing fetched HTML to plain text.
#
# NOTE(review): this module relies on URI.safe_parse, which is not part of
# Ruby's standard library and is not defined here; it is presumably a project
# extension returning a URI, or nil when the text cannot be parsed -- confirm.
module ExternalUrl
  require 'net/http'
  require 'uri'

  # Browser-like headers sent with every request.
  REQUEST_HEADERS = {
    'User-Agent' => 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11'
  }

  # Reduces an HTML page to its text content.
  #
  # Note: per the original author this method is only used by the Validator,
  # and is only suitable for the Validator.
  #
  # page - String of HTML.
  #
  # Returns a String with HTML comments, <style> blocks, tags and entities
  # removed and each run of whitespace collapsed to a single character.
  def self.just_page_content(page)
    comments = /(<!--.*?-->)(.{1,40})/m
    nostyle = /<style.*?<\/style>/im
    notags = /<.*?>/im
    noentities = /&.*?;/
    noextrawhitespace = /(\s)+/im

    # Remove comments, unless inside of JavaScript (because frequently
    # JavaScript has good matches for model numbers, etc.). The up-to-40
    # characters following a comment are inspected for a closing script tag.
    without_comments = page.gsub(comments) do
      comment = $1
      post = $2
      post =~ /<\/script/ ? comment + post : post
    end

    without_comments.gsub(nostyle, ' ').gsub(notags, '').gsub(noentities, ' ').gsub(noextrawhitespace, '\1')
  end

  # Fetches +url+, following up to +limit+ redirects.
  #
  # url   - String URL to fetch.
  # limit - Integer number of remaining fetch attempts (redirect budget).
  # debug - when true, prints progress and writes the fetched body to files
  #         in the current directory (see to_file).
  #
  # Returns a Hash containing a :success flag. If true, you'll have the
  # :response (thus response.body) and :final_uri (e.g. if redirected).
  # If false, :final_uri is set and :message is set when the failure was an
  # invalid URL, too many redirects or a raised error; for a non-success,
  # non-redirect HTTP response the :response is returned instead.
  def self.fetch_response(url, limit = 10, debug = false)
    if limit == 0
      return {:success => false, :response => nil, :message => "Redirected too many times", :final_uri => url}
    end

    message = invalid_uri(url)
    return {:success => false, :response => nil, :message => message, :final_uri => url} if message

    uri = URI.safe_parse(url.to_s)
    # Honour an explicit port and HTTPS instead of always talking plain HTTP
    # to the default port.
    http_request = Net::HTTP.new(uri.host, uri.port)
    http_request.use_ssl = true if uri.scheme.to_s.downcase == 'https'
    puts "http request: #{http_request.inspect}" if debug

    # Adding user agent header helps some merchants feel more comfortable with our bot
    request_path = request_path_for(uri)
    puts "http request to: #{request_path}" if debug
    response = http_request.get(request_path, REQUEST_HEADERS)
    puts "http response: #{response.inspect}" if debug

    case response
    when Net::HTTPSuccess
      if debug
        puts "Success, final url: #{url}"
        to_file(url, response.body, "html")
        to_file(url, just_page_content(response.body), "txt")
      end
      {:success => true, :response => response, :final_uri => url}
    when Net::HTTPRedirection
      redirect_url = to_absolute_url(response['location'], url)
      puts "Redirecting to #{redirect_url}" if debug
      fetch_response(redirect_url, limit - 1, debug)
    else
      {:success => false, :response => response, :final_uri => url}
    end
  rescue StandardError, Timeout::Error => exp
    # Deliberately not `rescue Exception`: interrupts and exit requests must
    # still propagate. Timeout::Error is listed explicitly because on old
    # Rubies it is not a StandardError.
    {:success => false, :response => nil, :message => exp.message, :final_uri => url}
  end

  # The helpers below are internal by convention. They are not actually
  # private: a bare `private` does not apply to `def self.` methods, and
  # they have always been callable as ExternalUrl.<name>.

  # Builds the path (plus query string) to request for +uri+.
  #
  # uri - a parsed URI.
  #
  # Returns a String that always starts with a path ('/' when the URI has
  # none) and never includes the host, port or fragment.
  def self.request_path_for(uri)
    return uri.request_uri if uri.respond_to?(:request_uri)

    path = uri.path.to_s
    path = '/' if path.empty?
    uri.query ? "#{path}?#{uri.query}" : path
  end

  # Writes +content+ to a file in the current directory whose name is
  # derived from the URL's host, path and query, and prints where it went.
  #
  # url     - URL the content came from.
  # content - String to write.
  # ext     - file extension, without the dot.
  def self.to_file(url, content, ext = "html")
    uri = URI.safe_parse(url.to_s)
    filename_base = "#{uri.host}.#{uri.path}?#{uri.query}"
    filename = filename_base.gsub(/(\W)+/, "-") + ".#{ext}"
    # Block form closes the handle even if the write raises.
    File.open(filename, "w") { |f| f.write(content) }
    puts "Wrote content to file #{filename} in #{Dir.pwd}"
  end

  # Returns nil if the URL is a valid URI, else a message. In this case,
  # scheme, host and path are all required.
  #
  # url - String (or any object responding to to_s); nil and "" are
  #       reported as 'No URL'.
  def self.invalid_uri(url)
    url_text = url.to_s
    return 'No URL' if url_text.empty?

    uri = URI.safe_parse(url_text)
    return "URL is not well formed" if uri.nil?

    if uri.scheme.nil? || uri.host.nil? || uri.path.nil?
      return "URL incomplete: scheme is #{uri.scheme.nil? ? "missing" : uri.scheme}, host is #{uri.host.nil? ? "missing" : uri.host} and path is #{uri.path.nil? ? "missing" : uri.path}"
    end

    nil
  rescue StandardError => exp
    "URI improperly formed: #{exp.message}"
  end

  # Resolves +url+ (typically a redirect Location header) against
  # +current_url+.
  #
  # url         - String or URI; may be absolute or relative.
  # current_url - String or URI of the page that produced +url+.
  #
  # Returns the absolute URL as a String. Relative references are resolved
  # with URI#merge, so "next.html", "/path" and "//host/path" all resolve
  # against the current URL the way a browser would.
  def self.to_absolute_url(url, current_url)
    url = URI.safe_parse(url) unless url.is_a? URI
    return url.to_s unless url.relative?

    current_url = URI.safe_parse(current_url) unless current_url.is_a? URI
    current_url.merge(url).to_s
  end
end
@@ -0,0 +1,14 @@
1
+ require 'nokogiri'
2
+
3
# HTTParty response parser that hands XML and HTML bodies to Nokogiri
# instead of HTTParty's default parsing.
class HttpartyNokogiriParser < HTTParty::Parser

  protected

    # Parses the response body as an XML document.
    def xml
      Nokogiri::XML.parse(body)
    end

    # Parses the response body as an HTML document.
    def html
      Nokogiri::HTML.parse(body)
    end
end
@@ -0,0 +1,174 @@
1
+ require 'ostruct'
2
+ require 'hpricot'
3
+ require 'api_helpers/external_url'
4
+
5
# Scrapes merchant information from resellerratings.com search-result and
# store pages.
#
# NOTE(review): Source, MerchantSource and URI.safe_parse are referenced but
# not defined in this file; they are presumably supplied by the host
# application -- confirm.
module ResellerRatingsAPI
  # Extracts the store name segment from a resellerratings.com store URL.
  #
  # merchant_source_page_url - String URL.
  #
  # Returns the text following "/store/" (up to the next '/', '?' or '#'),
  # or nil when the URL is not a store URL.
  def self.alt_code_from_merchant_source_page_url(merchant_source_page_url)
    res = merchant_source_page_url.match(/resellerratings\.com\/store\/([^\/\?#]*)/)
    res.nil? ? nil : res[1]
  end

  # Searches for merchants matching +search_text+.
  #
  # Returns an Array of up to +limit+ Hashes with :merchant_page_url,
  # :merchant_code and :merchant_name. When the search lands directly on a
  # store page, the Array holds that single merchant. Returns [] when the
  # page could not be fetched or was not recognised.
  def self.fetch_suggestions(search_text, limit)
    doc_and_final_uri = open_doc(search_url_for(search_text))
    result = []
    if doc_and_final_uri.nil? || doc_and_final_uri[:final_uri].nil? || doc_and_final_uri[:final_uri].empty?
      return result
    end

    final_uri = URI.safe_parse(doc_and_final_uri[:final_uri])
    if final_uri.path == '/reseller_list.pl'
      # got the search results page with more than one result
      result = convert_search_results_page_to_merchant_array(doc_and_final_uri[:doc], limit)
    elsif (store_match = final_uri.path.match(/^\/store\/(.+)$/))
      # got merchant page back
      store_code = store_match[1]
      result << { :merchant_page_url => final_uri, :merchant_code => store_code, :merchant_name => store_code.gsub('_', ' ') }
    end
    # otherwise: don't know where we ended up
    result
  end

  # Searches for merchants matching +search_text+ and returns merchant
  # source objects: existing MerchantSource records where one is found for
  # the store code, otherwise OpenStructs with :source, :name and :alt_code.
  #
  # Returns an Array with at most +limit+ entries from a results page, or a
  # single entry when the search lands directly on a store page.
  def self.search_for_merchant_source(search_text, limit=15)
    merchant_sources = []
    doc_and_final_uri = open_doc(search_url_for(search_text))
    return merchant_sources if doc_and_final_uri.nil?

    if merchant_page_url?(doc_and_final_uri[:final_uri])
      merchant_sources << convert_merchant_page_to_merchant_source(doc_and_final_uri)
    else
      doc_and_final_uri[:doc].search('tr/td/font/a[text() = "Read Reviews"]/../../..').each do |result_row|
        element = result_row.at('td//a')
        # Skip rows whose markup is not what we expect rather than raising.
        next if element.nil?
        code_match = element.attributes['href'].to_s.match(/\/store\/(.+)$/)
        next if code_match.nil?

        name = element.inner_text.strip
        alt_merchant_code = code_match[1]
        existing_merchant_source = MerchantSource.find_by_source_and_alt_code(Source.reseller_ratings_source, alt_merchant_code)
        if existing_merchant_source.nil?
          merchant_sources << OpenStruct.new({:source => Source.reseller_ratings_source, :name => name, :alt_code => alt_merchant_code})
        else
          merchant_sources << existing_merchant_source
        end
        break if merchant_sources.length >= limit
      end
    end
    merchant_sources
  end

  # Runs a search and converts whatever page it lands on as if it were a
  # store page. Returns a merchant source OpenStruct, or nil.
  def self.search_for_merchant_source_best_match(search_text)
    fetch_merchant_source(search_url_for(search_text))
  end

  # Fetches a store page and converts it to a merchant source OpenStruct.
  # Returns nil when the page cannot be fetched or is not a store page.
  def self.fetch_merchant_source(merchant_source_page_url)
    doc_and_final_uri = open_doc(merchant_source_page_url)
    return nil if doc_and_final_uri.nil?

    convert_merchant_page_to_merchant_source(doc_and_final_uri)
  end

  # Fetches the store page for the given store name segment.
  def self.fetch_merchant_source_by_alt_merchant_code(alt_merchant_code)
    fetch_merchant_source("http://www.resellerratings.com/store/#{alt_merchant_code}")
  end

  # Returns true when +url+ contains a "/store/<something>" path.
  def self.merchant_page_url?(url)
    !url.nil? && !url.match(/\/store\/.+$/).nil?
  end

  # The helpers below are internal by convention. They are not actually
  # private: a bare `private` does not apply to `def self.` methods, so
  # they have always been callable from outside the module.

  # Builds the search URL for +search_text+.
  #
  # The text is form-encoded so that characters such as '&' and '#' cannot
  # break the query string; falls back to URI.escape on Rubies that lack
  # URI.encode_www_form_component.
  def self.search_url_for(search_text)
    encoded = if URI.respond_to?(:encode_www_form_component)
      URI.encode_www_form_component(search_text)
    else
      URI.escape(search_text)
    end
    "http://www.resellerratings.com/reseller_list.pl?keyword_search=#{encoded}"
  end

  # Parses a rating such as "9.54 / 10" into an Integer scaled by ten
  # (95 for the example). Returns nil when the text contains no '/'.
  def self.parse_rating(text)
    rating_match = text.to_s.match(/\s*(.*?)\s*\/.*?/)
    rating_match.nil? ? nil : (rating_match[1].to_f * 10.0).round
  end

  # Converts a fetched store page into an OpenStruct with source, code,
  # alt_code, name, homepage, merchant_rating, num_merchant_reviews,
  # merchant_rating_lifetime and num_merchant_reviews_lifetime. Fields whose
  # markup is not found are left unset.
  #
  # doc_and_final_uri - Hash with :doc (parsed page) and :final_uri, as
  #                     returned by open_doc; may be nil.
  #
  # Returns nil when the input is nil or no merchant code is found on the
  # page (i.e. it is not a store page).
  def self.convert_merchant_page_to_merchant_source(doc_and_final_uri)
    return nil if doc_and_final_uri.nil?
    merchant_source = OpenStruct.new
    merchant_source.source = Source.reseller_ratings_source
    doc = doc_and_final_uri[:doc]

    # Merchant Code: the first run of digits in the "write a review" link.
    elements = doc.search('img[@src="http://images.resellerratings.com/images/write_a_review.gif"]/..')
    unless elements.empty?
      code_match = elements.first[:href].to_s.match(/^.*?([0-9]+).*?$/)
      merchant_source.code = code_match[1] unless code_match.nil?
    end

    # Use a blank 'code' to indicate we didn't find the merchant page
    return nil if merchant_source.code.nil? || merchant_source.code.empty?

    # Alternative Merchant Code
    final_uri = doc_and_final_uri[:final_uri]
    unless final_uri.nil? || final_uri.empty?
      merchant_source.alt_code = alt_code_from_merchant_source_page_url(final_uri)
    end

    # Merchant Name
    elements = doc.search('img[@src="http://images.resellerratings.com/images/small-storefront-rev.gif"]/../..')
    merchant_source.name = elements.first.inner_text.strip unless elements.empty?

    # Merchant Homepage
    elements = doc.search('font[text() *= "Homepage:"]/a/font')
    merchant_source.homepage = elements.first.inner_text.strip unless elements.empty?

    # Merchant Rating
    elements = doc.search('font[text() *= "Six-Month Rating:"]/../font[2]')
    unless elements.empty?
      merchant_rating = parse_rating(elements.first.inner_text)
      merchant_source.merchant_rating = merchant_rating unless merchant_rating.nil?
    end

    # Num Merchant Reviews
    elements = doc.search('font[text() *= "Six-Month Reviews:"]/../../td[2]')
    merchant_source.num_merchant_reviews = elements.first.inner_text.strip unless elements.empty?

    # Merchant Rating Lifetime
    elements = doc.search('font[text() *= "Lifetime Rating:"]/../font[2]')
    unless elements.empty?
      merchant_rating_lifetime = parse_rating(elements.first.inner_text)
      merchant_source.merchant_rating_lifetime = merchant_rating_lifetime unless merchant_rating_lifetime.nil?
    end

    # Num Merchant Reviews Lifetime
    elements = doc.search("font[text() *= 'Lifetime\nReviews:']/../../td[2]")
    merchant_source.num_merchant_reviews_lifetime = elements.first.inner_text.strip unless elements.empty?

    merchant_source
  end

  # Converts a search results page into an Array of at most +limit+ Hashes
  # with :merchant_page_url, :merchant_code and :merchant_name.
  # :merchant_code is nil for a link whose href has no "/store/" segment.
  def self.convert_search_results_page_to_merchant_array(search_results_doc, limit)
    result = []
    merchant_links = search_results_doc.search('tr[/td/font/a/font/b[text() = "Store Name"]]../tr/td/a')
    merchant_links.each_with_index do |merchant_link, index|
      break if index > limit-1
      href = merchant_link.attributes['href']
      code_match = href.to_s.match(/^.*\/store\/(.+)$/)
      result << { :merchant_page_url => href,
                  :merchant_code => code_match.nil? ? nil : code_match[1],
                  :merchant_name => merchant_link.inner_text.strip }
    end
    result
  end

  # Fetches +url+ and parses the body with Hpricot.
  #
  # Returns a Hash with :doc and :final_uri (the URL after redirects), or
  # nil when the fetch did not succeed.
  def self.open_doc(url)
    response = ExternalUrl.fetch_response(url)
    return nil unless response[:success]

    {:doc => Hpricot(response[:response].body), :final_uri => response[:final_uri]}
  end
end
@@ -0,0 +1,224 @@
1
+ require File.join(File.dirname(__FILE__), 'api_helper')
2
+ require 'rubygems'
3
+ gem 'httparty', '>= 0.5.0'
4
+ require 'httparty'
5
+ require File.join(File.dirname(__FILE__), 'httparty_nokogiri_parser')
6
+
7
# Client for the Shopping.com Publisher REST API.
module Shopping
  # Error reported by the Shopping.com API; carries the API's numeric code.
  class Error < StandardError
    attr_reader :code

    def initialize(message, code)
      super(message)
      @code = code
    end
  end

  # Fetches products and offers from the Shopping.com Publisher API and
  # converts the XML responses into plain Hashes and Arrays.
  #
  # NOTE(review): to_d_or_nil is not defined in this class; it is presumably
  # provided by ApiHelper and returns a decimal or nil -- confirm.
  class Publisher
    include ApiHelper
    include HTTParty
    parser HttpartyNokogiriParser
    format :xml
    base_uri 'http://publisher.api.shopping.com/publisher/3.0/rest/'
    default_params 'trackingId' => '3068547', 'apiKey' => SHOPPING_API_KEY

    # Largest number of offers requested per product.
    MAX_OFFERS = 20
    # Set to true to print each API URL before it is requested.
    PUTS_API_URL = false

    # Fetches a single product.
    #
    # product_id     - String product ID.
    # include_specs  - when true, asks the API for product specifications.
    # include_offers - when true, requests up to MAX_OFFERS offers and adds
    #                  them under :offers.
    #
    # Returns the product Hash (see convert_product_node), or nil when the
    # response contains no product. Raises Shopping::Error when the API
    # reports an error.
    def fetch_product(product_id, include_specs=false, include_offers=false)
      # Won't get pseudo-redirected to new product ID unless we request at least one offer (very strange)
      query = {'numItems' => include_offers ? MAX_OFFERS : 1, 'productId' => product_id.strip, 'showProductSpecs' => include_specs ? 'true' : 'false'}
      call_api('/GeneralSearch', {:query => query}) do |doc|
        product_node = doc.at('GeneralSearchResponse/categories/category/items/product')
        convert_product_node(product_node, include_offers)
      end
    end

    # Fetches up to MAX_OFFERS offers for a product.
    #
    # Returns an Array of offer Hashes (see convert_offers_collection_node).
    # Raises Shopping::Error when the API reports an error.
    def fetch_offers(product_id)
      query = {'numItems' => MAX_OFFERS, 'showOffersOnly' => 'true', 'productId' => product_id.strip}
      call_api('/GeneralSearch', {:query => query}) do |doc|
        items_node = doc.at('GeneralSearchResponse/categories/category/items')
        convert_offers_collection_node(items_node)
      end
    end

    # Searches for products by keyword.
    #
    # Returns an Array of at most +max_results+ product Hashes, without
    # offers. Raises Shopping::Error when the API reports an error.
    def search_for_product(keyword, max_results=10)
      query = {'doSkipping' => 'false', 'showProductOffers' => 'false', 'numAttributes' => 0, 'numItems' => max_results, 'keyword' => keyword.strip}
      call_api('/GeneralSearch', {:query => query}) do |doc|
        product_nodes = doc.search('GeneralSearchResponse/categories/category/items/product')
        product_nodes.map { |product_node| convert_product_node(product_node) }
      end
    end

    protected

    # Performs a GET against +path+ and yields the parsed document, or
    # raises the first error found in the response.
    def call_api(path, options, &block)
      if PUTS_API_URL
        merged_options = self.class.default_options.dup.merge(options)
        puts "Shopping.com API URL: #{HTTParty::Request.new(Net::HTTP::Get, path, merged_options).uri}"
      end
      doc = self.class.get(path, options)
      errors = get_errors(doc)
      if errors.empty?
        yield doc
      else
        raise_exception(errors)
      end
    end

    # Collects the exceptions listed in an error response.
    #
    # Returns an Array of Shopping::Error (empty when there are none).
    def get_errors(doc)
      # `search` does not yield to a block, so the result has to be iterated
      # explicitly; otherwise no error would ever be collected.
      doc.search('GenericResponse/exceptions/exception').map do |exception_node|
        Shopping::Error.new(text_at(exception_node, 'message'), text_at(exception_node, 'code').to_i)
      end
    end

    # Raises the first of +errors+.
    def raise_exception(errors)
      raise errors.first
    end

    # Returns the text of the first node matching +path+ under +node+, or
    # nil when +node+ is nil or nothing matches.
    def text_at(node, path)
      return nil if node.nil?
      found = node.at(path)
      found.nil? ? nil : found.text
    end

    # Converts a <product> node into a Hash with :product_id, :name,
    # :description, :images, :num_reviews and :rating, plus :offers (when
    # requested and present) and :specifications (when present).
    #
    # Returns nil when +product_node+ is nil.
    def convert_product_node(product_node, include_offers=false)
      return nil if product_node.nil?

      product = {}
      product[:product_id] = product_node['id']
      product[:name] = text_at(product_node, 'name')

      # Prefer the full description; fall back to the short one, then ''.
      description = text_at(product_node, 'fullDescription')
      if description.nil? || description.empty?
        description = text_at(product_node, 'shortDescription')
      end
      product[:description] = description.to_s

      # Available images, smallest area first.
      images = product_node.search('images/image[@available="true"]').map { |image_node|
        {
          :width => image_node['width'].to_i,
          :height => image_node['height'].to_i,
          :url => text_at(image_node, 'sourceURL')
        }
      }.sort_by { |image| image[:width] * image[:height] }

      product[:images] = {
        :small_image => images[0],
        :medium_image => images[1],
        :large_image => images[2]
      }

      # Disabled manufacturer detection, kept from the original source:
      #
      # possible_manufacturers = (product / 'offer > manufacturer').collect{|x| x.text}.compact.uniq
      #
      # if possible_manufacturers.length == 1
      #   product[:manufacturer] = possible_manufacturers.first # easy peasy lemon squezy
      # elsif possible_manufacturers.length > 1
      #   # figure out which manufacturer is the most popular
      #   manufacturers_popularity_index = possible_manufacturers.inject({}) {|ha, manufacturer| ha[manufacturer] ||= 0; ha[manufacturer] += 1; ha }
      #   product[:manufacturer] = manufacturers_popularity_index.sort_by{|key, val| val }.last.first
      # else
      #   product[:manufacturer] = nil # zip, zero, doodad :(
      # end

      # rating
      review_count_text = text_at(product_node, 'rating/reviewCount')
      product[:num_reviews] = review_count_text.nil? ? 0 : review_count_text.to_i
      rating_text = text_at(product_node, 'rating/rating')
      product[:rating] = rating_text.nil? ? nil : normalize_product_rating(rating_text.to_f)

      # offers
      if include_offers
        offers_node = product_node.at('offers')
        product[:offers] = convert_offers_collection_node(offers_node) unless offers_node.nil?
      end

      # specifications
      specifications_node = product_node.at('specifications')
      product[:specifications] = convert_specifications_node(specifications_node) unless specifications_node.nil?

      product
    end

    # Converts the <offer> nodes under +offers_collection_node+ into an
    # Array of offer Hashes sorted by price plus shipping, cheapest first.
    #
    # Offers marked out-of-stock or back-order are dropped, as are offers
    # with no store or no usable base price (they cannot be compared or
    # sorted). Only the cheapest offer per store is kept.
    #
    # Returns [] when +offers_collection_node+ is nil.
    def convert_offers_collection_node(offers_collection_node)
      return [] if offers_collection_node.nil?

      offers = {}
      offers_collection_node.search('offer').each_with_index do |offer, offer_index|
        # in-stock
        stock_status = text_at(offer, 'stockStatus')
        next if stock_status == 'out-of-stock' || stock_status == 'back-order'

        store = offer.at('store')
        next if store.nil?

        store_hash = {
          :id => store['id'],
          :name => text_at(store, 'name'),
          :trusted => store['trusted'] == "true",
          :authorized_reseller => store['authorizedReseller'] == 'true'
        }

        store_logo = store.at('logo')
        if !store_logo.nil? && store_logo['available'] == 'true'
          store_hash[:logo] = {
            :width => store_logo['width'],
            :height => store_logo['height'],
            :url => text_at(store_logo, 'sourceURL')
          }
        else
          store_hash[:logo] = nil
        end

        # store rating
        store_rating = store.at('ratingInfo')
        store_rating_text = text_at(store_rating, 'rating')
        store_hash[:rating] = {
          :number => store_rating_text.nil? ? nil : normalize_merchant_rating(store_rating_text.to_f),
          :count => text_at(store_rating, 'reviewCount').to_i,
          :url => text_at(store_rating, 'reviewURL')
        }

        # prices
        cpc_text = text_at(offer, 'cpc')
        cpc = cpc_text.nil? ? nil : (cpc_text.to_f*100).to_i
        base_price_text = text_at(offer, 'basePrice')
        base_price = base_price_text.nil? ? nil : to_d_or_nil(base_price_text)
        next if base_price.nil?
        shipping_node = offer.at('shippingCost')
        # A shipping cost flagged checkSite (or missing) is treated as unknown.
        shipping_cost = (shipping_node.nil? || shipping_node['checkSite'] == 'true') ? nil : to_d_or_nil(shipping_node.text)

        # skip this offer if we already have one from same merchant and it has a lower total price
        existing_offer = offers[store_hash[:id]]
        unless existing_offer.nil?
          next if existing_offer[:price] + (existing_offer[:shipping] || 0.0) < base_price + (shipping_cost || 0.0)
        end

        offers[store_hash[:id]] = { :original_index => offer_index,
                                    :merchant_code => store_hash[:id],
                                    :merchant_name => store_hash[:name],
                                    :merchant_logo_url => store_hash[:logo].nil? ? nil : store_hash[:logo][:url],
                                    :cpc => cpc,
                                    :price => base_price,
                                    :shipping => shipping_cost,
                                    :offer_url => text_at(offer, 'offerURL'),
                                    :offer_tier => 1,
                                    :merchant_rating => store_hash[:rating][:number],
                                    :num_merchant_reviews => store_hash[:rating][:count] }
      end
      offers.values.sort_by { |offer_hash| offer_hash[:price] + (offer_hash[:shipping] || 0) }
    end

    # Converts a <specifications> node into a Hash of feature name to
    # value: a String for a single value, an Array of Strings for several.
    # Features with no values are omitted.
    def convert_specifications_node(specifications_node)
      specifications = {}
      specifications_node.search('feature').each do |feature_node|
        feature_name = text_at(feature_node, 'name')
        values = feature_node.search('value').map { |value_node| value_node.text }
        next if values.empty?
        specifications[feature_name] = values.length > 1 ? values : values.first
      end
      specifications
    end

    # Multiplies the API's product rating by 20 and rounds it to an
    # Integer; nil stays nil.
    def normalize_product_rating(product_rating)
      product_rating.nil? ? nil : (product_rating * 20.0).round
    end

    # Multiplies the API's merchant rating by 20 and rounds it to an
    # Integer; nil stays nil.
    def normalize_merchant_rating(merchant_rating)
      merchant_rating.nil? ? nil : (merchant_rating * 20.0).round
    end
  end
end