bazaar_sources 0.2.1.1.1.4
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +7 -0
- data/init.rb +1 -0
- data/lib/api_helpers/amazon.rb +606 -0
- data/lib/api_helpers/api_helper.rb +9 -0
- data/lib/api_helpers/external_url.rb +127 -0
- data/lib/api_helpers/httparty_nokogiri_parser.rb +14 -0
- data/lib/api_helpers/reseller_ratings_api.rb +174 -0
- data/lib/api_helpers/shopping.rb +224 -0
- data/lib/api_helpers/shopping_bulk_api.rb +514 -0
- data/lib/api_helpers/shopzilla_api.rb +230 -0
- data/lib/bazaar_sources.rb +35 -0
- data/lib/sources/amazon_source.rb +94 -0
- data/lib/sources/buy_source.rb +34 -0
- data/lib/sources/ebay_source.rb +35 -0
- data/lib/sources/epinions_source.rb +80 -0
- data/lib/sources/google_source.rb +119 -0
- data/lib/sources/price_grabber_source.rb +94 -0
- data/lib/sources/reseller_ratings_source.rb +47 -0
- data/lib/sources/shopping_source.rb +136 -0
- data/lib/sources/shopzilla_source.rb +108 -0
- data/lib/sources/simple_sources.yml +71 -0
- data/lib/sources/source.rb +242 -0
- metadata +137 -0
@@ -0,0 +1,94 @@
|
|
1
|
+
require 'ostruct'
|
2
|
+
|
3
|
+
# Merchant-rating source that scrapes PriceGrabber retailer info pages.
#
# Offers are switched off for this source (:offer_enabled => false); it is
# only flagged for merchant ratings (:use_for_merchant_ratings => true).
class PriceGrabberSource < Source
  def initialize
    super(:name => 'PriceGrabber',
          :homepage => 'http://www.pricegrabber.com/',
          :cpc => 0,
          :offer_enabled => false,
          :offer_ttl_seconds => 0,
          :use_for_merchant_ratings => true,
          :offer_affiliate => false,
          :supports_lifetime_ratings => false,
          :batch_fetch_delay => 2,
          :product_code_regexp => /^\d{6,9}$/,
          :product_code_examples => ['716698181', '563043'],
          :product_page_link_erb => "http://reviews.pricegrabber.com/-/m/<%= product_code %>/")
  end

  # Builds the retailer info page URL for the given merchant code.
  def url_for_merchant_source_page(merchant_source_code)
    "http://www.pricegrabber.com/info_retailer.php/r=#{merchant_source_code}"
  end

  # Extracts the numeric merchant code from a PriceGrabber URL containing
  # "/r=<digits>" or "/r/<digits>".
  #
  # Returns the code as a String, or nil when the URL is nil or does not
  # contain a merchant code (instead of raising NoMethodError on nil).
  def code_from_merchant_source_page_url(merchant_source_page_url)
    matches = merchant_source_page_url.to_s.match(/pricegrabber\.com.*\/r[\/=](\d+)/)
    matches.nil? ? nil : matches[1]
  end

  # Fetches and scrapes a retailer info page.
  #
  # Returns an OpenStruct carrying :source and :code plus, for every piece
  # found on the page, :name, :logo_url, :merchant_rating,
  # :merchant_rating_lifetime, :num_merchant_reviews,
  # :num_merchant_reviews_lifetime and :homepage.
  def fetch_merchant_source(merchant_source_page_url)
    delay_fetch
    doc = Hpricot(open(merchant_source_page_url))

    merchant_source = OpenStruct.new
    merchant_source.source = self

    merchant_identity_block = doc.at('div#merchantIdentityBlock')
    unless merchant_identity_block.nil?
      # merchant name
      name_element = merchant_identity_block.at('/h4')
      merchant_source.name = name_element.inner_text.strip unless name_element.nil?

      # merchant logo
      logo_element = merchant_identity_block.at('/img')
      merchant_source.logo_url = logo_element.attributes['src'] unless logo_element.nil?
    end

    # merchant code (nil when the URL carries no recognisable code)
    merchant_source.code = code_from_merchant_source_page_url(merchant_source_page_url)

    # merchant rating: the page value is multiplied by 20 before storing;
    # format_rating divides by 20 again and prints it as "x.xx/5.0".
    ratings = doc.search('table#scoreTable/tr/th[text() = "Avg Rating"]/../td')
    unless ratings.nil?
      # First cell feeds the regular rating, third cell the lifetime rating.
      # NOTE(review): column meaning is taken from the original variable
      # names (three-month / lifetime) -- confirm against the live page.
      unless ratings[0].nil?
        merchant_source.merchant_rating = (ratings[0].inner_text.strip.to_f * 20).round
      end

      unless ratings[2].nil?
        merchant_source.merchant_rating_lifetime = (ratings[2].inner_text.strip.to_f * 20).round
      end
    end

    # Num Merchant Reviews (same first/third cell layout as the ratings row)
    num_reviews = doc.search('table#scoreTable/tr/th[text() = "Total Reviews"]/../td')
    unless num_reviews.nil?
      unless num_reviews[0].nil?
        merchant_source.num_merchant_reviews = num_reviews[0].inner_text.strip.to_i
      end

      unless num_reviews[2].nil?
        merchant_source.num_merchant_reviews_lifetime = num_reviews[2].inner_text.strip.to_i
      end
    end

    # Homepage (stored lower-cased)
    homepage_element = doc.at('table#contactTable//th[text() = "Website:"]/../td/a')
    merchant_source.homepage = homepage_element.inner_text.strip.downcase unless homepage_element.nil?

    merchant_source
  end

  # Formats the stored rating as a two-decimal score out of 5.0.
  def format_rating(merchant_source)
    '%01.2f/5.0' % (merchant_source.get_merchant_rating.to_f / 20.0)
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'api_helpers/reseller_ratings_api'
|
2
|
+
|
3
|
+
# Merchant-rating source for ResellerRatings.com. All fetching and searching
# is delegated to ResellerRatingsAPI.
class ResellerRatingsSource < Source
  def initialize
    settings = {
      :name => 'ResellerRatings.com',
      :homepage => 'http://www.resellerratings.com/',
      :cpc => 0,
      :offer_enabled => false,
      :offer_ttl_seconds => 0,
      :use_for_merchant_ratings => true,
      :offer_affiliate => false,
      :supports_lifetime_ratings => true,
      :batch_fetch_delay => 5,
      :product_code_regexp => /^\d{9}$/,
      :product_code_examples => ['652196596', '676109333'],
      :product_page_link_erb => "http://resellerratings.nextag.com/<%= product_code %>/resellerratings/prices-html"
    }
    super(settings)
  end

  # URL of the seller page addressed by merchant code.
  def url_for_merchant_source_page(merchant_source_code)
    'http://www.resellerratings.com/seller' + "#{merchant_source_code}" + '.html'
  end

  # URL of the store page addressed by the alternative merchant code.
  def url_for_merchant_source_page_alt(merchant_source_alt_code)
    'http://www.resellerratings.com/store/' + "#{merchant_source_alt_code}"
  end

  # This source does not derive a merchant code from a page URL; always nil.
  def code_from_merchant_source_page_url(merchant_source_page_url)
    nil
  end

  # Applies the batch fetch delay, then lets the API helper fetch the page.
  def fetch_merchant_source(merchant_source_page_url)
    delay_fetch
    ResellerRatingsAPI.fetch_merchant_source(merchant_source_page_url)
  end

  # Delegates the merchant search to the API helper.
  def search_for_merchant_source(search_text)
    ResellerRatingsAPI.search_for_merchant_source(search_text)
  end

  # Delegates the best-match merchant search to the API helper.
  def search_for_merchant_source_best_match(search_text)
    ResellerRatingsAPI.search_for_merchant_source_best_match(search_text)
  end

  # Formats the stored rating as a one-decimal score out of 10.
  def format_rating(merchant_source)
    out_of_ten = merchant_source.get_merchant_rating.to_f / 10.0
    format('%01.1f/10', out_of_ten)
  end
end
|
@@ -0,0 +1,136 @@
|
|
1
|
+
require 'ostruct'
|
2
|
+
require 'api_helpers/shopping'
|
3
|
+
|
4
|
+
# Offer and merchant-rating source for Shopping.com.
#
# Offers come from the Shopping::Publisher API helper; merchant details are
# scraped from Shopping.com HTML pages with Hpricot.
class ShoppingSource < Source
  def initialize
    super(:name => 'Shopping.com',
          :homepage => 'http://www.shopping.com/',
          :cpc => 50,
          :offer_enabled => true,
          :offer_ttl_seconds => 1800,
          :use_for_merchant_ratings => true,
          :offer_affiliate => false,
          :supports_lifetime_ratings => false,
          :batch_fetch_delay => 2,
          :product_code_regexp => /^\d{4,10}$/,
          :product_code_examples => ['44393573', '37469715'])
  end

  # Lazily created API client.
  def api
    @api ||= Shopping::Publisher.new
  end

  # Builds the merchant info page URL for the given merchant code.
  def url_for_merchant_source_page(merchant_source_code)
    "http://www.shopping.com/xMR-~MRD-#{merchant_source_code}"
  end

  # Fetches and scrapes a merchant info page.
  #
  # Returns an OpenStruct carrying :source plus, for every piece that could
  # be parsed, :name, :logo_url, :code, :merchant_rating and
  # :num_merchant_reviews. Pieces whose markup does not match the expected
  # pattern are left unset rather than raising NoMethodError on a nil match.
  def fetch_merchant_source(merchant_source_page_url)
    delay_fetch
    doc = nil
    4.times do
      # This is a workaround for some weirdness with Shopping.com.
      # About one in ten requests for a merchant's info page results
      # in a page with zeros for everything (as if the merchant has
      # never been reviewed). One indication that we received the
      # bogus page is that the title will look like:
      #   Shopping.com: null - Compare Prices & Read Reviews
      # If we see that 'null' in the title, try fetching the page
      # again (up to 4 times).
      doc = Hpricot(open(merchant_source_page_url))
      title_element = doc.at('head/title')
      # A page without a <title> cannot be recognised as bogus; accept it.
      page_title = title_element.nil? ? '' : title_element.inner_text
      break if page_title.match(/ null /).nil?
    end

    merchant_source = OpenStruct.new
    merchant_source.source = self

    # merchant name
    name_element = doc.at('h1[@class = "pageTitle"]')
    merchant_source.name = name_element.inner_text.strip unless name_element.nil?

    # merchant logo
    logo_element = doc.at('img[@class = "logoBorder1"]')
    unless logo_element.nil?
      logo_url = logo_element.attributes['src']
      merchant_source.logo_url = logo_url

      # merchant code: taken from the logo file name (merch_logos/<code>.gif)
      code_match = logo_url.to_s.match(/merch_logos\/(.+)\.gif/)
      merchant_source.code = code_match[1] unless code_match.nil?
    end

    # merchant rating: read from the rating image's title attribute and
    # multiplied by 20; format_rating divides by 20 and prints "x.x/5.0".
    rating_element = doc.at('td[@id = "image"]/img')
    unless rating_element.nil?
      rating_match = rating_element.attributes['title'].to_s.match(/((\d|,)*\.?\d)/)
      merchant_source.merchant_rating = rating_match[1].delete(',').to_f * 20.0 unless rating_match.nil?
    end

    # Num Merchant Reviews: the number following "of" in the box title
    reviews_element = doc.at('table[@class = "boxTableTop"]//h3[@class = "boxTitleNB"]')
    unless reviews_element.nil?
      reviews_match = reviews_element.inner_text.match(/of\s+((\d|,)+)/)
      merchant_source.num_merchant_reviews = reviews_match[1].delete(',').to_i unless reviews_match.nil?
    end

    merchant_source
  end

  # Searches Shopping.com for merchants matching search_text.
  #
  # Returns an Array with one entry per result row: the existing
  # MerchantSource when MerchantSource.find_by_source_and_code finds one,
  # otherwise a fresh OpenStruct with :source, :name, :code and :logo_url.
  # Rows without a parsable "~MRD-<digits>" merchant link are skipped.
  def search_for_merchant_source(search_text)
    merchant_search_url = "http://www.shopping.com/xSD-#{CGI::escape(search_text.strip)}"
    doc = Hpricot(open(merchant_search_url))
    merchant_sources = []
    results_box = doc.search('div[@class*="contentContainer"]/div[@class="boxMid"]')[1]

    # Do we have any results?
    return merchant_sources if results_box.nil?

    results_box.search('tr[td/div/ul/li/a/span[text() = "See Store Info"]]').each do |result_row|
      link = result_row.at('/td/a')
      next if link.nil?

      code_match = link.attributes['href'].to_s.match(/~MRD-(\d+)/)
      next if code_match.nil?

      name = link.inner_text.strip
      merchant_code = code_match[1]
      logo = result_row.at('/td[@class = "smallTxt"]/img')
      logo_url = logo.nil? ? nil : logo.attributes['src']

      existing_merchant_source = MerchantSource.find_by_source_and_code(self, merchant_code)
      if existing_merchant_source.nil?
        merchant_sources << OpenStruct.new({:source => self, :name => name, :code => merchant_code, :logo_url => logo_url})
      else
        merchant_sources << existing_merchant_source
      end
    end
    merchant_sources
  end

  # Formats the stored rating as a one-decimal score out of 5.0.
  def format_rating(merchant_source)
    '%01.1f/5.0' % (merchant_source.get_merchant_rating.to_f / 20.0)
  end

  # Replaces the id 3068547 with 8039098 wherever it occurs in the URL.
  # NOTE(review): presumably these are tracking/affiliate ids -- confirm.
  def nullify_offer_url(offer_url)
    offer_url.gsub(/3068547/, '8039098')
  end

  # Average total price over the offers that have a merchant rating of at
  # least 55 and both a price and a shipping value.
  #
  # Returns nil when no offer qualifies (or when no offers were returned).
  def fetch_street_price(product_code)
    delay_fetch
    offers = fetch_offers(product_code) || []
    num_offers = 0
    total_prices = 0.0
    offers.each do |offer|
      next if offer.merchant_rating.nil? || offer.merchant_rating < 55
      next if offer.price.nil? || offer.shipping.nil?

      total_prices += offer.total_price
      num_offers += 1
    end
    num_offers.zero? ? nil : (total_prices / num_offers)
  end

  # Fetches the offers for a product through the API client.
  def fetch_offers(product_code)
    api.fetch_offers(product_code)
  end
end
|
@@ -0,0 +1,108 @@
|
|
1
|
+
require 'ostruct'
|
2
|
+
require 'api_helpers/shopzilla_api'
|
3
|
+
|
4
|
+
# Merchant-rating source for Shopzilla. Merchant details and offers come from
# ShopzillaAPI; merchant search drives the BizRate search form via Mechanize.
class ShopzillaSource < Source
  SHOPZILLA_SEARCH_PAGE = 'http://www.bizrate.com/ratings_guide/guide.html'
  SHOPZILLA_SEARCH_ACTION = 'http://www.bizrate.com/merchant/results.xpml'

  def initialize
    super(:name => 'Shopzilla',
          :homepage => 'http://www.shopzilla.com/',
          :cpc => 39,
          :offer_enabled => false,
          :offer_ttl_seconds => 1800,
          :use_for_merchant_ratings => true,
          :offer_affiliate => false,
          :supports_lifetime_ratings => false,
          :batch_fetch_delay => 1,
          :product_code_regexp => /^\d{7,11}$/,
          :product_code_examples => ['1028968032', '852926140'],
          :product_page_link_erb => "http://www.shopzilla.com/-/<%= product_code %>/shop")
  end

  # Builds the merchant page URL for the given merchant code.
  def url_for_merchant_source_page(merchant_source_code)
    "http://www.shopzilla.com/6E_-_mid--#{merchant_source_code}"
  end

  # Extracts the digits following "_-_mid--" from a merchant page URL.
  # Returns the code as a String, or nil when the URL does not match.
  def code_from_merchant_source_page_url(merchant_source_page_url)
    # Explicit MatchData instead of the global $2.
    matches = merchant_source_page_url.to_s.match(/6[A-Z](\-\-.*)?_\-_mid\-\-(\d+)/i)
    matches.nil? ? nil : matches[2]
  end

  # Looks up the merchant behind the page URL through the API.
  #
  # Returns an OpenStruct carrying :source and, when the API returns a
  # detail hash, :code, :name, :logo_url, :merchant_rating, :homepage and
  # :num_merchant_reviews copied from it.
  def fetch_merchant_source(merchant_source_page_url)
    delay_fetch
    merchant_source = OpenStruct.new
    merchant_source.source = self

    merchant_code = code_from_merchant_source_page_url(merchant_source_page_url)
    merchant_source_detail = api.merchant_source_detail(merchant_code)

    unless merchant_source_detail.nil?
      [:code, :name, :logo_url, :merchant_rating, :homepage, :num_merchant_reviews].each do |field|
        merchant_source.send("#{field}=", merchant_source_detail[field])
      end
    end
    merchant_source
  end

  # Searches for merchants by submitting the BizRate search form.
  #
  # Returns an Array of merchant sources (see
  # add_merchant_source_from_store); empty when the form is not found, the
  # submit yields nothing, or neither result layout is present.
  def search_for_merchant_source(search_text)
    merchant_sources = []

    agent = WWW::Mechanize.new
    agent.html_parser = Nokogiri::HTML
    agent.user_agent_alias = 'Windows IE 7'
    agent.follow_meta_refresh = true

    search_page = agent.get(SHOPZILLA_SEARCH_PAGE)
    form = search_page.form_with(:action => /superfind/)
    return merchant_sources unless form

    # Must switch the action given in the form, because BizRate does exactly this in JavaScript
    form.action = SHOPZILLA_SEARCH_ACTION
    form['SEARCH_GO'] = 'Find it!'
    form.keyword = search_text

    result = form.submit
    return merchant_sources unless result

    single_store = result.at('table[id="merchant_overview"]')
    if single_store
      # Single-merchant result layout
      store = single_store.at('div[class="certified"] strong a')
      add_merchant_source_from_store(merchant_sources, store) if store
    else
      # Multi-merchant result layout
      stores_rated_list = result.at('div[class="storesRatedList"]')
      if stores_rated_list
        stores = stores_rated_list.search('th a')
        if stores
          stores.each { |store| add_merchant_source_from_store(merchant_sources, store) }
        end
      end
    end
    merchant_sources
  end

  # Appends the merchant behind a result link to merchant_sources and
  # returns merchant_sources.
  #
  # The merchant code is the "mid" query parameter of the link's href.
  # CGI.parse maps every key to an Array of values, so the first value is
  # taken; previously the whole Array was passed on as the code. Links
  # without an href, a query string or a "mid" parameter are ignored.
  def add_merchant_source_from_store(merchant_sources, store)
    href = store['href']
    return merchant_sources if href.nil?

    query = URI.parse(href).query
    return merchant_sources if query.nil?

    merchant_code = CGI.parse(query)['mid'].first
    return merchant_sources if merchant_code.nil?

    name = store.text
    logo_url = api.verified_logo_url(merchant_code)
    existing_merchant_source = MerchantSource.find_by_source_and_code(self, merchant_code)
    if existing_merchant_source.nil?
      merchant_sources << OpenStruct.new({:source => self, :name => name, :code => merchant_code, :logo_url => logo_url})
    else
      merchant_sources << existing_merchant_source
    end
  end

  # Formats the stored rating as a one-decimal score out of 10.
  def format_rating(merchant_source)
    '%01.1f/10' % (merchant_source.get_merchant_rating.to_f / 10.0)
  end

  # Replaces "af_id=3973" with "af_id=3233" wherever it occurs in the URL.
  def nullify_offer_url(offer_url)
    offer_url.gsub(/af_id=3973/, 'af_id=3233')
  end

  # Lazily created API client.
  def api
    @api ||= ShopzillaAPI.new
  end

  # Fetches the offers for a product through the API client.
  def fetch_offers(product_code)
    api.find_offers_by_product_id(product_code)
  end
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
# Declarative "simple" sources. Each top-level key is a source keyname; the
# attributes below it are read by Source.load_simple_sources.
#   search_url              - search page URL; contains the literal !placeholder!
#   search_token_separator  - separator string for search tokens
#   product_code_regexp     - turned into a Regexp by the loader (may be empty)
#   product_page_link_erb   - ERB template using product_code
circuit-city:
  name: Circuit City
  mappable: true
  for_offers: false
  for_product_info: false
  for_review_aggregates: true
  search_url:
  search_token_separator:
  product_code_regexp: ^\d{7}$
  product_code_examples: ['4497835', '5007270']
  product_page_link_erb: http://www.circuitcity.com/applications/SearchTools/item-details.asp?EdpNo=<%= product_code %>

cnet:
  name: CNET
  mappable: true
  for_offers: false
  for_product_info: false
  for_review_aggregates: true
  search_url: http://cnet.search.com/search?chkpt=astg.cnet.fd.search.cnet&q=!placeholder!
  search_token_separator: +
  product_code_regexp: ^\d{4}-\d{4}_\d-\d{7,8}$
  product_code_examples: ['4852-6501_7-9666631', '4505-6501_7-33765912']
  product_page_link_erb: http://www.cnet.com/<%= product_code %>.html

newegg:
  name: Newegg
  mappable: true
  for_offers: false
  for_product_info: false
  for_review_aggregates: true
  search_url: http://www.newegg.com/Product/ProductList.asp?DEPA=0&type=&Description=!placeholder!&Submit=ENE&N=0&Ntk=all
  search_token_separator: +
  product_code_regexp: ^[A-Z0-9]{4}\d{11}$
  product_code_examples: ['N82E16830111260', 'N82E16830144222']
  product_page_link_erb: http://www.newegg.com/Product/Product.aspx?Item=<%= product_code %>

nextag:
  name: NexTag
  mappable: true
  for_offers: false
  for_product_info: false
  for_review_aggregates: true
  search_url: http://www.nextag.com/blah/search-html?search=!placeholder!
  search_token_separator: +
  product_code_regexp: ^\d{8,9}$
  product_code_examples: ['560328520', '656751324']
  product_page_link_erb: http://www.nextag.com/<%= product_code %>/prices-html

photography-review:
  name: Photography Review
  mappable: false
  for_offers: false
  for_product_info: false
  for_review_aggregates: false
  search_url: http://www.photographyreview.com/befid-449/keyword-!placeholder!/searchcrx.aspx
  # Quoted: a plain scalar may not begin with the reserved "%" indicator.
  search_token_separator: '%20'
  product_code_regexp:
  product_code_examples: ['casio_exz550', 'canon_eos1dmkiv']
  product_page_link_erb: http://www.dpreview.com/reviews/specs/Canon/<%= product_code %>.asp

yahoo-shopping:
  name: Yahoo! Shopping
  mappable: true
  for_offers: false
  for_product_info: false
  for_review_aggregates: true
  search_url: http://shopping.yahoo.com/search?p=!placeholder!&did=59
  search_token_separator: +
  product_code_regexp: ^\d{10}$
  product_code_examples: ['1993713221', '2002987863']
  product_page_link_erb: http://shopping.yahoo.com/p:-:<%= product_code %>
|
@@ -0,0 +1,242 @@
|
|
1
|
+
require 'set'
|
2
|
+
require 'erb'
|
3
|
+
|
4
|
+
# Base class and registry for all sources.
#
# A source is either a hand-written subclass (registered through the
# inherited hook and instantiated with no arguments) or a "simple" source
# described in simple_sources.yml, for which an anonymous subclass is created
# and named "<Keyname>Source". One instance per source class is created
# lazily the first time the registry is queried.
#
# Every source class has a keyname: the class name without the "Source"
# suffix, lower-cased with dashes between words (PriceGrabberSource ->
# "price-grabber"). For each keyname a constant <KEYNAME>_KEYNAME is defined
# on Source.
class Source
  # Base error; remembers the keyname of the source it relates to.
  class GeneralError < StandardError
    attr_reader :source_keyname
    def initialize(message, source_keyname)
      super(message)
      @source_keyname = source_keyname
    end
  end

  # Error that additionally carries the product code that was not found.
  class ProductNotFoundError < GeneralError
    attr_reader :product_code
    def initialize(message, source_keyname, product_code)
      super(message, source_keyname)
      @product_code = product_code
    end
  end

  # Fatal error that additionally carries the affected product code.
  class ProductFatalError < GeneralError
    attr_reader :product_code
    def initialize(message, source_keyname, product_code)
      super(message, source_keyname)
      @product_code = product_code
    end
  end

  attr_reader :name
  attr_reader :homepage
  attr_reader :cpc
  attr_reader :offer_enabled
  alias :offer_enabled? :offer_enabled
  alias :for_offers :offer_enabled
  attr_reader :offer_ttl_seconds
  attr_reader :use_for_merchant_ratings
  alias :use_for_merchant_ratings? :use_for_merchant_ratings
  attr_reader :offer_affiliate
  alias :offer_affiliate? :offer_affiliate
  attr_reader :supports_lifetime_ratings
  alias :supports_lifetime_ratings? :supports_lifetime_ratings
  attr_reader :batch_fetch_delay
  attr_reader :product_code_regexp
  attr_reader :product_code_examples
  attr_reader :product_page_link_erb

  # properties from the legacy Source table in DA/DCHQ
  attr_reader :mappable
  attr_reader :for_product_info
  attr_reader :for_review_aggregates
  attr_reader :search_url
  attr_reader :search_token_separator

  # Registry state, shared by Source and all of its subclasses.
  @@subclasses = []
  @@sources = Set.new
  @@sources_map = {}
  @@offer_sources = []
  @@affiliate_sources = []
  @@merchant_rating_sources = []

  SIMPLE_SOURCES_YAML_FILE = File.join(File.dirname(__FILE__), 'simple_sources.yml')

  # Per-class keyname cache. (Previously assigned inside `class << self`,
  # which set the variable on the singleton class where nothing reads it.)
  @keyname = nil

  # Keyname of this source class, derived from the class name on first use
  # unless it was assigned explicitly. nil for classes without a name or
  # whose name has nothing in front of "Source".
  def self.keyname
    if @keyname.nil? && !self.name.nil?
      matches = self.name.match(/(.+)Source/)
      @keyname = matches[1].gsub(/([a-z\d])([A-Z])/,'\1-\2').downcase unless matches.nil?
    end
    @keyname
  end

  # Explicitly assigns the keyname (used for the anonymous simple-source
  # classes, which have no name at creation time).
  def self.keyname=(keyname)
    @keyname = keyname
  end

  # Registers every subclass and defines its <KEYNAME>_KEYNAME constant.
  def self.inherited(child)
    @@subclasses << child
    set_source_keyname_const(child.keyname)
    super
  end

  # Copies every attribute into an instance variable of the same name.
  def initialize(attributes)
    attributes.each {|k, v| instance_variable_set("@#{k}", v)}
  end

  def keyname
    self.class.keyname
  end

  # Returns the source instance registered under source_keyname, or nil.
  def self.source(source_keyname)
    load_sources
    @@sources_map[source_keyname]
  end

  # Returns the Set of all source instances, ordered by name.
  def self.sources
    load_sources
    @@sources
  end

  # Sources with offers enabled (cached once non-empty).
  def self.offer_sources
    load_sources
    if @@offer_sources.empty?
      @@offer_sources = @@sources.select{|source| source.offer_enabled?}
    end
    @@offer_sources
  end

  # Sources flagged as affiliate offer sources (cached once non-empty).
  def self.affiliate_sources
    load_sources
    if @@affiliate_sources.empty?
      @@affiliate_sources = @@sources.select{|source| source.offer_affiliate?}
    end
    @@affiliate_sources
  end

  # Sources used for merchant ratings (cached once non-empty).
  def self.merchant_rating_sources
    load_sources
    if @@merchant_rating_sources.empty?
      @@merchant_rating_sources = @@sources.select{|source| source.use_for_merchant_ratings?}
    end
    @@merchant_rating_sources
  end

  # Dynamic finders: Source.price_grabber_source returns the source whose
  # keyname is "price-grabber". Anything else (unknown keyname, or a call
  # with arguments) falls through to the default NoMethodError.
  def self.method_missing(meth, *args, &block)
    found = nil
    if args.empty? && (matches = meth.to_s.match(/(.+)_source$/))
      found = source(matches[1].gsub('_', '-'))
    end
    found.nil? ? super : found
  end

  # Keeps respond_to? in line with the dynamic finders above.
  def self.respond_to_missing?(meth, include_private = false)
    matches = meth.to_s.match(/(.+)_source$/)
    return true if matches && !source(matches[1].gsub('_', '-')).nil?
    super
  end

  # True when product_code matches product_code_regexp; without a regexp,
  # true for any non-nil, non-empty code.
  def product_code_valid?(product_code)
    product_code_regexp.nil? ? (!product_code.nil? && product_code.length > 0) : !(product_code =~ product_code_regexp).nil?
  end

  # Renders product_page_link_erb with product_code in scope; nil when the
  # source has no template.
  def product_page_link(product_code)
    ERB.new(product_page_link_erb).result(binding) unless product_page_link_erb.nil?
  end

  # The methods below are default implementations for subclasses to override.

  def url_for_merchant_source_page(merchant_source_code)
    nil
  end

  def url_for_merchant_source_page_alt(merchant_source_alt_code)
    nil
  end

  def code_from_merchant_source_page_url(merchant_source_page_url)
    nil
  end

  def fetch_merchant_source(merchant_source_page_url)
    nil
  end

  def format_rating(merchant_source)
    "#{merchant_source.get_merchant_rating}%"
  end

  def nullify_offer_url(offer_url)
    offer_url
  end

  def fetch_offers(product_code)
    nil
  end

  # Two sources are interchangeable (for Set/Hash purposes) when they are
  # instances of the same class.
  def hash
    self.class.hash
  end

  def eql?(other)
    self.class == other.class
  end

  def to_s
    keyname
  end

  # NOTE: the class-level helpers below are public. In the original they
  # followed `protected`, which has no effect on `def self.` methods.

  # Defines <KEYNAME>_KEYNAME on the receiver, e.g. "price-grabber" ->
  # PRICE_GRABBER_KEYNAME. Does nothing for a nil or empty keyname.
  def self.set_source_keyname_const(source_keyname)
    unless source_keyname.nil? || source_keyname.empty?
      const_name = source_keyname.gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2').gsub(/([a-z\d])([A-Z])/,'\1_\2').tr('-~', '_').upcase + '_KEYNAME'
      const_set(const_name.to_sym, source_keyname)
    end
  end

  # Populates the registry on first use: one instance per registered
  # subclass, then one per simple source from the YAML file. Returns nil.
  def self.load_sources
    if @@sources.empty?
      @@subclasses.each do |source_class|
        register_source(source_class.new)
      end
      load_simple_sources.each do |source_instance|
        register_source(source_instance)
      end
      # A Set iterates in insertion order, so rebuild it from the sorted
      # list; the result of the original sort call was discarded.
      @@sources = Set.new(@@sources.sort{|a,b| a.name <=> b.name})
    end
    nil
  end

  # Adds one instance to both registry collections.
  def self.register_source(source_instance)
    @@sources << source_instance
    @@sources_map[source_instance.keyname] = source_instance
  end

  # Reads simple_sources.yml and, for each entry, creates a Source subclass
  # named "<Keyname>Source" plus one instance of it. Returns the instances.
  def self.load_simple_sources
    simple_sources = []
    simple_sources_map = YAML.load_file(SIMPLE_SOURCES_YAML_FILE)
    simple_sources_map.each do |source_keyname, source_attributes|
      const_name = source_keyname.gsub(/(?:^|[-_~])(.)/) { $1.upcase } + 'Source'
      simple_source_class = Object.const_set(const_name, Class.new(Source))
      simple_source_class.keyname = source_keyname
      set_source_keyname_const(source_keyname)
      product_code_regexp = source_attributes['product_code_regexp']
      product_code_regexp = Regexp.new(product_code_regexp) unless product_code_regexp.nil?
      # YAML yields real booleans for true/false; comparing them with the
      # string 'true' made every flag false. to_s accepts both forms.
      source = simple_source_class.new(:name => source_attributes['name'],
                                       :offer_enabled => source_attributes['for_offers'].to_s == 'true',
                                       :mappable => source_attributes['mappable'].to_s == 'true',
                                       :for_product_info => source_attributes['for_product_info'].to_s == 'true',
                                       :for_review_aggregates => source_attributes['for_review_aggregates'].to_s == 'true',
                                       :search_url => source_attributes['search_url'],
                                       :search_token_separator => source_attributes['search_token_separator'],
                                       :product_code_regexp => product_code_regexp,
                                       :product_code_examples => source_attributes['product_code_examples'],
                                       :product_page_link_erb => source_attributes['product_page_link_erb'])
      simple_sources << source
    end
    simple_sources
  end

  protected

  # Throttles consecutive fetches: when the previous fetch happened less
  # than batch_fetch_delay seconds ago, sleeps for batch_fetch_delay seconds.
  # Sources without a delay (nil or 0) never sleep. Uses plain Time
  # arithmetic, so ActiveSupport's `seconds.ago` is not required.
  def delay_fetch
    delay = batch_fetch_delay.to_f
    if !@last_fetched_at.nil? && delay > 0 && @last_fetched_at > Time.now - delay
      sleep(batch_fetch_delay)
    end
    @last_fetched_at = Time.now
  end
end
|