espider 0.5.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +29 -0
- data/Rakefile +1 -0
- data/espider.gemspec +28 -0
- data/lib/espider.rb +10 -0
- data/lib/espider/api/baidu/map.rb +9 -0
- data/lib/espider/api/baidu/map/place.rb +53 -0
- data/lib/espider/api/dianping.rb +13 -0
- data/lib/espider/api/dianping/base.rb +35 -0
- data/lib/espider/api/dianping/business.rb +38 -0
- data/lib/espider/api/dianping/coupon.rb +0 -0
- data/lib/espider/api/dianping/deal.rb +0 -0
- data/lib/espider/api/dianping/metadata.rb +28 -0
- data/lib/espider/api/dianping/params.rb +34 -0
- data/lib/espider/api/dianping/review.rb +0 -0
- data/lib/espider/exceptions.rb +2 -0
- data/lib/espider/front/baidu/map/hotel.rb +12 -0
- data/lib/espider/front/baidu/map/hotel/detail.rb +60 -0
- data/lib/espider/front/baidu/map/hotel/list.rb +46 -0
- data/lib/espider/front/dianping.rb +7 -0
- data/lib/espider/front/dianping/hotel.rb +10 -0
- data/lib/espider/front/dianping/hotel/detail.rb +80 -0
- data/lib/espider/front/dianping/hotel/list.rb +56 -0
- data/lib/espider/front/front.rb +23 -0
- data/lib/espider/front/kuxun.rb +7 -0
- data/lib/espider/front/kuxun/detail.rb +100 -0
- data/lib/espider/front/qunar.rb +9 -0
- data/lib/espider/front/qunar/base.rb +25 -0
- data/lib/espider/front/qunar/hotel.rb +173 -0
- data/lib/espider/front/qunar/rank.rb +60 -0
- data/lib/espider/front/ta.rb +27 -0
- data/lib/espider/front/ta/advisor.rb +12 -0
- data/lib/espider/front/ta/daodao.rb +16 -0
- data/lib/espider/version.rb +3 -0
- data/spec/advisor_front_rank_spec.rb +11 -0
- data/spec/baidu_map_api_spec.rb +31 -0
- data/spec/baidu_map_front_spec.rb +53 -0
- data/spec/dadao_front_rank_spec.rb +11 -0
- data/spec/dianping_api_business_spec.rb +29 -0
- data/spec/dianping_api_metadata_spec.rb +44 -0
- data/spec/dianping_front_detail_spec.rb +27 -0
- data/spec/dianping_front_list_spec.rb +17 -0
- data/spec/kuxun_front_detail_spec.rb +26 -0
- data/spec/qunar_front_detail_spec.rb +74 -0
- data/spec/qunar_front_rank_alive_spec.rb +12 -0
- data/spec/qunar_front_rank_spec.rb +28 -0
- data/spec/spec_helper.rb +1 -0
- metadata +176 -0
@@ -0,0 +1,80 @@
|
|
1
|
+
module ESpider
  module Front
    module Dianping
      module Hotel
        # Scraper for a single hotel ("shop") page on www.dianping.com.
        #
        # The constructor downloads and parses the page right away; the reader
        # methods then extract individual fields from the parsed documents.
        class Detail
          # Downloads "http://www.dianping.com/shop/<id>" and parses it.
          #
          # @param id [String, Integer] shop id, interpolated into the URL path
          def initialize(id)
            @host = 'www.dianping.com'
            @id = id
            @current_url = URI::HTTP.build(
              :host => @host,
              :path => "/shop/#{id}"
            ).to_s
            @options = {
              :headers => {
                "User-Agent" => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.95 Safari/537.36'
              }
            }
            @hotel = Nokogiri::HTML(HTTParty.get(@current_url, @options))
            # The inner HTML of every <script class="J_auto-load"> element is
            # concatenated and parsed as a second HTML document (@js); `intro`
            # and `facilities` read from that document.
            embedded = @hotel.search("//script[@class='J_auto-load']").map { |script| script.inner_html }
            @js = Nokogiri::HTML(embedded.join("\n"))
          end

          # @return [String] text of the first <h1> on the page, or '' when the
          #   page has no <h1> (previously this raised NoMethodError)
          def name
            heading = @hotel.search("//h1").first
            heading.nil? ? '' : heading.text
          end

          # @return [String] text of the hidden introduction span(s) found in
          #   the embedded script markup ('' when none match)
          def intro
            @js.search("//div[@class='intro-txt J_hotel-expand']/span[@class='Hide']").text
          end

          # Facility groups listed in the embedded script markup. The result is
          # memoized after the first call.
          #
          # Example:
          #   [{"title"=>"客房设施", "tags"=>["国内长途电话", "拖鞋", "雨伞"]},
          #    {"title"=>"服务项目", "tags"=>["棋牌室", "室内游泳池"]}]
          #
          # @return [Array<Hash>] one {'title' => String, 'tags' => [String]}
          #   hash per <li> that carries a title span
          def facilities
            return @facilities if @facilities
            @facilities = []
            @js.search("//div[@class='introd-box']//li").each do |li|
              title = li.search("span[@class='tit']").first
              next unless title # entries without a title span are ignored
              tags = li.search("span[@class='introd-tag']").map { |tag| tag.text }
              @facilities << { 'title' => title.text, 'tags' => tags }
            end
            @facilities
          end

          # Downloads "/shop/<id>/photos" and lists the pictures on it. Every
          # call performs a new HTTP request.
          #
          # Example:
          #   [{:url=>"http://www.dianping.com/photos/38858830", :text=>"门面",
          #     :small_pic=>"http://i2.s2.dpfile.com/pc/e96f...(240c180)/thumb.jpg",
          #     :big_pic=>"http://i2.s2.dpfile.com/pc/e96f...(700x700)/thumb.jpg"}]
          #
          # @return [Array<Hash>] hashes with :url, :text, :small_pic, :big_pic
          def images
            url = URI::HTTP.build(
              :host => @host,
              :path => "/shop/#{@id}/photos"
            )
            gallery = Nokogiri::HTML(HTTParty.get(url.to_s, @options))
            results = []
            gallery.search("//li[@class='J_list']").each do |item|
              link = item.search("div/a").first
              picture = item.search("img").first
              # Skip malformed entries instead of raising on nil.
              next if link.nil? || picture.nil?
              href = link['href']
              thumb = picture['src']
              next if href.nil? || thumb.nil?
              results << {
                :url => URI.join(@current_url, href).to_s,
                :text => item.search("h3/a").text,
                :small_pic => thumb,
                # The parenthesised size token of the thumbnail URL is swapped
                # for "(700x700)" to address the large rendition.
                :big_pic => thumb.sub(/\(.+\)/, '(700x700)')
              }
            end
            results
          end

          # @return [Array<String>] stripped text of every
          #   <div class="J_brief-cont"> on the shop page
          def comments
            @hotel.search("//div[@class='J_brief-cont']").map { |comment| comment.text.strip }
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'httparty'
|
3
|
+
module ESpider
  module Front
    module Dianping
      module Hotel
        # Scraper for the Dianping hotel search listing of one city, optionally
        # narrowed to one district.
        class List < ESpider::Front::Front
          # Number parsed from the "div.tit span" text of the first listing
          # page (0 when that text does not start with a number).
          attr_reader :total

          BaseUri = "http://www.dianping.com/hotel/search/category"

          # Request headers sent with every listing request.
          REQUEST_HEADERS = {
            "User-Agent" => "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)"
          }.freeze

          # CSS selectors of the two district filter containers read by `areas`.
          AREA_SELECTORS = [
            'div.filter-dist div.con div.list-box ul li',
            'div.filter-dist div.popup div.con dl dd'
          ].freeze

          # Downloads the first listing page for the city/district.
          #
          # @param cityId [String, Integer] city id path segment; coerced with
          #   to_s so an Integer no longer makes File.join raise TypeError
          # @param areaId [String] value of the "district" query parameter
          def initialize(cityId,areaId='')
            @cityId = cityId
            @areaId = areaId
            url = File.join(BaseUri, cityId.to_s, "60?district=#{areaId}")
            @list = fetch_document(url)
            @total = @list.css('div.tit span').text.sub(/[\(\)]/,'').to_i
          end

          # Districts offered by the filter panel of the first listing page.
          #
          # @return [Array<Array(String, String)>] unique [areaId, areaName]
          #   pairs in document order; entries lacking an <input> or <label>
          #   are skipped instead of raising
          def areas
            result = []
            AREA_SELECTORS.each do |selector|
              @list.css(selector).each do |entry|
                input = entry.css('input').first
                label = entry.css('label').first
                next if input.nil? || label.nil?
                pair = [input['data-query-value'], label.text.strip]
                result << pair unless result.include?(pair)
              end
            end
            result
          end

          # Downloads listing page `page` and extracts one row per hotel.
          #
          # The page is kept in a local variable, so calling this method no
          # longer changes which document `areas` reads from.
          #
          # @param page [Integer, String] page number, interpolated as "p<page>"
          # @return [Array<Array>] rows of
          #   [id, name, addr, tel, area, classify, title, price]
          def hotels(page)
            url = File.join(BaseUri, @cityId.to_s, '60', "p#{page}?district=#{@areaId}")
            listing = fetch_document(url)
            listing.css('div.hotel-list-box div.h-list-box ul li.J_hotel-block').map do |hotel|
              hotel_id = hotel['data-shopid']
              hotel_name = hand_str_nil(hotel,'div.tit h4',true)
              hotel_tel = hand_str_nil(hotel,'div.hotel-info p span.tel',true)
              # The telephone span is removed before the address is read
              # (order matters: the address text is taken after the removal).
              hotel.css('div.hotel-info p span.tel').remove
              hotel_addr = hand_str_nil(hotel,'div.hotel-info p.addr',true)
              hotel_area = hand_str_nil(hotel,'div.hotel-info p.place a',true)
              hotel_classify = hand_str_nil(hotel,'div.hotel-info p.tags em a',true)
              hotel_title = hand_str_nil(hotel,'div.hotel-info p.promo em a',false,'title')
              hotel_price = hand_str_nil(hotel,'div.hotel-remark div.price strong',true).sub('¥','')
              [hotel_id,hotel_name,hotel_addr,hotel_tel,hotel_area,hotel_classify,hotel_title,hotel_price]
            end
          end

          private

          # Fetches `url` with the shared request headers and parses the HTML.
          def fetch_document(url)
            Nokogiri::HTML(HTTParty.get(url, :headers => REQUEST_HEADERS))
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module ESpider
  module Front
    # Shared base class for page scrapers; provides a nil-safe node reader.
    class Front
      # Reads the first node matching a CSS rule and always returns a String.
      #
      # @param src [#css] object whose `css(rule)` result responds to `first`
      # @param rule [String] CSS selector
      # @param is_text [Boolean] true to return the node's stripped text,
      #   false to return the attribute named by `attr_name`
      # @param attr_name [String, nil] attribute to read when is_text is false
      # @return [String] '' when no node matches; a missing attribute now also
      #   yields '' (it used to leak nil to the caller)
      def hand_str_nil(src,rule,is_text,attr_name=nil)
        node = src.css(rule).first
        return '' if node.nil?
        return node.text.strip if is_text
        node[attr_name].to_s
      end
    end
  end
end
|
@@ -0,0 +1,100 @@
|
|
1
|
+
require 'uri'
|
2
|
+
require 'httparty'
|
3
|
+
require 'nokogiri'
|
4
|
+
module ESpider
  module Front
    module Kuxun
      # Scraper for a hotel detail page on jiudian.kuxun.cn.
      class Detail
        # Downloads "http://jiudian.kuxun.cn/<id>-jiudian.html" and parses it.
        #
        # @param id [String] hotel slug, e.g. "beijing-xihuajingzhao"
        def initialize(id)
          @id = id
          @host = 'jiudian.kuxun.cn'
          @current_url = URI::HTTP.build(
            :host => @host,
            :path => "/#{@id}-jiudian.html"
          ).to_s
          @options = {
            :headers => {
              "User-Agent" => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.95 Safari/537.36'
            }
          }
          @hotel = Nokogiri::HTML(HTTParty.get(@current_url, @options))
        end

        # @return [String] stripped text of the first <h1>, '' when absent
        def name
          heading = @hotel.search("h1").first
          heading.nil? ? '' : heading.text.strip
        end

        # Address taken from the first <p> of the "TitleBd" block.
        #
        # Uses non-destructive gsub: gsub! returns nil when nothing was
        # substituted, which made the original chain raise NoMethodError for
        # text without whitespace. squeeze is limited to spaces so repeated
        # characters inside the address (e.g. "100") are preserved.
        #
        # @return [String] address without the "地址:" label, '' when the
        #   block or its first paragraph is missing
        def address
          return '' if title_bd.nil?
          paragraph = title_bd.search('p').first
          return '' if paragraph.nil?
          paragraph.text.gsub(/\s+/, ' ').sub('地址:','').squeeze(' ').strip
        end

        # Description taken from the second <p> of the "TitleBd" block.
        #
        # @return [String] text without the "描述:" label; only runs of spaces
        #   are squeezed, other repeated characters are left intact
        def intro
          return '' if title_bd.nil?
          paragraph = title_bd.search('p')[1]
          return '' if paragraph.nil?
          paragraph.text.squeeze(' ').gsub('描述:','').strip
        end

        # Downloads "/<id>-jiudian-tupian.html" and lists the pictures under
        # the "detail_image_全部" list. Every call performs a new request.
        #
        # Example:
        #   [{:text=>"客房",
        #     :small_pic=>"http://s2.static.hotel.kximg.cn/e1310221716/HA/7880...90.I.Q.FH.jpg",
        #     :big_pic=>"http://s2.static.hotel.kximg.cn/e1310221716/HA/7880...90.I.Q.FI.jpg"}]
        #
        # @return [Array<Hash>] hashes with :text, :small_pic and :big_pic
        def images
          url = URI::HTTP.build(
            :host => @host,
            :path => "/#{@id}-jiudian-tupian.html"
          ).to_s
          @images = Nokogiri::HTML(HTTParty.get(url, @options))
          results = []
          @images.search("//ul[@id='detail_image_全部']/li").each do |li|
            picture = li.search('img').first
            next if picture.nil? # list items without an <img> are skipped
            img = picture['src'].to_s
            # When src points at the grey.gif placeholder, the "original"
            # attribute is read instead.
            img = picture['original'].to_s if img.include?('grey.gif')
            caption = li.search('p').first
            results << {
              :text => caption.nil? ? '' : caption.text,
              :small_pic => img,
              # A trailing "H.jpg" is rewritten to "I.jpg" for the large picture.
              :big_pic => img.sub(/H\.jpg\z/, 'I.jpg')
            }
          end
          results
        end

        # Facility groups of the hotel page, memoized after the first call.
        #
        # Example:
        #   [{"title"=>"酒店设施", "tags"=>["商务中心", "送餐服务", "洗衣服务"]},
        #    {"title"=>"康体娱乐", "tags"=>["电子游戏机室"]}]
        #
        # @return [Array<Hash>] {'title' => String, 'tags' => [String]} hashes
        def facilities
          return @facilities if @facilities
          @facilities = []
          @hotel.search("//ul[@class='OpenFacCont']/li").each do |li|
            heading = li.search('b').first
            body = li.search('p').first
            next if heading.nil? || body.nil?
            text = body.text.strip.sub(/。$/,'')
            # Entries are split on ";" when one is present, otherwise on "、".
            separator = text.include?(';') ? ';' : '、'
            @facilities << {
              'title' => heading.text.strip,
              'tags' => text.split(separator)
            }
          end
          @facilities
        end

        # Coordinates read from the b_longitude / b_latitude JavaScript
        # variables in the page source.
        #
        # @return [Array] [longitude, latitude] as Strings, or [0, 0] when
        #   either variable is missing
        def xy
          src = @hotel.to_s
          longitude = src.match(/var\s+b_longitude\s+=\s+\"(\d+\.\d+)\"/)
          latitude = src.match(/var\s+b_latitude\s+=\s+\"(\d+\.\d+)\"/)
          return [0, 0] if longitude.nil? || latitude.nil?
          [longitude[1], latitude[1]]
        end

        private

        # First <div class="TitleBd"> of the page (memoized once found).
        def title_bd
          @title_bd ||= @hotel.search("//div[@class='TitleBd']").first
        end
      end
    end
  end
end
|
100
|
+
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'httparty'
|
3
|
+
module ESpider
  module Front
    module Qunar
      # Common base for Qunar scrapers: splits a hotel id into its city part
      # and hotel code and knows how to fetch the hotel's "mixkey".
      class Base
        BASEURI = "http://hotel.qunar.com"

        # Splits `hotelId` at its LAST underscore.
        #
        # @param hotelId [String] id of the form "<cityurl>_<hotelcode>",
        #   e.g. "beijing_city_2815" -> cityurl "beijing_city", code "2815"
        # @raise [RuntimeError] when hotelId is not a String, or contains no
        #   underscore (previously the latter surfaced as a NoMethodError on nil)
        def initialize(hotelId)
          raise "id should be string" unless hotelId.is_a?(String)
          city, separator, code = hotelId.rpartition('_')
          raise "id should look like <cityurl>_<hotelcode>" if separator.empty?
          @HotelSEQ = hotelId
          @cityurl = city
          @hotelcode = code
        end

        # Downloads "<BASEURI>/city/<cityurl>/dt-<hotelcode>" and reads the
        # text of the <span id="eyKxim"> element.
        #
        # @return [String, nil] the span text, or nil when the span is absent
        def parse_mixkey
          uri = URI(File.join(BASEURI, 'city', @cityurl, "dt-#{@hotelcode}"))
          page = Nokogiri::HTML(HTTParty.get(uri.to_s))
          node = page.search('//span[@id="eyKxim"]').first
          node.nil? ? nil : node.text
        end
      end
    end
  end
end
|
@@ -0,0 +1,173 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'httparty'
|
3
|
+
module ESpider
  module Front
    module Qunar
      # Scraper for a Qunar hotel detail page plus the JSON endpoints that
      # hang off the hotel id (reviews, images, map data).
      class Hotel < Base
        # Text markers looked for in the hotel heading, with the star value
        # each one maps to. Checked in this order; the first hit wins.
        STAR_MARKERS = [
          ['经济型', 1],
          ['舒适型', 3],
          ['高档型', 4],
          ['豪华型', 5],
          ['star20', 2],
          ['star50', 5]
        ].freeze

        # Downloads "<BASEURI>/city/<cityurl>/dt-<hotelcode>" and parses it.
        #
        # Fixes over the original: the URL is built from the inherited BASEURI
        # constant (the code referenced an undefined `BaseUri`), and @qunar_id
        # — read by `id` and by every API URL below — is now actually set.
        #
        # @param hotelId [String] id of the form "<cityurl>_<hotelcode>"
        def initialize(hotelId)
          super
          @qunar_id = hotelId
          uri = URI(File.join(BASEURI, 'city', @cityurl, "dt-#{@hotelcode}"))
          @hotel = Nokogiri::HTML HTTParty.get(uri.to_s)
        end

        # @return [String, nil] heading text up to its first newline (searched
        #   from index 1), stripped; the whole stripped text when there is no
        #   newline; nil when the heading element is missing
        def name
          name_tag = heading_node
          return nil if name_tag.nil?
          text = name_tag.text
          newline_at = text.index("\n", 1)
          headline = newline_at.nil? ? text : text[0..newline_at]
          headline.strip
        end

        # @return [Integer] star value derived from STAR_MARKERS, 0 when the
        #   heading is missing or contains none of the markers
        def star
          name_tag = heading_node
          return 0 if name_tag.nil?
          text = name_tag.text
          marker = STAR_MARKERS.find { |label, _value| text.include?(label) }
          marker.nil? ? 0 : marker.last
        end

        # @return [String] city part of the id with the first "_city" removed
        def citynameEn
          @cityurl.sub('_city','')
        end

        # @return [String] Han-character value of the page's `cityName`
        #   JavaScript variable, '' when not found
        def citynameCn
          match_data = @hotel.to_s.match(/var\s*cityName\s*=\s*\'(\p{Han}+)\'/u)
          match_data.nil? ? "" : match_data[1].strip
        end

        # @return [String] the hotel id this object was created with
        def id
          @qunar_id
        end

        # @return [String] stripped `title` attribute of the first address
        #   span, '' when the span is missing
        def address
          node = @hotel.search("div[@class='adress']/span").first
          return "" if node.nil?
          node['title'].strip
        end

        # @return [String] text of the <li> containing "电话:", label removed
        def phone
          @hotel.search("//li[contains(text(),'电话:')]").text.sub('电话:','').strip
        end

        # @return [String] summary paragraph with ALL whitespace removed; the
        #   "less_summery" variant is preferred, "expan_summery" is the fallback
        def desc
          summary = @hotel.search("p[@class='summery less_summery']").text
          summary = @hotel.search("p[@class='summery expan_summery']").text if summary.empty?
          summary.gsub(/[[:space:]]/, '')
        end

        # Opening date.
        #
        # @return [String] text of p.insttime/cite without the "开业时间:" label
        def insttime
          @hotel.search("p[@class='insttime']/cite").text.sub("开业时间:","").strip
        end

        # Short description.
        #
        # @return [String] text of the first p.h_desc without the "描述:"
        #   label, '' when the paragraph is missing
        def abstract_desc
          node = @hotel.search("p[@class='h_desc']").first
          return "" if node.nil?
          node.text.sub('描述:','').strip
        end

        # @return [String] text of the <li> containing "所属品牌:", label removed
        def brand
          @hotel.search("//li[contains(text(),'所属品牌:')]").text.sub('所属品牌:','').strip
        end

        # @return [String, Integer] digits of the page's `miniRetailPrice`
        #   JavaScript variable as a String, or the Integer 0 when not found
        def reference_price
          match_data = @hotel.to_s.match(/var\s*miniRetailPrice\s*=\s*\'(\d+)\';/)
          match_data.nil? ? 0 : match_data[1].strip
        end

        # @return [Array] the two numbers of the page's `hotelPoint=[a,b];`
        #   assignment as Strings, or [0.0, 0.0] when not found
        def xy
          point = @hotel.to_s.match(/hotelPoint=\[(\d+\.\d+),(\d+\.\d+)\];/)
          return [0.0,0.0] if point.nil?
          [point[1], point[2]]
        end

        # Facilities.
        #
        # @return [Array<Hash>] one {'name' => String, 'info' => [String]}
        #   hash per <dl> (with a <dt>) inside the facilities box; [] when the
        #   box is missing
        def facilities
          box = @hotel.search("//div[@class=\"roundmilieu htintroborder\"]").first
          return [] if box.nil?
          groups = []
          box.search("dl").each do |dl|
            category = dl.search("dt").first
            next if category.nil? # a <dl> without <dt> used to raise
            groups << {
              'name' => category.text.delete(':'),
              'info' => dl.search("dd/ul/li").map { |item| item.text }
            }
          end
          groups
        end

        # Traffic information.
        #
        # Requests the map-data endpoint and returns the entries found under
        # data -> "ent" of the response.
        # NOTE(review): the request asks for traffic, canguan, jingdian and
        # ent, but only "ent" is read — confirm that is the intended key.
        #
        # @return [Array<Hash>] {'name' => ..., 'distance' => ...} hashes;
        #   [] when the response's "ret" flag is not true
        def traffic
          url = "http://hotel.qunar.com/detail/detailMapData.jsp?seq=#{@qunar_id}&type=traffic,canguan,jingdian,ent"
          response = HTTParty.get url
          return [] unless response['ret'] == true
          response['data']['ent'].map do |line|
            { 'name' => line['name'], 'distance' => line['distance'] }
          end
        end

        # @return [String] the "最后装修时间:<digits>年" fragment of the page
        #   (label included), '' when not found
        def decorate_date
          match_data = @hotel.to_s.match(/最后装修时间:\d+年/)
          match_data.nil? ? "" : match_data[0].strip
        end

        # First page of reviews.
        #
        # @return [Array] the "feedContent" value of each review; every list
        #   entry's "content" field is itself JSON and is parsed separately
        def comments
          url = "http://review.qunar.com/api/h/#{@qunar_id}/detail/rank/v1/page/1"
          payload = JSON.parse(HTTParty.get(url))
          payload['data']['list'].map { |comment| JSON.parse(comment['content'])['feedContent'] }
        end

        # Total number of reviews.
        #
        # @return [String] digits of the first `"count":N` in the response,
        #   "0" when absent
        def total_comment
          review_count('all')
        end

        # Number of positive reviews.
        #
        # @return [String] digits of the first `"count":N` in the response,
        #   "0" when absent
        def good_comment
          review_count('positive')
        end

        # @return [Object, nil] the data -> "all" value of the image endpoint
        #   when the response's "ret" flag is truthy, otherwise nil
        def images
          url = "http://hotel.qunar.com/render/hotelDetailAllImage.jsp?hotelseq=#{@qunar_id}"
          payload = JSON.parse(HTTParty.get(url))
          payload['ret'] ? payload['data']["all"] : nil
        end

        # @return [String] stripped data -> "score" of the review endpoint,
        #   "0.0" when the response cannot be parsed or lacks the field
        def score
          url = "http://review.qunar.com/api/h/#{@qunar_id}/detail"
          body = HTTParty.get url
          begin
            JSON.parse(body)["data"]["score"].strip
          rescue StandardError
            # StandardError rather than Exception, so interrupts and exit
            # requests are no longer swallowed.
            "0.0"
          end
        end

        private

        # First <dt> inside <dl class="eriefinfo">; used by `name` and `star`.
        def heading_node
          @hotel.search('//dl[@class="eriefinfo"]//dt').first
        end

        # Fetches the review ranking page for the given rate filter and pulls
        # the number out of the first `"count":N` occurrence.
        def review_count(rate)
          url = "http://review.qunar.com/api/h/#{@qunar_id}/detail/rank/page/1?rate=#{rate}"
          body = HTTParty.get url
          match_data = body.match(/"count":\d+/)
          return "0" if match_data.nil?
          match_data[0].sub(/\"count\":/,"").strip
        end
      end
    end
  end
end
|