espider 0.5.8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (51) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +17 -0
  3. data/Gemfile +4 -0
  4. data/LICENSE.txt +22 -0
  5. data/README.md +29 -0
  6. data/Rakefile +1 -0
  7. data/espider.gemspec +28 -0
  8. data/lib/espider.rb +10 -0
  9. data/lib/espider/api/baidu/map.rb +9 -0
  10. data/lib/espider/api/baidu/map/place.rb +53 -0
  11. data/lib/espider/api/dianping.rb +13 -0
  12. data/lib/espider/api/dianping/base.rb +35 -0
  13. data/lib/espider/api/dianping/business.rb +38 -0
  14. data/lib/espider/api/dianping/coupon.rb +0 -0
  15. data/lib/espider/api/dianping/deal.rb +0 -0
  16. data/lib/espider/api/dianping/metadata.rb +28 -0
  17. data/lib/espider/api/dianping/params.rb +34 -0
  18. data/lib/espider/api/dianping/review.rb +0 -0
  19. data/lib/espider/exceptions.rb +2 -0
  20. data/lib/espider/front/baidu/map/hotel.rb +12 -0
  21. data/lib/espider/front/baidu/map/hotel/detail.rb +60 -0
  22. data/lib/espider/front/baidu/map/hotel/list.rb +46 -0
  23. data/lib/espider/front/dianping.rb +7 -0
  24. data/lib/espider/front/dianping/hotel.rb +10 -0
  25. data/lib/espider/front/dianping/hotel/detail.rb +80 -0
  26. data/lib/espider/front/dianping/hotel/list.rb +56 -0
  27. data/lib/espider/front/front.rb +23 -0
  28. data/lib/espider/front/kuxun.rb +7 -0
  29. data/lib/espider/front/kuxun/detail.rb +100 -0
  30. data/lib/espider/front/qunar.rb +9 -0
  31. data/lib/espider/front/qunar/base.rb +25 -0
  32. data/lib/espider/front/qunar/hotel.rb +173 -0
  33. data/lib/espider/front/qunar/rank.rb +60 -0
  34. data/lib/espider/front/ta.rb +27 -0
  35. data/lib/espider/front/ta/advisor.rb +12 -0
  36. data/lib/espider/front/ta/daodao.rb +16 -0
  37. data/lib/espider/version.rb +3 -0
  38. data/spec/advisor_front_rank_spec.rb +11 -0
  39. data/spec/baidu_map_api_spec.rb +31 -0
  40. data/spec/baidu_map_front_spec.rb +53 -0
  41. data/spec/dadao_front_rank_spec.rb +11 -0
  42. data/spec/dianping_api_business_spec.rb +29 -0
  43. data/spec/dianping_api_metadata_spec.rb +44 -0
  44. data/spec/dianping_front_detail_spec.rb +27 -0
  45. data/spec/dianping_front_list_spec.rb +17 -0
  46. data/spec/kuxun_front_detail_spec.rb +26 -0
  47. data/spec/qunar_front_detail_spec.rb +74 -0
  48. data/spec/qunar_front_rank_alive_spec.rb +12 -0
  49. data/spec/qunar_front_rank_spec.rb +28 -0
  50. data/spec/spec_helper.rb +1 -0
  51. metadata +176 -0
@@ -0,0 +1,60 @@
1
+ require 'nokogiri'
2
+ require 'headless'
3
+ require 'capybara'
4
+ require 'capybara/dsl'
5
module ESpider
  module Front
    module Qunar
      # Collects Qunar's price-ranking rows for a single hotel.
      #
      # Uses state and helpers that are not defined in this class (@cityurl,
      # @hotelcode, @HotelSEQ, parse_mixkey, BASEURI and the Qunar* exception
      # classes); presumably they come from Base or the surrounding namespace --
      # confirm there.
      class Rank < Base
        include Capybara::DSL

        # Opens the hotel detail page in a Selenium-driven browser on a headless
        # display and copies the browser cookies into @cookies
        # (cookie name => cookie value).
        #
        # Connection failures such as Errno::ECONNREFUSED propagate unchanged.
        def set_cookies
          headless = Headless.new
          headless.start
          Capybara.current_driver = :selenium
          Capybara.app_host = BASEURI
          configure_capybara_wait(10)
          visit "/city/#{@cityurl}/dt-#{@hotelcode}/"
          @cookies = {}
          Capybara.current_session.driver.browser.manage.all_cookies.each do |cookie|
            @cookies[cookie[:name]] = cookie[:value]
          end
        end

        # Requests render/detailV2.jsp under BASEURI for a one-night stay starting
        # two days from today, sending the browser cookies, and stores the values
        # of the response's 'result' hash in @ranks.
        #
        # Raises QunarMixKeyParsedException when parse_mixkey returns nil and
        # QunarIpBlockException when the response carries errcode 110.
        # Returns the new @ranks array.
        def load_price_ajax
          mix_key = parse_mixkey
          # Fail before paying for a browser launch when the key is missing
          raise QunarMixKeyParsedException if mix_key.nil?
          set_cookies

          params = {
            :fromDate => Date.today + 2,
            :toDate   => Date.today + 3,
            :cityurl  => @cityurl,
            :HotelSEQ => @HotelSEQ,
            :mixKey   => mix_key
          }
          headers = {
            'User-Agent'      => 'Mozilla/5.0 (Windows NT 5.1; rv:26.0) Gecko/20100101 Firefox/26.0',
            'Connection'      => 'keep-alive',
            'Accept-Encoding' => 'gzip, deflate',
            'Cookie'          => @cookies.map { |name, value| "#{name}=#{value}" }.join('; ')
          }
          uri = URI(File.join(BASEURI, 'render', 'detailV2.jsp'))
          uri.query = URI.encode_www_form(params)

          res = HTTParty.get(uri.to_s, :headers => headers)
          json_ajax = parse_json_from_response_body(res.body)
          # nil.eql?(110) is false, so no separate nil check is needed
          raise QunarIpBlockException, json_ajax if json_ajax['errcode'].eql?(110)

          @ranks = json_ajax['result'].values
          # Invalidate the memoized views so they are rebuilt from the new rows
          @rooms = nil
          @otas = nil
          @ranks
        end

        # Returns element 15 plus one of the first row whose element 3 equals
        # +room+, element 6 equals +ota+ and element 9 equals 1; nil when no row
        # matches. Requires load_price_ajax to have populated @ranks.
        # NOTE(review): element 15 looks like a zero-based position -- confirm
        # against the detailV2.jsp payload.
        def rank(ota, room)
          row = @ranks.find do |entry|
            entry[3].eql?(room) && entry[6].eql?(ota) && entry[9].eql?(1)
          end
          row && row[15] + 1
        end

        # Distinct values of element 3 across the loaded rows (memoized).
        def rooms
          @rooms ||= @ranks.map { |entry| entry[3] }.uniq
        end

        # Distinct values of element 6 across the loaded rows (memoized).
        def otas
          @otas ||= @ranks.map { |entry| entry[6] }.uniq
        end

        # Parses +res+ as JSON after deleting every '(' and ')' character.
        # NOTE(review): this also removes parentheses that occur inside string
        # values; left unchanged because callers may already depend on the
        # stripped strings -- confirm before narrowing it to the outer wrapper.
        def parse_json_from_response_body(res)
          JSON.parse(res.delete('()'))
        end

        private

        # Sets Capybara's wait limit using whichever setter this Capybara
        # version provides (default_wait_time was replaced by
        # default_max_wait_time).
        def configure_capybara_wait(seconds)
          if Capybara.respond_to?(:default_max_wait_time=)
            Capybara.default_max_wait_time = seconds
          else
            Capybara.default_wait_time = seconds
          end
        end
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
module ESpider
  module Front
    module TA
      # Shared page fetcher and vendor-name extractor for the TripAdvisor-style
      # front ends. Subclasses set @base_url before calling super from their
      # own initialize.
      class TripAdvisor
        attr_accessor :base_url

        # HTTP status code of the fetched page.
        attr_reader :code

        # Fetches base_url followed by +ta_id+ and keeps the status code and
        # the page body (tagged as UTF-8).
        #
        # ta_id - listing identifier appended to base_url; converted with to_s
        #         so non-String ids are accepted as well.
        def initialize(ta_id)
          res = HTTParty.get(base_url + ta_id.to_s)
          @code = res.code
          # Read the raw body explicitly instead of relying on the response
          # object forwarding String methods; dup so the original is untouched.
          @page = res.body.to_s.dup.force_encoding('UTF-8')
        end

        # Scans the page for lines ending in a "Vendor(...);" call and returns,
        # in page order, the first double-quoted string that follows "})," in
        # each call. Calls without that pattern are skipped.
        #
        # Returns an Array of Strings (empty when nothing matches).
        def rank
          @page.scan(/(Vendor\(.*\);)$/).map do |(vendor_call)|
            # The capture group tolerates any amount of whitespace after "}),"
            vendor_call[/\}\),\s*"(.*?)",/, 1]
          end.compact
        end
      end
    end
  end
end
26
+ require 'espider/front/ta/daodao'
27
+ require 'espider/front/ta/advisor'
@@ -0,0 +1,12 @@
1
module ESpider
  module Front
    module TA
      # TripAdvisor front end bound to the tripadvisor.com site.
      class Advisor < TripAdvisor
        # Points the scraper at tripadvisor.com, then lets the parent class
        # fetch the page for +ta_id+.
        def initialize(ta_id)
          self.base_url = "http://www.tripadvisor.com/"
          super
        end
      end
    end
  end
end
@@ -0,0 +1,16 @@
1
module ESpider
  module Front
    module TA
      # TripAdvisor front end bound to the daodao.com site.
      class Daodao < TripAdvisor
        # Points the scraper at daodao.com, then lets the parent class fetch
        # the page for +ta_id+.
        def initialize(ta_id)
          self.base_url = "http://www.daodao.com/"
          super
        end

        # Returns only the first half (rounded down) of the names produced by
        # the parent implementation.
        def rank
          names = super
          half = names.size / 2
          names.first(half)
        end
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module ESpider
  # Release version of the gem; frozen so it cannot be mutated at runtime.
  VERSION = "0.5.8".freeze
end
@@ -0,0 +1,11 @@
1
+ require 'spec_helper'
2
# Smoke examples for the tripadvisor.com scraper. A fresh Advisor is built for
# each example, so every example fetches the page once.
describe 'Advisor' do
  let(:advisor) { ESpider::Front::TA::Advisor.new('199326') }

  # Construction yields an Advisor
  it '正确实例化' do
    advisor.should be_an_instance_of(ESpider::Front::TA::Advisor)
  end

  # #rank yields an Array
  it '正确解析排名数据' do
    advisor.rank.should be_an_instance_of(Array)
  end
end
@@ -0,0 +1,31 @@
1
+ #coding:utf-8
2
+ require 'spec_helper'
3
# Examples for the Baidu map Place API client. All examples share the single
# `place` object created below, so state left by one example is visible to
# the next.
describe "百度地图api测试" do
  # NOTE(review): the API key is committed in plain text -- consider loading
  # it from the environment.
  place = ESpider::Baidu::Map::API::Place.new('KNFShrAjm3BvgNEsNLfPSlAP')

  # The client can be constructed
  it "should 被实例化" do
    place.class.should == ESpider::Baidu::Map::API::Place
  end

  # Search within a city
  it "城市内检索请求" do
    place.search_by_city('酒店', '北京')
    place.next_page.class.should == String
  end

  # Search within a bounding rectangle
  it "矩形框检索请求" do
    place.search_by_rect('酒店', '39.915,116.404,39.975,116.414')
    place.next_page.class.should == String
  end

  # NOTE(review): the description says circular-area search, but the body
  # repeats the city search -- looks like a copy-paste; switch to the circle
  # search call once its name is confirmed in Place.
  it "圆形框检索请求" do
    place.search_by_city('酒店', '北京')
    place.next_page.class.should == String
  end

  # Status code of the most recent request made through `place`
  it "http状态码为200" do
    place.page_code.should == 200
  end

  # Paging keeps returning a String and a 200 status
  it "正确翻页" do
    place.next_page.class.should == String
    place.page_code.should == 200
  end

  # Total result count is an integer. Integer is checked by kind because
  # Fixnum is deprecated since Ruby 2.4 and no longer defined in Ruby 3.2.
  it "获得查询结果总量" do
    place.search_by_city('酒店', '北京')
    place.total.should be_a_kind_of(Integer)
  end
end
@@ -0,0 +1,53 @@
1
+ #coding:utf-8
2
+ require 'spec_helper'
3
# Examples for the Baidu map hotel list and hotel detail scrapers. The list
# and detail objects are built once, when the describe block is evaluated,
# and shared by every example.
describe "百度地图酒店信息抓取测试" do
  hotel_list = ESpider::Front::Baidu::Map::Hotel::List.new('黔西南')
  hotels = hotel_list.list
  hotels_count = hotel_list.total
  hotel = ESpider::Front::Baidu::Map::Hotel::Detail.new('1258371975449935871')

  # Both scrapers can be constructed
  it "should 被实例化" do
    hotel_list.should be_an_instance_of(ESpider::Front::Baidu::Map::Hotel::List)
    hotel.should be_an_instance_of(ESpider::Front::Baidu::Map::Hotel::Detail)
  end

  # Hotel count is an integer. Integer is checked by kind because Fixnum is
  # deprecated since Ruby 2.4 and no longer defined in Ruby 3.2.
  it "should 正确解析酒店数量" do
    hotels_count.should be_a_kind_of(Integer)
  end

  # The list is an Array whose first element is a Hash
  it "should 正确返回酒店列表信息" do
    hotels.should be_an_instance_of(Array)
    hotels[0].should be_an_instance_of(Hash)
  end

  # Hotel name
  it "should 返回酒店名称" do
    hotel.hotel_name.should be_an_instance_of(String)
  end

  # Hotel address
  it "should 返回酒店地址" do
    hotel.hotel_addr.should be_an_instance_of(String)
  end

  # Hotel telephone
  it "should 返回酒店电话" do
    hotel.hotel_tel.should be_an_instance_of(String)
  end

  # Hotel coordinates
  it "should 返回酒店经纬度" do
    hotel.hotel_geo.should be_an_instance_of(Array)
  end

  # Hotel star rating
  it "should 返回酒店星级" do
    hotel.hotel_star.should be_an_instance_of(String)
  end

  # Hotel category
  it "should 返回酒店类别" do
    hotel.hotel_category.should be_an_instance_of(String)
  end

  # Hotel price
  it "should 返回酒店价格" do
    hotel.hotel_price.should be_an_instance_of(String)
  end

  # Hotel facilities
  it "should 返回酒店设施" do
    hotel.hotel_facility.should be_an_instance_of(String)
  end

  # Hotel short comment
  it "should 返回酒店短评" do
    hotel.hotel_short_comm.should be_an_instance_of(String)
  end

  # Hotel reviews
  it "should 返回酒店评论" do
    hotel.hotel_review.should be_an_instance_of(Array)
  end

  # Hotel image addresses for the given id
  it "should 返回酒店图片地址" do
    hotel.hotel_image('669775d035aa1b42fd0eb008').should be_an_instance_of(Array)
  end
end
@@ -0,0 +1,11 @@
1
+ require 'spec_helper'
2
# Smoke examples for the daodao.com scraper. A fresh Daodao is built for each
# example, so every example fetches the page once.
describe 'Daodao' do
  let(:daodao) { ESpider::Front::TA::Daodao.new('199326') }

  # Construction yields a Daodao
  it '正确实例化' do
    daodao.should be_an_instance_of(ESpider::Front::TA::Daodao)
  end

  # #rank yields an Array
  it '正确解析排名数据' do
    daodao.rank.should be_an_instance_of(Array)
  end
end
@@ -0,0 +1,29 @@
1
+ require 'spec_helper'
2
# Examples for the Dianping business API client.
# NOTE(review): the app key and secret are committed in plain text -- consider
# loading them from the environment.
describe "大众点评api测试,business类api" do
  # The examples below are disabled; they are kept verbatim.
  # it '正确实例化' do
  # busi = ESpider::API::Dianping::Business.new('16935220','0c3d163c8fff4639a3372aea71df52a1')
  # busi.class.should == ESpider::API::Dianping::Business
  # end
  # it '正确调用find_businesses接口,传入经纬度信息' do
  # busi = ESpider::API::Dianping::Business.new('16935220','0c3d163c8fff4639a3372aea71df52a1')
  # busi.find_businesses('上海','','31.18268013000488','121.42769622802734').should == true
  # busi.get.class.should == String
  # end
  # it '正确调用find_businesses接口,并翻页' do
  # busi = ESpider::API::Dianping::Business.new('16935220','0c3d163c8fff4639a3372aea71df52a1')
  # busi.find_businesses('上海','','31.18268013000488','121.42769622802734').should == true
  # busi.get.class.should == String
  # p busi.get
  # busi.next_page.class.should == String
  # end
  # it '正确调用find_businesses接口,加入分类信息' do
  # busi = ESpider::API::Dianping::Business.new('16935220','0c3d163c8fff4639a3372aea71df52a1')
  # busi.find_businesses('上海','','31.18268013000488','121.42769622802734').should == true
  # busi.set_category('酒店')
  # busi.next_page.class.should == String
  # end

  # get_single_business yields a String for one business id
  it '正确调用/get_single_business接口,返回单个商户信息' do
    client = ESpider::API::Dianping::Business.new('16935220', '0c3d163c8fff4639a3372aea71df52a1')
    response = client.get_single_business('8733744')
    response.should be_instance_of(String)
  end
end
@@ -0,0 +1,44 @@
1
+ #coding:utf-8
2
+ require 'spec_helper'
3
# Examples for the Dianping metadata API client. One client is built when the
# describe block is evaluated and shared by every example.
describe "大众点评api测试,metadata类api" do
  mdata = ESpider::API::Dianping::Metadata.new('16935220','0c3d163c8fff4639a3372aea71df52a1')

  # The client can be constructed
  it '正确实例化' do
    mdata.class.should == ESpider::API::Dianping::Metadata
  end

  # Each pair is [example description, metadata call]. Every example issues
  # the call and then expects `get` to hand back a String. Descriptions are
  # kept exactly as they were, including trailing spaces.
  [
    ['获取支持商户搜索的最新城市列表',            :get_cities_with_businesses],
    ['获取支持商户搜索的最新城市下属区域列表 ',   :get_regions_with_businesses],
    ['获取支持商户搜索的最新分类列表 ',           :get_categories_with_businesses],
    ['获取支持团购搜索的最新城市列表 ',           :get_cities_with_deals],
    ['获取支持团购搜索的最新城市下属区域列表 ',   :get_regions_with_deals],
    ['获取支持团购搜索的最新分类列表 ',           :get_categories_with_deals],
    ['获取支持优惠券搜索的最新城市列表 ',         :get_cities_with_coupons],
    ['获取支持优惠券搜索的最新城市下属区域列表 ', :get_regions_with_coupons],
    ['获取支持优惠券搜索的最新分类列表 ',         :get_categories_with_coupons]
  ].each do |description, endpoint|
    it description do
      mdata.public_send(endpoint)
      mdata.get.class.should == String
    end
  end
end
@@ -0,0 +1,27 @@
1
+ require 'spec_helper'
2
# Examples for the Dianping hotel detail scraper. Every example builds its own
# Detail object for the id it needs.
describe "ESpider::Front::Dianping::Hotel::Detail" do
  # Construction yields a Detail
  it '实例化' do
    ESpider::Front::Dianping::Hotel::Detail.new('2136037')
      .should be_instance_of(ESpider::Front::Dianping::Hotel::Detail)
  end

  # Hotel name
  it '返回酒店名称' do
    ESpider::Front::Dianping::Hotel::Detail.new('2136037').name.should be_instance_of(String)
  end

  # Hotel introduction
  it '返回酒店描述' do
    ESpider::Front::Dianping::Hotel::Detail.new('2869420').intro.should be_instance_of(String)
  end

  # Hotel facilities
  it '返回酒店设施' do
    ESpider::Front::Dianping::Hotel::Detail.new('2869420').facilities.should be_instance_of(Array)
  end

  # Hotel images
  it '返回酒店图片' do
    ESpider::Front::Dianping::Hotel::Detail.new('2869420').images.should be_instance_of(Array)
  end

  # Hotel comments
  it '返回酒店评论' do
    ESpider::Front::Dianping::Hotel::Detail.new('2869420').comments.should be_instance_of(Array)
  end
end
@@ -0,0 +1,17 @@
1
+ require 'spec_helper'
2
# Examples for the Dianping hotel list scraper. `list` is built once, when the
# describe block is evaluated, and shared by the examples that use it.
describe "ESpider::Front::Dianping::Hotel::List" do
  list = ESpider::Front::Dianping::Hotel::List.new('1')

  # Construction yields a List
  it '实例化' do
    list.should be_instance_of(ESpider::Front::Dianping::Hotel::List)
  end

  # NOTE(review): the description says "returns the name", but the body only
  # repeats the instance check from the previous example -- assert on the
  # intended reader once it is confirmed.
  it '返回名称' do
    list.should be_instance_of(ESpider::Front::Dianping::Hotel::List)
  end

  # Hotels for page 1 come back as an Array
  it '返回酒店列表' do
    list.hotels(1).should be_instance_of(Array)
  end

  # Total is an integer. A separate local is used so the shared `list` captured
  # by the other examples is not reassigned from inside this block. Integer is
  # checked by kind because Fixnum is deprecated since Ruby 2.4 and no longer
  # defined in Ruby 3.2.
  it '正确计算酒店数量' do
    other_list = ESpider::Front::Dianping::Hotel::List.new('2','1489')
    other_list.total.should be_a_kind_of(Integer)
  end
end
@@ -0,0 +1,26 @@
1
+ require 'spec_helper'
2
# Examples for the Kuxun hotel detail scraper. One Detail object is built when
# the describe block is evaluated and shared by every example.
describe "ESpider::Front::Kuxun::Detail" do
  detail = ESpider::Front::Kuxun::Detail.new('jiarijiudian-haerbin-hotel_1')

  # Construction yields a Detail
  it '实例化' do
    detail.should be_instance_of(ESpider::Front::Kuxun::Detail)
  end

  # Each entry is [example description, reader, expected class]
  [
    ['返回酒店名称', :name,       String],
    ['返回酒店地址', :address,    String],
    ['返回酒店描述', :intro,      String],
    ['返回酒店设施', :facilities, Array],
    ['返回酒店图片', :images,     Array]
  ].each do |description, reader, expected_class|
    it description do
      detail.public_send(reader).should be_instance_of(expected_class)
    end
  end

  # Coordinates come back as a two-element Array
  it '返回酒店经纬度' do
    detail.xy.should be_instance_of(Array)
    detail.xy.size.should == 2
  end
end
@@ -0,0 +1,74 @@
1
+ #coding: UTF-8
2
+ require 'spec_helper'
3
# Examples for the Qunar hotel detail scraper. One Hotel object is built when
# the describe block is evaluated and shared by every example.
describe "qunar信息抓取测试" do
  qunar_id = "beijing_city_2807"
  hotel = ESpider::Front::Qunar::Hotel.new(qunar_id)

  # Construction yields a Hotel
  it 'should 被正确实例化' do
    hotel.should be_an_instance_of(ESpider::Front::Qunar::Hotel)
  end

  # Hotel name
  it 'should 返回酒店名称' do
    hotel.name.should be_an_instance_of(String)
  end

  # Star rating lies between 0 and 5 inclusive
  it 'should 返回正确的酒店星级' do
    hotel.star.should >= 0
    hotel.star.should <= 5
  end

  # English city name
  it 'should 返回qunar酒店城市英文名' do
    hotel.citynameEn.should == 'beijing'
  end

  # Hotel address
  it 'should 返回qunar酒店地址' do
    hotel.address.should be_an_instance_of(String)
  end

  # Hotel telephone
  it 'should 返回qunar酒店电话' do
    hotel.phone.should be_an_instance_of(String)
  end

  # The examples below are disabled; they are kept verbatim.
  # it 'should 返回qunar酒店描述' do
  # hotel.desc.class.should == String
  # end
  # it 'should 返回qunar酒店开业日期' do
  # hotel.insttime.should == '2008年'
  # end
  # it 'should 返回qunar酒店概述' do
  # hotel.abstract_desc.should == '位于北京市东城区菊儿胡同、紧邻北二环主干线。网友评价说“小吃很多也便宜”。'
  # end
  # it 'should 返回qunar酒店品牌' do
  # hotel.brand.should == '汉庭酒店'
  # end
  # it 'should 返回qunar酒店参考价格' do
  # hotel.reference_price.should == '120'
  # end
  # it 'should 返回qunar酒店google经纬度' do
  # hotel.xy.should == ["39.939323", "116.4054"]
  # end
  # it 'should 返回qunar酒店设施' do
  # hotel.facilities.should == [{"name"=>"房间设施", "info"=>["宽带上网", "空调", "暖气", "24小时热水", "吹风机", "国际长途电话"]}, {"name"=>"酒店服务", "info"=>["接待外宾", "叫醒服务", "行李寄存", "洗衣服务", "租车", "早餐服务"]}, {"name"=>"酒店设施", "info"=>["无线上网公共区域", "停车场", "无烟房", "中式餐厅", "会议室", "商务中心"]}]
  # end
  # it 'should 返回qunar交通信息' do
  # hotel.traffic.should == [{"name"=>"战鼓楼", "distance"=>"594.056489643499"}, {"name"=>"蓬蒿剧场", "distance"=>"395.18496791364"}, {"name"=>"中央戏剧学院实验剧场", "distance"=>"425.239611398247"}, {"name"=>"和中堂足道(交道口店)", "distance"=>"488.729152256305"}, {"name"=>"周末相声俱乐部", "distance"=>"524.822582351895"}, {"name"=>"鲸鱼桌游吧", "distance"=>"790.623187595583"}, {"name"=>"一来二去桌游主题休闲吧", "distance"=>"988.900711965127"}, {"name"=>"东城区文化馆", "distance"=>"606.324700795644"}, {"name"=>"国话小剧场", "distance"=>"757.099876935085"}, {"name"=>"东城区图书馆", "distance"=>"661.53698062375"}]
  # end
  # it 'should 返回qunar酒店最后装修时间' do
  # hotel.decorate_date.should == ''
  # end
  # it 'should 返回qunar酒店评论' do
  # hotel.comments.should == ''
  # end
  # it 'should 返回qunar酒店评论总数' do
  # hotel.total_comment.should_not = '339'
  # end
  # it 'should 返回qunar酒店评论好评数' do
  # hotel.good_comment.should_not = '250'
  # end
  # it 'should 返回qunar酒店图片' do
  # hotel.images.should == ''
  # end
  # it 'should 返回qunar酒店评分' do
  # hotel.score.to_f.should < 10.0
  # end
  # it 'should 返回qunar酒店城市中文名' do
  # hotel.citynameCn.should == '北京'
  # end
  # it 'should 返回qunar酒店城市英文名' do
  # hotel.citynameEn.should == 'beijing'
  # end
end