espider 0.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +17 -0
  3. data/Gemfile +4 -0
  4. data/LICENSE.txt +22 -0
  5. data/README.md +29 -0
  6. data/Rakefile +1 -0
  7. data/espider.gemspec +28 -0
  8. data/lib/espider.rb +10 -0
  9. data/lib/espider/api/baidu/map.rb +9 -0
  10. data/lib/espider/api/baidu/map/place.rb +53 -0
  11. data/lib/espider/api/dianping.rb +13 -0
  12. data/lib/espider/api/dianping/base.rb +35 -0
  13. data/lib/espider/api/dianping/business.rb +38 -0
  14. data/lib/espider/api/dianping/coupon.rb +0 -0
  15. data/lib/espider/api/dianping/deal.rb +0 -0
  16. data/lib/espider/api/dianping/metadata.rb +28 -0
  17. data/lib/espider/api/dianping/params.rb +34 -0
  18. data/lib/espider/api/dianping/review.rb +0 -0
  19. data/lib/espider/exceptions.rb +2 -0
  20. data/lib/espider/front/baidu/map/hotel.rb +12 -0
  21. data/lib/espider/front/baidu/map/hotel/detail.rb +60 -0
  22. data/lib/espider/front/baidu/map/hotel/list.rb +46 -0
  23. data/lib/espider/front/dianping.rb +7 -0
  24. data/lib/espider/front/dianping/hotel.rb +10 -0
  25. data/lib/espider/front/dianping/hotel/detail.rb +80 -0
  26. data/lib/espider/front/dianping/hotel/list.rb +56 -0
  27. data/lib/espider/front/front.rb +23 -0
  28. data/lib/espider/front/kuxun.rb +7 -0
  29. data/lib/espider/front/kuxun/detail.rb +100 -0
  30. data/lib/espider/front/qunar.rb +9 -0
  31. data/lib/espider/front/qunar/base.rb +25 -0
  32. data/lib/espider/front/qunar/hotel.rb +173 -0
  33. data/lib/espider/front/qunar/rank.rb +60 -0
  34. data/lib/espider/front/ta.rb +27 -0
  35. data/lib/espider/front/ta/advisor.rb +12 -0
  36. data/lib/espider/front/ta/daodao.rb +16 -0
  37. data/lib/espider/version.rb +3 -0
  38. data/spec/advisor_front_rank_spec.rb +11 -0
  39. data/spec/baidu_map_api_spec.rb +31 -0
  40. data/spec/baidu_map_front_spec.rb +53 -0
  41. data/spec/dadao_front_rank_spec.rb +11 -0
  42. data/spec/dianping_api_business_spec.rb +29 -0
  43. data/spec/dianping_api_metadata_spec.rb +44 -0
  44. data/spec/dianping_front_detail_spec.rb +27 -0
  45. data/spec/dianping_front_list_spec.rb +17 -0
  46. data/spec/kuxun_front_detail_spec.rb +26 -0
  47. data/spec/qunar_front_detail_spec.rb +74 -0
  48. data/spec/qunar_front_rank_alive_spec.rb +12 -0
  49. data/spec/qunar_front_rank_spec.rb +28 -0
  50. data/spec/spec_helper.rb +1 -0
  51. metadata +176 -0
@@ -0,0 +1,60 @@
1
+ require 'nokogiri'
2
+ require 'headless'
3
+ require 'capybara'
4
+ require 'capybara/dsl'
5
module ESpider
  module Front
    module Qunar
      # Loads the price rows of one Qunar hotel and answers rank queries on them.
      #
      # Relies on names that are not defined in this file: +BASEURI+,
      # +parse_mixkey+, +@cityurl+, +@hotelcode+ and +@HotelSEQ+ (presumably
      # provided by +Base+ -- TODO confirm), the two Qunar exception classes,
      # and +HTTParty+, +JSON+ and +Date+ being loaded by the caller.
      class Rank < Base
        include Capybara::DSL

        # Opens the hotel page in a Selenium-driven browser running under a
        # headless display and stores the browser's cookies in +@cookies+
        # as a name => value hash.
        #
        # Note that this changes process-wide Capybara settings
        # (driver, app host, wait time).
        def set_cookies
          headless = Headless.new
          headless.start
          Capybara.current_driver = :selenium
          Capybara.app_host = BASEURI
          # Capybara 2.5 renamed default_wait_time to default_max_wait_time and
          # later removed the old name; use whichever this Capybara provides.
          if Capybara.respond_to?(:default_max_wait_time=)
            Capybara.default_max_wait_time = 10
          else
            Capybara.default_wait_time = 10
          end
          # Connection errors (e.g. Errno::ECONNREFUSED) propagate to the caller.
          visit "/city/#{@cityurl}/dt-#{@hotelcode}/"
          browser_cookies = Capybara.current_session.driver.browser.manage.all_cookies
          @cookies = browser_cookies.each_with_object({}) do |cookie, jar|
            jar[cookie[:name]] = cookie[:value]
          end
        end

        # Requests render/detailV2.jsp for a one-night stay starting two days
        # from today and stores the values of the response's "result" object
        # in +@ranks+.
        #
        # @raise [QunarMixKeyParsedException] when +parse_mixkey+ returns nil
        # @raise [QunarIpBlockException] when the response carries errcode 110
        def load_price_ajax
          mix_key = parse_mixkey
          # Check the key first so a missing key does not cost a browser launch.
          raise QunarMixKeyParsedException if mix_key.nil?
          set_cookies

          uri = URI(File.join(BASEURI, 'render', 'detailV2.jsp'))
          params = {
            :fromDate => Date.today + 2,
            :toDate   => Date.today + 3,
            :cityurl  => @cityurl,
            :HotelSEQ => @HotelSEQ,
            :mixKey   => mix_key
          }
          uri.query = URI.encode_www_form(params)

          headers = {
            'User-Agent'      => 'Mozilla/5.0 (Windows NT 5.1; rv:26.0) Gecko/20100101 Firefox/26.0',
            'Connection'      => 'keep-alive',
            'Accept-Encoding' => 'gzip, deflate',
            'Cookie'          => @cookies.map { |name, value| "#{name}=#{value}" }.join('; ')
          }

          res = HTTParty.get(uri.to_s, :headers => headers)
          json_ajax = parse_json_from_response_body(res.body)
          raise QunarIpBlockException, json_ajax if json_ajax['errcode'] == 110

          # Drop the memoized lists so #rooms and #otas reflect the new rows.
          @rooms = nil
          @otas = nil
          @ranks = json_ajax['result'].values
        end

        # Looks up the first loaded row whose element 3 equals +room+, element 6
        # equals +ota+ and element 9 is the integer 1, and returns that row's
        # element 15 plus one.
        # NOTE(review): element 15 looks like a zero-based position and element 9
        # like an "available" flag -- confirm against the Qunar response format.
        #
        # @param ota [Object] value compared (with eql?) against row element 6
        # @param room [Object] value compared (with eql?) against row element 3
        # @return [Integer, nil] nil when no row matches
        def rank(ota, room)
          row = @ranks.find { |r| r[3].eql?(room) && r[6].eql?(ota) && r[9].eql?(1) }
          row && row[15] + 1
        end

        # @return [Array] distinct values of element 3 across the loaded rows (memoized)
        def rooms
          @rooms ||= @ranks.map { |row| row[3] }.uniq
        end

        # @return [Array] distinct values of element 6 across the loaded rows (memoized)
        def otas
          @otas ||= @ranks.map { |row| row[6] }.uniq
        end

        # Parses a JSON body that may be wrapped in parentheses, optionally with
        # a callback name in front and a trailing semicolon. Only the wrapper is
        # removed; parentheses inside JSON string values are left intact.
        #
        # @param res [String] raw response body
        # @return [Hash, Array] the parsed JSON document
        # @raise [JSON::ParserError] when the payload is not valid JSON
        def parse_json_from_response_body(res)
          payload = res.strip
          wrapped = payload.match(/\A[^\[{("]*\((.*)\)\s*;?\z/m)
          JSON.parse(wrapped ? wrapped[1] : payload)
        end
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
module ESpider
  module Front
    module TA
      # Base scraper for TripAdvisor-style pages. Subclasses set +@base_url+
      # and then call +super+; the page at base_url + ta_id is downloaded once
      # in the constructor. Requires HTTParty to be loaded by the caller.
      class TripAdvisor
        attr_accessor :base_url

        # @return [Integer] HTTP status code of the page fetched by #initialize
        attr_reader :code

        # @param ta_id [#to_s] path fragment appended to +base_url+
        # @raise [ArgumentError] when +base_url+ has not been set beforehand
        def initialize(ta_id)
          raise ArgumentError, 'base_url must be set before TripAdvisor#initialize is called' if base_url.nil?

          res = HTTParty.get("#{base_url}#{ta_id}")
          @code = res.code
          # Work on a copy of the raw body so the response object is not mutated.
          @page = res.body.to_s.dup.force_encoding("UTF-8")
        end

        # Collects, for every line ending in a `Vendor(...);` call, the quoted
        # text that follows the first `}),`. Vendor calls that do not contain
        # that pattern are skipped instead of raising.
        #
        # @return [Array<String>] extracted names in page order
        def rank
          @page.scan(/(Vendor\(.*\);)$/).each_with_object([]) do |(vendor_call), names|
            quoted = vendor_call.match(/\}\),\s*\".*\",/)
            next unless quoted

            names << quoted[0].sub("}), \"", "").sub("\",", "")
          end
        end
      end
    end
  end
end
26
+ require 'espider/front/ta/daodao'
27
+ require 'espider/front/ta/advisor'
@@ -0,0 +1,12 @@
1
module ESpider
  module Front
    module TA
      # TripAdvisor scraper pointed at www.tripadvisor.com.
      class Advisor < TA::TripAdvisor
        # Sets the site root, then lets the parent class fetch the page.
        #
        # @param ta_id [String] path fragment appended to the base URL by the parent
        def initialize(ta_id)
          self.base_url = "http://www.tripadvisor.com/"
          super
        end
      end
    end
  end
end
@@ -0,0 +1,16 @@
1
module ESpider
  module Front
    module TA
      # TripAdvisor scraper pointed at www.daodao.com.
      class Daodao < TA::TripAdvisor
        # Sets the site root, then lets the parent class fetch the page.
        #
        # @param ta_id [String] path fragment appended to the base URL by the parent
        def initialize(ta_id)
          self.base_url = "http://www.daodao.com/"
          super
        end

        # Returns only the first half of the list produced by the parent class.
        # NOTE(review): presumably the daodao page lists every vendor twice -- confirm.
        #
        # @return [Array<String>]
        def rank
          all_names = super
          all_names.first(all_names.size / 2)
        end
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module ESpider
  # Gem version string. Frozen so the constant cannot be mutated in place.
  VERSION = "0.5.8".freeze
end
@@ -0,0 +1,11 @@
1
# coding: utf-8
# The magic comment above is needed on Ruby 1.9 because the example names
# below contain non-ASCII characters.
require 'spec_helper'

# Smoke tests for ESpider::Front::TA::Advisor. Each example builds a new
# Advisor, which fetches the page in its constructor.
describe 'Advisor' do
  # "instantiates correctly"
  it '正确实例化' do
    ad = ESpider::Front::TA::Advisor.new('199326')
    ad.should be_an_instance_of ESpider::Front::TA::Advisor
  end

  # "parses the ranking data correctly"
  it '正确解析排名数据' do
    ad = ESpider::Front::TA::Advisor.new('199326')
    ad.rank.should be_an_instance_of Array
  end
end
@@ -0,0 +1,31 @@
1
#coding:utf-8
require 'spec_helper'

# "Baidu Map API tests". All examples share one Place client created while the
# file is loaded, so some examples depend on requests made by earlier ones.
describe "百度地图api测试" do
  # NOTE(review): API key committed to the repository -- consider reading it
  # from the environment instead.
  place = ESpider::Baidu::Map::API::Place.new('KNFShrAjm3BvgNEsNLfPSlAP')

  # "should be instantiated"
  it "should 被实例化" do
    place.class.should == ESpider::Baidu::Map::API::Place
  end

  # "search within a city"
  it "城市内检索请求" do
    place.search_by_city('酒店', '北京')
    place.next_page.class.should == String
  end

  # "search within a rectangle"
  it "矩形框检索请求" do
    place.search_by_rect('酒店', '39.915,116.404,39.975,116.414')
    place.next_page.class.should == String
  end

  # "search within a circle"
  # NOTE(review): the name says circle search but the body repeats the city
  # search -- looks like a copy/paste slip; confirm the intended Place method.
  it "圆形框检索请求" do
    place.search_by_city('酒店', '北京')
    place.next_page.class.should == String
  end

  # "HTTP status code is 200" (relies on a request made by an earlier example)
  it "http状态码为200" do
    place.page_code.should == 200
  end

  # "pages correctly"
  it "正确翻页" do
    place.next_page.class.should == String
    place.page_code.should == 200
  end

  # "gets the total number of results"
  it "获得查询结果总量" do
    place.search_by_city('酒店', '北京')
    # Integer instead of Fixnum: Fixnum was deprecated in Ruby 2.4 and removed
    # in 3.2, and every Fixnum is an Integer on older Rubies.
    place.total.should be_a_kind_of Integer
  end
end
@@ -0,0 +1,53 @@
1
#coding:utf-8
require 'spec_helper'

# "Baidu Map hotel scraping tests". The list and detail objects are created
# once while the file is loaded and shared by every example.
describe "百度地图酒店信息抓取测试" do
  hotel_list = ESpider::Front::Baidu::Map::Hotel::List.new('黔西南')
  hotels = hotel_list.list
  hotels_count = hotel_list.total
  hotel = ESpider::Front::Baidu::Map::Hotel::Detail.new('1258371975449935871')

  # "should be instantiated"
  it "should 被实例化" do
    hotel_list.should be_an_instance_of ESpider::Front::Baidu::Map::Hotel::List
    hotel.should be_an_instance_of ESpider::Front::Baidu::Map::Hotel::Detail
  end

  # "should parse the number of hotels correctly"
  it "should 正确解析酒店数量" do
    # Integer instead of Fixnum: Fixnum was deprecated in Ruby 2.4 and removed
    # in 3.2, and every Fixnum is an Integer on older Rubies.
    hotels_count.should be_a_kind_of Integer
  end

  # "should return the hotel list"
  it "should 正确返回酒店列表信息" do
    hotels.should be_an_instance_of Array
    hotels[0].should be_an_instance_of Hash
  end

  # "should return the hotel name"
  it "should 返回酒店名称" do
    hotel.hotel_name.should be_an_instance_of String
  end

  # "should return the hotel address"
  it "should 返回酒店地址" do
    hotel.hotel_addr.should be_an_instance_of String
  end

  # "should return the hotel phone number"
  it "should 返回酒店电话" do
    hotel.hotel_tel.should be_an_instance_of String
  end

  # "should return the hotel latitude/longitude"
  it "should 返回酒店经纬度" do
    hotel.hotel_geo.should be_an_instance_of Array
  end

  # "should return the hotel star rating"
  it "should 返回酒店星级" do
    hotel.hotel_star.should be_an_instance_of String
  end

  # "should return the hotel category"
  it "should 返回酒店类别" do
    hotel.hotel_category.should be_an_instance_of String
  end

  # "should return the hotel price"
  it "should 返回酒店价格" do
    hotel.hotel_price.should be_an_instance_of String
  end

  # "should return the hotel facilities"
  it "should 返回酒店设施" do
    hotel.hotel_facility.should be_an_instance_of String
  end

  # "should return the hotel short comments"
  it "should 返回酒店短评" do
    hotel.hotel_short_comm.should be_an_instance_of String
  end

  # "should return the hotel reviews"
  it "should 返回酒店评论" do
    hotel.hotel_review.should be_an_instance_of Array
  end

  # "should return the hotel image URLs"
  it "should 返回酒店图片地址" do
    hotel.hotel_image('669775d035aa1b42fd0eb008').should be_an_instance_of Array
  end
end
@@ -0,0 +1,11 @@
1
# coding: utf-8
# The magic comment above is needed on Ruby 1.9 because the example names
# below contain non-ASCII characters.
require 'spec_helper'

# Smoke tests for ESpider::Front::TA::Daodao. Each example builds a new
# Daodao, which fetches the page in its constructor.
describe 'Daodao' do
  # "instantiates correctly"
  it '正确实例化' do
    dd = ESpider::Front::TA::Daodao.new('199326')
    dd.should be_an_instance_of ESpider::Front::TA::Daodao
  end

  # "parses the ranking data correctly"
  it '正确解析排名数据' do
    dd = ESpider::Front::TA::Daodao.new('199326')
    dd.rank.should be_an_instance_of Array
  end
end
@@ -0,0 +1,29 @@
1
# coding: utf-8
# The magic comment above is needed on Ruby 1.9 because the strings below
# contain non-ASCII characters.
require 'spec_helper'

# "Dianping API tests, business API". Only the get_single_business example is
# active; the find_businesses examples below are disabled.
# NOTE(review): the app key and secret are committed to the repository --
# consider reading them from the environment instead.
describe "大众点评api测试,business类api" do
  # it '正确实例化' do
  #   busi = ESpider::API::Dianping::Business.new('16935220','0c3d163c8fff4639a3372aea71df52a1')
  #   busi.class.should == ESpider::API::Dianping::Business
  # end
  # it '正确调用find_businesses接口,传入经纬度信息' do
  #   busi = ESpider::API::Dianping::Business.new('16935220','0c3d163c8fff4639a3372aea71df52a1')
  #   busi.find_businesses('上海','','31.18268013000488','121.42769622802734').should == true
  #   busi.get.class.should == String
  # end
  # it '正确调用find_businesses接口,并翻页' do
  #   busi = ESpider::API::Dianping::Business.new('16935220','0c3d163c8fff4639a3372aea71df52a1')
  #   busi.find_businesses('上海','','31.18268013000488','121.42769622802734').should == true
  #   busi.get.class.should == String
  #   p busi.get
  #   busi.next_page.class.should == String
  # end
  # it '正确调用find_businesses接口,加入分类信息' do
  #   busi = ESpider::API::Dianping::Business.new('16935220','0c3d163c8fff4639a3372aea71df52a1')
  #   busi.find_businesses('上海','','31.18268013000488','121.42769622802734').should == true
  #   busi.set_category('酒店')
  #   busi.next_page.class.should == String
  # end

  # "calls get_single_business and returns a single business's information"
  it '正确调用/get_single_business接口,返回单个商户信息' do
    busi = ESpider::API::Dianping::Business.new('16935220','0c3d163c8fff4639a3372aea71df52a1')
    busi.get_single_business('8733744').should be_instance_of String
  end
end
@@ -0,0 +1,44 @@
1
#coding:utf-8
require 'spec_helper'

# "Dianping API tests, metadata API". One Metadata client is created while the
# file is loaded; each endpoint example calls one get_* method on it and then
# checks that #get returns a String.
describe "大众点评api测试,metadata类api" do
  mdata = ESpider::API::Dianping::Metadata.new('16935220','0c3d163c8fff4639a3372aea71df52a1')

  # "instantiates correctly"
  it '正确实例化' do
    mdata.class.should == ESpider::API::Dianping::Metadata
  end

  # Example name => Metadata method it exercises. The names read "get the
  # latest list of cities / city districts / categories that support
  # business / deal / coupon search".
  endpoint_examples = [
    ['获取支持商户搜索的最新城市列表',         :get_cities_with_businesses],
    ['获取支持商户搜索的最新城市下属区域列表 ', :get_regions_with_businesses],
    ['获取支持商户搜索的最新分类列表 ',         :get_categories_with_businesses],
    ['获取支持团购搜索的最新城市列表 ',         :get_cities_with_deals],
    ['获取支持团购搜索的最新城市下属区域列表 ', :get_regions_with_deals],
    ['获取支持团购搜索的最新分类列表 ',         :get_categories_with_deals],
    ['获取支持优惠券搜索的最新城市列表 ',       :get_cities_with_coupons],
    ['获取支持优惠券搜索的最新城市下属区域列表 ', :get_regions_with_coupons],
    ['获取支持优惠券搜索的最新分类列表 ',       :get_categories_with_coupons]
  ]

  endpoint_examples.each do |example_name, endpoint|
    it example_name do
      mdata.public_send(endpoint)
      mdata.get.class.should == String
    end
  end
end
@@ -0,0 +1,27 @@
1
# coding: utf-8
# The magic comment above is needed on Ruby 1.9 because the example names
# below contain non-ASCII characters.
require 'spec_helper'

# Type-level smoke tests for the Dianping hotel detail scraper. Every example
# builds its own Detail object from a hotel id.
describe "ESpider::Front::Dianping::Hotel::Detail" do
  # "instantiation"
  it '实例化' do
    detail = ESpider::Front::Dianping::Hotel::Detail.new('2136037')
    detail.should be_instance_of ESpider::Front::Dianping::Hotel::Detail
  end

  # "returns the hotel name"
  it '返回酒店名称' do
    detail = ESpider::Front::Dianping::Hotel::Detail.new('2136037')
    detail.name.should be_instance_of String
  end

  # "returns the hotel description"
  it '返回酒店描述' do
    detail = ESpider::Front::Dianping::Hotel::Detail.new('2869420')
    detail.intro.should be_instance_of String
  end

  # "returns the hotel facilities"
  it '返回酒店设施' do
    detail = ESpider::Front::Dianping::Hotel::Detail.new('2869420')
    detail.facilities.should be_instance_of Array
  end

  # "returns the hotel images"
  it '返回酒店图片' do
    detail = ESpider::Front::Dianping::Hotel::Detail.new('2869420')
    detail.images.should be_instance_of Array
  end

  # "returns the hotel comments"
  it '返回酒店评论' do
    detail = ESpider::Front::Dianping::Hotel::Detail.new('2869420')
    detail.comments.should be_instance_of Array
  end
end
@@ -0,0 +1,17 @@
1
# coding: utf-8
# The magic comment above is needed on Ruby 1.9 because the example names
# below contain non-ASCII characters.
require 'spec_helper'

# Smoke tests for the Dianping hotel list scraper. `list` is created once
# while the file is loaded and shared by the first three examples.
describe "ESpider::Front::Dianping::Hotel::List" do
  list = ESpider::Front::Dianping::Hotel::List.new('1')

  # "instantiation"
  it '实例化' do
    list.should be_instance_of ESpider::Front::Dianping::Hotel::List
  end

  # "returns the name"
  # NOTE(review): this repeats the instantiation check and never reads a name
  # -- confirm which List method was meant to be exercised here.
  it '返回名称' do
    list.should be_instance_of ESpider::Front::Dianping::Hotel::List
  end

  # "returns the hotel list"
  it '返回酒店列表' do
    list.hotels(1).should be_instance_of Array
  end

  # "counts the hotels correctly"
  it '正确计算酒店数量' do
    # Use a separate local: assigning to `list` here would overwrite the
    # describe-level variable that the other examples close over.
    filtered_list = ESpider::Front::Dianping::Hotel::List.new('2','1489')
    # Integer instead of Fixnum: Fixnum was deprecated in Ruby 2.4 and removed
    # in 3.2, and every Fixnum is an Integer on older Rubies.
    filtered_list.total.should be_a_kind_of Integer
  end
end
@@ -0,0 +1,26 @@
1
# coding: utf-8
# The magic comment above is needed on Ruby 1.9 because the example names
# below contain non-ASCII characters.
require 'spec_helper'

# Type-level smoke tests for the Kuxun hotel detail scraper. One Detail object
# is created while the file is loaded and shared by every example.
describe "ESpider::Front::Kuxun::Detail" do
  detail = ESpider::Front::Kuxun::Detail.new('jiarijiudian-haerbin-hotel_1')

  # "instantiation"
  it '实例化' do
    detail.should be_instance_of ESpider::Front::Kuxun::Detail
  end

  # "returns the hotel name"
  it '返回酒店名称' do
    detail.name.should be_instance_of String
  end

  # "returns the hotel address"
  it '返回酒店地址' do
    detail.address.should be_instance_of String
  end

  # "returns the hotel description"
  it '返回酒店描述' do
    detail.intro.should be_instance_of String
  end

  # "returns the hotel facilities"
  it '返回酒店设施' do
    detail.facilities.should be_instance_of Array
  end

  # "returns the hotel images"
  it '返回酒店图片' do
    detail.images.should be_instance_of Array
  end

  # "returns the hotel latitude/longitude"
  it '返回酒店经纬度' do
    detail.xy.should be_instance_of Array
    detail.xy.size.should == 2
  end
end
@@ -0,0 +1,74 @@
1
#coding: UTF-8
require 'spec_helper'

# "Qunar scraping tests". One Hotel object is created while the file is loaded
# and shared by every example. Only the first six examples are active; the
# rest are disabled.
describe "qunar信息抓取测试" do
  qunar_id = "beijing_city_2807"
  hotel = ESpider::Front::Qunar::Hotel.new(qunar_id)

  # "should be instantiated correctly"
  it 'should 被正确实例化' do
    hotel.should be_an_instance_of ESpider::Front::Qunar::Hotel
  end

  # "should return the hotel name"
  it 'should 返回酒店名称' do
    hotel.name.should be_an_instance_of String
  end

  # "should return a valid star rating"
  it 'should 返回正确的酒店星级' do
    hotel.star.should >= 0
    hotel.star.should <= 5
  end

  # "should return the hotel's city name in English"
  it 'should 返回qunar酒店城市英文名' do
    hotel.citynameEn.should == 'beijing'
  end

  # "should return the hotel address"
  it 'should 返回qunar酒店地址' do
    hotel.address.should be_an_instance_of String
  end

  # "should return the hotel phone number"
  it 'should 返回qunar酒店电话' do
    hotel.phone.should be_an_instance_of String
  end

  # it 'should 返回qunar酒店描述' do
  #   hotel.desc.class.should == String
  # end
  # it 'should 返回qunar酒店开业日期' do
  #   hotel.insttime.should == '2008年'
  # end
  # it 'should 返回qunar酒店概述' do
  #   hotel.abstract_desc.should == '位于北京市东城区菊儿胡同、紧邻北二环主干线。网友评价说“小吃很多也便宜”。'
  # end
  # it 'should 返回qunar酒店品牌' do
  #   hotel.brand.should == '汉庭酒店'
  # end
  # it 'should 返回qunar酒店参考价格' do
  #   hotel.reference_price.should == '120'
  # end
  # it 'should 返回qunar酒店google经纬度' do
  #   hotel.xy.should == ["39.939323", "116.4054"]
  # end
  # it 'should 返回qunar酒店设施' do
  #   hotel.facilities.should == [{"name"=>"房间设施", "info"=>["宽带上网", "空调", "暖气", "24小时热水", "吹风机", "国际长途电话"]}, {"name"=>"酒店服务", "info"=>["接待外宾", "叫醒服务", "行李寄存", "洗衣服务", "租车", "早餐服务"]}, {"name"=>"酒店设施", "info"=>["无线上网公共区域", "停车场", "无烟房", "中式餐厅", "会议室", "商务中心"]}]
  # end
  # it 'should 返回qunar交通信息' do
  #   hotel.traffic.should == [{"name"=>"战鼓楼", "distance"=>"594.056489643499"}, {"name"=>"蓬蒿剧场", "distance"=>"395.18496791364"}, {"name"=>"中央戏剧学院实验剧场", "distance"=>"425.239611398247"}, {"name"=>"和中堂足道(交道口店)", "distance"=>"488.729152256305"}, {"name"=>"周末相声俱乐部", "distance"=>"524.822582351895"}, {"name"=>"鲸鱼桌游吧", "distance"=>"790.623187595583"}, {"name"=>"一来二去桌游主题休闲吧", "distance"=>"988.900711965127"}, {"name"=>"东城区文化馆", "distance"=>"606.324700795644"}, {"name"=>"国话小剧场", "distance"=>"757.099876935085"}, {"name"=>"东城区图书馆", "distance"=>"661.53698062375"}]
  # end
  # it 'should 返回qunar酒店最后装修时间' do
  #   hotel.decorate_date.should == ''
  # end
  # it 'should 返回qunar酒店评论' do
  #   hotel.comments.should == ''
  # end
  # it 'should 返回qunar酒店评论总数' do
  #   hotel.total_comment.should_not = '339'
  # end
  # it 'should 返回qunar酒店评论好评数' do
  #   hotel.good_comment.should_not = '250'
  # end
  # it 'should 返回qunar酒店图片' do
  #   hotel.images.should == ''
  # end
  # it 'should 返回qunar酒店评分' do
  #   hotel.score.to_f.should < 10.0
  # end
  # it 'should 返回qunar酒店城市中文名' do
  #   hotel.citynameCn.should == '北京'
  # end
  # it 'should 返回qunar酒店城市英文名' do
  #   hotel.citynameEn.should == 'beijing'
  # end
end