qunar 0.1.37 → 0.1.39
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/qunar/hotel_list.rb +14 -88
- data/lib/qunar/version.rb +1 -1
- data/spec/hotel_list_spec.rb +1 -1
- data/spec/spec_helper.rb +0 -3
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 5b1ce75f252b6fec30181443dcd7e223af33b413
|
|
4
|
+
data.tar.gz: b41eb8b95cf6ef06b8dae33e8295c055f5cb4592
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 6f8ee165e849bdc33790377a367a010a9560e97db86e56e34d38b125f5871fcf3f7c6bbfa80d5d83b38395c355b043cbb6696fa175e3d2f7fc54115a10fec0c5
|
|
7
|
+
data.tar.gz: 2fa5b86fc6466514fd0adad81b6349cd75696d5f5aa951699b28b186a268bc63faab870dd8c441cc4e3bf2a2ad1a147045e774deb66bb56b1948f1b477bcb607
|
data/lib/qunar/hotel_list.rb
CHANGED
|
@@ -1,102 +1,28 @@
|
|
|
1
1
|
module Qunar
|
|
2
2
|
class HotelList
|
|
3
|
-
|
|
3
|
+
include Capybara::DSL
|
|
4
4
|
def initialize(city_url)
|
|
5
5
|
@city = city_url
|
|
6
|
+
Capybara.current_driver = :webkit
|
|
7
|
+
Capybara.default_selector = :xpath
|
|
8
|
+
Capybara.app_host ='http://touch.qunar.com'
|
|
9
|
+
page.driver.header 'User-Agent' ,"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.91 Safari/537.36"
|
|
6
10
|
end
|
|
7
11
|
|
|
8
12
|
# Call ajax for more hotels of each page
|
|
9
13
|
def hotels_in_ajax(page_id)
|
|
10
14
|
sleep(2) # sleep for 2 seconds
|
|
11
|
-
@
|
|
12
|
-
page
|
|
13
|
-
@html = Nokogiri::HTML page
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
hotels =
|
|
17
|
-
title = hotel.at("./div
|
|
15
|
+
@uri = "/h5/hotel/hotellist?cityUrl=#{@city}&page=#{page_id}&tpl=hotel.hotelListTpl"
|
|
16
|
+
page.visit @uri
|
|
17
|
+
@html = Nokogiri::HTML page.html
|
|
18
|
+
qn_list = @html.xpath("//li[@class='qn_bt']")
|
|
19
|
+
# Collect hotels
|
|
20
|
+
hotels = qn_list.collect { |hotel|
|
|
21
|
+
title = hotel.at("./div/div[@class='title']")
|
|
18
22
|
item = { :city=>@city, :hotel_id=>hotel['data-seq'], :hotel_name=>title.nil? ? nil : title.text.strip }
|
|
19
23
|
}
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
end
|
|
23
|
-
|
|
24
|
-
=begin
|
|
25
|
-
include Capybara::DSL
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
def initialize(city_en)
|
|
29
|
-
raise ArgumentError, "initizlize: Argument should be string " if !city_en.instance_of?(String)
|
|
30
|
-
#Global Setting
|
|
31
|
-
Capybara.run_server = false
|
|
32
|
-
Capybara.current_driver = :webkit #selenium/poltergeist
|
|
33
|
-
|
|
34
|
-
# Use XPath as the default selector for the find method
|
|
35
|
-
Capybara.default_selector = :xpath
|
|
36
|
-
|
|
37
|
-
Capybara.app_host = 'http://hotel.qunar.com'
|
|
38
|
-
# Set user agent
|
|
39
|
-
page.driver.header 'User-Agent', "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)"
|
|
40
|
-
|
|
41
|
-
# For poltergeist
|
|
42
|
-
#page.driver.headers={'User-Agent'=> "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)"}
|
|
43
|
-
|
|
44
|
-
@city = city_en
|
|
45
|
-
# log msg
|
|
46
|
-
puts "正在访问: http://hotel.qunar.com/city/#{@city}"
|
|
47
|
-
page.visit "/city/#{@city}"
|
|
48
|
-
@page = Nokogiri::HTML page.html
|
|
49
|
-
end
|
|
50
|
-
|
|
51
|
-
# Wait for ajax, a selector must be passed as capybara will wait for the element
|
|
52
|
-
def wait_for_ajax(selector)
|
|
53
|
-
puts "\e[32m Waiting for ajax...\e[0m"
|
|
54
|
-
# Retry 3 times at most, if still not found, give up
|
|
55
|
-
limiter = 0
|
|
56
|
-
begin
|
|
57
|
-
page.find(selector)
|
|
58
|
-
rescue Capybara::ElementNotFound
|
|
59
|
-
limiter+=1
|
|
60
|
-
retry if limiter<3
|
|
61
|
-
end
|
|
62
|
-
end
|
|
63
|
-
|
|
64
|
-
def hotels_in_page
|
|
65
|
-
# Scroll down and trigger ajax
|
|
66
|
-
page.execute_script "window.scrollTo(0, document.body.scrollHeight)"
|
|
67
|
-
self.wait_for_ajax("//span[@class='hotel_num js_hotel_num'][text()='30']")
|
|
68
|
-
hotels = []
|
|
69
|
-
@page = Nokogiri::HTML page.driver.browser.body
|
|
70
|
-
@page.css("div.b_result_box").each { |hotel|
|
|
71
|
-
item = {:city => @city, :hotel_id => hotel['data-id']}
|
|
72
|
-
item[:hotel_name] = hotel.search("a[@class='e_title js_list_name']").first.text
|
|
73
|
-
hotels << item
|
|
74
|
-
}
|
|
75
|
-
hotels
|
|
76
|
-
end
|
|
77
|
-
|
|
78
|
-
# click `下一页` to navigate to next page
|
|
79
|
-
def click_next?
|
|
80
|
-
next_button = page.all("//li[@class='item next ']/a").first
|
|
81
|
-
return false if next_button.nil?
|
|
82
|
-
puts "Next page"
|
|
83
|
-
next_button.click
|
|
84
|
-
self.wait_for_ajax("//span[@class='hotel_num js_hotel_num'][text()='15']")
|
|
85
|
-
return true
|
|
86
|
-
end
|
|
87
|
-
|
|
88
|
-
# Get the page number
|
|
89
|
-
def page_count
|
|
90
|
-
a_node = @page.css("li.pager_count")
|
|
91
|
-
a_node.empty? ? 0 : a_node.first.text.split('/')[1].to_i
|
|
92
|
-
end
|
|
93
|
-
|
|
94
|
-
# Get the screen_shot
|
|
95
|
-
def screen_shot
|
|
96
|
-
page.save_screenshot("#{Time.now}.png")
|
|
97
|
-
end
|
|
98
|
-
=end
|
|
99
|
-
|
|
24
|
+
hotels.empty? ? nil : hotels
|
|
25
|
+
end
|
|
100
26
|
# Html file
|
|
101
27
|
def html
|
|
102
28
|
@html
|
data/lib/qunar/version.rb
CHANGED
data/spec/hotel_list_spec.rb
CHANGED
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: qunar
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.37
|
|
4
|
+
version: 0.1.39
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- JusticeChow
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2015-02-
|
|
11
|
+
date: 2015-02-09 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|