hostelify 0.4.4 → 0.4.5
Sign up to get free protection for your applications and to get access to all the features.
- data/Manifest +2 -0
- data/Rakefile +1 -1
- data/hostelify.gemspec +4 -4
- data/lib/hostelify/hostelbookers-old.rb +194 -0
- data/lib/hostelify/hostelbookers.rb +34 -111
- data/lib/hostelify/hostelworldmonkey.rb +205 -0
- data/lib/hostelify.rb +1 -0
- data/spec/hb_find_by_hostel.spec +1 -1
- metadata +7 -3
data/Manifest
CHANGED
@@ -5,9 +5,11 @@ hostelify.gemspec
|
|
5
5
|
lib/hostelify.rb
|
6
6
|
lib/hostelify/gomio.rb
|
7
7
|
lib/hostelify/hostel.rb
|
8
|
+
lib/hostelify/hostelbookers-old.rb
|
8
9
|
lib/hostelify/hostelbookers.rb
|
9
10
|
lib/hostelify/hostelify.rb
|
10
11
|
lib/hostelify/hostelworld.rb
|
12
|
+
lib/hostelify/hostelworldmonkey.rb
|
11
13
|
lib/items.rb
|
12
14
|
lib/test.rb
|
13
15
|
spec/_helper.rb
|
data/Rakefile
CHANGED
data/hostelify.gemspec
CHANGED
@@ -2,15 +2,15 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = %q{hostelify}
|
5
|
-
s.version = "0.4.4"
|
5
|
+
s.version = "0.4.5"
|
6
6
|
|
7
7
|
s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
|
8
8
|
s.authors = ["Holden Thomas"]
|
9
|
-
s.date = %q{2010-05-
|
9
|
+
s.date = %q{2010-05-20}
|
10
10
|
s.description = %q{Simple Hostel Webscrapper.}
|
11
11
|
s.email = %q{holden.thomas@gmail.com}
|
12
|
-
s.extra_rdoc_files = ["README.rdoc", "lib/hostelify.rb", "lib/hostelify/gomio.rb", "lib/hostelify/hostel.rb", "lib/hostelify/hostelbookers.rb", "lib/hostelify/hostelify.rb", "lib/hostelify/hostelworld.rb", "lib/items.rb", "lib/test.rb"]
|
13
|
-
s.files = ["Manifest", "README.rdoc", "Rakefile", "hostelify.gemspec", "lib/hostelify.rb", "lib/hostelify/gomio.rb", "lib/hostelify/hostel.rb", "lib/hostelify/hostelbookers.rb", "lib/hostelify/hostelify.rb", "lib/hostelify/hostelworld.rb", "lib/items.rb", "lib/test.rb", "spec/_helper.rb", "spec/hb_find_by_hostel.spec", "spec/hb_find_hostels.spec", "spec/helper.rb", "spec/hw_find_by_hostel.spec", "spec/hw_find_hostels.spec"]
|
12
|
+
s.extra_rdoc_files = ["README.rdoc", "lib/hostelify.rb", "lib/hostelify/gomio.rb", "lib/hostelify/hostel.rb", "lib/hostelify/hostelbookers-old.rb", "lib/hostelify/hostelbookers.rb", "lib/hostelify/hostelify.rb", "lib/hostelify/hostelworld.rb", "lib/hostelify/hostelworldmonkey.rb", "lib/items.rb", "lib/test.rb"]
|
13
|
+
s.files = ["Manifest", "README.rdoc", "Rakefile", "hostelify.gemspec", "lib/hostelify.rb", "lib/hostelify/gomio.rb", "lib/hostelify/hostel.rb", "lib/hostelify/hostelbookers-old.rb", "lib/hostelify/hostelbookers.rb", "lib/hostelify/hostelify.rb", "lib/hostelify/hostelworld.rb", "lib/hostelify/hostelworldmonkey.rb", "lib/items.rb", "lib/test.rb", "spec/_helper.rb", "spec/hb_find_by_hostel.spec", "spec/hb_find_hostels.spec", "spec/helper.rb", "spec/hw_find_by_hostel.spec", "spec/hw_find_hostels.spec"]
|
14
14
|
s.homepage = %q{http://github.com/holden/hostelify}
|
15
15
|
s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Hostelify", "--main", "README.rdoc"]
|
16
16
|
s.require_paths = ["lib"]
|
@@ -0,0 +1,194 @@
|
|
1
|
+
class Hostelbookers
|
2
|
+
|
3
|
+
#constants
|
4
|
+
HB_SINGULAR_DETAIL_URL = "http://www.hostelbookers.com/hostels/" #poland/krakow/
|
5
|
+
HB_PLURAL_HOSTELS_URL = "http://www.hostelbookers.com/hostels/" #poland/wroclaw/
|
6
|
+
HB_DYNAMIC_URL = "http://www.hostelbookers.com/property/index.cfm?fuseaction=accommodation.search&straccommodationtype=hostels"
|
7
|
+
#&intnights=2&intpeople=1&dtearrival=21/05/2010&fromPropertyNameSearch=0&intpropertyid=6281
|
8
|
+
|
9
|
+
#options
|
10
|
+
@default_options = { :date => date=(Date.today+4).to_s, :no_days => "7", :live => true }
|
11
|
+
|
12
|
+
def self.find_hostels_by_location(options) #location
|
13
|
+
city = options[:location].split(',').first.rstrip.lstrip.gsub(' ','-').squeeze("-")
|
14
|
+
country = options[:location].split(',').last.rstrip.lstrip.gsub(' ','-').squeeze("-")
|
15
|
+
|
16
|
+
url = HB_PLURAL_HOSTELS_URL + "#{country}/#{city}"
|
17
|
+
|
18
|
+
|
19
|
+
if options[:date]
|
20
|
+
options = @default_options.merge(options)
|
21
|
+
date = Date.strptime(options[:date])
|
22
|
+
data = setSearch(url,options[:date],options[:no_days])
|
23
|
+
else
|
24
|
+
Retryable.try 3 do
|
25
|
+
data = Hpricot(open(url))
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
data = data.search("//div[@id='propertyResultsList']")
|
30
|
+
#@results = []
|
31
|
+
@results = HostelifyCollection.new
|
32
|
+
#coder = HTMLEntities.new
|
33
|
+
(data/"tr.propertyRow").each do |row|
|
34
|
+
name = row.at("a.propertyTitle").inner_text
|
35
|
+
url = row.at("a.propertyTitle")['href']
|
36
|
+
desc = row.at("p.shortDescription").inner_text
|
37
|
+
rating = row.at("td.rating/text()")
|
38
|
+
rating = rating.to_s.to_i unless rating.nil?
|
39
|
+
dorm = row.at("td.shared/text()")
|
40
|
+
single = row.at("td.private/text()")
|
41
|
+
hb_id = url.match(/[\d]{2,5}.$/).to_s.to_i
|
42
|
+
|
43
|
+
#@results << Hostelify.new(:hostel_id => hb_id, :name => name, :description => desc, :rating => rating, :dorm => dorm, :single => single)
|
44
|
+
@results << Hostelify.new(:hostel_id => hb_id, :name => name, :description => desc, :rating => rating, :dorm => dorm, :single => single)
|
45
|
+
end
|
46
|
+
return @results
|
47
|
+
end
|
48
|
+
|
49
|
+
def self.find_hostel_by_id(options)
|
50
|
+
#city = options[:location].split(',').first.gsub(' ','')
|
51
|
+
#country = options[:location].split(',').last.gsub(' ','')
|
52
|
+
id = options[:id]
|
53
|
+
|
54
|
+
#url = HB_SINGULAR_DETAIL_URL + "#{country}/#{city}/#{id}"
|
55
|
+
url = HB_DYNAMIC_URL + "&intnights=#{options[:no_days]}&fromPropertyNameSearch=0&intpropertyid=#{options[:id]}"
|
56
|
+
|
57
|
+
hostel = Hostelify.new
|
58
|
+
|
59
|
+
if options[:date]
|
60
|
+
options = @default_options.merge(options)
|
61
|
+
else
|
62
|
+
options[:date] = (Date.today+4).to_s
|
63
|
+
end
|
64
|
+
|
65
|
+
date = Date.strptime(options[:date])
|
66
|
+
url2 = HB_DYNAMIC_URL + "&intnights=#{options[:no_days]}&intpeople=1&dtearrival=#{date.strftime('%d/%m/%Y')}&fromPropertyNameSearch=0&intpropertyid=6281"
|
67
|
+
data = Hpricot(open(url2))
|
68
|
+
#data = setSearch_id(url,options[:date],options[:no_days])
|
69
|
+
#url2 = HB_DYNAMIC_URL + "&intnights=#{options[:no_days]}&dtearrival=21/05/2010&fromPropertyNameSearch=0&intpropertyid=#{options[:id]}"
|
70
|
+
|
71
|
+
# else
|
72
|
+
# Retryable.try 3 do
|
73
|
+
# data = Hpricot(open(url))
|
74
|
+
# end
|
75
|
+
# end
|
76
|
+
|
77
|
+
hostel.hostel_id = id
|
78
|
+
hostel.name = data.at("h1").inner_text
|
79
|
+
hostel.address = data.at("p.address").inner_text
|
80
|
+
hostel.description = data.at('div[@id="overviewPane"]').inner_text
|
81
|
+
facilities_td = data.at("table.tableFacilities")
|
82
|
+
|
83
|
+
facilities = []
|
84
|
+
(facilities_td/"td").each do |row|
|
85
|
+
facilities << row.inner_text
|
86
|
+
end
|
87
|
+
hostel.facilities = facilities
|
88
|
+
extras = []
|
89
|
+
extras_td = data.at("table.tableFeatures")
|
90
|
+
(extras_td/"td.name").each do |row|
|
91
|
+
extras << "Free " + row.inner_text.to_s
|
92
|
+
end
|
93
|
+
facilities = facilities + extras
|
94
|
+
|
95
|
+
ratings = []
|
96
|
+
ratings_td = data.at('div[@id="overviewIndRtng"]/table')
|
97
|
+
|
98
|
+
(ratings_td/"tr").each do |row|
|
99
|
+
ratings << row.at("td").inner_text.to_s.to_f
|
100
|
+
end
|
101
|
+
|
102
|
+
hostel.ratings = ratings
|
103
|
+
images = []
|
104
|
+
image = data.at('div[@id="propMedia"]/table')
|
105
|
+
(image/"td").each do |row|
|
106
|
+
img = row.at("img")['onclick']
|
107
|
+
if img =~ /(http).*(jpg|gif|png|jpeg)/
|
108
|
+
images << img.match(/(http).*(jpg|gif|png|jpeg)/)[0]
|
109
|
+
else
|
110
|
+
#add youtube?
|
111
|
+
end
|
112
|
+
end
|
113
|
+
hostel.images = images
|
114
|
+
|
115
|
+
if options[:all]
|
116
|
+
data = Hpricot(open(url + "/map"))
|
117
|
+
data.search("h2").remove #get rid of header
|
118
|
+
hostel.directions = data.at('div[@id="directions"]').inner_text
|
119
|
+
hostel.geo = data.to_s.scan(/-{0,1}\d{1,3}\.\d{7}/).uniq!
|
120
|
+
end
|
121
|
+
|
122
|
+
if options[:date]
|
123
|
+
date = Date.strptime(options[:date])
|
124
|
+
@availables = []
|
125
|
+
available = data.at("div.tableAvailability/table")
|
126
|
+
if available
|
127
|
+
(available/"tr").each do |row|
|
128
|
+
name = row.at("td.roomType/label/text()")
|
129
|
+
people = row.at("td.people/select")
|
130
|
+
people = people.at("option:last-child").inner_text unless people.nil?
|
131
|
+
price = row.at("td.price")
|
132
|
+
price = price.inner_text.to_s.match(/[\d.]{1,5}/)[0] unless price.nil?
|
133
|
+
(0..(options[:no_days].to_i-1)).each do |x|
|
134
|
+
#@availables << { :name => name, :spots => people, :price => price, :bookdate => (date+x).to_s } unless price.nil?
|
135
|
+
@availables << HostelifyAvailable.new(name,price,people,(date+x).to_s) unless price.nil?
|
136
|
+
end
|
137
|
+
end
|
138
|
+
end
|
139
|
+
hostel.availability = @availables
|
140
|
+
end
|
141
|
+
|
142
|
+
return hostel
|
143
|
+
end
|
144
|
+
|
145
|
+
def self.setSearch(url,date,no_days)
|
146
|
+
date = Date.strptime(date).strftime("%d/%m/%Y")
|
147
|
+
agent = WWW::Mechanize.new
|
148
|
+
page = agent.get(url)
|
149
|
+
form = page.form_with(:name => 'searchForm') # => WWW::Mechanize::Form
|
150
|
+
form.field_with(:name => 'intnights').options[no_days.to_i-1].select
|
151
|
+
form.dtearrival = date #d/m/y
|
152
|
+
|
153
|
+
Retryable.try 3 do
|
154
|
+
page = agent.submit(form)
|
155
|
+
end
|
156
|
+
|
157
|
+
#to dollars!
|
158
|
+
form = page.forms[0]
|
159
|
+
form.field_with(:name => 'strSelectedCurrencyCode').options[5].select
|
160
|
+
|
161
|
+
Retryable.try 3 do
|
162
|
+
page = agent.submit(form)
|
163
|
+
end
|
164
|
+
|
165
|
+
data = page.search('//div[@id="content"]')
|
166
|
+
|
167
|
+
return data
|
168
|
+
end
|
169
|
+
|
170
|
+
def self.setSearch_id(url,date,no_days)
|
171
|
+
date = Date.strptime(date).strftime("%d/%m/%Y")
|
172
|
+
agent = WWW::Mechanize.new
|
173
|
+
page = agent.get(url)
|
174
|
+
form = page.form_with(:name => 'frmCheckAvailBook') # => WWW::Mechanize::Form
|
175
|
+
form.field_with(:name => 'intNights').options[no_days.to_i-1].select
|
176
|
+
form.dteArrival = date #d/m/y
|
177
|
+
|
178
|
+
Retryable.try 3 do
|
179
|
+
page = agent.submit(form)
|
180
|
+
end
|
181
|
+
#change currency to dollars
|
182
|
+
form = page.forms[1]
|
183
|
+
#puts form.name
|
184
|
+
form.field_with(:name => 'strSelectedCurrencyCode').options[5].select
|
185
|
+
|
186
|
+
Retryable.try 3 do
|
187
|
+
page = agent.submit(form)
|
188
|
+
end
|
189
|
+
data = page.search('//div[@id="content"]')
|
190
|
+
|
191
|
+
return data
|
192
|
+
end
|
193
|
+
|
194
|
+
end
|
@@ -1,35 +1,26 @@
|
|
1
1
|
class Hostelbookers
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
HB_PLURAL_HOSTELS_URL = "http://www.hostelbookers.com/hostels/" #poland/wroclaw/
|
6
|
-
HB_DYNAMIC_URL = "http://www.hostelbookers.com/property/index.cfm?fuseaction=accommodation.search&straccommodationtype=hostels"
|
7
|
-
#&intnights=2&intpeople=1&dtearrival=21/05/2010&fromPropertyNameSearch=0&intpropertyid=6281
|
3
|
+
HB_SINGULAR_URL = "http://www.hostelbookers.com/property/index.cfm?fuseaction=accommodation.search&straccommodationtype=hostels&fromPropertyNameSearch=0"
|
4
|
+
HB_PLURAL_URL = "http://www.hostelbookers.com/results/index.cfm?straccommodationtype=hostels&strSearchType=freeText&fuseaction=accommodation.search"
|
8
5
|
|
9
|
-
|
10
|
-
@default_options = { :date => date=(Date.today+4).to_s, :no_days => "7", :live => true }
|
6
|
+
@default_options = { :date => (Date.today+4).to_s, :no_days => "7", :live => true }
|
11
7
|
|
12
|
-
def self.find_hostels_by_location(options)
|
8
|
+
def self.find_hostels_by_location(options)
|
9
|
+
options = @default_options.merge(options)
|
10
|
+
date = Date.strptime(options[:date])
|
13
11
|
city = options[:location].split(',').first.rstrip.lstrip.gsub(' ','-').squeeze("-")
|
14
12
|
country = options[:location].split(',').last.rstrip.lstrip.gsub(' ','-').squeeze("-")
|
15
|
-
|
16
|
-
url = HB_PLURAL_HOSTELS_URL + "#{country}/#{city}"
|
17
|
-
|
18
13
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
data = setSearch(url,options[:date],options[:no_days])
|
23
|
-
|
24
|
-
Retryable.try 3 do
|
25
|
-
data = Hpricot(open(url))
|
26
|
-
end
|
27
|
-
end
|
14
|
+
url = HB_PLURAL_URL + "&strkeywords=#{city},+#{country}&dtearrival=#{date.strftime('%d/%m/%Y')}"
|
15
|
+
|
16
|
+
#Retryable.try 3 do
|
17
|
+
data = Hpricot(open(url))
|
18
|
+
#end
|
28
19
|
|
29
20
|
data = data.search("//div[@id='propertyResultsList']")
|
30
|
-
|
21
|
+
|
31
22
|
@results = HostelifyCollection.new
|
32
|
-
|
23
|
+
|
33
24
|
(data/"tr.propertyRow").each do |row|
|
34
25
|
name = row.at("a.propertyTitle").inner_text
|
35
26
|
url = row.at("a.propertyTitle")['href']
|
@@ -40,39 +31,20 @@ class Hostelbookers
|
|
40
31
|
single = row.at("td.private/text()")
|
41
32
|
hb_id = url.match(/[\d]{2,5}.$/).to_s.to_i
|
42
33
|
|
43
|
-
#@results << Hostelify.new(:hostel_id => hb_id, :name => name, :description => desc, :rating => rating, :dorm => dorm, :single => single)
|
44
34
|
@results << Hostelify.new(:hostel_id => hb_id, :name => name, :description => desc, :rating => rating, :dorm => dorm, :single => single)
|
45
35
|
end
|
46
36
|
return @results
|
47
37
|
end
|
48
|
-
|
38
|
+
|
49
39
|
def self.find_hostel_by_id(options)
|
50
|
-
|
51
|
-
|
40
|
+
options = @default_options.merge(options)
|
41
|
+
date = Date.strptime(options[:date])
|
52
42
|
id = options[:id]
|
53
|
-
|
54
|
-
#url = HB_SINGULAR_DETAIL_URL + "#{country}/#{city}/#{id}"
|
55
|
-
url = HB_DYNAMIC_URL + "&intnights=#{options[:no_days]}&fromPropertyNameSearch=0&intpropertyid=#{options[:id]}"
|
56
|
-
|
57
43
|
hostel = Hostelify.new
|
58
|
-
|
59
|
-
if options[:date]
|
60
|
-
options = @default_options.merge(options)
|
61
|
-
else
|
62
|
-
options[:date] = (Date.today+4).to_s
|
63
|
-
end
|
64
44
|
|
65
|
-
|
66
|
-
|
67
|
-
data = Hpricot(open(url2))
|
68
|
-
#data = setSearch_id(url,options[:date],options[:no_days])
|
69
|
-
#url2 = HB_DYNAMIC_URL + "&intnights=#{options[:no_days]}&dtearrival=21/05/2010&fromPropertyNameSearch=0&intpropertyid=#{options[:id]}"
|
70
|
-
|
71
|
-
# else
|
72
|
-
# Retryable.try 3 do
|
73
|
-
# data = Hpricot(open(url))
|
74
|
-
# end
|
75
|
-
# end
|
45
|
+
url = HB_SINGULAR_URL + "&intnights=#{options[:no_days]}&intpeople=1&dtearrival=#{date.strftime('%d/%m/%Y')}&intpropertyid=#{options[:id]}"
|
46
|
+
|
47
|
+
data = Hpricot(open(url))
|
76
48
|
|
77
49
|
hostel.hostel_id = id
|
78
50
|
hostel.name = data.at("h1").inner_text
|
@@ -119,76 +91,27 @@ class Hostelbookers
|
|
119
91
|
hostel.geo = data.to_s.scan(/-{0,1}\d{1,3}\.\d{7}/).uniq!
|
120
92
|
end
|
121
93
|
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
available = data.at("div.tableAvailability/table")
|
126
|
-
|
127
|
-
(available/"tr").each do |row|
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
price = price.inner_text.to_s.match(/[\d.]{1,5}/)[0] unless price.nil?
|
133
|
-
(0..(options[:no_days].to_i-1)).each do |x|
|
134
|
-
#@availables << { :name => name, :spots => people, :price => price, :bookdate => (date+x).to_s } unless price.nil?
|
135
|
-
@availables << HostelifyAvailable.new(name,price,people,(date+x).to_s) unless price.nil?
|
136
|
-
end
|
94
|
+
@availables = []
|
95
|
+
available = data.at("div.tableAvailability/table")
|
96
|
+
if available
|
97
|
+
(available/"tr").each do |row|
|
98
|
+
name = row.at("td.roomType/label/text()")
|
99
|
+
people = row.at("td.people/select")
|
100
|
+
people = people.at("option:last-child").inner_text unless people.nil?
|
101
|
+
price = row.at("td.price")
|
102
|
+
price = price.inner_text.to_s.match(/[\d.]{1,5}/)[0] unless price.nil?
|
103
|
+
(0..(options[:no_days].to_i-1)).each do |x|
|
104
|
+
#@availables << { :name => name, :spots => people, :price => price, :bookdate => (date+x).to_s } unless price.nil?
|
105
|
+
@availables << HostelifyAvailable.new(name,price,people,(date+x).to_s) unless price.nil?
|
137
106
|
end
|
138
107
|
end
|
108
|
+
|
139
109
|
hostel.availability = @availables
|
140
110
|
end
|
141
111
|
|
142
112
|
return hostel
|
143
|
-
end
|
144
113
|
|
145
|
-
def self.setSearch(url,date,no_days)
|
146
|
-
date = Date.strptime(date).strftime("%d/%m/%Y")
|
147
|
-
agent = WWW::Mechanize.new
|
148
|
-
page = agent.get(url)
|
149
|
-
form = page.form_with(:name => 'searchForm') # => WWW::Mechanize::Form
|
150
|
-
form.field_with(:name => 'intnights').options[no_days.to_i-1].select
|
151
|
-
form.dtearrival = date #d/m/y
|
152
|
-
|
153
|
-
Retryable.try 3 do
|
154
|
-
page = agent.submit(form)
|
155
|
-
end
|
156
|
-
|
157
|
-
#to dollars!
|
158
|
-
form = page.forms[0]
|
159
|
-
form.field_with(:name => 'strSelectedCurrencyCode').options[5].select
|
160
|
-
|
161
|
-
Retryable.try 3 do
|
162
|
-
page = agent.submit(form)
|
163
|
-
end
|
164
|
-
|
165
|
-
data = page.search('//div[@id="content"]')
|
166
|
-
|
167
|
-
return data
|
168
|
-
end
|
169
|
-
|
170
|
-
def self.setSearch_id(url,date,no_days)
|
171
|
-
date = Date.strptime(date).strftime("%d/%m/%Y")
|
172
|
-
agent = WWW::Mechanize.new
|
173
|
-
page = agent.get(url)
|
174
|
-
form = page.form_with(:name => 'frmCheckAvailBook') # => WWW::Mechanize::Form
|
175
|
-
form.field_with(:name => 'intNights').options[no_days.to_i-1].select
|
176
|
-
form.dteArrival = date #d/m/y
|
177
|
-
|
178
|
-
Retryable.try 3 do
|
179
|
-
page = agent.submit(form)
|
180
|
-
end
|
181
|
-
#change currency to dollars
|
182
|
-
form = page.forms[1]
|
183
|
-
#puts form.name
|
184
|
-
form.field_with(:name => 'strSelectedCurrencyCode').options[5].select
|
185
|
-
|
186
|
-
Retryable.try 3 do
|
187
|
-
page = agent.submit(form)
|
188
|
-
end
|
189
|
-
data = page.search('//div[@id="content"]')
|
190
|
-
|
191
|
-
return data
|
192
|
-
end
|
193
114
|
|
115
|
+
end
|
116
|
+
|
194
117
|
end
|
@@ -0,0 +1,205 @@
|
|
1
|
+
class Hostelworldmonkey
|
2
|
+
|
3
|
+
#constants
|
4
|
+
#location list includes/indexjs.js
|
5
|
+
HW_SINGULAR_DETAIL_URL = "http://www.hostelworld.com/hosteldetails.php?HostelNumber="
|
6
|
+
HW_SINGULAR_IMAGE_URL = "http://www.hostelworld.com/hostelpictures.php?HostelNumber="
|
7
|
+
HW_SINGULAR_AVAILABILITY = "http://www.hostelworld.com/availability.php/"
|
8
|
+
HW_SINGULAR_YOUTUBE_URL = "http://www.hostelworld.com/youtubevideo.php?HostelNumber="
|
9
|
+
HW_PLURAL_HOSTELS_URL = "http://www.hostelworld.com/findabed.php/"
|
10
|
+
|
11
|
+
#options
|
12
|
+
@default_options = { :date => date=(Date.today+4).to_s, :no_days => "7", :no_ppl => "2" }
|
13
|
+
|
14
|
+
def self.parse_html(url)
|
15
|
+
f = open(url)
|
16
|
+
f.rewind
|
17
|
+
Retryable.try 3 do
|
18
|
+
data = Hpricot(Iconv.conv('utf-8', f.charset, f.readlines.join("\n")))
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
def self.find_hostel_by_id(options)
|
23
|
+
opts = { :directions => false, :images => false, :all => false }.merge options
|
24
|
+
id = options[:id].to_s
|
25
|
+
url = HW_SINGULAR_DETAIL_URL + id
|
26
|
+
|
27
|
+
#coder = HTMLEntities.new
|
28
|
+
hostel = Hostelify.new
|
29
|
+
hostel.hostel_id = id
|
30
|
+
|
31
|
+
if options[:date]
|
32
|
+
options = @default_options.merge(options)
|
33
|
+
date = Date.strptime(options[:date])
|
34
|
+
data = setSearch(url, options[:date], options[:no_ppl], options[:no_days])
|
35
|
+
else
|
36
|
+
data = parse_html(url)
|
37
|
+
end
|
38
|
+
|
39
|
+
data = data.search("//div[@id='content']")
|
40
|
+
data.search("h3").remove #get rid of header
|
41
|
+
|
42
|
+
#title, address, desc, facilities, ratings
|
43
|
+
hostel.name = data.at("h2").inner_text.gsub(/( in ).*$/,'')
|
44
|
+
hostel.address = data.at('div[@style="padding-top: 5px"]').inner_text.lstrip
|
45
|
+
|
46
|
+
if options[:date]
|
47
|
+
hostel.availability = parse_availables(data)
|
48
|
+
else
|
49
|
+
hostel.description = data.at('div[@id="microDescription2]').inner_text
|
50
|
+
end
|
51
|
+
|
52
|
+
#optional
|
53
|
+
no_photos = data.at('span/a[@id="picLink"]').inner_text.to_i
|
54
|
+
video = data.at('div[@id="microVideo"]')
|
55
|
+
|
56
|
+
facilities = []
|
57
|
+
(data/"li.microFacilitiesBoomLi").each do |item|
|
58
|
+
facilities << item.inner_text
|
59
|
+
end
|
60
|
+
|
61
|
+
ratings = []
|
62
|
+
(data/'div[@id="ratingsBar2"]').each do |item|
|
63
|
+
ratings << item.inner_text.to_i
|
64
|
+
end
|
65
|
+
|
66
|
+
hostel.facilities = facilities
|
67
|
+
hostel.ratings = ratings
|
68
|
+
|
69
|
+
if video #exists
|
70
|
+
data = parse_html(HW_SINGULAR_YOUTUBE_URL + id)
|
71
|
+
video_url = data.at('param[@name="movie"]')['value']
|
72
|
+
hostel.video = video_url
|
73
|
+
#video_url = data.at('tag')
|
74
|
+
end
|
75
|
+
|
76
|
+
if options[:directions] or options[:all]
|
77
|
+
data = parse_html(HW_SINGULAR_DETAIL_URL + id + "/directions/")
|
78
|
+
|
79
|
+
#directions, geo
|
80
|
+
hostel.directions = data.at('div[@id="content"]').inner_text.gsub(/^[\d\D\n]*(DIRECTIONS)/,'')
|
81
|
+
hostel.geo = data.to_s.scan(/-{0,1}\d{1,3}\.\d{7}/).uniq!
|
82
|
+
end
|
83
|
+
|
84
|
+
if no_photos and (options[:images] or options[:all])
|
85
|
+
images = []
|
86
|
+
(1..no_photos).each do |i|
|
87
|
+
data = parse_html(HW_SINGULAR_IMAGE_URL + id + '&PicNO=' + i.to_s)
|
88
|
+
images << (data/"img").first[:src].to_s
|
89
|
+
end
|
90
|
+
hostel.images = images
|
91
|
+
end
|
92
|
+
hostel # return
|
93
|
+
end
|
94
|
+
|
95
|
+
def self.find_hostels_by_location(options) #location
|
96
|
+
|
97
|
+
city = options[:location].split(',').first.gsub(' ','')
|
98
|
+
country = options[:location].split(',').last.gsub(' ','')
|
99
|
+
url = HW_PLURAL_HOSTELS_URL + "ChosenCity.#{city}/ChosenCountry.#{country}"
|
100
|
+
|
101
|
+
if options[:date]
|
102
|
+
options = @default_options.merge(options)
|
103
|
+
date = Date.strptime(options[:date])
|
104
|
+
data = setSearch(url, options[:date], options[:no_ppl], options[:no_days])
|
105
|
+
else
|
106
|
+
data = parse_html(url)
|
107
|
+
end
|
108
|
+
|
109
|
+
data = data.search("//div[@id='content']")
|
110
|
+
@results = HostelifyCollection.new
|
111
|
+
|
112
|
+
(data/"div.hostelListing").each do |row|
|
113
|
+
name = row.at("h3").inner_text
|
114
|
+
desc = row.at("div.hostelEntry/p").inner_text.to_s.chop.gsub('more info','').squeeze('.')
|
115
|
+
url = row.at("h3/a")['href']
|
116
|
+
rating = row.at("h4/text()")
|
117
|
+
rating = rating.to_s.to_i unless rating.nil?
|
118
|
+
type = row.at("div.hostelListingImage/span").inner_text
|
119
|
+
hostel_id = url.match(/[\d]*$/).to_s
|
120
|
+
|
121
|
+
if options[:date]
|
122
|
+
#price_USD = row.at("span.blueBeds").inner_text #need to fix float
|
123
|
+
dorm = (row.at("p.hostelListingRate/span.blueBeds/text()")).to_s.gsub(/[A-Z$]*/,'')
|
124
|
+
single = row.at("p.hostelListingPrivateRate/span.blueBeds/text()").to_s.gsub(/[A-Z$]*/,'')
|
125
|
+
available = row/"ul.hostelListingDates/li.noAvail/text()"
|
126
|
+
available = available.to_a.join(',').split(',')
|
127
|
+
@results << Hostelify.new(:hostel_id => hostel_id, :name => name, :description => desc, :rating => rating, :dorm => dorm, :single => single, :unavailable => available)
|
128
|
+
else
|
129
|
+
@results << Hostelify.new(:hostel_id => hostel_id, :name => name, :description => desc, :rating => rating)
|
130
|
+
end
|
131
|
+
end
|
132
|
+
return @results
|
133
|
+
end
|
134
|
+
|
135
|
+
private
|
136
|
+
|
137
|
+
def self.setSearch(url,date,no_ppl,no_days)
|
138
|
+
|
139
|
+
date = Date.strptime(date)
|
140
|
+
month = date.strftime("%m").to_i
|
141
|
+
day = date.strftime("%d").to_i
|
142
|
+
if Time.now.strftime("%y") == date.strftime("%y") then year = 0 else year = 1 end
|
143
|
+
|
144
|
+
agent = WWW::Mechanize.new
|
145
|
+
page = agent.get(url)
|
146
|
+
|
147
|
+
#the form name
|
148
|
+
form = page.forms.first # => WWW::Mechanize::Form
|
149
|
+
|
150
|
+
Retryable.try 3 do
|
151
|
+
page = agent.submit(form)
|
152
|
+
end
|
153
|
+
|
154
|
+
#form must be submitted twice because the people writing hostelworld are retards
|
155
|
+
form = page.forms.first # => WWW::Mechanize::Form
|
156
|
+
form.field_with(:name => 'selMonth').options[month-1].select
|
157
|
+
form.field_with(:name => 'selDay').options[day-1].select
|
158
|
+
form.field_with(:name => 'selYear').options[year].select
|
159
|
+
form.field_with(:name => 'NumNights').options[no_days.to_i-1].select
|
160
|
+
form.field_with(:name => 'Persons').options[no_ppl.to_i-1].select
|
161
|
+
form.field_with(:name => 'Currency').options[4].select #US Currency
|
162
|
+
|
163
|
+
Retryable.try 3 do
|
164
|
+
page = agent.submit(form)
|
165
|
+
end
|
166
|
+
|
167
|
+
data = page.search("//div[@id='content']")
|
168
|
+
|
169
|
+
return data
|
170
|
+
end
|
171
|
+
|
172
|
+
def self.parse_availables(info)
|
173
|
+
|
174
|
+
availability = info.at('table[@id="tableDatesSelected2"]')
|
175
|
+
availability.search("div").remove
|
176
|
+
|
177
|
+
availables = []
|
178
|
+
|
179
|
+
(availability/"tr").each do |row|
|
180
|
+
name = (row/"td").first
|
181
|
+
name = name.inner_text unless name.nil?
|
182
|
+
|
183
|
+
(row/"td").each do |td|
|
184
|
+
night = td.attributes['title']
|
185
|
+
if night
|
186
|
+
price = night.to_s.match(/([\d]{1,3}).([\d]{2})/).to_s
|
187
|
+
available = night.to_s.match(/(available: )([\d]*)/)
|
188
|
+
date = night.to_s.match(/(Date: ).*$/).to_s.gsub(/(Date: )|(th)|(nd)|(rd)|(st)/,'')
|
189
|
+
date = Date.strptime(date, "%a %d %b '%y")
|
190
|
+
|
191
|
+
if available
|
192
|
+
beds = available.to_s.match(/[\d]{1,2}/)[0]
|
193
|
+
availables << HostelifyAvailable.new(name,price,beds,date)
|
194
|
+
else
|
195
|
+
availables << HostelifyAvailable.new(name,price,0,date)
|
196
|
+
end
|
197
|
+
end
|
198
|
+
end
|
199
|
+
end
|
200
|
+
return availables
|
201
|
+
|
202
|
+
end
|
203
|
+
|
204
|
+
|
205
|
+
end
|
data/lib/hostelify.rb
CHANGED
data/spec/hb_find_by_hostel.spec
CHANGED
@@ -49,7 +49,7 @@ end
|
|
49
49
|
|
50
50
|
describe "with dates to get availabilty and verify output!" do
|
51
51
|
before(:all) do
|
52
|
-
@h = Hostelbookers.find_hostel_by_id(:id => 19831, :date => (Date.today+10).to_s)
|
52
|
+
@h = Hostelbookers.find_hostel_by_id(:location => "krakow,poland", :id => 19831, :date => (Date.today+10).to_s)
|
53
53
|
end
|
54
54
|
|
55
55
|
it "get first availability and check it merit" do
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 4
|
8
|
-
- 4
|
9
|
-
version: 0.4.4
|
8
|
+
- 5
|
9
|
+
version: 0.4.5
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Holden Thomas
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-05-
|
17
|
+
date: 2010-05-20 00:00:00 +02:00
|
18
18
|
default_executable:
|
19
19
|
dependencies: []
|
20
20
|
|
@@ -29,9 +29,11 @@ extra_rdoc_files:
|
|
29
29
|
- lib/hostelify.rb
|
30
30
|
- lib/hostelify/gomio.rb
|
31
31
|
- lib/hostelify/hostel.rb
|
32
|
+
- lib/hostelify/hostelbookers-old.rb
|
32
33
|
- lib/hostelify/hostelbookers.rb
|
33
34
|
- lib/hostelify/hostelify.rb
|
34
35
|
- lib/hostelify/hostelworld.rb
|
36
|
+
- lib/hostelify/hostelworldmonkey.rb
|
35
37
|
- lib/items.rb
|
36
38
|
- lib/test.rb
|
37
39
|
files:
|
@@ -42,9 +44,11 @@ files:
|
|
42
44
|
- lib/hostelify.rb
|
43
45
|
- lib/hostelify/gomio.rb
|
44
46
|
- lib/hostelify/hostel.rb
|
47
|
+
- lib/hostelify/hostelbookers-old.rb
|
45
48
|
- lib/hostelify/hostelbookers.rb
|
46
49
|
- lib/hostelify/hostelify.rb
|
47
50
|
- lib/hostelify/hostelworld.rb
|
51
|
+
- lib/hostelify/hostelworldmonkey.rb
|
48
52
|
- lib/items.rb
|
49
53
|
- lib/test.rb
|
50
54
|
- spec/_helper.rb
|