hostelify 0.3.7 → 0.3.8
Sign up to get free protection for your applications and to get access to all the features.
- data/Manifest +11 -16
- data/Rakefile +1 -1
- data/hostelify.gemspec +4 -4
- data/lib/hostel/hostel_available.rb +11 -0
- data/{pkg/hostelify-0.3.6/lib/hostelify → lib/hostel}/hostelbookers.rb +3 -3
- data/lib/hostel/hostelworld.rb +205 -0
- data/{pkg/hostelify-0.3.6/lib/hostelify.rb → lib/hostel.rb} +3 -3
- data/lib/hostelify/hostel.rb +61 -0
- data/lib/hostelify/hostelworld.rb +2 -2
- data/lib/hostelify/hostelworldmonkey.rb +205 -0
- data/lib/items.rb +23 -0
- data/lib/test.rb +5 -0
- data/{pkg/hostelify-0.3.6/spec/_helper.rb → spec/helper.rb} +4 -1
- metadata +23 -18
- data/pkg/hostelify-0.3.6/Manifest +0 -30
- data/pkg/hostelify-0.3.6/README.rdoc +0 -1
- data/pkg/hostelify-0.3.6/Rakefile +0 -14
- data/pkg/hostelify-0.3.6/hostelify.gemspec +0 -31
- data/pkg/hostelify-0.3.6/lib/hostelify/hostelworld.rb +0 -265
- data/pkg/hostelify-0.3.6/spec/hb_find_by_hostel.spec +0 -72
- data/pkg/hostelify-0.3.6/spec/hb_find_hostels.spec +0 -30
- data/pkg/hostelify-0.3.6/spec/hw_find_by_hostel.spec +0 -85
- data/pkg/hostelify-0.3.6/spec/hw_find_hostels.spec +0 -62
- data/pkg/hostelify-0.3.6.gem +0 -0
- data/pkg/hostelify-0.3.6.tar.gz +0 -0
- /data/{pkg/hostelify-0.3.6/lib/hostelify → lib/hostel}/gomio.rb +0 -0
- /data/{pkg/hostelify-0.3.6/lib/hostelify/hostelify.rb → lib/hostel/hostel.rb} +0 -0
data/Manifest
CHANGED
@@ -2,29 +2,24 @@ Manifest
|
|
2
2
|
README.rdoc
|
3
3
|
Rakefile
|
4
4
|
hostelify.gemspec
|
5
|
+
lib/hostel.rb
|
6
|
+
lib/hostel/gomio.rb
|
7
|
+
lib/hostel/hostel.rb
|
8
|
+
lib/hostel/hostel_available.rb
|
9
|
+
lib/hostel/hostelbookers.rb
|
10
|
+
lib/hostel/hostelworld.rb
|
5
11
|
lib/hostelify.rb
|
6
12
|
lib/hostelify/gomio.rb
|
13
|
+
lib/hostelify/hostel.rb
|
7
14
|
lib/hostelify/hostelbookers.rb
|
8
15
|
lib/hostelify/hostelify.rb
|
9
16
|
lib/hostelify/hostelworld.rb
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
pkg/hostelify-0.3.6/README.rdoc
|
14
|
-
pkg/hostelify-0.3.6/Rakefile
|
15
|
-
pkg/hostelify-0.3.6/hostelify.gemspec
|
16
|
-
pkg/hostelify-0.3.6/lib/hostelify.rb
|
17
|
-
pkg/hostelify-0.3.6/lib/hostelify/gomio.rb
|
18
|
-
pkg/hostelify-0.3.6/lib/hostelify/hostelbookers.rb
|
19
|
-
pkg/hostelify-0.3.6/lib/hostelify/hostelify.rb
|
20
|
-
pkg/hostelify-0.3.6/lib/hostelify/hostelworld.rb
|
21
|
-
pkg/hostelify-0.3.6/spec/_helper.rb
|
22
|
-
pkg/hostelify-0.3.6/spec/hb_find_by_hostel.spec
|
23
|
-
pkg/hostelify-0.3.6/spec/hb_find_hostels.spec
|
24
|
-
pkg/hostelify-0.3.6/spec/hw_find_by_hostel.spec
|
25
|
-
pkg/hostelify-0.3.6/spec/hw_find_hostels.spec
|
17
|
+
lib/hostelify/hostelworldmonkey.rb
|
18
|
+
lib/items.rb
|
19
|
+
lib/test.rb
|
26
20
|
spec/_helper.rb
|
27
21
|
spec/hb_find_by_hostel.spec
|
28
22
|
spec/hb_find_hostels.spec
|
23
|
+
spec/helper.rb
|
29
24
|
spec/hw_find_by_hostel.spec
|
30
25
|
spec/hw_find_hostels.spec
|
data/Rakefile
CHANGED
data/hostelify.gemspec
CHANGED
@@ -2,15 +2,15 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = %q{hostelify}
|
5
|
-
s.version = "0.3.
|
5
|
+
s.version = "0.3.8"
|
6
6
|
|
7
7
|
s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
|
8
8
|
s.authors = ["Holden Thomas"]
|
9
|
-
s.date = %q{
|
9
|
+
s.date = %q{2010-01-26}
|
10
10
|
s.description = %q{Simple Hostel Webscrapper.}
|
11
11
|
s.email = %q{holden.thomas@gmail.com}
|
12
|
-
s.extra_rdoc_files = ["README.rdoc", "lib/hostelify.rb", "lib/hostelify/gomio.rb", "lib/hostelify/hostelbookers.rb", "lib/hostelify/hostelify.rb", "lib/hostelify/hostelworld.rb"]
|
13
|
-
s.files = ["Manifest", "README.rdoc", "Rakefile", "hostelify.gemspec", "lib/
|
12
|
+
s.extra_rdoc_files = ["README.rdoc", "lib/hostel.rb", "lib/hostel/gomio.rb", "lib/hostel/hostel.rb", "lib/hostel/hostel_available.rb", "lib/hostel/hostelbookers.rb", "lib/hostel/hostelworld.rb", "lib/hostelify.rb", "lib/hostelify/gomio.rb", "lib/hostelify/hostel.rb", "lib/hostelify/hostelbookers.rb", "lib/hostelify/hostelify.rb", "lib/hostelify/hostelworld.rb", "lib/hostelify/hostelworldmonkey.rb", "lib/items.rb", "lib/test.rb"]
|
13
|
+
s.files = ["Manifest", "README.rdoc", "Rakefile", "hostelify.gemspec", "lib/hostel.rb", "lib/hostel/gomio.rb", "lib/hostel/hostel.rb", "lib/hostel/hostel_available.rb", "lib/hostel/hostelbookers.rb", "lib/hostel/hostelworld.rb", "lib/hostelify.rb", "lib/hostelify/gomio.rb", "lib/hostelify/hostel.rb", "lib/hostelify/hostelbookers.rb", "lib/hostelify/hostelify.rb", "lib/hostelify/hostelworld.rb", "lib/hostelify/hostelworldmonkey.rb", "lib/items.rb", "lib/test.rb", "spec/_helper.rb", "spec/hb_find_by_hostel.spec", "spec/hb_find_hostels.spec", "spec/helper.rb", "spec/hw_find_by_hostel.spec", "spec/hw_find_hostels.spec"]
|
14
14
|
s.homepage = %q{http://github.com/holden/hostelify}
|
15
15
|
s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Hostelify", "--main", "README.rdoc"]
|
16
16
|
s.require_paths = ["lib"]
|
@@ -8,9 +8,9 @@ class Hostelbookers
|
|
8
8
|
@default_options = { :date => date=(Date.today+4).to_s, :no_days => "7", :live => true }
|
9
9
|
|
10
10
|
def self.find_hostels_by_location(options) #location
|
11
|
-
|
12
|
-
|
13
|
-
|
11
|
+
|
12
|
+
city = options[:location].split(',').first.gsub(' ','')
|
13
|
+
country = options[:location].split(',').last.gsub(' ','')
|
14
14
|
url = HB_PLURAL_HOSTELS_URL + "#{country}/#{city}"
|
15
15
|
|
16
16
|
if options[:date]
|
@@ -0,0 +1,205 @@
|
|
1
|
+
class Hostelworld
|
2
|
+
|
3
|
+
#constants
|
4
|
+
#location list includes/indexjs.js
|
5
|
+
HW_SINGULAR_DETAIL_URL = "http://www.hostelworld.com/hosteldetails.php?HostelNumber="
|
6
|
+
HW_SINGULAR_IMAGE_URL = "http://www.hostelworld.com/hostelpictures.php?HostelNumber="
|
7
|
+
HW_SINGULAR_AVAILABILITY = "http://www.hostelworld.com/availability.php/"
|
8
|
+
HW_SINGULAR_YOUTUBE_URL = "http://www.hostelworld.com/youtubevideo.php?HostelNumber="
|
9
|
+
HW_PLURAL_HOSTELS_URL = "http://www.hostelworld.com/findabed.php/"
|
10
|
+
|
11
|
+
#options
|
12
|
+
@default_options = { :date => date=(Date.today+4).to_s, :no_days => "7", :no_ppl => "2" }
|
13
|
+
|
14
|
+
def self.parse_html(url)
|
15
|
+
f = open(url)
|
16
|
+
f.rewind
|
17
|
+
Retryable.try 3 do
|
18
|
+
data = Hpricot(Iconv.conv('utf-8', f.charset, f.readlines.join("\n")))
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
def self.find_hostel_by_id(options)
|
23
|
+
opts = { :directions => false, :images => false, :all => false }.merge options
|
24
|
+
id = options[:id].to_s
|
25
|
+
url = HW_SINGULAR_DETAIL_URL + id
|
26
|
+
|
27
|
+
#coder = HTMLEntities.new
|
28
|
+
hostel = Hostelify.new
|
29
|
+
hostel.hostel_id = id
|
30
|
+
|
31
|
+
if options[:date]
|
32
|
+
options = @default_options.merge(options)
|
33
|
+
date = Date.strptime(options[:date])
|
34
|
+
data = setSearch(url, options[:date], options[:no_ppl], options[:no_days])
|
35
|
+
else
|
36
|
+
data = parse_html(url)
|
37
|
+
end
|
38
|
+
|
39
|
+
data = data.search("//div[@id='content']")
|
40
|
+
data.search("h3").remove #get rid of header
|
41
|
+
|
42
|
+
#title, address, desc, facilities, ratings
|
43
|
+
hostel.name = data.at("h2").inner_text.gsub(/( in ).*$/,'')
|
44
|
+
hostel.address = data.at('div[@style="padding-top: 5px"]').inner_text.lstrip
|
45
|
+
|
46
|
+
if options[:date]
|
47
|
+
hostel.availability = parse_availables(data)
|
48
|
+
else
|
49
|
+
hostel.description = data.at('div[@id="microDescription2]').inner_text
|
50
|
+
end
|
51
|
+
|
52
|
+
#optional
|
53
|
+
no_photos = data.at('span/a[@id="picLink"]').inner_text.to_i
|
54
|
+
video = data.at('div[@id="microVideo"]')
|
55
|
+
|
56
|
+
facilities = []
|
57
|
+
(data/"li.microFacilitiesBoomLi").each do |item|
|
58
|
+
facilities << item.inner_text
|
59
|
+
end
|
60
|
+
|
61
|
+
ratings = []
|
62
|
+
(data/'div[@id="ratingsBar2"]').each do |item|
|
63
|
+
ratings << item.inner_text.to_i
|
64
|
+
end
|
65
|
+
|
66
|
+
hostel.facilities = facilities
|
67
|
+
hostel.ratings = ratings
|
68
|
+
|
69
|
+
if video #exists
|
70
|
+
data = parse_html(HW_SINGULAR_YOUTUBE_URL + id)
|
71
|
+
video_url = data.at('param[@name="movie"]')['value']
|
72
|
+
hostel.video = video_url
|
73
|
+
#video_url = data.at('tag')
|
74
|
+
end
|
75
|
+
|
76
|
+
if options[:directions] or options[:all]
|
77
|
+
data = parse_html(HW_SINGULAR_DETAIL_URL + id + "/directions/")
|
78
|
+
|
79
|
+
#directions, geo
|
80
|
+
hostel.directions = data.at('div[@id="content"]').inner_text.gsub(/^[\d\D\n]*(DIRECTIONS)/,'')
|
81
|
+
hostel.geo = data.to_s.scan(/-{0,1}\d{1,3}\.\d{7}/).uniq!
|
82
|
+
end
|
83
|
+
|
84
|
+
if no_photos and (options[:images] or options[:all])
|
85
|
+
images = []
|
86
|
+
(1..no_photos).each do |i|
|
87
|
+
data = parse_html(HW_SINGULAR_IMAGE_URL + id + '&PicNO=' + i.to_s)
|
88
|
+
images << (data/"img").first[:src].to_s
|
89
|
+
end
|
90
|
+
hostel.images = images
|
91
|
+
end
|
92
|
+
hostel # return
|
93
|
+
end
|
94
|
+
|
95
|
+
def self.find_hostels_by_location(options) #location
|
96
|
+
|
97
|
+
city = options[:location].split(',').first.gsub(' ','')
|
98
|
+
country = options[:location].split(',').last.gsub(' ','')
|
99
|
+
url = HW_PLURAL_HOSTELS_URL + "ChosenCity.#{city}/ChosenCountry.#{country}"
|
100
|
+
|
101
|
+
if options[:date]
|
102
|
+
options = @default_options.merge(options)
|
103
|
+
date = Date.strptime(options[:date])
|
104
|
+
data = setSearch(url, options[:date], options[:no_ppl], options[:no_days])
|
105
|
+
else
|
106
|
+
data = parse_html(url)
|
107
|
+
end
|
108
|
+
|
109
|
+
data = data.search("//div[@id='content']")
|
110
|
+
@results = HostelifyCollection.new
|
111
|
+
|
112
|
+
(data/"div.hostelListing").each do |row|
|
113
|
+
name = row.at("h3").inner_text
|
114
|
+
desc = row.at("div.hostelEntry/p").inner_text.to_s.chop.gsub('more info','').squeeze('.')
|
115
|
+
url = row.at("h3/a")['href']
|
116
|
+
rating = row.at("h4/text()")
|
117
|
+
rating = rating.to_s.to_i unless rating.nil?
|
118
|
+
type = row.at("div.hostelListingImage/span").inner_text
|
119
|
+
hostel_id = url.match(/[\d]*$/).to_s
|
120
|
+
|
121
|
+
if options[:date]
|
122
|
+
#price_USD = row.at("span.blueBeds").inner_text #need to fix float
|
123
|
+
dorm = (row.at("p.hostelListingRate/span.blueBeds/text()")).to_s.gsub(/[A-Z$]*/,'')
|
124
|
+
single = row.at("p.hostelListingPrivateRate/span.blueBeds/text()").to_s.gsub(/[A-Z$]*/,'')
|
125
|
+
available = row/"ul.hostelListingDates/li.noAvail/text()"
|
126
|
+
available = available.to_a.join(',').split(',')
|
127
|
+
@results << Hostelify.new(:hostel_id => hostel_id, :name => name, :description => desc, :rating => rating, :dorm => dorm, :single => single, :unavailable => available)
|
128
|
+
else
|
129
|
+
@results << Hostelify.new(:hostel_id => hostel_id, :name => name, :description => desc, :rating => rating)
|
130
|
+
end
|
131
|
+
end
|
132
|
+
return @results
|
133
|
+
end
|
134
|
+
|
135
|
+
private
|
136
|
+
|
137
|
+
def self.setSearch(url,date,no_ppl,no_days)
|
138
|
+
|
139
|
+
date = Date.strptime(date)
|
140
|
+
month = date.strftime("%m").to_i
|
141
|
+
day = date.strftime("%d").to_i
|
142
|
+
if Time.now.strftime("%y") == date.strftime("%y") then year = 0 else year = 1 end
|
143
|
+
|
144
|
+
agent = WWW::Mechanize.new
|
145
|
+
page = agent.get(url)
|
146
|
+
|
147
|
+
#the form name
|
148
|
+
form = page.forms.first # => WWW::Mechanize::Form
|
149
|
+
|
150
|
+
Retryable.try 3 do
|
151
|
+
page = agent.submit(form)
|
152
|
+
end
|
153
|
+
|
154
|
+
#form must be submitted twice because the people writing hostelworld are retards
|
155
|
+
form = page.forms.first # => WWW::Mechanize::Form
|
156
|
+
form.field_with(:name => 'selMonth').options[month-1].select
|
157
|
+
form.field_with(:name => 'selDay').options[day-1].select
|
158
|
+
form.field_with(:name => 'selYear').options[year].select
|
159
|
+
form.field_with(:name => 'NumNights').options[no_days.to_i-1].select
|
160
|
+
form.field_with(:name => 'Persons').options[no_ppl.to_i-1].select
|
161
|
+
form.field_with(:name => 'Currency').options[4].select #US Currency
|
162
|
+
|
163
|
+
Retryable.try 3 do
|
164
|
+
page = agent.submit(form)
|
165
|
+
end
|
166
|
+
|
167
|
+
data = page.search("//div[@id='content']")
|
168
|
+
|
169
|
+
return data
|
170
|
+
end
|
171
|
+
|
172
|
+
def self.parse_availables(info)
|
173
|
+
|
174
|
+
availability = info.at('table[@id="tableDatesSelected2"]')
|
175
|
+
availability.search("div").remove
|
176
|
+
|
177
|
+
availables = []
|
178
|
+
|
179
|
+
(availability/"tr").each do |row|
|
180
|
+
name = (row/"td").first
|
181
|
+
name = name.inner_text unless name.nil?
|
182
|
+
|
183
|
+
(row/"td").each do |td|
|
184
|
+
night = td.attributes['title']
|
185
|
+
if night
|
186
|
+
price = night.to_s.match(/([\d]{1,3}).([\d]{2})/).to_s
|
187
|
+
available = night.to_s.match(/(available: )([\d]*)/)
|
188
|
+
date = night.to_s.match(/(Date: ).*$/).to_s.gsub(/(Date: )|(th)|(nd)|(rd)|(st)/,'')
|
189
|
+
date = Date.strptime(date, "%a %d %b '%y")
|
190
|
+
|
191
|
+
if available
|
192
|
+
beds = available.to_s.match(/[\d]{1,2}/)[0]
|
193
|
+
availables << HostelifyAvailable.new(name,price,beds,date)
|
194
|
+
else
|
195
|
+
availables << HostelifyAvailable.new(name,price,0,date)
|
196
|
+
end
|
197
|
+
end
|
198
|
+
end
|
199
|
+
end
|
200
|
+
return availables
|
201
|
+
|
202
|
+
end
|
203
|
+
|
204
|
+
|
205
|
+
end
|
@@ -5,6 +5,6 @@ require 'open-uri'
|
|
5
5
|
require 'date'
|
6
6
|
require 'htmlentities'
|
7
7
|
require 'iconv'
|
8
|
-
require '
|
9
|
-
require '
|
10
|
-
require '
|
8
|
+
require 'hostel/hostel'
|
9
|
+
require 'hostel/hostelworld'
|
10
|
+
require 'hostel/hostelbookers'
|
@@ -0,0 +1,61 @@
|
|
1
|
+
class Hostelify
|
2
|
+
attr_accessor :hostel_id, :name, :address, :description, :facilities, :ratings, :directions, :geo, :images, :video, :availability
|
3
|
+
attr_accessor :rating, :dorm, :single, :unavailable
|
4
|
+
|
5
|
+
def initialize(options = {})
|
6
|
+
options.each {
|
7
|
+
|k,v|
|
8
|
+
self.send( "#{k.to_s}=".intern, v)
|
9
|
+
}
|
10
|
+
end
|
11
|
+
|
12
|
+
end
|
13
|
+
|
14
|
+
class HostelifyCollection < Array
|
15
|
+
# This collection does everything an Array does, plus
|
16
|
+
# you can add utility methods like names.
|
17
|
+
|
18
|
+
def ids
|
19
|
+
collect do |i|
|
20
|
+
i.hostel_id
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
def names
|
25
|
+
collect do |i|
|
26
|
+
i.name
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
def descs
|
31
|
+
collect do |i|
|
32
|
+
i.description
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
end
|
37
|
+
|
38
|
+
class HostelifyAvailable
|
39
|
+
attr_accessor :name, :price, :spots, :bookdate
|
40
|
+
|
41
|
+
def initialize(name, price, spots, bookdate)
|
42
|
+
@name = name
|
43
|
+
@price = price
|
44
|
+
@spots = spots
|
45
|
+
@bookdate = bookdate
|
46
|
+
end
|
47
|
+
|
48
|
+
end
|
49
|
+
|
50
|
+
module Retryable
|
51
|
+
extend self
|
52
|
+
|
53
|
+
def try times = 1, options = {}, &block
|
54
|
+
val = yield
|
55
|
+
rescue options[:on] || Exception
|
56
|
+
retry if (times -= 1) > 0
|
57
|
+
else
|
58
|
+
val
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
@@ -127,9 +127,9 @@ class Hostelworld
|
|
127
127
|
@results = HostelifyCollection.new
|
128
128
|
|
129
129
|
(data/"div.hostelListing").each do |row|
|
130
|
-
name = row.at("
|
130
|
+
name = row.at("h2").inner_text
|
131
131
|
desc = row.at("div.hostelEntry/p").inner_text.to_s.chop.gsub('more info','').squeeze('.')
|
132
|
-
url = row.at("
|
132
|
+
url = row.at("h2/a")['href']
|
133
133
|
rating = row.at("h4/text()")
|
134
134
|
rating = rating.to_s.to_i unless rating.nil?
|
135
135
|
type = row.at("div.hostelListingImage/span").inner_text
|
@@ -0,0 +1,205 @@
|
|
1
|
+
class Hostelworldmonkey
|
2
|
+
|
3
|
+
#constants
|
4
|
+
#location list includes/indexjs.js
|
5
|
+
HW_SINGULAR_DETAIL_URL = "http://www.hostelworld.com/hosteldetails.php?HostelNumber="
|
6
|
+
HW_SINGULAR_IMAGE_URL = "http://www.hostelworld.com/hostelpictures.php?HostelNumber="
|
7
|
+
HW_SINGULAR_AVAILABILITY = "http://www.hostelworld.com/availability.php/"
|
8
|
+
HW_SINGULAR_YOUTUBE_URL = "http://www.hostelworld.com/youtubevideo.php?HostelNumber="
|
9
|
+
HW_PLURAL_HOSTELS_URL = "http://www.hostelworld.com/findabed.php/"
|
10
|
+
|
11
|
+
#options
|
12
|
+
@default_options = { :date => date=(Date.today+4).to_s, :no_days => "7", :no_ppl => "2" }
|
13
|
+
|
14
|
+
def self.parse_html(url)
|
15
|
+
f = open(url)
|
16
|
+
f.rewind
|
17
|
+
Retryable.try 3 do
|
18
|
+
data = Hpricot(Iconv.conv('utf-8', f.charset, f.readlines.join("\n")))
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
def self.find_hostel_by_id(options)
|
23
|
+
opts = { :directions => false, :images => false, :all => false }.merge options
|
24
|
+
id = options[:id].to_s
|
25
|
+
url = HW_SINGULAR_DETAIL_URL + id
|
26
|
+
|
27
|
+
#coder = HTMLEntities.new
|
28
|
+
hostel = Hostelify.new
|
29
|
+
hostel.hostel_id = id
|
30
|
+
|
31
|
+
if options[:date]
|
32
|
+
options = @default_options.merge(options)
|
33
|
+
date = Date.strptime(options[:date])
|
34
|
+
data = setSearch(url, options[:date], options[:no_ppl], options[:no_days])
|
35
|
+
else
|
36
|
+
data = parse_html(url)
|
37
|
+
end
|
38
|
+
|
39
|
+
data = data.search("//div[@id='content']")
|
40
|
+
data.search("h3").remove #get rid of header
|
41
|
+
|
42
|
+
#title, address, desc, facilities, ratings
|
43
|
+
hostel.name = data.at("h2").inner_text.gsub(/( in ).*$/,'')
|
44
|
+
hostel.address = data.at('div[@style="padding-top: 5px"]').inner_text.lstrip
|
45
|
+
|
46
|
+
if options[:date]
|
47
|
+
hostel.availability = parse_availables(data)
|
48
|
+
else
|
49
|
+
hostel.description = data.at('div[@id="microDescription2]').inner_text
|
50
|
+
end
|
51
|
+
|
52
|
+
#optional
|
53
|
+
no_photos = data.at('span/a[@id="picLink"]').inner_text.to_i
|
54
|
+
video = data.at('div[@id="microVideo"]')
|
55
|
+
|
56
|
+
facilities = []
|
57
|
+
(data/"li.microFacilitiesBoomLi").each do |item|
|
58
|
+
facilities << item.inner_text
|
59
|
+
end
|
60
|
+
|
61
|
+
ratings = []
|
62
|
+
(data/'div[@id="ratingsBar2"]').each do |item|
|
63
|
+
ratings << item.inner_text.to_i
|
64
|
+
end
|
65
|
+
|
66
|
+
hostel.facilities = facilities
|
67
|
+
hostel.ratings = ratings
|
68
|
+
|
69
|
+
if video #exists
|
70
|
+
data = parse_html(HW_SINGULAR_YOUTUBE_URL + id)
|
71
|
+
video_url = data.at('param[@name="movie"]')['value']
|
72
|
+
hostel.video = video_url
|
73
|
+
#video_url = data.at('tag')
|
74
|
+
end
|
75
|
+
|
76
|
+
if options[:directions] or options[:all]
|
77
|
+
data = parse_html(HW_SINGULAR_DETAIL_URL + id + "/directions/")
|
78
|
+
|
79
|
+
#directions, geo
|
80
|
+
hostel.directions = data.at('div[@id="content"]').inner_text.gsub(/^[\d\D\n]*(DIRECTIONS)/,'')
|
81
|
+
hostel.geo = data.to_s.scan(/-{0,1}\d{1,3}\.\d{7}/).uniq!
|
82
|
+
end
|
83
|
+
|
84
|
+
if no_photos and (options[:images] or options[:all])
|
85
|
+
images = []
|
86
|
+
(1..no_photos).each do |i|
|
87
|
+
data = parse_html(HW_SINGULAR_IMAGE_URL + id + '&PicNO=' + i.to_s)
|
88
|
+
images << (data/"img").first[:src].to_s
|
89
|
+
end
|
90
|
+
hostel.images = images
|
91
|
+
end
|
92
|
+
hostel # return
|
93
|
+
end
|
94
|
+
|
95
|
+
def self.find_hostels_by_location(options) #location
|
96
|
+
|
97
|
+
city = options[:location].split(',').first.gsub(' ','')
|
98
|
+
country = options[:location].split(',').last.gsub(' ','')
|
99
|
+
url = HW_PLURAL_HOSTELS_URL + "ChosenCity.#{city}/ChosenCountry.#{country}"
|
100
|
+
|
101
|
+
if options[:date]
|
102
|
+
options = @default_options.merge(options)
|
103
|
+
date = Date.strptime(options[:date])
|
104
|
+
data = setSearch(url, options[:date], options[:no_ppl], options[:no_days])
|
105
|
+
else
|
106
|
+
data = parse_html(url)
|
107
|
+
end
|
108
|
+
|
109
|
+
data = data.search("//div[@id='content']")
|
110
|
+
@results = HostelifyCollection.new
|
111
|
+
|
112
|
+
(data/"div.hostelListing").each do |row|
|
113
|
+
name = row.at("h3").inner_text
|
114
|
+
desc = row.at("div.hostelEntry/p").inner_text.to_s.chop.gsub('more info','').squeeze('.')
|
115
|
+
url = row.at("h3/a")['href']
|
116
|
+
rating = row.at("h4/text()")
|
117
|
+
rating = rating.to_s.to_i unless rating.nil?
|
118
|
+
type = row.at("div.hostelListingImage/span").inner_text
|
119
|
+
hostel_id = url.match(/[\d]*$/).to_s
|
120
|
+
|
121
|
+
if options[:date]
|
122
|
+
#price_USD = row.at("span.blueBeds").inner_text #need to fix float
|
123
|
+
dorm = (row.at("p.hostelListingRate/span.blueBeds/text()")).to_s.gsub(/[A-Z$]*/,'')
|
124
|
+
single = row.at("p.hostelListingPrivateRate/span.blueBeds/text()").to_s.gsub(/[A-Z$]*/,'')
|
125
|
+
available = row/"ul.hostelListingDates/li.noAvail/text()"
|
126
|
+
available = available.to_a.join(',').split(',')
|
127
|
+
@results << Hostelify.new(:hostel_id => hostel_id, :name => name, :description => desc, :rating => rating, :dorm => dorm, :single => single, :unavailable => available)
|
128
|
+
else
|
129
|
+
@results << Hostelify.new(:hostel_id => hostel_id, :name => name, :description => desc, :rating => rating)
|
130
|
+
end
|
131
|
+
end
|
132
|
+
return @results
|
133
|
+
end
|
134
|
+
|
135
|
+
private
|
136
|
+
|
137
|
+
def self.setSearch(url,date,no_ppl,no_days)
|
138
|
+
|
139
|
+
date = Date.strptime(date)
|
140
|
+
month = date.strftime("%m").to_i
|
141
|
+
day = date.strftime("%d").to_i
|
142
|
+
if Time.now.strftime("%y") == date.strftime("%y") then year = 0 else year = 1 end
|
143
|
+
|
144
|
+
agent = WWW::Mechanize.new
|
145
|
+
page = agent.get(url)
|
146
|
+
|
147
|
+
#the form name
|
148
|
+
form = page.forms.first # => WWW::Mechanize::Form
|
149
|
+
|
150
|
+
Retryable.try 3 do
|
151
|
+
page = agent.submit(form)
|
152
|
+
end
|
153
|
+
|
154
|
+
#form must be submitted twice because the people writing hostelworld are retards
|
155
|
+
form = page.forms.first # => WWW::Mechanize::Form
|
156
|
+
form.field_with(:name => 'selMonth').options[month-1].select
|
157
|
+
form.field_with(:name => 'selDay').options[day-1].select
|
158
|
+
form.field_with(:name => 'selYear').options[year].select
|
159
|
+
form.field_with(:name => 'NumNights').options[no_days.to_i-1].select
|
160
|
+
form.field_with(:name => 'Persons').options[no_ppl.to_i-1].select
|
161
|
+
form.field_with(:name => 'Currency').options[4].select #US Currency
|
162
|
+
|
163
|
+
Retryable.try 3 do
|
164
|
+
page = agent.submit(form)
|
165
|
+
end
|
166
|
+
|
167
|
+
data = page.search("//div[@id='content']")
|
168
|
+
|
169
|
+
return data
|
170
|
+
end
|
171
|
+
|
172
|
+
def self.parse_availables(info)
|
173
|
+
|
174
|
+
availability = info.at('table[@id="tableDatesSelected2"]')
|
175
|
+
availability.search("div").remove
|
176
|
+
|
177
|
+
availables = []
|
178
|
+
|
179
|
+
(availability/"tr").each do |row|
|
180
|
+
name = (row/"td").first
|
181
|
+
name = name.inner_text unless name.nil?
|
182
|
+
|
183
|
+
(row/"td").each do |td|
|
184
|
+
night = td.attributes['title']
|
185
|
+
if night
|
186
|
+
price = night.to_s.match(/([\d]{1,3}).([\d]{2})/).to_s
|
187
|
+
available = night.to_s.match(/(available: )([\d]*)/)
|
188
|
+
date = night.to_s.match(/(Date: ).*$/).to_s.gsub(/(Date: )|(th)|(nd)|(rd)|(st)/,'')
|
189
|
+
date = Date.strptime(date, "%a %d %b '%y")
|
190
|
+
|
191
|
+
if available
|
192
|
+
beds = available.to_s.match(/[\d]{1,2}/)[0]
|
193
|
+
availables << HostelifyAvailable.new(name,price,beds,date)
|
194
|
+
else
|
195
|
+
availables << HostelifyAvailable.new(name,price,0,date)
|
196
|
+
end
|
197
|
+
end
|
198
|
+
end
|
199
|
+
end
|
200
|
+
return availables
|
201
|
+
|
202
|
+
end
|
203
|
+
|
204
|
+
|
205
|
+
end
|
data/lib/items.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
class Items
|
2
|
+
@items = []
|
3
|
+
class << self
|
4
|
+
attr_accessor :items
|
5
|
+
end
|
6
|
+
attr_accessor :name, :description
|
7
|
+
#def self.each(&args)
|
8
|
+
# @items.each(&args)
|
9
|
+
#end
|
10
|
+
def initialize(name, description)
|
11
|
+
@name, @description = name, description
|
12
|
+
Items.items << self
|
13
|
+
end
|
14
|
+
def each(&block)
|
15
|
+
yield name
|
16
|
+
yield description
|
17
|
+
end
|
18
|
+
|
19
|
+
def self.names
|
20
|
+
puts "hello"
|
21
|
+
end
|
22
|
+
|
23
|
+
end
|
data/lib/test.rb
ADDED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hostelify
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Holden Thomas
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date:
|
12
|
+
date: 2010-01-26 00:00:00 +01:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|
@@ -21,40 +21,45 @@ extensions: []
|
|
21
21
|
|
22
22
|
extra_rdoc_files:
|
23
23
|
- README.rdoc
|
24
|
+
- lib/hostel.rb
|
25
|
+
- lib/hostel/gomio.rb
|
26
|
+
- lib/hostel/hostel.rb
|
27
|
+
- lib/hostel/hostel_available.rb
|
28
|
+
- lib/hostel/hostelbookers.rb
|
29
|
+
- lib/hostel/hostelworld.rb
|
24
30
|
- lib/hostelify.rb
|
25
31
|
- lib/hostelify/gomio.rb
|
32
|
+
- lib/hostelify/hostel.rb
|
26
33
|
- lib/hostelify/hostelbookers.rb
|
27
34
|
- lib/hostelify/hostelify.rb
|
28
35
|
- lib/hostelify/hostelworld.rb
|
36
|
+
- lib/hostelify/hostelworldmonkey.rb
|
37
|
+
- lib/items.rb
|
38
|
+
- lib/test.rb
|
29
39
|
files:
|
30
40
|
- Manifest
|
31
41
|
- README.rdoc
|
32
42
|
- Rakefile
|
33
43
|
- hostelify.gemspec
|
44
|
+
- lib/hostel.rb
|
45
|
+
- lib/hostel/gomio.rb
|
46
|
+
- lib/hostel/hostel.rb
|
47
|
+
- lib/hostel/hostel_available.rb
|
48
|
+
- lib/hostel/hostelbookers.rb
|
49
|
+
- lib/hostel/hostelworld.rb
|
34
50
|
- lib/hostelify.rb
|
35
51
|
- lib/hostelify/gomio.rb
|
52
|
+
- lib/hostelify/hostel.rb
|
36
53
|
- lib/hostelify/hostelbookers.rb
|
37
54
|
- lib/hostelify/hostelify.rb
|
38
55
|
- lib/hostelify/hostelworld.rb
|
39
|
-
-
|
40
|
-
-
|
41
|
-
-
|
42
|
-
- pkg/hostelify-0.3.6/README.rdoc
|
43
|
-
- pkg/hostelify-0.3.6/Rakefile
|
44
|
-
- pkg/hostelify-0.3.6/hostelify.gemspec
|
45
|
-
- pkg/hostelify-0.3.6/lib/hostelify.rb
|
46
|
-
- pkg/hostelify-0.3.6/lib/hostelify/gomio.rb
|
47
|
-
- pkg/hostelify-0.3.6/lib/hostelify/hostelbookers.rb
|
48
|
-
- pkg/hostelify-0.3.6/lib/hostelify/hostelify.rb
|
49
|
-
- pkg/hostelify-0.3.6/lib/hostelify/hostelworld.rb
|
50
|
-
- pkg/hostelify-0.3.6/spec/_helper.rb
|
51
|
-
- pkg/hostelify-0.3.6/spec/hb_find_by_hostel.spec
|
52
|
-
- pkg/hostelify-0.3.6/spec/hb_find_hostels.spec
|
53
|
-
- pkg/hostelify-0.3.6/spec/hw_find_by_hostel.spec
|
54
|
-
- pkg/hostelify-0.3.6/spec/hw_find_hostels.spec
|
56
|
+
- lib/hostelify/hostelworldmonkey.rb
|
57
|
+
- lib/items.rb
|
58
|
+
- lib/test.rb
|
55
59
|
- spec/_helper.rb
|
56
60
|
- spec/hb_find_by_hostel.spec
|
57
61
|
- spec/hb_find_hostels.spec
|
62
|
+
- spec/helper.rb
|
58
63
|
- spec/hw_find_by_hostel.spec
|
59
64
|
- spec/hw_find_hostels.spec
|
60
65
|
has_rdoc: true
|
@@ -1,30 +0,0 @@
|
|
1
|
-
Manifest
|
2
|
-
README.rdoc
|
3
|
-
Rakefile
|
4
|
-
hostelify.gemspec
|
5
|
-
lib/hostelify.rb
|
6
|
-
lib/hostelify/gomio.rb
|
7
|
-
lib/hostelify/hostelbookers.rb
|
8
|
-
lib/hostelify/hostelify.rb
|
9
|
-
lib/hostelify/hostelworld.rb
|
10
|
-
pkg/hostelify-0.3.6.gem
|
11
|
-
pkg/hostelify-0.3.6.tar.gz
|
12
|
-
pkg/hostelify-0.3.6/Manifest
|
13
|
-
pkg/hostelify-0.3.6/README.rdoc
|
14
|
-
pkg/hostelify-0.3.6/Rakefile
|
15
|
-
pkg/hostelify-0.3.6/hostelify.gemspec
|
16
|
-
pkg/hostelify-0.3.6/lib/hostelify.rb
|
17
|
-
pkg/hostelify-0.3.6/lib/hostelify/gomio.rb
|
18
|
-
pkg/hostelify-0.3.6/lib/hostelify/hostelbookers.rb
|
19
|
-
pkg/hostelify-0.3.6/lib/hostelify/hostelify.rb
|
20
|
-
pkg/hostelify-0.3.6/lib/hostelify/hostelworld.rb
|
21
|
-
pkg/hostelify-0.3.6/spec/_helper.rb
|
22
|
-
pkg/hostelify-0.3.6/spec/hb_find_by_hostel.spec
|
23
|
-
pkg/hostelify-0.3.6/spec/hb_find_hostels.spec
|
24
|
-
pkg/hostelify-0.3.6/spec/hw_find_by_hostel.spec
|
25
|
-
pkg/hostelify-0.3.6/spec/hw_find_hostels.spec
|
26
|
-
spec/_helper.rb
|
27
|
-
spec/hb_find_by_hostel.spec
|
28
|
-
spec/hb_find_hostels.spec
|
29
|
-
spec/hw_find_by_hostel.spec
|
30
|
-
spec/hw_find_hostels.spec
|
@@ -1 +0,0 @@
|
|
1
|
-
Nothin yet... still...
|
@@ -1,14 +0,0 @@
|
|
1
|
-
require 'rubygems'
|
2
|
-
require 'rake'
|
3
|
-
require 'echoe'
|
4
|
-
|
5
|
-
Echoe.new('hostelify', '0.3.7') do |p|
|
6
|
-
p.description = "Simple Hostel Webscrapper."
|
7
|
-
p.url = "http://github.com/holden/hostelify"
|
8
|
-
p.author = "Holden Thomas"
|
9
|
-
p.email = "holden.thomas@gmail.com"
|
10
|
-
p.ignore_pattern = ["tmp/*", "script/*"]
|
11
|
-
p.development_dependencies = []
|
12
|
-
end
|
13
|
-
|
14
|
-
Dir["#{File.dirname(__FILE__)}/tasks/*.rake"].sort.each { |ext| load ext }
|
@@ -1,31 +0,0 @@
|
|
1
|
-
# -*- encoding: utf-8 -*-
|
2
|
-
|
3
|
-
Gem::Specification.new do |s|
|
4
|
-
s.name = %q{hostelify}
|
5
|
-
s.version = "0.3.6"
|
6
|
-
|
7
|
-
s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
|
8
|
-
s.authors = ["Holden Thomas"]
|
9
|
-
s.date = %q{2009-09-23}
|
10
|
-
s.description = %q{Simple Hostel Webscrapper.}
|
11
|
-
s.email = %q{holden.thomas@gmail.com}
|
12
|
-
s.extra_rdoc_files = ["README.rdoc", "lib/hostelify.rb", "lib/hostelify/gomio.rb", "lib/hostelify/hostelbookers.rb", "lib/hostelify/hostelify.rb", "lib/hostelify/hostelworld.rb"]
|
13
|
-
s.files = ["Manifest", "README.rdoc", "Rakefile", "hostelify.gemspec", "lib/hostelify.rb", "lib/hostelify/gomio.rb", "lib/hostelify/hostelbookers.rb", "lib/hostelify/hostelify.rb", "lib/hostelify/hostelworld.rb", "spec/_helper.rb", "spec/hb_find_by_hostel.spec", "spec/hb_find_hostels.spec", "spec/hw_find_by_hostel.spec", "spec/hw_find_hostels.spec"]
|
14
|
-
s.has_rdoc = true
|
15
|
-
s.homepage = %q{http://github.com/holden/hostelify}
|
16
|
-
s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Hostelify", "--main", "README.rdoc"]
|
17
|
-
s.require_paths = ["lib"]
|
18
|
-
s.rubyforge_project = %q{hostelify}
|
19
|
-
s.rubygems_version = %q{1.3.1}
|
20
|
-
s.summary = %q{Simple Hostel Webscrapper.}
|
21
|
-
|
22
|
-
if s.respond_to? :specification_version then
|
23
|
-
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
24
|
-
s.specification_version = 2
|
25
|
-
|
26
|
-
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
27
|
-
else
|
28
|
-
end
|
29
|
-
else
|
30
|
-
end
|
31
|
-
end
|
@@ -1,265 +0,0 @@
|
|
1
|
-
class Hostelworld
|
2
|
-
|
3
|
-
#constants
|
4
|
-
#location list includes/indexjs.js
|
5
|
-
HW_SINGULAR_DETAIL_URL = "http://www.hostelworld.com/hosteldetails.php?HostelNumber="
|
6
|
-
HW_SINGULAR_IMAGE_URL = "http://www.hostelworld.com/hostelpictures.php?HostelNumber="
|
7
|
-
HW_SINGULAR_AVAILABILITY = "http://www.hostelworld.com/availability.php/"
|
8
|
-
HW_SINGULAR_YOUTUBE_URL = "http://www.hostelworld.com/youtubevideo.php?HostelNumber="
|
9
|
-
HW_PLURAL_HOSTELS_URL = "http://www.hostelworld.com/findabed.php/"
|
10
|
-
|
11
|
-
#options
|
12
|
-
@default_options = { :date => date=(Date.today+4).to_s, :no_days => "7", :no_ppl => "2" }
|
13
|
-
|
14
|
-
def self.parse_html(url)
|
15
|
-
f = open(url)
|
16
|
-
f.rewind
|
17
|
-
Retryable.try 3 do
|
18
|
-
data = Hpricot(Iconv.conv('utf-8', f.charset, f.readlines.join("\n")))
|
19
|
-
end
|
20
|
-
end
|
21
|
-
|
22
|
-
def self.find_hostel_by_id(options)
|
23
|
-
opts = { :directions => false, :images => false, :all => false }.merge options
|
24
|
-
id = options[:id].to_s
|
25
|
-
url = HW_SINGULAR_DETAIL_URL + id
|
26
|
-
|
27
|
-
#coder = HTMLEntities.new
|
28
|
-
hostel = Hostelify.new
|
29
|
-
hostel.hostel_id = id
|
30
|
-
|
31
|
-
if options[:date]
|
32
|
-
options = @default_options.merge(options)
|
33
|
-
date = Date.strptime(options[:date])
|
34
|
-
data = setSearch(url, options[:date], options[:no_ppl], options[:no_days])
|
35
|
-
else
|
36
|
-
data = parse_html(url)
|
37
|
-
end
|
38
|
-
|
39
|
-
unless data == "Full"
|
40
|
-
data = data.search("//div[@id='content']")
|
41
|
-
data.search("h3").remove #get rid of header
|
42
|
-
|
43
|
-
#title, address, desc, facilities, ratings
|
44
|
-
hostel.name = data.at("h2").inner_text.gsub(/( in ).*$/,'')
|
45
|
-
hostel.address = data.at('div[@style="padding-top: 5px"]').inner_text.lstrip
|
46
|
-
|
47
|
-
if options[:date]
|
48
|
-
hostel.availability = parse_availables(data)
|
49
|
-
else
|
50
|
-
hostel.description = data.at('div[@id="microDescription2]').inner_text
|
51
|
-
end
|
52
|
-
|
53
|
-
#optional
|
54
|
-
no_photos = data.at('div[@id="microPicScroll"]/span/a').inner_text.to_i
|
55
|
-
#no_photos = data.at('span/a[@id="picLink"]').inner_text.to_i
|
56
|
-
video = data.at('div[@id="microVideo"]')
|
57
|
-
|
58
|
-
#facilities = []
|
59
|
-
#(data/"li.microFacilitiesBoomLi").each do |item|
|
60
|
-
# facilities << item.inner_text
|
61
|
-
#end
|
62
|
-
|
63
|
-
facilities = []
|
64
|
-
index_count = 1
|
65
|
-
(data/"ul.microFacilitiesBoomUl/li").each_with_index do |item,index|
|
66
|
-
if item.attributes['class'] == "microFacilitiesBoomLiInner"
|
67
|
-
facilities << "#{facilities[index-index_count]}: " + item.inner_text
|
68
|
-
index_count += 1
|
69
|
-
else
|
70
|
-
index_count = 1
|
71
|
-
facilities << item.inner_text
|
72
|
-
end
|
73
|
-
end
|
74
|
-
|
75
|
-
ratings = []
|
76
|
-
(data/'div[@id="ratingsBar2"]').each do |item|
|
77
|
-
ratings << item.inner_text.to_i
|
78
|
-
end
|
79
|
-
|
80
|
-
hostel.facilities = facilities
|
81
|
-
hostel.ratings = ratings
|
82
|
-
|
83
|
-
if video #exists
|
84
|
-
data = parse_html(HW_SINGULAR_YOUTUBE_URL + id)
|
85
|
-
video_url = data.at('param[@name="movie"]')['value']
|
86
|
-
hostel.video = video_url
|
87
|
-
#video_url = data.at('tag')
|
88
|
-
end
|
89
|
-
|
90
|
-
if options[:directions] or options[:all]
|
91
|
-
data = parse_html(HW_SINGULAR_DETAIL_URL + id + "/directions/")
|
92
|
-
|
93
|
-
#directions, geo
|
94
|
-
hostel.directions = data.at('div[@id="content"]').inner_text.gsub(/^[\d\D\n]*(DIRECTIONS)/,'')
|
95
|
-
hostel.geo = data.to_s.scan(/-{0,1}\d{1,3}\.\d{7}/).uniq!
|
96
|
-
end
|
97
|
-
|
98
|
-
if no_photos and (options[:images] or options[:all])
|
99
|
-
images = []
|
100
|
-
(1..no_photos).each do |i|
|
101
|
-
data = parse_html(HW_SINGULAR_IMAGE_URL + id + '&PicNO=' + i.to_s)
|
102
|
-
images << (data/"img").first[:src].to_s
|
103
|
-
end
|
104
|
-
hostel.images = images
|
105
|
-
end
|
106
|
-
else
|
107
|
-
hostel = nil
|
108
|
-
end
|
109
|
-
hostel # return
|
110
|
-
end
|
111
|
-
|
112
|
-
def self.find_hostels_by_location(options) #location
|
113
|
-
|
114
|
-
city = options[:location].split(',').first.gsub(' ','')
|
115
|
-
country = options[:location].split(',').last.gsub(' ','')
|
116
|
-
url = HW_PLURAL_HOSTELS_URL + "ChosenCity.#{city}/ChosenCountry.#{country}"
|
117
|
-
|
118
|
-
if options[:date]
|
119
|
-
options = @default_options.merge(options)
|
120
|
-
date = Date.strptime(options[:date])
|
121
|
-
data = setSearch2(url, options[:date], options[:no_ppl], options[:no_days])
|
122
|
-
else
|
123
|
-
data = parse_html(url)
|
124
|
-
end
|
125
|
-
|
126
|
-
data = data.search("//div[@id='content']")
|
127
|
-
@results = HostelifyCollection.new
|
128
|
-
|
129
|
-
(data/"div.hostelListing").each do |row|
|
130
|
-
name = row.at("h3").inner_text
|
131
|
-
desc = row.at("div.hostelEntry/p").inner_text.to_s.chop.gsub('more info','').squeeze('.')
|
132
|
-
url = row.at("h3/a")['href']
|
133
|
-
rating = row.at("h4/text()")
|
134
|
-
rating = rating.to_s.to_i unless rating.nil?
|
135
|
-
type = row.at("div.hostelListingImage/span").inner_text
|
136
|
-
hostel_id = url.match(/[\d]*$/).to_s
|
137
|
-
|
138
|
-
if options[:date]
|
139
|
-
#price_USD = row.at("span.blueBeds").inner_text #need to fix float
|
140
|
-
dorm = (row.at("p.hostelListingRate/span.blueBeds/text()")).to_s.gsub(/[A-Z$]*/,'')
|
141
|
-
single = row.at("p.hostelListingPrivateRate/span.blueBeds/text()").to_s.gsub(/[A-Z$]*/,'')
|
142
|
-
available = row/"ul.hostelListingDates/li.noAvail/text()"
|
143
|
-
available = available.to_a.join(',').split(',')
|
144
|
-
@results << Hostelify.new(:hostel_id => hostel_id, :name => name, :description => desc, :rating => rating, :dorm => dorm, :single => single, :unavailable => available)
|
145
|
-
else
|
146
|
-
@results << Hostelify.new(:hostel_id => hostel_id, :name => name, :description => desc, :rating => rating)
|
147
|
-
end
|
148
|
-
end
|
149
|
-
return @results
|
150
|
-
end
|
151
|
-
|
152
|
-
private
|
153
|
-
|
154
|
-
def self.setSearch(url,date,no_ppl,no_days)
|
155
|
-
|
156
|
-
date = Date.strptime(date)
|
157
|
-
month = date.strftime("%m").to_i
|
158
|
-
day = date.strftime("%d").to_i
|
159
|
-
if Time.now.strftime("%y") == date.strftime("%y") then year = 0 else year = 1 end
|
160
|
-
|
161
|
-
agent = WWW::Mechanize.new
|
162
|
-
page = agent.get(url)
|
163
|
-
|
164
|
-
#the form name
|
165
|
-
#form = page.forms.first # => WWW::Mechanize::Form
|
166
|
-
form = page.form_with(:name => 'theForm')
|
167
|
-
|
168
|
-
#page = agent.submit(form)
|
169
|
-
|
170
|
-
#form must be submitted twice because the people writing hostelworld are retards
|
171
|
-
#form = page.forms.first # => WWW::Mechanize::Form
|
172
|
-
#form = page.form_with(:name => 'theForm')
|
173
|
-
form.field_with(:name => 'selMonth2').options[month-1].select
|
174
|
-
form.field_with(:name => 'selDay2').options[day-1].select
|
175
|
-
form.field_with(:name => 'selYear2').options[year].select
|
176
|
-
#form.field_with(:name => { 0 => 'NumNights' }).options[no_days.to_i-1].select
|
177
|
-
my_fields = form.fields.select {|f| f.name == "NumNights"}
|
178
|
-
my_fields[1].value = no_days.to_i
|
179
|
-
#form.my_fields[1].whatever = "value"
|
180
|
-
#form.field_with(:name => 'Persons').options[no_ppl.to_i-1].select
|
181
|
-
#form.field_with(:name => 'Currency').options[4].select #US Currency
|
182
|
-
|
183
|
-
|
184
|
-
Retryable.try 3 do
|
185
|
-
page = agent.submit(form, form.button_with(:name => 'DateSelect'))
|
186
|
-
end
|
187
|
-
|
188
|
-
error = page.search("div.microBookingError2")
|
189
|
-
|
190
|
-
if error.to_s.length > 1
|
191
|
-
data = "Full"
|
192
|
-
else
|
193
|
-
data = page.search("//div[@id='content']")
|
194
|
-
end
|
195
|
-
|
196
|
-
return data
|
197
|
-
end
|
198
|
-
|
199
|
-
def self.setSearch2(url,date,no_ppl,no_days)
|
200
|
-
|
201
|
-
date = Date.strptime(date)
|
202
|
-
month = date.strftime("%m").to_i
|
203
|
-
day = date.strftime("%d").to_i
|
204
|
-
if Time.now.strftime("%y") == date.strftime("%y") then year = 0 else year = 1 end
|
205
|
-
|
206
|
-
agent = WWW::Mechanize.new
|
207
|
-
page = agent.get(url)
|
208
|
-
|
209
|
-
#the form name
|
210
|
-
#form = page.forms.first # => WWW::Mechanize::Form
|
211
|
-
form = page.form_with(:name => 'theForm')
|
212
|
-
|
213
|
-
#page = agent.submit(form)
|
214
|
-
|
215
|
-
#form must be submitted twice because the people writing hostelworld are retards
|
216
|
-
|
217
|
-
form.field_with(:name => 'selMonth').options[month-1].select
|
218
|
-
form.field_with(:name => 'selDay').options[day-1].select
|
219
|
-
form.field_with(:name => 'selYear').options[year].select
|
220
|
-
form.field_with(:name => 'NumNights').options[no_days.to_i-1].select
|
221
|
-
form.field_with(:name => 'Persons').options[no_ppl.to_i-1].select
|
222
|
-
form.field_with(:name => 'Currency').options[4].select #US Currency
|
223
|
-
|
224
|
-
Retryable.try 3 do
|
225
|
-
page = agent.submit(form)
|
226
|
-
end
|
227
|
-
data = page.search("//div[@id='content']")
|
228
|
-
return data
|
229
|
-
end
|
230
|
-
|
231
|
-
def self.parse_availables(info)
|
232
|
-
|
233
|
-
availability = info.at('table[@id="tableDatesSelected2"]')
|
234
|
-
availability.search("div").remove
|
235
|
-
availability.search("span.hwRoomTypeDesc").remove
|
236
|
-
|
237
|
-
availables = []
|
238
|
-
|
239
|
-
(availability/"tr").each do |row|
|
240
|
-
name = (row/"td").first
|
241
|
-
name = name.inner_text unless name.nil?
|
242
|
-
|
243
|
-
(row/"td").each do |td|
|
244
|
-
night = td.attributes['title']
|
245
|
-
if night
|
246
|
-
price = night.to_s.match(/([\d]{1,3}).([\d]{2})/).to_s
|
247
|
-
available = night.to_s.match(/(available: )([\d]*)/)
|
248
|
-
date = night.to_s.match(/(Date: ).*$/).to_s.gsub(/(Date: )|(th)|(nd)|(rd)|(st)/,'')
|
249
|
-
date = Date.strptime(date, "%a %d %b '%y")
|
250
|
-
|
251
|
-
if available
|
252
|
-
beds = available.to_s.match(/[\d]{1,2}/)[0]
|
253
|
-
availables << HostelifyAvailable.new(name,price,beds,date)
|
254
|
-
else
|
255
|
-
availables << HostelifyAvailable.new(name,price,0,date)
|
256
|
-
end
|
257
|
-
end
|
258
|
-
end
|
259
|
-
end
|
260
|
-
return availables
|
261
|
-
|
262
|
-
end
|
263
|
-
|
264
|
-
|
265
|
-
end
|
@@ -1,72 +0,0 @@
|
|
1
|
-
require 'spec/_helper'
|
2
|
-
|
3
|
-
describe "should find individual hostel and get object with name etc." do
|
4
|
-
|
5
|
-
before(:all) do
|
6
|
-
@h = Hostelbookers.find_hostel_by_id(:location => "krakow,poland", :id => 9330)
|
7
|
-
@h = Hostelbookers.find_hostel_by_id(:location => "lviv,ukraine", :id => 19606)
|
8
|
-
end
|
9
|
-
|
10
|
-
it "should query hostelbookers and return the correct name" do
|
11
|
-
@h.name.should match(/^.*(Hostel).*$/)
|
12
|
-
end
|
13
|
-
|
14
|
-
it "address" do
|
15
|
-
@h.address.should match(/^.*(Krakow|Lviv).*$/)
|
16
|
-
end
|
17
|
-
|
18
|
-
it "description" do
|
19
|
-
@h.address.should_not be nil
|
20
|
-
end
|
21
|
-
|
22
|
-
it "facilities" do
|
23
|
-
@h.should have_at_least(8).facilities
|
24
|
-
end
|
25
|
-
|
26
|
-
it "ratings" do
|
27
|
-
@h.should have(8).ratings
|
28
|
-
end
|
29
|
-
|
30
|
-
it "images at least 6" do
|
31
|
-
@h.should have_at_least(6).images
|
32
|
-
end
|
33
|
-
|
34
|
-
end
|
35
|
-
|
36
|
-
describe "all options" do
|
37
|
-
before(:all) do
|
38
|
-
@h = Hostelbookers.find_hostel_by_id(:location => "krakow,poland", :id => 9330, :all => true)
|
39
|
-
end
|
40
|
-
|
41
|
-
it "directions should have a certain length <" do
|
42
|
-
@h.directions.length.should be > 25
|
43
|
-
end
|
44
|
-
|
45
|
-
it "geo" do
|
46
|
-
@h.geo[0].to_f.round.should eql 50
|
47
|
-
end
|
48
|
-
end
|
49
|
-
|
50
|
-
describe "with dates to get availabilty and verify output!" do
|
51
|
-
before(:all) do
|
52
|
-
@h = Hostelbookers.find_hostel_by_id(:location => "krakow,poland", :id => 19831, :date => (Date.today+10).to_s)
|
53
|
-
end
|
54
|
-
|
55
|
-
it "get first availability and check it merit" do
|
56
|
-
@h.availability.first.name =~ /bed/
|
57
|
-
end
|
58
|
-
|
59
|
-
it "check number of avail beds" do
|
60
|
-
@h.availability.first.spots.to_i.should be > 1
|
61
|
-
end
|
62
|
-
|
63
|
-
it "last avail has a price > 5 US" do
|
64
|
-
@h.availability.last.price.to_i.should be > 5
|
65
|
-
end
|
66
|
-
|
67
|
-
it "book date eq today + 10" do
|
68
|
-
@h.availability.last.bookdate.should_not be nil
|
69
|
-
end
|
70
|
-
|
71
|
-
|
72
|
-
end
|
@@ -1,30 +0,0 @@
|
|
1
|
-
require 'spec/_helper'
|
2
|
-
|
3
|
-
describe "test hostelbookers hostel listings" do
|
4
|
-
|
5
|
-
before(:all) do
|
6
|
-
@h = Hostelbookers.find_hostels_by_location(:location => "krakow,poland")
|
7
|
-
end
|
8
|
-
|
9
|
-
it "should return a list of names" do
|
10
|
-
names = []
|
11
|
-
@h.each do |e|
|
12
|
-
names << e.name
|
13
|
-
end
|
14
|
-
names.should include("Flamingo Hostel")
|
15
|
-
names.should include("Mama's Hostel- Main Market Square")
|
16
|
-
end
|
17
|
-
|
18
|
-
it "rating should be high for first choices" do
|
19
|
-
@h.first.rating.to_i.should be > 50
|
20
|
-
end
|
21
|
-
|
22
|
-
it "desc should have a certain length <" do
|
23
|
-
@h.first.description.length.should be > 100
|
24
|
-
end
|
25
|
-
|
26
|
-
it "has a hostel number" do
|
27
|
-
@h.first.hostel_id.to_i.should_not be nil
|
28
|
-
end
|
29
|
-
|
30
|
-
end
|
@@ -1,85 +0,0 @@
|
|
1
|
-
require 'spec/_helper'
|
2
|
-
|
3
|
-
describe "find hostel by id, no options" do
|
4
|
-
|
5
|
-
before(:all) do
|
6
|
-
@h = Hostelworld.find_hostel_by_id(:id => 7113)
|
7
|
-
@h = Hostelworld.find_hostel_by_id(:id => 20763)
|
8
|
-
end
|
9
|
-
|
10
|
-
it "should query hostelworld and return the correct name" do
|
11
|
-
@h.name.should match(/^.*(Hostel).*$/)
|
12
|
-
end
|
13
|
-
|
14
|
-
it "address" do
|
15
|
-
@h.address.should match(/^.*(Krakow|Lviv).*$/)
|
16
|
-
end
|
17
|
-
|
18
|
-
it "description" do
|
19
|
-
@h.address.should_not be nil
|
20
|
-
end
|
21
|
-
|
22
|
-
it "facilities" do
|
23
|
-
@h.should have_at_least(15).facilities
|
24
|
-
end
|
25
|
-
|
26
|
-
it "ratings" do
|
27
|
-
@h.should have(6).ratings
|
28
|
-
end
|
29
|
-
|
30
|
-
end
|
31
|
-
|
32
|
-
describe "youtube" do
|
33
|
-
|
34
|
-
before(:all) do
|
35
|
-
@h3 = Hostelworld.find_hostel_by_id(:id => 7113)
|
36
|
-
end
|
37
|
-
|
38
|
-
it "video" do
|
39
|
-
@h3.video.should match(/^.*(youtube.com).*$/)
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
|
-
describe "find hostel with all options" do
|
44
|
-
before(:all) do
|
45
|
-
@h2 = Hostelworld.find_hostel_by_id(:id => 7113, :all => true)
|
46
|
-
@h2 = Hostelworld.find_hostel_by_id(:id => 20763, :all => true)
|
47
|
-
end
|
48
|
-
|
49
|
-
it "geo" do
|
50
|
-
@h2.geo[0].to_f.round.should eql 50
|
51
|
-
end
|
52
|
-
|
53
|
-
it "directions" do
|
54
|
-
@h2.directions.should_not be nil
|
55
|
-
end
|
56
|
-
|
57
|
-
it "images at least 6" do
|
58
|
-
@h2.should have_at_least(6).images
|
59
|
-
end
|
60
|
-
end
|
61
|
-
|
62
|
-
describe "with dates to get availabilty and verify output!" do
|
63
|
-
before(:all) do
|
64
|
-
#@h = Hostelworld.find_hostel_by_id(:id => 20763, :date => (Date.today+20).to_s)
|
65
|
-
@h = Hostelworld.find_hostel_by_id(:id => 7113, :date => (Date.today+20).to_s)
|
66
|
-
end
|
67
|
-
|
68
|
-
it "get first availability and check it merit" do
|
69
|
-
@h.availability.first.name =~ /bed/
|
70
|
-
end
|
71
|
-
|
72
|
-
it "check number of avail beds" do
|
73
|
-
@h.availability.last.spots.to_i.should be >= 1
|
74
|
-
end
|
75
|
-
|
76
|
-
it "last avail has a price > 5 US" do
|
77
|
-
@h.availability.last.price.to_i.should be > 5
|
78
|
-
end
|
79
|
-
|
80
|
-
it "book date eq today + 10" do
|
81
|
-
@h.availability.last.bookdate.should_not be nil
|
82
|
-
end
|
83
|
-
|
84
|
-
|
85
|
-
end
|
@@ -1,62 +0,0 @@
|
|
1
|
-
require 'spec/_helper'
|
2
|
-
|
3
|
-
describe "finds list of hostels" do
|
4
|
-
|
5
|
-
before(:all) do
|
6
|
-
@h = Hostelworld.find_hostels_by_location(:location => 'krakow,poland')
|
7
|
-
end
|
8
|
-
|
9
|
-
it "should get a list with name and brief desc" do
|
10
|
-
names = []
|
11
|
-
@h.each do |e|
|
12
|
-
names << e.name
|
13
|
-
end
|
14
|
-
names.should include("Mama's Hostel Main Market Square")
|
15
|
-
end
|
16
|
-
|
17
|
-
|
18
|
-
it "rating should be high for first choices" do
|
19
|
-
@h.first.rating.to_i.should be > 50
|
20
|
-
end
|
21
|
-
|
22
|
-
it "desc should have a certain length <" do
|
23
|
-
@h.first.description.length.should be > 80
|
24
|
-
end
|
25
|
-
|
26
|
-
it "has a hostel number" do
|
27
|
-
@h.first.hostel_id.to_i.should_not be nil
|
28
|
-
end
|
29
|
-
|
30
|
-
end
|
31
|
-
|
32
|
-
describe "find hostels with dates" do
|
33
|
-
|
34
|
-
before(:all) do
|
35
|
-
@h = Hostelworld.find_hostels_by_location(:location => 'krakow,poland', :date => (Date.today + 10).to_s)
|
36
|
-
end
|
37
|
-
|
38
|
-
it "rating should be high for first choices" do
|
39
|
-
@h.first.rating.to_i.should be > 50
|
40
|
-
end
|
41
|
-
|
42
|
-
it "desc should have a certain length <" do
|
43
|
-
@h.first.description.length.should be > 90
|
44
|
-
end
|
45
|
-
|
46
|
-
it "has a hostel number" do
|
47
|
-
@h.first.hostel_id.to_i.should_not be nil
|
48
|
-
end
|
49
|
-
|
50
|
-
it "has dorm rooms for greater than $5" do
|
51
|
-
@h.first.dorm.to_i.should be > 5
|
52
|
-
end
|
53
|
-
|
54
|
-
it "has available rooms!" do
|
55
|
-
@h.first.unavailable.first.should be nil
|
56
|
-
end
|
57
|
-
|
58
|
-
it "has unavailable rooms!" do
|
59
|
-
@h.last.unavailable.first.should_not be nil
|
60
|
-
end
|
61
|
-
|
62
|
-
end
|
data/pkg/hostelify-0.3.6.gem
DELETED
Binary file
|
data/pkg/hostelify-0.3.6.tar.gz
DELETED
Binary file
|
File without changes
|
File without changes
|