hostelify 0.3.7
Sign up to get free protection for your applications and to get access to all the features.
- data/Manifest +30 -0
- data/README.rdoc +1 -0
- data/Rakefile +14 -0
- data/hostelify.gemspec +30 -0
- data/lib/hostelify/gomio.rb +102 -0
- data/lib/hostelify/hostelbookers.rb +180 -0
- data/lib/hostelify/hostelify.rb +61 -0
- data/lib/hostelify/hostelworld.rb +265 -0
- data/lib/hostelify.rb +10 -0
- data/pkg/hostelify-0.3.6/Manifest +30 -0
- data/pkg/hostelify-0.3.6/README.rdoc +1 -0
- data/pkg/hostelify-0.3.6/Rakefile +14 -0
- data/pkg/hostelify-0.3.6/hostelify.gemspec +31 -0
- data/pkg/hostelify-0.3.6/lib/hostelify/gomio.rb +102 -0
- data/pkg/hostelify-0.3.6/lib/hostelify/hostelbookers.rb +180 -0
- data/pkg/hostelify-0.3.6/lib/hostelify/hostelify.rb +61 -0
- data/pkg/hostelify-0.3.6/lib/hostelify/hostelworld.rb +265 -0
- data/pkg/hostelify-0.3.6/lib/hostelify.rb +10 -0
- data/pkg/hostelify-0.3.6/spec/_helper.rb +4 -0
- data/pkg/hostelify-0.3.6/spec/hb_find_by_hostel.spec +72 -0
- data/pkg/hostelify-0.3.6/spec/hb_find_hostels.spec +30 -0
- data/pkg/hostelify-0.3.6/spec/hw_find_by_hostel.spec +85 -0
- data/pkg/hostelify-0.3.6/spec/hw_find_hostels.spec +62 -0
- data/pkg/hostelify-0.3.6.gem +0 -0
- data/pkg/hostelify-0.3.6.tar.gz +0 -0
- data/spec/_helper.rb +4 -0
- data/spec/hb_find_by_hostel.spec +72 -0
- data/spec/hb_find_hostels.spec +30 -0
- data/spec/hw_find_by_hostel.spec +85 -0
- data/spec/hw_find_hostels.spec +62 -0
- metadata +94 -0
data/Manifest
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
Manifest
|
2
|
+
README.rdoc
|
3
|
+
Rakefile
|
4
|
+
hostelify.gemspec
|
5
|
+
lib/hostelify.rb
|
6
|
+
lib/hostelify/gomio.rb
|
7
|
+
lib/hostelify/hostelbookers.rb
|
8
|
+
lib/hostelify/hostelify.rb
|
9
|
+
lib/hostelify/hostelworld.rb
|
10
|
+
pkg/hostelify-0.3.6.gem
|
11
|
+
pkg/hostelify-0.3.6.tar.gz
|
12
|
+
pkg/hostelify-0.3.6/Manifest
|
13
|
+
pkg/hostelify-0.3.6/README.rdoc
|
14
|
+
pkg/hostelify-0.3.6/Rakefile
|
15
|
+
pkg/hostelify-0.3.6/hostelify.gemspec
|
16
|
+
pkg/hostelify-0.3.6/lib/hostelify.rb
|
17
|
+
pkg/hostelify-0.3.6/lib/hostelify/gomio.rb
|
18
|
+
pkg/hostelify-0.3.6/lib/hostelify/hostelbookers.rb
|
19
|
+
pkg/hostelify-0.3.6/lib/hostelify/hostelify.rb
|
20
|
+
pkg/hostelify-0.3.6/lib/hostelify/hostelworld.rb
|
21
|
+
pkg/hostelify-0.3.6/spec/_helper.rb
|
22
|
+
pkg/hostelify-0.3.6/spec/hb_find_by_hostel.spec
|
23
|
+
pkg/hostelify-0.3.6/spec/hb_find_hostels.spec
|
24
|
+
pkg/hostelify-0.3.6/spec/hw_find_by_hostel.spec
|
25
|
+
pkg/hostelify-0.3.6/spec/hw_find_hostels.spec
|
26
|
+
spec/_helper.rb
|
27
|
+
spec/hb_find_by_hostel.spec
|
28
|
+
spec/hb_find_hostels.spec
|
29
|
+
spec/hw_find_by_hostel.spec
|
30
|
+
spec/hw_find_hostels.spec
|
data/README.rdoc
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
Nothing yet... still...
|
data/Rakefile
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
require 'echoe'
|
4
|
+
|
5
|
+
# Echoe packaging configuration for the hostelify gem.
Echoe.new('hostelify', '0.3.7') do |p|
  p.description = "Simple Hostel Webscrapper."
  p.url = "http://github.com/holden/hostelify"
  p.author = "Holden Thomas"
  p.email = "holden.thomas@gmail.com"
  # pkg/ holds the output of earlier builds; without excluding it every
  # release ships the previous release's .gem, .tar.gz and unpacked tree.
  # Re-run `rake manifest` after changing this list so Manifest and the
  # generated gemspec pick up the change.
  p.ignore_pattern = ["tmp/*", "script/*", "pkg/**/*"]
  p.development_dependencies = []
end
|
13
|
+
|
14
|
+
Dir["#{File.dirname(__FILE__)}/tasks/*.rake"].sort.each { |ext| load ext }
|
data/hostelify.gemspec
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
# Gem specification for hostelify (generated by Echoe from the Rakefile).
Gem::Specification.new do |s|
  s.name = %q{hostelify}
  s.version = "0.3.7"

  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
  s.authors = ["Holden Thomas"]
  s.date = %q{2009-10-19}
  s.description = %q{Simple Hostel Webscrapper.}
  s.email = %q{holden.thomas@gmail.com}
  s.extra_rdoc_files = ["README.rdoc", "lib/hostelify.rb", "lib/hostelify/gomio.rb", "lib/hostelify/hostelbookers.rb", "lib/hostelify/hostelify.rb", "lib/hostelify/hostelworld.rb"]
  s.files = ["Manifest", "README.rdoc", "Rakefile", "hostelify.gemspec", "lib/hostelify.rb", "lib/hostelify/gomio.rb", "lib/hostelify/hostelbookers.rb", "lib/hostelify/hostelify.rb", "lib/hostelify/hostelworld.rb", "pkg/hostelify-0.3.6.gem", "pkg/hostelify-0.3.6.tar.gz", "pkg/hostelify-0.3.6/Manifest", "pkg/hostelify-0.3.6/README.rdoc", "pkg/hostelify-0.3.6/Rakefile", "pkg/hostelify-0.3.6/hostelify.gemspec", "pkg/hostelify-0.3.6/lib/hostelify.rb", "pkg/hostelify-0.3.6/lib/hostelify/gomio.rb", "pkg/hostelify-0.3.6/lib/hostelify/hostelbookers.rb", "pkg/hostelify-0.3.6/lib/hostelify/hostelify.rb", "pkg/hostelify-0.3.6/lib/hostelify/hostelworld.rb", "pkg/hostelify-0.3.6/spec/_helper.rb", "pkg/hostelify-0.3.6/spec/hb_find_by_hostel.spec", "pkg/hostelify-0.3.6/spec/hb_find_hostels.spec", "pkg/hostelify-0.3.6/spec/hw_find_by_hostel.spec", "pkg/hostelify-0.3.6/spec/hw_find_hostels.spec", "spec/_helper.rb", "spec/hb_find_by_hostel.spec", "spec/hb_find_hostels.spec", "spec/hw_find_by_hostel.spec", "spec/hw_find_hostels.spec"]
  s.homepage = %q{http://github.com/holden/hostelify}
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Hostelify", "--main", "README.rdoc"]
  s.require_paths = ["lib"]
  # Guarded like the other optional setters in this file, so the spec still
  # loads on a RubyGems that does not provide this attribute.
  s.rubyforge_project = %q{hostelify} if s.respond_to? :rubyforge_project=
  s.rubygems_version = %q{1.3.5}
  s.summary = %q{Simple Hostel Webscrapper.}

  # The generated version check that used to follow had empty branches (no
  # dependencies are declared) and referenced Gem::RubyGemsVersion, which
  # raises NameError on any RubyGems that does not define that constant, so
  # only the assignment is kept.
  s.specification_version = 3 if s.respond_to? :specification_version=
end
|
@@ -0,0 +1,102 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'mechanize'
|
3
|
+
require 'hpricot'
|
4
|
+
require 'open-uri'
|
5
|
+
require 'date'
|
6
|
+
require 'htmlentities'
|
7
|
+
|
8
|
+
Hpricot.buffer_size = 262144
|
9
|
+
|
10
|
+
class Gomio
|
11
|
+
|
12
|
+
#constants
|
13
|
+
GOMIO_SINGULAR = "http://www.gomio.com/reservation/chooseBed.aspx?HostelId="
|
14
|
+
GOMIO_PLURAL_HOSTELS_URL = "http://www.gomio.com/hostels/europe/" #europe/poland/krakow/mama%27s%20hostel/overview.htm
|
15
|
+
|
16
|
+
# Fetches the Gomio overview page of one hostel and prints the two
# "no availability" table cells found inside its div#main.
#
# options[:location] - "city,country" string; spaces are removed from both parts.
# options[:id]       - path segment identifying the hostel in the overview URL.
#
# Returns nil (the result of puts); nothing is collected into an object yet.
def self.find_hostel_by_id(options)
  location_parts = options[:location].split(',')
  city           = location_parts.first.gsub(' ', '')
  country        = location_parts.last.gsub(' ', '')
  overview_url   = "#{GOMIO_PLURAL_HOSTELS_URL}#{country}/#{city}/#{options[:id]}/overview.htm"

  main = Hpricot(open(overview_url)).search('div[@id="main"]')

  # These values are looked up but not used yet; each lookup still raises
  # NoMethodError when its element is missing from the page.
  main.at("h3/span").inner_text.strip
  main.at("span.br_address").inner_text.strip
  main.at("div.br_detail").inner_text.strip

  outer_cell = main.at("td.HasNoAvail_Outer")
  inner_cell = main.at("td.HasNoAvail")
  puts "#{outer_cell}, #{inner_cell}"
end
|
34
|
+
|
35
|
+
# Searches Gomio for the hostels of a city and prints one result hash per
# listing row.
#
# options[:location] - "city,country" string; spaces are removed from both parts.
#
# Returns the hash built for the LAST listing row
# ({ :gomio_id, :name, :unavailable }), stored in @results.
# NOTE(review): earlier rows are only printed, never collected, and the search
# date / beds / nights passed to setSearch are hard-coded - confirm whether a
# collection of all rows and caller-supplied dates were intended.
def self.find_hostels_by_location(options) #location
  city = options[:location].split(',').first.gsub(' ','')
  country = options[:location].split(',').last.gsub(' ','')

  url = GOMIO_PLURAL_HOSTELS_URL + "#{country}/#{city}/search.htm"

  data = setSearch(url,"2009-09-09",2,5)
  data = data.search("div.SearchResultMembers")

  (data/"div.SearchResultsHostel").each do |row|
    name = row.at("h3").inner_text.strip
    link = row.at("h3/a")['href']
    # the id is whatever sits between the city segment and "/overview.htm"
    gomio_id = link.match(/(#{city}).([\d\D]*)(\/overview.htm)/)[2]

    # Parenthesise the search itself: written as row/("...").to_a... the
    # method chain binds to the selector string instead of the result set.
    # The second lookup uses td.HasNoAvail, the other cell class read in
    # find_hostel_by_id, instead of repeating td.HasNoAvail_Outer.
    outer = (row/"td.HasNoAvail_Outer/text()").to_a.join(',').split(',')
    inner = (row/"td.HasNoAvail/text()").to_a.join(',').split(',')

    @results = { :gomio_id => gomio_id, :name => name, :unavailable => outer + inner }
    puts @results
  end
  return @results
end
|
61
|
+
|
62
|
+
# Loads +url+, fills in the first form on the page with the requested stay
# and submits it.
#
# url     - page holding the search form.
# date    - arrival date string, parsed with Date.strptime's default format.
# no_ppl  - number of beds; option (no_ppl - 1) of the beds drop-down is chosen.
# no_days - number of nights; option (no_days - 1) of the nights drop-down is chosen.
#
# Returns the result of searching the submitted page for //div[@id="main"].
def self.setSearch(url,date,no_ppl,no_days)
  arrival     = Date.strptime(date)
  month_index = arrival.strftime("%m").to_i - 1
  day_index   = arrival.strftime("%d").to_i - 1
  # first year option when the two-digit years match, second option otherwise
  year_index  = Time.now.strftime("%y") == arrival.strftime("%y") ? 0 : 1

  agent = WWW::Mechanize.new
  form  = agent.get(url).forms.first # => WWW::Mechanize::Form

  # [field name, index of the option to pick], applied in this order
  selections = [
    ['ctl00$searchbox$sb$ddlMonth',  month_index],
    ['ctl00$searchbox$sb$ddlDay',    day_index],
    ['ctl00$searchbox$sb$ddlYear',   year_index],
    ['ctl00$searchbox$sb$ddlNights', no_days.to_i - 1],
    ['ctl00$searchbox$sb$ddlBeds',   no_ppl.to_i - 1]
  ]
  selections.each do |field_name, option_index|
    form.field_with(:name => field_name).options[option_index].select
  end

  agent.submit(form).search('//div[@id="main"]')
end
|
95
|
+
|
96
|
+
#url = GOMIO_PLURAL_HOSTELS_URL + "poland/krakow/search.htm"
|
97
|
+
|
98
|
+
#Gomio.setSearch(url,"2009-09-20",2,7)
|
99
|
+
Gomio.find_hostels_by_location(:location => "krakow,poland")
|
100
|
+
#Gomio.find_hostel_by_id(:id => "mama's%20hostel", :location => "krakow,poland")
|
101
|
+
|
102
|
+
end
|
@@ -0,0 +1,180 @@
|
|
1
|
+
class Hostelbookers
|
2
|
+
|
3
|
+
#constants
|
4
|
+
HB_SINGULAR_DETAIL_URL = "http://www.hostelbookers.com/hostels/" #poland/krakow/
|
5
|
+
HB_PLURAL_HOSTELS_URL = "http://www.hostelbookers.com/hostels/" #poland/wroclaw/
|
6
|
+
|
7
|
+
#options
|
8
|
+
@default_options = { :date => date=(Date.today+4).to_s, :no_days => "7", :live => true }
|
9
|
+
|
10
|
+
# Lists the hostels Hostelbookers shows for a city.
#
# options[:location] - "city,country"; each part is trimmed and its spaces are
#                      turned into single dashes to form the URL path.
# options[:date]     - optional arrival date string. When given, the options
#                      are merged over @default_options and the dated search
#                      form is submitted through setSearch; otherwise the plain
#                      listing page is fetched (up to 3 attempts).
# options[:no_days]  - number of nights, used only together with :date.
#
# Returns a HostelifyCollection (also stored in @results) with one Hostelify
# per tr.propertyRow, carrying hostel_id, name, description, rating, dorm and
# single.
def self.find_hostels_by_location(options) #location
  location_parts = options[:location].split(',')
  city    = location_parts.first.rstrip.lstrip.gsub(' ', '-').squeeze("-")
  country = location_parts.last.rstrip.lstrip.gsub(' ', '-').squeeze("-")
  listing_url = "#{HB_PLURAL_HOSTELS_URL}#{country}/#{city}"

  if options[:date]
    options = @default_options.merge(options)
    Date.strptime(options[:date]) # result unused; raises on an unparsable date
    page = setSearch(listing_url, options[:date], options[:no_days])
  else
    # nil when all three attempts fail
    page = Retryable.try(3) { Hpricot(open(listing_url)) }
  end

  listing = page.search("//div[@id='propertyResultsList']")
  @results = HostelifyCollection.new

  (listing/"tr.propertyRow").each do |row|
    title_link  = row.at("a.propertyTitle")
    rating_node = row.at("td.rating/text()")
    link        = title_link['href']

    @results << Hostelify.new(
      :hostel_id   => link.match(/[\d]{2,5}.$/).to_s.to_i,
      :name        => title_link.inner_text,
      :description => row.at("p.shortDescription").inner_text,
      :rating      => (rating_node.nil? ? rating_node : rating_node.to_s.to_i),
      :dorm        => row.at("td.shared/text()"),
      :single      => row.at("td.private/text()")
    )
  end

  @results
end
|
45
|
+
|
46
|
+
# Scrapes the Hostelbookers detail page of one hostel into a Hostelify object.
#
# options[:location] - "city,country"; spaces are removed from both parts.
# options[:id]       - hostel path segment; also stored as hostel_id.
# options[:date]     - optional arrival date string. When given, the options
#                      are merged over @default_options, the availability form
#                      is submitted through setSearch_id and
#                      hostel.availability is filled.
# options[:no_days]  - number of nights, used only together with :date.
# options[:all]      - when truthy, the "/map" page is also fetched for
#                      directions and geo coordinates.
#
# Returns the populated Hostelify. Raises NoMethodError when the page could not
# be fetched or an expected element is missing.
def self.find_hostel_by_id(options)
  city = options[:location].split(',').first.gsub(' ','')
  country = options[:location].split(',').last.gsub(' ','')
  id = options[:id]
  url = HB_SINGULAR_DETAIL_URL + "#{country}/#{city}/#{id}"

  hostel = Hostelify.new

  if options[:date]
    options = @default_options.merge(options)
    data = setSearch_id(url,options[:date],options[:no_days])
  else
    # nil when all three attempts fail
    data = Retryable.try(3) { Hpricot(open(url)) }
  end

  hostel.hostel_id = id
  hostel.name = data.at("h1").inner_text
  hostel.address = data.at("p.address").inner_text
  hostel.description = data.at('div[@id="overviewPane"]').inner_text

  facilities = (data.at("table.tableFacilities")/"td").map { |cell| cell.inner_text }
  extras = (data.at("table.tableFeatures")/"td.name").map { |cell| "Free " + cell.inner_text.to_s }
  # Merge before assigning: the extras used to be appended to a new array
  # after hostel.facilities had been set, so they never reached the hostel.
  hostel.facilities = facilities + extras

  hostel.ratings = (data.at('div[@id="overviewIndRtng"]/table')/"tr").map do |row|
    row.at("td").inner_text.to_s.to_f
  end

  images = []
  (data.at('div[@id="propMedia"]/table')/"td").each do |cell|
    img = cell.at("img")
    next if img.nil? # a cell without an image has nothing to collect
    # the image URL is embedded in the onclick handler
    image_url = img['onclick'].to_s[/(http).*(jpg|gif|png|jpeg)/]
    images << image_url if image_url
    #add youtube?
  end
  hostel.images = images

  if options[:all]
    # Held in its own variable so the availability lookup below still reads
    # the detail page rather than the map page.
    map_page = Hpricot(open(url + "/map"))
    map_page.search("h2").remove #get rid of header
    hostel.directions = map_page.at('div[@id="directions"]').inner_text
    # uniq, not uniq!: the bang form returns nil when nothing was removed
    hostel.geo = map_page.to_s.scan(/-{0,1}\d{1,3}\.\d{7}/).uniq
  end

  if options[:date]
    date = Date.strptime(options[:date])
    @availables = []
    available = data.at("div.tableAvailability/table")
    if available
      (available/"tr").each do |row|
        price_cell = row.at("td.price")
        next if price_cell.nil?
        # nil instead of an exception when the cell holds no number
        price = price_cell.inner_text.to_s[/[\d.]{1,5}/]
        next if price.nil?

        name = row.at("td.roomType/label/text()")
        people = row.at("td.people/select")
        # the last option of the drop-down is reported as the number of spots
        people = people.at("option:last-child").inner_text unless people.nil?

        # one entry per night of the stay
        options[:no_days].to_i.times do |offset|
          @availables << HostelifyAvailable.new(name,price,people,(date+offset).to_s)
        end
      end
    end
    hostel.availability = @availables
  end

  return hostel
end
|
130
|
+
|
131
|
+
# Submits the listing page's 'searchForm' for the given stay, then re-submits
# the first form of the result page with the sixth currency option selected.
#
# url     - listing page URL.
# date    - arrival date string (Date.strptime default format); sent as dd/mm/yyyy.
# no_days - number of nights; option (no_days - 1) of 'intnights' is chosen.
#
# Each submit is attempted up to 3 times; if every attempt fails the previous
# page is kept. Returns the //div[@id="content"] search result of the final page.
def self.setSearch(url,date,no_days)
  arrival = Date.strptime(date).strftime("%d/%m/%Y") #d/m/y
  agent   = WWW::Mechanize.new
  page    = agent.get(url)

  search_form = page.form_with(:name => 'searchForm') # => WWW::Mechanize::Form
  search_form.field_with(:name => 'intnights').options[no_days.to_i - 1].select
  search_form.dtearrival = arrival
  Retryable.try(3) { page = agent.submit(search_form) }

  #to dollars!
  currency_form = page.forms.first
  currency_form.field_with(:name => 'strSelectedCurrencyCode').options[5].select
  Retryable.try(3) { page = agent.submit(currency_form) }

  page.search('//div[@id="content"]')
end
|
155
|
+
|
156
|
+
# Submits a hostel page's 'frmCheckAvailBook' form for the given stay, then
# re-submits the second form of the result page with the sixth currency option
# selected.
#
# url     - hostel detail page URL.
# date    - arrival date string (Date.strptime default format); sent as dd/mm/yyyy.
# no_days - number of nights; option (no_days - 1) of 'intNights' is chosen.
#
# Each submit is attempted up to 3 times; if every attempt fails the previous
# page is kept. Returns the //div[@id="content"] search result of the final page.
def self.setSearch_id(url,date,no_days)
  arrival = Date.strptime(date).strftime("%d/%m/%Y") #d/m/y
  agent   = WWW::Mechanize.new
  page    = agent.get(url)

  booking_form = page.form_with(:name => 'frmCheckAvailBook') # => WWW::Mechanize::Form
  booking_form.field_with(:name => 'intNights').options[no_days.to_i - 1].select
  booking_form.dteArrival = arrival
  Retryable.try(3) { page = agent.submit(booking_form) }

  #change currency to dollars
  currency_form = page.forms[1]
  currency_form.field_with(:name => 'strSelectedCurrencyCode').options[5].select
  Retryable.try(3) { page = agent.submit(currency_form) }

  page.search('//div[@id="content"]')
end
|
179
|
+
|
180
|
+
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
# Plain value object describing one hostel; every attribute is optional.
class Hostelify
  # fields filled by the single-hostel lookups
  attr_accessor :hostel_id, :name, :address, :description, :facilities,
                :ratings, :directions, :geo, :images, :video, :availability
  # fields filled by the per-location listings
  attr_accessor :rating, :dorm, :single, :unavailable

  # options - pairs of attribute name (Symbol or String) and value; each pair
  #           is assigned through the matching writer, so an unknown name
  #           raises NoMethodError.
  def initialize(options = {})
    options.each do |attribute, value|
      send(:"#{attribute}=", value)
    end
  end
end
|
13
|
+
|
14
|
+
# An Array of hostel objects with shortcuts for pulling one attribute out of
# every element.
class HostelifyCollection < Array
  # Returns the hostel_id of every element, in order.
  def ids
    map { |hostel| hostel.hostel_id }
  end

  # Returns the name of every element, in order.
  def names
    map { |hostel| hostel.name }
  end

  # Returns the description of every element, in order.
  def descs
    map { |hostel| hostel.description }
  end
end
|
37
|
+
|
38
|
+
# One availability entry: a room type, its price, the free spots and the date
# the entry applies to.
class HostelifyAvailable
  attr_accessor :name, :price, :spots, :bookdate

  # Stores the four values unchanged. Note the argument order is
  # (name, price, spots, bookdate).
  def initialize(name, price, spots, bookdate)
    @name, @price, @spots, @bookdate = name, price, spots, bookdate
  end
end
|
49
|
+
|
50
|
+
# Small helper that re-runs a block when it raises.
module Retryable
  require 'timeout'

  extend self

  # Runs the block, running it again when it raises, for at most +times+
  # attempts in total.
  #
  # times   - maximum number of attempts (default 1).
  # options - :on => exception class (or array of classes) that triggers a
  #           retry. Defaults to StandardError plus Timeout::Error, which is
  #           listed explicitly in case Timeout::Error is not a StandardError
  #           on the running Ruby.
  #
  # The default used to be Exception, which also retried and then swallowed
  # Interrupt and SystemExit; those now propagate to the caller.
  #
  # Returns the block's value, or nil when every attempt raised (the last
  # error is discarded, as before). Exceptions not covered by :on propagate
  # immediately.
  def try(times = 1, options = {})
    retry_on = Array(options[:on] || [StandardError, Timeout::Error])
    begin
      yield
    rescue *retry_on
      retry if (times -= 1) > 0
      nil
    end
  end
end
|
61
|
+
|
@@ -0,0 +1,265 @@
|
|
1
|
+
class Hostelworld
|
2
|
+
|
3
|
+
#constants
|
4
|
+
#location list includes/indexjs.js
|
5
|
+
HW_SINGULAR_DETAIL_URL = "http://www.hostelworld.com/hosteldetails.php?HostelNumber="
|
6
|
+
HW_SINGULAR_IMAGE_URL = "http://www.hostelworld.com/hostelpictures.php?HostelNumber="
|
7
|
+
HW_SINGULAR_AVAILABILITY = "http://www.hostelworld.com/availability.php/"
|
8
|
+
HW_SINGULAR_YOUTUBE_URL = "http://www.hostelworld.com/youtubevideo.php?HostelNumber="
|
9
|
+
HW_PLURAL_HOSTELS_URL = "http://www.hostelworld.com/findabed.php/"
|
10
|
+
|
11
|
+
#options
|
12
|
+
@default_options = { :date => date=(Date.today+4).to_s, :no_days => "7", :no_ppl => "2" }
|
13
|
+
|
14
|
+
# Downloads +url+, converts the body to UTF-8 from the charset reported by the
# response and parses it with Hpricot.
#
# The download itself is not retried; only the convert-and-parse step is
# attempted up to 3 times. Returns the parsed document, or nil when every
# attempt raised.
def self.parse_html(url)
  # Block form so the handle is closed once parsing is done.
  open(url) do |f|
    Retryable.try 3 do
      # Rewind on every attempt; otherwise a retry would read from the end of
      # the stream and parse an empty string.
      f.rewind
      Hpricot(Iconv.conv('utf-8', f.charset, f.readlines.join("\n")))
    end
  end
end
|
21
|
+
|
22
|
+
# Scrapes the Hostelworld detail page of one hostel into a Hostelify object.
#
# options[:id]         - hostel number; converted to a String and stored as hostel_id.
# options[:date]       - optional arrival date string. When given, the options
#                        are merged over @default_options, the availability
#                        form is submitted through setSearch and
#                        hostel.availability is filled instead of the description.
# options[:no_ppl], options[:no_days] - stay details, used only together with :date.
# options[:directions] - when truthy, also fetch directions and geo coordinates.
# options[:images]     - when truthy, also fetch every picture URL.
# options[:all]        - when truthy, same as :directions and :images together.
#
# Returns the populated Hostelify, or nil when setSearch reports "Full".
def self.find_hostel_by_id(options)
  id = options[:id].to_s
  url = HW_SINGULAR_DETAIL_URL + id

  hostel = Hostelify.new
  hostel.hostel_id = id

  if options[:date]
    options = @default_options.merge(options)
    Date.strptime(options[:date]) # result unused; raises on an unparsable date
    data = setSearch(url, options[:date], options[:no_ppl], options[:no_days])
  else
    data = parse_html(url)
  end

  # setSearch returns the string "Full" when the page shows a booking error
  return nil if data == "Full"

  data = data.search("//div[@id='content']")
  data.search("h3").remove #get rid of header

  #title, address, desc, facilities, ratings
  hostel.name = data.at("h2").inner_text.gsub(/( in ).*$/,'')
  hostel.address = data.at('div[@style="padding-top: 5px"]').inner_text.lstrip

  if options[:date]
    hostel.availability = parse_availables(data)
  else
    # the selector previously lacked the closing quote after microDescription2
    hostel.description = data.at('div[@id="microDescription2"]').inner_text
  end

  #optional
  no_photos = data.at('div[@id="microPicScroll"]/span/a').inner_text.to_i
  video = data.at('div[@id="microVideo"]')

  # An "inner" <li> is stored as "<heading>: <text>". index_count is how many
  # positions back the most recent non-inner entry sits in +facilities+.
  facilities = []
  index_count = 1
  (data/"ul.microFacilitiesBoomUl/li").each_with_index do |item,index|
    if item.attributes['class'] == "microFacilitiesBoomLiInner"
      facilities << "#{facilities[index-index_count]}: " + item.inner_text
      index_count += 1
    else
      index_count = 1
      facilities << item.inner_text
    end
  end

  hostel.facilities = facilities
  hostel.ratings = (data/'div[@id="ratingsBar2"]').map { |item| item.inner_text.to_i }

  if video #exists
    video_page = parse_html(HW_SINGULAR_YOUTUBE_URL + id)
    hostel.video = video_page.at('param[@name="movie"]')['value']
  end

  if options[:directions] || options[:all]
    directions_page = parse_html(HW_SINGULAR_DETAIL_URL + id + "/directions/")

    #directions, geo
    hostel.directions = directions_page.at('div[@id="content"]').inner_text.gsub(/^[\d\D\n]*(DIRECTIONS)/,'')
    # uniq, not uniq!: the bang form returns nil when nothing was removed
    hostel.geo = directions_page.to_s.scan(/-{0,1}\d{1,3}\.\d{7}/).uniq
  end

  # no_photos is always an Integer here, so it needs no truthiness check;
  # zero photos simply yields an empty list.
  if options[:images] || options[:all]
    hostel.images = (1..no_photos).map do |i|
      picture_page = parse_html(HW_SINGULAR_IMAGE_URL + id + '&PicNO=' + i.to_s)
      (picture_page/"img").first[:src].to_s
    end
  end

  hostel # return
end
|
111
|
+
|
112
|
+
# Lists the hostels Hostelworld shows for a city.
#
# options[:location] - "city,country"; spaces are removed from both parts.
# options[:date]     - optional arrival date string. When given, the options
#                      are merged over @default_options, the dated search is
#                      run through setSearch2 and each result also carries
#                      dorm, single and unavailable.
# options[:no_ppl], options[:no_days] - stay details, used only together with :date.
#
# Returns a HostelifyCollection (also stored in @results) with one Hostelify
# per div.hostelListing.
def self.find_hostels_by_location(options) #location
  location_parts = options[:location].split(',')
  city    = location_parts.first.gsub(' ', '')
  country = location_parts.last.gsub(' ', '')
  listing_url = "#{HW_PLURAL_HOSTELS_URL}ChosenCity.#{city}/ChosenCountry.#{country}"

  page =
    if options[:date]
      options = @default_options.merge(options)
      Date.strptime(options[:date]) # result unused; raises on an unparsable date
      setSearch2(listing_url, options[:date], options[:no_ppl], options[:no_days])
    else
      parse_html(listing_url)
    end

  content = page.search("//div[@id='content']")
  @results = HostelifyCollection.new

  (content/"div.hostelListing").each do |row|
    attrs = {}
    attrs[:name]        = row.at("h3").inner_text
    attrs[:description] = row.at("div.hostelEntry/p").inner_text.to_s.chop.gsub('more info','').squeeze('.')
    link                = row.at("h3/a")['href']
    rating_node         = row.at("h4/text()")
    attrs[:rating]      = rating_node.nil? ? rating_node : rating_node.to_s.to_i
    # value unused, but the lookup still raises when the span is missing
    row.at("div.hostelListingImage/span").inner_text
    attrs[:hostel_id]   = link.match(/[\d]*$/).to_s

    if options[:date]
      attrs[:dorm]   = row.at("p.hostelListingRate/span.blueBeds/text()").to_s.gsub(/[A-Z$]*/,'')
      attrs[:single] = row.at("p.hostelListingPrivateRate/span.blueBeds/text()").to_s.gsub(/[A-Z$]*/,'')
      no_avail       = row/"ul.hostelListingDates/li.noAvail/text()"
      attrs[:unavailable] = no_avail.to_a.join(',').split(',')
    end

    @results << Hostelify.new(attrs)
  end

  @results
end
|
151
|
+
|
152
|
+
private
|
153
|
+
|
154
|
+
# Submits a hostel page's 'theForm' with the 'DateSelect' button for the given
# arrival date and number of nights.
#
# url     - hostel detail page URL.
# date    - arrival date string (Date.strptime default format).
# no_ppl  - accepted but not used by this method.
# no_days - number of nights; written to the second form field named "NumNights".
#
# The submit is attempted up to 3 times; if every attempt fails the originally
# loaded page is kept. Returns the String "Full" when the resulting page has a
# non-empty div.microBookingError2, otherwise the //div[@id='content'] search
# result.
def self.setSearch(url,date,no_ppl,no_days)
  arrival     = Date.strptime(date)
  month_index = arrival.strftime("%m").to_i - 1
  day_index   = arrival.strftime("%d").to_i - 1
  # first year option when the two-digit years match, second option otherwise
  year_index  = Time.now.strftime("%y") == arrival.strftime("%y") ? 0 : 1

  agent = WWW::Mechanize.new
  page  = agent.get(url)
  form  = page.form_with(:name => 'theForm')

  form.field_with(:name => 'selMonth2').options[month_index].select
  form.field_with(:name => 'selDay2').options[day_index].select
  form.field_with(:name => 'selYear2').options[year_index].select

  # several fields share the name "NumNights"; the second one is set
  nights_fields = form.fields.find_all { |field| field.name == "NumNights" }
  nights_fields[1].value = no_days.to_i

  Retryable.try(3) do
    page = agent.submit(form, form.button_with(:name => 'DateSelect'))
  end

  booking_error = page.search("div.microBookingError2")
  return "Full" if booking_error.to_s.length > 1

  page.search("//div[@id='content']")
end
|
198
|
+
|
199
|
+
# Submits a listing page's 'theForm' for the given stay with the fifth
# currency option selected.
#
# url     - listing page URL.
# date    - arrival date string (Date.strptime default format).
# no_ppl  - number of people; option (no_ppl - 1) of 'Persons' is chosen.
# no_days - number of nights; option (no_days - 1) of 'NumNights' is chosen.
#
# The submit is attempted up to 3 times; if every attempt fails the originally
# loaded page is kept. Returns the //div[@id='content'] search result.
def self.setSearch2(url,date,no_ppl,no_days)
  arrival     = Date.strptime(date)
  month_index = arrival.strftime("%m").to_i - 1
  day_index   = arrival.strftime("%d").to_i - 1
  # first year option when the two-digit years match, second option otherwise
  year_index  = Time.now.strftime("%y") == arrival.strftime("%y") ? 0 : 1

  agent = WWW::Mechanize.new
  page  = agent.get(url)
  form  = page.form_with(:name => 'theForm')

  # [field name, index of the option to pick], applied in this order
  selections = [
    ['selMonth',  month_index],
    ['selDay',    day_index],
    ['selYear',   year_index],
    ['NumNights', no_days.to_i - 1],
    ['Persons',   no_ppl.to_i - 1],
    ['Currency',  4] #US Currency
  ]
  selections.each do |field_name, option_index|
    form.field_with(:name => field_name).options[option_index].select
  end

  Retryable.try(3) { page = agent.submit(form) }

  page.search("//div[@id='content']")
end
|
230
|
+
|
231
|
+
# Reads the availability table (table#tableDatesSelected2) out of +info+.
#
# Every <div> and span.hwRoomTypeDesc inside the table is removed first. Each
# <td> that has a title attribute then becomes one HostelifyAvailable:
#   name     - text of the row's first cell (nil when the row has no cells)
#   price    - first "<1-3 digits><any char><2 digits>" match in the title
#   spots    - first one or two digits after "available: ", or 0 when the
#              title has no "available: " part
#   bookdate - Date parsed from the "Date: ..." tail of the title after
#              stripping "th", "nd", "rd" and "st"
#
# Returns an Array of HostelifyAvailable.
def self.parse_availables(info)
  table = info.at('table[@id="tableDatesSelected2"]')
  table.search("div").remove
  table.search("span.hwRoomTypeDesc").remove

  entries = []

  (table/"tr").each do |row|
    cells = row/"td"
    room  = cells.first
    room  = room.inner_text unless room.nil?

    cells.each do |cell|
      title = cell.attributes['title']
      next unless title

      title = title.to_s
      price = title.match(/([\d]{1,3}).([\d]{2})/).to_s
      free  = title.match(/(available: )([\d]*)/)
      day   = title.match(/(Date: ).*$/).to_s.gsub(/(Date: )|(th)|(nd)|(rd)|(st)/,'')
      day   = Date.strptime(day, "%a %d %b '%y")

      beds = free ? free.to_s.match(/[\d]{1,2}/)[0] : 0
      entries << HostelifyAvailable.new(room, price, beds, day)
    end
  end

  entries
end
|
263
|
+
|
264
|
+
|
265
|
+
end
|