holden-hostelify 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Manifest +15 -0
- data/README.rdoc +1 -0
- data/Rakefile +14 -0
- data/hostelify.gemspec +31 -0
- data/lib/hostel/gomio.rb +102 -0
- data/lib/hostel/hostel.rb +11 -0
- data/lib/hostel/hostel_available.rb +11 -0
- data/lib/hostel/hostelbookers.rb +164 -0
- data/lib/hostel/hostelworld.rb +203 -0
- data/lib/hostel.rb +11 -0
- data/lib/test.rb +5 -0
- data/spec/_helper.rb +4 -0
- data/spec/hb_find_by_hostel.spec +72 -0
- data/spec/hb_find_hostels.spec +30 -0
- data/spec/hw_find_by_hostel.spec +84 -0
- data/spec/hw_find_hostels.spec +62 -0
- metadata +80 -0
data/Manifest
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
Manifest
|
2
|
+
README.rdoc
|
3
|
+
Rakefile
|
4
|
+
lib/hostel.rb
|
5
|
+
lib/hostel/gomio.rb
|
6
|
+
lib/hostel/hostel.rb
|
7
|
+
lib/hostel/hostel_available.rb
|
8
|
+
lib/hostel/hostelbookers.rb
|
9
|
+
lib/hostel/hostelworld.rb
|
10
|
+
lib/test.rb
|
11
|
+
spec/_helper.rb
|
12
|
+
spec/hb_find_by_hostel.spec
|
13
|
+
spec/hb_find_hostels.spec
|
14
|
+
spec/hw_find_by_hostel.spec
|
15
|
+
spec/hw_find_hostels.spec
|
data/README.rdoc
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
Nothing yet... still...
|
data/Rakefile
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
require 'echoe'
|
4
|
+
|
5
|
+
Echoe.new('hostelify', '0.2.0') do |p|
|
6
|
+
p.description = "Simple Hostel Webscrapper."
|
7
|
+
p.url = "http://github.com/holden/hostelify"
|
8
|
+
p.author = "Holden Thomas"
|
9
|
+
p.email = "holden.thomas@gmail.com"
|
10
|
+
p.ignore_pattern = ["tmp/*", "script/*"]
|
11
|
+
p.development_dependencies = []
|
12
|
+
end
|
13
|
+
|
14
|
+
Dir["#{File.dirname(__FILE__)}/tasks/*.rake"].sort.each { |ext| load ext }
|
data/hostelify.gemspec
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
Gem::Specification.new do |s|
|
4
|
+
s.name = %q{hostelify}
|
5
|
+
s.version = "0.2.0"
|
6
|
+
|
7
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
|
8
|
+
s.authors = ["Holden Thomas"]
|
9
|
+
s.date = %q{2009-09-09}
|
10
|
+
s.description = %q{Simple Hostel Webscrapper.}
|
11
|
+
s.email = %q{holden.thomas@gmail.com}
|
12
|
+
s.extra_rdoc_files = ["README.rdoc", "lib/hostel.rb", "lib/hostel/gomio.rb", "lib/hostel/hostel.rb", "lib/hostel/hostel_available.rb", "lib/hostel/hostelbookers.rb", "lib/hostel/hostelworld.rb", "lib/test.rb"]
|
13
|
+
s.files = ["Manifest", "README.rdoc", "Rakefile", "lib/hostel.rb", "lib/hostel/gomio.rb", "lib/hostel/hostel.rb", "lib/hostel/hostel_available.rb", "lib/hostel/hostelbookers.rb", "lib/hostel/hostelworld.rb", "lib/test.rb", "spec/_helper.rb", "spec/hb_find_by_hostel.spec", "spec/hb_find_hostels.spec", "spec/hw_find_by_hostel.spec", "spec/hw_find_hostels.spec", "hostelify.gemspec"]
|
14
|
+
s.has_rdoc = true
|
15
|
+
s.homepage = %q{http://github.com/holden/hostelify}
|
16
|
+
s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Hostelify", "--main", "README.rdoc"]
|
17
|
+
s.require_paths = ["lib"]
|
18
|
+
s.rubyforge_project = %q{hostelify}
|
19
|
+
s.rubygems_version = %q{1.3.1}
|
20
|
+
s.summary = %q{Simple Hostel Webscrapper.}
|
21
|
+
|
22
|
+
if s.respond_to? :specification_version then
|
23
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
24
|
+
s.specification_version = 2
|
25
|
+
|
26
|
+
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
27
|
+
else
|
28
|
+
end
|
29
|
+
else
|
30
|
+
end
|
31
|
+
end
|
data/lib/hostel/gomio.rb
ADDED
@@ -0,0 +1,102 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'mechanize'
|
3
|
+
require 'hpricot'
|
4
|
+
require 'open-uri'
|
5
|
+
require 'date'
|
6
|
+
require 'htmlentities'
|
7
|
+
|
8
|
+
Hpricot.buffer_size = 262144
|
9
|
+
|
10
|
+
# Experimental scraper for hostel listings on gomio.com.
#
# Expects Hpricot, open-uri, Date and WWW::Mechanize to be loaded by the
# surrounding file. All methods are class methods taking an options hash.
class Gomio

  #constants
  GOMIO_SINGULAR = "http://www.gomio.com/reservation/chooseBed.aspx?HostelId="
  GOMIO_PLURAL_HOSTELS_URL = "http://www.gomio.com/hostels/europe/" #europe/poland/krakow/mama%27s%20hostel/overview.htm

  # Fetches one hostel's overview page and prints the two "no availability"
  # table cells found in it to stdout.
  #
  # options[:location] - "city,country" string (spaces are removed)
  # options[:id]       - URL segment identifying the hostel
  #
  # Work in progress: name, address and description are located on the page
  # but not returned yet; the method returns nil (the result of puts).
  def self.find_hostel_by_id(options)
    #opts = { :directions => false, :images => false, :all => false }.merge options
    city = options[:location].split(',').first.gsub(' ','')
    country = options[:location].split(',').last.gsub(' ','')
    id = options[:id]
    url = GOMIO_PLURAL_HOSTELS_URL + "#{country}/#{city}/#{id}/overview.htm"

    data = Hpricot(open(url))

    data = data.search('div[@id="main"]')
    name = data.at("h3/span").inner_text.lstrip.rstrip
    address = data.at("span.br_address").inner_text.lstrip.rstrip
    desc = data.at("div.br_detail").inner_text.lstrip.rstrip
    available1 = data.at("td.HasNoAvail_Outer")
    available2 = data.at("td.HasNoAvail")
    puts "#{available1}, #{available2}"
  end

  # Runs the site search for a city and returns one hash per hostel row:
  #   { :gomio_id => ..., :name => ..., :unavailable => [...] }
  #
  # options[:location] - "city,country" string (spaces are removed)
  # options[:date]     - arrival date string parseable by Date.strptime;
  #                      defaults to four days from today
  # options[:no_ppl]   - number of beds, defaults to 2
  # options[:no_days]  - number of nights, defaults to 5
  #
  # Returns an Array (empty when no rows are found).
  def self.find_hostels_by_location(options) #location
    city = options[:location].split(',').first.gsub(' ','')
    country = options[:location].split(',').last.gsub(' ','')

    url = GOMIO_PLURAL_HOSTELS_URL + "#{country}/#{city}/search.htm"

    # Previously hard-coded to "2009-09-09", 2 beds, 5 nights.
    date    = options[:date]    || (Date.today + 4).to_s
    no_ppl  = options[:no_ppl]  || 2
    no_days = options[:no_days] || 5

    data = setSearch(url, date, no_ppl, no_days)
    data = data.search("div.SearchResultMembers")

    results = []
    (data/"div.SearchResultsHostel").each do |row|
      name = row.at("h3").inner_text.lstrip.rstrip
      link = row.at("h3/a")['href']
      # the id is whatever sits between the city segment and /overview.htm
      gomio_id = link.match(/(#{city}).([\d\D]*)(\/overview.htm)/)[2]

      # Parenthesise the search first: without the parentheses the method
      # chain binds to the selector string instead of the search result.
      available1 = (row/"td.HasNoAvail_Outer/text()").to_a.join(',').split(',')
      available2 = (row/"td.HasNoAvail/text()").to_a.join(',').split(',')

      # Collect every row; the old code overwrote a single hash each time.
      results << { :gomio_id => gomio_id, :name => name, :unavailable => available1 + available2 }
    end
    results
  end

  # Loads +url+, fills in the search box on the first form of the page and
  # submits it.
  #
  # date    - date string parseable by Date.strptime
  # no_ppl  - number of beds (selects option index no_ppl - 1)
  # no_days - number of nights (selects option index no_days - 1)
  #
  # Returns the nodes matching //div[@id="main"] on the result page.
  def self.setSearch(url,date,no_ppl,no_days)
    date = Date.strptime(date)
    month = date.strftime("%m").to_i
    day = date.strftime("%d").to_i
    # Year drop-down index: 0 when the date is in the current year, else 1.
    # NOTE(review): assumes the drop-down lists this year then next year.
    year = Time.now.strftime("%y") == date.strftime("%y") ? 0 : 1

    agent = WWW::Mechanize.new
    page = agent.get(url)

    #the form name
    form = page.forms.first # => WWW::Mechanize::Form

    form.field_with(:name => 'ctl00$searchbox$sb$ddlMonth').options[month-1].select
    form.field_with(:name => 'ctl00$searchbox$sb$ddlDay').options[day-1].select
    form.field_with(:name => 'ctl00$searchbox$sb$ddlYear').options[year].select
    form.field_with(:name => 'ctl00$searchbox$sb$ddlNights').options[no_days.to_i-1].select
    form.field_with(:name => 'ctl00$searchbox$sb$ddlBeds').options[no_ppl.to_i-1].select
    #form.field_with(:id => 'Currency').options[4].select #US Currency

    page = agent.submit(form)
    page.search('//div[@id="main"]')
  end

  # Usage examples (kept as comments so that loading this file no longer
  # triggers a live scrape):
  #   Gomio.find_hostels_by_location(:location => "krakow,poland")
  #   Gomio.find_hostel_by_id(:id => "mama's%20hostel", :location => "krakow,poland")

end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
# Plain data holder for one scraped hostel. Every field is a read/write
# attribute; none of them is validated or converted here.
class Hostel
  attr_accessor :hostel_id, :name, :address, :description, :facilities, :ratings, :directions, :geo, :images, :video, :availability, :price

  # Builds a hostel from a hash of attribute name => value.
  #
  # options - Hash keyed by attribute name (Symbol or String). nil is
  #           treated like an empty hash.
  #
  # Raises NoMethodError when a key does not name one of the attributes.
  def initialize(options = {})
    (options || {}).each do |attribute, value|
      send("#{attribute}=", value)
    end
  end

end
|
@@ -0,0 +1,164 @@
|
|
1
|
+
# Scraper for hostelbookers.com.
#
# Expects Hpricot, open-uri, Date, WWW::Mechanize, Hostel and
# HostelAvailable to be loaded by the surrounding file.
class Hostelbookers

  #constants
  HB_SINGULAR_DETAIL_URL = "http://www.hostelbookers.com/hostels/" #poland/krakow/
  HB_PLURAL_HOSTELS_URL = "http://www.hostelbookers.com/hostels/" #poland/wroclaw/

  #options
  # Merged under the caller's options whenever a :date is supplied.
  @default_options = { :date => (Date.today+4).to_s, :no_days => "7", :live => true }

  # Lists the hostels of a city.
  #
  # options[:location] - "city,country" string (spaces are removed)
  # options[:date]     - optional arrival date string; when given the site
  #                      search form is submitted first
  # options[:no_days]  - number of nights, defaults to "7" when :date is set
  #
  # Returns an Array of Hostel with hostel_id, name, description, ratings
  # and price (the shared-room cell) filled in.
  def self.find_hostels_by_location(options) #location
    city = options[:location].split(',').first.gsub(' ','')
    country = options[:location].split(',').last.gsub(' ','')
    url = HB_PLURAL_HOSTELS_URL + "#{country}/#{city}"

    if options[:date]
      options = @default_options.merge(options)
      data = setSearch(url,options[:date],options[:no_days])
    else
      data = Hpricot(open(url))
    end

    data = data.search("//div[@id='propertyResultsList']")
    results = []
    #coder = HTMLEntities.new
    (data/"tr.propertyRow").each do |row|
      title = row.at("a.propertyTitle")
      name = title.inner_text
      link = title['href']
      desc = row.at("p.shortDescription").inner_text
      rating = row.at("td.rating/text()")
      rating = rating.to_s.to_i if rating
      dorm = row.at("td.shared/text()")
      # the id is the run of digits just before the link's final character
      hb_id = link.match(/[\d]{2,5}.$/).to_s.to_i

      results << Hostel.new(:hostel_id => hb_id, :name => name, :description => desc, :ratings => rating, :price => dorm)
    end
    results
  end

  # Fetches the detail page of one hostel.
  #
  # options[:location] - "city,country" string (spaces are removed)
  # options[:id]       - hostel id used as the last URL segment
  # options[:all]      - when truthy also loads "<url>/map" for directions
  #                      and geo coordinates
  # options[:date]     - optional arrival date string; when given the
  #                      availability form is submitted and the result is
  #                      parsed into hostel.availability
  # options[:no_days]  - number of nights, defaults to "7" when :date is set
  #
  # Returns a Hostel.
  def self.find_hostel_by_id(options)
    city = options[:location].split(',').first.gsub(' ','')
    country = options[:location].split(',').last.gsub(' ','')
    id = options[:id]
    url = HB_SINGULAR_DETAIL_URL + "#{country}/#{city}/#{id}"

    hostel = Hostel.new

    if options[:date]
      options = @default_options.merge(options)
      data = setSearch_id(url,options[:date],options[:no_days])
    else
      data = Hpricot(open(url))
    end

    hostel.hostel_id = id
    hostel.name = data.at("h1").inner_text
    hostel.address = data.at("p.address").inner_text
    hostel.description = data.at('div[@id="overviewPane"]').inner_text

    facilities = []
    (data.at("table.tableFacilities")/"td").each do |cell|
      facilities << cell.inner_text
    end
    extras = []
    (data.at("table.tableFeatures")/"td.name").each do |cell|
      extras << "Free " + cell.inner_text.to_s
    end
    # Assign after merging: the extras used to be concatenated into a local
    # that was never stored on the hostel.
    hostel.facilities = facilities + extras

    ratings = []
    (data.at('div[@id="overviewIndRtng"]/table')/"tr").each do |row|
      ratings << row.at("td").inner_text.to_s.to_f
    end
    hostel.ratings = ratings

    images = []
    (data.at('div[@id="propMedia"]/table')/"td").each do |cell|
      img = cell.at("img")
      onclick = img && img['onclick'] # cells without an <img> are skipped
      found = onclick.to_s.match(/(http).*(jpg|gif|png|jpeg)/)
      images << found[0] if found
      #add youtube?
    end
    hostel.images = images

    if options[:all]
      # Kept in its own variable so the availability parsing below still
      # reads the detail page.
      map_page = Hpricot(open(url + "/map"))
      map_page.search("h2").remove #get rid of header
      hostel.directions = map_page.at('div[@id="directions"]').inner_text
      # uniq, not uniq!: the bang form returns nil when nothing is removed
      hostel.geo = map_page.to_s.scan(/-{0,1}\d{1,3}\.\d{7}/).uniq
    end

    if options[:date]
      hostel.availability = parse_availables(data, options[:date], options[:no_days])
    end

    hostel
  end

  # Turns the availability table of a detail page into HostelAvailable
  # entries, one per room row and night.
  #
  # data       - parsed detail page returned by setSearch_id
  # start_date - arrival date string parseable by Date.strptime
  # no_days    - number of nights to emit per room row
  #
  # Returns an Array; empty when the page has no availability table. Rows
  # without a td.price cell are skipped.
  def self.parse_availables(data, start_date, no_days)
    first_night = Date.strptime(start_date)
    availables = []
    table = data.at("div.tableAvailability/table")
    return availables unless table

    (table/"tr").each do |row|
      name = row.at("td.roomType/label/text()")
      people = row.at("td.people/select")
      # the last <option> carries the largest bookable number of people
      people = people.at("option:last-child").inner_text if people
      price = row.at("td.price")
      next if price.nil?
      price = price.inner_text.to_s.match(/[\d.]{1,5}/)[0]

      no_days.to_i.times do |offset|
        availables << HostelAvailable.new(name,price,people,(first_night+offset).to_s)
      end
    end
    availables
  end

  # Submits the listing page's 'searchForm' with an arrival date and number
  # of nights, then submits the first form of the result page with the
  # sixth currency option selected (US dollars, per the original note).
  #
  # Returns the nodes matching //div[@id="content"].
  def self.setSearch(url,date,no_days)
    date = Date.strptime(date).strftime("%d/%m/%Y")
    agent = WWW::Mechanize.new
    page = agent.get(url)
    form = page.form_with(:name => 'searchForm') # => WWW::Mechanize::Form
    form.field_with(:name => 'intnights').options[no_days.to_i-1].select
    form.dtearrival = date #d/m/y
    page = agent.submit(form)

    #to dollars!
    form = page.forms[0]
    form.field_with(:name => 'strSelectedCurrencyCode').options[5].select
    page = agent.submit(form)

    page.search('//div[@id="content"]')
  end

  # Same flow as setSearch but for a hostel detail page: the form is named
  # 'frmCheckAvailBook', its fields are 'intNights' / 'dteArrival', and the
  # currency selector lives in the second form of the result page.
  #
  # Returns the nodes matching //div[@id="content"].
  def self.setSearch_id(url,date,no_days)
    date = Date.strptime(date).strftime("%d/%m/%Y")
    agent = WWW::Mechanize.new
    page = agent.get(url)
    form = page.form_with(:name => 'frmCheckAvailBook') # => WWW::Mechanize::Form
    form.field_with(:name => 'intNights').options[no_days.to_i-1].select
    form.dteArrival = date #d/m/y
    page = agent.submit(form)

    #change currency to dollars
    form = page.forms[1]
    form.field_with(:name => 'strSelectedCurrencyCode').options[5].select
    page = agent.submit(form)

    page.search('//div[@id="content"]')
  end

end
|
@@ -0,0 +1,203 @@
|
|
1
|
+
# Scraper for hostelworld.com.
#
# Expects Hpricot, open-uri, Iconv, Date, WWW::Mechanize, Hostel and
# HostelAvailable to be loaded by the surrounding file.
class Hostelworld

  #constants
  #location list includes/indexjs.js
  HW_SINGULAR_DETAIL_URL = "http://www.hostelworld.com/hosteldetails.php?HostelNumber="
  HW_SINGULAR_IMAGE_URL = "http://www.hostelworld.com/hostelpictures.php?HostelNumber="
  HW_SINGULAR_AVAILABILITY = "http://www.hostelworld.com/availability.php/"
  HW_SINGULAR_YOUTUBE_URL = "http://www.hostelworld.com/youtubevideo.php?HostelNumber="
  HW_PLURAL_HOSTELS_URL = "http://www.hostelworld.com/findabed.php/"

  #options
  # Merged under the caller's options whenever a :date is supplied.
  @default_options = { :date => (Date.today+4).to_s, :no_days => "7", :no_ppl => "2" }

  # Downloads +url+, converts the body to UTF-8 from the charset reported
  # by the response and parses it with Hpricot.
  #
  # Returns the parsed document. The handle returned by open is closed
  # before returning.
  def self.parse_html(url)
    f = open(url)
    begin
      f.rewind
      # NOTE(review): readlines keeps each line's terminator, so joining
      # with "\n" doubles the line breaks; left as-is to keep the scraped
      # text unchanged.
      Hpricot(Iconv.conv('utf-8', f.charset, f.readlines.join("\n")))
    ensure
      f.close
    end
  end

  # Fetches the detail page of one hostel.
  #
  # options[:id]         - hostel number
  # options[:date]       - optional arrival date string; when given the
  #                        search form is submitted and availability is
  #                        parsed instead of the description
  # options[:no_ppl]     - number of persons, defaults to "2" with :date
  # options[:no_days]    - number of nights, defaults to "7" with :date
  # options[:directions] - also load directions and geo coordinates
  # options[:images]     - also load the picture URLs
  # options[:all]        - shorthand for :directions and :images
  #
  # Returns a Hostel.
  def self.find_hostel_by_id(options)
    id = options[:id].to_s
    url = HW_SINGULAR_DETAIL_URL + id

    #coder = HTMLEntities.new
    hostel = Hostel.new
    hostel.hostel_id = id

    if options[:date]
      options = @default_options.merge(options)
      page = setSearch(url, options[:date], options[:no_ppl], options[:no_days])
    else
      page = parse_html(url)
    end

    content = page.search("//div[@id='content']")
    content.search("h3").remove #get rid of header

    #title, address, desc, facilities, ratings
    hostel.name = content.at("h2").inner_text.gsub(/( in ).*$/,'')
    hostel.address = content.at('div[@style="padding-top: 5px"]').inner_text.lstrip

    if options[:date]
      hostel.availability = parse_availables(content)
    else
      # selector previously lacked its closing double quote
      hostel.description = content.at('div[@id="microDescription2"]').inner_text
    end

    #optional
    no_photos = content.at('span/a[@id="picLink"]').inner_text.to_i
    video = content.at('div[@id="microVideo"]')

    facilities = []
    (content/"li.microFacilitiesBoomLi").each do |item|
      facilities << item.inner_text
    end

    ratings = []
    (content/'div[@id="ratingsBar2"]').each do |item|
      ratings << item.inner_text.to_i
    end

    hostel.facilities = facilities
    hostel.ratings = ratings

    if video #exists
      video_page = parse_html(HW_SINGULAR_YOUTUBE_URL + id)
      hostel.video = video_page.at('param[@name="movie"]')['value']
    end

    if options[:directions] || options[:all]
      directions_page = parse_html(HW_SINGULAR_DETAIL_URL + id + "/directions/")

      #directions, geo
      # everything up to and including the "DIRECTIONS" heading is dropped
      hostel.directions = directions_page.at('div[@id="content"]').inner_text.gsub(/^[\d\D\n]*(DIRECTIONS)/,'')
      # uniq, not uniq!: the bang form returns nil when nothing is removed
      hostel.geo = directions_page.to_s.scan(/-{0,1}\d{1,3}\.\d{7}/).uniq
    end

    if options[:images] || options[:all]
      images = []
      # one request per picture, numbered from 1 to the count in picLink
      (1..no_photos).each do |i|
        picture_page = parse_html(HW_SINGULAR_IMAGE_URL + id + '&PicNO=' + i.to_s)
        images << (picture_page/"img").first[:src].to_s
      end
      hostel.images = images
    end
    hostel # return
  end

  # Lists the hostels of a city.
  #
  # options[:location] - "city,country" string (spaces are removed)
  # options[:date]     - optional arrival date string; when given the
  #                      search form is submitted and each Hostel also gets
  #                      price (dorm rate) and availability (text of the
  #                      li.noAvail date entries)
  # options[:no_ppl]   - number of persons, defaults to "2" with :date
  # options[:no_days]  - number of nights, defaults to "7" with :date
  #
  # Returns an Array of Hostel.
  def self.find_hostels_by_location(options) #location
    city = options[:location].split(',').first.gsub(' ','')
    country = options[:location].split(',').last.gsub(' ','')
    url = HW_PLURAL_HOSTELS_URL + "ChosenCity.#{city}/ChosenCountry.#{country}"

    if options[:date]
      options = @default_options.merge(options)
      data = setSearch(url, options[:date], options[:no_ppl], options[:no_days])
    else
      data = parse_html(url)
    end

    data = data.search("//div[@id='content']")
    results = []

    (data/"div.hostelListing").each do |row|
      name = row.at("h3").inner_text
      desc = row.at("div.hostelEntry/p").inner_text.to_s.chop.gsub('more info','').squeeze('.')
      link = row.at("h3/a")['href']
      rating = row.at("h4/text()")
      rating = rating.to_s.to_i if rating
      hostel_id = link.match(/[\d]*$/).to_s # trailing digits of the link

      attributes = { :hostel_id => hostel_id, :name => name, :description => desc, :ratings => rating }

      if options[:date]
        #price_USD = row.at("span.blueBeds").inner_text #need to fix float
        # strip capital letters and "$" from the dorm rate text
        dorm = (row.at("p.hostelListingRate/span.blueBeds/text()")).to_s.gsub(/[A-Z$]*/,'')
        unavailable = (row/"ul.hostelListingDates/li.noAvail/text()").to_a.join(',').split(',')
        attributes = attributes.merge(:price => dorm, :availability => unavailable)
      end

      results << Hostel.new(attributes)
    end
    results
  end

  # NOTE: the helpers below used to follow a bare `private`, which does not
  # apply to `def self.` methods; they have always been publicly callable
  # and stay that way.

  # Drives the site's search form.
  #
  # Loads +url+ and submits its first form unchanged. The site only
  # accepts the search on a second submit, so the first form of the
  # resulting page is then filled in and submitted.
  #
  # date    - date string parseable by Date.strptime
  # no_ppl  - number of persons (selects option index no_ppl - 1)
  # no_days - number of nights (selects option index no_days - 1)
  #
  # Returns the nodes matching //div[@id='content'] on the result page.
  def self.setSearch(url,date,no_ppl,no_days)
    date = Date.strptime(date)
    month = date.strftime("%m").to_i
    day = date.strftime("%d").to_i
    # Year drop-down index: 0 when the date is in the current year, else 1.
    # NOTE(review): assumes the drop-down lists this year then next year.
    year = Time.now.strftime("%y") == date.strftime("%y") ? 0 : 1

    agent = WWW::Mechanize.new
    page = agent.get(url)

    #the form name
    form = page.forms.first # => WWW::Mechanize::Form
    page = agent.submit(form)

    #the form must be submitted twice before the site accepts the search
    form = page.forms.first # => WWW::Mechanize::Form
    form.field_with(:name => 'selMonth').options[month-1].select
    form.field_with(:name => 'selDay').options[day-1].select
    form.field_with(:name => 'selYear').options[year].select
    form.field_with(:name => 'NumNights').options[no_days.to_i-1].select
    form.field_with(:name => 'Persons').options[no_ppl.to_i-1].select
    form.field_with(:name => 'Currency').options[4].select #US Currency

    page = agent.submit(form)
    page.search("//div[@id='content']")
  end

  # Parses the "tableDatesSelected2" table into HostelAvailable entries.
  #
  # info - content node of a detail page fetched with a date
  #
  # Each row's first cell supplies the room name; every cell with a title
  # attribute yields one entry whose price, bed count and date are pulled
  # out of that title text. Cells without an "available: N" part get 0 beds.
  #
  # Returns an Array of HostelAvailable.
  def self.parse_availables(info)
    table = info.at('table[@id="tableDatesSelected2"]')
    table.search("div").remove

    availables = []

    (table/"tr").each do |row|
      cells = row/"td"
      name = cells.first
      name = name.inner_text if name

      cells.each do |td|
        night = td.attributes['title']
        next unless night

        title = night.to_s
        price = title.match(/([\d]{1,3}).([\d]{2})/).to_s
        available = title.match(/(available: )([\d]*)/)
        # drop the "Date: " label and ordinal suffixes before parsing
        date = title.match(/(Date: ).*$/).to_s.gsub(/(Date: )|(th)|(nd)|(rd)|(st)/,'')
        date = Date.strptime(date, "%a %d %b '%y")

        beds = available ? available.to_s.match(/[\d]{1,2}/)[0] : 0
        availables << HostelAvailable.new(name,price,beds,date)
      end
    end
    availables
  end

end
|
data/lib/hostel.rb
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'mechanize'
|
3
|
+
require 'hpricot'
|
4
|
+
require 'open-uri'
|
5
|
+
require 'date'
|
6
|
+
require 'htmlentities'
|
7
|
+
require 'iconv'
|
8
|
+
require 'hostel/hostel'
|
9
|
+
require 'hostel/hostel_available'
|
10
|
+
require 'hostel/hostelworld'
|
11
|
+
require 'hostel/hostelbookers'
|
data/lib/test.rb
ADDED
data/spec/_helper.rb
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
require 'spec/_helper'
|
2
|
+
|
3
|
+
# Live-network examples for Hostelbookers.find_hostel_by_id.

# Basic fields of a hostel fetched without extra options.
describe "should find individual hostel and get object with name etc." do

  before(:all) do
    # Both lookups run; the examples below inspect the second result.
    @h = Hostelbookers.find_hostel_by_id(:location => "krakow,poland", :id => 9330)
    @h = Hostelbookers.find_hostel_by_id(:location => "lviv,ukraine", :id => 19606)
  end

  it "should query hostelbookers and return the correct name" do
    @h.name.should match(/^.*(Hostel).*$/)
  end

  it "address" do
    @h.address.should match(/^.*(Krakow|Lviv).*$/)
  end

  it "description" do
    # previously checked the address a second time
    @h.description.should_not be nil
  end

  it "facilities" do
    @h.should have_at_least(8).facilities
  end

  it "ratings" do
    @h.should have(8).ratings
  end

  it "images at least 6" do
    @h.should have_at_least(6).images
  end

end

# Directions and coordinates are only loaded with :all => true.
describe "all options" do
  before(:all) do
    @h = Hostelbookers.find_hostel_by_id(:location => "krakow,poland", :id => 9330, :all => true)
  end

  it "directions should have a certain length <" do
    @h.directions.length.should be > 25
  end

  it "geo" do
    @h.geo[0].to_f.round.should eql 50
  end
end

# Availability is only parsed when a :date is passed.
describe "with dates to get availabilty and verify output!" do
  before(:all) do
    @h = Hostelbookers.find_hostel_by_id(:location => "krakow,poland", :id => 19831, :date => (Date.today+10).to_s)
  end

  it "get first availability and check it merit" do
    # The bare `=~` never failed the example; turn it into an expectation.
    # NOTE(review): matched case-insensitively because the live room names
    # are not visible here - confirm against real data.
    @h.availability.first.name.to_s.should match(/bed/i)
  end

  it "check number of avail beds" do
    @h.availability.first.spots.to_i.should be > 1
  end

  it "last avail has a price > 5 US" do
    @h.availability.last.price.to_i.should be > 5
  end

  it "book date eq today + 10" do
    @h.availability.last.bookdate.should_not be nil
  end

end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'spec/_helper'
|
2
|
+
|
3
|
+
# Live-network examples for Hostelbookers.find_hostels_by_location.
describe "test hostelbookers hostel listings" do

  before(:all) do
    @h = Hostelbookers.find_hostels_by_location(:location => "krakow,poland")
  end

  it "should return a list of names" do
    names = @h.map { |e| e.name }
    names.should include("Flamingo Hostel")
    names.should include("Mama's Hostel- Main Market Square")
  end

  it "rating should be high for first choices" do
    @h.first.ratings.to_i.should be > 50
  end

  it "desc should have a certain length <" do
    @h.first.description.length.should be > 100
  end

  it "has a hostel number" do
    # to_i never returns nil, so the old `should_not be nil` could not fail
    @h.first.hostel_id.to_i.should be > 0
  end

end
|
@@ -0,0 +1,84 @@
|
|
1
|
+
require 'spec/_helper'
|
2
|
+
|
3
|
+
# Live-network examples for Hostelworld.find_hostel_by_id.

# Basic fields of a hostel fetched without extra options.
describe "find hostel by id, no options" do

  before(:all) do
    # Both lookups run; the examples below inspect the second result.
    @h = Hostelworld.find_hostel_by_id(:id => 7113)
    @h = Hostelworld.find_hostel_by_id(:id => 20763)
  end

  it "should query hostelworld and return the correct name" do
    @h.name.should match(/^.*(Hostel).*$/)
  end

  it "address" do
    @h.address.should match(/^.*(Krakow|Lviv).*$/)
  end

  it "description" do
    # previously checked the address a second time
    @h.description.should_not be nil
  end

  it "facilities" do
    @h.should have_at_least(15).facilities
  end

  it "ratings" do
    @h.should have(6).ratings
  end

end

# The video URL is filled in when the detail page has a video block.
describe "youtube" do

  before(:all) do
    @h3 = Hostelworld.find_hostel_by_id(:id => 7113)
  end

  it "video" do
    @h3.video.should match(/^.*(youtube.com).*$/)
  end
end

# Directions, coordinates and images are only loaded with :all => true.
describe "find hostel with all options" do
  before(:all) do
    @h2 = Hostelworld.find_hostel_by_id(:id => 7113, :all => true)
    @h2 = Hostelworld.find_hostel_by_id(:id => 20763, :all => true)
  end

  it "geo" do
    @h2.geo[0].to_f.round.should eql 50
  end

  it "directions" do
    @h2.directions.should_not be nil
  end

  it "images at least 6" do
    @h2.should have_at_least(6).images
  end
end

# Availability is only parsed when a :date is passed.
describe "with dates to get availabilty and verify output!" do
  before(:all) do
    @h = Hostelworld.find_hostel_by_id(:id => 20763, :date => (Date.today+10).to_s)
  end

  it "get first availability and check it merit" do
    # The bare `=~` never failed the example; turn it into an expectation.
    # NOTE(review): matched case-insensitively because the live room names
    # are not visible here - confirm against real data.
    @h.availability.first.name.to_s.should match(/bed/i)
  end

  it "check number of avail beds" do
    @h.availability.first.spots.to_i.should be >= 1
  end

  it "last avail has a price > 5 US" do
    @h.availability.first.price.to_i.should be > 5
  end

  it "book date eq today + 10" do
    @h.availability.last.bookdate.should_not be nil
  end

end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'spec/_helper'
|
2
|
+
|
3
|
+
# Live-network examples for Hostelworld.find_hostels_by_location.

# Listing without dates: name, rating, description and id only.
describe "finds list of hostels" do

  before(:all) do
    @h = Hostelworld.find_hostels_by_location(:location => 'krakow,poland')
  end

  it "should get a list with name and brief desc" do
    names = @h.map { |e| e.name }
    names.should include("Mama's Hostel Main Market Square")
  end

  it "rating should be high for first choices" do
    @h.first.ratings.to_i.should be > 50
  end

  it "desc should have a certain length <" do
    @h.first.description.length.should be > 100
  end

  it "has a hostel number" do
    # to_i never returns nil, so the old `should_not be nil` could not fail
    @h.first.hostel_id.to_i.should be > 0
  end

end

# Listing with a date: price and unavailable dates are filled in as well.
describe "find hostels with dates" do

  before(:all) do
    @h = Hostelworld.find_hostels_by_location(:location => 'krakow,poland', :date => (Date.today + 10).to_s)
  end

  it "rating should be high for first choices" do
    @h.first.ratings.to_i.should be > 50
  end

  it "desc should have a certain length <" do
    @h.first.description.length.should be > 90
  end

  it "has a hostel number" do
    # to_i never returns nil, so the old `should_not be nil` could not fail
    @h.first.hostel_id.to_i.should be > 0
  end

  it "has dorm rooms for greater than $5" do
    @h.first.price.to_i.should be > 5
  end

  it "has available rooms!" do
    # availability holds the *unavailable* dates, so none means free beds
    @h.first.availability.first.should be nil
  end

  it "has unavailable rooms!" do
    @h.last.availability.first.should_not be nil
  end

end
|
metadata
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: holden-hostelify
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Holden Thomas
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-09-09 00:00:00 -07:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: Simple Hostel Webscrapper.
|
17
|
+
email: holden.thomas@gmail.com
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files:
|
23
|
+
- README.rdoc
|
24
|
+
- lib/hostel.rb
|
25
|
+
- lib/hostel/gomio.rb
|
26
|
+
- lib/hostel/hostel.rb
|
27
|
+
- lib/hostel/hostel_available.rb
|
28
|
+
- lib/hostel/hostelbookers.rb
|
29
|
+
- lib/hostel/hostelworld.rb
|
30
|
+
- lib/test.rb
|
31
|
+
files:
|
32
|
+
- Manifest
|
33
|
+
- README.rdoc
|
34
|
+
- Rakefile
|
35
|
+
- lib/hostel.rb
|
36
|
+
- lib/hostel/gomio.rb
|
37
|
+
- lib/hostel/hostel.rb
|
38
|
+
- lib/hostel/hostel_available.rb
|
39
|
+
- lib/hostel/hostelbookers.rb
|
40
|
+
- lib/hostel/hostelworld.rb
|
41
|
+
- lib/test.rb
|
42
|
+
- spec/_helper.rb
|
43
|
+
- spec/hb_find_by_hostel.spec
|
44
|
+
- spec/hb_find_hostels.spec
|
45
|
+
- spec/hw_find_by_hostel.spec
|
46
|
+
- spec/hw_find_hostels.spec
|
47
|
+
- hostelify.gemspec
|
48
|
+
has_rdoc: true
|
49
|
+
homepage: http://github.com/holden/hostelify
|
50
|
+
post_install_message:
|
51
|
+
rdoc_options:
|
52
|
+
- --line-numbers
|
53
|
+
- --inline-source
|
54
|
+
- --title
|
55
|
+
- Hostelify
|
56
|
+
- --main
|
57
|
+
- README.rdoc
|
58
|
+
require_paths:
|
59
|
+
- lib
|
60
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
61
|
+
requirements:
|
62
|
+
- - ">="
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: "0"
|
65
|
+
version:
|
66
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
67
|
+
requirements:
|
68
|
+
- - ">="
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
version: "1.2"
|
71
|
+
version:
|
72
|
+
requirements: []
|
73
|
+
|
74
|
+
rubyforge_project: hostelify
|
75
|
+
rubygems_version: 1.2.0
|
76
|
+
signing_key:
|
77
|
+
specification_version: 2
|
78
|
+
summary: Simple Hostel Webscrapper.
|
79
|
+
test_files: []
|
80
|
+
|