hostelify 0.5.4 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,3 @@
1
+ pkg/*
2
+ *.gem
3
+ .bundle
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "http://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in hostelify.gemspec
4
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,35 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ hostelify (0.6.0)
5
+ nibbler (>= 1.2.1)
6
+ nokogiri
7
+ rest-client (>= 1.6.1)
8
+
9
+ GEM
10
+ remote: http://rubygems.org/
11
+ specs:
12
+ diff-lcs (1.1.2)
13
+ mime-types (1.16)
14
+ nibbler (1.2.1)
15
+ nokogiri (1.4.4)
16
+ rest-client (1.6.1)
17
+ mime-types (>= 1.16)
18
+ rspec (2.5.0)
19
+ rspec-core (~> 2.5.0)
20
+ rspec-expectations (~> 2.5.0)
21
+ rspec-mocks (~> 2.5.0)
22
+ rspec-core (2.5.1)
23
+ rspec-expectations (2.5.0)
24
+ diff-lcs (~> 1.1.2)
25
+ rspec-mocks (2.5.0)
26
+
27
+ PLATFORMS
28
+ ruby
29
+
30
+ DEPENDENCIES
31
+ hostelify!
32
+ nibbler (>= 1.2.1)
33
+ nokogiri
34
+ rest-client (>= 1.6.1)
35
+ rspec
data/README.markdown CHANGED
@@ -1,11 +1,9 @@
1
- # Hostelify
1
+ # Hostelify
2
2
 
3
3
  >This a gem which creates an API for booking and pricing data for the following sites:
4
4
 
5
5
  * Hostelworld
6
- * Hostelbookers
6
+ * Hostelbookers (back soon, use < 0.6)
7
7
  * Gomio (removed too slow to speak of)
8
8
 
9
- Hostelworld's real API is commercial and it sucks to high heaven... it was written as an afterthought by a Rent-a-coder.
10
- Hostelbookers' is also commercial only, but is quite ok.
11
-
9
+ Web scrapping utility for publicly accessible data such as dates and prices.
data/Rakefile CHANGED
@@ -1,17 +1,2 @@
1
- require 'rubygems'
2
- require 'rake'
3
- require 'echoe'
4
- require 'hpricot'
5
- require 'mechanize'
6
-
7
- Echoe.new('hostelify', '0.5.4') do |p|
8
- p.description = "Simple Hostel Webscrapper."
9
- p.url = "http://github.com/holden/hostelify"
10
- p.author = "Holden Thomas"
11
- p.email = "holden.thomas@gmail.com"
12
- p.ignore_pattern = ["tmp/*", "script/*"]
13
- p.development_dependencies = []
14
- p.runtime_dependencies = ["hpricot", "mechanize"]
15
- end
16
-
17
- Dir["#{File.dirname(__FILE__)}/tasks/*.rake"].sort.each { |ext| load ext }
1
+ require 'bundler'
2
+ Bundler::GemHelper.install_tasks
data/hostelify.gemspec CHANGED
@@ -1,36 +1,26 @@
1
1
  # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "hostelify/version"
2
4
 
3
5
  Gem::Specification.new do |s|
4
- s.name = %q{hostelify}
5
- s.version = "0.5.4"
6
+ s.name = "hostelify"
7
+ s.version = Hostelify::VERSION
8
+ s.platform = Gem::Platform::RUBY
9
+ s.authors = ["Holden Thomas"]
10
+ s.email = ["holden@wombie.com"]
11
+ s.homepage = "http://wombie.com"
12
+ s.summary = %q{API for hostel related sites}
13
+ s.description = %q{scrapper for publicly accessible data from hostelworld hostelbookers etc.}
14
+
15
+ s.add_dependency('rest-client', '>= 1.6.1')
16
+ s.add_dependency('nokogiri')
17
+ s.add_dependency('nibbler', '>= 1.2.1')
18
+ s.add_development_dependency "rspec"
6
19
 
7
- s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
8
- s.authors = ["Holden Thomas"]
9
- s.date = %q{2010-10-05}
10
- s.description = %q{Simple Hostel Webscrapper.}
11
- s.email = %q{holden.thomas@gmail.com}
12
- s.extra_rdoc_files = ["README.markdown", "lib/hostelify.rb", "lib/hostelify/gomio.rb", "lib/hostelify/hostel.rb", "lib/hostelify/hostelbookers.rb", "lib/hostelify/hostelify.rb", "lib/hostelify/hostelworld.rb", "lib/items.rb"]
13
- s.files = ["Manifest", "README.markdown", "Rakefile", "hostelify.gemspec", "lib/hostelify.rb", "lib/hostelify/gomio.rb", "lib/hostelify/hostel.rb", "lib/hostelify/hostelbookers.rb", "lib/hostelify/hostelify.rb", "lib/hostelify/hostelworld.rb", "lib/items.rb", "spec/_helper.rb", "spec/hb_find_by_hostel.spec", "spec/hb_find_hostels.spec", "spec/helper.rb", "spec/hw_find_by_hostel.spec", "spec/hw_find_hostels.spec"]
14
- s.homepage = %q{http://github.com/holden/hostelify}
15
- s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Hostelify", "--main", "README.markdown"]
16
- s.require_paths = ["lib"]
17
- s.rubyforge_project = %q{hostelify}
18
- s.rubygems_version = %q{1.3.6}
19
- s.summary = %q{Simple Hostel Webscrapper.}
20
-
21
- if s.respond_to? :specification_version then
22
- current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
23
- s.specification_version = 3
20
+ s.rubyforge_project = "hostelify"
24
21
 
25
- if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
26
- s.add_runtime_dependency(%q<hpricot>, [">= 0"])
27
- s.add_runtime_dependency(%q<mechanize>, [">= 0"])
28
- else
29
- s.add_dependency(%q<hpricot>, [">= 0"])
30
- s.add_dependency(%q<mechanize>, [">= 0"])
31
- end
32
- else
33
- s.add_dependency(%q<hpricot>, [">= 0"])
34
- s.add_dependency(%q<mechanize>, [">= 0"])
35
- end
22
+ s.files = `git ls-files`.split("\n")
23
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
24
+ s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
25
+ s.require_paths = ["lib"]
36
26
  end
@@ -1,265 +1,18 @@
1
- class Hostelworld
2
-
3
- #constants
4
- #location list includes/indexjs.js
5
- HW_SINGULAR_DETAIL_URL = "http://www2.hostelworld.com/hosteldetails.php?HostelNumber="
6
- HW_SINGULAR_IMAGE_URL = "http://www2.hostelworld.com/hostelpictures.php?HostelNumber="
7
- HW_SINGULAR_AVAILABILITY = "http://www2.hostelworld.com/availability.php/"
8
- HW_SINGULAR_YOUTUBE_URL = "http://www2.hostelworld.com/youtubevideo.php?HostelNumber="
9
- HW_PLURAL_HOSTELS_URL = "http://www2.hostelworld.com/findabed.php/"
1
+ class HostelWorld < Nibbler
2
+ element 'h1' => :name, :with => lambda { |node| node.inner_text.lstrip.rstrip }
3
+ element '.street-city' => :address, :with => lambda { |node| node.inner_text.gsub(/\s{6,}.*/,'').chop.lstrip }
4
+ element 'div.row //p[5]' => :content
5
+ element 'div#directions p' => :directions, :with => lambda { |node| node.inner_text.gsub(/(DIRECTIONS):.*$/,'').lstrip }
6
+ elements '//script[14]' => :geo, :with => lambda { |node| node.inner_text.scan(/-{0,1}\d{1,3}\.\d{7}/).uniq! }
7
+ elements 'div.links ul.column li' => :features, :with => lambda { |node| node.inner_text.first }
8
+ elements '.cboxElement img //@src' => :photos
9
+ elements '.rating-values li' => :ratings
10
10
 
11
- #options
12
- @default_options = { :date => date=(Date.today+4).to_s, :no_days => "7", :no_ppl => "2" }
13
-
14
- def self.parse_html(url)
15
- f = open(url)
16
- f.rewind
17
- Retryable.try 3 do
18
- data = Hpricot(Iconv.conv('utf-8', f.charset, f.readlines.join("\n")))
19
- end
20
- end
21
-
22
- def self.find_hostel_by_id(options)
23
- opts = { :directions => false, :images => false, :all => false }.merge options
24
- id = options[:id].to_s
25
- url = HW_SINGULAR_DETAIL_URL + id
26
-
27
- #coder = HTMLEntities.new
28
- hostel = Hostelify.new
29
- hostel.hostel_id = id
30
-
31
- if options[:date]
32
- options = @default_options.merge(options)
33
- date = Date.strptime(options[:date])
34
- data = setSearch(url, options[:date], options[:no_ppl], options[:no_days])
35
- else
36
- data = parse_html(url)
37
- end
38
-
39
- unless data == "Full"
40
- data = data.search("//div[@id='content']")
41
- data.search("h3").remove #get rid of header
42
-
43
- #title, address, desc, facilities, ratings
44
- hostel.name = data.at("h2").inner_text.gsub(/( in ).*$/,'')
45
- hostel.address = data.at('div[@style="padding-top: 5px"]').inner_text.lstrip
46
-
47
- if options[:date]
48
- hostel.availability = parse_availables(data)
49
- else
50
- hostel.description = data.at('div[@id="microDescription2]').inner_text
51
- end
52
-
53
- #optional
54
- no_photos = data.at('div[@id="microPicScroll"]/span/a').inner_text.to_i
55
- #no_photos = data.at('span/a[@id="picLink"]').inner_text.to_i
56
- video = data.at('div[@id="microVideo"]')
57
-
58
- #facilities = []
59
- #(data/"li.microFacilitiesBoomLi").each do |item|
60
- # facilities << item.inner_text
61
- #end
62
-
63
- facilities = []
64
- index_count = 1
65
- (data/"ul.microFacilitiesBoomUl/li").each_with_index do |item,index|
66
- if item.attributes['class'] == "microFacilitiesBoomLiInner"
67
- facilities << "#{facilities[index-index_count]}: " + item.inner_text
68
- index_count += 1
69
- else
70
- index_count = 1
71
- facilities << item.inner_text
72
- end
73
- end
74
-
75
- ratings = []
76
- (data/'div[@id="ratingsBar2"]').each do |item|
77
- ratings << item.inner_text.to_i
78
- end
79
-
80
- hostel.facilities = facilities
81
- hostel.ratings = ratings
82
-
83
- if video #exists
84
- data = parse_html(HW_SINGULAR_YOUTUBE_URL + id)
85
- video_url = data.at('param[@name="movie"]')['value']
86
- hostel.video = video_url
87
- #video_url = data.at('tag')
88
- end
89
-
90
- if options[:directions] or options[:all]
91
- data = parse_html(HW_SINGULAR_DETAIL_URL + id + "/directions/")
92
-
93
- #directions, geo
94
- hostel.directions = data.at('div[@id="content"]').inner_text.gsub(/^[\d\D\n]*(DIRECTIONS)/,'')
95
- hostel.geo = data.to_s.scan(/-{0,1}\d{1,3}\.\d{7}/).uniq!
96
- end
97
-
98
- if no_photos and (options[:images] or options[:all])
99
- images = []
100
- (1..no_photos).each do |i|
101
- data = parse_html(HW_SINGULAR_IMAGE_URL + id + '&PicNO=' + i.to_s)
102
- images << (data/"img").first[:src].to_s
103
- end
104
- hostel.images = images
105
- end
106
- else
107
- hostel = nil
108
- end
109
- hostel # return
110
- end
111
-
112
- def self.find_hostels_by_location(options) #location
113
-
114
- city = options[:location].split(',').first.gsub(' ','')
115
- country = options[:location].split(',').last.gsub(' ','')
116
- url = HW_PLURAL_HOSTELS_URL + "ChosenCity.#{city}/ChosenCountry.#{country}"
117
-
118
- if options[:date]
119
- options = @default_options.merge(options)
120
- date = Date.strptime(options[:date])
121
- data = setSearch2(url, options[:date], options[:no_ppl], options[:no_days])
122
- else
123
- data = parse_html(url)
124
- end
125
-
126
- data = data.search("//div[@id='content']")
127
- @results = HostelifyCollection.new
128
-
129
- (data/"div.hostelListing").each do |row|
130
- name = row.at("h2").inner_text
131
- desc = row.at("div.hostelEntry/p").inner_text.to_s.chop.gsub('more info','').squeeze('.')
132
- url = row.at("h2/a")['href']
133
- rating = row.at("h4/text()")
134
- rating = rating.to_s.to_i unless rating.nil?
135
- genre = row.at("div.hostelListingImage/span").inner_text
136
- hostel_id = url.match(/[\d]*$/).to_s
137
-
138
- if options[:date]
139
- #price_USD = row.at("span.blueBeds").inner_text #need to fix float
140
- dorm = (row.at("p.hostelListingRate/span.blueBeds/text()")).to_s.gsub(/[A-Z$]*/,'')
141
- single = row.at("p.hostelListingPrivateRate/span.blueBeds/text()").to_s.gsub(/[A-Z$]*/,'')
142
- available = row/"ul.hostelListingDates/li.noAvail/text()"
143
- available = available.to_a.join(',').split(',')
144
- @results << Hostelify.new(:hostel_id => hostel_id, :name => name, :description => desc, :rating => rating, :dorm => dorm, :single => single, :unavailable => available)
145
- else
146
- @results << Hostelify.new(:hostel_id => hostel_id, :name => name, :description => desc, :rating => rating, :genre => genre)
147
- end
11
+ elements '//table[@class="beds-details"]//tr[position() > 1]' => :beds do
12
+ element 'td.left span' => :title, :with => lambda { |node| node.inner_text.lstrip.rstrip.squeeze(" ") }
13
+ elements 'td[@style="cursor: help"]' => :nights do
14
+ element '.currency' => :price, :with => lambda { |node| node.inner_text.to_f }
15
+ element '@title' => :spots, :with => lambda { |node| node.inner_text.match(/\d{1,}$/).to_s.to_i }
148
16
  end
149
- return @results
150
17
  end
151
-
152
- private
153
-
154
- def self.setSearch(url,date,no_ppl,no_days)
155
-
156
- date = Date.strptime(date)
157
- month = date.strftime("%m").to_i
158
- day = date.strftime("%d").to_i
159
- if Time.now.strftime("%y") == date.strftime("%y") then year = 0 else year = 1 end
160
-
161
- agent = Mechanize.new
162
- page = agent.get(url)
163
-
164
- #the form name
165
- #form = page.forms.first # => WWW::Mechanize::Form
166
- form = page.form_with(:name => 'theForm')
167
-
168
- #page = agent.submit(form)
169
-
170
- #form must be submitted twice because the people writing hostelworld are retards
171
- #form = page.forms.first # => WWW::Mechanize::Form
172
- #form = page.form_with(:name => 'theForm')
173
- form.field_with(:name => 'selMonth2').options[month-1].select
174
- form.field_with(:name => 'selDay2').options[day-1].select
175
- form.field_with(:name => 'selYear2').options[year].select
176
- #form.field_with(:name => { 0 => 'NumNights' }).options[no_days.to_i-1].select
177
- my_fields = form.fields.select {|f| f.name == "NumNights"}
178
- my_fields[1].value = no_days.to_i
179
- #form.my_fields[1].whatever = "value"
180
- #form.field_with(:name => 'Persons').options[no_ppl.to_i-1].select
181
- form.field_with(:name => 'Currency').options[4].select #US Currency
182
-
183
-
184
- Retryable.try 3 do
185
- page = agent.submit(form, form.button_with(:name => 'DateSelect'))
186
- end
187
-
188
- error = page.search("div.microBookingError2")
189
-
190
- if error.to_s.length > 1
191
- data = "Full"
192
- else
193
- data = page.search("//div[@id='content']")
194
- end
195
-
196
- return data
197
- end
198
-
199
- def self.setSearch2(url,date,no_ppl,no_days)
200
-
201
- date = Date.strptime(date)
202
- month = date.strftime("%m").to_i
203
- day = date.strftime("%d").to_i
204
- if Time.now.strftime("%y") == date.strftime("%y") then year = 0 else year = 1 end
205
-
206
- agent = WWW::Mechanize.new
207
- page = agent.get(url)
208
-
209
- #the form name
210
- #form = page.forms.first # => WWW::Mechanize::Form
211
- form = page.form_with(:name => 'theForm')
212
-
213
- #page = agent.submit(form)
214
-
215
- #form must be submitted twice because the people writing hostelworld are retards
216
-
217
- form.field_with(:name => 'selMonth').options[month-1].select
218
- form.field_with(:name => 'selDay').options[day-1].select
219
- form.field_with(:name => 'selYear').options[year].select
220
- form.field_with(:name => 'NumNights').options[no_days.to_i-1].select
221
- form.field_with(:name => 'Persons').options[no_ppl.to_i-1].select
222
- form.field_with(:name => 'Currency').options[4].select #US Currency
223
-
224
- Retryable.try 3 do
225
- page = agent.submit(form)
226
- end
227
- data = page.search("//div[@id='content']")
228
- return data
229
- end
230
-
231
- def self.parse_availables(info)
232
-
233
- availability = info.at('table[@id="tableDatesSelected2"]')
234
- availability.search("div").remove
235
- availability.search("span.hwRoomTypeDesc").remove
236
-
237
- availables = []
238
-
239
- (availability/"tr").each do |row|
240
- name = (row/"td").first
241
- name = name.inner_text unless name.nil?
242
-
243
- (row/"td").each do |td|
244
- night = td.attributes['title']
245
- if night
246
- price = night.to_s.match(/([\d]{1,3}).([\d]{2})/).to_s
247
- available = night.to_s.match(/(available: )([\d]*)/)
248
- date = night.to_s.match(/(Date: ).*$/).to_s.gsub(/(Date: )|(th)|(nd)|(rd)|(st)/,'')
249
- date = Date.strptime(date, "%a %d %b '%y")
250
-
251
- if available
252
- beds = available.to_s.match(/[\d]{1,2}/)[0]
253
- availables << HostelifyAvailable.new(name,price,beds,date)
254
- else
255
- availables << HostelifyAvailable.new(name,price,0,date)
256
- end
257
- end
258
- end
259
- end
260
- return availables
261
-
262
- end
263
-
264
-
265
18
  end
@@ -0,0 +1,3 @@
1
+ module Hostelify
2
+ VERSION = "0.6.0"
3
+ end
data/lib/hostelify.rb CHANGED
@@ -1,10 +1,29 @@
1
1
  require 'rubygems'
2
- require 'mechanize'
3
- require 'hpricot'
4
- require 'open-uri'
5
2
  require 'date'
6
- require 'htmlentities'
7
- require 'iconv'
8
- require 'hostelify/hostelify'
3
+ require 'rest_client'
4
+ require 'nibbler'
9
5
  require 'hostelify/hostelworld'
10
- require 'hostelify/hostelbookers'
6
+
7
+ module Hostelify
8
+
9
+ def self.find(*args, &block)
10
+ options = { :currency => 'EUR', :date_from => Date.today+14, :directions => false }
11
+ options.merge!(args.pop) if args.last.kind_of? Hash
12
+
13
+ RestClient.head 'http://www.hostelworld.com/hosteldetails.php/' + options[:id].to_s do |response, request, result|
14
+ redirect = response.headers[:location]
15
+ redirect = redirect + '/directions' if options[:directions]
16
+ date = Date.strptime(options[:date_from].to_s)
17
+
18
+ @resource = RestClient.post redirect,
19
+ :date_from => date+1,
20
+ :date_to => date+8,
21
+ :searchperformedflag => 1,
22
+ :currency => options[:currency]
23
+ end
24
+
25
+ result = HostelWorld.parse @resource
26
+ end
27
+
28
+
29
+ end
@@ -0,0 +1,22 @@
1
+ require "spec_helper"
2
+
3
+ describe "hostelworld" do
4
+
5
+ before(:all) do
6
+ @hostel = Hostelify.find(:id => 20763, :date_from => Date.today+14)
7
+ #@hostel = Hostelify.find(:id => 7113)
8
+ end
9
+
10
+ it "should return a name containing the word hostel" do
11
+ @hostel.name.should match(/^.*(Hostel).*$/)
12
+ end
13
+
14
+ it "should have a description of at least 300 chars" do
15
+ @hostel.content.should_not be nil
16
+ end
17
+
18
+ it "should have at least 4 images" do
19
+ @hostel.should have_at_least(6).photos
20
+ end
21
+
22
+ end
@@ -0,0 +1,4 @@
1
+ require 'rubygems'
2
+ require 'bundler/setup'
3
+
4
+ Bundler.require(:default)
metadata CHANGED
@@ -1,12 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hostelify
3
3
  version: !ruby/object:Gem::Version
4
+ hash: 7
4
5
  prerelease: false
5
6
  segments:
6
7
  - 0
7
- - 5
8
- - 4
9
- version: 0.5.4
8
+ - 6
9
+ - 0
10
+ version: 0.6.0
10
11
  platform: ruby
11
12
  authors:
12
13
  - Holden Thomas
@@ -14,101 +15,124 @@ autorequire:
14
15
  bindir: bin
15
16
  cert_chain: []
16
17
 
17
- date: 2010-10-05 00:00:00 +02:00
18
+ date: 2011-03-31 00:00:00 +02:00
18
19
  default_executable:
19
20
  dependencies:
20
21
  - !ruby/object:Gem::Dependency
21
- name: hpricot
22
+ name: rest-client
22
23
  prerelease: false
23
24
  requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
24
26
  requirements:
25
27
  - - ">="
26
28
  - !ruby/object:Gem::Version
29
+ hash: 13
27
30
  segments:
28
- - 0
29
- version: "0"
31
+ - 1
32
+ - 6
33
+ - 1
34
+ version: 1.6.1
30
35
  type: :runtime
31
36
  version_requirements: *id001
32
37
  - !ruby/object:Gem::Dependency
33
- name: mechanize
38
+ name: nokogiri
34
39
  prerelease: false
35
40
  requirement: &id002 !ruby/object:Gem::Requirement
41
+ none: false
36
42
  requirements:
37
43
  - - ">="
38
44
  - !ruby/object:Gem::Version
45
+ hash: 3
39
46
  segments:
40
47
  - 0
41
48
  version: "0"
42
49
  type: :runtime
43
50
  version_requirements: *id002
44
- description: Simple Hostel Webscrapper.
45
- email: holden.thomas@gmail.com
51
+ - !ruby/object:Gem::Dependency
52
+ name: nibbler
53
+ prerelease: false
54
+ requirement: &id003 !ruby/object:Gem::Requirement
55
+ none: false
56
+ requirements:
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ hash: 29
60
+ segments:
61
+ - 1
62
+ - 2
63
+ - 1
64
+ version: 1.2.1
65
+ type: :runtime
66
+ version_requirements: *id003
67
+ - !ruby/object:Gem::Dependency
68
+ name: rspec
69
+ prerelease: false
70
+ requirement: &id004 !ruby/object:Gem::Requirement
71
+ none: false
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ hash: 3
76
+ segments:
77
+ - 0
78
+ version: "0"
79
+ type: :development
80
+ version_requirements: *id004
81
+ description: scrapper for publicly accessible data from hostelworld hostelbookers etc.
82
+ email:
83
+ - holden@wombie.com
46
84
  executables: []
47
85
 
48
86
  extensions: []
49
87
 
50
- extra_rdoc_files:
51
- - README.markdown
52
- - lib/hostelify.rb
53
- - lib/hostelify/gomio.rb
54
- - lib/hostelify/hostel.rb
55
- - lib/hostelify/hostelbookers.rb
56
- - lib/hostelify/hostelify.rb
57
- - lib/hostelify/hostelworld.rb
58
- - lib/items.rb
88
+ extra_rdoc_files: []
89
+
59
90
  files:
60
- - Manifest
91
+ - .gitignore
92
+ - Gemfile
93
+ - Gemfile.lock
61
94
  - README.markdown
62
95
  - Rakefile
63
96
  - hostelify.gemspec
64
97
  - lib/hostelify.rb
65
- - lib/hostelify/gomio.rb
66
- - lib/hostelify/hostel.rb
67
- - lib/hostelify/hostelbookers.rb
68
- - lib/hostelify/hostelify.rb
69
98
  - lib/hostelify/hostelworld.rb
70
- - lib/items.rb
71
- - spec/_helper.rb
72
- - spec/hb_find_by_hostel.spec
73
- - spec/hb_find_hostels.spec
74
- - spec/helper.rb
75
- - spec/hw_find_by_hostel.spec
76
- - spec/hw_find_hostels.spec
99
+ - lib/hostelify/version.rb
100
+ - spec/hostelworld_spec.rb
101
+ - spec/spec_helper.rb
77
102
  has_rdoc: true
78
- homepage: http://github.com/holden/hostelify
103
+ homepage: http://wombie.com
79
104
  licenses: []
80
105
 
81
106
  post_install_message:
82
- rdoc_options:
83
- - --line-numbers
84
- - --inline-source
85
- - --title
86
- - Hostelify
87
- - --main
88
- - README.markdown
107
+ rdoc_options: []
108
+
89
109
  require_paths:
90
110
  - lib
91
111
  required_ruby_version: !ruby/object:Gem::Requirement
112
+ none: false
92
113
  requirements:
93
114
  - - ">="
94
115
  - !ruby/object:Gem::Version
116
+ hash: 3
95
117
  segments:
96
118
  - 0
97
119
  version: "0"
98
120
  required_rubygems_version: !ruby/object:Gem::Requirement
121
+ none: false
99
122
  requirements:
100
123
  - - ">="
101
124
  - !ruby/object:Gem::Version
125
+ hash: 3
102
126
  segments:
103
- - 1
104
- - 2
105
- version: "1.2"
127
+ - 0
128
+ version: "0"
106
129
  requirements: []
107
130
 
108
131
  rubyforge_project: hostelify
109
- rubygems_version: 1.3.6
132
+ rubygems_version: 1.3.7
110
133
  signing_key:
111
134
  specification_version: 3
112
- summary: Simple Hostel Webscrapper.
113
- test_files: []
114
-
135
+ summary: API for hostel related sites
136
+ test_files:
137
+ - spec/hostelworld_spec.rb
138
+ - spec/spec_helper.rb
data/Manifest DELETED
@@ -1,17 +0,0 @@
1
- Manifest
2
- README.markdown
3
- Rakefile
4
- hostelify.gemspec
5
- lib/hostelify.rb
6
- lib/hostelify/gomio.rb
7
- lib/hostelify/hostel.rb
8
- lib/hostelify/hostelbookers.rb
9
- lib/hostelify/hostelify.rb
10
- lib/hostelify/hostelworld.rb
11
- lib/items.rb
12
- spec/_helper.rb
13
- spec/hb_find_by_hostel.spec
14
- spec/hb_find_hostels.spec
15
- spec/helper.rb
16
- spec/hw_find_by_hostel.spec
17
- spec/hw_find_hostels.spec
@@ -1,104 +0,0 @@
1
- #gave up, gomio sucks
2
-
3
- require 'rubygems'
4
- require 'mechanize'
5
- require 'hpricot'
6
- require 'open-uri'
7
- require 'date'
8
- require 'htmlentities'
9
-
10
- Hpricot.buffer_size = 262144
11
-
12
- class Gomio
13
-
14
- #constants
15
- GOMIO_SINGULAR = "http://www.gomio.com/reservation/chooseBed.aspx?HostelId="
16
- GOMIO_PLURAL_HOSTELS_URL = "http://www.gomio.com/hostels/europe/" #europe/poland/krakow/mama%27s%20hostel/overview.htm
17
-
18
- def self.find_hostel_by_id(options)
19
- #opts = { :directions => false, :images => false, :all => false }.merge options
20
- city = options[:location].split(',').first.gsub(' ','')
21
- country = options[:location].split(',').last.gsub(' ','')
22
- id = options[:id]
23
- url = GOMIO_PLURAL_HOSTELS_URL + "#{country}/#{city}/#{id}/overview.htm"
24
-
25
- #setSearch(url,"2009-09-20",2,7)
26
- data = Hpricot(open(url))
27
-
28
- data = data.search('div[@id="main"]')
29
- name = data.at("h3/span").inner_text.lstrip.rstrip
30
- address = data.at("span.br_address").inner_text.lstrip.rstrip
31
- desc = data.at("div.br_detail").inner_text.lstrip.rstrip
32
- available1 = data.at("td.HasNoAvail_Outer")
33
- available2 = data.at("td.HasNoAvail")
34
- puts "#{available1}, #{available2}"
35
- end
36
-
37
- def self.find_hostels_by_location(options) #location
38
- city = options[:location].split(',').first.gsub(' ','')
39
- country = options[:location].split(',').last.gsub(' ','')
40
-
41
- url = GOMIO_PLURAL_HOSTELS_URL + "#{country}/#{city}/search.htm"
42
-
43
- #data = Hpricot(open(url))
44
- data = setSearch(url,"2009-09-09",2,5)
45
-
46
- data = data.search("div.SearchResultMembers")
47
-
48
- (data/"div.SearchResultsHostel").each do |row|
49
- name = row.at("h3").inner_text.lstrip.rstrip
50
- desc = row.at("p").inner_text.lstrip.rstrip
51
- url = row.at("h3/a")['href']
52
- gomio_id = url.match(/(#{city}).([\d\D]*)(\/overview.htm)/)[2]
53
-
54
- available1 = row/("td.HasNoAvail_Outer/text()").to_a.join(',').split(',')
55
- available2 = row/("td.HasNoAvail_Outer/text()").to_a.join(',').split(',')
56
- available = available1 + available2
57
-
58
- @results = { :gomio_id => gomio_id, :name => name, :unavailable => available }
59
- puts @results
60
- end
61
- return @results
62
- end
63
-
64
- def self.setSearch(url,date,no_ppl,no_days)
65
-
66
- date = Date.strptime(date)
67
- month = date.strftime("%m").to_i
68
- day = date.strftime("%d").to_i
69
- if Time.now.strftime("%y") == date.strftime("%y") then year = 0 else year = 1 end
70
-
71
- agent = WWW::Mechanize.new
72
- page = agent.get(url)
73
-
74
- #the form name
75
- form = page.forms.first # => WWW::Mechanize::Form
76
- #page = agent.submit(form)
77
-
78
- #ctl00_searchbox_sb_ddlMonth
79
- #ctl00_searchbox_sb_ddlDay
80
- #ctl00_searchbox_sb_ddlYear
81
-
82
- #ctl00_searchbox_sb_ddlNights
83
- #ctl00_searchbox_sb_ddlBeds
84
-
85
- form.field_with(:name => 'ctl00$searchbox$sb$ddlMonth').options[month-1].select
86
- form.field_with(:name => 'ctl00$searchbox$sb$ddlDay').options[day-1].select
87
- form.field_with(:name => 'ctl00$searchbox$sb$ddlYear').options[year].select
88
- form.field_with(:name => 'ctl00$searchbox$sb$ddlNights').options[no_days.to_i-1].select
89
- form.field_with(:name => 'ctl00$searchbox$sb$ddlBeds').options[no_ppl.to_i-1].select
90
- #form.field_with(:id => 'Currency').options[4].select #US Currency
91
-
92
- page = agent.submit(form)
93
- data = page.search('//div[@id="main"]')
94
- #puts data
95
- return data
96
- end
97
-
98
- #url = GOMIO_PLURAL_HOSTELS_URL + "poland/krakow/search.htm"
99
-
100
- #Gomio.setSearch(url,"2009-09-20",2,7)
101
- Gomio.find_hostels_by_location(:location => "krakow,poland")
102
- #Gomio.find_hostel_by_id(:id => "mama's%20hostel", :location => "krakow,poland")
103
-
104
- end
@@ -1,63 +0,0 @@
1
- #delete
2
-
3
- class Hostelify
4
- attr_accessor :hostel_id, :name, :address, :description, :facilities, :ratings, :directions, :geo, :images, :video, :availability
5
- attr_accessor :rating, :dorm, :single, :unavailable, :genre
6
-
7
- def initialize(options = {})
8
- options.each {
9
- |k,v|
10
- self.send( "#{k.to_s}=".intern, v)
11
- }
12
- end
13
-
14
- end
15
-
16
- class HostelifyCollection < Array
17
- # This collection does everything an Array does, plus
18
- # you can add utility methods like names.
19
-
20
- def ids
21
- collect do |i|
22
- i.hostel_id
23
- end
24
- end
25
-
26
- def names
27
- collect do |i|
28
- i.name
29
- end
30
- end
31
-
32
- def descs
33
- collect do |i|
34
- i.description
35
- end
36
- end
37
-
38
- end
39
-
40
- class HostelifyAvailable
41
- attr_accessor :name, :price, :spots, :bookdate
42
-
43
- def initialize(name, price, spots, bookdate)
44
- @name = name
45
- @price = price
46
- @spots = spots
47
- @bookdate = bookdate
48
- end
49
-
50
- end
51
-
52
- module Retryable
53
- extend self
54
-
55
- def try times = 1, options = {}, &block
56
- val = yield
57
- rescue options[:on] || Exception
58
- retry if (times -= 1) > 0
59
- else
60
- val
61
- end
62
- end
63
-
@@ -1,118 +0,0 @@
1
- class Hostelbookers
2
-
3
- HB_SINGULAR_URL = "http://www.hostelbookers.com/property/index.cfm?fuseaction=accommodation.search&straccommodationtype=hostels&fromPropertyNameSearch=0"
4
- HB_PLURAL_URL = "http://www.hostelbookers.com/results/index.cfm?straccommodationtype=hostels&strSearchType=freeText&fuseaction=accommodation.search"
5
-
6
- @default_options = { :date => (Date.today+4).to_s, :no_days => "7", :live => true }
7
-
8
- def self.find_hostels_by_location(options)
9
- options = @default_options.merge(options)
10
- date = Date.strptime(options[:date])
11
- city = options[:location].split(',').first.rstrip.lstrip.gsub(' ','-').squeeze("-")
12
- country = options[:location].split(',').last.rstrip.lstrip.gsub(' ','-').squeeze("-")
13
-
14
- url = HB_PLURAL_URL + "&strkeywords=#{city},+#{country}&dtearrival=#{date.strftime('%d/%m/%Y')}"
15
-
16
- #Retryable.try 3 do
17
- data = Hpricot(open(url))
18
- #end
19
-
20
- data = data.search("//div[@id='propertyResultsList']")
21
-
22
- @results = HostelifyCollection.new
23
-
24
- (data/"tr.propertyRow").each do |row|
25
- name = row.at("a.propertyTitle").inner_text
26
- url = row.at("a.propertyTitle")['href']
27
- desc = row.at("p.shortDescription").inner_text
28
- rating = row.at("td.rating/text()")
29
- rating = rating.to_s.to_i unless rating.nil?
30
- dorm = row.at("td.shared/text()")
31
- single = row.at("td.private/text()")
32
- hb_id = url.match(/[\d]{2,5}.$/).to_s.to_i
33
-
34
- @results << Hostelify.new(:hostel_id => hb_id, :name => name, :description => desc, :rating => rating, :dorm => dorm, :single => single)
35
- end
36
- return @results
37
- end
38
-
39
- def self.find_hostel_by_id(options)
40
- options = @default_options.merge(options)
41
- date = Date.strptime(options[:date])
42
- hostel = Hostelify.new
43
-
44
- url = HB_SINGULAR_URL + "&intnights=#{options[:no_days]}&intpeople=1&dtearrival=#{date.strftime('%d/%m/%Y')}&intpropertyid=#{options[:id]}"
45
-
46
- data = Hpricot(open(url))
47
-
48
- hostel.hostel_id = options[:id]
49
- hostel.name = data.at("h1").inner_text
50
- hostel.address = data.at("p.address").inner_text
51
- hostel.description = data.at('div[@id="overviewPane"]').inner_text
52
- facilities_td = data.at("table.tableFacilities")
53
-
54
- facilities = []
55
- (facilities_td/"td").each do |row|
56
- facilities << row.inner_text
57
- end
58
- hostel.facilities = facilities
59
- extras = []
60
- extras_td = data.at("table.tableFeatures")
61
- (extras_td/"td.name").each do |row|
62
- extras << "Free " + row.inner_text.to_s
63
- end
64
- facilities = facilities + extras
65
-
66
- ratings = []
67
- ratings_td = data.at('div[@id="overviewIndRtng"]')
68
-
69
-
70
- (ratings_td/"dd").each do |row|
71
- #ratings << row.at("td").inner_text.to_s.to_f
72
- ratings << row.at('div[@class="ratingPercentage"]').inner_text.to_s.to_f
73
- #puts row.at('div[@class="ratingPercentage"]')
74
- end
75
-
76
- hostel.ratings = ratings
77
- images = []
78
- image = data.at('div[@id="propMedia"]/table')
79
- (image/"td").each do |row|
80
- img = row.at("img")['onclick']
81
- if img =~ /(http).*(jpg|gif|png|jpeg)/
82
- images << img.match(/(http).*(jpg|gif|png|jpeg)/)[0]
83
- else
84
- #add youtube?
85
- end
86
- end
87
- hostel.images = images
88
-
89
- if options[:all]
90
- data = Hpricot(open(url + "&strTab=map"))
91
- data.search("h2").remove #get rid of header
92
- hostel.directions = data.at('div[@id="gpsMap"]').inner_text
93
- hostel.geo = data.to_s.scan(/-{0,1}\d{1,3}\.\d{7}/).uniq!
94
- end
95
-
96
- @availables = []
97
- available = data.at("div.tableAvailability/table")
98
- if available
99
- (available/"tr").each do |row|
100
- name = row.at("td.roomType/label/text()")
101
- people = row.at("td.people/select")
102
- people = people.at("option:last-child").inner_text unless people.nil?
103
- price = row.at("td.price")
104
- price = price.inner_text.to_s.match(/[\d.]{1,5}/)[0] unless price.nil?
105
- (0..(options[:no_days].to_i-1)).each do |x|
106
- @availables << HostelifyAvailable.new(name,price,people,(date+x).to_s) unless price.nil?
107
- end
108
- end
109
-
110
- hostel.availability = @availables
111
- end
112
-
113
- return hostel
114
-
115
-
116
- end
117
-
118
- end
@@ -1,61 +0,0 @@
1
# Value object holding the attributes collected for a single hostel.
# Every attribute is a plain read/write accessor; nothing is validated.
class Hostelify
  attr_accessor :hostel_id, :name, :address, :description, :facilities, :ratings,
                :directions, :geo, :images, :video, :availability
  attr_accessor :rating, :dorm, :single, :unavailable, :genre

  # Builds a hostel from a hash of attribute values.
  #
  # options - pairs of attribute name (Symbol or String) and value. Each pair
  #           is passed to the matching "<name>=" writer, so a key that has
  #           no writer raises NoMethodError.
  def initialize(options = {})
    options.each do |attribute, value|
      writer = "#{attribute}=".intern
      send(writer, value)
    end
  end
end
13
-
14
# An Array of hostel records with shortcuts for pulling one attribute
# out of every element.
class HostelifyCollection < Array
  # Returns the hostel_id of every element, in order.
  def ids
    map(&:hostel_id)
  end

  # Returns the name of every element, in order.
  def names
    map(&:name)
  end

  # Returns the description of every element, in order.
  def descs
    map(&:description)
  end
end
37
-
38
# One availability entry: a named offer with its price, the number of spots
# and the booking date it applies to. Values are stored exactly as given.
class HostelifyAvailable
  attr_accessor :name, :price, :spots, :bookdate

  # name     - label of the room or bed offer
  # price    - price for the offer
  # spots    - number of places that can be booked
  # bookdate - date the offer applies to
  def initialize(name, price, spots, bookdate)
    @name, @price, @spots, @bookdate = name, price, spots, bookdate
  end
end
49
-
50
# Helper for re-running a block that may fail transiently.
module Retryable
  extend self

  # Runs the block and returns its value, re-running it when it raises a
  # handled exception, for at most +times+ attempts in total.
  #
  # times   - maximum number of attempts (default 1). The block always runs
  #           at least once, even when times is 0 or negative.
  # options - :on => exception class, or list of classes, to retry on.
  #           Defaults to StandardError so that Interrupt, SystemExit and
  #           other non-StandardError exceptions propagate immediately
  #           instead of being retried and then swallowed.
  #
  # Returns the block's value, or nil when every attempt raised a handled
  # exception (the last error is discarded, so callers must check for nil).
  # Exceptions that are not handled propagate unchanged.
  def try(times = 1, options = {}, &block)
    handled = Array(options[:on] || StandardError)
    remaining = times
    begin
      yield
    rescue *handled
      # Bounded retry: give up quietly once the attempts are used up.
      retry if (remaining -= 1) > 0
      nil
    end
  end
end
61
-
data/lib/items.rb DELETED
@@ -1,23 +0,0 @@
1
# Simple name/description record. Every instance created is appended to a
# class-level registry reachable through Items.items.
class Items
  @items = []

  class << self
    attr_accessor :items

    # NOTE(review): placeholder - only prints "hello"; it does not return
    # the names of the registered items.
    def names
      puts "hello"
    end
  end

  attr_accessor :name, :description

  #def self.each(&args)
  #  @items.each(&args)
  #end

  # Stores both values and registers the new instance in Items.items.
  def initialize(name, description)
    @name = name
    @description = description
    Items.items << self
  end

  # Yields the name and then the description to the given block.
  def each(&block)
    yield name
    yield description
  end
end
data/spec/_helper.rb DELETED
@@ -1,4 +0,0 @@
1
- $LOAD_PATH.unshift(File.dirname(__FILE__))
2
- $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
-
4
- require 'hostelify'
@@ -1,73 +0,0 @@
1
- require 'spec/_helper'
2
-
3
# Checks the object returned by Hostelbookers.find_hostel_by_id when only an
# id is given. NOTE(review): presumably hits the live site - confirm before
# running in CI.
describe "should find individual hostel and get object with name etc." do

  before(:all) do
    #@h = Hostelbookers.find_hostel_by_id(:location => "krakow,poland", :id => 9330)
    #@h = Hostelbookers.find_hostel_by_id(:location => "lviv,ukraine", :id => 19606)
    @h = Hostelbookers.find_hostel_by_id(:id => 19606)
  end

  it "should query hostelbookers and return the correct name" do
    @h.name.should match(/^.*(Hostel).*$/)
  end

  it "address" do
    @h.address.should match(/^.*(Krakow|Lviv).*$/)
  end

  it "description" do
    # This example used to assert on @h.address, so the description itself
    # was never checked.
    @h.description.should_not be nil
  end

  it "facilities" do
    @h.should have_at_least(8).facilities
  end

  it "ratings" do
    @h.should have(8).ratings
  end

  it "images at least 6" do
    @h.should have_at_least(6).images
  end

end
36
-
37
# Checks the extra fields (directions, geo) filled in when
# Hostelbookers.find_hostel_by_id is called with :all => true.
describe("all options") do
  before(:all) { @h = Hostelbookers.find_hostel_by_id(:id => 9330, :all => true) }

  it("directions should have a certain length <") do
    directions_length = @h.directions.length
    directions_length.should(be > 25)
  end

  it("geo") do
    # First scraped coordinate, rounded to whole degrees.
    first_coordinate = @h.geo[0].to_f.round
    first_coordinate.should eql(50)
  end
end
50
-
51
# Checks the availability entries returned by Hostelbookers.find_hostel_by_id
# when a :date ten days from today is given.
describe "with dates to get availabilty and verify output!" do
  before(:all) do
    @h = Hostelbookers.find_hostel_by_id(:id => 19831, :date => (Date.today+10).to_s)
  end

  it "get first availability and check it merit" do
    # A bare `=~` only evaluates to an index or nil and can never fail the
    # example, so the match is now a real expectation.
    @h.availability.first.name.to_s.should match(/bed/)
  end

  it "check number of avail beds" do
    @h.availability.first.spots.to_i.should be >= 1
  end

  it "last avail has a price > 5 US" do
    @h.availability.last.price.to_i.should be > 5
  end

  it "book date eq today + 10" do
    @h.availability.last.bookdate.should_not be nil
  end

end
@@ -1,30 +0,0 @@
1
- require 'spec/_helper'
2
-
3
# Checks the listing returned by Hostelbookers.find_hostels_by_location for
# Krakow.
describe "test hostelbookers hostel listings" do

  before(:all) do
    @h = Hostelbookers.find_hostels_by_location(:location => "krakow,poland")
  end

  it "should return a list of names" do
    names = @h.map { |hostel| hostel.name }
    names.should include("Flamingo Hostel")
    names.should include("Mama's Hostel- Main Market Square")
  end

  it "rating should be high for first choices" do
    @h.first.rating.to_i.should be > 50
  end

  it "desc should have a certain length <" do
    @h.first.description.length.should be > 100
  end

  it "has a hostel number" do
    # to_i never returns nil, so the old `should_not be nil` could not fail;
    # a missing or non-numeric id converts to 0 and is rejected here.
    @h.first.hostel_id.to_i.should be > 0
  end

end
data/spec/helper.rb DELETED
@@ -1,7 +0,0 @@
1
- require 'rubygems'
2
- require 'spec'
3
-
4
- $LOAD_PATH.unshift(File.dirname(__FILE__))
5
- $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
6
-
7
- require 'hostel'
@@ -1,85 +0,0 @@
1
- require 'spec/_helper'
2
-
3
# Checks the object returned by Hostelworld.find_hostel_by_id when only an id
# is given.
describe "find hostel by id, no options" do

  before(:all) do
    # NOTE(review): the first result is overwritten immediately, so only
    # hostel 20763 is asserted on. The first call is kept because it still
    # exercises the lookup for 7113.
    @h = Hostelworld.find_hostel_by_id(:id => 7113)
    @h = Hostelworld.find_hostel_by_id(:id => 20763)
  end

  it "should query hostelworld and return the correct name" do
    @h.name.should match(/^.*(Hostel).*$/)
  end

  it "address" do
    @h.address.should match(/^.*(Krakow|Lviv).*$/)
  end

  it "description" do
    # This example used to assert on @h.address, so the description itself
    # was never checked.
    @h.description.should_not be nil
  end

  it "facilities" do
    @h.should have_at_least(15).facilities
  end

  it "ratings" do
    @h.should have(6).ratings
  end

end
31
-
32
# Fetches hostel 7113 for a video check; the only example is currently
# commented out, so this group asserts nothing.
describe("youtube") do

  before(:all) { @h3 = Hostelworld.find_hostel_by_id(:id => 7113) }

  #it "video" do
  #  @h3.video.should match(/^.*(youtube.com).*$/)
  #end
end
42
-
43
# Checks the extra fields filled in when Hostelworld.find_hostel_by_id is
# called with :all => true.
describe("find hostel with all options") do
  before(:all) do
    # Both ids are fetched in this order; only the last result stays in @h2.
    [7113, 20763].each do |hostel_id|
      @h2 = Hostelworld.find_hostel_by_id(:id => hostel_id, :all => true)
    end
  end

  it("geo") do
    # First scraped coordinate, rounded to whole degrees.
    first_coordinate = @h2.geo[0].to_f.round
    first_coordinate.should eql(50)
  end

  it("directions") do
    @h2.directions.should_not(be(nil))
  end

  it("images at least 6") do
    @h2.should have_at_least(6).images
  end
end
61
-
62
# Checks the availability entries returned by Hostelworld.find_hostel_by_id
# when a :date twenty days from today is given.
describe "with dates to get availabilty and verify output!" do
  before(:all) do
    #@h = Hostelworld.find_hostel_by_id(:id => 20763, :date => (Date.today+20).to_s)
    @h = Hostelworld.find_hostel_by_id(:id => 7113, :date => (Date.today+20).to_s)
  end

  it "get first availability and check it merit" do
    # A bare `=~` only evaluates to an index or nil and can never fail the
    # example, so the match is now a real expectation.
    @h.availability.first.name.to_s.should match(/bed/)
  end

  it "check number of avail beds" do
    @h.availability.last.spots.to_i.should be >= 1
  end

  it "last avail has a price > 5 US" do
    @h.availability.last.price.to_i.should be > 5
  end

  it "book date eq today + 10" do
    @h.availability.last.bookdate.should_not be nil
  end

end
@@ -1,62 +0,0 @@
1
- require 'spec/_helper'
2
-
3
# Checks the listing returned by Hostelworld.find_hostels_by_location for
# Krakow when no date is given.
describe "finds list of hostels" do

  before(:all) do
    @h = Hostelworld.find_hostels_by_location(:location => 'krakow,poland')
  end

  it "should get a list with name and brief desc" do
    names = @h.map { |hostel| hostel.name }
    names.should include("Mama's Hostel Main Market Square")
  end

  it "rating should be high for first choices" do
    @h.first.rating.to_i.should be > 50
  end

  it "desc should have a certain length <" do
    @h.first.description.length.should be > 80
  end

  it "has a hostel number" do
    # to_i never returns nil, so the old `should_not be nil` could not fail;
    # a missing or non-numeric id converts to 0 and is rejected here.
    @h.first.hostel_id.to_i.should be > 0
  end

end
31
-
32
# Checks the listing returned by Hostelworld.find_hostels_by_location for
# Krakow when a :date ten days from today is given.
describe "find hostels with dates" do

  before(:all) do
    @h = Hostelworld.find_hostels_by_location(:location => 'krakow,poland', :date => (Date.today + 10).to_s)
  end

  it "rating should be high for first choices" do
    @h.first.rating.to_i.should be > 50
  end

  it "desc should have a certain length <" do
    @h.first.description.length.should be > 90
  end

  it "has a hostel number" do
    # to_i never returns nil, so the old `should_not be nil` could not fail;
    # a missing or non-numeric id converts to 0 and is rejected here.
    @h.first.hostel_id.to_i.should be > 0
  end

  it "has dorm rooms for greater than $5" do
    @h.first.dorm.to_i.should be > 5
  end

  it "has available rooms!" do
    @h.first.unavailable.first.should be nil
  end

  it "has unavailable rooms!" do
    @h.last.unavailable.first.should_not be nil
  end

end