hostelify 0.5.4 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,3 @@
1
+ pkg/*
2
+ *.gem
3
+ .bundle
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "http://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in hostelify.gemspec
4
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,35 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ hostelify (0.6.0)
5
+ nibbler (>= 1.2.1)
6
+ nokogiri
7
+ rest-client (>= 1.6.1)
8
+
9
+ GEM
10
+ remote: http://rubygems.org/
11
+ specs:
12
+ diff-lcs (1.1.2)
13
+ mime-types (1.16)
14
+ nibbler (1.2.1)
15
+ nokogiri (1.4.4)
16
+ rest-client (1.6.1)
17
+ mime-types (>= 1.16)
18
+ rspec (2.5.0)
19
+ rspec-core (~> 2.5.0)
20
+ rspec-expectations (~> 2.5.0)
21
+ rspec-mocks (~> 2.5.0)
22
+ rspec-core (2.5.1)
23
+ rspec-expectations (2.5.0)
24
+ diff-lcs (~> 1.1.2)
25
+ rspec-mocks (2.5.0)
26
+
27
+ PLATFORMS
28
+ ruby
29
+
30
+ DEPENDENCIES
31
+ hostelify!
32
+ nibbler (>= 1.2.1)
33
+ nokogiri
34
+ rest-client (>= 1.6.1)
35
+ rspec
data/README.markdown CHANGED
@@ -1,11 +1,9 @@
1
- # Hostelify
1
+ # Hostelify
2
2
 
3
3
  >This is a gem which creates an API for booking and pricing data for the following sites:
4
4
 
5
5
  * Hostelworld
6
- * Hostelbookers
6
+ * Hostelbookers (back soon; use a version < 0.6 until then)
7
7
  * Gomio (removed; too slow to speak of)
8
8
 
9
- Hostelworld's real API is commercial and it sucks to high heaven... it was written as an afterthought by a Rent-a-coder.
10
- Hostelbookers' is also commercial only, but is quite ok.
11
-
9
+ Web scraping utility for publicly accessible data such as dates and prices.
data/Rakefile CHANGED
@@ -1,17 +1,2 @@
1
- require 'rubygems'
2
- require 'rake'
3
- require 'echoe'
4
- require 'hpricot'
5
- require 'mechanize'
6
-
7
- Echoe.new('hostelify', '0.5.4') do |p|
8
- p.description = "Simple Hostel Webscrapper."
9
- p.url = "http://github.com/holden/hostelify"
10
- p.author = "Holden Thomas"
11
- p.email = "holden.thomas@gmail.com"
12
- p.ignore_pattern = ["tmp/*", "script/*"]
13
- p.development_dependencies = []
14
- p.runtime_dependencies = ["hpricot", "mechanize"]
15
- end
16
-
17
- Dir["#{File.dirname(__FILE__)}/tasks/*.rake"].sort.each { |ext| load ext }
1
+ require 'bundler'
2
+ Bundler::GemHelper.install_tasks
data/hostelify.gemspec CHANGED
@@ -1,36 +1,26 @@
1
1
  # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "hostelify/version"
2
4
 
3
5
  Gem::Specification.new do |s|
4
- s.name = %q{hostelify}
5
- s.version = "0.5.4"
6
+ s.name = "hostelify"
7
+ s.version = Hostelify::VERSION
8
+ s.platform = Gem::Platform::RUBY
9
+ s.authors = ["Holden Thomas"]
10
+ s.email = ["holden@wombie.com"]
11
+ s.homepage = "http://wombie.com"
12
+ s.summary = %q{API for hostel related sites}
13
+ s.description = %q{scrapper for publicly accessible data from hostelworld hostelbookers etc.}
14
+
15
+ s.add_dependency('rest-client', '>= 1.6.1')
16
+ s.add_dependency('nokogiri')
17
+ s.add_dependency('nibbler', '>= 1.2.1')
18
+ s.add_development_dependency "rspec"
6
19
 
7
- s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
8
- s.authors = ["Holden Thomas"]
9
- s.date = %q{2010-10-05}
10
- s.description = %q{Simple Hostel Webscrapper.}
11
- s.email = %q{holden.thomas@gmail.com}
12
- s.extra_rdoc_files = ["README.markdown", "lib/hostelify.rb", "lib/hostelify/gomio.rb", "lib/hostelify/hostel.rb", "lib/hostelify/hostelbookers.rb", "lib/hostelify/hostelify.rb", "lib/hostelify/hostelworld.rb", "lib/items.rb"]
13
- s.files = ["Manifest", "README.markdown", "Rakefile", "hostelify.gemspec", "lib/hostelify.rb", "lib/hostelify/gomio.rb", "lib/hostelify/hostel.rb", "lib/hostelify/hostelbookers.rb", "lib/hostelify/hostelify.rb", "lib/hostelify/hostelworld.rb", "lib/items.rb", "spec/_helper.rb", "spec/hb_find_by_hostel.spec", "spec/hb_find_hostels.spec", "spec/helper.rb", "spec/hw_find_by_hostel.spec", "spec/hw_find_hostels.spec"]
14
- s.homepage = %q{http://github.com/holden/hostelify}
15
- s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Hostelify", "--main", "README.markdown"]
16
- s.require_paths = ["lib"]
17
- s.rubyforge_project = %q{hostelify}
18
- s.rubygems_version = %q{1.3.6}
19
- s.summary = %q{Simple Hostel Webscrapper.}
20
-
21
- if s.respond_to? :specification_version then
22
- current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
23
- s.specification_version = 3
20
+ s.rubyforge_project = "hostelify"
24
21
 
25
- if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
26
- s.add_runtime_dependency(%q<hpricot>, [">= 0"])
27
- s.add_runtime_dependency(%q<mechanize>, [">= 0"])
28
- else
29
- s.add_dependency(%q<hpricot>, [">= 0"])
30
- s.add_dependency(%q<mechanize>, [">= 0"])
31
- end
32
- else
33
- s.add_dependency(%q<hpricot>, [">= 0"])
34
- s.add_dependency(%q<mechanize>, [">= 0"])
35
- end
22
+ s.files = `git ls-files`.split("\n")
23
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
24
+ s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
25
+ s.require_paths = ["lib"]
36
26
  end
@@ -1,265 +1,18 @@
1
- class Hostelworld
2
-
3
- #constants
4
- #location list includes/indexjs.js
5
- HW_SINGULAR_DETAIL_URL = "http://www2.hostelworld.com/hosteldetails.php?HostelNumber="
6
- HW_SINGULAR_IMAGE_URL = "http://www2.hostelworld.com/hostelpictures.php?HostelNumber="
7
- HW_SINGULAR_AVAILABILITY = "http://www2.hostelworld.com/availability.php/"
8
- HW_SINGULAR_YOUTUBE_URL = "http://www2.hostelworld.com/youtubevideo.php?HostelNumber="
9
- HW_PLURAL_HOSTELS_URL = "http://www2.hostelworld.com/findabed.php/"
1
+ class HostelWorld < Nibbler
2
+ element 'h1' => :name, :with => lambda { |node| node.inner_text.lstrip.rstrip }
3
+ element '.street-city' => :address, :with => lambda { |node| node.inner_text.gsub(/\s{6,}.*/,'').chop.lstrip }
4
+ element 'div.row //p[5]' => :content
5
+ element 'div#directions p' => :directions, :with => lambda { |node| node.inner_text.gsub(/(DIRECTIONS):.*$/,'').lstrip }
6
+ elements '//script[14]' => :geo, :with => lambda { |node| node.inner_text.scan(/-{0,1}\d{1,3}\.\d{7}/).uniq! }
7
+ elements 'div.links ul.column li' => :features, :with => lambda { |node| node.inner_text.first }
8
+ elements '.cboxElement img //@src' => :photos
9
+ elements '.rating-values li' => :ratings
10
10
 
11
- #options
12
- @default_options = { :date => date=(Date.today+4).to_s, :no_days => "7", :no_ppl => "2" }
13
-
14
- def self.parse_html(url)
15
- f = open(url)
16
- f.rewind
17
- Retryable.try 3 do
18
- data = Hpricot(Iconv.conv('utf-8', f.charset, f.readlines.join("\n")))
19
- end
20
- end
21
-
22
- def self.find_hostel_by_id(options)
23
- opts = { :directions => false, :images => false, :all => false }.merge options
24
- id = options[:id].to_s
25
- url = HW_SINGULAR_DETAIL_URL + id
26
-
27
- #coder = HTMLEntities.new
28
- hostel = Hostelify.new
29
- hostel.hostel_id = id
30
-
31
- if options[:date]
32
- options = @default_options.merge(options)
33
- date = Date.strptime(options[:date])
34
- data = setSearch(url, options[:date], options[:no_ppl], options[:no_days])
35
- else
36
- data = parse_html(url)
37
- end
38
-
39
- unless data == "Full"
40
- data = data.search("//div[@id='content']")
41
- data.search("h3").remove #get rid of header
42
-
43
- #title, address, desc, facilities, ratings
44
- hostel.name = data.at("h2").inner_text.gsub(/( in ).*$/,'')
45
- hostel.address = data.at('div[@style="padding-top: 5px"]').inner_text.lstrip
46
-
47
- if options[:date]
48
- hostel.availability = parse_availables(data)
49
- else
50
- hostel.description = data.at('div[@id="microDescription2]').inner_text
51
- end
52
-
53
- #optional
54
- no_photos = data.at('div[@id="microPicScroll"]/span/a').inner_text.to_i
55
- #no_photos = data.at('span/a[@id="picLink"]').inner_text.to_i
56
- video = data.at('div[@id="microVideo"]')
57
-
58
- #facilities = []
59
- #(data/"li.microFacilitiesBoomLi").each do |item|
60
- # facilities << item.inner_text
61
- #end
62
-
63
- facilities = []
64
- index_count = 1
65
- (data/"ul.microFacilitiesBoomUl/li").each_with_index do |item,index|
66
- if item.attributes['class'] == "microFacilitiesBoomLiInner"
67
- facilities << "#{facilities[index-index_count]}: " + item.inner_text
68
- index_count += 1
69
- else
70
- index_count = 1
71
- facilities << item.inner_text
72
- end
73
- end
74
-
75
- ratings = []
76
- (data/'div[@id="ratingsBar2"]').each do |item|
77
- ratings << item.inner_text.to_i
78
- end
79
-
80
- hostel.facilities = facilities
81
- hostel.ratings = ratings
82
-
83
- if video #exists
84
- data = parse_html(HW_SINGULAR_YOUTUBE_URL + id)
85
- video_url = data.at('param[@name="movie"]')['value']
86
- hostel.video = video_url
87
- #video_url = data.at('tag')
88
- end
89
-
90
- if options[:directions] or options[:all]
91
- data = parse_html(HW_SINGULAR_DETAIL_URL + id + "/directions/")
92
-
93
- #directions, geo
94
- hostel.directions = data.at('div[@id="content"]').inner_text.gsub(/^[\d\D\n]*(DIRECTIONS)/,'')
95
- hostel.geo = data.to_s.scan(/-{0,1}\d{1,3}\.\d{7}/).uniq!
96
- end
97
-
98
- if no_photos and (options[:images] or options[:all])
99
- images = []
100
- (1..no_photos).each do |i|
101
- data = parse_html(HW_SINGULAR_IMAGE_URL + id + '&PicNO=' + i.to_s)
102
- images << (data/"img").first[:src].to_s
103
- end
104
- hostel.images = images
105
- end
106
- else
107
- hostel = nil
108
- end
109
- hostel # return
110
- end
111
-
112
- def self.find_hostels_by_location(options) #location
113
-
114
- city = options[:location].split(',').first.gsub(' ','')
115
- country = options[:location].split(',').last.gsub(' ','')
116
- url = HW_PLURAL_HOSTELS_URL + "ChosenCity.#{city}/ChosenCountry.#{country}"
117
-
118
- if options[:date]
119
- options = @default_options.merge(options)
120
- date = Date.strptime(options[:date])
121
- data = setSearch2(url, options[:date], options[:no_ppl], options[:no_days])
122
- else
123
- data = parse_html(url)
124
- end
125
-
126
- data = data.search("//div[@id='content']")
127
- @results = HostelifyCollection.new
128
-
129
- (data/"div.hostelListing").each do |row|
130
- name = row.at("h2").inner_text
131
- desc = row.at("div.hostelEntry/p").inner_text.to_s.chop.gsub('more info','').squeeze('.')
132
- url = row.at("h2/a")['href']
133
- rating = row.at("h4/text()")
134
- rating = rating.to_s.to_i unless rating.nil?
135
- genre = row.at("div.hostelListingImage/span").inner_text
136
- hostel_id = url.match(/[\d]*$/).to_s
137
-
138
- if options[:date]
139
- #price_USD = row.at("span.blueBeds").inner_text #need to fix float
140
- dorm = (row.at("p.hostelListingRate/span.blueBeds/text()")).to_s.gsub(/[A-Z$]*/,'')
141
- single = row.at("p.hostelListingPrivateRate/span.blueBeds/text()").to_s.gsub(/[A-Z$]*/,'')
142
- available = row/"ul.hostelListingDates/li.noAvail/text()"
143
- available = available.to_a.join(',').split(',')
144
- @results << Hostelify.new(:hostel_id => hostel_id, :name => name, :description => desc, :rating => rating, :dorm => dorm, :single => single, :unavailable => available)
145
- else
146
- @results << Hostelify.new(:hostel_id => hostel_id, :name => name, :description => desc, :rating => rating, :genre => genre)
147
- end
11
+ elements '//table[@class="beds-details"]//tr[position() > 1]' => :beds do
12
+ element 'td.left span' => :title, :with => lambda { |node| node.inner_text.lstrip.rstrip.squeeze(" ") }
13
+ elements 'td[@style="cursor: help"]' => :nights do
14
+ element '.currency' => :price, :with => lambda { |node| node.inner_text.to_f }
15
+ element '@title' => :spots, :with => lambda { |node| node.inner_text.match(/\d{1,}$/).to_s.to_i }
148
16
  end
149
- return @results
150
17
  end
151
-
152
- private
153
-
154
- def self.setSearch(url,date,no_ppl,no_days)
155
-
156
- date = Date.strptime(date)
157
- month = date.strftime("%m").to_i
158
- day = date.strftime("%d").to_i
159
- if Time.now.strftime("%y") == date.strftime("%y") then year = 0 else year = 1 end
160
-
161
- agent = Mechanize.new
162
- page = agent.get(url)
163
-
164
- #the form name
165
- #form = page.forms.first # => WWW::Mechanize::Form
166
- form = page.form_with(:name => 'theForm')
167
-
168
- #page = agent.submit(form)
169
-
170
- #form must be submitted twice because the people writing hostelworld are retards
171
- #form = page.forms.first # => WWW::Mechanize::Form
172
- #form = page.form_with(:name => 'theForm')
173
- form.field_with(:name => 'selMonth2').options[month-1].select
174
- form.field_with(:name => 'selDay2').options[day-1].select
175
- form.field_with(:name => 'selYear2').options[year].select
176
- #form.field_with(:name => { 0 => 'NumNights' }).options[no_days.to_i-1].select
177
- my_fields = form.fields.select {|f| f.name == "NumNights"}
178
- my_fields[1].value = no_days.to_i
179
- #form.my_fields[1].whatever = "value"
180
- #form.field_with(:name => 'Persons').options[no_ppl.to_i-1].select
181
- form.field_with(:name => 'Currency').options[4].select #US Currency
182
-
183
-
184
- Retryable.try 3 do
185
- page = agent.submit(form, form.button_with(:name => 'DateSelect'))
186
- end
187
-
188
- error = page.search("div.microBookingError2")
189
-
190
- if error.to_s.length > 1
191
- data = "Full"
192
- else
193
- data = page.search("//div[@id='content']")
194
- end
195
-
196
- return data
197
- end
198
-
199
- def self.setSearch2(url,date,no_ppl,no_days)
200
-
201
- date = Date.strptime(date)
202
- month = date.strftime("%m").to_i
203
- day = date.strftime("%d").to_i
204
- if Time.now.strftime("%y") == date.strftime("%y") then year = 0 else year = 1 end
205
-
206
- agent = WWW::Mechanize.new
207
- page = agent.get(url)
208
-
209
- #the form name
210
- #form = page.forms.first # => WWW::Mechanize::Form
211
- form = page.form_with(:name => 'theForm')
212
-
213
- #page = agent.submit(form)
214
-
215
- #form must be submitted twice because the people writing hostelworld are retards
216
-
217
- form.field_with(:name => 'selMonth').options[month-1].select
218
- form.field_with(:name => 'selDay').options[day-1].select
219
- form.field_with(:name => 'selYear').options[year].select
220
- form.field_with(:name => 'NumNights').options[no_days.to_i-1].select
221
- form.field_with(:name => 'Persons').options[no_ppl.to_i-1].select
222
- form.field_with(:name => 'Currency').options[4].select #US Currency
223
-
224
- Retryable.try 3 do
225
- page = agent.submit(form)
226
- end
227
- data = page.search("//div[@id='content']")
228
- return data
229
- end
230
-
231
- def self.parse_availables(info)
232
-
233
- availability = info.at('table[@id="tableDatesSelected2"]')
234
- availability.search("div").remove
235
- availability.search("span.hwRoomTypeDesc").remove
236
-
237
- availables = []
238
-
239
- (availability/"tr").each do |row|
240
- name = (row/"td").first
241
- name = name.inner_text unless name.nil?
242
-
243
- (row/"td").each do |td|
244
- night = td.attributes['title']
245
- if night
246
- price = night.to_s.match(/([\d]{1,3}).([\d]{2})/).to_s
247
- available = night.to_s.match(/(available: )([\d]*)/)
248
- date = night.to_s.match(/(Date: ).*$/).to_s.gsub(/(Date: )|(th)|(nd)|(rd)|(st)/,'')
249
- date = Date.strptime(date, "%a %d %b '%y")
250
-
251
- if available
252
- beds = available.to_s.match(/[\d]{1,2}/)[0]
253
- availables << HostelifyAvailable.new(name,price,beds,date)
254
- else
255
- availables << HostelifyAvailable.new(name,price,0,date)
256
- end
257
- end
258
- end
259
- end
260
- return availables
261
-
262
- end
263
-
264
-
265
18
  end
@@ -0,0 +1,3 @@
1
+ module Hostelify
2
+ VERSION = "0.6.0"
3
+ end
data/lib/hostelify.rb CHANGED
@@ -1,10 +1,29 @@
1
1
  require 'rubygems'
2
- require 'mechanize'
3
- require 'hpricot'
4
- require 'open-uri'
5
2
  require 'date'
6
- require 'htmlentities'
7
- require 'iconv'
8
- require 'hostelify/hostelify'
3
+ require 'rest_client'
4
+ require 'nibbler'
9
5
  require 'hostelify/hostelworld'
10
- require 'hostelify/hostelbookers'
6
+
7
+ module Hostelify
8
+
9
+ def self.find(*args, &block)
10
+ options = { :currency => 'EUR', :date_from => Date.today+14, :directions => false }
11
+ options.merge!(args.pop) if args.last.kind_of? Hash
12
+
13
+ RestClient.head 'http://www.hostelworld.com/hosteldetails.php/' + options[:id].to_s do |response, request, result|
14
+ redirect = response.headers[:location]
15
+ redirect = redirect + '/directions' if options[:directions]
16
+ date = Date.strptime(options[:date_from].to_s)
17
+
18
+ @resource = RestClient.post redirect,
19
+ :date_from => date+1,
20
+ :date_to => date+8,
21
+ :searchperformedflag => 1,
22
+ :currency => options[:currency]
23
+ end
24
+
25
+ result = HostelWorld.parse @resource
26
+ end
27
+
28
+
29
+ end
@@ -0,0 +1,22 @@
1
+ require "spec_helper"
2
+
3
+ describe "hostelworld" do
4
+
5
+ before(:all) do
6
+ @hostel = Hostelify.find(:id => 20763, :date_from => Date.today+14)
7
+ #@hostel = Hostelify.find(:id => 7113)
8
+ end
9
+
10
+ it "should return a name containing the word hostel" do
11
+ @hostel.name.should match(/^.*(Hostel).*$/)
12
+ end
13
+
14
+ it "should have a description of at least 300 chars" do
15
+ @hostel.content.should_not be nil
16
+ end
17
+
18
+ it "should have at least 4 images" do
19
+ @hostel.should have_at_least(6).photos
20
+ end
21
+
22
+ end
@@ -0,0 +1,4 @@
1
+ require 'rubygems'
2
+ require 'bundler/setup'
3
+
4
+ Bundler.require(:default)
metadata CHANGED
@@ -1,12 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hostelify
3
3
  version: !ruby/object:Gem::Version
4
+ hash: 7
4
5
  prerelease: false
5
6
  segments:
6
7
  - 0
7
- - 5
8
- - 4
9
- version: 0.5.4
8
+ - 6
9
+ - 0
10
+ version: 0.6.0
10
11
  platform: ruby
11
12
  authors:
12
13
  - Holden Thomas
@@ -14,101 +15,124 @@ autorequire:
14
15
  bindir: bin
15
16
  cert_chain: []
16
17
 
17
- date: 2010-10-05 00:00:00 +02:00
18
+ date: 2011-03-31 00:00:00 +02:00
18
19
  default_executable:
19
20
  dependencies:
20
21
  - !ruby/object:Gem::Dependency
21
- name: hpricot
22
+ name: rest-client
22
23
  prerelease: false
23
24
  requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
24
26
  requirements:
25
27
  - - ">="
26
28
  - !ruby/object:Gem::Version
29
+ hash: 13
27
30
  segments:
28
- - 0
29
- version: "0"
31
+ - 1
32
+ - 6
33
+ - 1
34
+ version: 1.6.1
30
35
  type: :runtime
31
36
  version_requirements: *id001
32
37
  - !ruby/object:Gem::Dependency
33
- name: mechanize
38
+ name: nokogiri
34
39
  prerelease: false
35
40
  requirement: &id002 !ruby/object:Gem::Requirement
41
+ none: false
36
42
  requirements:
37
43
  - - ">="
38
44
  - !ruby/object:Gem::Version
45
+ hash: 3
39
46
  segments:
40
47
  - 0
41
48
  version: "0"
42
49
  type: :runtime
43
50
  version_requirements: *id002
44
- description: Simple Hostel Webscrapper.
45
- email: holden.thomas@gmail.com
51
+ - !ruby/object:Gem::Dependency
52
+ name: nibbler
53
+ prerelease: false
54
+ requirement: &id003 !ruby/object:Gem::Requirement
55
+ none: false
56
+ requirements:
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ hash: 29
60
+ segments:
61
+ - 1
62
+ - 2
63
+ - 1
64
+ version: 1.2.1
65
+ type: :runtime
66
+ version_requirements: *id003
67
+ - !ruby/object:Gem::Dependency
68
+ name: rspec
69
+ prerelease: false
70
+ requirement: &id004 !ruby/object:Gem::Requirement
71
+ none: false
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ hash: 3
76
+ segments:
77
+ - 0
78
+ version: "0"
79
+ type: :development
80
+ version_requirements: *id004
81
+ description: scrapper for publicly accessible data from hostelworld hostelbookers etc.
82
+ email:
83
+ - holden@wombie.com
46
84
  executables: []
47
85
 
48
86
  extensions: []
49
87
 
50
- extra_rdoc_files:
51
- - README.markdown
52
- - lib/hostelify.rb
53
- - lib/hostelify/gomio.rb
54
- - lib/hostelify/hostel.rb
55
- - lib/hostelify/hostelbookers.rb
56
- - lib/hostelify/hostelify.rb
57
- - lib/hostelify/hostelworld.rb
58
- - lib/items.rb
88
+ extra_rdoc_files: []
89
+
59
90
  files:
60
- - Manifest
91
+ - .gitignore
92
+ - Gemfile
93
+ - Gemfile.lock
61
94
  - README.markdown
62
95
  - Rakefile
63
96
  - hostelify.gemspec
64
97
  - lib/hostelify.rb
65
- - lib/hostelify/gomio.rb
66
- - lib/hostelify/hostel.rb
67
- - lib/hostelify/hostelbookers.rb
68
- - lib/hostelify/hostelify.rb
69
98
  - lib/hostelify/hostelworld.rb
70
- - lib/items.rb
71
- - spec/_helper.rb
72
- - spec/hb_find_by_hostel.spec
73
- - spec/hb_find_hostels.spec
74
- - spec/helper.rb
75
- - spec/hw_find_by_hostel.spec
76
- - spec/hw_find_hostels.spec
99
+ - lib/hostelify/version.rb
100
+ - spec/hostelworld_spec.rb
101
+ - spec/spec_helper.rb
77
102
  has_rdoc: true
78
- homepage: http://github.com/holden/hostelify
103
+ homepage: http://wombie.com
79
104
  licenses: []
80
105
 
81
106
  post_install_message:
82
- rdoc_options:
83
- - --line-numbers
84
- - --inline-source
85
- - --title
86
- - Hostelify
87
- - --main
88
- - README.markdown
107
+ rdoc_options: []
108
+
89
109
  require_paths:
90
110
  - lib
91
111
  required_ruby_version: !ruby/object:Gem::Requirement
112
+ none: false
92
113
  requirements:
93
114
  - - ">="
94
115
  - !ruby/object:Gem::Version
116
+ hash: 3
95
117
  segments:
96
118
  - 0
97
119
  version: "0"
98
120
  required_rubygems_version: !ruby/object:Gem::Requirement
121
+ none: false
99
122
  requirements:
100
123
  - - ">="
101
124
  - !ruby/object:Gem::Version
125
+ hash: 3
102
126
  segments:
103
- - 1
104
- - 2
105
- version: "1.2"
127
+ - 0
128
+ version: "0"
106
129
  requirements: []
107
130
 
108
131
  rubyforge_project: hostelify
109
- rubygems_version: 1.3.6
132
+ rubygems_version: 1.3.7
110
133
  signing_key:
111
134
  specification_version: 3
112
- summary: Simple Hostel Webscrapper.
113
- test_files: []
114
-
135
+ summary: API for hostel related sites
136
+ test_files:
137
+ - spec/hostelworld_spec.rb
138
+ - spec/spec_helper.rb
data/Manifest DELETED
@@ -1,17 +0,0 @@
1
- Manifest
2
- README.markdown
3
- Rakefile
4
- hostelify.gemspec
5
- lib/hostelify.rb
6
- lib/hostelify/gomio.rb
7
- lib/hostelify/hostel.rb
8
- lib/hostelify/hostelbookers.rb
9
- lib/hostelify/hostelify.rb
10
- lib/hostelify/hostelworld.rb
11
- lib/items.rb
12
- spec/_helper.rb
13
- spec/hb_find_by_hostel.spec
14
- spec/hb_find_hostels.spec
15
- spec/helper.rb
16
- spec/hw_find_by_hostel.spec
17
- spec/hw_find_hostels.spec
@@ -1,104 +0,0 @@
1
- #gave up, gomio sucks
2
-
3
- require 'rubygems'
4
- require 'mechanize'
5
- require 'hpricot'
6
- require 'open-uri'
7
- require 'date'
8
- require 'htmlentities'
9
-
10
- Hpricot.buffer_size = 262144
11
-
12
- class Gomio
13
-
14
- #constants
15
- GOMIO_SINGULAR = "http://www.gomio.com/reservation/chooseBed.aspx?HostelId="
16
- GOMIO_PLURAL_HOSTELS_URL = "http://www.gomio.com/hostels/europe/" #europe/poland/krakow/mama%27s%20hostel/overview.htm
17
-
18
- def self.find_hostel_by_id(options)
19
- #opts = { :directions => false, :images => false, :all => false }.merge options
20
- city = options[:location].split(',').first.gsub(' ','')
21
- country = options[:location].split(',').last.gsub(' ','')
22
- id = options[:id]
23
- url = GOMIO_PLURAL_HOSTELS_URL + "#{country}/#{city}/#{id}/overview.htm"
24
-
25
- #setSearch(url,"2009-09-20",2,7)
26
- data = Hpricot(open(url))
27
-
28
- data = data.search('div[@id="main"]')
29
- name = data.at("h3/span").inner_text.lstrip.rstrip
30
- address = data.at("span.br_address").inner_text.lstrip.rstrip
31
- desc = data.at("div.br_detail").inner_text.lstrip.rstrip
32
- available1 = data.at("td.HasNoAvail_Outer")
33
- available2 = data.at("td.HasNoAvail")
34
- puts "#{available1}, #{available2}"
35
- end
36
-
37
- def self.find_hostels_by_location(options) #location
38
- city = options[:location].split(',').first.gsub(' ','')
39
- country = options[:location].split(',').last.gsub(' ','')
40
-
41
- url = GOMIO_PLURAL_HOSTELS_URL + "#{country}/#{city}/search.htm"
42
-
43
- #data = Hpricot(open(url))
44
- data = setSearch(url,"2009-09-09",2,5)
45
-
46
- data = data.search("div.SearchResultMembers")
47
-
48
- (data/"div.SearchResultsHostel").each do |row|
49
- name = row.at("h3").inner_text.lstrip.rstrip
50
- desc = row.at("p").inner_text.lstrip.rstrip
51
- url = row.at("h3/a")['href']
52
- gomio_id = url.match(/(#{city}).([\d\D]*)(\/overview.htm)/)[2]
53
-
54
- available1 = row/("td.HasNoAvail_Outer/text()").to_a.join(',').split(',')
55
- available2 = row/("td.HasNoAvail_Outer/text()").to_a.join(',').split(',')
56
- available = available1 + available2
57
-
58
- @results = { :gomio_id => gomio_id, :name => name, :unavailable => available }
59
- puts @results
60
- end
61
- return @results
62
- end
63
-
64
- def self.setSearch(url,date,no_ppl,no_days)
65
-
66
- date = Date.strptime(date)
67
- month = date.strftime("%m").to_i
68
- day = date.strftime("%d").to_i
69
- if Time.now.strftime("%y") == date.strftime("%y") then year = 0 else year = 1 end
70
-
71
- agent = WWW::Mechanize.new
72
- page = agent.get(url)
73
-
74
- #the form name
75
- form = page.forms.first # => WWW::Mechanize::Form
76
- #page = agent.submit(form)
77
-
78
- #ctl00_searchbox_sb_ddlMonth
79
- #ctl00_searchbox_sb_ddlDay
80
- #ctl00_searchbox_sb_ddlYear
81
-
82
- #ctl00_searchbox_sb_ddlNights
83
- #ctl00_searchbox_sb_ddlBeds
84
-
85
- form.field_with(:name => 'ctl00$searchbox$sb$ddlMonth').options[month-1].select
86
- form.field_with(:name => 'ctl00$searchbox$sb$ddlDay').options[day-1].select
87
- form.field_with(:name => 'ctl00$searchbox$sb$ddlYear').options[year].select
88
- form.field_with(:name => 'ctl00$searchbox$sb$ddlNights').options[no_days.to_i-1].select
89
- form.field_with(:name => 'ctl00$searchbox$sb$ddlBeds').options[no_ppl.to_i-1].select
90
- #form.field_with(:id => 'Currency').options[4].select #US Currency
91
-
92
- page = agent.submit(form)
93
- data = page.search('//div[@id="main"]')
94
- #puts data
95
- return data
96
- end
97
-
98
- #url = GOMIO_PLURAL_HOSTELS_URL + "poland/krakow/search.htm"
99
-
100
- #Gomio.setSearch(url,"2009-09-20",2,7)
101
- Gomio.find_hostels_by_location(:location => "krakow,poland")
102
- #Gomio.find_hostel_by_id(:id => "mama's%20hostel", :location => "krakow,poland")
103
-
104
- end
@@ -1,63 +0,0 @@
1
- #delete
2
-
3
- class Hostelify
4
- attr_accessor :hostel_id, :name, :address, :description, :facilities, :ratings, :directions, :geo, :images, :video, :availability
5
- attr_accessor :rating, :dorm, :single, :unavailable, :genre
6
-
7
- def initialize(options = {})
8
- options.each {
9
- |k,v|
10
- self.send( "#{k.to_s}=".intern, v)
11
- }
12
- end
13
-
14
- end
15
-
16
- class HostelifyCollection < Array
17
- # This collection does everything an Array does, plus
18
- # you can add utility methods like names.
19
-
20
- def ids
21
- collect do |i|
22
- i.hostel_id
23
- end
24
- end
25
-
26
- def names
27
- collect do |i|
28
- i.name
29
- end
30
- end
31
-
32
- def descs
33
- collect do |i|
34
- i.description
35
- end
36
- end
37
-
38
- end
39
-
40
- class HostelifyAvailable
41
- attr_accessor :name, :price, :spots, :bookdate
42
-
43
- def initialize(name, price, spots, bookdate)
44
- @name = name
45
- @price = price
46
- @spots = spots
47
- @bookdate = bookdate
48
- end
49
-
50
- end
51
-
52
- module Retryable
53
- extend self
54
-
55
- def try times = 1, options = {}, &block
56
- val = yield
57
- rescue options[:on] || Exception
58
- retry if (times -= 1) > 0
59
- else
60
- val
61
- end
62
- end
63
-
@@ -1,118 +0,0 @@
1
- class Hostelbookers
2
-
3
- HB_SINGULAR_URL = "http://www.hostelbookers.com/property/index.cfm?fuseaction=accommodation.search&straccommodationtype=hostels&fromPropertyNameSearch=0"
4
- HB_PLURAL_URL = "http://www.hostelbookers.com/results/index.cfm?straccommodationtype=hostels&strSearchType=freeText&fuseaction=accommodation.search"
5
-
6
- @default_options = { :date => (Date.today+4).to_s, :no_days => "7", :live => true }
7
-
8
- def self.find_hostels_by_location(options)
9
- options = @default_options.merge(options)
10
- date = Date.strptime(options[:date])
11
- city = options[:location].split(',').first.rstrip.lstrip.gsub(' ','-').squeeze("-")
12
- country = options[:location].split(',').last.rstrip.lstrip.gsub(' ','-').squeeze("-")
13
-
14
- url = HB_PLURAL_URL + "&strkeywords=#{city},+#{country}&dtearrival=#{date.strftime('%d/%m/%Y')}"
15
-
16
- #Retryable.try 3 do
17
- data = Hpricot(open(url))
18
- #end
19
-
20
- data = data.search("//div[@id='propertyResultsList']")
21
-
22
- @results = HostelifyCollection.new
23
-
24
- (data/"tr.propertyRow").each do |row|
25
- name = row.at("a.propertyTitle").inner_text
26
- url = row.at("a.propertyTitle")['href']
27
- desc = row.at("p.shortDescription").inner_text
28
- rating = row.at("td.rating/text()")
29
- rating = rating.to_s.to_i unless rating.nil?
30
- dorm = row.at("td.shared/text()")
31
- single = row.at("td.private/text()")
32
- hb_id = url.match(/[\d]{2,5}.$/).to_s.to_i
33
-
34
- @results << Hostelify.new(:hostel_id => hb_id, :name => name, :description => desc, :rating => rating, :dorm => dorm, :single => single)
35
- end
36
- return @results
37
- end
38
-
39
- def self.find_hostel_by_id(options)
40
- options = @default_options.merge(options)
41
- date = Date.strptime(options[:date])
42
- hostel = Hostelify.new
43
-
44
- url = HB_SINGULAR_URL + "&intnights=#{options[:no_days]}&intpeople=1&dtearrival=#{date.strftime('%d/%m/%Y')}&intpropertyid=#{options[:id]}"
45
-
46
- data = Hpricot(open(url))
47
-
48
- hostel.hostel_id = options[:id]
49
- hostel.name = data.at("h1").inner_text
50
- hostel.address = data.at("p.address").inner_text
51
- hostel.description = data.at('div[@id="overviewPane"]').inner_text
52
- facilities_td = data.at("table.tableFacilities")
53
-
54
- facilities = []
55
- (facilities_td/"td").each do |row|
56
- facilities << row.inner_text
57
- end
58
- hostel.facilities = facilities
59
- extras = []
60
- extras_td = data.at("table.tableFeatures")
61
- (extras_td/"td.name").each do |row|
62
- extras << "Free " + row.inner_text.to_s
63
- end
64
- facilities = facilities + extras
65
-
66
- ratings = []
67
- ratings_td = data.at('div[@id="overviewIndRtng"]')
68
-
69
-
70
- (ratings_td/"dd").each do |row|
71
- #ratings << row.at("td").inner_text.to_s.to_f
72
- ratings << row.at('div[@class="ratingPercentage"]').inner_text.to_s.to_f
73
- #puts row.at('div[@class="ratingPercentage"]')
74
- end
75
-
76
- hostel.ratings = ratings
77
- images = []
78
- image = data.at('div[@id="propMedia"]/table')
79
- (image/"td").each do |row|
80
- img = row.at("img")['onclick']
81
- if img =~ /(http).*(jpg|gif|png|jpeg)/
82
- images << img.match(/(http).*(jpg|gif|png|jpeg)/)[0]
83
- else
84
- #add youtube?
85
- end
86
- end
87
- hostel.images = images
88
-
89
- if options[:all]
90
- data = Hpricot(open(url + "&strTab=map"))
91
- data.search("h2").remove #get rid of header
92
- hostel.directions = data.at('div[@id="gpsMap"]').inner_text
93
- hostel.geo = data.to_s.scan(/-{0,1}\d{1,3}\.\d{7}/).uniq!
94
- end
95
-
96
- @availables = []
97
- available = data.at("div.tableAvailability/table")
98
- if available
99
- (available/"tr").each do |row|
100
- name = row.at("td.roomType/label/text()")
101
- people = row.at("td.people/select")
102
- people = people.at("option:last-child").inner_text unless people.nil?
103
- price = row.at("td.price")
104
- price = price.inner_text.to_s.match(/[\d.]{1,5}/)[0] unless price.nil?
105
- (0..(options[:no_days].to_i-1)).each do |x|
106
- @availables << HostelifyAvailable.new(name,price,people,(date+x).to_s) unless price.nil?
107
- end
108
- end
109
-
110
- hostel.availability = @availables
111
- end
112
-
113
- return hostel
114
-
115
-
116
- end
117
-
118
- end
@@ -1,61 +0,0 @@
1
# Plain data holder describing one hostel listing.
#
# Every attribute is a simple read/write accessor; nothing is validated or
# type-checked here.
class Hostelify
  attr_accessor :hostel_id, :name, :address, :description, :facilities, :ratings, :directions, :geo, :images, :video, :availability
  attr_accessor :rating, :dorm, :single, :unavailable, :genre

  # Builds a hostel and assigns every key/value pair in +options+ through the
  # matching writer (e.g. <tt>:name => "X"</tt> calls <tt>name=("X")</tt>).
  #
  # options - Hash of attribute name (Symbol or String) to value (default: {}).
  #
  # Raises NoMethodError when a key has no public writer.
  def initialize(options = {})
    options.each do |attribute, value|
      # public_send rather than send: an options hash must never be able to
      # reach a private writer.
      public_send("#{attribute}=", value)
    end
  end
end
13
-
14
# An Array of hostel records with shortcuts that pull a single attribute out
# of every element.
class HostelifyCollection < Array
  # Returns an Array holding the +hostel_id+ of each element, in order.
  def ids
    map(&:hostel_id)
  end

  # Returns an Array holding the +name+ of each element, in order.
  def names
    map(&:name)
  end

  # Returns an Array holding the +description+ of each element, in order.
  def descs
    map(&:description)
  end
end
37
-
38
# One availability entry: a room name, its price, the number of spots and the
# date the entry applies to. Values are stored exactly as given.
class HostelifyAvailable
  attr_accessor :name, :price, :spots, :bookdate

  # Stores the four values on the instance without conversion.
  def initialize(name, price, spots, bookdate)
    @name, @price, @spots, @bookdate = name, price, spots, bookdate
  end
end
49
-
50
# Helper for re-running a block that may fail.
#
# Because the module extends itself it can be called directly, e.g.
# <tt>Retryable.try(3) { ... }</tt>, or mixed into another class.
module Retryable
  extend self

  # Runs the given block, running it again each time it raises, for at most
  # +times+ attempts in total.
  #
  # times   - Maximum number of attempts (default: 1). The block always runs
  #           at least once.
  # options - Hash; <tt>:on</tt> is the exception class to rescue
  #           (default: StandardError).
  #
  # Returns the block's value from the first attempt that does not raise, or
  # nil when every attempt raised a rescued exception (the error is swallowed,
  # not re-raised).
  def try(times = 1, options = {})
    yield
  rescue options[:on] || StandardError
    # The default is StandardError rather than Exception so that Interrupt,
    # SystemExit and similar are never swallowed or retried.
    # `retry` re-runs the method body; `times` keeps its decremented value.
    retry if (times -= 1) > 0
  end
end
61
-
data/lib/items.rb DELETED
@@ -1,23 +0,0 @@
1
# A name/description pair. Every instance created is appended to the
# class-level +Items.items+ list.
class Items
  # Registry of every instance created so far.
  @items = []

  class << self
    attr_accessor :items

    # Prints "hello" to standard output.
    def names
      puts "hello"
    end
  end

  attr_accessor :name, :description

  # Stores both values and registers the new instance in +Items.items+.
  def initialize(name, description)
    @name = name
    @description = description
    Items.items << self
  end

  # Yields the name, then the description, to the given block.
  def each
    yield name
    yield description
  end
end
data/spec/_helper.rb DELETED
@@ -1,4 +0,0 @@
1
# Spec bootstrap: put this directory and ../lib on the load path, then load
# the library under test.
spec_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift(spec_dir)
$LOAD_PATH.unshift(File.join(spec_dir, '..', 'lib'))

require 'hostelify'
@@ -1,73 +0,0 @@
1
- require 'spec/_helper'
2
-
3
# Examples for Hostelbookers.find_hostel_by_id called with only an :id.
# NOTE(review): presumably fetches the live listing, so results depend on the
# remote site — confirm.
describe "should find individual hostel and get object with name etc." do

  before(:all) do
    # Alternative lookups tried previously:
    #   Hostelbookers.find_hostel_by_id(:location => "krakow,poland", :id => 9330)
    #   Hostelbookers.find_hostel_by_id(:location => "lviv,ukraine", :id => 19606)
    @h = Hostelbookers.find_hostel_by_id(:id => 19606)
  end

  it "should query hostelbookers and return the correct name" do
    @h.name.should match(/^.*(Hostel).*$/)
  end

  it "address" do
    @h.address.should match(/^.*(Krakow|Lviv).*$/)
  end

  it "description" do
    # Used to assert on address, so the description was never checked.
    @h.description.should_not be nil
  end

  it "facilities" do
    @h.should have_at_least(8).facilities
  end

  it "ratings" do
    @h.should have(8).ratings
  end

  it "images at least 6" do
    @h.should have_at_least(6).images
  end

end
36
-
37
# Examples for Hostelbookers.find_hostel_by_id with :all => true, covering the
# directions and geo attributes.
describe "all options" do
  before(:all) { @h = Hostelbookers.find_hostel_by_id(:id => 9330, :all => true) }

  it "directions should have a certain length <" do
    directions_length = @h.directions.length
    directions_length.should be > 25
  end

  it "geo" do
    rounded = @h.geo[0].to_f.round
    rounded.should eql(50)
  end
end
50
-
51
# Examples for Hostelbookers.find_hostel_by_id with a :date, covering the
# availability entries attached to the result.
describe "with dates to get availabilty and verify output!" do
  before(:all) do
    @h = Hostelbookers.find_hostel_by_id(:id => 19831, :date => (Date.today+10).to_s)
  end

  it "get first availability and check it merit" do
    # Was a bare `=~`, which asserts nothing and therefore could never fail.
    # to_s because the name may be a parsed text node rather than a String.
    @h.availability.first.name.to_s.should match(/bed/)
  end

  it "check number of avail beds" do
    @h.availability.first.spots.to_i.should be >= 1
  end

  it "last avail has a price > 5 US" do
    @h.availability.last.price.to_i.should be > 5
  end

  it "book date eq today + 10" do
    # NOTE(review): only checks presence, not the actual date.
    @h.availability.last.bookdate.should_not be nil
  end

end
@@ -1,30 +0,0 @@
1
- require 'spec/_helper'
2
-
3
# Examples for Hostelbookers.find_hostels_by_location: the result is treated
# as a list of hostel records.
describe "test hostelbookers hostel listings" do

  before(:all) do
    @h = Hostelbookers.find_hostels_by_location(:location => "krakow,poland")
  end

  it "should return a list of names" do
    names = @h.map { |hostel| hostel.name }
    names.should include("Flamingo Hostel")
    names.should include("Mama's Hostel- Main Market Square")
  end

  it "rating should be high for first choices" do
    @h.first.rating.to_i.should be > 50
  end

  it "desc should have a certain length <" do
    @h.first.description.length.should be > 100
  end

  it "has a hostel number" do
    # to_i never returns nil, so the old `should_not be nil` could not fail;
    # require a positive number instead.
    @h.first.hostel_id.to_i.should be > 0
  end

end
data/spec/helper.rb DELETED
@@ -1,7 +0,0 @@
1
# Spec bootstrap: load RubyGems and the spec framework, put this directory and
# ../lib on the load path, then require the library file under test.
require 'rubygems'
require 'spec'

here = File.dirname(__FILE__)
$LOAD_PATH.unshift(here)
$LOAD_PATH.unshift(File.join(here, '..', 'lib'))

require 'hostel'
@@ -1,85 +0,0 @@
1
- require 'spec/_helper'
2
-
3
# Examples for Hostelworld.find_hostel_by_id called with only an :id.
describe "find hostel by id, no options" do

  before(:all) do
    # A lookup of id 7113 used to run first and was immediately overwritten;
    # only this result was ever examined.
    @h = Hostelworld.find_hostel_by_id(:id => 20763)
  end

  it "should query hostelworld and return the correct name" do
    @h.name.should match(/^.*(Hostel).*$/)
  end

  it "address" do
    @h.address.should match(/^.*(Krakow|Lviv).*$/)
  end

  it "description" do
    # Used to assert on address, so the description was never checked.
    @h.description.should_not be nil
  end

  it "facilities" do
    @h.should have_at_least(15).facilities
  end

  it "ratings" do
    @h.should have(6).ratings
  end

end
31
-
32
# Group reserved for the video attribute; its only example is disabled.
describe "youtube" do

  before(:all) { @h3 = Hostelworld.find_hostel_by_id(:id => 7113) }

  # Disabled example, kept for reference:
  # it "video" do
  #   @h3.video.should match(/^.*(youtube.com).*$/)
  # end
end
42
-
43
# Examples for Hostelworld.find_hostel_by_id with :all => true, covering the
# geo, directions and images attributes.
describe "find hostel with all options" do
  before(:all) do
    # A lookup of id 7113 used to run first and was immediately overwritten;
    # only this result was ever examined.
    @h2 = Hostelworld.find_hostel_by_id(:id => 20763, :all => true)
  end

  it "geo" do
    @h2.geo[0].to_f.round.should eql 50
  end

  it "directions" do
    @h2.directions.should_not be nil
  end

  it "images at least 6" do
    @h2.should have_at_least(6).images
  end
end
61
-
62
# Examples for Hostelworld.find_hostel_by_id with a :date, covering the
# availability entries attached to the result.
describe "with dates to get availabilty and verify output!" do
  before(:all) do
    # Alternative listing: Hostelworld.find_hostel_by_id(:id => 20763, :date => (Date.today+20).to_s)
    @h = Hostelworld.find_hostel_by_id(:id => 7113, :date => (Date.today+20).to_s)
  end

  it "get first availability and check it merit" do
    # Was a bare `=~`, which asserts nothing and therefore could never fail.
    # to_s so the matcher works whether the name is a String or a parsed node.
    @h.availability.first.name.to_s.should match(/bed/)
  end

  it "check number of avail beds" do
    @h.availability.last.spots.to_i.should be >= 1
  end

  it "last avail has a price > 5 US" do
    @h.availability.last.price.to_i.should be > 5
  end

  it "book date eq today + 10" do
    # NOTE(review): the lookup above uses today + 20 and this only checks
    # presence, so the description does not match what is verified.
    @h.availability.last.bookdate.should_not be nil
  end

end
@@ -1,62 +0,0 @@
1
- require 'spec/_helper'
2
-
3
# Examples for Hostelworld.find_hostels_by_location without dates: the result
# is treated as a list of hostel records.
describe "finds list of hostels" do

  before(:all) do
    @h = Hostelworld.find_hostels_by_location(:location => 'krakow,poland')
  end

  it "should get a list with name and brief desc" do
    names = @h.map { |hostel| hostel.name }
    names.should include("Mama's Hostel Main Market Square")
  end

  it "rating should be high for first choices" do
    @h.first.rating.to_i.should be > 50
  end

  it "desc should have a certain length <" do
    @h.first.description.length.should be > 80
  end

  it "has a hostel number" do
    # to_i never returns nil, so the old `should_not be nil` could not fail;
    # require a positive number instead.
    @h.first.hostel_id.to_i.should be > 0
  end

end
31
-
32
# Examples for Hostelworld.find_hostels_by_location with a :date, which also
# covers the dorm price and unavailable attributes.
describe "find hostels with dates" do

  before(:all) do
    @h = Hostelworld.find_hostels_by_location(:location => 'krakow,poland', :date => (Date.today + 10).to_s)
  end

  it "rating should be high for first choices" do
    @h.first.rating.to_i.should be > 50
  end

  it "desc should have a certain length <" do
    @h.first.description.length.should be > 90
  end

  it "has a hostel number" do
    # to_i never returns nil, so the old `should_not be nil` could not fail;
    # require a positive number instead.
    @h.first.hostel_id.to_i.should be > 0
  end

  it "has dorm rooms for greater than $5" do
    @h.first.dorm.to_i.should be > 5
  end

  it "has available rooms!" do
    @h.first.unavailable.first.should be nil
  end

  it "has unavailable rooms!" do
    @h.last.unavailable.first.should_not be nil
  end

end