hostelify 0.3.7

Sign up to get free protection for your applications and to get access to all the features.
data/lib/hostelify.rb ADDED
@@ -0,0 +1,10 @@
1
+ require 'rubygems'
2
+ require 'mechanize'
3
+ require 'hpricot'
4
+ require 'open-uri'
5
+ require 'date'
6
+ require 'htmlentities'
7
+ require 'iconv'
8
+ require 'hostelify/hostelify'
9
+ require 'hostelify/hostelworld'
10
+ require 'hostelify/hostelbookers'
@@ -0,0 +1,30 @@
1
+ Manifest
2
+ README.rdoc
3
+ Rakefile
4
+ hostelify.gemspec
5
+ lib/hostelify.rb
6
+ lib/hostelify/gomio.rb
7
+ lib/hostelify/hostelbookers.rb
8
+ lib/hostelify/hostelify.rb
9
+ lib/hostelify/hostelworld.rb
10
+ pkg/hostelify-0.3.6.gem
11
+ pkg/hostelify-0.3.6.tar.gz
12
+ pkg/hostelify-0.3.6/Manifest
13
+ pkg/hostelify-0.3.6/README.rdoc
14
+ pkg/hostelify-0.3.6/Rakefile
15
+ pkg/hostelify-0.3.6/hostelify.gemspec
16
+ pkg/hostelify-0.3.6/lib/hostelify.rb
17
+ pkg/hostelify-0.3.6/lib/hostelify/gomio.rb
18
+ pkg/hostelify-0.3.6/lib/hostelify/hostelbookers.rb
19
+ pkg/hostelify-0.3.6/lib/hostelify/hostelify.rb
20
+ pkg/hostelify-0.3.6/lib/hostelify/hostelworld.rb
21
+ pkg/hostelify-0.3.6/spec/_helper.rb
22
+ pkg/hostelify-0.3.6/spec/hb_find_by_hostel.spec
23
+ pkg/hostelify-0.3.6/spec/hb_find_hostels.spec
24
+ pkg/hostelify-0.3.6/spec/hw_find_by_hostel.spec
25
+ pkg/hostelify-0.3.6/spec/hw_find_hostels.spec
26
+ spec/_helper.rb
27
+ spec/hb_find_by_hostel.spec
28
+ spec/hb_find_hostels.spec
29
+ spec/hw_find_by_hostel.spec
30
+ spec/hw_find_hostels.spec
@@ -0,0 +1 @@
1
+ Nothin yet... still...
@@ -0,0 +1,14 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+ require 'echoe'
4
+
5
# Gem packaging definition for hostelify.
Echoe.new('hostelify', '0.3.7') do |p|
  p.description = "Simple Hostel Webscrapper."
  p.url = "http://github.com/holden/hostelify"
  p.author = "Holden Thomas"
  p.email = "holden.thomas@gmail.com"
  p.ignore_pattern = ["tmp/*", "script/*"]
  # lib/hostelify.rb requires these gems at load time, so they have to be
  # installed together with hostelify itself.
  p.runtime_dependencies = ["mechanize", "hpricot", "htmlentities"]
  p.development_dependencies = []
end

# Load any extra rake tasks kept under tasks/ (sorted for a stable order).
Dir["#{File.dirname(__FILE__)}/tasks/*.rake"].sort.each { |ext| load ext }
@@ -0,0 +1,31 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
# Gem specification for hostelify.
Gem::Specification.new do |s|
  s.name = %q{hostelify}
  s.version = "0.3.6"

  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
  s.authors = ["Holden Thomas"]
  s.date = %q{2009-09-23}
  s.description = %q{Simple Hostel Webscrapper.}
  s.email = %q{holden.thomas@gmail.com}
  s.extra_rdoc_files = ["README.rdoc", "lib/hostelify.rb", "lib/hostelify/gomio.rb", "lib/hostelify/hostelbookers.rb", "lib/hostelify/hostelify.rb", "lib/hostelify/hostelworld.rb"]
  s.files = ["Manifest", "README.rdoc", "Rakefile", "hostelify.gemspec", "lib/hostelify.rb", "lib/hostelify/gomio.rb", "lib/hostelify/hostelbookers.rb", "lib/hostelify/hostelify.rb", "lib/hostelify/hostelworld.rb", "spec/_helper.rb", "spec/hb_find_by_hostel.spec", "spec/hb_find_hostels.spec", "spec/hw_find_by_hostel.spec", "spec/hw_find_hostels.spec"]
  # Guarded like required_rubygems_version above, so the spec still loads on
  # RubyGems versions that do not offer these legacy setters.
  s.has_rdoc = true if s.respond_to? :has_rdoc=
  s.homepage = %q{http://github.com/holden/hostelify}
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Hostelify", "--main", "README.rdoc"]
  s.require_paths = ["lib"]
  s.rubyforge_project = %q{hostelify} if s.respond_to? :rubyforge_project=
  s.rubygems_version = %q{1.3.1}
  s.summary = %q{Simple Hostel Webscrapper.}

  s.specification_version = 2 if s.respond_to? :specification_version

  # lib/hostelify.rb requires these gems at load time. The previous version
  # declared no dependencies and only carried an empty version-check
  # conditional, which has been dropped as dead code.
  %w[mechanize hpricot htmlentities].each do |gem_name|
    s.add_dependency(gem_name, [">= 0"])
  end
end
@@ -0,0 +1,102 @@
1
+ require 'rubygems'
2
+ require 'mechanize'
3
+ require 'hpricot'
4
+ require 'open-uri'
5
+ require 'date'
6
+ require 'htmlentities'
7
+
8
+ Hpricot.buffer_size = 262144
9
+
10
# Experimental scraper for gomio.com hostel pages.
#
# Every entry point is a class method that takes an options hash whose
# :location value is a "city,country" string (spaces are removed).
class Gomio

  #constants
  GOMIO_SINGULAR = "http://www.gomio.com/reservation/chooseBed.aspx?HostelId="
  GOMIO_PLURAL_HOSTELS_URL = "http://www.gomio.com/hostels/europe/" #europe/poland/krakow/mama%27s%20hostel/overview.htm

  # Fetches the overview page of one hostel.
  #
  # options - :id       => hostel slug exactly as it appears in the gomio URL
  #           :location => "city,country"
  #
  # Prints the two "no availability" cells (the method's historical output)
  # and returns a Hash with :gomio_id, :name, :address, :description and
  # :unavailable (the non-nil availability cells). Previously the scraped
  # values were discarded and nil was returned.
  def self.find_hostel_by_id(options)
    #opts = { :directions => false, :images => false, :all => false }.merge options
    city, country = location_parts(options[:location])
    id = options[:id]
    url = GOMIO_PLURAL_HOSTELS_URL + "#{country}/#{city}/#{id}/overview.htm"

    data = Hpricot(open(url)).search('div[@id="main"]')
    name = data.at("h3/span").inner_text.lstrip.rstrip
    address = data.at("span.br_address").inner_text.lstrip.rstrip
    desc = data.at("div.br_detail").inner_text.lstrip.rstrip
    available1 = data.at("td.HasNoAvail_Outer")
    available2 = data.at("td.HasNoAvail")
    puts "#{available1}, #{available2}"

    { :gomio_id => id, :name => name, :address => address, :description => desc,
      :unavailable => [available1, available2].compact }
  end

  # Lists the hostels gomio shows for a location.
  #
  # options - :location => "city,country" (required)
  #           :date     => arrival date string parseable by Date.strptime
  #                        (default: four days from today)
  #           :no_ppl   => number of beds (default 2)
  #           :no_days  => number of nights (default 5)
  #
  # Returns an Array with one Hash per result row:
  # { :gomio_id, :name, :unavailable }. Earlier versions overwrote the result
  # on every row and therefore only returned the last hostel.
  def self.find_hostels_by_location(options) #location
    city, country = location_parts(options[:location])
    url = GOMIO_PLURAL_HOSTELS_URL + "#{country}/#{city}/search.htm"

    date = options[:date] || (Date.today + 4).to_s
    no_ppl = options[:no_ppl] || 2
    no_days = options[:no_days] || 5
    data = setSearch(url, date, no_ppl, no_days).search("div.SearchResultMembers")

    # The hostel id is whatever sits between the city segment and
    # "/overview.htm" in the result link. The city is escaped so that it is
    # matched literally.
    id_pattern = /(#{Regexp.escape(city)}).([\d\D]*)(\/overview.htm)/

    results = []
    (data/"div.SearchResultsHostel").each do |row|
      name = row.at("h3").inner_text.lstrip.rstrip
      href = row.at("h3/a")['href']
      id_match = href.match(id_pattern)
      unavailable = cell_texts(row, "td.HasNoAvail_Outer/text()") +
                    cell_texts(row, "td.HasNoAvail/text()")

      results << { :gomio_id => (id_match && id_match[2]), :name => name, :unavailable => unavailable }
    end
    results
  end

  # Submits the search form found on +url+ and returns the nodes matching
  # //div[@id="main"] on the resulting page.
  #
  # url     - page that carries the search form (its first form is used)
  # date    - arrival date string parseable by Date.strptime
  # no_ppl  - number of beds, 1-based
  # no_days - number of nights, 1-based
  def self.setSearch(url,date,no_ppl,no_days)
    date = Date.strptime(date)
    # Index into the year drop-down: 0 when the two-digit year equals the
    # current one, otherwise 1.
    # NOTE(review): presumably the drop-down only lists this year and the next - confirm.
    year = (Time.now.strftime("%y") == date.strftime("%y")) ? 0 : 1

    agent = WWW::Mechanize.new
    page = agent.get(url)

    #the form name
    form = page.forms.first # => WWW::Mechanize::Form

    # Drop-downs are selected by position, hence the "- 1" on the 1-based values.
    form.field_with(:name => 'ctl00$searchbox$sb$ddlMonth').options[date.month-1].select
    form.field_with(:name => 'ctl00$searchbox$sb$ddlDay').options[date.day-1].select
    form.field_with(:name => 'ctl00$searchbox$sb$ddlYear').options[year].select
    form.field_with(:name => 'ctl00$searchbox$sb$ddlNights').options[no_days.to_i-1].select
    form.field_with(:name => 'ctl00$searchbox$sb$ddlBeds').options[no_ppl.to_i-1].select
    #form.field_with(:id => 'Currency').options[4].select #US Currency

    page = agent.submit(form)
    page.search('//div[@id="main"]')
  end

  # Splits "city,country" into [city, country] with all spaces removed.
  def self.location_parts(location)
    parts = location.split(',')
    [parts.first.gsub(' ',''), parts.last.gsub(' ','')]
  end
  private_class_method :location_parts

  # Texts of every node under +node+ matching +selector+, additionally split
  # on commas (as the original join/split did).
  def self.cell_texts(node, selector)
    (node/selector).to_a.join(',').split(',')
  end
  private_class_method :cell_texts

  # Example usage. Kept as comments so that loading this file no longer
  # performs a live scrape as a side effect:
  #url = GOMIO_PLURAL_HOSTELS_URL + "poland/krakow/search.htm"
  #Gomio.setSearch(url,"2009-09-20",2,7)
  #Gomio.find_hostels_by_location(:location => "krakow,poland")
  #Gomio.find_hostel_by_id(:id => "mama's%20hostel", :location => "krakow,poland")

end
@@ -0,0 +1,180 @@
1
# Scraper for hostelbookers.com listing and detail pages.
#
# Both finders are class methods taking an options hash. When :date is given
# the site's search form is submitted (via Mechanize) so prices/availability
# are included; otherwise the plain page is fetched and parsed with Hpricot.
class Hostelbookers

  #constants
  HB_SINGULAR_DETAIL_URL = "http://www.hostelbookers.com/hostels/" #poland/krakow/
  HB_PLURAL_HOSTELS_URL = "http://www.hostelbookers.com/hostels/" #poland/wroclaw/

  #options
  # Merged underneath the caller's options whenever a :date is supplied.
  # Evaluated once, when the class is loaded.
  @default_options = { :date => (Date.today+4).to_s, :no_days => "7", :live => true }

  # Lists the hostels shown for a location.
  #
  # options - :location => "city,country" (required; spaces become dashes)
  #           :date     => arrival date string parseable by Date.strptime (optional)
  #           :no_days  => number of nights (used with :date, default "7")
  #
  # Returns a HostelifyCollection of Hostelify objects with hostel_id, name,
  # description, rating, dorm and single set.
  def self.find_hostels_by_location(options) #location
    parts = options[:location].split(',')
    city = location_slug(parts.first)
    country = location_slug(parts.last)

    url = HB_PLURAL_HOSTELS_URL + "#{country}/#{city}"

    data = nil
    if options[:date]
      options = @default_options.merge(options)
      data = setSearch(url,options[:date],options[:no_days])
    else
      Retryable.try(3) { data = Hpricot(open(url)) }
    end

    data = data.search("//div[@id='propertyResultsList']")
    results = HostelifyCollection.new
    (data/"tr.propertyRow").each do |row|
      title = row.at("a.propertyTitle")
      link = title['href']
      rating = row.at("td.rating/text()")
      rating = rating.to_s.to_i unless rating.nil?
      # The id is the trailing 2-5 digit run of the link; 0 when there is none.
      hb_id = link.match(/[\d]{2,5}.$/).to_s.to_i

      results << Hostelify.new(:hostel_id => hb_id,
                               :name => title.inner_text,
                               :description => row.at("p.shortDescription").inner_text,
                               :rating => rating,
                               :dorm => row.at("td.shared/text()"),
                               :single => row.at("td.private/text()"))
    end
    results
  end

  # Scrapes the detail page of one hostel.
  #
  # options - :id       => hostel id used in the URL (required)
  #           :location => "city,country" (required; spaces are removed)
  #           :date     => arrival date string; adds availability (optional)
  #           :no_days  => number of nights (used with :date, default "7")
  #           :all      => when truthy, also loads the /map page for
  #                        directions and geo coordinates
  #
  # Returns a Hostelify object.
  def self.find_hostel_by_id(options)
    city = options[:location].split(',').first.gsub(' ','')
    country = options[:location].split(',').last.gsub(' ','')
    id = options[:id]
    url = HB_SINGULAR_DETAIL_URL + "#{country}/#{city}/#{id}"

    data = nil
    if options[:date]
      options = @default_options.merge(options)
      data = setSearch_id(url,options[:date],options[:no_days])
    else
      Retryable.try(3) { data = Hpricot(open(url)) }
    end

    hostel = Hostelify.new
    hostel.hostel_id = id
    hostel.name = data.at("h1").inner_text
    hostel.address = data.at("p.address").inner_text
    hostel.description = data.at('div[@id="overviewPane"]').inner_text

    facilities = child_nodes(data.at("table.tableFacilities"), "td").map { |cell| cell.inner_text }
    extras = child_nodes(data.at("table.tableFeatures"), "td.name").map { |cell| "Free " + cell.inner_text.to_s }
    # Extras used to be appended to a local after the assignment and were
    # therefore never stored on the hostel.
    hostel.facilities = facilities + extras

    rating_rows = child_nodes(data.at('div[@id="overviewIndRtng"]/table'), "tr")
    # Rows without a <td> are skipped instead of raising.
    hostel.ratings = rating_rows.map { |row| row.at("td") }.compact.map { |cell| cell.inner_text.to_s.to_f }

    hostel.images = extract_images(data.at('div[@id="propMedia"]/table'))

    if options[:all]
      # Kept in its own variable so the availability lookup below still reads
      # the detail page rather than the map page.
      map_page = Hpricot(open(url + "/map"))
      map_page.search("h2").remove #get rid of header
      hostel.directions = map_page.at('div[@id="directions"]').inner_text
      # uniq (not uniq!) because uniq! returns nil when nothing was removed.
      hostel.geo = map_page.to_s.scan(/-{0,1}\d{1,3}\.\d{7}/).uniq
    end

    if options[:date]
      hostel.availability = extract_availability(data, Date.strptime(options[:date]), options[:no_days])
    end

    hostel
  end

  # Submits the listing page's 'searchForm' and returns the nodes matching
  # //div[@id="content"] of the resulting page.
  #
  # url     - listing page URL
  # date    - arrival date string parseable by Date.strptime
  # no_days - number of nights, 1-based
  def self.setSearch(url,date,no_days)
    submit_search(url, date, no_days, 'searchForm', 'intnights', 'dtearrival', 0)
  end

  # Same as setSearch, but for a hostel detail page, which uses the
  # 'frmCheckAvailBook' form and differently cased field names.
  def self.setSearch_id(url,date,no_days)
    submit_search(url, date, no_days, 'frmCheckAvailBook', 'intNights', 'dteArrival', 1)
  end

  # Shared implementation of setSearch / setSearch_id.
  #
  # form_name           - name of the availability form on +url+
  # nights_field        - name of the nights drop-down
  # arrival_field       - name of the arrival date field (expects d/m/y)
  # currency_form_index - index of the form carrying the currency drop-down
  #                       on the result page
  def self.submit_search(url, date, no_days, form_name, nights_field, arrival_field, currency_form_index)
    arrival = Date.strptime(date).strftime("%d/%m/%Y")
    agent = WWW::Mechanize.new
    page = agent.get(url)
    form = page.form_with(:name => form_name) # => WWW::Mechanize::Form
    form.field_with(:name => nights_field).options[no_days.to_i-1].select
    form.field_with(:name => arrival_field).value = arrival #d/m/y

    Retryable.try(3) { page = agent.submit(form) }

    #to dollars!
    # NOTE(review): option index 5 is taken from the original code; confirm it is still US dollars.
    form = page.forms[currency_form_index]
    form.field_with(:name => 'strSelectedCurrencyCode').options[5].select

    Retryable.try(3) { page = agent.submit(form) }

    page.search('//div[@id="content"]')
  end
  private_class_method :submit_search

  # Normalises one half of a "city,country" string for the listing URL:
  # trims it and turns runs of spaces into a single dash.
  def self.location_slug(text)
    text.rstrip.lstrip.gsub(' ','-').squeeze("-")
  end
  private_class_method :location_slug

  # Nodes under +parent+ matching +selector+; an empty Array when +parent+
  # was not found on the page.
  def self.child_nodes(parent, selector)
    parent.nil? ? [] : (parent/selector).to_a
  end
  private_class_method :child_nodes

  # Image URLs taken from the onclick handlers of the media table cells.
  # Cells without an <img>, or whose handler holds no image URL, are skipped.
  def self.extract_images(table)
    urls = child_nodes(table, "td").map do |cell|
      img = cell.at("img")
      img.nil? ? nil : img['onclick'].to_s[/(http).*(jpg|gif|png|jpeg)/]
    end
    urls.compact
  end
  private_class_method :extract_images

  # Builds one HostelifyAvailable per priced room row and night.
  #
  # data    - parsed detail page (after the availability search)
  # date    - Date of the first night
  # no_days - number of nights
  def self.extract_availability(data, date, no_days)
    availables = []
    child_nodes(data.at("div.tableAvailability/table"), "tr").each do |row|
      price_cell = row.at("td.price")
      price = price_cell.nil? ? nil : price_cell.inner_text.to_s[/[\d.]{1,5}/]
      next if price.nil? # rows without a numeric price are skipped

      name = row.at("td.roomType/label/text()")
      selector = row.at("td.people/select")
      last_option = selector.nil? ? nil : selector.at("option:last-child")
      people = last_option.nil? ? nil : last_option.inner_text

      no_days.to_i.times do |offset|
        availables << HostelifyAvailable.new(name,price,people,(date+offset).to_s)
      end
    end
    availables
  end
  private_class_method :extract_availability

end
@@ -0,0 +1,61 @@
1
# Plain value object describing one hostel.
#
# Every attribute is optional and starts out nil unless it is supplied to
# the constructor.
class Hostelify
  attr_accessor :hostel_id, :name, :address, :description, :facilities, :ratings, :directions, :geo, :images, :video, :availability
  attr_accessor :rating, :dorm, :single, :unavailable

  # options - Hash of attribute name => value. Each pair is applied through
  #           the matching "name=" writer, so a key without a writer raises
  #           NoMethodError.
  def initialize(options = {})
    options.each_pair do |attribute, value|
      writer = "#{attribute}=".to_sym
      send(writer, value)
    end
  end

end
13
+
14
# An Array of hostel objects with a few convenience readers that pluck a
# single attribute from every element.
class HostelifyCollection < Array

  # Returns an Array with the hostel_id of every element.
  def ids
    map { |hostel| hostel.hostel_id }
  end

  # Returns an Array with the name of every element.
  def names
    map { |hostel| hostel.name }
  end

  # Returns an Array with the description of every element.
  def descs
    map { |hostel| hostel.description }
  end

end
37
+
38
# One availability entry: a room, its price, the number of spots and the
# date the entry applies to.
class HostelifyAvailable
  attr_accessor :name, :price, :spots, :bookdate

  # Stores the four values as given; no conversion or validation is done.
  def initialize(name, price, spots, bookdate)
    @name, @price, @spots, @bookdate = name, price, spots, bookdate
  end

end
49
+
50
require 'timeout'

# Small helper for re-running a block that may fail transiently.
module Retryable
  extend self

  # Runs the block, retrying it when it raises.
  #
  # times   - maximum number of attempts (default 1, i.e. no retry)
  # options - :on => exception class, or Array of classes, to retry on.
  #           Defaults to StandardError plus Timeout::Error (listed
  #           explicitly because it is not a StandardError on every Ruby
  #           version).
  #
  # Returns the block's value. Once the attempts are used up the last
  # exception is re-raised; it used to be swallowed, which made callers fail
  # later on an unexpected nil. Exceptions outside the retry list (for
  # example SystemExit or Interrupt) are never caught.
  def try(times = 1, options = {})
    retry_on = options[:on] ? [options[:on]].flatten : [StandardError, Timeout::Error]
    attempts_left = times
    begin
      yield
    rescue *retry_on
      attempts_left -= 1
      retry if attempts_left > 0
      raise
    end
  end
end
61
+