hostelify 0.3.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/hostelify.rb ADDED
@@ -0,0 +1,10 @@
1
+ require 'rubygems'
2
+ require 'mechanize'
3
+ require 'hpricot'
4
+ require 'open-uri'
5
+ require 'date'
6
+ require 'htmlentities'
7
+ require 'iconv'
8
+ require 'hostelify/hostelify'
9
+ require 'hostelify/hostelworld'
10
+ require 'hostelify/hostelbookers'
@@ -0,0 +1,30 @@
1
+ Manifest
2
+ README.rdoc
3
+ Rakefile
4
+ hostelify.gemspec
5
+ lib/hostelify.rb
6
+ lib/hostelify/gomio.rb
7
+ lib/hostelify/hostelbookers.rb
8
+ lib/hostelify/hostelify.rb
9
+ lib/hostelify/hostelworld.rb
10
+ pkg/hostelify-0.3.6.gem
11
+ pkg/hostelify-0.3.6.tar.gz
12
+ pkg/hostelify-0.3.6/Manifest
13
+ pkg/hostelify-0.3.6/README.rdoc
14
+ pkg/hostelify-0.3.6/Rakefile
15
+ pkg/hostelify-0.3.6/hostelify.gemspec
16
+ pkg/hostelify-0.3.6/lib/hostelify.rb
17
+ pkg/hostelify-0.3.6/lib/hostelify/gomio.rb
18
+ pkg/hostelify-0.3.6/lib/hostelify/hostelbookers.rb
19
+ pkg/hostelify-0.3.6/lib/hostelify/hostelify.rb
20
+ pkg/hostelify-0.3.6/lib/hostelify/hostelworld.rb
21
+ pkg/hostelify-0.3.6/spec/_helper.rb
22
+ pkg/hostelify-0.3.6/spec/hb_find_by_hostel.spec
23
+ pkg/hostelify-0.3.6/spec/hb_find_hostels.spec
24
+ pkg/hostelify-0.3.6/spec/hw_find_by_hostel.spec
25
+ pkg/hostelify-0.3.6/spec/hw_find_hostels.spec
26
+ spec/_helper.rb
27
+ spec/hb_find_by_hostel.spec
28
+ spec/hb_find_hostels.spec
29
+ spec/hw_find_by_hostel.spec
30
+ spec/hw_find_hostels.spec
@@ -0,0 +1 @@
1
+ Nothing yet... still...
@@ -0,0 +1,14 @@
1
# Packaging tasks for the hostelify gem. Echoe derives the gem specification
# and the package/release rake tasks from the block below.
require 'rubygems'
require 'rake'
require 'echoe'

Echoe.new('hostelify', '0.3.7') do |p|
  p.description = "Simple Hostel Webscrapper."
  p.url = "http://github.com/holden/hostelify"
  p.author = "Holden Thomas"
  p.email = "holden.thomas@gmail.com"
  p.ignore_pattern = ["tmp/*", "script/*"]
  # lib/hostelify.rb requires these gems at load time, so they have to be
  # installed together with hostelify; without them `require 'hostelify'`
  # fails with a LoadError on a fresh install.
  # The scrapers reference WWW::Mechanize, hence the upper bound on mechanize
  # (NOTE(review): assumes the WWW:: namespace is gone from 2.0 on - confirm).
  p.runtime_dependencies = ["mechanize <2.0", "hpricot", "htmlentities"]
  p.development_dependencies = []
end

# Pick up any additional project-specific rake tasks, in a stable order.
Dir["#{File.dirname(__FILE__)}/tasks/*.rake"].sort.each { |ext| load ext }
@@ -0,0 +1,31 @@
1
# -*- encoding: utf-8 -*-
#
# Gem specification for hostelify.

Gem::Specification.new do |s|
  s.name = %q{hostelify}
  # Matches the version passed to Echoe.new in the Rakefile.
  s.version = "0.3.7"

  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
  s.authors = ["Holden Thomas"]
  s.date = %q{2009-09-23}
  s.description = %q{Simple Hostel Webscrapper.}
  s.email = %q{holden.thomas@gmail.com}
  s.extra_rdoc_files = ["README.rdoc", "lib/hostelify.rb", "lib/hostelify/gomio.rb", "lib/hostelify/hostelbookers.rb", "lib/hostelify/hostelify.rb", "lib/hostelify/hostelworld.rb"]
  s.files = ["Manifest", "README.rdoc", "Rakefile", "hostelify.gemspec", "lib/hostelify.rb", "lib/hostelify/gomio.rb", "lib/hostelify/hostelbookers.rb", "lib/hostelify/hostelify.rb", "lib/hostelify/hostelworld.rb", "spec/_helper.rb", "spec/hb_find_by_hostel.spec", "spec/hb_find_hostels.spec", "spec/hw_find_by_hostel.spec", "spec/hw_find_hostels.spec"]
  # has_rdoc= and rubyforge_project= are deprecated in newer RubyGems, so
  # they are only set when the running RubyGems still provides them.
  s.has_rdoc = true if s.respond_to? :has_rdoc=
  s.homepage = %q{http://github.com/holden/hostelify}
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Hostelify", "--main", "README.rdoc"]
  s.require_paths = ["lib"]
  s.rubyforge_project = %q{hostelify} if s.respond_to? :rubyforge_project=
  s.rubygems_version = %q{1.3.1}
  s.summary = %q{Simple Hostel Webscrapper.}

  s.specification_version = 2 if s.respond_to? :specification_version

  # Gems that lib/hostelify.rb requires at load time. The scrapers reference
  # WWW::Mechanize, hence the upper bound on mechanize
  # (NOTE(review): assumes the WWW:: namespace is gone from 2.0 on - confirm).
  s.add_dependency(%q<mechanize>, ["< 2.0"])
  s.add_dependency(%q<hpricot>, [">= 0"])
  s.add_dependency(%q<htmlentities>, [">= 0"])
end
@@ -0,0 +1,102 @@
1
+ require 'rubygems'
2
+ require 'mechanize'
3
+ require 'hpricot'
4
+ require 'open-uri'
5
+ require 'date'
6
+ require 'htmlentities'
7
+
8
+ Hpricot.buffer_size = 262144
9
+
10
# Scraper for hostel listings on gomio.com (work in progress).
#
# Every entry point is a class method taking an options hash whose :location
# value is a "city,country" string, e.g. "krakow,poland".
#
# Example:
#   Gomio.find_hostels_by_location(:location => "krakow,poland")
#   Gomio.find_hostel_by_id(:id => "mama's%20hostel", :location => "krakow,poland")
class Gomio

  #constants
  GOMIO_SINGULAR = "http://www.gomio.com/reservation/chooseBed.aspx?HostelId="
  GOMIO_PLURAL_HOSTELS_URL = "http://www.gomio.com/hostels/europe/" #europe/poland/krakow/mama%27s%20hostel/overview.htm

  # Fetches the overview page of a single hostel.
  #
  # options[:location] - "city,country" string.
  # options[:id]       - hostel path segment as it appears in the gomio URL.
  #
  # Returns a hash with :gomio_id, :name, :address, :description and
  # :unavailable (text of the td.HasNoAvail_Outer / td.HasNoAvail cells that
  # are present on the page; presumably these mark dates without
  # availability - TODO confirm).
  def self.find_hostel_by_id(options)
    city, country = split_location(options[:location])
    id = options[:id]
    url = GOMIO_PLURAL_HOSTELS_URL + "#{country}/#{city}/#{id}/overview.htm"

    data = Hpricot(open(url)).search('div[@id="main"]')
    unavailable_cells = [data.at("td.HasNoAvail_Outer"), data.at("td.HasNoAvail")].compact

    {
      :gomio_id    => id,
      :name        => data.at("h3/span").inner_text.strip,
      :address     => data.at("span.br_address").inner_text.strip,
      :description => data.at("div.br_detail").inner_text.strip,
      :unavailable => unavailable_cells.map { |cell| cell.inner_text.strip }
    }
  end

  # Runs an availability search for a location and collects every hostel row
  # of the result page.
  #
  # options[:location] - "city,country" string.
  # options[:date]     - arrival date as "YYYY-MM-DD" (default: four days
  #                      from today).
  # options[:no_ppl]   - number of beds to search for (default: 2).
  # options[:no_days]  - number of nights (default: 5).
  #
  # Returns an array with one hash per hostel:
  # { :gomio_id, :name, :description, :unavailable }.
  def self.find_hostels_by_location(options) #location
    city, country = split_location(options[:location])
    url = GOMIO_PLURAL_HOSTELS_URL + "#{country}/#{city}/search.htm"

    date    = options[:date] || (Date.today + 4).to_s
    no_ppl  = options[:no_ppl] || 2
    no_days = options[:no_days] || 5

    data = setSearch(url, date, no_ppl, no_days).search("div.SearchResultMembers")
    # City is caller input, so it is escaped before being used in a pattern.
    id_pattern = /(#{Regexp.escape(city)}).([\d\D]*)(\/overview.htm)/

    results = []
    (data/"div.SearchResultsHostel").each do |row|
      link = row.at("h3/a")['href']
      id_match = link.match(id_pattern)
      next if id_match.nil? # link does not point at a hostel overview page

      # NOTE(review): the second selector is assumed to be td.HasNoAvail, in
      # line with find_hostel_by_id; confirm against the live markup.
      unavailable = cell_texts(row, "td.HasNoAvail_Outer/text()") +
                    cell_texts(row, "td.HasNoAvail/text()")

      results << {
        :gomio_id    => id_match[2],
        :name        => row.at("h3").inner_text.strip,
        :description => row.at("p").inner_text.strip,
        :unavailable => unavailable
      }
    end
    results
  end

  # Submits the search form found on +url+ and returns the main content node
  # of the result page.
  #
  # url     - page that carries the search form.
  # date    - arrival date as "YYYY-MM-DD".
  # no_ppl  - number of beds; selects the matching entry of the beds list.
  # no_days - number of nights; selects the matching entry of the nights list.
  def self.setSearch(url,date,no_ppl,no_days)
    date = Date.strptime(date)
    # First entry of the year list when the arrival falls in the current
    # year, second entry otherwise.
    year = Time.now.strftime("%y") == date.strftime("%y") ? 0 : 1

    agent = WWW::Mechanize.new
    page = agent.get(url)
    form = page.forms.first # => WWW::Mechanize::Form

    # The drop-downs are selected by position, hence the -1 on 1-based values.
    form.field_with(:name => 'ctl00$searchbox$sb$ddlMonth').options[date.month-1].select
    form.field_with(:name => 'ctl00$searchbox$sb$ddlDay').options[date.day-1].select
    form.field_with(:name => 'ctl00$searchbox$sb$ddlYear').options[year].select
    form.field_with(:name => 'ctl00$searchbox$sb$ddlNights').options[no_days.to_i-1].select
    form.field_with(:name => 'ctl00$searchbox$sb$ddlBeds').options[no_ppl.to_i-1].select

    agent.submit(form).search('//div[@id="main"]')
  end

  # Splits "city,country" into [city, country] with all spaces removed.
  def self.split_location(location)
    parts = location.split(',')
    [parts.first.gsub(' ',''), parts.last.gsub(' ','')]
  end

  # Text of every node under +row+ matching +selector+, split on commas.
  def self.cell_texts(row, selector)
    (row/selector).to_a.join(',').split(',')
  end

  private_class_method :split_location, :cell_texts

end
@@ -0,0 +1,180 @@
1
# Scraper for hostelbookers.com: hostel listings per location and the detail
# page of a single hostel, optionally with availability for a date range.
class Hostelbookers

  #constants
  HB_SINGULAR_DETAIL_URL = "http://www.hostelbookers.com/hostels/" #poland/krakow/
  HB_PLURAL_HOSTELS_URL = "http://www.hostelbookers.com/hostels/" #poland/wroclaw/

  #options
  # Merged underneath the caller's options whenever a :date is supplied.
  @default_options = { :date => (Date.today+4).to_s, :no_days => "7", :live => true }

  # Lists the hostels of a location.
  #
  # options[:location] - "city,country" string; spaces become hyphens.
  # options[:date]     - optional arrival date "YYYY-MM-DD"; when given, the
  #                      site's search form is submitted for that date.
  # options[:no_days]  - number of nights for the search (default "7").
  #
  # Returns a HostelifyCollection of Hostelify objects carrying hostel_id,
  # name, description, rating, dorm and single.
  def self.find_hostels_by_location(options) #location
    city = options[:location].split(',').first.strip.gsub(' ','-').squeeze("-")
    country = options[:location].split(',').last.strip.gsub(' ','-').squeeze("-")

    url = HB_PLURAL_HOSTELS_URL + "#{country}/#{city}"

    data = nil
    if options[:date]
      options = @default_options.merge(options)
      data = setSearch(url,options[:date],options[:no_days])
    else
      Retryable.try 3 do
        data = Hpricot(open(url))
      end
    end

    data = data.search("//div[@id='propertyResultsList']")
    results = HostelifyCollection.new
    (data/"tr.propertyRow").each do |row|
      title = row.at("a.propertyTitle")
      link = title['href']
      rating = row.at("td.rating/text()")
      rating = rating.to_s.to_i unless rating.nil?
      # The numeric id is taken from the 2-5 digits just before the last
      # character of the link.
      hb_id = link.match(/[\d]{2,5}.$/).to_s.to_i

      results << Hostelify.new(:hostel_id => hb_id,
                               :name => title.inner_text,
                               :description => row.at("p.shortDescription").inner_text,
                               :rating => rating,
                               :dorm => row.at("td.shared/text()"),
                               :single => row.at("td.private/text()"))
    end
    results
  end

  # Loads the detail page of one hostel.
  #
  # options[:location] - "city,country" string.
  # options[:id]       - hostel id used as the last URL segment.
  # options[:date]     - optional arrival date "YYYY-MM-DD"; enables the
  #                      availability lookup.
  # options[:no_days]  - number of nights (default "7").
  # options[:all]      - when truthy, also fetches the map page for
  #                      directions and coordinates.
  #
  # Returns a Hostelify object. Sections that are missing from the page
  # (facilities, features, ratings, images) yield empty arrays.
  def self.find_hostel_by_id(options)
    # NOTE(review): spaces are dropped here but turned into hyphens in
    # find_hostels_by_location - confirm which form the site expects.
    city = options[:location].split(',').first.gsub(' ','')
    country = options[:location].split(',').last.gsub(' ','')
    id = options[:id]
    url = HB_SINGULAR_DETAIL_URL + "#{country}/#{city}/#{id}"

    hostel = Hostelify.new

    data = nil
    if options[:date]
      options = @default_options.merge(options)
      data = setSearch_id(url,options[:date],options[:no_days])
    else
      Retryable.try 3 do
        data = Hpricot(open(url))
      end
    end

    hostel.hostel_id = id
    hostel.name = data.at("h1").inner_text
    hostel.address = data.at("p.address").inner_text
    hostel.description = data.at('div[@id="overviewPane"]').inner_text

    facilities = []
    facilities_table = data.at("table.tableFacilities")
    if facilities_table
      (facilities_table/"td").each { |cell| facilities << cell.inner_text }
    end

    extras = []
    extras_table = data.at("table.tableFeatures")
    if extras_table
      (extras_table/"td.name").each { |cell| extras << "Free " + cell.inner_text.to_s }
    end
    # The "Free ..." extras used to be computed and then thrown away; they
    # are presumably meant to be part of the facility list.
    hostel.facilities = facilities + extras

    ratings = []
    ratings_table = data.at('div[@id="overviewIndRtng"]/table')
    if ratings_table
      (ratings_table/"tr").each do |row|
        cell = row.at("td")
        ratings << cell.inner_text.to_s.to_f if cell
      end
    end
    hostel.ratings = ratings

    images = []
    image_table = data.at('div[@id="propMedia"]/table')
    if image_table
      (image_table/"td").each do |cell|
        img = cell.at("img")
        onclick = img && img['onclick']
        # The full-size image URL is embedded in the onclick handler.
        found = onclick && onclick.match(/(http).*(jpg|gif|png|jpeg)/)
        images << found[0] if found
        #add youtube?
      end
    end
    hostel.images = images

    if options[:all]
      # Kept in its own variable so the availability lookup below still
      # reads the detail page rather than the map page.
      map_page = Hpricot(open(url + "/map"))
      map_page.search("h2").remove #get rid of header
      hostel.directions = map_page.at('div[@id="directions"]').inner_text
      # uniq (not uniq!): uniq! returns nil when there are no duplicates.
      hostel.geo = map_page.to_s.scan(/-{0,1}\d{1,3}\.\d{7}/).uniq
    end

    if options[:date]
      date = Date.strptime(options[:date])
      availables = []
      available = data.at("div.tableAvailability/table")
      if available
        (available/"tr").each do |row|
          name = row.at("td.roomType/label/text()")
          people = row.at("td.people/select")
          people = people.at("option:last-child").inner_text unless people.nil?
          price = row.at("td.price")
          next if price.nil? # rows without a price carry no offer
          amount = price.inner_text.to_s.match(/[\d.]{1,5}/)
          next if amount.nil? # price cell without a number
          # One entry per night of the stay.
          (0...options[:no_days].to_i).each do |x|
            availables << HostelifyAvailable.new(name,amount[0],people,(date+x).to_s)
          end
        end
      end
      hostel.availability = availables
    end

    hostel
  end

  # Submits the listing page's search form and switches the currency.
  #
  # url     - listing page URL.
  # date    - arrival date "YYYY-MM-DD" (sent to the site as dd/mm/yyyy).
  # no_days - number of nights; selects the matching entry of the nights list.
  #
  # Returns the div#content node of the resulting page.
  def self.setSearch(url,date,no_days)
    date = Date.strptime(date).strftime("%d/%m/%Y")
    agent = WWW::Mechanize.new
    page = agent.get(url)
    form = page.form_with(:name => 'searchForm') # => WWW::Mechanize::Form
    form.field_with(:name => 'intnights').options[no_days.to_i-1].select
    form.dtearrival = date #d/m/y

    Retryable.try 3 do
      page = agent.submit(form)
    end

    #to dollars!
    form = page.forms[0]
    form.field_with(:name => 'strSelectedCurrencyCode').options[5].select

    Retryable.try 3 do
      page = agent.submit(form)
    end

    page.search('//div[@id="content"]')
  end

  # Same as setSearch, but for the availability form on a hostel detail page.
  #
  # url     - hostel detail page URL.
  # date    - arrival date "YYYY-MM-DD" (sent to the site as dd/mm/yyyy).
  # no_days - number of nights; selects the matching entry of the nights list.
  #
  # Returns the div#content node of the resulting page.
  def self.setSearch_id(url,date,no_days)
    date = Date.strptime(date).strftime("%d/%m/%Y")
    agent = WWW::Mechanize.new
    page = agent.get(url)
    form = page.form_with(:name => 'frmCheckAvailBook') # => WWW::Mechanize::Form
    form.field_with(:name => 'intNights').options[no_days.to_i-1].select
    form.dteArrival = date #d/m/y

    Retryable.try 3 do
      page = agent.submit(form)
    end

    #change currency to dollars
    form = page.forms[1]
    form.field_with(:name => 'strSelectedCurrencyCode').options[5].select

    Retryable.try 3 do
      page = agent.submit(form)
    end

    page.search('//div[@id="content"]')
  end

end
@@ -0,0 +1,61 @@
1
# Plain data holder for one hostel. Every attribute is optional and can be
# given to the constructor as a hash or assigned later through its writer.
class Hostelify
  # Attributes assigned from a hostel's detail page.
  attr_accessor :hostel_id, :name, :address, :description, :facilities, :ratings, :directions, :geo, :images, :video, :availability
  # Attributes assigned from listing rows.
  attr_accessor :rating, :dorm, :single, :unavailable

  # options - hash of attribute name => value; each pair is passed to the
  #           writer of the same name, so an unknown key raises NoMethodError.
  def initialize(options = {})
    options.each do |attribute, value|
      send("#{attribute}=", value)
    end
  end
end
13
+
14
# An Array of hostel objects with shortcuts for pulling one attribute out of
# every element.
class HostelifyCollection < Array

  # Returns the hostel_id of each element, in order.
  def ids
    map { |hostel| hostel.hostel_id }
  end

  # Returns the name of each element, in order.
  def names
    map { |hostel| hostel.name }
  end

  # Returns the description of each element, in order.
  def descs
    map { |hostel| hostel.description }
  end

end
37
+
38
# One availability entry: a room offer on a single booking date.
class HostelifyAvailable
  attr_accessor :name, :price, :spots, :bookdate

  # name     - room name.
  # price    - price of the offer.
  # spots    - number of spots on offer.
  # bookdate - date the entry applies to.
  def initialize(name, price, spots, bookdate)
    @name, @price, @spots, @bookdate = name, price, spots, bookdate
  end

end
49
+
50
# Small helper for re-running a block that may fail intermittently.
#
#   Retryable.try(3) { fetch_page }
#   Retryable.try(5, :on => IOError) { read_socket }
module Retryable
  extend self

  # Runs the block and returns its value, running it again when it raises.
  #
  # times   - total number of attempts (default 1; values below 1 still run
  #           the block once).
  # options - :on => exception class whose instances trigger another attempt
  #           (default StandardError, so signals and exit requests are never
  #           trapped by the retry loop).
  #
  # Returns the value of the first attempt that succeeds.
  # Raises ArgumentError when called without a block, and re-raises the last
  # error once all attempts are used up, so a persistent failure surfaces
  # here rather than as a nil result further down the line.
  def try(times = 1, options = {}, &block)
    raise ArgumentError, "Retryable.try requires a block" unless block_given?

    attempts_left = times
    begin
      yield
    rescue options[:on] || StandardError
      attempts_left -= 1
      retry if attempts_left > 0
      raise
    end
  end
end
61
+