holden-hostelify 0.2.8 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +1 -1
- data/hostelify.gemspec +2 -2
- data/lib/hostelify/hostelworld.rb +53 -45
- data/spec/hb_find_by_hostel.spec +1 -1
- metadata +4 -3
data/Rakefile
CHANGED
data/hostelify.gemspec
CHANGED
@@ -2,11 +2,11 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = %q{hostelify}
|
5
|
-
s.version = "0.2.8"
|
5
|
+
s.version = "0.2.9"
|
6
6
|
|
7
7
|
s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
|
8
8
|
s.authors = ["Holden Thomas"]
|
9
|
-
s.date = %q{2009-09-
|
9
|
+
s.date = %q{2009-09-15}
|
10
10
|
s.description = %q{Simple Hostel Webscrapper.}
|
11
11
|
s.email = %q{holden.thomas@gmail.com}
|
12
12
|
s.extra_rdoc_files = ["README.rdoc", "lib/hostelify.rb", "lib/hostelify/gomio.rb", "lib/hostelify/hostelbookers.rb", "lib/hostelify/hostelify.rb", "lib/hostelify/hostelworld.rb"]
|
@@ -36,58 +36,62 @@ class Hostelworld
|
|
36
36
|
data = parse_html(url)
|
37
37
|
end
|
38
38
|
|
39
|
-
data
|
40
|
-
|
39
|
+
unless data == "Full"
|
40
|
+
data = data.search("//div[@id='content']")
|
41
|
+
data.search("h3").remove #get rid of header
|
41
42
|
|
42
|
-
|
43
|
-
|
44
|
-
|
43
|
+
#title, address, desc, facilities, ratings
|
44
|
+
hostel.name = data.at("h2").inner_text.gsub(/( in ).*$/,'')
|
45
|
+
hostel.address = data.at('div[@style="padding-top: 5px"]').inner_text.lstrip
|
45
46
|
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
47
|
+
if options[:date]
|
48
|
+
hostel.availability = parse_availables(data)
|
49
|
+
else
|
50
|
+
hostel.description = data.at('div[@id="microDescription2]').inner_text
|
51
|
+
end
|
51
52
|
|
52
|
-
|
53
|
-
|
54
|
-
|
53
|
+
#optional
|
54
|
+
no_photos = data.at('span/a[@id="picLink"]').inner_text.to_i
|
55
|
+
video = data.at('div[@id="microVideo"]')
|
55
56
|
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
57
|
+
facilities = []
|
58
|
+
(data/"li.microFacilitiesBoomLi").each do |item|
|
59
|
+
facilities << item.inner_text
|
60
|
+
end
|
60
61
|
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
62
|
+
ratings = []
|
63
|
+
(data/'div[@id="ratingsBar2"]').each do |item|
|
64
|
+
ratings << item.inner_text.to_i
|
65
|
+
end
|
65
66
|
|
66
|
-
|
67
|
-
|
67
|
+
hostel.facilities = facilities
|
68
|
+
hostel.ratings = ratings
|
68
69
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
70
|
+
if video #exists
|
71
|
+
data = parse_html(HW_SINGULAR_YOUTUBE_URL + id)
|
72
|
+
video_url = data.at('param[@name="movie"]')['value']
|
73
|
+
hostel.video = video_url
|
74
|
+
#video_url = data.at('tag')
|
75
|
+
end
|
75
76
|
|
76
|
-
|
77
|
-
|
77
|
+
if options[:directions] or options[:all]
|
78
|
+
data = parse_html(HW_SINGULAR_DETAIL_URL + id + "/directions/")
|
78
79
|
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
80
|
+
#directions, geo
|
81
|
+
hostel.directions = data.at('div[@id="content"]').inner_text.gsub(/^[\d\D\n]*(DIRECTIONS)/,'')
|
82
|
+
hostel.geo = data.to_s.scan(/-{0,1}\d{1,3}\.\d{7}/).uniq!
|
83
|
+
end
|
83
84
|
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
85
|
+
if no_photos and (options[:images] or options[:all])
|
86
|
+
images = []
|
87
|
+
(1..no_photos).each do |i|
|
88
|
+
data = parse_html(HW_SINGULAR_IMAGE_URL + id + '&PicNO=' + i.to_s)
|
89
|
+
images << (data/"img").first[:src].to_s
|
90
|
+
end
|
91
|
+
hostel.images = images
|
89
92
|
end
|
90
|
-
|
93
|
+
else
|
94
|
+
hostel = nil
|
91
95
|
end
|
92
96
|
hostel # return
|
93
97
|
end
|
@@ -147,9 +151,7 @@ class Hostelworld
|
|
147
151
|
#the form name
|
148
152
|
form = page.forms.first # => WWW::Mechanize::Form
|
149
153
|
|
150
|
-
|
151
|
-
page = agent.submit(form)
|
152
|
-
end
|
154
|
+
page = agent.submit(form)
|
153
155
|
|
154
156
|
#form must be submitted twice because the people writing hostelworld are retards
|
155
157
|
form = page.forms.first # => WWW::Mechanize::Form
|
@@ -164,8 +166,14 @@ class Hostelworld
|
|
164
166
|
page = agent.submit(form)
|
165
167
|
end
|
166
168
|
|
167
|
-
|
169
|
+
error = page.search("div.microBookingError2")
|
168
170
|
|
171
|
+
if error.to_s.length > 1
|
172
|
+
data = "Full"
|
173
|
+
else
|
174
|
+
data = page.search("//div[@id='content']")
|
175
|
+
end
|
176
|
+
|
169
177
|
return data
|
170
178
|
end
|
171
179
|
|
data/spec/hb_find_by_hostel.spec
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: holden-hostelify
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.8
|
4
|
+
version: 0.2.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Holden Thomas
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-09-
|
12
|
+
date: 2009-09-15 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|
@@ -43,6 +43,7 @@ files:
|
|
43
43
|
- spec/hw_find_hostels.spec
|
44
44
|
has_rdoc: true
|
45
45
|
homepage: http://github.com/holden/hostelify
|
46
|
+
licenses:
|
46
47
|
post_install_message:
|
47
48
|
rdoc_options:
|
48
49
|
- --line-numbers
|
@@ -68,7 +69,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
68
69
|
requirements: []
|
69
70
|
|
70
71
|
rubyforge_project: hostelify
|
71
|
-
rubygems_version: 1.
|
72
|
+
rubygems_version: 1.3.5
|
72
73
|
signing_key:
|
73
74
|
specification_version: 2
|
74
75
|
summary: Simple Hostel Webscrapper.
|