bookingscrapper 0.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/bookingscrapper.rb +89 -0
- metadata +42 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: b9c1a5aa34c2d3ecfb0942c8356456adccf685b0eb17c2895ba86a9dd76da5f3
|
4
|
+
data.tar.gz: 5b2f486b122bf40a7698ef18ec6230cf698e9fa52e9b185cec59c501742b6c90
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 597ec10415d0ce3e1c4e72f0ed3e564e1d066d992b6c037429b50722f2da77cd2adf2d2aa286a76dc1c807b427c42bc82d13fe87b2ee7f1a1c8ada60e0d2adfc
|
7
|
+
data.tar.gz: 0bda92aa445c505b5f77bdf13203e826c2fb5dbe67ea49df954a546eca5c5c9ae8d5485b88322f7e8761ba3b3b80eafcf83f5fcd923cb1bcc9e8c40e23e1ca43
|
@@ -0,0 +1,89 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'open-uri'
|
3
|
+
require 'pry'
|
4
|
+
require 'watir'
|
5
|
+
|
6
|
+
|
7
|
+
# Scrapes a Booking.com search-results page and returns one record per hotel.
#
# The page is loaded in a Watir-driven browser, the rendered HTML is parsed
# with Nokogiri, and every hotel card found on the page is turned into a Hash.
#
# @param value [String] URL of the search-results page to load
# @return [Array<Hash>] one Hash per hotel card with the String keys "name",
#   "rank", "location", "address", "roomtype", "bedtype" and "price"; a value
#   is nil when the corresponding element is missing or empty
def scrape(value)
  browser = Watir::Browser.new
  begin
    browser.goto value
    doc = Nokogiri::HTML.parse(browser.html)
  ensure
    # Always release the browser, even when navigation or parsing fails.
    browser.close
  end

  doc.css('div.sr_item.sr_item_new.sr_item_default.sr_property_block').map do |hotel|
    scraped_hotel(hotel)
  end
end

# Builds the result Hash for a single hotel card node.
def scraped_hotel(hotel)
  address_line = hotel.css('div.sr_card_address_line')
  # Some cards have no coordinates link; report nil instead of raising.
  coords_link = address_line.at('a.bui-link')
  # Some hotels do not have a room-type tag, so this may be nil.
  room_name = hotel.css('a.room_link').at('strong')
  bed_type = scraped_text(hotel.css('span.room_info').search('span.sr_gr_bed_type').text)
  price = scraped_text(hotel.search('div.bui-price-display__value.prco-inline-block-maker-helper').text)

  {
    'name' => scraped_text(hotel.search('span.sr-hotel__name').text),
    'rank' => scraped_text(hotel.search('div.bui-review-score__badge').text),
    'location' => coords_link && coords_link['data-coords'],
    # The link text carries a trailing "Show on map" label that is not part of the address.
    'address' => scraped_text(address_line.search('a').text.sub('Show on map', '')),
    'roomtype' => room_name && room_name.text.delete("\n"),
    'bedtype' => bed_type && bed_type.delete("\n"),
    # Drop the leading euro sign and the non-breaking space that follows it.
    'price' => price && price.sub("\u20AC\u00A0", '')
  }
end

# Returns +text+ without surrounding whitespace, or nil when nothing is left.
# Uses the non-destructive String#strip: String#strip! returns nil whenever the
# string is already clean, which would discard perfectly valid values.
def scraped_text(text)
  cleaned = text.strip
  cleaned.empty? ? nil : cleaned
end
|
86
|
+
|
87
|
+
# Demo run against a sample Dublin search. Guarded so that it only executes
# when this file is run directly; merely requiring the library must not
# launch a browser or print anything.
if __FILE__ == $PROGRAM_NAME
  res = scrape('https://www.booking.com/searchresults.en-gb.html?label=gen173nr-1DCAEoggI46AdIM1gEaGmIAQGYAQm4ARfIAQzYAQPoAQGIAgGoAgO4Aqju0O0FwAIB&lang=en-gb&sid=cedaa5898969a74044a5c76485351761&sb=1&src=index&src_elem=sb&error_url=https%3A%2F%2Fwww.booking.com%2Findex.en-gb.html%3Flabel%3Dgen173nr-1DCAEoggI46AdIM1gEaGmIAQGYAQm4ARfIAQzYAQPoAQGIAgGoAgO4Aqju0O0FwAIB%3Bsid%3Dcedaa5898969a74044a5c76485351761%3Bsb_price_type%3Dtotal%26%3B&ss=Dublin&is_ski_area=0&ssne=Dublin&ssne_untouched=Dublin&dest_id=-1502554&dest_type=city&checkin_monthday=1&checkin_month=11&checkin_year=2019&checkout_monthday=6&checkout_month=11&checkout_year=2019&group_adults=1&group_children=0&no_rooms=1&b_h4u_keep_filters=&from_sf=1')
  puts res
end
|
metadata
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: bookingscrapper
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Niranjan Karunanithi
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2019-10-28 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: Retrieves hotel data
|
14
|
+
email: niranjankarunanidhi@gmail.com
|
15
|
+
executables: []
|
16
|
+
extensions: []
|
17
|
+
extra_rdoc_files: []
|
18
|
+
files:
|
19
|
+
- lib/bookingscrapper.rb
|
20
|
+
homepage: http://rubygems.org/gems/bookingscrapper
|
21
|
+
licenses: []
|
22
|
+
metadata: {}
|
23
|
+
post_install_message:
|
24
|
+
rdoc_options: []
|
25
|
+
require_paths:
|
26
|
+
- lib
|
27
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
28
|
+
requirements:
|
29
|
+
- - ">="
|
30
|
+
- !ruby/object:Gem::Version
|
31
|
+
version: '0'
|
32
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
33
|
+
requirements:
|
34
|
+
- - ">="
|
35
|
+
- !ruby/object:Gem::Version
|
36
|
+
version: '0'
|
37
|
+
requirements: []
|
38
|
+
rubygems_version: 3.0.3
|
39
|
+
signing_key:
|
40
|
+
specification_version: 4
|
41
|
+
summary: Hotel data
|
42
|
+
test_files: []
|