bookingscrapper 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/bookingscrapper.rb +89 -0
  3. metadata +42 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: b9c1a5aa34c2d3ecfb0942c8356456adccf685b0eb17c2895ba86a9dd76da5f3
4
+ data.tar.gz: 5b2f486b122bf40a7698ef18ec6230cf698e9fa52e9b185cec59c501742b6c90
5
+ SHA512:
6
+ metadata.gz: 597ec10415d0ce3e1c4e72f0ed3e564e1d066d992b6c037429b50722f2da77cd2adf2d2aa286a76dc1c807b427c42bc82d13fe87b2ee7f1a1c8ada60e0d2adfc
7
+ data.tar.gz: 0bda92aa445c505b5f77bdf13203e826c2fb5dbe67ea49df954a546eca5c5c9ae8d5485b88322f7e8761ba3b3b80eafcf83f5fcd923cb1bcc9e8c40e23e1ca43
@@ -0,0 +1,89 @@
1
+ require 'nokogiri'
2
+ require 'open-uri'
3
+ require 'pry'
4
+ require 'watir'
5
+
6
+
7
# Scrapes a Booking.com search-results page and returns one Hash per hotel.
#
# The page is loaded in a Watir-driven browser, its rendered HTML is parsed
# with Nokogiri, and every search-result card is turned into a Hash. The
# browser is always closed before the method returns or raises.
#
# @param value [String] URL of the search-results page to load
# @return [Array<Hash>] one Hash per hotel card, with the String keys "name",
#   "rank", "location", "address", "roomtype", "bedtype" and "price"; a value
#   is nil when the card has no non-blank content for that field
def scrape(value)
  browser = Watir::Browser.new
  begin
    browser.goto value
    doc = Nokogiri::HTML.parse(browser.html)
  ensure
    # Release the browser even when navigation or parsing raises.
    browser.close
  end

  # Each matching element is the card of a single hotel.
  doc.css('div.sr_item.sr_item_new.sr_item_default.sr_property_block').map do |hotel_elements|
    scrape_hotel_fields(hotel_elements)
  end
end

# Builds the result Hash for a single hotel card.
#
# @param hotel_elements [#css, #search] parsed element of one hotel card
# @return [Hash] the hotel's fields keyed by String; missing fields are nil
def scrape_hotel_fields(hotel_elements)
  location_doc = hotel_elements.css('div.sr_card_address_line')
  # Some cards carry no map link (or no coordinates on it); that yields nil
  # here instead of raising NoMethodError.
  coords = location_doc.search('a.bui-link').attribute('data-coords')
  # The address link text also contains the "Show on map" label; drop it.
  address = location_doc.search('a').text.sub('Show on map', '')

  # Some hotels do not have a room-type tag at all.
  roomtype_node = hotel_elements.css('a.room_link').at('strong')
  roomtype = roomtype_node ? roomtype_node.text.delete("\n") : nil
  bedtype = hotel_elements.css('span.room_info').search('span.sr_gr_bed_type').text.delete("\n")

  # Remove the leading euro sign + non-breaking space from the price.
  price = hotel_elements.search('div.bui-price-display__value.prco-inline-block-maker-helper').text
  price = price.sub("\u20AC\u00A0", '')

  {
    'name' => scrape_clean_text(hotel_elements.search('span.sr-hotel__name').text),
    'rank' => scrape_clean_text(hotel_elements.search('div.bui-review-score__badge').text),
    'location' => coords && coords.value,
    'address' => scrape_clean_text(address),
    'roomtype' => scrape_clean_text(roomtype),
    'bedtype' => scrape_clean_text(bedtype),
    'price' => scrape_clean_text(price)
  }
end

# Strips surrounding whitespace and maps blank or missing text to nil.
#
# Uses the non-destructive String#strip: String#strip! returns nil when there
# is nothing to strip, which would turn already-clean text into nil.
#
# @param text [String, nil] raw text taken from the page
# @return [String, nil] the stripped text, or nil when nothing is left
def scrape_clean_text(text)
  return nil if text.nil?

  stripped = text.strip
  stripped.empty? ? nil : stripped
end
86
+
87
# Demo run: scrape a fixed Booking.com search (Dublin, 1-6 November 2019,
# one adult, one room) and print the resulting hotel hashes.
# NOTE(review): this executes whenever the file is loaded, not only when it is
# run as a script - confirm that is intended for a library file.
search_url = 'https://www.booking.com/searchresults.en-gb.html?label=gen173nr-1DCAEoggI46AdIM1gEaGmIAQGYAQm4ARfIAQzYAQPoAQGIAgGoAgO4Aqju0O0FwAIB&lang=en-gb&sid=cedaa5898969a74044a5c76485351761&sb=1&src=index&src_elem=sb&error_url=https%3A%2F%2Fwww.booking.com%2Findex.en-gb.html%3Flabel%3Dgen173nr-1DCAEoggI46AdIM1gEaGmIAQGYAQm4ARfIAQzYAQPoAQGIAgGoAgO4Aqju0O0FwAIB%3Bsid%3Dcedaa5898969a74044a5c76485351761%3Bsb_price_type%3Dtotal%26%3B&ss=Dublin&is_ski_area=0&ssne=Dublin&ssne_untouched=Dublin&dest_id=-1502554&dest_type=city&checkin_monthday=1&checkin_month=11&checkin_year=2019&checkout_monthday=6&checkout_month=11&checkout_year=2019&group_adults=1&group_children=0&no_rooms=1&b_h4u_keep_filters=&from_sf=1'
res = scrape(search_url)
puts res
metadata ADDED
@@ -0,0 +1,42 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bookingscrapper
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Niranjan Karunanithi
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2019-10-28 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Retrives hotel data
14
+ email: niranjankarunanidhi@gmail.com
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - lib/bookingscrapper.rb
20
+ homepage: http://rubygems.org/gems/bookingscrapper
21
+ licenses: []
22
+ metadata: {}
23
+ post_install_message:
24
+ rdoc_options: []
25
+ require_paths:
26
+ - lib
27
+ required_ruby_version: !ruby/object:Gem::Requirement
28
+ requirements:
29
+ - - ">="
30
+ - !ruby/object:Gem::Version
31
+ version: '0'
32
+ required_rubygems_version: !ruby/object:Gem::Requirement
33
+ requirements:
34
+ - - ">="
35
+ - !ruby/object:Gem::Version
36
+ version: '0'
37
+ requirements: []
38
+ rubygems_version: 3.0.3
39
+ signing_key:
40
+ specification_version: 4
41
+ summary: Hotel data
42
+ test_files: []