transportscrapper 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/TransportScrapper.rb +148 -0
  3. metadata +70 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 962ca5b21538ea50a440cb1091fc46177c4a7d4d2fe8a374ff484ea77c2f5b07
4
+ data.tar.gz: a52bfa7eaa60ccdf53f878b419c0ad674d055cf120873da5968bc10af9f395a0
5
+ SHA512:
6
+ metadata.gz: ee7fdeb9062f4e7bea0e03c3548427d20b1d1e40ac11b32950b5bc6ca7bc24894ebd493fb4615aa2ebb9cd91a82a99a1d1ff7848ba12ad2c4470dbfaf9899976
7
+ data.tar.gz: 33424b4dc0dceb300c9887265c35c1eeebb9d3fcace9b674510eebb8f9d699f0e50bd2cb876bf02684d7ba3faefa0d7ee6b79f5f6e6bf1ac1022763862ca9a27
@@ -0,0 +1,148 @@
1
+ require 'nokogiri'
2
+ require 'watir'
3
+
4
# Scrapes outward and return journey listings from a fare-results web page.
#
# The page is loaded in a headless Chrome session driven through Watir and the
# rendered HTML is parsed with Nokogiri. Journeys are accumulated in
# class-level arrays: every call adds the journeys it finds (skipping exact
# duplicates) to the same lists, so results from earlier calls are retained.
#
# Nothing happens at load time; the browser is only started on the first
# scrape. Call {TransportScrapper.close} when finished to shut it down.
#
# @example
#   result = TransportScrapper.scrape(search_url)
#   result['outward'] # => [{ 'depart' => ..., 'arrival' => ..., 'price' => ..., 'id' => ... }]
#   TransportScrapper.close
class TransportScrapper
  # Command-line switches handed to Chrome when the browser is started.
  CHROME_ARGS = %w[--disable-infobars --headless window-size=1600,1200 --no-sandbox --disable-gpu --disable-dev-shm-usage].freeze

  # Values substituted for both the `departtime` and `returntime` query
  # parameters; the page is fetched once per entry.
  TIME_SLOTS = %w[0000 0400 0800 1200 1600 2000].freeze

  # Currency symbol removed from the scraped price text.
  EURO_SIGN = "\u20AC"

  @browser = nil
  @outwardArr = []
  @returnArr = []

  # Loads one results page and appends the journeys found on it to the
  # accumulated outward and return lists.
  #
  # @param value [String] full URL of the results page to load
  # @return [nil]
  def self.scrapewithtime(value)
    browser.goto value
    doc = Nokogiri::HTML.parse(browser.html)

    collect_journeys(doc, 'div#JourneyRowsOut', @outwardArr)
    collect_journeys(doc, 'div#JourneyRowsRet', @returnArr)
    nil
  end

  # Scrapes the results page once per entry in TIME_SLOTS, appending
  # `&returntime=<slot>&departtime=<slot>` to the given URL each time.
  #
  # @param value [String] base search URL (already containing a query string)
  # @return [Hash{String => Array<Hash>}] the accumulated journeys under the
  #   keys "outward" and "return"; each journey hash has the keys "depart",
  #   "arrival", "price" and "id"
  def self.scrape(value)
    TIME_SLOTS.each do |slot|
      scrapewithtime("#{value}&returntime=#{slot}&departtime=#{slot}")
    end

    { 'outward' => @outwardArr, 'return' => @returnArr }
  end

  # Closes the browser session if one was started. A later scrape starts a
  # fresh browser.
  #
  # @return [nil]
  def self.close
    @browser&.close
    @browser = nil
  end

  # Returns the shared browser, starting it on first use so that merely
  # loading this file does not launch Chrome.
  def self.browser
    @browser ||= Watir::Browser.new(
      :chrome,
      options: {
        binary: ENV['GOOGLE_CHROME_BIN'],
        prefs: { password_manager_enable: false, credentials_enable_service: false },
        args: CHROME_ARGS
      }
    )
  end

  # Finds the journey rows inside the given container and appends each one to
  # +journeys+ unless an identical hash is already present.
  def self.collect_journeys(doc, container_selector, journeys)
    rows = doc.search(container_selector)
              .css('div.fullTable')
              .css('div.tableDataRow.Divtrue')

    rows.each do |row|
      journey = build_journey(row)
      journeys.push(journey) unless journeys.include?(journey)
    end
  end

  # Builds the result hash for a single journey row.
  def self.build_journey(row)
    depart = clean_text(row.css('div.tableDataTime.d-T'))
    arrival = extract_arrival(clean_text(row.css('div.tableDataTime')))

    route = clean_text(row.css('div.tableRowSelector').css('div.tableData'))
    # Collapse the route label by dropping every space and newline.
    route = route.delete(" \n") if route

    # Only the first euro sign is removed; the price text is otherwise left
    # exactly as scraped.
    price = row.css('div.tableDataFare').css('span.tablePrice').text.sub(EURO_SIGN, '')

    {
      'depart' => depart,
      'arrival' => arrival,
      'price' => price,
      'id' => "#{depart}_#{arrival}_#{route}"
    }
  end

  # Returns the stripped text of +nodes+, or nil when it is blank.
  #
  # Uses the non-destructive `strip`: `strip!` returns nil whenever the text
  # has no surrounding whitespace, which would discard perfectly valid values.
  def self.clean_text(nodes)
    text = nodes.text.strip
    text.empty? ? nil : text
  end

  # Extracts the arrival time from the combined text of a row's time cells.
  #
  # The text is split on ":" and the result is built from the last two
  # characters of the second piece and the first two characters of the third,
  # e.g. "10:00 12:30" yields "12:30".
  # NOTE(review): this presumes the text holds the departure time followed by
  # the arrival time - confirm against the live page markup.
  #
  # @return [String, nil] "HH:MM", or nil when the text is missing or does not
  #   contain at least two colons
  def self.extract_arrival(times_text)
    return nil unless times_text

    parts = times_text.split(':')
    return nil if parts.size < 3

    hours = parts[1].chars.last(2).join
    minutes = parts[2][0, 2]
    "#{hours}:#{minutes}"
  end

  private_class_method :browser, :collect_journeys, :build_journey, :clean_text, :extract_arrival
end
148
+
metadata ADDED
@@ -0,0 +1,70 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: transportscrapper
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Niranjan Karunanithi
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2019-11-27 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: watir
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ description: Scraps the transport data
42
+ email: niranjankarunanidhi@gmail.com
43
+ executables: []
44
+ extensions: []
45
+ extra_rdoc_files: []
46
+ files:
47
+ - lib/TransportScrapper.rb
48
+ homepage: http://rubygems.org/gems/transportscrapper
49
+ licenses: []
50
+ metadata: {}
51
+ post_install_message:
52
+ rdoc_options: []
53
+ require_paths:
54
+ - lib
55
+ required_ruby_version: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ version: '0'
60
+ required_rubygems_version: !ruby/object:Gem::Requirement
61
+ requirements:
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ version: '0'
65
+ requirements: []
66
+ rubygems_version: 3.0.3
67
+ signing_key:
68
+ specification_version: 4
69
+ summary: Scraps the transport data
70
+ test_files: []