transportscrapper 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/TransportScrapper.rb +148 -0
  3. metadata +70 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 962ca5b21538ea50a440cb1091fc46177c4a7d4d2fe8a374ff484ea77c2f5b07
4
+ data.tar.gz: a52bfa7eaa60ccdf53f878b419c0ad674d055cf120873da5968bc10af9f395a0
5
+ SHA512:
6
+ metadata.gz: ee7fdeb9062f4e7bea0e03c3548427d20b1d1e40ac11b32950b5bc6ca7bc24894ebd493fb4615aa2ebb9cd91a82a99a1d1ff7848ba12ad2c4470dbfaf9899976
7
+ data.tar.gz: 33424b4dc0dceb300c9887265c35c1eeebb9d3fcace9b674510eebb8f9d699f0e50bd2cb876bf02684d7ba3faefa0d7ee6b79f5f6e6bf1ac1022763862ca9a27
data/lib/TransportScrapper.rb ADDED
@@ -0,0 +1,148 @@
1
+ require 'nokogiri'
2
+ require 'watir'
3
+
4
# Scrapes outward and return journeys from a journey-results page by loading
# it in headless Chrome (via Watir) and parsing the rendered HTML with
# Nokogiri.
#
# All state lives on the class itself: one lazily created browser session and
# two accumulator arrays (outward / return journeys).
class TransportScrapper
  # Command-line switches handed to Chrome.
  # NOTE(review): `window-size=1600,1200` has no leading `--`, unlike every
  # other switch here, so Chrome most likely does not treat it as a switch.
  # It is kept byte-for-byte because correcting it could change the rendered
  # layout the selectors below rely on - confirm against the live page first.
  CHROME_ARGS = %w[
    --disable-infobars --headless window-size=1600,1200
    --no-sandbox --disable-gpu --disable-dev-shm-usage
  ].freeze

  # Values appended to the URL as both `returntime` and `departtime`;
  # one page load is performed per value.
  TIME_SLOTS = %w[0000 0400 0800 1200 1600 2000].freeze

  # Currency symbol removed from scraped prices.
  EURO_SIGN = "\u20AC"

  private_constant :CHROME_ARGS, :TIME_SLOTS, :EURO_SIGN

  @browser = nil
  @outwardArr = []
  @returnArr = []

  class << self
    # Loads one results page and appends every journey found on it to the
    # outward / return accumulators (journeys already present are skipped),
    # then prints both accumulators to stdout.
    #
    # @param value [String] full URL of the results page
    # @return [nil]
    def scrapewithtime(value)
      browser.goto value
      doc = Nokogiri::HTML.parse(browser.html)

      collect_journeys(doc, 'div#JourneyRowsOut', @outwardArr)
      collect_journeys(doc, 'div#JourneyRowsRet', @returnArr)

      puts '----------------------------------------------------------------------------'
      puts @outwardArr
      puts @returnArr
      puts '----------------------------------------------------------------------------'
    end

    # Scrapes the given search URL once per time slot and returns every
    # distinct journey found.
    #
    # The accumulators are reset first, so the result of one call no longer
    # contains journeys collected by earlier calls.
    #
    # @param value [String] search URL without `returntime` / `departtime`
    # @return [Hash{String => Array<Hash>}] journeys under the keys
    #   "outward" and "return"; each journey has the keys "depart",
    #   "arrival", "price" and "id"
    def scrape(value)
      @outwardArr = []
      @returnArr = []

      TIME_SLOTS.each do |time|
        scrapewithtime("#{value}&returntime=#{time}&departtime=#{time}")
      end

      { 'outward' => @outwardArr, 'return' => @returnArr }
    end

    private

    # Returns the shared browser session, starting Chrome on first use
    # instead of when the file is loaded. The session is closed at process
    # exit so the Chrome process is not left running.
    def browser
      @browser ||= begin
        options = {
          binary: ENV['GOOGLE_CHROME_BIN'],
          prefs: { password_manager_enable: false, credentials_enable_service: false },
          args: CHROME_ARGS.dup
        }
        Watir::Browser.new(:chrome, options: options).tap do |session|
          at_exit { session.close }
        end
      end
    end

    # Appends each journey row found under +container_selector+ to
    # +journeys+, skipping rows that are already present.
    def collect_journeys(doc, container_selector, journeys)
      rows = doc.search(container_selector)
                .css('div.fullTable')
                .css('div.tableDataRow.Divtrue')

      rows.each do |row|
        journey = build_journey(row)
        journeys.push(journey) unless journeys.include?(journey)
      end
    end

    # Builds the result hash for a single journey row.
    def build_journey(row)
      depart = clean_text(row.css('div.tableDataTime.d-T'))
      arrival = extract_arrival(clean_text(row.css('div.tableDataTime')))
      route = clean_text(row.css('div.tableRowSelector').css('div.tableData'))
      route &&= route.delete(" \n")
      # Only the first euro sign is removed; the price is otherwise left as scraped.
      price = row.css('div.tableDataFare').css('span.tablePrice').text.sub(EURO_SIGN, '')

      {
        'depart' => depart,
        'arrival' => arrival,
        'price' => price,
        'id' => "#{depart}_#{arrival}_#{route}"
      }
    end

    # Returns the whitespace-trimmed text of +nodes+, or nil when it is empty.
    # Uses the non-destructive `strip`: `strip!` returns nil whenever there
    # is nothing to trim, which used to discard already-clean values.
    def clean_text(nodes)
      text = nodes.text.strip
      text.empty? ? nil : text
    end

    # Extracts the second "HH:MM" time from +times_text+.
    #
    # `div.tableDataTime` also matches the departure cell (`.d-T`), so the
    # text holds both times back to back. Splitting on ":" leaves the arrival
    # hour at the end of the second piece and the arrival minutes at the
    # start of the third.
    #
    # @return [String, nil] nil when the text is missing or does not contain
    #   two colons (previously a NoMethodError)
    def extract_arrival(times_text)
      return nil unless times_text

      pieces = times_text.split(':')
      return nil if pieces.size < 3

      hours = pieces[1].chars.last(2).join
      minutes = pieces[2][0, 2]
      "#{hours}:#{minutes}"
    end
  end
end

# Demo run against a fixed search URL. Guarded so that merely requiring the
# library no longer launches Chrome and performs six page loads.
if __FILE__ == $PROGRAM_NAME
  value = 'https://national.buseireann.ie/?originStop=13500&destinationstop=55504&ticketType=2&departdate=27-nov-2019&returndate=27-nov-2019&adult=1&child=0&student=0&family=0'

  res = TransportScrapper.scrape(value)
  puts res
end
148
+
metadata ADDED
@@ -0,0 +1,70 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: transportscrapper
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Niranjan Karunanithi
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2019-11-27 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: watir
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ description: Scraps the transport data
42
+ email: niranjankarunanidhi@gmail.com
43
+ executables: []
44
+ extensions: []
45
+ extra_rdoc_files: []
46
+ files:
47
+ - lib/TransportScrapper.rb
48
+ homepage: http://rubygems.org/gems/transportscrapper
49
+ licenses: []
50
+ metadata: {}
51
+ post_install_message:
52
+ rdoc_options: []
53
+ require_paths:
54
+ - lib
55
+ required_ruby_version: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ version: '0'
60
+ required_rubygems_version: !ruby/object:Gem::Requirement
61
+ requirements:
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ version: '0'
65
+ requirements: []
66
+ rubygems_version: 3.0.3
67
+ signing_key:
68
+ specification_version: 4
69
+ summary: Scraps the transport data
70
+ test_files: []