lakes 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,290 @@
1
+ require 'net/http'
2
+ require 'nokogiri'
3
+ require 'date'
4
+ require 'try'
5
+
6
module Lakes
  # Scrapes lake information from the Texas Parks & Wildlife Department
  # website (tpwd.texas.gov).
  #
  # Results are cached in +lake_data+, a hash keyed by lake name. #list fills
  # in each lake's :details_uri; #get_details adds the parsed sections of the
  # lake's detail page to the same per-lake hash.
  class Texas
    attr_reader :lake_data

    def initialize
      @lake_data = {}
    end

    # Returns the names of all known lakes.
    #
    # The lake index page is downloaded on the first call only; later calls
    # answer from the cached +lake_data+.
    def list
      return @lake_data.keys unless @lake_data.empty?

      base_url = 'http://tpwd.texas.gov/fishboat/fish/recreational/lakes/'
      uri = URI("#{base_url}lakelist.phtml")
      content = Net::HTTP.get(uri)
      html_doc = Nokogiri::HTML(content)

      # remove elements not needed to make parsing easier
      html_doc.search('div.announce, div.alert, div#bottomwrapper').each(&:remove)

      html_doc.search('div#maincontent ul li a').each do |lake_html|
        lake_name = cleanup_data(lake_html.text)
        # an anchor without any visible text cannot be looked up by name
        next if lake_name.nil?

        @lake_data[lake_name] = { details_uri: "#{base_url}#{lake_html[:href]}" }
      end
      @lake_data.keys
    end

    # Downloads and parses the detail page of +lake_name+.
    #
    # Returns the lake's data hash (the same object stored in +lake_data+).
    # Raises RuntimeError ('Lake not found') when the name is unknown.
    def get_details(lake_name)
      list
      data = lake_data[lake_name]
      raise 'Lake not found' if data.nil?

      parse_lake_details(data)
    end

    protected

    # Fetches the page at lake_data[:details_uri] and runs every section
    # parser against its div#maincontent. Returns +lake_data+.
    def parse_lake_details(lake_data)
      html_doc = fetch_html(lake_data[:details_uri] + '/')
      main_div = html_doc.at('div#maincontent')

      parse_lake_characteristics(main_div, lake_data)
      parse_water_conditions(main_div, lake_data)
      parse_reservoir_controlling_authority(main_div, lake_data)
      parse_aquatic_vegetation(main_div, lake_data)
      parse_predominant_fish_species(main_div, lake_data)
      parse_lake_records(main_div, lake_data)
      parse_current_fishing_report(main_div, lake_data)
      parse_stocking_history(main_div, lake_data)
      parse_lake_surveys(main_div, lake_data)
      parse_lake_maps(main_div, lake_data)
      parse_fishing_regulations(main_div, lake_data)
      parse_angling_opportunities(main_div, lake_data)
      parse_fishing_structure(main_div, lake_data)
      parse_tips_and_tactics(main_div, lake_data)
      lake_data
    end

    # Stores the text following the "Tips & Tactics" heading (nil if absent).
    def parse_tips_and_tactics(main_div, lake_data)
      lake_data[:tips_and_tactics] = section_text(main_div, 'Tips & Tactics')
    end

    # Stores the HTML following the "Fishing Cover/Structure" heading.
    def parse_fishing_structure(main_div, lake_data)
      lake_data[:structure_and_cover_description] = section_html(main_div, 'Fishing Cover/Structure')
    end

    # Stores the "Angling Opportunities" description plus a
    # species => rating hash read from the #Ratings table.
    #
    # For each species header cell the following sibling cells are walked;
    # the position of the first cell containing an <img> selects the rating
    # (Poor, Fair, Good, Excellent). Species without such a cell are omitted.
    def parse_angling_opportunities(main_div, lake_data)
      lake_data[:angling_opportunities_description] = section_text(main_div, 'Angling Opportunities')

      quality = %w[Poor Fair Good Excellent]
      ratings = lake_data[:angling_opportunities_details] = {}

      main_div.css('#Ratings').css('tr th.highlight2').each do |species_header|
        species = cleanup_data(species_header.text)
        rating_index = 0
        cell = species_header
        while (cell = cell.next_element)
          unless cell.css('img').empty?
            ratings[species] = quality[rating_index]
            break
          end
          rating_index += 1
        end
      end
      ratings
    end

    # Stores the HTML following the "Fishing Regulations" heading.
    def parse_fishing_regulations(main_div, lake_data)
      lake_data[:fishing_regulations] = section_html(main_div, 'Fishing Regulations')
    end

    # Stores the text following the "Lake Maps" heading.
    def parse_lake_maps(main_div, lake_data)
      lake_data[:lake_maps] = section_text(main_div, 'Lake Maps')
    end

    # Stores the text following the "Lake Characteristics" heading.
    def parse_lake_characteristics(main_div, lake_data)
      lake_data[:lake_characteristics] = section_text(main_div, 'Lake Characteristics')
    end

    # Stores the HTML following the "Water Conditions" heading.
    def parse_water_conditions(main_div, lake_data)
      lake_data[:water_conditions] = section_html(main_div, 'Water Conditions')
    end

    # Stores the text following the "Reservoir Controlling Authority" heading.
    def parse_reservoir_controlling_authority(main_div, lake_data)
      lake_data[:reservoir_controlling_authority] = section_text(main_div, 'Reservoir Controlling Authority')
    end

    # Stores the text following the "Aquatic Vegetation" heading.
    def parse_aquatic_vegetation(main_div, lake_data)
      lake_data[:aquatic_vegetation] = section_text(main_div, 'Aquatic Vegetation')
    end

    # Stores the cleaned text of every <li> in the element following the
    # "Predominant Fish Species" heading, or nil when the section is absent.
    def parse_predominant_fish_species(main_div, lake_data)
      species_root = section_after_heading(main_div, 'Predominant Fish Species')
      lake_data[:predominant_fish_species] =
        species_root && species_root.css('li').map { |item| cleanup_data(item.text) }
    end

    # Stores the absolute URL of the "Latest Survey Report" link, if present.
    def parse_lake_surveys(main_div, lake_data)
      link = main_div.xpath('p/a[contains(text(), "Latest Survey Report")]').first
      return if link.nil?

      lake_data[:latest_survey_report] = convert_relative_href(link['href'], lake_data[:details_uri])
    end

    # Follows the "Stocking History" link (if present) and stores both its
    # URL and the linked table as an array of header => value hashes.
    # :stocking_history is nil when the linked page has no table.
    def parse_stocking_history(main_div, lake_data)
      link = main_div.xpath('p/a[contains(text(), "Stocking History")]').first
      return if link.nil?

      lake_data[:stocking_history_uri] = convert_relative_href(link['href'], lake_data[:details_uri])

      table = fetch_html(lake_data[:stocking_history_uri]).at('div#maincontent table')
      lake_data[:stocking_history] = table.nil? ? nil : table_to_hashes(table)
    end

    # Follows the "Fishing Report" link (if present) and stores
    # {date:, report:} under :current_fishing_report. The value is nil when
    # there is no link or the linked page lacks the expected report markup.
    def parse_current_fishing_report(main_div, lake_data)
      link = main_div.xpath('p/a[contains(text(), "Fishing Report")]').first
      fishing_report = nil

      unless link.nil?
        lake_data[:current_fishing_report_uri] = convert_relative_href(link['href'], lake_data[:details_uri])

        report_dl = fetch_html(lake_data[:current_fishing_report_uri]).at('div.row.report div.container dl')
        unless report_dl.nil?
          fishing_report = {
            date: cleanup_data(report_dl.at('dt span.title').try(:text)),
            report: cleanup_data(report_dl.xpath('dd').text)
          }
        end
      end

      lake_data[:current_fishing_report] = fishing_report
    end

    # Follows the "Lake Records" link and stores the records as
    #   record type (h2) => age group (h3) => fishing method (table caption)
    #     => array of header => value hashes.
    #
    # H2's are record types like:
    #  - weight records
    #  - catch and release records (by length)
    # H3's are age groups (all ages, youth, etc).
    def parse_lake_records(main_div, lake_data)
      link = main_div.xpath('//a[contains(text(), "Lake Records")]').first
      if link.nil?
        lake_data[:fishing_records_uri] = nil
        return
      end
      lake_data[:fishing_records_uri] = convert_relative_href(link['href'], lake_data[:details_uri])

      records_div = fetch_html(lake_data[:fishing_records_uri]).at('div#maincontent')

      current_record_type = nil # Weight or Length
      current_age_group = nil # all ages, youth, etc
      fishing_records_data = {}

      # element_children visits every child element, including the first one
      elements = records_div.nil? ? [] : records_div.element_children
      elements.each do |element|
        case element.name
        when 'h2'
          current_record_type = cleanup_data(element.text)
          # a new record type starts without an age group
          current_age_group = nil
          fishing_records_data[current_record_type] = {}
        when 'h3'
          current_age_group = cleanup_data(element.text)
          (fishing_records_data[current_record_type] ||= {})[current_age_group] = {}
        when 'table'
          fishing_method = cleanup_data(element.xpath('caption/big').text)
          # tolerate tables that appear before any h2/h3 heading
          by_age_group = (fishing_records_data[current_record_type] ||= {})
          by_method = (by_age_group[current_age_group] ||= {})
          by_method[fishing_method] = table_to_hashes(element)
        end
      end
      lake_data[:fishing_records] = fishing_records_data
    end

    # converts a html table with headers and rows into
    # an array of hashes with header => value
    #
    # +headers+ is the flat list of header cell texts and +rows+ the flat
    # list of all data cell texts in document order. Trailing cells that do
    # not fill a complete row are dropped. Returns [] when there are no
    # headers.
    def process_data_table(headers, rows)
      return [] if headers.empty?

      keys = headers.map { |header| cleanup_data(header) }
      rows.each_slice(keys.length).each_with_object([]) do |cells, data|
        next if cells.length < keys.length # incomplete trailing row

        data << keys.zip(cells.map { |cell| cleanup_data(cell) }).to_h
      end
    end

    # converts this:
    #   ../../../action/waterecords.php?WB_code=0001
    # into this:
    #   http://tpwd.texas.gov/fishboat/fish/action/waterecords.php?WB_code=0001
    # based on this:
    #   http://tpwd.texas.gov/fishboat/fish/recreational/lakes/abilene
    #
    # +current_url+ is treated as a directory. Absolute and root-relative
    # hrefs are resolved by URI.join as well. If the href cannot be parsed as
    # a URI, the leading '../' segments are resolved by hand instead.
    def convert_relative_href(href, current_url)
      base = current_url.end_with?('/') ? current_url : "#{current_url}/"
      URI.join(base, href).to_s
    rescue URI::Error
      relative_depth = href.split('..').count - 1
      url_parts = current_url.split('/')
      url_parts.slice!(-relative_depth, relative_depth)
      fixed_href = href.gsub('../', '')
      url_parts.join('/') + '/' + fixed_href
    end

    # texas lake websites use lots of non breaking spaces
    #
    # Removes every non-breaking space, then trims surrounding whitespace.
    # Returns nil for nil input or when nothing is left.
    def cleanup_data(value)
      return nil if value.nil?

      nbsp = 160.chr('UTF-8')
      # drop the NBSPs first so whitespace next to them is trimmed too
      cleaned = value.gsub(nbsp, '').strip
      cleaned.empty? ? nil : cleaned
    end

    # Downloads +url+ and parses it as HTML after transcoding to UTF-8.
    def fetch_html(url)
      Nokogiri::HTML(to_utf8(Net::HTTP.get(URI(url))))
    end

    # Transcodes a Windows-1252 byte string to UTF-8. Bytes that have no
    # Windows-1252 mapping are replaced instead of raising.
    def to_utf8(raw)
      raw.encode('UTF-8', 'Windows-1252', invalid: :replace, undef: :replace)
    end

    # Returns the element following the first <h6> whose text contains
    # +title+, or nil when there is no such heading or sibling.
    def section_after_heading(main_div, title)
      heading = main_div.xpath("//h6[contains(text(), \"#{title}\")]").first
      heading.try(:next_element)
    end

    # Text of the element following the +title+ heading, or nil.
    def section_text(main_div, title)
      section_after_heading(main_div, title).try(:text)
    end

    # HTML of the element following the +title+ heading, or nil.
    def section_html(main_div, title)
      section_after_heading(main_div, title).try(:to_html)
    end

    # Converts a <table> node into an array of header => value hashes.
    def table_to_hashes(table)
      headers = table.xpath('tr/th').map(&:text)
      rows = table.xpath('tr/td').map(&:text)
      process_data_table(headers, rows)
    end
  end
end
data/lib/lakes.rb ADDED
@@ -0,0 +1,4 @@
1
+ require 'lakes/texas'
2
module Lakes
  # Gem version string. Frozen so the shared constant cannot be mutated
  # in place by callers.
  VERSION = '0.1.0'.freeze
end
data/lib/try.rb ADDED
@@ -0,0 +1,78 @@
1
class Object
  # Calls the named public method on the receiver, like +public_send+, but
  # returns +nil+ instead of raising when the receiver does not respond to it.
  #
  # It lets you write
  #
  #   @person.try(:name)
  #
  # in place of
  #
  #   @person ? @person.name : nil
  #
  # Any further arguments and a block are passed on to the method:
  #
  #   @posts.try(:each_slice, 2) do |a, b|
  #     ...
  #   end
  #
  # If the receiver does respond to the method the call is made as-is, so a
  # wrong number of arguments still raises +ArgumentError+.
  #
  # Called with a block and no arguments, +try+ yields the receiver to the
  # block and returns the block's result:
  #
  #   @person.try do |p|
  #     ...
  #   end
  #
  # +NilClass+ overrides +try+ so that it always returns +nil+:
  #
  #   nil.try(:to_i) # => nil, rather than 0
  #
  # Because +try+ lives on +Object+, it is not available on objects whose
  # class does not descend from +Object+ (direct subclasses of
  # +BasicObject+). A +SimpleDelegator+, for example, forwards +try+ to its
  # target rather than handling it itself.
  def try(*a, &b)
    return yield(self) if a.empty? && block_given?
    return nil unless respond_to?(a.first)

    public_send(*a, &b)
  end

  # Like #try, but the method is always sent, so a receiver that does not
  # implement it raises +NoMethodError+.
  def try!(*a, &b)
    return yield(self) if a.empty? && block_given?

    public_send(*a, &b)
  end
end
59
+
60
class NilClass
  # +try+ on +nil+ ignores its arguments and always returns +nil+, which
  # makes it safe to chain through values that may be missing.
  #
  #   nil.try(:name) # => nil
  #
  # Without +try+
  #   @person && !@person.children.blank? && @person.children.first.name
  #
  # With +try+
  #   @person.try(:children).try(:first).try(:name)
  def try(*args)
    nil
  end

  # +try!+ behaves exactly like +try+ on +nil+: it always returns +nil+.
  alias try! try
end
metadata ADDED
@@ -0,0 +1,113 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: lakes
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Shane Sherman
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2016-11-14 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.11'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.11'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: minitest
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '5.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '5.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: nokogiri
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '1.6'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '1.6'
69
+ description: I wrote this gem to originally parse texas lake data
70
+ email:
71
+ - shane.sherman@gmail.com
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - ".gitignore"
77
+ - CODE_OF_CONDUCT.md
78
+ - Gemfile
79
+ - LICENSE.txt
80
+ - README.md
81
+ - Rakefile
82
+ - bin/console
83
+ - bin/setup
84
+ - lakes.gemspec
85
+ - lakes.json
86
+ - lib/lakes.rb
87
+ - lib/lakes/texas.rb
88
+ - lib/try.rb
89
+ homepage:
90
+ licenses:
91
+ - mit
92
+ metadata: {}
93
+ post_install_message:
94
+ rdoc_options: []
95
+ require_paths:
96
+ - lib
97
+ required_ruby_version: !ruby/object:Gem::Requirement
98
+ requirements:
99
+ - - ">="
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ required_rubygems_version: !ruby/object:Gem::Requirement
103
+ requirements:
104
+ - - ">="
105
+ - !ruby/object:Gem::Version
106
+ version: '0'
107
+ requirements: []
108
+ rubyforge_project:
109
+ rubygems_version: 2.5.1
110
+ signing_key:
111
+ specification_version: 4
112
+ summary: This gem parses lake details from various government websites
113
+ test_files: []