lakes 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lakes.gemspec +3 -3
- data/lib/lakes/helper.rb +35 -0
- data/lib/lakes/texas/lake_characteristics_parser.rb +58 -54
- data/lib/lakes/texas/water_conditions_parser.rb +57 -0
- data/lib/lakes/texas/water_data_parser.rb +37 -0
- data/lib/lakes/texas.rb +37 -30
- data/lib/lakes.rb +3 -1
- metadata +9 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d4e064b1da9183c688a5e5c72310404c1632ddbe
|
4
|
+
data.tar.gz: a45be37ae9ae49c384676ffa687541cc6d1edab9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 74d1344a011c7d724a2f0b7f1acc074acc4328446694d52df375e35496005270ff9d44e1beefe69f88da865aa273c040f7f66c9b5b16fb77d906c1646060c8e0
|
7
|
+
data.tar.gz: 1dbf6042cecaa440ec9379fdf5da56dfcdc9929ee0312613946ccc6c928f2c53cce3d56e509f0e37675d9baf15fe0b60250ca22a8c0b2c837a0a10076cf70f32
|
data/lakes.gemspec
CHANGED
@@ -10,8 +10,8 @@ Gem::Specification.new do |spec|
|
|
10
10
|
|
11
11
|
spec.summary = 'This gem parses lake details from various government websites'
|
12
12
|
spec.description = 'I wrote this gem to originally parse texas lake data'
|
13
|
-
spec.homepage =
|
14
|
-
spec.license = '
|
13
|
+
spec.homepage = 'https://github.com/ssherman/lakes'
|
14
|
+
spec.license = 'MIT'
|
15
15
|
|
16
16
|
spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
17
17
|
spec.bindir = "exe"
|
@@ -19,7 +19,7 @@ Gem::Specification.new do |spec|
|
|
19
19
|
spec.require_paths = ['lib']
|
20
20
|
|
21
21
|
spec.add_development_dependency "bundler", "~> 1.11"
|
22
|
-
spec.add_development_dependency "rake", "~>
|
22
|
+
spec.add_development_dependency "rake", "~> 12.0"
|
23
23
|
spec.add_development_dependency "minitest", "~> 5.0"
|
24
24
|
spec.add_development_dependency 'nokogiri', "~> 1.7"
|
25
25
|
end
|
data/lib/lakes/helper.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'net/http'

module Lakes
  # Helpers shared by the lake scrapers and parsers: fetching pages,
  # normalising scraped text and resolving relative links.
  module Helper
    # Fetches +url+ and returns the response body transcoded to UTF-8.
    #
    # texas lake pages are encoded in Windows-1252 :(
    #
    # @param url [String] absolute URL to fetch
    # @return [String] response body as UTF-8
    def http_get(url)
      uri = URI(url)
      Net::HTTP.get(uri).encode('UTF-8', 'Windows-1252')
    end

    # Removes non breaking spaces and surrounding whitespace from +value+.
    #
    # texas lake websites use lots of non breaking spaces
    #
    # @param value [String, nil] scraped text
    # @return [String, nil] cleaned text, or nil when nothing is left
    def cleanup_data(value)
      return nil if value.nil?

      nbsp = 160.chr('UTF-8')
      # Drop the non breaking spaces BEFORE stripping: String#strip does not
      # treat U+00A0 as whitespace, so stripping first leaves stray spaces
      # behind (e.g. "\u00A0 foo" would become " foo").
      cleaned = value.gsub(nbsp, '').strip
      cleaned.empty? ? nil : cleaned
    end

    # Collapses every run of whitespace into a single space and trims the ends.
    #
    # @param raw_text [String, nil]
    # @return [String, nil] nil when +raw_text+ is nil
    def cleanup_raw_text(raw_text)
      return nil if raw_text.nil?

      raw_text.gsub(/\s+/, ' ').strip
    end

    # converts this:
    #   ../../../action/waterecords.php?WB_code=0001
    # into this:
    #   http://tpwd.texas.gov/fishboat/fish/action/waterecords.php?WB_code=0001
    # based on this:
    #   http://tpwd.texas.gov/fishboat/fish/recreational/lakes/abilene
    #
    # @param href [String] relative link containing leading "../" segments
    # @param current_url [String] URL of the page the link was found on
    # @return [String] absolute URL
    def convert_relative_href(href, current_url)
      # Each ".." in the href removes one trailing segment of the current URL.
      relative_depth = href.split('..').count - 1
      url_parts = current_url.split('/')
      url_parts.slice!(-relative_depth, relative_depth)
      fixed_href = href.gsub('../', '')
      url_parts.join('/') + '/' + fixed_href
    end
  end
end
|
@@ -1,57 +1,61 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
.
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
1
|
+
module Lakes
|
2
|
+
class Texas
|
3
|
+
# Parses the free-form "lake characteristics" text of a lake page into a
# location description, surface area, maximum depth and impoundment year.
# Every value is nil when its label cannot be found in the text.
class LakeCharacteristicsParser
  attr_reader :raw_text, :location_desc
  attr_reader :surface_area_raw_text, :surface_area_in_acres
  attr_reader :max_depth_raw_text, :max_depth_in_feet
  attr_reader :year_impounded_raw_text, :year_impounded

  # @param text [String, nil] raw text of the characteristics section
  def initialize(text)
    @raw_text = text
    parse
  end

  # Fills in all readers from the raw text.
  def parse
    text = @raw_text.to_s

    # The location runs up to the first following label. The terminators are
    # grouped so the alternation cannot match a bare label on its own (which
    # would leave the location capture empty), and the quantifier is lazy so
    # the capture stops at the first label rather than the last.
    location = capture(text, /^location:(.*?)(?:surface (?:area|acres)|maximum depth:|impounded:)/im, 1)
    @surface_area_raw_text = capture(text, /surface (area|acres):(.*)/i, 2)
    @max_depth_raw_text = capture(text, /maximum depth:(.*)/i, 1)
    @year_impounded_raw_text = capture(text, /impounded:(.*)/im, 1)

    @location_desc = cleanup_raw_text(location)

    @surface_area_in_acres = extract_integer(@surface_area_raw_text, /^([0-9,]+)/)

    # need to handle bad data like Lake Fryer which is:
    # Maximum depth: Average 13 feet, maximum 25 feet
    @max_depth_in_feet = extract_integer(@max_depth_raw_text, /^([0-9,]+)/) ||
                         extract_integer(@max_depth_raw_text, /maximum ([0-9,]+) feet/i)

    @year_impounded = extract_integer(@year_impounded_raw_text, /([0-9,]+)/)
  end

  # Collapses whitespace runs into single spaces and trims the ends.
  #
  # @param raw_text [String, nil]
  # @return [String, nil] nil when +raw_text+ is nil
  def cleanup_raw_text(raw_text)
    return nil if raw_text.nil?

    raw_text.gsub(/\s+/, ' ').strip
  end

  private

  # Returns capture group +index+ of +pattern+ in +text+, or nil without a match.
  def capture(text, pattern, index)
    match = text.match(pattern)
    match && match[index]
  end

  # Cleans +raw_text+, then returns the first capture of +pattern+ as an
  # Integer with thousands separators removed; nil when nothing matches.
  def extract_integer(raw_text, pattern)
    cleaned = cleanup_raw_text(raw_text)
    return nil if cleaned.nil?

    digits = capture(cleaned, pattern, 1)
    digits && digits.delete(',').to_i
  end
end
|
44
|
-
|
45
|
-
@year_impounded = cleanup_raw_text(@year_impounded_raw_text)
|
46
|
-
.try(:match, /([0-9,]+)/)
|
47
|
-
.try(:captures)
|
48
|
-
.try(:first)
|
49
|
-
.try(:delete, ',')
|
50
|
-
.try(:to_i)
|
51
|
-
|
52
|
-
end
|
53
|
-
|
54
|
-
def cleanup_raw_text(raw_text)
|
55
|
-
raw_text.try(:gsub, /\s+/, ' ').try(:strip)
|
56
60
|
end
|
57
61
|
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
module Lakes
|
3
|
+
class Texas
|
4
|
+
# Parses the HTML of a lake page's "Water Conditions" section, e.g.:
#
#   <a href="http://waterdatafortexas.org/reservoirs/individual/belton">Current Lake Level</a>
#   Conservation Pool Elevation: 594 ft. msl
#   Fluctuation: 3-5 feet
#   Normal Clarity: Moderate
#
# When constructed with nil nothing is parsed and every reader returns nil.
class WaterConditionsParser
  include Lakes::Helper

  attr_reader :raw_text, :raw_text_without_whitespace
  attr_reader :water_data_uri
  attr_reader :conservation_pool_elevation_raw_text, :conservation_pool_elevation
  attr_reader :conservation_pool_elevation_in_ft_msl
  attr_reader :fluctuation_raw_text, :fluctuation
  attr_reader :normal_clarity_raw_text, :normal_clarity

  # @param text [String, nil] HTML of the water conditions section
  def initialize(text)
    unless text.nil?
      @raw_text = text
      # Copy of the markup with tabs/newlines removed and double whitespace
      # squeezed, used where line breaks get in the way of matching.
      @raw_text_without_whitespace = text.gsub(/[\t\r\n\f]+/, '').gsub(/\s\s/, ' ')
      parse
    end
  end

  # Extracts the water data link and the three labelled values.
  def parse
    fragment = Nokogiri::HTML.fragment(@raw_text)
    compact_fragment = Nokogiri::HTML.fragment(raw_text_without_whitespace)

    lake_level_link = compact_fragment.xpath('p/a[contains(text(), "Current Lake Level")]').first
    @water_data_uri = lake_level_link.try(:[], 'href')
    # Plain http links are rewritten to https.
    @water_data_uri.gsub!('http://', 'https://') if @water_data_uri && @water_data_uri.start_with?('http://')

    full_text = fragment.text
    compact_text = compact_fragment.text

    # so many inconsistencies in the data
    # Only one alternative matches; after compact the captures are
    # [whole label line, value], so index 1 is the value.
    elevation_match = full_text.match(/(Conservation Pool Elevation:(.*))|(Normal water level:(.*))/i)
    @conservation_pool_elevation_raw_text = elevation_match.try(:captures).try(:compact).try(:[], 1)
    @conservation_pool_elevation = cleanup_raw_text(@conservation_pool_elevation_raw_text)

    fluctuation_match = full_text.match(/Fluctuation: (.*)Normal Clarity:/im)
    @fluctuation_raw_text = fluctuation_match.try(:captures).try(:first)
    @fluctuation = cleanup_raw_text(@fluctuation_raw_text)

    clarity_match = compact_text.match(/Normal Clarity: (.*)/i)
    @normal_clarity_raw_text = clarity_match.try(:captures).try(:first)
    @normal_clarity = cleanup_raw_text(@normal_clarity_raw_text)
  end
end
|
56
|
+
end
|
57
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
module Lakes
|
3
|
+
class Texas
|
4
|
+
# Parses the HTML of a reservoir's water data page (the page linked as
# "Current Lake Level") for the conservation pool elevation and the
# percentage full. Readers are nil when the value cannot be found.
class WaterDataParser
  include Lakes::Helper

  attr_reader :raw_text
  attr_reader :conservation_pool_elevation_in_ft_msl
  attr_reader :percentage_full

  # @param text [String] HTML of the water data page
  def initialize(text)
    @raw_text = text
    parse
  end

  # Extracts both values from the document.
  def parse
    html_doc = Nokogiri::HTML(@raw_text)

    # The value is read from the element that follows the
    # "Conservation pool elevation" table cell.
    header_cell = html_doc.xpath('//td[contains(text(), "Conservation pool elevation")]').first
    value_cell = header_cell.try(:next_element)
    @conservation_pool_elevation_in_ft_msl = cleanup_raw_text(value_cell.try(:text))
      .try(:match, /([0-9\.]+)/)
      .try(:captures)
      .try(:first)
      .try(:to_f)

    percentage_text = cleanup_raw_text(html_doc.css('div.page-title h2 small').try(:text))
    # The fractional part is optional so single-digit values such as "5"
    # are accepted as well as "5.3" or "100".
    @percentage_full = percentage_text
      .try(:match, /^([0-9]+(?:\.[0-9]+)?)/)
      .try(:captures)
      .try(:first)
      .try(:to_f)
  end
end
|
36
|
+
end
|
37
|
+
end
|
data/lib/lakes/texas.rb
CHANGED
@@ -2,15 +2,26 @@ require 'net/http'
|
|
2
2
|
require 'nokogiri'
|
3
3
|
require 'date'
|
4
4
|
require 'try'
|
5
|
+
require 'lakes/helper'
|
5
6
|
|
6
7
|
module Lakes
|
7
8
|
class Texas
|
9
|
+
include Lakes::Helper
|
8
10
|
attr_reader :lake_data
|
9
11
|
|
10
12
|
def initialize
|
11
13
|
@lake_data = {}
|
12
14
|
end
|
13
15
|
|
16
|
+
# Fetches the details of every lake returned by #list, pausing one second
# after each lookup.
#
# @return [Array] one get_details result per lake name, in list order
def all_details
  list.map do |lake_name|
    details = get_details(lake_name)
    sleep(1)
    details
  end
end
|
24
|
+
|
14
25
|
def list
|
15
26
|
return @lake_data.keys unless @lake_data.empty?
|
16
27
|
|
@@ -47,7 +58,7 @@ module Lakes
|
|
47
58
|
main_div = html_doc.at('div#maincontent')
|
48
59
|
|
49
60
|
parse_lake_characteristics(main_div, lake_data)
|
50
|
-
|
61
|
+
parse_water_conditions_and_data(main_div, lake_data)
|
51
62
|
parse_reservoir_controlling_authority(main_div, lake_data)
|
52
63
|
parse_aquatic_vegetation(main_div, lake_data)
|
53
64
|
parse_predominant_fish_species(main_div, lake_data)
|
@@ -122,8 +133,31 @@ module Lakes
|
|
122
133
|
lake_data[:lake_characteristics][:year_impounded] = parser.year_impounded
|
123
134
|
end
|
124
135
|
|
125
|
-
def
|
126
|
-
process_simple_section(main_div, lake_data, 'Water Conditions', :water_conditions, true)
|
136
|
+
# Parses the "Water Conditions" section of a lake page and, when that
# section links to a water data page, fetches it for current readings.
#
# Writes lake_data[:raw_water_conditions] and lake_data[:water]
# (:conditions, :water_data_uri, :data and, when the water data page could
# be fetched, :conservation_pool_elevation_in_ft_msl and :percentage_full).
#
# @param main_div the main content node of the lake page
# @param lake_data [Hash] accumulator for this lake's data
def parse_water_conditions_and_data(main_div, lake_data)
  lake_data[:raw_water_conditions] = process_simple_section(main_div, lake_data, 'Water Conditions', :water_conditions, true)

  parser = WaterConditionsParser.new(lake_data[:raw_water_conditions])
  lake_data[:water] = {}
  lake_data[:water][:conditions] = {}
  lake_data[:water][:water_data_uri] = parser.water_data_uri
  lake_data[:water][:conditions][:conservation_pool_elevation] = parser.conservation_pool_elevation
  lake_data[:water][:conditions][:fluctuation] = parser.fluctuation
  lake_data[:water][:conditions][:normal_clarity] = parser.normal_clarity

  # NOTE(review): :data stays empty; the fetched readings below are stored
  # directly under :water. Confirm which location callers expect.
  lake_data[:water][:data] = {}
  return if parser.water_data_uri.nil?

  # A refused connection or TLS failure is reported and treated as
  # "no water data" rather than aborting the whole lake.
  content = begin
    http_get(parser.water_data_uri)
  rescue Errno::ECONNREFUSED, OpenSSL::SSL::SSLError => e
    puts "#{e.message} for #{lake_data[:name]}: #{parser.water_data_uri}"
    nil
  end

  return if content.nil?

  water_data_parser = WaterDataParser.new(content)
  lake_data[:water][:conservation_pool_elevation_in_ft_msl] = water_data_parser.conservation_pool_elevation_in_ft_msl
  lake_data[:water][:percentage_full] = water_data_parser.percentage_full
end
|
128
162
|
|
129
163
|
def parse_reservoir_controlling_authority(main_div, lake_data)
|
@@ -259,38 +293,11 @@ module Lakes
|
|
259
293
|
data
|
260
294
|
end
|
261
295
|
|
262
|
-
# converts this:
|
263
|
-
# ../../../action/waterecords.php?WB_code=0001
|
264
|
-
# into this:
|
265
|
-
# http://tpwd.texas.gov/fishboat/fish/action/waterecords.php?WB_code=0001
|
266
|
-
# based on this:
|
267
|
-
# http://tpwd.texas.gov/fishboat/fish/recreational/lakes/abilene
|
268
|
-
def convert_relative_href(href, current_url)
|
269
|
-
relative_depth = href.split('..').count - 1
|
270
|
-
url_parts = current_url.split('/')
|
271
|
-
url_parts.slice!(-relative_depth, relative_depth)
|
272
|
-
fixed_href = href.gsub('../', '')
|
273
|
-
url_parts.join('/') + '/' + fixed_href
|
274
|
-
end
|
275
|
-
|
276
|
-
# texas lake websites use lots of non breaking spaces
|
277
|
-
def cleanup_data(value)
|
278
|
-
nbsp = 160.chr('UTF-8')
|
279
|
-
value = value.strip.gsub(nbsp, '')
|
280
|
-
value.empty? ? nil : value
|
281
|
-
end
|
282
|
-
|
283
296
|
# Finds the <h6> heading whose text contains +section_title+ and stores the
# element that follows it in lake_data[data_name], either as HTML or as
# plain text. Stores nil when the heading or its next element is missing.
#
# @param main_div node the XPath query is issued from
# @param lake_data [Hash] accumulator for this lake's data
# @param section_title [String] heading text to look for
# @param data_name [Symbol] key to store the content under
# @param html [Boolean] true to keep markup, false for text only
# @return the stored content
def process_simple_section(main_div, lake_data, section_title, data_name, html)
  heading = main_div.xpath("//h6[contains(text(), \"#{section_title}\")]").first
  section_body = heading.try(:next_element)
  lake_data[data_name] = html ? section_body.try(:to_html) : section_body.try(:text)
end
|
289
|
-
|
290
|
-
# texas lake pages are encoded in Windows-1252 :(
|
291
|
-
def http_get(url)
|
292
|
-
uri = URI(url)
|
293
|
-
Net::HTTP.get(uri).encode('UTF-8', 'Windows-1252')
|
294
|
-
end
|
295
302
|
end
|
296
303
|
end
|
data/lib/lakes.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lakes
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shane Sherman
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-05-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -30,14 +30,14 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '12.0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
40
|
+
version: '12.0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: minitest
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -84,12 +84,15 @@ files:
|
|
84
84
|
- lakes.gemspec
|
85
85
|
- lakes.json
|
86
86
|
- lib/lakes.rb
|
87
|
+
- lib/lakes/helper.rb
|
87
88
|
- lib/lakes/texas.rb
|
88
89
|
- lib/lakes/texas/lake_characteristics_parser.rb
|
90
|
+
- lib/lakes/texas/water_conditions_parser.rb
|
91
|
+
- lib/lakes/texas/water_data_parser.rb
|
89
92
|
- lib/try.rb
|
90
|
-
homepage:
|
93
|
+
homepage: https://github.com/ssherman/lakes
|
91
94
|
licenses:
|
92
|
-
-
|
95
|
+
- MIT
|
93
96
|
metadata: {}
|
94
97
|
post_install_message:
|
95
98
|
rdoc_options: []
|