lakes 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lakes.gemspec +3 -3
- data/lib/lakes/helper.rb +35 -0
- data/lib/lakes/texas/lake_characteristics_parser.rb +58 -54
- data/lib/lakes/texas/water_conditions_parser.rb +57 -0
- data/lib/lakes/texas/water_data_parser.rb +37 -0
- data/lib/lakes/texas.rb +37 -30
- data/lib/lakes.rb +3 -1
- metadata +9 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d4e064b1da9183c688a5e5c72310404c1632ddbe
|
4
|
+
data.tar.gz: a45be37ae9ae49c384676ffa687541cc6d1edab9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 74d1344a011c7d724a2f0b7f1acc074acc4328446694d52df375e35496005270ff9d44e1beefe69f88da865aa273c040f7f66c9b5b16fb77d906c1646060c8e0
|
7
|
+
data.tar.gz: 1dbf6042cecaa440ec9379fdf5da56dfcdc9929ee0312613946ccc6c928f2c53cce3d56e509f0e37675d9baf15fe0b60250ca22a8c0b2c837a0a10076cf70f32
|
data/lakes.gemspec
CHANGED
@@ -10,8 +10,8 @@ Gem::Specification.new do |spec|
|
|
10
10
|
|
11
11
|
spec.summary = 'This gem parses lake details from various government websites'
|
12
12
|
spec.description = 'I wrote this gem to originally parse texas lake data'
|
13
|
-
spec.homepage =
|
14
|
-
spec.license = '
|
13
|
+
spec.homepage = 'https://github.com/ssherman/lakes'
|
14
|
+
spec.license = 'MIT'
|
15
15
|
|
16
16
|
spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
17
17
|
spec.bindir = "exe"
|
@@ -19,7 +19,7 @@ Gem::Specification.new do |spec|
|
|
19
19
|
spec.require_paths = ['lib']
|
20
20
|
|
21
21
|
spec.add_development_dependency "bundler", "~> 1.11"
|
22
|
-
spec.add_development_dependency "rake", "~>
|
22
|
+
spec.add_development_dependency "rake", "~> 12.0"
|
23
23
|
spec.add_development_dependency "minitest", "~> 5.0"
|
24
24
|
spec.add_development_dependency 'nokogiri', "~> 1.7"
|
25
25
|
end
|
data/lib/lakes/helper.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
module Lakes
  # HTTP and text-cleanup helpers shared by the Texas scraper and its parsers.
  # Callers are expected to have loaded 'net/http' before using http_get.
  module Helper
    # Fetches +url+ and returns the response body transcoded to UTF-8.
    #
    # texas lake pages are encoded in Windows-1252 :(
    # Bytes that have no Windows-1252 mapping are replaced instead of raising
    # Encoding::UndefinedConversionError, so one stray byte cannot abort a
    # whole scrape.
    def http_get(url)
      uri = URI(url)
      Net::HTTP.get(uri).encode('UTF-8', 'Windows-1252', invalid: :replace, undef: :replace)
    end

    # Removes non-breaking spaces and surrounding whitespace from +value+.
    # Returns nil when +value+ is nil or nothing is left after cleaning.
    #
    # texas lake websites use lots of non breaking spaces
    def cleanup_data(value)
      return if value.nil?

      nbsp = 160.chr('UTF-8')
      # String#strip does not treat NBSP as whitespace, so the NBSPs must be
      # removed first; otherwise an NBSP at either end shields the ordinary
      # spaces next to it and they survive in the result.
      cleaned = value.gsub(nbsp, '').strip
      cleaned.empty? ? nil : cleaned
    end

    # Collapses every run of whitespace in +raw_text+ into a single space and
    # trims both ends. Returns nil when +raw_text+ is nil.
    def cleanup_raw_text(raw_text)
      return if raw_text.nil?

      raw_text.gsub(/\s+/, ' ').strip
    end

    # converts this:
    # ../../../action/waterecords.php?WB_code=0001
    # into this:
    # http://tpwd.texas.gov/fishboat/fish/action/waterecords.php?WB_code=0001
    # based on this:
    # http://tpwd.texas.gov/fishboat/fish/recreational/lakes/abilene
    #
    # Every '..' in +href+ drops one trailing segment of +current_url+; the
    # last segment of +current_url+ is counted as a directory level.
    def convert_relative_href(href, current_url)
      relative_depth = href.split('..').count - 1
      url_parts = current_url.split('/')
      url_parts.slice!(-relative_depth, relative_depth)
      fixed_href = href.gsub('../', '')
      "#{url_parts.join('/')}/#{fixed_href}"
    end
  end
end
|
@@ -1,57 +1,61 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
.
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
1
|
+
module Lakes
  class Texas
    # Parses the free-text "lake characteristics" block of a lake page into
    # a location description, surface area (acres), maximum depth (feet) and
    # the year the lake was impounded. Every value is nil when its label is
    # missing from the text.
    class LakeCharacteristicsParser
      attr_reader :raw_text, :location_desc
      attr_reader :surface_area_raw_text, :surface_area_in_acres
      attr_reader :max_depth_raw_text, :max_depth_in_feet
      attr_reader :year_impounded_raw_text, :year_impounded

      # text - the raw characteristics text; nil leaves every parsed value nil.
      def initialize(text)
        @raw_text = text
        parse
      end

      # Populates all readers from @raw_text.
      def parse
        # The labels that can follow the location are grouped in one
        # non-capturing alternation; left ungrouped, each `|` starts a whole
        # new alternative and a page that says "Surface acres" matches one
        # that captures no location at all. The lazy quantifier stops at the
        # first following label instead of the last.
        location = first_capture(/^location:(.*?)(?:surface area|surface acres|maximum depth|impounded):/im)
        @surface_area_raw_text = first_capture(/surface (?:area|acres):(.*)/i)
        @max_depth_raw_text = first_capture(/maximum depth:(.*)/i)
        @year_impounded_raw_text = first_capture(/impounded:(.*)/im)

        @location_desc = cleanup_raw_text(location)

        @surface_area_in_acres = integer_from(cleanup_raw_text(@surface_area_raw_text), /^([0-9,]+)/)

        # need to handle bad data like Lake Fryer which is:
        # Maximum depth: Average 13 feet, maximum 25 feet
        depth_text = cleanup_raw_text(@max_depth_raw_text)
        @max_depth_in_feet = integer_from(depth_text, /^([0-9,]+)/) ||
                             integer_from(depth_text, /maximum ([0-9,]+) feet/i)

        @year_impounded = integer_from(cleanup_raw_text(@year_impounded_raw_text), /([0-9,]+)/)
      end

      # Collapses whitespace runs to single spaces and trims the ends.
      # Returns nil for nil input.
      def cleanup_raw_text(raw_text)
        return if raw_text.nil?

        raw_text.gsub(/\s+/, ' ').strip
      end

      private

      # First capture group of +pattern+ in @raw_text, or nil when there is
      # no text or no match.
      def first_capture(pattern)
        match = @raw_text && @raw_text.match(pattern)
        match && match.captures.first
      end

      # Integer value of the first capture group of +pattern+ in +text+,
      # ignoring thousands separators; nil when +text+ is nil or does not match.
      def integer_from(text, pattern)
        match = text && text.match(pattern)
        match && match.captures.first.delete(',').to_i
      end
    end
  end
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
module Lakes
  class Texas
    # Parses the HTML of a lake page's "Water Conditions" section, e.g.:
    #
    #   <a href="http://waterdatafortexas.org/reservoirs/individual/belton">Current Lake Level</a>
    #   Conservation Pool Elevation: 594 ft. msl
    #   Fluctuation: 3-5 feet
    #   Normal Clarity: Moderate
    #
    # When constructed with nil nothing is parsed and every reader returns nil.
    class WaterConditionsParser
      include Lakes::Helper

      attr_reader :raw_text, :raw_text_without_whitespace
      attr_reader :water_data_uri
      attr_reader :conservation_pool_elevation_raw_text, :conservation_pool_elevation
      attr_reader :conservation_pool_elevation_in_ft_msl
      attr_reader :fluctuation_raw_text, :fluctuation
      attr_reader :normal_clarity_raw_text, :normal_clarity

      def initialize(text)
        return if text.nil?

        @raw_text = text
        # Drop tab/CR/LF/FF runs, then squeeze each pair of whitespace
        # characters into a single space.
        without_control_chars = text.gsub(/[\t\r\n\f]+/, '')
        @raw_text_without_whitespace = without_control_chars.gsub(/\s\s/, ' ')
        parse
      end

      # Fills in the water data link plus the raw and cleaned-up values for
      # elevation, fluctuation and clarity.
      def parse
        fragment = Nokogiri::HTML.fragment(@raw_text)
        compact_fragment = Nokogiri::HTML.fragment(raw_text_without_whitespace)

        level_link = compact_fragment.xpath('p/a[contains(text(), "Current Lake Level")]').first
        @water_data_uri = level_link.try(:[], 'href')
        # Plain-http links are rewritten to https in place.
        @water_data_uri.gsub!('http://', 'https://') if @water_data_uri && @water_data_uri.start_with?('http://')

        full_text = fragment.text
        compact_text = compact_fragment.text

        # so many inconsistencies in the data
        # Either label may be used; after compacting the captures, index 1
        # is the value that follows whichever label matched.
        elevation_match = full_text.match(/(Conservation Pool Elevation:(.*))|(Normal water level:(.*))/i)
        @conservation_pool_elevation_raw_text = elevation_match.try(:captures).try(:compact).try(:[], 1)
        @conservation_pool_elevation = cleanup_raw_text(@conservation_pool_elevation_raw_text)

        fluctuation_match = full_text.match(/Fluctuation: (.*)Normal Clarity:/im)
        @fluctuation_raw_text = fluctuation_match.try(:captures).try(:first)
        @fluctuation = cleanup_raw_text(@fluctuation_raw_text)

        clarity_match = compact_text.match(/Normal Clarity: (.*)/i)
        @normal_clarity_raw_text = clarity_match.try(:captures).try(:first)
        @normal_clarity = cleanup_raw_text(@normal_clarity_raw_text)
      end
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
module Lakes
  class Texas
    # Parses a reservoir page (the "Current Lake Level" link target) for the
    # conservation pool elevation and how full the reservoir currently is.
    class WaterDataParser
      include Lakes::Helper

      attr_reader :raw_text
      attr_reader :conservation_pool_elevation_in_ft_msl
      attr_reader :percentage_full

      # text - the HTML of the reservoir page.
      def initialize(text)
        @raw_text = text
        parse
      end

      # Populates the readers; each is nil when its value cannot be found.
      def parse
        html_doc = Nokogiri::HTML(@raw_text)

        # The elevation value sits in the element right after the
        # "Conservation pool elevation" table cell.
        header_cell = html_doc.xpath('//td[contains(text(), "Conservation pool elevation")]').first
        elevation_text = cleanup_raw_text(header_cell.try(:next_element).try(:text))
        @conservation_pool_elevation_in_ft_msl = elevation_text
                                                 .try(:match, /([0-9\.]+)/)
                                                 .try(:captures)
                                                 .try(:first)
                                                 .try(:to_f)

        percentage_text = cleanup_raw_text(html_doc.css('div.page-title h2 small').try(:text))
        # The fractional part is optional as a unit, so single-digit values
        # such as "5% full" are accepted as well as "95.3% full".
        @percentage_full = percentage_text
                           .try(:match, /^([0-9]+(?:\.[0-9]+)?)/)
                           .try(:captures)
                           .try(:first)
                           .try(:to_f)
      end
    end
  end
end
|
data/lib/lakes/texas.rb
CHANGED
@@ -2,15 +2,26 @@ require 'net/http'
|
|
2
2
|
require 'nokogiri'
|
3
3
|
require 'date'
|
4
4
|
require 'try'
|
5
|
+
require 'lakes/helper'
|
5
6
|
|
6
7
|
module Lakes
|
7
8
|
class Texas
|
9
|
+
include Lakes::Helper
|
8
10
|
attr_reader :lake_data
|
9
11
|
|
10
12
|
def initialize
|
11
13
|
@lake_data = {}
|
12
14
|
end
|
13
15
|
|
16
|
+
# Collects get_details for every lake name yielded by +list+, in order.
# Sleeps one second after each lookup (presumably to go easy on the
# remote site - confirm) and returns the collected results as an Array.
def all_details
  list.map do |lake_name|
    details = get_details(lake_name)
    sleep(1)
    details
  end
end
|
24
|
+
|
14
25
|
def list
|
15
26
|
return @lake_data.keys unless @lake_data.empty?
|
16
27
|
|
@@ -47,7 +58,7 @@ module Lakes
|
|
47
58
|
main_div = html_doc.at('div#maincontent')
|
48
59
|
|
49
60
|
parse_lake_characteristics(main_div, lake_data)
|
50
|
-
|
61
|
+
parse_water_conditions_and_data(main_div, lake_data)
|
51
62
|
parse_reservoir_controlling_authority(main_div, lake_data)
|
52
63
|
parse_aquatic_vegetation(main_div, lake_data)
|
53
64
|
parse_predominant_fish_species(main_div, lake_data)
|
@@ -122,8 +133,31 @@ module Lakes
|
|
122
133
|
lake_data[:lake_characteristics][:year_impounded] = parser.year_impounded
|
123
134
|
end
|
124
135
|
|
125
|
-
def
|
126
|
-
process_simple_section(main_div, lake_data, 'Water Conditions', :water_conditions, true)
|
136
|
+
# Extracts the "Water Conditions" section of a lake page into
# lake_data[:water] and, when the section links to a water data page,
# fetches that page and adds the elevation and percentage-full figures.
#
# main_div  - the page's main content node.
# lake_data - Hash being filled in for the lake; mutated in place.
def parse_water_conditions_and_data(main_div, lake_data)
  lake_data[:raw_water_conditions] = process_simple_section(main_div, lake_data, 'Water Conditions', :water_conditions, true)

  # No fixture dump here: writing to test/data/... from library code raises
  # Errno::ENOENT wherever that directory does not exist.
  parser = WaterConditionsParser.new(lake_data[:raw_water_conditions])
  lake_data[:water] = {
    conditions: {
      conservation_pool_elevation: parser.conservation_pool_elevation,
      fluctuation: parser.fluctuation,
      normal_clarity: parser.normal_clarity
    },
    water_data_uri: parser.water_data_uri,
    # NOTE(review): :data stays empty; the figures below are stored directly
    # under :water. Confirm which location consumers expect.
    data: {}
  }
  return if parser.water_data_uri.nil?

  # A refused connection or TLS failure is reported and skipped so one bad
  # link does not abort the rest of the lake.
  content = begin
    http_get(parser.water_data_uri)
  rescue Errno::ECONNREFUSED, OpenSSL::SSL::SSLError => e
    puts "#{e.message} for #{lake_data[:name]}: #{parser.water_data_uri}"
    nil
  end
  return if content.nil?

  water_data_parser = WaterDataParser.new(content)
  lake_data[:water][:conservation_pool_elevation_in_ft_msl] = water_data_parser.conservation_pool_elevation_in_ft_msl
  # Uses percentage_full, not the elevation reader.
  lake_data[:water][:percentage_full] = water_data_parser.percentage_full
end
|
128
162
|
|
129
163
|
def parse_reservoir_controlling_authority(main_div, lake_data)
|
@@ -259,38 +293,11 @@ module Lakes
|
|
259
293
|
data
|
260
294
|
end
|
261
295
|
|
262
|
-
# converts this:
|
263
|
-
# ../../../action/waterecords.php?WB_code=0001
|
264
|
-
# into this:
|
265
|
-
# http://tpwd.texas.gov/fishboat/fish/action/waterecords.php?WB_code=0001
|
266
|
-
# based on this:
|
267
|
-
# http://tpwd.texas.gov/fishboat/fish/recreational/lakes/abilene
|
268
|
-
def convert_relative_href(href, current_url)
|
269
|
-
relative_depth = href.split('..').count - 1
|
270
|
-
url_parts = current_url.split('/')
|
271
|
-
url_parts.slice!(-relative_depth, relative_depth)
|
272
|
-
fixed_href = href.gsub('../', '')
|
273
|
-
url_parts.join('/') + '/' + fixed_href
|
274
|
-
end
|
275
|
-
|
276
|
-
# texas lake websites use lots of non breaking spaces
|
277
|
-
def cleanup_data(value)
|
278
|
-
nbsp = 160.chr('UTF-8')
|
279
|
-
value = value.strip.gsub(nbsp, '')
|
280
|
-
value.empty? ? nil : value
|
281
|
-
end
|
282
|
-
|
283
296
|
# Finds the first h6 whose text contains +section_title+, takes the element
# that follows it and stores its content under lake_data[data_name].
#
# html - truthy to store the element's HTML, falsy to store its text.
#
# Returns the stored content, which is nil when the heading or its
# following element is missing.
def process_simple_section(main_div, lake_data, section_title, data_name, html)
  heading = main_div.xpath("//h6[contains(text(), \"#{section_title}\")]").first
  section_body = heading.try(:next_element)
  lake_data[data_name] = html ? section_body.try(:to_html) : section_body.try(:text)
end
|
289
|
-
|
290
|
-
# texas lake pages are encoded in Windows-1252 :(
|
291
|
-
def http_get(url)
|
292
|
-
uri = URI(url)
|
293
|
-
Net::HTTP.get(uri).encode('UTF-8', 'Windows-1252')
|
294
|
-
end
|
295
302
|
end
|
296
303
|
end
|
data/lib/lakes.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lakes
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shane Sherman
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-05-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -30,14 +30,14 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '12.0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
40
|
+
version: '12.0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: minitest
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -84,12 +84,15 @@ files:
|
|
84
84
|
- lakes.gemspec
|
85
85
|
- lakes.json
|
86
86
|
- lib/lakes.rb
|
87
|
+
- lib/lakes/helper.rb
|
87
88
|
- lib/lakes/texas.rb
|
88
89
|
- lib/lakes/texas/lake_characteristics_parser.rb
|
90
|
+
- lib/lakes/texas/water_conditions_parser.rb
|
91
|
+
- lib/lakes/texas/water_data_parser.rb
|
89
92
|
- lib/try.rb
|
90
|
-
homepage:
|
93
|
+
homepage: https://github.com/ssherman/lakes
|
91
94
|
licenses:
|
92
|
-
-
|
95
|
+
- MIT
|
93
96
|
metadata: {}
|
94
97
|
post_install_message:
|
95
98
|
rdoc_options: []
|