lakes 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4fd4734a4458713c26f87380650f9f997b69ee47
4
- data.tar.gz: 98432482a89cac25e45637a1acd4a50d4d3fd5e3
3
+ metadata.gz: d4e064b1da9183c688a5e5c72310404c1632ddbe
4
+ data.tar.gz: a45be37ae9ae49c384676ffa687541cc6d1edab9
5
5
  SHA512:
6
- metadata.gz: 1e95cffa65e7e8778f7ae086dfc651d4b1f1dcd0e602b57fdad0a942745e0be6eff3e81a660020951184317811af5527d88ab477c1355df22f40c660576dadb2
7
- data.tar.gz: ed6fb9047856f64ea9ab52fbda4ea31e6895132998f2e06ce4877d649136968fd2d5ac0e7431e00afd94dd206f909481d228dfc44127ebcff243ee171ecca2d6
6
+ metadata.gz: 74d1344a011c7d724a2f0b7f1acc074acc4328446694d52df375e35496005270ff9d44e1beefe69f88da865aa273c040f7f66c9b5b16fb77d906c1646060c8e0
7
+ data.tar.gz: 1dbf6042cecaa440ec9379fdf5da56dfcdc9929ee0312613946ccc6c928f2c53cce3d56e509f0e37675d9baf15fe0b60250ca22a8c0b2c837a0a10076cf70f32
data/lakes.gemspec CHANGED
@@ -10,8 +10,8 @@ Gem::Specification.new do |spec|
10
10
 
11
11
  spec.summary = 'This gem parses lake details from various government websites'
12
12
  spec.description = 'I wrote this gem to originally parse texas lake data'
13
- spec.homepage = nil
14
- spec.license = 'mit'
13
+ spec.homepage = 'https://github.com/ssherman/lakes'
14
+ spec.license = 'MIT'
15
15
 
16
16
  spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
17
17
  spec.bindir = "exe"
@@ -19,7 +19,7 @@ Gem::Specification.new do |spec|
19
19
  spec.require_paths = ['lib']
20
20
 
21
21
  spec.add_development_dependency "bundler", "~> 1.11"
22
- spec.add_development_dependency "rake", "~> 10.0"
22
+ spec.add_development_dependency "rake", "~> 12.0"
23
23
  spec.add_development_dependency "minitest", "~> 5.0"
24
24
  spec.add_development_dependency 'nokogiri', "~> 1.7"
25
25
  end
@@ -0,0 +1,35 @@
1
module Lakes
  # Scraping utilities shared by the Texas lake scraper and its parsers.
  # Intended to be mixed in with `include Lakes::Helper`.
  module Helper
    # Fetches +url+ and returns the response body transcoded to UTF-8.
    #
    # texas lake pages are encoded in Windows-1252 :(
    #
    # @param url [String] absolute URL to fetch
    # @return [String] response body, converted from Windows-1252 to UTF-8
    def http_get(url)
      uri = URI(url)
      Net::HTTP.get(uri).encode('UTF-8', 'Windows-1252')
    end

    # Removes non-breaking spaces and surrounding whitespace from +value+.
    #
    # texas lake websites use lots of non breaking spaces
    #
    # @param value [String, nil] scraped text
    # @return [String, nil] cleaned text, or nil when nothing is left
    def cleanup_data(value)
      return nil if value.nil?

      nbsp = 160.chr('UTF-8')
      # Drop the non-breaking spaces *before* stripping: String#strip does not
      # treat U+00A0 as whitespace, so stripping first would leave ordinary
      # spaces that were hidden behind a leading or trailing nbsp.
      cleaned = value.gsub(nbsp, '').strip
      cleaned.empty? ? nil : cleaned
    end

    # Collapses every run of whitespace in +raw_text+ to a single space and
    # trims both ends.
    #
    # @param raw_text [String, nil]
    # @return [String, nil] normalised text, or nil when +raw_text+ is nil
    def cleanup_raw_text(raw_text)
      return nil if raw_text.nil?

      raw_text.gsub(/\s+/, ' ').strip
    end

    # converts this:
    # ../../../action/waterecords.php?WB_code=0001
    # into this:
    # http://tpwd.texas.gov/fishboat/fish/action/waterecords.php?WB_code=0001
    # based on this:
    # http://tpwd.texas.gov/fishboat/fish/recreational/lakes/abilene
    #
    # Every path segment of +current_url+ (including the last one) counts as
    # a directory level, so each '..' removes one trailing segment.
    #
    # @param href [String] relative link containing zero or more '../' hops
    # @param current_url [String] absolute URL of the page the link is on
    # @return [String] absolute URL
    def convert_relative_href(href, current_url)
      relative_depth = href.split('..').count - 1
      url_parts = current_url.split('/')
      url_parts.slice!(-relative_depth, relative_depth)
      fixed_href = href.gsub('../', '')
      url_parts.join('/') + '/' + fixed_href
    end
  end
end
@@ -1,57 +1,61 @@
1
- class LakeCharacteristicsParser
2
- attr_reader :raw_text, :location_desc
3
- attr_reader :surface_area_raw_text, :surface_area_in_acres
4
- attr_reader :max_depth_raw_text, :max_depth_in_feet
5
- attr_reader :year_impounded_raw_text, :year_impounded
6
-
7
- def initialize(text)
8
- @raw_text = text
9
- parse
10
- end
11
-
12
- def parse
13
- @location_desc = @raw_text.match(/^location:(.*)(surface area)|(surface acres)|(maximum depth|impounded):/im).captures.first
14
- @surface_area_raw_text = @raw_text.match(/surface (area|acres):(.*)/i).try(:captures).try(:[], 1)
15
- @max_depth_raw_text = @raw_text.match(/maximum depth:(.*)/i).try(:captures).try(:first)
16
- @year_impounded_raw_text = @raw_text.match(/impounded:(.*)/im).try(:captures).try(:first)
17
-
18
- @location_desc = cleanup_raw_text(@location_desc)
19
-
20
- @surface_area_in_acres = cleanup_raw_text(@surface_area_raw_text)
21
- .try(:match, /^([0-9,]+)/)
22
- .try(:captures)
23
- .try(:first)
24
- .try(:delete, ',')
25
- .try(:to_i)
26
-
27
- @max_depth_in_feet = cleanup_raw_text(@max_depth_raw_text)
28
- .try(:match, /^([0-9,]+)/)
29
- .try(:captures)
30
- .try(:first)
31
- .try(:delete, ',')
32
- .try(:to_i)
33
-
34
- # need to handle bad data like Lake Fryer which is:
35
- # Maximum depth: Average 13 feet, maximum 25 feet
36
- if @max_depth_in_feet.nil?
37
- @max_depth_in_feet = cleanup_raw_text(@max_depth_raw_text)
38
- .try(:match, /maximum ([0-9,]+) feet/i)
39
- .try(:captures)
40
- .try(:first)
41
- .try(:delete, ',')
42
- .try(:to_i)
1
+ module Lakes
2
+ class Texas
3
# Parses the free-text "Lake Characteristics" section of a Texas lake page,
# e.g.
#
#   Location: On Elm Creek ...
#   Surface area: 595 acres
#   Maximum depth: 25 feet
#   Impounded: 1921
#
# Each *_raw_text reader holds the text captured after the label; the other
# readers hold the cleaned-up / numeric value, or nil when it was not found.
class LakeCharacteristicsParser
  # Location text runs from "Location:" up to the first following section
  # label. The alternatives are grouped so that capture 1 is always the
  # location, whichever label terminates it.
  LOCATION_PATTERN = /^location:(.*?)(?:surface area|surface acres|(?:maximum depth|impounded):)/im
  SURFACE_AREA_PATTERN = /surface (?:area|acres):(.*)/i
  MAX_DEPTH_PATTERN = /maximum depth:(.*)/i
  YEAR_IMPOUNDED_PATTERN = /impounded:(.*)/im

  attr_reader :raw_text, :location_desc
  attr_reader :surface_area_raw_text, :surface_area_in_acres
  attr_reader :max_depth_raw_text, :max_depth_in_feet
  attr_reader :year_impounded_raw_text, :year_impounded

  # @param text [String, nil] plain text of the lake characteristics section
  def initialize(text)
    @raw_text = text
    parse
  end

  # Populates every reader from @raw_text. Values that cannot be found are
  # left nil rather than raising.
  def parse
    text = @raw_text.to_s

    @location_desc = cleanup_raw_text(first_capture(text, LOCATION_PATTERN))
    @surface_area_raw_text = first_capture(text, SURFACE_AREA_PATTERN)
    @max_depth_raw_text = first_capture(text, MAX_DEPTH_PATTERN)
    @year_impounded_raw_text = first_capture(text, YEAR_IMPOUNDED_PATTERN)

    @surface_area_in_acres = extract_integer(@surface_area_raw_text, /^([0-9,]+)/)

    # need to handle bad data like Lake Fryer which is:
    # Maximum depth: Average 13 feet, maximum 25 feet
    @max_depth_in_feet = extract_integer(@max_depth_raw_text, /^([0-9,]+)/) ||
                         extract_integer(@max_depth_raw_text, /maximum ([0-9,]+) feet/i)

    @year_impounded = extract_integer(@year_impounded_raw_text, /([0-9,]+)/)
  end

  # Collapses runs of whitespace to a single space and trims both ends.
  #
  # @param raw_text [String, nil]
  # @return [String, nil] nil when +raw_text+ is nil
  def cleanup_raw_text(raw_text)
    return nil if raw_text.nil?

    raw_text.gsub(/\s+/, ' ').strip
  end

  private

  # Returns capture group 1 of +pattern+ in +text+, or nil without a match.
  def first_capture(text, pattern)
    match = text.match(pattern)
    match && match[1]
  end

  # Normalises +raw+, takes capture group 1 of +pattern+, drops thousands
  # separators and converts to Integer. Returns nil when +raw+ is nil or the
  # pattern does not match.
  def extract_integer(raw, pattern)
    cleaned = cleanup_raw_text(raw)
    match = cleaned && cleaned.match(pattern)
    match && match[1].delete(',').to_i
  end
end
44
-
45
- @year_impounded = cleanup_raw_text(@year_impounded_raw_text)
46
- .try(:match, /([0-9,]+)/)
47
- .try(:captures)
48
- .try(:first)
49
- .try(:delete, ',')
50
- .try(:to_i)
51
-
52
- end
53
-
54
- def cleanup_raw_text(raw_text)
55
- raw_text.try(:gsub, /\s+/, ' ').try(:strip)
56
60
  end
57
61
  end
@@ -0,0 +1,57 @@
1
+ require 'nokogiri'
2
module Lakes
  class Texas
    # Parses the HTML of a lake page's "Water Conditions" section, e.g.
    #
    #   <a href="http://waterdatafortexas.org/reservoirs/individual/belton">Current Lake Level</a>
    #   Conservation Pool Elevation: 594 ft. msl
    #   Fluctuation: 3-5 feet
    #   Normal Clarity: Moderate
    #
    # Every reader is nil when the section (or the individual value) is
    # missing.
    class WaterConditionsParser
      include Lakes::Helper

      attr_reader :raw_text, :raw_text_without_whitespace
      attr_reader :water_data_uri
      attr_reader :conservation_pool_elevation_raw_text, :conservation_pool_elevation
      # NOTE(review): declared but never assigned by this class.
      attr_reader :conservation_pool_elevation_in_ft_msl
      attr_reader :fluctuation_raw_text, :fluctuation
      attr_reader :normal_clarity_raw_text, :normal_clarity

      # @param text [String, nil] HTML of the water conditions section; when
      #   nil nothing is parsed and all readers stay nil
      def initialize(text)
        return if text.nil?
        @raw_text = text
        # Variant of the markup with tabs/newlines removed and double
        # whitespace squeezed, used where line breaks get in the way.
        @raw_text_without_whitespace = text.gsub(/[\t\r\n\f]+/, '').gsub(/\s\s/, ' ')
        parse
      end

      # Populates the readers from the raw HTML.
      def parse
        html_doc = Nokogiri::HTML.fragment(@raw_text)

        html_doc_without_whitespace_chars = Nokogiri::HTML.fragment(raw_text_without_whitespace)
        water_data_link = html_doc_without_whitespace_chars.xpath('p/a[contains(text(), "Current Lake Level")]').first
        href = water_data_link.try(:[], 'href')
        # Upgrade only the leading scheme to https. An anchored, non-mutating
        # sub leaves any "http://" embedded later in the URI untouched and
        # does not modify the string handed out by Nokogiri.
        @water_data_uri = href && href.sub(%r{\Ahttp://}, 'https://')

        text_doc = html_doc.text
        text_doc_without_whitespace = html_doc_without_whitespace_chars.text

        # so many inconsistencies in the data
        # Only one alternative participates in a match; after compact the
        # array is [whole labelled text, value], hence index 1.
        @conservation_pool_elevation_raw_text = text_doc
                                                .match(/(Conservation Pool Elevation:(.*))|(Normal water level:(.*))/i)
                                                .try(:captures)
                                                .try(:compact)
                                                .try(:[], 1)

        @conservation_pool_elevation = cleanup_raw_text(
          @conservation_pool_elevation_raw_text
        )

        @fluctuation_raw_text = text_doc.match(/Fluctuation: (.*)Normal Clarity:/im).try(:captures).try(:first)
        @fluctuation = cleanup_raw_text(@fluctuation_raw_text)

        @normal_clarity_raw_text = text_doc_without_whitespace.match(/Normal Clarity: (.*)/i).try(:captures).try(:first)
        @normal_clarity = cleanup_raw_text(@normal_clarity_raw_text)
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
+ require 'nokogiri'
2
module Lakes
  class Texas
    # Parses the HTML page behind a lake's "Current Lake Level" link and
    # extracts the conservation pool elevation and how full the lake is.
    # Both values are nil when the expected markup is not present.
    class WaterDataParser
      include Lakes::Helper

      attr_reader :raw_text
      attr_reader :conservation_pool_elevation_in_ft_msl
      attr_reader :percentage_full

      # @param text [String] HTML of the water data page
      def initialize(text)
        @raw_text = text
        parse
      end

      # Populates the readers from @raw_text.
      def parse
        html_doc = Nokogiri::HTML(@raw_text)

        # The value sits in the cell following the "Conservation pool
        # elevation" header cell.
        cons_pool_elevation_header_element = html_doc.xpath('//td[contains(text(), "Conservation pool elevation")]').first
        cons_pool_elevation_root = cons_pool_elevation_header_element.try(:next_element)
        @conservation_pool_elevation_in_ft_msl = cleanup_raw_text(cons_pool_elevation_root.try(:text))
                                                 .try(:match, /([0-9\.]+)/)
                                                 .try(:captures)
                                                 .try(:first)
                                                 .try(:to_f)

        percentage_full_element = cleanup_raw_text(html_doc.css('div.page-title h2 small').try(:text))
        # The fractional part is optional as a whole, so single-digit values
        # such as "5% full" are accepted as well as "87.5% full".
        @percentage_full = percentage_full_element
                           .try(:match, /^([0-9]+(?:\.[0-9]+)?)/)
                           .try(:captures)
                           .try(:first)
                           .try(:to_f)
      end
    end
  end
end
data/lib/lakes/texas.rb CHANGED
@@ -2,15 +2,26 @@ require 'net/http'
2
2
  require 'nokogiri'
3
3
  require 'date'
4
4
  require 'try'
5
+ require 'lakes/helper'
5
6
 
6
7
  module Lakes
7
8
  class Texas
9
+ include Lakes::Helper
8
10
  attr_reader :lake_data
9
11
 
10
12
  def initialize
11
13
  @lake_data = {}
12
14
  end
13
15
 
16
# Fetches the details of every lake returned by #list, one lake at a time,
# pausing one second after each request.
#
# @return [Array] one get_details result per lake, in #list order
def all_details
  list.map do |lake_name|
    details = get_details(lake_name)
    sleep(1)
    details
  end
end
24
+
14
25
  def list
15
26
  return @lake_data.keys unless @lake_data.empty?
16
27
 
@@ -47,7 +58,7 @@ module Lakes
47
58
  main_div = html_doc.at('div#maincontent')
48
59
 
49
60
  parse_lake_characteristics(main_div, lake_data)
50
- parse_water_conditions(main_div, lake_data)
61
+ parse_water_conditions_and_data(main_div, lake_data)
51
62
  parse_reservoir_controlling_authority(main_div, lake_data)
52
63
  parse_aquatic_vegetation(main_div, lake_data)
53
64
  parse_predominant_fish_species(main_div, lake_data)
@@ -122,8 +133,31 @@ module Lakes
122
133
  lake_data[:lake_characteristics][:year_impounded] = parser.year_impounded
123
134
  end
124
135
 
125
- def parse_water_conditions(main_div, lake_data)
126
- process_simple_section(main_div, lake_data, 'Water Conditions', :water_conditions, true)
136
# Scrapes the "Water Conditions" section of a lake page into
# lake_data[:water], then follows the section's water data link (when there
# is one) to add the current elevation and percentage full.
#
# Populates:
#   lake_data[:raw_water_conditions]  raw HTML of the section
#   lake_data[:water][:conditions]    elevation / fluctuation / clarity text
#   lake_data[:water][:water_data_uri]
#   lake_data[:water][:conservation_pool_elevation_in_ft_msl]
#   lake_data[:water][:percentage_full]
# The last two are only set when the water data page could be fetched.
#
# @param main_div [Nokogiri::XML::Node] main content node of the lake page
# @param lake_data [Hash] per-lake result hash, mutated in place
def parse_water_conditions_and_data(main_div, lake_data)
  lake_data[:raw_water_conditions] = process_simple_section(main_div, lake_data, 'Water Conditions', :water_conditions, true)

  parser = WaterConditionsParser.new(lake_data[:raw_water_conditions])
  lake_data[:water] = {}
  lake_data[:water][:conditions] = {}
  lake_data[:water][:water_data_uri] = parser.water_data_uri
  lake_data[:water][:conditions][:conservation_pool_elevation] = parser.conservation_pool_elevation
  lake_data[:water][:conditions][:fluctuation] = parser.fluctuation
  lake_data[:water][:conditions][:normal_clarity] = parser.normal_clarity

  # NOTE(review): :data is initialised but the fetched values below are
  # stored directly under lake_data[:water]; confirm which was intended.
  lake_data[:water][:data] = {}
  return if parser.water_data_uri.nil?

  # Best effort: a lake whose water data site is unreachable is reported and
  # simply left without elevation / percentage values.
  content = begin
    http_get(parser.water_data_uri)
  rescue Errno::ECONNREFUSED, OpenSSL::SSL::SSLError => e
    puts "#{e.message} for #{lake_data[:name]}: #{parser.water_data_uri}"
    nil
  end

  return if content.nil?

  water_data_parser = WaterDataParser.new(content)
  lake_data[:water][:conservation_pool_elevation_in_ft_msl] = water_data_parser.conservation_pool_elevation_in_ft_msl
  lake_data[:water][:percentage_full] = water_data_parser.percentage_full
end
128
162
 
129
163
  def parse_reservoir_controlling_authority(main_div, lake_data)
@@ -259,38 +293,11 @@ module Lakes
259
293
  data
260
294
  end
261
295
 
262
- # converts this:
263
- # ../../../action/waterecords.php?WB_code=0001
264
- # into this:
265
- # http://tpwd.texas.gov/fishboat/fish/action/waterecords.php?WB_code=0001
266
- # based on this:
267
- # http://tpwd.texas.gov/fishboat/fish/recreational/lakes/abilene
268
- def convert_relative_href(href, current_url)
269
- relative_depth = href.split('..').count - 1
270
- url_parts = current_url.split('/')
271
- url_parts.slice!(-relative_depth, relative_depth)
272
- fixed_href = href.gsub('../', '')
273
- url_parts.join('/') + '/' + fixed_href
274
- end
275
-
276
- # texas lake websites use lots of non breaking spaces
277
- def cleanup_data(value)
278
- nbsp = 160.chr('UTF-8')
279
- value = value.strip.gsub(nbsp, '')
280
- value.empty? ? nil : value
281
- end
282
-
283
296
  def process_simple_section(main_div, lake_data, section_title, data_name, html)
284
297
  data = main_div.xpath("//h6[contains(text(), \"#{section_title}\")]").first
285
298
  element_type_function = html ? :to_html : :text
286
299
  content = data.try(:next_element).try(element_type_function)
287
300
  lake_data[data_name] = content
288
301
  end
289
-
290
- # texas lake pages are encoded in Windows-1252 :(
291
- def http_get(url)
292
- uri = URI(url)
293
- Net::HTTP.get(uri).encode('UTF-8', 'Windows-1252')
294
- end
295
302
  end
296
303
  end
data/lib/lakes.rb CHANGED
@@ -1,5 +1,7 @@
1
1
  require 'lakes/texas'
2
2
  require 'lakes/texas/lake_characteristics_parser'
3
+ require 'lakes/texas/water_conditions_parser'
4
+ require 'lakes/texas/water_data_parser'
3
5
  module Lakes
4
- VERSION = '0.1.2'.freeze
6
+ VERSION = '0.1.3'.freeze
5
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lakes
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shane Sherman
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-01-15 00:00:00.000000000 Z
11
+ date: 2017-05-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -30,14 +30,14 @@ dependencies:
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '10.0'
33
+ version: '12.0'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '10.0'
40
+ version: '12.0'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: minitest
43
43
  requirement: !ruby/object:Gem::Requirement
@@ -84,12 +84,15 @@ files:
84
84
  - lakes.gemspec
85
85
  - lakes.json
86
86
  - lib/lakes.rb
87
+ - lib/lakes/helper.rb
87
88
  - lib/lakes/texas.rb
88
89
  - lib/lakes/texas/lake_characteristics_parser.rb
90
+ - lib/lakes/texas/water_conditions_parser.rb
91
+ - lib/lakes/texas/water_data_parser.rb
89
92
  - lib/try.rb
90
- homepage:
93
+ homepage: https://github.com/ssherman/lakes
91
94
  licenses:
92
- - mit
95
+ - MIT
93
96
  metadata: {}
94
97
  post_install_message:
95
98
  rdoc_options: []