lakes 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4fd4734a4458713c26f87380650f9f997b69ee47
4
- data.tar.gz: 98432482a89cac25e45637a1acd4a50d4d3fd5e3
3
+ metadata.gz: d4e064b1da9183c688a5e5c72310404c1632ddbe
4
+ data.tar.gz: a45be37ae9ae49c384676ffa687541cc6d1edab9
5
5
  SHA512:
6
- metadata.gz: 1e95cffa65e7e8778f7ae086dfc651d4b1f1dcd0e602b57fdad0a942745e0be6eff3e81a660020951184317811af5527d88ab477c1355df22f40c660576dadb2
7
- data.tar.gz: ed6fb9047856f64ea9ab52fbda4ea31e6895132998f2e06ce4877d649136968fd2d5ac0e7431e00afd94dd206f909481d228dfc44127ebcff243ee171ecca2d6
6
+ metadata.gz: 74d1344a011c7d724a2f0b7f1acc074acc4328446694d52df375e35496005270ff9d44e1beefe69f88da865aa273c040f7f66c9b5b16fb77d906c1646060c8e0
7
+ data.tar.gz: 1dbf6042cecaa440ec9379fdf5da56dfcdc9929ee0312613946ccc6c928f2c53cce3d56e509f0e37675d9baf15fe0b60250ca22a8c0b2c837a0a10076cf70f32
data/lakes.gemspec CHANGED
@@ -10,8 +10,8 @@ Gem::Specification.new do |spec|
10
10
 
11
11
  spec.summary = 'This gem parses lake details from various government websites'
12
12
  spec.description = 'I wrote this gem to originally parse texas lake data'
13
- spec.homepage = nil
14
- spec.license = 'mit'
13
+ spec.homepage = 'https://github.com/ssherman/lakes'
14
+ spec.license = 'MIT'
15
15
 
16
16
  spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
17
17
  spec.bindir = "exe"
@@ -19,7 +19,7 @@ Gem::Specification.new do |spec|
19
19
  spec.require_paths = ['lib']
20
20
 
21
21
  spec.add_development_dependency "bundler", "~> 1.11"
22
- spec.add_development_dependency "rake", "~> 10.0"
22
+ spec.add_development_dependency "rake", "~> 12.0"
23
23
  spec.add_development_dependency "minitest", "~> 5.0"
24
24
  spec.add_development_dependency 'nokogiri', "~> 1.7"
25
25
  end
@@ -0,0 +1,35 @@
1
module Lakes
  # Helpers shared by the lake scrapers: fetching a page, tidying scraped
  # strings and resolving "../"-style relative links.
  module Helper
    # U+00A0 (non-breaking space); the texas lake websites use lots of them.
    NBSP = 160.chr('UTF-8').freeze

    # Fetches +url+ and returns the body transcoded to UTF-8.
    #
    # texas lake pages are encoded in Windows-1252 :(
    # NOTE(review): the body is always treated as Windows-1252, including for
    # any other site fetched through this helper - confirm that is intended.
    #
    # @param url [String] absolute URL to fetch
    # @return [String] response body as UTF-8
    def http_get(url)
      uri = URI(url)
      Net::HTTP.get(uri).encode('UTF-8', 'Windows-1252')
    end

    # Removes non-breaking spaces and surrounding whitespace from +value+.
    #
    # The non-breaking spaces are removed *before* stripping, so whitespace
    # that sat next to one (e.g. "\u00A0 Bass") does not survive the cleanup.
    #
    # @param value [String] scraped text
    # @return [String, nil] the cleaned text, or nil when nothing is left
    def cleanup_data(value)
      cleaned = value.gsub(NBSP, '').strip
      cleaned.empty? ? nil : cleaned
    end

    # Collapses every run of whitespace into a single space and strips the ends.
    #
    # Written with a plain nil guard so this module does not rely on the
    # project's `try` extension having been loaded first.
    #
    # @param raw_text [String, nil]
    # @return [String, nil] nil when +raw_text+ is nil
    def cleanup_raw_text(raw_text)
      raw_text && raw_text.gsub(/\s+/, ' ').strip
    end

    # converts this:
    #   ../../../action/waterecords.php?WB_code=0001
    # into this:
    #   http://tpwd.texas.gov/fishboat/fish/action/waterecords.php?WB_code=0001
    # based on this:
    #   http://tpwd.texas.gov/fishboat/fish/recreational/lakes/abilene
    #
    # @param href [String] link that may start with one or more "../" segments
    # @param current_url [String] URL of the page the link was found on
    # @return [String] +current_url+ with one trailing path segment dropped
    #   per "../", followed by the remainder of +href+
    def convert_relative_href(href, current_url)
      # Count exactly the "../" segments that are removed below, so a ".."
      # inside a file name or query string cannot inflate the depth.
      relative_depth = href.scan('../').length
      url_parts = current_url.split('/')
      url_parts.slice!(-relative_depth, relative_depth)
      fixed_href = href.gsub('../', '')
      url_parts.join('/') + '/' + fixed_href
    end
  end
end
@@ -1,57 +1,61 @@
1
- class LakeCharacteristicsParser
2
- attr_reader :raw_text, :location_desc
3
- attr_reader :surface_area_raw_text, :surface_area_in_acres
4
- attr_reader :max_depth_raw_text, :max_depth_in_feet
5
- attr_reader :year_impounded_raw_text, :year_impounded
6
-
7
- def initialize(text)
8
- @raw_text = text
9
- parse
10
- end
11
-
12
- def parse
13
- @location_desc = @raw_text.match(/^location:(.*)(surface area)|(surface acres)|(maximum depth|impounded):/im).captures.first
14
- @surface_area_raw_text = @raw_text.match(/surface (area|acres):(.*)/i).try(:captures).try(:[], 1)
15
- @max_depth_raw_text = @raw_text.match(/maximum depth:(.*)/i).try(:captures).try(:first)
16
- @year_impounded_raw_text = @raw_text.match(/impounded:(.*)/im).try(:captures).try(:first)
17
-
18
- @location_desc = cleanup_raw_text(@location_desc)
19
-
20
- @surface_area_in_acres = cleanup_raw_text(@surface_area_raw_text)
21
- .try(:match, /^([0-9,]+)/)
22
- .try(:captures)
23
- .try(:first)
24
- .try(:delete, ',')
25
- .try(:to_i)
26
-
27
- @max_depth_in_feet = cleanup_raw_text(@max_depth_raw_text)
28
- .try(:match, /^([0-9,]+)/)
29
- .try(:captures)
30
- .try(:first)
31
- .try(:delete, ',')
32
- .try(:to_i)
33
-
34
- # need to handle bad data like Lake Fryer which is:
35
- # Maximum depth: Average 13 feet, maximum 25 feet
36
- if @max_depth_in_feet.nil?
37
- @max_depth_in_feet = cleanup_raw_text(@max_depth_raw_text)
38
- .try(:match, /maximum ([0-9,]+) feet/i)
39
- .try(:captures)
40
- .try(:first)
41
- .try(:delete, ',')
42
- .try(:to_i)
1
module Lakes
  class Texas
    # Pulls the location, surface area, maximum depth and impoundment year out
    # of the free-text "lake characteristics" blurb of a lake page.
    #
    # Each field keeps its raw captured text (+*_raw_text+) alongside the
    # parsed value; anything that cannot be found is left as nil.
    class LakeCharacteristicsParser
      # Text after "Location:" up to the next known field label (or the end of
      # the text). The labels are grouped so the alternation applies to the
      # terminator only; ungrouped, a bare "surface acres" matched on its own
      # and produced a nil location.
      LOCATION_PATTERN =
        /^location:(.*?)(?:surface (?:area|acres)|maximum depth:|impounded:|\z)/im
      SURFACE_AREA_PATTERN = /surface (area|acres):(.*)/i
      MAX_DEPTH_PATTERN = /maximum depth:(.*)/i
      YEAR_IMPOUNDED_PATTERN = /impounded:(.*)/im

      attr_reader :raw_text, :location_desc
      attr_reader :surface_area_raw_text, :surface_area_in_acres
      attr_reader :max_depth_raw_text, :max_depth_in_feet
      attr_reader :year_impounded_raw_text, :year_impounded

      # @param text [String, nil] text of the lake characteristics section;
      #   with nil every reader simply stays nil
      def initialize(text)
        @raw_text = text
        parse unless text.nil?
      end

      # Populates all readers from +raw_text+.
      def parse
        @location_desc = cleanup_raw_text(capture(@raw_text, LOCATION_PATTERN, 0))
        @surface_area_raw_text = capture(@raw_text, SURFACE_AREA_PATTERN, 1)
        @max_depth_raw_text = capture(@raw_text, MAX_DEPTH_PATTERN, 0)
        @year_impounded_raw_text = capture(@raw_text, YEAR_IMPOUNDED_PATTERN, 0)

        @surface_area_in_acres = integer_from(@surface_area_raw_text, /^([0-9,]+)/)

        # need to handle bad data like Lake Fryer which is:
        #   Maximum depth: Average 13 feet, maximum 25 feet
        @max_depth_in_feet = integer_from(@max_depth_raw_text, /^([0-9,]+)/) ||
                             integer_from(@max_depth_raw_text, /maximum ([0-9,]+) feet/i)

        @year_impounded = integer_from(@year_impounded_raw_text, /([0-9,]+)/)
      end

      # Collapses whitespace runs to single spaces and strips the ends.
      #
      # @param raw_text [String, nil]
      # @return [String, nil] nil when +raw_text+ is nil
      def cleanup_raw_text(raw_text)
        raw_text && raw_text.gsub(/\s+/, ' ').strip
      end

      private

      # Returns capture group number +index+ (zero-based) of +pattern+ in
      # +text+, or nil when +text+ is nil or the pattern does not match.
      def capture(text, pattern, index)
        match = text && text.match(pattern)
        match && match.captures[index]
      end

      # Cleans +raw+, takes the first capture of +pattern+, drops thousands
      # separators and converts to Integer; nil when there is no match.
      def integer_from(raw, pattern)
        digits = capture(cleanup_raw_text(raw), pattern, 0)
        digits && digits.delete(',').to_i
      end
    end
  end
end
@@ -0,0 +1,57 @@
1
+ require 'nokogiri'
2
module Lakes
  class Texas
    # Parses the HTML of a lake page's "Water Conditions" section, e.g.:
    #
    #   <a href="http://waterdatafortexas.org/reservoirs/individual/belton">Current Lake Level</a>
    #   Conservation Pool Elevation: 594 ft. msl
    #   Fluctuation: 3-5 feet
    #   Normal Clarity: Moderate
    #
    # Every value is exposed both as the raw captured text and as a
    # whitespace-normalised string. When constructed with nil nothing is
    # parsed and all readers return nil.
    class WaterConditionsParser
      include Lakes::Helper

      attr_reader :raw_text, :raw_text_without_whitespace, :water_data_uri
      attr_reader :conservation_pool_elevation_raw_text, :conservation_pool_elevation
      # NOTE(review): nothing in this class assigns this reader.
      attr_reader :conservation_pool_elevation_in_ft_msl
      attr_reader :fluctuation_raw_text, :fluctuation
      attr_reader :normal_clarity_raw_text, :normal_clarity

      # @param text [String, nil] HTML of the water conditions section
      def initialize(text)
        return if text.nil?

        @raw_text = text
        # Drop tabs/newlines/form feeds, then squeeze whitespace pairs to one space.
        @raw_text_without_whitespace = text.gsub(/[\t\r\n\f]+/, '').gsub(/\s\s/, ' ')
        parse
      end

      # Fills in the link, elevation, fluctuation and clarity readers.
      def parse
        full_fragment = Nokogiri::HTML.fragment(@raw_text)
        compact_fragment = Nokogiri::HTML.fragment(raw_text_without_whitespace)

        extract_water_data_uri(compact_fragment)

        full_text = full_fragment.text
        compact_text = compact_fragment.text

        # so many inconsistencies in the data
        elevation_match = full_text.match(/(Conservation Pool Elevation:(.*))|(Normal water level:(.*))/i)
        @conservation_pool_elevation_raw_text = elevation_match.try(:captures).try(:compact).try(:[], 1)
        @conservation_pool_elevation = cleanup_raw_text(@conservation_pool_elevation_raw_text)

        fluctuation_match = full_text.match(/Fluctuation: (.*)Normal Clarity:/im)
        @fluctuation_raw_text = fluctuation_match.try(:captures).try(:first)
        @fluctuation = cleanup_raw_text(@fluctuation_raw_text)

        clarity_match = compact_text.match(/Normal Clarity: (.*)/i)
        @normal_clarity_raw_text = clarity_match.try(:captures).try(:first)
        @normal_clarity = cleanup_raw_text(@normal_clarity_raw_text)
      end

      private

      # Reads the href of the "Current Lake Level" link and rewrites an
      # http:// address to https://.
      def extract_water_data_uri(fragment)
        level_link = fragment.xpath('p/a[contains(text(), "Current Lake Level")]').first
        @water_data_uri = level_link.try(:[], 'href')
        return unless @water_data_uri && @water_data_uri.start_with?('http://')

        @water_data_uri.gsub!('http://', 'https://')
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
+ require 'nokogiri'
2
module Lakes
  class Texas
    # Parses the HTML page behind a lake's "Current Lake Level" link and
    # extracts the conservation pool elevation and how full the lake is.
    # Values that cannot be found are left as nil.
    class WaterDataParser
      include Lakes::Helper

      # Leading integer or decimal number. The decimal part is optional, so a
      # single-digit value such as "5" is accepted (the earlier pattern
      # required at least two digits).
      PERCENTAGE_PATTERN = /^([0-9]+(?:\.[0-9]+)?)/

      attr_reader :raw_text
      attr_reader :conservation_pool_elevation_in_ft_msl
      attr_reader :percentage_full

      # @param text [String] HTML of the water data page
      def initialize(text)
        @raw_text = text
        parse
      end

      # Populates +conservation_pool_elevation_in_ft_msl+ and +percentage_full+.
      def parse
        html_doc = Nokogiri::HTML(@raw_text)

        # The value is read from the element following the table cell whose
        # text contains "Conservation pool elevation".
        header_cell = html_doc.xpath('//td[contains(text(), "Conservation pool elevation")]').first
        elevation_text = cleanup_raw_text(header_cell.try(:next_element).try(:text))
        @conservation_pool_elevation_in_ft_msl = elevation_text
                                                 .try(:match, /([0-9\.]+)/)
                                                 .try(:captures)
                                                 .try(:first)
                                                 .try(:to_f)

        # The percentage is the number at the start of the page title's <small> text.
        percentage_text = cleanup_raw_text(html_doc.css('div.page-title h2 small').try(:text))
        @percentage_full = percentage_text
                           .try(:match, PERCENTAGE_PATTERN)
                           .try(:captures)
                           .try(:first)
                           .try(:to_f)
      end
    end
  end
end
data/lib/lakes/texas.rb CHANGED
@@ -2,15 +2,26 @@ require 'net/http'
2
2
  require 'nokogiri'
3
3
  require 'date'
4
4
  require 'try'
5
+ require 'lakes/helper'
5
6
 
6
7
  module Lakes
7
8
  class Texas
9
+ include Lakes::Helper
8
10
  attr_reader :lake_data
9
11
 
10
12
  def initialize
11
13
  @lake_data = {}
12
14
  end
13
15
 
16
# Collects the details of every lake named by #list, in order.
#
# Lakes are looked up one at a time with a one-second pause after each
# lookup (presumably to go easy on the remote site).
#
# @return [Array] one #get_details result per lake
def all_details
  list.map do |lake_name|
    details = get_details(lake_name)
    sleep(1)
    details
  end
end
24
+
14
25
  def list
15
26
  return @lake_data.keys unless @lake_data.empty?
16
27
 
@@ -47,7 +58,7 @@ module Lakes
47
58
  main_div = html_doc.at('div#maincontent')
48
59
 
49
60
  parse_lake_characteristics(main_div, lake_data)
50
- parse_water_conditions(main_div, lake_data)
61
+ parse_water_conditions_and_data(main_div, lake_data)
51
62
  parse_reservoir_controlling_authority(main_div, lake_data)
52
63
  parse_aquatic_vegetation(main_div, lake_data)
53
64
  parse_predominant_fish_species(main_div, lake_data)
@@ -122,8 +133,31 @@ module Lakes
122
133
  lake_data[:lake_characteristics][:year_impounded] = parser.year_impounded
123
134
  end
124
135
 
125
- def parse_water_conditions(main_div, lake_data)
126
- process_simple_section(main_div, lake_data, 'Water Conditions', :water_conditions, true)
136
# Parses the "Water Conditions" section of a lake page into
# lake_data[:water] and, when that section links to a water data page,
# fetches the page and adds the current elevation and percentage full.
#
# The section's raw HTML is also kept in lake_data[:raw_water_conditions].
# Connection-refused and SSL errors while fetching are reported on stdout
# and leave the extra figures unset.
#
# @param main_div [Nokogiri::XML::Node] main content node of the lake page
# @param lake_data [Hash] per-lake result hash, updated in place
def parse_water_conditions_and_data(main_div, lake_data)
  raw_conditions = process_simple_section(main_div, lake_data, 'Water Conditions', :water_conditions, true)
  lake_data[:raw_water_conditions] = raw_conditions

  # A debugging File.write into test/data/water_conditions/ used to sit
  # here; it ran on every call and failed wherever that directory did not
  # exist, so it has been removed.
  parser = WaterConditionsParser.new(raw_conditions)
  lake_data[:water] = {
    conditions: {
      conservation_pool_elevation: parser.conservation_pool_elevation,
      fluctuation: parser.fluctuation,
      normal_clarity: parser.normal_clarity
    },
    water_data_uri: parser.water_data_uri,
    # NOTE(review): :data is created but the figures below are stored
    # directly under :water - confirm which layout consumers expect.
    data: {}
  }
  return if parser.water_data_uri.nil?

  content = begin
    http_get(parser.water_data_uri)
  rescue Errno::ECONNREFUSED, OpenSSL::SSL::SSLError => e
    puts "#{e.message} for #{lake_data[:name]}: #{parser.water_data_uri}"
    nil
  end
  return if content.nil?

  water_data_parser = WaterDataParser.new(content)
  lake_data[:water][:conservation_pool_elevation_in_ft_msl] = water_data_parser.conservation_pool_elevation_in_ft_msl
  # Previously this stored the elevation a second time instead of the percentage.
  lake_data[:water][:percentage_full] = water_data_parser.percentage_full
end
128
162
 
129
163
  def parse_reservoir_controlling_authority(main_div, lake_data)
@@ -259,38 +293,11 @@ module Lakes
259
293
  data
260
294
  end
261
295
 
262
- # converts this:
263
- # ../../../action/waterecords.php?WB_code=0001
264
- # into this:
265
- # http://tpwd.texas.gov/fishboat/fish/action/waterecords.php?WB_code=0001
266
- # based on this:
267
- # http://tpwd.texas.gov/fishboat/fish/recreational/lakes/abilene
268
- def convert_relative_href(href, current_url)
269
- relative_depth = href.split('..').count - 1
270
- url_parts = current_url.split('/')
271
- url_parts.slice!(-relative_depth, relative_depth)
272
- fixed_href = href.gsub('../', '')
273
- url_parts.join('/') + '/' + fixed_href
274
- end
275
-
276
- # texas lake websites use lots of non breaking spaces
277
- def cleanup_data(value)
278
- nbsp = 160.chr('UTF-8')
279
- value = value.strip.gsub(nbsp, '')
280
- value.empty? ? nil : value
281
- end
282
-
283
296
  def process_simple_section(main_div, lake_data, section_title, data_name, html)
284
297
  data = main_div.xpath("//h6[contains(text(), \"#{section_title}\")]").first
285
298
  element_type_function = html ? :to_html : :text
286
299
  content = data.try(:next_element).try(element_type_function)
287
300
  lake_data[data_name] = content
288
301
  end
289
-
290
- # texas lake pages are encoded in Windows-1252 :(
291
- def http_get(url)
292
- uri = URI(url)
293
- Net::HTTP.get(uri).encode('UTF-8', 'Windows-1252')
294
- end
295
302
  end
296
303
  end
data/lib/lakes.rb CHANGED
@@ -1,5 +1,7 @@
1
1
  require 'lakes/texas'
2
2
  require 'lakes/texas/lake_characteristics_parser'
3
+ require 'lakes/texas/water_conditions_parser'
4
+ require 'lakes/texas/water_data_parser'
3
5
  module Lakes
4
- VERSION = '0.1.2'.freeze
6
+ VERSION = '0.1.3'.freeze
5
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lakes
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shane Sherman
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-01-15 00:00:00.000000000 Z
11
+ date: 2017-05-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -30,14 +30,14 @@ dependencies:
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '10.0'
33
+ version: '12.0'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '10.0'
40
+ version: '12.0'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: minitest
43
43
  requirement: !ruby/object:Gem::Requirement
@@ -84,12 +84,15 @@ files:
84
84
  - lakes.gemspec
85
85
  - lakes.json
86
86
  - lib/lakes.rb
87
+ - lib/lakes/helper.rb
87
88
  - lib/lakes/texas.rb
88
89
  - lib/lakes/texas/lake_characteristics_parser.rb
90
+ - lib/lakes/texas/water_conditions_parser.rb
91
+ - lib/lakes/texas/water_data_parser.rb
89
92
  - lib/try.rb
90
- homepage:
93
+ homepage: https://github.com/ssherman/lakes
91
94
  licenses:
92
- - mit
95
+ - MIT
93
96
  metadata: {}
94
97
  post_install_message:
95
98
  rdoc_options: []