governator 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 745347ec6c0ff7ff406f0625162db6ef4a632906
4
+ data.tar.gz: 9106f4daff8952ddac44dbb35c34e766a1941f55
5
+ SHA512:
6
+ metadata.gz: b4567506b6c7d8d6a648308471ba6e754c2348c162f4377087d216221b78dc7b78aba972c084964e9093a963cdf2c042ee9a5f9b2ec8567616994664371adb76
7
+ data.tar.gz: 898e473b417465ad3b712cfb2169abb4d6b4c644e3f5908541cf317f6f9c72b65f077c7896b9723150abf5fb5094a185ec7ce647c50b9ada64ef2f9a9ab89dcb
data/.gitignore ADDED
@@ -0,0 +1,11 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ /.idea/
11
+ /vendor/
data/Gemfile ADDED
@@ -0,0 +1,10 @@
1
source 'https://rubygems.org'

# Pull in the dependencies declared in governator.gemspec.
gemspec

# Tooling that is only needed while developing or testing the gem.
group :development, :test do
  %w[pry rspec rubocop].each { |tool| gem tool }
end
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2017 M. Simon Borg
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,42 @@
1
+ # Governator
2
+
3
+ Scraper for data on US governors, sourced from https://www.nga.org.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'governator'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install governator
20
+
21
+ ## Usage
22
+
23
+ ```ruby
24
+ Governator.scrape!
25
+
26
+ governors = Governator.governors
27
+ ```
28
+
29
+ ## Development
30
+
31
+ After checking out the repo, run `bin/setup` to install dependencies. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
32
+
33
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
34
+
35
+ ## Contributing
36
+
37
+ Bug reports and pull requests are welcome on GitHub at https://github.com/msimonborg/governator.
38
+
39
+
40
+ ## License
41
+
42
+ The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,9 @@
1
require 'bundler/gem_tasks'
require 'rspec/core/rake_task'
require 'rubocop/rake_task'

# Defines the :spec task.
RSpec::Core::RakeTask.new(:spec)

# Defines the RuboCop task under its default name.
RuboCop::RakeTask.new

# A bare `rake` runs the specs first and the linter second.
task default: [:spec, :rubocop]
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
#!/usr/bin/env ruby

# Interactive console with the gem and its bundle already loaded.
# Add fixtures and/or initialization code below to make experimenting
# with the gem easier. A different console can be used instead, if preferred.

require 'bundler/setup'
require 'governator'
# (pry must be listed in the Gemfile for this require to succeed.)
require 'pry'

Pry.start

# require "irb"
# IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
#!/usr/bin/env bash
# Installs the gem's dependencies for local development.
# Stop on the first error, on use of an unset variable, or on a failed pipeline stage.
set -o errexit -o nounset -o pipefail
# Split words on newlines and tabs only, never on spaces.
IFS=$'\n\t'
set -o verbose -o xtrace

bundle install

# Do any other automated setup that you need to do here
@@ -0,0 +1,31 @@
1
# frozen_string_literal: true

# Make lib/ loadable so the version constant can be read before the gem is installed.
lib = File.expand_path('lib', __dir__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'governator/version'

Gem::Specification.new do |spec|
  spec.name          = 'governator'
  spec.version       = Governator::VERSION
  spec.authors       = ['M. Simon Borg']
  spec.email         = ['msimonborg@gmail.com']

  spec.summary       = 'Scraper for data on US Governors'
  spec.description   = 'Scraper for data on US Governors'
  spec.homepage      = 'https://github.com/msimonborg/governator'
  spec.license       = 'MIT'

  # Package every git-tracked file except those under test/, spec/ or features/.
  spec.files = `git ls-files -z`.split("\x0").reject do |f|
    f.match?(%r{^(test|spec|features)/})
  end
  spec.bindir        = 'exe'
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ['lib']

  spec.add_dependency 'faraday'
  spec.add_dependency 'twitter'
  spec.add_dependency 'nokogiri'

  spec.add_development_dependency 'bundler', '~> 1.14'
  spec.add_development_dependency 'rake', '~> 10.0'
end
@@ -0,0 +1,135 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'governator/page_scraper'
4
+
5
class Governator
  # Scrapes one governor's biography page.
  #
  # The page is fetched through CONN and parsed with Nokogiri when the object
  # is built. Contact details are read positionally from the page's <address>
  # elements: index 1 is the primary office; index 2 is treated as a second
  # office when its markup mentions Phone, Address or Fax, in which case the
  # party details are read from index 3 instead of index 2.
  #
  # Field readers return nil instead of raising when an expected node is
  # absent from the page.
  class BioPage < PageScraper
    attr_reader :uri

    # @param uri [String] location of the bio page, handed unchanged to CONN.get
    def initialize(uri)
      @uri = uri
      @raw = Nokogiri::HTML(CONN.get(uri).body)
      check_for_alt_office
    end

    # Records whether the third <address> element looks like an office block.
    #
    # @return [Boolean]
    def check_for_alt_office
      @alt_office_present = raw.css('address')[2].to_s.match?(/Phone|Address|Fax/)
    end

    # @return [Boolean] whether a second office was detected on the page
    def alt_office_present?
      @alt_office_present
    end

    # @return [String, nil] href of the first link inside `.ullist-wrap`
    def website
      @_website ||= begin
        link = raw.css('.ullist-wrap a').first
        link && link['href']
      end
    end

    # @return [Nokogiri::XML::Node, nil] the <address> element holding the party line
    def party_panel
      @_party_panel ||= raw.css('address')[alt_office_present? ? 3 : 2]
    end

    # @return [Nokogiri::XML::Node, nil] first <p> of the party panel whose text mentions 'Party'
    def party_paragraph
      @_party_paragraph ||= paragraphs(party_panel).find { |para| para.text.include?('Party') }
    end

    # @return [String, nil] party text with tabs, newlines, non-breaking spaces
    #   and the 'Party:' label removed
    def party
      @_party ||= begin
        para = party_paragraph
        para && para.text.delete("\t\n#{nbsp}").sub('Party:', '')
      end
    end

    # @return [Nokogiri::XML::Node, nil] the primary office <address> element
    def address_panel
      @_address_panel ||= raw.css('address')[1]
    end

    # @return [Array<String>, nil] tokens of the primary street address
    def address_array
      @_address_array ||= address_tokens(address_panel)
    end

    # @return [String, nil] primary street address as one space-joined string
    def address
      @_address ||= address_array&.join(' ')
    end

    # @return [String, nil] text of the second <p>, with commas removed
    def city
      @_city ||= paragraph_text(address_panel, 1)&.delete(',')
    end

    # @return [String, nil] text of the third <p>
    def state
      @_state ||= paragraph_text(address_panel, 2)
    end

    # @return [String, nil] text of the fourth <p>
    def zip
      @_zip ||= paragraph_text(address_panel, 3)
    end

    # @return [String, nil] cleaned text of the fifth <p>
    def phone
      @_phone ||= clean_number(paragraph_text(address_panel, 4), 'Phone: ')
    end

    # @return [String, nil] cleaned text of the sixth <p>
    def fax
      @_fax ||= clean_number(paragraph_text(address_panel, 5), 'Fax:')
    end

    # @return [String] the primary office is always labelled 'capitol'
    def office_type
      'capitol'
    end

    # @return [Nokogiri::XML::Node, nil] the second office <address> element, if one was detected
    def alt_address_panel
      @_alt_address_panel ||= raw.css('address')[2] if alt_office_present?
    end

    # @return [Array<String>, nil] tokens of the second office's street address
    def alt_address_array
      return unless alt_office_present?

      @_alt_address_array ||= address_tokens(alt_address_panel)
    end

    # The three slices below use fixed token positions; when the token list is
    # too short for a slice, the reader returns nil.

    # @return [String, nil] tokens 0..7 of the second office's address
    def alt_building
      @_alt_building ||= alt_token_slice(0..7) if alt_office_present?
    end

    # @return [String, nil] tokens 8..11 of the second office's address
    def alt_address
      @_alt_address ||= alt_token_slice(8..11) if alt_office_present?
    end

    # @return [String, nil] tokens 13..14 of the second office's address
    def alt_suite
      @_alt_suite ||= alt_token_slice(13..14) if alt_office_present?
    end

    # @return [String, nil] second office counterpart of #city
    def alt_city
      @_alt_city ||= paragraph_text(alt_address_panel, 1)&.delete(',') if alt_office_present?
    end

    # @return [String, nil] second office counterpart of #state
    def alt_state
      @_alt_state ||= paragraph_text(alt_address_panel, 2) if alt_office_present?
    end

    # @return [String, nil] second office counterpart of #zip
    def alt_zip
      @_alt_zip ||= paragraph_text(alt_address_panel, 3) if alt_office_present?
    end

    # @return [String, nil] second office counterpart of #phone
    def alt_phone
      return unless alt_office_present?

      @_alt_phone ||= clean_number(paragraph_text(alt_address_panel, 4), 'Phone: ')
    end

    # @return [String, nil] second office counterpart of #fax
    def alt_fax
      return unless alt_office_present?

      @_alt_fax ||= clean_number(paragraph_text(alt_address_panel, 5), 'Fax:')
    end

    # @return [String, nil] 'dc' when the second office's state is 'DC', otherwise 'district'
    def alt_office_type
      return unless alt_office_present?

      alt_state == 'DC' ? 'dc' : 'district'
    end

    private

    # The <p> elements of +panel+, or an empty list when the panel is missing.
    def paragraphs(panel)
      panel ? panel.css('p') : []
    end

    # Text of the <p> at +index+ inside +panel+, or nil when it does not exist.
    def paragraph_text(panel, index)
      para = paragraphs(panel)[index]
      para && para.text
    end

    # Splits the first <p> of +panel+ into address tokens after dropping tabs,
    # newlines and the 'Address:' label.
    def address_tokens(panel)
      text = paragraph_text(panel, 0)
      return unless text

      # NOTE(review): subtracting [' '] removes nothing when the separator is a
      # plain ASCII space; confirm whether a non-breaking space was intended.
      text.delete("\t\n").sub('Address:', '').split(' ') - [' ']
    end

    # Joins the given slice of the second office's address tokens with spaces.
    def alt_token_slice(range)
      tokens = alt_address_array
      slice = tokens && tokens[range]
      slice&.join(' ')
    end

    # Removes tabs, newlines and every character listed in +label_chars+ from
    # +text+, strips it, and turns the first '/' into '-'.
    def clean_number(text, label_chars)
      return unless text

      text.delete("\t\n#{label_chars}").strip.sub('/', '-')
    end
  end
end
@@ -0,0 +1,61 @@
1
+ # frozen_string_literal: true
2
+
3
class Governator
  # Splits a full display name into first, last, middle, nickname and suffix.
  #
  # Parsing works on whitespace-separated tokens:
  # * a token containing a double quote is taken as the nickname (the quote
  #   characters are kept) and removed from the remaining tokens;
  # * when the second-to-last token contains a comma, the last token is the
  #   suffix and the comma is stripped from the token before it;
  # * with three or more tokens left, two leading tokens that both contain a
  #   period are joined into the first name (e.g. "J. B.").
  #
  # Parsing is repeatable: calling #parse more than once returns the same result.
  class NameParser
    attr_reader :official_full, :first, :last, :middle, :nickname, :suffix

    # @param official_full [String] the full name as displayed
    def initialize(official_full)
      @official_full = official_full
    end

    # Parses the name and returns its parts.
    #
    # @return [Array<String, nil>] [first, last, middle, nickname, suffix]
    def parse
      split_name
      [first, last, middle, nickname, suffix]
    end

    # The working token list. The nickname and suffix tokens are removed from
    # it once they have been detected.
    #
    # @return [Array<String>]
    def name_array
      @_name_array ||= official_full.to_s.split(' ')
    end

    # Detects nickname and suffix, then assigns first, last and middle from
    # whatever tokens remain. An empty name leaves every part nil; a single
    # token is treated as the first name.
    def split_name
      detect_nickname
      detect_suffix
      return if name_array.empty?

      if name_array.length == 1
        @first = name_array.first
      elsif name_array.length == 2
        set_first_and_last
      elsif initialed_first?
        set_name_with_initialed_first
      else
        set_first_last_and_middle
      end
    end

    def set_first_and_last
      @first = name_array.first
      @last = name_array.last
    end

    # First name is the two leading initials; everything between them and the
    # last token becomes the middle name (nil when there is nothing between).
    def set_name_with_initialed_first
      between = name_array[2..-2] || []
      @first = name_array.first(2).join(' ')
      @last = name_array.last
      @middle = between.empty? ? nil : between.join(' ')
    end

    def set_first_last_and_middle
      between = name_array[1..-2] || []
      @first = name_array.first
      @last = name_array.last
      @middle = between.join(' ')
    end

    # Leaves a previously detected nickname alone when no quoted token remains,
    # so that re-running the parse does not wipe it.
    def detect_nickname
      quoted = name_array.find { |token| token.include?('"') }
      return unless quoted

      name_array.reject! { |token| token.include?('"') }
      @nickname = quoted
    end

    # Needs at least two tokens: the one carrying the comma and the suffix itself.
    def detect_suffix
      return unless name_array.length >= 2 && name_array[-2].include?(',')

      # Store the comma-free token back so the comma does not leak into the last name.
      name_array[-2] = name_array[-2].delete(',')
      @suffix = name_array.pop
    end

    private

    # True when both of the first two tokens contain a period.
    def initialed_first?
      name_array.first(2).all? { |token| token.include?('.') }
    end
  end
end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
class Governator
  # Base class for scrapers that wrap a piece of parsed markup.
  class PageScraper
    # The object handed to the constructor.
    attr_reader :raw

    # @param raw [Object] the markup this scraper reads from
    def initialize(raw)
      @raw = raw
    end

    # The text Nokogiri produces for the `&nbsp;` entity, computed once per instance.
    #
    # @return [String]
    def nbsp
      return @_nbsp if @_nbsp

      @_nbsp = Nokogiri::HTML('&nbsp;').text
    end
  end
end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'governator/page_scraper'
4
+
5
class Governator
  # Wraps one governor panel node from the index page and exposes the values
  # read from it.
  class Panel < PageScraper
    # Display names that get a longer official form in #state.
    OFFICIAL_STATE_NAMES = {
      'Northern Mariana Islands' => 'Commonwealth of the Northern Mariana Islands',
      'Virgin Islands' => 'United States Virgin Islands'
    }.freeze

    # @return [String] `src` of the first image inside `.governors-img`
    def image
      @_image ||= raw.css('.governors-img img').first['src']
    end

    # @return [String] `href` of the first link inside `.governors-state`
    def bio_page
      @_bio_page ||= raw.css('.governors-state a').first['href']
    end

    # @return [String] link text without the leading 'Governor ' title
    def governor_name
      # NOTE(review): the original replaced one space-like literal with ' ';
      # that is only meaningful if the first literal was a non-breaking space,
      # so the inherited nbsp helper is used to make that explicit. Confirm
      # against the upstream source.
      @_governor_name ||= raw.css('.governors-state a')
                             .first
                             .text
                             .sub('Governor ', '')
                             .gsub(nbsp, ' ')
    end

    # @return [String] the heading text, expanded to the longer official name
    #   for the two territories listed in OFFICIAL_STATE_NAMES
    def state
      heading = raw.css('.governors-state h3').first.text
      OFFICIAL_STATE_NAMES.fetch(heading, heading)
    end
  end
end
@@ -0,0 +1,3 @@
1
# frozen_string_literal: true

class Governator
  # Release version of the gem; governator.gemspec reads it for spec.version.
  VERSION = '0.1.0'.freeze
end
data/lib/governator.rb ADDED
@@ -0,0 +1,112 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'faraday'
4
+ require 'nokogiri'
5
+ require 'twitter'
6
+
7
+ require 'governator/bio_page'
8
+ require 'governator/name_parser'
9
+ require 'governator/panel'
10
+ require 'governator/version'
11
+
12
# One governor's record, scraped from BASE_URI.
#
# The class itself acts as the registry: Governator.scrape! walks the index
# page, builds one instance per panel and stores each in Governator.governors.
class Governator
  BASE_URI = 'https://www.nga.org'.freeze
  CONN = Faraday.new(url: BASE_URI)

  class << self
    # @return [Array<Governator>] every governor saved so far
    def governors
      @_governors ||= []
    end

    # @return [Nokogiri::HTML::Document] the parsed index page, fetched once
    def index_page
      @_index_page ||= begin
        response = CONN.get('/cms/governors/bios')
        Nokogiri::HTML(response.body)
      end
    end

    # @return [Array<Panel>] one Panel per governor panel node on the index page
    def panels
      @_panels ||= index_page.css('.panel.panel-default.governors').map { |node| Panel.new(node) }
    end

    # Empties the registry and rebuilds it from the panels, printing one
    # progress line per governor.
    #
    # @return [Array<Governator>]
    def scrape!
      governors.clear
      panels.each do |panel|
        scraped = create(panel)
        puts "Scraped #{scraped.official_full} of #{scraped.state_name}"
      end

      governors
    end

    # Builds a governor from +panel+ and adds it to the registry.
    #
    # @return [Governator]
    def create(panel)
      governor = new(panel)
      governor.build
      governor.save
      governor
    end

    # Despite the name, this returns an Array of Hashes rather than a JSON string.
    #
    # @return [Array<Hash>]
    def to_json
      governors.map(&:to_h)
    end
  end

  attr_reader :panel, :state_name, :bio_page, :official_full, :first, :last,
              :middle, :nickname, :suffix, :url, :party, :office_locations

  # @param panel [Panel] the index page panel for this governor
  def initialize(panel)
    @panel = panel
  end

  # Fetches the bio page and fills in every attribute.
  #
  # @return [Governator] self
  def build
    @bio_page = BioPage.new(panel.bio_page)
    @state_name = panel.state
    @official_full = panel.governor_name
    @url = bio_page.website
    @party = bio_page.party

    name_parts = NameParser.new(official_full).parse
    @first, @last, @middle, @nickname, @suffix = name_parts
    build_office_locations
    self
  end

  # @return [String] BASE_URI followed by the panel's image path
  def photo_url
    @_photo_url ||= "#{BASE_URI}#{panel.image}"
  end

  # Collects the primary office and, when the bio page has one, the second office.
  def build_office_locations
    @office_locations = [primary_office]
    @office_locations.push(secondary_office) if bio_page.alt_office_present?
  end

  # @return [Hash] contact details of the primary office
  def primary_office
    {
      address: bio_page.address,
      city: bio_page.city,
      state: bio_page.state,
      zip: bio_page.zip,
      phone: bio_page.phone,
      fax: bio_page.fax,
      office_type: bio_page.office_type
    }
  end

  # @return [Hash] contact details of the second office
  def secondary_office
    {
      address: bio_page.alt_address,
      city: bio_page.alt_city,
      state: bio_page.alt_state,
      zip: bio_page.alt_zip,
      phone: bio_page.alt_phone,
      fax: bio_page.alt_fax,
      office_type: bio_page.alt_office_type
    }
  end

  # Adds this governor to the class-level registry.
  #
  # @return [Governator] self
  def save
    self.class.governors.push(self)
    self
  end

  # @return [Hash] the exported fields; the parsed name parts are not included
  def to_h
    {
      photo_url: photo_url,
      state_name: state_name,
      official_full: official_full,
      url: url,
      party: party,
      office_locations: office_locations
    }
  end

  # @return [String] a single-line summary of the record
  def inspect
    fields = [
      "panel=#{panel}",
      "photo_url=\"#{photo_url}\"",
      "state_name=\"#{state_name}\"",
      "bio_page=#{bio_page}",
      "official_full=\"#{official_full}\"",
      "url=\"#{url}\"",
      "party=\"#{party}\"",
      "office_locations=\"#{office_locations}\""
    ]
    "#<Governator #{fields.join(' ')}>"
  end
end
metadata ADDED
@@ -0,0 +1,128 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: governator
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - M. Simon Borg
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2017-06-20 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: faraday
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: twitter
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: nokogiri
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: bundler
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '1.14'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '1.14'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rake
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '10.0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '10.0'
83
+ description: Scraper for data on US Governors
84
+ email:
85
+ - msimonborg@gmail.com
86
+ executables: []
87
+ extensions: []
88
+ extra_rdoc_files: []
89
+ files:
90
+ - ".gitignore"
91
+ - Gemfile
92
+ - LICENSE.txt
93
+ - README.md
94
+ - Rakefile
95
+ - bin/console
96
+ - bin/setup
97
+ - governator.gemspec
98
+ - lib/governator.rb
99
+ - lib/governator/bio_page.rb
100
+ - lib/governator/name_parser.rb
101
+ - lib/governator/page_scraper.rb
102
+ - lib/governator/panel.rb
103
+ - lib/governator/version.rb
104
+ homepage: https://github.com/msimonborg/governator
105
+ licenses:
106
+ - MIT
107
+ metadata: {}
108
+ post_install_message:
109
+ rdoc_options: []
110
+ require_paths:
111
+ - lib
112
+ required_ruby_version: !ruby/object:Gem::Requirement
113
+ requirements:
114
+ - - ">="
115
+ - !ruby/object:Gem::Version
116
+ version: '0'
117
+ required_rubygems_version: !ruby/object:Gem::Requirement
118
+ requirements:
119
+ - - ">="
120
+ - !ruby/object:Gem::Version
121
+ version: '0'
122
+ requirements: []
123
+ rubyforge_project:
124
+ rubygems_version: 2.6.11
125
+ signing_key:
126
+ specification_version: 4
127
+ summary: Scraper for data on US Governors
128
+ test_files: []