my_representatives 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +4 -0
- data/Gemfile +3 -0
- data/LICENSE.md +8 -0
- data/README.md +60 -0
- data/lib/my_representatives/abbreviatable.rb +29 -0
- data/lib/my_representatives/act/csv_lower.rb +118 -0
- data/lib/my_representatives/act/merge_lower.rb +38 -0
- data/lib/my_representatives/act/web_index.rb +37 -0
- data/lib/my_representatives/act/web_index_row.rb +86 -0
- data/lib/my_representatives/act/web_lower.rb +65 -0
- data/lib/my_representatives/act/web_show.rb +46 -0
- data/lib/my_representatives/commonwealth/commonwealth.rb +5 -0
- data/lib/my_representatives/commonwealth/csv_email.rb +93 -0
- data/lib/my_representatives/commonwealth/csv_lower.rb +130 -0
- data/lib/my_representatives/commonwealth/csv_upper.rb +129 -0
- data/lib/my_representatives/commonwealth/merge_lower.rb +54 -0
- data/lib/my_representatives/commonwealth/merge_upper.rb +54 -0
- data/lib/my_representatives/commonwealth/web_index.rb +57 -0
- data/lib/my_representatives/commonwealth/web_lower.rb +55 -0
- data/lib/my_representatives/commonwealth/web_show.rb +99 -0
- data/lib/my_representatives/commonwealth/web_upper.rb +54 -0
- data/lib/my_representatives/electorate.rb +146 -0
- data/lib/my_representatives/errors.rb +7 -0
- data/lib/my_representatives/fileable.rb +11 -0
- data/lib/my_representatives/guessable.rb +120 -0
- data/lib/my_representatives/hashable.rb +11 -0
- data/lib/my_representatives/nsw/csv_lower.rb +115 -0
- data/lib/my_representatives/nsw/csv_upper.rb +115 -0
- data/lib/my_representatives/nsw/merge_lower.rb +41 -0
- data/lib/my_representatives/nsw/merge_upper.rb +41 -0
- data/lib/my_representatives/nsw/web_index.rb +56 -0
- data/lib/my_representatives/nsw/web_lower.rb +54 -0
- data/lib/my_representatives/nsw/web_show.rb +140 -0
- data/lib/my_representatives/nsw/web_upper.rb +54 -0
- data/lib/my_representatives/nt/web_index.rb +46 -0
- data/lib/my_representatives/nt/web_lower.rb +54 -0
- data/lib/my_representatives/nt/web_show.rb +134 -0
- data/lib/my_representatives/person.rb +197 -0
- data/lib/my_representatives/qld/csv_lower.rb +117 -0
- data/lib/my_representatives/qld/merge_lower.rb +37 -0
- data/lib/my_representatives/qld/web_index.rb +47 -0
- data/lib/my_representatives/qld/web_lower.rb +54 -0
- data/lib/my_representatives/qld/web_show.rb +146 -0
- data/lib/my_representatives/sa/csv_lower.rb +120 -0
- data/lib/my_representatives/sa/csv_upper.rb +115 -0
- data/lib/my_representatives/sa/merge_lower.rb +39 -0
- data/lib/my_representatives/sa/merge_upper.rb +39 -0
- data/lib/my_representatives/sa/web_index_lower.rb +42 -0
- data/lib/my_representatives/sa/web_index_upper.rb +43 -0
- data/lib/my_representatives/sa/web_lower.rb +54 -0
- data/lib/my_representatives/sa/web_show.rb +158 -0
- data/lib/my_representatives/sa/web_upper.rb +54 -0
- data/lib/my_representatives/static.rb +5 -0
- data/lib/my_representatives/tas/csv_lower.rb +125 -0
- data/lib/my_representatives/tas/csv_upper.rb +125 -0
- data/lib/my_representatives/version.rb +5 -0
- data/lib/my_representatives/vic/csv_lower.rb +99 -0
- data/lib/my_representatives/vic/csv_upper.rb +97 -0
- data/lib/my_representatives/vic/merge_lower.rb +37 -0
- data/lib/my_representatives/vic/merge_upper.rb +37 -0
- data/lib/my_representatives/vic/web_index.rb +58 -0
- data/lib/my_representatives/vic/web_lower.rb +54 -0
- data/lib/my_representatives/vic/web_show.rb +118 -0
- data/lib/my_representatives/vic/web_upper.rb +54 -0
- data/lib/my_representatives/wa/csv_lower.rb +144 -0
- data/lib/my_representatives/wa/csv_upper.rb +131 -0
- data/lib/my_representatives/wa/merge_lower.rb +41 -0
- data/lib/my_representatives/wa/merge_upper.rb +37 -0
- data/lib/my_representatives/wa/web_index.rb +45 -0
- data/lib/my_representatives/wa/web_lower.rb +54 -0
- data/lib/my_representatives/wa/web_show.rb +195 -0
- data/lib/my_representatives/wa/web_upper.rb +54 -0
- data/lib/my_representatives.rb +150 -0
- data/my_representatives.gemspec +25 -0
- metadata +204 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: b0f5e144e58bda1fd4a0723829000e1f962b4ab2
|
4
|
+
data.tar.gz: c7a2f8c5caafe1cd8d473c17ce5ce0c6854cab7f
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: b4c48b64e2d08ad906eb9cf48c4375d7e25ab7bcd0d2b711f1267ab11018bc2d3d7d6bc6a0a20df8421ed2975eca15a83a5f9e8698c615b093648d94e3bc8796
|
7
|
+
data.tar.gz: 3511bdfef1dcd5216f601f0d009c131c88c2c4b865f2e7864a7d5909559f4399b21ac1f52fe0437f271f798bab502f7be35578cd60578a0c1c51de70a03ed623
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.md
ADDED
@@ -0,0 +1,8 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
Copyright (c) 2017 Jeremy Tennant
|
3
|
+
|
4
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
5
|
+
|
6
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
7
|
+
|
8
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
# MyRepresentatives
|
2
|
+
Aggregates contact information for Australian State and Commonwealth Members, and Senators. Data is output as ```MyRepresentatives::Person``` and ```MyRepresentatives::Electorate``` ruby objects, or in CSV files.
|
3
|
+
|
4
|
+
## Quick Start
|
5
|
+
**Install the gem**
|
6
|
+
```ruby
|
7
|
+
$ gem install my_representatives
|
8
|
+
```
|
9
|
+
**Start IRB**
|
10
|
+
```ruby
|
11
|
+
$ irb
|
12
|
+
irb(main):001:0> require 'my_representatives'
|
13
|
+
=> true
|
14
|
+
irb(main):002:0> MyRepresentatives.new
|
15
|
+
...
|
16
|
+
```
|
17
|
+
*A CSV file will be output to 'tmp/csv_all_representatives.csv' (relative to the working directory).*
|
18
|
+
|
19
|
+
**Or perhaps something like this:**
|
20
|
+
```ruby
|
21
|
+
$ irb
|
22
|
+
require 'my_representatives'
|
23
|
+
# State of Victoria Lower House (Legislative Assembly) - from CSV
|
24
|
+
representatives = MyRepresentatives::VIC::CSVLower.new
|
25
|
+
representatives.people
|
26
|
+
# Or Victoria's Upper House (Legislative Council) - scraped from the web
|
27
|
+
representatives = MyRepresentatives::VIC::WebUpper.new
|
28
|
+
representatives.people
|
29
|
+
...
|
30
|
+
# Commonwealth Upper House (Senate) - merge CSV and Web Scraping to form a 'complete' data source
|
31
|
+
representatives = MyRepresentatives::Commonwealth::MergeUpper.new
|
32
|
+
representatives.people
|
33
|
+
...
|
34
|
+
# Northern Territory Lower House (Legislative Assembly) - Scrape from the web
|
35
|
+
representatives = MyRepresentatives::NT::WebLower.new
|
36
|
+
representatives.people
|
37
|
+
...
|
38
|
+
```
|
39
|
+
|
40
|
+
**Looking for more?**
|
41
|
+
|
42
|
+
Browse the source!
|
43
|
+
|
44
|
+
## Edge cases
|
45
|
+
Tasmania and Northern Territory governments do not, at this time, supply CSV files (although they do offer Microsoft Word documents).
|
46
|
+
|
47
|
+
#### Tasmania
|
48
|
+
This gem presently uses manually created CSV files for Tasmania's [House of Assembly](https://gist.github.com/tennantje/c94bd62340bd078b9b1ae12168f48388) and [Legislative Council](https://gist.github.com/tennantje/c662bd7a2ac44d2e5839ad73d6d0a80e) based on these official sources - [House of Assembly](http://www.parliament.tas.gov.au/HA/HAMem.doc) and [Legislative Council](http://www.parliament.tas.gov.au/LC/LCMem.doc). Neither document is regularly checked for accuracy; therefore, if you identify a change in government, please notify its author. Unfortunately there is no support for web scraping, as there is no scrapable data on the Tasmanian Parliament website at this time.
|
49
|
+
|
50
|
+
#### Northern Territory
|
51
|
+
Unfortunately there is no support for Northern Territory CSV at this time. There is, however, the ```MyRepresentatives::NT::WebLower``` class if you're happy for the data to be scraped from the NT parliament website (which is a good source of data).
|
52
|
+
|
53
|
+
## Dependencies
|
54
|
+
Some State governments (Queensland and Western Australia) supply data in Microsoft Excel's legacy XLS format (rather than the modern XLSX format - or even CSV (our preference)). MyRepresentatives uses the **excel2csv** gem to convert XLS files to CSV. At this time, excel2csv requires [JDK](http://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html) to be installed on your system.
|
55
|
+
|
56
|
+
## Accuracy of Data
|
57
|
+
Per the License, "THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND". Please be aware that scraping data from websites is problematic - structural changes to Government websites are outside the control of the authors of this gem, and have the ability to break some or all compatibility without notice. In developing this gem, I have also identified several errors in the CSV files published by official sources - so this is also a significant roadblock in ensuring the data this gem produces is accurate.
|
58
|
+
|
59
|
+
## License
|
60
|
+
MIT - For terms refer to LICENSE.md
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module MyRepresentatives
  # Mixin that converts an Australian state/territory name (or abbreviation)
  # into its upper-case abbreviation.
  module Abbreviatable

    # Upper-cased full state/territory names mapped to their abbreviations.
    STATE_NAME_ABBREVIATIONS = {
      "AUSTRALIAN CAPITAL TERRITORY" => "ACT",
      "NEW SOUTH WALES"              => "NSW",
      "NORTHERN TERRITORY"           => "NT",
      "QUEENSLAND"                   => "QLD",
      "SOUTH AUSTRALIA"              => "SA",
      "TASMANIA"                     => "TAS",
      "VICTORIA"                     => "VIC",
      "WESTERN AUSTRALIA"            => "WA"
    }.freeze

    # Returns the abbreviation for +str+.
    #
    # The input is upper-cased and stripped once, so surrounding whitespace
    # no longer defeats the abbreviation check, and nil is treated as an
    # unrecognised value instead of raising NoMethodError.
    #
    # str - a state/territory name or abbreviation (any case); may be nil.
    #
    # Returns the normalised value when Static::STATES contains it, the
    # mapped abbreviation for a known full name, otherwise "UNKNOWN".
    def state_abbreviation(str)
      normalised = str.to_s.upcase.strip

      return normalised if Static::STATES.include?(normalised)

      # "UNKNOWN" will likely trigger an error elsewhere in the app, but that's okay
      STATE_NAME_ABBREVIATIONS.fetch(normalised, "UNKNOWN")
    end

  end # Abbreviatable
end # MyRepresentatives
|
@@ -0,0 +1,118 @@
|
|
1
|
+
module MyRepresentatives
  module ACT
    # Downloads the ACT members CSV and turns each row into a Person attached
    # to an Electorate flagged via state_act_lower!.
    class CSVLower
      include MyRepresentatives::Fileable
      include MyRepresentatives::Guessable

      attr_accessor :csv_url, :csv_filename, :people, :logger

      # Downloads the CSV to csv_filename and populates people.
      # Construction therefore performs network and file I/O.
      def initialize
        @logger = Logger.new(STDOUT)
        @csv_url = "http://www.parliament.act.gov.au/__data/assets/file/0011/874235/Members-mail-merge-list-2016-1108.csv"
        @csv_filename = "tmp/csv_act_lower.csv"
        @people = []

        csv_from_url
        people_from_csv
      end


      private

      # Fetches csv_url and writes the body to csv_filename.
      #
      # The URL is opened through URI (open-uri) rather than Kernel#open, so
      # the string can never be interpreted as a local path or a "|command"
      # pipe. The download completes before the file is opened, so a failed
      # request does not leave an empty file behind.
      #
      # Raises SocketError (after logging) when the host cannot be reached.
      def csv_from_url
        create_tmp # not defined in this class; presumably supplied by Fileable

        begin
          data = URI.parse(@csv_url).open { |remote| remote.read }
          File.open(@csv_filename, "wb") { |file| file.write(data) }
        rescue SocketError => err
          @logger.debug("Unable to connect to #{@csv_url}")
          raise err
        end
      end

      # Reads csv_filename (with a header row) and appends one Person per row
      # to people.
      def people_from_csv
        CSV.foreach(@csv_filename, headers: true) do |row|

          # Setup the electorate
          electorate_name = find_electorate(row)
          electorate = Electorate.new(electorate_name)
          electorate.state_act_lower!

          # Setup the Representative (Person)
          person = Person.new(electorate)

          person.title = find_title(row)
          person.first_name = find_first_name(row)
          person.last_name = find_last_name(row)
          person.email = find_email(row)
          person.phone = find_phone(row)
          person.party_name = find_party(row)
          person.honorifics = "MLA"
          person.formal_name = format_formal_name(person)
          person.physical_address = format_physical_address(row)
          person.postal_address = format_postal_address(row)
          person.gender = guess_gender(person.title) # not defined here; presumably from Guessable
          person.preferred_name = nil
          person.salutation = nil
          person.image_url = nil
          person.homepage_url = nil

          @people << person
        end
      end

      def find_first_name(arr)
        arr["F_Name"]
      end

      # Returns the "L_Name" column with any " MLA" suffix removed, or nil
      # when the row or column is missing.
      def find_last_name(arr)
        last_name = arr && arr["L_Name"]
        last_name && last_name.gsub(" MLA", "")
      end

      # Returns the "JobTitle" column with the "Member for " prefix removed,
      # or nil when the row or column is missing.
      def find_electorate(arr)
        job_title = arr && arr["JobTitle"]
        job_title && job_title.gsub("Member for ", "")
      end

      def find_email(arr)
        arr["Email"]
      end

      def find_party(arr)
        arr["Party"]
      end

      def find_phone(arr)
        arr["Phone"]
      end

      def find_title(arr)
        arr["Title"]
      end

      # Joins title, first name, last name and honorifics with single spaces,
      # skipping missing or blank parts so a nil title does not leave a
      # leading space.
      def format_formal_name(person)
        [person.title, person.first_name, person.last_name, person.honorifics]
          .compact
          .map { |part| part.to_s.strip }
          .reject(&:empty?)
          .join(" ")
      end

      def format_physical_address(arr)
        format_address(arr)
      end

      # The CSV columns read here are the same ones used for the physical
      # address, so both addresses are built identically.
      def format_postal_address(arr)
        format_address(arr)
      end

      # Builds "<Address1>\n<City> <State> <PostCode>" from the row.
      def format_address(arr)
        "#{arr["Address1"]}\n#{arr["City"]} #{arr["State"]} #{arr["PostCode"]}"
      end

    end # CSVLower
  end # ACT
end # MyRepresentatives
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module MyRepresentatives
  module ACT
    # Combines the CSV-sourced and web-scraped people: every CSV person is
    # kept, and is enriched with homepage_url / image_url from the matching
    # web person when one is found.
    class MergeLower
      attr_accessor :csv_people, :web_people, :people

      # Builds both sources (each of which is constructed here and so does
      # its own fetching) and merges them into people.
      def initialize
        @csv_people = MyRepresentatives::ACT::CSVLower.new.people
        @web_people = MyRepresentatives::ACT::WebLower.new.people
        @people = []
        check_and_update_person
      end

      private

      # Copies homepage_url and image_url from the matching web person onto
      # each CSV person, flags the merge as successful, and appends every CSV
      # person (matched or not) to people.
      def check_and_update_person
        @csv_people.each do |person|
          match = find_web_match(person)

          if match
            person.homepage_url = match.homepage_url
            person.image_url = match.image_url
            person.successful_merge = true
          end

          @people << person
        end
      end

      # Returns the web person matching on phone, falling back to email, or
      # nil when neither matches. Phone takes priority over email.
      def find_web_match(person)
        match_on(:phone, person) || match_on(:email, person)
      end

      # Returns the first web person whose +attribute+ equals the CSV
      # person's. A missing or blank value never matches: two people who both
      # lack a phone number are not the same person.
      def match_on(attribute, person)
        value = person.public_send(attribute)
        return nil if value.nil? || value.to_s.strip.empty?

        @web_people.find { |wp| wp.public_send(attribute) == value }
      end

    end # Merge
  end # ACT
end # MyRepresentatives
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module MyRepresentatives
|
2
|
+
module ACT
|
3
|
+
class WebIndex
|
4
|
+
attr_accessor :index_url, :document, :representative_documents
|
5
|
+
|
6
|
+
def initialize
|
7
|
+
@logger = Logger.new(STDOUT)
|
8
|
+
@index_url = "http://www.parliament.act.gov.au/members/current"
|
9
|
+
@document = find_representatives
|
10
|
+
@representative_documents = []
|
11
|
+
representative_documents_from_document
|
12
|
+
end
|
13
|
+
|
14
|
+
|
15
|
+
private
|
16
|
+
|
17
|
+
def find_representatives
|
18
|
+
begin
|
19
|
+
request = open(@index_url)
|
20
|
+
Nokogiri::HTML(request)
|
21
|
+
rescue SocketError => err
|
22
|
+
@logger.debug("Unable to connect to #{@index_url}")
|
23
|
+
raise err
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
def representative_documents_from_document
|
28
|
+
table = @document.css("table.tablesorter")
|
29
|
+
|
30
|
+
table.css("tbody").css("tr").each do |row|
|
31
|
+
@representative_documents << row unless row.text.strip.empty?
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
end # WebIndex
|
36
|
+
end # ACT
|
37
|
+
end # MyRep
|
@@ -0,0 +1,86 @@
|
|
1
|
+
module MyRepresentatives
  module ACT
    # Wraps one table row of the ACT members index and exposes the fields
    # scraped from it. Every reader returns nil when the expected link, cell
    # or pattern is absent.
    class WebIndexRow
      attr_accessor :document

      # document - the Nokogiri::XML::Element for one table row.
      #
      # Raises NokogiriDocumentExpectedError when document is not an element.
      # Raises NokogiriDocumentPropertiesError when the validity check fails.
      def initialize(document)
        # Previously never assigned, so the failure branch of the validity
        # check would have raised NoMethodError on nil instead of logging.
        @logger = Logger.new(STDOUT)
        raise MyRepresentatives::NokogiriDocumentExpectedError unless document && document.is_a?(Nokogiri::XML::Element)
        @document = document
        raise MyRepresentatives::NokogiriDocumentPropertiesError unless test_document_for_validity
      end

      # The part after the comma in the first link's text ("Last, First"),
      # stripped of surrounding whitespace.
      def first_name
        name_part(1)
      end

      # The part before the comma in the first link's text, stripped.
      def last_name
        name_part(0)
      end

      # Text of the third cell.
      def electorate_name
        cell_text(2)
      end

      # Text of the fourth cell.
      def party_name
        cell_text(3)
      end

      # Whatever follows "Ph:" up to the end of that line in the fifth cell.
      def phone
        capture_from_contact_cell(/Ph:(.+?)$/)
      end

      # The leading "...@....gov.au" address in the fifth cell. The dots are
      # escaped so they match literal dots only.
      def email
        capture_from_contact_cell(/^(.+?@.+?\.gov\.au)/)
      end

      # The index row carries no address.
      def address
        nil
      end

      # The href of the row's first link.
      def homepage_url
        link = first_link
        link && link.attr("href")
      end

      private

      # First <a> in the row, or nil.
      def first_link
        @document.css("a")[0]
      end

      # Returns the stripped comma-separated part at +index+ of the first
      # link's text, or nil when the link or the part is missing.
      def name_part(index)
        link = first_link
        return nil unless link

        part = link.text.to_s.split(",")[index]
        part && part.strip
      end

      # Text of the <td> at +index+, or nil when the row has no such cell.
      def cell_text(index)
        cell = @document.css("td")[index]
        cell && cell.text
      end

      # Applies +regex+ to the fifth cell and returns its first capture,
      # stripped; nil when the cell is missing or the pattern does not match.
      def capture_from_contact_cell(regex)
        text = cell_text(4)
        return nil unless text

        match = regex.match(text)
        match && match[1].strip
      end

      # NOTE(review): if css returns a Nokogiri NodeSet (truthy even when
      # empty), this check passes for any non-nil document. Confirm whether
      # empty link/cell sets should be rejected; kept as-is so rows that were
      # accepted before are still accepted.
      def test_document_for_validity
        if @document && @document.css("a") && @document.css("td")
          true
        else
          @logger.debug("Failed to find a valid document to scrape")
          false
        end
      end

    end # WebIndexRow
  end # ACT
end # MyRepresentatives
|
@@ -0,0 +1,65 @@
|
|
1
|
+
module MyRepresentatives
  module ACT
    # Builds Person records for the ACT lower house by scraping the members
    # index, then visits each person's homepage to pick up an image URL.
    class WebLower

      attr_accessor :documents, :people

      # Scrapes the index rows into people and then enriches each person.
      # Construction therefore performs network I/O through WebIndex and
      # WebShow.
      def initialize
        @logger = Logger.new(STDOUT)
        @documents = WebIndex.new.representative_documents
        @people = []

        @documents.each do |document|
          create_person_from_row(WebIndexRow.new(document))
        end

        @people.each do |person|
          update_person(person)
        end
      end

      private

      # Creates a Person (with its Electorate) from a WebIndexRow and appends
      # it to people. Fields the index does not provide are set to nil.
      def create_person_from_row(row)
        @logger.info("Attempting to create #{row.first_name} #{row.last_name}")

        # Setup the electorate
        electorate = MyRepresentatives::Electorate.new(row.electorate_name)
        electorate.state_act_lower!

        # Setup the Representative (Person)
        person = MyRepresentatives::Person.new(electorate)
        person.first_name = row.first_name
        person.last_name = row.last_name
        person.email = row.email
        person.phone = row.phone
        person.party_name = row.party_name
        person.physical_address = row.address
        person.postal_address = row.address
        person.homepage_url = row.homepage_url
        person.honorifics = "MLA"
        person.formal_name = format_formal_name(person)
        person.title = nil
        person.gender = nil
        person.preferred_name = nil
        person.salutation = nil
        person.image_url = nil

        @people << person

      end

      # Looks up the person's image on their homepage.
      #
      # A person without a homepage URL is skipped (image_url stays nil)
      # rather than letting WebShow raise InvalidURLError and abort the
      # scrape for everybody else.
      def update_person(person)
        unless person.homepage_url.is_a?(String)
          @logger.info("No homepage url for #{person.first_name} #{person.last_name}, skipping image lookup")
          return
        end

        details_page = WebShow.new(person.homepage_url)
        person.image_url = details_page.image_url
      end

      # Joins first name, last name and honorifics with single spaces,
      # skipping missing or blank parts and trimming whitespace left over
      # from scraping.
      def format_formal_name(person)
        [person.first_name, person.last_name, person.honorifics]
          .compact
          .map { |part| part.to_s.strip }
          .reject(&:empty?)
          .join(" ")
      end

    end # WebLower
  end # ACT
end # MyRepresentatives
|
@@ -0,0 +1,46 @@
|
|
1
|
+
module MyRepresentatives
  module ACT
    # Fetches a member's detail page and exposes the image found on it.
    class WebShow

      attr_accessor :url, :document, :logger

      # url - String address of the member's page.
      #
      # Raises InvalidURLError when url is nil or not a String.
      # Raises NokogiriDocumentPropertiesError when the validity check fails.
      # Re-raises whatever the fetch raises, after logging it.
      def initialize(url)
        @logger = Logger.new(STDOUT)
        raise MyRepresentatives::InvalidURLError unless url && url.is_a?(String)
        @url = url
        fetch_document
        raise MyRepresentatives::NokogiriDocumentPropertiesError unless test_document_for_validity
      end

      # Returns the src of the first <img> inside "div.section", or nil when
      # the page has no such image.
      def image_url
        image = @document.css("div.section").css("img")[0]
        image && image.attr('src')
      end

      private

      # Downloads url and stores the parsed page in document.
      #
      # The address comes from a scraped href, so it is parsed as a URI and
      # opened through open-uri instead of Kernel#open: a value such as
      # "|command" or a local path is rejected rather than executed or read.
      # The block form closes the response handle after parsing.
      def fetch_document
        begin
          @document = URI.parse(@url).open { |page| Nokogiri::HTML(page) }
        rescue => err
          @logger.debug("Failed to connect to the url: #{@url}")
          raise err
        end
      end

      # NOTE(review): if css returns a Nokogiri NodeSet (truthy even when
      # empty), this check passes for any parsed document. Confirm whether a
      # page without the image should be rejected; kept as-is so pages that
      # were accepted before are still accepted.
      def test_document_for_validity
        if @document && @document.css("div.section") && @document.css("div.section").css("img")
          true
        else
          @logger.debug("Failed to find a valid document to scrape")
          false
        end
      end

    end # WebShow
  end # ACT
end # MyRepresentatives
|
@@ -0,0 +1,93 @@
|
|
1
|
+
module MyRepresentatives
  module Commonwealth
    # Downloads a CSV of contact details and turns each row into a Person
    # carrying name, phone and email. All other Person fields are set to nil.
    class CSVEmail
      include MyRepresentatives::Abbreviatable
      include MyRepresentatives::Fileable
      include MyRepresentatives::Guessable

      attr_accessor :csv_url, :csv_filename, :people, :logger

      # Downloads the CSV to csv_filename and populates people.
      # Construction therefore performs network and file I/O.
      def initialize
        @logger = Logger.new(STDOUT)
        # Email addresses for Commonwealth Members and Senators tend to be a little harder to find
        # At this time, this gem uses a csv manually created by https://github.com/tennantje/.
        # https://gist.github.com/tennantje/8ee584d9c534ce6a57910668b277394d
        @csv_url = "https://gist.githubusercontent.com/tennantje/8ee584d9c534ce6a57910668b277394d/raw"
        @csv_filename = "tmp/csv_commonwealth_emails.csv"
        @people = []

        csv_from_url
        people_from_csv
      end

      private

      # Fetches csv_url and writes the body to csv_filename.
      #
      # The URL is opened through URI (open-uri) rather than Kernel#open, so
      # the string can never be interpreted as a local path or a "|command"
      # pipe. The download completes before the file is opened, so a failed
      # request does not leave an empty file behind.
      #
      # Raises SocketError (after logging) when the host cannot be reached.
      def csv_from_url
        create_tmp # not defined in this class; presumably supplied by Fileable
        begin
          data = URI.parse(@csv_url).open { |remote| remote.read }
          File.open(@csv_filename, "wb") { |file| file.write(data) }
        rescue SocketError => err
          @logger.debug("Unable to connect to #{@csv_url}")
          raise err
        end
      end

      # Reads csv_filename (with a header row) and appends one Person per row
      # to people.
      def people_from_csv
        CSV.foreach(@csv_filename, headers: true) do |row|

          # Setup the electorate
          # These values don't matter, and are used to force-validate the object
          electorate_name = "Not necessary"
          electorate_state = "ACT"
          electorate = Electorate.new(electorate_name)
          electorate.commonwealth_lower!(electorate_state)

          # Setup the Representative (Person)
          person = Person.new(electorate)

          person.first_name = find_first_name(row)
          person.last_name = find_last_name(row)
          person.formal_name = find_formal_name(row)
          person.phone = find_phone(row)
          person.email = find_email(row)
          person.title = nil
          person.honorifics = nil
          person.party_name = nil
          person.salutation = nil
          person.gender = nil
          person.preferred_name = nil
          person.physical_address = nil
          person.postal_address = nil
          person.image_url = nil
          person.homepage_url = nil

          @people << person
        end
      end

      def find_first_name(arr)
        arr["first"]
      end

      def find_last_name(arr)
        arr["last"]
      end

      def find_formal_name(arr)
        arr["formal_name"]
      end

      def find_email(arr)
        arr["email"]
      end

      def find_phone(arr)
        arr["phone"]
      end

    end # CSVEmail
  end # Commonwealth
end # MyRepresentatives
|