my_representatives 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. checksums.yaml +7 -0
  2. data/.gitignore +4 -0
  3. data/Gemfile +3 -0
  4. data/LICENSE.md +8 -0
  5. data/README.md +60 -0
  6. data/lib/my_representatives/abbreviatable.rb +29 -0
  7. data/lib/my_representatives/act/csv_lower.rb +118 -0
  8. data/lib/my_representatives/act/merge_lower.rb +38 -0
  9. data/lib/my_representatives/act/web_index.rb +37 -0
  10. data/lib/my_representatives/act/web_index_row.rb +86 -0
  11. data/lib/my_representatives/act/web_lower.rb +65 -0
  12. data/lib/my_representatives/act/web_show.rb +46 -0
  13. data/lib/my_representatives/commonwealth/commonwealth.rb +5 -0
  14. data/lib/my_representatives/commonwealth/csv_email.rb +93 -0
  15. data/lib/my_representatives/commonwealth/csv_lower.rb +130 -0
  16. data/lib/my_representatives/commonwealth/csv_upper.rb +129 -0
  17. data/lib/my_representatives/commonwealth/merge_lower.rb +54 -0
  18. data/lib/my_representatives/commonwealth/merge_upper.rb +54 -0
  19. data/lib/my_representatives/commonwealth/web_index.rb +57 -0
  20. data/lib/my_representatives/commonwealth/web_lower.rb +55 -0
  21. data/lib/my_representatives/commonwealth/web_show.rb +99 -0
  22. data/lib/my_representatives/commonwealth/web_upper.rb +54 -0
  23. data/lib/my_representatives/electorate.rb +146 -0
  24. data/lib/my_representatives/errors.rb +7 -0
  25. data/lib/my_representatives/fileable.rb +11 -0
  26. data/lib/my_representatives/guessable.rb +120 -0
  27. data/lib/my_representatives/hashable.rb +11 -0
  28. data/lib/my_representatives/nsw/csv_lower.rb +115 -0
  29. data/lib/my_representatives/nsw/csv_upper.rb +115 -0
  30. data/lib/my_representatives/nsw/merge_lower.rb +41 -0
  31. data/lib/my_representatives/nsw/merge_upper.rb +41 -0
  32. data/lib/my_representatives/nsw/web_index.rb +56 -0
  33. data/lib/my_representatives/nsw/web_lower.rb +54 -0
  34. data/lib/my_representatives/nsw/web_show.rb +140 -0
  35. data/lib/my_representatives/nsw/web_upper.rb +54 -0
  36. data/lib/my_representatives/nt/web_index.rb +46 -0
  37. data/lib/my_representatives/nt/web_lower.rb +54 -0
  38. data/lib/my_representatives/nt/web_show.rb +134 -0
  39. data/lib/my_representatives/person.rb +197 -0
  40. data/lib/my_representatives/qld/csv_lower.rb +117 -0
  41. data/lib/my_representatives/qld/merge_lower.rb +37 -0
  42. data/lib/my_representatives/qld/web_index.rb +47 -0
  43. data/lib/my_representatives/qld/web_lower.rb +54 -0
  44. data/lib/my_representatives/qld/web_show.rb +146 -0
  45. data/lib/my_representatives/sa/csv_lower.rb +120 -0
  46. data/lib/my_representatives/sa/csv_upper.rb +115 -0
  47. data/lib/my_representatives/sa/merge_lower.rb +39 -0
  48. data/lib/my_representatives/sa/merge_upper.rb +39 -0
  49. data/lib/my_representatives/sa/web_index_lower.rb +42 -0
  50. data/lib/my_representatives/sa/web_index_upper.rb +43 -0
  51. data/lib/my_representatives/sa/web_lower.rb +54 -0
  52. data/lib/my_representatives/sa/web_show.rb +158 -0
  53. data/lib/my_representatives/sa/web_upper.rb +54 -0
  54. data/lib/my_representatives/static.rb +5 -0
  55. data/lib/my_representatives/tas/csv_lower.rb +125 -0
  56. data/lib/my_representatives/tas/csv_upper.rb +125 -0
  57. data/lib/my_representatives/version.rb +5 -0
  58. data/lib/my_representatives/vic/csv_lower.rb +99 -0
  59. data/lib/my_representatives/vic/csv_upper.rb +97 -0
  60. data/lib/my_representatives/vic/merge_lower.rb +37 -0
  61. data/lib/my_representatives/vic/merge_upper.rb +37 -0
  62. data/lib/my_representatives/vic/web_index.rb +58 -0
  63. data/lib/my_representatives/vic/web_lower.rb +54 -0
  64. data/lib/my_representatives/vic/web_show.rb +118 -0
  65. data/lib/my_representatives/vic/web_upper.rb +54 -0
  66. data/lib/my_representatives/wa/csv_lower.rb +144 -0
  67. data/lib/my_representatives/wa/csv_upper.rb +131 -0
  68. data/lib/my_representatives/wa/merge_lower.rb +41 -0
  69. data/lib/my_representatives/wa/merge_upper.rb +37 -0
  70. data/lib/my_representatives/wa/web_index.rb +45 -0
  71. data/lib/my_representatives/wa/web_lower.rb +54 -0
  72. data/lib/my_representatives/wa/web_show.rb +195 -0
  73. data/lib/my_representatives/wa/web_upper.rb +54 -0
  74. data/lib/my_representatives.rb +150 -0
  75. data/my_representatives.gemspec +25 -0
  76. metadata +204 -0
@@ -0,0 +1,131 @@
module MyRepresentatives
  module WA
    # Builds Person records for the WA upper house from the spreadsheet
    # published at +xls_url+.
    #
    # Constructing an instance downloads the XLS file, converts it to CSV at
    # +csv_filename+ and fills +people+ with one Person per row that has a
    # first name, surname and region.
    class CSVUpper
      include MyRepresentatives::Fileable
      include MyRepresentatives::Guessable

      attr_accessor :xls_url, :xls_filename, :csv_filename, :people

      def initialize
        self.xls_url = "http://www.parliament.wa.gov.au/WebCMS/WebCMS.nsf/resources/file-data-for-legislative-council-members/$file/DATA%20FOR%20LEGISLATIVE%20COUNCIL%20MEMBERS%2027042017.xls"
        self.xls_filename = "tmp/csv_wa_upper.xls"
        self.csv_filename = "tmp/csv_wa_upper.csv"
        self.people = []

        csv_from_url
        people_from_csv
      end

      private

      # Downloads the spreadsheet and rewrites it as CSV.
      #
      # The temporary XLS file is removed even when the download or the
      # conversion raises, so a failed run does not leave it behind.
      def csv_from_url
        # NOTE(review): create_tmp is not defined in this class; presumably it
        # comes from Fileable and prepares the tmp/ directory - confirm.
        create_tmp

        begin
          # NOTE(review): opening a URL through Kernel#open relies on open-uri
          # being loaded elsewhere; newer Rubies require URI.open - confirm the
          # supported Ruby versions before changing it.
          open(@xls_filename, "wb") do |file|
            open(@xls_url) do |uri|
              file.write(uri.read)
            end
          end

          excel = Excel2CSV.read(@xls_filename)
          CSV.open(@csv_filename, "wb") do |csv|
            excel.each_with_index do |row, index|
              # Only the header row needs its embedded newlines removed.
              csv << (index == 0 ? strip_header_newlines(row) : row)
            end
          end
        ensure
          # rm_f does not raise when the file is missing, so it cannot mask
          # the error that brought us here.
          FileUtils.rm_f(@xls_filename)
        end
      end

      # Returns a copy of the header row with newlines removed from every
      # cell. Blank (nil) cells are passed through untouched instead of
      # raising NoMethodError.
      def strip_header_newlines(row)
        row.map { |col| col.respond_to?(:gsub) ? col.gsub("\n", "") : col }
      end

      # Reads the generated CSV and appends a Person to +people+ for each
      # populated row.
      def people_from_csv
        CSV.foreach(@csv_filename, headers: true) do |row|
          # Fix for blank rows
          next unless row["FIRST_NAME"] && row["SURNAME"] && row["REGION"]

          # Setup the electorate
          electorate = Electorate.new(find_electorate(row))
          electorate.state_wa_upper!

          # Setup the Representative (Person)
          person = Person.new(electorate)

          person.title = find_title(row)
          person.first_name = find_first_name(row)
          person.last_name = find_last_name(row)
          person.preferred_name = find_preferred_name(row)
          person.email = find_email(row)
          person.phone = find_phone(row)
          person.party_name = find_party(row)
          person.physical_address = find_physical_address(row)
          person.postal_address = find_postal_address(row)
          # CSV header lookup is case-sensitive and every other column in this
          # sheet is read in upper case, so the title column is "TITLE".
          person.gender = guess_gender(row["TITLE"])
          person.formal_name = format_formal_name(row, person)
          person.honorifics = "MLC"
          person.salutation = nil
          person.image_url = nil
          person.homepage_url = nil

          @people << person
        end
      end

      def find_first_name(arr)
        arr["FIRST_NAME"]
      end

      def find_last_name(arr)
        arr["SURNAME"]
      end

      def find_preferred_name(arr)
        arr["PREFERRED_NAME"]
      end

      # "Hon" takes precedence over the plain title whenever the HON column
      # has a value.
      def find_title(arr)
        arr["HON"].nil? ? arr["TITLE"] : "Hon"
      end

      def find_electorate(arr)
        arr["REGION"]
      end

      # No email column is read from this spreadsheet.
      def find_email(arr)
        nil
      end

      def find_party(arr)
        arr["PARTY"]
      end

      # No phone column is read from this spreadsheet.
      def find_phone(arr)
        nil
      end

      def format_formal_name(arr, person)
        arr["HON"].nil? ? "#{person.title} #{person.first_name} #{person.last_name}" : "Hon #{person.first_name} #{person.last_name}"
      end

      def find_physical_address(arr)
        arr["ELEC_ADDRESS"]
      end

      def find_postal_address(arr)
        arr["ELEC_ADDRESS_MAILING"]
      end

    end # CSVUpper
  end # WA
end # MyRepresentatives
@@ -0,0 +1,41 @@
module MyRepresentatives
  module WA
    # Combines the lower-house people read from CSV with the details scraped
    # from the web. The CSV person is the record that is kept; matching web
    # details are copied onto it.
    class MergeLower
      # Fields copied from the matching web person onto the CSV person.
      MERGED_FIELDS = [:phone, :party_name, :preferred_name, :homepage_url, :image_url].freeze

      attr_accessor :csv_people, :web_people, :people

      def initialize
        @csv_people = MyRepresentatives::WA::CSVLower.new.people
        @web_people = MyRepresentatives::WA::WebLower.new.people
        @people = []
        check_and_update_person
      end

      private

      # Appends every CSV person to +people+, enriched with web details when
      # a matching web person is found.
      def check_and_update_person
        @csv_people.each do |person|
          web_person = find_web_person(person)
          merge_web_details(person, web_person) if web_person
          @people << person
        end
      end

      # Looks for the web person by email, then by first/last name, then by
      # preferred/last name. Returns nil when nothing matches.
      def find_web_person(person)
        find_by_email(person) ||
          @web_people.find { |wp| wp.first_name == person.first_name && wp.last_name == person.last_name } ||
          @web_people.find { |wp| wp.preferred_name == person.first_name && wp.last_name == person.last_name }
      end

      # Email is only used as a key when the CSV person actually has one;
      # otherwise nil == nil would pair the person with the first web person
      # that has no email.
      def find_by_email(person)
        email = person.email
        return nil if email.to_s.strip.empty?

        @web_people.find { |wp| wp.email == email }
      end

      # Copies the web values onto the CSV person. A field the scrape could
      # not extract (nil) leaves the CSV value in place instead of erasing it.
      def merge_web_details(person, web_person)
        MERGED_FIELDS.each do |field|
          value = web_person.public_send(field)
          person.public_send("#{field}=", value) unless value.nil?
        end
        person.successful_merge = true
      end

    end # MergeLower
  end # WA
end # MyRepresentatives
@@ -0,0 +1,37 @@
module MyRepresentatives
  module WA
    # Combines the upper-house people read from CSV with the details scraped
    # from the web. The CSV person is the record that is kept; matching web
    # details are copied onto it.
    class MergeUpper
      # Fields copied from the matching web person onto the CSV person.
      MERGED_FIELDS = [:phone, :party_name, :preferred_name, :homepage_url, :image_url].freeze

      attr_accessor :csv_people, :web_people, :people

      def initialize
        @csv_people = MyRepresentatives::WA::CSVUpper.new.people
        @web_people = MyRepresentatives::WA::WebUpper.new.people
        @people = []
        check_and_update_person
      end

      private

      # Appends every CSV person to +people+, enriched with web details when
      # a matching web person is found.
      def check_and_update_person
        @csv_people.each do |person|
          web_person = find_web_person(person)
          merge_web_details(person, web_person) if web_person
          @people << person
        end
      end

      # Looks for the web person by first/last name, then by preferred/last
      # name. Returns nil when nothing matches.
      def find_web_person(person)
        @web_people.find { |wp| wp.first_name == person.first_name && wp.last_name == person.last_name } ||
          @web_people.find { |wp| wp.preferred_name == person.first_name && wp.last_name == person.last_name }
      end

      # Copies the web values onto the CSV person. A field the scrape could
      # not extract (nil) leaves the CSV value in place instead of erasing it.
      def merge_web_details(person, web_person)
        MERGED_FIELDS.each do |field|
          value = web_person.public_send(field)
          person.public_send("#{field}=", value) unless value.nil?
        end
        person.successful_merge = true
      end

    end # MergeUpper
  end # WA
end # MyRepresentatives
@@ -0,0 +1,45 @@
module MyRepresentatives
  module WA
    # Fetches the WA Parliament member index page and collects the profile
    # URLs it links to, split into +lower_urls+ (anchors whose parent text
    # contains "MLA") and +upper_urls+ (parent text contains "MLC").
    class WebIndex
      attr_accessor :index_url, :document, :upper_urls, :lower_urls

      def initialize
        @logger = Logger.new(STDOUT)
        @lower_urls = []
        @upper_urls = []

        @index_url = "http://www.parliament.wa.gov.au/parliament/memblist.nsf/WAllMembers"
        @document = find_representatives
        representative_urls_from_document
      end


      private

      # Downloads and parses the index page.
      #
      # The block form of open closes the response handle once parsing is
      # done. A SocketError is logged and re-raised.
      def find_representatives
        open(@index_url) { |response| Nokogiri::HTML(response) }
      rescue SocketError => err
        @logger.debug("Unable to connect to #{@index_url}")
        raise err
      end

      # Walks every member link in the document and records its absolute URL
      # once per house.
      #
      # Extraction stays best-effort: an unexpected error is logged rather
      # than raised, and anchors without an href are skipped so one malformed
      # link no longer discards all the links that follow it.
      def representative_urls_from_document
        @document.css('//a[@href*="WAllMembersFlat"]').each do |link|
          href = link['href']
          next if href.nil?

          # Backslashes in the href are converted to forward slashes.
          url = 'http://www.parliament.wa.gov.au' + href.gsub("\\", "/")
          label = link.parent.text

          @lower_urls << url if label.include?("MLA") && !@lower_urls.include?(url)
          @upper_urls << url if label.include?("MLC") && !@upper_urls.include?(url)
        end
      rescue StandardError => err
        @logger.warn("Unable to extract member URLs from #{@index_url}: #{err.class}: #{err.message}")
      end

    end # WebIndex
  end # WA
end # MyRepresentatives
@@ -0,0 +1,54 @@
module MyRepresentatives
  module WA
    # Scrapes every lower-house profile URL listed by WebIndex and turns each
    # page into a Person stored in +people+.
    class WebLower
      include MyRepresentatives::Abbreviatable
      include MyRepresentatives::Guessable

      attr_accessor :urls, :people

      def initialize
        @logger = Logger.new(STDOUT)
        @urls = WebIndex.new.lower_urls
        @people = []

        @urls.each { |profile_url| create_person_from_document(WebShow.new(profile_url)) }
      end

      private

      # Builds a Person from one parsed profile page and appends it to
      # +people+.
      def create_person_from_document(document)
        @logger.info("Attempting to create #{document.formal_name}")

        # Setup the electorate
        seat = MyRepresentatives::Electorate.new(document.electorate_name).tap do |electorate|
          electorate.state_wa_lower!
        end

        # Setup the Representative (Person)
        member = MyRepresentatives::Person.new(seat).tap do |person|
          person.formal_name = document.formal_name
          person.first_name = document.first_name
          person.last_name = document.last_name
          person.title = document.title
          person.email = document.email
          person.phone = document.phone
          person.party_name = document.party_name
          person.image_url = document.image_url
          person.homepage_url = document.homepage_url
          person.preferred_name = document.preferred_name
          person.gender = guess_gender(document.formal_name)
          # The profile page supplies none of the remaining fields.
          person.physical_address = nil
          person.postal_address = nil
          person.honorifics = nil
          person.salutation = nil
        end

        @people << member
      end

    end # WebLower
  end # WA
end # MyRepresentatives
@@ -0,0 +1,195 @@
module MyRepresentatives
  module WA
    # Wraps one member profile page and exposes the details that can be
    # extracted from it. Every reader returns nil when its value cannot be
    # found on the page.
    class WebShow
      include MyRepresentatives::Guessable

      attr_accessor :url, :document, :logger

      # Fetches and parses the page at +url+.
      #
      # Raises MyRepresentatives::InvalidURLError unless +url+ is a String,
      # re-raises any error from the fetch, and raises
      # MyRepresentatives::NokogiriDocumentPropertiesError when the document
      # fails the validity check.
      def initialize(url)
        @logger = Logger.new(STDOUT)
        raise MyRepresentatives::InvalidURLError unless url && url.is_a?(String)
        @url = url
        fetch_document
        raise MyRepresentatives::NokogiriDocumentPropertiesError unless test_document_for_validity
      end

      # The profile page itself is used as the member's homepage.
      def homepage_url
        @url
      end

      # Text of the first <font> element inside div#content.
      def formal_name
        @document.css("div#content").css("font")[0].text
      rescue NoMethodError
        nil
      end

      def title
        guess_title(formal_name)
      rescue NoMethodError
        nil
      end

      def first_name
        guess_first(formal_name)
      rescue NoMethodError
        nil
      end

      def last_name
        guess_last(formal_name)
      rescue NoMethodError
        nil
      end

      def preferred_name
        guess_preferred(formal_name)
      rescue NoMethodError
        nil
      end

      # Text between "Electorate:" and the next "Inaugural" (or, failing
      # that, the next "Speeches") label.
      def electorate_name
        first_capture(/Electorate:(.+?)Inaugural/, /Electorate:(.+?)Speeches/)
      rescue NoMethodError
        nil
      end

      # Text between "Party:" and the next "Electorate" (or, failing that,
      # the next "Inaugural") label.
      def party_name
        first_capture(/Party:(.+?)Electorate/, /Party:(.+?)Inaugural/)
      rescue NoMethodError
        nil
      end

      # Address following "Email:" up to and including "wa.gov.au". The dots
      # are escaped so they only match literal dots.
      def email
        first_capture(/Email:\s(.+?wa\.gov\.au)/)
      rescue NoMethodError
        nil
      end

      # Absolute URL of the first image in div#content whose src contains
      # "(MemberPics)". Images without a src are skipped rather than making
      # the whole lookup fail.
      def image_url
        sources = @document.css("div#content").css("img").map { |img| img.attr("src") }
        member_pic = sources.find { |src| src && src.include?('(MemberPics)') }
        return nil unless member_pic

        "http://www.parliament.wa.gov.au/Parliament/Memblist.nsf/#{member_pic.gsub("../","")}"
      rescue StandardError
        nil
      end

      # Phone number following "Ph:", reduced to digits only.
      #
      # The patterns are tried in order, strictest first. Every result is
      # prefixed with "08" unless it already starts with it, except the 4-3-3
      # grouping, which is returned as its bare digits.
      def phone
        text = details_text

        landline = text.match(/Ph:\s([0-9]{4} [0-9]{4})Email/) ||
                   text.match(/Ph:\s(08 [0-9]{4} [0-9]{4})Email/)
        return with_area_code(landline[1]) if landline

        # This branch used to read a capture from the pattern that had just
        # failed to match, so it always fell into the rescue and returned nil.
        # The original did not add the "08" prefix here, so none is added.
        four_three_three = text.match(/Ph:\s([0-9]{4} [0-9]{3} [0-9]{3})Email/)
        return four_three_three[1].gsub(/\D/, '') if four_three_three

        loose = text.match(/Ph:\s(.+?)$/) || text.match(/Ph:\s(.+?)Email/m)
        loose ? with_area_code(loose[1]) : nil
      rescue NoMethodError
        nil
      end

      private

      # Downloads and parses the profile page into @document. The block form
      # of open closes the response handle once parsing is done; any failure
      # is logged and re-raised.
      def fetch_document
        @document = open(@url) { |response| Nokogiri::HTML(response) }
      rescue => err
        @logger.debug("Failed to connect to the url: #{@url}")
        raise err
      end

      # Placeholder check: every fetched document is currently accepted.
      def test_document_for_validity
        true
      end

      # Stripped, concatenated text of every <font size="2"> element, which
      # is where the labelled details are searched for.
      def details_text
        @document.css("font[size='2']").text.strip
      end

      # First capture group of the first pattern that matches the details
      # text, or nil when none of them match.
      def first_capture(*patterns)
        text = details_text
        patterns.each do |pattern|
          found = text.match(pattern)
          return found[1] if found
        end
        nil
      end

      # Strips non-digits and prepends "08" unless already present.
      def with_area_code(raw)
        digits = raw.gsub(/\D/, '')
        digits.start_with?("08") ? digits : "08" + digits
      end

    end # WebShow
  end # WA
end # MyRepresentatives
@@ -0,0 +1,54 @@
module MyRepresentatives
  module WA
    # Scrapes every upper-house profile URL listed by WebIndex and turns each
    # page into a Person stored in +people+.
    class WebUpper
      include MyRepresentatives::Abbreviatable
      include MyRepresentatives::Guessable

      attr_accessor :urls, :people

      def initialize
        @logger = Logger.new(STDOUT)
        @urls = WebIndex.new.upper_urls
        @people = []

        @urls.each { |profile_url| create_person_from_document(WebShow.new(profile_url)) }
      end

      private

      # Builds a Person from one parsed profile page and appends it to
      # +people+.
      def create_person_from_document(document)
        @logger.info("Attempting to create #{document.formal_name}")

        # Setup the electorate
        seat = MyRepresentatives::Electorate.new(document.electorate_name).tap do |electorate|
          electorate.state_wa_upper!
        end

        # Setup the Representative (Person)
        member = MyRepresentatives::Person.new(seat).tap do |person|
          person.formal_name = document.formal_name
          person.first_name = document.first_name
          person.last_name = document.last_name
          person.title = document.title
          person.email = document.email
          person.phone = document.phone
          person.party_name = document.party_name
          person.image_url = document.image_url
          person.homepage_url = document.homepage_url
          person.preferred_name = document.preferred_name
          person.gender = guess_gender(document.formal_name)
          # The profile page supplies none of the remaining fields.
          person.physical_address = nil
          person.postal_address = nil
          person.honorifics = nil
          person.salutation = nil
        end

        @people << member
      end

    end # WebUpper
  end # WA
end # MyRepresentatives