my_representatives 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +4 -0
  3. data/Gemfile +3 -0
  4. data/LICENSE.md +8 -0
  5. data/README.md +60 -0
  6. data/lib/my_representatives/abbreviatable.rb +29 -0
  7. data/lib/my_representatives/act/csv_lower.rb +118 -0
  8. data/lib/my_representatives/act/merge_lower.rb +38 -0
  9. data/lib/my_representatives/act/web_index.rb +37 -0
  10. data/lib/my_representatives/act/web_index_row.rb +86 -0
  11. data/lib/my_representatives/act/web_lower.rb +65 -0
  12. data/lib/my_representatives/act/web_show.rb +46 -0
  13. data/lib/my_representatives/commonwealth/commonwealth.rb +5 -0
  14. data/lib/my_representatives/commonwealth/csv_email.rb +93 -0
  15. data/lib/my_representatives/commonwealth/csv_lower.rb +130 -0
  16. data/lib/my_representatives/commonwealth/csv_upper.rb +129 -0
  17. data/lib/my_representatives/commonwealth/merge_lower.rb +54 -0
  18. data/lib/my_representatives/commonwealth/merge_upper.rb +54 -0
  19. data/lib/my_representatives/commonwealth/web_index.rb +57 -0
  20. data/lib/my_representatives/commonwealth/web_lower.rb +55 -0
  21. data/lib/my_representatives/commonwealth/web_show.rb +99 -0
  22. data/lib/my_representatives/commonwealth/web_upper.rb +54 -0
  23. data/lib/my_representatives/electorate.rb +146 -0
  24. data/lib/my_representatives/errors.rb +7 -0
  25. data/lib/my_representatives/fileable.rb +11 -0
  26. data/lib/my_representatives/guessable.rb +120 -0
  27. data/lib/my_representatives/hashable.rb +11 -0
  28. data/lib/my_representatives/nsw/csv_lower.rb +115 -0
  29. data/lib/my_representatives/nsw/csv_upper.rb +115 -0
  30. data/lib/my_representatives/nsw/merge_lower.rb +41 -0
  31. data/lib/my_representatives/nsw/merge_upper.rb +41 -0
  32. data/lib/my_representatives/nsw/web_index.rb +56 -0
  33. data/lib/my_representatives/nsw/web_lower.rb +54 -0
  34. data/lib/my_representatives/nsw/web_show.rb +140 -0
  35. data/lib/my_representatives/nsw/web_upper.rb +54 -0
  36. data/lib/my_representatives/nt/web_index.rb +46 -0
  37. data/lib/my_representatives/nt/web_lower.rb +54 -0
  38. data/lib/my_representatives/nt/web_show.rb +134 -0
  39. data/lib/my_representatives/person.rb +197 -0
  40. data/lib/my_representatives/qld/csv_lower.rb +117 -0
  41. data/lib/my_representatives/qld/merge_lower.rb +37 -0
  42. data/lib/my_representatives/qld/web_index.rb +47 -0
  43. data/lib/my_representatives/qld/web_lower.rb +54 -0
  44. data/lib/my_representatives/qld/web_show.rb +146 -0
  45. data/lib/my_representatives/sa/csv_lower.rb +120 -0
  46. data/lib/my_representatives/sa/csv_upper.rb +115 -0
  47. data/lib/my_representatives/sa/merge_lower.rb +39 -0
  48. data/lib/my_representatives/sa/merge_upper.rb +39 -0
  49. data/lib/my_representatives/sa/web_index_lower.rb +42 -0
  50. data/lib/my_representatives/sa/web_index_upper.rb +43 -0
  51. data/lib/my_representatives/sa/web_lower.rb +54 -0
  52. data/lib/my_representatives/sa/web_show.rb +158 -0
  53. data/lib/my_representatives/sa/web_upper.rb +54 -0
  54. data/lib/my_representatives/static.rb +5 -0
  55. data/lib/my_representatives/tas/csv_lower.rb +125 -0
  56. data/lib/my_representatives/tas/csv_upper.rb +125 -0
  57. data/lib/my_representatives/version.rb +5 -0
  58. data/lib/my_representatives/vic/csv_lower.rb +99 -0
  59. data/lib/my_representatives/vic/csv_upper.rb +97 -0
  60. data/lib/my_representatives/vic/merge_lower.rb +37 -0
  61. data/lib/my_representatives/vic/merge_upper.rb +37 -0
  62. data/lib/my_representatives/vic/web_index.rb +58 -0
  63. data/lib/my_representatives/vic/web_lower.rb +54 -0
  64. data/lib/my_representatives/vic/web_show.rb +118 -0
  65. data/lib/my_representatives/vic/web_upper.rb +54 -0
  66. data/lib/my_representatives/wa/csv_lower.rb +144 -0
  67. data/lib/my_representatives/wa/csv_upper.rb +131 -0
  68. data/lib/my_representatives/wa/merge_lower.rb +41 -0
  69. data/lib/my_representatives/wa/merge_upper.rb +37 -0
  70. data/lib/my_representatives/wa/web_index.rb +45 -0
  71. data/lib/my_representatives/wa/web_lower.rb +54 -0
  72. data/lib/my_representatives/wa/web_show.rb +195 -0
  73. data/lib/my_representatives/wa/web_upper.rb +54 -0
  74. data/lib/my_representatives.rb +150 -0
  75. data/my_representatives.gemspec +25 -0
  76. metadata +204 -0
@@ -0,0 +1,54 @@
1
module MyRepresentatives
  module NSW
    # Collects the members of the NSW lower house.
    #
    # Construction asks WebIndex for the lower-house member URLs, loads each
    # one through WebShow and stores one Person per page in #people.
    class WebLower
      include MyRepresentatives::Abbreviatable
      include MyRepresentatives::Guessable

      attr_accessor :urls, :people

      # Performs the whole scrape. Any error raised while loading a member
      # page propagates and aborts the run.
      def initialize
        @logger = Logger.new(STDOUT)
        @urls = WebIndex.new.lower_urls
        @people = []

        @urls.each { |member_url| create_person_from_document(WebShow.new(member_url)) }
      end

      private

      # Converts one scraped member page into a Person and appends it to
      # @people.
      #
      # @param document [WebShow] the scraped member page
      # @return [Array<Person>] the updated people list
      def create_person_from_document(document)
        @logger.info("Attempting to create #{document.formal_name}")

        # Setup the electorate
        seat = Electorate.new(document.electorate_name)
        seat.state_nsw_lower!

        # Setup the Representative (Person); fields are copied in a fixed order.
        member = MyRepresentatives::Person.new(seat)
        scraped_fields = %i[
          email phone party_name image_url homepage_url
          formal_name first_name last_name preferred_name
        ]
        scraped_fields.each do |field|
          member.public_send("#{field}=", document.public_send(field))
        end

        # These details are not taken from the NSW page, so they are cleared.
        %i[title physical_address postal_address honorifics gender salutation].each do |field|
          member.public_send("#{field}=", nil)
        end

        @people << member
      end
    end # WebLower
  end # NSW
end # MyRepresentatives
@@ -0,0 +1,140 @@
1
module MyRepresentatives
  module NSW
    # Scrapes a single member page of the NSW Parliament website.
    #
    # Each public reader pulls one detail out of the parsed page. When the
    # expected markup is missing, the resulting NoMethodError is rescued and
    # the reader returns nil instead of raising.
    class WebShow
      include MyRepresentatives::Guessable

      attr_accessor :url, :document, :logger

      # Downloads and parses the member page.
      #
      # @param url [String] address of the member page
      # @raise [MyRepresentatives::InvalidURLError] if url is not a String, or
      #   is not a URL that can be opened
      # @raise [MyRepresentatives::NokogiriDocumentPropertiesError] if the
      #   document fails validation (validation currently always passes)
      def initialize(url)
        @logger = Logger.new(STDOUT)
        raise MyRepresentatives::InvalidURLError unless url && url.is_a?(String)
        @url = url
        fetch_document
        raise MyRepresentatives::NokogiriDocumentPropertiesError unless test_document_for_validity
      end

      # @return [String] the URL this page was loaded from
      def homepage_url
        @url
      end

      # @return [String, nil] the part of the page heading before the first
      #   comma, titleized (String#titleize comes from ActiveSupport)
      def formal_name
        unformatted_name.split(",")[0].titleize
      rescue NoMethodError
        nil
      end

      # @return [String, nil] first name guessed from the formal name
      #   (guess_first is presumably provided by Guessable)
      def first_name
        guess_first(formal_name)
      rescue NoMethodError
        nil
      end

      # @return [String, nil] preferred name guessed from the formal name
      def preferred_name
        guess_preferred(formal_name)
      rescue NoMethodError
        nil
      end

      # @return [String, nil] last name guessed from the formal name
      def last_name
        guess_last(formal_name)
      rescue NoMethodError
        nil
      end

      # Nothing is scraped for the party on this page, so the result is
      # always nil.
      #
      # @return [nil]
      def party_name
        nil
      end

      # @return [String, nil] text of the cell following the "P" label in the
      #   contact block, or nil when there is no such label
      def phone
        labels = @document.css(".contact_office").css("th.label_cell")
        phone_label = labels.find { |cell| cell.text == "P" }
        phone_label ? phone_label.next_element.text : nil
      rescue NoMethodError
        nil
      end

      # @return [String, nil] the parliament.nsw.gov.au address from the
      #   contact block with its "mailto:" prefix removed
      def email
        # `at` yields a single node (or nil), never an Array.
        link = @document.css(".contact_office").at('a:contains("@parliament.nsw.gov.au")')
        link.attr('href').strip.gsub("mailto:", "")
      rescue NoMethodError
        nil
      end

      # @return [String, nil] the first office title without its
      #   " Electorate Office" suffix, or "All NSW" when the first office is
      #   not an electorate office
      def electorate_name
        office_title = @document.css(".office").css("div.title")[0].text.strip
        return "All NSW" unless office_title.include?("Electorate Office")

        office_title.gsub(" Electorate Office", "")
      rescue NoMethodError
        nil
      end

      # @return [String, nil] absolute URL of the member's portrait
      def image_url
        "https://www.parliament.nsw.gov.au" + @document.css("div.img_position").css("img").attr('src').value.strip
      rescue NoMethodError
        nil
      end

      private

      # Loads @url into @document.
      #
      # The URL is opened through URI rather than Kernel#open, so a scraped
      # value such as "|command" can never be executed and the call keeps
      # working on Rubies where open-uri no longer patches Kernel#open. The
      # block form closes the response once it has been parsed.
      # Assumes open-uri is already loaded, as the previous open(url) call
      # required.
      def fetch_document
        uri = URI.parse(@url)
        raise MyRepresentatives::InvalidURLError unless uri.respond_to?(:open)

        @document = uri.open { |response| Nokogiri::HTML(response) }
      rescue => err
        @logger.debug("Failed to connect to the url: #{@url}")
        raise err
      end

      # Document validation is currently disabled; every document is accepted.
      #
      # @return [true]
      def test_document_for_validity
        true
      end

      # @return [String, nil] the raw, stripped page heading
      def unformatted_name
        @document.css("h1.page-title").text.strip
      rescue NoMethodError
        nil
      end
    end # WebShow
  end # NSW
end # MyRepresentatives
@@ -0,0 +1,54 @@
1
module MyRepresentatives
  module NSW
    # Collects the members of the NSW upper house.
    #
    # Construction asks WebIndex for the upper-house member URLs, loads each
    # one through WebShow and stores one Person per page in #people.
    class WebUpper
      include MyRepresentatives::Abbreviatable
      include MyRepresentatives::Guessable

      attr_accessor :urls, :people

      # Performs the whole scrape. Any error raised while loading a member
      # page propagates and aborts the run.
      def initialize
        @logger = Logger.new(STDOUT)
        @urls = WebIndex.new.upper_urls
        @people = []

        @urls.each { |member_url| create_person_from_document(WebShow.new(member_url)) }
      end

      private

      # Converts one scraped member page into a Person and appends it to
      # @people.
      #
      # @param document [WebShow] the scraped member page
      # @return [Array<Person>] the updated people list
      def create_person_from_document(document)
        @logger.info("Attempting to create #{document.formal_name}")

        # Setup the electorate
        seat = Electorate.new(document.electorate_name)
        seat.state_nsw_upper!

        # Setup the Representative (Person); fields are copied in a fixed order.
        member = MyRepresentatives::Person.new(seat)
        scraped_fields = %i[
          email phone party_name image_url homepage_url
          formal_name first_name last_name preferred_name
        ]
        scraped_fields.each do |field|
          member.public_send("#{field}=", document.public_send(field))
        end

        # These details are not taken from the NSW page, so they are cleared.
        %i[title physical_address postal_address honorifics gender salutation].each do |field|
          member.public_send("#{field}=", nil)
        end

        @people << member
      end
    end # WebUpper
  end # NSW
end # MyRepresentatives
@@ -0,0 +1,46 @@
1
module MyRepresentatives
  module NT
    # Reads the NT Parliament "members by name" index and collects the URL of
    # every member page it links to.
    class WebIndex
      attr_accessor :index_url, :document, :lower_urls

      # Downloads the index page and fills #lower_urls. All network access
      # happens here.
      def initialize
        @logger = Logger.new(STDOUT)
        @lower_urls = []

        # Lower House
        @index_url = "https://parliament.nt.gov.au/members-of-parliament/members-by-name"
        @document = find_representatives
        representative_urls_from_document
      end

      private

      # Fetches and parses the index page.
      #
      # The URL is opened through URI rather than Kernel#open, and the block
      # form closes the response after parsing. Assumes open-uri is already
      # loaded, as the previous open(url) call required.
      #
      # @return [Nokogiri::HTML::Document]
      # @raise [StandardError] whatever the download or parse raised, after
      #   the failure has been logged
      def find_representatives
        URI.parse(@index_url).open { |response| Nokogiri::HTML(response) }
      rescue StandardError => err
        @logger.debug("Unable to connect to #{@index_url}")
        raise err
      end

      # Appends the href of the first link under each h3 heading inside
      # ".content-body" to @lower_urls. Headings without a link, or links
      # without an href, are skipped explicitly instead of relying on a
      # blanket rescue; a missing document yields no URLs rather than a
      # NoMethodError on nil.
      def representative_urls_from_document
        headings = @document ? @document.css(".content-body").css("h3") : []

        headings.each do |heading|
          link = heading.at_css("a")
          href = link && link["href"]
          @lower_urls << href.strip if href
        end
      end
    end # WebIndex
  end # NT
end # MyRepresentatives
@@ -0,0 +1,54 @@
1
module MyRepresentatives
  module NT
    # Collects the members of the NT parliament.
    #
    # Construction asks WebIndex for the member URLs, loads each one through
    # WebShow and stores one Person per page in #people.
    class WebLower
      include MyRepresentatives::Abbreviatable
      include MyRepresentatives::Guessable

      attr_accessor :urls, :people

      # Performs the whole scrape. Any error raised while loading a member
      # page propagates and aborts the run.
      def initialize
        @logger = Logger.new(STDOUT)
        @urls = WebIndex.new.lower_urls
        @people = []

        @urls.each { |member_url| create_person_from_document(WebShow.new(member_url)) }
      end

      private

      # Converts one scraped member page into a Person and appends it to
      # @people.
      #
      # @param document [WebShow] the scraped member page
      # @return [Array<Person>] the updated people list
      def create_person_from_document(document)
        @logger.info("Attempting to create #{document.formal_name}")

        # Setup the electorate
        seat = Electorate.new(document.electorate_name)
        seat.state_nt_lower!

        # Setup the Representative (Person); fields are copied in a fixed order.
        member = MyRepresentatives::Person.new(seat)
        scraped_fields = %i[
          email phone party_name image_url homepage_url
          formal_name first_name last_name preferred_name
          title physical_address postal_address
        ]
        scraped_fields.each do |field|
          member.public_send("#{field}=", document.public_send(field))
        end

        # Gender is derived from the scraped title; the rest is not available.
        member.gender = guess_gender(document.title)
        member.honorifics = nil
        member.salutation = nil

        @people << member
      end
    end # WebLower
  end # NT
end # MyRepresentatives
@@ -0,0 +1,134 @@
1
module MyRepresentatives
  module NT
    # Scrapes a single member page of the NT Parliament website.
    #
    # Most details come from a table whose header cells (th) label the value
    # in the following cell. When the expected markup is missing, the
    # resulting NoMethodError is rescued and the reader returns nil.
    class WebShow
      include MyRepresentatives::Guessable

      attr_accessor :url, :document, :logger

      # Downloads and parses the member page.
      #
      # @param url [String] address of the member page
      # @raise [MyRepresentatives::InvalidURLError] if url is not a String, or
      #   is not a URL that can be opened
      # @raise [MyRepresentatives::NokogiriDocumentPropertiesError] if the
      #   document fails validation (validation currently always passes)
      def initialize(url)
        @logger = Logger.new(STDOUT)
        raise MyRepresentatives::InvalidURLError unless url && url.is_a?(String)
        @url = url
        fetch_document
        raise MyRepresentatives::NokogiriDocumentPropertiesError unless test_document_for_validity
      end

      # @return [String] the URL this page was loaded from
      def homepage_url
        @url
      end

      # @return [String, nil] text of the main page heading
      def formal_name
        @document.css("h1#main").text
      rescue NoMethodError
        nil
      end

      # @return [String, nil] title guessed from the formal name
      #   (the guess_* helpers are presumably provided by Guessable)
      def title
        guess_title(formal_name)
      rescue NoMethodError
        nil
      end

      # @return [String, nil] first name guessed from the formal name
      def first_name
        guess_first(formal_name)
      rescue NoMethodError
        nil
      end

      # @return [String, nil] preferred name guessed from the formal name
      def preferred_name
        guess_preferred(formal_name)
      rescue NoMethodError
        nil
      end

      # @return [String, nil] last name guessed from the formal name
      def last_name
        guess_last(formal_name)
      rescue NoMethodError
        nil
      end

      # @return [String, nil] business-hours phone number without a "+61 " prefix
      def phone
        table_value_for("Phone (business hours)").gsub("+61 ", "")
      rescue NoMethodError
        nil
      end

      # NOTE(review): this reads the "Electorate office" row; presumably that
      # row holds the office e-mail address on the live page - confirm.
      #
      # @return [String, nil]
      def email
        table_value_for("Electorate office")
      rescue NoMethodError
        nil
      end

      # NOTE(review): the "Electorate" selector also matches an
      # "Electorate office" header, and the first match in document order
      # wins - confirm the row order on the live page.
      #
      # @return [String, nil]
      def electorate_name
        table_value_for("Electorate")
      rescue NoMethodError
        nil
      end

      # @return [String, nil] value of the "Party" row
      def party_name
        table_value_for("Party")
      rescue NoMethodError
        nil
      end

      # @return [String, nil] "Address" row with ", " turned into line breaks
      def physical_address
        table_value_for("Address").gsub(", ", "\n")
      rescue NoMethodError
        nil
      end

      # @return [String, nil] "Postal address" row with ", " turned into line breaks
      def postal_address
        table_value_for("Postal address").gsub(", ", "\n")
      rescue NoMethodError
        nil
      end

      # @return [String, nil] src of the thumbnail image, exactly as it
      #   appears in the page
      def image_url
        @document.css("div.thumbnail").css("img").attr('src').value
      rescue NoMethodError
        nil
      end

      private

      # Returns the stripped text of the cell that follows the first table
      # header containing +label+. Raises NoMethodError when no such header
      # (or following cell) exists; the public readers rescue it.
      def table_value_for(label)
        @document.css("table").at(%(th:contains("#{label}"))).next_element.text.strip
      end

      # Loads @url into @document.
      #
      # The URL is opened through URI rather than Kernel#open, so a scraped
      # value such as "|command" can never be executed and the call keeps
      # working on Rubies where open-uri no longer patches Kernel#open. The
      # block form closes the response once it has been parsed.
      # Assumes open-uri is already loaded, as the previous open(url) call
      # required.
      def fetch_document
        uri = URI.parse(@url)
        raise MyRepresentatives::InvalidURLError unless uri.respond_to?(:open)

        @document = uri.open { |response| Nokogiri::HTML(response) }
      rescue => err
        @logger.debug("Failed to connect to the url: #{@url}")
        raise err
      end

      # Document validation is not implemented; every document is accepted.
      #
      # @return [true]
      def test_document_for_validity
        true
      end
    end # WebShow
  end # NT
end # MyRepresentatives
@@ -0,0 +1,197 @@
1
+ require 'active_support'
2
+ require 'active_support/inflector'
3
+
4
+ module MyRepresentatives
5
+ class Person
6
+ class Error < StandardError; end
7
+ class IsNotAnElectorateObjectError < Error; end
8
+
9
+ include MyRepresentatives::Hashable
10
+
11
+ attr_accessor :electorate, :successful_merge
12
+ attr_reader :formal_name, :first_name, :last_name, :preferred_name, :salutation, :title, :honorifics, :gender, :email, :phone, :physical_address, :postal_address, :image_url, :homepage_url, :party_name
13
+
14
# Creates a person tied to the electorate they represent. The merge flag
# starts out false.
#
# @param electorate [Electorate]
# @raise [IsNotAnElectorateObjectError] if the argument is missing or is
#   not an Electorate
def initialize(electorate)
  unless electorate.is_a?(Electorate)
    raise IsNotAnElectorateObjectError
  end

  @electorate = electorate
  @successful_merge = false
end
19
+
20
# Hash representation of this person. The work is done by the inherited
# implementation (presumably the one from the Hashable mixin).
def to_hash
  super
end
23
+
24
# Stores the formal name trimmed, with every run of two or more whitespace
# characters collapsed to one space. Blank or missing input is stored as nil.
def formal_name=(value)
  trimmed = value ? value.strip : ""
  @formal_name = trimmed.empty? ? nil : trimmed.gsub(/\s{2,}/," ")
end
35
+
36
# Stores the first name without surrounding whitespace, or nil when no
# value is given.
def first_name=(value)
  @first_name = (value.strip if value)
end
43
+
44
# Stores the last name with surrounding whitespace and any post-nominal
# tokens (MP, MLA, MLC, QC, CSC, AOM, AO) removed. Missing input is stored
# as nil.
#
# Each token is matched as a whole word after a single space. That keeps
# "AOM" from being cut down to a stray "M" by the shorter "AO", and leaves
# names that merely begin with the same letters untouched.
#
# @param value [String, nil]
# @return [String, nil] the stored last name
def last_name=(value)
  @last_name = if value
    value.strip.gsub(/ (?:MP|MLA|MLC|QC|CSC|AOM|AO)\b/, "")
  else
    nil
  end
end
51
+
52
# Stores the preferred name without surrounding whitespace, or nil when no
# value is given.
def preferred_name=(value)
  @preferred_name = value ? value.strip : nil
end
59
+
60
# Stores the salutation without surrounding whitespace, or nil when no
# value is given.
def salutation=(value)
  @salutation = (value.strip if value)
end
67
+
68
# Stores the title without surrounding whitespace. Blank or missing input
# is stored as nil.
def title=(value)
  trimmed = value ? value.strip : ""
  @title = trimmed.empty? ? nil : trimmed
end
79
+
80
# Stores the honorifics without surrounding whitespace, or nil when no
# value is given.
def honorifics=(value)
  @honorifics = value ? value.strip : nil
end
87
+
88
# Stores the gender trimmed and titleized (String#titleize comes from
# ActiveSupport), or nil when no value is given.
def gender=(value)
  @gender = (value.strip.titleize if value)
end
95
+
96
# Stores the first whitespace-separated token of the value, lower-cased.
# Missing or blank input is stored as nil.
def email=(value)
  @email = (value.downcase.strip.split(" ").first if value)
end
103
+
104
# Stores only the digits of the given phone number, or nil when no value
# is given.
def phone=(value)
  return @phone = nil unless value

  digits_only = value.strip.gsub(/\D/, '')
  @phone = digits_only
end
111
+
112
# Stores a tidied street address: trimmed, runs of spaces collapsed, doubled
# line breaks merged and spaces after a line break removed. Missing or blank
# input, and anything mentioning a PO Box, is stored as nil.
def physical_address=(value)
  if !value || value.upcase.include?("PO BOX")
    @physical_address = nil
  else
    trimmed = value.strip
    @physical_address = if trimmed.empty?
      nil
    else
      trimmed.gsub(/[ ]{2,}/," ").gsub("\n\n","\n").gsub("\n ","\n")
    end
  end
end
123
+
124
# Stores a tidied postal address: trimmed, runs of spaces collapsed, doubled
# line breaks merged and spaces after a line break removed. Missing or blank
# input is stored as nil.
def postal_address=(value)
  trimmed = value ? value.strip : ""
  @postal_address = if trimmed.empty?
    nil
  else
    trimmed.gsub(/[ ]{2,}/," ").gsub("\n\n","\n").gsub("\n ","\n")
  end
end
135
+
136
# Stores the image URL without surrounding whitespace, or nil when no
# value is given.
def image_url=(value)
  @image_url = (value.strip if value)
end
143
+
144
# Stores the homepage URL without surrounding whitespace, or nil when no
# value is given.
def homepage_url=(value)
  @homepage_url = value ? value.strip : nil
end
151
+
152
# Full party names keyed by the upper-cased abbreviations seen in the
# source data.
PARTY_NAMES_BY_ABBREVIATION = {
  "LP"   => "Liberal Party",
  "LIB"  => "Liberal Party",
  "ALP"  => "Australian Labor Party",
  "AG"   => "Australian Greens",
  "NATS" => "The Nationals",
  "NAT"  => "The Nationals",
  # NPA previously produced "Greens Western Australia", a copy of the GWA
  # entry. NOTE(review): mapped to The Nationals on the assumption that NPA
  # stands for the National Party - confirm against the source data.
  "NPA"  => "The Nationals",
  "AUS"  => "Katter's Australian Party",
  "KAP"  => "Katter's Australian Party",
  "NXT"  => "Nick Xenophon Team",
  "JLN"  => "Jacqui Lambie Network",
  "AC"   => "Australian Conservatives",
  "PHON" => "Pauline Hanson's One Nation",
  "ONP"  => "Pauline Hanson's One Nation",
  "DJHP" => "Derryn Hinch's Justice Party",
  "LDP"  => "Liberal Democrats",
  "CLP"  => "Country Liberal Party",
  "LNP"  => "Liberal National Party",
  "GWA"  => "Greens Western Australia",
  "SFF"  => "Shooters, Fishers and Farmers Party"
}.freeze

# Stores the party name, expanding known abbreviations to the full name.
#
# The value is trimmed before it is compared, so padded abbreviations such
# as " ALP " are recognised. Anything starting with "IND" (any case) becomes
# "Independent". Unknown values are stored trimmed but otherwise unchanged,
# and missing input is stored as nil.
#
# @param value [String, nil]
# @return [String, nil] the stored party name
def party_name=(value)
  @party_name = if value
    trimmed = value.strip
    abbreviation = trimmed.upcase

    PARTY_NAMES_BY_ABBREVIATION.fetch(abbreviation) do
      abbreviation.start_with?("IND") ? "Independent" : trimmed
    end
  else
    nil
  end
end
195
+
196
+ end
197
+ end