statement 1.8.10 → 1.8.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6fbbdddce74592c0f23a2d06ce5c7f8be60da5c7
4
- data.tar.gz: 6b275d77a43cb0aafd2823dfac541db58f11a11a
3
+ metadata.gz: 6d7870c8335be4f10c96f6b9ca858ace626fd77a
4
+ data.tar.gz: c4742a7f24a2ab59291d4a4ba331c5f356f84889
5
5
  SHA512:
6
- metadata.gz: 281e41c06643a6bc3649760942082d24bae94c434c244f4ac596ea959cd33d8c2648731b4f91a0442b500da86feade7c766e4e6c6b13b343e9032c25a8a5b2b6
7
- data.tar.gz: 5c83f37e1253c17b22e7a6ca520048d82d1918db30fb0f50f592b8a2d82b606fe602fd9fe530d786f4427da10bd4937d1779631f6b2b9e9d6e79c01f7dc5ec80
6
+ metadata.gz: ac2b0fef268e2991a80fa4b7a5df1c51c7b30869a0dd966d7c673bf3f7b39879b16c66df0437e7aeda57fa3786428947236e6a6931f7fc67c9eb0e30e1f956e9
7
+ data.tar.gz: ef3c6a1d6fdc12ac29b862de24ffac76c697c5ee0e9aa486c0adafb79d9e8c00c1e932aa90fd4580c3b3c1310a1e8ed8a755d6745e5e81988efe59b435f24ef9
@@ -6,7 +6,7 @@ require 'nokogiri'
6
6
 
7
7
  module Statement
8
8
  class Scraper
9
-
9
+
10
10
  def self.open_html(url)
11
11
  begin
12
12
  Nokogiri::HTML(open(url).read)
@@ -14,47 +14,47 @@ module Statement
14
14
  nil
15
15
  end
16
16
  end
17
-
17
+
18
18
  def self.house_gop(url)
19
19
  doc = open_html(url)
20
20
  return unless doc
21
21
  uri = URI.parse(url)
22
22
  date = Date.parse(uri.query.split('=').last)
23
23
  links = doc.xpath("//ul[@id='membernews']").search('a')
24
- results = links.map do |link|
24
+ results = links.map do |link|
25
25
  abs_link = Utils.absolute_link(url, link["href"])
26
26
  { :source => url, :url => abs_link, :title => link.text.strip, :date => date, :domain => URI.parse(link["href"]).host }
27
27
  end
28
28
  Utils.remove_generic_urls!(results)
29
29
  end
30
-
30
+
31
31
  def self.member_methods
32
- [:capuano, :cold_fusion, :conaway, :chabot, :susandavis, :freshman_senators, :klobuchar, :billnelson, :lautenberg, :crapo, :coburn, :boxer, :vitter, :donnelly, :inhofe, :palazzo, :roe, :document_query, :swalwell, :fischer, :clark, :edwards, :culberson_chabot_grisham, :barton, :wolf_sherman_mccaul, :welch, :sessions, :gabbard, :ellison, :costa, :farr, :mcclintock, :mcnerney, :olson]
32
+ [:crenshaw, :capuano, :cold_fusion, :conaway, :chabot, :susandavis, :freshman_senators, :klobuchar, :billnelson, :lautenberg, :crapo, :coburn, :boxer, :vitter, :donnelly, :inhofe, :palazzo, :roe, :document_query, :swalwell, :fischer, :clark, :edwards, :culberson_chabot_grisham, :barton, :wolf_sherman_mccaul, :welch, :sessions, :gabbard, :ellison, :costa, :farr, :mcclintock, :mcnerney, :olson]
33
33
  end
34
-
34
+
35
35
  def self.committee_methods
36
36
  [:senate_approps_majority, :senate_approps_minority, :senate_banking, :senate_hsag_majority, :senate_hsag_minority, :senate_indian, :senate_aging, :senate_smallbiz_minority, :senate_intel, :house_energy_minority, :house_homeland_security_minority, :house_judiciary_majority, :house_rules_majority, :house_ways_means_majority]
37
37
  end
38
-
38
+
39
39
  def self.member_scrapers
40
40
  year = Date.today.year
41
- results = [capuano, cold_fusion(year, 0), conaway, chabot, susandavis, klobuchar(year), palazzo(page=1), roe(page=1), billnelson(year=year),
41
+ results = [crenshaw, capuano, cold_fusion(year, 0), conaway, chabot, susandavis, klobuchar(year), palazzo(page=1), roe(page=1), billnelson(year=year),
42
42
  document_query(page=1), document_query(page=2), swalwell(page=1), donnelly(year=year), crapo, coburn, boxer(start=1),
43
43
  vitter(year=year), inhofe(year=year), fischer, clark(year=year), edwards, culberson_chabot_grisham(page=1), barton, wolf_sherman_mccaul, welch,
44
44
  sessions(year=year), gabbard, pryor, ellison(page=0), costa, farr, mcclintock, olson, mcnerney].flatten
45
45
  results = results.compact
46
46
  Utils.remove_generic_urls!(results)
47
47
  end
48
-
48
+
49
49
  def self.backfill_from_scrapers
50
- results = [cold_fusion(2012, 0), cold_fusion(2011, 0), cold_fusion(2010, 0), billnelson(year=2012), document_query(page=3),
51
- document_query(page=4), coburn(year=2012), coburn(year=2011), coburn(year=2010), boxer(start=11), boxer(start=21),
50
+ results = [cold_fusion(2012, 0), cold_fusion(2011, 0), cold_fusion(2010, 0), billnelson(year=2012), document_query(page=3),
51
+ document_query(page=4), coburn(year=2012), coburn(year=2011), coburn(year=2010), boxer(start=11), boxer(start=21),
52
52
  boxer(start=31), boxer(start=41), vitter(year=2012), vitter(year=2011), swalwell(page=2), swalwell(page=3), clark(year=2013), culberson_chabot_grisham(page=2),
53
- wolf_sherman_mccaul(page=1), sessions(year=2013), pryor(page=1), ellison(page=1), ellison(page=2), ellison(page=3), farr(year=2013), farr(year=2012), farr(year=2011),
54
- mcnerney(page-2), mcnerney(page=3), mcnerney(page=4), mcnerney(page=5), mcnerney(page=6), olson(year=2013)].flatten
53
+ wolf_sherman_mccaul(page=1), sessions(year=2013), pryor(page=1), ellison(page=1), ellison(page=2), ellison(page=3), farr(year=2013), farr(year=2012), farr(year=2011),
54
+ mcnerney(page=2), mcnerney(page=3), mcnerney(page=4), mcnerney(page=5), mcnerney(page=6), olson(year=2013)].flatten
55
55
  Utils.remove_generic_urls!(results)
56
56
  end
57
-
57
+
58
58
  def self.committee_scrapers
59
59
  year = Date.today.year
60
60
  results = [senate_approps_majority, senate_approps_minority, senate_banking(year), senate_hsag_majority(year), senate_hsag_minority(year),
@@ -62,9 +62,9 @@ module Statement
62
62
  house_judiciary_majority, house_rules_majority, house_ways_means_majority].flatten
63
63
  Utils.remove_generic_urls!(results)
64
64
  end
65
-
65
+
66
66
  ## special cases for committees without RSS feeds
67
-
67
+
68
68
  def self.senate_approps_majority
69
69
  results = []
70
70
  url = "http://www.appropriations.senate.gov/news.cfm"
@@ -77,7 +77,7 @@ module Statement
77
77
  end
78
78
  results
79
79
  end
80
-
80
+
81
81
  def self.senate_approps_minority
82
82
  results = []
83
83
  url = "http://www.appropriations.senate.gov/republican.cfm"
@@ -90,7 +90,7 @@ module Statement
90
90
  end
91
91
  results
92
92
  end
93
-
93
+
94
94
  def self.senate_banking(year=Date.today.year)
95
95
  results = []
96
96
  url = "http://www.banking.senate.gov/public/index.cfm?FuseAction=Newsroom.PressReleases&ContentRecordType_id=b94acc28-404a-4fc6-b143-a9e15bf92da4&Region_id=&Issue_id=&MonthDisplay=0&YearDisplay=#{year}"
@@ -101,7 +101,7 @@ module Statement
101
101
  end
102
102
  results
103
103
  end
104
-
104
+
105
105
  def self.senate_hsag_majority(year=Date.today.year)
106
106
  results = []
107
107
  url = "http://www.hsgac.senate.gov/media/majority-media?year=#{year}"
@@ -113,7 +113,7 @@ module Statement
113
113
  end
114
114
  results
115
115
  end
116
-
116
+
117
117
  def self.senate_hsag_minority(year=Date.today.year)
118
118
  results = []
119
119
  url = "http://www.hsgac.senate.gov/media/minority-media?year=#{year}"
@@ -125,7 +125,7 @@ module Statement
125
125
  end
126
126
  results
127
127
  end
128
-
128
+
129
129
  def self.senate_indian
130
130
  results = []
131
131
  url = "http://www.indian.senate.gov/news/index.cfm"
@@ -136,7 +136,7 @@ module Statement
136
136
  end
137
137
  results
138
138
  end
139
-
139
+
140
140
  def self.senate_aging
141
141
  results = []
142
142
  url = "http://www.aging.senate.gov/pressroom.cfm?maxrows=100&startrow=1&&type=1"
@@ -147,18 +147,18 @@ module Statement
147
147
  end
148
148
  results
149
149
  end
150
-
150
+
151
151
  def self.senate_smallbiz_minority
152
152
  results = []
153
153
  url = "http://www.sbc.senate.gov/public/index.cfm?p=RepublicanPressRoom"
154
154
  doc = open_html(url)
155
- return if doc.nil?
155
+ return if doc.nil?
156
156
  doc.xpath("//ul[@class='recordList']").each do |row|
157
157
  results << { :source => url, :url => row.children[0].children[2].children[0]['href'], :title => row.children[0].children[2].children[0].text, :date => Date.parse(row.children[0].children[0].text), :domain => "http://www.sbc.senate.gov/", :party => 'minority' }
158
158
  end
159
159
  results
160
160
  end
161
-
161
+
162
162
  def self.senate_intel(congress=113, start_year=2013, end_year=2014)
163
163
  results = []
164
164
  url = "http://www.intelligence.senate.gov/press/releases.cfm?congress=#{congress}&y1=#{start_year}&y2=#{end_year}"
@@ -169,7 +169,7 @@ module Statement
169
169
  end
170
170
  results
171
171
  end
172
-
172
+
173
173
  def self.house_energy_minority
174
174
  results = []
175
175
  url = "http://democrats.energycommerce.house.gov/index.php?q=news-releases"
@@ -180,7 +180,7 @@ module Statement
180
180
  end
181
181
  results
182
182
  end
183
-
183
+
184
184
  def self.house_homeland_security_minority
185
185
  results = []
186
186
  url = "http://chsdemocrats.house.gov/press/index.asp?subsection=1"
@@ -191,7 +191,7 @@ module Statement
191
191
  end
192
192
  results
193
193
  end
194
-
194
+
195
195
  def self.house_judiciary_majority
196
196
  results = []
197
197
  url = "http://judiciary.house.gov/news/press2013.html"
@@ -203,7 +203,7 @@ module Statement
203
203
  end
204
204
  results
205
205
  end
206
-
206
+
207
207
  def self.house_rules_majority
208
208
  results = []
209
209
  url = "http://www.rules.house.gov/News/Default.aspx"
@@ -215,7 +215,7 @@ module Statement
215
215
  end
216
216
  results
217
217
  end
218
-
218
+
219
219
  def self.house_ways_means_majority
220
220
  results = []
221
221
  url = "http://waysandmeans.house.gov/news/documentquery.aspx?DocumentTypeID=1496"
@@ -227,9 +227,9 @@ module Statement
227
227
  end
228
228
  results
229
229
  end
230
-
230
+
231
231
  ## special cases for members without RSS feeds
232
-
232
+
233
233
  def self.swalwell(page=1)
234
234
  results = []
235
235
  url = "http://swalwell.house.gov/category/press-releases/page/#{page}/"
@@ -250,7 +250,7 @@ module Statement
250
250
  doc.xpath("//a").select{|l| !l['href'].nil? and l['href'].include?('/pr')}[1..-5].each do |link|
251
251
  begin
252
252
  year = link['href'].split('/').first
253
- date = Date.parse(link.text.split(' ').first+'/'+year)
253
+ date = Date.parse(link.text.split(' ').first+'/'+year)
254
254
  rescue
255
255
  date = nil
256
256
  end
@@ -258,17 +258,44 @@ module Statement
258
258
  end
259
259
  return results[0..-5]
260
260
  end
261
-
262
- def self.cold_fusion(year=Date.today.year, month=0)
261
+
262
+ def self.crenshaw(year=Date.today.year, month=nil)
263
263
  results = []
264
264
  year = Date.today.year if not year
265
- month = 0 if not month
266
- domains = ['crenshaw.house.gov', 'www.ronjohnson.senate.gov/public/','www.risch.senate.gov/public/']
265
+ domain = 'crenshaw.house.gov'
266
+ if month
267
+ url = "http://crenshaw.house.gov/index.cfm/pressreleases?YearDisplay=#{year}&MonthDisplay=#{month}&page=1"
268
+ else
269
+ url = "http://crenshaw.house.gov/index.cfm/pressreleases"
270
+ end
271
+ doc = Statement::Scraper.open_html(url)
272
+ return if doc.nil?
273
+ doc.xpath("//tr")[2..-1].each do |row|
274
+ date_text, title = row.children.map{|c| c.text.strip}.reject{|c| c.empty?}
275
+ next if date_text == 'Date' or date_text.size > 10
276
+ date = Date.parse(date_text)
277
+ results << { :source => url, :url => row.children[3].children.first['href'], :title => title, :date => date, :domain => domain }
278
+ end
279
+ results
280
+ end
281
+
282
+ def self.cold_fusion(year=Date.today.year, month=nil)
283
+ results = []
284
+ year = Date.today.year if not year
285
+ domains = ['www.ronjohnson.senate.gov/public/','www.risch.senate.gov/public/']
267
286
  domains.each do |domain|
268
- if domain == 'crenshaw.house.gov' or domain == 'www.risch.senate.gov/public/'
269
- url = "http://"+domain + "/index.cfm/pressreleases?YearDisplay=#{year}&MonthDisplay=#{month}&page=1"
287
+ if domain == 'www.risch.senate.gov/public/'
288
+ if not month
289
+ url = "http://www.risch.senate.gov/public/index.cfm/pressreleases"
290
+ else
291
+ url = "http://www.risch.senate.gov/public/index.cfm/pressreleases?YearDisplay=#{year}&MonthDisplay=#{month}&page=1"
292
+ end
270
293
  else
271
- url = "http://"+domain + "index.cfm/press-releases?YearDisplay=#{year}&MonthDisplay=#{month}&page=1"
294
+ if not month
295
+ url = "http://www.ronjohnson.senate.gov/public/index.cfm/press-releases"
296
+ else
297
+ url = "http://www.ronjohnson.senate.gov/public/index.cfm/press-releases?YearDisplay=#{year}&MonthDisplay=#{month}&page=1"
298
+ end
272
299
  end
273
300
  doc = Statement::Scraper.open_html(url)
274
301
  return if doc.nil?
@@ -281,7 +308,7 @@ module Statement
281
308
  end
282
309
  results.flatten
283
310
  end
284
-
311
+
285
312
  def self.conaway(page=1)
286
313
  results = []
287
314
  base_url = "http://conaway.house.gov/news/"
@@ -293,7 +320,7 @@ module Statement
293
320
  end
294
321
  results
295
322
  end
296
-
323
+
297
324
  def self.chabot(year=Date.today.year)
298
325
  results = []
299
326
  base_url = "http://chabot.house.gov/news/"
@@ -306,7 +333,7 @@ module Statement
306
333
  end
307
334
  results
308
335
  end
309
-
336
+
310
337
  def self.susandavis
311
338
  results = []
312
339
  base_url = "http://www.house.gov/susandavis/"
@@ -318,7 +345,7 @@ module Statement
318
345
  end
319
346
  results
320
347
  end
321
-
348
+
322
349
  def self.klobuchar(year)
323
350
  results = []
324
351
  base_url = "http://www.klobuchar.senate.gov/"
@@ -333,7 +360,7 @@ module Statement
333
360
  end
334
361
  results
335
362
  end
336
-
363
+
337
364
  def self.lujan
338
365
  results = []
339
366
  base_url = 'http://lujan.house.gov/'
@@ -345,7 +372,7 @@ module Statement
345
372
  end
346
373
  results
347
374
  end
348
-
375
+
349
376
  def self.billnelson(year=2013)
350
377
  results = []
351
378
  base_url = "http://www.billnelson.senate.gov/news/"
@@ -357,7 +384,7 @@ module Statement
357
384
  end
358
385
  results
359
386
  end
360
-
387
+
361
388
  # fetches the latest 1000 releases, can be altered
362
389
  def self.lautenberg(rows=1000)
363
390
  results = []
@@ -370,7 +397,7 @@ module Statement
370
397
  end
371
398
  results
372
399
  end
373
-
400
+
374
401
  def self.crapo
375
402
  results = []
376
403
  base_url = "http://www.crapo.senate.gov/media/newsreleases/"
@@ -394,7 +421,7 @@ module Statement
394
421
  end
395
422
  results
396
423
  end
397
-
424
+
398
425
  def self.coburn(year=Date.today.year)
399
426
  results = []
400
427
  url = "http://www.coburn.senate.gov/public/index.cfm?p=PressReleases&ContentType_id=d741b7a7-7863-4223-9904-8cb9378aa03a&Group_id=7a55cb96-4639-4dac-8c0c-99a4a227bd3a&MonthDisplay=0&YearDisplay=#{year}"
@@ -406,7 +433,7 @@ module Statement
406
433
  end
407
434
  results
408
435
  end
409
-
436
+
410
437
  def self.boxer(start=1)
411
438
  results = []
412
439
  url = "http://www.boxer.senate.gov/en/press/releases.cfm?start=#{start}"
@@ -418,7 +445,7 @@ module Statement
418
445
  end
419
446
  results
420
447
  end
421
-
448
+
422
449
  def self.vitter(year=Date.today.year)
423
450
  results = []
424
451
  url = "http://www.vitter.senate.gov/newsroom/"
@@ -431,7 +458,7 @@ module Statement
431
458
  end
432
459
  results
433
460
  end
434
-
461
+
435
462
  def self.donnelly(year=Date.today.year)
436
463
  results = []
437
464
  url = "http://www.donnelly.senate.gov/newsroom/"
@@ -444,7 +471,7 @@ module Statement
444
471
  end
445
472
  results
446
473
  end
447
-
474
+
448
475
  def self.inhofe(year=Date.today.year)
449
476
  results = []
450
477
  url = "http://www.inhofe.senate.gov/newsroom/press-releases?year=#{year}"
@@ -457,7 +484,7 @@ module Statement
457
484
  end
458
485
  results
459
486
  end
460
-
487
+
461
488
  def self.palazzo(page=1)
462
489
  results = []
463
490
  domain = "palazzo.house.gov"
@@ -598,7 +625,7 @@ module Statement
598
625
  doc = open_html(url)
599
626
  return if doc.nil?
600
627
  doc.css('ul.fc_leading li').each do |row|
601
- results << {:source => url, :url => "http://gabbard.house.gov"+row.children[0].children[1]['href'], :title => row.children[0].children[1].text.strip, :date => Date.parse(row.children[2].text), :domain => domain}
628
+ results << {:source => url, :url => "http://gabbard.house.gov"+row.children[0].children[1]['href'], :title => row.children[0].children[1].text.strip, :date => Date.parse(row.children[2].text), :domain => domain}
602
629
  end
603
630
  results
604
631
  end
@@ -692,6 +719,26 @@ module Statement
692
719
  end
693
720
  results.flatten
694
721
  end
695
-
722
+
723
+ def self.backfill_bilirakis
724
+ results = []
725
+ domain = 'bilirakis.house.gov'
726
+ url = 'http://bilirakis.house.gov/press-releases/'
727
+ doc = open_html(url)
728
+ return if doc.nil?
729
+ doc.css("ul li[@class='article articleright']").each do |row|
730
+ results << {:source => url, :url => 'http://bilirakis.house.gov' + row.children[3].children[1]['href'], :title => row.children[3].text.strip, :date => Date.parse(row.children[5].text), :domain => domain }
731
+ end
732
+ end
733
+
734
+ def self.backfill_boustany
735
+ results = []
736
+ domain = 'boustany.house.gov'
737
+ url = 'http://boustany.house.gov/113th-congress/showallitems/'
738
+ doc = open_html(url)
739
+ return if doc.nil?
740
+
741
+ end
742
+
696
743
  end
697
- end
744
+ end
@@ -1,3 +1,3 @@
1
1
  module Statement
2
- VERSION = "1.8.10"
2
+ VERSION = "1.8.11"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: statement
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.8.10
4
+ version: 1.8.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - Derek Willis
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-10-24 00:00:00.000000000 Z
11
+ date: 2015-01-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler