statement 1.8.10 → 1.8.11

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6fbbdddce74592c0f23a2d06ce5c7f8be60da5c7
4
- data.tar.gz: 6b275d77a43cb0aafd2823dfac541db58f11a11a
3
+ metadata.gz: 6d7870c8335be4f10c96f6b9ca858ace626fd77a
4
+ data.tar.gz: c4742a7f24a2ab59291d4a4ba331c5f356f84889
5
5
  SHA512:
6
- metadata.gz: 281e41c06643a6bc3649760942082d24bae94c434c244f4ac596ea959cd33d8c2648731b4f91a0442b500da86feade7c766e4e6c6b13b343e9032c25a8a5b2b6
7
- data.tar.gz: 5c83f37e1253c17b22e7a6ca520048d82d1918db30fb0f50f592b8a2d82b606fe602fd9fe530d786f4427da10bd4937d1779631f6b2b9e9d6e79c01f7dc5ec80
6
+ metadata.gz: ac2b0fef268e2991a80fa4b7a5df1c51c7b30869a0dd966d7c673bf3f7b39879b16c66df0437e7aeda57fa3786428947236e6a6931f7fc67c9eb0e30e1f956e9
7
+ data.tar.gz: ef3c6a1d6fdc12ac29b862de24ffac76c697c5ee0e9aa486c0adafb79d9e8c00c1e932aa90fd4580c3b3c1310a1e8ed8a755d6745e5e81988efe59b435f24ef9
@@ -6,7 +6,7 @@ require 'nokogiri'
6
6
 
7
7
  module Statement
8
8
  class Scraper
9
-
9
+
10
10
  def self.open_html(url)
11
11
  begin
12
12
  Nokogiri::HTML(open(url).read)
@@ -14,47 +14,47 @@ module Statement
14
14
  nil
15
15
  end
16
16
  end
17
-
17
+
18
18
  def self.house_gop(url)
19
19
  doc = open_html(url)
20
20
  return unless doc
21
21
  uri = URI.parse(url)
22
22
  date = Date.parse(uri.query.split('=').last)
23
23
  links = doc.xpath("//ul[@id='membernews']").search('a')
24
- results = links.map do |link|
24
+ results = links.map do |link|
25
25
  abs_link = Utils.absolute_link(url, link["href"])
26
26
  { :source => url, :url => abs_link, :title => link.text.strip, :date => date, :domain => URI.parse(link["href"]).host }
27
27
  end
28
28
  Utils.remove_generic_urls!(results)
29
29
  end
30
-
30
+
31
31
  def self.member_methods
32
- [:capuano, :cold_fusion, :conaway, :chabot, :susandavis, :freshman_senators, :klobuchar, :billnelson, :lautenberg, :crapo, :coburn, :boxer, :vitter, :donnelly, :inhofe, :palazzo, :roe, :document_query, :swalwell, :fischer, :clark, :edwards, :culberson_chabot_grisham, :barton, :wolf_sherman_mccaul, :welch, :sessions, :gabbard, :ellison, :costa, :farr, :mcclintock, :mcnerney, :olson]
32
+ [:crenshaw, :capuano, :cold_fusion, :conaway, :chabot, :susandavis, :freshman_senators, :klobuchar, :billnelson, :lautenberg, :crapo, :coburn, :boxer, :vitter, :donnelly, :inhofe, :palazzo, :roe, :document_query, :swalwell, :fischer, :clark, :edwards, :culberson_chabot_grisham, :barton, :wolf_sherman_mccaul, :welch, :sessions, :gabbard, :ellison, :costa, :farr, :mcclintock, :mcnerney, :olson]
33
33
  end
34
-
34
+
35
35
  def self.committee_methods
36
36
  [:senate_approps_majority, :senate_approps_minority, :senate_banking, :senate_hsag_majority, :senate_hsag_minority, :senate_indian, :senate_aging, :senate_smallbiz_minority, :senate_intel, :house_energy_minority, :house_homeland_security_minority, :house_judiciary_majority, :house_rules_majority, :house_ways_means_majority]
37
37
  end
38
-
38
+
39
39
  def self.member_scrapers
40
40
  year = Date.today.year
41
- results = [capuano, cold_fusion(year, 0), conaway, chabot, susandavis, klobuchar(year), palazzo(page=1), roe(page=1), billnelson(year=year),
41
+ results = [crenshaw, capuano, cold_fusion(year, 0), conaway, chabot, susandavis, klobuchar(year), palazzo(page=1), roe(page=1), billnelson(year=year),
42
42
  document_query(page=1), document_query(page=2), swalwell(page=1), donnelly(year=year), crapo, coburn, boxer(start=1),
43
43
  vitter(year=year), inhofe(year=year), fischer, clark(year=year), edwards, culberson_chabot_grisham(page=1), barton, wolf_sherman_mccaul, welch,
44
44
  sessions(year=year), gabbard, pryor, ellison(page=0), costa, farr, mcclintock, olson, mcnerney].flatten
45
45
  results = results.compact
46
46
  Utils.remove_generic_urls!(results)
47
47
  end
48
-
48
+
49
49
  def self.backfill_from_scrapers
50
- results = [cold_fusion(2012, 0), cold_fusion(2011, 0), cold_fusion(2010, 0), billnelson(year=2012), document_query(page=3),
51
- document_query(page=4), coburn(year=2012), coburn(year=2011), coburn(year=2010), boxer(start=11), boxer(start=21),
50
+ results = [cold_fusion(2012, 0), cold_fusion(2011, 0), cold_fusion(2010, 0), billnelson(year=2012), document_query(page=3),
51
+ document_query(page=4), coburn(year=2012), coburn(year=2011), coburn(year=2010), boxer(start=11), boxer(start=21),
52
52
  boxer(start=31), boxer(start=41), vitter(year=2012), vitter(year=2011), swalwell(page=2), swalwell(page=3), clark(year=2013), culberson_chabot_grisham(page=2),
53
- wolf_sherman_mccaul(page=1), sessions(year=2013), pryor(page=1), ellison(page=1), ellison(page=2), ellison(page=3), farr(year=2013), farr(year=2012), farr(year=2011),
54
- mcnerney(page-2), mcnerney(page=3), mcnerney(page=4), mcnerney(page=5), mcnerney(page=6), olson(year=2013)].flatten
53
+ wolf_sherman_mccaul(page=1), sessions(year=2013), pryor(page=1), ellison(page=1), ellison(page=2), ellison(page=3), farr(year=2013), farr(year=2012), farr(year=2011),
54
+ mcnerney(page=2), mcnerney(page=3), mcnerney(page=4), mcnerney(page=5), mcnerney(page=6), olson(year=2013)].flatten
55
55
  Utils.remove_generic_urls!(results)
56
56
  end
57
-
57
+
58
58
  def self.committee_scrapers
59
59
  year = Date.today.year
60
60
  results = [senate_approps_majority, senate_approps_minority, senate_banking(year), senate_hsag_majority(year), senate_hsag_minority(year),
@@ -62,9 +62,9 @@ module Statement
62
62
  house_judiciary_majority, house_rules_majority, house_ways_means_majority].flatten
63
63
  Utils.remove_generic_urls!(results)
64
64
  end
65
-
65
+
66
66
  ## special cases for committees without RSS feeds
67
-
67
+
68
68
  def self.senate_approps_majority
69
69
  results = []
70
70
  url = "http://www.appropriations.senate.gov/news.cfm"
@@ -77,7 +77,7 @@ module Statement
77
77
  end
78
78
  results
79
79
  end
80
-
80
+
81
81
  def self.senate_approps_minority
82
82
  results = []
83
83
  url = "http://www.appropriations.senate.gov/republican.cfm"
@@ -90,7 +90,7 @@ module Statement
90
90
  end
91
91
  results
92
92
  end
93
-
93
+
94
94
  def self.senate_banking(year=Date.today.year)
95
95
  results = []
96
96
  url = "http://www.banking.senate.gov/public/index.cfm?FuseAction=Newsroom.PressReleases&ContentRecordType_id=b94acc28-404a-4fc6-b143-a9e15bf92da4&Region_id=&Issue_id=&MonthDisplay=0&YearDisplay=#{year}"
@@ -101,7 +101,7 @@ module Statement
101
101
  end
102
102
  results
103
103
  end
104
-
104
+
105
105
  def self.senate_hsag_majority(year=Date.today.year)
106
106
  results = []
107
107
  url = "http://www.hsgac.senate.gov/media/majority-media?year=#{year}"
@@ -113,7 +113,7 @@ module Statement
113
113
  end
114
114
  results
115
115
  end
116
-
116
+
117
117
  def self.senate_hsag_minority(year=Date.today.year)
118
118
  results = []
119
119
  url = "http://www.hsgac.senate.gov/media/minority-media?year=#{year}"
@@ -125,7 +125,7 @@ module Statement
125
125
  end
126
126
  results
127
127
  end
128
-
128
+
129
129
  def self.senate_indian
130
130
  results = []
131
131
  url = "http://www.indian.senate.gov/news/index.cfm"
@@ -136,7 +136,7 @@ module Statement
136
136
  end
137
137
  results
138
138
  end
139
-
139
+
140
140
  def self.senate_aging
141
141
  results = []
142
142
  url = "http://www.aging.senate.gov/pressroom.cfm?maxrows=100&startrow=1&&type=1"
@@ -147,18 +147,18 @@ module Statement
147
147
  end
148
148
  results
149
149
  end
150
-
150
+
151
151
  def self.senate_smallbiz_minority
152
152
  results = []
153
153
  url = "http://www.sbc.senate.gov/public/index.cfm?p=RepublicanPressRoom"
154
154
  doc = open_html(url)
155
- return if doc.nil?
155
+ return if doc.nil?
156
156
  doc.xpath("//ul[@class='recordList']").each do |row|
157
157
  results << { :source => url, :url => row.children[0].children[2].children[0]['href'], :title => row.children[0].children[2].children[0].text, :date => Date.parse(row.children[0].children[0].text), :domain => "http://www.sbc.senate.gov/", :party => 'minority' }
158
158
  end
159
159
  results
160
160
  end
161
-
161
+
162
162
  def self.senate_intel(congress=113, start_year=2013, end_year=2014)
163
163
  results = []
164
164
  url = "http://www.intelligence.senate.gov/press/releases.cfm?congress=#{congress}&y1=#{start_year}&y2=#{end_year}"
@@ -169,7 +169,7 @@ module Statement
169
169
  end
170
170
  results
171
171
  end
172
-
172
+
173
173
  def self.house_energy_minority
174
174
  results = []
175
175
  url = "http://democrats.energycommerce.house.gov/index.php?q=news-releases"
@@ -180,7 +180,7 @@ module Statement
180
180
  end
181
181
  results
182
182
  end
183
-
183
+
184
184
  def self.house_homeland_security_minority
185
185
  results = []
186
186
  url = "http://chsdemocrats.house.gov/press/index.asp?subsection=1"
@@ -191,7 +191,7 @@ module Statement
191
191
  end
192
192
  results
193
193
  end
194
-
194
+
195
195
  def self.house_judiciary_majority
196
196
  results = []
197
197
  url = "http://judiciary.house.gov/news/press2013.html"
@@ -203,7 +203,7 @@ module Statement
203
203
  end
204
204
  results
205
205
  end
206
-
206
+
207
207
  def self.house_rules_majority
208
208
  results = []
209
209
  url = "http://www.rules.house.gov/News/Default.aspx"
@@ -215,7 +215,7 @@ module Statement
215
215
  end
216
216
  results
217
217
  end
218
-
218
+
219
219
  def self.house_ways_means_majority
220
220
  results = []
221
221
  url = "http://waysandmeans.house.gov/news/documentquery.aspx?DocumentTypeID=1496"
@@ -227,9 +227,9 @@ module Statement
227
227
  end
228
228
  results
229
229
  end
230
-
230
+
231
231
  ## special cases for members without RSS feeds
232
-
232
+
233
233
  def self.swalwell(page=1)
234
234
  results = []
235
235
  url = "http://swalwell.house.gov/category/press-releases/page/#{page}/"
@@ -250,7 +250,7 @@ module Statement
250
250
  doc.xpath("//a").select{|l| !l['href'].nil? and l['href'].include?('/pr')}[1..-5].each do |link|
251
251
  begin
252
252
  year = link['href'].split('/').first
253
- date = Date.parse(link.text.split(' ').first+'/'+year)
253
+ date = Date.parse(link.text.split(' ').first+'/'+year)
254
254
  rescue
255
255
  date = nil
256
256
  end
@@ -258,17 +258,44 @@ module Statement
258
258
  end
259
259
  return results[0..-5]
260
260
  end
261
-
262
- def self.cold_fusion(year=Date.today.year, month=0)
261
+
262
+ def self.crenshaw(year=Date.today.year, month=nil)
263
263
  results = []
264
264
  year = Date.today.year if not year
265
- month = 0 if not month
266
- domains = ['crenshaw.house.gov', 'www.ronjohnson.senate.gov/public/','www.risch.senate.gov/public/']
265
+ domain = 'crenshaw.house.gov'
266
+ if month
267
+ url = "http://crenshaw.house.gov/index.cfm/pressreleases?YearDisplay=#{year}&MonthDisplay=#{month}&page=1"
268
+ else
269
+ url = "http://crenshaw.house.gov/index.cfm/pressreleases"
270
+ end
271
+ doc = Statement::Scraper.open_html(url)
272
+ return if doc.nil?
273
+ doc.xpath("//tr")[2..-1].each do |row|
274
+ date_text, title = row.children.map{|c| c.text.strip}.reject{|c| c.empty?}
275
+ next if date_text == 'Date' or date_text.size > 10
276
+ date = Date.parse(date_text)
277
+ results << { :source => url, :url => row.children[3].children.first['href'], :title => title, :date => date, :domain => domain }
278
+ end
279
+ results
280
+ end
281
+
282
+ def self.cold_fusion(year=Date.today.year, month=nil)
283
+ results = []
284
+ year = Date.today.year if not year
285
+ domains = ['www.ronjohnson.senate.gov/public/','www.risch.senate.gov/public/']
267
286
  domains.each do |domain|
268
- if domain == 'crenshaw.house.gov' or domain == 'www.risch.senate.gov/public/'
269
- url = "http://"+domain + "/index.cfm/pressreleases?YearDisplay=#{year}&MonthDisplay=#{month}&page=1"
287
+ if domain == 'www.risch.senate.gov/public/'
288
+ if not month
289
+ url = "http://www.risch.senate.gov/public/index.cfm/pressreleases"
290
+ else
291
+ url = "http://www.risch.senate.gov/public/index.cfm/pressreleases?YearDisplay=#{year}&MonthDisplay=#{month}&page=1"
292
+ end
270
293
  else
271
- url = "http://"+domain + "index.cfm/press-releases?YearDisplay=#{year}&MonthDisplay=#{month}&page=1"
294
+ if not month
295
+ url = "http://www.ronjohnson.senate.gov/public/index.cfm/press-releases"
296
+ else
297
+ url = "http://www.ronjohnson.senate.gov/public/index.cfm/press-releases?YearDisplay=#{year}&MonthDisplay=#{month}&page=1"
298
+ end
272
299
  end
273
300
  doc = Statement::Scraper.open_html(url)
274
301
  return if doc.nil?
@@ -281,7 +308,7 @@ module Statement
281
308
  end
282
309
  results.flatten
283
310
  end
284
-
311
+
285
312
  def self.conaway(page=1)
286
313
  results = []
287
314
  base_url = "http://conaway.house.gov/news/"
@@ -293,7 +320,7 @@ module Statement
293
320
  end
294
321
  results
295
322
  end
296
-
323
+
297
324
  def self.chabot(year=Date.today.year)
298
325
  results = []
299
326
  base_url = "http://chabot.house.gov/news/"
@@ -306,7 +333,7 @@ module Statement
306
333
  end
307
334
  results
308
335
  end
309
-
336
+
310
337
  def self.susandavis
311
338
  results = []
312
339
  base_url = "http://www.house.gov/susandavis/"
@@ -318,7 +345,7 @@ module Statement
318
345
  end
319
346
  results
320
347
  end
321
-
348
+
322
349
  def self.klobuchar(year)
323
350
  results = []
324
351
  base_url = "http://www.klobuchar.senate.gov/"
@@ -333,7 +360,7 @@ module Statement
333
360
  end
334
361
  results
335
362
  end
336
-
363
+
337
364
  def self.lujan
338
365
  results = []
339
366
  base_url = 'http://lujan.house.gov/'
@@ -345,7 +372,7 @@ module Statement
345
372
  end
346
373
  results
347
374
  end
348
-
375
+
349
376
  def self.billnelson(year=2013)
350
377
  results = []
351
378
  base_url = "http://www.billnelson.senate.gov/news/"
@@ -357,7 +384,7 @@ module Statement
357
384
  end
358
385
  results
359
386
  end
360
-
387
+
361
388
  # fetches the latest 1000 releases, can be altered
362
389
  def self.lautenberg(rows=1000)
363
390
  results = []
@@ -370,7 +397,7 @@ module Statement
370
397
  end
371
398
  results
372
399
  end
373
-
400
+
374
401
  def self.crapo
375
402
  results = []
376
403
  base_url = "http://www.crapo.senate.gov/media/newsreleases/"
@@ -394,7 +421,7 @@ module Statement
394
421
  end
395
422
  results
396
423
  end
397
-
424
+
398
425
  def self.coburn(year=Date.today.year)
399
426
  results = []
400
427
  url = "http://www.coburn.senate.gov/public/index.cfm?p=PressReleases&ContentType_id=d741b7a7-7863-4223-9904-8cb9378aa03a&Group_id=7a55cb96-4639-4dac-8c0c-99a4a227bd3a&MonthDisplay=0&YearDisplay=#{year}"
@@ -406,7 +433,7 @@ module Statement
406
433
  end
407
434
  results
408
435
  end
409
-
436
+
410
437
  def self.boxer(start=1)
411
438
  results = []
412
439
  url = "http://www.boxer.senate.gov/en/press/releases.cfm?start=#{start}"
@@ -418,7 +445,7 @@ module Statement
418
445
  end
419
446
  results
420
447
  end
421
-
448
+
422
449
  def self.vitter(year=Date.today.year)
423
450
  results = []
424
451
  url = "http://www.vitter.senate.gov/newsroom/"
@@ -431,7 +458,7 @@ module Statement
431
458
  end
432
459
  results
433
460
  end
434
-
461
+
435
462
  def self.donnelly(year=Date.today.year)
436
463
  results = []
437
464
  url = "http://www.donnelly.senate.gov/newsroom/"
@@ -444,7 +471,7 @@ module Statement
444
471
  end
445
472
  results
446
473
  end
447
-
474
+
448
475
  def self.inhofe(year=Date.today.year)
449
476
  results = []
450
477
  url = "http://www.inhofe.senate.gov/newsroom/press-releases?year=#{year}"
@@ -457,7 +484,7 @@ module Statement
457
484
  end
458
485
  results
459
486
  end
460
-
487
+
461
488
  def self.palazzo(page=1)
462
489
  results = []
463
490
  domain = "palazzo.house.gov"
@@ -598,7 +625,7 @@ module Statement
598
625
  doc = open_html(url)
599
626
  return if doc.nil?
600
627
  doc.css('ul.fc_leading li').each do |row|
601
- results << {:source => url, :url => "http://gabbard.house.gov"+row.children[0].children[1]['href'], :title => row.children[0].children[1].text.strip, :date => Date.parse(row.children[2].text), :domain => domain}
628
+ results << {:source => url, :url => "http://gabbard.house.gov"+row.children[0].children[1]['href'], :title => row.children[0].children[1].text.strip, :date => Date.parse(row.children[2].text), :domain => domain}
602
629
  end
603
630
  results
604
631
  end
@@ -692,6 +719,26 @@ module Statement
692
719
  end
693
720
  results.flatten
694
721
  end
695
-
722
+
723
+ def self.backfill_bilirakis
724
+ results = []
725
+ domain = 'bilirakis.house.gov'
726
+ url = 'http://bilirakis.house.gov/press-releases/'
727
+ doc = open_html(url)
728
+ return if doc.nil?
729
+ doc.css("ul li[@class='article articleright']").each do |row|
730
+ results << {:source => url, :url => 'http://bilirakis.house.gov' + row.children[3].children[1]['href'], :title => row.children[3].text.strip, :date => Date.parse(row.children[5].text), :domain => domain }
731
+ end
732
+ end
733
+
734
+ def self.backfill_boustany
735
+ results = []
736
+ domain = 'boustany.house.gov'
737
+ url = 'http://boustany.house.gov/113th-congress/showallitems/'
738
+ doc = open_html(url)
739
+ return if doc.nil?
740
+
741
+ end
742
+
696
743
  end
697
- end
744
+ end
@@ -1,3 +1,3 @@
1
1
  module Statement
2
- VERSION = "1.8.10"
2
+ VERSION = "1.8.11"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: statement
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.8.10
4
+ version: 1.8.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - Derek Willis
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-10-24 00:00:00.000000000 Z
11
+ date: 2015-01-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler