statement 1.8.6 → 1.8.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/statement/scraper.rb +19 -31
- data/lib/statement/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5d24d337a4b4bf8bc8f8789940543cec80eee884
|
4
|
+
data.tar.gz: b718f210fc6e0c7a0d30b81304fc64d40029bf28
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 474fc04b2a15b38ddbecd92ce6af3438a1be3fb0baa1ae56456315fbc10ae4358646f2f19bf37219ef25b630308219fdf85e04152728a2fb320af484be5a1171
|
7
|
+
data.tar.gz: 093756d65a5eba4b75bf21fb05784eb6bea4898fd72106cdbd50e9d94449c158f469dce3b9a4db3ab6fd8977792173eea9072ce0b3eb7aa8d252e3313f4d91e9
|
data/lib/statement/scraper.rb
CHANGED
@@ -29,7 +29,7 @@ module Statement
|
|
29
29
|
end
|
30
30
|
|
31
31
|
def self.member_methods
|
32
|
-
[:capuano, :cold_fusion, :conaway, :chabot, :susandavis, :freshman_senators, :klobuchar, :billnelson, :lautenberg, :crapo, :coburn, :boxer, :vitter, :donnelly, :inhofe, :
|
32
|
+
[:capuano, :cold_fusion, :conaway, :chabot, :susandavis, :freshman_senators, :klobuchar, :billnelson, :lautenberg, :crapo, :coburn, :boxer, :vitter, :donnelly, :inhofe, :palazzo, :roe, :document_query, :swalwell, :fischer, :clark, :edwards, :culberson_chabot_grisham, :barton, :wolf_sherman_mccaul, :welch, :sessions, :gabbard]
|
33
33
|
end
|
34
34
|
|
35
35
|
def self.committee_methods
|
@@ -38,9 +38,9 @@ module Statement
|
|
38
38
|
|
39
39
|
def self.member_scrapers
|
40
40
|
year = Date.today.year
|
41
|
-
results = [capuano, cold_fusion(year, 0), conaway, chabot, susandavis, klobuchar, palazzo(page=1), roe(page=1), billnelson(year=year),
|
41
|
+
results = [capuano, cold_fusion(year, 0), conaway, chabot, susandavis, klobuchar(year), palazzo(page=1), roe(page=1), billnelson(year=year),
|
42
42
|
document_query(page=1), document_query(page=2), swalwell(page=1), donnelly(year=year), crapo, coburn, boxer(start=1),
|
43
|
-
vitter(year=year), inhofe(year=year),
|
43
|
+
vitter(year=year), inhofe(year=year), fischer, clark(year=year), edwards, culberson_chabot_grisham(page=1), barton, wolf_sherman_mccaul, welch,
|
44
44
|
sessions(year=year), gabbard, pryor].flatten
|
45
45
|
results = results.compact
|
46
46
|
Utils.remove_generic_urls!(results)
|
@@ -318,16 +318,16 @@ module Statement
|
|
318
318
|
results
|
319
319
|
end
|
320
320
|
|
321
|
-
def self.klobuchar
|
321
|
+
def self.klobuchar(year)
|
322
322
|
results = []
|
323
323
|
base_url = "http://www.klobuchar.senate.gov/"
|
324
|
-
[
|
324
|
+
[year.to_i-1,year.to_i].each do |year|
|
325
325
|
year_url = base_url + "public/news-releases?MonthDisplay=0&YearDisplay=#{year}"
|
326
326
|
doc = open_html(year_url)
|
327
327
|
return if doc.nil?
|
328
328
|
doc.xpath("//tr")[1..-1].each do |row|
|
329
|
-
next if row.children[
|
330
|
-
results << { :source => year_url, :url => row.children[
|
329
|
+
next if row.children[3].children[0].text.strip == 'Title'
|
330
|
+
results << { :source => year_url, :url => row.children[3].children[0]['href'], :title => row.children[3].children[0].text.strip, :date => Date.strptime(row.children[1].text, "%m/%d/%y"), :domain => "klobuchar.senate.gov" }
|
331
331
|
end
|
332
332
|
end
|
333
333
|
results
|
@@ -377,7 +377,7 @@ module Statement
|
|
377
377
|
doc = open_html(url)
|
378
378
|
return if doc.nil?
|
379
379
|
doc.xpath("//tr").each do |row|
|
380
|
-
results << { :source => url, :url => base_url + row.children[
|
380
|
+
results << { :source => url, :url => base_url + row.children[3].children[0]['href'], :title => row.children[3].text.strip, :date => Date.parse(row.children[1].text.strip.gsub('-','/')), :domain => "crapo.senate.gov" }
|
381
381
|
end
|
382
382
|
results
|
383
383
|
end
|
@@ -388,8 +388,8 @@ module Statement
|
|
388
388
|
doc = open_html(url)
|
389
389
|
return if doc.nil?
|
390
390
|
doc.xpath("//tr")[2..-1].each do |row|
|
391
|
-
next if row.text[0..3] == "Date"
|
392
|
-
results << { :source => url, :url => row.children[
|
391
|
+
next if row.text.strip[0..3] == "Date"
|
392
|
+
results << { :source => url, :url => row.children[3].children[0]['href'], :title => row.children[3].text.strip, :date => Date.strptime(row.children[1].text.strip, "%m/%d/%y"), :domain => "fischer.senate.gov" }
|
393
393
|
end
|
394
394
|
results
|
395
395
|
end
|
@@ -400,8 +400,8 @@ module Statement
|
|
400
400
|
doc = open_html(url)
|
401
401
|
return if doc.nil?
|
402
402
|
doc.xpath("//tr")[2..-1].each do |row|
|
403
|
-
next if row.text[0..3] == "Date"
|
404
|
-
results << { :source => url, :url => row.children[
|
403
|
+
next if row.text.strip[0..3] == "Date"
|
404
|
+
results << { :source => url, :url => row.children[3].children[0]['href'], :title => row.children[3].text.strip, :date => Date.strptime(row.children[1].text.strip, "%m/%d/%y"), :domain => "coburn.senate.gov" }
|
405
405
|
end
|
406
406
|
results
|
407
407
|
end
|
@@ -426,7 +426,7 @@ module Statement
|
|
426
426
|
return if doc.nil?
|
427
427
|
doc.xpath("//tr")[1..-1].each do |row|
|
428
428
|
next if row.text.strip.size < 30
|
429
|
-
results << { :source => url, :url => row.children[
|
429
|
+
results << { :source => url, :url => row.children[3].children[0]['href'].strip, :title => row.children[3].text, :date => Date.strptime(row.children[1].text, "%m/%d/%y"), :domain => domain}
|
430
430
|
end
|
431
431
|
results
|
432
432
|
end
|
@@ -439,7 +439,7 @@ module Statement
|
|
439
439
|
return if doc.nil?
|
440
440
|
doc.xpath("//tr")[1..-1].each do |row|
|
441
441
|
next if row.text.strip.size < 30
|
442
|
-
results << { :source => url, :url => "http://www.donnelly.senate.gov"+row.children[
|
442
|
+
results << { :source => url, :url => "http://www.donnelly.senate.gov"+row.children[3].children[1]['href'].strip, :title => row.children[3].text.strip, :date => Date.strptime(row.children[1].text, "%m/%d/%y"), :domain => domain}
|
443
443
|
end
|
444
444
|
results
|
445
445
|
end
|
@@ -452,19 +452,7 @@ module Statement
|
|
452
452
|
return if doc.nil?
|
453
453
|
doc.xpath("//tr")[1..-1].each do |row|
|
454
454
|
next if row.text.strip.size < 30
|
455
|
-
results << { :source => url, :url => row.children[
|
456
|
-
end
|
457
|
-
results
|
458
|
-
end
|
459
|
-
|
460
|
-
def self.reid
|
461
|
-
results = []
|
462
|
-
url = "http://www.reid.senate.gov/newsroom/press_releases.cfm"
|
463
|
-
domain = "www.reid.senate.gov"
|
464
|
-
doc = open_html(url)
|
465
|
-
return if doc.nil?
|
466
|
-
doc.xpath("//table[@id='CS_PgIndex_21891_21893']//tr")[1..-1].each do |row|
|
467
|
-
results << { :source => url, :url => "http://www.reid.senate.gov"+row.children[0].children[0]['href'], :title => row.children[0].children[0].text, :date => Date.parse(row.children[0].children[2].text), :domain => domain}
|
455
|
+
results << { :source => url, :url => row.children[3].children[0]['href'].strip, :title => row.children[3].text, :date => Date.strptime(row.children[1].text, "%m/%d/%y"), :domain => domain}
|
468
456
|
end
|
469
457
|
results
|
470
458
|
end
|
@@ -500,8 +488,8 @@ module Statement
|
|
500
488
|
doc = open_html(url)
|
501
489
|
return if doc.nil?
|
502
490
|
(doc/:tr)[1..-1].each do |row|
|
503
|
-
next if row.children.
|
504
|
-
results << { :source => url, :date => Date.parse(row.children.
|
491
|
+
next if row.children[1].text.strip == 'Date'
|
492
|
+
results << { :source => url, :date => Date.parse(row.children[1].text.strip), :title => row.children[3].children.text, :url => row.children[3].children[0]['href'], :domain => domain}
|
505
493
|
end
|
506
494
|
results
|
507
495
|
end
|
@@ -513,8 +501,8 @@ module Statement
|
|
513
501
|
doc = open_html(url)
|
514
502
|
return if doc.nil?
|
515
503
|
(doc/:tr)[1..-1].each do |row|
|
516
|
-
next if row.children.
|
517
|
-
results << { :source => url, :date => Date.parse(row.children.
|
504
|
+
next if row.children[1].text.strip == 'Date'
|
505
|
+
results << { :source => url, :date => Date.parse(row.children[1].text), :title => row.children[3].children.text, :url => row.children[3].children[0]['href'], :domain => domain}
|
518
506
|
end
|
519
507
|
results
|
520
508
|
end
|
data/lib/statement/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: statement
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.8.6
|
4
|
+
version: 1.8.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Derek Willis
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-09-
|
11
|
+
date: 2014-09-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|