statement 1.8.6 → 1.8.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/statement/scraper.rb +19 -31
- data/lib/statement/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5d24d337a4b4bf8bc8f8789940543cec80eee884
|
4
|
+
data.tar.gz: b718f210fc6e0c7a0d30b81304fc64d40029bf28
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 474fc04b2a15b38ddbecd92ce6af3438a1be3fb0baa1ae56456315fbc10ae4358646f2f19bf37219ef25b630308219fdf85e04152728a2fb320af484be5a1171
|
7
|
+
data.tar.gz: 093756d65a5eba4b75bf21fb05784eb6bea4898fd72106cdbd50e9d94449c158f469dce3b9a4db3ab6fd8977792173eea9072ce0b3eb7aa8d252e3313f4d91e9
|
data/lib/statement/scraper.rb
CHANGED
@@ -29,7 +29,7 @@ module Statement
|
|
29
29
|
end
|
30
30
|
|
31
31
|
def self.member_methods
|
32
|
-
[:capuano, :cold_fusion, :conaway, :chabot, :susandavis, :freshman_senators, :klobuchar, :billnelson, :lautenberg, :crapo, :coburn, :boxer, :vitter, :donnelly, :inhofe, :
|
32
|
+
[:capuano, :cold_fusion, :conaway, :chabot, :susandavis, :freshman_senators, :klobuchar, :billnelson, :lautenberg, :crapo, :coburn, :boxer, :vitter, :donnelly, :inhofe, :palazzo, :roe, :document_query, :swalwell, :fischer, :clark, :edwards, :culberson_chabot_grisham, :barton, :wolf_sherman_mccaul, :welch, :sessions, :gabbard]
|
33
33
|
end
|
34
34
|
|
35
35
|
def self.committee_methods
|
@@ -38,9 +38,9 @@ module Statement
|
|
38
38
|
|
39
39
|
def self.member_scrapers
|
40
40
|
year = Date.today.year
|
41
|
-
results = [capuano, cold_fusion(year, 0), conaway, chabot, susandavis, klobuchar, palazzo(page=1), roe(page=1), billnelson(year=year),
|
41
|
+
results = [capuano, cold_fusion(year, 0), conaway, chabot, susandavis, klobuchar(year), palazzo(page=1), roe(page=1), billnelson(year=year),
|
42
42
|
document_query(page=1), document_query(page=2), swalwell(page=1), donnelly(year=year), crapo, coburn, boxer(start=1),
|
43
|
-
vitter(year=year), inhofe(year=year),
|
43
|
+
vitter(year=year), inhofe(year=year), fischer, clark(year=year), edwards, culberson_chabot_grisham(page=1), barton, wolf_sherman_mccaul, welch,
|
44
44
|
sessions(year=year), gabbard, pryor].flatten
|
45
45
|
results = results.compact
|
46
46
|
Utils.remove_generic_urls!(results)
|
@@ -318,16 +318,16 @@ module Statement
|
|
318
318
|
results
|
319
319
|
end
|
320
320
|
|
321
|
-
def self.klobuchar
|
321
|
+
def self.klobuchar(year)
|
322
322
|
results = []
|
323
323
|
base_url = "http://www.klobuchar.senate.gov/"
|
324
|
-
[
|
324
|
+
[year.to_i-1,year.to_i].each do |year|
|
325
325
|
year_url = base_url + "public/news-releases?MonthDisplay=0&YearDisplay=#{year}"
|
326
326
|
doc = open_html(year_url)
|
327
327
|
return if doc.nil?
|
328
328
|
doc.xpath("//tr")[1..-1].each do |row|
|
329
|
-
next if row.children[
|
330
|
-
results << { :source => year_url, :url => row.children[
|
329
|
+
next if row.children[3].children[0].text.strip == 'Title'
|
330
|
+
results << { :source => year_url, :url => row.children[3].children[0]['href'], :title => row.children[3].children[0].text.strip, :date => Date.strptime(row.children[1].text, "%m/%d/%y"), :domain => "klobuchar.senate.gov" }
|
331
331
|
end
|
332
332
|
end
|
333
333
|
results
|
@@ -377,7 +377,7 @@ module Statement
|
|
377
377
|
doc = open_html(url)
|
378
378
|
return if doc.nil?
|
379
379
|
doc.xpath("//tr").each do |row|
|
380
|
-
results << { :source => url, :url => base_url + row.children[
|
380
|
+
results << { :source => url, :url => base_url + row.children[3].children[0]['href'], :title => row.children[3].text.strip, :date => Date.parse(row.children[1].text.strip.gsub('-','/')), :domain => "crapo.senate.gov" }
|
381
381
|
end
|
382
382
|
results
|
383
383
|
end
|
@@ -388,8 +388,8 @@ module Statement
|
|
388
388
|
doc = open_html(url)
|
389
389
|
return if doc.nil?
|
390
390
|
doc.xpath("//tr")[2..-1].each do |row|
|
391
|
-
next if row.text[0..3] == "Date"
|
392
|
-
results << { :source => url, :url => row.children[
|
391
|
+
next if row.text.strip[0..3] == "Date"
|
392
|
+
results << { :source => url, :url => row.children[3].children[0]['href'], :title => row.children[3].text.strip, :date => Date.strptime(row.children[1].text.strip, "%m/%d/%y"), :domain => "fischer.senate.gov" }
|
393
393
|
end
|
394
394
|
results
|
395
395
|
end
|
@@ -400,8 +400,8 @@ module Statement
|
|
400
400
|
doc = open_html(url)
|
401
401
|
return if doc.nil?
|
402
402
|
doc.xpath("//tr")[2..-1].each do |row|
|
403
|
-
next if row.text[0..3] == "Date"
|
404
|
-
results << { :source => url, :url => row.children[
|
403
|
+
next if row.text.strip[0..3] == "Date"
|
404
|
+
results << { :source => url, :url => row.children[3].children[0]['href'], :title => row.children[3].text.strip, :date => Date.strptime(row.children[1].text.strip, "%m/%d/%y"), :domain => "coburn.senate.gov" }
|
405
405
|
end
|
406
406
|
results
|
407
407
|
end
|
@@ -426,7 +426,7 @@ module Statement
|
|
426
426
|
return if doc.nil?
|
427
427
|
doc.xpath("//tr")[1..-1].each do |row|
|
428
428
|
next if row.text.strip.size < 30
|
429
|
-
results << { :source => url, :url => row.children[
|
429
|
+
results << { :source => url, :url => row.children[3].children[0]['href'].strip, :title => row.children[3].text, :date => Date.strptime(row.children[1].text, "%m/%d/%y"), :domain => domain}
|
430
430
|
end
|
431
431
|
results
|
432
432
|
end
|
@@ -439,7 +439,7 @@ module Statement
|
|
439
439
|
return if doc.nil?
|
440
440
|
doc.xpath("//tr")[1..-1].each do |row|
|
441
441
|
next if row.text.strip.size < 30
|
442
|
-
results << { :source => url, :url => "http://www.donnelly.senate.gov"+row.children[
|
442
|
+
results << { :source => url, :url => "http://www.donnelly.senate.gov"+row.children[3].children[1]['href'].strip, :title => row.children[3].text.strip, :date => Date.strptime(row.children[1].text, "%m/%d/%y"), :domain => domain}
|
443
443
|
end
|
444
444
|
results
|
445
445
|
end
|
@@ -452,19 +452,7 @@ module Statement
|
|
452
452
|
return if doc.nil?
|
453
453
|
doc.xpath("//tr")[1..-1].each do |row|
|
454
454
|
next if row.text.strip.size < 30
|
455
|
-
results << { :source => url, :url => row.children[
|
456
|
-
end
|
457
|
-
results
|
458
|
-
end
|
459
|
-
|
460
|
-
def self.reid
|
461
|
-
results = []
|
462
|
-
url = "http://www.reid.senate.gov/newsroom/press_releases.cfm"
|
463
|
-
domain = "www.reid.senate.gov"
|
464
|
-
doc = open_html(url)
|
465
|
-
return if doc.nil?
|
466
|
-
doc.xpath("//table[@id='CS_PgIndex_21891_21893']//tr")[1..-1].each do |row|
|
467
|
-
results << { :source => url, :url => "http://www.reid.senate.gov"+row.children[0].children[0]['href'], :title => row.children[0].children[0].text, :date => Date.parse(row.children[0].children[2].text), :domain => domain}
|
455
|
+
results << { :source => url, :url => row.children[3].children[0]['href'].strip, :title => row.children[3].text, :date => Date.strptime(row.children[1].text, "%m/%d/%y"), :domain => domain}
|
468
456
|
end
|
469
457
|
results
|
470
458
|
end
|
@@ -500,8 +488,8 @@ module Statement
|
|
500
488
|
doc = open_html(url)
|
501
489
|
return if doc.nil?
|
502
490
|
(doc/:tr)[1..-1].each do |row|
|
503
|
-
next if row.children.
|
504
|
-
results << { :source => url, :date => Date.parse(row.children.
|
491
|
+
next if row.children[1].text.strip == 'Date'
|
492
|
+
results << { :source => url, :date => Date.parse(row.children[1].text.strip), :title => row.children[3].children.text, :url => row.children[3].children[0]['href'], :domain => domain}
|
505
493
|
end
|
506
494
|
results
|
507
495
|
end
|
@@ -513,8 +501,8 @@ module Statement
|
|
513
501
|
doc = open_html(url)
|
514
502
|
return if doc.nil?
|
515
503
|
(doc/:tr)[1..-1].each do |row|
|
516
|
-
next if row.children.
|
517
|
-
results << { :source => url, :date => Date.parse(row.children.
|
504
|
+
next if row.children[1].text.strip == 'Date'
|
505
|
+
results << { :source => url, :date => Date.parse(row.children[1].text), :title => row.children[3].children.text, :url => row.children[3].children[0]['href'], :domain => domain}
|
518
506
|
end
|
519
507
|
results
|
520
508
|
end
|
data/lib/statement/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: statement
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.8.6
|
4
|
+
version: 1.8.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Derek Willis
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-09-
|
11
|
+
date: 2014-09-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|