statement 1.8.7 → 1.8.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/statement/scraper.rb +47 -6
- data/lib/statement/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 88ef7c73e2da9cf4f89002e9d60a228508c4fb9d
|
4
|
+
data.tar.gz: 0423e9d85fd4ab80a247fe4a836e10fdaf31a2ab
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0753f5edd8d58526b457cde2bda819e814e3d805f547e302f920e061f4204837786c088f0083a8afa7b6fcd625502d55f978c1b0b98feb9ff0ed19c451943f36
|
7
|
+
data.tar.gz: 2e3ab0f04fdeaa2d4c5d4b9c12ca004620dbab6ee7bb483688faf99d171c861ce5f31acf5da1c62246e0ce33564adc9b45a9eccf5708fb66c0512404991301e8
|
data/lib/statement/scraper.rb
CHANGED
@@ -29,7 +29,7 @@ module Statement
|
|
29
29
|
end
|
30
30
|
|
31
31
|
def self.member_methods
|
32
|
-
[:capuano, :cold_fusion, :conaway, :chabot, :susandavis, :freshman_senators, :klobuchar, :billnelson, :lautenberg, :crapo, :coburn, :boxer, :vitter, :donnelly, :inhofe, :palazzo, :roe, :document_query, :swalwell, :fischer, :clark, :edwards, :culberson_chabot_grisham, :barton, :wolf_sherman_mccaul, :welch, :sessions, :gabbard]
|
32
|
+
[:capuano, :cold_fusion, :conaway, :chabot, :susandavis, :freshman_senators, :klobuchar, :billnelson, :lautenberg, :crapo, :coburn, :boxer, :vitter, :donnelly, :inhofe, :palazzo, :roe, :document_query, :swalwell, :fischer, :clark, :edwards, :culberson_chabot_grisham, :barton, :wolf_sherman_mccaul, :welch, :sessions, :gabbard, :ellison, :costa, :farr]
|
33
33
|
end
|
34
34
|
|
35
35
|
def self.committee_methods
|
@@ -41,7 +41,7 @@ module Statement
|
|
41
41
|
results = [capuano, cold_fusion(year, 0), conaway, chabot, susandavis, klobuchar(year), palazzo(page=1), roe(page=1), billnelson(year=year),
|
42
42
|
document_query(page=1), document_query(page=2), swalwell(page=1), donnelly(year=year), crapo, coburn, boxer(start=1),
|
43
43
|
vitter(year=year), inhofe(year=year), fischer, clark(year=year), edwards, culberson_chabot_grisham(page=1), barton, wolf_sherman_mccaul, welch,
|
44
|
-
sessions(year=year), gabbard, pryor].flatten
|
44
|
+
sessions(year=year), gabbard, pryor, ellison(page=0), costa, farr].flatten
|
45
45
|
results = results.compact
|
46
46
|
Utils.remove_generic_urls!(results)
|
47
47
|
end
|
@@ -50,7 +50,7 @@ module Statement
|
|
50
50
|
results = [cold_fusion(2012, 0), cold_fusion(2011, 0), cold_fusion(2010, 0), billnelson(year=2012), document_query(page=3),
|
51
51
|
document_query(page=4), coburn(year=2012), coburn(year=2011), coburn(year=2010), boxer(start=11), boxer(start=21),
|
52
52
|
boxer(start=31), boxer(start=41), vitter(year=2012), vitter(year=2011), swalwell(page=2), swalwell(page=3), clark(year=2013), culberson_chabot_grisham(page=2),
|
53
|
-
wolf_sherman_mccaul(page=1), sessions(year=2013), pryor(page=1)].flatten
|
53
|
+
wolf_sherman_mccaul(page=1), sessions(year=2013), pryor(page=1), ellison(page=1), ellison(page=2), ellison(page=3), farr(year=2013), farr(year=2012), farr(year=2011)].flatten
|
54
54
|
Utils.remove_generic_urls!(results)
|
55
55
|
end
|
56
56
|
|
@@ -602,14 +602,55 @@ module Statement
|
|
602
602
|
results
|
603
603
|
end
|
604
604
|
|
605
|
+
def self.ellison(page=0)
|
606
|
+
results = []
|
607
|
+
domain = 'ellison.house.gov'
|
608
|
+
url = "http://ellison.house.gov/media-center/press-releases?page=#{page}"
|
609
|
+
doc = open_html(url)
|
610
|
+
return if doc.nil?
|
611
|
+
doc.xpath("//div[@class='views-field views-field-created datebar']").each do |row|
|
612
|
+
next if row.nil?
|
613
|
+
results << { :source => url, :url => "http://ellison.house.gov" + row.next.next.children[1].children[0]['href'], :title => row.next.next.children[1].children[0].text.strip, :date => Date.parse(row.text.strip), :domain => domain}
|
614
|
+
end
|
615
|
+
results
|
616
|
+
end
|
617
|
+
|
618
|
+
def self.costa
|
619
|
+
results = []
|
620
|
+
domain = 'costa.house.gov'
|
621
|
+
url = "http://costa.house.gov/index.php/newsroom30/press-releases12"
|
622
|
+
doc = open_html(url)
|
623
|
+
return if doc.nil?
|
624
|
+
doc.xpath("//div[@class='nspArt']").each do |row|
|
625
|
+
results << { :source => url, :url => "http://costa.house.gov" + row.children[0].children[1].children[0]['href'], :title => row.children[0].children[1].children[0].text.strip, :date => Date.parse(row.children[0].children[0].text), :domain => domain}
|
626
|
+
end
|
627
|
+
results
|
628
|
+
end
|
629
|
+
|
630
|
+
def self.farr(year=2014)
|
631
|
+
results = []
|
632
|
+
domain = 'www.farr.house.gov'
|
633
|
+
if year == 2014
|
634
|
+
url = "http://www.farr.house.gov/index.php/newsroom/press-releases"
|
635
|
+
else
|
636
|
+
url = "http://www.farr.house.gov/index.php/newsroom/press-releases-archive/#{year.to_s}-press-releases"
|
637
|
+
end
|
638
|
+
doc = open_html(url)
|
639
|
+
return if doc.nil?
|
640
|
+
doc.xpath("//tr[@class='cat-list-row0']").each do |row|
|
641
|
+
results << { :source => url, :url => "http://farr.house.gov" + row.children[1].children[1]['href'], :title => row.children[1].children[1].text.strip, :date => Date.parse(row.children[3].text.strip), :domain => domain}
|
642
|
+
end
|
643
|
+
results
|
644
|
+
end
|
645
|
+
|
605
646
|
def self.document_query(page=1)
|
606
647
|
results = []
|
607
|
-
domains = [{"thornberry.house.gov" => 1776}, {"wenstrup.house.gov" => 2491}]
|
648
|
+
domains = [{"thornberry.house.gov" => 1776}, {"wenstrup.house.gov" => 2491}, {"clawson.house.gov" => 2641}]
|
608
649
|
domains.each do |domain|
|
609
650
|
doc = open_html("http://"+domain.keys.first+"/news/documentquery.aspx?DocumentTypeID=#{domain.values.first}&Page=#{page}")
|
610
651
|
return if doc.nil?
|
611
|
-
doc.xpath("//
|
612
|
-
results << { :source => "http://"+domain.keys.first+"/news/"+"documentquery.aspx?DocumentTypeID=#{domain.values.first}&Page=#{page}", :url => "http://"+domain.keys.first+"/news/" + row.children[
|
652
|
+
doc.xpath("//div[@class='middlecopy']//li").each do |row|
|
653
|
+
results << { :source => "http://"+domain.keys.first+"/news/"+"documentquery.aspx?DocumentTypeID=#{domain.values.first}&Page=#{page}", :url => "http://"+domain.keys.first+"/news/" + row.children[1]['href'], :title => row.children[1].text.strip, :date => Date.parse(row.children[3].text.strip), :domain => domain.keys.first }
|
613
654
|
end
|
614
655
|
end
|
615
656
|
results.flatten
|
data/lib/statement/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: statement
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.8.7
|
4
|
+
version: 1.8.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Derek Willis
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-10-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|