statement 2.0 → 2.0.1
This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/statement/feed.rb +15 -1
- data/lib/statement/scraper.rb +38 -4
- data/lib/statement/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 74eeb1be0ee4ada11df34f5b8ddce00ffd5649cf
|
4
|
+
data.tar.gz: dd7fa7d9e6b0ccf0ae0bd3cdfa3f61f9306f4ef4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 392340713cdf259bff3e60816d3ea12966a09127b0cad75c65f71307b18a692812bb7dc594fc332bbe9c49df15d52f6ab9d396d27795890d736de65ff10330a7
|
7
|
+
data.tar.gz: a80659fe9a3a651f8db4585112dd193fe413762d1409afd4703ff1d63454599298c22416e4f1f8735897a04169e4e09d5c3436a3e8699c67d7f124654423033f
|
data/lib/statement/feed.rb
CHANGED
@@ -17,6 +17,7 @@ module Statement
|
|
17
17
|
req.on_complete do |response|
|
18
18
|
if response.success?
|
19
19
|
doc = Nokogiri::XML(response.body)
|
20
|
+
results << parse_atom(doc, url) if url == "http://larson.house.gov/index.php?option=com_ninjarsssyndicator&feed_id=1&format=raw"
|
20
21
|
results << parse_rss(doc, url)
|
21
22
|
else
|
22
23
|
failures << url
|
@@ -51,7 +52,11 @@ module Statement
|
|
51
52
|
def self.from_rss(url)
|
52
53
|
doc = open_rss(url)
|
53
54
|
return unless doc
|
54
|
-
|
55
|
+
if url == "http://larson.house.gov/index.php?option=com_ninjarsssyndicator&feed_id=1&format=raw"
|
56
|
+
parse_atom(doc, url)
|
57
|
+
else
|
58
|
+
parse_rss(doc, url)
|
59
|
+
end
|
55
60
|
end
|
56
61
|
|
57
62
|
def self.parse_rss(doc, url)
|
@@ -65,5 +70,14 @@ module Statement
|
|
65
70
|
end
|
66
71
|
Utils.remove_generic_urls!(results)
|
67
72
|
end
|
73
|
+
|
74
|
+
def self.parse_atom(doc, url)
|
75
|
+
links = (doc/:entry)
|
76
|
+
return if links.empty?
|
77
|
+
results = links.map do |link|
|
78
|
+
{ :source => url, :url => link.children[3]['href'], :title => link.children[1].text, :date => Date.parse(link.children[5].text), :domain => URI.parse(url).host }
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
68
82
|
end
|
69
83
|
end
|
data/lib/statement/scraper.rb
CHANGED
@@ -32,7 +32,7 @@ module Statement
|
|
32
32
|
[:crenshaw, :capuano, :cold_fusion, :conaway, :chabot, :freshman_senators, :klobuchar, :billnelson, :crapo, :boxer,
|
33
33
|
:vitter, :inhofe, :document_query, :swalwell, :fischer, :clark, :edwards, :culberson_chabot_grisham, :barton,
|
34
34
|
:welch, :sessions, :gabbard, :costa, :farr, :mcclintock, :olson, :schumer, :lamborn, :walden,
|
35
|
-
:bennie_thompson, :speier, :poe, :grassley, :bennet, :shaheen, :keating, :drupal, :jenkins]
|
35
|
+
:bennie_thompson, :speier, :poe, :grassley, :bennet, :shaheen, :keating, :drupal, :jenkins, :durbin_burr]
|
36
36
|
end
|
37
37
|
|
38
38
|
def self.committee_methods
|
@@ -45,7 +45,7 @@ module Statement
|
|
45
45
|
document_query(page=1), document_query(page=2), swalwell(page=1), crapo, boxer, grassley(page=0),
|
46
46
|
vitter(year=year), inhofe(year=year), fischer, clark(year=year), edwards, culberson_chabot_grisham(page=1), barton, welch,
|
47
47
|
sessions(year=year), gabbard, costa, farr, olson, schumer, lamborn(limit=10), walden, bennie_thompson, speier,
|
48
|
-
poe(year=year, month=0), bennet(page=1), shaheen(page=1), perlmutter, keating, drupal, jenkins].flatten
|
48
|
+
poe(year=year, month=0), bennet(page=1), shaheen(page=1), perlmutter, keating, drupal, jenkins, durbin_burr(page=1)].flatten
|
49
49
|
results = results.compact
|
50
50
|
Utils.remove_generic_urls!(results)
|
51
51
|
end
|
@@ -491,6 +491,20 @@ module Statement
|
|
491
491
|
results
|
492
492
|
end
|
493
493
|
|
494
|
+
def self.durbin_burr(page=1)
|
495
|
+
results = []
|
496
|
+
domains = ["www.durbin.senate.gov", "www.burr.senate.gov"]
|
497
|
+
domains.each do |domain|
|
498
|
+
url = "http://#{domain}/newsroom/press-releases?PageNum_rs=#{page}&"
|
499
|
+
doc = open_html(url)
|
500
|
+
return if doc.nil?
|
501
|
+
doc.xpath("//div[@id='press']//h2").each do |row|
|
502
|
+
results << { :source => url, :url => "http://#{domain}"+row.children[0]['href'], :title => row.children[0].text.strip, :date => Date.parse(row.previous.previous.text.gsub(".","/")), :domain => domain}
|
503
|
+
end
|
504
|
+
end
|
505
|
+
results
|
506
|
+
end
|
507
|
+
|
494
508
|
def self.inhofe(year=Date.today.year)
|
495
509
|
results = []
|
496
510
|
url = "http://www.inhofe.senate.gov/newsroom/press-releases?year=#{year}"
|
@@ -651,7 +665,25 @@ module Statement
|
|
651
665
|
|
652
666
|
def self.document_query(page=1)
|
653
667
|
results = []
|
654
|
-
domains = [
|
668
|
+
domains = [
|
669
|
+
{"thornberry.house.gov" => 1776},
|
670
|
+
{"wenstrup.house.gov" => 2491},
|
671
|
+
{"clawson.house.gov" => 2641},
|
672
|
+
{"palazzo.house.gov" => 2519},
|
673
|
+
{"roe.house.gov" => 1532},
|
674
|
+
{"perry.house.gov" => 2608},
|
675
|
+
{"rodneydavis.house.gov" => 2427},
|
676
|
+
{"kevinbrady.house.gov" => 2657},
|
677
|
+
{"loudermilk.house.gov" => 27},
|
678
|
+
{"babin.house.gov" => 27},
|
679
|
+
{"bridenstine.house.gov" => 2412},
|
680
|
+
{"allen.house.gov" => 27},
|
681
|
+
{"davidscott.house.gov" => 377},
|
682
|
+
{"buddycarter.house.gov" => 27},
|
683
|
+
{"grothman.house.gov" => 27},
|
684
|
+
{"beyer.house.gov" => 27},
|
685
|
+
{"kathleenrice.house.gov" => 27}
|
686
|
+
]
|
655
687
|
domains.each do |domain|
|
656
688
|
doc = open_html("http://"+domain.keys.first+"/news/documentquery.aspx?DocumentTypeID=#{domain.values.first}&Page=#{page}")
|
657
689
|
return if doc.nil?
|
@@ -827,7 +859,9 @@ module Statement
|
|
827
859
|
"http://sarbanes.house.gov/media-center/press-releases",
|
828
860
|
"http://wilson.house.gov/media-center/press-releases",
|
829
861
|
"https://bilirakis.house.gov/press-releases",
|
830
|
-
"http://quigley.house.gov/media-center/press-releases"
|
862
|
+
"http://quigley.house.gov/media-center/press-releases",
|
863
|
+
"https://denham.house.gov/media-center/press-releases",
|
864
|
+
"https://sewell.house.gov/media-center/press-releases"
|
831
865
|
]
|
832
866
|
end
|
833
867
|
|
data/lib/statement/version.rb
CHANGED