statement 1.9.1 → 1.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/statement/feed.rb +5 -5
- data/lib/statement/scraper.rb +89 -16
- data/lib/statement/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0be73a63b6661e11498742d7877c36ed2da5e182
|
4
|
+
data.tar.gz: 9368bd6373159d94b9463e57e657616c440d9c01
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4ecf8b06def3327c5eafb8987b3037c793a08cf1ff85a54ceaf8823c3dce1a814289fecda74d359b49da75197ff57e104b60db42899cd2f2d7307c25bd930f0c
|
7
|
+
data.tar.gz: d70e2b77e900bbbd3122cce1665bc9d92293063c87bd4ce39b7f82b732e672152e8f4f1750160790942142f39cd4773beb8b8c082baf4714647fd4152c63aebf
|
data/lib/statement/feed.rb
CHANGED
@@ -7,7 +7,7 @@ require 'typhoeus'
|
|
7
7
|
|
8
8
|
module Statement
|
9
9
|
class Feed
|
10
|
-
|
10
|
+
|
11
11
|
def self.batch(urls)
|
12
12
|
results = []
|
13
13
|
failures = []
|
@@ -27,7 +27,7 @@ module Statement
|
|
27
27
|
hydra.run
|
28
28
|
[results.flatten, failures]
|
29
29
|
end
|
30
|
-
|
30
|
+
|
31
31
|
def self.open_rss(url)
|
32
32
|
begin
|
33
33
|
Nokogiri::XML(open(url))
|
@@ -35,10 +35,10 @@ module Statement
|
|
35
35
|
nil
|
36
36
|
end
|
37
37
|
end
|
38
|
-
|
38
|
+
|
39
39
|
def self.date_from_rss_item(link)
|
40
40
|
if !link.xpath('pubDate').text.empty?
|
41
|
-
|
41
|
+
Date.parse(link.xpath('pubDate').text)
|
42
42
|
elsif !link.xpath('pubdate').empty?
|
43
43
|
Date.parse(link.xpath('pubdate').text)
|
44
44
|
elsif link.xpath('link').text.include?("mikulski.senate.gov") and link.xpath('link').text.include?("-2014")
|
@@ -53,7 +53,7 @@ module Statement
|
|
53
53
|
return unless doc
|
54
54
|
parse_rss(doc, url)
|
55
55
|
end
|
56
|
-
|
56
|
+
|
57
57
|
def self.parse_rss(doc, url)
|
58
58
|
links = doc.xpath('//item')
|
59
59
|
return if links.empty?
|
data/lib/statement/scraper.rb
CHANGED
@@ -29,7 +29,10 @@ module Statement
|
|
29
29
|
end
|
30
30
|
|
31
31
|
def self.member_methods
|
32
|
-
[:crenshaw, :capuano, :cold_fusion, :conaway, :chabot, :susandavis, :freshman_senators, :klobuchar, :billnelson, :crapo, :boxer,
|
32
|
+
[:crenshaw, :capuano, :cold_fusion, :conaway, :chabot, :susandavis, :freshman_senators, :klobuchar, :billnelson, :crapo, :boxer,
|
33
|
+
:vitter, :inhofe, :palazzo, :roe, :document_query, :swalwell, :fischer, :clark, :edwards, :culberson_chabot_grisham, :barton,
|
34
|
+
:sherman_mccaul, :welch, :sessions, :gabbard, :ellison, :costa, :farr, :mcclintock, :mcnerney, :olson, :schumer, :lamborn, :walden,
|
35
|
+
:bennie_thompson, :speier, :poe]
|
33
36
|
end
|
34
37
|
|
35
38
|
def self.committee_methods
|
@@ -40,8 +43,9 @@ module Statement
|
|
40
43
|
year = Date.today.year
|
41
44
|
results = [crenshaw, capuano, cold_fusion(year, nil), conaway, chabot, susandavis, klobuchar(year), palazzo(page=1), roe(page=1), billnelson(year=year),
|
42
45
|
document_query(page=1), document_query(page=2), swalwell(page=1), crapo, coburn, boxer(start=1),
|
43
|
-
vitter(year=year), inhofe(year=
|
44
|
-
sessions(year=year), gabbard, ellison(page=0), costa, farr, olson, mcnerney
|
46
|
+
vitter(year=year), inhofe(year=year), fischer, clark(year=year), edwards, culberson_chabot_grisham(page=1), barton, sherman_mccaul, welch,
|
47
|
+
sessions(year=year), gabbard, ellison(page=0), costa, farr, olson, mcnerney, schumer, lamborn(limit=10), walden, bennie_thompson, speier,
|
48
|
+
poe(year=year, month=0)].flatten
|
45
49
|
results = results.compact
|
46
50
|
Utils.remove_generic_urls!(results)
|
47
51
|
end
|
@@ -51,7 +55,8 @@ module Statement
|
|
51
55
|
document_query(page=4), coburn(year=2012), coburn(year=2011), coburn(year=2010), boxer(start=11), boxer(start=21),
|
52
56
|
boxer(start=31), boxer(start=41), vitter(year=2012), vitter(year=2011), swalwell(page=2), swalwell(page=3), clark(year=2013), culberson_chabot_grisham(page=2),
|
53
57
|
sherman_mccaul(page=1), sessions(year=2013), pryor(page=1), ellison(page=1), ellison(page=2), ellison(page=3), farr(year=2013), farr(year=2012), farr(year=2011),
|
54
|
-
mcnerney(page=2), mcnerney(page=3), mcnerney(page=4), mcnerney(page=5), mcnerney(page=6), olson(year=2013)
|
58
|
+
mcnerney(page=2), mcnerney(page=3), mcnerney(page=4), mcnerney(page=5), mcnerney(page=6), olson(year=2013), schumer(page=2), schumer(page=3), poe(year=2015, month=2),
|
59
|
+
poe(year=2015, month=1)].flatten
|
55
60
|
Utils.remove_generic_urls!(results)
|
56
61
|
end
|
57
62
|
|
@@ -334,18 +339,6 @@ module Statement
|
|
334
339
|
results
|
335
340
|
end
|
336
341
|
|
337
|
-
def self.susandavis
|
338
|
-
results = []
|
339
|
-
base_url = "http://www.house.gov/susandavis/"
|
340
|
-
doc = open_html(base_url+'news.shtml')
|
341
|
-
return if doc.nil?
|
342
|
-
doc.search("ul")[6].children.each do |row|
|
343
|
-
next if row.text.strip == ''
|
344
|
-
results << { :source => base_url+'news.shtml', :url => base_url + row.children[1]['href'], :title => row.children[1].text.split.join(' '), :date => Date.parse(row.children.first.text), :domain => "house.gov/susandavis" }
|
345
|
-
end
|
346
|
-
results
|
347
|
-
end
|
348
|
-
|
349
342
|
def self.klobuchar(year)
|
350
343
|
results = []
|
351
344
|
base_url = "http://www.klobuchar.senate.gov/"
|
@@ -361,6 +354,20 @@ module Statement
|
|
361
354
|
results
|
362
355
|
end
|
363
356
|
|
357
|
+
def self.poe(year, month=0)
|
358
|
+
results = []
|
359
|
+
base_url = "http://poe.house.gov"
|
360
|
+
month_url = base_url + "/press-releases?MonthDisplay=#{month}&YearDisplay=#{year}"
|
361
|
+
doc = open_html(year_url)
|
362
|
+
return if doc.nil?
|
363
|
+
doc.xpath("//tr")[1..-1].each do |row|
|
364
|
+
next if row.children[3].children[0].text.strip == 'Title'
|
365
|
+
results << { :source => month_url, :url => base_url + row.children[3].children[0]['href'], :title => row.children[3].children[0].text.strip, :date => Date.strptime(row.children[1].text, "%m/%d/%y"), :domain => "poe.house.gov" }
|
366
|
+
end
|
367
|
+
end
|
368
|
+
results
|
369
|
+
end
|
370
|
+
|
364
371
|
def self.lujan
|
365
372
|
results = []
|
366
373
|
base_url = 'http://lujan.house.gov/'
|
@@ -709,6 +716,72 @@ module Statement
|
|
709
716
|
results.flatten
|
710
717
|
end
|
711
718
|
|
719
|
+
def self.schumer(page=1)
|
720
|
+
results = []
|
721
|
+
domain = 'www.schumer.senate.gov'
|
722
|
+
url = "http://www.schumer.senate.gov/newsroom/press-releases/table?PageNum_rs=#{page}"
|
723
|
+
doc = open_html(url)
|
724
|
+
return if doc.nil?
|
725
|
+
rows = (doc/:table/:tr).select{|r| !r.children[3].nil?}
|
726
|
+
rows.each do |row|
|
727
|
+
results << {:source => url, :url => row.children[3].children[1]['href'].strip, :title => row.children[3].text.strip, :date => Date.parse(row.children[1].text.strip), :domain => domain }
|
728
|
+
end
|
729
|
+
results
|
730
|
+
end
|
731
|
+
|
732
|
+
def self.lamborn(limit=nil)
|
733
|
+
results = []
|
734
|
+
domain = 'lamborn.house.gov'
|
735
|
+
url = "http://lamborn.house.gov/2015-press-releases/"
|
736
|
+
doc = open_html(url)
|
737
|
+
return if doc.nil?
|
738
|
+
links = (doc/:h3).map{|h| { "http://lamborn.house.gov"+h.children[1]['href'] => h.text.strip} }
|
739
|
+
links = links.first(limit) if limit
|
740
|
+
links.each do |link|
|
741
|
+
page = open_html(link.keys.first)
|
742
|
+
print_path = page.search("a").detect{|a| a['onclick'] && a['onclick'].include?('popup')}['onclick'].split("'")[1]
|
743
|
+
print_page = open_html("http://lamborn.house.gov"+print_path)
|
744
|
+
results << {:source => url, :url => link.keys.first, :title => link.values.first, :date => Date.parse(print_page.xpath('//*[@class="PopupNewsDetailsDate"]').text), :domain => domain }
|
745
|
+
end
|
746
|
+
results
|
747
|
+
end
|
748
|
+
|
749
|
+
def self.walden
|
750
|
+
results = []
|
751
|
+
domain = 'walden.house.gov'
|
752
|
+
url = "http://walden.house.gov/s2015/"
|
753
|
+
doc = open_html(url)
|
754
|
+
return if doc.nil?
|
755
|
+
doc.xpath('//*[@id="centerbox"]/div[1]/ul/li').each do |row|
|
756
|
+
results << {:source => url, :url => 'http://walden.house.gov' + row.children[3].children[1]['href'], :title => row.children[3].text.strip, :date => Date.parse(row.children[5].text), :domain => domain }
|
757
|
+
end
|
758
|
+
results
|
759
|
+
end
|
760
|
+
|
761
|
+
def self.bennie_thompson
|
762
|
+
results = []
|
763
|
+
domain = "benniethompson.house.gov"
|
764
|
+
url = "http://benniethompson.house.gov/index.php?option=com_content&view=category&id=41&Itemid=148"
|
765
|
+
doc = open_html(url)
|
766
|
+
return if doc.nil?
|
767
|
+
doc.xpath('//*[@id="adminForm"]/table/tbody/tr').each do |row|
|
768
|
+
results << {:source => url, :url => 'http://benniethompson.house.gov' + row.children[1].children[1]['href'], :title => row.children[1].children[1].text.strip, :date => Date.parse(row.children[3].text.strip), :domain => domain }
|
769
|
+
end
|
770
|
+
results
|
771
|
+
end
|
772
|
+
|
773
|
+
def self.speier
|
774
|
+
results = []
|
775
|
+
domain = "speier.house.gov"
|
776
|
+
url = "http://speier.house.gov/index.php?option=com_content&view=category&id=20&Itemid=14"
|
777
|
+
doc = open_html(url)
|
778
|
+
return if doc.nil?
|
779
|
+
doc.xpath('//*[@id="adminForm"]/table/tbody/tr').each do |row|
|
780
|
+
results << {:source => url, :url => 'http://speier.house.gov' + row.children[1].children[1]['href'], :title => row.children[1].children[1].text.strip, :date => Date.parse(row.children[3].text.strip), :domain => domain }
|
781
|
+
end
|
782
|
+
results
|
783
|
+
end
|
784
|
+
|
712
785
|
def self.backfill_bilirakis
|
713
786
|
results = []
|
714
787
|
domain = 'bilirakis.house.gov'
|
data/lib/statement/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: statement
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.9.1
|
4
|
+
version: 1.9.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Derek Willis
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-03-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|