statement 1.0.9 → 1.0.10
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +1 -1
- data/lib/statement/scraper.rb +7 -17
- data/lib/statement/version.rb +1 -1
- data/spec/statement_spec.rb +12 -16
- data/statement.gemspec +3 -3
- metadata +7 -7
data/Rakefile
CHANGED
data/lib/statement/scraper.rb
CHANGED
@@ -29,7 +29,7 @@ module Statement
|
|
29
29
|
end
|
30
30
|
|
31
31
|
def self.member_methods
|
32
|
-
[:capuano, :cold_fusion, :conaway, :susandavis, :
|
32
|
+
[:capuano, :cold_fusion, :conaway, :susandavis, :freshman_senators, :klobuchar, :lujan, :billnelson, :lautenberg, :crapo, :coburn, :boxer, :mccain, :vitter, :donnelly, :inhofe, :levin, :reid, :palazzo, :document_query, :farenthold, :swalwell, :fischer]
|
33
33
|
end
|
34
34
|
|
35
35
|
def self.committee_methods
|
@@ -38,7 +38,7 @@ module Statement
|
|
38
38
|
|
39
39
|
def self.member_scrapers
|
40
40
|
year = Date.today.year
|
41
|
-
results = [freshman_senators, capuano, cold_fusion(year, 0), conaway, susandavis,
|
41
|
+
results = [freshman_senators, capuano, cold_fusion(year, 0), conaway, susandavis, klobuchar, lujan, palazzo(page=1), billnelson(year=year),
|
42
42
|
document_query(page=1), document_query(page=2), farenthold(year), swalwell(page=1), donnelly(year=year), crapo, coburn, boxer(start=1), mccain(year=year),
|
43
43
|
vitter(year=year), inhofe(year=year), reid, fischer].flatten
|
44
44
|
Utils.remove_generic_urls!(results)
|
@@ -305,20 +305,9 @@ module Statement
|
|
305
305
|
results
|
306
306
|
end
|
307
307
|
|
308
|
-
def self.faleomavaega
|
309
|
-
results = []
|
310
|
-
base_url = "http://www.house.gov/faleomavaega/news-press.shtml"
|
311
|
-
doc = open_html(base_url)
|
312
|
-
return if doc.nil?
|
313
|
-
doc.xpath("//li[@type='disc']").each do |row|
|
314
|
-
results << { :source => base_url, :url => "http://www.house.gov/" + row.children[0]['href'], :title => row.children[0].text.gsub(/[\u201c\u201d]/, '').split('Washington, D.C.').last, :date => Date.parse(row.children[1].text), :domain => "house.gov/faleomavaega" }
|
315
|
-
end
|
316
|
-
results
|
317
|
-
end
|
318
|
-
|
319
308
|
def self.freshman_senators
|
320
309
|
results = []
|
321
|
-
['
|
310
|
+
['murphy','cruz'].each do |senator|
|
322
311
|
base_url = "http://www.#{senator}.senate.gov/"
|
323
312
|
doc = open_html(base_url+'press.cfm?maxrows=200&startrow=1&&type=1')
|
324
313
|
return if doc.nil?
|
@@ -334,11 +323,12 @@ module Statement
|
|
334
323
|
results = []
|
335
324
|
base_url = "http://www.klobuchar.senate.gov/"
|
336
325
|
[2012,2013].each do |year|
|
337
|
-
year_url = base_url + "
|
326
|
+
year_url = base_url + "public/news-releases?MonthDisplay=0&YearDisplay=#{year}"
|
338
327
|
doc = open_html(year_url)
|
339
328
|
return if doc.nil?
|
340
|
-
doc.xpath("//
|
341
|
-
|
329
|
+
doc.xpath("//tr")[1..-1].each do |row|
|
330
|
+
next if row.children[2].children[0].text.strip == 'Title'
|
331
|
+
results << { :source => year_url, :url => row.children[2].children[0]['href'], :title => row.children[2].children[0].text.strip, :date => Date.strptime(row.children[0].text, "%m/%d/%y"), :domain => "klobuchar.senate.gov" }
|
342
332
|
end
|
343
333
|
end
|
344
334
|
results
|
data/lib/statement/version.rb
CHANGED
data/spec/statement_spec.rb
CHANGED
@@ -6,28 +6,28 @@ include Statement
|
|
6
6
|
describe Statement do
|
7
7
|
it "parses an rss feed" do
|
8
8
|
@feed_url = "http://ruiz.house.gov/rss.xml"
|
9
|
-
stub_request(:any, @feed_url).to_return(:body => File.new(File.join(File.dirname(__FILE__), "ruiz_rss.xml")), :status => 200)
|
9
|
+
WebMock.stub_request(:any, @feed_url).to_return(:body => File.new(File.join(File.dirname(__FILE__), "ruiz_rss.xml")), :status => 200)
|
10
10
|
@results = Feed.from_rss(@feed_url)
|
11
11
|
@results.first[:domain].must_equal "ruiz.house.gov"
|
12
12
|
end
|
13
13
|
|
14
14
|
it "parses House GOP press release page" do
|
15
15
|
@feed_url = "http://www.gop.gov/republicans/news?offset=03/29/13"
|
16
|
-
stub_request(:any, @feed_url).to_return(:body => File.new(File.join(File.dirname(__FILE__), "house_gop_releases.html")), :status => 200)
|
16
|
+
WebMock.stub_request(:any, @feed_url).to_return(:body => File.new(File.join(File.dirname(__FILE__), "house_gop_releases.html")), :status => 200)
|
17
17
|
@results = Scraper.house_gop(@feed_url)
|
18
18
|
@results.first[:source].must_equal @feed_url
|
19
19
|
end
|
20
20
|
|
21
21
|
it "does not attempt to parse dates when none are present" do
|
22
22
|
@feed_url = "http://culberson.house.gov/feed/rss/"
|
23
|
-
stub_request(:any, @feed_url).to_return(:body => File.new(File.join(File.dirname(__FILE__), "culberson_rss.xml")), :status => 200)
|
23
|
+
WebMock.stub_request(:any, @feed_url).to_return(:body => File.new(File.join(File.dirname(__FILE__), "culberson_rss.xml")), :status => 200)
|
24
24
|
@results = Feed.from_rss(@feed_url)
|
25
25
|
@results.first[:date].must_equal nil
|
26
26
|
end
|
27
27
|
|
28
28
|
it "parses invalid RSS" do
|
29
29
|
@feed_url = "http://www.burr.senate.gov/public/index.cfm?FuseAction=RSS.Feed"
|
30
|
-
stub_request(:any, @feed_url).to_return(:body => File.new(File.join(File.dirname(__FILE__), "richard_burr.xml")), :status => 200)
|
30
|
+
WebMock.stub_request(:any, @feed_url).to_return(:body => File.new(File.join(File.dirname(__FILE__), "richard_burr.xml")), :status => 200)
|
31
31
|
@results = Feed.from_rss(@feed_url)
|
32
32
|
@results.first[:url].must_equal "http://www.burr.senate.gov/public/index.cfm?FuseAction=PressOffice.PressReleases&Type=Press Release&ContentRecord_id=65dbea38-d64c-6208-ef8f-2b000e899b3a"
|
33
33
|
@results.first[:date].to_s.must_equal "2013-05-02"
|
@@ -35,33 +35,29 @@ describe Statement do
|
|
35
35
|
|
36
36
|
it "handles relative URLs" do
|
37
37
|
@feed_url = "http://www.gop.gov/republicans/news?offset=03/29/13"
|
38
|
-
stub_request(:any, @feed_url).to_return(:body => File.new(File.join(File.dirname(__FILE__), "house_gop_releases.html")), :status => 200)
|
38
|
+
WebMock.stub_request(:any, @feed_url).to_return(:body => File.new(File.join(File.dirname(__FILE__), "house_gop_releases.html")), :status => 200)
|
39
39
|
@results = Scraper.house_gop(@feed_url)
|
40
40
|
@results.last[:url].must_equal "http://www.gop.gov/republicans/other/relative_url_test.html"
|
41
41
|
end
|
42
42
|
|
43
43
|
it "scrapes a senate cold fusion page" do
|
44
44
|
@url = "http://www.billnelson.senate.gov/news/media.cfm?year=2013"
|
45
|
-
stub_request(:any, @url).to_return(:body => File.new(File.join(File.dirname(__FILE__), 'bill_nelson_press.html')), :status => 200)
|
45
|
+
WebMock.stub_request(:any, @url).to_return(:body => File.new(File.join(File.dirname(__FILE__), 'bill_nelson_press.html')), :status => 200)
|
46
46
|
@results = Scraper.billnelson(year=2013)
|
47
47
|
@results.last[:url].must_equal "http://www.billnelson.senate.gov/news/details.cfm?id=338190&"
|
48
48
|
end
|
49
49
|
|
50
|
-
it "scrapes vitter
|
50
|
+
it "scrapes vitter pages for 2013" do
|
51
51
|
@vitter = "http://www.vitter.senate.gov/newsroom/press?year=2013"
|
52
|
-
@
|
53
|
-
|
54
|
-
|
55
|
-
@results = Scraper.vitter_cowan(year=2013)
|
56
|
-
@results.map{|r| r[:domain]}.uniq.must_equal ["www.vitter.senate.gov", "www.cowan.senate.gov"]
|
52
|
+
WebMock.stub_request(:any, @vitter).to_return(:body => File.new(File.join(File.dirname(__FILE__), 'vitter_press.html')), :status => 200)
|
53
|
+
@results = Scraper.vitter(year=2013)
|
54
|
+
@results.map{|r| r[:domain]}.uniq.must_equal ["www.vitter.senate.gov"]
|
57
55
|
end
|
58
56
|
|
59
57
|
it "only scrapes vitter page for 2012" do
|
60
58
|
@vitter = "http://www.vitter.senate.gov/newsroom/press?year=2012"
|
61
|
-
@
|
62
|
-
|
63
|
-
stub_request(:any, @cowan).to_return(:body => File.new(File.join(File.dirname(__FILE__), 'cowan_press.html')), :status => 200)
|
64
|
-
@results = Scraper.vitter_cowan(year=2012)
|
59
|
+
WebMock.stub_request(:any, @vitter).to_return(:body => File.new(File.join(File.dirname(__FILE__), 'vitter_press.html')), :status => 200)
|
60
|
+
@results = Scraper.vitter(year=2012)
|
65
61
|
@results.map{|r| r[:domain]}.uniq.must_equal ["www.vitter.senate.gov"]
|
66
62
|
end
|
67
63
|
|
data/statement.gemspec
CHANGED
@@ -19,9 +19,9 @@ Gem::Specification.new do |spec|
|
|
19
19
|
spec.require_paths = ["lib"]
|
20
20
|
|
21
21
|
spec.add_development_dependency "bundler", "~> 1.3"
|
22
|
-
spec.add_development_dependency "rake"
|
23
|
-
spec.add_development_dependency 'webmock'
|
24
|
-
spec.add_development_dependency 'minitest'
|
22
|
+
spec.add_dependency "rake"
|
23
|
+
spec.add_dependency 'webmock'
|
24
|
+
spec.add_dependency 'minitest'
|
25
25
|
spec.add_dependency "american_date"
|
26
26
|
if RUBY_VERSION > "1.8.7"
|
27
27
|
spec.add_dependency "nokogiri"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: statement
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.9
|
4
|
+
version: 1.0.10
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-08-21 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -35,7 +35,7 @@ dependencies:
|
|
35
35
|
- - ! '>='
|
36
36
|
- !ruby/object:Gem::Version
|
37
37
|
version: '0'
|
38
|
-
type: :development
|
38
|
+
type: :runtime
|
39
39
|
prerelease: false
|
40
40
|
version_requirements: !ruby/object:Gem::Requirement
|
41
41
|
none: false
|
@@ -51,7 +51,7 @@ dependencies:
|
|
51
51
|
- - ! '>='
|
52
52
|
- !ruby/object:Gem::Version
|
53
53
|
version: '0'
|
54
|
-
type: :development
|
54
|
+
type: :runtime
|
55
55
|
prerelease: false
|
56
56
|
version_requirements: !ruby/object:Gem::Requirement
|
57
57
|
none: false
|
@@ -67,7 +67,7 @@ dependencies:
|
|
67
67
|
- - ! '>='
|
68
68
|
- !ruby/object:Gem::Version
|
69
69
|
version: '0'
|
70
|
-
type: :development
|
70
|
+
type: :runtime
|
71
71
|
prerelease: false
|
72
72
|
version_requirements: !ruby/object:Gem::Requirement
|
73
73
|
none: false
|
@@ -214,7 +214,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
214
214
|
version: '0'
|
215
215
|
segments:
|
216
216
|
- 0
|
217
|
-
hash: -
|
217
|
+
hash: -2139780170107598593
|
218
218
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
219
219
|
none: false
|
220
220
|
requirements:
|
@@ -223,7 +223,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
223
223
|
version: '0'
|
224
224
|
segments:
|
225
225
|
- 0
|
226
|
-
hash: -
|
226
|
+
hash: -2139780170107598593
|
227
227
|
requirements: []
|
228
228
|
rubyforge_project:
|
229
229
|
rubygems_version: 1.8.25
|