statement 1.0.9 → 1.0.10
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
- data/Rakefile +1 -1
- data/lib/statement/scraper.rb +7 -17
- data/lib/statement/version.rb +1 -1
- data/spec/statement_spec.rb +12 -16
- data/statement.gemspec +3 -3
- metadata +7 -7
data/Rakefile
CHANGED
data/lib/statement/scraper.rb
CHANGED
@@ -29,7 +29,7 @@ module Statement
|
|
29
29
|
end
|
30
30
|
|
31
31
|
def self.member_methods
|
32
|
-
[:capuano, :cold_fusion, :conaway, :susandavis, :
|
32
|
+
[:capuano, :cold_fusion, :conaway, :susandavis, :freshman_senators, :klobuchar, :lujan, :billnelson, :lautenberg, :crapo, :coburn, :boxer, :mccain, :vitter, :donnelly, :inhofe, :levin, :reid, :palazzo, :document_query, :farenthold, :swalwell, :fischer]
|
33
33
|
end
|
34
34
|
|
35
35
|
def self.committee_methods
|
@@ -38,7 +38,7 @@ module Statement
|
|
38
38
|
|
39
39
|
def self.member_scrapers
|
40
40
|
year = Date.today.year
|
41
|
-
results = [freshman_senators, capuano, cold_fusion(year, 0), conaway, susandavis,
|
41
|
+
results = [freshman_senators, capuano, cold_fusion(year, 0), conaway, susandavis, klobuchar, lujan, palazzo(page=1), billnelson(year=year),
|
42
42
|
document_query(page=1), document_query(page=2), farenthold(year), swalwell(page=1), donnelly(year=year), crapo, coburn, boxer(start=1), mccain(year=year),
|
43
43
|
vitter(year=year), inhofe(year=year), reid, fischer].flatten
|
44
44
|
Utils.remove_generic_urls!(results)
|
@@ -305,20 +305,9 @@ module Statement
|
|
305
305
|
results
|
306
306
|
end
|
307
307
|
|
308
|
-
def self.faleomavaega
|
309
|
-
results = []
|
310
|
-
base_url = "http://www.house.gov/faleomavaega/news-press.shtml"
|
311
|
-
doc = open_html(base_url)
|
312
|
-
return if doc.nil?
|
313
|
-
doc.xpath("//li[@type='disc']").each do |row|
|
314
|
-
results << { :source => base_url, :url => "http://www.house.gov/" + row.children[0]['href'], :title => row.children[0].text.gsub(/[u201cu201d]/, '').split('Washington, D.C.').last, :date => Date.parse(row.children[1].text), :domain => "house.gov/faleomavaega" }
|
315
|
-
end
|
316
|
-
results
|
317
|
-
end
|
318
|
-
|
319
308
|
def self.freshman_senators
|
320
309
|
results = []
|
321
|
-
['
|
310
|
+
['murphy','cruz'].each do |senator|
|
322
311
|
base_url = "http://www.#{senator}.senate.gov/"
|
323
312
|
doc = open_html(base_url+'press.cfm?maxrows=200&startrow=1&&type=1')
|
324
313
|
return if doc.nil?
|
@@ -334,11 +323,12 @@ module Statement
|
|
334
323
|
results = []
|
335
324
|
base_url = "http://www.klobuchar.senate.gov/"
|
336
325
|
[2012,2013].each do |year|
|
337
|
-
year_url = base_url + "
|
326
|
+
year_url = base_url + "public/news-releases?MonthDisplay=0&YearDisplay=#{year}"
|
338
327
|
doc = open_html(year_url)
|
339
328
|
return if doc.nil?
|
340
|
-
doc.xpath("//
|
341
|
-
|
329
|
+
doc.xpath("//tr")[1..-1].each do |row|
|
330
|
+
next if row.children[2].children[0].text.strip == 'Title'
|
331
|
+
results << { :source => year_url, :url => row.children[2].children[0]['href'], :title => row.children[2].children[0].text.strip, :date => Date.strptime(row.children[0].text, "%m/%d/%y"), :domain => "klobuchar.senate.gov" }
|
342
332
|
end
|
343
333
|
end
|
344
334
|
results
|
data/lib/statement/version.rb
CHANGED
data/spec/statement_spec.rb
CHANGED
@@ -6,28 +6,28 @@ include Statement
|
|
6
6
|
describe Statement do
|
7
7
|
it "parses an rss feed" do
|
8
8
|
@feed_url = "http://ruiz.house.gov/rss.xml"
|
9
|
-
stub_request(:any, @feed_url).to_return(:body => File.new(File.join(File.dirname(__FILE__), "ruiz_rss.xml")), :status => 200)
|
9
|
+
WebMock.stub_request(:any, @feed_url).to_return(:body => File.new(File.join(File.dirname(__FILE__), "ruiz_rss.xml")), :status => 200)
|
10
10
|
@results = Feed.from_rss(@feed_url)
|
11
11
|
@results.first[:domain].must_equal "ruiz.house.gov"
|
12
12
|
end
|
13
13
|
|
14
14
|
it "parses House GOP press release page" do
|
15
15
|
@feed_url = "http://www.gop.gov/republicans/news?offset=03/29/13"
|
16
|
-
stub_request(:any, @feed_url).to_return(:body => File.new(File.join(File.dirname(__FILE__), "house_gop_releases.html")), :status => 200)
|
16
|
+
WebMock.stub_request(:any, @feed_url).to_return(:body => File.new(File.join(File.dirname(__FILE__), "house_gop_releases.html")), :status => 200)
|
17
17
|
@results = Scraper.house_gop(@feed_url)
|
18
18
|
@results.first[:source].must_equal @feed_url
|
19
19
|
end
|
20
20
|
|
21
21
|
it "does not attempt to parse dates when none are present" do
|
22
22
|
@feed_url = "http://culberson.house.gov/feed/rss/"
|
23
|
-
stub_request(:any, @feed_url).to_return(:body => File.new(File.join(File.dirname(__FILE__), "culberson_rss.xml")), :status => 200)
|
23
|
+
WebMock.stub_request(:any, @feed_url).to_return(:body => File.new(File.join(File.dirname(__FILE__), "culberson_rss.xml")), :status => 200)
|
24
24
|
@results = Feed.from_rss(@feed_url)
|
25
25
|
@results.first[:date].must_equal nil
|
26
26
|
end
|
27
27
|
|
28
28
|
it "parses invalid RSS" do
|
29
29
|
@feed_url = "http://www.burr.senate.gov/public/index.cfm?FuseAction=RSS.Feed"
|
30
|
-
stub_request(:any, @feed_url).to_return(:body => File.new(File.join(File.dirname(__FILE__), "richard_burr.xml")), :status => 200)
|
30
|
+
WebMock.stub_request(:any, @feed_url).to_return(:body => File.new(File.join(File.dirname(__FILE__), "richard_burr.xml")), :status => 200)
|
31
31
|
@results = Feed.from_rss(@feed_url)
|
32
32
|
@results.first[:url].must_equal "http://www.burr.senate.gov/public/index.cfm?FuseAction=PressOffice.PressReleases&Type=Press Release&ContentRecord_id=65dbea38-d64c-6208-ef8f-2b000e899b3a"
|
33
33
|
@results.first[:date].to_s.must_equal "2013-05-02"
|
@@ -35,33 +35,29 @@ describe Statement do
|
|
35
35
|
|
36
36
|
it "handles relative URLs" do
|
37
37
|
@feed_url = "http://www.gop.gov/republicans/news?offset=03/29/13"
|
38
|
-
stub_request(:any, @feed_url).to_return(:body => File.new(File.join(File.dirname(__FILE__), "house_gop_releases.html")), :status => 200)
|
38
|
+
WebMock.stub_request(:any, @feed_url).to_return(:body => File.new(File.join(File.dirname(__FILE__), "house_gop_releases.html")), :status => 200)
|
39
39
|
@results = Scraper.house_gop(@feed_url)
|
40
40
|
@results.last[:url].must_equal "http://www.gop.gov/republicans/other/relative_url_test.html"
|
41
41
|
end
|
42
42
|
|
43
43
|
it "scrapes a senate cold fusion page" do
|
44
44
|
@url = "http://www.billnelson.senate.gov/news/media.cfm?year=2013"
|
45
|
-
stub_request(:any, @url).to_return(:body => File.new(File.join(File.dirname(__FILE__), 'bill_nelson_press.html')), :status => 200)
|
45
|
+
WebMock.stub_request(:any, @url).to_return(:body => File.new(File.join(File.dirname(__FILE__), 'bill_nelson_press.html')), :status => 200)
|
46
46
|
@results = Scraper.billnelson(year=2013)
|
47
47
|
@results.last[:url].must_equal "http://www.billnelson.senate.gov/news/details.cfm?id=338190&"
|
48
48
|
end
|
49
49
|
|
50
|
-
it "scrapes vitter
|
50
|
+
it "scrapes vitter pages for 2013" do
|
51
51
|
@vitter = "http://www.vitter.senate.gov/newsroom/press?year=2013"
|
52
|
-
@
|
53
|
-
|
54
|
-
|
55
|
-
@results = Scraper.vitter_cowan(year=2013)
|
56
|
-
@results.map{|r| r[:domain]}.uniq.must_equal ["www.vitter.senate.gov", "www.cowan.senate.gov"]
|
52
|
+
WebMock.stub_request(:any, @vitter).to_return(:body => File.new(File.join(File.dirname(__FILE__), 'vitter_press.html')), :status => 200)
|
53
|
+
@results = Scraper.vitter(year=2013)
|
54
|
+
@results.map{|r| r[:domain]}.uniq.must_equal ["www.vitter.senate.gov"]
|
57
55
|
end
|
58
56
|
|
59
57
|
it "only scrapes vitter page for 2012" do
|
60
58
|
@vitter = "http://www.vitter.senate.gov/newsroom/press?year=2012"
|
61
|
-
@
|
62
|
-
|
63
|
-
stub_request(:any, @cowan).to_return(:body => File.new(File.join(File.dirname(__FILE__), 'cowan_press.html')), :status => 200)
|
64
|
-
@results = Scraper.vitter_cowan(year=2012)
|
59
|
+
WebMock.stub_request(:any, @vitter).to_return(:body => File.new(File.join(File.dirname(__FILE__), 'vitter_press.html')), :status => 200)
|
60
|
+
@results = Scraper.vitter(year=2012)
|
65
61
|
@results.map{|r| r[:domain]}.uniq.must_equal ["www.vitter.senate.gov"]
|
66
62
|
end
|
67
63
|
|
data/statement.gemspec
CHANGED
@@ -19,9 +19,9 @@ Gem::Specification.new do |spec|
|
|
19
19
|
spec.require_paths = ["lib"]
|
20
20
|
|
21
21
|
spec.add_development_dependency "bundler", "~> 1.3"
|
22
|
-
spec.
|
23
|
-
spec.
|
24
|
-
spec.
|
22
|
+
spec.add_dependency "rake"
|
23
|
+
spec.add_dependency 'webmock'
|
24
|
+
spec.add_dependency 'minitest'
|
25
25
|
spec.add_dependency "american_date"
|
26
26
|
if RUBY_VERSION > "1.8.7"
|
27
27
|
spec.add_dependency "nokogiri"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: statement
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.10
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-08-21 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -35,7 +35,7 @@ dependencies:
|
|
35
35
|
- - ! '>='
|
36
36
|
- !ruby/object:Gem::Version
|
37
37
|
version: '0'
|
38
|
-
type: :
|
38
|
+
type: :runtime
|
39
39
|
prerelease: false
|
40
40
|
version_requirements: !ruby/object:Gem::Requirement
|
41
41
|
none: false
|
@@ -51,7 +51,7 @@ dependencies:
|
|
51
51
|
- - ! '>='
|
52
52
|
- !ruby/object:Gem::Version
|
53
53
|
version: '0'
|
54
|
-
type: :
|
54
|
+
type: :runtime
|
55
55
|
prerelease: false
|
56
56
|
version_requirements: !ruby/object:Gem::Requirement
|
57
57
|
none: false
|
@@ -67,7 +67,7 @@ dependencies:
|
|
67
67
|
- - ! '>='
|
68
68
|
- !ruby/object:Gem::Version
|
69
69
|
version: '0'
|
70
|
-
type: :
|
70
|
+
type: :runtime
|
71
71
|
prerelease: false
|
72
72
|
version_requirements: !ruby/object:Gem::Requirement
|
73
73
|
none: false
|
@@ -214,7 +214,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
214
214
|
version: '0'
|
215
215
|
segments:
|
216
216
|
- 0
|
217
|
-
hash: -
|
217
|
+
hash: -2139780170107598593
|
218
218
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
219
219
|
none: false
|
220
220
|
requirements:
|
@@ -223,7 +223,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
223
223
|
version: '0'
|
224
224
|
segments:
|
225
225
|
- 0
|
226
|
-
hash: -
|
226
|
+
hash: -2139780170107598593
|
227
227
|
requirements: []
|
228
228
|
rubyforge_project:
|
229
229
|
rubygems_version: 1.8.25
|