statement 1.9.9 → 2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +11 -7
- data/lib/statement/scraper.rb +140 -88
- data/lib/statement/version.rb +1 -1
- data/scraper_guide.md +49 -0
- data/spec/butterfield_press.html +407 -0
- data/spec/drupal_press.html +524 -0
- data/spec/ed_perlmutter_press.html +5032 -0
- data/spec/keating_press.html +2211 -0
- data/spec/statement_spec.rb +62 -10
- metadata +11 -2
data/spec/statement_spec.rb
CHANGED
@@ -10,21 +10,21 @@ describe Statement do
|
|
10
10
|
@results = Feed.from_rss(@feed_url)
|
11
11
|
@results.first[:domain].must_equal "ruiz.house.gov"
|
12
12
|
end
|
13
|
-
|
13
|
+
|
14
14
|
it "parses House GOP press release page" do
|
15
15
|
@feed_url = "http://www.gop.gov/republicans/news?offset=03/29/13"
|
16
16
|
WebMock.stub_request(:any, @feed_url).to_return(:body => File.new(File.join(File.dirname(__FILE__), "house_gop_releases.html")), :status => 200)
|
17
17
|
@results = Scraper.house_gop(@feed_url)
|
18
18
|
@results.first[:source].must_equal @feed_url
|
19
19
|
end
|
20
|
-
|
20
|
+
|
21
21
|
it "does not attempt to parse dates when none are present" do
|
22
22
|
@feed_url = "http://culberson.house.gov/feed/rss/"
|
23
23
|
WebMock.stub_request(:any, @feed_url).to_return(:body => File.new(File.join(File.dirname(__FILE__), "culberson_rss.xml")), :status => 200)
|
24
24
|
@results = Feed.from_rss(@feed_url)
|
25
25
|
@results.first[:date].must_equal nil
|
26
26
|
end
|
27
|
-
|
27
|
+
|
28
28
|
it "parses invalid RSS" do
|
29
29
|
@feed_url = "http://www.burr.senate.gov/public/index.cfm?FuseAction=RSS.Feed"
|
30
30
|
WebMock.stub_request(:any, @feed_url).to_return(:body => File.new(File.join(File.dirname(__FILE__), "richard_burr.xml")), :status => 200)
|
@@ -39,26 +39,78 @@ describe Statement do
|
|
39
39
|
@results = Scraper.house_gop(@feed_url)
|
40
40
|
@results.last[:url].must_equal "http://www.gop.gov/republicans/other/relative_url_test.html"
|
41
41
|
end
|
42
|
-
|
42
|
+
|
43
43
|
it "scrapes a senate cold fusion page" do
|
44
44
|
@url = "http://www.billnelson.senate.gov/news/media.cfm?year=2013"
|
45
|
-
WebMock.stub_request(:any, @url).to_return(:body => File.new(File.join(File.dirname(__FILE__), 'bill_nelson_press.html')), :status => 200)
|
45
|
+
WebMock.stub_request(:any, @url).with(:headers => {'Accept'=>'*/*', 'User-Agent'=>'Ruby'}).to_return(:headers => {}, :body => File.new(File.join(File.dirname(__FILE__), 'bill_nelson_press.html')), :status => 200)
|
46
46
|
@results = Scraper.billnelson(year=2013)
|
47
47
|
@results.last[:url].must_equal "http://www.billnelson.senate.gov/news/details.cfm?id=338190&"
|
48
48
|
end
|
49
|
-
|
49
|
+
|
50
50
|
it "scrapes vitter pages for 2013" do
|
51
51
|
@vitter = "http://www.vitter.senate.gov/newsroom/press?year=2013"
|
52
52
|
WebMock.stub_request(:any, @vitter).to_return(:body => File.new(File.join(File.dirname(__FILE__), 'vitter_press.html')), :status => 200)
|
53
53
|
@results = Scraper.vitter(year=2013)
|
54
54
|
@results.map{|r| r[:domain]}.uniq.must_equal ["www.vitter.senate.gov"]
|
55
55
|
end
|
56
|
-
|
56
|
+
|
57
57
|
it "only scrapes vitter page for 2012" do
|
58
58
|
@vitter = "http://www.vitter.senate.gov/newsroom/press?year=2012"
|
59
59
|
WebMock.stub_request(:any, @vitter).to_return(:body => File.new(File.join(File.dirname(__FILE__), 'vitter_press.html')), :status => 200)
|
60
60
|
@results = Scraper.vitter(year=2012)
|
61
|
-
@results.map{|r| r[:domain]}.uniq.must_equal ["www.vitter.senate.gov"]
|
61
|
+
@results.map{|r| r[:domain]}.uniq.must_equal ["www.vitter.senate.gov"]
|
62
|
+
end
|
63
|
+
|
64
|
+
it "scrapes perlmutter's press page" do
|
65
|
+
@perlmutter_url = "http://perlmutter.house.gov/index.php/media-center/press-releases-86821"
|
66
|
+
@perlmutter_page = File.new(File.join(File.dirname(__FILE__), 'ed_perlmutter_press.html'))
|
67
|
+
WebMock.stub_request(:any, @perlmutter_url).to_return(:body => @perlmutter_page, :status => 200)
|
68
|
+
|
69
|
+
expected_result = {
|
70
|
+
:source => "http://perlmutter.house.gov/index.php/media-center/press-releases-86821",
|
71
|
+
:url => "http://perlmutter.house.gov/index.php/media-center/press-releases-86821/1505-polis-perlmutter-host-fed-reserve-chief-for-roundtable-with-cannabis-businesses",
|
72
|
+
:title => "Polis, Perlmutter Host Fed Reserve Chief for Roundtable with Cannabis Businesses",
|
73
|
+
:date => Date.parse("2015-04-10"),
|
74
|
+
:domain => "perlmutter.house.gov"
|
75
|
+
}
|
76
|
+
|
77
|
+
@results = Scraper.perlmutter
|
78
|
+
@results.first.must_equal expected_result
|
79
|
+
end
|
80
|
+
|
81
|
+
it "scrapes keating's press page" do
|
82
|
+
@keating_url = "http://keating.house.gov/index.php?option=com_content&view=category&id=14&Itemid=13"
|
83
|
+
@keating_page = File.new(File.join(File.dirname(__FILE__), 'keating_press.html'))
|
84
|
+
WebMock.stub_request(:any, @keating_url).to_return(:body => @keating_page, :status => 200)
|
85
|
+
|
86
|
+
expected_result = {
|
87
|
+
:source => "http://keating.house.gov/index.php?option=com_content&view=category&id=14&Itemid=13",
|
88
|
+
:url => "http://keating.house.gov/index.php?option=com_content&view=article&id=314:keating-announces-epa-grant-for-new-bedford&catid=14&Itemid=13",
|
89
|
+
:title => "Keating Announces EPA Grant for New Bedford",
|
90
|
+
:date => Date.parse("2015-03-13"),
|
91
|
+
:domain => "keating.house.gov"
|
92
|
+
}
|
93
|
+
|
94
|
+
@results = Scraper.keating
|
95
|
+
@results.first.must_equal expected_result
|
96
|
+
end
|
97
|
+
|
98
|
+
it "scrapes a drupal press page" do
|
99
|
+
@drupal_url = "http://walz.house.gov/media-center/press-releases"
|
100
|
+
@drupal_page = File.new(File.join(File.dirname(__FILE__), 'drupal_press.html'))
|
101
|
+
puts @drupal_page
|
102
|
+
WebMock.stub_request(:any, "#{@drupal_url}?page=0").to_return(:body => @drupal_page, :status => 200)
|
103
|
+
|
104
|
+
expected_result = {
|
105
|
+
:source => "http://walz.house.gov/media-center/press-releases?page=0",
|
106
|
+
:url => "http://walz.house.gov/media-center/press-releases/walz-calls-for-passage-of-the-paycheck-fairness-act-on-equal-pay-day-0",
|
107
|
+
:title => "Walz Calls for Passage of the Paycheck Fairness Act on Equal Pay Day",
|
108
|
+
:date => Date.parse("2015-04-14"),
|
109
|
+
:domain => "walz.house.gov"
|
110
|
+
}
|
111
|
+
|
112
|
+
@results = Scraper.drupal(urls=[@drupal_url])
|
113
|
+
@results.length.must_equal 10
|
114
|
+
@results.first.must_equal expected_result
|
62
115
|
end
|
63
|
-
|
64
|
-
end
|
116
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: statement
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.9.9
|
4
|
+
version: '2.0'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Derek Willis
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-04-
|
11
|
+
date: 2015-04-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -169,10 +169,15 @@ files:
|
|
169
169
|
- lib/statement/tweets.rb
|
170
170
|
- lib/statement/utils.rb
|
171
171
|
- lib/statement/version.rb
|
172
|
+
- scraper_guide.md
|
172
173
|
- spec/bill_nelson_press.html
|
174
|
+
- spec/butterfield_press.html
|
173
175
|
- spec/cowan_press.html
|
174
176
|
- spec/culberson_rss.xml
|
177
|
+
- spec/drupal_press.html
|
178
|
+
- spec/ed_perlmutter_press.html
|
175
179
|
- spec/house_gop_releases.html
|
180
|
+
- spec/keating_press.html
|
176
181
|
- spec/richard_burr.xml
|
177
182
|
- spec/ruiz_rss.xml
|
178
183
|
- spec/statement_spec.rb
|
@@ -204,9 +209,13 @@ specification_version: 4
|
|
204
209
|
summary: Given a url, Statement returns links to press releases and official statements.
|
205
210
|
test_files:
|
206
211
|
- spec/bill_nelson_press.html
|
212
|
+
- spec/butterfield_press.html
|
207
213
|
- spec/cowan_press.html
|
208
214
|
- spec/culberson_rss.xml
|
215
|
+
- spec/drupal_press.html
|
216
|
+
- spec/ed_perlmutter_press.html
|
209
217
|
- spec/house_gop_releases.html
|
218
|
+
- spec/keating_press.html
|
210
219
|
- spec/richard_burr.xml
|
211
220
|
- spec/ruiz_rss.xml
|
212
221
|
- spec/statement_spec.rb
|