statement 1.9.9 → 2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -10,21 +10,21 @@ describe Statement do
10
10
  @results = Feed.from_rss(@feed_url)
11
11
  @results.first[:domain].must_equal "ruiz.house.gov"
12
12
  end
13
-
13
+
14
14
  it "parses House GOP press release page" do
15
15
  @feed_url = "http://www.gop.gov/republicans/news?offset=03/29/13"
16
16
  WebMock.stub_request(:any, @feed_url).to_return(:body => File.new(File.join(File.dirname(__FILE__), "house_gop_releases.html")), :status => 200)
17
17
  @results = Scraper.house_gop(@feed_url)
18
18
  @results.first[:source].must_equal @feed_url
19
19
  end
20
-
20
+
21
21
  it "does not attempt to parse dates when none are present" do
22
22
  @feed_url = "http://culberson.house.gov/feed/rss/"
23
23
  WebMock.stub_request(:any, @feed_url).to_return(:body => File.new(File.join(File.dirname(__FILE__), "culberson_rss.xml")), :status => 200)
24
24
  @results = Feed.from_rss(@feed_url)
25
25
  @results.first[:date].must_equal nil
26
26
  end
27
-
27
+
28
28
  it "parses invalid RSS" do
29
29
  @feed_url = "http://www.burr.senate.gov/public/index.cfm?FuseAction=RSS.Feed"
30
30
  WebMock.stub_request(:any, @feed_url).to_return(:body => File.new(File.join(File.dirname(__FILE__), "richard_burr.xml")), :status => 200)
@@ -39,26 +39,78 @@ describe Statement do
39
39
  @results = Scraper.house_gop(@feed_url)
40
40
  @results.last[:url].must_equal "http://www.gop.gov/republicans/other/relative_url_test.html"
41
41
  end
42
-
42
+
43
43
  it "scrapes a senate cold fusion page" do
44
44
  @url = "http://www.billnelson.senate.gov/news/media.cfm?year=2013"
45
- WebMock.stub_request(:any, @url).to_return(:body => File.new(File.join(File.dirname(__FILE__), 'bill_nelson_press.html')), :status => 200)
45
+ WebMock.stub_request(:any, @url).with(:headers => {'Accept'=>'*/*', 'User-Agent'=>'Ruby'}).to_return(:headers => {}, :body => File.new(File.join(File.dirname(__FILE__), 'bill_nelson_press.html')), :status => 200)
46
46
  @results = Scraper.billnelson(year=2013)
47
47
  @results.last[:url].must_equal "http://www.billnelson.senate.gov/news/details.cfm?id=338190&"
48
48
  end
49
-
49
+
50
50
  it "scrapes vitter pages for 2013" do
51
51
  @vitter = "http://www.vitter.senate.gov/newsroom/press?year=2013"
52
52
  WebMock.stub_request(:any, @vitter).to_return(:body => File.new(File.join(File.dirname(__FILE__), 'vitter_press.html')), :status => 200)
53
53
  @results = Scraper.vitter(year=2013)
54
54
  @results.map{|r| r[:domain]}.uniq.must_equal ["www.vitter.senate.gov"]
55
55
  end
56
-
56
+
57
57
  it "only scrapes vitter page for 2012" do
58
58
  @vitter = "http://www.vitter.senate.gov/newsroom/press?year=2012"
59
59
  WebMock.stub_request(:any, @vitter).to_return(:body => File.new(File.join(File.dirname(__FILE__), 'vitter_press.html')), :status => 200)
60
60
  @results = Scraper.vitter(year=2012)
61
- @results.map{|r| r[:domain]}.uniq.must_equal ["www.vitter.senate.gov"]
61
+ @results.map{|r| r[:domain]}.uniq.must_equal ["www.vitter.senate.gov"]
62
+ end
63
+
64
+ it "scrapes perlmutter's press page" do
65
+ @perlmutter_url = "http://perlmutter.house.gov/index.php/media-center/press-releases-86821"
66
+ @perlmutter_page = File.new(File.join(File.dirname(__FILE__), 'ed_perlmutter_press.html'))
67
+ WebMock.stub_request(:any, @perlmutter_url).to_return(:body => @perlmutter_page, :status => 200)
68
+
69
+ expected_result = {
70
+ :source => "http://perlmutter.house.gov/index.php/media-center/press-releases-86821",
71
+ :url => "http://perlmutter.house.gov/index.php/media-center/press-releases-86821/1505-polis-perlmutter-host-fed-reserve-chief-for-roundtable-with-cannabis-businesses",
72
+ :title => "Polis, Perlmutter Host Fed Reserve Chief for Roundtable with Cannabis Businesses",
73
+ :date => Date.parse("2015-04-10"),
74
+ :domain => "perlmutter.house.gov"
75
+ }
76
+
77
+ @results = Scraper.perlmutter
78
+ @results.first.must_equal expected_result
79
+ end
80
+
81
+ it "scrapes keating's press page" do
82
+ @keating_url = "http://keating.house.gov/index.php?option=com_content&view=category&id=14&Itemid=13"
83
+ @keating_page = File.new(File.join(File.dirname(__FILE__), 'keating_press.html'))
84
+ WebMock.stub_request(:any, @keating_url).to_return(:body => @keating_page, :status => 200)
85
+
86
+ expected_result = {
87
+ :source => "http://keating.house.gov/index.php?option=com_content&view=category&id=14&Itemid=13",
88
+ :url => "http://keating.house.gov/index.php?option=com_content&view=article&id=314:keating-announces-epa-grant-for-new-bedford&catid=14&Itemid=13",
89
+ :title => "Keating Announces EPA Grant for New Bedford",
90
+ :date => Date.parse("2015-03-13"),
91
+ :domain => "keating.house.gov"
92
+ }
93
+
94
+ @results = Scraper.keating
95
+ @results.first.must_equal expected_result
96
+ end
97
+
98
+ it "scrapes a drupal press page" do
99
+ @drupal_url = "http://walz.house.gov/media-center/press-releases"
100
+ @drupal_page = File.new(File.join(File.dirname(__FILE__), 'drupal_press.html'))
101
+ puts @drupal_page
102
+ WebMock.stub_request(:any, "#{@drupal_url}?page=0").to_return(:body => @drupal_page, :status => 200)
103
+
104
+ expected_result = {
105
+ :source => "http://walz.house.gov/media-center/press-releases?page=0",
106
+ :url => "http://walz.house.gov/media-center/press-releases/walz-calls-for-passage-of-the-paycheck-fairness-act-on-equal-pay-day-0",
107
+ :title => "Walz Calls for Passage of the Paycheck Fairness Act on Equal Pay Day",
108
+ :date => Date.parse("2015-04-14"),
109
+ :domain => "walz.house.gov"
110
+ }
111
+
112
+ @results = Scraper.drupal(urls=[@drupal_url])
113
+ @results.length.must_equal 10
114
+ @results.first.must_equal expected_result
62
115
  end
63
-
64
- end
116
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: statement
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.9.9
4
+ version: '2.0'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Derek Willis
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-04-17 00:00:00.000000000 Z
11
+ date: 2015-04-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -169,10 +169,15 @@ files:
169
169
  - lib/statement/tweets.rb
170
170
  - lib/statement/utils.rb
171
171
  - lib/statement/version.rb
172
+ - scraper_guide.md
172
173
  - spec/bill_nelson_press.html
174
+ - spec/butterfield_press.html
173
175
  - spec/cowan_press.html
174
176
  - spec/culberson_rss.xml
177
+ - spec/drupal_press.html
178
+ - spec/ed_perlmutter_press.html
175
179
  - spec/house_gop_releases.html
180
+ - spec/keating_press.html
176
181
  - spec/richard_burr.xml
177
182
  - spec/ruiz_rss.xml
178
183
  - spec/statement_spec.rb
@@ -204,9 +209,13 @@ specification_version: 4
204
209
  summary: Given a url, Statement returns links to press releases and official statements.
205
210
  test_files:
206
211
  - spec/bill_nelson_press.html
212
+ - spec/butterfield_press.html
207
213
  - spec/cowan_press.html
208
214
  - spec/culberson_rss.xml
215
+ - spec/drupal_press.html
216
+ - spec/ed_perlmutter_press.html
209
217
  - spec/house_gop_releases.html
218
+ - spec/keating_press.html
210
219
  - spec/richard_burr.xml
211
220
  - spec/ruiz_rss.xml
212
221
  - spec/statement_spec.rb