statement 1.9.9 → 2.0

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -10,21 +10,21 @@ describe Statement do
10
10
  @results = Feed.from_rss(@feed_url)
11
11
  @results.first[:domain].must_equal "ruiz.house.gov"
12
12
  end
13
-
13
+
14
14
  it "parses House GOP press release page" do
15
15
  @feed_url = "http://www.gop.gov/republicans/news?offset=03/29/13"
16
16
  WebMock.stub_request(:any, @feed_url).to_return(:body => File.new(File.join(File.dirname(__FILE__), "house_gop_releases.html")), :status => 200)
17
17
  @results = Scraper.house_gop(@feed_url)
18
18
  @results.first[:source].must_equal @feed_url
19
19
  end
20
-
20
+
21
21
  it "does not attempt to parse dates when none are present" do
22
22
  @feed_url = "http://culberson.house.gov/feed/rss/"
23
23
  WebMock.stub_request(:any, @feed_url).to_return(:body => File.new(File.join(File.dirname(__FILE__), "culberson_rss.xml")), :status => 200)
24
24
  @results = Feed.from_rss(@feed_url)
25
25
  @results.first[:date].must_equal nil
26
26
  end
27
-
27
+
28
28
  it "parses invalid RSS" do
29
29
  @feed_url = "http://www.burr.senate.gov/public/index.cfm?FuseAction=RSS.Feed"
30
30
  WebMock.stub_request(:any, @feed_url).to_return(:body => File.new(File.join(File.dirname(__FILE__), "richard_burr.xml")), :status => 200)
@@ -39,26 +39,78 @@ describe Statement do
39
39
  @results = Scraper.house_gop(@feed_url)
40
40
  @results.last[:url].must_equal "http://www.gop.gov/republicans/other/relative_url_test.html"
41
41
  end
42
-
42
+
43
43
  it "scrapes a senate cold fusion page" do
44
44
  @url = "http://www.billnelson.senate.gov/news/media.cfm?year=2013"
45
- WebMock.stub_request(:any, @url).to_return(:body => File.new(File.join(File.dirname(__FILE__), 'bill_nelson_press.html')), :status => 200)
45
+ WebMock.stub_request(:any, @url).with(:headers => {'Accept'=>'*/*', 'User-Agent'=>'Ruby'}).to_return(:headers => {}, :body => File.new(File.join(File.dirname(__FILE__), 'bill_nelson_press.html')), :status => 200)
46
46
  @results = Scraper.billnelson(year=2013)
47
47
  @results.last[:url].must_equal "http://www.billnelson.senate.gov/news/details.cfm?id=338190&"
48
48
  end
49
-
49
+
50
50
  it "scrapes vitter pages for 2013" do
51
51
  @vitter = "http://www.vitter.senate.gov/newsroom/press?year=2013"
52
52
  WebMock.stub_request(:any, @vitter).to_return(:body => File.new(File.join(File.dirname(__FILE__), 'vitter_press.html')), :status => 200)
53
53
  @results = Scraper.vitter(year=2013)
54
54
  @results.map{|r| r[:domain]}.uniq.must_equal ["www.vitter.senate.gov"]
55
55
  end
56
-
56
+
57
57
  it "only scrapes vitter page for 2012" do
58
58
  @vitter = "http://www.vitter.senate.gov/newsroom/press?year=2012"
59
59
  WebMock.stub_request(:any, @vitter).to_return(:body => File.new(File.join(File.dirname(__FILE__), 'vitter_press.html')), :status => 200)
60
60
  @results = Scraper.vitter(year=2012)
61
- @results.map{|r| r[:domain]}.uniq.must_equal ["www.vitter.senate.gov"]
61
+ @results.map{|r| r[:domain]}.uniq.must_equal ["www.vitter.senate.gov"]
62
+ end
63
+
64
+ it "scrapes perlmutter's press page" do
65
+ @perlmutter_url = "http://perlmutter.house.gov/index.php/media-center/press-releases-86821"
66
+ @perlmutter_page = File.new(File.join(File.dirname(__FILE__), 'ed_perlmutter_press.html'))
67
+ WebMock.stub_request(:any, @perlmutter_url).to_return(:body => @perlmutter_page, :status => 200)
68
+
69
+ expected_result = {
70
+ :source => "http://perlmutter.house.gov/index.php/media-center/press-releases-86821",
71
+ :url => "http://perlmutter.house.gov/index.php/media-center/press-releases-86821/1505-polis-perlmutter-host-fed-reserve-chief-for-roundtable-with-cannabis-businesses",
72
+ :title => "Polis, Perlmutter Host Fed Reserve Chief for Roundtable with Cannabis Businesses",
73
+ :date => Date.parse("2015-04-10"),
74
+ :domain => "perlmutter.house.gov"
75
+ }
76
+
77
+ @results = Scraper.perlmutter
78
+ @results.first.must_equal expected_result
79
+ end
80
+
81
+ it "scrapes keating's press page" do
82
+ @keating_url = "http://keating.house.gov/index.php?option=com_content&view=category&id=14&Itemid=13"
83
+ @keating_page = File.new(File.join(File.dirname(__FILE__), 'keating_press.html'))
84
+ WebMock.stub_request(:any, @keating_url).to_return(:body => @keating_page, :status => 200)
85
+
86
+ expected_result = {
87
+ :source => "http://keating.house.gov/index.php?option=com_content&view=category&id=14&Itemid=13",
88
+ :url => "http://keating.house.gov/index.php?option=com_content&view=article&id=314:keating-announces-epa-grant-for-new-bedford&catid=14&Itemid=13",
89
+ :title => "Keating Announces EPA Grant for New Bedford",
90
+ :date => Date.parse("2015-03-13"),
91
+ :domain => "keating.house.gov"
92
+ }
93
+
94
+ @results = Scraper.keating
95
+ @results.first.must_equal expected_result
96
+ end
97
+
98
+ it "scrapes a drupal press page" do
99
+ @drupal_url = "http://walz.house.gov/media-center/press-releases"
100
+ @drupal_page = File.new(File.join(File.dirname(__FILE__), 'drupal_press.html'))
101
+ puts @drupal_page
102
+ WebMock.stub_request(:any, "#{@drupal_url}?page=0").to_return(:body => @drupal_page, :status => 200)
103
+
104
+ expected_result = {
105
+ :source => "http://walz.house.gov/media-center/press-releases?page=0",
106
+ :url => "http://walz.house.gov/media-center/press-releases/walz-calls-for-passage-of-the-paycheck-fairness-act-on-equal-pay-day-0",
107
+ :title => "Walz Calls for Passage of the Paycheck Fairness Act on Equal Pay Day",
108
+ :date => Date.parse("2015-04-14"),
109
+ :domain => "walz.house.gov"
110
+ }
111
+
112
+ @results = Scraper.drupal(urls=[@drupal_url])
113
+ @results.length.must_equal 10
114
+ @results.first.must_equal expected_result
62
115
  end
63
-
64
- end
116
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: statement
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.9.9
4
+ version: '2.0'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Derek Willis
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-04-17 00:00:00.000000000 Z
11
+ date: 2015-04-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -169,10 +169,15 @@ files:
169
169
  - lib/statement/tweets.rb
170
170
  - lib/statement/utils.rb
171
171
  - lib/statement/version.rb
172
+ - scraper_guide.md
172
173
  - spec/bill_nelson_press.html
174
+ - spec/butterfield_press.html
173
175
  - spec/cowan_press.html
174
176
  - spec/culberson_rss.xml
177
+ - spec/drupal_press.html
178
+ - spec/ed_perlmutter_press.html
175
179
  - spec/house_gop_releases.html
180
+ - spec/keating_press.html
176
181
  - spec/richard_burr.xml
177
182
  - spec/ruiz_rss.xml
178
183
  - spec/statement_spec.rb
@@ -204,9 +209,13 @@ specification_version: 4
204
209
  summary: Given a url, Statement returns links to press releases and official statements.
205
210
  test_files:
206
211
  - spec/bill_nelson_press.html
212
+ - spec/butterfield_press.html
207
213
  - spec/cowan_press.html
208
214
  - spec/culberson_rss.xml
215
+ - spec/drupal_press.html
216
+ - spec/ed_perlmutter_press.html
209
217
  - spec/house_gop_releases.html
218
+ - spec/keating_press.html
210
219
  - spec/richard_burr.xml
211
220
  - spec/ruiz_rss.xml
212
221
  - spec/statement_spec.rb