statement 0.7.5 → 0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. data/lib/statement/version.rb +1 -1
  2. data/lib/statement.rb +172 -0
  3. metadata +13 -44
@@ -1,3 +1,3 @@
1
1
  module Statement
2
- VERSION = "0.7.5"
2
+ VERSION = "0.8"
3
3
  end
data/lib/statement.rb CHANGED
@@ -77,6 +77,178 @@ module Statement
77
77
  ].flatten
78
78
  end
79
79
 
80
# Runs every committee scraper and merges what they return into one flat Array.
#
# Scrapers that take a year are given the current calendar year; senate_intel
# is called with the fixed arguments (113, 2013, 2014).
#
# `compact` removes any nil a scraper hands back (for example when it returns
# early without results), so the Array returned here never contains nil.
#
# Returns an Array of the scrapers' result entries.
def self.committee_scrapers
  year = Date.today.year
  [
    senate_approps_majority,
    senate_approps_minority,
    senate_banking(year),
    senate_hsag_majority(year),
    senate_hsag_minority(year),
    senate_indian,
    senate_aging,
    senate_smallbiz_minority,
    senate_intel(113, 2013, 2014),
    house_energy_minority,
    house_homeland_security_minority,
    house_judiciary_majority,
    house_rules_majority,
    house_ways_means_majority
  ].flatten.compact
end
86
+
87
+ ## special cases for committees without RSS feeds
88
+
89
# Collects press-release links from the Senate Appropriations news page
# (news.cfm), tagging every entry with :party => 'majority'.
#
# For each div with class 'newsDateUnderlined', the node two siblings after it
# is read, and each of that node's non-blank children becomes one entry dated
# with the div's text.
#
# Returns an Array of Hashes with :source, :url, :title, :date, :domain and
# :party keys; an empty Array when open_html returns nil.
def self.senate_approps_majority
  url = "http://www.appropriations.senate.gov/news.cfm"
  doc = open_html(url)
  # Always hand back an Array so callers can flatten/iterate without nil checks.
  return [] if doc.nil?

  results = []
  doc.xpath("//div[@class='newsDateUnderlined']").each do |date_node|
    date_node.next.next.children.each do |row|
      next if row.text.strip.empty?

      results << {
        :source => url,
        :url    => url + row.children[0]['href'],
        :title  => row.text,
        :date   => Date.parse(date_node.text),
        :domain => "http://www.appropriations.senate.gov/",
        :party  => 'majority'
      }
    end
  end
  results
end
101
+
102
# Collects press-release links from the Senate Appropriations republican.cfm
# page, tagging every entry with :party => 'minority'.
#
# For each div with class 'newsDateUnderlined', the node two siblings after it
# is read, and each of that node's non-blank children becomes one entry dated
# with the div's text.
#
# Returns an Array of Hashes with :source, :url, :title, :date, :domain and
# :party keys; an empty Array when open_html returns nil.
def self.senate_approps_minority
  url = "http://www.appropriations.senate.gov/republican.cfm"
  doc = open_html(url)
  # Always hand back an Array so callers can flatten/iterate without nil checks.
  return [] if doc.nil?

  results = []
  doc.xpath("//div[@class='newsDateUnderlined']").each do |date_node|
    date_node.next.next.children.each do |row|
      next if row.text.strip.empty?

      results << {
        :source => url,
        :url    => url + row.children[0]['href'],
        :title  => row.text,
        :date   => Date.parse(date_node.text),
        :domain => "http://www.appropriations.senate.gov/",
        :party  => 'minority'
      }
    end
  end
  results
end
114
+
115
# Collects press releases from the Senate Banking committee listing for the
# given year, tagging every entry with :party => 'majority'.
#
# year - value interpolated into the listing URL (YearDisplay) and appended to
#        each row's date text before parsing.
#
# Every <tr> on the page is treated as a release row: child 0 holds the date
# text, child 2 holds the title, and child 1 of that title cell carries the href.
#
# Returns an Array of Hashes with :source, :url, :title, :date, :domain and
# :party keys; an empty Array when open_html returns nil.
def self.senate_banking(year)
  url = "http://www.banking.senate.gov/public/index.cfm?FuseAction=Newsroom.PressReleases&ContentRecordType_id=b94acc28-404a-4fc6-b143-a9e15bf92da4&Region_id=&Issue_id=&MonthDisplay=0&YearDisplay=#{year}"
  doc = open_html(url)
  # Always hand back an Array so callers can flatten/iterate without nil checks.
  return [] if doc.nil?

  doc.xpath("//tr").map do |row|
    {
      :source => url,
      :url    => "http://www.banking.senate.gov/public/" + row.children[2].children[1]['href'],
      :title  => row.children[2].text.strip,
      :date   => Date.parse(row.children[0].text.strip + ", #{year}"),
      :domain => "http://www.banking.senate.gov/",
      :party  => 'majority'
    }
  end
end
125
+
126
# Collects releases from the hsgac.senate.gov majority-media listing for the
# given year, tagging every entry with :party => 'majority'.
#
# year - value interpolated into the listing URL's year parameter.
#
# Rows whose stripped text is shorter than 30 characters are skipped. For the
# rest, child 0 holds the date text and the first child of child 2 is the link.
#
# Returns an Array of Hashes with :source, :url, :title, :date, :domain and
# :party keys; an empty Array when open_html returns nil.
def self.senate_hsag_majority(year)
  url = "http://www.hsgac.senate.gov/media/majority-media?year=#{year}"
  doc = open_html(url)
  # Always hand back an Array so callers can flatten/iterate without nil checks.
  return [] if doc.nil?

  release_rows = doc.xpath("//tr").reject { |row| row.text.strip.size < 30 }
  release_rows.map do |row|
    link = row.children[2].children[0]
    {
      :source => url,
      :url    => link['href'].strip,
      :title  => link.text,
      :date   => Date.parse(row.children[0].text),
      :domain => "http://www.hsgac.senate.gov/",
      :party  => 'majority'
    }
  end
end
137
+
138
# Collects releases from the hsgac.senate.gov minority-media listing for the
# given year, tagging every entry with :party => 'minority'.
#
# year - value interpolated into the listing URL's year parameter.
#
# Rows whose stripped text is shorter than 30 characters are skipped. For the
# rest, child 0 holds the date text and the first child of child 2 is the link.
#
# Returns an Array of Hashes with :source, :url, :title, :date, :domain and
# :party keys; an empty Array when open_html returns nil.
def self.senate_hsag_minority(year)
  url = "http://www.hsgac.senate.gov/media/minority-media?year=#{year}"
  doc = open_html(url)
  # Always hand back an Array so callers can flatten/iterate without nil checks.
  return [] if doc.nil?

  release_rows = doc.xpath("//tr").reject { |row| row.text.strip.size < 30 }
  release_rows.map do |row|
    link = row.children[2].children[0]
    {
      :source => url,
      :url    => link['href'].strip,
      :title  => link.text,
      :date   => Date.parse(row.children[0].text),
      :domain => "http://www.hsgac.senate.gov/",
      :party  => 'minority'
    }
  end
end
149
+
150
# Collects releases from the indian.senate.gov news index, tagging every entry
# with :party => 'majority'.
#
# Every <h3> is treated as one release: its first child is the link, and the
# node two siblings before the heading supplies the date text.
#
# Returns an Array of Hashes with :source, :url, :title, :date, :domain and
# :party keys; an empty Array when open_html returns nil.
def self.senate_indian
  url = "http://www.indian.senate.gov/news/index.cfm"
  doc = open_html(url)
  # Always hand back an Array so callers can flatten/iterate without nil checks.
  return [] if doc.nil?

  doc.xpath("//h3").map do |heading|
    link = heading.children[0]
    {
      :source => url,
      :url    => "http://www.indian.senate.gov" + link['href'],
      :title  => link.text,
      :date   => Date.parse(heading.previous.previous.text),
      :domain => "http://www.indian.senate.gov/",
      :party  => 'majority'
    }
  end
end
160
+
161
# Collects releases from the aging.senate.gov pressroom listing.
#
# Only the <tr> elements at positions 6 through 104 are read; in each, child 0
# holds the date text, child 2 holds the title, and the first child of child 2
# carries the href. Entries carry no :party key.
#
# Returns an Array of Hashes with :source, :url, :title, :date and :domain
# keys; an empty Array when open_html returns nil or the page has too few rows.
def self.senate_aging
  url = "http://www.aging.senate.gov/pressroom.cfm?maxrows=100&startrow=1&&type=1"
  doc = open_html(url)
  # Always hand back an Array so callers can flatten/iterate without nil checks.
  return [] if doc.nil?

  # Slicing past the end of the row list yields nil; treat that as "no rows".
  rows = doc.xpath("//tr")[6..104] || []
  rows.map do |row|
    title_cell = row.children[2]
    {
      :source => url,
      :url    => "http://www.aging.senate.gov/" + title_cell.children[0]['href'],
      :title  => title_cell.text.strip,
      :date   => Date.parse(row.children[0].text),
      :domain => "http://www.aging.senate.gov/"
    }
  end
end
171
+
172
# Collects releases from the sbc.senate.gov RepublicanPressRoom page, tagging
# every entry with :party => 'minority'.
#
# One entry is produced per <ul class='recordList'>: within the list's first
# child, child 0 holds the date text and the first child of child 2 is the link.
# NOTE(review): only the first child of each list is read — confirm against the
# live markup whether later items in the same list should also be collected.
#
# Returns an Array of Hashes with :source, :url, :title, :date, :domain and
# :party keys; an empty Array when open_html returns nil.
def self.senate_smallbiz_minority
  url = "http://www.sbc.senate.gov/public/index.cfm?p=RepublicanPressRoom"
  doc = open_html(url)
  # Always hand back an Array so callers can flatten/iterate without nil checks.
  return [] if doc.nil?

  doc.xpath("//ul[@class='recordList']").map do |list|
    item = list.children[0]
    link = item.children[2].children[0]
    {
      :source => url,
      :url    => link['href'],
      :title  => link.text,
      :date   => Date.parse(item.children[0].text),
      :domain => "http://www.sbc.senate.gov/",
      :party  => 'minority'
    }
  end
end
182
+
183
# Collects releases from the intelligence.senate.gov press-release listing.
#
# congress   - value interpolated into the URL's congress parameter.
# start_year - value interpolated into the URL's y1 parameter.
# end_year   - value interpolated into the URL's y2 parameter.
#
# The first seven <tr valign='top'> rows are skipped. In each remaining row,
# child 0 holds the date text and the first child of child 2 is the link.
# Entries carry no :party key.
#
# Returns an Array of Hashes with :source, :url, :title, :date and :domain
# keys; an empty Array when open_html returns nil or the page has too few rows.
def self.senate_intel(congress, start_year, end_year)
  url = "http://www.intelligence.senate.gov/press/releases.cfm?congress=#{congress}&y1=#{start_year}&y2=#{end_year}"
  doc = open_html(url)
  # Always hand back an Array so callers can flatten/iterate without nil checks.
  return [] if doc.nil?

  # Slicing past the end of the row list yields nil; treat that as "no rows".
  rows = doc.xpath("//tr[@valign='top']")[7..-1] || []
  rows.map do |row|
    link = row.children[2].children[0]
    {
      :source => url,
      :url    => "http://www.intelligence.senate.gov/press/" + link['href'],
      :title  => link.text.strip,
      :date   => Date.parse(row.children[0].text),
      :domain => "http://www.intelligence.senate.gov/"
    }
  end
end
193
+
194
# Collects releases from the democrats.energycommerce.house.gov news-releases
# page, tagging every entry with :party => 'minority'.
#
# Every div with class 'views-field-title' is one release: the first child of
# its child 1 is the link, and the node two siblings after the div supplies the
# date text.
#
# Returns an Array of Hashes with :source, :url, :title, :date, :domain and
# :party keys; an empty Array when open_html returns nil.
def self.house_energy_minority
  url = "http://democrats.energycommerce.house.gov/index.php?q=news-releases"
  doc = open_html(url)
  # Always hand back an Array so callers can flatten/iterate without nil checks.
  return [] if doc.nil?

  doc.xpath("//div[@class='views-field-title']").map do |title_div|
    link = title_div.children[1].children[0]
    {
      :source => url,
      :url    => "http://democrats.energycommerce.house.gov" + link['href'],
      :title  => link.text,
      :date   => Date.parse(title_div.next.next.text.strip),
      :domain => "http://energycommerce.house.gov/",
      :party  => 'minority'
    }
  end
end
204
+
205
# Collects releases from the chsdemocrats.house.gov press index, tagging every
# entry with :party => 'minority'.
#
# Every <li class='article'> is one release: child 1 is the link and child 3
# holds the date text.
#
# Returns an Array of Hashes with :source, :url, :title, :date, :domain and
# :party keys; an empty Array when open_html returns nil.
def self.house_homeland_security_minority
  url = "http://chsdemocrats.house.gov/press/index.asp?subsection=1"
  doc = open_html(url)
  # Always hand back an Array so callers can flatten/iterate without nil checks.
  return [] if doc.nil?

  doc.xpath("//li[@class='article']").map do |article|
    link = article.children[1]
    {
      :source => url,
      :url    => "http://chsdemocrats.house.gov" + link['href'],
      :title  => link.text.strip,
      :date   => Date.parse(article.children[3].text),
      :domain => "http://chsdemocrats.house.gov/",
      :party  => 'minority'
    }
  end
end
215
+
216
# Collects releases from the judiciary.house.gov press2013.html page, tagging
# every entry with :party => 'majority'.
#
# Only the <p> elements at positions 3 through 60 are read, and paragraphs whose
# text is shorter than 30 characters are skipped. In each remaining paragraph,
# child 0 holds the title, child 1 the date text, and child 5 carries the href.
#
# Returns an Array of Hashes with :source, :url, :title, :date, :domain and
# :party keys; an empty Array when open_html returns nil or the page has too
# few paragraphs.
def self.house_judiciary_majority
  url = "http://judiciary.house.gov/news/press2013.html"
  doc = open_html(url)
  # Always hand back an Array so callers can flatten/iterate without nil checks.
  return [] if doc.nil?

  # Slicing past the end of the paragraph list yields nil; treat that as "no rows".
  paragraphs = doc.xpath("//p")[3..60] || []
  paragraphs.reject { |para| para.text.size < 30 }.map do |para|
    {
      :source => url,
      :url    => para.children[5]['href'],
      :title  => para.children[0].text,
      :date   => Date.parse(para.children[1].text.strip),
      :domain => "http://judiciary.house.gov/",
      :party  => 'majority'
    }
  end
end
227
+
228
# Collects releases from the rules.house.gov News/Default.aspx page, tagging
# every entry with :party => 'majority'.
#
# The first and last <tr> are dropped, and rows whose stripped text is shorter
# than 30 characters are skipped. In each remaining row the link sits at
# child 0 -> child 1 -> child 0, and the date text at child 2 -> child 1.
#
# Returns an Array of Hashes with :source, :url, :title, :date, :domain and
# :party keys; an empty Array when open_html returns nil or the page has no rows.
def self.house_rules_majority
  url = "http://www.rules.house.gov/News/Default.aspx"
  doc = open_html(url)
  # Always hand back an Array so callers can flatten/iterate without nil checks.
  return [] if doc.nil?

  # Slicing an empty row list from index 1 yields nil; treat that as "no rows".
  rows = doc.xpath("//tr")[1..-2] || []
  rows.reject { |row| row.text.strip.size < 30 }.map do |row|
    link = row.children[0].children[1].children[0]
    {
      :source => url,
      :url    => "http://www.rules.house.gov/News/" + link['href'],
      :title  => link.text,
      :date   => Date.parse(row.children[2].children[1].text.strip),
      :domain => "http://www.rules.house.gov/",
      :party  => 'majority'
    }
  end
end
239
+
240
# Collects releases from the waysandmeans.house.gov document query page,
# tagging every entry with :party => 'majority'.
#
# The children of <ul class='UnorderedNewsList'> are walked, skipping any whose
# stripped text is shorter than 10 characters. In each remaining item the link
# sits at child 1 -> child 1, and the date text at child 3 -> child 0.
#
# Returns an Array of Hashes with :source, :url, :title, :date, :domain and
# :party keys; an empty Array when open_html returns nil.
def self.house_ways_means_majority
  url = "http://waysandmeans.house.gov/news/documentquery.aspx?DocumentTypeID=1496"
  doc = open_html(url)
  # Always hand back an Array so callers can flatten/iterate without nil checks.
  return [] if doc.nil?

  items = doc.xpath("//ul[@class='UnorderedNewsList']").children
  items.reject { |item| item.text.strip.size < 10 }.map do |item|
    link = item.children[1].children[1]
    {
      :source => url,
      :url    => "http://waysandmeans.house.gov" + link['href'],
      :title  => link.text,
      :date   => Date.parse(item.children[3].children[0].text.strip),
      :domain => "http://waysandmeans.house.gov/",
      :party  => 'majority'
    }
  end
end
251
+
80
252
  ## special cases for members without RSS feeds
81
253
 
82
254
  def self.capuano
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: statement
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.5
4
+ version: '0.8'
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-05-07 00:00:00.000000000 Z
12
+ date: 2013-05-16 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
16
- requirement: !ruby/object:Gem::Requirement
16
+ requirement: &2152291960 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,15 +21,10 @@ dependencies:
21
21
  version: '1.3'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
- requirements:
27
- - - ~>
28
- - !ruby/object:Gem::Version
29
- version: '1.3'
24
+ version_requirements: *2152291960
30
25
  - !ruby/object:Gem::Dependency
31
26
  name: rake
32
- requirement: !ruby/object:Gem::Requirement
27
+ requirement: &2152239160 !ruby/object:Gem::Requirement
33
28
  none: false
34
29
  requirements:
35
30
  - - ! '>='
@@ -37,15 +32,10 @@ dependencies:
37
32
  version: '0'
38
33
  type: :development
39
34
  prerelease: false
40
- version_requirements: !ruby/object:Gem::Requirement
41
- none: false
42
- requirements:
43
- - - ! '>='
44
- - !ruby/object:Gem::Version
45
- version: '0'
35
+ version_requirements: *2152239160
46
36
  - !ruby/object:Gem::Dependency
47
37
  name: webmock
48
- requirement: !ruby/object:Gem::Requirement
38
+ requirement: &2152121140 !ruby/object:Gem::Requirement
49
39
  none: false
50
40
  requirements:
51
41
  - - ! '>='
@@ -53,15 +43,10 @@ dependencies:
53
43
  version: '0'
54
44
  type: :development
55
45
  prerelease: false
56
- version_requirements: !ruby/object:Gem::Requirement
57
- none: false
58
- requirements:
59
- - - ! '>='
60
- - !ruby/object:Gem::Version
61
- version: '0'
46
+ version_requirements: *2152121140
62
47
  - !ruby/object:Gem::Dependency
63
48
  name: american_date
64
- requirement: !ruby/object:Gem::Requirement
49
+ requirement: &2152084380 !ruby/object:Gem::Requirement
65
50
  none: false
66
51
  requirements:
67
52
  - - ! '>='
@@ -69,15 +54,10 @@ dependencies:
69
54
  version: '0'
70
55
  type: :runtime
71
56
  prerelease: false
72
- version_requirements: !ruby/object:Gem::Requirement
73
- none: false
74
- requirements:
75
- - - ! '>='
76
- - !ruby/object:Gem::Version
77
- version: '0'
57
+ version_requirements: *2152084380
78
58
  - !ruby/object:Gem::Dependency
79
59
  name: nokogiri
80
- requirement: !ruby/object:Gem::Requirement
60
+ requirement: &2151878460 !ruby/object:Gem::Requirement
81
61
  none: false
82
62
  requirements:
83
63
  - - ! '>='
@@ -85,12 +65,7 @@ dependencies:
85
65
  version: '0'
86
66
  type: :runtime
87
67
  prerelease: false
88
- version_requirements: !ruby/object:Gem::Requirement
89
- none: false
90
- requirements:
91
- - - ! '>='
92
- - !ruby/object:Gem::Version
93
- version: '0'
68
+ version_requirements: *2151878460
94
69
  description: Crawls congressional websites for press releases.
95
70
  email:
96
71
  - dwillis@gmail.com
@@ -127,21 +102,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
127
102
  - - ! '>='
128
103
  - !ruby/object:Gem::Version
129
104
  version: '0'
130
- segments:
131
- - 0
132
- hash: -672742253743525613
133
105
  required_rubygems_version: !ruby/object:Gem::Requirement
134
106
  none: false
135
107
  requirements:
136
108
  - - ! '>='
137
109
  - !ruby/object:Gem::Version
138
110
  version: '0'
139
- segments:
140
- - 0
141
- hash: -672742253743525613
142
111
  requirements: []
143
112
  rubyforge_project:
144
- rubygems_version: 1.8.24
113
+ rubygems_version: 1.8.17
145
114
  signing_key:
146
115
  specification_version: 3
147
116
  summary: Given a url, Statement returns links to press releases and official statements.