statement 0.7.5 → 0.8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. data/lib/statement/version.rb +1 -1
  2. data/lib/statement.rb +172 -0
  3. metadata +13 -44
@@ -1,3 +1,3 @@
1
1
  module Statement
2
- VERSION = "0.7.5"
2
+ VERSION = "0.8"
3
3
  end
data/lib/statement.rb CHANGED
@@ -77,6 +77,178 @@ module Statement
77
77
  ].flatten
78
78
  end
79
79
 
80
# Runs every committee scraper and merges the results into one flat Array.
#
# Each scraper returns an Array of release Hashes, or nil when its page
# could not be opened; those nil entries are dropped so the result only
# contains Hashes.
#
# Returns an Array of Hashes.
def self.committee_scrapers
  year = Date.today.year
  # The 1st Congress convened in 1789 and each Congress covers two calendar
  # years, so derive the current Congress and its year range instead of
  # pinning them to the 113th (2013-2014).
  congress = (year - 1789) / 2 + 1
  congress_start = 1789 + (congress - 1) * 2
  [
    senate_approps_majority,
    senate_approps_minority,
    senate_banking(year),
    senate_hsag_majority(year),
    senate_hsag_minority(year),
    senate_indian,
    senate_aging,
    senate_smallbiz_minority,
    senate_intel(congress, congress_start, congress_start + 1),
    house_energy_minority,
    house_homeland_security_minority,
    house_judiciary_majority,
    house_rules_majority,
    house_ways_means_majority
  ].flatten.compact
end
86
+
87
+ ## special cases for committees without RSS feeds
88
+
89
# Scrapes the majority news page of appropriations.senate.gov.
#
# Releases are grouped under 'newsDateUnderlined' date divs; the entries for
# a date are the non-blank children of the node two siblings after the div.
#
# Returns an Array of Hashes (:source, :url, :title, :date, :domain, :party),
# or nil when open_html returns nil.
def self.senate_approps_majority
  url = "http://www.appropriations.senate.gov/news.cfm"
  page = open_html(url)
  return if page.nil?
  page.xpath("//div[@class='newsDateUnderlined']").flat_map do |date_div|
    entries = date_div.next.next.children.reject { |node| node.text.strip.empty? }
    entries.map do |entry|
      {
        :source => url,
        :url    => url + entry.children[0]['href'],
        :title  => entry.text,
        :date   => Date.parse(date_div.text),
        :domain => "http://www.appropriations.senate.gov/",
        :party  => 'majority'
      }
    end
  end
end
101
+
102
# Scrapes the minority (republican.cfm) page of appropriations.senate.gov.
#
# Releases are grouped under 'newsDateUnderlined' date divs; the entries for
# a date are the non-blank children of the node two siblings after the div.
#
# Returns an Array of Hashes (:source, :url, :title, :date, :domain, :party),
# or nil when open_html returns nil.
def self.senate_approps_minority
  url = "http://www.appropriations.senate.gov/republican.cfm"
  page = open_html(url)
  return if page.nil?
  page.xpath("//div[@class='newsDateUnderlined']").flat_map do |date_div|
    entries = date_div.next.next.children.reject { |node| node.text.strip.empty? }
    entries.map do |entry|
      {
        :source => url,
        :url    => url + entry.children[0]['href'],
        :title  => entry.text,
        :date   => Date.parse(date_div.text),
        :domain => "http://www.appropriations.senate.gov/",
        :party  => 'minority'
      }
    end
  end
end
114
+
115
# Scrapes the banking.senate.gov press release listing for one year.
#
# year - the value interpolated into the YearDisplay query parameter; it is
#        also appended to each row's date text before parsing.
#
# Every <tr> on the page is treated as a release row.
#
# Returns an Array of Hashes (:source, :url, :title, :date, :domain, :party),
# or nil when open_html returns nil.
def self.senate_banking(year)
  url = "http://www.banking.senate.gov/public/index.cfm?FuseAction=Newsroom.PressReleases&ContentRecordType_id=b94acc28-404a-4fc6-b143-a9e15bf92da4&Region_id=&Issue_id=&MonthDisplay=0&YearDisplay=#{year}"
  page = open_html(url)
  return if page.nil?
  page.xpath("//tr").map do |tr|
    {
      :source => url,
      :url    => "http://www.banking.senate.gov/public/" + tr.children[2].children[1]['href'],
      :title  => tr.children[2].text.strip,
      :date   => Date.parse(tr.children[0].text.strip + ", #{year}"),
      :domain => "http://www.banking.senate.gov/",
      :party  => 'majority'
    }
  end
end
125
+
126
# Scrapes the majority media listing of hsgac.senate.gov for one year.
#
# year - the value interpolated into the ?year= query parameter.
#
# Table rows whose stripped text is shorter than 30 characters are skipped.
#
# Returns an Array of Hashes (:source, :url, :title, :date, :domain, :party),
# or nil when open_html returns nil.
def self.senate_hsag_majority(year)
  url = "http://www.hsgac.senate.gov/media/majority-media?year=#{year}"
  page = open_html(url)
  return if page.nil?
  release_rows = page.xpath("//tr").reject { |tr| tr.text.strip.size < 30 }
  release_rows.map do |tr|
    {
      :source => url,
      :url    => tr.children[2].children[0]['href'].strip,
      :title  => tr.children[2].children[0].text,
      :date   => Date.parse(tr.children[0].text),
      :domain => "http://www.hsgac.senate.gov/",
      :party  => 'majority'
    }
  end
end
137
+
138
# Scrapes the minority media listing of hsgac.senate.gov for one year.
#
# year - the value interpolated into the ?year= query parameter.
#
# Table rows whose stripped text is shorter than 30 characters are skipped.
#
# Returns an Array of Hashes (:source, :url, :title, :date, :domain, :party),
# or nil when open_html returns nil.
def self.senate_hsag_minority(year)
  url = "http://www.hsgac.senate.gov/media/minority-media?year=#{year}"
  page = open_html(url)
  return if page.nil?
  release_rows = page.xpath("//tr").reject { |tr| tr.text.strip.size < 30 }
  release_rows.map do |tr|
    {
      :source => url,
      :url    => tr.children[2].children[0]['href'].strip,
      :title  => tr.children[2].children[0].text,
      :date   => Date.parse(tr.children[0].text),
      :domain => "http://www.hsgac.senate.gov/",
      :party  => 'minority'
    }
  end
end
149
+
150
# Scrapes the news index of indian.senate.gov.
#
# Each <h3> is treated as a release: its first child supplies the link and
# title, and the node two siblings before it supplies the date text.
#
# Returns an Array of Hashes (:source, :url, :title, :date, :domain, :party),
# or nil when open_html returns nil.
def self.senate_indian
  url = "http://www.indian.senate.gov/news/index.cfm"
  page = open_html(url)
  return if page.nil?
  page.xpath("//h3").map do |heading|
    {
      :source => url,
      :url    => "http://www.indian.senate.gov" + heading.children[0]['href'],
      :title  => heading.children[0].text,
      :date   => Date.parse(heading.previous.previous.text),
      :domain => "http://www.indian.senate.gov/",
      :party  => 'majority'
    }
  end
end
160
+
161
# Scrapes the press room listing of aging.senate.gov.
#
# Only table rows 6 through 104 (zero-based) of the page are read; the rows
# before that are skipped. A page with too few rows yields an empty Array
# instead of raising.
#
# Returns an Array of Hashes (:source, :url, :title, :date, :domain),
# or nil when open_html returns nil.
def self.senate_aging
  url = "http://www.aging.senate.gov/pressroom.cfm?maxrows=100&startrow=1&&type=1"
  doc = open_html(url)
  return if doc.nil?
  # Slicing from an index past the end returns nil (e.g. on an error page),
  # so fall back to an empty list rather than calling a method on nil.
  rows = doc.xpath("//tr")[6..104] || []
  rows.map do |row|
    {
      :source => url,
      :url    => "http://www.aging.senate.gov/" + row.children[2].children[0]['href'],
      :title  => row.children[2].text.strip,
      :date   => Date.parse(row.children[0].text),
      :domain => "http://www.aging.senate.gov/"
    }
  end
end
171
+
172
# Scrapes the RepublicanPressRoom page of sbc.senate.gov.
#
# One release is read per 'recordList' <ul>, taken from its first child:
# that child's first child holds the date text and its third child holds
# the link.
#
# Returns an Array of Hashes (:source, :url, :title, :date, :domain, :party),
# or nil when open_html returns nil.
def self.senate_smallbiz_minority
  url = "http://www.sbc.senate.gov/public/index.cfm?p=RepublicanPressRoom"
  page = open_html(url)
  return if page.nil?
  page.xpath("//ul[@class='recordList']").map do |list|
    {
      :source => url,
      :url    => list.children[0].children[2].children[0]['href'],
      :title  => list.children[0].children[2].children[0].text,
      :date   => Date.parse(list.children[0].children[0].text),
      :domain => "http://www.sbc.senate.gov/",
      :party  => 'minority'
    }
  end
end
182
+
183
# Scrapes the press release listing of intelligence.senate.gov.
#
# congress   - value for the congress= query parameter.
# start_year - value for the y1= query parameter.
# end_year   - value for the y2= query parameter.
#
# Only <tr valign='top'> rows from index 7 onward are read. A page with too
# few matching rows yields an empty Array instead of raising.
#
# Returns an Array of Hashes (:source, :url, :title, :date, :domain),
# or nil when open_html returns nil.
def self.senate_intel(congress, start_year, end_year)
  url = "http://www.intelligence.senate.gov/press/releases.cfm?congress=#{congress}&y1=#{start_year}&y2=#{end_year}"
  doc = open_html(url)
  return if doc.nil?
  # Slicing from an index past the end returns nil, so fall back to an
  # empty list rather than calling a method on nil.
  rows = doc.xpath("//tr[@valign='top']")[7..-1] || []
  rows.map do |row|
    {
      :source => url,
      :url    => "http://www.intelligence.senate.gov/press/" + row.children[2].children[0]['href'],
      :title  => row.children[2].children[0].text.strip,
      :date   => Date.parse(row.children[0].text),
      :domain => "http://www.intelligence.senate.gov/"
    }
  end
end
193
+
194
# Scrapes the news releases page of democrats.energycommerce.house.gov.
#
# Each 'views-field-title' div supplies the link and title; the date text
# is read from the node two siblings after the div.
#
# Returns an Array of Hashes (:source, :url, :title, :date, :domain, :party),
# or nil when open_html returns nil.
def self.house_energy_minority
  url = "http://democrats.energycommerce.house.gov/index.php?q=news-releases"
  page = open_html(url)
  return if page.nil?
  page.xpath("//div[@class='views-field-title']").map do |title_div|
    {
      :source => url,
      :url    => "http://democrats.energycommerce.house.gov" + title_div.children[1].children[0]['href'],
      :title  => title_div.children[1].children[0].text,
      :date   => Date.parse(title_div.next.next.text.strip),
      :domain => "http://energycommerce.house.gov/",
      :party  => 'minority'
    }
  end
end
204
+
205
# Scrapes the press index of chsdemocrats.house.gov.
#
# Each <li class='article'> is one release: its second child is the link
# and its fourth child holds the date text.
#
# Returns an Array of Hashes (:source, :url, :title, :date, :domain, :party),
# or nil when open_html returns nil.
def self.house_homeland_security_minority
  url = "http://chsdemocrats.house.gov/press/index.asp?subsection=1"
  page = open_html(url)
  return if page.nil?
  page.xpath("//li[@class='article']").map do |item|
    {
      :source => url,
      :url    => "http://chsdemocrats.house.gov" + item.children[1]['href'],
      :title  => item.children[1].text.strip,
      :date   => Date.parse(item.children[3].text),
      :domain => "http://chsdemocrats.house.gov/",
      :party  => 'minority'
    }
  end
end
215
+
216
# Scrapes the press release page of judiciary.house.gov.
#
# NOTE(review): the URL is pinned to the press2013.html page.
#
# Only <p> elements 3 through 60 (zero-based) are read, and paragraphs whose
# text is shorter than 30 characters are skipped. A page with too few
# paragraphs yields an empty Array instead of raising.
#
# Returns an Array of Hashes (:source, :url, :title, :date, :domain, :party),
# or nil when open_html returns nil.
def self.house_judiciary_majority
  url = "http://judiciary.house.gov/news/press2013.html"
  doc = open_html(url)
  return if doc.nil?
  # Slicing from an index past the end returns nil, so fall back to an
  # empty list rather than calling a method on nil.
  paragraphs = doc.xpath("//p")[3..60] || []
  paragraphs.reject { |para| para.text.size < 30 }.map do |para|
    {
      :source => url,
      :url    => para.children[5]['href'],
      :title  => para.children[0].text,
      :date   => Date.parse(para.children[1].text.strip),
      :domain => "http://judiciary.house.gov/",
      :party  => 'majority'
    }
  end
end
227
+
228
# Scrapes the news page of rules.house.gov.
#
# The first and last <tr> on the page are excluded, and rows whose stripped
# text is shorter than 30 characters are skipped. A page with no table rows
# yields an empty Array instead of raising.
#
# Returns an Array of Hashes (:source, :url, :title, :date, :domain, :party),
# or nil when open_html returns nil.
def self.house_rules_majority
  url = "http://www.rules.house.gov/News/Default.aspx"
  doc = open_html(url)
  return if doc.nil?
  # Slicing from an index past the end returns nil, so fall back to an
  # empty list rather than calling a method on nil.
  rows = doc.xpath("//tr")[1..-2] || []
  rows.reject { |row| row.text.strip.size < 30 }.map do |row|
    {
      :source => url,
      :url    => "http://www.rules.house.gov/News/" + row.children[0].children[1].children[0]['href'],
      :title  => row.children[0].children[1].children[0].text,
      :date   => Date.parse(row.children[2].children[1].text.strip),
      :domain => "http://www.rules.house.gov/",
      :party  => 'majority'
    }
  end
end
239
+
240
# Scrapes the press release listing of waysandmeans.house.gov.
#
# Releases are the children of the 'UnorderedNewsList' <ul>; children whose
# stripped text is shorter than 10 characters are skipped.
#
# Returns an Array of Hashes (:source, :url, :title, :date, :domain, :party),
# or nil when open_html returns nil.
def self.house_ways_means_majority
  url = "http://waysandmeans.house.gov/news/documentquery.aspx?DocumentTypeID=1496"
  page = open_html(url)
  return if page.nil?
  items = page.xpath("//ul[@class='UnorderedNewsList']").children
  items.reject { |item| item.text.strip.size < 10 }.map do |item|
    {
      :source => url,
      :url    => "http://waysandmeans.house.gov" + item.children[1].children[1]['href'],
      :title  => item.children[1].children[1].text,
      :date   => Date.parse(item.children[3].children[0].text.strip),
      :domain => "http://waysandmeans.house.gov/",
      :party  => 'majority'
    }
  end
end
251
+
80
252
  ## special cases for members without RSS feeds
81
253
 
82
254
  def self.capuano
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: statement
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.5
4
+ version: '0.8'
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-05-07 00:00:00.000000000 Z
12
+ date: 2013-05-16 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
16
- requirement: !ruby/object:Gem::Requirement
16
+ requirement: &2152291960 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,15 +21,10 @@ dependencies:
21
21
  version: '1.3'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
- requirements:
27
- - - ~>
28
- - !ruby/object:Gem::Version
29
- version: '1.3'
24
+ version_requirements: *2152291960
30
25
  - !ruby/object:Gem::Dependency
31
26
  name: rake
32
- requirement: !ruby/object:Gem::Requirement
27
+ requirement: &2152239160 !ruby/object:Gem::Requirement
33
28
  none: false
34
29
  requirements:
35
30
  - - ! '>='
@@ -37,15 +32,10 @@ dependencies:
37
32
  version: '0'
38
33
  type: :development
39
34
  prerelease: false
40
- version_requirements: !ruby/object:Gem::Requirement
41
- none: false
42
- requirements:
43
- - - ! '>='
44
- - !ruby/object:Gem::Version
45
- version: '0'
35
+ version_requirements: *2152239160
46
36
  - !ruby/object:Gem::Dependency
47
37
  name: webmock
48
- requirement: !ruby/object:Gem::Requirement
38
+ requirement: &2152121140 !ruby/object:Gem::Requirement
49
39
  none: false
50
40
  requirements:
51
41
  - - ! '>='
@@ -53,15 +43,10 @@ dependencies:
53
43
  version: '0'
54
44
  type: :development
55
45
  prerelease: false
56
- version_requirements: !ruby/object:Gem::Requirement
57
- none: false
58
- requirements:
59
- - - ! '>='
60
- - !ruby/object:Gem::Version
61
- version: '0'
46
+ version_requirements: *2152121140
62
47
  - !ruby/object:Gem::Dependency
63
48
  name: american_date
64
- requirement: !ruby/object:Gem::Requirement
49
+ requirement: &2152084380 !ruby/object:Gem::Requirement
65
50
  none: false
66
51
  requirements:
67
52
  - - ! '>='
@@ -69,15 +54,10 @@ dependencies:
69
54
  version: '0'
70
55
  type: :runtime
71
56
  prerelease: false
72
- version_requirements: !ruby/object:Gem::Requirement
73
- none: false
74
- requirements:
75
- - - ! '>='
76
- - !ruby/object:Gem::Version
77
- version: '0'
57
+ version_requirements: *2152084380
78
58
  - !ruby/object:Gem::Dependency
79
59
  name: nokogiri
80
- requirement: !ruby/object:Gem::Requirement
60
+ requirement: &2151878460 !ruby/object:Gem::Requirement
81
61
  none: false
82
62
  requirements:
83
63
  - - ! '>='
@@ -85,12 +65,7 @@ dependencies:
85
65
  version: '0'
86
66
  type: :runtime
87
67
  prerelease: false
88
- version_requirements: !ruby/object:Gem::Requirement
89
- none: false
90
- requirements:
91
- - - ! '>='
92
- - !ruby/object:Gem::Version
93
- version: '0'
68
+ version_requirements: *2151878460
94
69
  description: Crawls congressional websites for press releases.
95
70
  email:
96
71
  - dwillis@gmail.com
@@ -127,21 +102,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
127
102
  - - ! '>='
128
103
  - !ruby/object:Gem::Version
129
104
  version: '0'
130
- segments:
131
- - 0
132
- hash: -672742253743525613
133
105
  required_rubygems_version: !ruby/object:Gem::Requirement
134
106
  none: false
135
107
  requirements:
136
108
  - - ! '>='
137
109
  - !ruby/object:Gem::Version
138
110
  version: '0'
139
- segments:
140
- - 0
141
- hash: -672742253743525613
142
111
  requirements: []
143
112
  rubyforge_project:
144
- rubygems_version: 1.8.24
113
+ rubygems_version: 1.8.17
145
114
  signing_key:
146
115
  specification_version: 3
147
116
  summary: Given a url, Statement returns links to press releases and official statements.