statement 1.9.5 → 1.9.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/statement/scraper.rb +23 -13
- data/lib/statement/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9210caafe5f39c592c2d5865620e11eb6f14fbff
|
4
|
+
data.tar.gz: 5a9f789a96688513a517d739144a9136a93ea63d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 886ce38c989a07e6bf1b5e8785e50dba38d90038e4e7c9077aae49b98111be2aa4f7a9e6ccfdc25c691bc97329710be4a434e3f1082cf276b9dfae5ab436c5c3
|
7
|
+
data.tar.gz: 6d6e2f124d32faf588c4b2a78ed63a27866b916a78720e4f975a9213f6e65e9e122ff1e9dc89cde0fb64741b6019ea5a77ff0c996c2a465bab50f5a0a1309bc6
|
data/lib/statement/scraper.rb
CHANGED
@@ -32,7 +32,7 @@ module Statement
|
|
32
32
|
[:crenshaw, :capuano, :cold_fusion, :conaway, :chabot, :freshman_senators, :klobuchar, :billnelson, :crapo, :boxer,
|
33
33
|
:vitter, :inhofe, :palazzo, :roe, :document_query, :swalwell, :fischer, :clark, :edwards, :culberson_chabot_grisham, :barton,
|
34
34
|
:sherman_mccaul, :welch, :sessions, :gabbard, :ellison, :costa, :farr, :mcclintock, :mcnerney, :olson, :schumer, :lamborn, :walden,
|
35
|
-
:bennie_thompson, :speier, :poe]
|
35
|
+
:bennie_thompson, :speier, :poe, :grassley]
|
36
36
|
end
|
37
37
|
|
38
38
|
def self.committee_methods
|
@@ -42,7 +42,7 @@ module Statement
|
|
42
42
|
def self.member_scrapers
|
43
43
|
year = Date.today.year
|
44
44
|
results = [crenshaw, capuano, cold_fusion(year, nil), conaway, chabot, klobuchar(year), palazzo(page=1), roe(page=1), billnelson(year=year),
|
45
|
-
document_query(page=1), document_query(page=2), swalwell(page=1), crapo,
|
45
|
+
document_query(page=1), document_query(page=2), swalwell(page=1), crapo, boxer(start=1), grassley(page=0),
|
46
46
|
vitter(year=year), inhofe(year=year), fischer, clark(year=year), edwards, culberson_chabot_grisham(page=1), barton, sherman_mccaul, welch,
|
47
47
|
sessions(year=year), gabbard, ellison(page=0), costa, farr, olson, mcnerney, schumer, lamborn(limit=10), walden, bennie_thompson, speier,
|
48
48
|
poe(year=year, month=0)].flatten
|
@@ -52,7 +52,7 @@ module Statement
|
|
52
52
|
|
53
53
|
def self.backfill_from_scrapers
|
54
54
|
results = [cold_fusion(2012, 0), cold_fusion(2011, 0), cold_fusion(2010, 0), billnelson(year=2012), document_query(page=3),
|
55
|
-
document_query(page=4),
|
55
|
+
document_query(page=4), boxer(start=11), boxer(start=21), grassley(page=1), grassley(page=2), grassley(page=3),
|
56
56
|
boxer(start=31), boxer(start=41), vitter(year=2012), vitter(year=2011), swalwell(page=2), swalwell(page=3), clark(year=2013), culberson_chabot_grisham(page=2),
|
57
57
|
sherman_mccaul(page=1), sessions(year=2013), pryor(page=1), ellison(page=1), ellison(page=2), ellison(page=3), farr(year=2013), farr(year=2012), farr(year=2011),
|
58
58
|
mcnerney(page=2), mcnerney(page=3), mcnerney(page=4), mcnerney(page=5), mcnerney(page=6), olson(year=2013), schumer(page=2), schumer(page=3), poe(year=2015, month=2),
|
@@ -287,24 +287,35 @@ module Statement
|
|
287
287
|
def self.cold_fusion(year=Date.today.year, month=nil)
|
288
288
|
results = []
|
289
289
|
year = Date.today.year if not year
|
290
|
-
domains = ['www.ronjohnson.senate.gov
|
290
|
+
domains = ['www.ronjohnson.senate.gov','www.risch.senate.gov', 'www.lee.senate.gov']
|
291
291
|
domains.each do |domain|
|
292
|
-
if domain == 'www.risch.senate.gov
|
292
|
+
if domain == 'www.risch.senate.gov'
|
293
293
|
if not month
|
294
294
|
url = "http://www.risch.senate.gov/public/index.cfm/pressreleases"
|
295
295
|
else
|
296
296
|
url = "http://www.risch.senate.gov/public/index.cfm/pressreleases?YearDisplay=#{year}&MonthDisplay=#{month}&page=1"
|
297
297
|
end
|
298
|
+
elsif domain == 'www.barrasso.senate.gov'
|
299
|
+
if not month
|
300
|
+
url = "http://#{domain}/public/index.cfm/news-releases"
|
301
|
+
else
|
302
|
+
url = "http://#{domain}/public/index.cfm/news-releases?YearDisplay=#{year}&MonthDisplay=#{month}&page=1"
|
303
|
+
end
|
298
304
|
else
|
299
305
|
if not month
|
300
|
-
url = "http
|
306
|
+
url = "http://#{domain}/public/index.cfm/press-releases"
|
301
307
|
else
|
302
|
-
url = "http
|
308
|
+
url = "http://#{domain}/public/index.cfm/press-releases?YearDisplay=#{year}&MonthDisplay=#{month}&page=1"
|
303
309
|
end
|
304
310
|
end
|
305
311
|
doc = Statement::Scraper.open_html(url)
|
306
312
|
return if doc.nil?
|
307
|
-
|
313
|
+
if domain == 'www.lee.senate.gov' or domain == 'www.barrasso.senate.gov'
|
314
|
+
rows = doc.xpath("//tr")[1..-1]
|
315
|
+
else
|
316
|
+
rows = doc.xpath("//tr")[2..-1]
|
317
|
+
end
|
318
|
+
rows.each do |row|
|
308
319
|
date_text, title = row.children.map{|c| c.text.strip}.reject{|c| c.empty?}
|
309
320
|
next if date_text == 'Date' or date_text.size > 10
|
310
321
|
date = Date.parse(date_text)
|
@@ -428,14 +439,13 @@ module Statement
|
|
428
439
|
results
|
429
440
|
end
|
430
441
|
|
431
|
-
def self.
|
442
|
+
def self.grassley(page=0)
|
432
443
|
results = []
|
433
|
-
url = "http://www.
|
444
|
+
url = "http://www.grassley.senate.gov/news/news-releases?title=&tid=All&date[value]&page=#{page}"
|
434
445
|
doc = open_html(url)
|
435
446
|
return if doc.nil?
|
436
|
-
doc.xpath("//
|
437
|
-
|
438
|
-
results << { :source => url, :url => row.children[3].children[0]['href'], :title => row.children[3].text.strip, :date => Date.strptime(row.children[1].text.strip, "%m/%d/%y"), :domain => "coburn.senate.gov" }
|
447
|
+
doc.xpath("//div[@class='views-field views-field-field-release-date']").each do |row|
|
448
|
+
results << { :source => url, :url => "http://www.grassley.senate.gov" + row.next.next.children[1].children[0]['href'], :title => row.next.next.text.strip, :date => Date.parse(row.text.strip), :domain => "grassley.senate.gov" }
|
439
449
|
end
|
440
450
|
results
|
441
451
|
end
|
data/lib/statement/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: statement
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.9.5
|
4
|
+
version: 1.9.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Derek Willis
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-04-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -211,3 +211,4 @@ test_files:
|
|
211
211
|
- spec/ruiz_rss.xml
|
212
212
|
- spec/statement_spec.rb
|
213
213
|
- spec/vitter_press.html
|
214
|
+
has_rdoc:
|