statement 1.9.5 → 1.9.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/statement/scraper.rb +23 -13
- data/lib/statement/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9210caafe5f39c592c2d5865620e11eb6f14fbff
|
4
|
+
data.tar.gz: 5a9f789a96688513a517d739144a9136a93ea63d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 886ce38c989a07e6bf1b5e8785e50dba38d90038e4e7c9077aae49b98111be2aa4f7a9e6ccfdc25c691bc97329710be4a434e3f1082cf276b9dfae5ab436c5c3
|
7
|
+
data.tar.gz: 6d6e2f124d32faf588c4b2a78ed63a27866b916a78720e4f975a9213f6e65e9e122ff1e9dc89cde0fb64741b6019ea5a77ff0c996c2a465bab50f5a0a1309bc6
|
data/lib/statement/scraper.rb
CHANGED
@@ -32,7 +32,7 @@ module Statement
|
|
32
32
|
[:crenshaw, :capuano, :cold_fusion, :conaway, :chabot, :freshman_senators, :klobuchar, :billnelson, :crapo, :boxer,
|
33
33
|
:vitter, :inhofe, :palazzo, :roe, :document_query, :swalwell, :fischer, :clark, :edwards, :culberson_chabot_grisham, :barton,
|
34
34
|
:sherman_mccaul, :welch, :sessions, :gabbard, :ellison, :costa, :farr, :mcclintock, :mcnerney, :olson, :schumer, :lamborn, :walden,
|
35
|
-
:bennie_thompson, :speier, :poe]
|
35
|
+
:bennie_thompson, :speier, :poe, :grassley]
|
36
36
|
end
|
37
37
|
|
38
38
|
def self.committee_methods
|
@@ -42,7 +42,7 @@ module Statement
|
|
42
42
|
def self.member_scrapers
|
43
43
|
year = Date.today.year
|
44
44
|
results = [crenshaw, capuano, cold_fusion(year, nil), conaway, chabot, klobuchar(year), palazzo(page=1), roe(page=1), billnelson(year=year),
|
45
|
-
document_query(page=1), document_query(page=2), swalwell(page=1), crapo,
|
45
|
+
document_query(page=1), document_query(page=2), swalwell(page=1), crapo, boxer(start=1), grassley(page=0),
|
46
46
|
vitter(year=year), inhofe(year=year), fischer, clark(year=year), edwards, culberson_chabot_grisham(page=1), barton, sherman_mccaul, welch,
|
47
47
|
sessions(year=year), gabbard, ellison(page=0), costa, farr, olson, mcnerney, schumer, lamborn(limit=10), walden, bennie_thompson, speier,
|
48
48
|
poe(year=year, month=0)].flatten
|
@@ -52,7 +52,7 @@ module Statement
|
|
52
52
|
|
53
53
|
def self.backfill_from_scrapers
|
54
54
|
results = [cold_fusion(2012, 0), cold_fusion(2011, 0), cold_fusion(2010, 0), billnelson(year=2012), document_query(page=3),
|
55
|
-
document_query(page=4),
|
55
|
+
document_query(page=4), boxer(start=11), boxer(start=21), grassley(page=1), grassley(page=2), grassley(page=3),
|
56
56
|
boxer(start=31), boxer(start=41), vitter(year=2012), vitter(year=2011), swalwell(page=2), swalwell(page=3), clark(year=2013), culberson_chabot_grisham(page=2),
|
57
57
|
sherman_mccaul(page=1), sessions(year=2013), pryor(page=1), ellison(page=1), ellison(page=2), ellison(page=3), farr(year=2013), farr(year=2012), farr(year=2011),
|
58
58
|
mcnerney(page=2), mcnerney(page=3), mcnerney(page=4), mcnerney(page=5), mcnerney(page=6), olson(year=2013), schumer(page=2), schumer(page=3), poe(year=2015, month=2),
|
@@ -287,24 +287,35 @@ module Statement
|
|
287
287
|
def self.cold_fusion(year=Date.today.year, month=nil)
|
288
288
|
results = []
|
289
289
|
year = Date.today.year if not year
|
290
|
-
domains = ['www.ronjohnson.senate.gov
|
290
|
+
domains = ['www.ronjohnson.senate.gov','www.risch.senate.gov', 'www.lee.senate.gov']
|
291
291
|
domains.each do |domain|
|
292
|
-
if domain == 'www.risch.senate.gov
|
292
|
+
if domain == 'www.risch.senate.gov'
|
293
293
|
if not month
|
294
294
|
url = "http://www.risch.senate.gov/public/index.cfm/pressreleases"
|
295
295
|
else
|
296
296
|
url = "http://www.risch.senate.gov/public/index.cfm/pressreleases?YearDisplay=#{year}&MonthDisplay=#{month}&page=1"
|
297
297
|
end
|
298
|
+
elsif domain == 'www.barrasso.senate.gov'
|
299
|
+
if not month
|
300
|
+
url = "http://#{domain}/public/index.cfm/news-releases"
|
301
|
+
else
|
302
|
+
url = "http://#{domain}/public/index.cfm/news-releases?YearDisplay=#{year}&MonthDisplay=#{month}&page=1"
|
303
|
+
end
|
298
304
|
else
|
299
305
|
if not month
|
300
|
-
url = "http
|
306
|
+
url = "http://#{domain}/public/index.cfm/press-releases"
|
301
307
|
else
|
302
|
-
url = "http
|
308
|
+
url = "http://#{domain}/public/index.cfm/press-releases?YearDisplay=#{year}&MonthDisplay=#{month}&page=1"
|
303
309
|
end
|
304
310
|
end
|
305
311
|
doc = Statement::Scraper.open_html(url)
|
306
312
|
return if doc.nil?
|
307
|
-
|
313
|
+
if domain == 'www.lee.senate.gov' or domain == 'www.barrasso.senate.gov'
|
314
|
+
rows = doc.xpath("//tr")[1..-1]
|
315
|
+
else
|
316
|
+
rows = doc.xpath("//tr")[2..-1]
|
317
|
+
end
|
318
|
+
rows.each do |row|
|
308
319
|
date_text, title = row.children.map{|c| c.text.strip}.reject{|c| c.empty?}
|
309
320
|
next if date_text == 'Date' or date_text.size > 10
|
310
321
|
date = Date.parse(date_text)
|
@@ -428,14 +439,13 @@ module Statement
|
|
428
439
|
results
|
429
440
|
end
|
430
441
|
|
431
|
-
def self.
|
442
|
+
def self.grassley(page=0)
|
432
443
|
results = []
|
433
|
-
url = "http://www.
|
444
|
+
url = "http://www.grassley.senate.gov/news/news-releases?title=&tid=All&date[value]&page=#{page}"
|
434
445
|
doc = open_html(url)
|
435
446
|
return if doc.nil?
|
436
|
-
doc.xpath("//
|
437
|
-
|
438
|
-
results << { :source => url, :url => row.children[3].children[0]['href'], :title => row.children[3].text.strip, :date => Date.strptime(row.children[1].text.strip, "%m/%d/%y"), :domain => "coburn.senate.gov" }
|
447
|
+
doc.xpath("//div[@class='views-field views-field-field-release-date']").each do |row|
|
448
|
+
results << { :source => url, :url => "http://www.grassley.senate.gov" + row.next.next.children[1].children[0]['href'], :title => row.next.next.text.strip, :date => Date.parse(row.text.strip), :domain => "grassley.senate.gov" }
|
439
449
|
end
|
440
450
|
results
|
441
451
|
end
|
data/lib/statement/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: statement
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.9.
|
4
|
+
version: 1.9.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Derek Willis
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-04-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -211,3 +211,4 @@ test_files:
|
|
211
211
|
- spec/ruiz_rss.xml
|
212
212
|
- spec/statement_spec.rb
|
213
213
|
- spec/vitter_press.html
|
214
|
+
has_rdoc:
|