statement 1.8.13 → 1.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/statement/scraper.rb +6 -20
- data/lib/statement/utils.rb +2 -2
- data/lib/statement/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3316c0e258c2ed004e32b0c367c10672519f169c
|
4
|
+
data.tar.gz: cd831fcea5e979ac3c27046e9e2617967616f824
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0ae5af468325c272f9befab60d333e9f9d197588f66e9d50e3f0beb2c3795b67505069f0b73e99972dacc010c5ce7bd6a9e27fc2368863e6f3e816b190ace838
|
7
|
+
data.tar.gz: d34919c91fbc18d45a44d39fbe21120339568ddbd037a97adc91ddd8c8f4a60c48d30d881271f476bf0fe8003713f5af2165c65f934036e666924687f48c32df
|
data/lib/statement/scraper.rb
CHANGED
@@ -29,7 +29,7 @@ module Statement
|
|
29
29
|
end
|
30
30
|
|
31
31
|
def self.member_methods
|
32
|
-
[:crenshaw, :capuano, :cold_fusion, :conaway, :chabot, :susandavis, :freshman_senators, :klobuchar, :billnelson, :
|
32
|
+
[:crenshaw, :capuano, :cold_fusion, :conaway, :chabot, :susandavis, :freshman_senators, :klobuchar, :billnelson, :crapo, :boxer, :vitter, :inhofe, :palazzo, :roe, :document_query, :swalwell, :fischer, :clark, :edwards, :culberson_chabot_grisham, :barton, :sherman_mccaul, :welch, :sessions, :gabbard, :ellison, :costa, :farr, :mcclintock, :mcnerney, :olson]
|
33
33
|
end
|
34
34
|
|
35
35
|
def self.committee_methods
|
@@ -40,8 +40,8 @@ module Statement
|
|
40
40
|
year = Date.today.year
|
41
41
|
results = [crenshaw, capuano, cold_fusion(year, nil), conaway, chabot, susandavis, klobuchar(year), palazzo(page=1), roe(page=1), billnelson(year=year),
|
42
42
|
document_query(page=1), document_query(page=2), swalwell(page=1), crapo, coburn, boxer(start=1),
|
43
|
-
vitter(year=year), inhofe(year=2014), fischer, clark(year=year), edwards, culberson_chabot_grisham(page=1), barton,
|
44
|
-
sessions(year=year), gabbard,
|
43
|
+
vitter(year=year), inhofe(year=2014), fischer, clark(year=year), edwards, culberson_chabot_grisham(page=1), barton, sherman_mccaul, welch,
|
44
|
+
sessions(year=year), gabbard, ellison(page=0), costa, farr, mcclintock, olson, mcnerney].flatten
|
45
45
|
results = results.compact
|
46
46
|
Utils.remove_generic_urls!(results)
|
47
47
|
end
|
@@ -50,7 +50,7 @@ module Statement
|
|
50
50
|
results = [cold_fusion(2012, 0), cold_fusion(2011, 0), cold_fusion(2010, 0), billnelson(year=2012), document_query(page=3),
|
51
51
|
document_query(page=4), coburn(year=2012), coburn(year=2011), coburn(year=2010), boxer(start=11), boxer(start=21),
|
52
52
|
boxer(start=31), boxer(start=41), vitter(year=2012), vitter(year=2011), swalwell(page=2), swalwell(page=3), clark(year=2013), culberson_chabot_grisham(page=2),
|
53
|
-
|
53
|
+
sherman_mccaul(page=1), sessions(year=2013), pryor(page=1), ellison(page=1), ellison(page=2), ellison(page=3), farr(year=2013), farr(year=2012), farr(year=2011),
|
54
54
|
mcnerney(page=2), mcnerney(page=3), mcnerney(page=4), mcnerney(page=5), mcnerney(page=6), olson(year=2013)].flatten
|
55
55
|
Utils.remove_generic_urls!(results)
|
56
56
|
end
|
@@ -579,9 +579,9 @@ module Statement
|
|
579
579
|
results
|
580
580
|
end
|
581
581
|
|
582
|
-
def self.
|
582
|
+
def self.sherman_mccaul(page=0)
|
583
583
|
results = []
|
584
|
-
domains = ['
|
584
|
+
domains = ['sherman.house.gov', 'mccaul.house.gov']
|
585
585
|
domains.each do |domain|
|
586
586
|
url = "http://#{domain}/media-center/press-releases?page=#{page}"
|
587
587
|
doc = open_html(url)
|
@@ -595,20 +595,6 @@ module Statement
|
|
595
595
|
results.flatten
|
596
596
|
end
|
597
597
|
|
598
|
-
def self.pryor(page=0)
|
599
|
-
results = []
|
600
|
-
domain = 'www.pryor.senate.gov'
|
601
|
-
url = "http://www.pryor.senate.gov/newsroom/press-releases?page=#{page}"
|
602
|
-
doc = open_html(url)
|
603
|
-
return if doc.nil?
|
604
|
-
dates = doc.xpath('//span[@class="field-content"]').map {|s| s.text if s.text.strip.include?("201")}.compact!
|
605
|
-
doc.xpath('//div[@class="views-field views-field-title"]').each_with_index do |row, i|
|
606
|
-
date = Date.parse(dates[i])
|
607
|
-
results << {:source => url, :url => "http://www.pryor.senate.gov"+row.children[1].children[0]['href'], :title => row.children[1].children[0].text.strip, :date => date, :domain => domain}
|
608
|
-
end
|
609
|
-
results
|
610
|
-
end
|
611
|
-
|
612
598
|
def self.welch
|
613
599
|
results = []
|
614
600
|
domain = 'welch.house.gov'
|
data/lib/statement/utils.rb
CHANGED
@@ -2,11 +2,11 @@ require 'uri'
|
|
2
2
|
|
3
3
|
module Utils
|
4
4
|
def self.absolute_link(url, link)
|
5
|
-
return link if link =~ /^http
|
5
|
+
return link if link =~ /^http/
|
6
6
|
("http://"+URI.parse(url).host + "/"+link).to_s
|
7
7
|
end
|
8
8
|
|
9
9
|
def self.remove_generic_urls!(results)
|
10
10
|
results.reject{|r| URI.parse(URI.escape(r[:url])).path == '/news/' or URI.parse(URI.escape(r[:url])).path == '/news'}
|
11
11
|
end
|
12
|
-
end
|
12
|
+
end
|
data/lib/statement/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: statement
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: '1.9'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Derek Willis
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-02-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|