statement 0.8 → 0.8.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3)
  1. data/lib/statement/version.rb +1 -1
  2. data/lib/statement.rb +14 -5
  3. metadata +12 -12
@@ -1,3 +1,3 @@
1
1
  module Statement
2
- VERSION = "0.8"
2
+ VERSION = "0.8.1"
3
3
  end
data/lib/statement.rb CHANGED
@@ -29,6 +29,10 @@ module Statement
29
29
  end
30
30
  end
31
31
 
32
+ def self.remove_generic_urls!(results)
33
+ results.reject{|r| URI.parse(r[:url]).path == '/news/' or URI.parse(r[:url]).path == '/news'}
34
+ end
35
+
32
36
  def self.date_from_rss_item(link)
33
37
  if !link.xpath('pubDate').text.empty?
34
38
  Date.parse(link.xpath('pubDate').text)
@@ -43,12 +47,13 @@ module Statement
43
47
  doc = open_rss(url)
44
48
  return unless doc
45
49
  links = doc.xpath('//item')
46
- links.map do |link|
50
+ results = links.map do |link|
47
51
  abs_link = absolute_link(url, link.xpath('link').text)
48
52
  abs_link = "http://www.burr.senate.gov/public/"+ link.xpath('link').text if url == 'http://www.burr.senate.gov/public/index.cfm?FuseAction=RSS.Feed'
49
53
  abs_link = link.xpath('link').text[37..-1] if url == "http://www.johanns.senate.gov/public/?a=RSS.Feed"
50
54
  { :source => url, :url => abs_link, :title => link.xpath('title').text, :date => date_from_rss_item(link), :domain => URI.parse(url).host }
51
55
  end
56
+ remove_generic_urls!(results)
52
57
  end
53
58
 
54
59
  def self.house_gop(url)
@@ -57,31 +62,35 @@ module Statement
57
62
  uri = URI.parse(url)
58
63
  date = Date.parse(uri.query.split('=').last)
59
64
  links = doc.xpath("//ul[@id='membernews']").search('a')
60
- links.map do |link|
65
+ results = links.map do |link|
61
66
  abs_link = absolute_link(url, link["href"])
62
67
  { :source => url, :url => abs_link, :title => link.text.strip, :date => date, :domain => URI.parse(link["href"]).host }
63
68
  end
69
+ remove_generic_urls!(results)
64
70
  end
65
71
 
66
72
  def self.from_scrapers
67
73
  year = Date.today.year
68
- [freshman_senators, capuano, cold_fusion(year, 0), conaway, susandavis, faleomavaega, klobuchar, lujan, palazzo(page=1), billnelson(year=year),
74
+ results = [freshman_senators, capuano, cold_fusion(year, 0), conaway, susandavis, faleomavaega, klobuchar, lujan, palazzo(page=1), billnelson(year=year),
69
75
  document_query(page=1), document_query(page=2), donnelly(year=year), lautenberg, crapo, coburn, boxer(start=1), mccain(year=year),
70
76
  vitter_cowan(year=year), inhofe(year=year), reid].flatten
77
+ remove_generic_urls!(results)
71
78
  end
72
79
 
73
80
  def self.backfill_from_scrapers
74
- [cold_fusion(2012, 0), cold_fusion(2011, 0), cold_fusion(2010, 0), billnelson(year=2012), document_query(page=3),
81
+ results = [cold_fusion(2012, 0), cold_fusion(2011, 0), cold_fusion(2010, 0), billnelson(year=2012), document_query(page=3),
75
82
  document_query(page=4), coburn(year=2012), coburn(year=2011), coburn(year=2010), boxer(start=11), boxer(start=21),
76
83
  boxer(start=31), boxer(start=41), mccain(year=2012), mccain(year=2011), vitter_cowan(year=2012), vitter_cowan(year=2011),
77
84
  ].flatten
85
+ remove_generic_urls!(results)
78
86
  end
79
87
 
80
88
  def self.committee_scrapers
81
89
  year = Date.today.year
82
- [senate_approps_majority, senate_approps_minority, senate_banking(year), senate_hsag_majority(year), senate_hsag_minority(year),
90
+ results = [senate_approps_majority, senate_approps_minority, senate_banking(year), senate_hsag_majority(year), senate_hsag_minority(year),
83
91
  senate_indian, senate_aging, senate_smallbiz_minority, senate_intel(113, 2013, 2014), house_energy_minority, house_homeland_security_minority,
84
92
  house_judiciary_majority, house_rules_majority, house_ways_means_majority].flatten
93
+ remove_generic_urls!(results)
85
94
  end
86
95
 
87
96
  ## special cases for committees without RSS feeds
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: statement
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.8'
4
+ version: 0.8.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-05-16 00:00:00.000000000 Z
12
+ date: 2013-05-17 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
16
- requirement: &2152291960 !ruby/object:Gem::Requirement
16
+ requirement: &2151755880 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '1.3'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *2152291960
24
+ version_requirements: *2151755880
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rake
27
- requirement: &2152239160 !ruby/object:Gem::Requirement
27
+ requirement: &2151755280 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *2152239160
35
+ version_requirements: *2151755280
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: webmock
38
- requirement: &2152121140 !ruby/object:Gem::Requirement
38
+ requirement: &2151754720 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '0'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *2152121140
46
+ version_requirements: *2151754720
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: american_date
49
- requirement: &2152084380 !ruby/object:Gem::Requirement
49
+ requirement: &2151754280 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: '0'
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *2152084380
57
+ version_requirements: *2151754280
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: nokogiri
60
- requirement: &2151878460 !ruby/object:Gem::Requirement
60
+ requirement: &2151753800 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ! '>='
@@ -65,7 +65,7 @@ dependencies:
65
65
  version: '0'
66
66
  type: :runtime
67
67
  prerelease: false
68
- version_requirements: *2151878460
68
+ version_requirements: *2151753800
69
69
  description: Crawls congressional websites for press releases.
70
70
  email:
71
71
  - dwillis@gmail.com