statement 0.8 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. data/lib/statement/version.rb +1 -1
  2. data/lib/statement.rb +14 -5
  3. metadata +12 -12
data/lib/statement/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Statement
2
- VERSION = "0.8"
2
+ VERSION = "0.8.1"
3
3
  end
data/lib/statement.rb CHANGED
@@ -29,6 +29,10 @@ module Statement
29
29
  end
30
30
  end
31
31
 
32
+ def self.remove_generic_urls!(results)
33
+ results.reject{|r| URI.parse(r[:url]).path == '/news/' or URI.parse(r[:url]).path == '/news'}
34
+ end
35
+
32
36
  def self.date_from_rss_item(link)
33
37
  if !link.xpath('pubDate').text.empty?
34
38
  Date.parse(link.xpath('pubDate').text)
@@ -43,12 +47,13 @@ module Statement
43
47
  doc = open_rss(url)
44
48
  return unless doc
45
49
  links = doc.xpath('//item')
46
- links.map do |link|
50
+ results = links.map do |link|
47
51
  abs_link = absolute_link(url, link.xpath('link').text)
48
52
  abs_link = "http://www.burr.senate.gov/public/"+ link.xpath('link').text if url == 'http://www.burr.senate.gov/public/index.cfm?FuseAction=RSS.Feed'
49
53
  abs_link = link.xpath('link').text[37..-1] if url == "http://www.johanns.senate.gov/public/?a=RSS.Feed"
50
54
  { :source => url, :url => abs_link, :title => link.xpath('title').text, :date => date_from_rss_item(link), :domain => URI.parse(url).host }
51
55
  end
56
+ remove_generic_urls!(results)
52
57
  end
53
58
 
54
59
  def self.house_gop(url)
@@ -57,31 +62,35 @@ module Statement
57
62
  uri = URI.parse(url)
58
63
  date = Date.parse(uri.query.split('=').last)
59
64
  links = doc.xpath("//ul[@id='membernews']").search('a')
60
- links.map do |link|
65
+ results = links.map do |link|
61
66
  abs_link = absolute_link(url, link["href"])
62
67
  { :source => url, :url => abs_link, :title => link.text.strip, :date => date, :domain => URI.parse(link["href"]).host }
63
68
  end
69
+ remove_generic_urls!(results)
64
70
  end
65
71
 
66
72
  def self.from_scrapers
67
73
  year = Date.today.year
68
- [freshman_senators, capuano, cold_fusion(year, 0), conaway, susandavis, faleomavaega, klobuchar, lujan, palazzo(page=1), billnelson(year=year),
74
+ results = [freshman_senators, capuano, cold_fusion(year, 0), conaway, susandavis, faleomavaega, klobuchar, lujan, palazzo(page=1), billnelson(year=year),
69
75
  document_query(page=1), document_query(page=2), donnelly(year=year), lautenberg, crapo, coburn, boxer(start=1), mccain(year=year),
70
76
  vitter_cowan(year=year), inhofe(year=year), reid].flatten
77
+ remove_generic_urls!(results)
71
78
  end
72
79
 
73
80
  def self.backfill_from_scrapers
74
- [cold_fusion(2012, 0), cold_fusion(2011, 0), cold_fusion(2010, 0), billnelson(year=2012), document_query(page=3),
81
+ results = [cold_fusion(2012, 0), cold_fusion(2011, 0), cold_fusion(2010, 0), billnelson(year=2012), document_query(page=3),
75
82
  document_query(page=4), coburn(year=2012), coburn(year=2011), coburn(year=2010), boxer(start=11), boxer(start=21),
76
83
  boxer(start=31), boxer(start=41), mccain(year=2012), mccain(year=2011), vitter_cowan(year=2012), vitter_cowan(year=2011),
77
84
  ].flatten
85
+ remove_generic_urls!(results)
78
86
  end
79
87
 
80
88
  def self.committee_scrapers
81
89
  year = Date.today.year
82
- [senate_approps_majority, senate_approps_minority, senate_banking(year), senate_hsag_majority(year), senate_hsag_minority(year),
90
+ results = [senate_approps_majority, senate_approps_minority, senate_banking(year), senate_hsag_majority(year), senate_hsag_minority(year),
83
91
  senate_indian, senate_aging, senate_smallbiz_minority, senate_intel(113, 2013, 2014), house_energy_minority, house_homeland_security_minority,
84
92
  house_judiciary_majority, house_rules_majority, house_ways_means_majority].flatten
93
+ remove_generic_urls!(results)
85
94
  end
86
95
 
87
96
  ## special cases for committees without RSS feeds
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: statement
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.8'
4
+ version: 0.8.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-05-16 00:00:00.000000000 Z
12
+ date: 2013-05-17 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
16
- requirement: &2152291960 !ruby/object:Gem::Requirement
16
+ requirement: &2151755880 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '1.3'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *2152291960
24
+ version_requirements: *2151755880
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rake
27
- requirement: &2152239160 !ruby/object:Gem::Requirement
27
+ requirement: &2151755280 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *2152239160
35
+ version_requirements: *2151755280
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: webmock
38
- requirement: &2152121140 !ruby/object:Gem::Requirement
38
+ requirement: &2151754720 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '0'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *2152121140
46
+ version_requirements: *2151754720
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: american_date
49
- requirement: &2152084380 !ruby/object:Gem::Requirement
49
+ requirement: &2151754280 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: '0'
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *2152084380
57
+ version_requirements: *2151754280
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: nokogiri
60
- requirement: &2151878460 !ruby/object:Gem::Requirement
60
+ requirement: &2151753800 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ! '>='
@@ -65,7 +65,7 @@ dependencies:
65
65
  version: '0'
66
66
  type: :runtime
67
67
  prerelease: false
68
- version_requirements: *2151878460
68
+ version_requirements: *2151753800
69
69
  description: Crawls congressional websites for press releases.
70
70
  email:
71
71
  - dwillis@gmail.com