statement 0.8 → 0.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/statement/version.rb +1 -1
- data/lib/statement.rb +14 -5
- metadata +12 -12
    
        data/lib/statement/version.rb
    CHANGED
    
    
    
        data/lib/statement.rb
    CHANGED
    
    | @@ -29,6 +29,10 @@ module Statement | |
| 29 29 | 
             
                  end
         | 
| 30 30 | 
             
                end
         | 
| 31 31 |  | 
| 32 | 
            +
                def self.remove_generic_urls!(results)
         | 
| 33 | 
            +
                  results.reject{|r| URI.parse(r[:url]).path == '/news/' or URI.parse(r[:url]).path == '/news'}
         | 
| 34 | 
            +
                end
         | 
| 35 | 
            +
                
         | 
| 32 36 | 
             
                def self.date_from_rss_item(link)
         | 
| 33 37 | 
             
                  if !link.xpath('pubDate').text.empty?
         | 
| 34 38 | 
             
                    Date.parse(link.xpath('pubDate').text)
         | 
| @@ -43,12 +47,13 @@ module Statement | |
| 43 47 | 
             
                  doc = open_rss(url)
         | 
| 44 48 | 
             
                  return unless doc
         | 
| 45 49 | 
             
                  links = doc.xpath('//item')
         | 
| 46 | 
            -
                  links.map do |link|
         | 
| 50 | 
            +
                  results = links.map do |link|
         | 
| 47 51 | 
             
                    abs_link = absolute_link(url, link.xpath('link').text)
         | 
| 48 52 | 
             
                    abs_link = "http://www.burr.senate.gov/public/"+ link.xpath('link').text if url == 'http://www.burr.senate.gov/public/index.cfm?FuseAction=RSS.Feed'
         | 
| 49 53 | 
             
                    abs_link = link.xpath('link').text[37..-1] if url == "http://www.johanns.senate.gov/public/?a=RSS.Feed"
         | 
| 50 54 | 
             
                    { :source => url, :url => abs_link, :title => link.xpath('title').text, :date => date_from_rss_item(link), :domain => URI.parse(url).host }
         | 
| 51 55 | 
             
                  end
         | 
| 56 | 
            +
                  remove_generic_urls!(results)
         | 
| 52 57 | 
             
                end
         | 
| 53 58 |  | 
| 54 59 | 
             
                def self.house_gop(url)
         | 
| @@ -57,31 +62,35 @@ module Statement | |
| 57 62 | 
             
                  uri = URI.parse(url)
         | 
| 58 63 | 
             
                  date = Date.parse(uri.query.split('=').last)
         | 
| 59 64 | 
             
                  links = doc.xpath("//ul[@id='membernews']").search('a')
         | 
| 60 | 
            -
                  links.map do |link| 
         | 
| 65 | 
            +
                  results = links.map do |link| 
         | 
| 61 66 | 
             
                    abs_link = absolute_link(url, link["href"])
         | 
| 62 67 | 
             
                    { :source => url, :url => abs_link, :title => link.text.strip, :date => date, :domain => URI.parse(link["href"]).host }
         | 
| 63 68 | 
             
                  end
         | 
| 69 | 
            +
                  remove_generic_urls!(results)
         | 
| 64 70 | 
             
                end
         | 
| 65 71 |  | 
| 66 72 | 
             
                def self.from_scrapers
         | 
| 67 73 | 
             
                  year = Date.today.year
         | 
| 68 | 
            -
                  [freshman_senators, capuano, cold_fusion(year, 0), conaway, susandavis, faleomavaega, klobuchar, lujan, palazzo(page=1), billnelson(year=year), 
         | 
| 74 | 
            +
                  results = [freshman_senators, capuano, cold_fusion(year, 0), conaway, susandavis, faleomavaega, klobuchar, lujan, palazzo(page=1), billnelson(year=year), 
         | 
| 69 75 | 
             
                    document_query(page=1), document_query(page=2), donnelly(year=year), lautenberg, crapo, coburn, boxer(start=1), mccain(year=year), 
         | 
| 70 76 | 
             
                    vitter_cowan(year=year), inhofe(year=year), reid].flatten
         | 
| 77 | 
            +
                  remove_generic_urls!(results)
         | 
| 71 78 | 
             
                end
         | 
| 72 79 |  | 
| 73 80 | 
             
                def self.backfill_from_scrapers
         | 
| 74 | 
            -
                  [cold_fusion(2012, 0), cold_fusion(2011, 0), cold_fusion(2010, 0), billnelson(year=2012), document_query(page=3), 
         | 
| 81 | 
            +
                  results = [cold_fusion(2012, 0), cold_fusion(2011, 0), cold_fusion(2010, 0), billnelson(year=2012), document_query(page=3), 
         | 
| 75 82 | 
             
                    document_query(page=4), coburn(year=2012), coburn(year=2011), coburn(year=2010), boxer(start=11), boxer(start=21), 
         | 
| 76 83 | 
             
                    boxer(start=31), boxer(start=41), mccain(year=2012), mccain(year=2011), vitter_cowan(year=2012), vitter_cowan(year=2011),
         | 
| 77 84 | 
             
                    ].flatten
         | 
| 85 | 
            +
                  remove_generic_urls!(results)
         | 
| 78 86 | 
             
                end
         | 
| 79 87 |  | 
| 80 88 | 
             
                def self.committee_scrapers
         | 
| 81 89 | 
             
                  year = Date.today.year
         | 
| 82 | 
            -
                  [senate_approps_majority, senate_approps_minority, senate_banking(year), senate_hsag_majority(year), senate_hsag_minority(year),
         | 
| 90 | 
            +
                  results = [senate_approps_majority, senate_approps_minority, senate_banking(year), senate_hsag_majority(year), senate_hsag_minority(year),
         | 
| 83 91 | 
             
                     senate_indian, senate_aging, senate_smallbiz_minority, senate_intel(113, 2013, 2014), house_energy_minority, house_homeland_security_minority,
         | 
| 84 92 | 
             
                     house_judiciary_majority, house_rules_majority, house_ways_means_majority].flatten
         | 
| 93 | 
            +
                  remove_generic_urls!(results)
         | 
| 85 94 | 
             
                end
         | 
| 86 95 |  | 
| 87 96 | 
             
                ## special cases for committees without RSS feeds
         | 
    
        metadata
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: statement
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: '0.8'
         | 
| 4 | 
            +
              version: 0.8.1
         | 
| 5 5 | 
             
              prerelease: 
         | 
| 6 6 | 
             
            platform: ruby
         | 
| 7 7 | 
             
            authors:
         | 
| @@ -9,11 +9,11 @@ authors: | |
| 9 9 | 
             
            autorequire: 
         | 
| 10 10 | 
             
            bindir: bin
         | 
| 11 11 | 
             
            cert_chain: []
         | 
| 12 | 
            -
            date: 2013-05- | 
| 12 | 
            +
            date: 2013-05-17 00:00:00.000000000 Z
         | 
| 13 13 | 
             
            dependencies:
         | 
| 14 14 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 15 15 | 
             
              name: bundler
         | 
| 16 | 
            -
              requirement: & | 
| 16 | 
            +
              requirement: &2151755880 !ruby/object:Gem::Requirement
         | 
| 17 17 | 
             
                none: false
         | 
| 18 18 | 
             
                requirements:
         | 
| 19 19 | 
             
                - - ~>
         | 
| @@ -21,10 +21,10 @@ dependencies: | |
| 21 21 | 
             
                    version: '1.3'
         | 
| 22 22 | 
             
              type: :development
         | 
| 23 23 | 
             
              prerelease: false
         | 
| 24 | 
            -
              version_requirements: * | 
| 24 | 
            +
              version_requirements: *2151755880
         | 
| 25 25 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 26 26 | 
             
              name: rake
         | 
| 27 | 
            -
              requirement: & | 
| 27 | 
            +
              requirement: &2151755280 !ruby/object:Gem::Requirement
         | 
| 28 28 | 
             
                none: false
         | 
| 29 29 | 
             
                requirements:
         | 
| 30 30 | 
             
                - - ! '>='
         | 
| @@ -32,10 +32,10 @@ dependencies: | |
| 32 32 | 
             
                    version: '0'
         | 
| 33 33 | 
             
              type: :development
         | 
| 34 34 | 
             
              prerelease: false
         | 
| 35 | 
            -
              version_requirements: * | 
| 35 | 
            +
              version_requirements: *2151755280
         | 
| 36 36 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 37 37 | 
             
              name: webmock
         | 
| 38 | 
            -
              requirement: & | 
| 38 | 
            +
              requirement: &2151754720 !ruby/object:Gem::Requirement
         | 
| 39 39 | 
             
                none: false
         | 
| 40 40 | 
             
                requirements:
         | 
| 41 41 | 
             
                - - ! '>='
         | 
| @@ -43,10 +43,10 @@ dependencies: | |
| 43 43 | 
             
                    version: '0'
         | 
| 44 44 | 
             
              type: :development
         | 
| 45 45 | 
             
              prerelease: false
         | 
| 46 | 
            -
              version_requirements: * | 
| 46 | 
            +
              version_requirements: *2151754720
         | 
| 47 47 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 48 48 | 
             
              name: american_date
         | 
| 49 | 
            -
              requirement: & | 
| 49 | 
            +
              requirement: &2151754280 !ruby/object:Gem::Requirement
         | 
| 50 50 | 
             
                none: false
         | 
| 51 51 | 
             
                requirements:
         | 
| 52 52 | 
             
                - - ! '>='
         | 
| @@ -54,10 +54,10 @@ dependencies: | |
| 54 54 | 
             
                    version: '0'
         | 
| 55 55 | 
             
              type: :runtime
         | 
| 56 56 | 
             
              prerelease: false
         | 
| 57 | 
            -
              version_requirements: * | 
| 57 | 
            +
              version_requirements: *2151754280
         | 
| 58 58 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 59 59 | 
             
              name: nokogiri
         | 
| 60 | 
            -
              requirement: & | 
| 60 | 
            +
              requirement: &2151753800 !ruby/object:Gem::Requirement
         | 
| 61 61 | 
             
                none: false
         | 
| 62 62 | 
             
                requirements:
         | 
| 63 63 | 
             
                - - ! '>='
         | 
| @@ -65,7 +65,7 @@ dependencies: | |
| 65 65 | 
             
                    version: '0'
         | 
| 66 66 | 
             
              type: :runtime
         | 
| 67 67 | 
             
              prerelease: false
         | 
| 68 | 
            -
              version_requirements: * | 
| 68 | 
            +
              version_requirements: *2151753800
         | 
| 69 69 | 
             
            description: Crawls congressional websites for press releases.
         | 
| 70 70 | 
             
            email:
         | 
| 71 71 | 
             
            - dwillis@gmail.com
         |