statement 2.0 → 2.0.1
This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. The information in this diff is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/lib/statement/feed.rb +15 -1
 - data/lib/statement/scraper.rb +38 -4
 - data/lib/statement/version.rb +1 -1
 - metadata +1 -1
 
    
        checksums.yaml
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            ---
         
     | 
| 
       2 
2 
     | 
    
         
             
            SHA1:
         
     | 
| 
       3 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       4 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: 74eeb1be0ee4ada11df34f5b8ddce00ffd5649cf
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: dd7fa7d9e6b0ccf0ae0bd3cdfa3f61f9306f4ef4
         
     | 
| 
       5 
5 
     | 
    
         
             
            SHA512:
         
     | 
| 
       6 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       7 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: 392340713cdf259bff3e60816d3ea12966a09127b0cad75c65f71307b18a692812bb7dc594fc332bbe9c49df15d52f6ab9d396d27795890d736de65ff10330a7
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: a80659fe9a3a651f8db4585112dd193fe413762d1409afd4703ff1d63454599298c22416e4f1f8735897a04169e4e09d5c3436a3e8699c67d7f124654423033f
         
     | 
    
        data/lib/statement/feed.rb
    CHANGED
    
    | 
         @@ -17,6 +17,7 @@ module Statement 
     | 
|
| 
       17 
17 
     | 
    
         
             
                    req.on_complete do |response|
         
     | 
| 
       18 
18 
     | 
    
         
             
                      if response.success?
         
     | 
| 
       19 
19 
     | 
    
         
             
                        doc = Nokogiri::XML(response.body)
         
     | 
| 
      
 20 
     | 
    
         
            +
                        results << parse_atom(doc, url) if url == "http://larson.house.gov/index.php?option=com_ninjarsssyndicator&feed_id=1&format=raw"
         
     | 
| 
       20 
21 
     | 
    
         
             
                        results << parse_rss(doc, url)
         
     | 
| 
       21 
22 
     | 
    
         
             
                      else
         
     | 
| 
       22 
23 
     | 
    
         
             
                        failures << url
         
     | 
| 
         @@ -51,7 +52,11 @@ module Statement 
     | 
|
| 
       51 
52 
     | 
    
         
             
                def self.from_rss(url)
         
     | 
| 
       52 
53 
     | 
    
         
             
                  doc = open_rss(url)
         
     | 
| 
       53 
54 
     | 
    
         
             
                  return unless doc
         
     | 
| 
       54 
     | 
    
         
            -
                   
     | 
| 
      
 55 
     | 
    
         
            +
                  if url == "http://larson.house.gov/index.php?option=com_ninjarsssyndicator&feed_id=1&format=raw"
         
     | 
| 
      
 56 
     | 
    
         
            +
                    parse_atom(doc, url)
         
     | 
| 
      
 57 
     | 
    
         
            +
                  else
         
     | 
| 
      
 58 
     | 
    
         
            +
                    parse_rss(doc, url)
         
     | 
| 
      
 59 
     | 
    
         
            +
                  end
         
     | 
| 
       55 
60 
     | 
    
         
             
                end
         
     | 
| 
       56 
61 
     | 
    
         | 
| 
       57 
62 
     | 
    
         
             
                def self.parse_rss(doc, url)
         
     | 
| 
         @@ -65,5 +70,14 @@ module Statement 
     | 
|
| 
       65 
70 
     | 
    
         
             
                  end
         
     | 
| 
       66 
71 
     | 
    
         
             
                  Utils.remove_generic_urls!(results)
         
     | 
| 
       67 
72 
     | 
    
         
             
                end
         
     | 
| 
      
 73 
     | 
    
         
            +
             
     | 
| 
      
 74 
     | 
    
         
            +
                def self.parse_atom(doc, url)
         
     | 
| 
      
 75 
     | 
    
         
            +
                  links = (doc/:entry)
         
     | 
| 
      
 76 
     | 
    
         
            +
                  return if links.empty?
         
     | 
| 
      
 77 
     | 
    
         
            +
                  results = links.map do |link|
         
     | 
| 
      
 78 
     | 
    
         
            +
                    { :source => url, :url => link.children[3]['href'], :title => link.children[1].text, :date => Date.parse(link.children[5].text), :domain => URI.parse(url).host }
         
     | 
| 
      
 79 
     | 
    
         
            +
                  end
         
     | 
| 
      
 80 
     | 
    
         
            +
                end
         
     | 
| 
      
 81 
     | 
    
         
            +
             
     | 
| 
       68 
82 
     | 
    
         
             
              end
         
     | 
| 
       69 
83 
     | 
    
         
             
            end
         
     | 
    
        data/lib/statement/scraper.rb
    CHANGED
    
    | 
         @@ -32,7 +32,7 @@ module Statement 
     | 
|
| 
       32 
32 
     | 
    
         
             
                  [:crenshaw, :capuano, :cold_fusion, :conaway, :chabot, :freshman_senators, :klobuchar, :billnelson, :crapo, :boxer,
         
     | 
| 
       33 
33 
     | 
    
         
             
                  :vitter, :inhofe, :document_query, :swalwell, :fischer, :clark, :edwards, :culberson_chabot_grisham, :barton,
         
     | 
| 
       34 
34 
     | 
    
         
             
                  :welch, :sessions, :gabbard, :costa, :farr, :mcclintock, :olson, :schumer, :lamborn, :walden,
         
     | 
| 
       35 
     | 
    
         
            -
                  :bennie_thompson, :speier, :poe, :grassley, :bennet, :shaheen, :keating, :drupal, :jenkins]
         
     | 
| 
      
 35 
     | 
    
         
            +
                  :bennie_thompson, :speier, :poe, :grassley, :bennet, :shaheen, :keating, :drupal, :jenkins, :durbin_burr]
         
     | 
| 
       36 
36 
     | 
    
         
             
                end
         
     | 
| 
       37 
37 
     | 
    
         | 
| 
       38 
38 
     | 
    
         
             
                def self.committee_methods
         
     | 
| 
         @@ -45,7 +45,7 @@ module Statement 
     | 
|
| 
       45 
45 
     | 
    
         
             
                    document_query(page=1), document_query(page=2), swalwell(page=1), crapo, boxer, grassley(page=0),
         
     | 
| 
       46 
46 
     | 
    
         
             
                    vitter(year=year), inhofe(year=year), fischer, clark(year=year), edwards, culberson_chabot_grisham(page=1), barton, welch,
         
     | 
| 
       47 
47 
     | 
    
         
             
                    sessions(year=year), gabbard, costa, farr, olson, schumer, lamborn(limit=10), walden, bennie_thompson, speier,
         
     | 
| 
       48 
     | 
    
         
            -
                    poe(year=year, month=0), bennet(page=1), shaheen(page=1), perlmutter, keating, drupal, jenkins].flatten
         
     | 
| 
      
 48 
     | 
    
         
            +
                    poe(year=year, month=0), bennet(page=1), shaheen(page=1), perlmutter, keating, drupal, jenkins, durbin_burr(page=1)].flatten
         
     | 
| 
       49 
49 
     | 
    
         
             
                  results = results.compact
         
     | 
| 
       50 
50 
     | 
    
         
             
                  Utils.remove_generic_urls!(results)
         
     | 
| 
       51 
51 
     | 
    
         
             
                end
         
     | 
| 
         @@ -491,6 +491,20 @@ module Statement 
     | 
|
| 
       491 
491 
     | 
    
         
             
                  results
         
     | 
| 
       492 
492 
     | 
    
         
             
                end
         
     | 
| 
       493 
493 
     | 
    
         | 
| 
      
 494 
     | 
    
         
            +
                def self.durbin_burr(page=1)
         
     | 
| 
      
 495 
     | 
    
         
            +
                  results = []
         
     | 
| 
      
 496 
     | 
    
         
            +
                  domains = ["www.durbin.senate.gov", "www.burr.senate.gov"]
         
     | 
| 
      
 497 
     | 
    
         
            +
                  domains.each do |domain|
         
     | 
| 
      
 498 
     | 
    
         
            +
                    url = "http://#{domain}/newsroom/press-releases?PageNum_rs=#{page}&"
         
     | 
| 
      
 499 
     | 
    
         
            +
                    doc = open_html(url)
         
     | 
| 
      
 500 
     | 
    
         
            +
                    return if doc.nil?
         
     | 
| 
      
 501 
     | 
    
         
            +
                    doc.xpath("//div[@id='press']//h2").each do |row|
         
     | 
| 
      
 502 
     | 
    
         
            +
                      results << { :source => url, :url => "http://#{domain}"+row.children[0]['href'], :title => row.children[0].text.strip, :date => Date.parse(row.previous.previous.text.gsub(".","/")), :domain => domain}
         
     | 
| 
      
 503 
     | 
    
         
            +
                    end
         
     | 
| 
      
 504 
     | 
    
         
            +
                  end
         
     | 
| 
      
 505 
     | 
    
         
            +
                  results
         
     | 
| 
      
 506 
     | 
    
         
            +
                end
         
     | 
| 
      
 507 
     | 
    
         
            +
             
     | 
| 
       494 
508 
     | 
    
         
             
                def self.inhofe(year=Date.today.year)
         
     | 
| 
       495 
509 
     | 
    
         
             
                  results = []
         
     | 
| 
       496 
510 
     | 
    
         
             
                  url = "http://www.inhofe.senate.gov/newsroom/press-releases?year=#{year}"
         
     | 
| 
         @@ -651,7 +665,25 @@ module Statement 
     | 
|
| 
       651 
665 
     | 
    
         | 
| 
       652 
666 
     | 
    
         
             
                def self.document_query(page=1)
         
     | 
| 
       653 
667 
     | 
    
         
             
                  results = []
         
     | 
| 
       654 
     | 
    
         
            -
                  domains = [ 
     | 
| 
      
 668 
     | 
    
         
            +
                  domains = [
         
     | 
| 
      
 669 
     | 
    
         
            +
                    {"thornberry.house.gov" => 1776},
         
     | 
| 
      
 670 
     | 
    
         
            +
                    {"wenstrup.house.gov" => 2491},
         
     | 
| 
      
 671 
     | 
    
         
            +
                    {"clawson.house.gov" => 2641},
         
     | 
| 
      
 672 
     | 
    
         
            +
                    {"palazzo.house.gov" => 2519},
         
     | 
| 
      
 673 
     | 
    
         
            +
                    {"roe.house.gov" => 1532},
         
     | 
| 
      
 674 
     | 
    
         
            +
                    {"perry.house.gov" => 2608},
         
     | 
| 
      
 675 
     | 
    
         
            +
                    {"rodneydavis.house.gov" => 2427},
         
     | 
| 
      
 676 
     | 
    
         
            +
                    {"kevinbrady.house.gov" => 2657},
         
     | 
| 
      
 677 
     | 
    
         
            +
                    {"loudermilk.house.gov" => 27},
         
     | 
| 
      
 678 
     | 
    
         
            +
                    {"babin.house.gov" => 27},
         
     | 
| 
      
 679 
     | 
    
         
            +
                    {"bridenstine.house.gov" => 2412},
         
     | 
| 
      
 680 
     | 
    
         
            +
                    {"allen.house.gov" => 27},
         
     | 
| 
      
 681 
     | 
    
         
            +
                    {"davidscott.house.gov" => 377},
         
     | 
| 
      
 682 
     | 
    
         
            +
                    {"buddycarter.house.gov" => 27},
         
     | 
| 
      
 683 
     | 
    
         
            +
                    {"grothman.house.gov" => 27},
         
     | 
| 
      
 684 
     | 
    
         
            +
                    {"beyer.house.gov" => 27},
         
     | 
| 
      
 685 
     | 
    
         
            +
                    {"kathleenrice.house.gov" => 27}
         
     | 
| 
      
 686 
     | 
    
         
            +
                  ]
         
     | 
| 
       655 
687 
     | 
    
         
             
                  domains.each do |domain|
         
     | 
| 
       656 
688 
     | 
    
         
             
                    doc = open_html("http://"+domain.keys.first+"/news/documentquery.aspx?DocumentTypeID=#{domain.values.first}&Page=#{page}")
         
     | 
| 
       657 
689 
     | 
    
         
             
                    return if doc.nil?
         
     | 
| 
         @@ -827,7 +859,9 @@ module Statement 
     | 
|
| 
       827 
859 
     | 
    
         
             
                        "http://sarbanes.house.gov/media-center/press-releases",
         
     | 
| 
       828 
860 
     | 
    
         
             
                        "http://wilson.house.gov/media-center/press-releases",
         
     | 
| 
       829 
861 
     | 
    
         
             
                        "https://bilirakis.house.gov/press-releases",
         
     | 
| 
       830 
     | 
    
         
            -
                        "http://quigley.house.gov/media-center/press-releases"
         
     | 
| 
      
 862 
     | 
    
         
            +
                        "http://quigley.house.gov/media-center/press-releases",
         
     | 
| 
      
 863 
     | 
    
         
            +
                        "https://denham.house.gov/media-center/press-releases",
         
     | 
| 
      
 864 
     | 
    
         
            +
                        "https://sewell.house.gov/media-center/press-releases"
         
     | 
| 
       831 
865 
     | 
    
         
             
                    ]
         
     | 
| 
       832 
866 
     | 
    
         
             
                  end
         
     | 
| 
       833 
867 
     | 
    
         | 
    
        data/lib/statement/version.rb
    CHANGED