statement 2.0.3 → 2.0.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 88a5cc8cc4a20b541a5cadd65e4ab3c139f5cf9f
4
- data.tar.gz: 37d62f7c98c3a92b09980c14a5dde0d3c2c809ba
3
+ metadata.gz: c771519d9fc8c1d3b906d0b09695205a92f893f2
4
+ data.tar.gz: 737c1f6530c5d7d9424be1f5629df0042c6abc8c
5
5
  SHA512:
6
- metadata.gz: 4ac80b83893c08c95d6d8a3623ca6a0859cddbfdba8e00004a8ff8370c6f87bc393d059f900eb60484993a70ea0a363a1033305d68a883fd4fe8fa61dbf27755
7
- data.tar.gz: b95a2cba9cd02eb533d4fe73b681c180eb23d28eeb52491174548c59df794667b8fd194397699426cfc94918a32813de297789bedf62c8256b38e6b9196f05ca
6
+ metadata.gz: c1c53d467e8b878ff6995b6aacbc1618904a263bf55fe2d50e702334afbfbd0d323979d26be9e1d39abf83c2e21dc9258dcce98330cfc8a962620714bebd5707
7
+ data.tar.gz: e2ef4d6db0d2276a089c8e4bace60d859bc60d584177c4ec862bc55ddf2973ea3aa1b10aecf19769b4e86a5cc6143ae7206eef7688fe81c0a4f3ea57a1505827
@@ -32,7 +32,7 @@ module Statement
32
32
  [:crenshaw, :capuano, :cold_fusion, :conaway, :chabot, :freshman_senators, :klobuchar, :billnelson, :crapo, :boxer,
33
33
  :vitter, :inhofe, :document_query, :swalwell, :fischer, :clark, :edwards, :culberson_chabot_grisham, :barton,
34
34
  :welch, :sessions, :gabbard, :costa, :farr, :mcclintock, :olson, :schumer, :lamborn, :walden, :boehner,
35
- :bennie_thompson, :speier, :poe, :grassley, :bennet, :shaheen, :keating, :drupal, :jenkins, :durbin_burr]
35
+ :bennie_thompson, :speier, :poe, :grassley, :bennet, :shaheen, :keating, :drupal, :jenkins, :durbin_burr, :rand_paul]
36
36
  end
37
37
 
38
38
  def self.committee_methods
@@ -42,10 +42,11 @@ module Statement
42
42
  def self.member_scrapers
43
43
  year = Date.today.year
44
44
  results = [crenshaw, capuano, cold_fusion(year, nil), conaway, chabot, klobuchar(year), billnelson(page=0),
45
- document_query(page=1), document_query(page=2), swalwell(page=1), crapo, boxer, grassley(page=0),
45
+ document_query(page=1), document_query(page=2), swalwell(page=1), crapo, boxer, grassley(page=0),
46
46
  vitter(year=year), inhofe(year=year), fischer, clark(year=year), edwards, culberson_chabot_grisham(page=1), barton, welch,
47
47
  sessions(year=year), gabbard, costa, farr, olson, schumer, lamborn(limit=10), walden, bennie_thompson, speier,
48
- poe(year=year, month=0), bennet(page=1), shaheen(page=1), perlmutter, keating, drupal, jenkins, durbin_burr(page=1)].flatten
48
+ poe(year=year, month=0), bennet(page=1), shaheen(page=1), perlmutter, keating, drupal, jenkins, durbin_burr(page=1),
49
+ rand_paul(page = 1)].flatten
49
50
  results = results.compact
50
51
  Utils.remove_generic_urls!(results)
51
52
  end
@@ -416,6 +417,51 @@ module Statement
416
417
  results
417
418
  end
418
419
 
420
+ def self.rand_paul(page = 1)
421
+ # each page contains a max of 20 results
422
+ page_url = "http://www.paul.senate.gov/news/press?PageNum_rs=#{page}"
423
+ doc = open_html(page_url)
424
+ return if doc.nil?
425
+ results = doc.search('#press .title').inject([]) do |arr, title|
426
+ article_url = URI.join(page_url, title.search('a')[0]['href'])
427
+ article_datestr = title.previous_element.text # e.g. "05.11.15"
428
+ arr << {
429
+ :source => page_url,
430
+ :url => article_url.to_s,
431
+ :domain => article_url.host,
432
+ :title => title.text,
433
+ :date => Date.strptime(article_datestr, '%m.%d.%y')
434
+ }
435
+ end
436
+ results
437
+ end
438
+
439
+
440
+ def self.patrick_meehan(page = 0)
441
+ # This is a Drupal page and it uses the View plugin, but unlike the other
442
+ # Drupal pages, it does not make use of .views-field-created, and instead, the
443
+ # only Month-Year is given (03 Feb).
444
+ page_url = "https://meehan.house.gov/media-center/press-releases?page=#{page}"
445
+ doc = open_html(page_url)
446
+ return if doc.nil?
447
+ results = doc.search('.view-congress-press-releases .views-row').inject([]) do |arr, article|
448
+ title = article.search('.views-field-title a')[0]
449
+ article_url = URI.join(page_url, title['href'])
450
+ raise "Date still needs to be parsed; thanks a lot Drupal"
451
+ article_datestr = title.previous_element.text
452
+ arr << {
453
+ :source => page_url,
454
+ :url => article_url.to_s,
455
+ :domain => article_url.host,
456
+ :title => title.text,
457
+ :date => Date.strptime(article_datestr, 'SOMETHING')
458
+ }
459
+ end
460
+
461
+ results
462
+ end
463
+
464
+
419
465
  # fetches the latest 1000 releases, can be altered
420
466
  def self.lautenberg(rows=1000)
421
467
  results = []
@@ -811,24 +857,36 @@ module Statement
811
857
  results
812
858
  end
813
859
 
814
- def self.backfill_bilirakis
860
+ def self.backfill_bilirakis(page=1)
815
861
  results = []
816
862
  domain = 'bilirakis.house.gov'
817
- url = 'http://bilirakis.house.gov/press-releases/'
863
+ url = "https://bilirakis.house.gov/press-releases?page=#{page}"
818
864
  doc = open_html(url)
819
865
  return if doc.nil?
820
- doc.css("ul li[@class='article articleright']").each do |row|
821
- results << {:source => url, :url => 'http://bilirakis.house.gov' + row.children[3].children[1]['href'], :title => row.children[3].text.strip, :date => Date.parse(row.children[5].text), :domain => domain }
866
+ doc.css("#region-content .views-row").each do |row|
867
+ title_anchor = row.css("h3 a")
868
+ title = title_anchor.text
869
+ release_url = "http://#{domain + title_anchor.attr('href')}"
870
+ raw_date = row.css(".views-field-created").text
871
+ results << { :source => url,
872
+ :url => release_url,
873
+ :title => title,
874
+ :date => begin Date.parse(raw_date) rescue nil end,
875
+ :domain => domain }
822
876
  end
877
+ results
823
878
  end
824
879
 
825
- def self.backfill_boustany
880
+ def self.backfill_boustany(congress)
826
881
  results = []
827
882
  domain = 'boustany.house.gov'
828
- url = 'http://boustany.house.gov/113th-congress/showallitems/'
883
+ url = "http://boustany.house.gov/#{congress}th-congress/showallitems/"
829
884
  doc = open_html(url)
830
885
  return if doc.nil?
831
-
886
+ (doc/:ul)[13].search(:li).each do |row|
887
+ results << {:source => url, :url => 'http://boustany.house.gov' + row.children.search(:a)[0]['href'], :title => row.children.search(:a)[0].text, :date => Date.parse(row.children[5].text), :domain => domain }
888
+ end
889
+ results
832
890
  end
833
891
 
834
892
  def self.perlmutter
@@ -875,7 +933,9 @@ module Statement
875
933
  "https://bilirakis.house.gov/press-releases",
876
934
  "http://quigley.house.gov/media-center/press-releases",
877
935
  "https://denham.house.gov/media-center/press-releases",
878
- "https://sewell.house.gov/media-center/press-releases"
936
+ "https://sewell.house.gov/media-center/press-releases",
937
+ "https://buchanan.house.gov/media-center/press-releases",
938
+ "https://meehan.house.gov/media-center/press-releases"
879
939
  ]
880
940
  end
881
941
 
@@ -1,3 +1,3 @@
1
1
  module Statement
2
- VERSION = "2.0.3"
2
+ VERSION = "2.0.4"
3
3
  end