statement 2.0.3 → 2.0.4

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 88a5cc8cc4a20b541a5cadd65e4ab3c139f5cf9f
4
- data.tar.gz: 37d62f7c98c3a92b09980c14a5dde0d3c2c809ba
3
+ metadata.gz: c771519d9fc8c1d3b906d0b09695205a92f893f2
4
+ data.tar.gz: 737c1f6530c5d7d9424be1f5629df0042c6abc8c
5
5
  SHA512:
6
- metadata.gz: 4ac80b83893c08c95d6d8a3623ca6a0859cddbfdba8e00004a8ff8370c6f87bc393d059f900eb60484993a70ea0a363a1033305d68a883fd4fe8fa61dbf27755
7
- data.tar.gz: b95a2cba9cd02eb533d4fe73b681c180eb23d28eeb52491174548c59df794667b8fd194397699426cfc94918a32813de297789bedf62c8256b38e6b9196f05ca
6
+ metadata.gz: c1c53d467e8b878ff6995b6aacbc1618904a263bf55fe2d50e702334afbfbd0d323979d26be9e1d39abf83c2e21dc9258dcce98330cfc8a962620714bebd5707
7
+ data.tar.gz: e2ef4d6db0d2276a089c8e4bace60d859bc60d584177c4ec862bc55ddf2973ea3aa1b10aecf19769b4e86a5cc6143ae7206eef7688fe81c0a4f3ea57a1505827
@@ -32,7 +32,7 @@ module Statement
32
32
  [:crenshaw, :capuano, :cold_fusion, :conaway, :chabot, :freshman_senators, :klobuchar, :billnelson, :crapo, :boxer,
33
33
  :vitter, :inhofe, :document_query, :swalwell, :fischer, :clark, :edwards, :culberson_chabot_grisham, :barton,
34
34
  :welch, :sessions, :gabbard, :costa, :farr, :mcclintock, :olson, :schumer, :lamborn, :walden, :boehner,
35
- :bennie_thompson, :speier, :poe, :grassley, :bennet, :shaheen, :keating, :drupal, :jenkins, :durbin_burr]
35
+ :bennie_thompson, :speier, :poe, :grassley, :bennet, :shaheen, :keating, :drupal, :jenkins, :durbin_burr, :rand_paul]
36
36
  end
37
37
 
38
38
  def self.committee_methods
@@ -42,10 +42,11 @@ module Statement
42
42
  def self.member_scrapers
43
43
  year = Date.today.year
44
44
  results = [crenshaw, capuano, cold_fusion(year, nil), conaway, chabot, klobuchar(year), billnelson(page=0),
45
- document_query(page=1), document_query(page=2), swalwell(page=1), crapo, boxer, grassley(page=0),
45
+ document_query(page=1), document_query(page=2), swalwell(page=1), crapo, boxer, grassley(page=0),
46
46
  vitter(year=year), inhofe(year=year), fischer, clark(year=year), edwards, culberson_chabot_grisham(page=1), barton, welch,
47
47
  sessions(year=year), gabbard, costa, farr, olson, schumer, lamborn(limit=10), walden, bennie_thompson, speier,
48
- poe(year=year, month=0), bennet(page=1), shaheen(page=1), perlmutter, keating, drupal, jenkins, durbin_burr(page=1)].flatten
48
+ poe(year=year, month=0), bennet(page=1), shaheen(page=1), perlmutter, keating, drupal, jenkins, durbin_burr(page=1),
49
+ rand_paul(page = 1)].flatten
49
50
  results = results.compact
50
51
  Utils.remove_generic_urls!(results)
51
52
  end
@@ -416,6 +417,51 @@ module Statement
416
417
  results
417
418
  end
418
419
 
420
+ def self.rand_paul(page = 1)
421
+ # each page contains a max of 20 results
422
+ page_url = "http://www.paul.senate.gov/news/press?PageNum_rs=#{page}"
423
+ doc = open_html(page_url)
424
+ return if doc.nil?
425
+ results = doc.search('#press .title').inject([]) do |arr, title|
426
+ article_url = URI.join(page_url, title.search('a')[0]['href'])
427
+ article_datestr = title.previous_element.text # e.g. "05.11.15"
428
+ arr << {
429
+ :source => page_url,
430
+ :url => article_url.to_s,
431
+ :domain => article_url.host,
432
+ :title => title.text,
433
+ :date => Date.strptime(article_datestr, '%m.%d.%y')
434
+ }
435
+ end
436
+ results
437
+ end
438
+
439
+
440
+ def self.patrick_meehan(page = 0)
441
+ # This is a Drupal page that uses the Views plugin, but unlike the other
442
+ # Drupal pages it does not use .views-field-created; instead, only a
443
+ # partial date is given (e.g. "03 Feb"), so the date still needs parsing.
444
+ page_url = "https://meehan.house.gov/media-center/press-releases?page=#{page}"
445
+ doc = open_html(page_url)
446
+ return if doc.nil?
447
+ results = doc.search('.view-congress-press-releases .views-row').inject([]) do |arr, article|
448
+ title = article.search('.views-field-title a')[0]
449
+ article_url = URI.join(page_url, title['href'])
450
+ raise "Date still needs to be parsed; thanks a lot Drupal"
451
+ article_datestr = title.previous_element.text
452
+ arr << {
453
+ :source => page_url,
454
+ :url => article_url.to_s,
455
+ :domain => article_url.host,
456
+ :title => title.text,
457
+ :date => Date.strptime(article_datestr, 'SOMETHING')
458
+ }
459
+ end
460
+
461
+ results
462
+ end
463
+
464
+
419
465
  # fetches the latest 1000 releases, can be altered
420
466
  def self.lautenberg(rows=1000)
421
467
  results = []
@@ -811,24 +857,36 @@ module Statement
811
857
  results
812
858
  end
813
859
 
814
- def self.backfill_bilirakis
860
+ def self.backfill_bilirakis(page=1)
815
861
  results = []
816
862
  domain = 'bilirakis.house.gov'
817
- url = 'http://bilirakis.house.gov/press-releases/'
863
+ url = "https://bilirakis.house.gov/press-releases?page=#{page}"
818
864
  doc = open_html(url)
819
865
  return if doc.nil?
820
- doc.css("ul li[@class='article articleright']").each do |row|
821
- results << {:source => url, :url => 'http://bilirakis.house.gov' + row.children[3].children[1]['href'], :title => row.children[3].text.strip, :date => Date.parse(row.children[5].text), :domain => domain }
866
+ doc.css("#region-content .views-row").each do |row|
867
+ title_anchor = row.css("h3 a")
868
+ title = title_anchor.text
869
+ release_url = "http://#{domain + title_anchor.attr('href')}"
870
+ raw_date = row.css(".views-field-created").text
871
+ results << { :source => url,
872
+ :url => release_url,
873
+ :title => title,
874
+ :date => begin Date.parse(raw_date) rescue nil end,
875
+ :domain => domain }
822
876
  end
877
+ results
823
878
  end
824
879
 
825
- def self.backfill_boustany
880
+ def self.backfill_boustany(congress)
826
881
  results = []
827
882
  domain = 'boustany.house.gov'
828
- url = 'http://boustany.house.gov/113th-congress/showallitems/'
883
+ url = "http://boustany.house.gov/#{congress}th-congress/showallitems/"
829
884
  doc = open_html(url)
830
885
  return if doc.nil?
831
-
886
+ (doc/:ul)[13].search(:li).each do |row|
887
+ results << {:source => url, :url => 'http://boustany.house.gov' + row.children.search(:a)[0]['href'], :title => row.children.search(:a)[0].text, :date => Date.parse(row.children[5].text), :domain => domain }
888
+ end
889
+ results
832
890
  end
833
891
 
834
892
  def self.perlmutter
@@ -875,7 +933,9 @@ module Statement
875
933
  "https://bilirakis.house.gov/press-releases",
876
934
  "http://quigley.house.gov/media-center/press-releases",
877
935
  "https://denham.house.gov/media-center/press-releases",
878
- "https://sewell.house.gov/media-center/press-releases"
936
+ "https://sewell.house.gov/media-center/press-releases",
937
+ "https://buchanan.house.gov/media-center/press-releases",
938
+ "https://meehan.house.gov/media-center/press-releases"
879
939
  ]
880
940
  end
881
941
 
@@ -1,3 +1,3 @@
1
1
  module Statement
2
- VERSION = "2.0.3"
2
+ VERSION = "2.0.4"
3
3
  end