statement 0.4 → 0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. data/lib/statement/version.rb +1 -1
  2. data/lib/statement.rb +7 -6
  3. metadata +61 -93
data/lib/statement/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Statement
2
- VERSION = "0.4"
2
+ VERSION = "0.5"
3
3
  end
data/lib/statement.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # encoding: utf-8
1
2
  require "statement/version"
2
3
  require 'uri'
3
4
  require 'open-uri'
@@ -101,7 +102,7 @@ module Statement
101
102
  base_url = "http://www.house.gov/faleomavaega/news-press.shtml"
102
103
  doc = Nokogiri::HTML(open(base_url).read)
103
104
  doc.xpath("//li[@type='disc']").each do |row|
104
- results << { :source => base_url, :url => "http://www.house.gov/" + row.children[0]['href'], :title => row.children[0].text.gsub(/[\x80-\xff]/,'').split('Washington, D.C.').last, :date => Date.parse(row.children[1].text.gsub(/[\x80-\xff]/,' ')), :domain => "house.gov/faleomavaega" }
105
+ results << { :source => base_url, :url => "http://www.house.gov/" + row.children[0]['href'], :title => row.children[0].text.gsub(/[\u201c\u201d]/, '').split('Washington, D.C.').last, :date => Date.parse(row.children[1].text), :domain => "house.gov/faleomavaega" }
105
106
  end
106
107
  results
107
108
  end
@@ -113,7 +114,7 @@ module Statement
113
114
  doc = Nokogiri::HTML(open(base_url+'press.cfm?maxrows=200&startrow=1&&type=1').read)
114
115
  doc.xpath("//tr")[3..-1].each do |row|
115
116
  next if row.text.strip == ''
116
- results << { :source => base_url+'press.cfm?maxrows=200&startrow=1&&type=1', :url => base_url + row.children.children[1]['href'], :title => row.children.children[1].text.strip.split.join(' '), :date => Date.parse(row.children.children[0].text.gsub(/[\x80-\xff]/,'')), :domain => "#{senator}.senate.gov" }
117
+ results << { :source => base_url+'press.cfm?maxrows=200&startrow=1&&type=1', :url => base_url + row.children.children[1]['href'], :title => row.children.children[1].text.strip.split.join(' '), :date => Date.parse(row.children.children[0].text), :domain => "#{senator}.senate.gov" }
117
118
  end
118
119
  end
119
120
  results.flatten
@@ -126,7 +127,7 @@ module Statement
126
127
  year_url = base_url + "newsreleases.cfm?year=#{year}"
127
128
  doc = Nokogiri::HTML(open(year_url).read)
128
129
  doc.xpath("//dt").each do |row|
129
- results << { :source => year_url, :url => base_url + row.next.children[0]['href'], :title => row.next.text.strip.gsub(/[\x80-\xff]/,'').split.join(' '), :date => Date.parse(row.text), :domain => "klobuchar.senate.gov" }
130
+ results << { :source => year_url, :url => base_url + row.next.children[0]['href'], :title => row.next.text.strip.gsub(/[\u201c\u201d]/, '').split.join(' '), :date => Date.parse(row.text), :domain => "klobuchar.senate.gov" }
130
131
  end
131
132
  end
132
133
  results
@@ -138,7 +139,7 @@ module Statement
138
139
  doc = Nokogiri::HTML(open(base_url+'index.php?option=com_content&view=article&id=981&Itemid=78').read)
139
140
  doc.xpath('//ul')[1].children.each do |row|
140
141
  next if row.text.strip == ''
141
- results << { :source => base_url+'index.php?option=com_content&view=article&id=981&Itemid=78', :url => base_url + row.children[0]['href'], :title => row.children[0].text.gsub(/[\x80-\xff]/,'').gsub('Lujn','Lujan'), :date => nil, :domain => "lujan.house.gov" }
142
+ results << { :source => base_url+'index.php?option=com_content&view=article&id=981&Itemid=78', :url => base_url + row.children[0]['href'], :title => row.children[0].text, :date => nil, :domain => "lujan.house.gov" }
142
143
  end
143
144
  results
144
145
  end
@@ -159,7 +160,7 @@ module Statement
159
160
  base_url = "http://roe.house.gov/news/"
160
161
  doc = Nokogiri::HTML(open(base_url+"documentquery.aspx?DocumentTypeID=1532&Page=#{page}").read)
161
162
  doc.xpath("//span[@class='middlecopy']").each do |row|
162
- results << { :source => base_url+"documentquery.aspx?DocumentTypeID=1532&Page=#{page}", :url => base_url + row.children[6]['href'], :title => row.children[1].text.strip.gsub(/[\x80-\xff]/,''), :date => Date.parse(row.children[4].text.gsub(/[\x80-\xff]/,'').strip), :domain => "roe.house.gov" }
163
+ results << { :source => base_url+"documentquery.aspx?DocumentTypeID=1532&Page=#{page}", :url => base_url + row.children[6]['href'], :title => row.children[1].text.strip, :date => Date.parse(row.children[4].text.strip), :domain => "roe.house.gov" }
163
164
  end
164
165
  results
165
166
  end
@@ -169,7 +170,7 @@ module Statement
169
170
  base_url = "http://thornberry.house.gov/news/"
170
171
  doc = Nokogiri::HTML(open(base_url+"documentquery.aspx?DocumentTypeID=1776&Page=#{page}").read)
171
172
  doc.xpath("//span[@class='middlecopy']").each do |row|
172
- results << { :source => base_url+"documentquery.aspx?DocumentTypeID=1776&Page=#{page}", :url => base_url + row.children[6]['href'], :title => row.children[1].text.strip.gsub(/[\x80-\xff]/,''), :date => Date.parse(row.children[4].text.gsub(/[\x80-\xff]/,'').strip), :domain => "thornberry.house.gov" }
173
+ results << { :source => base_url+"documentquery.aspx?DocumentTypeID=1776&Page=#{page}", :url => base_url + row.children[6]['href'], :title => row.children[1].text.strip, :date => Date.parse(row.children[4].text.strip), :domain => "thornberry.house.gov" }
173
174
  end
174
175
  results
175
176
  end
metadata CHANGED
@@ -1,102 +1,78 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: statement
3
- version: !ruby/object:Gem::Version
4
- hash: 3
3
+ version: !ruby/object:Gem::Version
4
+ version: '0.5'
5
5
  prerelease:
6
- segments:
7
- - 0
8
- - 4
9
- version: "0.4"
10
6
  platform: ruby
11
- authors:
7
+ authors:
12
8
  - Derek Willis
13
9
  autorequire:
14
10
  bindir: bin
15
11
  cert_chain: []
16
-
17
- date: 2013-04-25 00:00:00 Z
18
- dependencies:
19
- - !ruby/object:Gem::Dependency
12
+ date: 2013-04-29 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
20
15
  name: bundler
21
- prerelease: false
22
- requirement: &id001 !ruby/object:Gem::Requirement
16
+ requirement: &2152473440 !ruby/object:Gem::Requirement
23
17
  none: false
24
- requirements:
18
+ requirements:
25
19
  - - ~>
26
- - !ruby/object:Gem::Version
27
- hash: 9
28
- segments:
29
- - 1
30
- - 3
31
- version: "1.3"
20
+ - !ruby/object:Gem::Version
21
+ version: '1.3'
32
22
  type: :development
33
- version_requirements: *id001
34
- - !ruby/object:Gem::Dependency
35
- name: rake
36
23
  prerelease: false
37
- requirement: &id002 !ruby/object:Gem::Requirement
24
+ version_requirements: *2152473440
25
+ - !ruby/object:Gem::Dependency
26
+ name: rake
27
+ requirement: &2152473020 !ruby/object:Gem::Requirement
38
28
  none: false
39
- requirements:
40
- - - ">="
41
- - !ruby/object:Gem::Version
42
- hash: 3
43
- segments:
44
- - 0
45
- version: "0"
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
46
33
  type: :development
47
- version_requirements: *id002
48
- - !ruby/object:Gem::Dependency
49
- name: webmock
50
34
  prerelease: false
51
- requirement: &id003 !ruby/object:Gem::Requirement
35
+ version_requirements: *2152473020
36
+ - !ruby/object:Gem::Dependency
37
+ name: webmock
38
+ requirement: &2152472560 !ruby/object:Gem::Requirement
52
39
  none: false
53
- requirements:
54
- - - ">="
55
- - !ruby/object:Gem::Version
56
- hash: 3
57
- segments:
58
- - 0
59
- version: "0"
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
60
44
  type: :development
61
- version_requirements: *id003
62
- - !ruby/object:Gem::Dependency
63
- name: american_date
64
45
  prerelease: false
65
- requirement: &id004 !ruby/object:Gem::Requirement
46
+ version_requirements: *2152472560
47
+ - !ruby/object:Gem::Dependency
48
+ name: american_date
49
+ requirement: &2152472140 !ruby/object:Gem::Requirement
66
50
  none: false
67
- requirements:
68
- - - ">="
69
- - !ruby/object:Gem::Version
70
- hash: 3
71
- segments:
72
- - 0
73
- version: "0"
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
74
55
  type: :runtime
75
- version_requirements: *id004
76
- - !ruby/object:Gem::Dependency
77
- name: nokogiri
78
56
  prerelease: false
79
- requirement: &id005 !ruby/object:Gem::Requirement
57
+ version_requirements: *2152472140
58
+ - !ruby/object:Gem::Dependency
59
+ name: nokogiri
60
+ requirement: &2152488080 !ruby/object:Gem::Requirement
80
61
  none: false
81
- requirements:
82
- - - ">="
83
- - !ruby/object:Gem::Version
84
- hash: 3
85
- segments:
86
- - 0
87
- version: "0"
62
+ requirements:
63
+ - - ! '>='
64
+ - !ruby/object:Gem::Version
65
+ version: '0'
88
66
  type: :runtime
89
- version_requirements: *id005
67
+ prerelease: false
68
+ version_requirements: *2152488080
90
69
  description: Crawls congressional websites for press releases.
91
- email:
70
+ email:
92
71
  - dwillis@gmail.com
93
72
  executables: []
94
-
95
73
  extensions: []
96
-
97
74
  extra_rdoc_files: []
98
-
99
- files:
75
+ files:
100
76
  - .gitignore
101
77
  - Gemfile
102
78
  - LICENSE.txt
@@ -109,40 +85,32 @@ files:
109
85
  - spec/ruiz_rss.xml
110
86
  - spec/statement_spec.rb
111
87
  - statement.gemspec
112
- homepage: ""
113
- licenses:
88
+ homepage: ''
89
+ licenses:
114
90
  - Apache
115
91
  post_install_message:
116
92
  rdoc_options: []
117
-
118
- require_paths:
93
+ require_paths:
119
94
  - lib
120
- required_ruby_version: !ruby/object:Gem::Requirement
95
+ required_ruby_version: !ruby/object:Gem::Requirement
121
96
  none: false
122
- requirements:
123
- - - ">="
124
- - !ruby/object:Gem::Version
125
- hash: 3
126
- segments:
127
- - 0
128
- version: "0"
129
- required_rubygems_version: !ruby/object:Gem::Requirement
97
+ requirements:
98
+ - - ! '>='
99
+ - !ruby/object:Gem::Version
100
+ version: '0'
101
+ required_rubygems_version: !ruby/object:Gem::Requirement
130
102
  none: false
131
- requirements:
132
- - - ">="
133
- - !ruby/object:Gem::Version
134
- hash: 3
135
- segments:
136
- - 0
137
- version: "0"
103
+ requirements:
104
+ - - ! '>='
105
+ - !ruby/object:Gem::Version
106
+ version: '0'
138
107
  requirements: []
139
-
140
108
  rubyforge_project:
141
109
  rubygems_version: 1.8.17
142
110
  signing_key:
143
111
  specification_version: 3
144
112
  summary: Given a url, Statement returns links to press releases and official statements.
145
- test_files:
113
+ test_files:
146
114
  - spec/culberson_rss.xml
147
115
  - spec/house_gop_releases.html
148
116
  - spec/ruiz_rss.xml