statement 0.4 → 0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. data/lib/statement/version.rb +1 -1
  2. data/lib/statement.rb +7 -6
  3. metadata +61 -93
@@ -1,3 +1,3 @@
1
1
  module Statement
2
- VERSION = "0.4"
2
+ VERSION = "0.5"
3
3
  end
data/lib/statement.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # encoding: utf-8
1
2
  require "statement/version"
2
3
  require 'uri'
3
4
  require 'open-uri'
@@ -101,7 +102,7 @@ module Statement
101
102
  base_url = "http://www.house.gov/faleomavaega/news-press.shtml"
102
103
  doc = Nokogiri::HTML(open(base_url).read)
103
104
  doc.xpath("//li[@type='disc']").each do |row|
104
- results << { :source => base_url, :url => "http://www.house.gov/" + row.children[0]['href'], :title => row.children[0].text.gsub(/[\x80-\xff]/,'').split('Washington, D.C.').last, :date => Date.parse(row.children[1].text.gsub(/[\x80-\xff]/,' ')), :domain => "house.gov/faleomavaega" }
105
+ results << { :source => base_url, :url => "http://www.house.gov/" + row.children[0]['href'], :title => row.children[0].text.gsub(/[\u201c\u201d]/, '').split('Washington, D.C.').last, :date => Date.parse(row.children[1].text), :domain => "house.gov/faleomavaega" }
105
106
  end
106
107
  results
107
108
  end
@@ -113,7 +114,7 @@ module Statement
113
114
  doc = Nokogiri::HTML(open(base_url+'press.cfm?maxrows=200&startrow=1&&type=1').read)
114
115
  doc.xpath("//tr")[3..-1].each do |row|
115
116
  next if row.text.strip == ''
116
- results << { :source => base_url+'press.cfm?maxrows=200&startrow=1&&type=1', :url => base_url + row.children.children[1]['href'], :title => row.children.children[1].text.strip.split.join(' '), :date => Date.parse(row.children.children[0].text.gsub(/[\x80-\xff]/,'')), :domain => "#{senator}.senate.gov" }
117
+ results << { :source => base_url+'press.cfm?maxrows=200&startrow=1&&type=1', :url => base_url + row.children.children[1]['href'], :title => row.children.children[1].text.strip.split.join(' '), :date => Date.parse(row.children.children[0].text), :domain => "#{senator}.senate.gov" }
117
118
  end
118
119
  end
119
120
  results.flatten
@@ -126,7 +127,7 @@ module Statement
126
127
  year_url = base_url + "newsreleases.cfm?year=#{year}"
127
128
  doc = Nokogiri::HTML(open(year_url).read)
128
129
  doc.xpath("//dt").each do |row|
129
- results << { :source => year_url, :url => base_url + row.next.children[0]['href'], :title => row.next.text.strip.gsub(/[\x80-\xff]/,'').split.join(' '), :date => Date.parse(row.text), :domain => "klobuchar.senate.gov" }
130
+ results << { :source => year_url, :url => base_url + row.next.children[0]['href'], :title => row.next.text.strip.gsub(/[\u201c\u201d]/, '').split.join(' '), :date => Date.parse(row.text), :domain => "klobuchar.senate.gov" }
130
131
  end
131
132
  end
132
133
  results
@@ -138,7 +139,7 @@ module Statement
138
139
  doc = Nokogiri::HTML(open(base_url+'index.php?option=com_content&view=article&id=981&Itemid=78').read)
139
140
  doc.xpath('//ul')[1].children.each do |row|
140
141
  next if row.text.strip == ''
141
- results << { :source => base_url+'index.php?option=com_content&view=article&id=981&Itemid=78', :url => base_url + row.children[0]['href'], :title => row.children[0].text.gsub(/[\x80-\xff]/,'').gsub('Lujn','Lujan'), :date => nil, :domain => "lujan.house.gov" }
142
+ results << { :source => base_url+'index.php?option=com_content&view=article&id=981&Itemid=78', :url => base_url + row.children[0]['href'], :title => row.children[0].text, :date => nil, :domain => "lujan.house.gov" }
142
143
  end
143
144
  results
144
145
  end
@@ -159,7 +160,7 @@ module Statement
159
160
  base_url = "http://roe.house.gov/news/"
160
161
  doc = Nokogiri::HTML(open(base_url+"documentquery.aspx?DocumentTypeID=1532&Page=#{page}").read)
161
162
  doc.xpath("//span[@class='middlecopy']").each do |row|
162
- results << { :source => base_url+"documentquery.aspx?DocumentTypeID=1532&Page=#{page}", :url => base_url + row.children[6]['href'], :title => row.children[1].text.strip.gsub(/[\x80-\xff]/,''), :date => Date.parse(row.children[4].text.gsub(/[\x80-\xff]/,'').strip), :domain => "roe.house.gov" }
163
+ results << { :source => base_url+"documentquery.aspx?DocumentTypeID=1532&Page=#{page}", :url => base_url + row.children[6]['href'], :title => row.children[1].text.strip, :date => Date.parse(row.children[4].text.strip), :domain => "roe.house.gov" }
163
164
  end
164
165
  results
165
166
  end
@@ -169,7 +170,7 @@ module Statement
169
170
  base_url = "http://thornberry.house.gov/news/"
170
171
  doc = Nokogiri::HTML(open(base_url+"documentquery.aspx?DocumentTypeID=1776&Page=#{page}").read)
171
172
  doc.xpath("//span[@class='middlecopy']").each do |row|
172
- results << { :source => base_url+"documentquery.aspx?DocumentTypeID=1776&Page=#{page}", :url => base_url + row.children[6]['href'], :title => row.children[1].text.strip.gsub(/[\x80-\xff]/,''), :date => Date.parse(row.children[4].text.gsub(/[\x80-\xff]/,'').strip), :domain => "thornberry.house.gov" }
173
+ results << { :source => base_url+"documentquery.aspx?DocumentTypeID=1776&Page=#{page}", :url => base_url + row.children[6]['href'], :title => row.children[1].text.strip, :date => Date.parse(row.children[4].text.strip), :domain => "thornberry.house.gov" }
173
174
  end
174
175
  results
175
176
  end
metadata CHANGED
@@ -1,102 +1,78 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: statement
3
- version: !ruby/object:Gem::Version
4
- hash: 3
3
+ version: !ruby/object:Gem::Version
4
+ version: '0.5'
5
5
  prerelease:
6
- segments:
7
- - 0
8
- - 4
9
- version: "0.4"
10
6
  platform: ruby
11
- authors:
7
+ authors:
12
8
  - Derek Willis
13
9
  autorequire:
14
10
  bindir: bin
15
11
  cert_chain: []
16
-
17
- date: 2013-04-25 00:00:00 Z
18
- dependencies:
19
- - !ruby/object:Gem::Dependency
12
+ date: 2013-04-29 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
20
15
  name: bundler
21
- prerelease: false
22
- requirement: &id001 !ruby/object:Gem::Requirement
16
+ requirement: &2152473440 !ruby/object:Gem::Requirement
23
17
  none: false
24
- requirements:
18
+ requirements:
25
19
  - - ~>
26
- - !ruby/object:Gem::Version
27
- hash: 9
28
- segments:
29
- - 1
30
- - 3
31
- version: "1.3"
20
+ - !ruby/object:Gem::Version
21
+ version: '1.3'
32
22
  type: :development
33
- version_requirements: *id001
34
- - !ruby/object:Gem::Dependency
35
- name: rake
36
23
  prerelease: false
37
- requirement: &id002 !ruby/object:Gem::Requirement
24
+ version_requirements: *2152473440
25
+ - !ruby/object:Gem::Dependency
26
+ name: rake
27
+ requirement: &2152473020 !ruby/object:Gem::Requirement
38
28
  none: false
39
- requirements:
40
- - - ">="
41
- - !ruby/object:Gem::Version
42
- hash: 3
43
- segments:
44
- - 0
45
- version: "0"
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
46
33
  type: :development
47
- version_requirements: *id002
48
- - !ruby/object:Gem::Dependency
49
- name: webmock
50
34
  prerelease: false
51
- requirement: &id003 !ruby/object:Gem::Requirement
35
+ version_requirements: *2152473020
36
+ - !ruby/object:Gem::Dependency
37
+ name: webmock
38
+ requirement: &2152472560 !ruby/object:Gem::Requirement
52
39
  none: false
53
- requirements:
54
- - - ">="
55
- - !ruby/object:Gem::Version
56
- hash: 3
57
- segments:
58
- - 0
59
- version: "0"
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
60
44
  type: :development
61
- version_requirements: *id003
62
- - !ruby/object:Gem::Dependency
63
- name: american_date
64
45
  prerelease: false
65
- requirement: &id004 !ruby/object:Gem::Requirement
46
+ version_requirements: *2152472560
47
+ - !ruby/object:Gem::Dependency
48
+ name: american_date
49
+ requirement: &2152472140 !ruby/object:Gem::Requirement
66
50
  none: false
67
- requirements:
68
- - - ">="
69
- - !ruby/object:Gem::Version
70
- hash: 3
71
- segments:
72
- - 0
73
- version: "0"
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
74
55
  type: :runtime
75
- version_requirements: *id004
76
- - !ruby/object:Gem::Dependency
77
- name: nokogiri
78
56
  prerelease: false
79
- requirement: &id005 !ruby/object:Gem::Requirement
57
+ version_requirements: *2152472140
58
+ - !ruby/object:Gem::Dependency
59
+ name: nokogiri
60
+ requirement: &2152488080 !ruby/object:Gem::Requirement
80
61
  none: false
81
- requirements:
82
- - - ">="
83
- - !ruby/object:Gem::Version
84
- hash: 3
85
- segments:
86
- - 0
87
- version: "0"
62
+ requirements:
63
+ - - ! '>='
64
+ - !ruby/object:Gem::Version
65
+ version: '0'
88
66
  type: :runtime
89
- version_requirements: *id005
67
+ prerelease: false
68
+ version_requirements: *2152488080
90
69
  description: Crawls congressional websites for press releases.
91
- email:
70
+ email:
92
71
  - dwillis@gmail.com
93
72
  executables: []
94
-
95
73
  extensions: []
96
-
97
74
  extra_rdoc_files: []
98
-
99
- files:
75
+ files:
100
76
  - .gitignore
101
77
  - Gemfile
102
78
  - LICENSE.txt
@@ -109,40 +85,32 @@ files:
109
85
  - spec/ruiz_rss.xml
110
86
  - spec/statement_spec.rb
111
87
  - statement.gemspec
112
- homepage: ""
113
- licenses:
88
+ homepage: ''
89
+ licenses:
114
90
  - Apache
115
91
  post_install_message:
116
92
  rdoc_options: []
117
-
118
- require_paths:
93
+ require_paths:
119
94
  - lib
120
- required_ruby_version: !ruby/object:Gem::Requirement
95
+ required_ruby_version: !ruby/object:Gem::Requirement
121
96
  none: false
122
- requirements:
123
- - - ">="
124
- - !ruby/object:Gem::Version
125
- hash: 3
126
- segments:
127
- - 0
128
- version: "0"
129
- required_rubygems_version: !ruby/object:Gem::Requirement
97
+ requirements:
98
+ - - ! '>='
99
+ - !ruby/object:Gem::Version
100
+ version: '0'
101
+ required_rubygems_version: !ruby/object:Gem::Requirement
130
102
  none: false
131
- requirements:
132
- - - ">="
133
- - !ruby/object:Gem::Version
134
- hash: 3
135
- segments:
136
- - 0
137
- version: "0"
103
+ requirements:
104
+ - - ! '>='
105
+ - !ruby/object:Gem::Version
106
+ version: '0'
138
107
  requirements: []
139
-
140
108
  rubyforge_project:
141
109
  rubygems_version: 1.8.17
142
110
  signing_key:
143
111
  specification_version: 3
144
112
  summary: Given a url, Statement returns links to press releases and official statements.
145
- test_files:
113
+ test_files:
146
114
  - spec/culberson_rss.xml
147
115
  - spec/house_gop_releases.html
148
116
  - spec/ruiz_rss.xml