statement 0.9.7 → 0.9.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/lib/statement/scraper.rb +9 -9
- data/lib/statement/version.rb +1 -1
- metadata +47 -28
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
M2UzMWMzZTk0NWMwMDQ3YTc5MDljNmRjNzE4MWE1MGNjMjU0MDBkZg==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
MmE5M2Q4YjUyMGExNzk0ZTI5YTEwMjZkNTkyMmQ0OWRiNzJkZjZmMA==
|
7
|
+
!binary "U0hBNTEy":
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
YmVkYzlkMzFlNWJmMWU1MjhlZmM1YjJkN2MyNGQwY2Q1YmM1ODQ0ZDk3ZGY2
|
10
|
+
ZjEyZGMxOTljYjk2MDM1ZTU3NjhjYjczZDU4NDkzYWZkZTJkMjQ3ZDEzMzE5
|
11
|
+
NTU4OTlmNDk5OTkyNWQ5NjMxZmZhMTZlOGNhMmYwNmYzNTAxNmM=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
ODBiNDQ1ZTBmZDE4MzQwNDEzOGEwYzFiZDVlYjY5YTVjOTJjMDI3OWU1ZDdi
|
14
|
+
MzkxZDM1NzBjNzQ2NjRjZDkwODA3ODI1OTU0MTI3MTBlNTE3NTJkZDE4ZTM2
|
15
|
+
ZDQ2MzczNjNkODk0MmQwMmVkOGE0ZjBlZDBhNzVjZjVhMWZiNGM=
|
data/lib/statement/scraper.rb
CHANGED
@@ -249,7 +249,7 @@ module Statement
|
|
249
249
|
results = []
|
250
250
|
year = Date.today.year if not year
|
251
251
|
month = 0 if not month
|
252
|
-
domains = ['crenshaw.house.gov/', 'www.ronjohnson.senate.gov/public/','www.
|
252
|
+
domains = ['crenshaw.house.gov/', 'www.ronjohnson.senate.gov/public/','www.hoeven.senate.gov/public/','www.moran.senate.gov/public/','www.risch.senate.gov/public/']
|
253
253
|
domains.each do |domain|
|
254
254
|
if domain == 'crenshaw.house.gov/' or domain == 'www.risch.senate.gov/public/'
|
255
255
|
url = "http://"+domain + "index.cfm/pressreleases?YearDisplay=#{year}&MonthDisplay=#{month}&page=1"
|
@@ -313,7 +313,7 @@ module Statement
|
|
313
313
|
return if doc.nil?
|
314
314
|
doc.xpath("//tr")[3..-1].each do |row|
|
315
315
|
next if row.text.strip == ''
|
316
|
-
results << { :source => base_url+'press.cfm?maxrows=200&startrow=1&&type=1', :url => base_url + row.children.children[1]['href'], :title => row.children.children[1].text.strip.split.join(' '), :date => Date.
|
316
|
+
results << { :source => base_url+'press.cfm?maxrows=200&startrow=1&&type=1', :url => base_url + row.children.children[1]['href'], :title => row.children.children[1].text.strip.split.join(' '), :date => Date.strptime(row.children.children[0].text, "%m/%d/%y"), :domain => "#{senator}.senate.gov" }
|
317
317
|
end
|
318
318
|
end
|
319
319
|
results.flatten
|
@@ -327,7 +327,7 @@ module Statement
|
|
327
327
|
doc = open_html(year_url)
|
328
328
|
return if doc.nil?
|
329
329
|
doc.xpath("//dt").each do |row|
|
330
|
-
results << { :source => year_url, :url => base_url + row.next.children[0]['href'], :title => row.next.text.strip.gsub(/[u201cu201d]/, '').split.join(' '), :date => Date.
|
330
|
+
results << { :source => year_url, :url => base_url + row.next.children[0]['href'], :title => row.next.text.strip.gsub(/[u201cu201d]/, '').split.join(' '), :date => Date.strptime(row.text, "%m/%d/%y"), :domain => "klobuchar.senate.gov" }
|
331
331
|
end
|
332
332
|
end
|
333
333
|
results
|
@@ -365,7 +365,7 @@ module Statement
|
|
365
365
|
doc = open_html(url)
|
366
366
|
return if doc.nil?
|
367
367
|
doc.xpath("//tr")[4..-2].each do |row|
|
368
|
-
results << { :source => url, :url => base_url + row.children[2].children[0]['href'], :title => row.children[2].text.strip, :date => Date.
|
368
|
+
results << { :source => url, :url => base_url + row.children[2].children[0]['href'], :title => row.children[2].text.strip, :date => Date.strptime(row.children[0].text.strip, "%m/%d/%y"), :domain => "lautenberg.senate.gov" }
|
369
369
|
end
|
370
370
|
results
|
371
371
|
end
|
@@ -389,7 +389,7 @@ module Statement
|
|
389
389
|
return if doc.nil?
|
390
390
|
doc.xpath("//tr")[2..-1].each do |row|
|
391
391
|
next if row.text[0..3] == "Date"
|
392
|
-
results << { :source => url, :url => row.children[2].children[0]['href'], :title => row.children[2].text.strip, :date => Date.
|
392
|
+
results << { :source => url, :url => row.children[2].children[0]['href'], :title => row.children[2].text.strip, :date => Date.strptime(row.children[0].text.strip, "%m/%d/%y"), :domain => "coburn.senate.gov" }
|
393
393
|
end
|
394
394
|
results
|
395
395
|
end
|
@@ -413,7 +413,7 @@ module Statement
|
|
413
413
|
doc = open_html(url)
|
414
414
|
return if doc.nil?
|
415
415
|
doc.xpath("//li")[7..-1].each do |row|
|
416
|
-
results << { :source => url, :url => domain + row.children[3].children[1].children[4].children[0]['href'], :title => row.children[3].children[1].children[4].text, :date => Date.
|
416
|
+
results << { :source => url, :url => domain + row.children[3].children[1].children[4].children[0]['href'], :title => row.children[3].children[1].children[4].text, :date => Date.strptime(row.children[3].children[1].children[0].text, "%m/%d/%y"), :domain => domain}
|
417
417
|
end
|
418
418
|
results
|
419
419
|
end
|
@@ -432,7 +432,7 @@ module Statement
|
|
432
432
|
return if doc.nil?
|
433
433
|
doc.xpath("//tr")[1..-1].each do |row|
|
434
434
|
next if row.text.strip.size < 30
|
435
|
-
results << { :source => url, :url => row.children[2].children[0]['href'].strip, :title => row.children[2].text, :date => Date.
|
435
|
+
results << { :source => url, :url => row.children[2].children[0]['href'].strip, :title => row.children[2].text, :date => Date.strptime(row.children[0].text, "%m/%d/%y"), :domain => domain}
|
436
436
|
end
|
437
437
|
end
|
438
438
|
results.flatten
|
@@ -446,7 +446,7 @@ module Statement
|
|
446
446
|
return if doc.nil?
|
447
447
|
doc.xpath("//tr")[1..-1].each do |row|
|
448
448
|
next if row.text.strip.size < 30
|
449
|
-
results << { :source => url, :url => "http://www.donnelly.senate.gov"+row.children[2].children[1]['href'].strip, :title => row.children[2].text.strip, :date => Date.
|
449
|
+
results << { :source => url, :url => "http://www.donnelly.senate.gov"+row.children[2].children[1]['href'].strip, :title => row.children[2].text.strip, :date => Date.strptime(row.children[0].text, "%m/%d/%y"), :domain => domain}
|
450
450
|
end
|
451
451
|
results
|
452
452
|
end
|
@@ -459,7 +459,7 @@ module Statement
|
|
459
459
|
return if doc.nil?
|
460
460
|
doc.xpath("//tr")[1..-1].each do |row|
|
461
461
|
next if row.text.strip.size < 30
|
462
|
-
results << { :source => url, :url => row.children[2].children[0]['href'].strip, :title => row.children[2].text, :date => Date.
|
462
|
+
results << { :source => url, :url => row.children[2].children[0]['href'].strip, :title => row.children[2].text, :date => Date.strptime(row.children[0].text, "%m/%d/%y"), :domain => domain}
|
463
463
|
end
|
464
464
|
results
|
465
465
|
end
|
data/lib/statement/version.rb
CHANGED
metadata
CHANGED
@@ -1,93 +1,113 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: statement
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.
|
5
|
-
prerelease:
|
4
|
+
version: 0.9.8
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Derek Willis
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date: 2013-06-
|
11
|
+
date: 2013-06-07 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: bundler
|
16
|
-
requirement:
|
17
|
-
none: false
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
18
16
|
requirements:
|
19
17
|
- - ~>
|
20
18
|
- !ruby/object:Gem::Version
|
21
19
|
version: '1.3'
|
22
20
|
type: :development
|
23
21
|
prerelease: false
|
24
|
-
version_requirements:
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.3'
|
25
27
|
- !ruby/object:Gem::Dependency
|
26
28
|
name: rake
|
27
|
-
requirement:
|
28
|
-
none: false
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
29
30
|
requirements:
|
30
31
|
- - ! '>='
|
31
32
|
- !ruby/object:Gem::Version
|
32
33
|
version: '0'
|
33
34
|
type: :development
|
34
35
|
prerelease: false
|
35
|
-
version_requirements:
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ! '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
36
41
|
- !ruby/object:Gem::Dependency
|
37
42
|
name: webmock
|
38
|
-
requirement:
|
39
|
-
none: false
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
40
44
|
requirements:
|
41
45
|
- - ! '>='
|
42
46
|
- !ruby/object:Gem::Version
|
43
47
|
version: '0'
|
44
48
|
type: :development
|
45
49
|
prerelease: false
|
46
|
-
version_requirements:
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ! '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
47
55
|
- !ruby/object:Gem::Dependency
|
48
56
|
name: american_date
|
49
|
-
requirement:
|
50
|
-
none: false
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
51
58
|
requirements:
|
52
59
|
- - ! '>='
|
53
60
|
- !ruby/object:Gem::Version
|
54
61
|
version: '0'
|
55
62
|
type: :runtime
|
56
63
|
prerelease: false
|
57
|
-
version_requirements:
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ! '>='
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
58
69
|
- !ruby/object:Gem::Dependency
|
59
70
|
name: nokogiri
|
60
|
-
requirement:
|
61
|
-
none: false
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
62
72
|
requirements:
|
63
73
|
- - ! '>='
|
64
74
|
- !ruby/object:Gem::Version
|
65
75
|
version: '0'
|
66
76
|
type: :runtime
|
67
77
|
prerelease: false
|
68
|
-
version_requirements:
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ! '>='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
69
83
|
- !ruby/object:Gem::Dependency
|
70
84
|
name: koala
|
71
|
-
requirement:
|
72
|
-
none: false
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
73
86
|
requirements:
|
74
87
|
- - ! '>='
|
75
88
|
- !ruby/object:Gem::Version
|
76
89
|
version: '0'
|
77
90
|
type: :runtime
|
78
91
|
prerelease: false
|
79
|
-
version_requirements:
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ! '>='
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
80
97
|
- !ruby/object:Gem::Dependency
|
81
98
|
name: oj
|
82
|
-
requirement:
|
83
|
-
none: false
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
84
100
|
requirements:
|
85
101
|
- - ! '>='
|
86
102
|
- !ruby/object:Gem::Version
|
87
103
|
version: '0'
|
88
104
|
type: :runtime
|
89
105
|
prerelease: false
|
90
|
-
version_requirements:
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ! '>='
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
91
111
|
description: Crawls congressional websites for press releases.
|
92
112
|
email:
|
93
113
|
- dwillis@gmail.com
|
@@ -118,27 +138,26 @@ files:
|
|
118
138
|
homepage: ''
|
119
139
|
licenses:
|
120
140
|
- Apache
|
141
|
+
metadata: {}
|
121
142
|
post_install_message:
|
122
143
|
rdoc_options: []
|
123
144
|
require_paths:
|
124
145
|
- lib
|
125
146
|
required_ruby_version: !ruby/object:Gem::Requirement
|
126
|
-
none: false
|
127
147
|
requirements:
|
128
148
|
- - ! '>='
|
129
149
|
- !ruby/object:Gem::Version
|
130
150
|
version: '0'
|
131
151
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
132
|
-
none: false
|
133
152
|
requirements:
|
134
153
|
- - ! '>='
|
135
154
|
- !ruby/object:Gem::Version
|
136
155
|
version: '0'
|
137
156
|
requirements: []
|
138
157
|
rubyforge_project:
|
139
|
-
rubygems_version:
|
158
|
+
rubygems_version: 2.0.3
|
140
159
|
signing_key:
|
141
|
-
specification_version:
|
160
|
+
specification_version: 4
|
142
161
|
summary: Given a url, Statement returns links to press releases and official statements.
|
143
162
|
test_files:
|
144
163
|
- spec/bill_nelson_press.html
|