statement 0.9.7 → 0.9.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/lib/statement/scraper.rb +9 -9
- data/lib/statement/version.rb +1 -1
- metadata +47 -28
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
M2UzMWMzZTk0NWMwMDQ3YTc5MDljNmRjNzE4MWE1MGNjMjU0MDBkZg==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
MmE5M2Q4YjUyMGExNzk0ZTI5YTEwMjZkNTkyMmQ0OWRiNzJkZjZmMA==
|
7
|
+
!binary "U0hBNTEy":
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
YmVkYzlkMzFlNWJmMWU1MjhlZmM1YjJkN2MyNGQwY2Q1YmM1ODQ0ZDk3ZGY2
|
10
|
+
ZjEyZGMxOTljYjk2MDM1ZTU3NjhjYjczZDU4NDkzYWZkZTJkMjQ3ZDEzMzE5
|
11
|
+
NTU4OTlmNDk5OTkyNWQ5NjMxZmZhMTZlOGNhMmYwNmYzNTAxNmM=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
ODBiNDQ1ZTBmZDE4MzQwNDEzOGEwYzFiZDVlYjY5YTVjOTJjMDI3OWU1ZDdi
|
14
|
+
MzkxZDM1NzBjNzQ2NjRjZDkwODA3ODI1OTU0MTI3MTBlNTE3NTJkZDE4ZTM2
|
15
|
+
ZDQ2MzczNjNkODk0MmQwMmVkOGE0ZjBlZDBhNzVjZjVhMWZiNGM=
|
data/lib/statement/scraper.rb
CHANGED
@@ -249,7 +249,7 @@ module Statement
|
|
249
249
|
results = []
|
250
250
|
year = Date.today.year if not year
|
251
251
|
month = 0 if not month
|
252
|
-
domains = ['crenshaw.house.gov/', 'www.ronjohnson.senate.gov/public/','www.
|
252
|
+
domains = ['crenshaw.house.gov/', 'www.ronjohnson.senate.gov/public/','www.hoeven.senate.gov/public/','www.moran.senate.gov/public/','www.risch.senate.gov/public/']
|
253
253
|
domains.each do |domain|
|
254
254
|
if domain == 'crenshaw.house.gov/' or domain == 'www.risch.senate.gov/public/'
|
255
255
|
url = "http://"+domain + "index.cfm/pressreleases?YearDisplay=#{year}&MonthDisplay=#{month}&page=1"
|
@@ -313,7 +313,7 @@ module Statement
|
|
313
313
|
return if doc.nil?
|
314
314
|
doc.xpath("//tr")[3..-1].each do |row|
|
315
315
|
next if row.text.strip == ''
|
316
|
-
results << { :source => base_url+'press.cfm?maxrows=200&startrow=1&&type=1', :url => base_url + row.children.children[1]['href'], :title => row.children.children[1].text.strip.split.join(' '), :date => Date.
|
316
|
+
results << { :source => base_url+'press.cfm?maxrows=200&startrow=1&&type=1', :url => base_url + row.children.children[1]['href'], :title => row.children.children[1].text.strip.split.join(' '), :date => Date.strptime(row.children.children[0].text, "%m/%d/%y"), :domain => "#{senator}.senate.gov" }
|
317
317
|
end
|
318
318
|
end
|
319
319
|
results.flatten
|
@@ -327,7 +327,7 @@ module Statement
|
|
327
327
|
doc = open_html(year_url)
|
328
328
|
return if doc.nil?
|
329
329
|
doc.xpath("//dt").each do |row|
|
330
|
-
results << { :source => year_url, :url => base_url + row.next.children[0]['href'], :title => row.next.text.strip.gsub(/[u201cu201d]/, '').split.join(' '), :date => Date.
|
330
|
+
results << { :source => year_url, :url => base_url + row.next.children[0]['href'], :title => row.next.text.strip.gsub(/[u201cu201d]/, '').split.join(' '), :date => Date.strptime(row.text, "%m/%d/%y"), :domain => "klobuchar.senate.gov" }
|
331
331
|
end
|
332
332
|
end
|
333
333
|
results
|
@@ -365,7 +365,7 @@ module Statement
|
|
365
365
|
doc = open_html(url)
|
366
366
|
return if doc.nil?
|
367
367
|
doc.xpath("//tr")[4..-2].each do |row|
|
368
|
-
results << { :source => url, :url => base_url + row.children[2].children[0]['href'], :title => row.children[2].text.strip, :date => Date.
|
368
|
+
results << { :source => url, :url => base_url + row.children[2].children[0]['href'], :title => row.children[2].text.strip, :date => Date.strptime(row.children[0].text.strip, "%m/%d/%y"), :domain => "lautenberg.senate.gov" }
|
369
369
|
end
|
370
370
|
results
|
371
371
|
end
|
@@ -389,7 +389,7 @@ module Statement
|
|
389
389
|
return if doc.nil?
|
390
390
|
doc.xpath("//tr")[2..-1].each do |row|
|
391
391
|
next if row.text[0..3] == "Date"
|
392
|
-
results << { :source => url, :url => row.children[2].children[0]['href'], :title => row.children[2].text.strip, :date => Date.
|
392
|
+
results << { :source => url, :url => row.children[2].children[0]['href'], :title => row.children[2].text.strip, :date => Date.strptime(row.children[0].text.strip, "%m/%d/%y"), :domain => "coburn.senate.gov" }
|
393
393
|
end
|
394
394
|
results
|
395
395
|
end
|
@@ -413,7 +413,7 @@ module Statement
|
|
413
413
|
doc = open_html(url)
|
414
414
|
return if doc.nil?
|
415
415
|
doc.xpath("//li")[7..-1].each do |row|
|
416
|
-
results << { :source => url, :url => domain + row.children[3].children[1].children[4].children[0]['href'], :title => row.children[3].children[1].children[4].text, :date => Date.
|
416
|
+
results << { :source => url, :url => domain + row.children[3].children[1].children[4].children[0]['href'], :title => row.children[3].children[1].children[4].text, :date => Date.strptime(row.children[3].children[1].children[0].text, "%m/%d/%y"), :domain => domain}
|
417
417
|
end
|
418
418
|
results
|
419
419
|
end
|
@@ -432,7 +432,7 @@ module Statement
|
|
432
432
|
return if doc.nil?
|
433
433
|
doc.xpath("//tr")[1..-1].each do |row|
|
434
434
|
next if row.text.strip.size < 30
|
435
|
-
results << { :source => url, :url => row.children[2].children[0]['href'].strip, :title => row.children[2].text, :date => Date.
|
435
|
+
results << { :source => url, :url => row.children[2].children[0]['href'].strip, :title => row.children[2].text, :date => Date.strptime(row.children[0].text, "%m/%d/%y"), :domain => domain}
|
436
436
|
end
|
437
437
|
end
|
438
438
|
results.flatten
|
@@ -446,7 +446,7 @@ module Statement
|
|
446
446
|
return if doc.nil?
|
447
447
|
doc.xpath("//tr")[1..-1].each do |row|
|
448
448
|
next if row.text.strip.size < 30
|
449
|
-
results << { :source => url, :url => "http://www.donnelly.senate.gov"+row.children[2].children[1]['href'].strip, :title => row.children[2].text.strip, :date => Date.
|
449
|
+
results << { :source => url, :url => "http://www.donnelly.senate.gov"+row.children[2].children[1]['href'].strip, :title => row.children[2].text.strip, :date => Date.strptime(row.children[0].text, "%m/%d/%y"), :domain => domain}
|
450
450
|
end
|
451
451
|
results
|
452
452
|
end
|
@@ -459,7 +459,7 @@ module Statement
|
|
459
459
|
return if doc.nil?
|
460
460
|
doc.xpath("//tr")[1..-1].each do |row|
|
461
461
|
next if row.text.strip.size < 30
|
462
|
-
results << { :source => url, :url => row.children[2].children[0]['href'].strip, :title => row.children[2].text, :date => Date.
|
462
|
+
results << { :source => url, :url => row.children[2].children[0]['href'].strip, :title => row.children[2].text, :date => Date.strptime(row.children[0].text, "%m/%d/%y"), :domain => domain}
|
463
463
|
end
|
464
464
|
results
|
465
465
|
end
|
data/lib/statement/version.rb
CHANGED
metadata
CHANGED
@@ -1,93 +1,113 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: statement
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.7
|
5
|
-
prerelease:
|
4
|
+
version: 0.9.8
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Derek Willis
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date: 2013-06-
|
11
|
+
date: 2013-06-07 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: bundler
|
16
|
-
requirement:
|
17
|
-
none: false
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
18
16
|
requirements:
|
19
17
|
- - ~>
|
20
18
|
- !ruby/object:Gem::Version
|
21
19
|
version: '1.3'
|
22
20
|
type: :development
|
23
21
|
prerelease: false
|
24
|
-
version_requirements:
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.3'
|
25
27
|
- !ruby/object:Gem::Dependency
|
26
28
|
name: rake
|
27
|
-
requirement:
|
28
|
-
none: false
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
29
30
|
requirements:
|
30
31
|
- - ! '>='
|
31
32
|
- !ruby/object:Gem::Version
|
32
33
|
version: '0'
|
33
34
|
type: :development
|
34
35
|
prerelease: false
|
35
|
-
version_requirements:
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ! '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
36
41
|
- !ruby/object:Gem::Dependency
|
37
42
|
name: webmock
|
38
|
-
requirement:
|
39
|
-
none: false
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
40
44
|
requirements:
|
41
45
|
- - ! '>='
|
42
46
|
- !ruby/object:Gem::Version
|
43
47
|
version: '0'
|
44
48
|
type: :development
|
45
49
|
prerelease: false
|
46
|
-
version_requirements:
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ! '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
47
55
|
- !ruby/object:Gem::Dependency
|
48
56
|
name: american_date
|
49
|
-
requirement:
|
50
|
-
none: false
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
51
58
|
requirements:
|
52
59
|
- - ! '>='
|
53
60
|
- !ruby/object:Gem::Version
|
54
61
|
version: '0'
|
55
62
|
type: :runtime
|
56
63
|
prerelease: false
|
57
|
-
version_requirements:
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ! '>='
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
58
69
|
- !ruby/object:Gem::Dependency
|
59
70
|
name: nokogiri
|
60
|
-
requirement:
|
61
|
-
none: false
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
62
72
|
requirements:
|
63
73
|
- - ! '>='
|
64
74
|
- !ruby/object:Gem::Version
|
65
75
|
version: '0'
|
66
76
|
type: :runtime
|
67
77
|
prerelease: false
|
68
|
-
version_requirements:
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ! '>='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
69
83
|
- !ruby/object:Gem::Dependency
|
70
84
|
name: koala
|
71
|
-
requirement:
|
72
|
-
none: false
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
73
86
|
requirements:
|
74
87
|
- - ! '>='
|
75
88
|
- !ruby/object:Gem::Version
|
76
89
|
version: '0'
|
77
90
|
type: :runtime
|
78
91
|
prerelease: false
|
79
|
-
version_requirements:
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ! '>='
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
80
97
|
- !ruby/object:Gem::Dependency
|
81
98
|
name: oj
|
82
|
-
requirement:
|
83
|
-
none: false
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
84
100
|
requirements:
|
85
101
|
- - ! '>='
|
86
102
|
- !ruby/object:Gem::Version
|
87
103
|
version: '0'
|
88
104
|
type: :runtime
|
89
105
|
prerelease: false
|
90
|
-
version_requirements:
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ! '>='
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
91
111
|
description: Crawls congressional websites for press releases.
|
92
112
|
email:
|
93
113
|
- dwillis@gmail.com
|
@@ -118,27 +138,26 @@ files:
|
|
118
138
|
homepage: ''
|
119
139
|
licenses:
|
120
140
|
- Apache
|
141
|
+
metadata: {}
|
121
142
|
post_install_message:
|
122
143
|
rdoc_options: []
|
123
144
|
require_paths:
|
124
145
|
- lib
|
125
146
|
required_ruby_version: !ruby/object:Gem::Requirement
|
126
|
-
none: false
|
127
147
|
requirements:
|
128
148
|
- - ! '>='
|
129
149
|
- !ruby/object:Gem::Version
|
130
150
|
version: '0'
|
131
151
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
132
|
-
none: false
|
133
152
|
requirements:
|
134
153
|
- - ! '>='
|
135
154
|
- !ruby/object:Gem::Version
|
136
155
|
version: '0'
|
137
156
|
requirements: []
|
138
157
|
rubyforge_project:
|
139
|
-
rubygems_version:
|
158
|
+
rubygems_version: 2.0.3
|
140
159
|
signing_key:
|
141
|
-
specification_version:
|
160
|
+
specification_version: 4
|
142
161
|
summary: Given a url, Statement returns links to press releases and official statements.
|
143
162
|
test_files:
|
144
163
|
- spec/bill_nelson_press.html
|