statement 0.9.3 → 0.9.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +16 -2
- data/lib/statement/scraper.rb +7 -8
- data/lib/statement/version.rb +1 -1
- metadata +16 -16
data/README.md
CHANGED
@@ -42,8 +42,22 @@ puts results.first
|
|
42
42
|
Sites that require HTML scraping are handled by individual methods, which can be called individually or in bulk:
|
43
43
|
|
44
44
|
```
|
45
|
-
|
46
|
-
|
45
|
+
results = Statement::Scraper.billnelson
|
46
|
+
members = Statement::Scraper.member_scrapers
|
47
|
+
```
|
48
|
+
|
49
|
+
Using the `koala` gem, Statement can fetch Facebook status feeds, given a Facebook ID. You'll need to either set environment variables `APP_ID` and `APP_SECRET` or create a `config.yml` file containing `app_id` and `app_secret` keys and values.
|
50
|
+
|
51
|
+
```
|
52
|
+
f = Statement::Facebook.new
|
53
|
+
results = f.feed('RepFincherTN08')
|
54
|
+
```
|
55
|
+
|
56
|
+
It can also process IDs in batches, with a `slice` argument indicating how many IDs are passed in each batch:
|
57
|
+
|
58
|
+
```
|
59
|
+
f = Statement::Facebook.new
|
60
|
+
results = f.batch(array, 10)
|
47
61
|
```
|
48
62
|
|
49
63
|
## Tests
|
data/lib/statement/scraper.rb
CHANGED
@@ -233,15 +233,14 @@ module Statement
|
|
233
233
|
list_url = base_url + 'date.shtml'
|
234
234
|
doc = open_html(list_url)
|
235
235
|
return if doc.nil?
|
236
|
-
doc.xpath("//a").each do |link|
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
end
|
243
|
-
results << { :source => list_url, :url => base_url + link['href'], :title => link.text.split(' ',2).last, :date => date, :domain => "www.house.gov/capuano/" }
|
236
|
+
doc.xpath("//a").select{|l| !l['href'].nil? and l['href'].include?('/pr')}[1..-5].each do |link|
|
237
|
+
begin
|
238
|
+
year = link['href'].split('/').first
|
239
|
+
date = Date.parse(link.text.split(' ').first+'/'+year)
|
240
|
+
rescue
|
241
|
+
date = nil
|
244
242
|
end
|
243
|
+
results << { :source => list_url, :url => base_url + link['href'], :title => link.text.split(' ',2).last, :date => date, :domain => "www.house.gov/capuano/" }
|
245
244
|
end
|
246
245
|
return results[0..-5]
|
247
246
|
end
|
data/lib/statement/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: statement
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.3
|
4
|
+
version: 0.9.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-05-
|
12
|
+
date: 2013-05-24 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
16
|
-
requirement: &
|
16
|
+
requirement: &2156038320 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '1.3'
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *2156038320
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: rake
|
27
|
-
requirement: &
|
27
|
+
requirement: &2156037740 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '0'
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *2156037740
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: webmock
|
38
|
-
requirement: &
|
38
|
+
requirement: &2156037140 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: '0'
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *2156037140
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: american_date
|
49
|
-
requirement: &
|
49
|
+
requirement: &2156036600 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: '0'
|
55
55
|
type: :runtime
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *2156036600
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: nokogiri
|
60
|
-
requirement: &
|
60
|
+
requirement: &2156035940 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ! '>='
|
@@ -65,10 +65,10 @@ dependencies:
|
|
65
65
|
version: '0'
|
66
66
|
type: :runtime
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *2156035940
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: koala
|
71
|
-
requirement: &
|
71
|
+
requirement: &2156035320 !ruby/object:Gem::Requirement
|
72
72
|
none: false
|
73
73
|
requirements:
|
74
74
|
- - ! '>='
|
@@ -76,10 +76,10 @@ dependencies:
|
|
76
76
|
version: '0'
|
77
77
|
type: :runtime
|
78
78
|
prerelease: false
|
79
|
-
version_requirements: *
|
79
|
+
version_requirements: *2156035320
|
80
80
|
- !ruby/object:Gem::Dependency
|
81
81
|
name: oj
|
82
|
-
requirement: &
|
82
|
+
requirement: &2156034620 !ruby/object:Gem::Requirement
|
83
83
|
none: false
|
84
84
|
requirements:
|
85
85
|
- - ! '>='
|
@@ -87,7 +87,7 @@ dependencies:
|
|
87
87
|
version: '0'
|
88
88
|
type: :runtime
|
89
89
|
prerelease: false
|
90
|
-
version_requirements: *
|
90
|
+
version_requirements: *2156034620
|
91
91
|
description: Crawls congressional websites for press releases.
|
92
92
|
email:
|
93
93
|
- dwillis@gmail.com
|