statement 0.9.9 → 1.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -44,6 +44,13 @@ puts results.first
44
44
  {:source=>"http://blumenauer.house.gov/index.php?option=com_bca-rss-syndicator&feed_id=1", :url=>"http://blumenauer.house.gov/index.php?option=com_content&amp;view=article&amp;id=2203:blumenauer-qwe-need-a-national-system-that-speaks-to-the-transportation-challenges-of-todayq&amp;catid=66:2013-press-releases", :title=>"Blumenauer: &quot;We need a national system that speaks to the transportation challenges of ...", :date=>#<Date: 2013-04-24 ((2456407j,0s,0n),+0s,2299161j)>, :domain=>"blumenauer.house.gov"}
45
45
  ```
46
46
 
47
+ If you have a batch of RSS URLs, you can pass them to Feed's `batch` class method, which will use Typhoeus to fetch them in parallel:
48
+
49
+ ```ruby
50
+ urls = ['http://aderholt.house.gov/common/rss//index.cfm?rss=20', 'http://andrews.house.gov/rss.xml', "http://alexander.house.gov/common/rss/?rss=24", "http://amash.house.gov/rss.xml"]
51
+ results = Statement::Feed.batch(urls)
52
+ ```
53
+
47
54
  The sites that require HTML scraping are detailed in individual methods, and can be called individually or in bulk:
48
55
 
49
56
  ```ruby
@@ -3,9 +3,25 @@ require 'uri'
3
3
  require 'open-uri'
4
4
  require 'american_date'
5
5
  require 'nokogiri'
6
+ require 'typhoeus'
6
7
 
7
8
  module Statement
8
9
  class Feed
10
+
11
# Fetches several RSS feeds through one Typhoeus::Hydra and returns their parsed items.
#
# urls - a collection of feed URLs; each one is wrapped in its own Typhoeus::Request.
#
# Every request's on_complete callback parses the response body as XML with Nokogiri
# and appends whatever parse_rss returns to a shared array. After hydra.run finishes,
# that array is flattened and returned.
#
# NOTE(review): results are appended as callbacks fire, so the output order presumably
# does not follow the order of urls — confirm before relying on it.
# NOTE(review): the response is not checked for success before its body is parsed.
+ def self.batch(urls)
12
+ results = []
13
+ hydra = Typhoeus::Hydra.new
14
+ urls.each do |url|
15
+ req = Typhoeus::Request.new(url)
16
+ req.on_complete do |response|
17
+ doc = Nokogiri::XML(response.body)
18
+ results << parse_rss(doc, url)
19
+ end
20
+ hydra.queue(req)
21
+ end
22
+ hydra.run
23
+ results.flatten
24
+ end
9
25
 
10
26
  def self.open_rss(url)
11
27
  begin
@@ -28,6 +44,10 @@ module Statement
28
44
  # Loads the feed at url with open_rss and passes the resulting document,
  # together with url, to parse_rss. Returns nil when open_rss gives back
  # nothing (nil or false); otherwise returns parse_rss's result.
  def self.from_rss(url)
29
45
  doc = open_rss(url)
30
46
  return unless doc
47
+ parse_rss(doc, url)
48
+ end
49
+
50
+ def self.parse_rss(doc, url)
31
51
  links = doc.xpath('//item')
32
52
  results = links.map do |link|
33
53
  abs_link = Utils.absolute_link(url, link.xpath('link').text)
@@ -1,3 +1,3 @@
1
1
  module Statement
2
- VERSION = "0.9.9"
2
+ VERSION = "1.0"
3
3
  end
data/statement.gemspec CHANGED
@@ -26,4 +26,5 @@ Gem::Specification.new do |spec|
26
26
  spec.add_dependency "koala"
27
27
  spec.add_dependency "oj"
28
28
  spec.add_dependency "twitter"
29
+ spec.add_dependency "typhoeus"
29
30
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: statement
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.9
4
+ version: '1.0'
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-06-10 00:00:00.000000000 Z
12
+ date: 2013-06-12 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
16
- requirement: &2160388100 !ruby/object:Gem::Requirement
16
+ requirement: &2152391660 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '1.3'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *2160388100
24
+ version_requirements: *2152391660
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rake
27
- requirement: &2160387540 !ruby/object:Gem::Requirement
27
+ requirement: &2152391200 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *2160387540
35
+ version_requirements: *2152391200
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: webmock
38
- requirement: &2160386960 !ruby/object:Gem::Requirement
38
+ requirement: &2152407040 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '0'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *2160386960
46
+ version_requirements: *2152407040
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: american_date
49
- requirement: &2160386260 !ruby/object:Gem::Requirement
49
+ requirement: &2152406620 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: '0'
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *2160386260
57
+ version_requirements: *2152406620
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: nokogiri
60
- requirement: &2160385680 !ruby/object:Gem::Requirement
60
+ requirement: &2152406180 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ! '>='
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: '0'
66
66
  type: :runtime
67
67
  prerelease: false
68
- version_requirements: *2160385680
68
+ version_requirements: *2152406180
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: koala
71
- requirement: &2160385220 !ruby/object:Gem::Requirement
71
+ requirement: &2152405700 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ! '>='
@@ -76,10 +76,10 @@ dependencies:
76
76
  version: '0'
77
77
  type: :runtime
78
78
  prerelease: false
79
- version_requirements: *2160385220
79
+ version_requirements: *2152405700
80
80
  - !ruby/object:Gem::Dependency
81
81
  name: oj
82
- requirement: &2160384700 !ruby/object:Gem::Requirement
82
+ requirement: &2152405200 !ruby/object:Gem::Requirement
83
83
  none: false
84
84
  requirements:
85
85
  - - ! '>='
@@ -87,10 +87,10 @@ dependencies:
87
87
  version: '0'
88
88
  type: :runtime
89
89
  prerelease: false
90
- version_requirements: *2160384700
90
+ version_requirements: *2152405200
91
91
  - !ruby/object:Gem::Dependency
92
92
  name: twitter
93
- requirement: &2160384260 !ruby/object:Gem::Requirement
93
+ requirement: &2152404700 !ruby/object:Gem::Requirement
94
94
  none: false
95
95
  requirements:
96
96
  - - ! '>='
@@ -98,7 +98,18 @@ dependencies:
98
98
  version: '0'
99
99
  type: :runtime
100
100
  prerelease: false
101
- version_requirements: *2160384260
101
+ version_requirements: *2152404700
102
+ - !ruby/object:Gem::Dependency
103
+ name: typhoeus
104
+ requirement: &2152403760 !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ type: :runtime
111
+ prerelease: false
112
+ version_requirements: *2152403760
102
113
  description: Crawls congressional websites for press releases.
103
114
  email:
104
115
  - dwillis@gmail.com