feedzirra 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +3 -1
- data/.travis.yml +3 -3
- data/{HISTORY.md → CHANGELOG.md} +32 -7
- data/Gemfile +4 -1
- data/Guardfile +0 -1
- data/README.md +102 -87
- data/Rakefile +1 -1
- data/feedzirra.gemspec +2 -1
- data/lib/feedzirra/core_ext/date.rb +1 -1
- data/lib/feedzirra/core_ext/string.rb +2 -2
- data/lib/feedzirra/feed.rb +16 -16
- data/lib/feedzirra/feed_entry_utilities.rb +5 -5
- data/lib/feedzirra/parser/atom_entry.rb +2 -2
- data/lib/feedzirra/parser/atom_feed_burner.rb +1 -1
- data/lib/feedzirra/parser/atom_feed_burner_entry.rb +2 -2
- data/lib/feedzirra/parser/google_docs_atom_entry.rb +1 -1
- data/lib/feedzirra/parser/itunes_rss_item.rb +2 -2
- data/lib/feedzirra/parser/itunes_rss_owner.rb +3 -3
- data/lib/feedzirra/parser/rss.rb +1 -1
- data/lib/feedzirra/parser/rss_entry.rb +7 -6
- data/lib/feedzirra/parser/rss_feed_burner.rb +1 -1
- data/lib/feedzirra/parser/rss_feed_burner_entry.rb +1 -1
- data/lib/feedzirra/parser.rb +1 -1
- data/lib/feedzirra/version.rb +1 -1
- data/lib/feedzirra.rb +1 -1
- data/spec/benchmarks/feed_benchmarks.rb +7 -7
- data/spec/benchmarks/feedzirra_benchmarks.rb +1 -1
- data/spec/benchmarks/fetching_benchmarks.rb +3 -3
- data/spec/benchmarks/parsing_benchmark.rb +1 -1
- data/spec/benchmarks/updating_benchmarks.rb +6 -6
- data/spec/feedzirra/feed_entry_utilities_spec.rb +11 -7
- data/spec/feedzirra/feed_spec.rb +85 -85
- data/spec/feedzirra/feed_utilities_spec.rb +22 -22
- data/spec/feedzirra/parser/atom_entry_spec.rb +1 -1
- data/spec/feedzirra/parser/atom_feed_burner_entry_spec.rb +7 -7
- data/spec/feedzirra/parser/atom_feed_burner_spec.rb +7 -7
- data/spec/feedzirra/parser/atom_spec.rb +9 -9
- data/spec/feedzirra/parser/itunes_rss_item_spec.rb +11 -11
- data/spec/feedzirra/parser/itunes_rss_owner_spec.rb +4 -4
- data/spec/feedzirra/parser/itunes_rss_spec.rb +8 -8
- data/spec/feedzirra/parser/rss_feed_burner_entry_spec.rb +1 -1
- data/spec/feedzirra/parser/rss_feed_burner_spec.rb +8 -8
- data/spec/feedzirra/parser/rss_spec.rb +6 -6
- data/spec/spec_helper.rb +4 -3
- metadata +10 -21
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: bba513269bf90729e08896726cf12f62ece4880a
|
4
|
+
data.tar.gz: 1ea0c67b87dafccdb90cc5485f3997311adb9f63
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 35d266b0c2c150d19cc9dd38ab2248c82799b445a74e1f9d7cd664c523cbee4bc81f819bec80e14de5abe54db5d11e0adb4cb411e0c08b1fe55c4c9a285965fe
|
7
|
+
data.tar.gz: 1b87203126ac584717625ff6d80300be97fa26239dcea493b59e17eedc922ba2481587f4719d804d27ba9a93ad4d17fe3fd31fe6c13f3450fb03b0769a72360d
|
data/.gitignore
CHANGED
data/.travis.yml
CHANGED
data/{HISTORY.md → CHANGELOG.md}
RENAMED
@@ -1,13 +1,34 @@
|
|
1
|
-
#
|
1
|
+
# Feedzirra Changelog
|
2
|
+
|
3
|
+
## 0.2.2
|
4
|
+
|
5
|
+
* General
|
6
|
+
* Switch to CHANGELOG
|
7
|
+
* Set LICENSE in gemspec
|
8
|
+
* Lots of whitespace cleaning
|
9
|
+
* README updates
|
10
|
+
|
11
|
+
* Enhancements
|
12
|
+
* Also use dc:identifier for `entry_id` [[#182](https://github.com/pauldix/feedzirra/pull/182)]
|
13
|
+
|
14
|
+
* Bug fixes
|
15
|
+
* Don't try to sanitize non-existent elements [[#174](https://github.com/pauldix/feedzirra/pull/174)]
|
16
|
+
* Fix Rspec deprecations [[#188](https://github.com/pauldix/feedzirra/pull/188)]
|
17
|
+
* Fix Travis [[#191](https://github.com/pauldix/feedzirra/pull/191)]
|
18
|
+
|
19
|
+
## 0.2.1
|
20
|
+
|
2
21
|
* Use `Time.parse_safely` in `Feed.last_modified_from_header` [[#129](https://github.com/pauldix/feedzirra/pull/129)].
|
3
22
|
* Added image to the RSS Entry Parser [[#103](https://github.com/pauldix/feedzirra/pull/103)].
|
4
23
|
* Compatibility fixes for Ruby 2.0 [[#136](https://github.com/pauldix/feedzirra/pull/136)].
|
5
24
|
* Remove gorillib dependency [[#113](https://github.com/pauldix/feedzirra/pull/113)].
|
6
25
|
|
7
|
-
|
26
|
+
## 0.2.0.rc2
|
27
|
+
|
8
28
|
* Bump sax-machine to `v0.2.0.rc1`, fixes encoding issues [[#76](https://github.com/pauldix/feedzirra/issues/76)].
|
9
29
|
|
10
|
-
|
30
|
+
## 0.2.0.rc1
|
31
|
+
|
11
32
|
* Remove ActiveSupport dependency
|
12
33
|
* No longer tethered to any version of Rails!
|
13
34
|
* Update curb (v0.8.0) and rspec (v2.10.0)
|
@@ -15,14 +36,18 @@
|
|
15
36
|
* Add Travis-CI integration
|
16
37
|
* General repository and gem maintenance
|
17
38
|
|
18
|
-
|
39
|
+
## 0.1.3
|
40
|
+
|
19
41
|
* ?
|
20
42
|
|
21
|
-
|
43
|
+
## 0.1.2
|
44
|
+
|
22
45
|
* ?
|
23
46
|
|
24
|
-
|
47
|
+
## 0.1.1
|
48
|
+
|
25
49
|
* make FeedEntries enumerable (patch by Daniel Gregoire)
|
26
50
|
|
27
|
-
|
51
|
+
## 0.1.0
|
52
|
+
|
28
53
|
* lower builder requirement to make it rails-3 friendly
|
data/Gemfile
CHANGED
data/Guardfile
CHANGED
data/README.md
CHANGED
@@ -4,7 +4,7 @@ I'd like feedback on the api and any bugs encountered on feeds in the wild. I've
|
|
4
4
|
|
5
5
|
## Description
|
6
6
|
|
7
|
-
Feedzirra is a feed library that is designed to get and update many feeds as quickly as possible. This includes using libcurl-multi through the [curb](https://github.com/taf2/curb) gem for faster http gets, and libxml through [nokogiri](https://github.com/tenderlove/nokogiri) and [sax-machine](https://github.com/pauldix/sax-machine) for faster parsing.
|
7
|
+
Feedzirra is a feed library that is designed to get and update many feeds as quickly as possible. This includes using libcurl-multi through the [curb](https://github.com/taf2/curb) gem for faster http gets, and libxml through [nokogiri](https://github.com/tenderlove/nokogiri) and [sax-machine](https://github.com/pauldix/sax-machine) for faster parsing. Feedzirra requires at least Ruby 1.9.2.
|
8
8
|
|
9
9
|
Once you have fetched feeds using Feedzirra, they can be updated using the feed objects. Feedzirra automatically inserts etag and last-modified information from the http response headers to lower bandwidth usage, eliminate unnecessary parsing, and make things speedier in general.
|
10
10
|
|
@@ -16,105 +16,121 @@ The final feature of Feedzirra is the ability to define custom parsing classes.
|
|
16
16
|
|
17
17
|
## Speedup date parsing
|
18
18
|
|
19
|
-
In MRI the date parsing code
|
19
|
+
In MRI before 1.9.3 the date parsing code was written in Ruby and was optimized for readability over speed, to speed up this part you can install the [home_run](https://github.com/jeremyevans/home_run) gem to replace it with an optimized C version. In most cases, if you are using Ruby 1.9.3+, you will not need to use [home_run](https://github.com/jeremyevans/home_run).
|
20
20
|
|
21
21
|
## Usage
|
22
22
|
|
23
23
|
[A gist of the following code](http://gist.github.com/57285)
|
24
24
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
25
|
+
```ruby
|
26
|
+
require 'feedzirra'
|
27
|
+
|
28
|
+
# fetching a single feed
|
29
|
+
feed = Feedzirra::Feed.fetch_and_parse("http://feeds.feedburner.com/PaulDixExplainsNothing")
|
30
|
+
|
31
|
+
# feed and entries accessors
|
32
|
+
feed.title # => "Paul Dix Explains Nothing"
|
33
|
+
feed.url # => "http://www.pauldix.net"
|
34
|
+
feed.feed_url # => "http://feeds.feedburner.com/PaulDixExplainsNothing"
|
35
|
+
feed.etag # => "GunxqnEP4NeYhrqq9TyVKTuDnh0"
|
36
|
+
feed.last_modified # => Sat Jan 31 17:58:16 -0500 2009 # it's a Time object
|
37
|
+
|
38
|
+
entry = feed.entries.first
|
39
|
+
entry.title # => "Ruby Http Client Library Performance"
|
40
|
+
entry.url # => "http://www.pauldix.net/2009/01/ruby-http-client-library-performance.html"
|
41
|
+
entry.author # => "Paul Dix"
|
42
|
+
entry.summary # => "..."
|
43
|
+
entry.content # => "..."
|
44
|
+
entry.published # => Thu Jan 29 17:00:19 UTC 2009 # it's a Time object
|
45
|
+
entry.categories # => ["...", "..."]
|
46
|
+
|
47
|
+
# sanitizing an entry's content
|
48
|
+
entry.title.sanitize # => returns the title with harmful stuff escaped
|
49
|
+
entry.author.sanitize # => returns the author with harmful stuff escaped
|
50
|
+
entry.content.sanitize # => returns the content with harmful stuff escaped
|
51
|
+
entry.content.sanitize! # => returns content with harmful stuff escaped and replaces original (also exists for author and title)
|
52
|
+
entry.sanitize! # => sanitizes the entry's title, author, and content in place (as in, it changes the value to clean versions)
|
53
|
+
feed.sanitize_entries! # => sanitizes all entries in place
|
54
|
+
|
55
|
+
# updating a single feed
|
56
|
+
updated_feed = Feedzirra::Feed.update(feed)
|
57
|
+
|
58
|
+
# an updated feed has the following extra accessors
|
59
|
+
updated_feed.updated? # returns true if any of the feed attributes have been modified. will return false if no new entries
|
60
|
+
updated_feed.new_entries # a collection of the entry objects that are newer than the latest in the feed before update
|
61
|
+
|
62
|
+
# fetching multiple feeds
|
63
|
+
feed_urls = ["http://feeds.feedburner.com/PaulDixExplainsNothing", "http://feeds.feedburner.com/trottercashion"]
|
64
|
+
feeds = Feedzirra::Feed.fetch_and_parse(feed_urls)
|
65
|
+
|
66
|
+
# feeds is now a hash with the feed_urls as keys and the parsed feed objects as values. If an error was thrown
|
67
|
+
# there will be a Fixnum of the http response code instead of a feed object
|
68
|
+
|
69
|
+
# updating multiple feeds. it expects a collection of feed objects
|
70
|
+
updated_feeds = Feedzirra::Feed.update(feeds.values)
|
71
|
+
|
72
|
+
# defining custom behavior on failure or success. note that a return status of 304 (not updated) will call the on_success handler
|
73
|
+
feed = Feedzirra::Feed.fetch_and_parse("http://feeds.feedburner.com/PaulDixExplainsNothing",
|
74
|
+
:on_success => lambda {|url, feed| puts feed.title },
|
75
|
+
:on_failure => lambda {|url, response_code, response_header, response_body| puts response_body })
|
76
|
+
# if a collection was passed into fetch_and_parse, the handlers will be called for each one
|
77
|
+
|
78
|
+
# the behavior for the handlers when using Feedzirra::Feed.update is slightly different. The feed passed into on_success will be
|
79
|
+
# the updated feed with the standard updated accessors. on failure it will be the original feed object passed into update
|
80
|
+
|
81
|
+
# fetching a feed via a proxy (optional)
|
82
|
+
feed = Feedzirra::Feed.fetch_and_parse("http://feeds.feedburner.com/PaulDixExplainsNothing", {:proxy_url => '10.0.0.1', :proxy_port => 3084})
|
83
|
+
|
84
|
+
```
|
83
85
|
## Extending
|
84
86
|
|
85
87
|
### Adding a feed parsing class
|
86
88
|
|
87
|
-
|
88
|
-
|
89
|
+
```ruby
|
90
|
+
# Adds a new feed parsing class, this class will be used first
|
91
|
+
Feedzirra::Feed.add_feed_class MyFeedClass
|
92
|
+
```
|
89
93
|
|
90
94
|
### Adding attributes to all feeds types / all entries types
|
91
95
|
|
92
|
-
|
93
|
-
|
94
|
-
|
96
|
+
```ruby
|
97
|
+
# Add the generator attribute to all feed types
|
98
|
+
Feedzirra::Feed.add_common_feed_element('generator')
|
99
|
+
Feedzirra::Feed.fetch_and_parse("http://www.pauldix.net/atom.xml").generator # => 'TypePad'
|
95
100
|
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
+
# Add some GeoRss information
|
102
|
+
Feedzirra::Feed.add_common_feed_entry_element('geo:lat', :as => :lat)
|
103
|
+
Feedzirra::Feed.fetch_and_parse("http://www.earthpublisher.com/georss.php").entries.each do |e|
|
104
|
+
p "lat: #{e.lat}, long: #{e.long}"
|
105
|
+
end
|
106
|
+
```
|
101
107
|
|
102
108
|
### Adding attributes to only one class
|
103
109
|
|
104
|
-
If you want to add attributes for only
|
110
|
+
If you want to add attributes for only one class you simply have to declare them in the class
|
111
|
+
|
112
|
+
```ruby
|
113
|
+
# Add some GeoRss information
|
114
|
+
require 'lib/feedzirra/parser/rss_entry'
|
115
|
+
|
116
|
+
class Feedzirra::Parser::RSSEntry
|
117
|
+
element 'geo:lat', :as => :lat
|
118
|
+
element 'geo:long', :as => :long
|
119
|
+
end
|
105
120
|
|
106
|
-
|
107
|
-
|
121
|
+
# Fetch a feed containing GeoRss info and print them
|
122
|
+
Feedzirra::Feed.fetch_and_parse("http://www.earthpublisher.com/georss.php").entries.each do |e|
|
123
|
+
p "lat: #{e.lat}, long: #{e.long}"
|
124
|
+
end
|
125
|
+
```
|
108
126
|
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
127
|
+
## Testing
|
128
|
+
|
129
|
+
Feedzirra uses [curb](https://github.com/taf2/curb) to perform requests. `curb` provides bindings for [libcurl](http://curl.haxx.se/libcurl/) and supports numerous protocols, including FILE. To test Feedzirra with local file use `file://` protocol:
|
130
|
+
```ruby
|
131
|
+
feed = Feedzirra::Feed.fetch_and_parse('file:///home/feedzirra/examples/feed.rss')
|
132
|
+
```
|
113
133
|
|
114
|
-
# Fetch a feed containing GeoRss info and print them
|
115
|
-
Feedzirra::Feed.fetch_and_parse("http://www.earthpublisher.com/georss.php").entries.each do |e|
|
116
|
-
p "lat: #{e.lat}, long: #{e.long}"
|
117
|
-
end
|
118
134
|
|
119
135
|
## Benchmarks
|
120
136
|
|
@@ -143,7 +159,6 @@ Here are some more specific TODOs.
|
|
143
159
|
* Make a feedzirra-rails gem to integrate feedzirra seamlessly with Rails and ActiveRecord.
|
144
160
|
* Add support for authenticated feeds.
|
145
161
|
* Create a super sweet DSL for defining new parsers.
|
146
|
-
* Test against Ruby 1.9.1 and fix any bugs.
|
147
162
|
* I'm not keeping track of modified on entries. Should I add this?
|
148
163
|
* Clean up the fetching code inside feed.rb so it doesn't suck so hard.
|
149
164
|
* Make the feed_spec actually mock stuff out so it doesn't hit the net.
|
@@ -152,9 +167,9 @@ Here are some more specific TODOs.
|
|
152
167
|
## LICENSE
|
153
168
|
|
154
169
|
(The MIT License)
|
155
|
-
|
170
|
+
|
156
171
|
Copyright (c) 2009-2012:
|
157
|
-
|
172
|
+
|
158
173
|
- [Paul Dix](http://pauldix.net)
|
159
174
|
- [Julien Kirch](http://archiloque.net/)
|
160
175
|
- [Ezekiel Templin](http://zeke.templ.in/)
|
@@ -166,14 +181,14 @@ without limitation the rights to use, copy, modify, merge, publish,
|
|
166
181
|
distribute, sublicense, and/or sell copies of the Software, and to
|
167
182
|
permit persons to whom the Software is furnished to do so, subject to
|
168
183
|
the following conditions:
|
169
|
-
|
184
|
+
|
170
185
|
The above copyright notice and this permission notice shall be
|
171
186
|
included in all copies or substantial portions of the Software.
|
172
|
-
|
187
|
+
|
173
188
|
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
174
189
|
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
175
190
|
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
176
191
|
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
177
192
|
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
178
193
|
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
179
|
-
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
194
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Rakefile
CHANGED
data/feedzirra.gemspec
CHANGED
@@ -4,6 +4,7 @@ require File.expand_path('../lib/feedzirra/version', __FILE__)
|
|
4
4
|
Gem::Specification.new do |s|
|
5
5
|
s.name = 'feedzirra'
|
6
6
|
s.version = Feedzirra::VERSION
|
7
|
+
s.license = 'MIT'
|
7
8
|
|
8
9
|
s.authors = ['Paul Dix', 'Julien Kirch', "Ezekiel Templin"]
|
9
10
|
s.email = 'feedzirra@googlegroups.com'
|
@@ -15,7 +16,7 @@ Gem::Specification.new do |s|
|
|
15
16
|
s.files = `git ls-files`.split("\n")
|
16
17
|
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
17
18
|
s.require_paths = ['lib']
|
18
|
-
|
19
|
+
|
19
20
|
s.platform = Gem::Platform::RUBY
|
20
21
|
|
21
22
|
s.add_dependency 'nokogiri', '~> 1.6.0'
|
data/lib/feedzirra/feed.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
|
-
module Feedzirra
|
1
|
+
module Feedzirra
|
2
2
|
class Feed
|
3
3
|
USER_AGENT = "feedzirra http://github.com/pauldix/feedzirra/tree/master"
|
4
|
-
|
4
|
+
|
5
5
|
# Takes a raw XML feed and attempts to parse it. If no parser is available a Feedzirra::NoParserAvailable exception is raised.
|
6
6
|
# You can pass a block to be called when there's an error during the parsing.
|
7
7
|
# === Parameters
|
@@ -19,7 +19,7 @@ module Feedzirra
|
|
19
19
|
end
|
20
20
|
|
21
21
|
# Determines the correct parser class to use for parsing the feed.
|
22
|
-
#
|
22
|
+
#
|
23
23
|
# === Parameters
|
24
24
|
# [xml<String>] The XML that you would like determine the parser for.
|
25
25
|
# === Returns
|
@@ -35,7 +35,7 @@ module Feedzirra
|
|
35
35
|
# [klass<Constant>] The class/constant that you want to register.
|
36
36
|
# === Returns
|
37
37
|
# A updated array of feed parser class names.
|
38
|
-
def self.add_feed_class(klass)
|
38
|
+
def self.add_feed_class(klass)
|
39
39
|
feed_classes.unshift klass
|
40
40
|
end
|
41
41
|
|
@@ -46,7 +46,7 @@ module Feedzirra
|
|
46
46
|
def self.feed_classes
|
47
47
|
@feed_classes ||= [Feedzirra::Parser::RSSFeedBurner, Feedzirra::Parser::GoogleDocsAtom, Feedzirra::Parser::AtomFeedBurner, Feedzirra::Parser::Atom, Feedzirra::Parser::ITunesRSS, Feedzirra::Parser::RSS]
|
48
48
|
end
|
49
|
-
|
49
|
+
|
50
50
|
# Makes all registered feeds types look for the passed in element to parse.
|
51
51
|
# This is actually just a call to element (a SAXMachine call) in the class.
|
52
52
|
#
|
@@ -80,7 +80,7 @@ module Feedzirra
|
|
80
80
|
def self.add_common_feed_entry_element(element_tag, options = {})
|
81
81
|
call_on_each_feed_entry :element, element_tag, options
|
82
82
|
end
|
83
|
-
|
83
|
+
|
84
84
|
# Makes all registered entry types look for the passed in elements to parse.
|
85
85
|
# This is actually just a call to element (a SAXMachine call) in the class.
|
86
86
|
#
|
@@ -144,7 +144,7 @@ module Feedzirra
|
|
144
144
|
# * all parameters defined in setup_easy
|
145
145
|
# === Returns
|
146
146
|
# A String of XML if a single URL is passed.
|
147
|
-
#
|
147
|
+
#
|
148
148
|
# A Hash if multiple URL's are passed. The key will be the URL, and the value the XML.
|
149
149
|
def self.fetch_raw(urls, options = {})
|
150
150
|
url_queue = [*urls]
|
@@ -189,13 +189,13 @@ module Feedzirra
|
|
189
189
|
url_queue = [*urls]
|
190
190
|
multi = Curl::Multi.new
|
191
191
|
responses = {}
|
192
|
-
|
192
|
+
|
193
193
|
# I broke these down so I would only try to do 30 simultaneously because
|
194
194
|
# I was getting weird errors when doing a lot. As one finishes it pops another off the queue.
|
195
195
|
url_queue.slice!(0, 30).each do |url|
|
196
196
|
add_url_to_multi(multi, url, url_queue, responses, options)
|
197
197
|
end
|
198
|
-
|
198
|
+
|
199
199
|
multi.perform
|
200
200
|
return urls.is_a?(String) ? responses.values.first : responses
|
201
201
|
end
|
@@ -212,7 +212,7 @@ module Feedzirra
|
|
212
212
|
gz = Zlib::GzipReader.new(StringIO.new(c.body_str))
|
213
213
|
xml = gz.read
|
214
214
|
gz.close
|
215
|
-
rescue Zlib::GzipFile::Error
|
215
|
+
rescue Zlib::GzipFile::Error
|
216
216
|
# Maybe this is not gzipped?
|
217
217
|
xml = c.body_str
|
218
218
|
end
|
@@ -241,15 +241,15 @@ module Feedzirra
|
|
241
241
|
feed_queue = [*feeds]
|
242
242
|
multi = Curl::Multi.new
|
243
243
|
responses = {}
|
244
|
-
|
244
|
+
|
245
245
|
feed_queue.slice!(0, 30).each do |feed|
|
246
246
|
add_feed_to_multi(multi, feed, feed_queue, responses, options)
|
247
247
|
end
|
248
|
-
|
248
|
+
|
249
249
|
multi.perform
|
250
250
|
feeds.is_a?(Array) ? responses : responses.values.first
|
251
251
|
end
|
252
|
-
|
252
|
+
|
253
253
|
# An abstraction for adding a feed by URL to the passed Curb::multi stack.
|
254
254
|
#
|
255
255
|
# === Parameters
|
@@ -274,7 +274,7 @@ module Feedzirra
|
|
274
274
|
add_url_to_multi(multi, url_queue.shift, url_queue, responses, options) unless url_queue.empty?
|
275
275
|
xml = decode_content(c)
|
276
276
|
klass = determine_feed_parser_for_xml(xml)
|
277
|
-
|
277
|
+
|
278
278
|
if klass
|
279
279
|
begin
|
280
280
|
feed = klass.parse(xml, Proc.new{|message| warn "Error while parsing [#{url}] #{message}" })
|
@@ -332,7 +332,7 @@ module Feedzirra
|
|
332
332
|
# * all parameters defined in setup_easy
|
333
333
|
# === Returns
|
334
334
|
# The updated Curl::Multi object with the request details added to it's stack.
|
335
|
-
def self.add_feed_to_multi(multi, feed, feed_queue, responses, options)
|
335
|
+
def self.add_feed_to_multi(multi, feed, feed_queue, responses, options)
|
336
336
|
easy = Curl::Easy.new(feed.feed_url) do |curl|
|
337
337
|
setup_easy curl, options
|
338
338
|
curl.headers["If-Modified-Since"] = feed.last_modified.httpdate if feed.last_modified
|
@@ -370,7 +370,7 @@ module Feedzirra
|
|
370
370
|
end
|
371
371
|
|
372
372
|
# Determines the etag from the request headers.
|
373
|
-
#
|
373
|
+
#
|
374
374
|
# === Parameters
|
375
375
|
# [header<String>] Raw request header returned from the request
|
376
376
|
# === Returns
|
@@ -37,11 +37,11 @@ module Feedzirra
|
|
37
37
|
end
|
38
38
|
|
39
39
|
def sanitize!
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
40
|
+
%w[title author summary content image].each do |name|
|
41
|
+
if self.respond_to?(name) && self.send(name).respond_to?(:sanitize!)
|
42
|
+
self.send(name).send :sanitize!
|
43
|
+
end
|
44
|
+
end
|
45
45
|
end
|
46
46
|
|
47
47
|
alias_method :last_modified, :published
|
@@ -5,7 +5,7 @@ module Feedzirra
|
|
5
5
|
class AtomEntry
|
6
6
|
include SAXMachine
|
7
7
|
include FeedEntryUtilities
|
8
|
-
|
8
|
+
|
9
9
|
element :title
|
10
10
|
element :link, :as => :url, :value => :href, :with => {:type => "text/html", :rel => "alternate"}
|
11
11
|
element :name, :as => :author
|
@@ -31,4 +31,4 @@ module Feedzirra
|
|
31
31
|
|
32
32
|
end
|
33
33
|
|
34
|
-
end
|
34
|
+
end
|
@@ -5,7 +5,7 @@ module Feedzirra
|
|
5
5
|
class AtomFeedBurnerEntry
|
6
6
|
include SAXMachine
|
7
7
|
include FeedEntryUtilities
|
8
|
-
|
8
|
+
|
9
9
|
element :title
|
10
10
|
element :name, :as => :author
|
11
11
|
element :link, :as => :url, :value => :href, :with => {:type => "text/html", :rel => "alternate"}
|
@@ -32,4 +32,4 @@ module Feedzirra
|
|
32
32
|
end
|
33
33
|
end
|
34
34
|
|
35
|
-
end
|
35
|
+
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
module Feedzirra
|
2
|
-
|
2
|
+
|
3
3
|
module Parser
|
4
4
|
# iTunes extensions to the standard RSS2.0 item
|
5
5
|
# Source: http://www.apple.com/itunes/whatson/podcasts/specs.html
|
@@ -28,5 +28,5 @@ module Feedzirra
|
|
28
28
|
element :enclosure, :value => :url, :as => :enclosure_url
|
29
29
|
end
|
30
30
|
end
|
31
|
-
|
31
|
+
|
32
32
|
end
|
data/lib/feedzirra/parser/rss.rb
CHANGED