feedjira 2.1.0 → 2.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.rubocop.yml +9 -2
- data/CHANGELOG.md +4 -0
- data/LICENSE +1 -1
- data/README.md +210 -7
- data/Rakefile +5 -0
- data/feedjira.gemspec +2 -1
- data/lib/feedjira.rb +7 -1
- data/lib/feedjira/configuration.rb +76 -0
- data/lib/feedjira/core_ext/date.rb +1 -0
- data/lib/feedjira/core_ext/string.rb +1 -0
- data/lib/feedjira/core_ext/time.rb +5 -1
- data/lib/feedjira/date_time_utilities.rb +11 -3
- data/lib/feedjira/date_time_utilities/date_time_epoch_parser.rb +13 -0
- data/lib/feedjira/date_time_utilities/date_time_language_parser.rb +2 -0
- data/lib/feedjira/date_time_utilities/date_time_pattern_parser.rb +6 -1
- data/lib/feedjira/feed.rb +87 -69
- data/lib/feedjira/feed_entry_utilities.rb +5 -2
- data/lib/feedjira/feed_utilities.rb +11 -1
- data/lib/feedjira/parser.rb +1 -1
- data/lib/feedjira/parser/atom.rb +1 -0
- data/lib/feedjira/parser/atom_entry.rb +1 -0
- data/lib/feedjira/parser/atom_feed_burner.rb +19 -2
- data/lib/feedjira/parser/atom_feed_burner_entry.rb +1 -0
- data/lib/feedjira/parser/atom_youtube.rb +1 -0
- data/lib/feedjira/parser/atom_youtube_entry.rb +1 -0
- data/lib/feedjira/parser/google_docs_atom.rb +2 -1
- data/lib/feedjira/parser/google_docs_atom_entry.rb +2 -0
- data/lib/feedjira/parser/itunes_rss.rb +1 -0
- data/lib/feedjira/parser/itunes_rss_category.rb +1 -0
- data/lib/feedjira/parser/itunes_rss_owner.rb +1 -0
- data/lib/feedjira/parser/podlove_chapter.rb +2 -0
- data/lib/feedjira/parser/rss.rb +1 -0
- data/lib/feedjira/parser/rss_feed_burner.rb +1 -0
- data/lib/feedjira/parser/rss_feed_burner_entry.rb +1 -0
- data/lib/feedjira/preprocessor.rb +2 -0
- data/lib/feedjira/version.rb +1 -1
- data/spec/feedjira/configuration_spec.rb +25 -0
- data/spec/feedjira/date_time_utilities_spec.rb +6 -0
- data/spec/feedjira/feed_spec.rb +20 -2
- data/spec/feedjira/feed_utilities_spec.rb +18 -0
- data/spec/feedjira/parser/atom_feed_burner_spec.rb +32 -1
- data/spec/sample_feeds.rb +1 -0
- data/spec/sample_feeds/GiantRobotsSmashingIntoOtherGiantRobots.xml +682 -0
- metadata +49 -29
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ea1e60152e0f43269fa66fcf0c3ca99c90696dc7
|
4
|
+
data.tar.gz: cf08a79575466cbc8dde079ed0fbad6920f9fd3a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4a6ef2e92c41f2f483f4cdfbb63a3bca09789b725291a02f971b6fbc5fc088c6133928c9a5cbf5d61f9c5faf13eedc1d4246252b8db6b12892fe3ac3bce260a3
|
7
|
+
data.tar.gz: 5ec6ff54c6e43b11701974152e4066458090e5c0beddf16657d861dd35f10f99f67eff68b722467c69e4f1e67774cfe92ee1f42b69ea0dedbd876cf7ef914a4c
|
data/.gitignore
CHANGED
data/.rubocop.yml
CHANGED
@@ -1,8 +1,15 @@
|
|
1
1
|
AllCops:
|
2
2
|
TargetRubyVersion: 2.1
|
3
3
|
|
4
|
-
Documentation:
|
5
|
-
Enabled:
|
4
|
+
Style/Documentation:
|
5
|
+
Enabled: true
|
6
|
+
Exclude:
|
7
|
+
- 'spec/**/*'
|
8
|
+
|
9
|
+
Style/DocumentationMethod:
|
10
|
+
Enabled: true
|
11
|
+
Exclude:
|
12
|
+
- 'spec/**/*'
|
6
13
|
|
7
14
|
Style/ClassAndModuleChildren:
|
8
15
|
Enabled: false
|
data/CHANGELOG.md
CHANGED
data/LICENSE
CHANGED
data/README.md
CHANGED
@@ -1,14 +1,217 @@
|
|
1
|
-
# Feedjira
|
1
|
+
# Feedjira
|
2
2
|
|
3
|
-
[travis-badge]
|
3
|
+
[![Build Status][travis-badge]][travis] [![Code Climate][code-climate-badge]][code-climate] [![Gitter][gitter-badge]][gitter]
|
4
|
+
|
5
|
+
[travis-badge]: https://travis-ci.org/feedjira/feedjira.svg?branch=master
|
4
6
|
[travis]: http://travis-ci.org/feedjira/feedjira
|
5
|
-
[code-climate-badge]: https://codeclimate.com/github/feedjira/feedjira.
|
7
|
+
[code-climate-badge]: https://codeclimate.com/github/feedjira/feedjira/badges/gpa.svg
|
6
8
|
[code-climate]: https://codeclimate.com/github/feedjira/feedjira
|
9
|
+
[gitter-badge]: https://badges.gitter.im/feedjira/feedjira.svg
|
10
|
+
[gitter]: https://gitter.im/feedjira/feedjira?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge
|
7
11
|
|
8
12
|
Feedjira (formerly Feedzirra) is a Ruby library designed to fetch and parse
|
9
|
-
feeds as quickly as possible.
|
10
|
-
|
13
|
+
feeds as quickly as possible.
|
14
|
+
|
15
|
+
## Getting Started
|
16
|
+
|
17
|
+
Feedjira is tested with Ruby versions 1.9.3 and 2.x, so like any Ruby gem, the
|
18
|
+
first step is to install the gem:
|
19
|
+
|
20
|
+
```
|
21
|
+
$ gem install feedjira
|
22
|
+
```
|
23
|
+
|
24
|
+
Or add it to your Gemfile:
|
25
|
+
|
26
|
+
```ruby
|
27
|
+
gem "feedjira"
|
28
|
+
```
|
29
|
+
|
30
|
+
## Fetching and Parsing
|
31
|
+
|
32
|
+
For many users, the `fetch_and_parse` method is what they use Feedjira for. This
|
33
|
+
method takes a url and returns a Parser object:
|
34
|
+
|
35
|
+
```ruby
|
36
|
+
url = "http://feedjira.com/blog/feed.xml"
|
37
|
+
feed = Feedjira::Feed.fetch_and_parse(url)
|
38
|
+
# => #<Feedjira::Parser::Atom...>
|
39
|
+
```
|
40
|
+
|
41
|
+
These feed objects have both the meta data for a feed and an `entries`
|
42
|
+
collection that contains all the entries that were found:
|
43
|
+
|
44
|
+
```ruby
|
45
|
+
feed.title
|
46
|
+
# => "Feedjira Blog"
|
47
|
+
feed.url
|
48
|
+
# => "http://feedjira.com/blog"
|
49
|
+
feed.entries # returns an array of Entry objects
|
50
|
+
# => [<Feedjira::Feed::Entry ...>, <Feedjira::Feed::Entry ...>, ...]
|
51
|
+
```
|
52
|
+
|
53
|
+
These entry objects contain the data parsed from the feed XML:
|
54
|
+
|
55
|
+
```ruby
|
56
|
+
entry = feed.entries.first
|
57
|
+
entry.title
|
58
|
+
# => "Announcing version 1.0"
|
59
|
+
entry.url
|
60
|
+
# => "http://feedjira.com/blog/2014-02-12-announcing-version-10.html"
|
61
|
+
```
|
62
|
+
|
63
|
+
## Just Parsing
|
64
|
+
|
65
|
+
The parsing functionality of Feedjira has been exposed so that it can be used in
|
66
|
+
isolation:
|
67
|
+
|
68
|
+
```ruby
|
69
|
+
xml = Faraday.get(url).body
|
70
|
+
feed = Feedjira::Feed.parse xml
|
71
|
+
feed.entries.first.title
|
72
|
+
# => "Announcing version 1.0"
|
73
|
+
```
|
74
|
+
|
75
|
+
## Adding a feed parsing class
|
76
|
+
|
77
|
+
When determining which parser to use for a given XML document, the following
|
78
|
+
list of parser classes is used:
|
79
|
+
|
80
|
+
* `Feedjira::Parser::RSSFeedBurner`
|
81
|
+
* `Feedjira::Parser::GoogleDocsAtom`
|
82
|
+
* `Feedjira::Parser::AtomFeedBurner`
|
83
|
+
* `Feedjira::Parser::Atom`
|
84
|
+
* `Feedjira::Parser::ITunesRSS`
|
85
|
+
* `Feedjira::Parser::RSS`
|
86
|
+
|
87
|
+
You can insert your own parser at the front of this stack by calling
|
88
|
+
`add_feed_class`, like this:
|
89
|
+
|
90
|
+
```ruby
|
91
|
+
Feedjira::Feed.add_feed_class(MyAwesomeParser)
|
92
|
+
```
|
93
|
+
|
94
|
+
Now when you `fetch_and_parse`, `MyAwesomeParser` will be the first one to get a
|
95
|
+
chance to parse the feed.
|
96
|
+
|
97
|
+
If you have the XML and just want to provide a parser class for one parse, you
|
98
|
+
can specify that using `parse_with`:
|
99
|
+
|
100
|
+
```ruby
|
101
|
+
Feedjira::Feed.parse_with(MyAwesomeParser, xml)
|
102
|
+
```
|
103
|
+
|
104
|
+
## Adding attributes to all feed types / all entry types
|
105
|
+
|
106
|
+
```ruby
|
107
|
+
# Add the generator attribute to all feed types
|
108
|
+
Feedjira::Feed.add_common_feed_element("generator")
|
109
|
+
Feedjira::Feed.fetch_and_parse("http://www.pauldix.net/atom.xml").generator
|
110
|
+
# => "TypePad"
|
111
|
+
```
|
112
|
+
|
113
|
+
## Adding attributes to only one class
|
114
|
+
|
115
|
+
If you want to add attributes for only one class you simply have to declare them
|
116
|
+
in the class:
|
117
|
+
|
118
|
+
```ruby
|
119
|
+
# Add some GeoRss information
|
120
|
+
class Feedjira::Parser::RSSEntry
|
121
|
+
element "georss:elevation", as: :elevation
|
122
|
+
end
|
123
|
+
|
124
|
+
# Fetch a feed containing GeoRss info and print them
|
125
|
+
url = "https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/significant_week.atom"
|
126
|
+
Feedjira::Feed.fetch_and_parse(url).entries.each do |entry|
|
127
|
+
puts "Elevation: #{entry.elevation}"
|
128
|
+
end
|
129
|
+
```
|
130
|
+
|
131
|
+
## Configuration
|
132
|
+
|
133
|
+
#### Stripping whitespace from XML
|
134
|
+
|
135
|
+
Feedjira can be configured to strip all whitespace but defaults to lstrip only:
|
136
|
+
|
137
|
+
```ruby
|
138
|
+
Feedjira.configure do |config|
|
139
|
+
config.strip_whitespace = true
|
140
|
+
end
|
141
|
+
```
|
142
|
+
|
143
|
+
#### Follow redirect limit
|
144
|
+
|
145
|
+
For fetching feeds, the follow redirect limit defaults to 3 but can be set:
|
146
|
+
|
147
|
+
```ruby
|
148
|
+
Feedjira.configure do |config|
|
149
|
+
config.follow_redirect_limit = 5
|
150
|
+
end
|
151
|
+
```
|
152
|
+
|
153
|
+
#### Request timeout
|
154
|
+
|
155
|
+
The request timeout defaults to 30 but can be set:
|
156
|
+
|
157
|
+
```ruby
|
158
|
+
Feedjira.configure do |config|
|
159
|
+
config.request_timeout = 45
|
160
|
+
end
|
161
|
+
```
|
162
|
+
|
163
|
+
#### User agent
|
164
|
+
|
165
|
+
The default user agent is "Feedjira #{Version}" but can be set:
|
166
|
+
|
167
|
+
```ruby
|
168
|
+
Feedjira.configure do |config|
|
169
|
+
config.user_agent = "Awesome Feed Reader"
|
170
|
+
end
|
171
|
+
```
|
172
|
+
|
173
|
+
## Testing
|
174
|
+
|
175
|
+
Feedjira uses [faraday][] to perform requests, so testing Feedjira is really
|
176
|
+
about [stubbing out faraday requests][stub].
|
177
|
+
|
178
|
+
[faraday]: https://github.com/lostisland/faraday
|
179
|
+
[stub]: https://github.com/lostisland/faraday#using-faraday-for-testing
|
180
|
+
|
181
|
+
## Projects that use Feedjira
|
182
|
+
|
183
|
+
Feedjira is used in some awesome projects around the web - from RSS readers to
|
184
|
+
add-ons and everything in between. Here are some of them:
|
185
|
+
|
186
|
+
* [Feedbin][]: Feedbin bills itself as a fast, simple RSS reader that delivers a
|
187
|
+
great reading experience. It's a paid RSS reader that integrates with mobile
|
188
|
+
apps and it even has a fully featured API!
|
189
|
+
|
190
|
+
* [Stringer][]: Stringer is a self-hosted, anti-social RSS reader. It's an
|
191
|
+
open-source project that's easy to deploy to any host, there's even a
|
192
|
+
one-click button to deploy on Heroku.
|
193
|
+
|
194
|
+
* [BlogFeeder][]: BlogFeeder is a paid Shopify App that makes it easy for you to
|
195
|
+
import any external blog into your Shopify store. It helps improve your
|
196
|
+
store's SEO and keeps your blogs in sync, plus a lot more.
|
197
|
+
|
198
|
+
* [Feedbunch][]: Feedbunch is an open source feed reader built to fill the hole
|
199
|
+
left by Google Reader. It aims to support all features of Google Reader and
|
200
|
+
actually improve on others.
|
201
|
+
|
202
|
+
* [The Old Reader][old]: The Old Reader advertises as the ultimate social RSS
|
203
|
+
reader. It's free to start and also has a paid premium version. There's an API
|
204
|
+
and it integrates with many different mobile apps.
|
205
|
+
|
206
|
+
* [Solve for All][solve]: Solve for All combines search engine and feed parsing
|
207
|
+
while protecting your privacy. It's even extendable by the community!
|
11
208
|
|
12
|
-
[
|
209
|
+
[Feedbin]: https://feedbin.com/
|
210
|
+
[Stringer]: https://github.com/swanson/stringer
|
211
|
+
[BlogFeeder]: https://apps.shopify.com/blogfeeder
|
212
|
+
[Feedbunch]: https://github.com/amatriain/feedbunch
|
213
|
+
[old]: http://theoldreader.com/
|
214
|
+
[solve]: https://solveforall.com/
|
13
215
|
|
14
|
-
|
216
|
+
Note: to get your project on this list, simply [send an email](mailto:feedjira@gmail.com)
|
217
|
+
with your project's details.
|
data/Rakefile
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
require 'rspec/core/rake_task'
|
2
2
|
require 'rubocop/rake_task'
|
3
|
+
require 'yard'
|
3
4
|
|
4
5
|
RSpec::Core::RakeTask.new(:spec) do |t|
|
5
6
|
t.verbose = false
|
@@ -9,4 +10,8 @@ RuboCop::RakeTask.new(:rubocop) do |t|
|
|
9
10
|
t.options = ['--display-cop-names']
|
10
11
|
end
|
11
12
|
|
13
|
+
YARD::Rake::YardocTask.new do |t|
|
14
|
+
t.files = ['lib/**/*.rb', '-', 'LICENSE']
|
15
|
+
end
|
16
|
+
|
12
17
|
task default: [:spec, :rubocop]
|
data/feedjira.gemspec
CHANGED
@@ -25,6 +25,7 @@ Gem::Specification.new do |s|
|
|
25
25
|
s.add_development_dependency 'danger'
|
26
26
|
s.add_development_dependency 'danger-commit_lint'
|
27
27
|
s.add_development_dependency 'rspec'
|
28
|
-
s.add_development_dependency 'rubocop'
|
28
|
+
s.add_development_dependency 'rubocop', '0.46'
|
29
29
|
s.add_development_dependency 'vcr'
|
30
|
+
s.add_development_dependency 'yard', '~> 0.9'
|
30
31
|
end
|
data/lib/feedjira.rb
CHANGED
@@ -3,10 +3,13 @@ require 'faraday'
|
|
3
3
|
require 'faraday_middleware'
|
4
4
|
require 'sax-machine'
|
5
5
|
require 'loofah'
|
6
|
+
require 'logger'
|
6
7
|
|
7
8
|
require 'feedjira/core_ext'
|
8
|
-
require 'feedjira/
|
9
|
+
require 'feedjira/configuration'
|
10
|
+
require 'feedjira/date_time_utilities/date_time_epoch_parser'
|
9
11
|
require 'feedjira/date_time_utilities/date_time_language_parser'
|
12
|
+
require 'feedjira/date_time_utilities/date_time_pattern_parser'
|
10
13
|
require 'feedjira/date_time_utilities'
|
11
14
|
require 'feedjira/date_time_utilities'
|
12
15
|
require 'feedjira/feed_entry_utilities'
|
@@ -35,7 +38,10 @@ require 'feedjira/parser/google_docs_atom'
|
|
35
38
|
require 'feedjira/parser/atom_youtube_entry'
|
36
39
|
require 'feedjira/parser/atom_youtube'
|
37
40
|
|
41
|
+
# Feedjira
|
38
42
|
module Feedjira
|
39
43
|
class NoParserAvailable < StandardError; end
|
40
44
|
class FetchFailure < StandardError; end
|
45
|
+
|
46
|
+
extend Configuration
|
41
47
|
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
# Feedjira::Configuration
|
2
|
+
module Feedjira
|
3
|
+
# Provides global configuration options for Feedjira
|
4
|
+
#
|
5
|
+
# @example Set configuration options using a block
|
6
|
+
# Feedjira.configure do |config|
|
7
|
+
# config.strip_whitespace = true
|
8
|
+
# end
|
9
|
+
module Configuration
|
10
|
+
attr_accessor(
|
11
|
+
:follow_redirect_limit,
|
12
|
+
:logger,
|
13
|
+
:parsers,
|
14
|
+
:request_timeout,
|
15
|
+
:strip_whitespace,
|
16
|
+
:user_agent
|
17
|
+
)
|
18
|
+
|
19
|
+
# Modify Feedjira's current configuration
|
20
|
+
#
|
21
|
+
# @yieldparam [Feedjira] config current Feedjira config
|
22
|
+
# @example
|
23
|
+
# Feedjira.configure do |config|
|
24
|
+
# config.strip_whitespace = true
|
25
|
+
# end
|
26
|
+
def configure
|
27
|
+
yield self
|
28
|
+
end
|
29
|
+
|
30
|
+
# Reset Feedjira's configuration to defaults
|
31
|
+
#
|
32
|
+
# @example
|
33
|
+
# Feedjira.reset_configuration!
|
34
|
+
def reset_configuration!
|
35
|
+
set_default_configuration
|
36
|
+
end
|
37
|
+
|
38
|
+
# @private
|
39
|
+
def self.extended(base)
|
40
|
+
base.set_default_configuration
|
41
|
+
end
|
42
|
+
|
43
|
+
# @private
|
44
|
+
def set_default_configuration
|
45
|
+
self.follow_redirect_limit = 3
|
46
|
+
self.logger = default_logger
|
47
|
+
self.parsers = default_parsers
|
48
|
+
self.request_timeout = 30
|
49
|
+
self.strip_whitespace = false
|
50
|
+
self.user_agent = "Feedjira #{Feedjira::VERSION}"
|
51
|
+
end
|
52
|
+
|
53
|
+
private
|
54
|
+
|
55
|
+
# @private
|
56
|
+
def default_logger
|
57
|
+
Logger.new(STDOUT).tap do |logger|
|
58
|
+
logger.progname = 'Feedjira'
|
59
|
+
logger.level = Logger::WARN
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
# @private
|
64
|
+
def default_parsers
|
65
|
+
[
|
66
|
+
Feedjira::Parser::RSSFeedBurner,
|
67
|
+
Feedjira::Parser::GoogleDocsAtom,
|
68
|
+
Feedjira::Parser::AtomYoutube,
|
69
|
+
Feedjira::Parser::AtomFeedBurner,
|
70
|
+
Feedjira::Parser::Atom,
|
71
|
+
Feedjira::Parser::ITunesRSS,
|
72
|
+
Feedjira::Parser::RSS
|
73
|
+
]
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'time'
|
2
2
|
require 'date'
|
3
3
|
|
4
|
+
# rubocop:disable Style/DocumentationMethod
|
4
5
|
class Time
|
5
6
|
# Parse a time string and convert it to UTC without raising errors.
|
6
7
|
# Parses a flattened 14-digit time (YYYYmmddHHMMSS) as UTC.
|
@@ -10,6 +11,7 @@ class Time
|
|
10
11
|
#
|
11
12
|
# === Returns
|
12
13
|
# A Time instance in UTC or nil if there were errors while parsing.
|
14
|
+
# rubocop:disable Metrics/MethodLength
|
13
15
|
def self.parse_safely(dt)
|
14
16
|
if dt.is_a?(Time)
|
15
17
|
dt.utc
|
@@ -18,7 +20,9 @@ class Time
|
|
18
20
|
elsif dt.respond_to? :to_s
|
19
21
|
parse_string_safely dt.to_s
|
20
22
|
end
|
21
|
-
rescue StandardError
|
23
|
+
rescue StandardError => e
|
24
|
+
Feedjira.logger.debug { "Failed to parse time #{dt}" }
|
25
|
+
Feedjira.logger.debug(e)
|
22
26
|
nil
|
23
27
|
end
|
24
28
|
|