feedjira 2.1.0 → 2.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.rubocop.yml +9 -2
- data/CHANGELOG.md +4 -0
- data/LICENSE +1 -1
- data/README.md +210 -7
- data/Rakefile +5 -0
- data/feedjira.gemspec +2 -1
- data/lib/feedjira.rb +7 -1
- data/lib/feedjira/configuration.rb +76 -0
- data/lib/feedjira/core_ext/date.rb +1 -0
- data/lib/feedjira/core_ext/string.rb +1 -0
- data/lib/feedjira/core_ext/time.rb +5 -1
- data/lib/feedjira/date_time_utilities.rb +11 -3
- data/lib/feedjira/date_time_utilities/date_time_epoch_parser.rb +13 -0
- data/lib/feedjira/date_time_utilities/date_time_language_parser.rb +2 -0
- data/lib/feedjira/date_time_utilities/date_time_pattern_parser.rb +6 -1
- data/lib/feedjira/feed.rb +87 -69
- data/lib/feedjira/feed_entry_utilities.rb +5 -2
- data/lib/feedjira/feed_utilities.rb +11 -1
- data/lib/feedjira/parser.rb +1 -1
- data/lib/feedjira/parser/atom.rb +1 -0
- data/lib/feedjira/parser/atom_entry.rb +1 -0
- data/lib/feedjira/parser/atom_feed_burner.rb +19 -2
- data/lib/feedjira/parser/atom_feed_burner_entry.rb +1 -0
- data/lib/feedjira/parser/atom_youtube.rb +1 -0
- data/lib/feedjira/parser/atom_youtube_entry.rb +1 -0
- data/lib/feedjira/parser/google_docs_atom.rb +2 -1
- data/lib/feedjira/parser/google_docs_atom_entry.rb +2 -0
- data/lib/feedjira/parser/itunes_rss.rb +1 -0
- data/lib/feedjira/parser/itunes_rss_category.rb +1 -0
- data/lib/feedjira/parser/itunes_rss_owner.rb +1 -0
- data/lib/feedjira/parser/podlove_chapter.rb +2 -0
- data/lib/feedjira/parser/rss.rb +1 -0
- data/lib/feedjira/parser/rss_feed_burner.rb +1 -0
- data/lib/feedjira/parser/rss_feed_burner_entry.rb +1 -0
- data/lib/feedjira/preprocessor.rb +2 -0
- data/lib/feedjira/version.rb +1 -1
- data/spec/feedjira/configuration_spec.rb +25 -0
- data/spec/feedjira/date_time_utilities_spec.rb +6 -0
- data/spec/feedjira/feed_spec.rb +20 -2
- data/spec/feedjira/feed_utilities_spec.rb +18 -0
- data/spec/feedjira/parser/atom_feed_burner_spec.rb +32 -1
- data/spec/sample_feeds.rb +1 -0
- data/spec/sample_feeds/GiantRobotsSmashingIntoOtherGiantRobots.xml +682 -0
- metadata +49 -29
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ea1e60152e0f43269fa66fcf0c3ca99c90696dc7
|
4
|
+
data.tar.gz: cf08a79575466cbc8dde079ed0fbad6920f9fd3a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4a6ef2e92c41f2f483f4cdfbb63a3bca09789b725291a02f971b6fbc5fc088c6133928c9a5cbf5d61f9c5faf13eedc1d4246252b8db6b12892fe3ac3bce260a3
|
7
|
+
data.tar.gz: 5ec6ff54c6e43b11701974152e4066458090e5c0beddf16657d861dd35f10f99f67eff68b722467c69e4f1e67774cfe92ee1f42b69ea0dedbd876cf7ef914a4c
|
data/.gitignore
CHANGED
data/.rubocop.yml
CHANGED
@@ -1,8 +1,15 @@
|
|
1
1
|
AllCops:
|
2
2
|
TargetRubyVersion: 2.1
|
3
3
|
|
4
|
-
Documentation:
|
5
|
-
Enabled:
|
4
|
+
Style/Documentation:
|
5
|
+
Enabled: true
|
6
|
+
Exclude:
|
7
|
+
- 'spec/**/*'
|
8
|
+
|
9
|
+
Style/DocumentationMethod:
|
10
|
+
Enabled: true
|
11
|
+
Exclude:
|
12
|
+
- 'spec/**/*'
|
6
13
|
|
7
14
|
Style/ClassAndModuleChildren:
|
8
15
|
Enabled: false
|
data/CHANGELOG.md
CHANGED
data/LICENSE
CHANGED
data/README.md
CHANGED
@@ -1,14 +1,217 @@
|
|
1
|
-
# Feedjira
|
1
|
+
# Feedjira
|
2
2
|
|
3
|
-
[travis-badge]
|
3
|
+
[![Build Status][travis-badge]][travis] [![Code Climate][code-climate-badge]][code-climate] [![Gitter][gitter-badge]][gitter]
|
4
|
+
|
5
|
+
[travis-badge]: https://travis-ci.org/feedjira/feedjira.svg?branch=master
|
4
6
|
[travis]: http://travis-ci.org/feedjira/feedjira
|
5
|
-
[code-climate-badge]: https://codeclimate.com/github/feedjira/feedjira.
|
7
|
+
[code-climate-badge]: https://codeclimate.com/github/feedjira/feedjira/badges/gpa.svg
|
6
8
|
[code-climate]: https://codeclimate.com/github/feedjira/feedjira
|
9
|
+
[gitter-badge]: https://badges.gitter.im/feedjira/feedjira.svg
|
10
|
+
[gitter]: https://gitter.im/feedjira/feedjira?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge
|
7
11
|
|
8
12
|
Feedjira (formerly Feedzirra) is a Ruby library designed to fetch and parse
|
9
|
-
feeds as quickly as possible.
|
10
|
-
|
13
|
+
feeds as quickly as possible.
|
14
|
+
|
15
|
+
## Getting Started
|
16
|
+
|
17
|
+
Feedjira is tested with Ruby versions 1.9.3 and 2.x. As with any Ruby gem, the
|
18
|
+
first step is to install the gem:
|
19
|
+
|
20
|
+
```
|
21
|
+
$ gem install feedjira
|
22
|
+
```
|
23
|
+
|
24
|
+
Or add it to your Gemfile:
|
25
|
+
|
26
|
+
```ruby
|
27
|
+
gem "feedjira"
|
28
|
+
```
|
29
|
+
|
30
|
+
## Fetching and Parsing
|
31
|
+
|
32
|
+
For many users, the `fetch_and_parse` method is what they use Feedjira for. This
|
33
|
+
method takes a url and returns a Parser object:
|
34
|
+
|
35
|
+
```ruby
|
36
|
+
url = "http://feedjira.com/blog/feed.xml"
|
37
|
+
feed = Feedjira::Feed.fetch_and_parse(url)
|
38
|
+
# => #<Feedjira::Parser::Atom...>
|
39
|
+
```
|
40
|
+
|
41
|
+
These feed objects have both the meta data for a feed and an `entries`
|
42
|
+
collection that contains all the entries that were found:
|
43
|
+
|
44
|
+
```ruby
|
45
|
+
feed.title
|
46
|
+
# => "Feedjira Blog"
|
47
|
+
feed.url
|
48
|
+
# => "http://feedjira.com/blog"
|
49
|
+
feed.entries # returns an array of Entry objects
|
50
|
+
# => [<Feedjira::Feed::Entry ...>, <Feedjira::Feed::Entry ...>, ...]
|
51
|
+
```
|
52
|
+
|
53
|
+
These entry objects contain the data parsed from the feed XML:
|
54
|
+
|
55
|
+
```ruby
|
56
|
+
entry = feed.entries.first
|
57
|
+
entry.title
|
58
|
+
# => "Announcing version 1.0"
|
59
|
+
entry.url
|
60
|
+
# => "http://feedjira.com/blog/2014-02-12-announcing-version-10.html"
|
61
|
+
```
|
62
|
+
|
63
|
+
## Just Parsing
|
64
|
+
|
65
|
+
The parsing functionality of Feedjira has been exposed so that it can be used in
|
66
|
+
isolation:
|
67
|
+
|
68
|
+
```ruby
|
69
|
+
xml = Faraday.get(url).body
|
70
|
+
feed = Feedjira::Feed.parse xml
|
71
|
+
feed.entries.first.title
|
72
|
+
# => "Announcing version 1.0"
|
73
|
+
```
|
74
|
+
|
75
|
+
## Adding a feed parsing class
|
76
|
+
|
77
|
+
When determining which parser to use for a given XML document, the following
|
78
|
+
list of parser classes is used:
|
79
|
+
|
80
|
+
* `Feedjira::Parser::RSSFeedBurner`
|
81
|
+
* `Feedjira::Parser::GoogleDocsAtom`
|
82
|
+
* `Feedjira::Parser::AtomFeedBurner`
|
83
|
+
* `Feedjira::Parser::Atom`
|
84
|
+
* `Feedjira::Parser::ITunesRSS`
|
85
|
+
* `Feedjira::Parser::RSS`
|
86
|
+
|
87
|
+
You can insert your own parser at the front of this stack by calling
|
88
|
+
`add_feed_class`, like this:
|
89
|
+
|
90
|
+
```ruby
|
91
|
+
Feedjira::Feed.add_feed_class(MyAwesomeParser)
|
92
|
+
```
|
93
|
+
|
94
|
+
Now when you `fetch_and_parse`, `MyAwesomeParser` will be the first one to get a
|
95
|
+
chance to parse the feed.
|
96
|
+
|
97
|
+
If you have the XML and just want to provide a parser class for one parse, you
|
98
|
+
can specify that using `parse_with`:
|
99
|
+
|
100
|
+
```ruby
|
101
|
+
Feedjira::Feed.parse_with(MyAwesomeParser, xml)
|
102
|
+
```
|
103
|
+
|
104
|
+
## Adding attributes to all feed types / all entry types
|
105
|
+
|
106
|
+
```ruby
|
107
|
+
# Add the generator attribute to all feed types
|
108
|
+
Feedjira::Feed.add_common_feed_element("generator")
|
109
|
+
Feedjira::Feed.fetch_and_parse("http://www.pauldix.net/atom.xml").generator
|
110
|
+
# => "TypePad"
|
111
|
+
```
|
112
|
+
|
113
|
+
## Adding attributes to only one class
|
114
|
+
|
115
|
+
If you want to add attributes for only one class you simply have to declare them
|
116
|
+
in the class
|
117
|
+
|
118
|
+
```ruby
|
119
|
+
# Add some GeoRss information
|
120
|
+
class Feedjira::Parser::RSSEntry
|
121
|
+
element "georss:elevation", as: :elevation
|
122
|
+
end
|
123
|
+
|
124
|
+
# Fetch a feed containing GeoRss info and print it
|
125
|
+
url = "https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/significant_week.atom"
|
126
|
+
Feedjira::Feed.fetch_and_parse(url).entries.each do |entry|
|
127
|
+
puts "Elevation: #{entry.elevation}"
|
128
|
+
end
|
129
|
+
```
|
130
|
+
|
131
|
+
## Configuration
|
132
|
+
|
133
|
+
#### Stripping whitespace from XML
|
134
|
+
|
135
|
+
Feedjira can be configured to strip all whitespace but defaults to lstrip only:
|
136
|
+
|
137
|
+
```ruby
|
138
|
+
Feedjira.configure do |config|
|
139
|
+
config.strip_whitespace = true
|
140
|
+
end
|
141
|
+
```
|
142
|
+
|
143
|
+
#### Follow redirect limit
|
144
|
+
|
145
|
+
For fetching feeds, the follow redirect limit defaults to 3 but can be set:
|
146
|
+
|
147
|
+
```ruby
|
148
|
+
Feedjira.configure do |config|
|
149
|
+
config.follow_redirect_limit = 5
|
150
|
+
end
|
151
|
+
```
|
152
|
+
|
153
|
+
#### Request timeout
|
154
|
+
|
155
|
+
The request timeout defaults to 30 but can be set:
|
156
|
+
|
157
|
+
```ruby
|
158
|
+
Feedjira.configure do |config|
|
159
|
+
config.request_timeout = 45
|
160
|
+
end
|
161
|
+
```
|
162
|
+
|
163
|
+
#### User agent
|
164
|
+
|
165
|
+
The default user agent is "Feedjira #{Version}" but can be set:
|
166
|
+
|
167
|
+
```ruby
|
168
|
+
Feedjira.configure do |config|
|
169
|
+
config.user_agent = "Awesome Feed Reader"
|
170
|
+
end
|
171
|
+
```
|
172
|
+
|
173
|
+
## Testing
|
174
|
+
|
175
|
+
Feedjira uses [faraday][] to perform requests, so testing Feedjira is really
|
176
|
+
about [stubbing out faraday requests][stub].
|
177
|
+
|
178
|
+
[faraday]: https://github.com/lostisland/faraday
|
179
|
+
[stub]: https://github.com/lostisland/faraday#using-faraday-for-testing
|
180
|
+
|
181
|
+
## Projects that use Feedjira
|
182
|
+
|
183
|
+
Feedjira is used in some awesome projects around the web - from RSS readers to
|
184
|
+
add-ons and everything in between. Here are some of them:
|
185
|
+
|
186
|
+
* [Feedbin][]: Feedbin bills itself as a fast, simple RSS reader that delivers a
|
187
|
+
great reading experience. It's a paid RSS reader that integrates with mobile
|
188
|
+
apps and it even has a fully featured API!
|
189
|
+
|
190
|
+
* [Stringer][]: Stringer is a self-hosted, anti-social RSS reader. It's an
|
191
|
+
open-source project that's easy to deploy to any host; there's even a
|
192
|
+
one-click button to deploy on Heroku.
|
193
|
+
|
194
|
+
* [BlogFeeder][]: BlogFeeder is a paid Shopify App that makes it easy for you to
|
195
|
+
import any external blog into your Shopify store. It helps improve your
|
196
|
+
store's SEO and keeps your blogs in sync, plus a lot more.
|
197
|
+
|
198
|
+
* [Feedbunch][]: Feedbunch is an open source feed reader built to fill the hole
|
199
|
+
left by Google Reader. It aims to support all features of Google Reader and
|
200
|
+
actually improve on others.
|
201
|
+
|
202
|
+
* [The Old Reader][old]: The Old Reader advertises as the ultimate social RSS
|
203
|
+
reader. It's free to start and also has a paid premium version. There's an API
|
204
|
+
and it integrates with many different mobile apps.
|
205
|
+
|
206
|
+
* [Solve for All][solve]: Solve for All combines search engine and feed parsing
|
207
|
+
while protecting your privacy. It's even extendable by the community!
|
11
208
|
|
12
|
-
[
|
209
|
+
[Feedbin]: https://feedbin.com/
|
210
|
+
[Stringer]: https://github.com/swanson/stringer
|
211
|
+
[BlogFeeder]: https://apps.shopify.com/blogfeeder
|
212
|
+
[Feedbunch]: https://github.com/amatriain/feedbunch
|
213
|
+
[old]: http://theoldreader.com/
|
214
|
+
[solve]: https://solveforall.com/
|
13
215
|
|
14
|
-
|
216
|
+
Note: to get your project on this list, simply [send an email](mailto:feedjira@gmail.com)
|
217
|
+
with your project's details.
|
data/Rakefile
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
require 'rspec/core/rake_task'
|
2
2
|
require 'rubocop/rake_task'
|
3
|
+
require 'yard'
|
3
4
|
|
4
5
|
RSpec::Core::RakeTask.new(:spec) do |t|
|
5
6
|
t.verbose = false
|
@@ -9,4 +10,8 @@ RuboCop::RakeTask.new(:rubocop) do |t|
|
|
9
10
|
t.options = ['--display-cop-names']
|
10
11
|
end
|
11
12
|
|
13
|
+
YARD::Rake::YardocTask.new do |t|
|
14
|
+
t.files = ['lib/**/*.rb', '-', 'LICENSE']
|
15
|
+
end
|
16
|
+
|
12
17
|
task default: [:spec, :rubocop]
|
data/feedjira.gemspec
CHANGED
@@ -25,6 +25,7 @@ Gem::Specification.new do |s|
|
|
25
25
|
s.add_development_dependency 'danger'
|
26
26
|
s.add_development_dependency 'danger-commit_lint'
|
27
27
|
s.add_development_dependency 'rspec'
|
28
|
-
s.add_development_dependency 'rubocop'
|
28
|
+
s.add_development_dependency 'rubocop', '0.46'
|
29
29
|
s.add_development_dependency 'vcr'
|
30
|
+
s.add_development_dependency 'yard', '~> 0.9'
|
30
31
|
end
|
data/lib/feedjira.rb
CHANGED
@@ -3,10 +3,13 @@ require 'faraday'
|
|
3
3
|
require 'faraday_middleware'
|
4
4
|
require 'sax-machine'
|
5
5
|
require 'loofah'
|
6
|
+
require 'logger'
|
6
7
|
|
7
8
|
require 'feedjira/core_ext'
|
8
|
-
require 'feedjira/
|
9
|
+
require 'feedjira/configuration'
|
10
|
+
require 'feedjira/date_time_utilities/date_time_epoch_parser'
|
9
11
|
require 'feedjira/date_time_utilities/date_time_language_parser'
|
12
|
+
require 'feedjira/date_time_utilities/date_time_pattern_parser'
|
10
13
|
require 'feedjira/date_time_utilities'
|
11
14
|
require 'feedjira/date_time_utilities'
|
12
15
|
require 'feedjira/feed_entry_utilities'
|
@@ -35,7 +38,10 @@ require 'feedjira/parser/google_docs_atom'
|
|
35
38
|
require 'feedjira/parser/atom_youtube_entry'
|
36
39
|
require 'feedjira/parser/atom_youtube'
|
37
40
|
|
41
|
+
# Feedjira
|
38
42
|
module Feedjira
|
39
43
|
class NoParserAvailable < StandardError; end
|
40
44
|
class FetchFailure < StandardError; end
|
45
|
+
|
46
|
+
extend Configuration
|
41
47
|
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
# Feedjira::Configuration
|
2
|
+
module Feedjira
|
3
|
+
# Provides global configuration options for Feedjira
|
4
|
+
#
|
5
|
+
# @example Set configuration options using a block
|
6
|
+
# Feedjira.configure do |config|
|
7
|
+
# config.strip_whitespace = true
|
8
|
+
# end
|
9
|
+
module Configuration
|
10
|
+
attr_accessor(
|
11
|
+
:follow_redirect_limit,
|
12
|
+
:logger,
|
13
|
+
:parsers,
|
14
|
+
:request_timeout,
|
15
|
+
:strip_whitespace,
|
16
|
+
:user_agent
|
17
|
+
)
|
18
|
+
|
19
|
+
# Modify Feedjira's current configuration
|
20
|
+
#
|
21
|
+
# @yieldparam [Feedjira] config current Feedjira config
|
22
|
+
# @example
|
23
|
+
# Feedjira.configure do |config|
|
24
|
+
# config.strip_whitespace = true
|
25
|
+
# end
|
26
|
+
def configure
|
27
|
+
yield self
|
28
|
+
end
|
29
|
+
|
30
|
+
# Reset Feedjira's configuration to defaults
|
31
|
+
#
|
32
|
+
# @example
|
33
|
+
# Feedjira.reset_configuration!
|
34
|
+
def reset_configuration!
|
35
|
+
set_default_configuration
|
36
|
+
end
|
37
|
+
|
38
|
+
# @private
|
39
|
+
def self.extended(base)
|
40
|
+
base.set_default_configuration
|
41
|
+
end
|
42
|
+
|
43
|
+
# @private
|
44
|
+
def set_default_configuration
|
45
|
+
self.follow_redirect_limit = 3
|
46
|
+
self.logger = default_logger
|
47
|
+
self.parsers = default_parsers
|
48
|
+
self.request_timeout = 30
|
49
|
+
self.strip_whitespace = false
|
50
|
+
self.user_agent = "Feedjira #{Feedjira::VERSION}"
|
51
|
+
end
|
52
|
+
|
53
|
+
private
|
54
|
+
|
55
|
+
# @private
|
56
|
+
def default_logger
|
57
|
+
Logger.new(STDOUT).tap do |logger|
|
58
|
+
logger.progname = 'Feedjira'
|
59
|
+
logger.level = Logger::WARN
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
# @private
|
64
|
+
def default_parsers
|
65
|
+
[
|
66
|
+
Feedjira::Parser::RSSFeedBurner,
|
67
|
+
Feedjira::Parser::GoogleDocsAtom,
|
68
|
+
Feedjira::Parser::AtomYoutube,
|
69
|
+
Feedjira::Parser::AtomFeedBurner,
|
70
|
+
Feedjira::Parser::Atom,
|
71
|
+
Feedjira::Parser::ITunesRSS,
|
72
|
+
Feedjira::Parser::RSS
|
73
|
+
]
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'time'
|
2
2
|
require 'date'
|
3
3
|
|
4
|
+
# rubocop:disable Style/DocumentationMethod
|
4
5
|
class Time
|
5
6
|
# Parse a time string and convert it to UTC without raising errors.
|
6
7
|
# Parses a flattened 14-digit time (YYYYmmddHHMMSS) as UTC.
|
@@ -10,6 +11,7 @@ class Time
|
|
10
11
|
#
|
11
12
|
# === Returns
|
12
13
|
# A Time instance in UTC or nil if there were errors while parsing.
|
14
|
+
# rubocop:disable Metrics/MethodLength
|
13
15
|
def self.parse_safely(dt)
|
14
16
|
if dt.is_a?(Time)
|
15
17
|
dt.utc
|
@@ -18,7 +20,9 @@ class Time
|
|
18
20
|
elsif dt.respond_to? :to_s
|
19
21
|
parse_string_safely dt.to_s
|
20
22
|
end
|
21
|
-
rescue StandardError
|
23
|
+
rescue StandardError => e
|
24
|
+
Feedjira.logger.debug { "Failed to parse time #{dt}" }
|
25
|
+
Feedjira.logger.debug(e)
|
22
26
|
nil
|
23
27
|
end
|
24
28
|
|