feedjira 2.0.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.gitignore +2 -0
- data/.rubocop.yml +15 -0
- data/.travis.yml +31 -12
- data/CHANGELOG.md +34 -1
- data/Dangerfile +1 -0
- data/Gemfile +2 -1
- data/LICENSE +1 -1
- data/README.md +210 -7
- data/Rakefile +11 -1
- data/feedjira.gemspec +17 -14
- data/fixtures/vcr_cassettes/fetch_failure.yml +62 -0
- data/fixtures/vcr_cassettes/parse_error.yml +222 -0
- data/fixtures/vcr_cassettes/success.yml +281 -0
- data/lib/feedjira/configuration.rb +76 -0
- data/lib/feedjira/core_ext/date.rb +3 -1
- data/lib/feedjira/core_ext/string.rb +2 -1
- data/lib/feedjira/core_ext/time.rb +24 -17
- data/lib/feedjira/core_ext.rb +3 -3
- data/lib/feedjira/date_time_utilities/date_time_epoch_parser.rb +13 -0
- data/lib/feedjira/date_time_utilities/date_time_language_parser.rb +24 -0
- data/lib/feedjira/date_time_utilities/date_time_pattern_parser.rb +34 -0
- data/lib/feedjira/date_time_utilities.rb +32 -0
- data/lib/feedjira/feed.rb +89 -62
- data/lib/feedjira/feed_entry_utilities.rb +20 -19
- data/lib/feedjira/feed_utilities.rb +37 -22
- data/lib/feedjira/parser/atom.rb +10 -8
- data/lib/feedjira/parser/atom_entry.rb +11 -13
- data/lib/feedjira/parser/atom_feed_burner.rb +27 -10
- data/lib/feedjira/parser/atom_feed_burner_entry.rb +12 -14
- data/lib/feedjira/parser/atom_youtube.rb +21 -0
- data/lib/feedjira/parser/atom_youtube_entry.rb +30 -0
- data/lib/feedjira/parser/google_docs_atom.rb +8 -7
- data/lib/feedjira/parser/google_docs_atom_entry.rb +13 -11
- data/lib/feedjira/parser/itunes_rss.rb +41 -22
- data/lib/feedjira/parser/itunes_rss_category.rb +39 -0
- data/lib/feedjira/parser/itunes_rss_item.rb +32 -20
- data/lib/feedjira/parser/itunes_rss_owner.rb +4 -4
- data/lib/feedjira/parser/podlove_chapter.rb +22 -0
- data/lib/feedjira/parser/rss.rb +11 -8
- data/lib/feedjira/parser/rss_entry.rb +17 -21
- data/lib/feedjira/parser/rss_feed_burner.rb +5 -6
- data/lib/feedjira/parser/rss_feed_burner_entry.rb +24 -28
- data/lib/feedjira/parser/rss_image.rb +15 -0
- data/lib/feedjira/parser.rb +1 -1
- data/lib/feedjira/preprocessor.rb +4 -2
- data/lib/feedjira/version.rb +1 -1
- data/lib/feedjira.rb +15 -0
- data/spec/feedjira/configuration_spec.rb +25 -0
- data/spec/feedjira/date_time_utilities_spec.rb +47 -0
- data/spec/feedjira/feed_entry_utilities_spec.rb +23 -19
- data/spec/feedjira/feed_spec.rb +140 -75
- data/spec/feedjira/feed_utilities_spec.rb +83 -63
- data/spec/feedjira/parser/atom_entry_spec.rb +54 -34
- data/spec/feedjira/parser/atom_feed_burner_entry_spec.rb +27 -20
- data/spec/feedjira/parser/atom_feed_burner_spec.rb +87 -30
- data/spec/feedjira/parser/atom_spec.rb +50 -48
- data/spec/feedjira/parser/atom_youtube_entry_spec.rb +86 -0
- data/spec/feedjira/parser/atom_youtube_spec.rb +43 -0
- data/spec/feedjira/parser/google_docs_atom_entry_spec.rb +5 -4
- data/spec/feedjira/parser/google_docs_atom_spec.rb +6 -6
- data/spec/feedjira/parser/itunes_rss_item_spec.rb +49 -29
- data/spec/feedjira/parser/itunes_rss_owner_spec.rb +10 -9
- data/spec/feedjira/parser/itunes_rss_spec.rb +87 -30
- data/spec/feedjira/parser/podlove_chapter_spec.rb +37 -0
- data/spec/feedjira/parser/rss_entry_spec.rb +50 -33
- data/spec/feedjira/parser/rss_feed_burner_entry_spec.rb +55 -33
- data/spec/feedjira/parser/rss_feed_burner_spec.rb +31 -26
- data/spec/feedjira/parser/rss_spec.rb +56 -24
- data/spec/feedjira/preprocessor_spec.rb +11 -3
- data/spec/sample_feeds/AmazonWebServicesBlog.xml +797 -797
- data/spec/sample_feeds/AtomEscapedHTMLInPreTag.xml +13 -0
- data/spec/sample_feeds/CRE.xml +5849 -0
- data/spec/sample_feeds/FeedBurnerXHTML.xml +400 -400
- data/spec/sample_feeds/GiantRobotsSmashingIntoOtherGiantRobots.xml +682 -0
- data/spec/sample_feeds/ITunesWithSingleQuotedAttributes.xml +67 -0
- data/spec/sample_feeds/InvalidDateFormat.xml +20 -0
- data/spec/sample_feeds/PaulDixExplainsNothing.xml +175 -175
- data/spec/sample_feeds/PaulDixExplainsNothingAlternate.xml +175 -175
- data/spec/sample_feeds/PaulDixExplainsNothingFirstEntryContent.xml +16 -16
- data/spec/sample_feeds/PaulDixExplainsNothingWFW.xml +174 -174
- data/spec/sample_feeds/TenderLovemaking.xml +12 -2
- data/spec/sample_feeds/TrotterCashionHome.xml +611 -611
- data/spec/sample_feeds/TypePadNews.xml +368 -368
- data/spec/sample_feeds/itunes.xml +31 -2
- data/spec/sample_feeds/pet_atom.xml +229 -229
- data/spec/sample_feeds/youtube_atom.xml +395 -0
- data/spec/sample_feeds.rb +31 -21
- data/spec/spec_helper.rb +6 -0
- metadata +132 -25
data/lib/feedjira/parser.rb
CHANGED
@@ -1 +1 @@
|
|
1
|
-
module Feedjira::Parser; end
|
1
|
+
module Feedjira::Parser; end # rubocop:disable Style/Documentation
|
data/lib/feedjira/preprocessor.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# rubocop:disable Style/Documentation
|
2
|
+
# rubocop:disable Style/DocumentationMethod
|
1
3
|
module Feedjira
|
2
4
|
class Preprocessor
|
3
5
|
def initialize(xml)
|
@@ -18,11 +20,11 @@ module Feedjira
|
|
18
20
|
end
|
19
21
|
|
20
22
|
def content_nodes
|
21
|
-
doc.search 'entry > content[type="xhtml"], entry > summary[type="xhtml"], entry > title[type="xhtml"]'
|
23
|
+
doc.search 'entry > content[type="xhtml"], entry > summary[type="xhtml"], entry > title[type="xhtml"]' # rubocop:disable Metrics/LineLength
|
22
24
|
end
|
23
25
|
|
24
26
|
def raw_html(node)
|
25
|
-
|
27
|
+
node.search('./div').inner_html
|
26
28
|
end
|
27
29
|
|
28
30
|
def doc
|
data/lib/feedjira/version.rb
CHANGED
data/lib/feedjira.rb
CHANGED
@@ -3,13 +3,21 @@ require 'faraday'
|
|
3
3
|
require 'faraday_middleware'
|
4
4
|
require 'sax-machine'
|
5
5
|
require 'loofah'
|
6
|
+
require 'logger'
|
6
7
|
|
7
8
|
require 'feedjira/core_ext'
|
9
|
+
require 'feedjira/configuration'
|
10
|
+
require 'feedjira/date_time_utilities/date_time_epoch_parser'
|
11
|
+
require 'feedjira/date_time_utilities/date_time_language_parser'
|
12
|
+
require 'feedjira/date_time_utilities/date_time_pattern_parser'
|
13
|
+
require 'feedjira/date_time_utilities'
|
14
|
+
require 'feedjira/date_time_utilities'
|
8
15
|
require 'feedjira/feed_entry_utilities'
|
9
16
|
require 'feedjira/feed_utilities'
|
10
17
|
require 'feedjira/feed'
|
11
18
|
require 'feedjira/parser'
|
12
19
|
require 'feedjira/parser/rss_entry'
|
20
|
+
require 'feedjira/parser/rss_image'
|
13
21
|
require 'feedjira/parser/rss'
|
14
22
|
require 'feedjira/parser/atom_entry'
|
15
23
|
require 'feedjira/parser/atom'
|
@@ -18,15 +26,22 @@ require 'feedjira/version'
|
|
18
26
|
|
19
27
|
require 'feedjira/parser/rss_feed_burner_entry'
|
20
28
|
require 'feedjira/parser/rss_feed_burner'
|
29
|
+
require 'feedjira/parser/podlove_chapter'
|
21
30
|
require 'feedjira/parser/itunes_rss_owner'
|
31
|
+
require 'feedjira/parser/itunes_rss_category'
|
22
32
|
require 'feedjira/parser/itunes_rss_item'
|
23
33
|
require 'feedjira/parser/itunes_rss'
|
24
34
|
require 'feedjira/parser/atom_feed_burner_entry'
|
25
35
|
require 'feedjira/parser/atom_feed_burner'
|
26
36
|
require 'feedjira/parser/google_docs_atom_entry'
|
27
37
|
require 'feedjira/parser/google_docs_atom'
|
38
|
+
require 'feedjira/parser/atom_youtube_entry'
|
39
|
+
require 'feedjira/parser/atom_youtube'
|
28
40
|
|
41
|
+
# Feedjira
|
29
42
|
module Feedjira
|
30
43
|
class NoParserAvailable < StandardError; end
|
31
44
|
class FetchFailure < StandardError; end
|
45
|
+
|
46
|
+
extend Configuration
|
32
47
|
end
|
data/spec/feedjira/configuration_spec.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Feedjira::Configuration do
|
4
|
+
describe '.configure' do
|
5
|
+
it 'sets follow_redirect_limit config' do
|
6
|
+
Feedjira.configure { |config| config.follow_redirect_limit = 10 }
|
7
|
+
expect(Feedjira.follow_redirect_limit).to eq(10)
|
8
|
+
end
|
9
|
+
|
10
|
+
it 'sets request_timeout config' do
|
11
|
+
Feedjira.configure { |config| config.request_timeout = 45 }
|
12
|
+
expect(Feedjira.request_timeout).to eq(45)
|
13
|
+
end
|
14
|
+
|
15
|
+
it 'sets strip_whitespace config' do
|
16
|
+
Feedjira.configure { |config| config.strip_whitespace = true }
|
17
|
+
expect(Feedjira.strip_whitespace).to be true
|
18
|
+
end
|
19
|
+
|
20
|
+
it 'sets user_agent config' do
|
21
|
+
Feedjira.configure { |config| config.user_agent = 'Test User Agent' }
|
22
|
+
expect(Feedjira.user_agent).to eq('Test User Agent')
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
data/spec/feedjira/date_time_utilities_spec.rb
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Feedjira::FeedUtilities do
|
4
|
+
before(:each) do
|
5
|
+
@klass = Class.new do
|
6
|
+
include Feedjira::DateTimeUtilities
|
7
|
+
end
|
8
|
+
end
|
9
|
+
|
10
|
+
describe 'handling dates' do
|
11
|
+
it 'should parse an ISO 8601 formatted datetime into Time' do
|
12
|
+
time = @klass.new.parse_datetime('2008-02-20T8:05:00-010:00')
|
13
|
+
expect(time.class).to eq Time
|
14
|
+
expect(time).to eq Time.parse_safely('Wed Feb 20 18:05:00 UTC 2008')
|
15
|
+
end
|
16
|
+
|
17
|
+
it 'should parse a ISO 8601 with milliseconds into Time' do
|
18
|
+
time = @klass.new.parse_datetime('2013-09-17T08:20:13.931-04:00')
|
19
|
+
expect(time.class).to eq Time
|
20
|
+
expect(time).to eq Time.parse_safely('Tue Sep 17 12:20:13 UTC 2013')
|
21
|
+
end
|
22
|
+
|
23
|
+
it 'should parse a US Format into Time' do
|
24
|
+
time = @klass.new.parse_datetime('8/23/2016 12:29:58 PM')
|
25
|
+
expect(time.class).to eq Time
|
26
|
+
expect(time).to eq Time.parse_safely('Wed Aug 23 12:29:58 UTC 2016')
|
27
|
+
end
|
28
|
+
|
29
|
+
it 'should parse a Spanish Format into Time' do
|
30
|
+
time = @klass.new.parse_datetime('Wed, 31 Ago 2016 11:08:22 GMT')
|
31
|
+
expect(time.class).to eq Time
|
32
|
+
expect(time).to eq Time.parse_safely('Wed Aug 31 11:08:22 UTC 2016')
|
33
|
+
end
|
34
|
+
|
35
|
+
it 'should parse Format with japanese symbols into Time' do
|
36
|
+
time = @klass.new.parse_datetime('水, 31 8 2016 07:37:00 PDT')
|
37
|
+
expect(time.class).to eq Time
|
38
|
+
expect(time).to eq Time.parse_safely('Wed Aug 31 14:37:00 UTC 2016')
|
39
|
+
end
|
40
|
+
|
41
|
+
it 'should parse epoch into Time' do
|
42
|
+
time = @klass.new.parse_datetime('1472654220')
|
43
|
+
expect(time.class).to eq Time
|
44
|
+
expect(time).to eq Time.parse_safely('Wed Aug 31 14:37:00 UTC 2016')
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
data/spec/feedjira/feed_entry_utilities_spec.rb
CHANGED
@@ -7,21 +7,21 @@ describe Feedjira::FeedUtilities do
|
|
7
7
|
end
|
8
8
|
end
|
9
9
|
|
10
|
-
describe
|
11
|
-
it
|
12
|
-
time = @klass.new.parse_datetime(
|
10
|
+
describe 'handling dates' do
|
11
|
+
it 'should parse an ISO 8601 formatted datetime into Time' do
|
12
|
+
time = @klass.new.parse_datetime('2008-02-20T8:05:00-010:00')
|
13
13
|
expect(time.class).to eq Time
|
14
|
-
expect(time).to eq Time.parse_safely(
|
14
|
+
expect(time).to eq Time.parse_safely('Wed Feb 20 18:05:00 UTC 2008')
|
15
15
|
end
|
16
16
|
|
17
|
-
it
|
18
|
-
time = @klass.new.parse_datetime(
|
17
|
+
it 'should parse a ISO 8601 with milliseconds into Time' do
|
18
|
+
time = @klass.new.parse_datetime('2013-09-17T08:20:13.931-04:00')
|
19
19
|
expect(time.class).to eq Time
|
20
|
-
expect(time).to eq Time.parse_safely(
|
20
|
+
expect(time).to eq Time.parse_safely('Tue Sep 17 12:20:13 UTC 2013')
|
21
21
|
end
|
22
22
|
end
|
23
23
|
|
24
|
-
describe
|
24
|
+
describe 'sanitizing' do
|
25
25
|
before(:each) do
|
26
26
|
@feed = Feedjira::Feed.parse(sample_atom_feed)
|
27
27
|
@entry = @feed.entries.first
|
@@ -31,23 +31,27 @@ describe Feedjira::FeedUtilities do
|
|
31
31
|
expect { @klass.new.sanitize! }.to_not raise_error
|
32
32
|
end
|
33
33
|
|
34
|
-
it
|
35
|
-
new_title =
|
34
|
+
it 'should provide a sanitized title' do
|
35
|
+
new_title = '<script>this is not safe</script>' + @entry.title
|
36
36
|
@entry.title = new_title
|
37
|
-
|
37
|
+
scrubbed_title = Loofah.scrub_fragment(new_title, :prune).to_s
|
38
|
+
expect(@entry.title.sanitize).to eq scrubbed_title
|
38
39
|
end
|
39
40
|
|
40
|
-
it
|
41
|
-
new_content =
|
41
|
+
it 'should sanitize content in place' do
|
42
|
+
new_content = '<script>' + @entry.content
|
42
43
|
@entry.content = new_content.dup
|
43
|
-
|
44
|
-
|
44
|
+
|
45
|
+
scrubbed_content = Loofah.scrub_fragment(new_content, :prune).to_s
|
46
|
+
|
47
|
+
expect(@entry.content.sanitize!).to eq scrubbed_content
|
48
|
+
expect(@entry.content).to eq scrubbed_content
|
45
49
|
end
|
46
50
|
|
47
|
-
it
|
48
|
-
@entry.title +=
|
49
|
-
@entry.author +=
|
50
|
-
@entry.content +=
|
51
|
+
it 'should sanitize things in place' do
|
52
|
+
@entry.title += '<script>'
|
53
|
+
@entry.author += '<script>'
|
54
|
+
@entry.content += '<script>'
|
51
55
|
|
52
56
|
cleaned_title = Loofah.scrub_fragment(@entry.title, :prune).to_s
|
53
57
|
cleaned_author = Loofah.scrub_fragment(@entry.author, :prune).to_s
|
data/spec/feedjira/feed_spec.rb
CHANGED
@@ -1,4 +1,6 @@
|
|
1
|
-
require
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# rubocop:disable Style/BlockDelimiters
|
2
4
|
|
3
5
|
class Hell < StandardError; end
|
4
6
|
|
@@ -11,63 +13,72 @@ end
|
|
11
13
|
describe Feedjira::Feed do
|
12
14
|
describe '.fetch_and_parse' do
|
13
15
|
it 'raises an error when the fetch fails' do
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
16
|
+
VCR.use_cassette('fetch_failure') do
|
17
|
+
url = 'http://www.example.com/feed.xml'
|
18
|
+
expect {
|
19
|
+
Feedjira::Feed.fetch_and_parse url
|
20
|
+
}.to raise_error Feedjira::FetchFailure
|
21
|
+
end
|
18
22
|
end
|
19
23
|
|
20
24
|
it 'raises an error when no parser can be found' do
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
+
VCR.use_cassette('parse_error') do
|
26
|
+
url = 'http://feedjira.com'
|
27
|
+
expect {
|
28
|
+
Feedjira::Feed.fetch_and_parse url
|
29
|
+
}.to raise_error Feedjira::NoParserAvailable
|
30
|
+
end
|
25
31
|
end
|
26
32
|
|
27
33
|
it 'fetches and parses the feed' do
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
34
|
+
VCR.use_cassette('success') do
|
35
|
+
url = 'http://feedjira.com/blog/feed.xml'
|
36
|
+
expected_time = DateTime.parse('Fri, 07 Oct 2016 14:37:00 GMT').to_time
|
37
|
+
feed = Feedjira::Feed.fetch_and_parse url
|
38
|
+
|
39
|
+
expect(feed.class).to eq Feedjira::Parser::Atom
|
40
|
+
expect(feed.entries.count).to eq 4
|
41
|
+
expect(feed.feed_url).to eq url
|
42
|
+
expect(feed.etag).to eq('393e-53e4757c9db00-gzip')
|
43
|
+
expect(feed.last_modified).to eq(expected_time)
|
44
|
+
end
|
36
45
|
end
|
37
46
|
end
|
38
47
|
|
39
|
-
describe
|
48
|
+
describe '#add_common_feed_element' do
|
40
49
|
before(:all) do
|
41
|
-
Feedjira::Feed.add_common_feed_element(
|
50
|
+
Feedjira::Feed.add_common_feed_element('generator')
|
42
51
|
end
|
43
52
|
|
44
|
-
it
|
45
|
-
expect(Feedjira::Feed.parse(sample_wfw_feed).generator).to eq
|
53
|
+
it 'should parse the added element out of Atom feeds' do
|
54
|
+
expect(Feedjira::Feed.parse(sample_wfw_feed).generator).to eq 'TypePad'
|
46
55
|
end
|
47
56
|
|
48
|
-
it
|
57
|
+
it 'should parse the added element out of Atom Feedburner feeds' do
|
49
58
|
expect(Feedjira::Parser::Atom.new).to respond_to(:generator)
|
50
59
|
end
|
51
60
|
|
52
|
-
it
|
61
|
+
it 'should parse the added element out of RSS feeds' do
|
53
62
|
expect(Feedjira::Parser::RSS.new).to respond_to(:generator)
|
54
63
|
end
|
55
64
|
end
|
56
65
|
|
57
|
-
describe
|
66
|
+
describe '#add_common_feed_entry_element' do
|
58
67
|
before(:all) do
|
59
|
-
|
68
|
+
tag = 'wfw:commentRss'
|
69
|
+
Feedjira::Feed.add_common_feed_entry_element tag, as: :comment_rss
|
60
70
|
end
|
61
71
|
|
62
|
-
it
|
63
|
-
|
72
|
+
it 'should parse the added element out of Atom feeds entries' do
|
73
|
+
entry = Feedjira::Feed.parse(sample_wfw_feed).entries.first
|
74
|
+
expect(entry.comment_rss).to eq 'this is the new val'
|
64
75
|
end
|
65
76
|
|
66
|
-
it
|
77
|
+
it 'should parse the added element out of Atom Feedburner feeds entries' do
|
67
78
|
expect(Feedjira::Parser::AtomEntry.new).to respond_to(:comment_rss)
|
68
79
|
end
|
69
80
|
|
70
|
-
it
|
81
|
+
it 'should parse the added element out of RSS feeds entries' do
|
71
82
|
expect(Feedjira::Parser::RSSEntry.new).to respond_to(:comment_rss)
|
72
83
|
end
|
73
84
|
end
|
@@ -92,107 +103,161 @@ describe Feedjira::Feed do
|
|
92
103
|
end
|
93
104
|
end
|
94
105
|
|
95
|
-
describe
|
106
|
+
describe '#parse' do
|
96
107
|
context "when there's an available parser" do
|
97
|
-
it
|
108
|
+
it 'should parse an rdf feed' do
|
98
109
|
feed = Feedjira::Feed.parse(sample_rdf_feed)
|
99
|
-
expect(feed.title).to eq
|
100
|
-
|
110
|
+
expect(feed.title).to eq 'HREF Considered Harmful'
|
111
|
+
published = Time.parse_safely('Tue Sep 02 19:50:07 UTC 2008')
|
112
|
+
expect(feed.entries.first.published).to eq published
|
101
113
|
expect(feed.entries.size).to eq 10
|
102
114
|
end
|
103
115
|
|
104
|
-
it
|
116
|
+
it 'should parse an rss feed' do
|
105
117
|
feed = Feedjira::Feed.parse(sample_rss_feed)
|
106
|
-
expect(feed.title).to eq
|
107
|
-
|
118
|
+
expect(feed.title).to eq 'Tender Lovemaking'
|
119
|
+
published = Time.parse_safely 'Thu Dec 04 17:17:49 UTC 2008'
|
120
|
+
expect(feed.entries.first.published).to eq published
|
108
121
|
expect(feed.entries.size).to eq 10
|
109
122
|
end
|
110
123
|
|
111
|
-
it
|
124
|
+
it 'should parse an atom feed' do
|
112
125
|
feed = Feedjira::Feed.parse(sample_atom_feed)
|
113
|
-
expect(feed.title).to eq
|
114
|
-
|
126
|
+
expect(feed.title).to eq 'Amazon Web Services Blog'
|
127
|
+
published = Time.parse_safely 'Fri Jan 16 18:21:00 UTC 2009'
|
128
|
+
expect(feed.entries.first.published).to eq published
|
115
129
|
expect(feed.entries.size).to eq 10
|
116
130
|
end
|
117
131
|
|
118
|
-
it
|
132
|
+
it 'should parse an feedburner atom feed' do
|
119
133
|
feed = Feedjira::Feed.parse(sample_feedburner_atom_feed)
|
120
|
-
expect(feed.title).to eq
|
121
|
-
|
134
|
+
expect(feed.title).to eq 'Paul Dix Explains Nothing'
|
135
|
+
published = Time.parse_safely 'Thu Jan 22 15:50:22 UTC 2009'
|
136
|
+
expect(feed.entries.first.published).to eq published
|
122
137
|
expect(feed.entries.size).to eq 5
|
123
138
|
end
|
124
139
|
|
125
|
-
it
|
140
|
+
it 'should parse an itunes feed' do
|
126
141
|
feed = Feedjira::Feed.parse(sample_itunes_feed)
|
127
|
-
expect(feed.title).to eq
|
128
|
-
|
142
|
+
expect(feed.title).to eq 'All About Everything'
|
143
|
+
published = Time.parse_safely 'Wed, 15 Jun 2005 19:00:00 GMT'
|
144
|
+
expect(feed.entries.first.published).to eq published
|
129
145
|
expect(feed.entries.size).to eq 3
|
130
146
|
end
|
147
|
+
|
148
|
+
# rubocop:disable Metrics/LineLength
|
149
|
+
it 'does not fail if multiple published dates exist and some are unparseable' do
|
150
|
+
expect(Feedjira.logger).to receive(:warn).once
|
151
|
+
|
152
|
+
feed = Feedjira::Feed.parse(sample_invalid_date_format_feed)
|
153
|
+
expect(feed.title).to eq 'Invalid date format feed'
|
154
|
+
published = Time.parse_safely 'Mon, 16 Oct 2017 15:10:00 GMT'
|
155
|
+
expect(feed.entries.first.published).to eq published
|
156
|
+
expect(feed.entries.size).to eq 2
|
157
|
+
end
|
158
|
+
# rubocop:enable Metrics/LineLength
|
131
159
|
end
|
132
160
|
|
133
161
|
context "when there's no available parser" do
|
134
|
-
it
|
162
|
+
it 'raises Feedjira::NoParserAvailable' do
|
135
163
|
expect {
|
136
164
|
Feedjira::Feed.parse("I'm an invalid feed")
|
137
165
|
}.to raise_error(Feedjira::NoParserAvailable)
|
138
166
|
end
|
139
167
|
end
|
140
168
|
|
141
|
-
it
|
169
|
+
it 'should parse an feedburner rss feed' do
|
142
170
|
feed = Feedjira::Feed.parse(sample_rss_feed_burner_feed)
|
143
|
-
expect(feed.title).to eq
|
144
|
-
|
171
|
+
expect(feed.title).to eq 'TechCrunch'
|
172
|
+
published = Time.parse_safely 'Wed Nov 02 17:25:27 UTC 2011'
|
173
|
+
expect(feed.entries.first.published).to eq published
|
145
174
|
expect(feed.entries.size).to eq 20
|
146
175
|
end
|
147
176
|
end
|
148
177
|
|
149
|
-
describe
|
150
|
-
it '
|
151
|
-
|
178
|
+
describe '#determine_feed_parser_for_xml' do
|
179
|
+
it 'with Google Docs atom feed it returns the GoogleDocsAtom parser' do
|
180
|
+
xml = sample_google_docs_list_feed
|
181
|
+
actual_parser = Feedjira::Feed.determine_feed_parser_for_xml xml
|
182
|
+
expect(actual_parser).to eq Feedjira::Parser::GoogleDocsAtom
|
152
183
|
end
|
153
184
|
|
154
|
-
it
|
155
|
-
|
185
|
+
it 'with an atom feed it returns the Atom parser' do
|
186
|
+
xml = sample_atom_feed
|
187
|
+
actual_parser = Feedjira::Feed.determine_feed_parser_for_xml xml
|
188
|
+
expect(actual_parser).to eq Feedjira::Parser::Atom
|
156
189
|
end
|
157
190
|
|
158
|
-
it
|
159
|
-
|
191
|
+
it 'with an atom feedburner feed it returns the AtomFeedBurner parser' do
|
192
|
+
xml = sample_feedburner_atom_feed
|
193
|
+
actual_parser = Feedjira::Feed.determine_feed_parser_for_xml xml
|
194
|
+
expect(actual_parser).to eq Feedjira::Parser::AtomFeedBurner
|
160
195
|
end
|
161
196
|
|
162
|
-
it
|
163
|
-
|
197
|
+
it 'with an rdf feed it returns the RSS parser' do
|
198
|
+
xml = sample_rdf_feed
|
199
|
+
actual_parser = Feedjira::Feed.determine_feed_parser_for_xml xml
|
200
|
+
expect(actual_parser).to eq Feedjira::Parser::RSS
|
164
201
|
end
|
165
202
|
|
166
|
-
it
|
167
|
-
|
203
|
+
it 'with an rss feedburner feed it returns the RSSFeedBurner parser' do
|
204
|
+
xml = sample_rss_feed_burner_feed
|
205
|
+
actual_parser = Feedjira::Feed.determine_feed_parser_for_xml xml
|
206
|
+
expect(actual_parser).to eq Feedjira::Parser::RSSFeedBurner
|
168
207
|
end
|
169
208
|
|
170
|
-
it
|
171
|
-
|
209
|
+
it 'with an rss 2.0 feed it returns the RSS parser' do
|
210
|
+
xml = sample_rss_feed
|
211
|
+
actual_parser = Feedjira::Feed.determine_feed_parser_for_xml xml
|
212
|
+
expect(actual_parser).to eq Feedjira::Parser::RSS
|
172
213
|
end
|
173
214
|
|
174
|
-
it
|
175
|
-
|
215
|
+
it 'with an itunes feed it returns the RSS parser' do
|
216
|
+
xml = sample_itunes_feed
|
217
|
+
actual_parser = Feedjira::Feed.determine_feed_parser_for_xml xml
|
218
|
+
expect(actual_parser).to eq Feedjira::Parser::ITunesRSS
|
176
219
|
end
|
177
|
-
|
178
220
|
end
|
179
221
|
|
180
|
-
describe
|
181
|
-
it
|
182
|
-
|
222
|
+
describe 'when adding feed types' do
|
223
|
+
it 'should prioritize added types over the built in ones' do
|
224
|
+
xml = 'Atom asdf'
|
183
225
|
allow(Feedjira::Parser::Atom).to receive(:able_to_parse?).and_return(true)
|
184
|
-
|
185
|
-
def self.able_to_parse?(
|
226
|
+
new_parser = Class.new do
|
227
|
+
def self.able_to_parse?(_)
|
186
228
|
true
|
187
229
|
end
|
188
230
|
end
|
189
231
|
|
190
|
-
expect(
|
191
|
-
|
192
|
-
|
232
|
+
expect(new_parser).to be_able_to_parse(xml)
|
233
|
+
|
234
|
+
Feedjira::Feed.add_feed_class(new_parser)
|
193
235
|
|
194
|
-
|
195
|
-
|
236
|
+
parser = Feedjira::Feed.determine_feed_parser_for_xml xml
|
237
|
+
expect(parser).to eq new_parser
|
238
|
+
|
239
|
+
Feedjira::Feed.reset_parsers!
|
240
|
+
end
|
241
|
+
end
|
242
|
+
|
243
|
+
describe 'when parsers are configured' do
|
244
|
+
it 'does not use default parsers' do
|
245
|
+
xml = 'Atom asdf'
|
246
|
+
new_parser = Class.new do
|
247
|
+
def self.able_to_parse?(_)
|
248
|
+
true
|
249
|
+
end
|
250
|
+
end
|
251
|
+
|
252
|
+
Feedjira.configure { |config| config.parsers = [new_parser] }
|
253
|
+
|
254
|
+
parser = Feedjira::Feed.determine_feed_parser_for_xml(xml)
|
255
|
+
expect(parser).to eq(new_parser)
|
256
|
+
|
257
|
+
Feedjira.reset_configuration!
|
258
|
+
Feedjira::Feed.reset_parsers!
|
196
259
|
end
|
197
260
|
end
|
198
261
|
end
|
262
|
+
|
263
|
+
# rubocop:enable Style/BlockDelimiters
|