feedjira 2.2.0 → 3.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/ISSUE_TEMPLATE/feed-parsing.md +15 -0
- data/.rubocop.yml +32 -8
- data/.rubocop_todo.yml +11 -0
- data/.travis.yml +3 -7
- data/CHANGELOG.md +18 -9
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +8 -5
- data/README.md +46 -99
- data/Rakefile +8 -6
- data/feedjira.gemspec +31 -20
- data/lib/feedjira.rb +75 -41
- data/lib/feedjira/atom_entry_utilities.rb +51 -0
- data/lib/feedjira/configuration.rb +8 -10
- data/lib/feedjira/core_ext.rb +5 -3
- data/lib/feedjira/core_ext/date.rb +2 -1
- data/lib/feedjira/core_ext/string.rb +2 -1
- data/lib/feedjira/core_ext/time.rb +12 -12
- data/lib/feedjira/date_time_utilities.rb +8 -10
- data/lib/feedjira/date_time_utilities/date_time_epoch_parser.rb +3 -2
- data/lib/feedjira/date_time_utilities/date_time_language_parser.rb +4 -4
- data/lib/feedjira/date_time_utilities/date_time_pattern_parser.rb +11 -15
- data/lib/feedjira/feed.rb +12 -82
- data/lib/feedjira/feed_entry_utilities.rb +14 -7
- data/lib/feedjira/feed_utilities.rb +5 -4
- data/lib/feedjira/parser.rb +6 -1
- data/lib/feedjira/parser/atom.rb +6 -5
- data/lib/feedjira/parser/atom_entry.rb +4 -21
- data/lib/feedjira/parser/atom_feed_burner.rb +7 -6
- data/lib/feedjira/parser/atom_feed_burner_entry.rb +7 -18
- data/lib/feedjira/parser/atom_google_alerts.rb +26 -0
- data/lib/feedjira/parser/atom_google_alerts_entry.rb +21 -0
- data/lib/feedjira/parser/atom_youtube.rb +4 -3
- data/lib/feedjira/parser/atom_youtube_entry.rb +9 -8
- data/lib/feedjira/parser/globally_unique_identifier.rb +21 -0
- data/lib/feedjira/parser/google_docs_atom.rb +6 -6
- data/lib/feedjira/parser/google_docs_atom_entry.rb +3 -19
- data/lib/feedjira/parser/itunes_rss.rb +4 -3
- data/lib/feedjira/parser/itunes_rss_category.rb +6 -5
- data/lib/feedjira/parser/itunes_rss_item.rb +5 -8
- data/lib/feedjira/parser/itunes_rss_owner.rb +2 -1
- data/lib/feedjira/parser/json_feed.rb +41 -0
- data/lib/feedjira/parser/json_feed_item.rb +57 -0
- data/lib/feedjira/parser/podlove_chapter.rb +4 -3
- data/lib/feedjira/parser/rss.rb +5 -3
- data/lib/feedjira/parser/rss_entry.rb +3 -24
- data/lib/feedjira/parser/rss_feed_burner.rb +4 -3
- data/lib/feedjira/parser/rss_feed_burner_entry.rb +6 -26
- data/lib/feedjira/parser/rss_image.rb +2 -0
- data/lib/feedjira/preprocessor.rb +4 -4
- data/lib/feedjira/rss_entry_utilities.rb +53 -0
- data/lib/feedjira/version.rb +3 -1
- data/spec/feedjira/configuration_spec.rb +11 -16
- data/spec/feedjira/date_time_utilities_spec.rb +22 -20
- data/spec/feedjira/feed_entry_utilities_spec.rb +20 -18
- data/spec/feedjira/feed_spec.rb +17 -229
- data/spec/feedjira/feed_utilities_spec.rb +75 -73
- data/spec/feedjira/parser/atom_entry_spec.rb +41 -38
- data/spec/feedjira/parser/atom_feed_burner_entry_spec.rb +22 -20
- data/spec/feedjira/parser/atom_feed_burner_spec.rb +122 -118
- data/spec/feedjira/parser/atom_google_alerts_entry_spec.rb +34 -0
- data/spec/feedjira/parser/atom_google_alerts_spec.rb +62 -0
- data/spec/feedjira/parser/atom_spec.rb +83 -77
- data/spec/feedjira/parser/atom_youtube_entry_spec.rb +41 -39
- data/spec/feedjira/parser/atom_youtube_spec.rb +21 -19
- data/spec/feedjira/parser/google_docs_atom_entry_spec.rb +10 -8
- data/spec/feedjira/parser/google_docs_atom_spec.rb +25 -21
- data/spec/feedjira/parser/itunes_rss_item_spec.rb +39 -37
- data/spec/feedjira/parser/itunes_rss_owner_spec.rb +7 -5
- data/spec/feedjira/parser/itunes_rss_spec.rb +120 -116
- data/spec/feedjira/parser/json_feed_item_spec.rb +81 -0
- data/spec/feedjira/parser/json_feed_spec.rb +55 -0
- data/spec/feedjira/parser/podlove_chapter_spec.rb +14 -12
- data/spec/feedjira/parser/rss_entry_spec.rb +56 -34
- data/spec/feedjira/parser/rss_feed_burner_entry_spec.rb +36 -34
- data/spec/feedjira/parser/rss_feed_burner_spec.rb +49 -45
- data/spec/feedjira/parser/rss_spec.rb +38 -36
- data/spec/feedjira/preprocessor_spec.rb +9 -7
- data/spec/feedjira_spec.rb +166 -0
- data/spec/sample_feeds.rb +32 -29
- data/spec/sample_feeds/HuffPostCanada.xml +279 -0
- data/spec/sample_feeds/Permalinks.xml +22 -0
- data/spec/sample_feeds/a10.xml +72 -0
- data/spec/sample_feeds/google_alerts_atom.xml +1 -0
- data/spec/sample_feeds/json_feed.json +156 -0
- data/spec/spec_helper.rb +7 -5
- metadata +59 -70
- data/Dangerfile +0 -1
- data/fixtures/vcr_cassettes/fetch_failure.yml +0 -62
- data/fixtures/vcr_cassettes/parse_error.yml +0 -222
- data/fixtures/vcr_cassettes/success.yml +0 -281
@@ -1,5 +1,5 @@
|
|
1
|
-
#
|
2
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
3
|
module Feedjira
|
4
4
|
class Preprocessor
|
5
5
|
def initialize(xml)
|
@@ -20,11 +20,11 @@ module Feedjira
|
|
20
20
|
end
|
21
21
|
|
22
22
|
def content_nodes
|
23
|
-
doc.search 'entry > content[type="xhtml"], entry > summary[type="xhtml"], entry > title[type="xhtml"]'
|
23
|
+
doc.search 'entry > content[type="xhtml"], entry > summary[type="xhtml"], entry > title[type="xhtml"]'
|
24
24
|
end
|
25
25
|
|
26
26
|
def raw_html(node)
|
27
|
-
node.search('./div').inner_html
|
27
|
+
node.search("./div").inner_html
|
28
28
|
end
|
29
29
|
|
30
30
|
def doc
|
@@ -0,0 +1,53 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Feedjira
|
4
|
+
module RSSEntryUtilities
|
5
|
+
def self.included(mod)
|
6
|
+
mod.class_exec do
|
7
|
+
element :title
|
8
|
+
|
9
|
+
element :"content:encoded", as: :content
|
10
|
+
element :"a10:content", as: :content
|
11
|
+
element :description, as: :summary
|
12
|
+
|
13
|
+
element :link, as: :url
|
14
|
+
element :"a10:link", as: :url, value: :href
|
15
|
+
|
16
|
+
element :author
|
17
|
+
element :"dc:creator", as: :author
|
18
|
+
element :"a10:name", as: :author
|
19
|
+
|
20
|
+
element :pubDate, as: :published
|
21
|
+
element :pubdate, as: :published
|
22
|
+
element :issued, as: :published
|
23
|
+
element :"dc:date", as: :published
|
24
|
+
element :"dc:Date", as: :published
|
25
|
+
element :"dcterms:created", as: :published
|
26
|
+
|
27
|
+
element :"dcterms:modified", as: :updated
|
28
|
+
element :"a10:updated", as: :updated
|
29
|
+
|
30
|
+
element :guid, as: :entry_id, class: Feedjira::Parser::GloballyUniqueIdentifier
|
31
|
+
element :"dc:identifier", as: :dc_identifier
|
32
|
+
|
33
|
+
element :"media:thumbnail", as: :image, value: :url
|
34
|
+
element :"media:content", as: :image, value: :url
|
35
|
+
element :enclosure, as: :image, value: :url
|
36
|
+
|
37
|
+
elements :category, as: :categories
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
def entry_id
|
42
|
+
@entry_id&.guid
|
43
|
+
end
|
44
|
+
|
45
|
+
def url
|
46
|
+
@url || @entry_id&.url
|
47
|
+
end
|
48
|
+
|
49
|
+
def id
|
50
|
+
entry_id || @dc_identifier || @url
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
data/lib/feedjira/version.rb
CHANGED
@@ -1,25 +1,20 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
4
|
-
describe '.configure' do
|
5
|
-
it 'sets follow_redirect_limit config' do
|
6
|
-
Feedjira.configure { |config| config.follow_redirect_limit = 10 }
|
7
|
-
expect(Feedjira.follow_redirect_limit).to eq(10)
|
8
|
-
end
|
9
|
-
|
10
|
-
it 'sets request_timeout config' do
|
11
|
-
Feedjira.configure { |config| config.request_timeout = 45 }
|
12
|
-
expect(Feedjira.request_timeout).to eq(45)
|
13
|
-
end
|
3
|
+
require "spec_helper"
|
14
4
|
|
15
|
-
|
5
|
+
describe Feedjira::Configuration do
|
6
|
+
describe ".configure" do
|
7
|
+
it "sets strip_whitespace config" do
|
16
8
|
Feedjira.configure { |config| config.strip_whitespace = true }
|
17
9
|
expect(Feedjira.strip_whitespace).to be true
|
18
10
|
end
|
19
11
|
|
20
|
-
it
|
21
|
-
|
22
|
-
|
12
|
+
it "allows parsers to be modified" do
|
13
|
+
CustomParser = Class.new
|
14
|
+
|
15
|
+
Feedjira.configure { |config| config.parsers.unshift(CustomParser) }
|
16
|
+
expect(Feedjira.parsers.first).to eq(CustomParser)
|
17
|
+
Feedjira.reset_configuration!
|
23
18
|
end
|
24
19
|
end
|
25
20
|
end
|
@@ -1,4 +1,6 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "spec_helper"
|
2
4
|
|
3
5
|
describe Feedjira::FeedUtilities do
|
4
6
|
before(:each) do
|
@@ -7,41 +9,41 @@ describe Feedjira::FeedUtilities do
|
|
7
9
|
end
|
8
10
|
end
|
9
11
|
|
10
|
-
describe
|
11
|
-
it
|
12
|
-
time = @klass.new.parse_datetime('2008-02-20T8:05:00-010:00')
|
12
|
+
describe "handling dates" do
|
13
|
+
it "should parse an ISO 8601 formatted datetime into Time" do
|
14
|
+
time = @klass.new.parse_datetime("2008-02-20T8:05:00-010:00")
|
13
15
|
expect(time.class).to eq Time
|
14
|
-
expect(time).to eq Time.parse_safely('Wed Feb 20 18:05:00 UTC 2008')
|
16
|
+
expect(time).to eq Time.parse_safely("Wed Feb 20 18:05:00 UTC 2008")
|
15
17
|
end
|
16
18
|
|
17
|
-
it
|
18
|
-
time = @klass.new.parse_datetime('2013-09-17T08:20:13.931-04:00')
|
19
|
+
it "should parse a ISO 8601 with milliseconds into Time" do
|
20
|
+
time = @klass.new.parse_datetime("2013-09-17T08:20:13.931-04:00")
|
19
21
|
expect(time.class).to eq Time
|
20
|
-
expect(time).to eq Time.parse_safely('Tue Sep 17 12:20:13 UTC 2013')
|
22
|
+
expect(time).to eq Time.parse_safely("Tue Sep 17 12:20:13 UTC 2013")
|
21
23
|
end
|
22
24
|
|
23
|
-
it
|
24
|
-
time = @klass.new.parse_datetime('8/23/2016 12:29:58 PM')
|
25
|
+
it "should parse a US Format into Time" do
|
26
|
+
time = @klass.new.parse_datetime("8/23/2016 12:29:58 PM")
|
25
27
|
expect(time.class).to eq Time
|
26
|
-
expect(time).to eq Time.parse_safely('Wed Aug 23 12:29:58 UTC 2016')
|
28
|
+
expect(time).to eq Time.parse_safely("Wed Aug 23 12:29:58 UTC 2016")
|
27
29
|
end
|
28
30
|
|
29
|
-
it
|
30
|
-
time = @klass.new.parse_datetime('Wed, 31 Ago 2016 11:08:22 GMT')
|
31
|
+
it "should parse a Spanish Format into Time" do
|
32
|
+
time = @klass.new.parse_datetime("Wed, 31 Ago 2016 11:08:22 GMT")
|
31
33
|
expect(time.class).to eq Time
|
32
|
-
expect(time).to eq Time.parse_safely('Wed Aug 31 11:08:22 UTC 2016')
|
34
|
+
expect(time).to eq Time.parse_safely("Wed Aug 31 11:08:22 UTC 2016")
|
33
35
|
end
|
34
36
|
|
35
|
-
it
|
36
|
-
time = @klass.new.parse_datetime('水, 31 8 2016 07:37:00 PDT')
|
37
|
+
it "should parse Format with japanese symbols into Time" do
|
38
|
+
time = @klass.new.parse_datetime("水, 31 8 2016 07:37:00 PDT")
|
37
39
|
expect(time.class).to eq Time
|
38
|
-
expect(time).to eq Time.parse_safely('Wed Aug 31 14:37:00 UTC 2016')
|
40
|
+
expect(time).to eq Time.parse_safely("Wed Aug 31 14:37:00 UTC 2016")
|
39
41
|
end
|
40
42
|
|
41
|
-
it
|
42
|
-
time = @klass.new.parse_datetime('1472654220')
|
43
|
+
it "should parse epoch into Time" do
|
44
|
+
time = @klass.new.parse_datetime("1472654220")
|
43
45
|
expect(time.class).to eq Time
|
44
|
-
expect(time).to eq Time.parse_safely('Wed Aug 31 14:37:00 UTC 2016')
|
46
|
+
expect(time).to eq Time.parse_safely("Wed Aug 31 14:37:00 UTC 2016")
|
45
47
|
end
|
46
48
|
end
|
47
49
|
end
|
@@ -1,4 +1,6 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "spec_helper"
|
2
4
|
|
3
5
|
describe Feedjira::FeedUtilities do
|
4
6
|
before(:each) do
|
@@ -7,23 +9,23 @@ describe Feedjira::FeedUtilities do
|
|
7
9
|
end
|
8
10
|
end
|
9
11
|
|
10
|
-
describe
|
11
|
-
it
|
12
|
-
time = @klass.new.parse_datetime('2008-02-20T8:05:00-010:00')
|
12
|
+
describe "handling dates" do
|
13
|
+
it "should parse an ISO 8601 formatted datetime into Time" do
|
14
|
+
time = @klass.new.parse_datetime("2008-02-20T8:05:00-010:00")
|
13
15
|
expect(time.class).to eq Time
|
14
|
-
expect(time).to eq Time.parse_safely('Wed Feb 20 18:05:00 UTC 2008')
|
16
|
+
expect(time).to eq Time.parse_safely("Wed Feb 20 18:05:00 UTC 2008")
|
15
17
|
end
|
16
18
|
|
17
|
-
it
|
18
|
-
time = @klass.new.parse_datetime('2013-09-17T08:20:13.931-04:00')
|
19
|
+
it "should parse a ISO 8601 with milliseconds into Time" do
|
20
|
+
time = @klass.new.parse_datetime("2013-09-17T08:20:13.931-04:00")
|
19
21
|
expect(time.class).to eq Time
|
20
|
-
expect(time).to eq Time.parse_safely('Tue Sep 17 12:20:13 UTC 2013')
|
22
|
+
expect(time).to eq Time.parse_safely("Tue Sep 17 12:20:13 UTC 2013")
|
21
23
|
end
|
22
24
|
end
|
23
25
|
|
24
|
-
describe
|
26
|
+
describe "sanitizing" do
|
25
27
|
before(:each) do
|
26
|
-
@feed = Feedjira::Feed.parse(sample_atom_feed)
|
28
|
+
@feed = Feedjira.parse(sample_atom_feed)
|
27
29
|
@entry = @feed.entries.first
|
28
30
|
end
|
29
31
|
|
@@ -31,15 +33,15 @@ describe Feedjira::FeedUtilities do
|
|
31
33
|
expect { @klass.new.sanitize! }.to_not raise_error
|
32
34
|
end
|
33
35
|
|
34
|
-
it
|
35
|
-
new_title =
|
36
|
+
it "should provide a sanitized title" do
|
37
|
+
new_title = "<script>this is not safe</script>#{@entry.title}"
|
36
38
|
@entry.title = new_title
|
37
39
|
scrubbed_title = Loofah.scrub_fragment(new_title, :prune).to_s
|
38
40
|
expect(@entry.title.sanitize).to eq scrubbed_title
|
39
41
|
end
|
40
42
|
|
41
|
-
it
|
42
|
-
new_content =
|
43
|
+
it "should sanitize content in place" do
|
44
|
+
new_content = "<script>#{@entry.content}"
|
43
45
|
@entry.content = new_content.dup
|
44
46
|
|
45
47
|
scrubbed_content = Loofah.scrub_fragment(new_content, :prune).to_s
|
@@ -48,10 +50,10 @@ describe Feedjira::FeedUtilities do
|
|
48
50
|
expect(@entry.content).to eq scrubbed_content
|
49
51
|
end
|
50
52
|
|
51
|
-
it
|
52
|
-
@entry.title += '<script>'
|
53
|
-
@entry.author += '<script>'
|
54
|
-
@entry.content += '<script>'
|
53
|
+
it "should sanitize things in place" do
|
54
|
+
@entry.title += "<script>"
|
55
|
+
@entry.author += "<script>"
|
56
|
+
@entry.content += "<script>"
|
55
57
|
|
56
58
|
cleaned_title = Loofah.scrub_fragment(@entry.title, :prune).to_s
|
57
59
|
cleaned_author = Loofah.scrub_fragment(@entry.author, :prune).to_s
|
data/spec/feedjira/feed_spec.rb
CHANGED
@@ -1,263 +1,51 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
3
|
+
require "spec_helper"
|
4
4
|
|
5
5
|
class Hell < StandardError; end
|
6
6
|
|
7
7
|
class FailParser
|
8
|
-
def self.parse(
|
9
|
-
on_failure.call 'this parser always fails.'
|
8
|
+
def self.parse(_xml, &on_failure)
|
9
|
+
on_failure.call "this parser always fails."
|
10
10
|
end
|
11
11
|
end
|
12
12
|
|
13
13
|
describe Feedjira::Feed do
|
14
|
-
describe
|
15
|
-
it 'raises an error when the fetch fails' do
|
16
|
-
VCR.use_cassette('fetch_failure') do
|
17
|
-
url = 'http://www.example.com/feed.xml'
|
18
|
-
expect {
|
19
|
-
Feedjira::Feed.fetch_and_parse url
|
20
|
-
}.to raise_error Feedjira::FetchFailure
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
24
|
-
it 'raises an error when no parser can be found' do
|
25
|
-
VCR.use_cassette('parse_error') do
|
26
|
-
url = 'http://feedjira.com'
|
27
|
-
expect {
|
28
|
-
Feedjira::Feed.fetch_and_parse url
|
29
|
-
}.to raise_error Feedjira::NoParserAvailable
|
30
|
-
end
|
31
|
-
end
|
32
|
-
|
33
|
-
it 'fetches and parses the feed' do
|
34
|
-
VCR.use_cassette('success') do
|
35
|
-
url = 'http://feedjira.com/blog/feed.xml'
|
36
|
-
expected_time = DateTime.parse('Fri, 07 Oct 2016 14:37:00 GMT').to_time
|
37
|
-
feed = Feedjira::Feed.fetch_and_parse url
|
38
|
-
|
39
|
-
expect(feed.class).to eq Feedjira::Parser::Atom
|
40
|
-
expect(feed.entries.count).to eq 4
|
41
|
-
expect(feed.feed_url).to eq url
|
42
|
-
expect(feed.etag).to eq('393e-53e4757c9db00-gzip')
|
43
|
-
expect(feed.last_modified).to eq(expected_time)
|
44
|
-
end
|
45
|
-
end
|
46
|
-
end
|
47
|
-
|
48
|
-
describe '#add_common_feed_element' do
|
14
|
+
describe "#add_common_feed_element" do
|
49
15
|
before(:all) do
|
50
|
-
Feedjira::Feed.add_common_feed_element('generator')
|
16
|
+
Feedjira::Feed.add_common_feed_element("generator")
|
51
17
|
end
|
52
18
|
|
53
|
-
it
|
54
|
-
expect(Feedjira::Feed.parse(sample_wfw_feed).generator).to eq 'TypePad'
|
19
|
+
it "should parse the added element out of Atom feeds" do
|
20
|
+
expect(Feedjira.parse(sample_wfw_feed).generator).to eq "TypePad"
|
55
21
|
end
|
56
22
|
|
57
|
-
it
|
23
|
+
it "should parse the added element out of Atom Feedburner feeds" do
|
58
24
|
expect(Feedjira::Parser::Atom.new).to respond_to(:generator)
|
59
25
|
end
|
60
26
|
|
61
|
-
it
|
27
|
+
it "should parse the added element out of RSS feeds" do
|
62
28
|
expect(Feedjira::Parser::RSS.new).to respond_to(:generator)
|
63
29
|
end
|
64
30
|
end
|
65
31
|
|
66
|
-
describe
|
32
|
+
describe "#add_common_feed_entry_element" do
|
67
33
|
before(:all) do
|
68
|
-
tag = 'wfw:commentRss'
|
34
|
+
tag = "wfw:commentRss"
|
69
35
|
Feedjira::Feed.add_common_feed_entry_element tag, as: :comment_rss
|
70
36
|
end
|
71
37
|
|
72
|
-
it
|
73
|
-
entry = Feedjira::Feed.parse(sample_wfw_feed).entries.first
|
74
|
-
expect(entry.comment_rss).to eq 'this is the new val'
|
38
|
+
it "should parse the added element out of Atom feeds entries" do
|
39
|
+
entry = Feedjira.parse(sample_wfw_feed).entries.first
|
40
|
+
expect(entry.comment_rss).to eq "this is the new val"
|
75
41
|
end
|
76
42
|
|
77
|
-
it
|
43
|
+
it "should parse the added element out of Atom Feedburner feeds entries" do
|
78
44
|
expect(Feedjira::Parser::AtomEntry.new).to respond_to(:comment_rss)
|
79
45
|
end
|
80
46
|
|
81
|
-
it
|
47
|
+
it "should parse the added element out of RSS feeds entries" do
|
82
48
|
expect(Feedjira::Parser::RSSEntry.new).to respond_to(:comment_rss)
|
83
49
|
end
|
84
50
|
end
|
85
|
-
|
86
|
-
describe '#parse_with' do
|
87
|
-
let(:xml) { '<xml></xml>' }
|
88
|
-
|
89
|
-
it 'invokes the parser and passes the xml' do
|
90
|
-
parser = double 'Parser', parse: nil
|
91
|
-
expect(parser).to receive(:parse).with xml
|
92
|
-
Feedjira::Feed.parse_with parser, xml
|
93
|
-
end
|
94
|
-
|
95
|
-
context 'with a callback block' do
|
96
|
-
it 'passes the callback to the parser' do
|
97
|
-
callback = ->(*) { raise Hell }
|
98
|
-
|
99
|
-
expect do
|
100
|
-
Feedjira::Feed.parse_with FailParser, xml, &callback
|
101
|
-
end.to raise_error Hell
|
102
|
-
end
|
103
|
-
end
|
104
|
-
end
|
105
|
-
|
106
|
-
describe '#parse' do
|
107
|
-
context "when there's an available parser" do
|
108
|
-
it 'should parse an rdf feed' do
|
109
|
-
feed = Feedjira::Feed.parse(sample_rdf_feed)
|
110
|
-
expect(feed.title).to eq 'HREF Considered Harmful'
|
111
|
-
published = Time.parse_safely('Tue Sep 02 19:50:07 UTC 2008')
|
112
|
-
expect(feed.entries.first.published).to eq published
|
113
|
-
expect(feed.entries.size).to eq 10
|
114
|
-
end
|
115
|
-
|
116
|
-
it 'should parse an rss feed' do
|
117
|
-
feed = Feedjira::Feed.parse(sample_rss_feed)
|
118
|
-
expect(feed.title).to eq 'Tender Lovemaking'
|
119
|
-
published = Time.parse_safely 'Thu Dec 04 17:17:49 UTC 2008'
|
120
|
-
expect(feed.entries.first.published).to eq published
|
121
|
-
expect(feed.entries.size).to eq 10
|
122
|
-
end
|
123
|
-
|
124
|
-
it 'should parse an atom feed' do
|
125
|
-
feed = Feedjira::Feed.parse(sample_atom_feed)
|
126
|
-
expect(feed.title).to eq 'Amazon Web Services Blog'
|
127
|
-
published = Time.parse_safely 'Fri Jan 16 18:21:00 UTC 2009'
|
128
|
-
expect(feed.entries.first.published).to eq published
|
129
|
-
expect(feed.entries.size).to eq 10
|
130
|
-
end
|
131
|
-
|
132
|
-
it 'should parse an feedburner atom feed' do
|
133
|
-
feed = Feedjira::Feed.parse(sample_feedburner_atom_feed)
|
134
|
-
expect(feed.title).to eq 'Paul Dix Explains Nothing'
|
135
|
-
published = Time.parse_safely 'Thu Jan 22 15:50:22 UTC 2009'
|
136
|
-
expect(feed.entries.first.published).to eq published
|
137
|
-
expect(feed.entries.size).to eq 5
|
138
|
-
end
|
139
|
-
|
140
|
-
it 'should parse an itunes feed' do
|
141
|
-
feed = Feedjira::Feed.parse(sample_itunes_feed)
|
142
|
-
expect(feed.title).to eq 'All About Everything'
|
143
|
-
published = Time.parse_safely 'Wed, 15 Jun 2005 19:00:00 GMT'
|
144
|
-
expect(feed.entries.first.published).to eq published
|
145
|
-
expect(feed.entries.size).to eq 3
|
146
|
-
end
|
147
|
-
|
148
|
-
# rubocop:disable Metrics/LineLength
|
149
|
-
it 'does not fail if multiple published dates exist and some are unparseable' do
|
150
|
-
expect(Feedjira.logger).to receive(:warn).once
|
151
|
-
|
152
|
-
feed = Feedjira::Feed.parse(sample_invalid_date_format_feed)
|
153
|
-
expect(feed.title).to eq 'Invalid date format feed'
|
154
|
-
published = Time.parse_safely 'Mon, 16 Oct 2017 15:10:00 GMT'
|
155
|
-
expect(feed.entries.first.published).to eq published
|
156
|
-
expect(feed.entries.size).to eq 2
|
157
|
-
end
|
158
|
-
# rubocop:enable Metrics/LineLength
|
159
|
-
end
|
160
|
-
|
161
|
-
context "when there's no available parser" do
|
162
|
-
it 'raises Feedjira::NoParserAvailable' do
|
163
|
-
expect {
|
164
|
-
Feedjira::Feed.parse("I'm an invalid feed")
|
165
|
-
}.to raise_error(Feedjira::NoParserAvailable)
|
166
|
-
end
|
167
|
-
end
|
168
|
-
|
169
|
-
it 'should parse an feedburner rss feed' do
|
170
|
-
feed = Feedjira::Feed.parse(sample_rss_feed_burner_feed)
|
171
|
-
expect(feed.title).to eq 'TechCrunch'
|
172
|
-
published = Time.parse_safely 'Wed Nov 02 17:25:27 UTC 2011'
|
173
|
-
expect(feed.entries.first.published).to eq published
|
174
|
-
expect(feed.entries.size).to eq 20
|
175
|
-
end
|
176
|
-
end
|
177
|
-
|
178
|
-
describe '#determine_feed_parser_for_xml' do
|
179
|
-
it 'with Google Docs atom feed it returns the GoogleDocsAtom parser' do
|
180
|
-
xml = sample_google_docs_list_feed
|
181
|
-
actual_parser = Feedjira::Feed.determine_feed_parser_for_xml xml
|
182
|
-
expect(actual_parser).to eq Feedjira::Parser::GoogleDocsAtom
|
183
|
-
end
|
184
|
-
|
185
|
-
it 'with an atom feed it returns the Atom parser' do
|
186
|
-
xml = sample_atom_feed
|
187
|
-
actual_parser = Feedjira::Feed.determine_feed_parser_for_xml xml
|
188
|
-
expect(actual_parser).to eq Feedjira::Parser::Atom
|
189
|
-
end
|
190
|
-
|
191
|
-
it 'with an atom feedburner feed it returns the AtomFeedBurner parser' do
|
192
|
-
xml = sample_feedburner_atom_feed
|
193
|
-
actual_parser = Feedjira::Feed.determine_feed_parser_for_xml xml
|
194
|
-
expect(actual_parser).to eq Feedjira::Parser::AtomFeedBurner
|
195
|
-
end
|
196
|
-
|
197
|
-
it 'with an rdf feed it returns the RSS parser' do
|
198
|
-
xml = sample_rdf_feed
|
199
|
-
actual_parser = Feedjira::Feed.determine_feed_parser_for_xml xml
|
200
|
-
expect(actual_parser).to eq Feedjira::Parser::RSS
|
201
|
-
end
|
202
|
-
|
203
|
-
it 'with an rss feedburner feed it returns the RSSFeedBurner parser' do
|
204
|
-
xml = sample_rss_feed_burner_feed
|
205
|
-
actual_parser = Feedjira::Feed.determine_feed_parser_for_xml xml
|
206
|
-
expect(actual_parser).to eq Feedjira::Parser::RSSFeedBurner
|
207
|
-
end
|
208
|
-
|
209
|
-
it 'with an rss 2.0 feed it returns the RSS parser' do
|
210
|
-
xml = sample_rss_feed
|
211
|
-
actual_parser = Feedjira::Feed.determine_feed_parser_for_xml xml
|
212
|
-
expect(actual_parser).to eq Feedjira::Parser::RSS
|
213
|
-
end
|
214
|
-
|
215
|
-
it 'with an itunes feed it returns the RSS parser' do
|
216
|
-
xml = sample_itunes_feed
|
217
|
-
actual_parser = Feedjira::Feed.determine_feed_parser_for_xml xml
|
218
|
-
expect(actual_parser).to eq Feedjira::Parser::ITunesRSS
|
219
|
-
end
|
220
|
-
end
|
221
|
-
|
222
|
-
describe 'when adding feed types' do
|
223
|
-
it 'should prioritize added types over the built in ones' do
|
224
|
-
xml = 'Atom asdf'
|
225
|
-
allow(Feedjira::Parser::Atom).to receive(:able_to_parse?).and_return(true)
|
226
|
-
new_parser = Class.new do
|
227
|
-
def self.able_to_parse?(_)
|
228
|
-
true
|
229
|
-
end
|
230
|
-
end
|
231
|
-
|
232
|
-
expect(new_parser).to be_able_to_parse(xml)
|
233
|
-
|
234
|
-
Feedjira::Feed.add_feed_class(new_parser)
|
235
|
-
|
236
|
-
parser = Feedjira::Feed.determine_feed_parser_for_xml xml
|
237
|
-
expect(parser).to eq new_parser
|
238
|
-
|
239
|
-
Feedjira::Feed.reset_parsers!
|
240
|
-
end
|
241
|
-
end
|
242
|
-
|
243
|
-
describe 'when parsers are configured' do
|
244
|
-
it 'does not use default parsers' do
|
245
|
-
xml = 'Atom asdf'
|
246
|
-
new_parser = Class.new do
|
247
|
-
def self.able_to_parse?(_)
|
248
|
-
true
|
249
|
-
end
|
250
|
-
end
|
251
|
-
|
252
|
-
Feedjira.configure { |config| config.parsers = [new_parser] }
|
253
|
-
|
254
|
-
parser = Feedjira::Feed.determine_feed_parser_for_xml(xml)
|
255
|
-
expect(parser).to eq(new_parser)
|
256
|
-
|
257
|
-
Feedjira.reset_configuration!
|
258
|
-
Feedjira::Feed.reset_parsers!
|
259
|
-
end
|
260
|
-
end
|
261
51
|
end
|
262
|
-
|
263
|
-
# rubocop:enable Style/BlockDelimiters
|