feedjira 2.2.0 → 3.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/ISSUE_TEMPLATE/feed-parsing.md +15 -0
- data/.rubocop.yml +32 -8
- data/.rubocop_todo.yml +11 -0
- data/.travis.yml +3 -7
- data/CHANGELOG.md +18 -9
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +8 -5
- data/README.md +46 -99
- data/Rakefile +8 -6
- data/feedjira.gemspec +31 -20
- data/lib/feedjira.rb +75 -41
- data/lib/feedjira/atom_entry_utilities.rb +51 -0
- data/lib/feedjira/configuration.rb +8 -10
- data/lib/feedjira/core_ext.rb +5 -3
- data/lib/feedjira/core_ext/date.rb +2 -1
- data/lib/feedjira/core_ext/string.rb +2 -1
- data/lib/feedjira/core_ext/time.rb +12 -12
- data/lib/feedjira/date_time_utilities.rb +8 -10
- data/lib/feedjira/date_time_utilities/date_time_epoch_parser.rb +3 -2
- data/lib/feedjira/date_time_utilities/date_time_language_parser.rb +4 -4
- data/lib/feedjira/date_time_utilities/date_time_pattern_parser.rb +11 -15
- data/lib/feedjira/feed.rb +12 -82
- data/lib/feedjira/feed_entry_utilities.rb +14 -7
- data/lib/feedjira/feed_utilities.rb +5 -4
- data/lib/feedjira/parser.rb +6 -1
- data/lib/feedjira/parser/atom.rb +6 -5
- data/lib/feedjira/parser/atom_entry.rb +4 -21
- data/lib/feedjira/parser/atom_feed_burner.rb +7 -6
- data/lib/feedjira/parser/atom_feed_burner_entry.rb +7 -18
- data/lib/feedjira/parser/atom_google_alerts.rb +26 -0
- data/lib/feedjira/parser/atom_google_alerts_entry.rb +21 -0
- data/lib/feedjira/parser/atom_youtube.rb +4 -3
- data/lib/feedjira/parser/atom_youtube_entry.rb +9 -8
- data/lib/feedjira/parser/globally_unique_identifier.rb +21 -0
- data/lib/feedjira/parser/google_docs_atom.rb +6 -6
- data/lib/feedjira/parser/google_docs_atom_entry.rb +3 -19
- data/lib/feedjira/parser/itunes_rss.rb +4 -3
- data/lib/feedjira/parser/itunes_rss_category.rb +6 -5
- data/lib/feedjira/parser/itunes_rss_item.rb +5 -8
- data/lib/feedjira/parser/itunes_rss_owner.rb +2 -1
- data/lib/feedjira/parser/json_feed.rb +41 -0
- data/lib/feedjira/parser/json_feed_item.rb +57 -0
- data/lib/feedjira/parser/podlove_chapter.rb +4 -3
- data/lib/feedjira/parser/rss.rb +5 -3
- data/lib/feedjira/parser/rss_entry.rb +3 -24
- data/lib/feedjira/parser/rss_feed_burner.rb +4 -3
- data/lib/feedjira/parser/rss_feed_burner_entry.rb +6 -26
- data/lib/feedjira/parser/rss_image.rb +2 -0
- data/lib/feedjira/preprocessor.rb +4 -4
- data/lib/feedjira/rss_entry_utilities.rb +53 -0
- data/lib/feedjira/version.rb +3 -1
- data/spec/feedjira/configuration_spec.rb +11 -16
- data/spec/feedjira/date_time_utilities_spec.rb +22 -20
- data/spec/feedjira/feed_entry_utilities_spec.rb +20 -18
- data/spec/feedjira/feed_spec.rb +17 -229
- data/spec/feedjira/feed_utilities_spec.rb +75 -73
- data/spec/feedjira/parser/atom_entry_spec.rb +41 -38
- data/spec/feedjira/parser/atom_feed_burner_entry_spec.rb +22 -20
- data/spec/feedjira/parser/atom_feed_burner_spec.rb +122 -118
- data/spec/feedjira/parser/atom_google_alerts_entry_spec.rb +34 -0
- data/spec/feedjira/parser/atom_google_alerts_spec.rb +62 -0
- data/spec/feedjira/parser/atom_spec.rb +83 -77
- data/spec/feedjira/parser/atom_youtube_entry_spec.rb +41 -39
- data/spec/feedjira/parser/atom_youtube_spec.rb +21 -19
- data/spec/feedjira/parser/google_docs_atom_entry_spec.rb +10 -8
- data/spec/feedjira/parser/google_docs_atom_spec.rb +25 -21
- data/spec/feedjira/parser/itunes_rss_item_spec.rb +39 -37
- data/spec/feedjira/parser/itunes_rss_owner_spec.rb +7 -5
- data/spec/feedjira/parser/itunes_rss_spec.rb +120 -116
- data/spec/feedjira/parser/json_feed_item_spec.rb +81 -0
- data/spec/feedjira/parser/json_feed_spec.rb +55 -0
- data/spec/feedjira/parser/podlove_chapter_spec.rb +14 -12
- data/spec/feedjira/parser/rss_entry_spec.rb +56 -34
- data/spec/feedjira/parser/rss_feed_burner_entry_spec.rb +36 -34
- data/spec/feedjira/parser/rss_feed_burner_spec.rb +49 -45
- data/spec/feedjira/parser/rss_spec.rb +38 -36
- data/spec/feedjira/preprocessor_spec.rb +9 -7
- data/spec/feedjira_spec.rb +166 -0
- data/spec/sample_feeds.rb +32 -29
- data/spec/sample_feeds/HuffPostCanada.xml +279 -0
- data/spec/sample_feeds/Permalinks.xml +22 -0
- data/spec/sample_feeds/a10.xml +72 -0
- data/spec/sample_feeds/google_alerts_atom.xml +1 -0
- data/spec/sample_feeds/json_feed.json +156 -0
- data/spec/spec_helper.rb +7 -5
- metadata +59 -70
- data/Dangerfile +0 -1
- data/fixtures/vcr_cassettes/fetch_failure.yml +0 -62
- data/fixtures/vcr_cassettes/parse_error.yml +0 -222
- data/fixtures/vcr_cassettes/success.yml +0 -281
|
@@ -1,4 +1,6 @@
|
|
|
1
|
-
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "spec_helper"
|
|
2
4
|
|
|
3
5
|
describe Feedjira::Parser::AtomFeedBurnerEntry do
|
|
4
6
|
before(:each) do
|
|
@@ -10,45 +12,45 @@ describe Feedjira::Parser::AtomFeedBurnerEntry do
|
|
|
10
12
|
@entry = feed.entries.first
|
|
11
13
|
end
|
|
12
14
|
|
|
13
|
-
it
|
|
14
|
-
expect(@entry.title).to eq
|
|
15
|
+
it "should parse the title" do
|
|
16
|
+
expect(@entry.title).to eq "Making a Ruby C library even faster"
|
|
15
17
|
end
|
|
16
18
|
|
|
17
|
-
it "should be able to fetch a url via the 'alternate' rel if no origLink exists" do
|
|
18
|
-
xml = File.read("#{File.dirname(__FILE__)}/../../sample_feeds/PaulDixExplainsNothingAlternate.xml")
|
|
19
|
+
it "should be able to fetch a url via the 'alternate' rel if no origLink exists" do
|
|
20
|
+
xml = File.read("#{File.dirname(__FILE__)}/../../sample_feeds/PaulDixExplainsNothingAlternate.xml")
|
|
19
21
|
entry = Feedjira::Parser::AtomFeedBurner.parse(xml).entries.first
|
|
20
|
-
expect(entry.url).to eq
|
|
22
|
+
expect(entry.url).to eq("http://feeds.feedburner.com/~r/PaulDixExplainsNothing/~3/519925023/making-a-ruby-c-library-even-faster.html")
|
|
21
23
|
end
|
|
22
24
|
|
|
23
|
-
it
|
|
24
|
-
expect(@entry.url).to eq
|
|
25
|
+
it "should parse the url" do
|
|
26
|
+
expect(@entry.url).to eq "http://www.pauldix.net/2009/01/making-a-ruby-c-library-even-faster.html"
|
|
25
27
|
end
|
|
26
28
|
|
|
27
|
-
it
|
|
28
|
-
xml = File.read("#{File.dirname(__FILE__)}/../../sample_feeds/FeedBurnerUrlNoAlternate.xml")
|
|
29
|
+
it "should parse the url when there is no alternate" do
|
|
30
|
+
xml = File.read("#{File.dirname(__FILE__)}/../../sample_feeds/FeedBurnerUrlNoAlternate.xml")
|
|
29
31
|
entry = Feedjira::Parser::AtomFeedBurner.parse(xml).entries.first
|
|
30
|
-
expect(entry.url).to eq
|
|
32
|
+
expect(entry.url).to eq "http://example.com/QQQQ.html"
|
|
31
33
|
end
|
|
32
34
|
|
|
33
|
-
it
|
|
34
|
-
expect(@entry.author).to eq
|
|
35
|
+
it "should parse the author" do
|
|
36
|
+
expect(@entry.author).to eq "Paul Dix"
|
|
35
37
|
end
|
|
36
38
|
|
|
37
|
-
it
|
|
39
|
+
it "should parse the content" do
|
|
38
40
|
expect(@entry.content).to eq sample_feedburner_atom_entry_content
|
|
39
41
|
end
|
|
40
42
|
|
|
41
|
-
it
|
|
42
|
-
summary = "Last week I released the first version of a SAX based XML parsing library called SAX-Machine. It uses Nokogiri, which uses libxml, so it's pretty fast. However, I felt that it could be even faster. The only question was how..."
|
|
43
|
+
it "should provide a summary" do
|
|
44
|
+
summary = "Last week I released the first version of a SAX based XML parsing library called SAX-Machine. It uses Nokogiri, which uses libxml, so it's pretty fast. However, I felt that it could be even faster. The only question was how..."
|
|
43
45
|
expect(@entry.summary).to eq summary
|
|
44
46
|
end
|
|
45
47
|
|
|
46
|
-
it
|
|
47
|
-
published = Time.parse_safely
|
|
48
|
+
it "should parse the published date" do
|
|
49
|
+
published = Time.parse_safely "Thu Jan 22 15:50:22 UTC 2009"
|
|
48
50
|
expect(@entry.published).to eq published
|
|
49
51
|
end
|
|
50
52
|
|
|
51
|
-
it
|
|
52
|
-
expect(@entry.categories).to eq [
|
|
53
|
+
it "should parse the categories" do
|
|
54
|
+
expect(@entry.categories).to eq ["Ruby", "Another Category"]
|
|
53
55
|
end
|
|
54
56
|
end
|
|
@@ -1,124 +1,128 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "spec_helper"
|
|
4
|
+
|
|
5
|
+
module Feedjira
|
|
6
|
+
module Parser
|
|
7
|
+
describe "#will_parse?" do
|
|
8
|
+
it "should return true for a feedburner atom feed" do
|
|
9
|
+
expect(AtomFeedBurner).to be_able_to_parse(sample_feedburner_atom_feed)
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
it "should return false for an rdf feed" do
|
|
13
|
+
expect(AtomFeedBurner).to_not be_able_to_parse(sample_rdf_feed)
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
it "should return false for a regular atom feed" do
|
|
17
|
+
expect(AtomFeedBurner).to_not be_able_to_parse(sample_atom_feed)
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
it "should return false for an rss feedburner feed" do
|
|
21
|
+
expect(AtomFeedBurner).to_not be_able_to_parse sample_rss_feed_burner_feed
|
|
22
|
+
end
|
|
19
23
|
end
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
24
|
+
|
|
25
|
+
describe "parsing old style feeds" do
|
|
26
|
+
before(:each) do
|
|
27
|
+
@feed = AtomFeedBurner.parse(sample_feedburner_atom_feed)
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
it "should parse the title" do
|
|
31
|
+
expect(@feed.title).to eq "Paul Dix Explains Nothing"
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
it "should parse the description" do
|
|
35
|
+
description = "Entrepreneurship, programming, software development, politics, NYC, and random thoughts."
|
|
36
|
+
expect(@feed.description).to eq description
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
it "should parse the url" do
|
|
40
|
+
expect(@feed.url).to eq "http://www.pauldix.net/"
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
it "should parse the feed_url" do
|
|
44
|
+
expect(@feed.feed_url).to eq "http://feeds.feedburner.com/PaulDixExplainsNothing"
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
it "should parse no hub urls" do
|
|
48
|
+
expect(@feed.hubs.count).to eq 0
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
it "should parse hub urls" do
|
|
52
|
+
AtomFeedBurner.preprocess_xml = false
|
|
53
|
+
feed_with_hub = AtomFeedBurner.parse(load_sample("TypePadNews.xml"))
|
|
54
|
+
expect(feed_with_hub.hubs.count).to eq 1
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
it "should parse entries" do
|
|
58
|
+
expect(@feed.entries.size).to eq 5
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
it "should change url" do
|
|
62
|
+
new_url = "http://some.url.com"
|
|
63
|
+
expect { @feed.url = new_url }.not_to raise_error
|
|
64
|
+
expect(@feed.url).to eq new_url
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
it "should change feed_url" do
|
|
68
|
+
new_url = "http://some.url.com"
|
|
69
|
+
expect { @feed.feed_url = new_url }.not_to raise_error
|
|
70
|
+
expect(@feed.feed_url).to eq new_url
|
|
71
|
+
end
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
describe "parsing alternate style feeds" do
|
|
75
|
+
before(:each) do
|
|
76
|
+
@feed = AtomFeedBurner.parse(sample_feedburner_atom_feed_alternate)
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
it "should parse the title" do
|
|
80
|
+
expect(@feed.title).to eq "Giant Robots Smashing Into Other Giant Robots"
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
it "should parse the description" do
|
|
84
|
+
description = "Written by thoughtbot"
|
|
85
|
+
expect(@feed.description).to eq description
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
it "should parse the url" do
|
|
89
|
+
expect(@feed.url).to eq "https://robots.thoughtbot.com"
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
it "should parse the feed_url" do
|
|
93
|
+
expect(@feed.feed_url).to eq "http://feeds.feedburner.com/GiantRobotsSmashingIntoOtherGiantRobots"
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
it "should parse hub urls" do
|
|
97
|
+
expect(@feed.hubs.count).to eq 1
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
it "should parse entries" do
|
|
101
|
+
expect(@feed.entries.size).to eq 3
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
it "should change url" do
|
|
105
|
+
new_url = "http://some.url.com"
|
|
106
|
+
expect { @feed.url = new_url }.not_to raise_error
|
|
107
|
+
expect(@feed.url).to eq new_url
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
it "should change feed_url" do
|
|
111
|
+
new_url = "http://some.url.com"
|
|
112
|
+
expect { @feed.feed_url = new_url }.not_to raise_error
|
|
113
|
+
expect(@feed.feed_url).to eq new_url
|
|
114
|
+
end
|
|
62
115
|
end
|
|
63
116
|
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
@feed = AtomFeedBurner.parse(sample_feedburner_atom_feed_alternate)
|
|
74
|
-
end
|
|
75
|
-
|
|
76
|
-
it 'should parse the title' do
|
|
77
|
-
expect(@feed.title).to eq 'Giant Robots Smashing Into Other Giant Robots'
|
|
78
|
-
end
|
|
79
|
-
|
|
80
|
-
it 'should parse the description' do
|
|
81
|
-
description = 'Written by thoughtbot'
|
|
82
|
-
expect(@feed.description).to eq description
|
|
83
|
-
end
|
|
84
|
-
|
|
85
|
-
it 'should parse the url' do
|
|
86
|
-
expect(@feed.url).to eq 'https://robots.thoughtbot.com'
|
|
87
|
-
end
|
|
88
|
-
|
|
89
|
-
it 'should parse the feed_url' do
|
|
90
|
-
expect(@feed.feed_url).to eq 'http://feeds.feedburner.com/GiantRobotsSmashingIntoOtherGiantRobots'
|
|
91
|
-
end
|
|
92
|
-
|
|
93
|
-
it 'should parse hub urls' do
|
|
94
|
-
expect(@feed.hubs.count).to eq 1
|
|
95
|
-
end
|
|
96
|
-
|
|
97
|
-
it 'should parse entries' do
|
|
98
|
-
expect(@feed.entries.size).to eq 3
|
|
99
|
-
end
|
|
100
|
-
|
|
101
|
-
it 'should change url' do
|
|
102
|
-
new_url = 'http://some.url.com'
|
|
103
|
-
expect { @feed.url = new_url }.not_to raise_error
|
|
104
|
-
expect(@feed.url).to eq new_url
|
|
105
|
-
end
|
|
106
|
-
|
|
107
|
-
it 'should change feed_url' do
|
|
108
|
-
new_url = 'http://some.url.com'
|
|
109
|
-
expect { @feed.feed_url = new_url }.not_to raise_error
|
|
110
|
-
expect(@feed.feed_url).to eq new_url
|
|
111
|
-
end
|
|
112
|
-
end
|
|
113
|
-
|
|
114
|
-
describe 'preprocessing' do
|
|
115
|
-
it 'retains markup in xhtml content' do
|
|
116
|
-
AtomFeedBurner.preprocess_xml = true
|
|
117
|
-
|
|
118
|
-
feed = AtomFeedBurner.parse sample_feed_burner_atom_xhtml_feed
|
|
119
|
-
entry = feed.entries.first
|
|
120
|
-
|
|
121
|
-
expect(entry.content).to match(/\A\<p/)
|
|
117
|
+
describe "preprocessing" do
|
|
118
|
+
it "retains markup in xhtml content" do
|
|
119
|
+
AtomFeedBurner.preprocess_xml = true
|
|
120
|
+
|
|
121
|
+
feed = AtomFeedBurner.parse sample_feed_burner_atom_xhtml_feed
|
|
122
|
+
entry = feed.entries.first
|
|
123
|
+
|
|
124
|
+
expect(entry.content).to match(/\A<p/)
|
|
125
|
+
end
|
|
122
126
|
end
|
|
123
127
|
end
|
|
124
128
|
end
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "spec_helper"
|
|
4
|
+
|
|
5
|
+
describe Feedjira::Parser::AtomGoogleAlertsEntry do
|
|
6
|
+
before(:each) do
|
|
7
|
+
feed = Feedjira::Parser::AtomGoogleAlerts.parse sample_google_alerts_atom_feed
|
|
8
|
+
@entry = feed.entries.first
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
it "should parse the title" do
|
|
12
|
+
expect(@entry.title).to eq "Report offers Prediction of Automotive Slack Market by Top key players like Haldex, Meritor, Bendix ..."
|
|
13
|
+
expect(@entry.raw_title).to eq "Report offers Prediction of Automotive <b>Slack</b> Market by Top key players like Haldex, Meritor, Bendix ..."
|
|
14
|
+
expect(@entry.title_type).to eq "html"
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
it "should parse the url" do
|
|
18
|
+
expect(@entry.url).to eq "https://www.aglobalmarketresearch.com/report-offers-prediction-of-automotive-slack-market-by-top-key-players-like-haldex-meritor-bendix-mei-wabco-accuride-stemco-tbk-febi-aydinsan/"
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
it "should parse the content" do
|
|
22
|
+
expect(@entry.content).to eq "Automotive <b>Slack</b> Market reports provides a comprehensive overview of the global market size and share. It provides strategists, marketers and senior ..."
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
it "should parse the published date" do
|
|
26
|
+
published = Time.parse_safely "2019-07-10T11:53:37Z"
|
|
27
|
+
expect(@entry.published).to eq published
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
it "should parse the updated date" do
|
|
31
|
+
updated = Time.parse_safely "2019-07-10T11:53:37Z"
|
|
32
|
+
expect(@entry.updated).to eq updated
|
|
33
|
+
end
|
|
34
|
+
end
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "spec_helper"
|
|
4
|
+
|
|
5
|
+
module Feedjira
|
|
6
|
+
module Parser
|
|
7
|
+
describe "#able_to_parse?" do
|
|
8
|
+
it "should return true for a Google Alerts atom feed" do
|
|
9
|
+
expect(AtomGoogleAlerts).to be_able_to_parse(sample_google_alerts_atom_feed)
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
it "should return false for an rdf feed" do
|
|
13
|
+
expect(AtomGoogleAlerts).to_not be_able_to_parse(sample_rdf_feed)
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
it "should return false for a regular atom feed" do
|
|
17
|
+
expect(AtomGoogleAlerts).to_not be_able_to_parse(sample_atom_feed)
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
it "should return false for a feedburner atom feed" do
|
|
21
|
+
expect(AtomGoogleAlerts).to_not be_able_to_parse(sample_feedburner_atom_feed)
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
describe "parsing" do
|
|
26
|
+
before(:each) do
|
|
27
|
+
@feed = AtomGoogleAlerts.parse(sample_google_alerts_atom_feed)
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
it "should parse the title" do
|
|
31
|
+
expect(@feed.title).to eq "Google Alert - Slack"
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
it "should parse the descripton" do
|
|
35
|
+
expect(@feed.description).to be_nil
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
it "should parse the url" do
|
|
39
|
+
expect(@feed.url).to eq "https://www.google.com/alerts/feeds/04175468913983673025/4428013283581841004"
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
it "should parse the feed_url" do
|
|
43
|
+
expect(@feed.feed_url).to eq "https://www.google.com/alerts/feeds/04175468913983673025/4428013283581841004"
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
it "should parse entries" do
|
|
47
|
+
expect(@feed.entries.size).to eq 20
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
describe "preprocessing" do
|
|
52
|
+
it "retains markup in xhtml content" do
|
|
53
|
+
AtomGoogleAlerts.preprocess_xml = true
|
|
54
|
+
|
|
55
|
+
feed = AtomGoogleAlerts.parse sample_google_alerts_atom_feed
|
|
56
|
+
entry = feed.entries.first
|
|
57
|
+
|
|
58
|
+
expect(entry.content).to include("<b>Slack</b>")
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
end
|
|
62
|
+
end
|
|
@@ -1,104 +1,110 @@
|
|
|
1
|
-
|
|
1
|
+
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
describe '#will_parse?' do
|
|
5
|
-
it 'should return true for an atom feed' do
|
|
6
|
-
expect(Atom).to be_able_to_parse(sample_atom_feed)
|
|
7
|
-
end
|
|
3
|
+
require "spec_helper"
|
|
8
4
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
5
|
+
module Feedjira
|
|
6
|
+
module Parser
|
|
7
|
+
describe "#will_parse?" do
|
|
8
|
+
it "should return true for an atom feed" do
|
|
9
|
+
expect(Atom).to be_able_to_parse(sample_atom_feed)
|
|
10
|
+
end
|
|
12
11
|
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
12
|
+
it "should return false for an rdf feed" do
|
|
13
|
+
expect(Atom).to_not be_able_to_parse(sample_rdf_feed)
|
|
14
|
+
end
|
|
16
15
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
end
|
|
16
|
+
it "should return false for an rss feedburner feed" do
|
|
17
|
+
expect(Atom).to_not be_able_to_parse(sample_rss_feed_burner_feed)
|
|
18
|
+
end
|
|
21
19
|
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
20
|
+
it "should return true for an atom feed that has line breaks in between attributes in the <feed> node" do
|
|
21
|
+
expect(Atom).to be_able_to_parse(sample_atom_feed_line_breaks)
|
|
22
|
+
end
|
|
25
23
|
end
|
|
26
24
|
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
25
|
+
describe "parsing" do
|
|
26
|
+
before(:each) do
|
|
27
|
+
@feed = Atom.parse(sample_atom_feed)
|
|
28
|
+
end
|
|
30
29
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
end
|
|
30
|
+
it "should parse the title" do
|
|
31
|
+
expect(@feed.title).to eq "Amazon Web Services Blog"
|
|
32
|
+
end
|
|
35
33
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
34
|
+
it "should parse the description" do
|
|
35
|
+
description = "Amazon Web Services, Products, Tools, and Developer Information..."
|
|
36
|
+
expect(@feed.description).to eq description
|
|
37
|
+
end
|
|
39
38
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
expect(feed.url).to eq 'http://www.innoq.com/planet/'
|
|
44
|
-
end
|
|
39
|
+
it "should parse the url" do
|
|
40
|
+
expect(@feed.url).to eq "http://aws.typepad.com/aws/"
|
|
41
|
+
end
|
|
45
42
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
43
|
+
it "should parse the url even when it doesn't have the type='text/html' attribute" do
|
|
44
|
+
xml = load_sample "atom_with_link_tag_for_url_unmarked.xml"
|
|
45
|
+
feed = Atom.parse xml
|
|
46
|
+
expect(feed.url).to eq "http://www.innoq.com/planet/"
|
|
47
|
+
end
|
|
50
48
|
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
49
|
+
it "should parse the feed_url even when it doesn't have the type='application/atom+xml' attribute" do
|
|
50
|
+
feed = Atom.parse(load_sample("atom_with_link_tag_for_url_unmarked.xml"))
|
|
51
|
+
expect(feed.feed_url).to eq "http://www.innoq.com/planet/atom.xml"
|
|
52
|
+
end
|
|
54
53
|
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
54
|
+
it "should parse the feed_url" do
|
|
55
|
+
expect(@feed.feed_url).to eq "http://aws.typepad.com/aws/atom.xml"
|
|
56
|
+
end
|
|
58
57
|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
expect(feed_with_hub.hubs.first).to eq 'http://pubsubhubbub.appspot.com/'
|
|
63
|
-
end
|
|
58
|
+
it "should parse no hub urls" do
|
|
59
|
+
expect(@feed.hubs.count).to eq 0
|
|
60
|
+
end
|
|
64
61
|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
62
|
+
it "should parse the hub urls" do
|
|
63
|
+
feed_with_hub = Atom.parse(load_sample("SamRuby.xml"))
|
|
64
|
+
expect(feed_with_hub.hubs.count).to eq 1
|
|
65
|
+
expect(feed_with_hub.hubs.first).to eq "http://pubsubhubbub.appspot.com/"
|
|
66
|
+
end
|
|
69
67
|
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
68
|
+
it "should parse entries" do
|
|
69
|
+
expect(@feed.entries.size).to eq 10
|
|
70
|
+
end
|
|
71
|
+
end
|
|
73
72
|
|
|
74
|
-
|
|
75
|
-
|
|
73
|
+
describe "preprocessing" do
|
|
74
|
+
it "retains markup in xhtml content" do
|
|
75
|
+
Atom.preprocess_xml = true
|
|
76
76
|
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
expect(entry.content).to match(/\A\<p/)
|
|
80
|
-
end
|
|
77
|
+
feed = Atom.parse sample_atom_xhtml_feed
|
|
78
|
+
entry = feed.entries.first
|
|
81
79
|
|
|
82
|
-
|
|
83
|
-
|
|
80
|
+
expect(entry.raw_title).to match(/<i/)
|
|
81
|
+
expect(entry.title).to eq("Sentry Calming Collar for dogs")
|
|
82
|
+
expect(entry.title_type).to eq("xhtml")
|
|
83
|
+
expect(entry.summary).to match(/<b/)
|
|
84
|
+
expect(entry.content).to match(/\A<p/)
|
|
85
|
+
end
|
|
84
86
|
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
expect(content.css('img').length).to eq 11
|
|
88
|
-
end
|
|
89
|
-
end
|
|
87
|
+
it "should not duplicate content when there are divs in content" do
|
|
88
|
+
Atom.preprocess_xml = true
|
|
90
89
|
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
90
|
+
feed = Atom.parse sample_duplicate_content_atom_feed
|
|
91
|
+
content = Nokogiri::HTML(feed.entries[1].content)
|
|
92
|
+
expect(content.css("img").length).to eq 11
|
|
93
|
+
end
|
|
94
94
|
end
|
|
95
95
|
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
96
|
+
describe "parsing url and feed url based on rel attribute" do
|
|
97
|
+
before :each do
|
|
98
|
+
@feed = Atom.parse(sample_atom_middleman_feed)
|
|
99
|
+
end
|
|
99
100
|
|
|
100
|
-
|
|
101
|
-
|
|
101
|
+
it "should parse url" do
|
|
102
|
+
expect(@feed.url).to eq "http://feedjira.com/blog"
|
|
103
|
+
end
|
|
104
|
+
|
|
105
|
+
it "should parse feed url" do
|
|
106
|
+
expect(@feed.feed_url).to eq "http://feedjira.com/blog/feed.xml"
|
|
107
|
+
end
|
|
102
108
|
end
|
|
103
109
|
end
|
|
104
110
|
end
|