feedjira 3.1.1 → 3.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/ISSUE_TEMPLATE/feed-parsing.md +15 -0
- data/.rubocop.yml +19 -627
- data/.rubocop_todo.yml +4 -29
- data/.travis.yml +0 -1
- data/CHANGELOG.md +5 -0
- data/Gemfile +4 -2
- data/Rakefile +3 -1
- data/feedjira.gemspec +6 -10
- data/lib/feedjira.rb +4 -3
- data/lib/feedjira/configuration.rb +5 -3
- data/lib/feedjira/core_ext.rb +2 -0
- data/lib/feedjira/core_ext/date.rb +3 -1
- data/lib/feedjira/core_ext/string.rb +2 -0
- data/lib/feedjira/core_ext/time.rb +2 -0
- data/lib/feedjira/date_time_utilities.rb +8 -8
- data/lib/feedjira/date_time_utilities/date_time_epoch_parser.rb +3 -0
- data/lib/feedjira/date_time_utilities/date_time_language_parser.rb +4 -2
- data/lib/feedjira/date_time_utilities/date_time_pattern_parser.rb +9 -9
- data/lib/feedjira/feed.rb +2 -0
- data/lib/feedjira/feed_entry_utilities.rb +5 -3
- data/lib/feedjira/feed_utilities.rb +5 -2
- data/lib/feedjira/parser.rb +2 -0
- data/lib/feedjira/parser/atom.rb +3 -1
- data/lib/feedjira/parser/atom_entry.rb +2 -0
- data/lib/feedjira/parser/atom_feed_burner.rb +4 -2
- data/lib/feedjira/parser/atom_feed_burner_entry.rb +2 -2
- data/lib/feedjira/parser/atom_google_alerts.rb +3 -1
- data/lib/feedjira/parser/atom_google_alerts_entry.rb +6 -5
- data/lib/feedjira/parser/atom_youtube.rb +2 -0
- data/lib/feedjira/parser/atom_youtube_entry.rb +2 -0
- data/lib/feedjira/parser/globally_unique_identifier.rb +2 -0
- data/lib/feedjira/parser/google_docs_atom.rb +4 -2
- data/lib/feedjira/parser/google_docs_atom_entry.rb +2 -0
- data/lib/feedjira/parser/itunes_rss.rb +4 -2
- data/lib/feedjira/parser/itunes_rss_category.rb +2 -0
- data/lib/feedjira/parser/itunes_rss_item.rb +3 -1
- data/lib/feedjira/parser/itunes_rss_owner.rb +2 -0
- data/lib/feedjira/parser/json_feed.rb +4 -2
- data/lib/feedjira/parser/json_feed_item.rb +7 -1
- data/lib/feedjira/parser/podlove_chapter.rb +3 -0
- data/lib/feedjira/parser/rss.rb +4 -1
- data/lib/feedjira/parser/rss_entry.rb +2 -0
- data/lib/feedjira/parser/rss_feed_burner.rb +3 -1
- data/lib/feedjira/parser/rss_feed_burner_entry.rb +2 -2
- data/lib/feedjira/parser/rss_image.rb +2 -0
- data/lib/feedjira/preprocessor.rb +3 -1
- data/lib/feedjira/rss_entry_utilities.rb +6 -2
- data/lib/feedjira/version.rb +3 -1
- data/spec/feedjira/configuration_spec.rb +2 -0
- data/spec/feedjira/date_time_utilities_spec.rb +2 -0
- data/spec/feedjira/feed_entry_utilities_spec.rb +4 -2
- data/spec/feedjira/feed_spec.rb +2 -0
- data/spec/feedjira/feed_utilities_spec.rb +5 -3
- data/spec/feedjira/parser/atom_entry_spec.rb +6 -4
- data/spec/feedjira/parser/atom_feed_burner_entry_spec.rb +7 -5
- data/spec/feedjira/parser/atom_feed_burner_spec.rb +4 -2
- data/spec/feedjira/parser/atom_google_alerts_entry_spec.rb +6 -4
- data/spec/feedjira/parser/atom_google_alerts_spec.rb +2 -0
- data/spec/feedjira/parser/atom_spec.rb +9 -7
- data/spec/feedjira/parser/atom_youtube_entry_spec.rb +6 -4
- data/spec/feedjira/parser/atom_youtube_spec.rb +6 -4
- data/spec/feedjira/parser/google_docs_atom_entry_spec.rb +2 -0
- data/spec/feedjira/parser/google_docs_atom_spec.rb +2 -0
- data/spec/feedjira/parser/itunes_rss_item_spec.rb +3 -1
- data/spec/feedjira/parser/itunes_rss_owner_spec.rb +2 -0
- data/spec/feedjira/parser/itunes_rss_spec.rb +11 -9
- data/spec/feedjira/parser/json_feed_item_spec.rb +5 -3
- data/spec/feedjira/parser/json_feed_spec.rb +2 -0
- data/spec/feedjira/parser/podlove_chapter_spec.rb +2 -0
- data/spec/feedjira/parser/rss_entry_spec.rb +6 -4
- data/spec/feedjira/parser/rss_feed_burner_entry_spec.rb +9 -7
- data/spec/feedjira/parser/rss_feed_burner_spec.rb +3 -1
- data/spec/feedjira/parser/rss_spec.rb +2 -0
- data/spec/feedjira/preprocessor_spec.rb +4 -2
- data/spec/feedjira_spec.rb +11 -0
- data/spec/sample_feeds.rb +4 -1
- data/spec/sample_feeds/a10.xml +72 -0
- data/spec/spec_helper.rb +3 -1
- metadata +13 -25
- data/Dangerfile +0 -1
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Feedjira
|
2
4
|
module Parser
|
3
5
|
# Parser for dealing with Feedburner Atom feeds.
|
@@ -13,7 +15,7 @@ module Feedjira
|
|
13
15
|
elements :entry, as: :entries, class: AtomGoogleAlertsEntry
|
14
16
|
|
15
17
|
def self.able_to_parse?(xml)
|
16
|
-
Atom.able_to_parse?(xml) && (%r{<id>tag:google\.com,2005:[^<]+/com\.google/alerts/} === xml) # rubocop:disable
|
18
|
+
Atom.able_to_parse?(xml) && (%r{<id>tag:google\.com,2005:[^<]+/com\.google/alerts/} === xml) # rubocop:disable Style/CaseEquality
|
17
19
|
end
|
18
20
|
|
19
21
|
def self.preprocess(xml)
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Feedjira
|
2
4
|
module Parser
|
3
5
|
# Parser for dealing with Feedburner Atom feed entries.
|
@@ -8,12 +10,11 @@ module Feedjira
|
|
8
10
|
|
9
11
|
def url
|
10
12
|
url = super
|
13
|
+
return unless url&.start_with?("https://www.google.com/url?")
|
11
14
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
cons && cons[1]
|
16
|
-
end
|
15
|
+
uri = URI(url)
|
16
|
+
cons = URI.decode_www_form(uri.query).assoc("url")
|
17
|
+
cons && cons[1]
|
17
18
|
end
|
18
19
|
end
|
19
20
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require File.expand_path("./atom", File.dirname(__FILE__))
|
2
4
|
module Feedjira
|
3
5
|
module Parser
|
@@ -7,7 +9,7 @@ module Feedjira
|
|
7
9
|
element :title
|
8
10
|
element :subtitle, as: :description
|
9
11
|
element :link, as: :url, value: :href, with: { type: "text/html" }
|
10
|
-
element :link, as: :feed_url, value: :href, with: { type: "application/atom+xml" }
|
12
|
+
element :link, as: :feed_url, value: :href, with: { type: "application/atom+xml" }
|
11
13
|
elements :link, as: :links, value: :href
|
12
14
|
elements :entry, as: :entries, class: GoogleDocsAtomEntry
|
13
15
|
|
@@ -16,7 +18,7 @@ module Feedjira
|
|
16
18
|
end
|
17
19
|
|
18
20
|
def self.able_to_parse?(xml) #:nodoc:
|
19
|
-
%r{<id>https?://docs\.google\.com
|
21
|
+
%r{<id>https?://docs\.google\.com/.*</id>} =~ xml
|
20
22
|
end
|
21
23
|
|
22
24
|
def feed_url
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Feedjira
|
2
4
|
module Parser
|
3
5
|
# iTunes is RSS 2.0 + some apple extensions
|
@@ -18,7 +20,7 @@ module Feedjira
|
|
18
20
|
element :language
|
19
21
|
element :lastBuildDate, as: :last_built
|
20
22
|
element :link, as: :url
|
21
|
-
element :managingEditor
|
23
|
+
element :managingEditor, as: :managing_editor
|
22
24
|
element :rss, as: :version, value: :version
|
23
25
|
element :title
|
24
26
|
element :ttl
|
@@ -61,7 +63,7 @@ module Feedjira
|
|
61
63
|
elements :item, as: :entries, class: ITunesRSSItem
|
62
64
|
|
63
65
|
def self.able_to_parse?(xml)
|
64
|
-
%r{xmlns:itunes\s?=\s?[
|
66
|
+
%r{xmlns:itunes\s?=\s?["']http://www\.itunes\.com/dtds/podcast-1\.0\.dtd["']}i =~ xml
|
65
67
|
end
|
66
68
|
end
|
67
69
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Feedjira
|
2
4
|
module Parser
|
3
5
|
# iTunes extensions to the standard RSS2.0 item
|
@@ -29,7 +31,7 @@ module Feedjira
|
|
29
31
|
element :enclosure, value: :length, as: :enclosure_length
|
30
32
|
element :enclosure, value: :type, as: :enclosure_type
|
31
33
|
element :enclosure, value: :url, as: :enclosure_url
|
32
|
-
elements "psc:chapter", as: :raw_chapters, class: Feedjira::Parser::PodloveChapter
|
34
|
+
elements "psc:chapter", as: :raw_chapters, class: Feedjira::Parser::PodloveChapter
|
33
35
|
|
34
36
|
# Podlove requires clients to re-order by start time in the
|
35
37
|
# event the publisher doesn't provide them in that
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Feedjira
|
2
4
|
module Parser
|
3
5
|
# Parser for dealing with JSON Feeds.
|
@@ -6,7 +8,7 @@ module Feedjira
|
|
6
8
|
include FeedUtilities
|
7
9
|
|
8
10
|
def self.able_to_parse?(json)
|
9
|
-
%r{https
|
11
|
+
%r{https://jsonfeed.org/version/} =~ json
|
10
12
|
end
|
11
13
|
|
12
14
|
def self.parse(json)
|
@@ -14,7 +16,7 @@ module Feedjira
|
|
14
16
|
end
|
15
17
|
|
16
18
|
attr_reader :json, :version, :title, :url, :feed_url, :description,
|
17
|
-
|
19
|
+
:expired, :entries
|
18
20
|
|
19
21
|
def initialize(json)
|
20
22
|
@json = json
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Feedjira
|
2
4
|
module Parser
|
3
5
|
# Parser for dealing with JSON Feed items.
|
@@ -5,7 +7,7 @@ module Feedjira
|
|
5
7
|
include FeedEntryUtilities
|
6
8
|
|
7
9
|
attr_reader :json, :entry_id, :url, :external_url, :title, :content, :summary,
|
8
|
-
|
10
|
+
:published, :updated, :image, :banner_image, :author, :categories
|
9
11
|
|
10
12
|
def initialize(json)
|
11
13
|
@json = json
|
@@ -27,11 +29,13 @@ module Feedjira
|
|
27
29
|
|
28
30
|
def parse_published(date_published)
|
29
31
|
return nil unless date_published
|
32
|
+
|
30
33
|
Time.parse_safely(date_published)
|
31
34
|
end
|
32
35
|
|
33
36
|
def parse_updated(date_modified)
|
34
37
|
return nil unless date_modified
|
38
|
+
|
35
39
|
Time.parse_safely(date_modified)
|
36
40
|
end
|
37
41
|
|
@@ -39,11 +43,13 @@ module Feedjira
|
|
39
43
|
# Prefer content_html unless it isn't included.
|
40
44
|
def parse_content(content_html, content_text)
|
41
45
|
return content_html unless content_html.nil?
|
46
|
+
|
42
47
|
content_text
|
43
48
|
end
|
44
49
|
|
45
50
|
def author_name(author_obj)
|
46
51
|
return nil if author_obj.nil?
|
52
|
+
|
47
53
|
author_obj["name"]
|
48
54
|
end
|
49
55
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Feedjira
|
2
4
|
module Parser
|
3
5
|
class PodloveChapter
|
@@ -10,6 +12,7 @@ module Feedjira
|
|
10
12
|
|
11
13
|
def start
|
12
14
|
return unless start_ntp
|
15
|
+
|
13
16
|
parts = start_ntp.split(":")
|
14
17
|
parts.reverse.to_enum.with_index.map do |part, index|
|
15
18
|
part.to_f * (60**index)
|
data/lib/feedjira/parser/rss.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Feedjira
|
2
4
|
module Parser
|
3
5
|
# Parser for dealing with RSS feeds.
|
@@ -10,6 +12,7 @@ module Feedjira
|
|
10
12
|
element :language
|
11
13
|
element :lastBuildDate, as: :last_built
|
12
14
|
element :link, as: :url
|
15
|
+
element :"a10:link", as: :url, value: :href
|
13
16
|
element :rss, as: :version, value: :version
|
14
17
|
element :title
|
15
18
|
element :ttl
|
@@ -19,7 +22,7 @@ module Feedjira
|
|
19
22
|
attr_accessor :feed_url
|
20
23
|
|
21
24
|
def self.able_to_parse?(xml)
|
22
|
-
(
|
25
|
+
(/<rss|<rdf/ =~ xml) && !(/feedburner/ =~ xml)
|
23
26
|
end
|
24
27
|
end
|
25
28
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Feedjira
|
2
4
|
module Parser
|
3
5
|
# Parser for dealing with RSS feeds.
|
@@ -14,7 +16,7 @@ module Feedjira
|
|
14
16
|
attr_accessor :feed_url
|
15
17
|
|
16
18
|
def self.able_to_parse?(xml) #:nodoc:
|
17
|
-
(
|
19
|
+
(/<rss|<rdf/ =~ xml) && (/feedburner/ =~ xml)
|
18
20
|
end
|
19
21
|
end
|
20
22
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Feedjira
|
2
4
|
module Parser
|
3
5
|
# Parser for dealing with RDF feed entries.
|
@@ -7,9 +9,7 @@ module Feedjira
|
|
7
9
|
include RSSEntryUtilities
|
8
10
|
|
9
11
|
element :"feedburner:origLink", as: :orig_link
|
10
|
-
# rubocop:disable Style/AccessModifierDeclarations
|
11
12
|
private :orig_link
|
12
|
-
# rubocop:enable Style/AccessModifierDeclarations
|
13
13
|
|
14
14
|
def url
|
15
15
|
orig_link || super
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Feedjira
|
2
4
|
class Preprocessor
|
3
5
|
def initialize(xml)
|
@@ -18,7 +20,7 @@ module Feedjira
|
|
18
20
|
end
|
19
21
|
|
20
22
|
def content_nodes
|
21
|
-
doc.search 'entry > content[type="xhtml"], entry > summary[type="xhtml"], entry > title[type="xhtml"]'
|
23
|
+
doc.search 'entry > content[type="xhtml"], entry > summary[type="xhtml"], entry > title[type="xhtml"]'
|
22
24
|
end
|
23
25
|
|
24
26
|
def raw_html(node)
|
@@ -7,12 +7,15 @@ module Feedjira
|
|
7
7
|
element :title
|
8
8
|
|
9
9
|
element :"content:encoded", as: :content
|
10
|
+
element :"a10:content", as: :content
|
10
11
|
element :description, as: :summary
|
11
12
|
|
12
13
|
element :link, as: :url
|
14
|
+
element :"a10:link", as: :url, value: :href
|
13
15
|
|
14
16
|
element :author
|
15
17
|
element :"dc:creator", as: :author
|
18
|
+
element :"a10:name", as: :author
|
16
19
|
|
17
20
|
element :pubDate, as: :published
|
18
21
|
element :pubdate, as: :published
|
@@ -22,6 +25,7 @@ module Feedjira
|
|
22
25
|
element :"dcterms:created", as: :published
|
23
26
|
|
24
27
|
element :"dcterms:modified", as: :updated
|
28
|
+
element :"a10:updated", as: :updated
|
25
29
|
|
26
30
|
element :guid, as: :entry_id, class: Feedjira::Parser::GloballyUniqueIdentifier
|
27
31
|
element :"dc:identifier", as: :dc_identifier
|
@@ -35,11 +39,11 @@ module Feedjira
|
|
35
39
|
end
|
36
40
|
|
37
41
|
def entry_id
|
38
|
-
@entry_id
|
42
|
+
@entry_id&.guid
|
39
43
|
end
|
40
44
|
|
41
45
|
def url
|
42
|
-
@url ||
|
46
|
+
@url || @entry_id&.url
|
43
47
|
end
|
44
48
|
|
45
49
|
def id
|
data/lib/feedjira/version.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "spec_helper"
|
2
4
|
|
3
5
|
describe Feedjira::FeedUtilities do
|
@@ -32,14 +34,14 @@ describe Feedjira::FeedUtilities do
|
|
32
34
|
end
|
33
35
|
|
34
36
|
it "should provide a sanitized title" do
|
35
|
-
new_title = "<script>this is not safe</script
|
37
|
+
new_title = "<script>this is not safe</script>#{@entry.title}"
|
36
38
|
@entry.title = new_title
|
37
39
|
scrubbed_title = Loofah.scrub_fragment(new_title, :prune).to_s
|
38
40
|
expect(@entry.title.sanitize).to eq scrubbed_title
|
39
41
|
end
|
40
42
|
|
41
43
|
it "should sanitize content in place" do
|
42
|
-
new_content = "<script
|
44
|
+
new_content = "<script>#{@entry.content}"
|
43
45
|
@entry.content = new_content.dup
|
44
46
|
|
45
47
|
scrubbed_content = Loofah.scrub_fragment(new_content, :prune).to_s
|
data/spec/feedjira/feed_spec.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "spec_helper"
|
2
4
|
|
3
5
|
describe Feedjira::FeedUtilities do
|
@@ -73,13 +75,13 @@ describe Feedjira::FeedUtilities do
|
|
73
75
|
expect(feed.last_modified.class).to eq Time
|
74
76
|
end
|
75
77
|
|
76
|
-
it "should return new_entries? as true when entries are put into new_entries" do
|
78
|
+
it "should return new_entries? as true when entries are put into new_entries" do
|
77
79
|
feed = @klass.new
|
78
80
|
feed.new_entries << :foo
|
79
81
|
expect(feed.new_entries?).to eq true
|
80
82
|
end
|
81
83
|
|
82
|
-
it "should return a last_modified value from the entry with the most recent published date if the last_modified date hasn't been set" do
|
84
|
+
it "should return a last_modified value from the entry with the most recent published date if the last_modified date hasn't been set" do
|
83
85
|
feed = Feedjira::Parser::Atom.new
|
84
86
|
entry = Feedjira::Parser::AtomEntry.new
|
85
87
|
entry.published = Time.now.to_s
|
@@ -87,7 +89,7 @@ describe Feedjira::FeedUtilities do
|
|
87
89
|
expect(feed.last_modified).to eq entry.published
|
88
90
|
end
|
89
91
|
|
90
|
-
it "should not throw an error if one of the entries has published date of nil" do
|
92
|
+
it "should not throw an error if one of the entries has published date of nil" do
|
91
93
|
feed = Feedjira::Parser::Atom.new
|
92
94
|
entry = Feedjira::Parser::AtomEntry.new
|
93
95
|
entry.published = Time.now.to_s
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "spec_helper"
|
2
4
|
|
3
5
|
describe Feedjira::Parser::AtomEntry do
|
@@ -32,7 +34,7 @@ describe Feedjira::Parser::AtomEntry do
|
|
32
34
|
end
|
33
35
|
|
34
36
|
it "should provide a summary" do
|
35
|
-
summary = "Late last year an entrepreneur from Turkey visited me at Amazon HQ in Seattle. We talked about his plans to use AWS as part of his new social video portal startup. I won't spill any beans before he's ready to..."
|
37
|
+
summary = "Late last year an entrepreneur from Turkey visited me at Amazon HQ in Seattle. We talked about his plans to use AWS as part of his new social video portal startup. I won't spill any beans before he's ready to..."
|
36
38
|
expect(@entry.summary).to eq summary
|
37
39
|
end
|
38
40
|
|
@@ -42,7 +44,7 @@ describe Feedjira::Parser::AtomEntry do
|
|
42
44
|
end
|
43
45
|
|
44
46
|
it "should parse the categories" do
|
45
|
-
expect(@entry.categories).to eq %w
|
47
|
+
expect(@entry.categories).to eq %w[Turkey Seattle]
|
46
48
|
end
|
47
49
|
|
48
50
|
it "should parse the updated date" do
|
@@ -69,7 +71,7 @@ describe Feedjira::Parser::AtomEntry do
|
|
69
71
|
|
70
72
|
expect(title_value).to eq "AWS Job: Architect & Designer Position in Turkey"
|
71
73
|
|
72
|
-
expected_fields = %w
|
74
|
+
expected_fields = %w[
|
73
75
|
author
|
74
76
|
categories
|
75
77
|
content
|
@@ -81,7 +83,7 @@ describe Feedjira::Parser::AtomEntry do
|
|
81
83
|
title_type
|
82
84
|
updated
|
83
85
|
url
|
84
|
-
|
86
|
+
]
|
85
87
|
expect(all_fields.sort).to eq expected_fields
|
86
88
|
end
|
87
89
|
|