feedjira 3.0.0.beta1 → 3.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.github/ISSUE_TEMPLATE/feed-parsing.md +15 -0
- data/.github/workflows/ruby.yml +39 -0
- data/.rubocop.yml +18 -628
- data/.rubocop_todo.yml +11 -0
- data/CHANGELOG.md +66 -9
- data/Gemfile +4 -1
- data/README.md +10 -1
- data/Rakefile +3 -1
- data/feedjira.gemspec +14 -12
- data/lib/feedjira.rb +9 -3
- data/lib/feedjira/atom_entry_utilities.rb +51 -0
- data/lib/feedjira/configuration.rb +6 -3
- data/lib/feedjira/core_ext.rb +2 -0
- data/lib/feedjira/core_ext/date.rb +3 -2
- data/lib/feedjira/core_ext/string.rb +2 -1
- data/lib/feedjira/core_ext/time.rb +10 -10
- data/lib/feedjira/date_time_utilities.rb +8 -10
- data/lib/feedjira/date_time_utilities/date_time_epoch_parser.rb +3 -2
- data/lib/feedjira/date_time_utilities/date_time_language_parser.rb +4 -4
- data/lib/feedjira/date_time_utilities/date_time_pattern_parser.rb +9 -13
- data/lib/feedjira/feed.rb +3 -3
- data/lib/feedjira/feed_entry_utilities.rb +15 -8
- data/lib/feedjira/feed_utilities.rb +5 -4
- data/lib/feedjira/parser.rb +2 -0
- data/lib/feedjira/parser/atom.rb +5 -7
- data/lib/feedjira/parser/atom_entry.rb +4 -21
- data/lib/feedjira/parser/atom_feed_burner.rb +4 -3
- data/lib/feedjira/parser/atom_feed_burner_entry.rb +7 -18
- data/lib/feedjira/parser/atom_google_alerts.rb +26 -0
- data/lib/feedjira/parser/atom_google_alerts_entry.rb +21 -0
- data/lib/feedjira/parser/atom_youtube.rb +2 -1
- data/lib/feedjira/parser/atom_youtube_entry.rb +8 -7
- data/lib/feedjira/parser/globally_unique_identifier.rb +21 -0
- data/lib/feedjira/parser/google_docs_atom.rb +4 -4
- data/lib/feedjira/parser/google_docs_atom_entry.rb +3 -19
- data/lib/feedjira/parser/itunes_rss.rb +4 -3
- data/lib/feedjira/parser/itunes_rss_category.rb +6 -5
- data/lib/feedjira/parser/itunes_rss_item.rb +5 -8
- data/lib/feedjira/parser/itunes_rss_owner.rb +2 -1
- data/lib/feedjira/parser/json_feed.rb +4 -2
- data/lib/feedjira/parser/json_feed_item.rb +7 -1
- data/lib/feedjira/parser/podlove_chapter.rb +3 -2
- data/lib/feedjira/parser/rss.rb +4 -2
- data/lib/feedjira/parser/rss_entry.rb +3 -28
- data/lib/feedjira/parser/rss_feed_burner.rb +3 -2
- data/lib/feedjira/parser/rss_feed_burner_entry.rb +6 -26
- data/lib/feedjira/parser/rss_image.rb +2 -0
- data/lib/feedjira/preprocessor.rb +3 -3
- data/lib/feedjira/rss_entry_utilities.rb +53 -0
- data/lib/feedjira/version.rb +3 -1
- data/spec/feedjira/configuration_spec.rb +5 -3
- data/spec/feedjira/date_time_utilities_spec.rb +2 -0
- data/spec/feedjira/feed_entry_utilities_spec.rb +4 -2
- data/spec/feedjira/feed_spec.rb +3 -1
- data/spec/feedjira/feed_utilities_spec.rb +5 -3
- data/spec/feedjira/parser/atom_entry_spec.rb +7 -4
- data/spec/feedjira/parser/atom_feed_burner_entry_spec.rb +7 -5
- data/spec/feedjira/parser/atom_feed_burner_spec.rb +4 -2
- data/spec/feedjira/parser/atom_google_alerts_entry_spec.rb +34 -0
- data/spec/feedjira/parser/atom_google_alerts_spec.rb +62 -0
- data/spec/feedjira/parser/atom_spec.rb +30 -9
- data/spec/feedjira/parser/atom_youtube_entry_spec.rb +8 -6
- data/spec/feedjira/parser/atom_youtube_spec.rb +6 -4
- data/spec/feedjira/parser/google_docs_atom_entry_spec.rb +2 -0
- data/spec/feedjira/parser/google_docs_atom_spec.rb +2 -0
- data/spec/feedjira/parser/itunes_rss_item_spec.rb +3 -1
- data/spec/feedjira/parser/itunes_rss_owner_spec.rb +2 -0
- data/spec/feedjira/parser/itunes_rss_spec.rb +11 -9
- data/spec/feedjira/parser/json_feed_item_spec.rb +5 -3
- data/spec/feedjira/parser/json_feed_spec.rb +2 -0
- data/spec/feedjira/parser/podlove_chapter_spec.rb +2 -0
- data/spec/feedjira/parser/rss_entry_spec.rb +26 -4
- data/spec/feedjira/parser/rss_feed_burner_entry_spec.rb +9 -7
- data/spec/feedjira/parser/rss_feed_burner_spec.rb +3 -1
- data/spec/feedjira/parser/rss_spec.rb +2 -0
- data/spec/feedjira/preprocessor_spec.rb +4 -2
- data/spec/feedjira_spec.rb +22 -1
- data/spec/sample_feeds.rb +7 -3
- data/spec/sample_feeds/InvalidDateFormat.xml +20 -0
- data/spec/sample_feeds/Permalinks.xml +22 -0
- data/spec/sample_feeds/a10.xml +72 -0
- data/spec/sample_feeds/atom_simple_single_entry.xml +17 -0
- data/spec/sample_feeds/atom_simple_single_entry_link_self.xml +17 -0
- data/spec/sample_feeds/google_alerts_atom.xml +1 -0
- data/spec/spec_helper.rb +3 -7
- metadata +44 -47
- data/.travis.yml +0 -37
- data/Dangerfile +0 -1
@@ -1,31 +1,15 @@
|
|
1
|
-
#
|
2
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
3
|
module Feedjira
|
4
4
|
module Parser
|
5
5
|
class GoogleDocsAtomEntry
|
6
6
|
include SAXMachine
|
7
7
|
include FeedEntryUtilities
|
8
|
+
include AtomEntryUtilities
|
8
9
|
|
9
|
-
element :title
|
10
|
-
element :link, as: :url, value: :href, with: { type: "text/html", rel: "alternate" } # rubocop:disable Metrics/LineLength
|
11
|
-
element :name, as: :author
|
12
|
-
element :content
|
13
|
-
element :summary
|
14
|
-
element :published
|
15
|
-
element :id, as: :entry_id
|
16
|
-
element :created, as: :published
|
17
|
-
element :issued, as: :published
|
18
|
-
element :updated
|
19
|
-
element :modified, as: :updated
|
20
|
-
elements :category, as: :categories, value: :term
|
21
|
-
elements :link, as: :links, value: :href
|
22
10
|
element :"docs:md5Checksum", as: :checksum
|
23
11
|
element :"docs:filename", as: :original_filename
|
24
12
|
element :"docs:suggestedFilename", as: :suggested_filename
|
25
|
-
|
26
|
-
def url
|
27
|
-
@url ||= links.first
|
28
|
-
end
|
29
13
|
end
|
30
14
|
end
|
31
15
|
end
|
@@ -1,4 +1,5 @@
|
|
1
|
-
#
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module Feedjira
|
3
4
|
module Parser
|
4
5
|
# iTunes is RSS 2.0 + some apple extensions
|
@@ -19,7 +20,7 @@ module Feedjira
|
|
19
20
|
element :language
|
20
21
|
element :lastBuildDate, as: :last_built
|
21
22
|
element :link, as: :url
|
22
|
-
element :managingEditor
|
23
|
+
element :managingEditor, as: :managing_editor
|
23
24
|
element :rss, as: :version, value: :version
|
24
25
|
element :title
|
25
26
|
element :ttl
|
@@ -62,7 +63,7 @@ module Feedjira
|
|
62
63
|
elements :item, as: :entries, class: ITunesRSSItem
|
63
64
|
|
64
65
|
def self.able_to_parse?(xml)
|
65
|
-
%r{xmlns:itunes\s?=\s?[
|
66
|
+
%r{xmlns:itunes\s?=\s?["']http://www\.itunes\.com/dtds/podcast-1\.0\.dtd["']}i =~ xml
|
66
67
|
end
|
67
68
|
end
|
68
69
|
end
|
@@ -1,4 +1,5 @@
|
|
1
|
-
#
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module Feedjira
|
3
4
|
module Parser
|
4
5
|
# iTunes extensions to the standard RSS2.0 item
|
@@ -11,17 +12,17 @@ module Feedjira
|
|
11
12
|
elements :"itunes:category", as: :itunes_categories,
|
12
13
|
class: ITunesRSSCategory
|
13
14
|
|
14
|
-
def each_subcategory
|
15
|
+
def each_subcategory(&block)
|
15
16
|
return to_enum(__method__) unless block_given?
|
16
17
|
|
17
18
|
yield text
|
18
19
|
|
19
20
|
itunes_categories.each do |itunes_category|
|
20
|
-
itunes_category.each_subcategory(&
|
21
|
+
itunes_category.each_subcategory(&block)
|
21
22
|
end
|
22
23
|
end
|
23
24
|
|
24
|
-
def each_path(ancestors = [])
|
25
|
+
def each_path(ancestors = [], &block)
|
25
26
|
return to_enum(__method__, ancestors) unless block_given?
|
26
27
|
|
27
28
|
category_hierarchy = ancestors + [text]
|
@@ -30,7 +31,7 @@ module Feedjira
|
|
30
31
|
yield category_hierarchy
|
31
32
|
else
|
32
33
|
itunes_categories.each do |itunes_category|
|
33
|
-
itunes_category.each_path(category_hierarchy, &
|
34
|
+
itunes_category.each_path(category_hierarchy, &block)
|
34
35
|
end
|
35
36
|
end
|
36
37
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Feedjira
|
2
4
|
module Parser
|
3
5
|
# iTunes extensions to the standard RSS2.0 item
|
@@ -5,14 +7,9 @@ module Feedjira
|
|
5
7
|
class ITunesRSSItem
|
6
8
|
include SAXMachine
|
7
9
|
include FeedEntryUtilities
|
10
|
+
include RSSEntryUtilities
|
8
11
|
|
9
|
-
|
10
|
-
element :guid, as: :entry_id
|
11
|
-
element :title
|
12
|
-
element :link, as: :url
|
13
|
-
element :description, as: :summary
|
14
|
-
element :"content:encoded", as: :content
|
15
|
-
element :pubDate, as: :published
|
12
|
+
sax_config.top_level_elements["enclosure"].clear
|
16
13
|
|
17
14
|
# If author is not present use author tag on the item
|
18
15
|
element :"itunes:author", as: :itunes_author
|
@@ -34,7 +31,7 @@ module Feedjira
|
|
34
31
|
element :enclosure, value: :length, as: :enclosure_length
|
35
32
|
element :enclosure, value: :type, as: :enclosure_type
|
36
33
|
element :enclosure, value: :url, as: :enclosure_url
|
37
|
-
elements "psc:chapter", as: :raw_chapters, class: Feedjira::Parser::PodloveChapter
|
34
|
+
elements "psc:chapter", as: :raw_chapters, class: Feedjira::Parser::PodloveChapter
|
38
35
|
|
39
36
|
# Podlove requires clients to re-order by start time in the
|
40
37
|
# event the publisher doesn't provide them in that
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Feedjira
|
2
4
|
module Parser
|
3
5
|
# Parser for dealing with JSON Feeds.
|
@@ -6,7 +8,7 @@ module Feedjira
|
|
6
8
|
include FeedUtilities
|
7
9
|
|
8
10
|
def self.able_to_parse?(json)
|
9
|
-
%r{https
|
11
|
+
%r{https://jsonfeed.org/version/} =~ json
|
10
12
|
end
|
11
13
|
|
12
14
|
def self.parse(json)
|
@@ -14,7 +16,7 @@ module Feedjira
|
|
14
16
|
end
|
15
17
|
|
16
18
|
attr_reader :json, :version, :title, :url, :feed_url, :description,
|
17
|
-
|
19
|
+
:expired, :entries
|
18
20
|
|
19
21
|
def initialize(json)
|
20
22
|
@json = json
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Feedjira
|
2
4
|
module Parser
|
3
5
|
# Parser for dealing with JSON Feed items.
|
@@ -5,7 +7,7 @@ module Feedjira
|
|
5
7
|
include FeedEntryUtilities
|
6
8
|
|
7
9
|
attr_reader :json, :entry_id, :url, :external_url, :title, :content, :summary,
|
8
|
-
|
10
|
+
:published, :updated, :image, :banner_image, :author, :categories
|
9
11
|
|
10
12
|
def initialize(json)
|
11
13
|
@json = json
|
@@ -27,11 +29,13 @@ module Feedjira
|
|
27
29
|
|
28
30
|
def parse_published(date_published)
|
29
31
|
return nil unless date_published
|
32
|
+
|
30
33
|
Time.parse_safely(date_published)
|
31
34
|
end
|
32
35
|
|
33
36
|
def parse_updated(date_modified)
|
34
37
|
return nil unless date_modified
|
38
|
+
|
35
39
|
Time.parse_safely(date_modified)
|
36
40
|
end
|
37
41
|
|
@@ -39,11 +43,13 @@ module Feedjira
|
|
39
43
|
# Prefer content_html unless it isn't included.
|
40
44
|
def parse_content(content_html, content_text)
|
41
45
|
return content_html unless content_html.nil?
|
46
|
+
|
42
47
|
content_text
|
43
48
|
end
|
44
49
|
|
45
50
|
def author_name(author_obj)
|
46
51
|
return nil if author_obj.nil?
|
52
|
+
|
47
53
|
author_obj["name"]
|
48
54
|
end
|
49
55
|
end
|
@@ -1,5 +1,5 @@
|
|
1
|
-
#
|
2
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
3
|
module Feedjira
|
4
4
|
module Parser
|
5
5
|
class PodloveChapter
|
@@ -12,6 +12,7 @@ module Feedjira
|
|
12
12
|
|
13
13
|
def start
|
14
14
|
return unless start_ntp
|
15
|
+
|
15
16
|
parts = start_ntp.split(":")
|
16
17
|
parts.reverse.to_enum.with_index.map do |part, index|
|
17
18
|
part.to_f * (60**index)
|
data/lib/feedjira/parser/rss.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
|
-
#
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module Feedjira
|
3
4
|
module Parser
|
4
5
|
# Parser for dealing with RSS feeds.
|
@@ -11,6 +12,7 @@ module Feedjira
|
|
11
12
|
element :language
|
12
13
|
element :lastBuildDate, as: :last_built
|
13
14
|
element :link, as: :url
|
15
|
+
element :"a10:link", as: :url, value: :href
|
14
16
|
element :rss, as: :version, value: :version
|
15
17
|
element :title
|
16
18
|
element :ttl
|
@@ -20,7 +22,7 @@ module Feedjira
|
|
20
22
|
attr_accessor :feed_url
|
21
23
|
|
22
24
|
def self.able_to_parse?(xml)
|
23
|
-
(
|
25
|
+
(/<rss|<rdf/ =~ xml) && !(/feedburner/ =~ xml)
|
24
26
|
end
|
25
27
|
end
|
26
28
|
end
|
@@ -1,37 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Feedjira
|
2
4
|
module Parser
|
3
5
|
# Parser for dealing with RDF feed entries.
|
4
6
|
class RSSEntry
|
5
7
|
include SAXMachine
|
6
8
|
include FeedEntryUtilities
|
7
|
-
|
8
|
-
element :title
|
9
|
-
element :link, as: :url
|
10
|
-
|
11
|
-
element :"dc:creator", as: :author
|
12
|
-
element :author, as: :author
|
13
|
-
element :"content:encoded", as: :content
|
14
|
-
element :description, as: :summary
|
15
|
-
|
16
|
-
element :"media:content", as: :image, value: :url
|
17
|
-
element :enclosure, as: :image, value: :url
|
18
|
-
|
19
|
-
element :pubDate, as: :published
|
20
|
-
element :pubdate, as: :published
|
21
|
-
element :"dc:date", as: :published
|
22
|
-
element :"dc:Date", as: :published
|
23
|
-
element :"dcterms:created", as: :published
|
24
|
-
|
25
|
-
element :"dcterms:modified", as: :updated
|
26
|
-
element :issued, as: :published
|
27
|
-
elements :category, as: :categories
|
28
|
-
|
29
|
-
element :guid, as: :entry_id
|
30
|
-
element :"dc:identifier", as: :dc_identifier
|
31
|
-
|
32
|
-
def id
|
33
|
-
@entry_id ||= @dc_identifier || @url
|
34
|
-
end
|
9
|
+
include RSSEntryUtilities
|
35
10
|
end
|
36
11
|
end
|
37
12
|
end
|
@@ -1,4 +1,5 @@
|
|
1
|
-
#
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module Feedjira
|
3
4
|
module Parser
|
4
5
|
# Parser for dealing with RSS feeds.
|
@@ -15,7 +16,7 @@ module Feedjira
|
|
15
16
|
attr_accessor :feed_url
|
16
17
|
|
17
18
|
def self.able_to_parse?(xml) #:nodoc:
|
18
|
-
(
|
19
|
+
(/<rss|<rdf/ =~ xml) && (/feedburner/ =~ xml)
|
19
20
|
end
|
20
21
|
end
|
21
22
|
end
|
@@ -1,38 +1,18 @@
|
|
1
|
-
#
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module Feedjira
|
3
4
|
module Parser
|
4
5
|
# Parser for dealing with RDF feed entries.
|
5
6
|
class RSSFeedBurnerEntry
|
6
7
|
include SAXMachine
|
7
8
|
include FeedEntryUtilities
|
9
|
+
include RSSEntryUtilities
|
8
10
|
|
9
|
-
element :
|
10
|
-
|
11
|
-
element :"feedburner:origLink", as: :url
|
12
|
-
element :link, as: :url
|
13
|
-
|
14
|
-
element :"dc:creator", as: :author
|
15
|
-
element :author, as: :author
|
16
|
-
element :"content:encoded", as: :content
|
17
|
-
element :description, as: :summary
|
18
|
-
|
19
|
-
element :"media:content", as: :image, value: :url
|
20
|
-
element :enclosure, as: :image, value: :url
|
21
|
-
|
22
|
-
element :pubDate, as: :published
|
23
|
-
element :pubdate, as: :published
|
24
|
-
element :"dc:date", as: :published
|
25
|
-
element :"dc:Date", as: :published
|
26
|
-
element :"dcterms:created", as: :published
|
27
|
-
|
28
|
-
element :"dcterms:modified", as: :updated
|
29
|
-
element :issued, as: :published
|
30
|
-
elements :category, as: :categories
|
31
|
-
|
32
|
-
element :guid, as: :entry_id
|
11
|
+
element :"feedburner:origLink", as: :orig_link
|
12
|
+
private :orig_link
|
33
13
|
|
34
14
|
def url
|
35
|
-
|
15
|
+
orig_link || super
|
36
16
|
end
|
37
17
|
end
|
38
18
|
end
|
@@ -1,5 +1,5 @@
|
|
1
|
-
#
|
2
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
3
|
module Feedjira
|
4
4
|
class Preprocessor
|
5
5
|
def initialize(xml)
|
@@ -20,7 +20,7 @@ module Feedjira
|
|
20
20
|
end
|
21
21
|
|
22
22
|
def content_nodes
|
23
|
-
doc.search 'entry > content[type="xhtml"], entry > summary[type="xhtml"], entry > title[type="xhtml"]'
|
23
|
+
doc.search 'entry > content[type="xhtml"], entry > summary[type="xhtml"], entry > title[type="xhtml"]'
|
24
24
|
end
|
25
25
|
|
26
26
|
def raw_html(node)
|
@@ -0,0 +1,53 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Feedjira
|
4
|
+
module RSSEntryUtilities
|
5
|
+
def self.included(mod)
|
6
|
+
mod.class_exec do
|
7
|
+
element :title
|
8
|
+
|
9
|
+
element :"content:encoded", as: :content
|
10
|
+
element :"a10:content", as: :content
|
11
|
+
element :description, as: :summary
|
12
|
+
|
13
|
+
element :link, as: :url
|
14
|
+
element :"a10:link", as: :url, value: :href
|
15
|
+
|
16
|
+
element :author
|
17
|
+
element :"dc:creator", as: :author
|
18
|
+
element :"a10:name", as: :author
|
19
|
+
|
20
|
+
element :pubDate, as: :published
|
21
|
+
element :pubdate, as: :published
|
22
|
+
element :issued, as: :published
|
23
|
+
element :"dc:date", as: :published
|
24
|
+
element :"dc:Date", as: :published
|
25
|
+
element :"dcterms:created", as: :published
|
26
|
+
|
27
|
+
element :"dcterms:modified", as: :updated
|
28
|
+
element :"a10:updated", as: :updated
|
29
|
+
|
30
|
+
element :guid, as: :entry_id, class: Feedjira::Parser::GloballyUniqueIdentifier
|
31
|
+
element :"dc:identifier", as: :dc_identifier
|
32
|
+
|
33
|
+
element :"media:thumbnail", as: :image, value: :url
|
34
|
+
element :"media:content", as: :image, value: :url
|
35
|
+
element :enclosure, as: :image, value: :url
|
36
|
+
|
37
|
+
elements :category, as: :categories
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
def entry_id
|
42
|
+
@entry_id&.guid
|
43
|
+
end
|
44
|
+
|
45
|
+
def url
|
46
|
+
@url || @entry_id&.url
|
47
|
+
end
|
48
|
+
|
49
|
+
def id
|
50
|
+
entry_id || @dc_identifier || @url
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
data/lib/feedjira/version.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "spec_helper"
|
2
4
|
|
3
5
|
describe Feedjira::Configuration do
|
@@ -8,10 +10,10 @@ describe Feedjira::Configuration do
|
|
8
10
|
end
|
9
11
|
|
10
12
|
it "allows parsers to be modified" do
|
11
|
-
|
13
|
+
custom_parser = Class.new
|
12
14
|
|
13
|
-
Feedjira.configure { |config| config.parsers.unshift(
|
14
|
-
expect(Feedjira.parsers.first).to eq(
|
15
|
+
Feedjira.configure { |config| config.parsers.unshift(custom_parser) }
|
16
|
+
expect(Feedjira.parsers.first).to eq(custom_parser)
|
15
17
|
Feedjira.reset_configuration!
|
16
18
|
end
|
17
19
|
end
|