feedjira 2.0.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.gitignore +2 -0
- data/.rubocop.yml +15 -0
- data/.travis.yml +31 -12
- data/CHANGELOG.md +34 -1
- data/Dangerfile +1 -0
- data/Gemfile +2 -1
- data/LICENSE +1 -1
- data/README.md +210 -7
- data/Rakefile +11 -1
- data/feedjira.gemspec +17 -14
- data/fixtures/vcr_cassettes/fetch_failure.yml +62 -0
- data/fixtures/vcr_cassettes/parse_error.yml +222 -0
- data/fixtures/vcr_cassettes/success.yml +281 -0
- data/lib/feedjira/configuration.rb +76 -0
- data/lib/feedjira/core_ext/date.rb +3 -1
- data/lib/feedjira/core_ext/string.rb +2 -1
- data/lib/feedjira/core_ext/time.rb +24 -17
- data/lib/feedjira/core_ext.rb +3 -3
- data/lib/feedjira/date_time_utilities/date_time_epoch_parser.rb +13 -0
- data/lib/feedjira/date_time_utilities/date_time_language_parser.rb +24 -0
- data/lib/feedjira/date_time_utilities/date_time_pattern_parser.rb +34 -0
- data/lib/feedjira/date_time_utilities.rb +32 -0
- data/lib/feedjira/feed.rb +89 -62
- data/lib/feedjira/feed_entry_utilities.rb +20 -19
- data/lib/feedjira/feed_utilities.rb +37 -22
- data/lib/feedjira/parser/atom.rb +10 -8
- data/lib/feedjira/parser/atom_entry.rb +11 -13
- data/lib/feedjira/parser/atom_feed_burner.rb +27 -10
- data/lib/feedjira/parser/atom_feed_burner_entry.rb +12 -14
- data/lib/feedjira/parser/atom_youtube.rb +21 -0
- data/lib/feedjira/parser/atom_youtube_entry.rb +30 -0
- data/lib/feedjira/parser/google_docs_atom.rb +8 -7
- data/lib/feedjira/parser/google_docs_atom_entry.rb +13 -11
- data/lib/feedjira/parser/itunes_rss.rb +41 -22
- data/lib/feedjira/parser/itunes_rss_category.rb +39 -0
- data/lib/feedjira/parser/itunes_rss_item.rb +32 -20
- data/lib/feedjira/parser/itunes_rss_owner.rb +4 -4
- data/lib/feedjira/parser/podlove_chapter.rb +22 -0
- data/lib/feedjira/parser/rss.rb +11 -8
- data/lib/feedjira/parser/rss_entry.rb +17 -21
- data/lib/feedjira/parser/rss_feed_burner.rb +5 -6
- data/lib/feedjira/parser/rss_feed_burner_entry.rb +24 -28
- data/lib/feedjira/parser/rss_image.rb +15 -0
- data/lib/feedjira/parser.rb +1 -1
- data/lib/feedjira/preprocessor.rb +4 -2
- data/lib/feedjira/version.rb +1 -1
- data/lib/feedjira.rb +15 -0
- data/spec/feedjira/configuration_spec.rb +25 -0
- data/spec/feedjira/date_time_utilities_spec.rb +47 -0
- data/spec/feedjira/feed_entry_utilities_spec.rb +23 -19
- data/spec/feedjira/feed_spec.rb +140 -75
- data/spec/feedjira/feed_utilities_spec.rb +83 -63
- data/spec/feedjira/parser/atom_entry_spec.rb +54 -34
- data/spec/feedjira/parser/atom_feed_burner_entry_spec.rb +27 -20
- data/spec/feedjira/parser/atom_feed_burner_spec.rb +87 -30
- data/spec/feedjira/parser/atom_spec.rb +50 -48
- data/spec/feedjira/parser/atom_youtube_entry_spec.rb +86 -0
- data/spec/feedjira/parser/atom_youtube_spec.rb +43 -0
- data/spec/feedjira/parser/google_docs_atom_entry_spec.rb +5 -4
- data/spec/feedjira/parser/google_docs_atom_spec.rb +6 -6
- data/spec/feedjira/parser/itunes_rss_item_spec.rb +49 -29
- data/spec/feedjira/parser/itunes_rss_owner_spec.rb +10 -9
- data/spec/feedjira/parser/itunes_rss_spec.rb +87 -30
- data/spec/feedjira/parser/podlove_chapter_spec.rb +37 -0
- data/spec/feedjira/parser/rss_entry_spec.rb +50 -33
- data/spec/feedjira/parser/rss_feed_burner_entry_spec.rb +55 -33
- data/spec/feedjira/parser/rss_feed_burner_spec.rb +31 -26
- data/spec/feedjira/parser/rss_spec.rb +56 -24
- data/spec/feedjira/preprocessor_spec.rb +11 -3
- data/spec/sample_feeds/AmazonWebServicesBlog.xml +797 -797
- data/spec/sample_feeds/AtomEscapedHTMLInPreTag.xml +13 -0
- data/spec/sample_feeds/CRE.xml +5849 -0
- data/spec/sample_feeds/FeedBurnerXHTML.xml +400 -400
- data/spec/sample_feeds/GiantRobotsSmashingIntoOtherGiantRobots.xml +682 -0
- data/spec/sample_feeds/ITunesWithSingleQuotedAttributes.xml +67 -0
- data/spec/sample_feeds/InvalidDateFormat.xml +20 -0
- data/spec/sample_feeds/PaulDixExplainsNothing.xml +175 -175
- data/spec/sample_feeds/PaulDixExplainsNothingAlternate.xml +175 -175
- data/spec/sample_feeds/PaulDixExplainsNothingFirstEntryContent.xml +16 -16
- data/spec/sample_feeds/PaulDixExplainsNothingWFW.xml +174 -174
- data/spec/sample_feeds/TenderLovemaking.xml +12 -2
- data/spec/sample_feeds/TrotterCashionHome.xml +611 -611
- data/spec/sample_feeds/TypePadNews.xml +368 -368
- data/spec/sample_feeds/itunes.xml +31 -2
- data/spec/sample_feeds/pet_atom.xml +229 -229
- data/spec/sample_feeds/youtube_atom.xml +395 -0
- data/spec/sample_feeds.rb +31 -21
- data/spec/spec_helper.rb +6 -0
- metadata +132 -25
@@ -1,26 +1,43 @@
|
|
1
|
+
# rubocop:disable Style/DocumentationMethod
|
1
2
|
module Feedjira
|
2
|
-
|
3
3
|
module Parser
|
4
4
|
# Parser for dealing with Feedburner Atom feeds.
|
5
5
|
class AtomFeedBurner
|
6
6
|
include SAXMachine
|
7
7
|
include FeedUtilities
|
8
|
+
|
8
9
|
element :title
|
9
|
-
element :subtitle, :
|
10
|
-
element :link, :
|
11
|
-
|
12
|
-
|
13
|
-
|
10
|
+
element :subtitle, as: :description
|
11
|
+
element :link, as: :url_text_html, value: :href,
|
12
|
+
with: { type: 'text/html' }
|
13
|
+
element :link, as: :url_notype, value: :href, with: { type: nil }
|
14
|
+
element :link, as: :feed_url_link, value: :href, with: { type: 'application/atom+xml' } # rubocop:disable Metrics/LineLength
|
15
|
+
element :"atom10:link", as: :feed_url_atom10_link, value: :href,
|
16
|
+
with: { type: 'application/atom+xml' }
|
17
|
+
elements :"atom10:link", as: :hubs, value: :href, with: { rel: 'hub' }
|
18
|
+
elements :entry, as: :entries, class: AtomFeedBurnerEntry
|
19
|
+
|
20
|
+
attr_writer :url, :feed_url
|
21
|
+
|
22
|
+
def self.able_to_parse?(xml)
|
23
|
+
((/Atom/ =~ xml) && (/feedburner/ =~ xml) && !(/\<rss|\<rdf/ =~ xml)) || false # rubocop:disable Metrics/LineLength
|
24
|
+
end
|
14
25
|
|
15
|
-
|
16
|
-
|
26
|
+
# Feed url is <link> with type="text/html" if present,
|
27
|
+
# <link> with no type attribute otherwise
|
28
|
+
def url
|
29
|
+
@url || @url_text_html || @url_notype
|
30
|
+
end
|
31
|
+
|
32
|
+
# Feed feed_url is <link> with type="application/atom+xml" if present,
|
33
|
+
# <atom10:link> with type="application/atom+xml" otherwise
|
34
|
+
def feed_url
|
35
|
+
@feed_url || @feed_url_link || @feed_url_atom10_link
|
17
36
|
end
|
18
37
|
|
19
38
|
def self.preprocess(xml)
|
20
39
|
Preprocessor.new(xml).to_xml
|
21
40
|
end
|
22
41
|
end
|
23
|
-
|
24
42
|
end
|
25
|
-
|
26
43
|
end
|
@@ -1,5 +1,5 @@
|
|
1
|
+
# rubocop:disable Style/DocumentationMethod
|
1
2
|
module Feedjira
|
2
|
-
|
3
3
|
module Parser
|
4
4
|
# Parser for dealing with Feedburner Atom feed entries.
|
5
5
|
class AtomFeedBurnerEntry
|
@@ -7,29 +7,27 @@ module Feedjira
|
|
7
7
|
include FeedEntryUtilities
|
8
8
|
|
9
9
|
element :title
|
10
|
-
element :name, :
|
11
|
-
element :link, :
|
12
|
-
element :"feedburner:origLink", :
|
10
|
+
element :name, as: :author
|
11
|
+
element :link, as: :url, value: :href, with: { type: 'text/html', rel: 'alternate' } # rubocop:disable Metrics/LineLength
|
12
|
+
element :"feedburner:origLink", as: :url
|
13
13
|
element :summary
|
14
14
|
element :content
|
15
15
|
|
16
|
-
element :"media:content", :
|
17
|
-
element :enclosure, :
|
16
|
+
element :"media:content", as: :image, value: :url
|
17
|
+
element :enclosure, as: :image, value: :href
|
18
18
|
|
19
19
|
element :published
|
20
|
-
element :id, :
|
21
|
-
element :issued, :
|
22
|
-
element :created, :
|
20
|
+
element :id, as: :entry_id
|
21
|
+
element :issued, as: :published
|
22
|
+
element :created, as: :published
|
23
23
|
element :updated
|
24
|
-
element :modified, :
|
25
|
-
elements :category, :
|
26
|
-
elements :link, :
|
24
|
+
element :modified, as: :updated
|
25
|
+
elements :category, as: :categories, value: :term
|
26
|
+
elements :link, as: :links, value: :href
|
27
27
|
|
28
28
|
def url
|
29
29
|
@url ||= links.first
|
30
30
|
end
|
31
|
-
|
32
31
|
end
|
33
32
|
end
|
34
|
-
|
35
33
|
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# rubocop:disable Style/DocumentationMethod
|
2
|
+
module Feedjira
|
3
|
+
module Parser
|
4
|
+
# Parser for dealing with YouTube Atom feeds.
|
5
|
+
class AtomYoutube
|
6
|
+
include SAXMachine
|
7
|
+
include FeedUtilities
|
8
|
+
element :title
|
9
|
+
element :link, as: :url, value: :href, with: { rel: 'alternate' }
|
10
|
+
element :link, as: :feed_url, value: :href, with: { rel: 'self' }
|
11
|
+
element :name, as: :author
|
12
|
+
element :"yt:channelId", as: :youtube_channel_id
|
13
|
+
|
14
|
+
elements :entry, as: :entries, class: AtomYoutubeEntry
|
15
|
+
|
16
|
+
def self.able_to_parse?(xml) #:nodoc:
|
17
|
+
%r{xmlns:yt="http://www.youtube.com/xml/schemas/2015"} =~ xml
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# rubocop:disable Style/Documentation
|
2
|
+
module Feedjira
|
3
|
+
module Parser
|
4
|
+
class AtomYoutubeEntry
|
5
|
+
include SAXMachine
|
6
|
+
include FeedEntryUtilities
|
7
|
+
|
8
|
+
element :title
|
9
|
+
element :link, as: :url, value: :href, with: { rel: 'alternate' }
|
10
|
+
element :name, as: :author
|
11
|
+
element :"media:description", as: :content
|
12
|
+
element :summary
|
13
|
+
element :published
|
14
|
+
element :id, as: :entry_id
|
15
|
+
element :updated
|
16
|
+
element :"yt:videoId", as: :youtube_video_id
|
17
|
+
element :"media:title", as: :media_title
|
18
|
+
element :"media:content", as: :media_url, value: :url
|
19
|
+
element :"media:content", as: :media_type, value: :type
|
20
|
+
element :"media:content", as: :media_width, value: :width
|
21
|
+
element :"media:content", as: :media_height, value: :height
|
22
|
+
element :"media:thumbnail", as: :media_thumbnail_url, value: :url
|
23
|
+
element :"media:thumbnail", as: :media_thumbnail_width, value: :width
|
24
|
+
element :"media:thumbnail", as: :media_thumbnail_height, value: :height
|
25
|
+
element :"media:starRating", as: :media_star_count, value: :count
|
26
|
+
element :"media:starRating", as: :media_star_average, value: :average
|
27
|
+
element :"media:statistics", as: :media_views, value: :views
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
@@ -1,23 +1,24 @@
|
|
1
1
|
require File.expand_path('./atom', File.dirname(__FILE__))
|
2
|
-
|
2
|
+
# rubocop:disable Style/Documentation
|
3
|
+
# rubocop:disable Style/DocumentationMethod
|
3
4
|
module Feedjira
|
4
5
|
module Parser
|
5
6
|
class GoogleDocsAtom
|
6
7
|
include SAXMachine
|
7
8
|
include FeedUtilities
|
8
9
|
element :title
|
9
|
-
element :subtitle, :
|
10
|
-
element :link, :
|
11
|
-
element :link, :
|
12
|
-
elements :link, :
|
13
|
-
elements :entry, :
|
10
|
+
element :subtitle, as: :description
|
11
|
+
element :link, as: :url, value: :href, with: { type: 'text/html' }
|
12
|
+
element :link, as: :feed_url, value: :href, with: { type: 'application/atom+xml' } # rubocop:disable Metrics/LineLength
|
13
|
+
elements :link, as: :links, value: :href
|
14
|
+
elements :entry, as: :entries, class: GoogleDocsAtomEntry
|
14
15
|
|
15
16
|
def url
|
16
17
|
@url ||= links.first
|
17
18
|
end
|
18
19
|
|
19
20
|
def self.able_to_parse?(xml) #:nodoc:
|
20
|
-
%r{<id>https?://docs
|
21
|
+
%r{<id>https?://docs\.google\.com/.*\</id\>} =~ xml
|
21
22
|
end
|
22
23
|
|
23
24
|
def feed_url
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# rubocop:disable Style/Documentation
|
2
|
+
# rubocop:disable Style/DocumentationMethod
|
1
3
|
module Feedjira
|
2
4
|
module Parser
|
3
5
|
class GoogleDocsAtomEntry
|
@@ -5,21 +7,21 @@ module Feedjira
|
|
5
7
|
include FeedEntryUtilities
|
6
8
|
|
7
9
|
element :title
|
8
|
-
element :link, :
|
9
|
-
element :name, :
|
10
|
+
element :link, as: :url, value: :href, with: { type: 'text/html', rel: 'alternate' } # rubocop:disable Metrics/LineLength
|
11
|
+
element :name, as: :author
|
10
12
|
element :content
|
11
13
|
element :summary
|
12
14
|
element :published
|
13
|
-
element :id, :
|
14
|
-
element :created, :
|
15
|
-
element :issued, :
|
15
|
+
element :id, as: :entry_id
|
16
|
+
element :created, as: :published
|
17
|
+
element :issued, as: :published
|
16
18
|
element :updated
|
17
|
-
element :modified, :
|
18
|
-
elements :category, :
|
19
|
-
elements :link, :
|
20
|
-
element :"docs:md5Checksum", :
|
21
|
-
element :"docs:filename", :
|
22
|
-
element :"docs:suggestedFilename", :
|
19
|
+
element :modified, as: :updated
|
20
|
+
elements :category, as: :categories, value: :term
|
21
|
+
elements :link, as: :links, value: :href
|
22
|
+
element :"docs:md5Checksum", as: :checksum
|
23
|
+
element :"docs:filename", as: :original_filename
|
24
|
+
element :"docs:suggestedFilename", as: :suggested_filename
|
23
25
|
|
24
26
|
def url
|
25
27
|
@url ||= links.first
|
@@ -1,8 +1,11 @@
|
|
1
|
+
# rubocop:disable Style/DocumentationMethod
|
1
2
|
module Feedjira
|
2
|
-
|
3
3
|
module Parser
|
4
4
|
# iTunes is RSS 2.0 + some apple extensions
|
5
|
-
#
|
5
|
+
# Sources:
|
6
|
+
# * https://cyber.harvard.edu/rss/rss.html
|
7
|
+
# * http://lists.apple.com/archives/syndication-dev/2005/Nov/msg00002.html
|
8
|
+
# * https://help.apple.com/itc/podcasts_connect/
|
6
9
|
class ITunesRSS
|
7
10
|
include SAXMachine
|
8
11
|
include FeedUtilities
|
@@ -12,39 +15,55 @@ module Feedjira
|
|
12
15
|
# RSS 2.0 elements that need including
|
13
16
|
element :copyright
|
14
17
|
element :description
|
18
|
+
element :image, class: RSSImage
|
15
19
|
element :language
|
20
|
+
element :lastBuildDate, as: :last_built
|
21
|
+
element :link, as: :url
|
16
22
|
element :managingEditor
|
23
|
+
element :rss, as: :version, value: :version
|
17
24
|
element :title
|
18
|
-
element :
|
25
|
+
element :ttl
|
19
26
|
|
20
27
|
# If author is not present use managingEditor on the channel
|
21
|
-
element :"itunes:author", :
|
22
|
-
element :"itunes:block", :
|
23
|
-
element :"itunes:image", :
|
24
|
-
element :"itunes:explicit", :
|
25
|
-
element :"itunes:
|
28
|
+
element :"itunes:author", as: :itunes_author
|
29
|
+
element :"itunes:block", as: :itunes_block
|
30
|
+
element :"itunes:image", value: :href, as: :itunes_image
|
31
|
+
element :"itunes:explicit", as: :itunes_explicit
|
32
|
+
element :"itunes:complete", as: :itunes_complete
|
33
|
+
element :"itunes:keywords", as: :itunes_keywords
|
34
|
+
element :"itunes:type", as: :itunes_type
|
35
|
+
|
26
36
|
# New URL for the podcast feed
|
27
|
-
element :"itunes:new_feed_url", :
|
28
|
-
element :"itunes:subtitle", :
|
37
|
+
element :"itunes:new_feed_url", as: :itunes_new_feed_url
|
38
|
+
element :"itunes:subtitle", as: :itunes_subtitle
|
39
|
+
|
29
40
|
# If summary is not present, use the description tag
|
30
|
-
element :"itunes:summary", :
|
41
|
+
element :"itunes:summary", as: :itunes_summary
|
31
42
|
|
32
|
-
# iTunes RSS feeds can have multiple main categories
|
33
|
-
#
|
34
|
-
|
35
|
-
|
36
|
-
|
43
|
+
# iTunes RSS feeds can have multiple main categories and multiple
|
44
|
+
# sub-categories per category.
|
45
|
+
elements :"itunes:category", as: :_itunes_categories,
|
46
|
+
class: ITunesRSSCategory
|
47
|
+
private :_itunes_categories
|
37
48
|
|
38
|
-
|
49
|
+
def itunes_categories
|
50
|
+
_itunes_categories.flat_map do |itunes_category|
|
51
|
+
itunes_category.enum_for(:each_subcategory).to_a
|
52
|
+
end
|
53
|
+
end
|
39
54
|
|
40
|
-
|
55
|
+
def itunes_category_paths
|
56
|
+
_itunes_categories.flat_map do |itunes_category|
|
57
|
+
itunes_category.enum_for(:each_path).to_a
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
elements :"itunes:owner", as: :itunes_owners, class: ITunesRSSOwner
|
62
|
+
elements :item, as: :entries, class: ITunesRSSItem
|
41
63
|
|
42
64
|
def self.able_to_parse?(xml)
|
43
|
-
|
65
|
+
%r{xmlns:itunes\s?=\s?[\"\']http://www\.itunes\.com/dtds/podcast-1\.0\.dtd[\"\']}i =~ xml # rubocop:disable Metrics/LineLength
|
44
66
|
end
|
45
|
-
|
46
67
|
end
|
47
|
-
|
48
68
|
end
|
49
|
-
|
50
69
|
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# rubocop:disable Style/DocumentationMethod
|
2
|
+
module Feedjira
|
3
|
+
module Parser
|
4
|
+
# iTunes category element, which may contain nested sub-categories
|
5
|
+
# Source: https://help.apple.com/itc/podcasts_connect/#/itcb54353390
|
6
|
+
class ITunesRSSCategory
|
7
|
+
include SAXMachine
|
8
|
+
|
9
|
+
attribute :text
|
10
|
+
|
11
|
+
elements :"itunes:category", as: :itunes_categories,
|
12
|
+
class: ITunesRSSCategory
|
13
|
+
|
14
|
+
def each_subcategory
|
15
|
+
return to_enum(__method__) unless block_given?
|
16
|
+
|
17
|
+
yield text
|
18
|
+
|
19
|
+
itunes_categories.each do |itunes_category|
|
20
|
+
itunes_category.each_subcategory(&proc)
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
def each_path(ancestors = [])
|
25
|
+
return to_enum(__method__, ancestors) unless block_given?
|
26
|
+
|
27
|
+
category_hierarchy = ancestors + [text]
|
28
|
+
|
29
|
+
if itunes_categories.empty?
|
30
|
+
yield category_hierarchy
|
31
|
+
else
|
32
|
+
itunes_categories.each do |itunes_category|
|
33
|
+
itunes_category.each_path(category_hierarchy, &proc)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
@@ -1,5 +1,4 @@
|
|
1
1
|
module Feedjira
|
2
|
-
|
3
2
|
module Parser
|
4
3
|
# iTunes extensions to the standard RSS2.0 item
|
5
4
|
# Source: http://www.apple.com/itunes/whatson/podcasts/specs.html
|
@@ -8,29 +7,42 @@ module Feedjira
|
|
8
7
|
include FeedEntryUtilities
|
9
8
|
|
10
9
|
element :author
|
11
|
-
element :guid, :
|
10
|
+
element :guid, as: :entry_id
|
12
11
|
element :title
|
13
|
-
element :link, :
|
14
|
-
element :description, :
|
15
|
-
element :"content:encoded", :
|
16
|
-
element :pubDate, :
|
12
|
+
element :link, as: :url
|
13
|
+
element :description, as: :summary
|
14
|
+
element :"content:encoded", as: :content
|
15
|
+
element :pubDate, as: :published
|
17
16
|
|
18
17
|
# If author is not present use author tag on the item
|
19
|
-
element :"itunes:author", :
|
20
|
-
element :"itunes:block", :
|
21
|
-
element :"itunes:duration", :
|
22
|
-
element :"itunes:explicit", :
|
23
|
-
element :"itunes:keywords", :
|
24
|
-
element :"itunes:subtitle", :
|
25
|
-
element :"itunes:image", :
|
26
|
-
element :"itunes:isClosedCaptioned", :
|
27
|
-
element :"itunes:order", :
|
18
|
+
element :"itunes:author", as: :itunes_author
|
19
|
+
element :"itunes:block", as: :itunes_block
|
20
|
+
element :"itunes:duration", as: :itunes_duration
|
21
|
+
element :"itunes:explicit", as: :itunes_explicit
|
22
|
+
element :"itunes:keywords", as: :itunes_keywords
|
23
|
+
element :"itunes:subtitle", as: :itunes_subtitle
|
24
|
+
element :"itunes:image", value: :href, as: :itunes_image
|
25
|
+
element :"itunes:isClosedCaptioned", as: :itunes_closed_captioned
|
26
|
+
element :"itunes:order", as: :itunes_order
|
27
|
+
element :"itunes:season", as: :itunes_season
|
28
|
+
element :"itunes:episode", as: :itunes_episode
|
29
|
+
element :"itunes:title", as: :itunes_title
|
30
|
+
element :"itunes:episodeType", as: :itunes_episode_type
|
31
|
+
|
28
32
|
# If summary is not present, use the description tag
|
29
|
-
element :"itunes:summary", :
|
30
|
-
element :enclosure, :
|
31
|
-
element :enclosure, :
|
32
|
-
element :enclosure, :
|
33
|
+
element :"itunes:summary", as: :itunes_summary
|
34
|
+
element :enclosure, value: :length, as: :enclosure_length
|
35
|
+
element :enclosure, value: :type, as: :enclosure_type
|
36
|
+
element :enclosure, value: :url, as: :enclosure_url
|
37
|
+
elements 'psc:chapter', as: :raw_chapters, class: Feedjira::Parser::PodloveChapter # rubocop:disable Metrics/LineLength
|
38
|
+
|
39
|
+
# Podlove requires clients to re-order by start time in the
|
40
|
+
# event the publisher doesn't provide them in that
|
41
|
+
# order. SAXMachine doesn't have any sort capability afaik, so
|
42
|
+
# we have to sort chapters manually.
|
43
|
+
def chapters
|
44
|
+
raw_chapters.sort_by(&:start)
|
45
|
+
end
|
33
46
|
end
|
34
47
|
end
|
35
|
-
|
36
48
|
end
|
@@ -1,12 +1,12 @@
|
|
1
|
+
# rubocop:disable Style/Documentation
|
1
2
|
module Feedjira
|
2
|
-
|
3
3
|
module Parser
|
4
4
|
class ITunesRSSOwner
|
5
5
|
include SAXMachine
|
6
6
|
include FeedUtilities
|
7
|
-
|
8
|
-
element :"itunes:
|
7
|
+
|
8
|
+
element :"itunes:name", as: :name
|
9
|
+
element :"itunes:email", as: :email
|
9
10
|
end
|
10
11
|
end
|
11
|
-
|
12
12
|
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# rubocop:disable Style/Documentation
|
2
|
+
# rubocop:disable Style/DocumentationMethod
|
3
|
+
module Feedjira
|
4
|
+
module Parser
|
5
|
+
class PodloveChapter
|
6
|
+
include SAXMachine
|
7
|
+
include FeedEntryUtilities
|
8
|
+
attribute :start, as: :start_ntp
|
9
|
+
attribute :title
|
10
|
+
attribute :href, as: :url
|
11
|
+
attribute :image
|
12
|
+
|
13
|
+
def start
|
14
|
+
return unless start_ntp
|
15
|
+
parts = start_ntp.split(':')
|
16
|
+
parts.reverse.to_enum.with_index.map do |part, index|
|
17
|
+
part.to_f * (60**index)
|
18
|
+
end.reduce(:+)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
data/lib/feedjira/parser/rss.rb
CHANGED
@@ -1,24 +1,27 @@
|
|
1
|
+
# rubocop:disable Style/DocumentationMethod
|
1
2
|
module Feedjira
|
2
|
-
|
3
3
|
module Parser
|
4
4
|
# Parser for dealing with RSS feeds.
|
5
|
+
# Source: https://cyber.harvard.edu/rss/rss.html
|
5
6
|
class RSS
|
6
7
|
include SAXMachine
|
7
8
|
include FeedUtilities
|
9
|
+
element :description
|
10
|
+
element :image, class: RSSImage
|
11
|
+
element :language
|
12
|
+
element :lastBuildDate, as: :last_built
|
13
|
+
element :link, as: :url
|
8
14
|
element :rss, as: :version, value: :version
|
9
15
|
element :title
|
10
|
-
element :
|
11
|
-
|
12
|
-
elements :item, :
|
13
|
-
elements :"atom:link", :as => :hubs, :value => :href, :with => {:rel => "hub"}
|
16
|
+
element :ttl
|
17
|
+
elements :"atom:link", as: :hubs, value: :href, with: { rel: 'hub' }
|
18
|
+
elements :item, as: :entries, class: RSSEntry
|
14
19
|
|
15
20
|
attr_accessor :feed_url
|
16
21
|
|
17
|
-
def self.able_to_parse?(xml)
|
22
|
+
def self.able_to_parse?(xml)
|
18
23
|
(/\<rss|\<rdf/ =~ xml) && !(/feedburner/ =~ xml)
|
19
24
|
end
|
20
25
|
end
|
21
|
-
|
22
26
|
end
|
23
|
-
|
24
27
|
end
|
@@ -1,5 +1,4 @@
|
|
1
1
|
module Feedjira
|
2
|
-
|
3
2
|
module Parser
|
4
3
|
# Parser for dealing with RSS and RDF feed entries.
|
5
4
|
class RSSEntry
|
@@ -7,31 +6,28 @@ module Feedjira
|
|
7
6
|
include FeedEntryUtilities
|
8
7
|
|
9
8
|
element :title
|
10
|
-
element :link, :
|
11
|
-
|
12
|
-
element :"dc:creator", :as => :author
|
13
|
-
element :author, :as => :author
|
14
|
-
element :"content:encoded", :as => :content
|
15
|
-
element :description, :as => :summary
|
9
|
+
element :link, as: :url
|
16
10
|
|
17
|
-
element :"
|
18
|
-
element :
|
11
|
+
element :"dc:creator", as: :author
|
12
|
+
element :author, as: :author
|
13
|
+
element :"content:encoded", as: :content
|
14
|
+
element :description, as: :summary
|
19
15
|
|
20
|
-
element :
|
21
|
-
element :
|
22
|
-
element :"dc:date", :as => :published
|
23
|
-
element :"dc:Date", :as => :published
|
24
|
-
element :"dcterms:created", :as => :published
|
16
|
+
element :"media:content", as: :image, value: :url
|
17
|
+
element :enclosure, as: :image, value: :url
|
25
18
|
|
19
|
+
element :pubDate, as: :published
|
20
|
+
element :pubdate, as: :published
|
21
|
+
element :"dc:date", as: :published
|
22
|
+
element :"dc:Date", as: :published
|
23
|
+
element :"dcterms:created", as: :published
|
26
24
|
|
27
|
-
element :"dcterms:modified", :
|
28
|
-
element :issued, :
|
29
|
-
elements :category, :
|
25
|
+
element :"dcterms:modified", as: :updated
|
26
|
+
element :issued, as: :published
|
27
|
+
elements :category, as: :categories
|
30
28
|
|
31
|
-
element :guid, :
|
32
|
-
element :"dc:identifier", :
|
29
|
+
element :guid, as: :entry_id
|
30
|
+
element :"dc:identifier", as: :entry_id
|
33
31
|
end
|
34
|
-
|
35
32
|
end
|
36
|
-
|
37
33
|
end
|
@@ -1,5 +1,5 @@
|
|
1
|
+
# rubocop:disable Style/DocumentationMethod
|
1
2
|
module Feedjira
|
2
|
-
|
3
3
|
module Parser
|
4
4
|
# Parser for dealing with Feedburner RSS feeds.
|
5
5
|
class RSSFeedBurner
|
@@ -7,9 +7,10 @@ module Feedjira
|
|
7
7
|
include FeedUtilities
|
8
8
|
element :title
|
9
9
|
element :description
|
10
|
-
element :link, :
|
11
|
-
|
12
|
-
elements :
|
10
|
+
element :link, as: :url
|
11
|
+
element :lastBuildDate, as: :last_built
|
12
|
+
elements :"atom10:link", as: :hubs, value: :href, with: { rel: 'hub' }
|
13
|
+
elements :item, as: :entries, class: RSSFeedBurnerEntry
|
13
14
|
|
14
15
|
attr_accessor :feed_url
|
15
16
|
|
@@ -17,7 +18,5 @@ module Feedjira
|
|
17
18
|
(/\<rss|\<rdf/ =~ xml) && (/feedburner/ =~ xml)
|
18
19
|
end
|
19
20
|
end
|
20
|
-
|
21
21
|
end
|
22
|
-
|
23
22
|
end
|
@@ -1,43 +1,39 @@
|
|
1
|
+
# rubocop:disable Style/DocumentationMethod
|
1
2
|
module Feedjira
|
2
|
-
|
3
3
|
module Parser
|
4
4
|
# Parser for dealing with Feedburner RSS and RDF feed entries.
|
5
5
|
class RSSFeedBurnerEntry
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
element :title
|
10
|
-
|
11
|
-
element :"feedburner:origLink", :as => :url
|
12
|
-
element :link, :as => :url
|
6
|
+
include SAXMachine
|
7
|
+
include FeedEntryUtilities
|
13
8
|
|
14
|
-
|
15
|
-
element :author, :as => :author
|
16
|
-
element :"content:encoded", :as => :content
|
17
|
-
element :description, :as => :summary
|
9
|
+
element :title
|
18
10
|
|
19
|
-
|
20
|
-
|
11
|
+
element :"feedburner:origLink", as: :url
|
12
|
+
element :link, as: :url
|
21
13
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
element :"dcterms:created", :as => :published
|
14
|
+
element :"dc:creator", as: :author
|
15
|
+
element :author, as: :author
|
16
|
+
element :"content:encoded", as: :content
|
17
|
+
element :description, as: :summary
|
27
18
|
|
19
|
+
element :"media:content", as: :image, value: :url
|
20
|
+
element :enclosure, as: :image, value: :url
|
28
21
|
|
29
|
-
|
30
|
-
|
31
|
-
|
22
|
+
element :pubDate, as: :published
|
23
|
+
element :pubdate, as: :published
|
24
|
+
element :"dc:date", as: :published
|
25
|
+
element :"dc:Date", as: :published
|
26
|
+
element :"dcterms:created", as: :published
|
32
27
|
|
33
|
-
|
28
|
+
element :"dcterms:modified", as: :updated
|
29
|
+
element :issued, as: :published
|
30
|
+
elements :category, as: :categories
|
34
31
|
|
35
|
-
|
36
|
-
@url || @link
|
37
|
-
end
|
32
|
+
element :guid, as: :entry_id
|
38
33
|
|
34
|
+
def url
|
35
|
+
@url || @link
|
36
|
+
end
|
39
37
|
end
|
40
|
-
|
41
38
|
end
|
42
|
-
|
43
39
|
end
|