feedjira 2.2.0 → 3.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/ISSUE_TEMPLATE/feed-parsing.md +15 -0
- data/.rubocop.yml +32 -8
- data/.rubocop_todo.yml +11 -0
- data/.travis.yml +3 -7
- data/CHANGELOG.md +18 -9
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +8 -5
- data/README.md +46 -99
- data/Rakefile +8 -6
- data/feedjira.gemspec +31 -20
- data/lib/feedjira.rb +75 -41
- data/lib/feedjira/atom_entry_utilities.rb +51 -0
- data/lib/feedjira/configuration.rb +8 -10
- data/lib/feedjira/core_ext.rb +5 -3
- data/lib/feedjira/core_ext/date.rb +2 -1
- data/lib/feedjira/core_ext/string.rb +2 -1
- data/lib/feedjira/core_ext/time.rb +12 -12
- data/lib/feedjira/date_time_utilities.rb +8 -10
- data/lib/feedjira/date_time_utilities/date_time_epoch_parser.rb +3 -2
- data/lib/feedjira/date_time_utilities/date_time_language_parser.rb +4 -4
- data/lib/feedjira/date_time_utilities/date_time_pattern_parser.rb +11 -15
- data/lib/feedjira/feed.rb +12 -82
- data/lib/feedjira/feed_entry_utilities.rb +14 -7
- data/lib/feedjira/feed_utilities.rb +5 -4
- data/lib/feedjira/parser.rb +6 -1
- data/lib/feedjira/parser/atom.rb +6 -5
- data/lib/feedjira/parser/atom_entry.rb +4 -21
- data/lib/feedjira/parser/atom_feed_burner.rb +7 -6
- data/lib/feedjira/parser/atom_feed_burner_entry.rb +7 -18
- data/lib/feedjira/parser/atom_google_alerts.rb +26 -0
- data/lib/feedjira/parser/atom_google_alerts_entry.rb +21 -0
- data/lib/feedjira/parser/atom_youtube.rb +4 -3
- data/lib/feedjira/parser/atom_youtube_entry.rb +9 -8
- data/lib/feedjira/parser/globally_unique_identifier.rb +21 -0
- data/lib/feedjira/parser/google_docs_atom.rb +6 -6
- data/lib/feedjira/parser/google_docs_atom_entry.rb +3 -19
- data/lib/feedjira/parser/itunes_rss.rb +4 -3
- data/lib/feedjira/parser/itunes_rss_category.rb +6 -5
- data/lib/feedjira/parser/itunes_rss_item.rb +5 -8
- data/lib/feedjira/parser/itunes_rss_owner.rb +2 -1
- data/lib/feedjira/parser/json_feed.rb +41 -0
- data/lib/feedjira/parser/json_feed_item.rb +57 -0
- data/lib/feedjira/parser/podlove_chapter.rb +4 -3
- data/lib/feedjira/parser/rss.rb +5 -3
- data/lib/feedjira/parser/rss_entry.rb +3 -24
- data/lib/feedjira/parser/rss_feed_burner.rb +4 -3
- data/lib/feedjira/parser/rss_feed_burner_entry.rb +6 -26
- data/lib/feedjira/parser/rss_image.rb +2 -0
- data/lib/feedjira/preprocessor.rb +4 -4
- data/lib/feedjira/rss_entry_utilities.rb +53 -0
- data/lib/feedjira/version.rb +3 -1
- data/spec/feedjira/configuration_spec.rb +11 -16
- data/spec/feedjira/date_time_utilities_spec.rb +22 -20
- data/spec/feedjira/feed_entry_utilities_spec.rb +20 -18
- data/spec/feedjira/feed_spec.rb +17 -229
- data/spec/feedjira/feed_utilities_spec.rb +75 -73
- data/spec/feedjira/parser/atom_entry_spec.rb +41 -38
- data/spec/feedjira/parser/atom_feed_burner_entry_spec.rb +22 -20
- data/spec/feedjira/parser/atom_feed_burner_spec.rb +122 -118
- data/spec/feedjira/parser/atom_google_alerts_entry_spec.rb +34 -0
- data/spec/feedjira/parser/atom_google_alerts_spec.rb +62 -0
- data/spec/feedjira/parser/atom_spec.rb +83 -77
- data/spec/feedjira/parser/atom_youtube_entry_spec.rb +41 -39
- data/spec/feedjira/parser/atom_youtube_spec.rb +21 -19
- data/spec/feedjira/parser/google_docs_atom_entry_spec.rb +10 -8
- data/spec/feedjira/parser/google_docs_atom_spec.rb +25 -21
- data/spec/feedjira/parser/itunes_rss_item_spec.rb +39 -37
- data/spec/feedjira/parser/itunes_rss_owner_spec.rb +7 -5
- data/spec/feedjira/parser/itunes_rss_spec.rb +120 -116
- data/spec/feedjira/parser/json_feed_item_spec.rb +81 -0
- data/spec/feedjira/parser/json_feed_spec.rb +55 -0
- data/spec/feedjira/parser/podlove_chapter_spec.rb +14 -12
- data/spec/feedjira/parser/rss_entry_spec.rb +56 -34
- data/spec/feedjira/parser/rss_feed_burner_entry_spec.rb +36 -34
- data/spec/feedjira/parser/rss_feed_burner_spec.rb +49 -45
- data/spec/feedjira/parser/rss_spec.rb +38 -36
- data/spec/feedjira/preprocessor_spec.rb +9 -7
- data/spec/feedjira_spec.rb +166 -0
- data/spec/sample_feeds.rb +32 -29
- data/spec/sample_feeds/HuffPostCanada.xml +279 -0
- data/spec/sample_feeds/Permalinks.xml +22 -0
- data/spec/sample_feeds/a10.xml +72 -0
- data/spec/sample_feeds/google_alerts_atom.xml +1 -0
- data/spec/sample_feeds/json_feed.json +156 -0
- data/spec/spec_helper.rb +7 -5
- metadata +59 -70
- data/Dangerfile +0 -1
- data/fixtures/vcr_cassettes/fetch_failure.yml +0 -62
- data/fixtures/vcr_cassettes/parse_error.yml +0 -222
- data/fixtures/vcr_cassettes/success.yml +0 -281
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
#
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
2
3
|
module Feedjira
|
|
3
4
|
module Parser
|
|
4
5
|
# Parser for dealing with RSS feeds.
|
|
@@ -6,8 +7,8 @@ module Feedjira
|
|
|
6
7
|
include SAXMachine
|
|
7
8
|
include FeedUtilities
|
|
8
9
|
element :title
|
|
9
|
-
element :link, as: :url, value: :href, with: { rel:
|
|
10
|
-
element :link, as: :feed_url, value: :href, with: { rel:
|
|
10
|
+
element :link, as: :url, value: :href, with: { rel: "alternate" }
|
|
11
|
+
element :link, as: :feed_url, value: :href, with: { rel: "self" }
|
|
11
12
|
element :name, as: :author
|
|
12
13
|
element :"yt:channelId", as: :youtube_channel_id
|
|
13
14
|
|
|
@@ -1,19 +1,20 @@
|
|
|
1
|
-
#
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
2
3
|
module Feedjira
|
|
3
4
|
module Parser
|
|
4
5
|
class AtomYoutubeEntry
|
|
5
6
|
include SAXMachine
|
|
6
7
|
include FeedEntryUtilities
|
|
8
|
+
include AtomEntryUtilities
|
|
9
|
+
|
|
10
|
+
sax_config.top_level_elements["link"].clear
|
|
11
|
+
sax_config.collection_elements["link"].clear
|
|
12
|
+
|
|
13
|
+
element :link, as: :url, value: :href, with: { rel: "alternate" }
|
|
7
14
|
|
|
8
|
-
element :title
|
|
9
|
-
element :link, as: :url, value: :href, with: { rel: 'alternate' }
|
|
10
|
-
element :name, as: :author
|
|
11
15
|
element :"media:description", as: :content
|
|
12
|
-
element :summary
|
|
13
|
-
element :published
|
|
14
|
-
element :id, as: :entry_id
|
|
15
|
-
element :updated
|
|
16
16
|
element :"yt:videoId", as: :youtube_video_id
|
|
17
|
+
element :"yt:channelId", as: :youtube_channel_id
|
|
17
18
|
element :"media:title", as: :media_title
|
|
18
19
|
element :"media:content", as: :media_url, value: :url
|
|
19
20
|
element :"media:content", as: :media_type, value: :type
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Feedjira
|
|
4
|
+
module Parser
|
|
5
|
+
class GloballyUniqueIdentifier
|
|
6
|
+
include SAXMachine
|
|
7
|
+
|
|
8
|
+
attribute :isPermaLink, as: :is_perma_link
|
|
9
|
+
|
|
10
|
+
value :guid
|
|
11
|
+
|
|
12
|
+
def perma_link?
|
|
13
|
+
is_perma_link != "false"
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
def url
|
|
17
|
+
perma_link? ? guid : nil
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require File.expand_path("./atom", File.dirname(__FILE__))
|
|
4
4
|
module Feedjira
|
|
5
5
|
module Parser
|
|
6
6
|
class GoogleDocsAtom
|
|
@@ -8,8 +8,8 @@ module Feedjira
|
|
|
8
8
|
include FeedUtilities
|
|
9
9
|
element :title
|
|
10
10
|
element :subtitle, as: :description
|
|
11
|
-
element :link, as: :url, value: :href, with: { type:
|
|
12
|
-
element :link, as: :feed_url, value: :href, with: { type:
|
|
11
|
+
element :link, as: :url, value: :href, with: { type: "text/html" }
|
|
12
|
+
element :link, as: :feed_url, value: :href, with: { type: "application/atom+xml" }
|
|
13
13
|
elements :link, as: :links, value: :href
|
|
14
14
|
elements :entry, as: :entries, class: GoogleDocsAtomEntry
|
|
15
15
|
|
|
@@ -18,7 +18,7 @@ module Feedjira
|
|
|
18
18
|
end
|
|
19
19
|
|
|
20
20
|
def self.able_to_parse?(xml) #:nodoc:
|
|
21
|
-
%r{<id>https?://docs\.google\.com
|
|
21
|
+
%r{<id>https?://docs\.google\.com/.*</id>} =~ xml
|
|
22
22
|
end
|
|
23
23
|
|
|
24
24
|
def feed_url
|
|
@@ -1,31 +1,15 @@
|
|
|
1
|
-
#
|
|
2
|
-
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
3
|
module Feedjira
|
|
4
4
|
module Parser
|
|
5
5
|
class GoogleDocsAtomEntry
|
|
6
6
|
include SAXMachine
|
|
7
7
|
include FeedEntryUtilities
|
|
8
|
+
include AtomEntryUtilities
|
|
8
9
|
|
|
9
|
-
element :title
|
|
10
|
-
element :link, as: :url, value: :href, with: { type: 'text/html', rel: 'alternate' } # rubocop:disable Metrics/LineLength
|
|
11
|
-
element :name, as: :author
|
|
12
|
-
element :content
|
|
13
|
-
element :summary
|
|
14
|
-
element :published
|
|
15
|
-
element :id, as: :entry_id
|
|
16
|
-
element :created, as: :published
|
|
17
|
-
element :issued, as: :published
|
|
18
|
-
element :updated
|
|
19
|
-
element :modified, as: :updated
|
|
20
|
-
elements :category, as: :categories, value: :term
|
|
21
|
-
elements :link, as: :links, value: :href
|
|
22
10
|
element :"docs:md5Checksum", as: :checksum
|
|
23
11
|
element :"docs:filename", as: :original_filename
|
|
24
12
|
element :"docs:suggestedFilename", as: :suggested_filename
|
|
25
|
-
|
|
26
|
-
def url
|
|
27
|
-
@url ||= links.first
|
|
28
|
-
end
|
|
29
13
|
end
|
|
30
14
|
end
|
|
31
15
|
end
|
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
#
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
2
3
|
module Feedjira
|
|
3
4
|
module Parser
|
|
4
5
|
# iTunes is RSS 2.0 + some apple extensions
|
|
@@ -19,7 +20,7 @@ module Feedjira
|
|
|
19
20
|
element :language
|
|
20
21
|
element :lastBuildDate, as: :last_built
|
|
21
22
|
element :link, as: :url
|
|
22
|
-
element :managingEditor
|
|
23
|
+
element :managingEditor, as: :managing_editor
|
|
23
24
|
element :rss, as: :version, value: :version
|
|
24
25
|
element :title
|
|
25
26
|
element :ttl
|
|
@@ -62,7 +63,7 @@ module Feedjira
|
|
|
62
63
|
elements :item, as: :entries, class: ITunesRSSItem
|
|
63
64
|
|
|
64
65
|
def self.able_to_parse?(xml)
|
|
65
|
-
%r{xmlns:itunes\s?=\s?[
|
|
66
|
+
%r{xmlns:itunes\s?=\s?["']http://www\.itunes\.com/dtds/podcast-1\.0\.dtd["']}i =~ xml
|
|
66
67
|
end
|
|
67
68
|
end
|
|
68
69
|
end
|
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
#
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
2
3
|
module Feedjira
|
|
3
4
|
module Parser
|
|
4
5
|
# iTunes extensions to the standard RSS2.0 item
|
|
@@ -11,17 +12,17 @@ module Feedjira
|
|
|
11
12
|
elements :"itunes:category", as: :itunes_categories,
|
|
12
13
|
class: ITunesRSSCategory
|
|
13
14
|
|
|
14
|
-
def each_subcategory
|
|
15
|
+
def each_subcategory(&block)
|
|
15
16
|
return to_enum(__method__) unless block_given?
|
|
16
17
|
|
|
17
18
|
yield text
|
|
18
19
|
|
|
19
20
|
itunes_categories.each do |itunes_category|
|
|
20
|
-
itunes_category.each_subcategory(&
|
|
21
|
+
itunes_category.each_subcategory(&block)
|
|
21
22
|
end
|
|
22
23
|
end
|
|
23
24
|
|
|
24
|
-
def each_path(ancestors = [])
|
|
25
|
+
def each_path(ancestors = [], &block)
|
|
25
26
|
return to_enum(__method__, ancestors) unless block_given?
|
|
26
27
|
|
|
27
28
|
category_hierarchy = ancestors + [text]
|
|
@@ -30,7 +31,7 @@ module Feedjira
|
|
|
30
31
|
yield category_hierarchy
|
|
31
32
|
else
|
|
32
33
|
itunes_categories.each do |itunes_category|
|
|
33
|
-
itunes_category.each_path(category_hierarchy, &
|
|
34
|
+
itunes_category.each_path(category_hierarchy, &block)
|
|
34
35
|
end
|
|
35
36
|
end
|
|
36
37
|
end
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
module Feedjira
|
|
2
4
|
module Parser
|
|
3
5
|
# iTunes extensions to the standard RSS2.0 item
|
|
@@ -5,14 +7,9 @@ module Feedjira
|
|
|
5
7
|
class ITunesRSSItem
|
|
6
8
|
include SAXMachine
|
|
7
9
|
include FeedEntryUtilities
|
|
10
|
+
include RSSEntryUtilities
|
|
8
11
|
|
|
9
|
-
|
|
10
|
-
element :guid, as: :entry_id
|
|
11
|
-
element :title
|
|
12
|
-
element :link, as: :url
|
|
13
|
-
element :description, as: :summary
|
|
14
|
-
element :"content:encoded", as: :content
|
|
15
|
-
element :pubDate, as: :published
|
|
12
|
+
sax_config.top_level_elements["enclosure"].clear
|
|
16
13
|
|
|
17
14
|
# If author is not present use author tag on the item
|
|
18
15
|
element :"itunes:author", as: :itunes_author
|
|
@@ -34,7 +31,7 @@ module Feedjira
|
|
|
34
31
|
element :enclosure, value: :length, as: :enclosure_length
|
|
35
32
|
element :enclosure, value: :type, as: :enclosure_type
|
|
36
33
|
element :enclosure, value: :url, as: :enclosure_url
|
|
37
|
-
elements
|
|
34
|
+
elements "psc:chapter", as: :raw_chapters, class: Feedjira::Parser::PodloveChapter
|
|
38
35
|
|
|
39
36
|
# Podlove requires clients to re-order by start time in the
|
|
40
37
|
# event the publisher doesn't provide them in that
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Feedjira
|
|
4
|
+
module Parser
|
|
5
|
+
# Parser for dealing with JSON Feeds.
|
|
6
|
+
class JSONFeed
|
|
7
|
+
include SAXMachine
|
|
8
|
+
include FeedUtilities
|
|
9
|
+
|
|
10
|
+
def self.able_to_parse?(json)
|
|
11
|
+
%r{https://jsonfeed.org/version/} =~ json
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
def self.parse(json)
|
|
15
|
+
new(JSON.parse(json))
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
attr_reader :json, :version, :title, :url, :feed_url, :description,
|
|
19
|
+
:expired, :entries
|
|
20
|
+
|
|
21
|
+
def initialize(json)
|
|
22
|
+
@json = json
|
|
23
|
+
@version = json.fetch("version")
|
|
24
|
+
@title = json.fetch("title")
|
|
25
|
+
@url = json.fetch("home_page_url", nil)
|
|
26
|
+
@feed_url = json.fetch("feed_url", nil)
|
|
27
|
+
@description = json.fetch("description", nil)
|
|
28
|
+
@expired = json.fetch("expired", nil)
|
|
29
|
+
@entries = parse_items(json["items"])
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
private
|
|
33
|
+
|
|
34
|
+
def parse_items(items)
|
|
35
|
+
items.map do |item|
|
|
36
|
+
Feedjira::Parser::JSONFeedItem.new(item)
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
end
|
|
40
|
+
end
|
|
41
|
+
end
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Feedjira
|
|
4
|
+
module Parser
|
|
5
|
+
# Parser for dealing with JSON Feed items.
|
|
6
|
+
class JSONFeedItem
|
|
7
|
+
include FeedEntryUtilities
|
|
8
|
+
|
|
9
|
+
attr_reader :json, :entry_id, :url, :external_url, :title, :content, :summary,
|
|
10
|
+
:published, :updated, :image, :banner_image, :author, :categories
|
|
11
|
+
|
|
12
|
+
def initialize(json)
|
|
13
|
+
@json = json
|
|
14
|
+
@entry_id = json.fetch("id")
|
|
15
|
+
@url = json.fetch("url")
|
|
16
|
+
@external_url = json.fetch("external_url", nil)
|
|
17
|
+
@title = json.fetch("title", nil)
|
|
18
|
+
@content = parse_content(json.fetch("content_html", nil), json.fetch("content_text", nil))
|
|
19
|
+
@summary = json.fetch("summary", nil)
|
|
20
|
+
@image = json.fetch("image", nil)
|
|
21
|
+
@banner_image = json.fetch("banner_image", nil)
|
|
22
|
+
@published = parse_published(json.fetch("date_published", nil))
|
|
23
|
+
@updated = parse_updated(json.fetch("date_modified", nil))
|
|
24
|
+
@author = author_name(json.fetch("author", nil))
|
|
25
|
+
@categories = json.fetch("tags", [])
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
private
|
|
29
|
+
|
|
30
|
+
def parse_published(date_published)
|
|
31
|
+
return nil unless date_published
|
|
32
|
+
|
|
33
|
+
Time.parse_safely(date_published)
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
def parse_updated(date_modified)
|
|
37
|
+
return nil unless date_modified
|
|
38
|
+
|
|
39
|
+
Time.parse_safely(date_modified)
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
# Convenience method to return the included content type.
|
|
43
|
+
# Prefer content_html unless it isn't included.
|
|
44
|
+
def parse_content(content_html, content_text)
|
|
45
|
+
return content_html unless content_html.nil?
|
|
46
|
+
|
|
47
|
+
content_text
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
def author_name(author_obj)
|
|
51
|
+
return nil if author_obj.nil?
|
|
52
|
+
|
|
53
|
+
author_obj["name"]
|
|
54
|
+
end
|
|
55
|
+
end
|
|
56
|
+
end
|
|
57
|
+
end
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
#
|
|
2
|
-
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
3
|
module Feedjira
|
|
4
4
|
module Parser
|
|
5
5
|
class PodloveChapter
|
|
@@ -12,7 +12,8 @@ module Feedjira
|
|
|
12
12
|
|
|
13
13
|
def start
|
|
14
14
|
return unless start_ntp
|
|
15
|
-
|
|
15
|
+
|
|
16
|
+
parts = start_ntp.split(":")
|
|
16
17
|
parts.reverse.to_enum.with_index.map do |part, index|
|
|
17
18
|
part.to_f * (60**index)
|
|
18
19
|
end.reduce(:+)
|
data/lib/feedjira/parser/rss.rb
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
#
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
2
3
|
module Feedjira
|
|
3
4
|
module Parser
|
|
4
5
|
# Parser for dealing with RSS feeds.
|
|
@@ -11,16 +12,17 @@ module Feedjira
|
|
|
11
12
|
element :language
|
|
12
13
|
element :lastBuildDate, as: :last_built
|
|
13
14
|
element :link, as: :url
|
|
15
|
+
element :"a10:link", as: :url, value: :href
|
|
14
16
|
element :rss, as: :version, value: :version
|
|
15
17
|
element :title
|
|
16
18
|
element :ttl
|
|
17
|
-
elements :"atom:link", as: :hubs, value: :href, with: { rel:
|
|
19
|
+
elements :"atom:link", as: :hubs, value: :href, with: { rel: "hub" }
|
|
18
20
|
elements :item, as: :entries, class: RSSEntry
|
|
19
21
|
|
|
20
22
|
attr_accessor :feed_url
|
|
21
23
|
|
|
22
24
|
def self.able_to_parse?(xml)
|
|
23
|
-
(
|
|
25
|
+
(/<rss|<rdf/ =~ xml) && !(/feedburner/ =~ xml)
|
|
24
26
|
end
|
|
25
27
|
end
|
|
26
28
|
end
|
|
@@ -1,33 +1,12 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
module Feedjira
|
|
2
4
|
module Parser
|
|
3
5
|
# Parser for dealing with RDF feed entries.
|
|
4
6
|
class RSSEntry
|
|
5
7
|
include SAXMachine
|
|
6
8
|
include FeedEntryUtilities
|
|
7
|
-
|
|
8
|
-
element :title
|
|
9
|
-
element :link, as: :url
|
|
10
|
-
|
|
11
|
-
element :"dc:creator", as: :author
|
|
12
|
-
element :author, as: :author
|
|
13
|
-
element :"content:encoded", as: :content
|
|
14
|
-
element :description, as: :summary
|
|
15
|
-
|
|
16
|
-
element :"media:content", as: :image, value: :url
|
|
17
|
-
element :enclosure, as: :image, value: :url
|
|
18
|
-
|
|
19
|
-
element :pubDate, as: :published
|
|
20
|
-
element :pubdate, as: :published
|
|
21
|
-
element :"dc:date", as: :published
|
|
22
|
-
element :"dc:Date", as: :published
|
|
23
|
-
element :"dcterms:created", as: :published
|
|
24
|
-
|
|
25
|
-
element :"dcterms:modified", as: :updated
|
|
26
|
-
element :issued, as: :published
|
|
27
|
-
elements :category, as: :categories
|
|
28
|
-
|
|
29
|
-
element :guid, as: :entry_id
|
|
30
|
-
element :"dc:identifier", as: :entry_id
|
|
9
|
+
include RSSEntryUtilities
|
|
31
10
|
end
|
|
32
11
|
end
|
|
33
12
|
end
|
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
#
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
2
3
|
module Feedjira
|
|
3
4
|
module Parser
|
|
4
5
|
# Parser for dealing with RSS feeds.
|
|
@@ -9,13 +10,13 @@ module Feedjira
|
|
|
9
10
|
element :description
|
|
10
11
|
element :link, as: :url
|
|
11
12
|
element :lastBuildDate, as: :last_built
|
|
12
|
-
elements :"atom10:link", as: :hubs, value: :href, with: { rel:
|
|
13
|
+
elements :"atom10:link", as: :hubs, value: :href, with: { rel: "hub" }
|
|
13
14
|
elements :item, as: :entries, class: RSSFeedBurnerEntry
|
|
14
15
|
|
|
15
16
|
attr_accessor :feed_url
|
|
16
17
|
|
|
17
18
|
def self.able_to_parse?(xml) #:nodoc:
|
|
18
|
-
(
|
|
19
|
+
(/<rss|<rdf/ =~ xml) && (/feedburner/ =~ xml)
|
|
19
20
|
end
|
|
20
21
|
end
|
|
21
22
|
end
|
|
@@ -1,38 +1,18 @@
|
|
|
1
|
-
#
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
2
3
|
module Feedjira
|
|
3
4
|
module Parser
|
|
4
5
|
# Parser for dealing with RDF feed entries.
|
|
5
6
|
class RSSFeedBurnerEntry
|
|
6
7
|
include SAXMachine
|
|
7
8
|
include FeedEntryUtilities
|
|
9
|
+
include RSSEntryUtilities
|
|
8
10
|
|
|
9
|
-
element :
|
|
10
|
-
|
|
11
|
-
element :"feedburner:origLink", as: :url
|
|
12
|
-
element :link, as: :url
|
|
13
|
-
|
|
14
|
-
element :"dc:creator", as: :author
|
|
15
|
-
element :author, as: :author
|
|
16
|
-
element :"content:encoded", as: :content
|
|
17
|
-
element :description, as: :summary
|
|
18
|
-
|
|
19
|
-
element :"media:content", as: :image, value: :url
|
|
20
|
-
element :enclosure, as: :image, value: :url
|
|
21
|
-
|
|
22
|
-
element :pubDate, as: :published
|
|
23
|
-
element :pubdate, as: :published
|
|
24
|
-
element :"dc:date", as: :published
|
|
25
|
-
element :"dc:Date", as: :published
|
|
26
|
-
element :"dcterms:created", as: :published
|
|
27
|
-
|
|
28
|
-
element :"dcterms:modified", as: :updated
|
|
29
|
-
element :issued, as: :published
|
|
30
|
-
elements :category, as: :categories
|
|
31
|
-
|
|
32
|
-
element :guid, as: :entry_id
|
|
11
|
+
element :"feedburner:origLink", as: :orig_link
|
|
12
|
+
private :orig_link
|
|
33
13
|
|
|
34
14
|
def url
|
|
35
|
-
|
|
15
|
+
orig_link || super
|
|
36
16
|
end
|
|
37
17
|
end
|
|
38
18
|
end
|