feedjira 2.2.0 → 3.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. checksums.yaml +4 -4
  2. data/.github/ISSUE_TEMPLATE/feed-parsing.md +15 -0
  3. data/.rubocop.yml +32 -8
  4. data/.rubocop_todo.yml +11 -0
  5. data/.travis.yml +3 -7
  6. data/CHANGELOG.md +18 -9
  7. data/CODE_OF_CONDUCT.md +74 -0
  8. data/Gemfile +8 -5
  9. data/README.md +46 -99
  10. data/Rakefile +8 -6
  11. data/feedjira.gemspec +31 -20
  12. data/lib/feedjira.rb +75 -41
  13. data/lib/feedjira/atom_entry_utilities.rb +51 -0
  14. data/lib/feedjira/configuration.rb +8 -10
  15. data/lib/feedjira/core_ext.rb +5 -3
  16. data/lib/feedjira/core_ext/date.rb +2 -1
  17. data/lib/feedjira/core_ext/string.rb +2 -1
  18. data/lib/feedjira/core_ext/time.rb +12 -12
  19. data/lib/feedjira/date_time_utilities.rb +8 -10
  20. data/lib/feedjira/date_time_utilities/date_time_epoch_parser.rb +3 -2
  21. data/lib/feedjira/date_time_utilities/date_time_language_parser.rb +4 -4
  22. data/lib/feedjira/date_time_utilities/date_time_pattern_parser.rb +11 -15
  23. data/lib/feedjira/feed.rb +12 -82
  24. data/lib/feedjira/feed_entry_utilities.rb +14 -7
  25. data/lib/feedjira/feed_utilities.rb +5 -4
  26. data/lib/feedjira/parser.rb +6 -1
  27. data/lib/feedjira/parser/atom.rb +6 -5
  28. data/lib/feedjira/parser/atom_entry.rb +4 -21
  29. data/lib/feedjira/parser/atom_feed_burner.rb +7 -6
  30. data/lib/feedjira/parser/atom_feed_burner_entry.rb +7 -18
  31. data/lib/feedjira/parser/atom_google_alerts.rb +26 -0
  32. data/lib/feedjira/parser/atom_google_alerts_entry.rb +21 -0
  33. data/lib/feedjira/parser/atom_youtube.rb +4 -3
  34. data/lib/feedjira/parser/atom_youtube_entry.rb +9 -8
  35. data/lib/feedjira/parser/globally_unique_identifier.rb +21 -0
  36. data/lib/feedjira/parser/google_docs_atom.rb +6 -6
  37. data/lib/feedjira/parser/google_docs_atom_entry.rb +3 -19
  38. data/lib/feedjira/parser/itunes_rss.rb +4 -3
  39. data/lib/feedjira/parser/itunes_rss_category.rb +6 -5
  40. data/lib/feedjira/parser/itunes_rss_item.rb +5 -8
  41. data/lib/feedjira/parser/itunes_rss_owner.rb +2 -1
  42. data/lib/feedjira/parser/json_feed.rb +41 -0
  43. data/lib/feedjira/parser/json_feed_item.rb +57 -0
  44. data/lib/feedjira/parser/podlove_chapter.rb +4 -3
  45. data/lib/feedjira/parser/rss.rb +5 -3
  46. data/lib/feedjira/parser/rss_entry.rb +3 -24
  47. data/lib/feedjira/parser/rss_feed_burner.rb +4 -3
  48. data/lib/feedjira/parser/rss_feed_burner_entry.rb +6 -26
  49. data/lib/feedjira/parser/rss_image.rb +2 -0
  50. data/lib/feedjira/preprocessor.rb +4 -4
  51. data/lib/feedjira/rss_entry_utilities.rb +53 -0
  52. data/lib/feedjira/version.rb +3 -1
  53. data/spec/feedjira/configuration_spec.rb +11 -16
  54. data/spec/feedjira/date_time_utilities_spec.rb +22 -20
  55. data/spec/feedjira/feed_entry_utilities_spec.rb +20 -18
  56. data/spec/feedjira/feed_spec.rb +17 -229
  57. data/spec/feedjira/feed_utilities_spec.rb +75 -73
  58. data/spec/feedjira/parser/atom_entry_spec.rb +41 -38
  59. data/spec/feedjira/parser/atom_feed_burner_entry_spec.rb +22 -20
  60. data/spec/feedjira/parser/atom_feed_burner_spec.rb +122 -118
  61. data/spec/feedjira/parser/atom_google_alerts_entry_spec.rb +34 -0
  62. data/spec/feedjira/parser/atom_google_alerts_spec.rb +62 -0
  63. data/spec/feedjira/parser/atom_spec.rb +83 -77
  64. data/spec/feedjira/parser/atom_youtube_entry_spec.rb +41 -39
  65. data/spec/feedjira/parser/atom_youtube_spec.rb +21 -19
  66. data/spec/feedjira/parser/google_docs_atom_entry_spec.rb +10 -8
  67. data/spec/feedjira/parser/google_docs_atom_spec.rb +25 -21
  68. data/spec/feedjira/parser/itunes_rss_item_spec.rb +39 -37
  69. data/spec/feedjira/parser/itunes_rss_owner_spec.rb +7 -5
  70. data/spec/feedjira/parser/itunes_rss_spec.rb +120 -116
  71. data/spec/feedjira/parser/json_feed_item_spec.rb +81 -0
  72. data/spec/feedjira/parser/json_feed_spec.rb +55 -0
  73. data/spec/feedjira/parser/podlove_chapter_spec.rb +14 -12
  74. data/spec/feedjira/parser/rss_entry_spec.rb +56 -34
  75. data/spec/feedjira/parser/rss_feed_burner_entry_spec.rb +36 -34
  76. data/spec/feedjira/parser/rss_feed_burner_spec.rb +49 -45
  77. data/spec/feedjira/parser/rss_spec.rb +38 -36
  78. data/spec/feedjira/preprocessor_spec.rb +9 -7
  79. data/spec/feedjira_spec.rb +166 -0
  80. data/spec/sample_feeds.rb +32 -29
  81. data/spec/sample_feeds/HuffPostCanada.xml +279 -0
  82. data/spec/sample_feeds/Permalinks.xml +22 -0
  83. data/spec/sample_feeds/a10.xml +72 -0
  84. data/spec/sample_feeds/google_alerts_atom.xml +1 -0
  85. data/spec/sample_feeds/json_feed.json +156 -0
  86. data/spec/spec_helper.rb +7 -5
  87. metadata +59 -70
  88. data/Dangerfile +0 -1
  89. data/fixtures/vcr_cassettes/fetch_failure.yml +0 -62
  90. data/fixtures/vcr_cassettes/parse_error.yml +0 -222
  91. data/fixtures/vcr_cassettes/success.yml +0 -281
@@ -1,4 +1,5 @@
1
- # rubocop:disable Style/DocumentationMethod
1
+ # frozen_string_literal: true
2
+
2
3
  module Feedjira
3
4
  module Parser
4
5
  # Parser for dealing with RSS feeds.
@@ -6,8 +7,8 @@ module Feedjira
6
7
  include SAXMachine
7
8
  include FeedUtilities
8
9
  element :title
9
- element :link, as: :url, value: :href, with: { rel: 'alternate' }
10
- element :link, as: :feed_url, value: :href, with: { rel: 'self' }
10
+ element :link, as: :url, value: :href, with: { rel: "alternate" }
11
+ element :link, as: :feed_url, value: :href, with: { rel: "self" }
11
12
  element :name, as: :author
12
13
  element :"yt:channelId", as: :youtube_channel_id
13
14
 
@@ -1,19 +1,20 @@
1
- # rubocop:disable Style/Documentation
1
+ # frozen_string_literal: true
2
+
2
3
  module Feedjira
3
4
  module Parser
4
5
  class AtomYoutubeEntry
5
6
  include SAXMachine
6
7
  include FeedEntryUtilities
8
+ include AtomEntryUtilities
9
+
10
+ sax_config.top_level_elements["link"].clear
11
+ sax_config.collection_elements["link"].clear
12
+
13
+ element :link, as: :url, value: :href, with: { rel: "alternate" }
7
14
 
8
- element :title
9
- element :link, as: :url, value: :href, with: { rel: 'alternate' }
10
- element :name, as: :author
11
15
  element :"media:description", as: :content
12
- element :summary
13
- element :published
14
- element :id, as: :entry_id
15
- element :updated
16
16
  element :"yt:videoId", as: :youtube_video_id
17
+ element :"yt:channelId", as: :youtube_channel_id
17
18
  element :"media:title", as: :media_title
18
19
  element :"media:content", as: :media_url, value: :url
19
20
  element :"media:content", as: :media_type, value: :type
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Feedjira
4
+ module Parser
5
+ class GloballyUniqueIdentifier
6
+ include SAXMachine
7
+
8
+ attribute :isPermaLink, as: :is_perma_link
9
+
10
+ value :guid
11
+
12
+ def perma_link?
13
+ is_perma_link != "false"
14
+ end
15
+
16
+ def url
17
+ perma_link? ? guid : nil
18
+ end
19
+ end
20
+ end
21
+ end
@@ -1,6 +1,6 @@
1
- require File.expand_path('./atom', File.dirname(__FILE__))
2
- # rubocop:disable Style/Documentation
3
- # rubocop:disable Style/DocumentationMethod
1
+ # frozen_string_literal: true
2
+
3
+ require File.expand_path("./atom", File.dirname(__FILE__))
4
4
  module Feedjira
5
5
  module Parser
6
6
  class GoogleDocsAtom
@@ -8,8 +8,8 @@ module Feedjira
8
8
  include FeedUtilities
9
9
  element :title
10
10
  element :subtitle, as: :description
11
- element :link, as: :url, value: :href, with: { type: 'text/html' }
12
- element :link, as: :feed_url, value: :href, with: { type: 'application/atom+xml' } # rubocop:disable Metrics/LineLength
11
+ element :link, as: :url, value: :href, with: { type: "text/html" }
12
+ element :link, as: :feed_url, value: :href, with: { type: "application/atom+xml" }
13
13
  elements :link, as: :links, value: :href
14
14
  elements :entry, as: :entries, class: GoogleDocsAtomEntry
15
15
 
@@ -18,7 +18,7 @@ module Feedjira
18
18
  end
19
19
 
20
20
  def self.able_to_parse?(xml) #:nodoc:
21
- %r{<id>https?://docs\.google\.com/.*\</id\>} =~ xml
21
+ %r{<id>https?://docs\.google\.com/.*</id>} =~ xml
22
22
  end
23
23
 
24
24
  def feed_url
@@ -1,31 +1,15 @@
1
- # rubocop:disable Style/Documentation
2
- # rubocop:disable Style/DocumentationMethod
1
+ # frozen_string_literal: true
2
+
3
3
  module Feedjira
4
4
  module Parser
5
5
  class GoogleDocsAtomEntry
6
6
  include SAXMachine
7
7
  include FeedEntryUtilities
8
+ include AtomEntryUtilities
8
9
 
9
- element :title
10
- element :link, as: :url, value: :href, with: { type: 'text/html', rel: 'alternate' } # rubocop:disable Metrics/LineLength
11
- element :name, as: :author
12
- element :content
13
- element :summary
14
- element :published
15
- element :id, as: :entry_id
16
- element :created, as: :published
17
- element :issued, as: :published
18
- element :updated
19
- element :modified, as: :updated
20
- elements :category, as: :categories, value: :term
21
- elements :link, as: :links, value: :href
22
10
  element :"docs:md5Checksum", as: :checksum
23
11
  element :"docs:filename", as: :original_filename
24
12
  element :"docs:suggestedFilename", as: :suggested_filename
25
-
26
- def url
27
- @url ||= links.first
28
- end
29
13
  end
30
14
  end
31
15
  end
@@ -1,4 +1,5 @@
1
- # rubocop:disable Style/DocumentationMethod
1
+ # frozen_string_literal: true
2
+
2
3
  module Feedjira
3
4
  module Parser
4
5
  # iTunes is RSS 2.0 + some apple extensions
@@ -19,7 +20,7 @@ module Feedjira
19
20
  element :language
20
21
  element :lastBuildDate, as: :last_built
21
22
  element :link, as: :url
22
- element :managingEditor
23
+ element :managingEditor, as: :managing_editor
23
24
  element :rss, as: :version, value: :version
24
25
  element :title
25
26
  element :ttl
@@ -62,7 +63,7 @@ module Feedjira
62
63
  elements :item, as: :entries, class: ITunesRSSItem
63
64
 
64
65
  def self.able_to_parse?(xml)
65
- %r{xmlns:itunes\s?=\s?[\"\']http://www\.itunes\.com/dtds/podcast-1\.0\.dtd[\"\']}i =~ xml # rubocop:disable Metrics/LineLength
66
+ %r{xmlns:itunes\s?=\s?["']http://www\.itunes\.com/dtds/podcast-1\.0\.dtd["']}i =~ xml
66
67
  end
67
68
  end
68
69
  end
@@ -1,4 +1,5 @@
1
- # rubocop:disable Style/DocumentationMethod
1
+ # frozen_string_literal: true
2
+
2
3
  module Feedjira
3
4
  module Parser
4
5
  # iTunes extensions to the standard RSS2.0 item
@@ -11,17 +12,17 @@ module Feedjira
11
12
  elements :"itunes:category", as: :itunes_categories,
12
13
  class: ITunesRSSCategory
13
14
 
14
- def each_subcategory
15
+ def each_subcategory(&block)
15
16
  return to_enum(__method__) unless block_given?
16
17
 
17
18
  yield text
18
19
 
19
20
  itunes_categories.each do |itunes_category|
20
- itunes_category.each_subcategory(&proc)
21
+ itunes_category.each_subcategory(&block)
21
22
  end
22
23
  end
23
24
 
24
- def each_path(ancestors = [])
25
+ def each_path(ancestors = [], &block)
25
26
  return to_enum(__method__, ancestors) unless block_given?
26
27
 
27
28
  category_hierarchy = ancestors + [text]
@@ -30,7 +31,7 @@ module Feedjira
30
31
  yield category_hierarchy
31
32
  else
32
33
  itunes_categories.each do |itunes_category|
33
- itunes_category.each_path(category_hierarchy, &proc)
34
+ itunes_category.each_path(category_hierarchy, &block)
34
35
  end
35
36
  end
36
37
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Feedjira
2
4
  module Parser
3
5
  # iTunes extensions to the standard RSS2.0 item
@@ -5,14 +7,9 @@ module Feedjira
5
7
  class ITunesRSSItem
6
8
  include SAXMachine
7
9
  include FeedEntryUtilities
10
+ include RSSEntryUtilities
8
11
 
9
- element :author
10
- element :guid, as: :entry_id
11
- element :title
12
- element :link, as: :url
13
- element :description, as: :summary
14
- element :"content:encoded", as: :content
15
- element :pubDate, as: :published
12
+ sax_config.top_level_elements["enclosure"].clear
16
13
 
17
14
  # If author is not present use author tag on the item
18
15
  element :"itunes:author", as: :itunes_author
@@ -34,7 +31,7 @@ module Feedjira
34
31
  element :enclosure, value: :length, as: :enclosure_length
35
32
  element :enclosure, value: :type, as: :enclosure_type
36
33
  element :enclosure, value: :url, as: :enclosure_url
37
- elements 'psc:chapter', as: :raw_chapters, class: Feedjira::Parser::PodloveChapter # rubocop:disable Metrics/LineLength
34
+ elements "psc:chapter", as: :raw_chapters, class: Feedjira::Parser::PodloveChapter
38
35
 
39
36
  # Podlove requires clients to re-order by start time in the
40
37
  # event the publisher doesn't provide them in that
@@ -1,4 +1,5 @@
1
- # rubocop:disable Style/Documentation
1
+ # frozen_string_literal: true
2
+
2
3
  module Feedjira
3
4
  module Parser
4
5
  class ITunesRSSOwner
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Feedjira
4
+ module Parser
5
+ # Parser for dealing with JSON Feeds.
6
+ class JSONFeed
7
+ include SAXMachine
8
+ include FeedUtilities
9
+
10
+ def self.able_to_parse?(json)
11
+ %r{https://jsonfeed.org/version/} =~ json
12
+ end
13
+
14
+ def self.parse(json)
15
+ new(JSON.parse(json))
16
+ end
17
+
18
+ attr_reader :json, :version, :title, :url, :feed_url, :description,
19
+ :expired, :entries
20
+
21
+ def initialize(json)
22
+ @json = json
23
+ @version = json.fetch("version")
24
+ @title = json.fetch("title")
25
+ @url = json.fetch("home_page_url", nil)
26
+ @feed_url = json.fetch("feed_url", nil)
27
+ @description = json.fetch("description", nil)
28
+ @expired = json.fetch("expired", nil)
29
+ @entries = parse_items(json["items"])
30
+ end
31
+
32
+ private
33
+
34
+ def parse_items(items)
35
+ items.map do |item|
36
+ Feedjira::Parser::JSONFeedItem.new(item)
37
+ end
38
+ end
39
+ end
40
+ end
41
+ end
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Feedjira
4
+ module Parser
5
+ # Parser for dealing with JSON Feed items.
6
+ class JSONFeedItem
7
+ include FeedEntryUtilities
8
+
9
+ attr_reader :json, :entry_id, :url, :external_url, :title, :content, :summary,
10
+ :published, :updated, :image, :banner_image, :author, :categories
11
+
12
+ def initialize(json)
13
+ @json = json
14
+ @entry_id = json.fetch("id")
15
+ @url = json.fetch("url")
16
+ @external_url = json.fetch("external_url", nil)
17
+ @title = json.fetch("title", nil)
18
+ @content = parse_content(json.fetch("content_html", nil), json.fetch("content_text", nil))
19
+ @summary = json.fetch("summary", nil)
20
+ @image = json.fetch("image", nil)
21
+ @banner_image = json.fetch("banner_image", nil)
22
+ @published = parse_published(json.fetch("date_published", nil))
23
+ @updated = parse_updated(json.fetch("date_modified", nil))
24
+ @author = author_name(json.fetch("author", nil))
25
+ @categories = json.fetch("tags", [])
26
+ end
27
+
28
+ private
29
+
30
+ def parse_published(date_published)
31
+ return nil unless date_published
32
+
33
+ Time.parse_safely(date_published)
34
+ end
35
+
36
+ def parse_updated(date_modified)
37
+ return nil unless date_modified
38
+
39
+ Time.parse_safely(date_modified)
40
+ end
41
+
42
+ # Convenience method to return the included content type.
43
+ # Prefer content_html unless it isn't included.
44
+ def parse_content(content_html, content_text)
45
+ return content_html unless content_html.nil?
46
+
47
+ content_text
48
+ end
49
+
50
+ def author_name(author_obj)
51
+ return nil if author_obj.nil?
52
+
53
+ author_obj["name"]
54
+ end
55
+ end
56
+ end
57
+ end
@@ -1,5 +1,5 @@
1
- # rubocop:disable Style/Documentation
2
- # rubocop:disable Style/DocumentationMethod
1
+ # frozen_string_literal: true
2
+
3
3
  module Feedjira
4
4
  module Parser
5
5
  class PodloveChapter
@@ -12,7 +12,8 @@ module Feedjira
12
12
 
13
13
  def start
14
14
  return unless start_ntp
15
- parts = start_ntp.split(':')
15
+
16
+ parts = start_ntp.split(":")
16
17
  parts.reverse.to_enum.with_index.map do |part, index|
17
18
  part.to_f * (60**index)
18
19
  end.reduce(:+)
@@ -1,4 +1,5 @@
1
- # rubocop:disable Style/DocumentationMethod
1
+ # frozen_string_literal: true
2
+
2
3
  module Feedjira
3
4
  module Parser
4
5
  # Parser for dealing with RSS feeds.
@@ -11,16 +12,17 @@ module Feedjira
11
12
  element :language
12
13
  element :lastBuildDate, as: :last_built
13
14
  element :link, as: :url
15
+ element :"a10:link", as: :url, value: :href
14
16
  element :rss, as: :version, value: :version
15
17
  element :title
16
18
  element :ttl
17
- elements :"atom:link", as: :hubs, value: :href, with: { rel: 'hub' }
19
+ elements :"atom:link", as: :hubs, value: :href, with: { rel: "hub" }
18
20
  elements :item, as: :entries, class: RSSEntry
19
21
 
20
22
  attr_accessor :feed_url
21
23
 
22
24
  def self.able_to_parse?(xml)
23
- (/\<rss|\<rdf/ =~ xml) && !(/feedburner/ =~ xml)
25
+ (/<rss|<rdf/ =~ xml) && !(/feedburner/ =~ xml)
24
26
  end
25
27
  end
26
28
  end
@@ -1,33 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Feedjira
2
4
  module Parser
3
5
  # Parser for dealing with RDF feed entries.
4
6
  class RSSEntry
5
7
  include SAXMachine
6
8
  include FeedEntryUtilities
7
-
8
- element :title
9
- element :link, as: :url
10
-
11
- element :"dc:creator", as: :author
12
- element :author, as: :author
13
- element :"content:encoded", as: :content
14
- element :description, as: :summary
15
-
16
- element :"media:content", as: :image, value: :url
17
- element :enclosure, as: :image, value: :url
18
-
19
- element :pubDate, as: :published
20
- element :pubdate, as: :published
21
- element :"dc:date", as: :published
22
- element :"dc:Date", as: :published
23
- element :"dcterms:created", as: :published
24
-
25
- element :"dcterms:modified", as: :updated
26
- element :issued, as: :published
27
- elements :category, as: :categories
28
-
29
- element :guid, as: :entry_id
30
- element :"dc:identifier", as: :entry_id
9
+ include RSSEntryUtilities
31
10
  end
32
11
  end
33
12
  end
@@ -1,4 +1,5 @@
1
- # rubocop:disable Style/DocumentationMethod
1
+ # frozen_string_literal: true
2
+
2
3
  module Feedjira
3
4
  module Parser
4
5
  # Parser for dealing with RSS feeds.
@@ -9,13 +10,13 @@ module Feedjira
9
10
  element :description
10
11
  element :link, as: :url
11
12
  element :lastBuildDate, as: :last_built
12
- elements :"atom10:link", as: :hubs, value: :href, with: { rel: 'hub' }
13
+ elements :"atom10:link", as: :hubs, value: :href, with: { rel: "hub" }
13
14
  elements :item, as: :entries, class: RSSFeedBurnerEntry
14
15
 
15
16
  attr_accessor :feed_url
16
17
 
17
18
  def self.able_to_parse?(xml) #:nodoc:
18
- (/\<rss|\<rdf/ =~ xml) && (/feedburner/ =~ xml)
19
+ (/<rss|<rdf/ =~ xml) && (/feedburner/ =~ xml)
19
20
  end
20
21
  end
21
22
  end
@@ -1,38 +1,18 @@
1
- # rubocop:disable Style/DocumentationMethod
1
+ # frozen_string_literal: true
2
+
2
3
  module Feedjira
3
4
  module Parser
4
5
  # Parser for dealing with RDF feed entries.
5
6
  class RSSFeedBurnerEntry
6
7
  include SAXMachine
7
8
  include FeedEntryUtilities
9
+ include RSSEntryUtilities
8
10
 
9
- element :title
10
-
11
- element :"feedburner:origLink", as: :url
12
- element :link, as: :url
13
-
14
- element :"dc:creator", as: :author
15
- element :author, as: :author
16
- element :"content:encoded", as: :content
17
- element :description, as: :summary
18
-
19
- element :"media:content", as: :image, value: :url
20
- element :enclosure, as: :image, value: :url
21
-
22
- element :pubDate, as: :published
23
- element :pubdate, as: :published
24
- element :"dc:date", as: :published
25
- element :"dc:Date", as: :published
26
- element :"dcterms:created", as: :published
27
-
28
- element :"dcterms:modified", as: :updated
29
- element :issued, as: :published
30
- elements :category, as: :categories
31
-
32
- element :guid, as: :entry_id
11
+ element :"feedburner:origLink", as: :orig_link
12
+ private :orig_link
33
13
 
34
14
  def url
35
- @url || @link
15
+ orig_link || super
36
16
  end
37
17
  end
38
18
  end