feedjira 3.0.0.beta1 → 3.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (89)
  1. checksums.yaml +5 -5
  2. data/.github/ISSUE_TEMPLATE/feed-parsing.md +15 -0
  3. data/.github/workflows/ruby.yml +39 -0
  4. data/.rubocop.yml +18 -628
  5. data/.rubocop_todo.yml +11 -0
  6. data/CHANGELOG.md +66 -9
  7. data/Gemfile +4 -1
  8. data/README.md +10 -1
  9. data/Rakefile +3 -1
  10. data/feedjira.gemspec +14 -12
  11. data/lib/feedjira.rb +9 -3
  12. data/lib/feedjira/atom_entry_utilities.rb +51 -0
  13. data/lib/feedjira/configuration.rb +6 -3
  14. data/lib/feedjira/core_ext.rb +2 -0
  15. data/lib/feedjira/core_ext/date.rb +3 -2
  16. data/lib/feedjira/core_ext/string.rb +2 -1
  17. data/lib/feedjira/core_ext/time.rb +10 -10
  18. data/lib/feedjira/date_time_utilities.rb +8 -10
  19. data/lib/feedjira/date_time_utilities/date_time_epoch_parser.rb +3 -2
  20. data/lib/feedjira/date_time_utilities/date_time_language_parser.rb +4 -4
  21. data/lib/feedjira/date_time_utilities/date_time_pattern_parser.rb +9 -13
  22. data/lib/feedjira/feed.rb +3 -3
  23. data/lib/feedjira/feed_entry_utilities.rb +15 -8
  24. data/lib/feedjira/feed_utilities.rb +5 -4
  25. data/lib/feedjira/parser.rb +2 -0
  26. data/lib/feedjira/parser/atom.rb +5 -7
  27. data/lib/feedjira/parser/atom_entry.rb +4 -21
  28. data/lib/feedjira/parser/atom_feed_burner.rb +4 -3
  29. data/lib/feedjira/parser/atom_feed_burner_entry.rb +7 -18
  30. data/lib/feedjira/parser/atom_google_alerts.rb +26 -0
  31. data/lib/feedjira/parser/atom_google_alerts_entry.rb +21 -0
  32. data/lib/feedjira/parser/atom_youtube.rb +2 -1
  33. data/lib/feedjira/parser/atom_youtube_entry.rb +8 -7
  34. data/lib/feedjira/parser/globally_unique_identifier.rb +21 -0
  35. data/lib/feedjira/parser/google_docs_atom.rb +4 -4
  36. data/lib/feedjira/parser/google_docs_atom_entry.rb +3 -19
  37. data/lib/feedjira/parser/itunes_rss.rb +4 -3
  38. data/lib/feedjira/parser/itunes_rss_category.rb +6 -5
  39. data/lib/feedjira/parser/itunes_rss_item.rb +5 -8
  40. data/lib/feedjira/parser/itunes_rss_owner.rb +2 -1
  41. data/lib/feedjira/parser/json_feed.rb +4 -2
  42. data/lib/feedjira/parser/json_feed_item.rb +7 -1
  43. data/lib/feedjira/parser/podlove_chapter.rb +3 -2
  44. data/lib/feedjira/parser/rss.rb +4 -2
  45. data/lib/feedjira/parser/rss_entry.rb +3 -28
  46. data/lib/feedjira/parser/rss_feed_burner.rb +3 -2
  47. data/lib/feedjira/parser/rss_feed_burner_entry.rb +6 -26
  48. data/lib/feedjira/parser/rss_image.rb +2 -0
  49. data/lib/feedjira/preprocessor.rb +3 -3
  50. data/lib/feedjira/rss_entry_utilities.rb +53 -0
  51. data/lib/feedjira/version.rb +3 -1
  52. data/spec/feedjira/configuration_spec.rb +5 -3
  53. data/spec/feedjira/date_time_utilities_spec.rb +2 -0
  54. data/spec/feedjira/feed_entry_utilities_spec.rb +4 -2
  55. data/spec/feedjira/feed_spec.rb +3 -1
  56. data/spec/feedjira/feed_utilities_spec.rb +5 -3
  57. data/spec/feedjira/parser/atom_entry_spec.rb +7 -4
  58. data/spec/feedjira/parser/atom_feed_burner_entry_spec.rb +7 -5
  59. data/spec/feedjira/parser/atom_feed_burner_spec.rb +4 -2
  60. data/spec/feedjira/parser/atom_google_alerts_entry_spec.rb +34 -0
  61. data/spec/feedjira/parser/atom_google_alerts_spec.rb +62 -0
  62. data/spec/feedjira/parser/atom_spec.rb +30 -9
  63. data/spec/feedjira/parser/atom_youtube_entry_spec.rb +8 -6
  64. data/spec/feedjira/parser/atom_youtube_spec.rb +6 -4
  65. data/spec/feedjira/parser/google_docs_atom_entry_spec.rb +2 -0
  66. data/spec/feedjira/parser/google_docs_atom_spec.rb +2 -0
  67. data/spec/feedjira/parser/itunes_rss_item_spec.rb +3 -1
  68. data/spec/feedjira/parser/itunes_rss_owner_spec.rb +2 -0
  69. data/spec/feedjira/parser/itunes_rss_spec.rb +11 -9
  70. data/spec/feedjira/parser/json_feed_item_spec.rb +5 -3
  71. data/spec/feedjira/parser/json_feed_spec.rb +2 -0
  72. data/spec/feedjira/parser/podlove_chapter_spec.rb +2 -0
  73. data/spec/feedjira/parser/rss_entry_spec.rb +26 -4
  74. data/spec/feedjira/parser/rss_feed_burner_entry_spec.rb +9 -7
  75. data/spec/feedjira/parser/rss_feed_burner_spec.rb +3 -1
  76. data/spec/feedjira/parser/rss_spec.rb +2 -0
  77. data/spec/feedjira/preprocessor_spec.rb +4 -2
  78. data/spec/feedjira_spec.rb +22 -1
  79. data/spec/sample_feeds.rb +7 -3
  80. data/spec/sample_feeds/InvalidDateFormat.xml +20 -0
  81. data/spec/sample_feeds/Permalinks.xml +22 -0
  82. data/spec/sample_feeds/a10.xml +72 -0
  83. data/spec/sample_feeds/atom_simple_single_entry.xml +17 -0
  84. data/spec/sample_feeds/atom_simple_single_entry_link_self.xml +17 -0
  85. data/spec/sample_feeds/google_alerts_atom.xml +1 -0
  86. data/spec/spec_helper.rb +3 -7
  87. metadata +44 -47
  88. data/.travis.yml +0 -37
  89. data/Dangerfile +0 -1
@@ -1,31 +1,15 @@
1
- # rubocop:disable Style/Documentation
2
- # rubocop:disable Style/DocumentationMethod
1
+ # frozen_string_literal: true
2
+
3
3
  module Feedjira
4
4
  module Parser
5
5
  class GoogleDocsAtomEntry
6
6
  include SAXMachine
7
7
  include FeedEntryUtilities
8
+ include AtomEntryUtilities
8
9
 
9
- element :title
10
- element :link, as: :url, value: :href, with: { type: "text/html", rel: "alternate" } # rubocop:disable Metrics/LineLength
11
- element :name, as: :author
12
- element :content
13
- element :summary
14
- element :published
15
- element :id, as: :entry_id
16
- element :created, as: :published
17
- element :issued, as: :published
18
- element :updated
19
- element :modified, as: :updated
20
- elements :category, as: :categories, value: :term
21
- elements :link, as: :links, value: :href
22
10
  element :"docs:md5Checksum", as: :checksum
23
11
  element :"docs:filename", as: :original_filename
24
12
  element :"docs:suggestedFilename", as: :suggested_filename
25
-
26
- def url
27
- @url ||= links.first
28
- end
29
13
  end
30
14
  end
31
15
  end
@@ -1,4 +1,5 @@
1
- # rubocop:disable Style/DocumentationMethod
1
+ # frozen_string_literal: true
2
+
2
3
  module Feedjira
3
4
  module Parser
4
5
  # iTunes is RSS 2.0 + some apple extensions
@@ -19,7 +20,7 @@ module Feedjira
19
20
  element :language
20
21
  element :lastBuildDate, as: :last_built
21
22
  element :link, as: :url
22
- element :managingEditor
23
+ element :managingEditor, as: :managing_editor
23
24
  element :rss, as: :version, value: :version
24
25
  element :title
25
26
  element :ttl
@@ -62,7 +63,7 @@ module Feedjira
62
63
  elements :item, as: :entries, class: ITunesRSSItem
63
64
 
64
65
  def self.able_to_parse?(xml)
65
- %r{xmlns:itunes\s?=\s?[\"\']http://www\.itunes\.com/dtds/podcast-1\.0\.dtd[\"\']}i =~ xml # rubocop:disable Metrics/LineLength
66
+ %r{xmlns:itunes\s?=\s?["']http://www\.itunes\.com/dtds/podcast-1\.0\.dtd["']}i =~ xml
66
67
  end
67
68
  end
68
69
  end
@@ -1,4 +1,5 @@
1
- # rubocop:disable Style/DocumentationMethod
1
+ # frozen_string_literal: true
2
+
2
3
  module Feedjira
3
4
  module Parser
4
5
  # iTunes extensions to the standard RSS2.0 item
@@ -11,17 +12,17 @@ module Feedjira
11
12
  elements :"itunes:category", as: :itunes_categories,
12
13
  class: ITunesRSSCategory
13
14
 
14
- def each_subcategory
15
+ def each_subcategory(&block)
15
16
  return to_enum(__method__) unless block_given?
16
17
 
17
18
  yield text
18
19
 
19
20
  itunes_categories.each do |itunes_category|
20
- itunes_category.each_subcategory(&proc)
21
+ itunes_category.each_subcategory(&block)
21
22
  end
22
23
  end
23
24
 
24
- def each_path(ancestors = [])
25
+ def each_path(ancestors = [], &block)
25
26
  return to_enum(__method__, ancestors) unless block_given?
26
27
 
27
28
  category_hierarchy = ancestors + [text]
@@ -30,7 +31,7 @@ module Feedjira
30
31
  yield category_hierarchy
31
32
  else
32
33
  itunes_categories.each do |itunes_category|
33
- itunes_category.each_path(category_hierarchy, &proc)
34
+ itunes_category.each_path(category_hierarchy, &block)
34
35
  end
35
36
  end
36
37
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Feedjira
2
4
  module Parser
3
5
  # iTunes extensions to the standard RSS2.0 item
@@ -5,14 +7,9 @@ module Feedjira
5
7
  class ITunesRSSItem
6
8
  include SAXMachine
7
9
  include FeedEntryUtilities
10
+ include RSSEntryUtilities
8
11
 
9
- element :author
10
- element :guid, as: :entry_id
11
- element :title
12
- element :link, as: :url
13
- element :description, as: :summary
14
- element :"content:encoded", as: :content
15
- element :pubDate, as: :published
12
+ sax_config.top_level_elements["enclosure"].clear
16
13
 
17
14
  # If author is not present use author tag on the item
18
15
  element :"itunes:author", as: :itunes_author
@@ -34,7 +31,7 @@ module Feedjira
34
31
  element :enclosure, value: :length, as: :enclosure_length
35
32
  element :enclosure, value: :type, as: :enclosure_type
36
33
  element :enclosure, value: :url, as: :enclosure_url
37
- elements "psc:chapter", as: :raw_chapters, class: Feedjira::Parser::PodloveChapter # rubocop:disable Metrics/LineLength
34
+ elements "psc:chapter", as: :raw_chapters, class: Feedjira::Parser::PodloveChapter
38
35
 
39
36
  # Podlove requires clients to re-order by start time in the
40
37
  # event the publisher doesn't provide them in that
@@ -1,4 +1,5 @@
1
- # rubocop:disable Style/Documentation
1
+ # frozen_string_literal: true
2
+
2
3
  module Feedjira
3
4
  module Parser
4
5
  class ITunesRSSOwner
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Feedjira
2
4
  module Parser
3
5
  # Parser for dealing with JSON Feeds.
@@ -6,7 +8,7 @@ module Feedjira
6
8
  include FeedUtilities
7
9
 
8
10
  def self.able_to_parse?(json)
9
- %r{https:\/\/jsonfeed.org\/version\/} =~ json
11
+ %r{https://jsonfeed.org/version/} =~ json
10
12
  end
11
13
 
12
14
  def self.parse(json)
@@ -14,7 +16,7 @@ module Feedjira
14
16
  end
15
17
 
16
18
  attr_reader :json, :version, :title, :url, :feed_url, :description,
17
- :expired, :entries
19
+ :expired, :entries
18
20
 
19
21
  def initialize(json)
20
22
  @json = json
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Feedjira
2
4
  module Parser
3
5
  # Parser for dealing with JSON Feed items.
@@ -5,7 +7,7 @@ module Feedjira
5
7
  include FeedEntryUtilities
6
8
 
7
9
  attr_reader :json, :entry_id, :url, :external_url, :title, :content, :summary,
8
- :published, :updated, :image, :banner_image, :author, :categories
10
+ :published, :updated, :image, :banner_image, :author, :categories
9
11
 
10
12
  def initialize(json)
11
13
  @json = json
@@ -27,11 +29,13 @@ module Feedjira
27
29
 
28
30
  def parse_published(date_published)
29
31
  return nil unless date_published
32
+
30
33
  Time.parse_safely(date_published)
31
34
  end
32
35
 
33
36
  def parse_updated(date_modified)
34
37
  return nil unless date_modified
38
+
35
39
  Time.parse_safely(date_modified)
36
40
  end
37
41
 
@@ -39,11 +43,13 @@ module Feedjira
39
43
  # Prefer content_html unless it isn't included.
40
44
  def parse_content(content_html, content_text)
41
45
  return content_html unless content_html.nil?
46
+
42
47
  content_text
43
48
  end
44
49
 
45
50
  def author_name(author_obj)
46
51
  return nil if author_obj.nil?
52
+
47
53
  author_obj["name"]
48
54
  end
49
55
  end
@@ -1,5 +1,5 @@
1
- # rubocop:disable Style/Documentation
2
- # rubocop:disable Style/DocumentationMethod
1
+ # frozen_string_literal: true
2
+
3
3
  module Feedjira
4
4
  module Parser
5
5
  class PodloveChapter
@@ -12,6 +12,7 @@ module Feedjira
12
12
 
13
13
  def start
14
14
  return unless start_ntp
15
+
15
16
  parts = start_ntp.split(":")
16
17
  parts.reverse.to_enum.with_index.map do |part, index|
17
18
  part.to_f * (60**index)
@@ -1,4 +1,5 @@
1
- # rubocop:disable Style/DocumentationMethod
1
+ # frozen_string_literal: true
2
+
2
3
  module Feedjira
3
4
  module Parser
4
5
  # Parser for dealing with RSS feeds.
@@ -11,6 +12,7 @@ module Feedjira
11
12
  element :language
12
13
  element :lastBuildDate, as: :last_built
13
14
  element :link, as: :url
15
+ element :"a10:link", as: :url, value: :href
14
16
  element :rss, as: :version, value: :version
15
17
  element :title
16
18
  element :ttl
@@ -20,7 +22,7 @@ module Feedjira
20
22
  attr_accessor :feed_url
21
23
 
22
24
  def self.able_to_parse?(xml)
23
- (/\<rss|\<rdf/ =~ xml) && !(/feedburner/ =~ xml)
25
+ (/<rss|<rdf/ =~ xml) && !(/feedburner/ =~ xml)
24
26
  end
25
27
  end
26
28
  end
@@ -1,37 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Feedjira
2
4
  module Parser
3
5
  # Parser for dealing with RDF feed entries.
4
6
  class RSSEntry
5
7
  include SAXMachine
6
8
  include FeedEntryUtilities
7
-
8
- element :title
9
- element :link, as: :url
10
-
11
- element :"dc:creator", as: :author
12
- element :author, as: :author
13
- element :"content:encoded", as: :content
14
- element :description, as: :summary
15
-
16
- element :"media:content", as: :image, value: :url
17
- element :enclosure, as: :image, value: :url
18
-
19
- element :pubDate, as: :published
20
- element :pubdate, as: :published
21
- element :"dc:date", as: :published
22
- element :"dc:Date", as: :published
23
- element :"dcterms:created", as: :published
24
-
25
- element :"dcterms:modified", as: :updated
26
- element :issued, as: :published
27
- elements :category, as: :categories
28
-
29
- element :guid, as: :entry_id
30
- element :"dc:identifier", as: :dc_identifier
31
-
32
- def id
33
- @entry_id ||= @dc_identifier || @url
34
- end
9
+ include RSSEntryUtilities
35
10
  end
36
11
  end
37
12
  end
@@ -1,4 +1,5 @@
1
- # rubocop:disable Style/DocumentationMethod
1
+ # frozen_string_literal: true
2
+
2
3
  module Feedjira
3
4
  module Parser
4
5
  # Parser for dealing with RSS feeds.
@@ -15,7 +16,7 @@ module Feedjira
15
16
  attr_accessor :feed_url
16
17
 
17
18
  def self.able_to_parse?(xml) #:nodoc:
18
- (/\<rss|\<rdf/ =~ xml) && (/feedburner/ =~ xml)
19
+ (/<rss|<rdf/ =~ xml) && (/feedburner/ =~ xml)
19
20
  end
20
21
  end
21
22
  end
@@ -1,38 +1,18 @@
1
- # rubocop:disable Style/DocumentationMethod
1
+ # frozen_string_literal: true
2
+
2
3
  module Feedjira
3
4
  module Parser
4
5
  # Parser for dealing with RDF feed entries.
5
6
  class RSSFeedBurnerEntry
6
7
  include SAXMachine
7
8
  include FeedEntryUtilities
9
+ include RSSEntryUtilities
8
10
 
9
- element :title
10
-
11
- element :"feedburner:origLink", as: :url
12
- element :link, as: :url
13
-
14
- element :"dc:creator", as: :author
15
- element :author, as: :author
16
- element :"content:encoded", as: :content
17
- element :description, as: :summary
18
-
19
- element :"media:content", as: :image, value: :url
20
- element :enclosure, as: :image, value: :url
21
-
22
- element :pubDate, as: :published
23
- element :pubdate, as: :published
24
- element :"dc:date", as: :published
25
- element :"dc:Date", as: :published
26
- element :"dcterms:created", as: :published
27
-
28
- element :"dcterms:modified", as: :updated
29
- element :issued, as: :published
30
- elements :category, as: :categories
31
-
32
- element :guid, as: :entry_id
11
+ element :"feedburner:origLink", as: :orig_link
12
+ private :orig_link
33
13
 
34
14
  def url
35
- @url || @link
15
+ orig_link || super
36
16
  end
37
17
  end
38
18
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Feedjira
2
4
  module Parser
3
5
  # Parser for dealing with RSS images
@@ -1,5 +1,5 @@
1
- # rubocop:disable Style/Documentation
2
- # rubocop:disable Style/DocumentationMethod
1
+ # frozen_string_literal: true
2
+
3
3
  module Feedjira
4
4
  class Preprocessor
5
5
  def initialize(xml)
@@ -20,7 +20,7 @@ module Feedjira
20
20
  end
21
21
 
22
22
  def content_nodes
23
- doc.search 'entry > content[type="xhtml"], entry > summary[type="xhtml"], entry > title[type="xhtml"]' # rubocop:disable Metrics/LineLength
23
+ doc.search 'entry > content[type="xhtml"], entry > summary[type="xhtml"], entry > title[type="xhtml"]'
24
24
  end
25
25
 
26
26
  def raw_html(node)
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Feedjira
4
+ module RSSEntryUtilities
5
+ def self.included(mod)
6
+ mod.class_exec do
7
+ element :title
8
+
9
+ element :"content:encoded", as: :content
10
+ element :"a10:content", as: :content
11
+ element :description, as: :summary
12
+
13
+ element :link, as: :url
14
+ element :"a10:link", as: :url, value: :href
15
+
16
+ element :author
17
+ element :"dc:creator", as: :author
18
+ element :"a10:name", as: :author
19
+
20
+ element :pubDate, as: :published
21
+ element :pubdate, as: :published
22
+ element :issued, as: :published
23
+ element :"dc:date", as: :published
24
+ element :"dc:Date", as: :published
25
+ element :"dcterms:created", as: :published
26
+
27
+ element :"dcterms:modified", as: :updated
28
+ element :"a10:updated", as: :updated
29
+
30
+ element :guid, as: :entry_id, class: Feedjira::Parser::GloballyUniqueIdentifier
31
+ element :"dc:identifier", as: :dc_identifier
32
+
33
+ element :"media:thumbnail", as: :image, value: :url
34
+ element :"media:content", as: :image, value: :url
35
+ element :enclosure, as: :image, value: :url
36
+
37
+ elements :category, as: :categories
38
+ end
39
+ end
40
+
41
+ def entry_id
42
+ @entry_id&.guid
43
+ end
44
+
45
+ def url
46
+ @url || @entry_id&.url
47
+ end
48
+
49
+ def id
50
+ entry_id || @dc_identifier || @url
51
+ end
52
+ end
53
+ end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Feedjira
2
- VERSION = "3.0.0.beta1".freeze
4
+ VERSION = "3.2.0"
3
5
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "spec_helper"
2
4
 
3
5
  describe Feedjira::Configuration do
@@ -8,10 +10,10 @@ describe Feedjira::Configuration do
8
10
  end
9
11
 
10
12
  it "allows parsers to be modified" do
11
- CustomParser = Class.new
13
+ custom_parser = Class.new
12
14
 
13
- Feedjira.configure { |config| config.parsers.unshift(CustomParser) }
14
- expect(Feedjira.parsers.first).to eq(CustomParser)
15
+ Feedjira.configure { |config| config.parsers.unshift(custom_parser) }
16
+ expect(Feedjira.parsers.first).to eq(custom_parser)
15
17
  Feedjira.reset_configuration!
16
18
  end
17
19
  end