feedjira 3.0.0.beta1 → 3.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (89) hide show
  1. checksums.yaml +5 -5
  2. data/.github/ISSUE_TEMPLATE/feed-parsing.md +15 -0
  3. data/.github/workflows/ruby.yml +39 -0
  4. data/.rubocop.yml +18 -628
  5. data/.rubocop_todo.yml +11 -0
  6. data/CHANGELOG.md +66 -9
  7. data/Gemfile +4 -1
  8. data/README.md +10 -1
  9. data/Rakefile +3 -1
  10. data/feedjira.gemspec +14 -12
  11. data/lib/feedjira.rb +9 -3
  12. data/lib/feedjira/atom_entry_utilities.rb +51 -0
  13. data/lib/feedjira/configuration.rb +6 -3
  14. data/lib/feedjira/core_ext.rb +2 -0
  15. data/lib/feedjira/core_ext/date.rb +3 -2
  16. data/lib/feedjira/core_ext/string.rb +2 -1
  17. data/lib/feedjira/core_ext/time.rb +10 -10
  18. data/lib/feedjira/date_time_utilities.rb +8 -10
  19. data/lib/feedjira/date_time_utilities/date_time_epoch_parser.rb +3 -2
  20. data/lib/feedjira/date_time_utilities/date_time_language_parser.rb +4 -4
  21. data/lib/feedjira/date_time_utilities/date_time_pattern_parser.rb +9 -13
  22. data/lib/feedjira/feed.rb +3 -3
  23. data/lib/feedjira/feed_entry_utilities.rb +15 -8
  24. data/lib/feedjira/feed_utilities.rb +5 -4
  25. data/lib/feedjira/parser.rb +2 -0
  26. data/lib/feedjira/parser/atom.rb +5 -7
  27. data/lib/feedjira/parser/atom_entry.rb +4 -21
  28. data/lib/feedjira/parser/atom_feed_burner.rb +4 -3
  29. data/lib/feedjira/parser/atom_feed_burner_entry.rb +7 -18
  30. data/lib/feedjira/parser/atom_google_alerts.rb +26 -0
  31. data/lib/feedjira/parser/atom_google_alerts_entry.rb +21 -0
  32. data/lib/feedjira/parser/atom_youtube.rb +2 -1
  33. data/lib/feedjira/parser/atom_youtube_entry.rb +8 -7
  34. data/lib/feedjira/parser/globally_unique_identifier.rb +21 -0
  35. data/lib/feedjira/parser/google_docs_atom.rb +4 -4
  36. data/lib/feedjira/parser/google_docs_atom_entry.rb +3 -19
  37. data/lib/feedjira/parser/itunes_rss.rb +4 -3
  38. data/lib/feedjira/parser/itunes_rss_category.rb +6 -5
  39. data/lib/feedjira/parser/itunes_rss_item.rb +5 -8
  40. data/lib/feedjira/parser/itunes_rss_owner.rb +2 -1
  41. data/lib/feedjira/parser/json_feed.rb +4 -2
  42. data/lib/feedjira/parser/json_feed_item.rb +7 -1
  43. data/lib/feedjira/parser/podlove_chapter.rb +3 -2
  44. data/lib/feedjira/parser/rss.rb +4 -2
  45. data/lib/feedjira/parser/rss_entry.rb +3 -28
  46. data/lib/feedjira/parser/rss_feed_burner.rb +3 -2
  47. data/lib/feedjira/parser/rss_feed_burner_entry.rb +6 -26
  48. data/lib/feedjira/parser/rss_image.rb +2 -0
  49. data/lib/feedjira/preprocessor.rb +3 -3
  50. data/lib/feedjira/rss_entry_utilities.rb +53 -0
  51. data/lib/feedjira/version.rb +3 -1
  52. data/spec/feedjira/configuration_spec.rb +5 -3
  53. data/spec/feedjira/date_time_utilities_spec.rb +2 -0
  54. data/spec/feedjira/feed_entry_utilities_spec.rb +4 -2
  55. data/spec/feedjira/feed_spec.rb +3 -1
  56. data/spec/feedjira/feed_utilities_spec.rb +5 -3
  57. data/spec/feedjira/parser/atom_entry_spec.rb +7 -4
  58. data/spec/feedjira/parser/atom_feed_burner_entry_spec.rb +7 -5
  59. data/spec/feedjira/parser/atom_feed_burner_spec.rb +4 -2
  60. data/spec/feedjira/parser/atom_google_alerts_entry_spec.rb +34 -0
  61. data/spec/feedjira/parser/atom_google_alerts_spec.rb +62 -0
  62. data/spec/feedjira/parser/atom_spec.rb +30 -9
  63. data/spec/feedjira/parser/atom_youtube_entry_spec.rb +8 -6
  64. data/spec/feedjira/parser/atom_youtube_spec.rb +6 -4
  65. data/spec/feedjira/parser/google_docs_atom_entry_spec.rb +2 -0
  66. data/spec/feedjira/parser/google_docs_atom_spec.rb +2 -0
  67. data/spec/feedjira/parser/itunes_rss_item_spec.rb +3 -1
  68. data/spec/feedjira/parser/itunes_rss_owner_spec.rb +2 -0
  69. data/spec/feedjira/parser/itunes_rss_spec.rb +11 -9
  70. data/spec/feedjira/parser/json_feed_item_spec.rb +5 -3
  71. data/spec/feedjira/parser/json_feed_spec.rb +2 -0
  72. data/spec/feedjira/parser/podlove_chapter_spec.rb +2 -0
  73. data/spec/feedjira/parser/rss_entry_spec.rb +26 -4
  74. data/spec/feedjira/parser/rss_feed_burner_entry_spec.rb +9 -7
  75. data/spec/feedjira/parser/rss_feed_burner_spec.rb +3 -1
  76. data/spec/feedjira/parser/rss_spec.rb +2 -0
  77. data/spec/feedjira/preprocessor_spec.rb +4 -2
  78. data/spec/feedjira_spec.rb +22 -1
  79. data/spec/sample_feeds.rb +7 -3
  80. data/spec/sample_feeds/InvalidDateFormat.xml +20 -0
  81. data/spec/sample_feeds/Permalinks.xml +22 -0
  82. data/spec/sample_feeds/a10.xml +72 -0
  83. data/spec/sample_feeds/atom_simple_single_entry.xml +17 -0
  84. data/spec/sample_feeds/atom_simple_single_entry_link_self.xml +17 -0
  85. data/spec/sample_feeds/google_alerts_atom.xml +1 -0
  86. data/spec/spec_helper.rb +3 -7
  87. metadata +44 -47
  88. data/.travis.yml +0 -37
  89. data/Dangerfile +0 -1
@@ -1,11 +1,12 @@
1
- # rubocop:disable Style/Documentation
2
- # rubocop:disable Style/DocumentationMethod
1
+ # frozen_string_literal: true
2
+
3
3
  module Feedjira
4
4
  module DateTimeUtilities
5
5
  class DateTimeEpochParser
6
6
  def self.parse(string)
7
7
  epoch_time = string.to_i
8
8
  return Time.at(epoch_time).to_datetime if epoch_time.to_s == string
9
+
9
10
  raise "#{string} is not a valid epoch time"
10
11
  end
11
12
  end
@@ -1,12 +1,12 @@
1
- # rubocop:disable Style/Documentation
2
- # rubocop:disable Style/DocumentationMethod
1
+ # frozen_string_literal: true
2
+
3
3
  module Feedjira
4
4
  module DateTimeUtilities
5
5
  class DateTimeLanguageParser
6
6
  MONTHS_ENGLISH =
7
- %w(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec).freeze
7
+ %w[Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec].freeze
8
8
  MONTHS_SPANISH =
9
- %w(Ene Feb Mar Abr May Jun Jul Ago Sep Oct Nov Dic).freeze
9
+ %w[Ene Feb Mar Abr May Jun Jul Ago Sep Oct Nov Dic].freeze
10
10
 
11
11
  def self.parse(string)
12
12
  DateTime.parse(translate(string))
@@ -1,25 +1,21 @@
1
- # rubocop:disable Style/Documentation
2
- # rubocop:disable Style/DocumentationMethod
1
+ # frozen_string_literal: true
2
+
3
3
  module Feedjira
4
4
  module DateTimeUtilities
5
5
  class DateTimePatternParser
6
- # rubocop:disable Style/AsciiComments
7
6
  # Japanese Symbols are required for strange Date Strings like
8
7
  # '水, 31 8 2016 07:37:00 PDT'
9
- JAPANESE_SYMBOLS = %w(日 月 火 水 木 金 土).freeze
8
+ JAPANESE_SYMBOLS = %w[日 月 火 水 木 金 土].freeze
10
9
  PATTERNS = ["%m/%d/%Y %T %p", "%d %m %Y %T %Z"].freeze
11
10
 
12
- # rubocop:disable Metrics/MethodLength
13
11
  def self.parse(string)
14
12
  PATTERNS.each do |p|
15
- begin
16
- datetime = DateTime.strptime(prepare(string), p)
17
- return datetime
18
- rescue StandardError => e
19
- Feedjira.logger.debug("Failed to parse date #{string}")
20
- Feedjira.logger.debug(e)
21
- nil
22
- end
13
+ datetime = DateTime.strptime(prepare(string), p)
14
+ return datetime
15
+ rescue StandardError => e
16
+ Feedjira.logger.debug("Failed to parse date #{string}")
17
+ Feedjira.logger.debug(e)
18
+ nil
23
19
  end
24
20
  raise "No pattern matched #{string}"
25
21
  end
data/lib/feedjira/feed.rb CHANGED
@@ -1,5 +1,5 @@
1
- # rubocop:disable Style/Documentation
2
- # rubocop:disable Style/DocumentationMethod
1
+ # frozen_string_literal: true
2
+
3
3
  module Feedjira
4
4
  class Feed
5
5
  class << self
@@ -29,7 +29,7 @@ module Feedjira
29
29
  Feedjira.parsers.each do |klass|
30
30
  klass.sax_config.collection_elements.each_value do |value|
31
31
  collection_configs = value.select do |v|
32
- v.accessor == "entries" && v.data_class.class == Class
32
+ v.accessor == "entries" && v.data_class.is_a?(Class)
33
33
  end
34
34
 
35
35
  collection_configs.each do |config|
@@ -1,5 +1,5 @@
1
- # rubocop:disable Style/Documentation
2
- # rubocop:disable Style/DocumentationMethod
1
+ # frozen_string_literal: true
2
+
3
3
  module Feedjira
4
4
  module FeedEntryUtilities
5
5
  include Enumerable
@@ -20,28 +20,30 @@ module Feedjira
20
20
  ##
21
21
  # Returns the id of the entry or its url if no id is present, as some
22
22
  # formats don't support it
23
+ # rubocop:disable Naming/MemoizedInstanceVariableName
23
24
  def id
24
25
  @entry_id ||= @url
25
26
  end
27
+ # rubocop:enable Naming/MemoizedInstanceVariableName
26
28
 
27
29
  ##
28
30
  # Writer for published. By default, we keep the "oldest" publish time found.
29
31
  def published=(val)
30
32
  parsed = parse_datetime(val)
31
- @published = parsed if !@published || parsed < @published
33
+ @published = parsed if parsed && (!@published || parsed < @published)
32
34
  end
33
35
 
34
36
  ##
35
37
  # Writer for updated. By default, we keep the most recent update time found.
36
38
  def updated=(val)
37
39
  parsed = parse_datetime(val)
38
- @updated = parsed if !@updated || parsed > @updated
40
+ @updated = parsed if parsed && (!@updated || parsed > @updated)
39
41
  end
40
42
 
41
43
  def sanitize!
42
- %w(title author summary content image).each do |name|
44
+ %w[title author summary content image].each do |name|
43
45
  if respond_to?(name) && send(name).respond_to?(:sanitize!)
44
- send(name).send :sanitize!
46
+ send(name).send(:sanitize!)
45
47
  end
46
48
  end
47
49
  end
@@ -49,10 +51,15 @@ module Feedjira
49
51
  alias last_modified published
50
52
 
51
53
  def each
52
- @rss_fields ||= instance_variables
54
+ @rss_fields ||= instance_variables.map do |ivar|
55
+ ivar.to_s.sub("@", "")
56
+ end.select do |field| # rubocop:disable Style/MultilineBlockChain
57
+ # select callable (public) methods only
58
+ respond_to?(field)
59
+ end
53
60
 
54
61
  @rss_fields.each do |field|
55
- yield(field.to_s.sub("@", ""), instance_variable_get(field))
62
+ yield(field, instance_variable_get(:"@#{field}"))
56
63
  end
57
64
  end
58
65
 
@@ -1,8 +1,8 @@
1
- # rubocop:disable Style/Documentation
2
- # rubocop:disable Style/DocumentationMethod
1
+ # frozen_string_literal: true
2
+
3
3
  module Feedjira
4
4
  module FeedUtilities
5
- UPDATABLE_ATTRIBUTES = %w(title feed_url url last_modified etag).freeze
5
+ UPDATABLE_ATTRIBUTES = %w[title feed_url url last_modified etag].freeze
6
6
 
7
7
  attr_writer :new_entries, :updated, :last_modified
8
8
  attr_accessor :etag
@@ -43,7 +43,7 @@ module Feedjira
43
43
  def last_modified
44
44
  @last_modified ||= begin
45
45
  published = entries.reject { |e| e.published.nil? }
46
- entry = published.sort_by { |e| e.published if e.published }.last
46
+ entry = published.max_by(&:published)
47
47
  entry ? entry.published : nil
48
48
  end
49
49
  end
@@ -102,6 +102,7 @@ module Feedjira
102
102
 
103
103
  feed.entries.each do |entry|
104
104
  break unless new_entry?(entry, latest_entry)
105
+
105
106
  found_new_entries << entry
106
107
  end
107
108
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Feedjira
2
4
  module Parser
3
5
  end
@@ -1,4 +1,5 @@
1
- # rubocop:disable Style/DocumentationMethod
1
+ # frozen_string_literal: true
2
+
2
3
  module Feedjira
3
4
  module Parser
4
5
  # Parser for dealing with Atom feeds.
@@ -13,17 +14,14 @@ module Feedjira
13
14
  elements :link, as: :links, value: :href
14
15
  elements :link, as: :hubs, value: :href, with: { rel: "hub" }
15
16
  elements :entry, as: :entries, class: AtomEntry
17
+ element :icon
16
18
 
17
19
  def self.able_to_parse?(xml)
18
- %r{\<feed[^\>]+xmlns\s?=\s?[\"\'](http://www\.w3\.org/2005/Atom|http://purl\.org/atom/ns\#)[\"\'][^\>]*\>} =~ xml # rubocop:disable Metrics/LineLength
20
+ %r{<feed[^>]+xmlns\s?=\s?["'](http://www\.w3\.org/2005/Atom|http://purl\.org/atom/ns\#)["'][^>]*>} =~ xml
19
21
  end
20
22
 
21
23
  def url
22
- @url || (links - [feed_url]).last || links.last
23
- end
24
-
25
- def feed_url
26
- @feed_url ||= links.first
24
+ @url || (links - [feed_url]).last
27
25
  end
28
26
 
29
27
  def self.preprocess(xml)
@@ -1,32 +1,15 @@
1
- # rubocop:disable Style/DocumentationMethod
1
+ # frozen_string_literal: true
2
+
2
3
  module Feedjira
3
4
  module Parser
4
5
  # Parser for dealing with Atom feed entries.
5
6
  class AtomEntry
6
7
  include SAXMachine
7
8
  include FeedEntryUtilities
9
+ include AtomEntryUtilities
8
10
 
9
- element :title
10
- element :link, as: :url, value: :href, with: { type: "text/html", rel: "alternate" } # rubocop:disable Metrics/LineLength
11
- element :name, as: :author
12
- element :content
13
- element :summary
14
-
11
+ element :"media:thumbnail", as: :image, value: :url
15
12
  element :"media:content", as: :image, value: :url
16
- element :enclosure, as: :image, value: :href
17
-
18
- element :published
19
- element :id, as: :entry_id
20
- element :created, as: :published
21
- element :issued, as: :published
22
- element :updated
23
- element :modified, as: :updated
24
- elements :category, as: :categories, value: :term
25
- elements :link, as: :links, value: :href
26
-
27
- def url
28
- @url ||= links.first
29
- end
30
13
  end
31
14
  end
32
15
  end
@@ -1,4 +1,5 @@
1
- # rubocop:disable Style/DocumentationMethod
1
+ # frozen_string_literal: true
2
+
2
3
  module Feedjira
3
4
  module Parser
4
5
  # Parser for dealing with Feedburner Atom feeds.
@@ -11,7 +12,7 @@ module Feedjira
11
12
  element :link, as: :url_text_html, value: :href,
12
13
  with: { type: "text/html" }
13
14
  element :link, as: :url_notype, value: :href, with: { type: nil }
14
- element :link, as: :feed_url_link, value: :href, with: { type: "application/atom+xml" } # rubocop:disable Metrics/LineLength
15
+ element :link, as: :feed_url_link, value: :href, with: { type: "application/atom+xml" }
15
16
  element :"atom10:link", as: :feed_url_atom10_link, value: :href,
16
17
  with: { type: "application/atom+xml" }
17
18
  elements :"atom10:link", as: :hubs, value: :href, with: { rel: "hub" }
@@ -20,7 +21,7 @@ module Feedjira
20
21
  attr_writer :url, :feed_url
21
22
 
22
23
  def self.able_to_parse?(xml)
23
- ((/Atom/ =~ xml) && (/feedburner/ =~ xml) && !(/\<rss|\<rdf/ =~ xml)) || false # rubocop:disable Metrics/LineLength
24
+ ((/<feed/ =~ xml) && (/Atom/ =~ xml) && (/feedburner/ =~ xml) && !(/<rss|<rdf/ =~ xml)) || false
24
25
  end
25
26
 
26
27
  # Feed url is <link> with type="text/html" if present,
@@ -1,32 +1,21 @@
1
- # rubocop:disable Style/DocumentationMethod
1
+ # frozen_string_literal: true
2
+
2
3
  module Feedjira
3
4
  module Parser
4
5
  # Parser for dealing with Feedburner Atom feed entries.
5
6
  class AtomFeedBurnerEntry
6
7
  include SAXMachine
7
8
  include FeedEntryUtilities
9
+ include AtomEntryUtilities
8
10
 
9
- element :title
10
- element :name, as: :author
11
- element :link, as: :url, value: :href, with: { type: "text/html", rel: "alternate" } # rubocop:disable Metrics/LineLength
12
- element :"feedburner:origLink", as: :url
13
- element :summary
14
- element :content
11
+ element :"feedburner:origLink", as: :orig_link
12
+ private :orig_link
15
13
 
14
+ element :"media:thumbnail", as: :image, value: :url
16
15
  element :"media:content", as: :image, value: :url
17
- element :enclosure, as: :image, value: :href
18
-
19
- element :published
20
- element :id, as: :entry_id
21
- element :issued, as: :published
22
- element :created, as: :published
23
- element :updated
24
- element :modified, as: :updated
25
- elements :category, as: :categories, value: :term
26
- elements :link, as: :links, value: :href
27
16
 
28
17
  def url
29
- @url ||= links.first
18
+ orig_link || super
30
19
  end
31
20
  end
32
21
  end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Feedjira
4
+ module Parser
5
+ # Parser for dealing with Google Alerts Atom feeds.
6
+ class AtomGoogleAlerts
7
+ include SAXMachine
8
+ include FeedUtilities
9
+
10
+ element :title
11
+ element :subtitle, as: :description
12
+ element :link, as: :feed_url, value: :href, with: { rel: "self" }
13
+ element :link, as: :url, value: :href, with: { rel: "self" }
14
+ elements :link, as: :links, value: :href
15
+ elements :entry, as: :entries, class: AtomGoogleAlertsEntry
16
+
17
+ def self.able_to_parse?(xml)
18
+ Atom.able_to_parse?(xml) && (%r{<id>tag:google\.com,2005:[^<]+/com\.google/alerts/} === xml) # rubocop:disable Style/CaseEquality
19
+ end
20
+
21
+ def self.preprocess(xml)
22
+ Preprocessor.new(xml).to_xml
23
+ end
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Feedjira
4
+ module Parser
5
+ # Parser for dealing with Google Alerts Atom feed entries.
6
+ class AtomGoogleAlertsEntry
7
+ include SAXMachine
8
+ include FeedEntryUtilities
9
+ include AtomEntryUtilities
10
+
11
+ def url
12
+ url = super
13
+ return unless url&.start_with?("https://www.google.com/url?")
14
+
15
+ uri = URI(url)
16
+ cons = URI.decode_www_form(uri.query).assoc("url")
17
+ cons && cons[1]
18
+ end
19
+ end
20
+ end
21
+ end
@@ -1,4 +1,5 @@
1
- # rubocop:disable Style/DocumentationMethod
1
+ # frozen_string_literal: true
2
+
2
3
  module Feedjira
3
4
  module Parser
4
5
  # Parser for dealing with YouTube Atom feeds.
@@ -1,19 +1,20 @@
1
- # rubocop:disable Style/Documentation
1
+ # frozen_string_literal: true
2
+
2
3
  module Feedjira
3
4
  module Parser
4
5
  class AtomYoutubeEntry
5
6
  include SAXMachine
6
7
  include FeedEntryUtilities
8
+ include AtomEntryUtilities
9
+
10
+ sax_config.top_level_elements["link"].clear
11
+ sax_config.collection_elements["link"].clear
7
12
 
8
- element :title
9
13
  element :link, as: :url, value: :href, with: { rel: "alternate" }
10
- element :name, as: :author
14
+
11
15
  element :"media:description", as: :content
12
- element :summary
13
- element :published
14
- element :id, as: :entry_id
15
- element :updated
16
16
  element :"yt:videoId", as: :youtube_video_id
17
+ element :"yt:channelId", as: :youtube_channel_id
17
18
  element :"media:title", as: :media_title
18
19
  element :"media:content", as: :media_url, value: :url
19
20
  element :"media:content", as: :media_type, value: :type
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Feedjira
4
+ module Parser
5
+ class GloballyUniqueIdentifier
6
+ include SAXMachine
7
+
8
+ attribute :isPermaLink, as: :is_perma_link
9
+
10
+ value :guid
11
+
12
+ def perma_link?
13
+ is_perma_link != "false"
14
+ end
15
+
16
+ def url
17
+ perma_link? ? guid : nil
18
+ end
19
+ end
20
+ end
21
+ end
@@ -1,6 +1,6 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require File.expand_path("./atom", File.dirname(__FILE__))
2
- # rubocop:disable Style/Documentation
3
- # rubocop:disable Style/DocumentationMethod
4
4
  module Feedjira
5
5
  module Parser
6
6
  class GoogleDocsAtom
@@ -9,7 +9,7 @@ module Feedjira
9
9
  element :title
10
10
  element :subtitle, as: :description
11
11
  element :link, as: :url, value: :href, with: { type: "text/html" }
12
- element :link, as: :feed_url, value: :href, with: { type: "application/atom+xml" } # rubocop:disable Metrics/LineLength
12
+ element :link, as: :feed_url, value: :href, with: { type: "application/atom+xml" }
13
13
  elements :link, as: :links, value: :href
14
14
  elements :entry, as: :entries, class: GoogleDocsAtomEntry
15
15
 
@@ -18,7 +18,7 @@ module Feedjira
18
18
  end
19
19
 
20
20
  def self.able_to_parse?(xml) #:nodoc:
21
- %r{<id>https?://docs\.google\.com/.*\</id\>} =~ xml
21
+ %r{<id>https?://docs\.google\.com/.*</id>} =~ xml
22
22
  end
23
23
 
24
24
  def feed_url