feedjira 2.0.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. checksums.yaml +5 -5
  2. data/.gitignore +2 -0
  3. data/.rubocop.yml +15 -0
  4. data/.travis.yml +31 -12
  5. data/CHANGELOG.md +34 -1
  6. data/Dangerfile +1 -0
  7. data/Gemfile +2 -1
  8. data/LICENSE +1 -1
  9. data/README.md +210 -7
  10. data/Rakefile +11 -1
  11. data/feedjira.gemspec +17 -14
  12. data/fixtures/vcr_cassettes/fetch_failure.yml +62 -0
  13. data/fixtures/vcr_cassettes/parse_error.yml +222 -0
  14. data/fixtures/vcr_cassettes/success.yml +281 -0
  15. data/lib/feedjira/configuration.rb +76 -0
  16. data/lib/feedjira/core_ext/date.rb +3 -1
  17. data/lib/feedjira/core_ext/string.rb +2 -1
  18. data/lib/feedjira/core_ext/time.rb +24 -17
  19. data/lib/feedjira/core_ext.rb +3 -3
  20. data/lib/feedjira/date_time_utilities/date_time_epoch_parser.rb +13 -0
  21. data/lib/feedjira/date_time_utilities/date_time_language_parser.rb +24 -0
  22. data/lib/feedjira/date_time_utilities/date_time_pattern_parser.rb +34 -0
  23. data/lib/feedjira/date_time_utilities.rb +32 -0
  24. data/lib/feedjira/feed.rb +89 -62
  25. data/lib/feedjira/feed_entry_utilities.rb +20 -19
  26. data/lib/feedjira/feed_utilities.rb +37 -22
  27. data/lib/feedjira/parser/atom.rb +10 -8
  28. data/lib/feedjira/parser/atom_entry.rb +11 -13
  29. data/lib/feedjira/parser/atom_feed_burner.rb +27 -10
  30. data/lib/feedjira/parser/atom_feed_burner_entry.rb +12 -14
  31. data/lib/feedjira/parser/atom_youtube.rb +21 -0
  32. data/lib/feedjira/parser/atom_youtube_entry.rb +30 -0
  33. data/lib/feedjira/parser/google_docs_atom.rb +8 -7
  34. data/lib/feedjira/parser/google_docs_atom_entry.rb +13 -11
  35. data/lib/feedjira/parser/itunes_rss.rb +41 -22
  36. data/lib/feedjira/parser/itunes_rss_category.rb +39 -0
  37. data/lib/feedjira/parser/itunes_rss_item.rb +32 -20
  38. data/lib/feedjira/parser/itunes_rss_owner.rb +4 -4
  39. data/lib/feedjira/parser/podlove_chapter.rb +22 -0
  40. data/lib/feedjira/parser/rss.rb +11 -8
  41. data/lib/feedjira/parser/rss_entry.rb +17 -21
  42. data/lib/feedjira/parser/rss_feed_burner.rb +5 -6
  43. data/lib/feedjira/parser/rss_feed_burner_entry.rb +24 -28
  44. data/lib/feedjira/parser/rss_image.rb +15 -0
  45. data/lib/feedjira/parser.rb +1 -1
  46. data/lib/feedjira/preprocessor.rb +4 -2
  47. data/lib/feedjira/version.rb +1 -1
  48. data/lib/feedjira.rb +15 -0
  49. data/spec/feedjira/configuration_spec.rb +25 -0
  50. data/spec/feedjira/date_time_utilities_spec.rb +47 -0
  51. data/spec/feedjira/feed_entry_utilities_spec.rb +23 -19
  52. data/spec/feedjira/feed_spec.rb +140 -75
  53. data/spec/feedjira/feed_utilities_spec.rb +83 -63
  54. data/spec/feedjira/parser/atom_entry_spec.rb +54 -34
  55. data/spec/feedjira/parser/atom_feed_burner_entry_spec.rb +27 -20
  56. data/spec/feedjira/parser/atom_feed_burner_spec.rb +87 -30
  57. data/spec/feedjira/parser/atom_spec.rb +50 -48
  58. data/spec/feedjira/parser/atom_youtube_entry_spec.rb +86 -0
  59. data/spec/feedjira/parser/atom_youtube_spec.rb +43 -0
  60. data/spec/feedjira/parser/google_docs_atom_entry_spec.rb +5 -4
  61. data/spec/feedjira/parser/google_docs_atom_spec.rb +6 -6
  62. data/spec/feedjira/parser/itunes_rss_item_spec.rb +49 -29
  63. data/spec/feedjira/parser/itunes_rss_owner_spec.rb +10 -9
  64. data/spec/feedjira/parser/itunes_rss_spec.rb +87 -30
  65. data/spec/feedjira/parser/podlove_chapter_spec.rb +37 -0
  66. data/spec/feedjira/parser/rss_entry_spec.rb +50 -33
  67. data/spec/feedjira/parser/rss_feed_burner_entry_spec.rb +55 -33
  68. data/spec/feedjira/parser/rss_feed_burner_spec.rb +31 -26
  69. data/spec/feedjira/parser/rss_spec.rb +56 -24
  70. data/spec/feedjira/preprocessor_spec.rb +11 -3
  71. data/spec/sample_feeds/AmazonWebServicesBlog.xml +797 -797
  72. data/spec/sample_feeds/AtomEscapedHTMLInPreTag.xml +13 -0
  73. data/spec/sample_feeds/CRE.xml +5849 -0
  74. data/spec/sample_feeds/FeedBurnerXHTML.xml +400 -400
  75. data/spec/sample_feeds/GiantRobotsSmashingIntoOtherGiantRobots.xml +682 -0
  76. data/spec/sample_feeds/ITunesWithSingleQuotedAttributes.xml +67 -0
  77. data/spec/sample_feeds/InvalidDateFormat.xml +20 -0
  78. data/spec/sample_feeds/PaulDixExplainsNothing.xml +175 -175
  79. data/spec/sample_feeds/PaulDixExplainsNothingAlternate.xml +175 -175
  80. data/spec/sample_feeds/PaulDixExplainsNothingFirstEntryContent.xml +16 -16
  81. data/spec/sample_feeds/PaulDixExplainsNothingWFW.xml +174 -174
  82. data/spec/sample_feeds/TenderLovemaking.xml +12 -2
  83. data/spec/sample_feeds/TrotterCashionHome.xml +611 -611
  84. data/spec/sample_feeds/TypePadNews.xml +368 -368
  85. data/spec/sample_feeds/itunes.xml +31 -2
  86. data/spec/sample_feeds/pet_atom.xml +229 -229
  87. data/spec/sample_feeds/youtube_atom.xml +395 -0
  88. data/spec/sample_feeds.rb +31 -21
  89. data/spec/spec_helper.rb +6 -0
  90. metadata +132 -25
@@ -1,6 +1,7 @@
1
- require "time"
2
- require "date"
1
+ require 'time'
2
+ require 'date'
3
3
 
4
+ # rubocop:disable Style/DocumentationMethod
4
5
  class Time
5
6
  # Parse a time string and convert it to UTC without raising errors.
6
7
  # Parses a flattened 14-digit time (YYYYmmddHHMMSS) as UTC.
@@ -10,22 +11,28 @@ class Time
10
11
  #
11
12
  # === Returns
12
13
  # A Time instance in UTC or nil if there were errors while parsing.
14
+ # rubocop:disable Metrics/MethodLength
13
15
  def self.parse_safely(dt)
14
- if dt
15
- case
16
- when dt.is_a?(Time)
17
- dt.utc
18
- when dt.respond_to?(:empty?) && dt.empty?
19
- nil
20
- when dt.respond_to?(:to_datetime)
21
- dt.to_datetime.utc
22
- when dt.to_s =~ /\A\d{14}\z/
23
- parse("#{dt.to_s}Z", true)
24
- else
25
- parse(dt.to_s, true).utc
26
- end
16
+ if dt.is_a?(Time)
17
+ dt.utc
18
+ elsif dt.respond_to?(:to_datetime)
19
+ dt.to_datetime.utc
20
+ elsif dt.respond_to? :to_s
21
+ parse_string_safely dt.to_s
27
22
  end
28
- rescue StandardError
23
+ rescue StandardError => e
24
+ Feedjira.logger.debug { "Failed to parse time #{dt}" }
25
+ Feedjira.logger.debug(e)
29
26
  nil
30
- end unless method_defined?(:parse_safely)
27
+ end
28
+
29
+ def self.parse_string_safely(string)
30
+ return nil if string.empty?
31
+
32
+ if string =~ /\A\d{14}\z/
33
+ parse("#{string}Z", true)
34
+ else
35
+ parse(string).utc
36
+ end
37
+ end
31
38
  end
@@ -1,3 +1,3 @@
1
- require "feedjira/core_ext/time"
2
- require "feedjira/core_ext/date"
3
- require "feedjira/core_ext/string"
1
+ require 'feedjira/core_ext/time'
2
+ require 'feedjira/core_ext/date'
3
+ require 'feedjira/core_ext/string'
@@ -0,0 +1,13 @@
1
+ # rubocop:disable Style/Documentation
2
+ # rubocop:disable Style/DocumentationMethod
3
+ module Feedjira
4
+ module DateTimeUtilities
5
+ class DateTimeEpochParser
6
+ def self.parse(string)
7
+ epoch_time = string.to_i
8
+ return Time.at(epoch_time).to_datetime if epoch_time.to_s == string
9
+ raise "#{string} is not a valid epoch time"
10
+ end
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,24 @@
1
+ # rubocop:disable Style/Documentation
2
+ # rubocop:disable Style/DocumentationMethod
3
+ module Feedjira
4
+ module DateTimeUtilities
5
+ class DateTimeLanguageParser
6
+ MONTHS_ENGLISH =
7
+ %w(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec).freeze
8
+ MONTHS_SPANISH =
9
+ %w(Ene Feb Mar Abr May Jun Jul Ago Sep Oct Nov Dic).freeze
10
+
11
+ def self.parse(string)
12
+ DateTime.parse(translate(string))
13
+ end
14
+
15
+ def self.translate(string)
16
+ MONTHS_SPANISH.each_with_index do |m, i|
17
+ rgx = Regexp.new("\s#{m}\s", Regexp::IGNORECASE)
18
+ return string.gsub(rgx, MONTHS_ENGLISH[i]) if string =~ rgx
19
+ end
20
+ raise "No translation found for #{string}"
21
+ end
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,34 @@
1
+ # rubocop:disable Style/Documentation
2
+ # rubocop:disable Style/DocumentationMethod
3
+ module Feedjira
4
+ module DateTimeUtilities
5
+ class DateTimePatternParser
6
+ # rubocop:disable Style/AsciiComments
7
+ # Japanese Symbols are required for strange Date Strings like
8
+ # '水, 31 8 2016 07:37:00 PDT'
9
+ JAPANESE_SYMBOLS = %w(日 月 火 水 木 金 土).freeze
10
+ PATTERNS = ['%m/%d/%Y %T %p', '%d %m %Y %T %Z'].freeze
11
+
12
+ # rubocop:disable Metrics/MethodLength
13
+ def self.parse(string)
14
+ PATTERNS.each do |p|
15
+ begin
16
+ datetime = DateTime.strptime(prepare(string), p)
17
+ return datetime
18
+ rescue StandardError => e
19
+ Feedjira.logger.debug("Failed to parse date #{string}")
20
+ Feedjira.logger.debug(e)
21
+ nil
22
+ end
23
+ end
24
+ raise "No pattern matched #{string}"
25
+ end
26
+
27
+ def self.prepare(string)
28
+ rgx = Regexp.new("^(#{JAPANESE_SYMBOLS.join('|')}),\s")
29
+ string.gsub(rgx, '')
30
+ end
31
+ private_class_method :prepare
32
+ end
33
+ end
34
+ end
@@ -0,0 +1,32 @@
1
+ # rubocop:disable Style/Documentation
2
+ module Feedjira
3
+ module DateTimeUtilities
4
+ # This is our date parsing heuristic.
5
+ # Date Parsers are attempted in order.
6
+ DATE_PARSERS = [
7
+ DateTimePatternParser,
8
+ DateTimeLanguageParser,
9
+ DateTimeEpochParser,
10
+ DateTime
11
+ ].freeze
12
+
13
+ # Parse the given string by trying each of DATE_PARSERS in order; the
14
+ # default Ruby parser (DateTime) is attempted last.
15
+ # rubocop:disable Metrics/MethodLength
16
+ def parse_datetime(string)
17
+ res = DATE_PARSERS.find do |parser|
18
+ begin
19
+ return parser.parse(string).feed_utils_to_gm_time
20
+ rescue StandardError => e
21
+ Feedjira.logger.debug { "Failed to parse date #{string}" }
22
+ Feedjira.logger.debug(e)
23
+ nil
24
+ end
25
+ end
26
+
27
+ Feedjira.logger.warn { "Failed to parse date #{string}" } if res.nil?
28
+
29
+ res
30
+ end
31
+ end
32
+ end
data/lib/feedjira/feed.rb CHANGED
@@ -1,85 +1,112 @@
1
+ # rubocop:disable Style/Documentation
2
+ # rubocop:disable Style/DocumentationMethod
1
3
  module Feedjira
2
4
  class Feed
3
- def self.parse_with(parser, xml, &block)
4
- parser.parse xml, &block
5
- end
5
+ class << self
6
+ def parse_with(parser, xml, &block)
7
+ parser.parse xml, &block
8
+ end
6
9
 
7
- def self.parse(xml, &block)
8
- if parser = determine_feed_parser_for_xml(xml)
10
+ def parse(xml, &block)
11
+ parser = determine_feed_parser_for_xml(xml)
12
+ raise NoParserAvailable, 'No valid parser for XML.' unless parser
9
13
  parse_with parser, xml, &block
10
- else
11
- raise NoParserAvailable.new("No valid parser for XML.")
12
14
  end
13
- end
14
15
 
15
- def self.determine_feed_parser_for_xml(xml)
16
- start_of_doc = xml.slice(0, 2000)
17
- feed_classes.detect {|klass| klass.able_to_parse?(start_of_doc)}
18
- end
16
+ def determine_feed_parser_for_xml(xml)
17
+ start_of_doc = xml.slice(0, 2000)
18
+ feed_classes.detect { |klass| klass.able_to_parse?(start_of_doc) }
19
+ end
19
20
 
20
- def self.add_feed_class(klass)
21
- feed_classes.unshift klass
22
- end
21
+ def add_feed_class(klass)
22
+ feed_classes.unshift klass
23
+ end
23
24
 
24
- def self.feed_classes
25
- @feed_classes ||= [
26
- Feedjira::Parser::RSSFeedBurner,
27
- Feedjira::Parser::GoogleDocsAtom,
28
- Feedjira::Parser::AtomFeedBurner,
29
- Feedjira::Parser::Atom,
30
- Feedjira::Parser::ITunesRSS,
31
- Feedjira::Parser::RSS
32
- ]
33
- end
25
+ def feed_classes
26
+ @feed_classes ||= Feedjira.parsers
27
+ end
34
28
 
35
- def self.add_common_feed_element(element_tag, options = {})
36
- feed_classes.each do |k|
37
- k.element element_tag, options
29
+ def reset_parsers!
30
+ @feed_classes = nil
38
31
  end
39
- end
40
32
 
41
- def self.add_common_feed_elements(element_tag, options = {})
42
- feed_classes.each do |k|
43
- k.elements element_tag, options
33
+ def add_common_feed_element(element_tag, options = {})
34
+ feed_classes.each do |k|
35
+ k.element element_tag, options
36
+ end
44
37
  end
45
- end
46
38
 
47
- def self.add_common_feed_entry_element(element_tag, options = {})
48
- call_on_each_feed_entry :element, element_tag, options
49
- end
39
+ def add_common_feed_elements(element_tag, options = {})
40
+ feed_classes.each do |k|
41
+ k.elements element_tag, options
42
+ end
43
+ end
50
44
 
51
- def self.add_common_feed_entry_elements(element_tag, options = {})
52
- call_on_each_feed_entry :elements, element_tag, options
53
- end
45
+ def add_common_feed_entry_element(element_tag, options = {})
46
+ call_on_each_feed_entry :element, element_tag, options
47
+ end
54
48
 
55
- def self.call_on_each_feed_entry(method, *parameters)
56
- feed_classes.each do |k|
57
- k.sax_config.collection_elements.each_value do |vl|
58
- vl.find_all{|v| (v.accessor == 'entries') && (v.data_class.class == Class)}.each do |v|
59
- v.data_class.send(method, *parameters)
49
+ def add_common_feed_entry_elements(element_tag, options = {})
50
+ call_on_each_feed_entry :elements, element_tag, options
51
+ end
52
+
53
+ def call_on_each_feed_entry(method, *parameters)
54
+ feed_classes.each do |klass|
55
+ klass.sax_config.collection_elements.each_value do |value|
56
+ collection_configs = value.select do |v|
57
+ v.accessor == 'entries' && v.data_class.class == Class
58
+ end
59
+
60
+ collection_configs.each do |config|
61
+ config.data_class.send(method, *parameters)
62
+ end
60
63
  end
61
64
  end
62
65
  end
63
- end
64
66
 
65
- def self.fetch_and_parse(url)
66
- response = connection(url).get
67
- raise FetchFailure.new("Fetch failed - #{response.status}") unless response.success?
68
- xml = response.body
69
- parser_klass = determine_feed_parser_for_xml xml
70
- raise NoParserAvailable.new("No valid parser for XML.") unless parser_klass
71
-
72
- feed = parse_with parser_klass, xml
73
- feed.feed_url = url
74
- feed.etag = response.headers['etag'].to_s.gsub(/"/, '')
75
- feed.last_modified = response.headers['last-modified']
76
- feed
77
- end
67
+ def fetch_and_parse(url)
68
+ response = connection(url).get
69
+ unless response.success?
70
+ raise FetchFailure, "Fetch failed - #{response.status}"
71
+ end
72
+ feed = parse response.body
73
+ feed.feed_url = url
74
+ feed.etag = response.headers['etag'].to_s.delete '"'
75
+
76
+ feed.last_modified = parse_last_modified(response)
77
+ feed
78
+ end
79
+
80
+ # rubocop:disable LineLength
81
+ def connection(url)
82
+ Faraday.new(url: url, headers: headers, request: request_options) do |conn|
83
+ conn.use FaradayMiddleware::FollowRedirects, limit: Feedjira.follow_redirect_limit
84
+ conn.adapter(*Faraday.default_adapter)
85
+ end
86
+ end
87
+ # rubocop:enable LineLength
88
+
89
+ private
90
+
91
+ def headers
92
+ {
93
+ user_agent: Feedjira.user_agent
94
+ }
95
+ end
96
+
97
+ def request_options
98
+ {
99
+ timeout: Feedjira.request_timeout
100
+ }
101
+ end
78
102
 
79
- def self.connection(url)
80
- Faraday.new(url: url) do |conn|
81
- conn.use FaradayMiddleware::FollowRedirects, limit: 3
82
- conn.adapter :net_http
103
+ def parse_last_modified(response)
104
+ lm = response.headers['last-modified']
105
+ DateTime.parse(lm).to_time
106
+ rescue StandardError => e
107
+ Feedjira.logger.warn { "Failed to parse last modified '#{lm}'" }
108
+ Feedjira.logger.debug(e)
109
+ nil
83
110
  end
84
111
  end
85
112
  end
@@ -1,23 +1,25 @@
1
+ # rubocop:disable Style/Documentation
2
+ # rubocop:disable Style/DocumentationMethod
1
3
  module Feedjira
2
4
  module FeedEntryUtilities
3
-
4
5
  include Enumerable
6
+ include DateTimeUtilities
5
7
 
6
8
  def published
7
9
  @published ||= @updated
8
10
  end
9
11
 
10
12
  def parse_datetime(string)
11
- begin
12
- DateTime.parse(string).feed_utils_to_gm_time
13
- rescue
14
- warn "Failed to parse date #{string.inspect}"
15
- nil
16
- end
13
+ DateTime.parse(string).feed_utils_to_gm_time
14
+ rescue StandardError => e
15
+ Feedjira.logger.warn { "Failed to parse date #{string.inspect}" }
16
+ Feedjira.logger.debug(e)
17
+ nil
17
18
  end
18
19
 
19
20
  ##
20
- # Returns the id of the entry or its url if not id is present, as some formats don't support it
21
+ # Returns the id of the entry or its url if no id is present, as some
22
+ # formats don't support it
21
23
  def id
22
24
  @entry_id ||= @url
23
25
  end
@@ -26,41 +28,40 @@ module Feedjira
26
28
  # Writer for published. By default, we keep the "oldest" publish time found.
27
29
  def published=(val)
28
30
  parsed = parse_datetime(val)
29
- @published = parsed if !@published || parsed < @published
31
+ @published = parsed if parsed && (!@published || parsed < @published)
30
32
  end
31
33
 
32
34
  ##
33
35
  # Writer for updated. By default, we keep the most recent update time found.
34
36
  def updated=(val)
35
37
  parsed = parse_datetime(val)
36
- @updated = parsed if !@updated || parsed > @updated
38
+ @updated = parsed if parsed && (!@updated || parsed > @updated)
37
39
  end
38
40
 
39
41
  def sanitize!
40
- %w[title author summary content image].each do |name|
41
- if self.respond_to?(name) && self.send(name).respond_to?(:sanitize!)
42
- self.send(name).send :sanitize!
42
+ %w(title author summary content image).each do |name|
43
+ if respond_to?(name) && send(name).respond_to?(:sanitize!)
44
+ send(name).send :sanitize!
43
45
  end
44
46
  end
45
47
  end
46
48
 
47
- alias_method :last_modified, :published
49
+ alias last_modified published
48
50
 
49
51
  def each
50
- @rss_fields ||= self.instance_variables
52
+ @rss_fields ||= instance_variables
51
53
 
52
54
  @rss_fields.each do |field|
53
- yield(field.to_s.sub('@', ''), self.instance_variable_get(field))
55
+ yield(field.to_s.sub('@', ''), instance_variable_get(field))
54
56
  end
55
57
  end
56
58
 
57
59
  def [](field)
58
- self.instance_variable_get("@#{field.to_s}")
60
+ instance_variable_get("@#{field}")
59
61
  end
60
62
 
61
63
  def []=(field, value)
62
- self.instance_variable_set("@#{field.to_s}", value)
64
+ instance_variable_set("@#{field}", value)
63
65
  end
64
-
65
66
  end
66
67
  end
@@ -1,6 +1,8 @@
1
+ # rubocop:disable Style/Documentation
2
+ # rubocop:disable Style/DocumentationMethod
1
3
  module Feedjira
2
4
  module FeedUtilities
3
- UPDATABLE_ATTRIBUTES = %w(title feed_url url last_modified etag)
5
+ UPDATABLE_ATTRIBUTES = %w(title feed_url url last_modified etag).freeze
4
6
 
5
7
  attr_writer :new_entries, :updated, :last_modified
6
8
  attr_accessor :etag
@@ -11,7 +13,7 @@ module Feedjira
11
13
 
12
14
  module ClassMethods
13
15
  def parse(xml, &block)
14
- xml = xml.lstrip
16
+ xml = strip_whitespace(xml)
15
17
  xml = preprocess(xml) if preprocess_xml
16
18
  super xml, &block
17
19
  end
@@ -28,11 +30,20 @@ module Feedjira
28
30
  def preprocess_xml
29
31
  @preprocess_xml
30
32
  end
33
+
34
+ def strip_whitespace(xml)
35
+ if Feedjira.strip_whitespace
36
+ xml.strip
37
+ else
38
+ xml.lstrip
39
+ end
40
+ end
31
41
  end
32
42
 
33
43
  def last_modified
34
44
  @last_modified ||= begin
35
- entry = entries.reject {|e| e.published.nil? }.sort_by { |entry| entry.published if entry.published }.last
45
+ published = entries.reject { |e| e.published.nil? }
46
+ entry = published.sort_by { |e| e.published if e.published }.last
36
47
  entry ? entry.published : nil
37
48
  end
38
49
  end
@@ -45,13 +56,13 @@ module Feedjira
45
56
  @new_entries ||= []
46
57
  end
47
58
 
48
- def has_new_entries?
49
- new_entries.size > 0
59
+ def new_entries?
60
+ !new_entries.empty?
50
61
  end
51
62
 
52
63
  def update_from_feed(feed)
53
64
  self.new_entries += find_new_entries_for(feed)
54
- self.entries.unshift(*self.new_entries)
65
+ entries.unshift(*self.new_entries)
55
66
 
56
67
  @updated = false
57
68
 
@@ -61,7 +72,8 @@ module Feedjira
61
72
  end
62
73
 
63
74
  def update_attribute(feed, name)
64
- old_value, new_value = send(name), feed.send(name)
75
+ old_value = send(name)
76
+ new_value = feed.send(name)
65
77
 
66
78
  if old_value != new_value
67
79
  send("#{name}=", new_value)
@@ -72,33 +84,36 @@ module Feedjira
72
84
  end
73
85
 
74
86
  def sanitize_entries!
75
- entries.each {|entry| entry.sanitize!}
87
+ entries.each(&:sanitize!)
76
88
  end
77
89
 
78
90
  private
79
91
 
92
+ # This implementation is a hack, which is why it's so ugly. It's to get
93
+ # around the fact that not all feeds have a published date. However,
94
+ # they're always ordered with the newest one first. So we go through the
95
+ # entries just parsed and insert each one as a new entry until we get to
96
+ # one that has the same id as the newest for the feed.
80
97
  def find_new_entries_for(feed)
81
- # this implementation is a hack, which is why it's so ugly.
82
- # it's to get around the fact that not all feeds have a published date.
83
- # however, they're always ordered with the newest one first.
84
- # So we go through the entries just parsed and insert each one as a new entry
85
- # until we get to one that has the same id as the the newest for the feed
86
- return feed.entries if self.entries.length == 0
87
- latest_entry = self.entries.first
98
+ return feed.entries if entries.length.zero?
99
+
100
+ latest_entry = entries.first
88
101
  found_new_entries = []
102
+
89
103
  feed.entries.each do |entry|
90
- if entry.entry_id.nil? && latest_entry.entry_id.nil?
91
- break if entry.url == latest_entry.url
92
- else
93
- break if entry.entry_id == latest_entry.entry_id || entry.url == latest_entry.url
94
- end
104
+ break unless new_entry?(entry, latest_entry)
95
105
  found_new_entries << entry
96
106
  end
107
+
97
108
  found_new_entries
98
109
  end
99
110
 
100
- def existing_entry?(test_entry)
101
- entries.any? { |entry| entry.id == test_entry.id }
111
+ def new_entry?(entry, latest)
112
+ nil_ids = entry.entry_id.nil? && latest.entry_id.nil?
113
+ new_id = entry.entry_id != latest.entry_id
114
+ new_url = entry.url != latest.url
115
+
116
+ (nil_ids || new_id) && new_url
102
117
  end
103
118
  end
104
119
  end
@@ -1,19 +1,21 @@
1
+ # rubocop:disable Style/DocumentationMethod
1
2
  module Feedjira
2
3
  module Parser
3
4
  # Parser for dealing with Atom feeds.
4
5
  class Atom
5
6
  include SAXMachine
6
7
  include FeedUtilities
8
+
7
9
  element :title
8
- element :subtitle, :as => :description
9
- element :link, :as => :url, :value => :href, :with => {:type => "text/html"}
10
- element :link, :as => :feed_url, :value => :href, :with => {:rel => "self"}
11
- elements :link, :as => :links, :value => :href
12
- elements :link, :as => :hubs, :value => :href, :with => {:rel => "hub"}
13
- elements :entry, :as => :entries, :class => AtomEntry
10
+ element :subtitle, as: :description
11
+ element :link, as: :url, value: :href, with: { type: 'text/html' }
12
+ element :link, as: :feed_url, value: :href, with: { rel: 'self' }
13
+ elements :link, as: :links, value: :href
14
+ elements :link, as: :hubs, value: :href, with: { rel: 'hub' }
15
+ elements :entry, as: :entries, class: AtomEntry
14
16
 
15
- def self.able_to_parse?(xml) #:nodoc:
16
- /\<feed[^\>]+xmlns\s?=\s?[\"|\'](http:\/\/www\.w3\.org\/2005\/Atom|http:\/\/purl\.org\/atom\/ns\#)[\"|\'][^\>]*\>/ =~ xml
17
+ def self.able_to_parse?(xml)
18
+ %r{\<feed[^\>]+xmlns\s?=\s?[\"\'](http://www\.w3\.org/2005/Atom|http://purl\.org/atom/ns\#)[\"\'][^\>]*\>} =~ xml # rubocop:disable Metrics/LineLength
17
19
  end
18
20
 
19
21
  def url
@@ -1,5 +1,5 @@
1
+ # rubocop:disable Style/DocumentationMethod
1
2
  module Feedjira
2
-
3
3
  module Parser
4
4
  # Parser for dealing with Atom feed entries.
5
5
  class AtomEntry
@@ -7,28 +7,26 @@ module Feedjira
7
7
  include FeedEntryUtilities
8
8
 
9
9
  element :title
10
- element :link, :as => :url, :value => :href, :with => {:type => "text/html", :rel => "alternate"}
11
- element :name, :as => :author
10
+ element :link, as: :url, value: :href, with: { type: 'text/html', rel: 'alternate' } # rubocop:disable Metrics/LineLength
11
+ element :name, as: :author
12
12
  element :content
13
13
  element :summary
14
14
 
15
- element :"media:content", :as => :image, :value => :url
16
- element :enclosure, :as => :image, :value => :href
15
+ element :"media:content", as: :image, value: :url
16
+ element :enclosure, as: :image, value: :href
17
17
 
18
18
  element :published
19
- element :id, :as => :entry_id
20
- element :created, :as => :published
21
- element :issued, :as => :published
19
+ element :id, as: :entry_id
20
+ element :created, as: :published
21
+ element :issued, as: :published
22
22
  element :updated
23
- element :modified, :as => :updated
24
- elements :category, :as => :categories, :value => :term
25
- elements :link, :as => :links, :value => :href
23
+ element :modified, as: :updated
24
+ elements :category, as: :categories, value: :term
25
+ elements :link, as: :links, value: :href
26
26
 
27
27
  def url
28
28
  @url ||= links.first
29
29
  end
30
30
  end
31
-
32
31
  end
33
-
34
32
  end