feedjira 2.0.0 → 2.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (90) hide show
  1. checksums.yaml +5 -5
  2. data/.gitignore +2 -0
  3. data/.rubocop.yml +15 -0
  4. data/.travis.yml +31 -12
  5. data/CHANGELOG.md +34 -1
  6. data/Dangerfile +1 -0
  7. data/Gemfile +2 -1
  8. data/LICENSE +1 -1
  9. data/README.md +210 -7
  10. data/Rakefile +11 -1
  11. data/feedjira.gemspec +17 -14
  12. data/fixtures/vcr_cassettes/fetch_failure.yml +62 -0
  13. data/fixtures/vcr_cassettes/parse_error.yml +222 -0
  14. data/fixtures/vcr_cassettes/success.yml +281 -0
  15. data/lib/feedjira/configuration.rb +76 -0
  16. data/lib/feedjira/core_ext/date.rb +3 -1
  17. data/lib/feedjira/core_ext/string.rb +2 -1
  18. data/lib/feedjira/core_ext/time.rb +24 -17
  19. data/lib/feedjira/core_ext.rb +3 -3
  20. data/lib/feedjira/date_time_utilities/date_time_epoch_parser.rb +13 -0
  21. data/lib/feedjira/date_time_utilities/date_time_language_parser.rb +24 -0
  22. data/lib/feedjira/date_time_utilities/date_time_pattern_parser.rb +34 -0
  23. data/lib/feedjira/date_time_utilities.rb +32 -0
  24. data/lib/feedjira/feed.rb +89 -62
  25. data/lib/feedjira/feed_entry_utilities.rb +20 -19
  26. data/lib/feedjira/feed_utilities.rb +37 -22
  27. data/lib/feedjira/parser/atom.rb +10 -8
  28. data/lib/feedjira/parser/atom_entry.rb +11 -13
  29. data/lib/feedjira/parser/atom_feed_burner.rb +27 -10
  30. data/lib/feedjira/parser/atom_feed_burner_entry.rb +12 -14
  31. data/lib/feedjira/parser/atom_youtube.rb +21 -0
  32. data/lib/feedjira/parser/atom_youtube_entry.rb +30 -0
  33. data/lib/feedjira/parser/google_docs_atom.rb +8 -7
  34. data/lib/feedjira/parser/google_docs_atom_entry.rb +13 -11
  35. data/lib/feedjira/parser/itunes_rss.rb +41 -22
  36. data/lib/feedjira/parser/itunes_rss_category.rb +39 -0
  37. data/lib/feedjira/parser/itunes_rss_item.rb +32 -20
  38. data/lib/feedjira/parser/itunes_rss_owner.rb +4 -4
  39. data/lib/feedjira/parser/podlove_chapter.rb +22 -0
  40. data/lib/feedjira/parser/rss.rb +11 -8
  41. data/lib/feedjira/parser/rss_entry.rb +17 -21
  42. data/lib/feedjira/parser/rss_feed_burner.rb +5 -6
  43. data/lib/feedjira/parser/rss_feed_burner_entry.rb +24 -28
  44. data/lib/feedjira/parser/rss_image.rb +15 -0
  45. data/lib/feedjira/parser.rb +1 -1
  46. data/lib/feedjira/preprocessor.rb +4 -2
  47. data/lib/feedjira/version.rb +1 -1
  48. data/lib/feedjira.rb +15 -0
  49. data/spec/feedjira/configuration_spec.rb +25 -0
  50. data/spec/feedjira/date_time_utilities_spec.rb +47 -0
  51. data/spec/feedjira/feed_entry_utilities_spec.rb +23 -19
  52. data/spec/feedjira/feed_spec.rb +140 -75
  53. data/spec/feedjira/feed_utilities_spec.rb +83 -63
  54. data/spec/feedjira/parser/atom_entry_spec.rb +54 -34
  55. data/spec/feedjira/parser/atom_feed_burner_entry_spec.rb +27 -20
  56. data/spec/feedjira/parser/atom_feed_burner_spec.rb +87 -30
  57. data/spec/feedjira/parser/atom_spec.rb +50 -48
  58. data/spec/feedjira/parser/atom_youtube_entry_spec.rb +86 -0
  59. data/spec/feedjira/parser/atom_youtube_spec.rb +43 -0
  60. data/spec/feedjira/parser/google_docs_atom_entry_spec.rb +5 -4
  61. data/spec/feedjira/parser/google_docs_atom_spec.rb +6 -6
  62. data/spec/feedjira/parser/itunes_rss_item_spec.rb +49 -29
  63. data/spec/feedjira/parser/itunes_rss_owner_spec.rb +10 -9
  64. data/spec/feedjira/parser/itunes_rss_spec.rb +87 -30
  65. data/spec/feedjira/parser/podlove_chapter_spec.rb +37 -0
  66. data/spec/feedjira/parser/rss_entry_spec.rb +50 -33
  67. data/spec/feedjira/parser/rss_feed_burner_entry_spec.rb +55 -33
  68. data/spec/feedjira/parser/rss_feed_burner_spec.rb +31 -26
  69. data/spec/feedjira/parser/rss_spec.rb +56 -24
  70. data/spec/feedjira/preprocessor_spec.rb +11 -3
  71. data/spec/sample_feeds/AmazonWebServicesBlog.xml +797 -797
  72. data/spec/sample_feeds/AtomEscapedHTMLInPreTag.xml +13 -0
  73. data/spec/sample_feeds/CRE.xml +5849 -0
  74. data/spec/sample_feeds/FeedBurnerXHTML.xml +400 -400
  75. data/spec/sample_feeds/GiantRobotsSmashingIntoOtherGiantRobots.xml +682 -0
  76. data/spec/sample_feeds/ITunesWithSingleQuotedAttributes.xml +67 -0
  77. data/spec/sample_feeds/InvalidDateFormat.xml +20 -0
  78. data/spec/sample_feeds/PaulDixExplainsNothing.xml +175 -175
  79. data/spec/sample_feeds/PaulDixExplainsNothingAlternate.xml +175 -175
  80. data/spec/sample_feeds/PaulDixExplainsNothingFirstEntryContent.xml +16 -16
  81. data/spec/sample_feeds/PaulDixExplainsNothingWFW.xml +174 -174
  82. data/spec/sample_feeds/TenderLovemaking.xml +12 -2
  83. data/spec/sample_feeds/TrotterCashionHome.xml +611 -611
  84. data/spec/sample_feeds/TypePadNews.xml +368 -368
  85. data/spec/sample_feeds/itunes.xml +31 -2
  86. data/spec/sample_feeds/pet_atom.xml +229 -229
  87. data/spec/sample_feeds/youtube_atom.xml +395 -0
  88. data/spec/sample_feeds.rb +31 -21
  89. data/spec/spec_helper.rb +6 -0
  90. metadata +132 -25
@@ -1,6 +1,7 @@
1
- require "time"
2
- require "date"
1
+ require 'time'
2
+ require 'date'
3
3
 
4
+ # rubocop:disable Style/DocumentationMethod
4
5
  class Time
5
6
  # Parse a time string and convert it to UTC without raising errors.
6
7
  # Parses a flattened 14-digit time (YYYYmmddHHMMSS) as UTC.
@@ -10,22 +11,28 @@ class Time
10
11
  #
11
12
  # === Returns
12
13
  # A Time instance in UTC or nil if there were errors while parsing.
14
+ # rubocop:disable Metrics/MethodLength
13
15
  def self.parse_safely(dt)
14
- if dt
15
- case
16
- when dt.is_a?(Time)
17
- dt.utc
18
- when dt.respond_to?(:empty?) && dt.empty?
19
- nil
20
- when dt.respond_to?(:to_datetime)
21
- dt.to_datetime.utc
22
- when dt.to_s =~ /\A\d{14}\z/
23
- parse("#{dt.to_s}Z", true)
24
- else
25
- parse(dt.to_s, true).utc
26
- end
16
+ if dt.is_a?(Time)
17
+ dt.utc
18
+ elsif dt.respond_to?(:to_datetime)
19
+ dt.to_datetime.utc
20
+ elsif dt.respond_to? :to_s
21
+ parse_string_safely dt.to_s
27
22
  end
28
- rescue StandardError
23
+ rescue StandardError => e
24
+ Feedjira.logger.debug { "Failed to parse time #{dt}" }
25
+ Feedjira.logger.debug(e)
29
26
  nil
30
- end unless method_defined?(:parse_safely)
27
+ end
28
+
29
+ def self.parse_string_safely(string)
30
+ return nil if string.empty?
31
+
32
+ if string =~ /\A\d{14}\z/
33
+ parse("#{string}Z", true)
34
+ else
35
+ parse(string).utc
36
+ end
37
+ end
31
38
  end
@@ -1,3 +1,3 @@
1
- require "feedjira/core_ext/time"
2
- require "feedjira/core_ext/date"
3
- require "feedjira/core_ext/string"
1
+ require 'feedjira/core_ext/time'
2
+ require 'feedjira/core_ext/date'
3
+ require 'feedjira/core_ext/string'
@@ -0,0 +1,13 @@
1
+ # rubocop:disable Style/Documentation
2
+ # rubocop:disable Style/DocumentationMethod
3
+ module Feedjira
4
+ module DateTimeUtilities
5
+ class DateTimeEpochParser
6
+ def self.parse(string)
7
+ epoch_time = string.to_i
8
+ return Time.at(epoch_time).to_datetime if epoch_time.to_s == string
9
+ raise "#{string} is not a valid epoch time"
10
+ end
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,24 @@
1
+ # rubocop:disable Style/Documentation
2
+ # rubocop:disable Style/DocumentationMethod
3
+ module Feedjira
4
+ module DateTimeUtilities
5
+ class DateTimeLanguageParser
6
+ MONTHS_ENGLISH =
7
+ %w(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec).freeze
8
+ MONTHS_SPANISH =
9
+ %w(Ene Feb Mar Abr May Jun Jul Ago Sep Oct Nov Dic).freeze
10
+
11
+ def self.parse(string)
12
+ DateTime.parse(translate(string))
13
+ end
14
+
15
+ def self.translate(string)
16
+ MONTHS_SPANISH.each_with_index do |m, i|
17
+ rgx = Regexp.new("\s#{m}\s", Regexp::IGNORECASE)
18
+ return string.gsub(rgx, MONTHS_ENGLISH[i]) if string =~ rgx
19
+ end
20
+ raise "No translation found for #{string}"
21
+ end
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,34 @@
1
+ # rubocop:disable Style/Documentation
2
+ # rubocop:disable Style/DocumentationMethod
3
+ module Feedjira
4
+ module DateTimeUtilities
5
+ class DateTimePatternParser
6
+ # rubocop:disable Style/AsciiComments
7
+ # Japanese Symbols are required for strange Date Strings like
8
+ # '水, 31 8 2016 07:37:00 PDT'
9
+ JAPANESE_SYMBOLS = %w(日 月 火 水 木 金 土).freeze
10
+ PATTERNS = ['%m/%d/%Y %T %p', '%d %m %Y %T %Z'].freeze
11
+
12
+ # rubocop:disable Metrics/MethodLength
13
+ def self.parse(string)
14
+ PATTERNS.each do |p|
15
+ begin
16
+ datetime = DateTime.strptime(prepare(string), p)
17
+ return datetime
18
+ rescue StandardError => e
19
+ Feedjira.logger.debug("Failed to parse date #{string}")
20
+ Feedjira.logger.debug(e)
21
+ nil
22
+ end
23
+ end
24
+ raise "No pattern matched #{string}"
25
+ end
26
+
27
+ def self.prepare(string)
28
+ rgx = Regexp.new("^(#{JAPANESE_SYMBOLS.join('|')}),\s")
29
+ string.gsub(rgx, '')
30
+ end
31
+ private_class_method :prepare
32
+ end
33
+ end
34
+ end
@@ -0,0 +1,32 @@
1
+ # rubocop:disable Style/Documentation
2
+ module Feedjira
3
+ module DateTimeUtilities
4
+ # This is our date parsing heuristic.
5
+ # Date Parsers are attempted in order.
6
+ DATE_PARSERS = [
7
+ DateTimePatternParser,
8
+ DateTimeLanguageParser,
9
+ DateTimeEpochParser,
10
+ DateTime
11
+ ].freeze
12
+
13
+ # Parse the given string by trying each parser in DATE_PARSERS in order
14
+ # (Ruby's default DateTime parser is tried last) until one succeeds
15
+ # rubocop:disable Metrics/MethodLength
16
+ def parse_datetime(string)
17
+ res = DATE_PARSERS.find do |parser|
18
+ begin
19
+ return parser.parse(string).feed_utils_to_gm_time
20
+ rescue StandardError => e
21
+ Feedjira.logger.debug { "Failed to parse date #{string}" }
22
+ Feedjira.logger.debug(e)
23
+ nil
24
+ end
25
+ end
26
+
27
+ Feedjira.logger.warn { "Failed to parse date #{string}" } if res.nil?
28
+
29
+ res
30
+ end
31
+ end
32
+ end
data/lib/feedjira/feed.rb CHANGED
@@ -1,85 +1,112 @@
1
+ # rubocop:disable Style/Documentation
2
+ # rubocop:disable Style/DocumentationMethod
1
3
  module Feedjira
2
4
  class Feed
3
- def self.parse_with(parser, xml, &block)
4
- parser.parse xml, &block
5
- end
5
+ class << self
6
+ def parse_with(parser, xml, &block)
7
+ parser.parse xml, &block
8
+ end
6
9
 
7
- def self.parse(xml, &block)
8
- if parser = determine_feed_parser_for_xml(xml)
10
+ def parse(xml, &block)
11
+ parser = determine_feed_parser_for_xml(xml)
12
+ raise NoParserAvailable, 'No valid parser for XML.' unless parser
9
13
  parse_with parser, xml, &block
10
- else
11
- raise NoParserAvailable.new("No valid parser for XML.")
12
14
  end
13
- end
14
15
 
15
- def self.determine_feed_parser_for_xml(xml)
16
- start_of_doc = xml.slice(0, 2000)
17
- feed_classes.detect {|klass| klass.able_to_parse?(start_of_doc)}
18
- end
16
+ def determine_feed_parser_for_xml(xml)
17
+ start_of_doc = xml.slice(0, 2000)
18
+ feed_classes.detect { |klass| klass.able_to_parse?(start_of_doc) }
19
+ end
19
20
 
20
- def self.add_feed_class(klass)
21
- feed_classes.unshift klass
22
- end
21
+ def add_feed_class(klass)
22
+ feed_classes.unshift klass
23
+ end
23
24
 
24
- def self.feed_classes
25
- @feed_classes ||= [
26
- Feedjira::Parser::RSSFeedBurner,
27
- Feedjira::Parser::GoogleDocsAtom,
28
- Feedjira::Parser::AtomFeedBurner,
29
- Feedjira::Parser::Atom,
30
- Feedjira::Parser::ITunesRSS,
31
- Feedjira::Parser::RSS
32
- ]
33
- end
25
+ def feed_classes
26
+ @feed_classes ||= Feedjira.parsers
27
+ end
34
28
 
35
- def self.add_common_feed_element(element_tag, options = {})
36
- feed_classes.each do |k|
37
- k.element element_tag, options
29
+ def reset_parsers!
30
+ @feed_classes = nil
38
31
  end
39
- end
40
32
 
41
- def self.add_common_feed_elements(element_tag, options = {})
42
- feed_classes.each do |k|
43
- k.elements element_tag, options
33
+ def add_common_feed_element(element_tag, options = {})
34
+ feed_classes.each do |k|
35
+ k.element element_tag, options
36
+ end
44
37
  end
45
- end
46
38
 
47
- def self.add_common_feed_entry_element(element_tag, options = {})
48
- call_on_each_feed_entry :element, element_tag, options
49
- end
39
+ def add_common_feed_elements(element_tag, options = {})
40
+ feed_classes.each do |k|
41
+ k.elements element_tag, options
42
+ end
43
+ end
50
44
 
51
- def self.add_common_feed_entry_elements(element_tag, options = {})
52
- call_on_each_feed_entry :elements, element_tag, options
53
- end
45
+ def add_common_feed_entry_element(element_tag, options = {})
46
+ call_on_each_feed_entry :element, element_tag, options
47
+ end
54
48
 
55
- def self.call_on_each_feed_entry(method, *parameters)
56
- feed_classes.each do |k|
57
- k.sax_config.collection_elements.each_value do |vl|
58
- vl.find_all{|v| (v.accessor == 'entries') && (v.data_class.class == Class)}.each do |v|
59
- v.data_class.send(method, *parameters)
49
+ def add_common_feed_entry_elements(element_tag, options = {})
50
+ call_on_each_feed_entry :elements, element_tag, options
51
+ end
52
+
53
+ def call_on_each_feed_entry(method, *parameters)
54
+ feed_classes.each do |klass|
55
+ klass.sax_config.collection_elements.each_value do |value|
56
+ collection_configs = value.select do |v|
57
+ v.accessor == 'entries' && v.data_class.class == Class
58
+ end
59
+
60
+ collection_configs.each do |config|
61
+ config.data_class.send(method, *parameters)
62
+ end
60
63
  end
61
64
  end
62
65
  end
63
- end
64
66
 
65
- def self.fetch_and_parse(url)
66
- response = connection(url).get
67
- raise FetchFailure.new("Fetch failed - #{response.status}") unless response.success?
68
- xml = response.body
69
- parser_klass = determine_feed_parser_for_xml xml
70
- raise NoParserAvailable.new("No valid parser for XML.") unless parser_klass
71
-
72
- feed = parse_with parser_klass, xml
73
- feed.feed_url = url
74
- feed.etag = response.headers['etag'].to_s.gsub(/"/, '')
75
- feed.last_modified = response.headers['last-modified']
76
- feed
77
- end
67
+ def fetch_and_parse(url)
68
+ response = connection(url).get
69
+ unless response.success?
70
+ raise FetchFailure, "Fetch failed - #{response.status}"
71
+ end
72
+ feed = parse response.body
73
+ feed.feed_url = url
74
+ feed.etag = response.headers['etag'].to_s.delete '"'
75
+
76
+ feed.last_modified = parse_last_modified(response)
77
+ feed
78
+ end
79
+
80
+ # rubocop:disable LineLength
81
+ def connection(url)
82
+ Faraday.new(url: url, headers: headers, request: request_options) do |conn|
83
+ conn.use FaradayMiddleware::FollowRedirects, limit: Feedjira.follow_redirect_limit
84
+ conn.adapter(*Faraday.default_adapter)
85
+ end
86
+ end
87
+ # rubocop:enable LineLength
88
+
89
+ private
90
+
91
+ def headers
92
+ {
93
+ user_agent: Feedjira.user_agent
94
+ }
95
+ end
96
+
97
+ def request_options
98
+ {
99
+ timeout: Feedjira.request_timeout
100
+ }
101
+ end
78
102
 
79
- def self.connection(url)
80
- Faraday.new(url: url) do |conn|
81
- conn.use FaradayMiddleware::FollowRedirects, limit: 3
82
- conn.adapter :net_http
103
+ def parse_last_modified(response)
104
+ lm = response.headers['last-modified']
105
+ DateTime.parse(lm).to_time
106
+ rescue StandardError => e
107
+ Feedjira.logger.warn { "Failed to parse last modified '#{lm}'" }
108
+ Feedjira.logger.debug(e)
109
+ nil
83
110
  end
84
111
  end
85
112
  end
@@ -1,23 +1,25 @@
1
+ # rubocop:disable Style/Documentation
2
+ # rubocop:disable Style/DocumentationMethod
1
3
  module Feedjira
2
4
  module FeedEntryUtilities
3
-
4
5
  include Enumerable
6
+ include DateTimeUtilities
5
7
 
6
8
  def published
7
9
  @published ||= @updated
8
10
  end
9
11
 
10
12
  def parse_datetime(string)
11
- begin
12
- DateTime.parse(string).feed_utils_to_gm_time
13
- rescue
14
- warn "Failed to parse date #{string.inspect}"
15
- nil
16
- end
13
+ DateTime.parse(string).feed_utils_to_gm_time
14
+ rescue StandardError => e
15
+ Feedjira.logger.warn { "Failed to parse date #{string.inspect}" }
16
+ Feedjira.logger.debug(e)
17
+ nil
17
18
  end
18
19
 
19
20
  ##
20
- # Returns the id of the entry or its url if not id is present, as some formats don't support it
21
+ # Returns the id of the entry or its url if no id is present, as some
22
+ # formats don't support it
21
23
  def id
22
24
  @entry_id ||= @url
23
25
  end
@@ -26,41 +28,40 @@ module Feedjira
26
28
  # Writer for published. By default, we keep the "oldest" publish time found.
27
29
  def published=(val)
28
30
  parsed = parse_datetime(val)
29
- @published = parsed if !@published || parsed < @published
31
+ @published = parsed if parsed && (!@published || parsed < @published)
30
32
  end
31
33
 
32
34
  ##
33
35
  # Writer for updated. By default, we keep the most recent update time found.
34
36
  def updated=(val)
35
37
  parsed = parse_datetime(val)
36
- @updated = parsed if !@updated || parsed > @updated
38
+ @updated = parsed if parsed && (!@updated || parsed > @updated)
37
39
  end
38
40
 
39
41
  def sanitize!
40
- %w[title author summary content image].each do |name|
41
- if self.respond_to?(name) && self.send(name).respond_to?(:sanitize!)
42
- self.send(name).send :sanitize!
42
+ %w(title author summary content image).each do |name|
43
+ if respond_to?(name) && send(name).respond_to?(:sanitize!)
44
+ send(name).send :sanitize!
43
45
  end
44
46
  end
45
47
  end
46
48
 
47
- alias_method :last_modified, :published
49
+ alias last_modified published
48
50
 
49
51
  def each
50
- @rss_fields ||= self.instance_variables
52
+ @rss_fields ||= instance_variables
51
53
 
52
54
  @rss_fields.each do |field|
53
- yield(field.to_s.sub('@', ''), self.instance_variable_get(field))
55
+ yield(field.to_s.sub('@', ''), instance_variable_get(field))
54
56
  end
55
57
  end
56
58
 
57
59
  def [](field)
58
- self.instance_variable_get("@#{field.to_s}")
60
+ instance_variable_get("@#{field}")
59
61
  end
60
62
 
61
63
  def []=(field, value)
62
- self.instance_variable_set("@#{field.to_s}", value)
64
+ instance_variable_set("@#{field}", value)
63
65
  end
64
-
65
66
  end
66
67
  end
@@ -1,6 +1,8 @@
1
+ # rubocop:disable Style/Documentation
2
+ # rubocop:disable Style/DocumentationMethod
1
3
  module Feedjira
2
4
  module FeedUtilities
3
- UPDATABLE_ATTRIBUTES = %w(title feed_url url last_modified etag)
5
+ UPDATABLE_ATTRIBUTES = %w(title feed_url url last_modified etag).freeze
4
6
 
5
7
  attr_writer :new_entries, :updated, :last_modified
6
8
  attr_accessor :etag
@@ -11,7 +13,7 @@ module Feedjira
11
13
 
12
14
  module ClassMethods
13
15
  def parse(xml, &block)
14
- xml = xml.lstrip
16
+ xml = strip_whitespace(xml)
15
17
  xml = preprocess(xml) if preprocess_xml
16
18
  super xml, &block
17
19
  end
@@ -28,11 +30,20 @@ module Feedjira
28
30
  def preprocess_xml
29
31
  @preprocess_xml
30
32
  end
33
+
34
+ def strip_whitespace(xml)
35
+ if Feedjira.strip_whitespace
36
+ xml.strip
37
+ else
38
+ xml.lstrip
39
+ end
40
+ end
31
41
  end
32
42
 
33
43
  def last_modified
34
44
  @last_modified ||= begin
35
- entry = entries.reject {|e| e.published.nil? }.sort_by { |entry| entry.published if entry.published }.last
45
+ published = entries.reject { |e| e.published.nil? }
46
+ entry = published.sort_by { |e| e.published if e.published }.last
36
47
  entry ? entry.published : nil
37
48
  end
38
49
  end
@@ -45,13 +56,13 @@ module Feedjira
45
56
  @new_entries ||= []
46
57
  end
47
58
 
48
- def has_new_entries?
49
- new_entries.size > 0
59
+ def new_entries?
60
+ !new_entries.empty?
50
61
  end
51
62
 
52
63
  def update_from_feed(feed)
53
64
  self.new_entries += find_new_entries_for(feed)
54
- self.entries.unshift(*self.new_entries)
65
+ entries.unshift(*self.new_entries)
55
66
 
56
67
  @updated = false
57
68
 
@@ -61,7 +72,8 @@ module Feedjira
61
72
  end
62
73
 
63
74
  def update_attribute(feed, name)
64
- old_value, new_value = send(name), feed.send(name)
75
+ old_value = send(name)
76
+ new_value = feed.send(name)
65
77
 
66
78
  if old_value != new_value
67
79
  send("#{name}=", new_value)
@@ -72,33 +84,36 @@ module Feedjira
72
84
  end
73
85
 
74
86
  def sanitize_entries!
75
- entries.each {|entry| entry.sanitize!}
87
+ entries.each(&:sanitize!)
76
88
  end
77
89
 
78
90
  private
79
91
 
92
+ # This implementation is a hack, which is why it's so ugly. It's to get
93
+ # around the fact that not all feeds have a published date. However,
94
+ # they're always ordered with the newest one first. So we go through the
95
+ # entries just parsed and insert each one as a new entry until we get to
96
+ # one that has the same id as the newest for the feed.
80
97
  def find_new_entries_for(feed)
81
- # this implementation is a hack, which is why it's so ugly.
82
- # it's to get around the fact that not all feeds have a published date.
83
- # however, they're always ordered with the newest one first.
84
- # So we go through the entries just parsed and insert each one as a new entry
85
- # until we get to one that has the same id as the the newest for the feed
86
- return feed.entries if self.entries.length == 0
87
- latest_entry = self.entries.first
98
+ return feed.entries if entries.length.zero?
99
+
100
+ latest_entry = entries.first
88
101
  found_new_entries = []
102
+
89
103
  feed.entries.each do |entry|
90
- if entry.entry_id.nil? && latest_entry.entry_id.nil?
91
- break if entry.url == latest_entry.url
92
- else
93
- break if entry.entry_id == latest_entry.entry_id || entry.url == latest_entry.url
94
- end
104
+ break unless new_entry?(entry, latest_entry)
95
105
  found_new_entries << entry
96
106
  end
107
+
97
108
  found_new_entries
98
109
  end
99
110
 
100
- def existing_entry?(test_entry)
101
- entries.any? { |entry| entry.id == test_entry.id }
111
+ def new_entry?(entry, latest)
112
+ nil_ids = entry.entry_id.nil? && latest.entry_id.nil?
113
+ new_id = entry.entry_id != latest.entry_id
114
+ new_url = entry.url != latest.url
115
+
116
+ (nil_ids || new_id) && new_url
102
117
  end
103
118
  end
104
119
  end
@@ -1,19 +1,21 @@
1
+ # rubocop:disable Style/DocumentationMethod
1
2
  module Feedjira
2
3
  module Parser
3
4
  # Parser for dealing with Atom feeds.
4
5
  class Atom
5
6
  include SAXMachine
6
7
  include FeedUtilities
8
+
7
9
  element :title
8
- element :subtitle, :as => :description
9
- element :link, :as => :url, :value => :href, :with => {:type => "text/html"}
10
- element :link, :as => :feed_url, :value => :href, :with => {:rel => "self"}
11
- elements :link, :as => :links, :value => :href
12
- elements :link, :as => :hubs, :value => :href, :with => {:rel => "hub"}
13
- elements :entry, :as => :entries, :class => AtomEntry
10
+ element :subtitle, as: :description
11
+ element :link, as: :url, value: :href, with: { type: 'text/html' }
12
+ element :link, as: :feed_url, value: :href, with: { rel: 'self' }
13
+ elements :link, as: :links, value: :href
14
+ elements :link, as: :hubs, value: :href, with: { rel: 'hub' }
15
+ elements :entry, as: :entries, class: AtomEntry
14
16
 
15
- def self.able_to_parse?(xml) #:nodoc:
16
- /\<feed[^\>]+xmlns\s?=\s?[\"|\'](http:\/\/www\.w3\.org\/2005\/Atom|http:\/\/purl\.org\/atom\/ns\#)[\"|\'][^\>]*\>/ =~ xml
17
+ def self.able_to_parse?(xml)
18
+ %r{\<feed[^\>]+xmlns\s?=\s?[\"\'](http://www\.w3\.org/2005/Atom|http://purl\.org/atom/ns\#)[\"\'][^\>]*\>} =~ xml # rubocop:disable Metrics/LineLength
17
19
  end
18
20
 
19
21
  def url
@@ -1,5 +1,5 @@
1
+ # rubocop:disable Style/DocumentationMethod
1
2
  module Feedjira
2
-
3
3
  module Parser
4
4
  # Parser for dealing with Atom feed entries.
5
5
  class AtomEntry
@@ -7,28 +7,26 @@ module Feedjira
7
7
  include FeedEntryUtilities
8
8
 
9
9
  element :title
10
- element :link, :as => :url, :value => :href, :with => {:type => "text/html", :rel => "alternate"}
11
- element :name, :as => :author
10
+ element :link, as: :url, value: :href, with: { type: 'text/html', rel: 'alternate' } # rubocop:disable Metrics/LineLength
11
+ element :name, as: :author
12
12
  element :content
13
13
  element :summary
14
14
 
15
- element :"media:content", :as => :image, :value => :url
16
- element :enclosure, :as => :image, :value => :href
15
+ element :"media:content", as: :image, value: :url
16
+ element :enclosure, as: :image, value: :href
17
17
 
18
18
  element :published
19
- element :id, :as => :entry_id
20
- element :created, :as => :published
21
- element :issued, :as => :published
19
+ element :id, as: :entry_id
20
+ element :created, as: :published
21
+ element :issued, as: :published
22
22
  element :updated
23
- element :modified, :as => :updated
24
- elements :category, :as => :categories, :value => :term
25
- elements :link, :as => :links, :value => :href
23
+ element :modified, as: :updated
24
+ elements :category, as: :categories, value: :term
25
+ elements :link, as: :links, value: :href
26
26
 
27
27
  def url
28
28
  @url ||= links.first
29
29
  end
30
30
  end
31
-
32
31
  end
33
-
34
32
  end