feedjira 2.1.0 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46):
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/.rubocop.yml +9 -2
  4. data/CHANGELOG.md +4 -0
  5. data/LICENSE +1 -1
  6. data/README.md +210 -7
  7. data/Rakefile +5 -0
  8. data/feedjira.gemspec +2 -1
  9. data/lib/feedjira.rb +7 -1
  10. data/lib/feedjira/configuration.rb +76 -0
  11. data/lib/feedjira/core_ext/date.rb +1 -0
  12. data/lib/feedjira/core_ext/string.rb +1 -0
  13. data/lib/feedjira/core_ext/time.rb +5 -1
  14. data/lib/feedjira/date_time_utilities.rb +11 -3
  15. data/lib/feedjira/date_time_utilities/date_time_epoch_parser.rb +13 -0
  16. data/lib/feedjira/date_time_utilities/date_time_language_parser.rb +2 -0
  17. data/lib/feedjira/date_time_utilities/date_time_pattern_parser.rb +6 -1
  18. data/lib/feedjira/feed.rb +87 -69
  19. data/lib/feedjira/feed_entry_utilities.rb +5 -2
  20. data/lib/feedjira/feed_utilities.rb +11 -1
  21. data/lib/feedjira/parser.rb +1 -1
  22. data/lib/feedjira/parser/atom.rb +1 -0
  23. data/lib/feedjira/parser/atom_entry.rb +1 -0
  24. data/lib/feedjira/parser/atom_feed_burner.rb +19 -2
  25. data/lib/feedjira/parser/atom_feed_burner_entry.rb +1 -0
  26. data/lib/feedjira/parser/atom_youtube.rb +1 -0
  27. data/lib/feedjira/parser/atom_youtube_entry.rb +1 -0
  28. data/lib/feedjira/parser/google_docs_atom.rb +2 -1
  29. data/lib/feedjira/parser/google_docs_atom_entry.rb +2 -0
  30. data/lib/feedjira/parser/itunes_rss.rb +1 -0
  31. data/lib/feedjira/parser/itunes_rss_category.rb +1 -0
  32. data/lib/feedjira/parser/itunes_rss_owner.rb +1 -0
  33. data/lib/feedjira/parser/podlove_chapter.rb +2 -0
  34. data/lib/feedjira/parser/rss.rb +1 -0
  35. data/lib/feedjira/parser/rss_feed_burner.rb +1 -0
  36. data/lib/feedjira/parser/rss_feed_burner_entry.rb +1 -0
  37. data/lib/feedjira/preprocessor.rb +2 -0
  38. data/lib/feedjira/version.rb +1 -1
  39. data/spec/feedjira/configuration_spec.rb +25 -0
  40. data/spec/feedjira/date_time_utilities_spec.rb +6 -0
  41. data/spec/feedjira/feed_spec.rb +20 -2
  42. data/spec/feedjira/feed_utilities_spec.rb +18 -0
  43. data/spec/feedjira/parser/atom_feed_burner_spec.rb +32 -1
  44. data/spec/sample_feeds.rb +1 -0
  45. data/spec/sample_feeds/GiantRobotsSmashingIntoOtherGiantRobots.xml +682 -0
  46. metadata +49 -29
@@ -1,3 +1,4 @@
1
+ # rubocop:disable Style/Documentation
1
2
  module Feedjira
2
3
  module DateTimeUtilities
3
4
  # This is our date parsing heuristic.
@@ -5,20 +6,27 @@ module Feedjira
5
6
  DATE_PARSERS = [
6
7
  DateTimePatternParser,
7
8
  DateTimeLanguageParser,
9
+ DateTimeEpochParser,
8
10
  DateTime
9
11
  ].freeze
10
12
 
11
13
  # Parse the given string starting with the most common parser (default ruby)
12
14
  # and going over all other available parsers
15
+ # rubocop:disable Metrics/MethodLength
13
16
  def parse_datetime(string)
14
- DATE_PARSERS.find do |parser|
17
+ res = DATE_PARSERS.find do |parser|
15
18
  begin
16
19
  return parser.parse(string).feed_utils_to_gm_time
17
- rescue
20
+ rescue StandardError => e
21
+ Feedjira.logger.debug { "Failed to parse date #{string}" }
22
+ Feedjira.logger.debug(e)
18
23
  nil
19
24
  end
20
25
  end
21
- warn "Failed to parse date #{string.inspect}"
26
+
27
+ Feedjira.logger.warn { "Failed to parse date #{string}" } if res.nil?
28
+
29
+ res
22
30
  end
23
31
  end
24
32
  end
@@ -0,0 +1,13 @@
1
+ # rubocop:disable Style/Documentation
2
+ # rubocop:disable Style/DocumentationMethod
3
+ module Feedjira
4
+ module DateTimeUtilities
5
+ class DateTimeEpochParser
6
+ def self.parse(string)
7
+ epoch_time = string.to_i
8
+ return Time.at(epoch_time).to_datetime if epoch_time.to_s == string
9
+ raise "#{string} is not a valid epoch time"
10
+ end
11
+ end
12
+ end
13
+ end
@@ -1,3 +1,5 @@
1
+ # rubocop:disable Style/Documentation
2
+ # rubocop:disable Style/DocumentationMethod
1
3
  module Feedjira
2
4
  module DateTimeUtilities
3
5
  class DateTimeLanguageParser
@@ -1,3 +1,5 @@
1
+ # rubocop:disable Style/Documentation
2
+ # rubocop:disable Style/DocumentationMethod
1
3
  module Feedjira
2
4
  module DateTimeUtilities
3
5
  class DateTimePatternParser
@@ -7,12 +9,15 @@ module Feedjira
7
9
  JAPANESE_SYMBOLS = %w(日 月 火 水 木 金 土).freeze
8
10
  PATTERNS = ['%m/%d/%Y %T %p', '%d %m %Y %T %Z'].freeze
9
11
 
12
+ # rubocop:disable Metrics/MethodLength
10
13
  def self.parse(string)
11
14
  PATTERNS.each do |p|
12
15
  begin
13
16
  datetime = DateTime.strptime(prepare(string), p)
14
17
  return datetime
15
- rescue
18
+ rescue StandardError => e
19
+ Feedjira.logger.debug("Failed to parse date #{string}")
20
+ Feedjira.logger.debug(e)
16
21
  nil
17
22
  end
18
23
  end
data/lib/feedjira/feed.rb CHANGED
@@ -1,95 +1,113 @@
1
+ # rubocop:disable Style/Documentation
2
+ # rubocop:disable Style/DocumentationMethod
1
3
  module Feedjira
2
4
  class Feed
3
- def self.parse_with(parser, xml, &block)
4
- parser.parse xml, &block
5
- end
5
+ class << self
6
+ def parse_with(parser, xml, &block)
7
+ parser.parse xml, &block
8
+ end
6
9
 
7
- def self.parse(xml, &block)
8
- parser = determine_feed_parser_for_xml(xml)
9
- raise NoParserAvailable, 'No valid parser for XML.' unless parser
10
- parse_with parser, xml, &block
11
- end
10
+ def parse(xml, &block)
11
+ parser = determine_feed_parser_for_xml(xml)
12
+ raise NoParserAvailable, 'No valid parser for XML.' unless parser
13
+ parse_with parser, xml, &block
14
+ end
12
15
 
13
- def self.determine_feed_parser_for_xml(xml)
14
- start_of_doc = xml.slice(0, 2000)
15
- feed_classes.detect { |klass| klass.able_to_parse?(start_of_doc) }
16
- end
16
+ def determine_feed_parser_for_xml(xml)
17
+ start_of_doc = xml.slice(0, 2000)
18
+ feed_classes.detect { |klass| klass.able_to_parse?(start_of_doc) }
19
+ end
17
20
 
18
- def self.add_feed_class(klass)
19
- feed_classes.unshift klass
20
- end
21
+ def add_feed_class(klass)
22
+ feed_classes.unshift klass
23
+ end
21
24
 
22
- def self.feed_classes
23
- @feed_classes ||= [
24
- Feedjira::Parser::RSSFeedBurner,
25
- Feedjira::Parser::GoogleDocsAtom,
26
- Feedjira::Parser::AtomYoutube,
27
- Feedjira::Parser::AtomFeedBurner,
28
- Feedjira::Parser::Atom,
29
- Feedjira::Parser::ITunesRSS,
30
- Feedjira::Parser::RSS
31
- ]
32
- end
25
+ def feed_classes
26
+ @feed_classes ||= Feedjira.parsers
27
+ end
33
28
 
34
- def self.add_common_feed_element(element_tag, options = {})
35
- feed_classes.each do |k|
36
- k.element element_tag, options
29
+ def reset_parsers!
30
+ @feed_classes = nil
37
31
  end
38
- end
39
32
 
40
- def self.add_common_feed_elements(element_tag, options = {})
41
- feed_classes.each do |k|
42
- k.elements element_tag, options
33
+ def add_common_feed_element(element_tag, options = {})
34
+ feed_classes.each do |k|
35
+ k.element element_tag, options
36
+ end
43
37
  end
44
- end
45
38
 
46
- def self.add_common_feed_entry_element(element_tag, options = {})
47
- call_on_each_feed_entry :element, element_tag, options
48
- end
39
+ def add_common_feed_elements(element_tag, options = {})
40
+ feed_classes.each do |k|
41
+ k.elements element_tag, options
42
+ end
43
+ end
49
44
 
50
- def self.add_common_feed_entry_elements(element_tag, options = {})
51
- call_on_each_feed_entry :elements, element_tag, options
52
- end
45
+ def add_common_feed_entry_element(element_tag, options = {})
46
+ call_on_each_feed_entry :element, element_tag, options
47
+ end
53
48
 
54
- def self.call_on_each_feed_entry(method, *parameters)
55
- feed_classes.each do |klass|
56
- klass.sax_config.collection_elements.each_value do |value|
57
- collection_configs = value.select do |v|
58
- v.accessor == 'entries' && v.data_class.class == Class
59
- end
49
+ def add_common_feed_entry_elements(element_tag, options = {})
50
+ call_on_each_feed_entry :elements, element_tag, options
51
+ end
60
52
 
61
- collection_configs.each do |config|
62
- config.data_class.send(method, *parameters)
53
+ def call_on_each_feed_entry(method, *parameters)
54
+ feed_classes.each do |klass|
55
+ klass.sax_config.collection_elements.each_value do |value|
56
+ collection_configs = value.select do |v|
57
+ v.accessor == 'entries' && v.data_class.class == Class
58
+ end
59
+
60
+ collection_configs.each do |config|
61
+ config.data_class.send(method, *parameters)
62
+ end
63
63
  end
64
64
  end
65
65
  end
66
- end
67
66
 
68
- def self.fetch_and_parse(url)
69
- response = connection(url).get
70
- unless response.success?
71
- raise FetchFailure, "Fetch failed - #{response.status}"
67
+ def fetch_and_parse(url)
68
+ response = connection(url).get
69
+ unless response.success?
70
+ raise FetchFailure, "Fetch failed - #{response.status}"
71
+ end
72
+ feed = parse response.body
73
+ feed.feed_url = url
74
+ feed.etag = response.headers['etag'].to_s.delete '"'
75
+
76
+ feed.last_modified = parse_last_modified(response)
77
+ feed
72
78
  end
73
- feed = parse response.body
74
- feed.feed_url = url
75
- feed.etag = response.headers['etag'].to_s.delete '"'
76
79
 
77
- feed.last_modified = parse_last_modified(response)
78
- feed
79
- end
80
+ # rubocop:disable LineLength
81
+ def connection(url)
82
+ Faraday.new(url: url, headers: headers, request: request_options) do |conn|
83
+ conn.use FaradayMiddleware::FollowRedirects, limit: Feedjira.follow_redirect_limit
84
+ conn.adapter :net_http
85
+ end
86
+ end
87
+ # rubocop:enable LineLength
88
+
89
+ private
80
90
 
81
- def self.connection(url)
82
- Faraday.new(url: url) do |conn|
83
- conn.use FaradayMiddleware::FollowRedirects, limit: 3
84
- conn.adapter :net_http
91
+ def headers
92
+ {
93
+ user_agent: Feedjira.user_agent
94
+ }
95
+ end
96
+
97
+ def request_options
98
+ {
99
+ timeout: Feedjira.request_timeout
100
+ }
85
101
  end
86
- end
87
102
 
88
- def self.parse_last_modified(response)
89
- DateTime.parse(response.headers['last-modified']).to_time
90
- rescue
91
- nil
103
+ def parse_last_modified(response)
104
+ lm = response.headers['last-modified']
105
+ DateTime.parse(lm).to_time
106
+ rescue StandardError => e
107
+ Feedjira.logger.warn { "Failed to parse last modified '#{lm}'" }
108
+ Feedjira.logger.debug(e)
109
+ nil
110
+ end
92
111
  end
93
- private_class_method :parse_last_modified
94
112
  end
95
113
  end
@@ -1,3 +1,5 @@
1
+ # rubocop:disable Style/Documentation
2
+ # rubocop:disable Style/DocumentationMethod
1
3
  module Feedjira
2
4
  module FeedEntryUtilities
3
5
  include Enumerable
@@ -9,8 +11,9 @@ module Feedjira
9
11
 
10
12
  def parse_datetime(string)
11
13
  DateTime.parse(string).feed_utils_to_gm_time
12
- rescue
13
- warn "Failed to parse date #{string.inspect}"
14
+ rescue StandardError => e
15
+ Feedjira.logger.warn { "Failed to parse date #{string.inspect}" }
16
+ Feedjira.logger.warn(e)
14
17
  nil
15
18
  end
16
19
 
@@ -1,3 +1,5 @@
1
+ # rubocop:disable Style/Documentation
2
+ # rubocop:disable Style/DocumentationMethod
1
3
  module Feedjira
2
4
  module FeedUtilities
3
5
  UPDATABLE_ATTRIBUTES = %w(title feed_url url last_modified etag).freeze
@@ -11,7 +13,7 @@ module Feedjira
11
13
 
12
14
  module ClassMethods
13
15
  def parse(xml, &block)
14
- xml = xml.lstrip
16
+ xml = strip_whitespace(xml)
15
17
  xml = preprocess(xml) if preprocess_xml
16
18
  super xml, &block
17
19
  end
@@ -28,6 +30,14 @@ module Feedjira
28
30
  def preprocess_xml
29
31
  @preprocess_xml
30
32
  end
33
+
34
+ def strip_whitespace(xml)
35
+ if Feedjira.strip_whitespace
36
+ xml.strip
37
+ else
38
+ xml.lstrip
39
+ end
40
+ end
31
41
  end
32
42
 
33
43
  def last_modified
@@ -1 +1 @@
1
- module Feedjira::Parser; end
1
+ module Feedjira::Parser; end # rubocop:disable Style/Documentation
@@ -1,3 +1,4 @@
1
+ # rubocop:disable Style/DocumentationMethod
1
2
  module Feedjira
2
3
  module Parser
3
4
  # Parser for dealing with Atom feeds.
@@ -1,3 +1,4 @@
1
+ # rubocop:disable Style/DocumentationMethod
1
2
  module Feedjira
2
3
  module Parser
3
4
  # Parser for dealing with Atom feed entries.
@@ -1,3 +1,4 @@
1
+ # rubocop:disable Style/DocumentationMethod
1
2
  module Feedjira
2
3
  module Parser
3
4
  # Parser for dealing with Feedburner Atom feeds.
@@ -7,8 +8,12 @@ module Feedjira
7
8
 
8
9
  element :title
9
10
  element :subtitle, as: :description
10
- element :link, as: :url, value: :href, with: { type: 'text/html' }
11
- element :link, as: :feed_url, value: :href, with: { type: 'application/atom+xml' } # rubocop:disable Metrics/LineLength
11
+ element :link, as: :url_text_html, value: :href,
12
+ with: { type: 'text/html' }
13
+ element :link, as: :url_notype, value: :href, with: { type: nil }
14
+ element :link, as: :feed_url_link, value: :href, with: { type: 'application/atom+xml' } # rubocop:disable Metrics/LineLength
15
+ element :"atom10:link", as: :feed_url_atom10_link, value: :href,
16
+ with: { type: 'application/atom+xml' }
12
17
  elements :"atom10:link", as: :hubs, value: :href, with: { rel: 'hub' }
13
18
  elements :entry, as: :entries, class: AtomFeedBurnerEntry
14
19
 
@@ -16,6 +21,18 @@ module Feedjira
16
21
  ((/Atom/ =~ xml) && (/feedburner/ =~ xml) && !(/\<rss|\<rdf/ =~ xml)) || false # rubocop:disable Metrics/LineLength
17
22
  end
18
23
 
24
+ # Feed url is <link> with type="text/html" if present,
25
+ # <link> with no type attribute otherwise
26
+ def url
27
+ @url_text_html || url_notype
28
+ end
29
+
30
+ # Feed feed_url is <link> with type="application/atom+xml" if present,
31
+ # <atom10:link> with type="application/atom+xml" otherwise
32
+ def feed_url
33
+ @feed_url_link || feed_url_atom10_link
34
+ end
35
+
19
36
  def self.preprocess(xml)
20
37
  Preprocessor.new(xml).to_xml
21
38
  end
@@ -1,3 +1,4 @@
1
+ # rubocop:disable Style/DocumentationMethod
1
2
  module Feedjira
2
3
  module Parser
3
4
  # Parser for dealing with Feedburner Atom feed entries.
@@ -1,3 +1,4 @@
1
+ # rubocop:disable Style/DocumentationMethod
1
2
  module Feedjira
2
3
  module Parser
3
4
  # Parser for dealing with RSS feeds.
@@ -1,3 +1,4 @@
1
+ # rubocop:disable Style/Documentation
1
2
  module Feedjira
2
3
  module Parser
3
4
  class AtomYoutubeEntry
@@ -1,5 +1,6 @@
1
1
  require File.expand_path('./atom', File.dirname(__FILE__))
2
-
2
+ # rubocop:disable Style/Documentation
3
+ # rubocop:disable Style/DocumentationMethod
3
4
  module Feedjira
4
5
  module Parser
5
6
  class GoogleDocsAtom
@@ -1,3 +1,5 @@
1
+ # rubocop:disable Style/Documentation
2
+ # rubocop:disable Style/DocumentationMethod
1
3
  module Feedjira
2
4
  module Parser
3
5
  class GoogleDocsAtomEntry
@@ -1,3 +1,4 @@
1
+ # rubocop:disable Style/DocumentationMethod
1
2
  module Feedjira
2
3
  module Parser
3
4
  # iTunes is RSS 2.0 + some apple extensions
@@ -1,3 +1,4 @@
1
+ # rubocop:disable Style/DocumentationMethod
1
2
  module Feedjira
2
3
  module Parser
3
4
  # iTunes extensions to the standard RSS2.0 item
@@ -1,3 +1,4 @@
1
+ # rubocop:disable Style/Documentation
1
2
  module Feedjira
2
3
  module Parser
3
4
  class ITunesRSSOwner
@@ -1,3 +1,5 @@
1
+ # rubocop:disable Style/Documentation
2
+ # rubocop:disable Style/DocumentationMethod
1
3
  module Feedjira
2
4
  module Parser
3
5
  class PodloveChapter
@@ -1,3 +1,4 @@
1
+ # rubocop:disable Style/DocumentationMethod
1
2
  module Feedjira
2
3
  module Parser
3
4
  # Parser for dealing with RSS feeds.
@@ -1,3 +1,4 @@
1
+ # rubocop:disable Style/DocumentationMethod
1
2
  module Feedjira
2
3
  module Parser
3
4
  # Parser for dealing with RSS feeds.
@@ -1,3 +1,4 @@
1
+ # rubocop:disable Style/DocumentationMethod
1
2
  module Feedjira
2
3
  module Parser
3
4
  # Parser for dealing with RDF feed entries.