feedjira 2.1.0 → 2.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/.rubocop.yml +9 -2
  4. data/CHANGELOG.md +4 -0
  5. data/LICENSE +1 -1
  6. data/README.md +210 -7
  7. data/Rakefile +5 -0
  8. data/feedjira.gemspec +2 -1
  9. data/lib/feedjira.rb +7 -1
  10. data/lib/feedjira/configuration.rb +76 -0
  11. data/lib/feedjira/core_ext/date.rb +1 -0
  12. data/lib/feedjira/core_ext/string.rb +1 -0
  13. data/lib/feedjira/core_ext/time.rb +5 -1
  14. data/lib/feedjira/date_time_utilities.rb +11 -3
  15. data/lib/feedjira/date_time_utilities/date_time_epoch_parser.rb +13 -0
  16. data/lib/feedjira/date_time_utilities/date_time_language_parser.rb +2 -0
  17. data/lib/feedjira/date_time_utilities/date_time_pattern_parser.rb +6 -1
  18. data/lib/feedjira/feed.rb +87 -69
  19. data/lib/feedjira/feed_entry_utilities.rb +5 -2
  20. data/lib/feedjira/feed_utilities.rb +11 -1
  21. data/lib/feedjira/parser.rb +1 -1
  22. data/lib/feedjira/parser/atom.rb +1 -0
  23. data/lib/feedjira/parser/atom_entry.rb +1 -0
  24. data/lib/feedjira/parser/atom_feed_burner.rb +19 -2
  25. data/lib/feedjira/parser/atom_feed_burner_entry.rb +1 -0
  26. data/lib/feedjira/parser/atom_youtube.rb +1 -0
  27. data/lib/feedjira/parser/atom_youtube_entry.rb +1 -0
  28. data/lib/feedjira/parser/google_docs_atom.rb +2 -1
  29. data/lib/feedjira/parser/google_docs_atom_entry.rb +2 -0
  30. data/lib/feedjira/parser/itunes_rss.rb +1 -0
  31. data/lib/feedjira/parser/itunes_rss_category.rb +1 -0
  32. data/lib/feedjira/parser/itunes_rss_owner.rb +1 -0
  33. data/lib/feedjira/parser/podlove_chapter.rb +2 -0
  34. data/lib/feedjira/parser/rss.rb +1 -0
  35. data/lib/feedjira/parser/rss_feed_burner.rb +1 -0
  36. data/lib/feedjira/parser/rss_feed_burner_entry.rb +1 -0
  37. data/lib/feedjira/preprocessor.rb +2 -0
  38. data/lib/feedjira/version.rb +1 -1
  39. data/spec/feedjira/configuration_spec.rb +25 -0
  40. data/spec/feedjira/date_time_utilities_spec.rb +6 -0
  41. data/spec/feedjira/feed_spec.rb +20 -2
  42. data/spec/feedjira/feed_utilities_spec.rb +18 -0
  43. data/spec/feedjira/parser/atom_feed_burner_spec.rb +32 -1
  44. data/spec/sample_feeds.rb +1 -0
  45. data/spec/sample_feeds/GiantRobotsSmashingIntoOtherGiantRobots.xml +682 -0
  46. metadata +49 -29
@@ -1,3 +1,4 @@
1
+ # rubocop:disable Style/Documentation
1
2
  module Feedjira
2
3
  module DateTimeUtilities
3
4
  # This is our date parsing heuristic.
@@ -5,20 +6,27 @@ module Feedjira
5
6
  DATE_PARSERS = [
6
7
  DateTimePatternParser,
7
8
  DateTimeLanguageParser,
9
+ DateTimeEpochParser,
8
10
  DateTime
9
11
  ].freeze
10
12
 
11
13
  # Parse the given string starting with the most common parser (default ruby)
12
14
  # and going over all other available parsers
15
+ # rubocop:disable Metrics/MethodLength
13
16
  def parse_datetime(string)
14
- DATE_PARSERS.find do |parser|
17
+ res = DATE_PARSERS.find do |parser|
15
18
  begin
16
19
  return parser.parse(string).feed_utils_to_gm_time
17
- rescue
20
+ rescue StandardError => e
21
+ Feedjira.logger.debug { "Failed to parse date #{string}" }
22
+ Feedjira.logger.debug(e)
18
23
  nil
19
24
  end
20
25
  end
21
- warn "Failed to parse date #{string.inspect}"
26
+
27
+ Feedjira.logger.warn { "Failed to parse date #{string}" } if res.nil?
28
+
29
+ res
22
30
  end
23
31
  end
24
32
  end
@@ -0,0 +1,13 @@
1
+ # rubocop:disable Style/Documentation
2
+ # rubocop:disable Style/DocumentationMethod
3
+ module Feedjira
4
+ module DateTimeUtilities
5
+ class DateTimeEpochParser
6
+ def self.parse(string)
7
+ epoch_time = string.to_i
8
+ return Time.at(epoch_time).to_datetime if epoch_time.to_s == string
9
+ raise "#{string} is not a valid epoch time"
10
+ end
11
+ end
12
+ end
13
+ end
@@ -1,3 +1,5 @@
1
+ # rubocop:disable Style/Documentation
2
+ # rubocop:disable Style/DocumentationMethod
1
3
  module Feedjira
2
4
  module DateTimeUtilities
3
5
  class DateTimeLanguageParser
@@ -1,3 +1,5 @@
1
+ # rubocop:disable Style/Documentation
2
+ # rubocop:disable Style/DocumentationMethod
1
3
  module Feedjira
2
4
  module DateTimeUtilities
3
5
  class DateTimePatternParser
@@ -7,12 +9,15 @@ module Feedjira
7
9
  JAPANESE_SYMBOLS = %w(日 月 火 水 木 金 土).freeze
8
10
  PATTERNS = ['%m/%d/%Y %T %p', '%d %m %Y %T %Z'].freeze
9
11
 
12
+ # rubocop:disable Metrics/MethodLength
10
13
  def self.parse(string)
11
14
  PATTERNS.each do |p|
12
15
  begin
13
16
  datetime = DateTime.strptime(prepare(string), p)
14
17
  return datetime
15
- rescue
18
+ rescue StandardError => e
19
+ Feedjira.logger.debug("Failed to parse date #{string}")
20
+ Feedjira.logger.debug(e)
16
21
  nil
17
22
  end
18
23
  end
data/lib/feedjira/feed.rb CHANGED
@@ -1,95 +1,113 @@
1
+ # rubocop:disable Style/Documentation
2
+ # rubocop:disable Style/DocumentationMethod
1
3
  module Feedjira
2
4
  class Feed
3
- def self.parse_with(parser, xml, &block)
4
- parser.parse xml, &block
5
- end
5
+ class << self
6
+ def parse_with(parser, xml, &block)
7
+ parser.parse xml, &block
8
+ end
6
9
 
7
- def self.parse(xml, &block)
8
- parser = determine_feed_parser_for_xml(xml)
9
- raise NoParserAvailable, 'No valid parser for XML.' unless parser
10
- parse_with parser, xml, &block
11
- end
10
+ def parse(xml, &block)
11
+ parser = determine_feed_parser_for_xml(xml)
12
+ raise NoParserAvailable, 'No valid parser for XML.' unless parser
13
+ parse_with parser, xml, &block
14
+ end
12
15
 
13
- def self.determine_feed_parser_for_xml(xml)
14
- start_of_doc = xml.slice(0, 2000)
15
- feed_classes.detect { |klass| klass.able_to_parse?(start_of_doc) }
16
- end
16
+ def determine_feed_parser_for_xml(xml)
17
+ start_of_doc = xml.slice(0, 2000)
18
+ feed_classes.detect { |klass| klass.able_to_parse?(start_of_doc) }
19
+ end
17
20
 
18
- def self.add_feed_class(klass)
19
- feed_classes.unshift klass
20
- end
21
+ def add_feed_class(klass)
22
+ feed_classes.unshift klass
23
+ end
21
24
 
22
- def self.feed_classes
23
- @feed_classes ||= [
24
- Feedjira::Parser::RSSFeedBurner,
25
- Feedjira::Parser::GoogleDocsAtom,
26
- Feedjira::Parser::AtomYoutube,
27
- Feedjira::Parser::AtomFeedBurner,
28
- Feedjira::Parser::Atom,
29
- Feedjira::Parser::ITunesRSS,
30
- Feedjira::Parser::RSS
31
- ]
32
- end
25
+ def feed_classes
26
+ @feed_classes ||= Feedjira.parsers
27
+ end
33
28
 
34
- def self.add_common_feed_element(element_tag, options = {})
35
- feed_classes.each do |k|
36
- k.element element_tag, options
29
+ def reset_parsers!
30
+ @feed_classes = nil
37
31
  end
38
- end
39
32
 
40
- def self.add_common_feed_elements(element_tag, options = {})
41
- feed_classes.each do |k|
42
- k.elements element_tag, options
33
+ def add_common_feed_element(element_tag, options = {})
34
+ feed_classes.each do |k|
35
+ k.element element_tag, options
36
+ end
43
37
  end
44
- end
45
38
 
46
- def self.add_common_feed_entry_element(element_tag, options = {})
47
- call_on_each_feed_entry :element, element_tag, options
48
- end
39
+ def add_common_feed_elements(element_tag, options = {})
40
+ feed_classes.each do |k|
41
+ k.elements element_tag, options
42
+ end
43
+ end
49
44
 
50
- def self.add_common_feed_entry_elements(element_tag, options = {})
51
- call_on_each_feed_entry :elements, element_tag, options
52
- end
45
+ def add_common_feed_entry_element(element_tag, options = {})
46
+ call_on_each_feed_entry :element, element_tag, options
47
+ end
53
48
 
54
- def self.call_on_each_feed_entry(method, *parameters)
55
- feed_classes.each do |klass|
56
- klass.sax_config.collection_elements.each_value do |value|
57
- collection_configs = value.select do |v|
58
- v.accessor == 'entries' && v.data_class.class == Class
59
- end
49
+ def add_common_feed_entry_elements(element_tag, options = {})
50
+ call_on_each_feed_entry :elements, element_tag, options
51
+ end
60
52
 
61
- collection_configs.each do |config|
62
- config.data_class.send(method, *parameters)
53
+ def call_on_each_feed_entry(method, *parameters)
54
+ feed_classes.each do |klass|
55
+ klass.sax_config.collection_elements.each_value do |value|
56
+ collection_configs = value.select do |v|
57
+ v.accessor == 'entries' && v.data_class.class == Class
58
+ end
59
+
60
+ collection_configs.each do |config|
61
+ config.data_class.send(method, *parameters)
62
+ end
63
63
  end
64
64
  end
65
65
  end
66
- end
67
66
 
68
- def self.fetch_and_parse(url)
69
- response = connection(url).get
70
- unless response.success?
71
- raise FetchFailure, "Fetch failed - #{response.status}"
67
+ def fetch_and_parse(url)
68
+ response = connection(url).get
69
+ unless response.success?
70
+ raise FetchFailure, "Fetch failed - #{response.status}"
71
+ end
72
+ feed = parse response.body
73
+ feed.feed_url = url
74
+ feed.etag = response.headers['etag'].to_s.delete '"'
75
+
76
+ feed.last_modified = parse_last_modified(response)
77
+ feed
72
78
  end
73
- feed = parse response.body
74
- feed.feed_url = url
75
- feed.etag = response.headers['etag'].to_s.delete '"'
76
79
 
77
- feed.last_modified = parse_last_modified(response)
78
- feed
79
- end
80
+ # rubocop:disable LineLength
81
+ def connection(url)
82
+ Faraday.new(url: url, headers: headers, request: request_options) do |conn|
83
+ conn.use FaradayMiddleware::FollowRedirects, limit: Feedjira.follow_redirect_limit
84
+ conn.adapter :net_http
85
+ end
86
+ end
87
+ # rubocop:enable LineLength
88
+
89
+ private
80
90
 
81
- def self.connection(url)
82
- Faraday.new(url: url) do |conn|
83
- conn.use FaradayMiddleware::FollowRedirects, limit: 3
84
- conn.adapter :net_http
91
+ def headers
92
+ {
93
+ user_agent: Feedjira.user_agent
94
+ }
95
+ end
96
+
97
+ def request_options
98
+ {
99
+ timeout: Feedjira.request_timeout
100
+ }
85
101
  end
86
- end
87
102
 
88
- def self.parse_last_modified(response)
89
- DateTime.parse(response.headers['last-modified']).to_time
90
- rescue
91
- nil
103
+ def parse_last_modified(response)
104
+ lm = response.headers['last-modified']
105
+ DateTime.parse(lm).to_time
106
+ rescue StandardError => e
107
+ Feedjira.logger.warn { "Failed to parse last modified '#{lm}'" }
108
+ Feedjira.logger.debug(e)
109
+ nil
110
+ end
92
111
  end
93
- private_class_method :parse_last_modified
94
112
  end
95
113
  end
@@ -1,3 +1,5 @@
1
+ # rubocop:disable Style/Documentation
2
+ # rubocop:disable Style/DocumentationMethod
1
3
  module Feedjira
2
4
  module FeedEntryUtilities
3
5
  include Enumerable
@@ -9,8 +11,9 @@ module Feedjira
9
11
 
10
12
  def parse_datetime(string)
11
13
  DateTime.parse(string).feed_utils_to_gm_time
12
- rescue
13
- warn "Failed to parse date #{string.inspect}"
14
+ rescue StandardError => e
15
+ Feedjira.logger.warn { "Failed to parse date #{string.inspect}" }
16
+ Feedjira.logger.warn(e)
14
17
  nil
15
18
  end
16
19
 
@@ -1,3 +1,5 @@
1
+ # rubocop:disable Style/Documentation
2
+ # rubocop:disable Style/DocumentationMethod
1
3
  module Feedjira
2
4
  module FeedUtilities
3
5
  UPDATABLE_ATTRIBUTES = %w(title feed_url url last_modified etag).freeze
@@ -11,7 +13,7 @@ module Feedjira
11
13
 
12
14
  module ClassMethods
13
15
  def parse(xml, &block)
14
- xml = xml.lstrip
16
+ xml = strip_whitespace(xml)
15
17
  xml = preprocess(xml) if preprocess_xml
16
18
  super xml, &block
17
19
  end
@@ -28,6 +30,14 @@ module Feedjira
28
30
  def preprocess_xml
29
31
  @preprocess_xml
30
32
  end
33
+
34
+ def strip_whitespace(xml)
35
+ if Feedjira.strip_whitespace
36
+ xml.strip
37
+ else
38
+ xml.lstrip
39
+ end
40
+ end
31
41
  end
32
42
 
33
43
  def last_modified
@@ -1 +1 @@
1
- module Feedjira::Parser; end
1
+ module Feedjira::Parser; end # rubocop:disable Style/Documentation
@@ -1,3 +1,4 @@
1
+ # rubocop:disable Style/DocumentationMethod
1
2
  module Feedjira
2
3
  module Parser
3
4
  # Parser for dealing with Atom feeds.
@@ -1,3 +1,4 @@
1
+ # rubocop:disable Style/DocumentationMethod
1
2
  module Feedjira
2
3
  module Parser
3
4
  # Parser for dealing with Atom feed entries.
@@ -1,3 +1,4 @@
1
+ # rubocop:disable Style/DocumentationMethod
1
2
  module Feedjira
2
3
  module Parser
3
4
  # Parser for dealing with Feedburner Atom feeds.
@@ -7,8 +8,12 @@ module Feedjira
7
8
 
8
9
  element :title
9
10
  element :subtitle, as: :description
10
- element :link, as: :url, value: :href, with: { type: 'text/html' }
11
- element :link, as: :feed_url, value: :href, with: { type: 'application/atom+xml' } # rubocop:disable Metrics/LineLength
11
+ element :link, as: :url_text_html, value: :href,
12
+ with: { type: 'text/html' }
13
+ element :link, as: :url_notype, value: :href, with: { type: nil }
14
+ element :link, as: :feed_url_link, value: :href, with: { type: 'application/atom+xml' } # rubocop:disable Metrics/LineLength
15
+ element :"atom10:link", as: :feed_url_atom10_link, value: :href,
16
+ with: { type: 'application/atom+xml' }
12
17
  elements :"atom10:link", as: :hubs, value: :href, with: { rel: 'hub' }
13
18
  elements :entry, as: :entries, class: AtomFeedBurnerEntry
14
19
 
@@ -16,6 +21,18 @@ module Feedjira
16
21
  ((/Atom/ =~ xml) && (/feedburner/ =~ xml) && !(/\<rss|\<rdf/ =~ xml)) || false # rubocop:disable Metrics/LineLength
17
22
  end
18
23
 
24
+ # Feed url is <link> with type="text/html" if present,
25
+ # <link> with no type attribute otherwise
26
+ def url
27
+ @url_text_html || url_notype
28
+ end
29
+
30
+ # Feed feed_url is <link> with type="application/atom+xml" if present,
31
+ # <atom10:link> with type="application/atom+xml" otherwise
32
+ def feed_url
33
+ @feed_url_link || feed_url_atom10_link
34
+ end
35
+
19
36
  def self.preprocess(xml)
20
37
  Preprocessor.new(xml).to_xml
21
38
  end
@@ -1,3 +1,4 @@
1
+ # rubocop:disable Style/DocumentationMethod
1
2
  module Feedjira
2
3
  module Parser
3
4
  # Parser for dealing with Feedburner Atom feed entries.
@@ -1,3 +1,4 @@
1
+ # rubocop:disable Style/DocumentationMethod
1
2
  module Feedjira
2
3
  module Parser
3
4
  # Parser for dealing with RSS feeds.
@@ -1,3 +1,4 @@
1
+ # rubocop:disable Style/Documentation
1
2
  module Feedjira
2
3
  module Parser
3
4
  class AtomYoutubeEntry
@@ -1,5 +1,6 @@
1
1
  require File.expand_path('./atom', File.dirname(__FILE__))
2
-
2
+ # rubocop:disable Style/Documentation
3
+ # rubocop:disable Style/DocumentationMethod
3
4
  module Feedjira
4
5
  module Parser
5
6
  class GoogleDocsAtom
@@ -1,3 +1,5 @@
1
+ # rubocop:disable Style/Documentation
2
+ # rubocop:disable Style/DocumentationMethod
1
3
  module Feedjira
2
4
  module Parser
3
5
  class GoogleDocsAtomEntry
@@ -1,3 +1,4 @@
1
+ # rubocop:disable Style/DocumentationMethod
1
2
  module Feedjira
2
3
  module Parser
3
4
  # iTunes is RSS 2.0 + some apple extensions
@@ -1,3 +1,4 @@
1
+ # rubocop:disable Style/DocumentationMethod
1
2
  module Feedjira
2
3
  module Parser
3
4
  # iTunes extensions to the standard RSS2.0 item
@@ -1,3 +1,4 @@
1
+ # rubocop:disable Style/Documentation
1
2
  module Feedjira
2
3
  module Parser
3
4
  class ITunesRSSOwner
@@ -1,3 +1,5 @@
1
+ # rubocop:disable Style/Documentation
2
+ # rubocop:disable Style/DocumentationMethod
1
3
  module Feedjira
2
4
  module Parser
3
5
  class PodloveChapter
@@ -1,3 +1,4 @@
1
+ # rubocop:disable Style/DocumentationMethod
1
2
  module Feedjira
2
3
  module Parser
3
4
  # Parser for dealing with RSS feeds.
@@ -1,3 +1,4 @@
1
+ # rubocop:disable Style/DocumentationMethod
1
2
  module Feedjira
2
3
  module Parser
3
4
  # Parser for dealing with RSS feeds.
@@ -1,3 +1,4 @@
1
+ # rubocop:disable Style/DocumentationMethod
1
2
  module Feedjira
2
3
  module Parser
3
4
  # Parser for dealing with RDF feed entries.