feedjira 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/.rubocop.yml +8 -0
  4. data/.travis.yml +31 -12
  5. data/CHANGELOG.md +15 -1
  6. data/Dangerfile +1 -0
  7. data/Gemfile +2 -1
  8. data/Rakefile +6 -1
  9. data/feedjira.gemspec +16 -14
  10. data/fixtures/vcr_cassettes/fetch_failure.yml +62 -0
  11. data/fixtures/vcr_cassettes/parse_error.yml +222 -0
  12. data/fixtures/vcr_cassettes/success.yml +281 -0
  13. data/lib/feedjira.rb +9 -0
  14. data/lib/feedjira/core_ext.rb +3 -3
  15. data/lib/feedjira/core_ext/date.rb +2 -1
  16. data/lib/feedjira/core_ext/string.rb +1 -1
  17. data/lib/feedjira/core_ext/time.rb +19 -16
  18. data/lib/feedjira/date_time_utilities.rb +24 -0
  19. data/lib/feedjira/date_time_utilities/date_time_language_parser.rb +22 -0
  20. data/lib/feedjira/date_time_utilities/date_time_pattern_parser.rb +29 -0
  21. data/lib/feedjira/feed.rb +27 -18
  22. data/lib/feedjira/feed_entry_utilities.rb +15 -17
  23. data/lib/feedjira/feed_utilities.rb +26 -21
  24. data/lib/feedjira/parser/atom.rb +9 -8
  25. data/lib/feedjira/parser/atom_entry.rb +10 -13
  26. data/lib/feedjira/parser/atom_feed_burner.rb +8 -10
  27. data/lib/feedjira/parser/atom_feed_burner_entry.rb +11 -14
  28. data/lib/feedjira/parser/atom_youtube.rb +20 -0
  29. data/lib/feedjira/parser/atom_youtube_entry.rb +29 -0
  30. data/lib/feedjira/parser/google_docs_atom.rb +6 -6
  31. data/lib/feedjira/parser/google_docs_atom_entry.rb +11 -11
  32. data/lib/feedjira/parser/itunes_rss.rb +39 -22
  33. data/lib/feedjira/parser/itunes_rss_category.rb +38 -0
  34. data/lib/feedjira/parser/itunes_rss_item.rb +28 -20
  35. data/lib/feedjira/parser/itunes_rss_owner.rb +3 -4
  36. data/lib/feedjira/parser/podlove_chapter.rb +20 -0
  37. data/lib/feedjira/parser/rss.rb +10 -8
  38. data/lib/feedjira/parser/rss_entry.rb +17 -21
  39. data/lib/feedjira/parser/rss_feed_burner.rb +4 -6
  40. data/lib/feedjira/parser/rss_feed_burner_entry.rb +23 -28
  41. data/lib/feedjira/parser/rss_image.rb +15 -0
  42. data/lib/feedjira/preprocessor.rb +2 -2
  43. data/lib/feedjira/version.rb +1 -1
  44. data/spec/feedjira/date_time_utilities_spec.rb +41 -0
  45. data/spec/feedjira/feed_entry_utilities_spec.rb +23 -19
  46. data/spec/feedjira/feed_spec.rb +109 -74
  47. data/spec/feedjira/feed_utilities_spec.rb +65 -63
  48. data/spec/feedjira/parser/atom_entry_spec.rb +54 -34
  49. data/spec/feedjira/parser/atom_feed_burner_entry_spec.rb +27 -20
  50. data/spec/feedjira/parser/atom_feed_burner_spec.rb +32 -30
  51. data/spec/feedjira/parser/atom_spec.rb +50 -48
  52. data/spec/feedjira/parser/atom_youtube_entry_spec.rb +86 -0
  53. data/spec/feedjira/parser/atom_youtube_spec.rb +43 -0
  54. data/spec/feedjira/parser/google_docs_atom_entry_spec.rb +5 -4
  55. data/spec/feedjira/parser/google_docs_atom_spec.rb +6 -6
  56. data/spec/feedjira/parser/itunes_rss_item_spec.rb +33 -29
  57. data/spec/feedjira/parser/itunes_rss_owner_spec.rb +10 -9
  58. data/spec/feedjira/parser/itunes_rss_spec.rb +83 -30
  59. data/spec/feedjira/parser/podlove_chapter_spec.rb +37 -0
  60. data/spec/feedjira/parser/rss_entry_spec.rb +50 -33
  61. data/spec/feedjira/parser/rss_feed_burner_entry_spec.rb +55 -33
  62. data/spec/feedjira/parser/rss_feed_burner_spec.rb +31 -26
  63. data/spec/feedjira/parser/rss_spec.rb +56 -24
  64. data/spec/feedjira/preprocessor_spec.rb +11 -3
  65. data/spec/sample_feeds.rb +29 -21
  66. data/spec/sample_feeds/AmazonWebServicesBlog.xml +797 -797
  67. data/spec/sample_feeds/AtomEscapedHTMLInPreTag.xml +13 -0
  68. data/spec/sample_feeds/CRE.xml +5849 -0
  69. data/spec/sample_feeds/FeedBurnerXHTML.xml +400 -400
  70. data/spec/sample_feeds/ITunesWithSingleQuotedAttributes.xml +67 -0
  71. data/spec/sample_feeds/PaulDixExplainsNothing.xml +175 -175
  72. data/spec/sample_feeds/PaulDixExplainsNothingAlternate.xml +175 -175
  73. data/spec/sample_feeds/PaulDixExplainsNothingFirstEntryContent.xml +16 -16
  74. data/spec/sample_feeds/PaulDixExplainsNothingWFW.xml +174 -174
  75. data/spec/sample_feeds/TenderLovemaking.xml +12 -2
  76. data/spec/sample_feeds/TrotterCashionHome.xml +611 -611
  77. data/spec/sample_feeds/TypePadNews.xml +368 -368
  78. data/spec/sample_feeds/itunes.xml +18 -2
  79. data/spec/sample_feeds/pet_atom.xml +229 -229
  80. data/spec/sample_feeds/youtube_atom.xml +395 -0
  81. data/spec/spec_helper.rb +6 -0
  82. metadata +112 -27
@@ -5,16 +5,14 @@ module Feedjira
5
5
  end
6
6
 
7
7
  def self.parse(xml, &block)
8
- if parser = determine_feed_parser_for_xml(xml)
9
- parse_with parser, xml, &block
10
- else
11
- raise NoParserAvailable.new("No valid parser for XML.")
12
- end
8
+ parser = determine_feed_parser_for_xml(xml)
9
+ raise NoParserAvailable, 'No valid parser for XML.' unless parser
10
+ parse_with parser, xml, &block
13
11
  end
14
12
 
15
13
  def self.determine_feed_parser_for_xml(xml)
16
14
  start_of_doc = xml.slice(0, 2000)
17
- feed_classes.detect {|klass| klass.able_to_parse?(start_of_doc)}
15
+ feed_classes.detect { |klass| klass.able_to_parse?(start_of_doc) }
18
16
  end
19
17
 
20
18
  def self.add_feed_class(klass)
@@ -25,6 +23,7 @@ module Feedjira
25
23
  @feed_classes ||= [
26
24
  Feedjira::Parser::RSSFeedBurner,
27
25
  Feedjira::Parser::GoogleDocsAtom,
26
+ Feedjira::Parser::AtomYoutube,
28
27
  Feedjira::Parser::AtomFeedBurner,
29
28
  Feedjira::Parser::Atom,
30
29
  Feedjira::Parser::ITunesRSS,
@@ -53,10 +52,14 @@ module Feedjira
53
52
  end
54
53
 
55
54
  def self.call_on_each_feed_entry(method, *parameters)
56
- feed_classes.each do |k|
57
- k.sax_config.collection_elements.each_value do |vl|
58
- vl.find_all{|v| (v.accessor == 'entries') && (v.data_class.class == Class)}.each do |v|
59
- v.data_class.send(method, *parameters)
55
+ feed_classes.each do |klass|
56
+ klass.sax_config.collection_elements.each_value do |value|
57
+ collection_configs = value.select do |v|
58
+ v.accessor == 'entries' && v.data_class.class == Class
59
+ end
60
+
61
+ collection_configs.each do |config|
62
+ config.data_class.send(method, *parameters)
60
63
  end
61
64
  end
62
65
  end
@@ -64,15 +67,14 @@ module Feedjira
64
67
 
65
68
  def self.fetch_and_parse(url)
66
69
  response = connection(url).get
67
- raise FetchFailure.new("Fetch failed - #{response.status}") unless response.success?
68
- xml = response.body
69
- parser_klass = determine_feed_parser_for_xml xml
70
- raise NoParserAvailable.new("No valid parser for XML.") unless parser_klass
71
-
72
- feed = parse_with parser_klass, xml
70
+ unless response.success?
71
+ raise FetchFailure, "Fetch failed - #{response.status}"
72
+ end
73
+ feed = parse response.body
73
74
  feed.feed_url = url
74
- feed.etag = response.headers['etag'].to_s.gsub(/"/, '')
75
- feed.last_modified = response.headers['last-modified']
75
+ feed.etag = response.headers['etag'].to_s.delete '"'
76
+
77
+ feed.last_modified = parse_last_modified(response)
76
78
  feed
77
79
  end
78
80
 
@@ -82,5 +84,12 @@ module Feedjira
82
84
  conn.adapter :net_http
83
85
  end
84
86
  end
87
+
88
+ def self.parse_last_modified(response)
89
+ DateTime.parse(response.headers['last-modified']).to_time
90
+ rescue
91
+ nil
92
+ end
93
+ private_class_method :parse_last_modified
85
94
  end
86
95
  end
@@ -1,23 +1,22 @@
1
1
  module Feedjira
2
2
  module FeedEntryUtilities
3
-
4
3
  include Enumerable
4
+ include DateTimeUtilities
5
5
 
6
6
  def published
7
7
  @published ||= @updated
8
8
  end
9
9
 
10
10
  def parse_datetime(string)
11
- begin
12
- DateTime.parse(string).feed_utils_to_gm_time
13
- rescue
14
- warn "Failed to parse date #{string.inspect}"
15
- nil
16
- end
11
+ DateTime.parse(string).feed_utils_to_gm_time
12
+ rescue
13
+ warn "Failed to parse date #{string.inspect}"
14
+ nil
17
15
  end
18
16
 
19
17
  ##
20
- # Returns the id of the entry or its url if not id is present, as some formats don't support it
18
+ # Returns the id of the entry or its url if no id is present, as some
19
+ # formats don't support it
21
20
  def id
22
21
  @entry_id ||= @url
23
22
  end
@@ -37,30 +36,29 @@ module Feedjira
37
36
  end
38
37
 
39
38
  def sanitize!
40
- %w[title author summary content image].each do |name|
41
- if self.respond_to?(name) && self.send(name).respond_to?(:sanitize!)
42
- self.send(name).send :sanitize!
39
+ %w(title author summary content image).each do |name|
40
+ if respond_to?(name) && send(name).respond_to?(:sanitize!)
41
+ send(name).send :sanitize!
43
42
  end
44
43
  end
45
44
  end
46
45
 
47
- alias_method :last_modified, :published
46
+ alias last_modified published
48
47
 
49
48
  def each
50
- @rss_fields ||= self.instance_variables
49
+ @rss_fields ||= instance_variables
51
50
 
52
51
  @rss_fields.each do |field|
53
- yield(field.to_s.sub('@', ''), self.instance_variable_get(field))
52
+ yield(field.to_s.sub('@', ''), instance_variable_get(field))
54
53
  end
55
54
  end
56
55
 
57
56
  def [](field)
58
- self.instance_variable_get("@#{field.to_s}")
57
+ instance_variable_get("@#{field}")
59
58
  end
60
59
 
61
60
  def []=(field, value)
62
- self.instance_variable_set("@#{field.to_s}", value)
61
+ instance_variable_set("@#{field}", value)
63
62
  end
64
-
65
63
  end
66
64
  end
@@ -1,6 +1,6 @@
1
1
  module Feedjira
2
2
  module FeedUtilities
3
- UPDATABLE_ATTRIBUTES = %w(title feed_url url last_modified etag)
3
+ UPDATABLE_ATTRIBUTES = %w(title feed_url url last_modified etag).freeze
4
4
 
5
5
  attr_writer :new_entries, :updated, :last_modified
6
6
  attr_accessor :etag
@@ -32,7 +32,8 @@ module Feedjira
32
32
 
33
33
  def last_modified
34
34
  @last_modified ||= begin
35
- entry = entries.reject {|e| e.published.nil? }.sort_by { |entry| entry.published if entry.published }.last
35
+ published = entries.reject { |e| e.published.nil? }
36
+ entry = published.sort_by { |e| e.published if e.published }.last
36
37
  entry ? entry.published : nil
37
38
  end
38
39
  end
@@ -45,13 +46,13 @@ module Feedjira
45
46
  @new_entries ||= []
46
47
  end
47
48
 
48
- def has_new_entries?
49
- new_entries.size > 0
49
+ def new_entries?
50
+ !new_entries.empty?
50
51
  end
51
52
 
52
53
  def update_from_feed(feed)
53
54
  self.new_entries += find_new_entries_for(feed)
54
- self.entries.unshift(*self.new_entries)
55
+ entries.unshift(*self.new_entries)
55
56
 
56
57
  @updated = false
57
58
 
@@ -61,7 +62,8 @@ module Feedjira
61
62
  end
62
63
 
63
64
  def update_attribute(feed, name)
64
- old_value, new_value = send(name), feed.send(name)
65
+ old_value = send(name)
66
+ new_value = feed.send(name)
65
67
 
66
68
  if old_value != new_value
67
69
  send("#{name}=", new_value)
@@ -72,33 +74,36 @@ module Feedjira
72
74
  end
73
75
 
74
76
  def sanitize_entries!
75
- entries.each {|entry| entry.sanitize!}
77
+ entries.each(&:sanitize!)
76
78
  end
77
79
 
78
80
  private
79
81
 
82
+ # This implementation is a hack, which is why it's so ugly. It's to get
83
+ # around the fact that not all feeds have a published date. However,
84
+ # they're always ordered with the newest one first. So we go through the
85
+ # entries just parsed and insert each one as a new entry until we get to
86
+ # one that has the same id as the newest for the feed.
80
87
  def find_new_entries_for(feed)
81
- # this implementation is a hack, which is why it's so ugly.
82
- # it's to get around the fact that not all feeds have a published date.
83
- # however, they're always ordered with the newest one first.
84
- # So we go through the entries just parsed and insert each one as a new entry
85
- # until we get to one that has the same id as the the newest for the feed
86
- return feed.entries if self.entries.length == 0
87
- latest_entry = self.entries.first
88
+ return feed.entries if entries.length.zero?
89
+
90
+ latest_entry = entries.first
88
91
  found_new_entries = []
92
+
89
93
  feed.entries.each do |entry|
90
- if entry.entry_id.nil? && latest_entry.entry_id.nil?
91
- break if entry.url == latest_entry.url
92
- else
93
- break if entry.entry_id == latest_entry.entry_id || entry.url == latest_entry.url
94
- end
94
+ break unless new_entry?(entry, latest_entry)
95
95
  found_new_entries << entry
96
96
  end
97
+
97
98
  found_new_entries
98
99
  end
99
100
 
100
- def existing_entry?(test_entry)
101
- entries.any? { |entry| entry.id == test_entry.id }
101
+ def new_entry?(entry, latest)
102
+ nil_ids = entry.entry_id.nil? && latest.entry_id.nil?
103
+ new_id = entry.entry_id != latest.entry_id
104
+ new_url = entry.url != latest.url
105
+
106
+ (nil_ids || new_id) && new_url
102
107
  end
103
108
  end
104
109
  end
@@ -4,16 +4,17 @@ module Feedjira
4
4
  class Atom
5
5
  include SAXMachine
6
6
  include FeedUtilities
7
+
7
8
  element :title
8
- element :subtitle, :as => :description
9
- element :link, :as => :url, :value => :href, :with => {:type => "text/html"}
10
- element :link, :as => :feed_url, :value => :href, :with => {:rel => "self"}
11
- elements :link, :as => :links, :value => :href
12
- elements :link, :as => :hubs, :value => :href, :with => {:rel => "hub"}
13
- elements :entry, :as => :entries, :class => AtomEntry
9
+ element :subtitle, as: :description
10
+ element :link, as: :url, value: :href, with: { type: 'text/html' }
11
+ element :link, as: :feed_url, value: :href, with: { rel: 'self' }
12
+ elements :link, as: :links, value: :href
13
+ elements :link, as: :hubs, value: :href, with: { rel: 'hub' }
14
+ elements :entry, as: :entries, class: AtomEntry
14
15
 
15
- def self.able_to_parse?(xml) #:nodoc:
16
- /\<feed[^\>]+xmlns\s?=\s?[\"|\'](http:\/\/www\.w3\.org\/2005\/Atom|http:\/\/purl\.org\/atom\/ns\#)[\"|\'][^\>]*\>/ =~ xml
16
+ def self.able_to_parse?(xml)
17
+ %r{\<feed[^\>]+xmlns\s?=\s?[\"\'](http://www\.w3\.org/2005/Atom|http://purl\.org/atom/ns\#)[\"\'][^\>]*\>} =~ xml # rubocop:disable Metrics/LineLength
17
18
  end
18
19
 
19
20
  def url
@@ -1,5 +1,4 @@
1
1
  module Feedjira
2
-
3
2
  module Parser
4
3
  # Parser for dealing with Atom feed entries.
5
4
  class AtomEntry
@@ -7,28 +6,26 @@ module Feedjira
7
6
  include FeedEntryUtilities
8
7
 
9
8
  element :title
10
- element :link, :as => :url, :value => :href, :with => {:type => "text/html", :rel => "alternate"}
11
- element :name, :as => :author
9
+ element :link, as: :url, value: :href, with: { type: 'text/html', rel: 'alternate' } # rubocop:disable Metrics/LineLength
10
+ element :name, as: :author
12
11
  element :content
13
12
  element :summary
14
13
 
15
- element :"media:content", :as => :image, :value => :url
16
- element :enclosure, :as => :image, :value => :href
14
+ element :"media:content", as: :image, value: :url
15
+ element :enclosure, as: :image, value: :href
17
16
 
18
17
  element :published
19
- element :id, :as => :entry_id
20
- element :created, :as => :published
21
- element :issued, :as => :published
18
+ element :id, as: :entry_id
19
+ element :created, as: :published
20
+ element :issued, as: :published
22
21
  element :updated
23
- element :modified, :as => :updated
24
- elements :category, :as => :categories, :value => :term
25
- elements :link, :as => :links, :value => :href
22
+ element :modified, as: :updated
23
+ elements :category, as: :categories, value: :term
24
+ elements :link, as: :links, value: :href
26
25
 
27
26
  def url
28
27
  @url ||= links.first
29
28
  end
30
29
  end
31
-
32
30
  end
33
-
34
31
  end
@@ -1,26 +1,24 @@
1
1
  module Feedjira
2
-
3
2
  module Parser
4
3
  # Parser for dealing with Feedburner Atom feeds.
5
4
  class AtomFeedBurner
6
5
  include SAXMachine
7
6
  include FeedUtilities
7
+
8
8
  element :title
9
- element :subtitle, :as => :description
10
- element :link, :as => :url, :value => :href, :with => {:type => "text/html"}
11
- element :link, :as => :feed_url, :value => :href, :with => {:type => "application/atom+xml"}
12
- elements :"atom10:link", :as => :hubs, :value => :href, :with => {:rel => "hub"}
13
- elements :entry, :as => :entries, :class => AtomFeedBurnerEntry
9
+ element :subtitle, as: :description
10
+ element :link, as: :url, value: :href, with: { type: 'text/html' }
11
+ element :link, as: :feed_url, value: :href, with: { type: 'application/atom+xml' } # rubocop:disable Metrics/LineLength
12
+ elements :"atom10:link", as: :hubs, value: :href, with: { rel: 'hub' }
13
+ elements :entry, as: :entries, class: AtomFeedBurnerEntry
14
14
 
15
- def self.able_to_parse?(xml) #:nodoc:
16
- ((/Atom/ =~ xml) && (/feedburner/ =~ xml) && !(/\<rss|\<rdf/ =~ xml)) || false
15
+ def self.able_to_parse?(xml)
16
+ ((/Atom/ =~ xml) && (/feedburner/ =~ xml) && !(/\<rss|\<rdf/ =~ xml)) || false # rubocop:disable Metrics/LineLength
17
17
  end
18
18
 
19
19
  def self.preprocess(xml)
20
20
  Preprocessor.new(xml).to_xml
21
21
  end
22
22
  end
23
-
24
23
  end
25
-
26
24
  end
@@ -1,5 +1,4 @@
1
1
  module Feedjira
2
-
3
2
  module Parser
4
3
  # Parser for dealing with Feedburner Atom feed entries.
5
4
  class AtomFeedBurnerEntry
@@ -7,29 +6,27 @@ module Feedjira
7
6
  include FeedEntryUtilities
8
7
 
9
8
  element :title
10
- element :name, :as => :author
11
- element :link, :as => :url, :value => :href, :with => {:type => "text/html", :rel => "alternate"}
12
- element :"feedburner:origLink", :as => :url
9
+ element :name, as: :author
10
+ element :link, as: :url, value: :href, with: { type: 'text/html', rel: 'alternate' } # rubocop:disable Metrics/LineLength
11
+ element :"feedburner:origLink", as: :url
13
12
  element :summary
14
13
  element :content
15
14
 
16
- element :"media:content", :as => :image, :value => :url
17
- element :enclosure, :as => :image, :value => :href
15
+ element :"media:content", as: :image, value: :url
16
+ element :enclosure, as: :image, value: :href
18
17
 
19
18
  element :published
20
- element :id, :as => :entry_id
21
- element :issued, :as => :published
22
- element :created, :as => :published
19
+ element :id, as: :entry_id
20
+ element :issued, as: :published
21
+ element :created, as: :published
23
22
  element :updated
24
- element :modified, :as => :updated
25
- elements :category, :as => :categories, :value => :term
26
- elements :link, :as => :links, :value => :href
23
+ element :modified, as: :updated
24
+ elements :category, as: :categories, value: :term
25
+ elements :link, as: :links, value: :href
27
26
 
28
27
  def url
29
28
  @url ||= links.first
30
29
  end
31
-
32
30
  end
33
31
  end
34
-
35
32
  end
@@ -0,0 +1,20 @@
1
+ module Feedjira
2
+ module Parser
3
+ # Parser for dealing with YouTube Atom feeds.
4
+ class AtomYoutube
5
+ include SAXMachine
6
+ include FeedUtilities
7
+ element :title
8
+ element :link, as: :url, value: :href, with: { rel: 'alternate' }
9
+ element :link, as: :feed_url, value: :href, with: { rel: 'self' }
10
+ element :name, as: :author
11
+ element :"yt:channelId", as: :youtube_channel_id
12
+
13
+ elements :entry, as: :entries, class: AtomYoutubeEntry
14
+
15
+ def self.able_to_parse?(xml) #:nodoc:
16
+ %r{xmlns:yt="http://www.youtube.com/xml/schemas/2015"} =~ xml
17
+ end
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,29 @@
1
+ module Feedjira
2
+ module Parser
3
+ class AtomYoutubeEntry
4
+ include SAXMachine
5
+ include FeedEntryUtilities
6
+
7
+ element :title
8
+ element :link, as: :url, value: :href, with: { rel: 'alternate' }
9
+ element :name, as: :author
10
+ element :"media:description", as: :content
11
+ element :summary
12
+ element :published
13
+ element :id, as: :entry_id
14
+ element :updated
15
+ element :"yt:videoId", as: :youtube_video_id
16
+ element :"media:title", as: :media_title
17
+ element :"media:content", as: :media_url, value: :url
18
+ element :"media:content", as: :media_type, value: :type
19
+ element :"media:content", as: :media_width, value: :width
20
+ element :"media:content", as: :media_height, value: :height
21
+ element :"media:thumbnail", as: :media_thumbnail_url, value: :url
22
+ element :"media:thumbnail", as: :media_thumbnail_width, value: :width
23
+ element :"media:thumbnail", as: :media_thumbnail_height, value: :height
24
+ element :"media:starRating", as: :media_star_count, value: :count
25
+ element :"media:starRating", as: :media_star_average, value: :average
26
+ element :"media:statistics", as: :media_views, value: :views
27
+ end
28
+ end
29
+ end