feedjira 2.0.0 → 2.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (82) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/.rubocop.yml +8 -0
  4. data/.travis.yml +31 -12
  5. data/CHANGELOG.md +15 -1
  6. data/Dangerfile +1 -0
  7. data/Gemfile +2 -1
  8. data/Rakefile +6 -1
  9. data/feedjira.gemspec +16 -14
  10. data/fixtures/vcr_cassettes/fetch_failure.yml +62 -0
  11. data/fixtures/vcr_cassettes/parse_error.yml +222 -0
  12. data/fixtures/vcr_cassettes/success.yml +281 -0
  13. data/lib/feedjira.rb +9 -0
  14. data/lib/feedjira/core_ext.rb +3 -3
  15. data/lib/feedjira/core_ext/date.rb +2 -1
  16. data/lib/feedjira/core_ext/string.rb +1 -1
  17. data/lib/feedjira/core_ext/time.rb +19 -16
  18. data/lib/feedjira/date_time_utilities.rb +24 -0
  19. data/lib/feedjira/date_time_utilities/date_time_language_parser.rb +22 -0
  20. data/lib/feedjira/date_time_utilities/date_time_pattern_parser.rb +29 -0
  21. data/lib/feedjira/feed.rb +27 -18
  22. data/lib/feedjira/feed_entry_utilities.rb +15 -17
  23. data/lib/feedjira/feed_utilities.rb +26 -21
  24. data/lib/feedjira/parser/atom.rb +9 -8
  25. data/lib/feedjira/parser/atom_entry.rb +10 -13
  26. data/lib/feedjira/parser/atom_feed_burner.rb +8 -10
  27. data/lib/feedjira/parser/atom_feed_burner_entry.rb +11 -14
  28. data/lib/feedjira/parser/atom_youtube.rb +20 -0
  29. data/lib/feedjira/parser/atom_youtube_entry.rb +29 -0
  30. data/lib/feedjira/parser/google_docs_atom.rb +6 -6
  31. data/lib/feedjira/parser/google_docs_atom_entry.rb +11 -11
  32. data/lib/feedjira/parser/itunes_rss.rb +39 -22
  33. data/lib/feedjira/parser/itunes_rss_category.rb +38 -0
  34. data/lib/feedjira/parser/itunes_rss_item.rb +28 -20
  35. data/lib/feedjira/parser/itunes_rss_owner.rb +3 -4
  36. data/lib/feedjira/parser/podlove_chapter.rb +20 -0
  37. data/lib/feedjira/parser/rss.rb +10 -8
  38. data/lib/feedjira/parser/rss_entry.rb +17 -21
  39. data/lib/feedjira/parser/rss_feed_burner.rb +4 -6
  40. data/lib/feedjira/parser/rss_feed_burner_entry.rb +23 -28
  41. data/lib/feedjira/parser/rss_image.rb +15 -0
  42. data/lib/feedjira/preprocessor.rb +2 -2
  43. data/lib/feedjira/version.rb +1 -1
  44. data/spec/feedjira/date_time_utilities_spec.rb +41 -0
  45. data/spec/feedjira/feed_entry_utilities_spec.rb +23 -19
  46. data/spec/feedjira/feed_spec.rb +109 -74
  47. data/spec/feedjira/feed_utilities_spec.rb +65 -63
  48. data/spec/feedjira/parser/atom_entry_spec.rb +54 -34
  49. data/spec/feedjira/parser/atom_feed_burner_entry_spec.rb +27 -20
  50. data/spec/feedjira/parser/atom_feed_burner_spec.rb +32 -30
  51. data/spec/feedjira/parser/atom_spec.rb +50 -48
  52. data/spec/feedjira/parser/atom_youtube_entry_spec.rb +86 -0
  53. data/spec/feedjira/parser/atom_youtube_spec.rb +43 -0
  54. data/spec/feedjira/parser/google_docs_atom_entry_spec.rb +5 -4
  55. data/spec/feedjira/parser/google_docs_atom_spec.rb +6 -6
  56. data/spec/feedjira/parser/itunes_rss_item_spec.rb +33 -29
  57. data/spec/feedjira/parser/itunes_rss_owner_spec.rb +10 -9
  58. data/spec/feedjira/parser/itunes_rss_spec.rb +83 -30
  59. data/spec/feedjira/parser/podlove_chapter_spec.rb +37 -0
  60. data/spec/feedjira/parser/rss_entry_spec.rb +50 -33
  61. data/spec/feedjira/parser/rss_feed_burner_entry_spec.rb +55 -33
  62. data/spec/feedjira/parser/rss_feed_burner_spec.rb +31 -26
  63. data/spec/feedjira/parser/rss_spec.rb +56 -24
  64. data/spec/feedjira/preprocessor_spec.rb +11 -3
  65. data/spec/sample_feeds.rb +29 -21
  66. data/spec/sample_feeds/AmazonWebServicesBlog.xml +797 -797
  67. data/spec/sample_feeds/AtomEscapedHTMLInPreTag.xml +13 -0
  68. data/spec/sample_feeds/CRE.xml +5849 -0
  69. data/spec/sample_feeds/FeedBurnerXHTML.xml +400 -400
  70. data/spec/sample_feeds/ITunesWithSingleQuotedAttributes.xml +67 -0
  71. data/spec/sample_feeds/PaulDixExplainsNothing.xml +175 -175
  72. data/spec/sample_feeds/PaulDixExplainsNothingAlternate.xml +175 -175
  73. data/spec/sample_feeds/PaulDixExplainsNothingFirstEntryContent.xml +16 -16
  74. data/spec/sample_feeds/PaulDixExplainsNothingWFW.xml +174 -174
  75. data/spec/sample_feeds/TenderLovemaking.xml +12 -2
  76. data/spec/sample_feeds/TrotterCashionHome.xml +611 -611
  77. data/spec/sample_feeds/TypePadNews.xml +368 -368
  78. data/spec/sample_feeds/itunes.xml +18 -2
  79. data/spec/sample_feeds/pet_atom.xml +229 -229
  80. data/spec/sample_feeds/youtube_atom.xml +395 -0
  81. data/spec/spec_helper.rb +6 -0
  82. metadata +112 -27
@@ -5,16 +5,14 @@ module Feedjira
5
5
  end
6
6
 
7
7
  def self.parse(xml, &block)
8
- if parser = determine_feed_parser_for_xml(xml)
9
- parse_with parser, xml, &block
10
- else
11
- raise NoParserAvailable.new("No valid parser for XML.")
12
- end
8
+ parser = determine_feed_parser_for_xml(xml)
9
+ raise NoParserAvailable, 'No valid parser for XML.' unless parser
10
+ parse_with parser, xml, &block
13
11
  end
14
12
 
15
13
  def self.determine_feed_parser_for_xml(xml)
16
14
  start_of_doc = xml.slice(0, 2000)
17
- feed_classes.detect {|klass| klass.able_to_parse?(start_of_doc)}
15
+ feed_classes.detect { |klass| klass.able_to_parse?(start_of_doc) }
18
16
  end
19
17
 
20
18
  def self.add_feed_class(klass)
@@ -25,6 +23,7 @@ module Feedjira
25
23
  @feed_classes ||= [
26
24
  Feedjira::Parser::RSSFeedBurner,
27
25
  Feedjira::Parser::GoogleDocsAtom,
26
+ Feedjira::Parser::AtomYoutube,
28
27
  Feedjira::Parser::AtomFeedBurner,
29
28
  Feedjira::Parser::Atom,
30
29
  Feedjira::Parser::ITunesRSS,
@@ -53,10 +52,14 @@ module Feedjira
53
52
  end
54
53
 
55
54
  def self.call_on_each_feed_entry(method, *parameters)
56
- feed_classes.each do |k|
57
- k.sax_config.collection_elements.each_value do |vl|
58
- vl.find_all{|v| (v.accessor == 'entries') && (v.data_class.class == Class)}.each do |v|
59
- v.data_class.send(method, *parameters)
55
+ feed_classes.each do |klass|
56
+ klass.sax_config.collection_elements.each_value do |value|
57
+ collection_configs = value.select do |v|
58
+ v.accessor == 'entries' && v.data_class.class == Class
59
+ end
60
+
61
+ collection_configs.each do |config|
62
+ config.data_class.send(method, *parameters)
60
63
  end
61
64
  end
62
65
  end
@@ -64,15 +67,14 @@ module Feedjira
64
67
 
65
68
  def self.fetch_and_parse(url)
66
69
  response = connection(url).get
67
- raise FetchFailure.new("Fetch failed - #{response.status}") unless response.success?
68
- xml = response.body
69
- parser_klass = determine_feed_parser_for_xml xml
70
- raise NoParserAvailable.new("No valid parser for XML.") unless parser_klass
71
-
72
- feed = parse_with parser_klass, xml
70
+ unless response.success?
71
+ raise FetchFailure, "Fetch failed - #{response.status}"
72
+ end
73
+ feed = parse response.body
73
74
  feed.feed_url = url
74
- feed.etag = response.headers['etag'].to_s.gsub(/"/, '')
75
- feed.last_modified = response.headers['last-modified']
75
+ feed.etag = response.headers['etag'].to_s.delete '"'
76
+
77
+ feed.last_modified = parse_last_modified(response)
76
78
  feed
77
79
  end
78
80
 
@@ -82,5 +84,12 @@ module Feedjira
82
84
  conn.adapter :net_http
83
85
  end
84
86
  end
87
+
88
+ def self.parse_last_modified(response)
89
+ DateTime.parse(response.headers['last-modified']).to_time
90
+ rescue
91
+ nil
92
+ end
93
+ private_class_method :parse_last_modified
85
94
  end
86
95
  end
@@ -1,23 +1,22 @@
1
1
  module Feedjira
2
2
  module FeedEntryUtilities
3
-
4
3
  include Enumerable
4
+ include DateTimeUtilities
5
5
 
6
6
  def published
7
7
  @published ||= @updated
8
8
  end
9
9
 
10
10
  def parse_datetime(string)
11
- begin
12
- DateTime.parse(string).feed_utils_to_gm_time
13
- rescue
14
- warn "Failed to parse date #{string.inspect}"
15
- nil
16
- end
11
+ DateTime.parse(string).feed_utils_to_gm_time
12
+ rescue
13
+ warn "Failed to parse date #{string.inspect}"
14
+ nil
17
15
  end
18
16
 
19
17
  ##
20
- # Returns the id of the entry or its url if not id is present, as some formats don't support it
18
+ # Returns the id of the entry or its url if no id is present, as some
19
+ # formats don't support it
21
20
  def id
22
21
  @entry_id ||= @url
23
22
  end
@@ -37,30 +36,29 @@ module Feedjira
37
36
  end
38
37
 
39
38
  def sanitize!
40
- %w[title author summary content image].each do |name|
41
- if self.respond_to?(name) && self.send(name).respond_to?(:sanitize!)
42
- self.send(name).send :sanitize!
39
+ %w(title author summary content image).each do |name|
40
+ if respond_to?(name) && send(name).respond_to?(:sanitize!)
41
+ send(name).send :sanitize!
43
42
  end
44
43
  end
45
44
  end
46
45
 
47
- alias_method :last_modified, :published
46
+ alias last_modified published
48
47
 
49
48
  def each
50
- @rss_fields ||= self.instance_variables
49
+ @rss_fields ||= instance_variables
51
50
 
52
51
  @rss_fields.each do |field|
53
- yield(field.to_s.sub('@', ''), self.instance_variable_get(field))
52
+ yield(field.to_s.sub('@', ''), instance_variable_get(field))
54
53
  end
55
54
  end
56
55
 
57
56
  def [](field)
58
- self.instance_variable_get("@#{field.to_s}")
57
+ instance_variable_get("@#{field}")
59
58
  end
60
59
 
61
60
  def []=(field, value)
62
- self.instance_variable_set("@#{field.to_s}", value)
61
+ instance_variable_set("@#{field}", value)
63
62
  end
64
-
65
63
  end
66
64
  end
@@ -1,6 +1,6 @@
1
1
  module Feedjira
2
2
  module FeedUtilities
3
- UPDATABLE_ATTRIBUTES = %w(title feed_url url last_modified etag)
3
+ UPDATABLE_ATTRIBUTES = %w(title feed_url url last_modified etag).freeze
4
4
 
5
5
  attr_writer :new_entries, :updated, :last_modified
6
6
  attr_accessor :etag
@@ -32,7 +32,8 @@ module Feedjira
32
32
 
33
33
  def last_modified
34
34
  @last_modified ||= begin
35
- entry = entries.reject {|e| e.published.nil? }.sort_by { |entry| entry.published if entry.published }.last
35
+ published = entries.reject { |e| e.published.nil? }
36
+ entry = published.sort_by { |e| e.published if e.published }.last
36
37
  entry ? entry.published : nil
37
38
  end
38
39
  end
@@ -45,13 +46,13 @@ module Feedjira
45
46
  @new_entries ||= []
46
47
  end
47
48
 
48
- def has_new_entries?
49
- new_entries.size > 0
49
+ def new_entries?
50
+ !new_entries.empty?
50
51
  end
51
52
 
52
53
  def update_from_feed(feed)
53
54
  self.new_entries += find_new_entries_for(feed)
54
- self.entries.unshift(*self.new_entries)
55
+ entries.unshift(*self.new_entries)
55
56
 
56
57
  @updated = false
57
58
 
@@ -61,7 +62,8 @@ module Feedjira
61
62
  end
62
63
 
63
64
  def update_attribute(feed, name)
64
- old_value, new_value = send(name), feed.send(name)
65
+ old_value = send(name)
66
+ new_value = feed.send(name)
65
67
 
66
68
  if old_value != new_value
67
69
  send("#{name}=", new_value)
@@ -72,33 +74,36 @@ module Feedjira
72
74
  end
73
75
 
74
76
  def sanitize_entries!
75
- entries.each {|entry| entry.sanitize!}
77
+ entries.each(&:sanitize!)
76
78
  end
77
79
 
78
80
  private
79
81
 
82
+ # This implementation is a hack, which is why it's so ugly. It's to get
83
+ # around the fact that not all feeds have a published date. However,
84
+ # they're always ordered with the newest one first. So we go through the
85
+ # entries just parsed and insert each one as a new entry until we get to
86
+ # one that has the same id as the newest for the feed.
80
87
  def find_new_entries_for(feed)
81
- # this implementation is a hack, which is why it's so ugly.
82
- # it's to get around the fact that not all feeds have a published date.
83
- # however, they're always ordered with the newest one first.
84
- # So we go through the entries just parsed and insert each one as a new entry
85
- # until we get to one that has the same id as the the newest for the feed
86
- return feed.entries if self.entries.length == 0
87
- latest_entry = self.entries.first
88
+ return feed.entries if entries.length.zero?
89
+
90
+ latest_entry = entries.first
88
91
  found_new_entries = []
92
+
89
93
  feed.entries.each do |entry|
90
- if entry.entry_id.nil? && latest_entry.entry_id.nil?
91
- break if entry.url == latest_entry.url
92
- else
93
- break if entry.entry_id == latest_entry.entry_id || entry.url == latest_entry.url
94
- end
94
+ break unless new_entry?(entry, latest_entry)
95
95
  found_new_entries << entry
96
96
  end
97
+
97
98
  found_new_entries
98
99
  end
99
100
 
100
- def existing_entry?(test_entry)
101
- entries.any? { |entry| entry.id == test_entry.id }
101
+ def new_entry?(entry, latest)
102
+ nil_ids = entry.entry_id.nil? && latest.entry_id.nil?
103
+ new_id = entry.entry_id != latest.entry_id
104
+ new_url = entry.url != latest.url
105
+
106
+ (nil_ids || new_id) && new_url
102
107
  end
103
108
  end
104
109
  end
@@ -4,16 +4,17 @@ module Feedjira
4
4
  class Atom
5
5
  include SAXMachine
6
6
  include FeedUtilities
7
+
7
8
  element :title
8
- element :subtitle, :as => :description
9
- element :link, :as => :url, :value => :href, :with => {:type => "text/html"}
10
- element :link, :as => :feed_url, :value => :href, :with => {:rel => "self"}
11
- elements :link, :as => :links, :value => :href
12
- elements :link, :as => :hubs, :value => :href, :with => {:rel => "hub"}
13
- elements :entry, :as => :entries, :class => AtomEntry
9
+ element :subtitle, as: :description
10
+ element :link, as: :url, value: :href, with: { type: 'text/html' }
11
+ element :link, as: :feed_url, value: :href, with: { rel: 'self' }
12
+ elements :link, as: :links, value: :href
13
+ elements :link, as: :hubs, value: :href, with: { rel: 'hub' }
14
+ elements :entry, as: :entries, class: AtomEntry
14
15
 
15
- def self.able_to_parse?(xml) #:nodoc:
16
- /\<feed[^\>]+xmlns\s?=\s?[\"|\'](http:\/\/www\.w3\.org\/2005\/Atom|http:\/\/purl\.org\/atom\/ns\#)[\"|\'][^\>]*\>/ =~ xml
16
+ def self.able_to_parse?(xml)
17
+ %r{\<feed[^\>]+xmlns\s?=\s?[\"\'](http://www\.w3\.org/2005/Atom|http://purl\.org/atom/ns\#)[\"\'][^\>]*\>} =~ xml # rubocop:disable Metrics/LineLength
17
18
  end
18
19
 
19
20
  def url
@@ -1,5 +1,4 @@
1
1
  module Feedjira
2
-
3
2
  module Parser
4
3
  # Parser for dealing with Atom feed entries.
5
4
  class AtomEntry
@@ -7,28 +6,26 @@ module Feedjira
7
6
  include FeedEntryUtilities
8
7
 
9
8
  element :title
10
- element :link, :as => :url, :value => :href, :with => {:type => "text/html", :rel => "alternate"}
11
- element :name, :as => :author
9
+ element :link, as: :url, value: :href, with: { type: 'text/html', rel: 'alternate' } # rubocop:disable Metrics/LineLength
10
+ element :name, as: :author
12
11
  element :content
13
12
  element :summary
14
13
 
15
- element :"media:content", :as => :image, :value => :url
16
- element :enclosure, :as => :image, :value => :href
14
+ element :"media:content", as: :image, value: :url
15
+ element :enclosure, as: :image, value: :href
17
16
 
18
17
  element :published
19
- element :id, :as => :entry_id
20
- element :created, :as => :published
21
- element :issued, :as => :published
18
+ element :id, as: :entry_id
19
+ element :created, as: :published
20
+ element :issued, as: :published
22
21
  element :updated
23
- element :modified, :as => :updated
24
- elements :category, :as => :categories, :value => :term
25
- elements :link, :as => :links, :value => :href
22
+ element :modified, as: :updated
23
+ elements :category, as: :categories, value: :term
24
+ elements :link, as: :links, value: :href
26
25
 
27
26
  def url
28
27
  @url ||= links.first
29
28
  end
30
29
  end
31
-
32
30
  end
33
-
34
31
  end
@@ -1,26 +1,24 @@
1
1
  module Feedjira
2
-
3
2
  module Parser
4
3
  # Parser for dealing with Feedburner Atom feeds.
5
4
  class AtomFeedBurner
6
5
  include SAXMachine
7
6
  include FeedUtilities
7
+
8
8
  element :title
9
- element :subtitle, :as => :description
10
- element :link, :as => :url, :value => :href, :with => {:type => "text/html"}
11
- element :link, :as => :feed_url, :value => :href, :with => {:type => "application/atom+xml"}
12
- elements :"atom10:link", :as => :hubs, :value => :href, :with => {:rel => "hub"}
13
- elements :entry, :as => :entries, :class => AtomFeedBurnerEntry
9
+ element :subtitle, as: :description
10
+ element :link, as: :url, value: :href, with: { type: 'text/html' }
11
+ element :link, as: :feed_url, value: :href, with: { type: 'application/atom+xml' } # rubocop:disable Metrics/LineLength
12
+ elements :"atom10:link", as: :hubs, value: :href, with: { rel: 'hub' }
13
+ elements :entry, as: :entries, class: AtomFeedBurnerEntry
14
14
 
15
- def self.able_to_parse?(xml) #:nodoc:
16
- ((/Atom/ =~ xml) && (/feedburner/ =~ xml) && !(/\<rss|\<rdf/ =~ xml)) || false
15
+ def self.able_to_parse?(xml)
16
+ ((/Atom/ =~ xml) && (/feedburner/ =~ xml) && !(/\<rss|\<rdf/ =~ xml)) || false # rubocop:disable Metrics/LineLength
17
17
  end
18
18
 
19
19
  def self.preprocess(xml)
20
20
  Preprocessor.new(xml).to_xml
21
21
  end
22
22
  end
23
-
24
23
  end
25
-
26
24
  end
@@ -1,5 +1,4 @@
1
1
  module Feedjira
2
-
3
2
  module Parser
4
3
  # Parser for dealing with Feedburner Atom feed entries.
5
4
  class AtomFeedBurnerEntry
@@ -7,29 +6,27 @@ module Feedjira
7
6
  include FeedEntryUtilities
8
7
 
9
8
  element :title
10
- element :name, :as => :author
11
- element :link, :as => :url, :value => :href, :with => {:type => "text/html", :rel => "alternate"}
12
- element :"feedburner:origLink", :as => :url
9
+ element :name, as: :author
10
+ element :link, as: :url, value: :href, with: { type: 'text/html', rel: 'alternate' } # rubocop:disable Metrics/LineLength
11
+ element :"feedburner:origLink", as: :url
13
12
  element :summary
14
13
  element :content
15
14
 
16
- element :"media:content", :as => :image, :value => :url
17
- element :enclosure, :as => :image, :value => :href
15
+ element :"media:content", as: :image, value: :url
16
+ element :enclosure, as: :image, value: :href
18
17
 
19
18
  element :published
20
- element :id, :as => :entry_id
21
- element :issued, :as => :published
22
- element :created, :as => :published
19
+ element :id, as: :entry_id
20
+ element :issued, as: :published
21
+ element :created, as: :published
23
22
  element :updated
24
- element :modified, :as => :updated
25
- elements :category, :as => :categories, :value => :term
26
- elements :link, :as => :links, :value => :href
23
+ element :modified, as: :updated
24
+ elements :category, as: :categories, value: :term
25
+ elements :link, as: :links, value: :href
27
26
 
28
27
  def url
29
28
  @url ||= links.first
30
29
  end
31
-
32
30
  end
33
31
  end
34
-
35
32
  end
@@ -0,0 +1,20 @@
1
+ module Feedjira
2
+ module Parser
3
+ # Parser for dealing with YouTube Atom feeds.
4
+ class AtomYoutube
5
+ include SAXMachine
6
+ include FeedUtilities
7
+ element :title
8
+ element :link, as: :url, value: :href, with: { rel: 'alternate' }
9
+ element :link, as: :feed_url, value: :href, with: { rel: 'self' }
10
+ element :name, as: :author
11
+ element :"yt:channelId", as: :youtube_channel_id
12
+
13
+ elements :entry, as: :entries, class: AtomYoutubeEntry
14
+
15
+ def self.able_to_parse?(xml) #:nodoc:
16
+ %r{xmlns:yt="http://www.youtube.com/xml/schemas/2015"} =~ xml
17
+ end
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,29 @@
1
+ module Feedjira
2
+ module Parser
3
+ class AtomYoutubeEntry
4
+ include SAXMachine
5
+ include FeedEntryUtilities
6
+
7
+ element :title
8
+ element :link, as: :url, value: :href, with: { rel: 'alternate' }
9
+ element :name, as: :author
10
+ element :"media:description", as: :content
11
+ element :summary
12
+ element :published
13
+ element :id, as: :entry_id
14
+ element :updated
15
+ element :"yt:videoId", as: :youtube_video_id
16
+ element :"media:title", as: :media_title
17
+ element :"media:content", as: :media_url, value: :url
18
+ element :"media:content", as: :media_type, value: :type
19
+ element :"media:content", as: :media_width, value: :width
20
+ element :"media:content", as: :media_height, value: :height
21
+ element :"media:thumbnail", as: :media_thumbnail_url, value: :url
22
+ element :"media:thumbnail", as: :media_thumbnail_width, value: :width
23
+ element :"media:thumbnail", as: :media_thumbnail_height, value: :height
24
+ element :"media:starRating", as: :media_star_count, value: :count
25
+ element :"media:starRating", as: :media_star_average, value: :average
26
+ element :"media:statistics", as: :media_views, value: :views
27
+ end
28
+ end
29
+ end