feedjira 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/.rubocop.yml +8 -0
  4. data/.travis.yml +31 -12
  5. data/CHANGELOG.md +15 -1
  6. data/Dangerfile +1 -0
  7. data/Gemfile +2 -1
  8. data/Rakefile +6 -1
  9. data/feedjira.gemspec +16 -14
  10. data/fixtures/vcr_cassettes/fetch_failure.yml +62 -0
  11. data/fixtures/vcr_cassettes/parse_error.yml +222 -0
  12. data/fixtures/vcr_cassettes/success.yml +281 -0
  13. data/lib/feedjira.rb +9 -0
  14. data/lib/feedjira/core_ext.rb +3 -3
  15. data/lib/feedjira/core_ext/date.rb +2 -1
  16. data/lib/feedjira/core_ext/string.rb +1 -1
  17. data/lib/feedjira/core_ext/time.rb +19 -16
  18. data/lib/feedjira/date_time_utilities.rb +24 -0
  19. data/lib/feedjira/date_time_utilities/date_time_language_parser.rb +22 -0
  20. data/lib/feedjira/date_time_utilities/date_time_pattern_parser.rb +29 -0
  21. data/lib/feedjira/feed.rb +27 -18
  22. data/lib/feedjira/feed_entry_utilities.rb +15 -17
  23. data/lib/feedjira/feed_utilities.rb +26 -21
  24. data/lib/feedjira/parser/atom.rb +9 -8
  25. data/lib/feedjira/parser/atom_entry.rb +10 -13
  26. data/lib/feedjira/parser/atom_feed_burner.rb +8 -10
  27. data/lib/feedjira/parser/atom_feed_burner_entry.rb +11 -14
  28. data/lib/feedjira/parser/atom_youtube.rb +20 -0
  29. data/lib/feedjira/parser/atom_youtube_entry.rb +29 -0
  30. data/lib/feedjira/parser/google_docs_atom.rb +6 -6
  31. data/lib/feedjira/parser/google_docs_atom_entry.rb +11 -11
  32. data/lib/feedjira/parser/itunes_rss.rb +39 -22
  33. data/lib/feedjira/parser/itunes_rss_category.rb +38 -0
  34. data/lib/feedjira/parser/itunes_rss_item.rb +28 -20
  35. data/lib/feedjira/parser/itunes_rss_owner.rb +3 -4
  36. data/lib/feedjira/parser/podlove_chapter.rb +20 -0
  37. data/lib/feedjira/parser/rss.rb +10 -8
  38. data/lib/feedjira/parser/rss_entry.rb +17 -21
  39. data/lib/feedjira/parser/rss_feed_burner.rb +4 -6
  40. data/lib/feedjira/parser/rss_feed_burner_entry.rb +23 -28
  41. data/lib/feedjira/parser/rss_image.rb +15 -0
  42. data/lib/feedjira/preprocessor.rb +2 -2
  43. data/lib/feedjira/version.rb +1 -1
  44. data/spec/feedjira/date_time_utilities_spec.rb +41 -0
  45. data/spec/feedjira/feed_entry_utilities_spec.rb +23 -19
  46. data/spec/feedjira/feed_spec.rb +109 -74
  47. data/spec/feedjira/feed_utilities_spec.rb +65 -63
  48. data/spec/feedjira/parser/atom_entry_spec.rb +54 -34
  49. data/spec/feedjira/parser/atom_feed_burner_entry_spec.rb +27 -20
  50. data/spec/feedjira/parser/atom_feed_burner_spec.rb +32 -30
  51. data/spec/feedjira/parser/atom_spec.rb +50 -48
  52. data/spec/feedjira/parser/atom_youtube_entry_spec.rb +86 -0
  53. data/spec/feedjira/parser/atom_youtube_spec.rb +43 -0
  54. data/spec/feedjira/parser/google_docs_atom_entry_spec.rb +5 -4
  55. data/spec/feedjira/parser/google_docs_atom_spec.rb +6 -6
  56. data/spec/feedjira/parser/itunes_rss_item_spec.rb +33 -29
  57. data/spec/feedjira/parser/itunes_rss_owner_spec.rb +10 -9
  58. data/spec/feedjira/parser/itunes_rss_spec.rb +83 -30
  59. data/spec/feedjira/parser/podlove_chapter_spec.rb +37 -0
  60. data/spec/feedjira/parser/rss_entry_spec.rb +50 -33
  61. data/spec/feedjira/parser/rss_feed_burner_entry_spec.rb +55 -33
  62. data/spec/feedjira/parser/rss_feed_burner_spec.rb +31 -26
  63. data/spec/feedjira/parser/rss_spec.rb +56 -24
  64. data/spec/feedjira/preprocessor_spec.rb +11 -3
  65. data/spec/sample_feeds.rb +29 -21
  66. data/spec/sample_feeds/AmazonWebServicesBlog.xml +797 -797
  67. data/spec/sample_feeds/AtomEscapedHTMLInPreTag.xml +13 -0
  68. data/spec/sample_feeds/CRE.xml +5849 -0
  69. data/spec/sample_feeds/FeedBurnerXHTML.xml +400 -400
  70. data/spec/sample_feeds/ITunesWithSingleQuotedAttributes.xml +67 -0
  71. data/spec/sample_feeds/PaulDixExplainsNothing.xml +175 -175
  72. data/spec/sample_feeds/PaulDixExplainsNothingAlternate.xml +175 -175
  73. data/spec/sample_feeds/PaulDixExplainsNothingFirstEntryContent.xml +16 -16
  74. data/spec/sample_feeds/PaulDixExplainsNothingWFW.xml +174 -174
  75. data/spec/sample_feeds/TenderLovemaking.xml +12 -2
  76. data/spec/sample_feeds/TrotterCashionHome.xml +611 -611
  77. data/spec/sample_feeds/TypePadNews.xml +368 -368
  78. data/spec/sample_feeds/itunes.xml +18 -2
  79. data/spec/sample_feeds/pet_atom.xml +229 -229
  80. data/spec/sample_feeds/youtube_atom.xml +395 -0
  81. data/spec/spec_helper.rb +6 -0
  82. metadata +112 -27
@@ -6,18 +6,18 @@ module Feedjira
6
6
  include SAXMachine
7
7
  include FeedUtilities
8
8
  element :title
9
- element :subtitle, :as => :description
10
- element :link, :as => :url, :value => :href, :with => {:type => "text/html"}
11
- element :link, :as => :feed_url, :value => :href, :with => {:type => "application/atom+xml"}
12
- elements :link, :as => :links, :value => :href
13
- elements :entry, :as => :entries, :class => GoogleDocsAtomEntry
9
+ element :subtitle, as: :description
10
+ element :link, as: :url, value: :href, with: { type: 'text/html' }
11
+ element :link, as: :feed_url, value: :href, with: { type: 'application/atom+xml' } # rubocop:disable Metrics/LineLength
12
+ elements :link, as: :links, value: :href
13
+ elements :entry, as: :entries, class: GoogleDocsAtomEntry
14
14
 
15
15
  def url
16
16
  @url ||= links.first
17
17
  end
18
18
 
19
19
  def self.able_to_parse?(xml) #:nodoc:
20
- %r{<id>https?://docs.google.com/.*\</id\>} =~ xml
20
+ %r{<id>https?://docs\.google\.com/.*\</id\>} =~ xml
21
21
  end
22
22
 
23
23
  def feed_url
@@ -5,21 +5,21 @@ module Feedjira
5
5
  include FeedEntryUtilities
6
6
 
7
7
  element :title
8
- element :link, :as => :url, :value => :href, :with => {:type => "text/html", :rel => "alternate"}
9
- element :name, :as => :author
8
+ element :link, as: :url, value: :href, with: { type: 'text/html', rel: 'alternate' } # rubocop:disable Metrics/LineLength
9
+ element :name, as: :author
10
10
  element :content
11
11
  element :summary
12
12
  element :published
13
- element :id, :as => :entry_id
14
- element :created, :as => :published
15
- element :issued, :as => :published
13
+ element :id, as: :entry_id
14
+ element :created, as: :published
15
+ element :issued, as: :published
16
16
  element :updated
17
- element :modified, :as => :updated
18
- elements :category, :as => :categories, :value => :term
19
- elements :link, :as => :links, :value => :href
20
- element :"docs:md5Checksum", :as => :checksum
21
- element :"docs:filename", :as => :original_filename
22
- element :"docs:suggestedFilename", :as => :suggested_filename
17
+ element :modified, as: :updated
18
+ elements :category, as: :categories, value: :term
19
+ elements :link, as: :links, value: :href
20
+ element :"docs:md5Checksum", as: :checksum
21
+ element :"docs:filename", as: :original_filename
22
+ element :"docs:suggestedFilename", as: :suggested_filename
23
23
 
24
24
  def url
25
25
  @url ||= links.first
@@ -1,8 +1,10 @@
1
1
  module Feedjira
2
-
3
2
  module Parser
4
3
  # iTunes is RSS 2.0 + some apple extensions
5
- # Source: http://www.apple.com/itunes/whatson/podcasts/specs.html
4
+ # Sources:
5
+ # * https://cyber.harvard.edu/rss/rss.html
6
+ # * http://lists.apple.com/archives/syndication-dev/2005/Nov/msg00002.html
7
+ # * https://help.apple.com/itc/podcasts_connect/
6
8
  class ITunesRSS
7
9
  include SAXMachine
8
10
  include FeedUtilities
@@ -12,39 +14,54 @@ module Feedjira
12
14
  # RSS 2.0 elements that need including
13
15
  element :copyright
14
16
  element :description
17
+ element :image, class: RSSImage
15
18
  element :language
19
+ element :lastBuildDate, as: :last_built
20
+ element :link, as: :url
16
21
  element :managingEditor
22
+ element :rss, as: :version, value: :version
17
23
  element :title
18
- element :link, :as => :url
24
+ element :ttl
19
25
 
20
26
  # If author is not present use managingEditor on the channel
21
- element :"itunes:author", :as => :itunes_author
22
- element :"itunes:block", :as => :itunes_block
23
- element :"itunes:image", :value => :href, :as => :itunes_image
24
- element :"itunes:explicit", :as => :itunes_explicit
25
- element :"itunes:keywords", :as => :itunes_keywords
27
+ element :"itunes:author", as: :itunes_author
28
+ element :"itunes:block", as: :itunes_block
29
+ element :"itunes:image", value: :href, as: :itunes_image
30
+ element :"itunes:explicit", as: :itunes_explicit
31
+ element :"itunes:complete", as: :itunes_complete
32
+ element :"itunes:keywords", as: :itunes_keywords
33
+
26
34
  # New URL for the podcast feed
27
- element :"itunes:new_feed_url", :as => :itunes_new_feed_url
28
- element :"itunes:subtitle", :as => :itunes_subtitle
35
+ element :"itunes:new_feed_url", as: :itunes_new_feed_url
36
+ element :"itunes:subtitle", as: :itunes_subtitle
37
+
29
38
  # If summary is not present, use the description tag
30
- element :"itunes:summary", :as => :itunes_summary
39
+ element :"itunes:summary", as: :itunes_summary
31
40
 
32
- # iTunes RSS feeds can have multiple main categories...
33
- # ...and multiple sub-categories per category
34
- # TODO subcategories not supported correctly - they are at the same level
35
- # as the main categories
36
- elements :"itunes:category", :as => :itunes_categories, :value => :text
41
+ # iTunes RSS feeds can have multiple main categories and multiple
42
+ # sub-categories per category.
43
+ elements :"itunes:category", as: :_itunes_categories,
44
+ class: ITunesRSSCategory
45
+ private :_itunes_categories
37
46
 
38
- elements :"itunes:owner", :as => :itunes_owners, :class => ITunesRSSOwner
47
+ def itunes_categories
48
+ _itunes_categories.flat_map do |itunes_category|
49
+ itunes_category.enum_for(:each_subcategory).to_a
50
+ end
51
+ end
39
52
 
40
- elements :item, :as => :entries, :class => ITunesRSSItem
53
+ def itunes_category_paths
54
+ _itunes_categories.flat_map do |itunes_category|
55
+ itunes_category.enum_for(:each_path).to_a
56
+ end
57
+ end
58
+
59
+ elements :"itunes:owner", as: :itunes_owners, class: ITunesRSSOwner
60
+ elements :item, as: :entries, class: ITunesRSSItem
41
61
 
42
62
  def self.able_to_parse?(xml)
43
- /xmlns:itunes\s?=\s?\"http:\/\/www.itunes.com\/dtds\/podcast-1.0.dtd\"/i =~ xml
63
+ %r{xmlns:itunes\s?=\s?[\"\']http://www\.itunes\.com/dtds/podcast-1\.0\.dtd[\"\']}i =~ xml # rubocop:disable Metrics/LineLength
44
64
  end
45
-
46
65
  end
47
-
48
66
  end
49
-
50
67
  end
@@ -0,0 +1,38 @@
1
+ module Feedjira
2
+ module Parser
3
+ # An iTunes category, which may contain nested sub-categories
4
+ # Source: https://help.apple.com/itc/podcasts_connect/#/itcb54353390
5
+ class ITunesRSSCategory
6
+ include SAXMachine
7
+
8
+ attribute :text
9
+
10
+ elements :"itunes:category", as: :itunes_categories,
11
+ class: ITunesRSSCategory
12
+
13
+ def each_subcategory
14
+ return to_enum(__method__) unless block_given?
15
+
16
+ yield text
17
+
18
+ itunes_categories.each do |itunes_category|
19
+ itunes_category.each_subcategory(&proc)
20
+ end
21
+ end
22
+
23
+ def each_path(ancestors = [])
24
+ return to_enum(__method__, ancestors) unless block_given?
25
+
26
+ category_hierarchy = ancestors + [text]
27
+
28
+ if itunes_categories.empty?
29
+ yield category_hierarchy
30
+ else
31
+ itunes_categories.each do |itunes_category|
32
+ itunes_category.each_path(category_hierarchy, &proc)
33
+ end
34
+ end
35
+ end
36
+ end
37
+ end
38
+ end
@@ -1,5 +1,4 @@
1
1
  module Feedjira
2
-
3
2
  module Parser
4
3
  # iTunes extensions to the standard RSS2.0 item
5
4
  # Source: http://www.apple.com/itunes/whatson/podcasts/specs.html
@@ -8,29 +7,38 @@ module Feedjira
8
7
  include FeedEntryUtilities
9
8
 
10
9
  element :author
11
- element :guid, :as => :entry_id
10
+ element :guid, as: :entry_id
12
11
  element :title
13
- element :link, :as => :url
14
- element :description, :as => :summary
15
- element :"content:encoded", :as => :content
16
- element :pubDate, :as => :published
12
+ element :link, as: :url
13
+ element :description, as: :summary
14
+ element :"content:encoded", as: :content
15
+ element :pubDate, as: :published
17
16
 
18
17
  # If author is not present use author tag on the item
19
- element :"itunes:author", :as => :itunes_author
20
- element :"itunes:block", :as => :itunes_block
21
- element :"itunes:duration", :as => :itunes_duration
22
- element :"itunes:explicit", :as => :itunes_explicit
23
- element :"itunes:keywords", :as => :itunes_keywords
24
- element :"itunes:subtitle", :as => :itunes_subtitle
25
- element :"itunes:image", :value => :href, :as => :itunes_image
26
- element :"itunes:isClosedCaptioned", :as => :itunes_closed_captioned
27
- element :"itunes:order", :as => :itunes_order
18
+ element :"itunes:author", as: :itunes_author
19
+ element :"itunes:block", as: :itunes_block
20
+ element :"itunes:duration", as: :itunes_duration
21
+ element :"itunes:explicit", as: :itunes_explicit
22
+ element :"itunes:keywords", as: :itunes_keywords
23
+ element :"itunes:subtitle", as: :itunes_subtitle
24
+ element :"itunes:image", value: :href, as: :itunes_image
25
+ element :"itunes:isClosedCaptioned", as: :itunes_closed_captioned
26
+ element :"itunes:order", as: :itunes_order
27
+
28
28
  # If summary is not present, use the description tag
29
- element :"itunes:summary", :as => :itunes_summary
30
- element :enclosure, :value => :length, :as => :enclosure_length
31
- element :enclosure, :value => :type, :as => :enclosure_type
32
- element :enclosure, :value => :url, :as => :enclosure_url
29
+ element :"itunes:summary", as: :itunes_summary
30
+ element :enclosure, value: :length, as: :enclosure_length
31
+ element :enclosure, value: :type, as: :enclosure_type
32
+ element :enclosure, value: :url, as: :enclosure_url
33
+ elements 'psc:chapter', as: :raw_chapters, class: Feedjira::Parser::PodloveChapter # rubocop:disable Metrics/LineLength
34
+
35
+ # Podlove requires clients to re-order by start time in the
36
+ # event the publisher doesn't provide them in that
37
+ # order. SAXMachine doesn't have any sort capability afaik, so
38
+ # we have to sort chapters manually.
39
+ def chapters
40
+ raw_chapters.sort_by(&:start)
41
+ end
33
42
  end
34
43
  end
35
-
36
44
  end
@@ -1,12 +1,11 @@
1
1
  module Feedjira
2
-
3
2
  module Parser
4
3
  class ITunesRSSOwner
5
4
  include SAXMachine
6
5
  include FeedUtilities
7
- element :"itunes:name", :as => :name
8
- element :"itunes:email", :as => :email
6
+
7
+ element :"itunes:name", as: :name
8
+ element :"itunes:email", as: :email
9
9
  end
10
10
  end
11
-
12
11
  end
@@ -0,0 +1,20 @@
1
+ module Feedjira
2
+ module Parser
3
+ class PodloveChapter
4
+ include SAXMachine
5
+ include FeedEntryUtilities
6
+ attribute :start, as: :start_ntp
7
+ attribute :title
8
+ attribute :href, as: :url
9
+ attribute :image
10
+
11
+ def start
12
+ return unless start_ntp
13
+ parts = start_ntp.split(':')
14
+ parts.reverse.to_enum.with_index.map do |part, index|
15
+ part.to_f * (60**index)
16
+ end.reduce(:+)
17
+ end
18
+ end
19
+ end
20
+ end
@@ -1,24 +1,26 @@
1
1
  module Feedjira
2
-
3
2
  module Parser
4
3
  # Parser for dealing with RSS feeds.
4
+ # Source: https://cyber.harvard.edu/rss/rss.html
5
5
  class RSS
6
6
  include SAXMachine
7
7
  include FeedUtilities
8
+ element :description
9
+ element :image, class: RSSImage
10
+ element :language
11
+ element :lastBuildDate, as: :last_built
12
+ element :link, as: :url
8
13
  element :rss, as: :version, value: :version
9
14
  element :title
10
- element :description
11
- element :link, :as => :url
12
- elements :item, :as => :entries, :class => RSSEntry
13
- elements :"atom:link", :as => :hubs, :value => :href, :with => {:rel => "hub"}
15
+ element :ttl
16
+ elements :"atom:link", as: :hubs, value: :href, with: { rel: 'hub' }
17
+ elements :item, as: :entries, class: RSSEntry
14
18
 
15
19
  attr_accessor :feed_url
16
20
 
17
- def self.able_to_parse?(xml) #:nodoc:
21
+ def self.able_to_parse?(xml)
18
22
  (/\<rss|\<rdf/ =~ xml) && !(/feedburner/ =~ xml)
19
23
  end
20
24
  end
21
-
22
25
  end
23
-
24
26
  end
@@ -1,5 +1,4 @@
1
1
  module Feedjira
2
-
3
2
  module Parser
4
3
  # Parser for dealing with RSS and RDF feed entries.
5
4
  class RSSEntry
@@ -7,31 +6,28 @@ module Feedjira
7
6
  include FeedEntryUtilities
8
7
 
9
8
  element :title
10
- element :link, :as => :url
11
-
12
- element :"dc:creator", :as => :author
13
- element :author, :as => :author
14
- element :"content:encoded", :as => :content
15
- element :description, :as => :summary
9
+ element :link, as: :url
16
10
 
17
- element :"media:content", :as => :image, :value => :url
18
- element :enclosure, :as => :image, :value => :url
11
+ element :"dc:creator", as: :author
12
+ element :author, as: :author
13
+ element :"content:encoded", as: :content
14
+ element :description, as: :summary
19
15
 
20
- element :pubDate, :as => :published
21
- element :pubdate, :as => :published
22
- element :"dc:date", :as => :published
23
- element :"dc:Date", :as => :published
24
- element :"dcterms:created", :as => :published
16
+ element :"media:content", as: :image, value: :url
17
+ element :enclosure, as: :image, value: :url
25
18
 
19
+ element :pubDate, as: :published
20
+ element :pubdate, as: :published
21
+ element :"dc:date", as: :published
22
+ element :"dc:Date", as: :published
23
+ element :"dcterms:created", as: :published
26
24
 
27
- element :"dcterms:modified", :as => :updated
28
- element :issued, :as => :published
29
- elements :category, :as => :categories
25
+ element :"dcterms:modified", as: :updated
26
+ element :issued, as: :published
27
+ elements :category, as: :categories
30
28
 
31
- element :guid, :as => :entry_id
32
- element :"dc:identifier", :as => :entry_id
29
+ element :guid, as: :entry_id
30
+ element :"dc:identifier", as: :entry_id
33
31
  end
34
-
35
32
  end
36
-
37
33
  end
@@ -1,5 +1,4 @@
1
1
  module Feedjira
2
-
3
2
  module Parser
4
3
  # Parser for dealing with RSS feeds.
5
4
  class RSSFeedBurner
@@ -7,9 +6,10 @@ module Feedjira
7
6
  include FeedUtilities
8
7
  element :title
9
8
  element :description
10
- element :link, :as => :url
11
- elements :"atom10:link", :as => :hubs, :value => :href, :with => {:rel => "hub"}
12
- elements :item, :as => :entries, :class => RSSFeedBurnerEntry
9
+ element :link, as: :url
10
+ element :lastBuildDate, as: :last_built
11
+ elements :"atom10:link", as: :hubs, value: :href, with: { rel: 'hub' }
12
+ elements :item, as: :entries, class: RSSFeedBurnerEntry
13
13
 
14
14
  attr_accessor :feed_url
15
15
 
@@ -17,7 +17,5 @@ module Feedjira
17
17
  (/\<rss|\<rdf/ =~ xml) && (/feedburner/ =~ xml)
18
18
  end
19
19
  end
20
-
21
20
  end
22
-
23
21
  end
@@ -1,43 +1,38 @@
1
1
  module Feedjira
2
-
3
2
  module Parser
4
3
  # Parser for dealing with FeedBurner RSS and RDF feed entries.
5
4
  class RSSFeedBurnerEntry
6
- include SAXMachine
7
- include FeedEntryUtilities
8
-
9
- element :title
10
-
11
- element :"feedburner:origLink", :as => :url
12
- element :link, :as => :url
5
+ include SAXMachine
6
+ include FeedEntryUtilities
13
7
 
14
- element :"dc:creator", :as => :author
15
- element :author, :as => :author
16
- element :"content:encoded", :as => :content
17
- element :description, :as => :summary
8
+ element :title
18
9
 
19
- element :"media:content", :as => :image, :value => :url
20
- element :enclosure, :as => :image, :value => :url
10
+ element :"feedburner:origLink", as: :url
11
+ element :link, as: :url
21
12
 
22
- element :pubDate, :as => :published
23
- element :pubdate, :as => :published
24
- element :"dc:date", :as => :published
25
- element :"dc:Date", :as => :published
26
- element :"dcterms:created", :as => :published
13
+ element :"dc:creator", as: :author
14
+ element :author, as: :author
15
+ element :"content:encoded", as: :content
16
+ element :description, as: :summary
27
17
 
18
+ element :"media:content", as: :image, value: :url
19
+ element :enclosure, as: :image, value: :url
28
20
 
29
- element :"dcterms:modified", :as => :updated
30
- element :issued, :as => :published
31
- elements :category, :as => :categories
21
+ element :pubDate, as: :published
22
+ element :pubdate, as: :published
23
+ element :"dc:date", as: :published
24
+ element :"dc:Date", as: :published
25
+ element :"dcterms:created", as: :published
32
26
 
33
- element :guid, :as => :entry_id
27
+ element :"dcterms:modified", as: :updated
28
+ element :issued, as: :published
29
+ elements :category, as: :categories
34
30
 
35
- def url
36
- @url || @link
37
- end
31
+ element :guid, as: :entry_id
38
32
 
33
+ def url
34
+ @url || @link
35
+ end
39
36
  end
40
-
41
37
  end
42
-
43
38
  end