feedjira 2.0.0 → 2.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (82) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/.rubocop.yml +8 -0
  4. data/.travis.yml +31 -12
  5. data/CHANGELOG.md +15 -1
  6. data/Dangerfile +1 -0
  7. data/Gemfile +2 -1
  8. data/Rakefile +6 -1
  9. data/feedjira.gemspec +16 -14
  10. data/fixtures/vcr_cassettes/fetch_failure.yml +62 -0
  11. data/fixtures/vcr_cassettes/parse_error.yml +222 -0
  12. data/fixtures/vcr_cassettes/success.yml +281 -0
  13. data/lib/feedjira.rb +9 -0
  14. data/lib/feedjira/core_ext.rb +3 -3
  15. data/lib/feedjira/core_ext/date.rb +2 -1
  16. data/lib/feedjira/core_ext/string.rb +1 -1
  17. data/lib/feedjira/core_ext/time.rb +19 -16
  18. data/lib/feedjira/date_time_utilities.rb +24 -0
  19. data/lib/feedjira/date_time_utilities/date_time_language_parser.rb +22 -0
  20. data/lib/feedjira/date_time_utilities/date_time_pattern_parser.rb +29 -0
  21. data/lib/feedjira/feed.rb +27 -18
  22. data/lib/feedjira/feed_entry_utilities.rb +15 -17
  23. data/lib/feedjira/feed_utilities.rb +26 -21
  24. data/lib/feedjira/parser/atom.rb +9 -8
  25. data/lib/feedjira/parser/atom_entry.rb +10 -13
  26. data/lib/feedjira/parser/atom_feed_burner.rb +8 -10
  27. data/lib/feedjira/parser/atom_feed_burner_entry.rb +11 -14
  28. data/lib/feedjira/parser/atom_youtube.rb +20 -0
  29. data/lib/feedjira/parser/atom_youtube_entry.rb +29 -0
  30. data/lib/feedjira/parser/google_docs_atom.rb +6 -6
  31. data/lib/feedjira/parser/google_docs_atom_entry.rb +11 -11
  32. data/lib/feedjira/parser/itunes_rss.rb +39 -22
  33. data/lib/feedjira/parser/itunes_rss_category.rb +38 -0
  34. data/lib/feedjira/parser/itunes_rss_item.rb +28 -20
  35. data/lib/feedjira/parser/itunes_rss_owner.rb +3 -4
  36. data/lib/feedjira/parser/podlove_chapter.rb +20 -0
  37. data/lib/feedjira/parser/rss.rb +10 -8
  38. data/lib/feedjira/parser/rss_entry.rb +17 -21
  39. data/lib/feedjira/parser/rss_feed_burner.rb +4 -6
  40. data/lib/feedjira/parser/rss_feed_burner_entry.rb +23 -28
  41. data/lib/feedjira/parser/rss_image.rb +15 -0
  42. data/lib/feedjira/preprocessor.rb +2 -2
  43. data/lib/feedjira/version.rb +1 -1
  44. data/spec/feedjira/date_time_utilities_spec.rb +41 -0
  45. data/spec/feedjira/feed_entry_utilities_spec.rb +23 -19
  46. data/spec/feedjira/feed_spec.rb +109 -74
  47. data/spec/feedjira/feed_utilities_spec.rb +65 -63
  48. data/spec/feedjira/parser/atom_entry_spec.rb +54 -34
  49. data/spec/feedjira/parser/atom_feed_burner_entry_spec.rb +27 -20
  50. data/spec/feedjira/parser/atom_feed_burner_spec.rb +32 -30
  51. data/spec/feedjira/parser/atom_spec.rb +50 -48
  52. data/spec/feedjira/parser/atom_youtube_entry_spec.rb +86 -0
  53. data/spec/feedjira/parser/atom_youtube_spec.rb +43 -0
  54. data/spec/feedjira/parser/google_docs_atom_entry_spec.rb +5 -4
  55. data/spec/feedjira/parser/google_docs_atom_spec.rb +6 -6
  56. data/spec/feedjira/parser/itunes_rss_item_spec.rb +33 -29
  57. data/spec/feedjira/parser/itunes_rss_owner_spec.rb +10 -9
  58. data/spec/feedjira/parser/itunes_rss_spec.rb +83 -30
  59. data/spec/feedjira/parser/podlove_chapter_spec.rb +37 -0
  60. data/spec/feedjira/parser/rss_entry_spec.rb +50 -33
  61. data/spec/feedjira/parser/rss_feed_burner_entry_spec.rb +55 -33
  62. data/spec/feedjira/parser/rss_feed_burner_spec.rb +31 -26
  63. data/spec/feedjira/parser/rss_spec.rb +56 -24
  64. data/spec/feedjira/preprocessor_spec.rb +11 -3
  65. data/spec/sample_feeds.rb +29 -21
  66. data/spec/sample_feeds/AmazonWebServicesBlog.xml +797 -797
  67. data/spec/sample_feeds/AtomEscapedHTMLInPreTag.xml +13 -0
  68. data/spec/sample_feeds/CRE.xml +5849 -0
  69. data/spec/sample_feeds/FeedBurnerXHTML.xml +400 -400
  70. data/spec/sample_feeds/ITunesWithSingleQuotedAttributes.xml +67 -0
  71. data/spec/sample_feeds/PaulDixExplainsNothing.xml +175 -175
  72. data/spec/sample_feeds/PaulDixExplainsNothingAlternate.xml +175 -175
  73. data/spec/sample_feeds/PaulDixExplainsNothingFirstEntryContent.xml +16 -16
  74. data/spec/sample_feeds/PaulDixExplainsNothingWFW.xml +174 -174
  75. data/spec/sample_feeds/TenderLovemaking.xml +12 -2
  76. data/spec/sample_feeds/TrotterCashionHome.xml +611 -611
  77. data/spec/sample_feeds/TypePadNews.xml +368 -368
  78. data/spec/sample_feeds/itunes.xml +18 -2
  79. data/spec/sample_feeds/pet_atom.xml +229 -229
  80. data/spec/sample_feeds/youtube_atom.xml +395 -0
  81. data/spec/spec_helper.rb +6 -0
  82. metadata +112 -27
@@ -6,18 +6,18 @@ module Feedjira
6
6
  include SAXMachine
7
7
  include FeedUtilities
8
8
  element :title
9
- element :subtitle, :as => :description
10
- element :link, :as => :url, :value => :href, :with => {:type => "text/html"}
11
- element :link, :as => :feed_url, :value => :href, :with => {:type => "application/atom+xml"}
12
- elements :link, :as => :links, :value => :href
13
- elements :entry, :as => :entries, :class => GoogleDocsAtomEntry
9
+ element :subtitle, as: :description
10
+ element :link, as: :url, value: :href, with: { type: 'text/html' }
11
+ element :link, as: :feed_url, value: :href, with: { type: 'application/atom+xml' } # rubocop:disable Metrics/LineLength
12
+ elements :link, as: :links, value: :href
13
+ elements :entry, as: :entries, class: GoogleDocsAtomEntry
14
14
 
15
15
  def url
16
16
  @url ||= links.first
17
17
  end
18
18
 
19
19
  def self.able_to_parse?(xml) #:nodoc:
20
- %r{<id>https?://docs.google.com/.*\</id\>} =~ xml
20
+ %r{<id>https?://docs\.google\.com/.*\</id\>} =~ xml
21
21
  end
22
22
 
23
23
  def feed_url
@@ -5,21 +5,21 @@ module Feedjira
5
5
  include FeedEntryUtilities
6
6
 
7
7
  element :title
8
- element :link, :as => :url, :value => :href, :with => {:type => "text/html", :rel => "alternate"}
9
- element :name, :as => :author
8
+ element :link, as: :url, value: :href, with: { type: 'text/html', rel: 'alternate' } # rubocop:disable Metrics/LineLength
9
+ element :name, as: :author
10
10
  element :content
11
11
  element :summary
12
12
  element :published
13
- element :id, :as => :entry_id
14
- element :created, :as => :published
15
- element :issued, :as => :published
13
+ element :id, as: :entry_id
14
+ element :created, as: :published
15
+ element :issued, as: :published
16
16
  element :updated
17
- element :modified, :as => :updated
18
- elements :category, :as => :categories, :value => :term
19
- elements :link, :as => :links, :value => :href
20
- element :"docs:md5Checksum", :as => :checksum
21
- element :"docs:filename", :as => :original_filename
22
- element :"docs:suggestedFilename", :as => :suggested_filename
17
+ element :modified, as: :updated
18
+ elements :category, as: :categories, value: :term
19
+ elements :link, as: :links, value: :href
20
+ element :"docs:md5Checksum", as: :checksum
21
+ element :"docs:filename", as: :original_filename
22
+ element :"docs:suggestedFilename", as: :suggested_filename
23
23
 
24
24
  def url
25
25
  @url ||= links.first
@@ -1,8 +1,10 @@
1
1
  module Feedjira
2
-
3
2
  module Parser
4
3
  # iTunes is RSS 2.0 + some apple extensions
5
- # Source: http://www.apple.com/itunes/whatson/podcasts/specs.html
4
+ # Sources:
5
+ # * https://cyber.harvard.edu/rss/rss.html
6
+ # * http://lists.apple.com/archives/syndication-dev/2005/Nov/msg00002.html
7
+ # * https://help.apple.com/itc/podcasts_connect/
6
8
  class ITunesRSS
7
9
  include SAXMachine
8
10
  include FeedUtilities
@@ -12,39 +14,54 @@ module Feedjira
12
14
  # RSS 2.0 elements that need including
13
15
  element :copyright
14
16
  element :description
17
+ element :image, class: RSSImage
15
18
  element :language
19
+ element :lastBuildDate, as: :last_built
20
+ element :link, as: :url
16
21
  element :managingEditor
22
+ element :rss, as: :version, value: :version
17
23
  element :title
18
- element :link, :as => :url
24
+ element :ttl
19
25
 
20
26
  # If author is not present use managingEditor on the channel
21
- element :"itunes:author", :as => :itunes_author
22
- element :"itunes:block", :as => :itunes_block
23
- element :"itunes:image", :value => :href, :as => :itunes_image
24
- element :"itunes:explicit", :as => :itunes_explicit
25
- element :"itunes:keywords", :as => :itunes_keywords
27
+ element :"itunes:author", as: :itunes_author
28
+ element :"itunes:block", as: :itunes_block
29
+ element :"itunes:image", value: :href, as: :itunes_image
30
+ element :"itunes:explicit", as: :itunes_explicit
31
+ element :"itunes:complete", as: :itunes_complete
32
+ element :"itunes:keywords", as: :itunes_keywords
33
+
26
34
  # New URL for the podcast feed
27
- element :"itunes:new_feed_url", :as => :itunes_new_feed_url
28
- element :"itunes:subtitle", :as => :itunes_subtitle
35
+ element :"itunes:new_feed_url", as: :itunes_new_feed_url
36
+ element :"itunes:subtitle", as: :itunes_subtitle
37
+
29
38
  # If summary is not present, use the description tag
30
- element :"itunes:summary", :as => :itunes_summary
39
+ element :"itunes:summary", as: :itunes_summary
31
40
 
32
- # iTunes RSS feeds can have multiple main categories...
33
- # ...and multiple sub-categories per category
34
- # TODO subcategories not supported correctly - they are at the same level
35
- # as the main categories
36
- elements :"itunes:category", :as => :itunes_categories, :value => :text
41
+ # iTunes RSS feeds can have multiple main categories and multiple
42
+ # sub-categories per category.
43
+ elements :"itunes:category", as: :_itunes_categories,
44
+ class: ITunesRSSCategory
45
+ private :_itunes_categories
37
46
 
38
- elements :"itunes:owner", :as => :itunes_owners, :class => ITunesRSSOwner
47
+ def itunes_categories
48
+ _itunes_categories.flat_map do |itunes_category|
49
+ itunes_category.enum_for(:each_subcategory).to_a
50
+ end
51
+ end
39
52
 
40
- elements :item, :as => :entries, :class => ITunesRSSItem
53
+ def itunes_category_paths
54
+ _itunes_categories.flat_map do |itunes_category|
55
+ itunes_category.enum_for(:each_path).to_a
56
+ end
57
+ end
58
+
59
+ elements :"itunes:owner", as: :itunes_owners, class: ITunesRSSOwner
60
+ elements :item, as: :entries, class: ITunesRSSItem
41
61
 
42
62
  def self.able_to_parse?(xml)
43
- /xmlns:itunes\s?=\s?\"http:\/\/www.itunes.com\/dtds\/podcast-1.0.dtd\"/i =~ xml
63
+ %r{xmlns:itunes\s?=\s?[\"\']http://www\.itunes\.com/dtds/podcast-1\.0\.dtd[\"\']}i =~ xml # rubocop:disable Metrics/LineLength
44
64
  end
45
-
46
65
  end
47
-
48
66
  end
49
-
50
67
  end
@@ -0,0 +1,38 @@
1
+ module Feedjira
2
+ module Parser
3
+ # iTunes extensions to the standard RSS2.0 item
4
+ # Source: https://help.apple.com/itc/podcasts_connect/#/itcb54353390
5
+ class ITunesRSSCategory
6
+ include SAXMachine
7
+
8
+ attribute :text
9
+
10
+ elements :"itunes:category", as: :itunes_categories,
11
+ class: ITunesRSSCategory
12
+
13
+ def each_subcategory
14
+ return to_enum(__method__) unless block_given?
15
+
16
+ yield text
17
+
18
+ itunes_categories.each do |itunes_category|
19
+ itunes_category.each_subcategory(&proc)
20
+ end
21
+ end
22
+
23
+ def each_path(ancestors = [])
24
+ return to_enum(__method__, ancestors) unless block_given?
25
+
26
+ category_hierarchy = ancestors + [text]
27
+
28
+ if itunes_categories.empty?
29
+ yield category_hierarchy
30
+ else
31
+ itunes_categories.each do |itunes_category|
32
+ itunes_category.each_path(category_hierarchy, &proc)
33
+ end
34
+ end
35
+ end
36
+ end
37
+ end
38
+ end
@@ -1,5 +1,4 @@
1
1
  module Feedjira
2
-
3
2
  module Parser
4
3
  # iTunes extensions to the standard RSS2.0 item
5
4
  # Source: http://www.apple.com/itunes/whatson/podcasts/specs.html
@@ -8,29 +7,38 @@ module Feedjira
8
7
  include FeedEntryUtilities
9
8
 
10
9
  element :author
11
- element :guid, :as => :entry_id
10
+ element :guid, as: :entry_id
12
11
  element :title
13
- element :link, :as => :url
14
- element :description, :as => :summary
15
- element :"content:encoded", :as => :content
16
- element :pubDate, :as => :published
12
+ element :link, as: :url
13
+ element :description, as: :summary
14
+ element :"content:encoded", as: :content
15
+ element :pubDate, as: :published
17
16
 
18
17
  # If author is not present use author tag on the item
19
- element :"itunes:author", :as => :itunes_author
20
- element :"itunes:block", :as => :itunes_block
21
- element :"itunes:duration", :as => :itunes_duration
22
- element :"itunes:explicit", :as => :itunes_explicit
23
- element :"itunes:keywords", :as => :itunes_keywords
24
- element :"itunes:subtitle", :as => :itunes_subtitle
25
- element :"itunes:image", :value => :href, :as => :itunes_image
26
- element :"itunes:isClosedCaptioned", :as => :itunes_closed_captioned
27
- element :"itunes:order", :as => :itunes_order
18
+ element :"itunes:author", as: :itunes_author
19
+ element :"itunes:block", as: :itunes_block
20
+ element :"itunes:duration", as: :itunes_duration
21
+ element :"itunes:explicit", as: :itunes_explicit
22
+ element :"itunes:keywords", as: :itunes_keywords
23
+ element :"itunes:subtitle", as: :itunes_subtitle
24
+ element :"itunes:image", value: :href, as: :itunes_image
25
+ element :"itunes:isClosedCaptioned", as: :itunes_closed_captioned
26
+ element :"itunes:order", as: :itunes_order
27
+
28
28
  # If summary is not present, use the description tag
29
- element :"itunes:summary", :as => :itunes_summary
30
- element :enclosure, :value => :length, :as => :enclosure_length
31
- element :enclosure, :value => :type, :as => :enclosure_type
32
- element :enclosure, :value => :url, :as => :enclosure_url
29
+ element :"itunes:summary", as: :itunes_summary
30
+ element :enclosure, value: :length, as: :enclosure_length
31
+ element :enclosure, value: :type, as: :enclosure_type
32
+ element :enclosure, value: :url, as: :enclosure_url
33
+ elements 'psc:chapter', as: :raw_chapters, class: Feedjira::Parser::PodloveChapter # rubocop:disable Metrics/LineLength
34
+
35
+ # Podlove requires clients to re-order by start time in the
36
+ # event the publisher doesn't provide them in that
37
+ # order. SAXMachine doesn't have any sort capability afaik, so
38
+ # we have to sort chapters manually.
39
+ def chapters
40
+ raw_chapters.sort_by(&:start)
41
+ end
33
42
  end
34
43
  end
35
-
36
44
  end
@@ -1,12 +1,11 @@
1
1
  module Feedjira
2
-
3
2
  module Parser
4
3
  class ITunesRSSOwner
5
4
  include SAXMachine
6
5
  include FeedUtilities
7
- element :"itunes:name", :as => :name
8
- element :"itunes:email", :as => :email
6
+
7
+ element :"itunes:name", as: :name
8
+ element :"itunes:email", as: :email
9
9
  end
10
10
  end
11
-
12
11
  end
@@ -0,0 +1,20 @@
1
+ module Feedjira
2
+ module Parser
3
+ class PodloveChapter
4
+ include SAXMachine
5
+ include FeedEntryUtilities
6
+ attribute :start, as: :start_ntp
7
+ attribute :title
8
+ attribute :href, as: :url
9
+ attribute :image
10
+
11
+ def start
12
+ return unless start_ntp
13
+ parts = start_ntp.split(':')
14
+ parts.reverse.to_enum.with_index.map do |part, index|
15
+ part.to_f * (60**index)
16
+ end.reduce(:+)
17
+ end
18
+ end
19
+ end
20
+ end
@@ -1,24 +1,26 @@
1
1
  module Feedjira
2
-
3
2
  module Parser
4
3
  # Parser for dealing with RSS feeds.
4
+ # Source: https://cyber.harvard.edu/rss/rss.html
5
5
  class RSS
6
6
  include SAXMachine
7
7
  include FeedUtilities
8
+ element :description
9
+ element :image, class: RSSImage
10
+ element :language
11
+ element :lastBuildDate, as: :last_built
12
+ element :link, as: :url
8
13
  element :rss, as: :version, value: :version
9
14
  element :title
10
- element :description
11
- element :link, :as => :url
12
- elements :item, :as => :entries, :class => RSSEntry
13
- elements :"atom:link", :as => :hubs, :value => :href, :with => {:rel => "hub"}
15
+ element :ttl
16
+ elements :"atom:link", as: :hubs, value: :href, with: { rel: 'hub' }
17
+ elements :item, as: :entries, class: RSSEntry
14
18
 
15
19
  attr_accessor :feed_url
16
20
 
17
- def self.able_to_parse?(xml) #:nodoc:
21
+ def self.able_to_parse?(xml)
18
22
  (/\<rss|\<rdf/ =~ xml) && !(/feedburner/ =~ xml)
19
23
  end
20
24
  end
21
-
22
25
  end
23
-
24
26
  end
@@ -1,5 +1,4 @@
1
1
  module Feedjira
2
-
3
2
  module Parser
4
3
  # Parser for dealing with RDF feed entries.
5
4
  class RSSEntry
@@ -7,31 +6,28 @@ module Feedjira
7
6
  include FeedEntryUtilities
8
7
 
9
8
  element :title
10
- element :link, :as => :url
11
-
12
- element :"dc:creator", :as => :author
13
- element :author, :as => :author
14
- element :"content:encoded", :as => :content
15
- element :description, :as => :summary
9
+ element :link, as: :url
16
10
 
17
- element :"media:content", :as => :image, :value => :url
18
- element :enclosure, :as => :image, :value => :url
11
+ element :"dc:creator", as: :author
12
+ element :author, as: :author
13
+ element :"content:encoded", as: :content
14
+ element :description, as: :summary
19
15
 
20
- element :pubDate, :as => :published
21
- element :pubdate, :as => :published
22
- element :"dc:date", :as => :published
23
- element :"dc:Date", :as => :published
24
- element :"dcterms:created", :as => :published
16
+ element :"media:content", as: :image, value: :url
17
+ element :enclosure, as: :image, value: :url
25
18
 
19
+ element :pubDate, as: :published
20
+ element :pubdate, as: :published
21
+ element :"dc:date", as: :published
22
+ element :"dc:Date", as: :published
23
+ element :"dcterms:created", as: :published
26
24
 
27
- element :"dcterms:modified", :as => :updated
28
- element :issued, :as => :published
29
- elements :category, :as => :categories
25
+ element :"dcterms:modified", as: :updated
26
+ element :issued, as: :published
27
+ elements :category, as: :categories
30
28
 
31
- element :guid, :as => :entry_id
32
- element :"dc:identifier", :as => :entry_id
29
+ element :guid, as: :entry_id
30
+ element :"dc:identifier", as: :entry_id
33
31
  end
34
-
35
32
  end
36
-
37
33
  end
@@ -1,5 +1,4 @@
1
1
  module Feedjira
2
-
3
2
  module Parser
4
3
  # Parser for dealing with RSS feeds.
5
4
  class RSSFeedBurner
@@ -7,9 +6,10 @@ module Feedjira
7
6
  include FeedUtilities
8
7
  element :title
9
8
  element :description
10
- element :link, :as => :url
11
- elements :"atom10:link", :as => :hubs, :value => :href, :with => {:rel => "hub"}
12
- elements :item, :as => :entries, :class => RSSFeedBurnerEntry
9
+ element :link, as: :url
10
+ element :lastBuildDate, as: :last_built
11
+ elements :"atom10:link", as: :hubs, value: :href, with: { rel: 'hub' }
12
+ elements :item, as: :entries, class: RSSFeedBurnerEntry
13
13
 
14
14
  attr_accessor :feed_url
15
15
 
@@ -17,7 +17,5 @@ module Feedjira
17
17
  (/\<rss|\<rdf/ =~ xml) && (/feedburner/ =~ xml)
18
18
  end
19
19
  end
20
-
21
20
  end
22
-
23
21
  end
@@ -1,43 +1,38 @@
1
1
  module Feedjira
2
-
3
2
  module Parser
4
3
  # Parser for dealing with RDF feed entries.
5
4
  class RSSFeedBurnerEntry
6
- include SAXMachine
7
- include FeedEntryUtilities
8
-
9
- element :title
10
-
11
- element :"feedburner:origLink", :as => :url
12
- element :link, :as => :url
5
+ include SAXMachine
6
+ include FeedEntryUtilities
13
7
 
14
- element :"dc:creator", :as => :author
15
- element :author, :as => :author
16
- element :"content:encoded", :as => :content
17
- element :description, :as => :summary
8
+ element :title
18
9
 
19
- element :"media:content", :as => :image, :value => :url
20
- element :enclosure, :as => :image, :value => :url
10
+ element :"feedburner:origLink", as: :url
11
+ element :link, as: :url
21
12
 
22
- element :pubDate, :as => :published
23
- element :pubdate, :as => :published
24
- element :"dc:date", :as => :published
25
- element :"dc:Date", :as => :published
26
- element :"dcterms:created", :as => :published
13
+ element :"dc:creator", as: :author
14
+ element :author, as: :author
15
+ element :"content:encoded", as: :content
16
+ element :description, as: :summary
27
17
 
18
+ element :"media:content", as: :image, value: :url
19
+ element :enclosure, as: :image, value: :url
28
20
 
29
- element :"dcterms:modified", :as => :updated
30
- element :issued, :as => :published
31
- elements :category, :as => :categories
21
+ element :pubDate, as: :published
22
+ element :pubdate, as: :published
23
+ element :"dc:date", as: :published
24
+ element :"dc:Date", as: :published
25
+ element :"dcterms:created", as: :published
32
26
 
33
- element :guid, :as => :entry_id
27
+ element :"dcterms:modified", as: :updated
28
+ element :issued, as: :published
29
+ elements :category, as: :categories
34
30
 
35
- def url
36
- @url || @link
37
- end
31
+ element :guid, as: :entry_id
38
32
 
33
+ def url
34
+ @url || @link
35
+ end
39
36
  end
40
-
41
37
  end
42
-
43
38
  end