feedjira 2.2.0 → 3.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (91) hide show
  1. checksums.yaml +4 -4
  2. data/.github/ISSUE_TEMPLATE/feed-parsing.md +15 -0
  3. data/.rubocop.yml +32 -8
  4. data/.rubocop_todo.yml +11 -0
  5. data/.travis.yml +3 -7
  6. data/CHANGELOG.md +18 -9
  7. data/CODE_OF_CONDUCT.md +74 -0
  8. data/Gemfile +8 -5
  9. data/README.md +46 -99
  10. data/Rakefile +8 -6
  11. data/feedjira.gemspec +31 -20
  12. data/lib/feedjira.rb +75 -41
  13. data/lib/feedjira/atom_entry_utilities.rb +51 -0
  14. data/lib/feedjira/configuration.rb +8 -10
  15. data/lib/feedjira/core_ext.rb +5 -3
  16. data/lib/feedjira/core_ext/date.rb +2 -1
  17. data/lib/feedjira/core_ext/string.rb +2 -1
  18. data/lib/feedjira/core_ext/time.rb +12 -12
  19. data/lib/feedjira/date_time_utilities.rb +8 -10
  20. data/lib/feedjira/date_time_utilities/date_time_epoch_parser.rb +3 -2
  21. data/lib/feedjira/date_time_utilities/date_time_language_parser.rb +4 -4
  22. data/lib/feedjira/date_time_utilities/date_time_pattern_parser.rb +11 -15
  23. data/lib/feedjira/feed.rb +12 -82
  24. data/lib/feedjira/feed_entry_utilities.rb +14 -7
  25. data/lib/feedjira/feed_utilities.rb +5 -4
  26. data/lib/feedjira/parser.rb +6 -1
  27. data/lib/feedjira/parser/atom.rb +6 -5
  28. data/lib/feedjira/parser/atom_entry.rb +4 -21
  29. data/lib/feedjira/parser/atom_feed_burner.rb +7 -6
  30. data/lib/feedjira/parser/atom_feed_burner_entry.rb +7 -18
  31. data/lib/feedjira/parser/atom_google_alerts.rb +26 -0
  32. data/lib/feedjira/parser/atom_google_alerts_entry.rb +21 -0
  33. data/lib/feedjira/parser/atom_youtube.rb +4 -3
  34. data/lib/feedjira/parser/atom_youtube_entry.rb +9 -8
  35. data/lib/feedjira/parser/globally_unique_identifier.rb +21 -0
  36. data/lib/feedjira/parser/google_docs_atom.rb +6 -6
  37. data/lib/feedjira/parser/google_docs_atom_entry.rb +3 -19
  38. data/lib/feedjira/parser/itunes_rss.rb +4 -3
  39. data/lib/feedjira/parser/itunes_rss_category.rb +6 -5
  40. data/lib/feedjira/parser/itunes_rss_item.rb +5 -8
  41. data/lib/feedjira/parser/itunes_rss_owner.rb +2 -1
  42. data/lib/feedjira/parser/json_feed.rb +41 -0
  43. data/lib/feedjira/parser/json_feed_item.rb +57 -0
  44. data/lib/feedjira/parser/podlove_chapter.rb +4 -3
  45. data/lib/feedjira/parser/rss.rb +5 -3
  46. data/lib/feedjira/parser/rss_entry.rb +3 -24
  47. data/lib/feedjira/parser/rss_feed_burner.rb +4 -3
  48. data/lib/feedjira/parser/rss_feed_burner_entry.rb +6 -26
  49. data/lib/feedjira/parser/rss_image.rb +2 -0
  50. data/lib/feedjira/preprocessor.rb +4 -4
  51. data/lib/feedjira/rss_entry_utilities.rb +53 -0
  52. data/lib/feedjira/version.rb +3 -1
  53. data/spec/feedjira/configuration_spec.rb +11 -16
  54. data/spec/feedjira/date_time_utilities_spec.rb +22 -20
  55. data/spec/feedjira/feed_entry_utilities_spec.rb +20 -18
  56. data/spec/feedjira/feed_spec.rb +17 -229
  57. data/spec/feedjira/feed_utilities_spec.rb +75 -73
  58. data/spec/feedjira/parser/atom_entry_spec.rb +41 -38
  59. data/spec/feedjira/parser/atom_feed_burner_entry_spec.rb +22 -20
  60. data/spec/feedjira/parser/atom_feed_burner_spec.rb +122 -118
  61. data/spec/feedjira/parser/atom_google_alerts_entry_spec.rb +34 -0
  62. data/spec/feedjira/parser/atom_google_alerts_spec.rb +62 -0
  63. data/spec/feedjira/parser/atom_spec.rb +83 -77
  64. data/spec/feedjira/parser/atom_youtube_entry_spec.rb +41 -39
  65. data/spec/feedjira/parser/atom_youtube_spec.rb +21 -19
  66. data/spec/feedjira/parser/google_docs_atom_entry_spec.rb +10 -8
  67. data/spec/feedjira/parser/google_docs_atom_spec.rb +25 -21
  68. data/spec/feedjira/parser/itunes_rss_item_spec.rb +39 -37
  69. data/spec/feedjira/parser/itunes_rss_owner_spec.rb +7 -5
  70. data/spec/feedjira/parser/itunes_rss_spec.rb +120 -116
  71. data/spec/feedjira/parser/json_feed_item_spec.rb +81 -0
  72. data/spec/feedjira/parser/json_feed_spec.rb +55 -0
  73. data/spec/feedjira/parser/podlove_chapter_spec.rb +14 -12
  74. data/spec/feedjira/parser/rss_entry_spec.rb +56 -34
  75. data/spec/feedjira/parser/rss_feed_burner_entry_spec.rb +36 -34
  76. data/spec/feedjira/parser/rss_feed_burner_spec.rb +49 -45
  77. data/spec/feedjira/parser/rss_spec.rb +38 -36
  78. data/spec/feedjira/preprocessor_spec.rb +9 -7
  79. data/spec/feedjira_spec.rb +166 -0
  80. data/spec/sample_feeds.rb +32 -29
  81. data/spec/sample_feeds/HuffPostCanada.xml +279 -0
  82. data/spec/sample_feeds/Permalinks.xml +22 -0
  83. data/spec/sample_feeds/a10.xml +72 -0
  84. data/spec/sample_feeds/google_alerts_atom.xml +1 -0
  85. data/spec/sample_feeds/json_feed.json +156 -0
  86. data/spec/spec_helper.rb +7 -5
  87. metadata +59 -70
  88. data/Dangerfile +0 -1
  89. data/fixtures/vcr_cassettes/fetch_failure.yml +0 -62
  90. data/fixtures/vcr_cassettes/parse_error.yml +0 -222
  91. data/fixtures/vcr_cassettes/success.yml +0 -281
@@ -1,4 +1,5 @@
1
- # rubocop:disable Style/DocumentationMethod
1
+ # frozen_string_literal: true
2
+
2
3
  module Feedjira
3
4
  module Parser
4
5
  # Parser for dealing with RSS feeds.
@@ -6,8 +7,8 @@ module Feedjira
6
7
  include SAXMachine
7
8
  include FeedUtilities
8
9
  element :title
9
- element :link, as: :url, value: :href, with: { rel: 'alternate' }
10
- element :link, as: :feed_url, value: :href, with: { rel: 'self' }
10
+ element :link, as: :url, value: :href, with: { rel: "alternate" }
11
+ element :link, as: :feed_url, value: :href, with: { rel: "self" }
11
12
  element :name, as: :author
12
13
  element :"yt:channelId", as: :youtube_channel_id
13
14
 
@@ -1,19 +1,20 @@
1
- # rubocop:disable Style/Documentation
1
+ # frozen_string_literal: true
2
+
2
3
  module Feedjira
3
4
  module Parser
4
5
  class AtomYoutubeEntry
5
6
  include SAXMachine
6
7
  include FeedEntryUtilities
8
+ include AtomEntryUtilities
9
+
10
+ sax_config.top_level_elements["link"].clear
11
+ sax_config.collection_elements["link"].clear
12
+
13
+ element :link, as: :url, value: :href, with: { rel: "alternate" }
7
14
 
8
- element :title
9
- element :link, as: :url, value: :href, with: { rel: 'alternate' }
10
- element :name, as: :author
11
15
  element :"media:description", as: :content
12
- element :summary
13
- element :published
14
- element :id, as: :entry_id
15
- element :updated
16
16
  element :"yt:videoId", as: :youtube_video_id
17
+ element :"yt:channelId", as: :youtube_channel_id
17
18
  element :"media:title", as: :media_title
18
19
  element :"media:content", as: :media_url, value: :url
19
20
  element :"media:content", as: :media_type, value: :type
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Feedjira
4
+ module Parser
5
+ class GloballyUniqueIdentifier
6
+ include SAXMachine
7
+
8
+ attribute :isPermaLink, as: :is_perma_link
9
+
10
+ value :guid
11
+
12
+ def perma_link?
13
+ is_perma_link != "false"
14
+ end
15
+
16
+ def url
17
+ perma_link? ? guid : nil
18
+ end
19
+ end
20
+ end
21
+ end
@@ -1,6 +1,6 @@
1
- require File.expand_path('./atom', File.dirname(__FILE__))
2
- # rubocop:disable Style/Documentation
3
- # rubocop:disable Style/DocumentationMethod
1
+ # frozen_string_literal: true
2
+
3
+ require File.expand_path("./atom", File.dirname(__FILE__))
4
4
  module Feedjira
5
5
  module Parser
6
6
  class GoogleDocsAtom
@@ -8,8 +8,8 @@ module Feedjira
8
8
  include FeedUtilities
9
9
  element :title
10
10
  element :subtitle, as: :description
11
- element :link, as: :url, value: :href, with: { type: 'text/html' }
12
- element :link, as: :feed_url, value: :href, with: { type: 'application/atom+xml' } # rubocop:disable Metrics/LineLength
11
+ element :link, as: :url, value: :href, with: { type: "text/html" }
12
+ element :link, as: :feed_url, value: :href, with: { type: "application/atom+xml" }
13
13
  elements :link, as: :links, value: :href
14
14
  elements :entry, as: :entries, class: GoogleDocsAtomEntry
15
15
 
@@ -18,7 +18,7 @@ module Feedjira
18
18
  end
19
19
 
20
20
  def self.able_to_parse?(xml) #:nodoc:
21
- %r{<id>https?://docs\.google\.com/.*\</id\>} =~ xml
21
+ %r{<id>https?://docs\.google\.com/.*</id>} =~ xml
22
22
  end
23
23
 
24
24
  def feed_url
@@ -1,31 +1,15 @@
1
- # rubocop:disable Style/Documentation
2
- # rubocop:disable Style/DocumentationMethod
1
+ # frozen_string_literal: true
2
+
3
3
  module Feedjira
4
4
  module Parser
5
5
  class GoogleDocsAtomEntry
6
6
  include SAXMachine
7
7
  include FeedEntryUtilities
8
+ include AtomEntryUtilities
8
9
 
9
- element :title
10
- element :link, as: :url, value: :href, with: { type: 'text/html', rel: 'alternate' } # rubocop:disable Metrics/LineLength
11
- element :name, as: :author
12
- element :content
13
- element :summary
14
- element :published
15
- element :id, as: :entry_id
16
- element :created, as: :published
17
- element :issued, as: :published
18
- element :updated
19
- element :modified, as: :updated
20
- elements :category, as: :categories, value: :term
21
- elements :link, as: :links, value: :href
22
10
  element :"docs:md5Checksum", as: :checksum
23
11
  element :"docs:filename", as: :original_filename
24
12
  element :"docs:suggestedFilename", as: :suggested_filename
25
-
26
- def url
27
- @url ||= links.first
28
- end
29
13
  end
30
14
  end
31
15
  end
@@ -1,4 +1,5 @@
1
- # rubocop:disable Style/DocumentationMethod
1
+ # frozen_string_literal: true
2
+
2
3
  module Feedjira
3
4
  module Parser
4
5
  # iTunes is RSS 2.0 + some apple extensions
@@ -19,7 +20,7 @@ module Feedjira
19
20
  element :language
20
21
  element :lastBuildDate, as: :last_built
21
22
  element :link, as: :url
22
- element :managingEditor
23
+ element :managingEditor, as: :managing_editor
23
24
  element :rss, as: :version, value: :version
24
25
  element :title
25
26
  element :ttl
@@ -62,7 +63,7 @@ module Feedjira
62
63
  elements :item, as: :entries, class: ITunesRSSItem
63
64
 
64
65
  def self.able_to_parse?(xml)
65
- %r{xmlns:itunes\s?=\s?[\"\']http://www\.itunes\.com/dtds/podcast-1\.0\.dtd[\"\']}i =~ xml # rubocop:disable Metrics/LineLength
66
+ %r{xmlns:itunes\s?=\s?["']http://www\.itunes\.com/dtds/podcast-1\.0\.dtd["']}i =~ xml
66
67
  end
67
68
  end
68
69
  end
@@ -1,4 +1,5 @@
1
- # rubocop:disable Style/DocumentationMethod
1
+ # frozen_string_literal: true
2
+
2
3
  module Feedjira
3
4
  module Parser
4
5
  # iTunes extensions to the standard RSS2.0 item
@@ -11,17 +12,17 @@ module Feedjira
11
12
  elements :"itunes:category", as: :itunes_categories,
12
13
  class: ITunesRSSCategory
13
14
 
14
- def each_subcategory
15
+ def each_subcategory(&block)
15
16
  return to_enum(__method__) unless block_given?
16
17
 
17
18
  yield text
18
19
 
19
20
  itunes_categories.each do |itunes_category|
20
- itunes_category.each_subcategory(&proc)
21
+ itunes_category.each_subcategory(&block)
21
22
  end
22
23
  end
23
24
 
24
- def each_path(ancestors = [])
25
+ def each_path(ancestors = [], &block)
25
26
  return to_enum(__method__, ancestors) unless block_given?
26
27
 
27
28
  category_hierarchy = ancestors + [text]
@@ -30,7 +31,7 @@ module Feedjira
30
31
  yield category_hierarchy
31
32
  else
32
33
  itunes_categories.each do |itunes_category|
33
- itunes_category.each_path(category_hierarchy, &proc)
34
+ itunes_category.each_path(category_hierarchy, &block)
34
35
  end
35
36
  end
36
37
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Feedjira
2
4
  module Parser
3
5
  # iTunes extensions to the standard RSS2.0 item
@@ -5,14 +7,9 @@ module Feedjira
5
7
  class ITunesRSSItem
6
8
  include SAXMachine
7
9
  include FeedEntryUtilities
10
+ include RSSEntryUtilities
8
11
 
9
- element :author
10
- element :guid, as: :entry_id
11
- element :title
12
- element :link, as: :url
13
- element :description, as: :summary
14
- element :"content:encoded", as: :content
15
- element :pubDate, as: :published
12
+ sax_config.top_level_elements["enclosure"].clear
16
13
 
17
14
  # If author is not present use author tag on the item
18
15
  element :"itunes:author", as: :itunes_author
@@ -34,7 +31,7 @@ module Feedjira
34
31
  element :enclosure, value: :length, as: :enclosure_length
35
32
  element :enclosure, value: :type, as: :enclosure_type
36
33
  element :enclosure, value: :url, as: :enclosure_url
37
- elements 'psc:chapter', as: :raw_chapters, class: Feedjira::Parser::PodloveChapter # rubocop:disable Metrics/LineLength
34
+ elements "psc:chapter", as: :raw_chapters, class: Feedjira::Parser::PodloveChapter
38
35
 
39
36
  # Podlove requires clients to re-order by start time in the
40
37
  # event the publisher doesn't provide them in that
@@ -1,4 +1,5 @@
1
- # rubocop:disable Style/Documentation
1
+ # frozen_string_literal: true
2
+
2
3
  module Feedjira
3
4
  module Parser
4
5
  class ITunesRSSOwner
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Feedjira
4
+ module Parser
5
+ # Parser for dealing with JSON Feeds.
6
+ class JSONFeed
7
+ include SAXMachine
8
+ include FeedUtilities
9
+
10
+ def self.able_to_parse?(json)
11
+ %r{https://jsonfeed.org/version/} =~ json
12
+ end
13
+
14
+ def self.parse(json)
15
+ new(JSON.parse(json))
16
+ end
17
+
18
+ attr_reader :json, :version, :title, :url, :feed_url, :description,
19
+ :expired, :entries
20
+
21
+ def initialize(json)
22
+ @json = json
23
+ @version = json.fetch("version")
24
+ @title = json.fetch("title")
25
+ @url = json.fetch("home_page_url", nil)
26
+ @feed_url = json.fetch("feed_url", nil)
27
+ @description = json.fetch("description", nil)
28
+ @expired = json.fetch("expired", nil)
29
+ @entries = parse_items(json["items"])
30
+ end
31
+
32
+ private
33
+
34
+ def parse_items(items)
35
+ items.map do |item|
36
+ Feedjira::Parser::JSONFeedItem.new(item)
37
+ end
38
+ end
39
+ end
40
+ end
41
+ end
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Feedjira
4
+ module Parser
5
+ # Parser for dealing with JSON Feed items.
6
+ class JSONFeedItem
7
+ include FeedEntryUtilities
8
+
9
+ attr_reader :json, :entry_id, :url, :external_url, :title, :content, :summary,
10
+ :published, :updated, :image, :banner_image, :author, :categories
11
+
12
+ def initialize(json)
13
+ @json = json
14
+ @entry_id = json.fetch("id")
15
+ @url = json.fetch("url")
16
+ @external_url = json.fetch("external_url", nil)
17
+ @title = json.fetch("title", nil)
18
+ @content = parse_content(json.fetch("content_html", nil), json.fetch("content_text", nil))
19
+ @summary = json.fetch("summary", nil)
20
+ @image = json.fetch("image", nil)
21
+ @banner_image = json.fetch("banner_image", nil)
22
+ @published = parse_published(json.fetch("date_published", nil))
23
+ @updated = parse_updated(json.fetch("date_modified", nil))
24
+ @author = author_name(json.fetch("author", nil))
25
+ @categories = json.fetch("tags", [])
26
+ end
27
+
28
+ private
29
+
30
+ def parse_published(date_published)
31
+ return nil unless date_published
32
+
33
+ Time.parse_safely(date_published)
34
+ end
35
+
36
+ def parse_updated(date_modified)
37
+ return nil unless date_modified
38
+
39
+ Time.parse_safely(date_modified)
40
+ end
41
+
42
+ # Convenience method to return the included content type.
43
+ # Prefer content_html unless it isn't included.
44
+ def parse_content(content_html, content_text)
45
+ return content_html unless content_html.nil?
46
+
47
+ content_text
48
+ end
49
+
50
+ def author_name(author_obj)
51
+ return nil if author_obj.nil?
52
+
53
+ author_obj["name"]
54
+ end
55
+ end
56
+ end
57
+ end
@@ -1,5 +1,5 @@
1
- # rubocop:disable Style/Documentation
2
- # rubocop:disable Style/DocumentationMethod
1
+ # frozen_string_literal: true
2
+
3
3
  module Feedjira
4
4
  module Parser
5
5
  class PodloveChapter
@@ -12,7 +12,8 @@ module Feedjira
12
12
 
13
13
  def start
14
14
  return unless start_ntp
15
- parts = start_ntp.split(':')
15
+
16
+ parts = start_ntp.split(":")
16
17
  parts.reverse.to_enum.with_index.map do |part, index|
17
18
  part.to_f * (60**index)
18
19
  end.reduce(:+)
@@ -1,4 +1,5 @@
1
- # rubocop:disable Style/DocumentationMethod
1
+ # frozen_string_literal: true
2
+
2
3
  module Feedjira
3
4
  module Parser
4
5
  # Parser for dealing with RSS feeds.
@@ -11,16 +12,17 @@ module Feedjira
11
12
  element :language
12
13
  element :lastBuildDate, as: :last_built
13
14
  element :link, as: :url
15
+ element :"a10:link", as: :url, value: :href
14
16
  element :rss, as: :version, value: :version
15
17
  element :title
16
18
  element :ttl
17
- elements :"atom:link", as: :hubs, value: :href, with: { rel: 'hub' }
19
+ elements :"atom:link", as: :hubs, value: :href, with: { rel: "hub" }
18
20
  elements :item, as: :entries, class: RSSEntry
19
21
 
20
22
  attr_accessor :feed_url
21
23
 
22
24
  def self.able_to_parse?(xml)
23
- (/\<rss|\<rdf/ =~ xml) && !(/feedburner/ =~ xml)
25
+ (/<rss|<rdf/ =~ xml) && !(/feedburner/ =~ xml)
24
26
  end
25
27
  end
26
28
  end
@@ -1,33 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Feedjira
2
4
  module Parser
3
5
  # Parser for dealing with RDF feed entries.
4
6
  class RSSEntry
5
7
  include SAXMachine
6
8
  include FeedEntryUtilities
7
-
8
- element :title
9
- element :link, as: :url
10
-
11
- element :"dc:creator", as: :author
12
- element :author, as: :author
13
- element :"content:encoded", as: :content
14
- element :description, as: :summary
15
-
16
- element :"media:content", as: :image, value: :url
17
- element :enclosure, as: :image, value: :url
18
-
19
- element :pubDate, as: :published
20
- element :pubdate, as: :published
21
- element :"dc:date", as: :published
22
- element :"dc:Date", as: :published
23
- element :"dcterms:created", as: :published
24
-
25
- element :"dcterms:modified", as: :updated
26
- element :issued, as: :published
27
- elements :category, as: :categories
28
-
29
- element :guid, as: :entry_id
30
- element :"dc:identifier", as: :entry_id
9
+ include RSSEntryUtilities
31
10
  end
32
11
  end
33
12
  end
@@ -1,4 +1,5 @@
1
- # rubocop:disable Style/DocumentationMethod
1
+ # frozen_string_literal: true
2
+
2
3
  module Feedjira
3
4
  module Parser
4
5
  # Parser for dealing with RSS feeds.
@@ -9,13 +10,13 @@ module Feedjira
9
10
  element :description
10
11
  element :link, as: :url
11
12
  element :lastBuildDate, as: :last_built
12
- elements :"atom10:link", as: :hubs, value: :href, with: { rel: 'hub' }
13
+ elements :"atom10:link", as: :hubs, value: :href, with: { rel: "hub" }
13
14
  elements :item, as: :entries, class: RSSFeedBurnerEntry
14
15
 
15
16
  attr_accessor :feed_url
16
17
 
17
18
  def self.able_to_parse?(xml) #:nodoc:
18
- (/\<rss|\<rdf/ =~ xml) && (/feedburner/ =~ xml)
19
+ (/<rss|<rdf/ =~ xml) && (/feedburner/ =~ xml)
19
20
  end
20
21
  end
21
22
  end
@@ -1,38 +1,18 @@
1
- # rubocop:disable Style/DocumentationMethod
1
+ # frozen_string_literal: true
2
+
2
3
  module Feedjira
3
4
  module Parser
4
5
  # Parser for dealing with RDF feed entries.
5
6
  class RSSFeedBurnerEntry
6
7
  include SAXMachine
7
8
  include FeedEntryUtilities
9
+ include RSSEntryUtilities
8
10
 
9
- element :title
10
-
11
- element :"feedburner:origLink", as: :url
12
- element :link, as: :url
13
-
14
- element :"dc:creator", as: :author
15
- element :author, as: :author
16
- element :"content:encoded", as: :content
17
- element :description, as: :summary
18
-
19
- element :"media:content", as: :image, value: :url
20
- element :enclosure, as: :image, value: :url
21
-
22
- element :pubDate, as: :published
23
- element :pubdate, as: :published
24
- element :"dc:date", as: :published
25
- element :"dc:Date", as: :published
26
- element :"dcterms:created", as: :published
27
-
28
- element :"dcterms:modified", as: :updated
29
- element :issued, as: :published
30
- elements :category, as: :categories
31
-
32
- element :guid, as: :entry_id
11
+ element :"feedburner:origLink", as: :orig_link
12
+ private :orig_link
33
13
 
34
14
  def url
35
- @url || @link
15
+ orig_link || super
36
16
  end
37
17
  end
38
18
  end