feedjira 2.2.0 → 3.0.0.beta1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. checksums.yaml +5 -5
  2. data/.rubocop.yml +635 -6
  3. data/.travis.yml +1 -1
  4. data/CHANGELOG.md +6 -12
  5. data/CODE_OF_CONDUCT.md +74 -0
  6. data/Gemfile +5 -5
  7. data/README.md +37 -99
  8. data/Rakefile +5 -5
  9. data/feedjira.gemspec +27 -19
  10. data/lib/feedjira.rb +69 -41
  11. data/lib/feedjira/configuration.rb +3 -8
  12. data/lib/feedjira/core_ext.rb +3 -3
  13. data/lib/feedjira/core_ext/date.rb +1 -1
  14. data/lib/feedjira/core_ext/time.rb +2 -2
  15. data/lib/feedjira/date_time_utilities.rb +2 -2
  16. data/lib/feedjira/date_time_utilities/date_time_pattern_parser.rb +2 -2
  17. data/lib/feedjira/feed.rb +10 -80
  18. data/lib/feedjira/feed_entry_utilities.rb +4 -4
  19. data/lib/feedjira/parser.rb +4 -1
  20. data/lib/feedjira/parser/atom.rb +3 -3
  21. data/lib/feedjira/parser/atom_entry.rb +1 -1
  22. data/lib/feedjira/parser/atom_feed_burner.rb +4 -4
  23. data/lib/feedjira/parser/atom_feed_burner_entry.rb +1 -1
  24. data/lib/feedjira/parser/atom_youtube.rb +2 -2
  25. data/lib/feedjira/parser/atom_youtube_entry.rb +1 -1
  26. data/lib/feedjira/parser/google_docs_atom.rb +3 -3
  27. data/lib/feedjira/parser/google_docs_atom_entry.rb +1 -1
  28. data/lib/feedjira/parser/itunes_rss_item.rb +1 -1
  29. data/lib/feedjira/parser/json_feed.rb +39 -0
  30. data/lib/feedjira/parser/json_feed_item.rb +51 -0
  31. data/lib/feedjira/parser/podlove_chapter.rb +1 -1
  32. data/lib/feedjira/parser/rss.rb +1 -1
  33. data/lib/feedjira/parser/rss_entry.rb +5 -1
  34. data/lib/feedjira/parser/rss_feed_burner.rb +1 -1
  35. data/lib/feedjira/preprocessor.rb +1 -1
  36. data/lib/feedjira/version.rb +1 -1
  37. data/spec/feedjira/configuration_spec.rb +9 -16
  38. data/spec/feedjira/date_time_utilities_spec.rb +20 -20
  39. data/spec/feedjira/feed_entry_utilities_spec.rb +18 -18
  40. data/spec/feedjira/feed_spec.rb +15 -229
  41. data/spec/feedjira/feed_utilities_spec.rb +72 -72
  42. data/spec/feedjira/parser/atom_entry_spec.rb +34 -34
  43. data/spec/feedjira/parser/atom_feed_burner_entry_spec.rb +16 -16
  44. data/spec/feedjira/parser/atom_feed_burner_spec.rb +121 -119
  45. data/spec/feedjira/parser/atom_spec.rb +78 -76
  46. data/spec/feedjira/parser/atom_youtube_entry_spec.rb +38 -38
  47. data/spec/feedjira/parser/atom_youtube_spec.rb +15 -15
  48. data/spec/feedjira/parser/google_docs_atom_entry_spec.rb +8 -8
  49. data/spec/feedjira/parser/google_docs_atom_spec.rb +23 -21
  50. data/spec/feedjira/parser/itunes_rss_item_spec.rb +37 -37
  51. data/spec/feedjira/parser/itunes_rss_owner_spec.rb +5 -5
  52. data/spec/feedjira/parser/itunes_rss_spec.rb +118 -116
  53. data/spec/feedjira/parser/json_feed_item_spec.rb +79 -0
  54. data/spec/feedjira/parser/json_feed_spec.rb +53 -0
  55. data/spec/feedjira/parser/podlove_chapter_spec.rb +12 -12
  56. data/spec/feedjira/parser/rss_entry_spec.rb +30 -30
  57. data/spec/feedjira/parser/rss_feed_burner_entry_spec.rb +32 -32
  58. data/spec/feedjira/parser/rss_feed_burner_spec.rb +47 -45
  59. data/spec/feedjira/parser/rss_spec.rb +36 -36
  60. data/spec/feedjira/preprocessor_spec.rb +6 -6
  61. data/spec/feedjira_spec.rb +145 -0
  62. data/spec/sample_feeds.rb +27 -26
  63. data/spec/sample_feeds/HuffPostCanada.xml +279 -0
  64. data/spec/sample_feeds/json_feed.json +156 -0
  65. data/spec/spec_helper.rb +5 -5
  66. metadata +31 -49
  67. data/fixtures/vcr_cassettes/fetch_failure.yml +0 -62
  68. data/fixtures/vcr_cassettes/parse_error.yml +0 -222
  69. data/fixtures/vcr_cassettes/success.yml +0 -281
  70. data/spec/sample_feeds/InvalidDateFormat.xml +0 -20
@@ -8,12 +8,9 @@ module Feedjira
8
8
  # end
9
9
  module Configuration
10
10
  attr_accessor(
11
- :follow_redirect_limit,
12
11
  :logger,
13
12
  :parsers,
14
- :request_timeout,
15
13
  :strip_whitespace,
16
- :user_agent
17
14
  )
18
15
 
19
16
  # Modify Feedjira's current configuration
@@ -42,12 +39,9 @@ module Feedjira
42
39
 
43
40
  # @private
44
41
  def set_default_configuration
45
- self.follow_redirect_limit = 3
46
42
  self.logger = default_logger
47
43
  self.parsers = default_parsers
48
- self.request_timeout = 30
49
44
  self.strip_whitespace = false
50
- self.user_agent = "Feedjira #{Feedjira::VERSION}"
51
45
  end
52
46
 
53
47
  private
@@ -55,7 +49,7 @@ module Feedjira
55
49
  # @private
56
50
  def default_logger
57
51
  Logger.new(STDOUT).tap do |logger|
58
- logger.progname = 'Feedjira'
52
+ logger.progname = "Feedjira"
59
53
  logger.level = Logger::WARN
60
54
  end
61
55
  end
@@ -69,7 +63,8 @@ module Feedjira
69
63
  Feedjira::Parser::AtomFeedBurner,
70
64
  Feedjira::Parser::Atom,
71
65
  Feedjira::Parser::ITunesRSS,
72
- Feedjira::Parser::RSS
66
+ Feedjira::Parser::RSS,
67
+ Feedjira::Parser::JSONFeed,
73
68
  ]
74
69
  end
75
70
  end
@@ -1,3 +1,3 @@
1
- require 'feedjira/core_ext/time'
2
- require 'feedjira/core_ext/date'
3
- require 'feedjira/core_ext/string'
1
+ require "feedjira/core_ext/time"
2
+ require "feedjira/core_ext/date"
3
+ require "feedjira/core_ext/string"
@@ -16,6 +16,6 @@ class Date
16
16
 
17
17
  def feed_utils_to_time(dest, method)
18
18
  Time.send(method, dest.year, dest.month, dest.day, dest.hour, dest.min,
19
- dest.sec, dest.zone)
19
+ dest.sec, dest.zone)
20
20
  end
21
21
  end
@@ -1,5 +1,5 @@
1
- require 'time'
2
- require 'date'
1
+ require "time"
2
+ require "date"
3
3
 
4
4
  # rubocop:disable Style/DocumentationMethod
5
5
  class Time
@@ -7,14 +7,14 @@ module Feedjira
7
7
  DateTimePatternParser,
8
8
  DateTimeLanguageParser,
9
9
  DateTimeEpochParser,
10
- DateTime
10
+ DateTime,
11
11
  ].freeze
12
12
 
13
13
  # Parse the given string starting with the most common parser (default ruby)
14
14
  # and going over all other available parsers
15
15
  # rubocop:disable Metrics/MethodLength
16
16
  def parse_datetime(string)
17
- res = DATE_PARSERS.find do |parser|
17
+ res = DATE_PARSERS.detect do |parser|
18
18
  begin
19
19
  return parser.parse(string).feed_utils_to_gm_time
20
20
  rescue StandardError => e
@@ -7,7 +7,7 @@ module Feedjira
7
7
  # Japanese Symbols are required for strange Date Strings like
8
8
  # '水, 31 8 2016 07:37:00 PDT'
9
9
  JAPANESE_SYMBOLS = %w(日 月 火 水 木 金 土).freeze
10
- PATTERNS = ['%m/%d/%Y %T %p', '%d %m %Y %T %Z'].freeze
10
+ PATTERNS = ["%m/%d/%Y %T %p", "%d %m %Y %T %Z"].freeze
11
11
 
12
12
  # rubocop:disable Metrics/MethodLength
13
13
  def self.parse(string)
@@ -26,7 +26,7 @@ module Feedjira
26
26
 
27
27
  def self.prepare(string)
28
28
  rgx = Regexp.new("^(#{JAPANESE_SYMBOLS.join('|')}),\s")
29
- string.gsub(rgx, '')
29
+ string.gsub(rgx, "")
30
30
  end
31
31
  private_class_method :prepare
32
32
  end
@@ -3,58 +3,33 @@
3
3
  module Feedjira
4
4
  class Feed
5
5
  class << self
6
- def parse_with(parser, xml, &block)
7
- parser.parse xml, &block
8
- end
9
-
10
- def parse(xml, &block)
11
- parser = determine_feed_parser_for_xml(xml)
12
- raise NoParserAvailable, 'No valid parser for XML.' unless parser
13
- parse_with parser, xml, &block
14
- end
15
-
16
- def determine_feed_parser_for_xml(xml)
17
- start_of_doc = xml.slice(0, 2000)
18
- feed_classes.detect { |klass| klass.able_to_parse?(start_of_doc) }
19
- end
20
-
21
- def add_feed_class(klass)
22
- feed_classes.unshift klass
23
- end
24
-
25
- def feed_classes
26
- @feed_classes ||= Feedjira.parsers
27
- end
28
-
29
- def reset_parsers!
30
- @feed_classes = nil
31
- end
32
-
33
6
  def add_common_feed_element(element_tag, options = {})
34
- feed_classes.each do |k|
35
- k.element element_tag, options
7
+ Feedjira.parsers.each do |k|
8
+ k.element(element_tag, options)
36
9
  end
37
10
  end
38
11
 
39
12
  def add_common_feed_elements(element_tag, options = {})
40
- feed_classes.each do |k|
41
- k.elements element_tag, options
13
+ Feedjira.parsers.each do |k|
14
+ k.elements(element_tag, options)
42
15
  end
43
16
  end
44
17
 
45
18
  def add_common_feed_entry_element(element_tag, options = {})
46
- call_on_each_feed_entry :element, element_tag, options
19
+ call_on_each_feed_entry(:element, element_tag, options)
47
20
  end
48
21
 
49
22
  def add_common_feed_entry_elements(element_tag, options = {})
50
- call_on_each_feed_entry :elements, element_tag, options
23
+ call_on_each_feed_entry(:elements, element_tag, options)
51
24
  end
52
25
 
26
+ private
27
+
53
28
  def call_on_each_feed_entry(method, *parameters)
54
- feed_classes.each do |klass|
29
+ Feedjira.parsers.each do |klass|
55
30
  klass.sax_config.collection_elements.each_value do |value|
56
31
  collection_configs = value.select do |v|
57
- v.accessor == 'entries' && v.data_class.class == Class
32
+ v.accessor == "entries" && v.data_class.class == Class
58
33
  end
59
34
 
60
35
  collection_configs.each do |config|
@@ -63,51 +38,6 @@ module Feedjira
63
38
  end
64
39
  end
65
40
  end
66
-
67
- def fetch_and_parse(url)
68
- response = connection(url).get
69
- unless response.success?
70
- raise FetchFailure, "Fetch failed - #{response.status}"
71
- end
72
- feed = parse response.body
73
- feed.feed_url = url
74
- feed.etag = response.headers['etag'].to_s.delete '"'
75
-
76
- feed.last_modified = parse_last_modified(response)
77
- feed
78
- end
79
-
80
- # rubocop:disable LineLength
81
- def connection(url)
82
- Faraday.new(url: url, headers: headers, request: request_options) do |conn|
83
- conn.use FaradayMiddleware::FollowRedirects, limit: Feedjira.follow_redirect_limit
84
- conn.adapter(*Faraday.default_adapter)
85
- end
86
- end
87
- # rubocop:enable LineLength
88
-
89
- private
90
-
91
- def headers
92
- {
93
- user_agent: Feedjira.user_agent
94
- }
95
- end
96
-
97
- def request_options
98
- {
99
- timeout: Feedjira.request_timeout
100
- }
101
- end
102
-
103
- def parse_last_modified(response)
104
- lm = response.headers['last-modified']
105
- DateTime.parse(lm).to_time
106
- rescue StandardError => e
107
- Feedjira.logger.warn { "Failed to parse last modified '#{lm}'" }
108
- Feedjira.logger.debug(e)
109
- nil
110
- end
111
41
  end
112
42
  end
113
43
  end
@@ -13,7 +13,7 @@ module Feedjira
13
13
  DateTime.parse(string).feed_utils_to_gm_time
14
14
  rescue StandardError => e
15
15
  Feedjira.logger.warn { "Failed to parse date #{string.inspect}" }
16
- Feedjira.logger.debug(e)
16
+ Feedjira.logger.warn(e)
17
17
  nil
18
18
  end
19
19
 
@@ -28,14 +28,14 @@ module Feedjira
28
28
  # Writer for published. By default, we keep the "oldest" publish time found.
29
29
  def published=(val)
30
30
  parsed = parse_datetime(val)
31
- @published = parsed if parsed && (!@published || parsed < @published)
31
+ @published = parsed if !@published || parsed < @published
32
32
  end
33
33
 
34
34
  ##
35
35
  # Writer for updated. By default, we keep the most recent update time found.
36
36
  def updated=(val)
37
37
  parsed = parse_datetime(val)
38
- @updated = parsed if parsed && (!@updated || parsed > @updated)
38
+ @updated = parsed if !@updated || parsed > @updated
39
39
  end
40
40
 
41
41
  def sanitize!
@@ -52,7 +52,7 @@ module Feedjira
52
52
  @rss_fields ||= instance_variables
53
53
 
54
54
  @rss_fields.each do |field|
55
- yield(field.to_s.sub('@', ''), instance_variable_get(field))
55
+ yield(field.to_s.sub("@", ""), instance_variable_get(field))
56
56
  end
57
57
  end
58
58
 
@@ -1 +1,4 @@
1
- module Feedjira::Parser; end # rubocop:disable Style/Documentation
1
+ module Feedjira
2
+ module Parser
3
+ end
4
+ end
@@ -8,10 +8,10 @@ module Feedjira
8
8
 
9
9
  element :title
10
10
  element :subtitle, as: :description
11
- element :link, as: :url, value: :href, with: { type: 'text/html' }
12
- element :link, as: :feed_url, value: :href, with: { rel: 'self' }
11
+ element :link, as: :url, value: :href, with: { type: "text/html" }
12
+ element :link, as: :feed_url, value: :href, with: { rel: "self" }
13
13
  elements :link, as: :links, value: :href
14
- elements :link, as: :hubs, value: :href, with: { rel: 'hub' }
14
+ elements :link, as: :hubs, value: :href, with: { rel: "hub" }
15
15
  elements :entry, as: :entries, class: AtomEntry
16
16
 
17
17
  def self.able_to_parse?(xml)
@@ -7,7 +7,7 @@ module Feedjira
7
7
  include FeedEntryUtilities
8
8
 
9
9
  element :title
10
- element :link, as: :url, value: :href, with: { type: 'text/html', rel: 'alternate' } # rubocop:disable Metrics/LineLength
10
+ element :link, as: :url, value: :href, with: { type: "text/html", rel: "alternate" } # rubocop:disable Metrics/LineLength
11
11
  element :name, as: :author
12
12
  element :content
13
13
  element :summary
@@ -9,12 +9,12 @@ module Feedjira
9
9
  element :title
10
10
  element :subtitle, as: :description
11
11
  element :link, as: :url_text_html, value: :href,
12
- with: { type: 'text/html' }
12
+ with: { type: "text/html" }
13
13
  element :link, as: :url_notype, value: :href, with: { type: nil }
14
- element :link, as: :feed_url_link, value: :href, with: { type: 'application/atom+xml' } # rubocop:disable Metrics/LineLength
14
+ element :link, as: :feed_url_link, value: :href, with: { type: "application/atom+xml" } # rubocop:disable Metrics/LineLength
15
15
  element :"atom10:link", as: :feed_url_atom10_link, value: :href,
16
- with: { type: 'application/atom+xml' }
17
- elements :"atom10:link", as: :hubs, value: :href, with: { rel: 'hub' }
16
+ with: { type: "application/atom+xml" }
17
+ elements :"atom10:link", as: :hubs, value: :href, with: { rel: "hub" }
18
18
  elements :entry, as: :entries, class: AtomFeedBurnerEntry
19
19
 
20
20
  attr_writer :url, :feed_url
@@ -8,7 +8,7 @@ module Feedjira
8
8
 
9
9
  element :title
10
10
  element :name, as: :author
11
- element :link, as: :url, value: :href, with: { type: 'text/html', rel: 'alternate' } # rubocop:disable Metrics/LineLength
11
+ element :link, as: :url, value: :href, with: { type: "text/html", rel: "alternate" } # rubocop:disable Metrics/LineLength
12
12
  element :"feedburner:origLink", as: :url
13
13
  element :summary
14
14
  element :content
@@ -6,8 +6,8 @@ module Feedjira
6
6
  include SAXMachine
7
7
  include FeedUtilities
8
8
  element :title
9
- element :link, as: :url, value: :href, with: { rel: 'alternate' }
10
- element :link, as: :feed_url, value: :href, with: { rel: 'self' }
9
+ element :link, as: :url, value: :href, with: { rel: "alternate" }
10
+ element :link, as: :feed_url, value: :href, with: { rel: "self" }
11
11
  element :name, as: :author
12
12
  element :"yt:channelId", as: :youtube_channel_id
13
13
 
@@ -6,7 +6,7 @@ module Feedjira
6
6
  include FeedEntryUtilities
7
7
 
8
8
  element :title
9
- element :link, as: :url, value: :href, with: { rel: 'alternate' }
9
+ element :link, as: :url, value: :href, with: { rel: "alternate" }
10
10
  element :name, as: :author
11
11
  element :"media:description", as: :content
12
12
  element :summary
@@ -1,4 +1,4 @@
1
- require File.expand_path('./atom', File.dirname(__FILE__))
1
+ require File.expand_path("./atom", File.dirname(__FILE__))
2
2
  # rubocop:disable Style/Documentation
3
3
  # rubocop:disable Style/DocumentationMethod
4
4
  module Feedjira
@@ -8,8 +8,8 @@ module Feedjira
8
8
  include FeedUtilities
9
9
  element :title
10
10
  element :subtitle, as: :description
11
- element :link, as: :url, value: :href, with: { type: 'text/html' }
12
- element :link, as: :feed_url, value: :href, with: { type: 'application/atom+xml' } # rubocop:disable Metrics/LineLength
11
+ element :link, as: :url, value: :href, with: { type: "text/html" }
12
+ element :link, as: :feed_url, value: :href, with: { type: "application/atom+xml" } # rubocop:disable Metrics/LineLength
13
13
  elements :link, as: :links, value: :href
14
14
  elements :entry, as: :entries, class: GoogleDocsAtomEntry
15
15
 
@@ -7,7 +7,7 @@ module Feedjira
7
7
  include FeedEntryUtilities
8
8
 
9
9
  element :title
10
- element :link, as: :url, value: :href, with: { type: 'text/html', rel: 'alternate' } # rubocop:disable Metrics/LineLength
10
+ element :link, as: :url, value: :href, with: { type: "text/html", rel: "alternate" } # rubocop:disable Metrics/LineLength
11
11
  element :name, as: :author
12
12
  element :content
13
13
  element :summary
@@ -34,7 +34,7 @@ module Feedjira
34
34
  element :enclosure, value: :length, as: :enclosure_length
35
35
  element :enclosure, value: :type, as: :enclosure_type
36
36
  element :enclosure, value: :url, as: :enclosure_url
37
- elements 'psc:chapter', as: :raw_chapters, class: Feedjira::Parser::PodloveChapter # rubocop:disable Metrics/LineLength
37
+ elements "psc:chapter", as: :raw_chapters, class: Feedjira::Parser::PodloveChapter # rubocop:disable Metrics/LineLength
38
38
 
39
39
  # Podlove requires clients to re-order by start time in the
40
40
  # event the publisher doesn't provide them in that
@@ -0,0 +1,39 @@
1
+ module Feedjira
2
+ module Parser
3
+ # Parser for dealing with JSON Feeds.
4
+ class JSONFeed
5
+ include SAXMachine
6
+ include FeedUtilities
7
+
8
+ def self.able_to_parse?(json)
9
+ %r{https:\/\/jsonfeed.org\/version\/} =~ json
10
+ end
11
+
12
+ def self.parse(json)
13
+ new(JSON.parse(json))
14
+ end
15
+
16
+ attr_reader :json, :version, :title, :url, :feed_url, :description,
17
+ :expired, :entries
18
+
19
+ def initialize(json)
20
+ @json = json
21
+ @version = json.fetch("version")
22
+ @title = json.fetch("title")
23
+ @url = json.fetch("home_page_url", nil)
24
+ @feed_url = json.fetch("feed_url", nil)
25
+ @description = json.fetch("description", nil)
26
+ @expired = json.fetch("expired", nil)
27
+ @entries = parse_items(json["items"])
28
+ end
29
+
30
+ private
31
+
32
+ def parse_items(items)
33
+ items.map do |item|
34
+ Feedjira::Parser::JSONFeedItem.new(item)
35
+ end
36
+ end
37
+ end
38
+ end
39
+ end
@@ -0,0 +1,51 @@
1
+ module Feedjira
2
+ module Parser
3
+ # Parser for dealing with JSON Feed items.
4
+ class JSONFeedItem
5
+ include FeedEntryUtilities
6
+
7
+ attr_reader :json, :entry_id, :url, :external_url, :title, :content, :summary,
8
+ :published, :updated, :image, :banner_image, :author, :categories
9
+
10
+ def initialize(json)
11
+ @json = json
12
+ @entry_id = json.fetch("id")
13
+ @url = json.fetch("url")
14
+ @external_url = json.fetch("external_url", nil)
15
+ @title = json.fetch("title", nil)
16
+ @content = parse_content(json.fetch("content_html", nil), json.fetch("content_text", nil))
17
+ @summary = json.fetch("summary", nil)
18
+ @image = json.fetch("image", nil)
19
+ @banner_image = json.fetch("banner_image", nil)
20
+ @published = parse_published(json.fetch("date_published", nil))
21
+ @updated = parse_updated(json.fetch("date_modified", nil))
22
+ @author = author_name(json.fetch("author", nil))
23
+ @categories = json.fetch("tags", [])
24
+ end
25
+
26
+ private
27
+
28
+ def parse_published(date_published)
29
+ return nil unless date_published
30
+ Time.parse_safely(date_published)
31
+ end
32
+
33
+ def parse_updated(date_modified)
34
+ return nil unless date_modified
35
+ Time.parse_safely(date_modified)
36
+ end
37
+
38
+ # Convenience method to return the included content type.
39
+ # Prefer content_html unless it isn't included.
40
+ def parse_content(content_html, content_text)
41
+ return content_html unless content_html.nil?
42
+ content_text
43
+ end
44
+
45
+ def author_name(author_obj)
46
+ return nil if author_obj.nil?
47
+ author_obj["name"]
48
+ end
49
+ end
50
+ end
51
+ end