feedjira 2.2.0 → 3.0.0.beta1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (70) hide show
  1. checksums.yaml +5 -5
  2. data/.rubocop.yml +635 -6
  3. data/.travis.yml +1 -1
  4. data/CHANGELOG.md +6 -12
  5. data/CODE_OF_CONDUCT.md +74 -0
  6. data/Gemfile +5 -5
  7. data/README.md +37 -99
  8. data/Rakefile +5 -5
  9. data/feedjira.gemspec +27 -19
  10. data/lib/feedjira.rb +69 -41
  11. data/lib/feedjira/configuration.rb +3 -8
  12. data/lib/feedjira/core_ext.rb +3 -3
  13. data/lib/feedjira/core_ext/date.rb +1 -1
  14. data/lib/feedjira/core_ext/time.rb +2 -2
  15. data/lib/feedjira/date_time_utilities.rb +2 -2
  16. data/lib/feedjira/date_time_utilities/date_time_pattern_parser.rb +2 -2
  17. data/lib/feedjira/feed.rb +10 -80
  18. data/lib/feedjira/feed_entry_utilities.rb +4 -4
  19. data/lib/feedjira/parser.rb +4 -1
  20. data/lib/feedjira/parser/atom.rb +3 -3
  21. data/lib/feedjira/parser/atom_entry.rb +1 -1
  22. data/lib/feedjira/parser/atom_feed_burner.rb +4 -4
  23. data/lib/feedjira/parser/atom_feed_burner_entry.rb +1 -1
  24. data/lib/feedjira/parser/atom_youtube.rb +2 -2
  25. data/lib/feedjira/parser/atom_youtube_entry.rb +1 -1
  26. data/lib/feedjira/parser/google_docs_atom.rb +3 -3
  27. data/lib/feedjira/parser/google_docs_atom_entry.rb +1 -1
  28. data/lib/feedjira/parser/itunes_rss_item.rb +1 -1
  29. data/lib/feedjira/parser/json_feed.rb +39 -0
  30. data/lib/feedjira/parser/json_feed_item.rb +51 -0
  31. data/lib/feedjira/parser/podlove_chapter.rb +1 -1
  32. data/lib/feedjira/parser/rss.rb +1 -1
  33. data/lib/feedjira/parser/rss_entry.rb +5 -1
  34. data/lib/feedjira/parser/rss_feed_burner.rb +1 -1
  35. data/lib/feedjira/preprocessor.rb +1 -1
  36. data/lib/feedjira/version.rb +1 -1
  37. data/spec/feedjira/configuration_spec.rb +9 -16
  38. data/spec/feedjira/date_time_utilities_spec.rb +20 -20
  39. data/spec/feedjira/feed_entry_utilities_spec.rb +18 -18
  40. data/spec/feedjira/feed_spec.rb +15 -229
  41. data/spec/feedjira/feed_utilities_spec.rb +72 -72
  42. data/spec/feedjira/parser/atom_entry_spec.rb +34 -34
  43. data/spec/feedjira/parser/atom_feed_burner_entry_spec.rb +16 -16
  44. data/spec/feedjira/parser/atom_feed_burner_spec.rb +121 -119
  45. data/spec/feedjira/parser/atom_spec.rb +78 -76
  46. data/spec/feedjira/parser/atom_youtube_entry_spec.rb +38 -38
  47. data/spec/feedjira/parser/atom_youtube_spec.rb +15 -15
  48. data/spec/feedjira/parser/google_docs_atom_entry_spec.rb +8 -8
  49. data/spec/feedjira/parser/google_docs_atom_spec.rb +23 -21
  50. data/spec/feedjira/parser/itunes_rss_item_spec.rb +37 -37
  51. data/spec/feedjira/parser/itunes_rss_owner_spec.rb +5 -5
  52. data/spec/feedjira/parser/itunes_rss_spec.rb +118 -116
  53. data/spec/feedjira/parser/json_feed_item_spec.rb +79 -0
  54. data/spec/feedjira/parser/json_feed_spec.rb +53 -0
  55. data/spec/feedjira/parser/podlove_chapter_spec.rb +12 -12
  56. data/spec/feedjira/parser/rss_entry_spec.rb +30 -30
  57. data/spec/feedjira/parser/rss_feed_burner_entry_spec.rb +32 -32
  58. data/spec/feedjira/parser/rss_feed_burner_spec.rb +47 -45
  59. data/spec/feedjira/parser/rss_spec.rb +36 -36
  60. data/spec/feedjira/preprocessor_spec.rb +6 -6
  61. data/spec/feedjira_spec.rb +145 -0
  62. data/spec/sample_feeds.rb +27 -26
  63. data/spec/sample_feeds/HuffPostCanada.xml +279 -0
  64. data/spec/sample_feeds/json_feed.json +156 -0
  65. data/spec/spec_helper.rb +5 -5
  66. metadata +31 -49
  67. data/fixtures/vcr_cassettes/fetch_failure.yml +0 -62
  68. data/fixtures/vcr_cassettes/parse_error.yml +0 -222
  69. data/fixtures/vcr_cassettes/success.yml +0 -281
  70. data/spec/sample_feeds/InvalidDateFormat.xml +0 -20
@@ -8,12 +8,9 @@ module Feedjira
8
8
  # end
9
9
  module Configuration
10
10
  attr_accessor(
11
- :follow_redirect_limit,
12
11
  :logger,
13
12
  :parsers,
14
- :request_timeout,
15
13
  :strip_whitespace,
16
- :user_agent
17
14
  )
18
15
 
19
16
  # Modify Feedjira's current configuration
@@ -42,12 +39,9 @@ module Feedjira
42
39
 
43
40
  # @private
44
41
  def set_default_configuration
45
- self.follow_redirect_limit = 3
46
42
  self.logger = default_logger
47
43
  self.parsers = default_parsers
48
- self.request_timeout = 30
49
44
  self.strip_whitespace = false
50
- self.user_agent = "Feedjira #{Feedjira::VERSION}"
51
45
  end
52
46
 
53
47
  private
@@ -55,7 +49,7 @@ module Feedjira
55
49
  # @private
56
50
  def default_logger
57
51
  Logger.new(STDOUT).tap do |logger|
58
- logger.progname = 'Feedjira'
52
+ logger.progname = "Feedjira"
59
53
  logger.level = Logger::WARN
60
54
  end
61
55
  end
@@ -69,7 +63,8 @@ module Feedjira
69
63
  Feedjira::Parser::AtomFeedBurner,
70
64
  Feedjira::Parser::Atom,
71
65
  Feedjira::Parser::ITunesRSS,
72
- Feedjira::Parser::RSS
66
+ Feedjira::Parser::RSS,
67
+ Feedjira::Parser::JSONFeed,
73
68
  ]
74
69
  end
75
70
  end
@@ -1,3 +1,3 @@
1
- require 'feedjira/core_ext/time'
2
- require 'feedjira/core_ext/date'
3
- require 'feedjira/core_ext/string'
1
+ require "feedjira/core_ext/time"
2
+ require "feedjira/core_ext/date"
3
+ require "feedjira/core_ext/string"
@@ -16,6 +16,6 @@ class Date
16
16
 
17
17
  def feed_utils_to_time(dest, method)
18
18
  Time.send(method, dest.year, dest.month, dest.day, dest.hour, dest.min,
19
- dest.sec, dest.zone)
19
+ dest.sec, dest.zone)
20
20
  end
21
21
  end
@@ -1,5 +1,5 @@
1
- require 'time'
2
- require 'date'
1
+ require "time"
2
+ require "date"
3
3
 
4
4
  # rubocop:disable Style/DocumentationMethod
5
5
  class Time
@@ -7,14 +7,14 @@ module Feedjira
7
7
  DateTimePatternParser,
8
8
  DateTimeLanguageParser,
9
9
  DateTimeEpochParser,
10
- DateTime
10
+ DateTime,
11
11
  ].freeze
12
12
 
13
13
  # Parse the given string starting with the most common parser (default ruby)
14
14
  # and going over all other available parsers
15
15
  # rubocop:disable Metrics/MethodLength
16
16
  def parse_datetime(string)
17
- res = DATE_PARSERS.find do |parser|
17
+ res = DATE_PARSERS.detect do |parser|
18
18
  begin
19
19
  return parser.parse(string).feed_utils_to_gm_time
20
20
  rescue StandardError => e
@@ -7,7 +7,7 @@ module Feedjira
7
7
  # Japanese Symbols are required for strange Date Strings like
8
8
  # '水, 31 8 2016 07:37:00 PDT'
9
9
  JAPANESE_SYMBOLS = %w(日 月 火 水 木 金 土).freeze
10
- PATTERNS = ['%m/%d/%Y %T %p', '%d %m %Y %T %Z'].freeze
10
+ PATTERNS = ["%m/%d/%Y %T %p", "%d %m %Y %T %Z"].freeze
11
11
 
12
12
  # rubocop:disable Metrics/MethodLength
13
13
  def self.parse(string)
@@ -26,7 +26,7 @@ module Feedjira
26
26
 
27
27
  def self.prepare(string)
28
28
  rgx = Regexp.new("^(#{JAPANESE_SYMBOLS.join('|')}),\s")
29
- string.gsub(rgx, '')
29
+ string.gsub(rgx, "")
30
30
  end
31
31
  private_class_method :prepare
32
32
  end
@@ -3,58 +3,33 @@
3
3
  module Feedjira
4
4
  class Feed
5
5
  class << self
6
- def parse_with(parser, xml, &block)
7
- parser.parse xml, &block
8
- end
9
-
10
- def parse(xml, &block)
11
- parser = determine_feed_parser_for_xml(xml)
12
- raise NoParserAvailable, 'No valid parser for XML.' unless parser
13
- parse_with parser, xml, &block
14
- end
15
-
16
- def determine_feed_parser_for_xml(xml)
17
- start_of_doc = xml.slice(0, 2000)
18
- feed_classes.detect { |klass| klass.able_to_parse?(start_of_doc) }
19
- end
20
-
21
- def add_feed_class(klass)
22
- feed_classes.unshift klass
23
- end
24
-
25
- def feed_classes
26
- @feed_classes ||= Feedjira.parsers
27
- end
28
-
29
- def reset_parsers!
30
- @feed_classes = nil
31
- end
32
-
33
6
  def add_common_feed_element(element_tag, options = {})
34
- feed_classes.each do |k|
35
- k.element element_tag, options
7
+ Feedjira.parsers.each do |k|
8
+ k.element(element_tag, options)
36
9
  end
37
10
  end
38
11
 
39
12
  def add_common_feed_elements(element_tag, options = {})
40
- feed_classes.each do |k|
41
- k.elements element_tag, options
13
+ Feedjira.parsers.each do |k|
14
+ k.elements(element_tag, options)
42
15
  end
43
16
  end
44
17
 
45
18
  def add_common_feed_entry_element(element_tag, options = {})
46
- call_on_each_feed_entry :element, element_tag, options
19
+ call_on_each_feed_entry(:element, element_tag, options)
47
20
  end
48
21
 
49
22
  def add_common_feed_entry_elements(element_tag, options = {})
50
- call_on_each_feed_entry :elements, element_tag, options
23
+ call_on_each_feed_entry(:elements, element_tag, options)
51
24
  end
52
25
 
26
+ private
27
+
53
28
  def call_on_each_feed_entry(method, *parameters)
54
- feed_classes.each do |klass|
29
+ Feedjira.parsers.each do |klass|
55
30
  klass.sax_config.collection_elements.each_value do |value|
56
31
  collection_configs = value.select do |v|
57
- v.accessor == 'entries' && v.data_class.class == Class
32
+ v.accessor == "entries" && v.data_class.class == Class
58
33
  end
59
34
 
60
35
  collection_configs.each do |config|
@@ -63,51 +38,6 @@ module Feedjira
63
38
  end
64
39
  end
65
40
  end
66
-
67
- def fetch_and_parse(url)
68
- response = connection(url).get
69
- unless response.success?
70
- raise FetchFailure, "Fetch failed - #{response.status}"
71
- end
72
- feed = parse response.body
73
- feed.feed_url = url
74
- feed.etag = response.headers['etag'].to_s.delete '"'
75
-
76
- feed.last_modified = parse_last_modified(response)
77
- feed
78
- end
79
-
80
- # rubocop:disable LineLength
81
- def connection(url)
82
- Faraday.new(url: url, headers: headers, request: request_options) do |conn|
83
- conn.use FaradayMiddleware::FollowRedirects, limit: Feedjira.follow_redirect_limit
84
- conn.adapter(*Faraday.default_adapter)
85
- end
86
- end
87
- # rubocop:enable LineLength
88
-
89
- private
90
-
91
- def headers
92
- {
93
- user_agent: Feedjira.user_agent
94
- }
95
- end
96
-
97
- def request_options
98
- {
99
- timeout: Feedjira.request_timeout
100
- }
101
- end
102
-
103
- def parse_last_modified(response)
104
- lm = response.headers['last-modified']
105
- DateTime.parse(lm).to_time
106
- rescue StandardError => e
107
- Feedjira.logger.warn { "Failed to parse last modified '#{lm}'" }
108
- Feedjira.logger.debug(e)
109
- nil
110
- end
111
41
  end
112
42
  end
113
43
  end
@@ -13,7 +13,7 @@ module Feedjira
13
13
  DateTime.parse(string).feed_utils_to_gm_time
14
14
  rescue StandardError => e
15
15
  Feedjira.logger.warn { "Failed to parse date #{string.inspect}" }
16
- Feedjira.logger.debug(e)
16
+ Feedjira.logger.warn(e)
17
17
  nil
18
18
  end
19
19
 
@@ -28,14 +28,14 @@ module Feedjira
28
28
  # Writer for published. By default, we keep the "oldest" publish time found.
29
29
  def published=(val)
30
30
  parsed = parse_datetime(val)
31
- @published = parsed if parsed && (!@published || parsed < @published)
31
+ @published = parsed if !@published || parsed < @published
32
32
  end
33
33
 
34
34
  ##
35
35
  # Writer for updated. By default, we keep the most recent update time found.
36
36
  def updated=(val)
37
37
  parsed = parse_datetime(val)
38
- @updated = parsed if parsed && (!@updated || parsed > @updated)
38
+ @updated = parsed if !@updated || parsed > @updated
39
39
  end
40
40
 
41
41
  def sanitize!
@@ -52,7 +52,7 @@ module Feedjira
52
52
  @rss_fields ||= instance_variables
53
53
 
54
54
  @rss_fields.each do |field|
55
- yield(field.to_s.sub('@', ''), instance_variable_get(field))
55
+ yield(field.to_s.sub("@", ""), instance_variable_get(field))
56
56
  end
57
57
  end
58
58
 
@@ -1 +1,4 @@
1
- module Feedjira::Parser; end # rubocop:disable Style/Documentation
1
+ module Feedjira
2
+ module Parser
3
+ end
4
+ end
@@ -8,10 +8,10 @@ module Feedjira
8
8
 
9
9
  element :title
10
10
  element :subtitle, as: :description
11
- element :link, as: :url, value: :href, with: { type: 'text/html' }
12
- element :link, as: :feed_url, value: :href, with: { rel: 'self' }
11
+ element :link, as: :url, value: :href, with: { type: "text/html" }
12
+ element :link, as: :feed_url, value: :href, with: { rel: "self" }
13
13
  elements :link, as: :links, value: :href
14
- elements :link, as: :hubs, value: :href, with: { rel: 'hub' }
14
+ elements :link, as: :hubs, value: :href, with: { rel: "hub" }
15
15
  elements :entry, as: :entries, class: AtomEntry
16
16
 
17
17
  def self.able_to_parse?(xml)
@@ -7,7 +7,7 @@ module Feedjira
7
7
  include FeedEntryUtilities
8
8
 
9
9
  element :title
10
- element :link, as: :url, value: :href, with: { type: 'text/html', rel: 'alternate' } # rubocop:disable Metrics/LineLength
10
+ element :link, as: :url, value: :href, with: { type: "text/html", rel: "alternate" } # rubocop:disable Metrics/LineLength
11
11
  element :name, as: :author
12
12
  element :content
13
13
  element :summary
@@ -9,12 +9,12 @@ module Feedjira
9
9
  element :title
10
10
  element :subtitle, as: :description
11
11
  element :link, as: :url_text_html, value: :href,
12
- with: { type: 'text/html' }
12
+ with: { type: "text/html" }
13
13
  element :link, as: :url_notype, value: :href, with: { type: nil }
14
- element :link, as: :feed_url_link, value: :href, with: { type: 'application/atom+xml' } # rubocop:disable Metrics/LineLength
14
+ element :link, as: :feed_url_link, value: :href, with: { type: "application/atom+xml" } # rubocop:disable Metrics/LineLength
15
15
  element :"atom10:link", as: :feed_url_atom10_link, value: :href,
16
- with: { type: 'application/atom+xml' }
17
- elements :"atom10:link", as: :hubs, value: :href, with: { rel: 'hub' }
16
+ with: { type: "application/atom+xml" }
17
+ elements :"atom10:link", as: :hubs, value: :href, with: { rel: "hub" }
18
18
  elements :entry, as: :entries, class: AtomFeedBurnerEntry
19
19
 
20
20
  attr_writer :url, :feed_url
@@ -8,7 +8,7 @@ module Feedjira
8
8
 
9
9
  element :title
10
10
  element :name, as: :author
11
- element :link, as: :url, value: :href, with: { type: 'text/html', rel: 'alternate' } # rubocop:disable Metrics/LineLength
11
+ element :link, as: :url, value: :href, with: { type: "text/html", rel: "alternate" } # rubocop:disable Metrics/LineLength
12
12
  element :"feedburner:origLink", as: :url
13
13
  element :summary
14
14
  element :content
@@ -6,8 +6,8 @@ module Feedjira
6
6
  include SAXMachine
7
7
  include FeedUtilities
8
8
  element :title
9
- element :link, as: :url, value: :href, with: { rel: 'alternate' }
10
- element :link, as: :feed_url, value: :href, with: { rel: 'self' }
9
+ element :link, as: :url, value: :href, with: { rel: "alternate" }
10
+ element :link, as: :feed_url, value: :href, with: { rel: "self" }
11
11
  element :name, as: :author
12
12
  element :"yt:channelId", as: :youtube_channel_id
13
13
 
@@ -6,7 +6,7 @@ module Feedjira
6
6
  include FeedEntryUtilities
7
7
 
8
8
  element :title
9
- element :link, as: :url, value: :href, with: { rel: 'alternate' }
9
+ element :link, as: :url, value: :href, with: { rel: "alternate" }
10
10
  element :name, as: :author
11
11
  element :"media:description", as: :content
12
12
  element :summary
@@ -1,4 +1,4 @@
1
- require File.expand_path('./atom', File.dirname(__FILE__))
1
+ require File.expand_path("./atom", File.dirname(__FILE__))
2
2
  # rubocop:disable Style/Documentation
3
3
  # rubocop:disable Style/DocumentationMethod
4
4
  module Feedjira
@@ -8,8 +8,8 @@ module Feedjira
8
8
  include FeedUtilities
9
9
  element :title
10
10
  element :subtitle, as: :description
11
- element :link, as: :url, value: :href, with: { type: 'text/html' }
12
- element :link, as: :feed_url, value: :href, with: { type: 'application/atom+xml' } # rubocop:disable Metrics/LineLength
11
+ element :link, as: :url, value: :href, with: { type: "text/html" }
12
+ element :link, as: :feed_url, value: :href, with: { type: "application/atom+xml" } # rubocop:disable Metrics/LineLength
13
13
  elements :link, as: :links, value: :href
14
14
  elements :entry, as: :entries, class: GoogleDocsAtomEntry
15
15
 
@@ -7,7 +7,7 @@ module Feedjira
7
7
  include FeedEntryUtilities
8
8
 
9
9
  element :title
10
- element :link, as: :url, value: :href, with: { type: 'text/html', rel: 'alternate' } # rubocop:disable Metrics/LineLength
10
+ element :link, as: :url, value: :href, with: { type: "text/html", rel: "alternate" } # rubocop:disable Metrics/LineLength
11
11
  element :name, as: :author
12
12
  element :content
13
13
  element :summary
@@ -34,7 +34,7 @@ module Feedjira
34
34
  element :enclosure, value: :length, as: :enclosure_length
35
35
  element :enclosure, value: :type, as: :enclosure_type
36
36
  element :enclosure, value: :url, as: :enclosure_url
37
- elements 'psc:chapter', as: :raw_chapters, class: Feedjira::Parser::PodloveChapter # rubocop:disable Metrics/LineLength
37
+ elements "psc:chapter", as: :raw_chapters, class: Feedjira::Parser::PodloveChapter # rubocop:disable Metrics/LineLength
38
38
 
39
39
  # Podlove requires clients to re-order by start time in the
40
40
  # event the publisher doesn't provide them in that
@@ -0,0 +1,39 @@
1
+ module Feedjira
2
+ module Parser
3
+ # Parser for dealing with JSON Feeds.
4
+ class JSONFeed
5
+ include SAXMachine
6
+ include FeedUtilities
7
+
8
+ def self.able_to_parse?(json)
9
+ %r{https:\/\/jsonfeed.org\/version\/} =~ json
10
+ end
11
+
12
+ def self.parse(json)
13
+ new(JSON.parse(json))
14
+ end
15
+
16
+ attr_reader :json, :version, :title, :url, :feed_url, :description,
17
+ :expired, :entries
18
+
19
+ def initialize(json)
20
+ @json = json
21
+ @version = json.fetch("version")
22
+ @title = json.fetch("title")
23
+ @url = json.fetch("home_page_url", nil)
24
+ @feed_url = json.fetch("feed_url", nil)
25
+ @description = json.fetch("description", nil)
26
+ @expired = json.fetch("expired", nil)
27
+ @entries = parse_items(json["items"])
28
+ end
29
+
30
+ private
31
+
32
+ def parse_items(items)
33
+ items.map do |item|
34
+ Feedjira::Parser::JSONFeedItem.new(item)
35
+ end
36
+ end
37
+ end
38
+ end
39
+ end
@@ -0,0 +1,51 @@
1
+ module Feedjira
2
+ module Parser
3
+ # Parser for dealing with JSON Feed items.
4
+ class JSONFeedItem
5
+ include FeedEntryUtilities
6
+
7
+ attr_reader :json, :entry_id, :url, :external_url, :title, :content, :summary,
8
+ :published, :updated, :image, :banner_image, :author, :categories
9
+
10
+ def initialize(json)
11
+ @json = json
12
+ @entry_id = json.fetch("id")
13
+ @url = json.fetch("url")
14
+ @external_url = json.fetch("external_url", nil)
15
+ @title = json.fetch("title", nil)
16
+ @content = parse_content(json.fetch("content_html", nil), json.fetch("content_text", nil))
17
+ @summary = json.fetch("summary", nil)
18
+ @image = json.fetch("image", nil)
19
+ @banner_image = json.fetch("banner_image", nil)
20
+ @published = parse_published(json.fetch("date_published", nil))
21
+ @updated = parse_updated(json.fetch("date_modified", nil))
22
+ @author = author_name(json.fetch("author", nil))
23
+ @categories = json.fetch("tags", [])
24
+ end
25
+
26
+ private
27
+
28
+ def parse_published(date_published)
29
+ return nil unless date_published
30
+ Time.parse_safely(date_published)
31
+ end
32
+
33
+ def parse_updated(date_modified)
34
+ return nil unless date_modified
35
+ Time.parse_safely(date_modified)
36
+ end
37
+
38
+ # Convenience method to return the included content type.
39
+ # Prefer content_html unless it isn't included.
40
+ def parse_content(content_html, content_text)
41
+ return content_html unless content_html.nil?
42
+ content_text
43
+ end
44
+
45
+ def author_name(author_obj)
46
+ return nil if author_obj.nil?
47
+ author_obj["name"]
48
+ end
49
+ end
50
+ end
51
+ end