feedjira 3.0.0.beta1 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. checksums.yaml +5 -5
  2. data/.rubocop.yml +5 -2
  3. data/.rubocop_todo.yml +36 -0
  4. data/.travis.yml +0 -3
  5. data/Gemfile +1 -0
  6. data/README.md +3 -0
  7. data/feedjira.gemspec +9 -3
  8. data/lib/feedjira.rb +2 -0
  9. data/lib/feedjira/atom_entry_utilities.rb +35 -0
  10. data/lib/feedjira/core_ext/date.rb +0 -1
  11. data/lib/feedjira/core_ext/string.rb +0 -1
  12. data/lib/feedjira/core_ext/time.rb +8 -10
  13. data/lib/feedjira/date_time_utilities.rb +0 -2
  14. data/lib/feedjira/date_time_utilities/date_time_epoch_parser.rb +0 -2
  15. data/lib/feedjira/date_time_utilities/date_time_language_parser.rb +0 -2
  16. data/lib/feedjira/date_time_utilities/date_time_pattern_parser.rb +0 -4
  17. data/lib/feedjira/feed.rb +0 -2
  18. data/lib/feedjira/feed_entry_utilities.rb +11 -6
  19. data/lib/feedjira/feed_utilities.rb +0 -2
  20. data/lib/feedjira/parser/atom.rb +0 -1
  21. data/lib/feedjira/parser/atom_entry.rb +2 -21
  22. data/lib/feedjira/parser/atom_feed_burner.rb +1 -2
  23. data/lib/feedjira/parser/atom_feed_burner_entry.rb +7 -18
  24. data/lib/feedjira/parser/atom_youtube.rb +0 -1
  25. data/lib/feedjira/parser/atom_youtube_entry.rb +6 -7
  26. data/lib/feedjira/parser/google_docs_atom.rb +0 -2
  27. data/lib/feedjira/parser/google_docs_atom_entry.rb +1 -19
  28. data/lib/feedjira/parser/itunes_rss.rb +0 -1
  29. data/lib/feedjira/parser/itunes_rss_category.rb +0 -1
  30. data/lib/feedjira/parser/itunes_rss_item.rb +2 -7
  31. data/lib/feedjira/parser/itunes_rss_owner.rb +0 -1
  32. data/lib/feedjira/parser/podlove_chapter.rb +0 -2
  33. data/lib/feedjira/parser/rss.rb +0 -1
  34. data/lib/feedjira/parser/rss_entry.rb +1 -28
  35. data/lib/feedjira/parser/rss_feed_burner.rb +0 -1
  36. data/lib/feedjira/parser/rss_feed_burner_entry.rb +6 -26
  37. data/lib/feedjira/preprocessor.rb +0 -2
  38. data/lib/feedjira/rss_entry_utilities.rb +45 -0
  39. data/lib/feedjira/version.rb +1 -1
  40. data/spec/feedjira/feed_spec.rb +1 -1
  41. data/spec/feedjira/parser/atom_youtube_entry_spec.rb +2 -2
  42. data/spec/feedjira_spec.rb +11 -1
  43. data/spec/sample_feeds.rb +1 -4
  44. data/spec/sample_feeds/InvalidDateFormat.xml +20 -0
  45. metadata +17 -10
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: b128986f5aba338178d635d04c7c76a6bf4786c9
4
- data.tar.gz: d812e34dcf097bd6cc3d5d6b299337898eb119ec
2
+ SHA256:
3
+ metadata.gz: 49efb7655e500df91c7119e3afbb2b91fab6d0a8282703fecb69f24d15f54357
4
+ data.tar.gz: ea6fc3b58be4968be8c8561b6e8c2720b5732882ff2556fe6966583df9bcf130
5
5
  SHA512:
6
- metadata.gz: e199341333e63c4b11fd65532c41d8efd9d7236bd264e21edd9795f1fcbe8b79e35a122534f1b0ee61a0e76fade5364e70a032e142da63589e3158c4fa78440a
7
- data.tar.gz: bcddaf13bdc6214acb552dadfe23f481a2e0d6e7da99b3e182e9bb067f1e0f70e93f80d62b75e02f52304c5cf4b47252c6a9ee55f6c1c697e9c9800acf230e07
6
+ metadata.gz: dd52aec9d212c0428095c3adee3cbfd1115d53a54ac2281bf7ce7f4142ace6659e658658097e43edb3c90deec583941ed220aea1fe1709f66110adb79c471620
7
+ data.tar.gz: d39bfe14c74a642311d55369498195e588beec99e5bfb897b0f00c5019f1985f89c1280b8c8eaafb3d36f55b9558b04c9037ccc8710c34c4bad2cb6127702d61
data/.rubocop.yml CHANGED
@@ -1,4 +1,7 @@
1
+ inherit_from: .rubocop_todo.yml
2
+
1
3
  AllCops:
4
+ TargetRubyVersion: 2.2
2
5
  Exclude:
3
6
  - db/schema.rb
4
7
  - vendor/**/*
@@ -360,7 +363,7 @@ Style/TrailingCommaInArguments:
360
363
  - no_comma
361
364
  Enabled: true
362
365
 
363
- Style/TrailingCommaInLiteral:
366
+ Style/TrailingCommaInArrayLiteral:
364
367
  Description: 'Checks for trailing comma in array and hash literals.'
365
368
  StyleGuide: 'https://github.com/bbatsov/ruby-style-guide#no-trailing-array-commas'
366
369
  EnforcedStyleForMultiline: comma
@@ -527,7 +530,7 @@ Lint/UnderscorePrefixedVariableName:
527
530
  Description: 'Do not use prefix `_` for a variable that is used.'
528
531
  Enabled: false
529
532
 
530
- Lint/UnneededDisable:
533
+ Lint/UnneededCopDisableDirective:
531
534
  Description: >-
532
535
  Checks for rubocop:disable comments that can be removed.
533
536
  Note: this cop is not disabled when disabling all cops.
@@ -0,0 +1,36 @@
1
+ # This configuration was generated by
2
+ # `rubocop --auto-gen-config`
3
+ # on 2018-10-08 04:14:19 +0900 using RuboCop version 0.59.2.
4
+ # The point is for the user to remove these configuration records
5
+ # one by one as the offenses are removed from the code base.
6
+ # Note that changes in the inspected code, or installation of new
7
+ # versions of RuboCop, may require this file to be generated again.
8
+
9
+ # Offense count: 7
10
+ # Cop supports --auto-correct.
11
+ Layout/EmptyLineAfterGuardClause:
12
+ Exclude:
13
+ - 'lib/feedjira/date_time_utilities/date_time_epoch_parser.rb'
14
+ - 'lib/feedjira/feed_utilities.rb'
15
+ - 'lib/feedjira/parser/json_feed_item.rb'
16
+ - 'lib/feedjira/parser/podlove_chapter.rb'
17
+
18
+ # Offense count: 1
19
+ # Cop supports --auto-correct.
20
+ Performance/UnneededSort:
21
+ Exclude:
22
+ - 'lib/feedjira/feed_utilities.rb'
23
+
24
+ # Offense count: 3
25
+ # Configuration parameters: EnforcedStyle.
26
+ # SupportedStyles: inline, group
27
+ Style/AccessModifierDeclarations:
28
+ Exclude:
29
+ - 'lib/feedjira.rb'
30
+ - 'lib/feedjira/parser/itunes_rss.rb'
31
+
32
+ # Offense count: 1
33
+ # Cop supports --auto-correct.
34
+ Style/UnneededCondition:
35
+ Exclude:
36
+ - 'lib/feedjira/feed_utilities.rb'
data/.travis.yml CHANGED
@@ -32,6 +32,3 @@ script:
32
32
 
33
33
  notifications:
34
34
  email: false
35
- webhooks:
36
- urls:
37
- secure: XjoUz2rPXFHnitw//jN4qA92jq7bH19iOI/5KnuptLzz5HrWq1VAXxAr/Fh0KxYZT29G/9i5szaHX1QacfO7he4xa2tZKudRL70Dw3KRLgqLi70G6kFuZYlh+MgMHZy6KwZ/4/250wO31fpv24PCb2M56iTsev2g2uporeobO0Q=
data/Gemfile CHANGED
@@ -3,6 +3,7 @@ source "https://rubygems.org/"
3
3
  gemspec
4
4
 
5
5
  gem "pry"
6
+ gem "rubocop", "~> 0.59.2"
6
7
 
7
8
  group :test do
8
9
  gem "oga"
data/README.md CHANGED
@@ -143,6 +143,8 @@ add-ons and everything in between. Here are some of them:
143
143
 
144
144
  * [Solve for All][solve]: Solve for All combines search engine and feed parsing
145
145
  while protecting your privacy. It's even extendable by the community!
146
+
147
+ * [Feedi API][feedi]: Feedi simplifies how you handle RSS, Atom, or JSON feeds. You can add and keep track of your favourite feed data with a simple and clean REST API. All entries are enriched by Machine Learning and Semantic engines.
146
148
 
147
149
  [Feedbin]: https://feedbin.com/
148
150
  [Stringer]: https://github.com/swanson/stringer
@@ -150,6 +152,7 @@ add-ons and everything in between. Here are some of them:
150
152
  [Feedbunch]: https://github.com/amatriain/feedbunch
151
153
  [old]: http://theoldreader.com/
152
154
  [solve]: https://solveforall.com/
155
+ [feedi]: https://github.com/davidesantangelo/feedi
153
156
 
154
157
  Note: to get your project on this list, simply [send an email](mailto:feedjira@gmail.com)
155
158
  with your project's details.
data/feedjira.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
- require File.expand_path("../lib/feedjira/version", __FILE__)
3
+ require File.expand_path("lib/feedjira/version", __dir__)
4
4
 
5
5
  # rubocop:disable Metrics/BlockLength
6
6
  Gem::Specification.new do |s|
@@ -20,13 +20,19 @@ Gem::Specification.new do |s|
20
20
  s.summary = "A feed parsing library"
21
21
  s.version = Feedjira::VERSION
22
22
 
23
+ s.metadata = {
24
+ "homepage_uri" => "http://feedjira.com",
25
+ "source_code_uri" => "https://github.com/feedjira/feedjira",
26
+ "changelog_uri" => "https://github.com/feedjira/feedjira/blob/master/CHANGELOG.md"
27
+ }
28
+
23
29
  s.files = `git ls-files`.split("\n")
24
30
  s.require_paths = ["lib"]
25
31
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
26
32
 
27
- s.required_ruby_version = ">=1.9.3"
33
+ s.required_ruby_version = ">=2.2"
28
34
 
29
- s.add_dependency "loofah", ">= 2.0"
35
+ s.add_dependency "loofah", ">= 2.2.1"
30
36
  s.add_dependency "sax-machine", ">= 1.0"
31
37
 
32
38
  s.add_development_dependency "danger"
data/lib/feedjira.rb CHANGED
@@ -14,6 +14,8 @@ require "feedjira/date_time_utilities"
14
14
  require "feedjira/feed_entry_utilities"
15
15
  require "feedjira/feed_utilities"
16
16
  require "feedjira/feed"
17
+ require "feedjira/rss_entry_utilities"
18
+ require "feedjira/atom_entry_utilities"
17
19
  require "feedjira/parser"
18
20
  require "feedjira/parser/rss_entry"
19
21
  require "feedjira/parser/rss_image"
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Feedjira
4
+ module AtomEntryUtilities
5
+ def self.included(mod)
6
+ mod.class_exec do
7
+ element :title
8
+ element :name, as: :author
9
+ element :content
10
+ element :summary
11
+ element :enclosure, as: :image, value: :href
12
+
13
+ element :published
14
+ element :id, as: :entry_id
15
+ element :created, as: :published
16
+ element :issued, as: :published
17
+ element :updated
18
+ element :modified, as: :updated
19
+
20
+ elements :category, as: :categories, value: :term
21
+
22
+ element :link, as: :url, value: :href, with: {
23
+ type: "text/html",
24
+ rel: "alternate"
25
+ }
26
+
27
+ elements :link, as: :links, value: :href
28
+ end
29
+ end
30
+
31
+ def url
32
+ @url ||= links.first
33
+ end
34
+ end
35
+ end
@@ -2,7 +2,6 @@
2
2
  # Ruby Cookbook by Lucas Carlson and Leonard Richardson
3
3
  # Published by O'Reilly
4
4
  # ISBN: 0-596-52369-6
5
- # rubocop:disable Style/DocumentationMethod
6
5
  class Date
7
6
  def feed_utils_to_gm_time
8
7
  feed_utils_to_time(new_offset, :gm)
@@ -1,4 +1,3 @@
1
- # rubocop:disable Style/DocumentationMethod Style/Documentation
2
1
  class String
3
2
  def sanitize!
4
3
  replace(sanitize)
@@ -1,7 +1,6 @@
1
1
  require "time"
2
2
  require "date"
3
3
 
4
- # rubocop:disable Style/DocumentationMethod
5
4
  class Time
6
5
  # Parse a time string and convert it to UTC without raising errors.
7
6
  # Parses a flattened 14-digit time (YYYYmmddHHMMMSS) as UTC.
@@ -11,17 +10,16 @@ class Time
11
10
  #
12
11
  # === Returns
13
12
  # A Time instance in UTC or nil if there were errors while parsing.
14
- # rubocop:disable Metrics/MethodLength
15
- def self.parse_safely(dt)
16
- if dt.is_a?(Time)
17
- dt.utc
18
- elsif dt.respond_to?(:to_datetime)
19
- dt.to_datetime.utc
20
- elsif dt.respond_to? :to_s
21
- parse_string_safely dt.to_s
13
+ def self.parse_safely(datetime)
14
+ if datetime.is_a?(Time)
15
+ datetime.utc
16
+ elsif datetime.respond_to?(:to_datetime)
17
+ datetime.to_datetime.utc
18
+ elsif datetime.respond_to? :to_s
19
+ parse_string_safely datetime.to_s
22
20
  end
23
21
  rescue StandardError => e
24
- Feedjira.logger.debug { "Failed to parse time #{dt}" }
22
+ Feedjira.logger.debug { "Failed to parse time #{datetime}" }
25
23
  Feedjira.logger.debug(e)
26
24
  nil
27
25
  end
@@ -1,4 +1,3 @@
1
- # rubocop:disable Style/Documentation
2
1
  module Feedjira
3
2
  module DateTimeUtilities
4
3
  # This is our date parsing heuristic.
@@ -12,7 +11,6 @@ module Feedjira
12
11
 
13
12
  # Parse the given string starting with the most common parser (default ruby)
14
13
  # and going over all other available parsers
15
- # rubocop:disable Metrics/MethodLength
16
14
  def parse_datetime(string)
17
15
  res = DATE_PARSERS.detect do |parser|
18
16
  begin
@@ -1,5 +1,3 @@
1
- # rubocop:disable Style/Documentation
2
- # rubocop:disable Style/DocumentationMethod
3
1
  module Feedjira
4
2
  module DateTimeUtilities
5
3
  class DateTimeEpochParser
@@ -1,5 +1,3 @@
1
- # rubocop:disable Style/Documentation
2
- # rubocop:disable Style/DocumentationMethod
3
1
  module Feedjira
4
2
  module DateTimeUtilities
5
3
  class DateTimeLanguageParser
@@ -1,15 +1,11 @@
1
- # rubocop:disable Style/Documentation
2
- # rubocop:disable Style/DocumentationMethod
3
1
  module Feedjira
4
2
  module DateTimeUtilities
5
3
  class DateTimePatternParser
6
- # rubocop:disable Style/AsciiComments
7
4
  # Japanese Symbols are required for strange Date Strings like
8
5
  # '水, 31 8 2016 07:37:00 PDT'
9
6
  JAPANESE_SYMBOLS = %w(日 月 火 水 木 金 土).freeze
10
7
  PATTERNS = ["%m/%d/%Y %T %p", "%d %m %Y %T %Z"].freeze
11
8
 
12
- # rubocop:disable Metrics/MethodLength
13
9
  def self.parse(string)
14
10
  PATTERNS.each do |p|
15
11
  begin
@@ -1,5 +1,3 @@
1
- # rubocop:disable Style/Documentation
2
- # rubocop:disable Style/DocumentationMethod
3
1
  module Feedjira
4
2
  class Feed
5
3
  class << self
@@ -1,5 +1,3 @@
1
- # rubocop:disable Style/Documentation
2
- # rubocop:disable Style/DocumentationMethod
3
1
  module Feedjira
4
2
  module FeedEntryUtilities
5
3
  include Enumerable
@@ -20,22 +18,24 @@ module Feedjira
20
18
  ##
21
19
  # Returns the id of the entry or its url if not id is present, as some
22
20
  # formats don't support it
21
+ # rubocop:disable Naming/MemoizedInstanceVariableName
23
22
  def id
24
23
  @entry_id ||= @url
25
24
  end
25
+ # rubocop:enable Naming/MemoizedInstanceVariableName
26
26
 
27
27
  ##
28
28
  # Writer for published. By default, we keep the "oldest" publish time found.
29
29
  def published=(val)
30
30
  parsed = parse_datetime(val)
31
- @published = parsed if !@published || parsed < @published
31
+ @published = parsed if parsed && (!@published || parsed < @published)
32
32
  end
33
33
 
34
34
  ##
35
35
  # Writer for updated. By default, we keep the most recent update time found.
36
36
  def updated=(val)
37
37
  parsed = parse_datetime(val)
38
- @updated = parsed if !@updated || parsed > @updated
38
+ @updated = parsed if parsed && (!@updated || parsed > @updated)
39
39
  end
40
40
 
41
41
  def sanitize!
@@ -49,10 +49,15 @@ module Feedjira
49
49
  alias last_modified published
50
50
 
51
51
  def each
52
- @rss_fields ||= instance_variables
52
+ @rss_fields ||= instance_variables.map do |ivar|
53
+ ivar.to_s.sub("@", "")
54
+ end.select do |field|
55
+ # select callable (public) methods only
56
+ respond_to?(field)
57
+ end
53
58
 
54
59
  @rss_fields.each do |field|
55
- yield(field.to_s.sub("@", ""), instance_variable_get(field))
60
+ yield(field, instance_variable_get(:"@#{field}"))
56
61
  end
57
62
  end
58
63
 
@@ -1,5 +1,3 @@
1
- # rubocop:disable Style/Documentation
2
- # rubocop:disable Style/DocumentationMethod
3
1
  module Feedjira
4
2
  module FeedUtilities
5
3
  UPDATABLE_ATTRIBUTES = %w(title feed_url url last_modified etag).freeze
@@ -1,4 +1,3 @@
1
- # rubocop:disable Style/DocumentationMethod
2
1
  module Feedjira
3
2
  module Parser
4
3
  # Parser for dealing with Atom feeds.
@@ -1,32 +1,13 @@
1
- # rubocop:disable Style/DocumentationMethod
2
1
  module Feedjira
3
2
  module Parser
4
3
  # Parser for dealing with Atom feed entries.
5
4
  class AtomEntry
6
5
  include SAXMachine
7
6
  include FeedEntryUtilities
7
+ include AtomEntryUtilities
8
8
 
9
- element :title
10
- element :link, as: :url, value: :href, with: { type: "text/html", rel: "alternate" } # rubocop:disable Metrics/LineLength
11
- element :name, as: :author
12
- element :content
13
- element :summary
14
-
9
+ element :"media:thumbnail", as: :image, value: :url
15
10
  element :"media:content", as: :image, value: :url
16
- element :enclosure, as: :image, value: :href
17
-
18
- element :published
19
- element :id, as: :entry_id
20
- element :created, as: :published
21
- element :issued, as: :published
22
- element :updated
23
- element :modified, as: :updated
24
- elements :category, as: :categories, value: :term
25
- elements :link, as: :links, value: :href
26
-
27
- def url
28
- @url ||= links.first
29
- end
30
11
  end
31
12
  end
32
13
  end
@@ -1,4 +1,3 @@
1
- # rubocop:disable Style/DocumentationMethod
2
1
  module Feedjira
3
2
  module Parser
4
3
  # Parser for dealing with Feedburner Atom feeds.
@@ -20,7 +19,7 @@ module Feedjira
20
19
  attr_writer :url, :feed_url
21
20
 
22
21
  def self.able_to_parse?(xml)
23
- ((/Atom/ =~ xml) && (/feedburner/ =~ xml) && !(/\<rss|\<rdf/ =~ xml)) || false # rubocop:disable Metrics/LineLength
22
+ ((/<feed/ =~ xml) && (/Atom/ =~ xml) && (/feedburner/ =~ xml) && !(/\<rss|\<rdf/ =~ xml)) || false # rubocop:disable Metrics/LineLength
24
23
  end
25
24
 
26
25
  # Feed url is <link> with type="text/html" if present,
@@ -1,32 +1,21 @@
1
- # rubocop:disable Style/DocumentationMethod
2
1
  module Feedjira
3
2
  module Parser
4
3
  # Parser for dealing with Feedburner Atom feed entries.
5
4
  class AtomFeedBurnerEntry
6
5
  include SAXMachine
7
6
  include FeedEntryUtilities
7
+ include AtomEntryUtilities
8
8
 
9
- element :title
10
- element :name, as: :author
11
- element :link, as: :url, value: :href, with: { type: "text/html", rel: "alternate" } # rubocop:disable Metrics/LineLength
12
- element :"feedburner:origLink", as: :url
13
- element :summary
14
- element :content
9
+ element :"feedburner:origLink", as: :orig_link
10
+ # rubocop:disable Style/AccessModifierDeclarations
11
+ private :orig_link
12
+ # rubocop:enable Style/AccessModifierDeclarations
15
13
 
14
+ element :"media:thumbnail", as: :image, value: :url
16
15
  element :"media:content", as: :image, value: :url
17
- element :enclosure, as: :image, value: :href
18
-
19
- element :published
20
- element :id, as: :entry_id
21
- element :issued, as: :published
22
- element :created, as: :published
23
- element :updated
24
- element :modified, as: :updated
25
- elements :category, as: :categories, value: :term
26
- elements :link, as: :links, value: :href
27
16
 
28
17
  def url
29
- @url ||= links.first
18
+ orig_link || super
30
19
  end
31
20
  end
32
21
  end
@@ -1,4 +1,3 @@
1
- # rubocop:disable Style/DocumentationMethod
2
1
  module Feedjira
3
2
  module Parser
4
3
  # Parser for dealing with RSS feeds.
@@ -1,19 +1,18 @@
1
- # rubocop:disable Style/Documentation
2
1
  module Feedjira
3
2
  module Parser
4
3
  class AtomYoutubeEntry
5
4
  include SAXMachine
6
5
  include FeedEntryUtilities
6
+ include AtomEntryUtilities
7
+
8
+ sax_config.top_level_elements["link"].clear
9
+ sax_config.collection_elements["link"].clear
7
10
 
8
- element :title
9
11
  element :link, as: :url, value: :href, with: { rel: "alternate" }
10
- element :name, as: :author
12
+
11
13
  element :"media:description", as: :content
12
- element :summary
13
- element :published
14
- element :id, as: :entry_id
15
- element :updated
16
14
  element :"yt:videoId", as: :youtube_video_id
15
+ element :"yt:channelId", as: :youtube_channel_id
17
16
  element :"media:title", as: :media_title
18
17
  element :"media:content", as: :media_url, value: :url
19
18
  element :"media:content", as: :media_type, value: :type
@@ -1,6 +1,4 @@
1
1
  require File.expand_path("./atom", File.dirname(__FILE__))
2
- # rubocop:disable Style/Documentation
3
- # rubocop:disable Style/DocumentationMethod
4
2
  module Feedjira
5
3
  module Parser
6
4
  class GoogleDocsAtom
@@ -1,31 +1,13 @@
1
- # rubocop:disable Style/Documentation
2
- # rubocop:disable Style/DocumentationMethod
3
1
  module Feedjira
4
2
  module Parser
5
3
  class GoogleDocsAtomEntry
6
4
  include SAXMachine
7
5
  include FeedEntryUtilities
6
+ include AtomEntryUtilities
8
7
 
9
- element :title
10
- element :link, as: :url, value: :href, with: { type: "text/html", rel: "alternate" } # rubocop:disable Metrics/LineLength
11
- element :name, as: :author
12
- element :content
13
- element :summary
14
- element :published
15
- element :id, as: :entry_id
16
- element :created, as: :published
17
- element :issued, as: :published
18
- element :updated
19
- element :modified, as: :updated
20
- elements :category, as: :categories, value: :term
21
- elements :link, as: :links, value: :href
22
8
  element :"docs:md5Checksum", as: :checksum
23
9
  element :"docs:filename", as: :original_filename
24
10
  element :"docs:suggestedFilename", as: :suggested_filename
25
-
26
- def url
27
- @url ||= links.first
28
- end
29
11
  end
30
12
  end
31
13
  end
@@ -1,4 +1,3 @@
1
- # rubocop:disable Style/DocumentationMethod
2
1
  module Feedjira
3
2
  module Parser
4
3
  # iTunes is RSS 2.0 + some apple extensions
@@ -1,4 +1,3 @@
1
- # rubocop:disable Style/DocumentationMethod
2
1
  module Feedjira
3
2
  module Parser
4
3
  # iTunes extensions to the standard RSS2.0 item
@@ -5,14 +5,9 @@ module Feedjira
5
5
  class ITunesRSSItem
6
6
  include SAXMachine
7
7
  include FeedEntryUtilities
8
+ include RSSEntryUtilities
8
9
 
9
- element :author
10
- element :guid, as: :entry_id
11
- element :title
12
- element :link, as: :url
13
- element :description, as: :summary
14
- element :"content:encoded", as: :content
15
- element :pubDate, as: :published
10
+ sax_config.top_level_elements["enclosure"].clear
16
11
 
17
12
  # If author is not present use author tag on the item
18
13
  element :"itunes:author", as: :itunes_author
@@ -1,4 +1,3 @@
1
- # rubocop:disable Style/Documentation
2
1
  module Feedjira
3
2
  module Parser
4
3
  class ITunesRSSOwner
@@ -1,5 +1,3 @@
1
- # rubocop:disable Style/Documentation
2
- # rubocop:disable Style/DocumentationMethod
3
1
  module Feedjira
4
2
  module Parser
5
3
  class PodloveChapter
@@ -1,4 +1,3 @@
1
- # rubocop:disable Style/DocumentationMethod
2
1
  module Feedjira
3
2
  module Parser
4
3
  # Parser for dealing with RSS feeds.
@@ -4,34 +4,7 @@ module Feedjira
4
4
  class RSSEntry
5
5
  include SAXMachine
6
6
  include FeedEntryUtilities
7
-
8
- element :title
9
- element :link, as: :url
10
-
11
- element :"dc:creator", as: :author
12
- element :author, as: :author
13
- element :"content:encoded", as: :content
14
- element :description, as: :summary
15
-
16
- element :"media:content", as: :image, value: :url
17
- element :enclosure, as: :image, value: :url
18
-
19
- element :pubDate, as: :published
20
- element :pubdate, as: :published
21
- element :"dc:date", as: :published
22
- element :"dc:Date", as: :published
23
- element :"dcterms:created", as: :published
24
-
25
- element :"dcterms:modified", as: :updated
26
- element :issued, as: :published
27
- elements :category, as: :categories
28
-
29
- element :guid, as: :entry_id
30
- element :"dc:identifier", as: :dc_identifier
31
-
32
- def id
33
- @entry_id ||= @dc_identifier || @url
34
- end
7
+ include RSSEntryUtilities
35
8
  end
36
9
  end
37
10
  end
@@ -1,4 +1,3 @@
1
- # rubocop:disable Style/DocumentationMethod
2
1
  module Feedjira
3
2
  module Parser
4
3
  # Parser for dealing with RSS feeds.
@@ -1,38 +1,18 @@
1
- # rubocop:disable Style/DocumentationMethod
2
1
  module Feedjira
3
2
  module Parser
4
3
  # Parser for dealing with RDF feed entries.
5
4
  class RSSFeedBurnerEntry
6
5
  include SAXMachine
7
6
  include FeedEntryUtilities
7
+ include RSSEntryUtilities
8
8
 
9
- element :title
10
-
11
- element :"feedburner:origLink", as: :url
12
- element :link, as: :url
13
-
14
- element :"dc:creator", as: :author
15
- element :author, as: :author
16
- element :"content:encoded", as: :content
17
- element :description, as: :summary
18
-
19
- element :"media:content", as: :image, value: :url
20
- element :enclosure, as: :image, value: :url
21
-
22
- element :pubDate, as: :published
23
- element :pubdate, as: :published
24
- element :"dc:date", as: :published
25
- element :"dc:Date", as: :published
26
- element :"dcterms:created", as: :published
27
-
28
- element :"dcterms:modified", as: :updated
29
- element :issued, as: :published
30
- elements :category, as: :categories
31
-
32
- element :guid, as: :entry_id
9
+ element :"feedburner:origLink", as: :orig_link
10
+ # rubocop:disable Style/AccessModifierDeclarations
11
+ private :orig_link
12
+ # rubocop:enable Style/AccessModifierDeclarations
33
13
 
34
14
  def url
35
- @url || @link
15
+ orig_link || super
36
16
  end
37
17
  end
38
18
  end
@@ -1,5 +1,3 @@
1
- # rubocop:disable Style/Documentation
2
- # rubocop:disable Style/DocumentationMethod
3
1
  module Feedjira
4
2
  class Preprocessor
5
3
  def initialize(xml)
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Feedjira
4
+ module RSSEntryUtilities
5
+ def self.included(mod)
6
+ mod.class_exec do
7
+ element :title
8
+
9
+ element :"content:encoded", as: :content
10
+ element :description, as: :summary
11
+
12
+ element :link, as: :url
13
+
14
+ element :author
15
+ element :"dc:creator", as: :author
16
+
17
+ element :pubDate, as: :published
18
+ element :pubdate, as: :published
19
+ element :issued, as: :published
20
+ element :"dc:date", as: :published
21
+ element :"dc:Date", as: :published
22
+ element :"dcterms:created", as: :published
23
+
24
+ element :"dcterms:modified", as: :updated
25
+
26
+ element :guid, as: :entry_id
27
+ element :"dc:identifier", as: :dc_identifier
28
+
29
+ element :"media:thumbnail", as: :image, value: :url
30
+ element :"media:content", as: :image, value: :url
31
+ element :enclosure, as: :image, value: :url
32
+
33
+ elements :category, as: :categories
34
+ end
35
+ end
36
+
37
+ attr_reader :url
38
+
39
+ # rubocop:disable Naming/MemoizedInstanceVariableName
40
+ def id
41
+ @entry_id ||= @dc_identifier || @url
42
+ end
43
+ # rubocop:enable Naming/MemoizedInstanceVariableName
44
+ end
45
+ end
data/lib/feedjira/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Feedjira
2
- VERSION = "3.0.0.beta1".freeze
2
+ VERSION = "3.0.0".freeze
3
3
  end
@@ -3,7 +3,7 @@ require "spec_helper"
3
3
  class Hell < StandardError; end
4
4
 
5
5
  class FailParser
6
- def self.parse(_, &on_failure)
6
+ def self.parse(_xml, &on_failure)
7
7
  on_failure.call "this parser always fails."
8
8
  end
9
9
  end
@@ -20,11 +20,11 @@ describe Feedjira::Parser::AtomYoutubeEntry do
20
20
  end
21
21
 
22
22
  it "should have the published date" do
23
- expect(@entry.published).to eq Time.parse_safely("2015-05-04T00:01:27+00:00") # rubocop:disable Metrics/LineLength
23
+ expect(@entry.published).to eq Time.parse_safely("2015-05-04T00:01:27+00:00")
24
24
  end
25
25
 
26
26
  it "should have the updated date" do
27
- expect(@entry.updated).to eq Time.parse_safely("2015-05-13T17:38:30+00:00") # rubocop:disable Metrics/LineLength
27
+ expect(@entry.updated).to eq Time.parse_safely("2015-05-13T17:38:30+00:00")
28
28
  end
29
29
 
30
30
  it "should have the content populated from the media:description element" do
data/spec/feedjira_spec.rb CHANGED
@@ -62,6 +62,16 @@ RSpec.describe Feedjira do
62
62
  expect(feed.entries.first.id).to eq "23246627"
63
63
  expect(feed.entries.last.id.strip).to eq "1"
64
64
  end
65
+
66
+ it "does not fail if multiple published dates exist and some are unparseable" do
67
+ expect(Feedjira.logger).to receive(:warn).twice
68
+
69
+ feed = Feedjira.parse(sample_invalid_date_format_feed)
70
+ expect(feed.title).to eq "Invalid date format feed"
71
+ published = Time.parse_safely "Mon, 16 Oct 2017 15:10:00 GMT"
72
+ expect(feed.entries.first.published).to eq published
73
+ expect(feed.entries.size).to eq 2
74
+ end
65
75
  end
66
76
 
67
77
  context "when there's no available parser" do
@@ -128,7 +138,7 @@ RSpec.describe Feedjira do
128
138
  it "does not use default parsers" do
129
139
  xml = "Atom asdf"
130
140
  new_parser = Class.new do
131
- def self.able_to_parse?(_)
141
+ def self.able_to_parse?(_xml)
132
142
  true
133
143
  end
134
144
  end
data/spec/sample_feeds.rb CHANGED
@@ -1,5 +1,3 @@
1
- # rubocop:disable Metrics/LineLength
2
-
3
1
  module SampleFeeds
4
2
  FEEDS = {
5
3
  sample_atom_feed: "AmazonWebServicesBlog.xml",
@@ -29,6 +27,7 @@ module SampleFeeds
29
27
  sample_atom_xhtml_with_escpaed_html_in_pre_tag_feed: "AtomEscapedHTMLInPreTag.xml",
30
28
  sample_json_feed: "json_feed.json",
31
29
  sample_rss_feed_huffpost_ca: "HuffPostCanada.xml",
30
+ sample_invalid_date_format_feed: "InvalidDateFormat.xml"
32
31
  }.freeze
33
32
 
34
33
  FEEDS.each do |method, filename|
@@ -39,5 +38,3 @@ module SampleFeeds
39
38
  File.read("#{File.dirname(__FILE__)}/sample_feeds/#{filename}")
40
39
  end
41
40
  end
42
-
43
- # rubocop:enable Metrics/LineLength
@@ -0,0 +1,20 @@
1
+ <?xml version="1.0" encoding="UTF-8"?><rss version="2.0"
2
+ xmlns:dc="http://purl.org/dc/elements/1.1/">
3
+
4
+ <channel>
5
+ <title>Invalid date format feed</title>
6
+ <link>http://example.com/feed</link>
7
+ <language>en-US</language>
8
+ <item>
9
+ <title>Item 0 with an invalid date</title>
10
+ <link>http://example.com/item0</link>
11
+ <pubDate>Mon, 16 Oct 2017 15:10:00 +0000</pubDate>
12
+ <dc:date>1518478934</dc:date>
13
+ </item>
14
+ <item>
15
+ <title>Item 1 with all valid dates</title>
16
+ <link>http://example.com/item1</link>
17
+ <pubDate>Tue, 17 Oct 2017 12:17:00 +0000</pubDate>
18
+ <dc:date>Tue, 17 Oct 2017 22:17:00 +0000</dc:date>
19
+ </item>
20
+ </channel>
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: feedjira
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.0.beta1
4
+ version: 3.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Adam Hess
@@ -13,7 +13,7 @@ authors:
13
13
  autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
- date: 2017-12-22 00:00:00.000000000 Z
16
+ date: 2019-07-10 00:00:00.000000000 Z
17
17
  dependencies:
18
18
  - !ruby/object:Gem::Dependency
19
19
  name: loofah
@@ -21,14 +21,14 @@ dependencies:
21
21
  requirements:
22
22
  - - ">="
23
23
  - !ruby/object:Gem::Version
24
- version: '2.0'
24
+ version: 2.2.1
25
25
  type: :runtime
26
26
  prerelease: false
27
27
  version_requirements: !ruby/object:Gem::Requirement
28
28
  requirements:
29
29
  - - ">="
30
30
  - !ruby/object:Gem::Version
31
- version: '2.0'
31
+ version: 2.2.1
32
32
  - !ruby/object:Gem::Dependency
33
33
  name: sax-machine
34
34
  requirement: !ruby/object:Gem::Requirement
@@ -136,6 +136,7 @@ files:
136
136
  - ".gitignore"
137
137
  - ".rspec"
138
138
  - ".rubocop.yml"
139
+ - ".rubocop_todo.yml"
139
140
  - ".travis.yml"
140
141
  - CHANGELOG.md
141
142
  - CODE_OF_CONDUCT.md
@@ -146,6 +147,7 @@ files:
146
147
  - Rakefile
147
148
  - feedjira.gemspec
148
149
  - lib/feedjira.rb
150
+ - lib/feedjira/atom_entry_utilities.rb
149
151
  - lib/feedjira/configuration.rb
150
152
  - lib/feedjira/core_ext.rb
151
153
  - lib/feedjira/core_ext/date.rb
@@ -180,6 +182,7 @@ files:
180
182
  - lib/feedjira/parser/rss_feed_burner_entry.rb
181
183
  - lib/feedjira/parser/rss_image.rb
182
184
  - lib/feedjira/preprocessor.rb
185
+ - lib/feedjira/rss_entry_utilities.rb
183
186
  - lib/feedjira/version.rb
184
187
  - spec/feedjira/configuration_spec.rb
185
188
  - spec/feedjira/date_time_utilities_spec.rb
@@ -223,6 +226,7 @@ files:
223
226
  - spec/sample_feeds/HuffPostCanada.xml
224
227
  - spec/sample_feeds/ITunesWithSingleQuotedAttributes.xml
225
228
  - spec/sample_feeds/ITunesWithSpacesInAttributes.xml
229
+ - spec/sample_feeds/InvalidDateFormat.xml
226
230
  - spec/sample_feeds/PaulDixExplainsNothing.xml
227
231
  - spec/sample_feeds/PaulDixExplainsNothingAlternate.xml
228
232
  - spec/sample_feeds/PaulDixExplainsNothingFirstEntryContent.xml
@@ -244,7 +248,10 @@ files:
244
248
  homepage: http://feedjira.com
245
249
  licenses:
246
250
  - MIT
247
- metadata: {}
251
+ metadata:
252
+ homepage_uri: http://feedjira.com
253
+ source_code_uri: https://github.com/feedjira/feedjira
254
+ changelog_uri: https://github.com/feedjira/feedjira/blob/master/CHANGELOG.md
248
255
  post_install_message:
249
256
  rdoc_options: []
250
257
  require_paths:
@@ -253,15 +260,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
253
260
  requirements:
254
261
  - - ">="
255
262
  - !ruby/object:Gem::Version
256
- version: 1.9.3
263
+ version: '2.2'
257
264
  required_rubygems_version: !ruby/object:Gem::Requirement
258
265
  requirements:
259
- - - ">"
266
+ - - ">="
260
267
  - !ruby/object:Gem::Version
261
- version: 1.3.1
268
+ version: '0'
262
269
  requirements: []
263
- rubyforge_project:
264
- rubygems_version: 2.6.13
270
+ rubygems_version: 3.0.1
265
271
  signing_key:
266
272
  specification_version: 4
267
273
  summary: A feed parsing library
@@ -308,6 +314,7 @@ test_files:
308
314
  - spec/sample_feeds/HuffPostCanada.xml
309
315
  - spec/sample_feeds/ITunesWithSingleQuotedAttributes.xml
310
316
  - spec/sample_feeds/ITunesWithSpacesInAttributes.xml
317
+ - spec/sample_feeds/InvalidDateFormat.xml
311
318
  - spec/sample_feeds/PaulDixExplainsNothing.xml
312
319
  - spec/sample_feeds/PaulDixExplainsNothingAlternate.xml
313
320
  - spec/sample_feeds/PaulDixExplainsNothingFirstEntryContent.xml