feedjira 3.0.0.beta1 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. checksums.yaml +5 -5
  2. data/.rubocop.yml +5 -2
  3. data/.rubocop_todo.yml +36 -0
  4. data/.travis.yml +0 -3
  5. data/Gemfile +1 -0
  6. data/README.md +3 -0
  7. data/feedjira.gemspec +9 -3
  8. data/lib/feedjira.rb +2 -0
  9. data/lib/feedjira/atom_entry_utilities.rb +35 -0
  10. data/lib/feedjira/core_ext/date.rb +0 -1
  11. data/lib/feedjira/core_ext/string.rb +0 -1
  12. data/lib/feedjira/core_ext/time.rb +8 -10
  13. data/lib/feedjira/date_time_utilities.rb +0 -2
  14. data/lib/feedjira/date_time_utilities/date_time_epoch_parser.rb +0 -2
  15. data/lib/feedjira/date_time_utilities/date_time_language_parser.rb +0 -2
  16. data/lib/feedjira/date_time_utilities/date_time_pattern_parser.rb +0 -4
  17. data/lib/feedjira/feed.rb +0 -2
  18. data/lib/feedjira/feed_entry_utilities.rb +11 -6
  19. data/lib/feedjira/feed_utilities.rb +0 -2
  20. data/lib/feedjira/parser/atom.rb +0 -1
  21. data/lib/feedjira/parser/atom_entry.rb +2 -21
  22. data/lib/feedjira/parser/atom_feed_burner.rb +1 -2
  23. data/lib/feedjira/parser/atom_feed_burner_entry.rb +7 -18
  24. data/lib/feedjira/parser/atom_youtube.rb +0 -1
  25. data/lib/feedjira/parser/atom_youtube_entry.rb +6 -7
  26. data/lib/feedjira/parser/google_docs_atom.rb +0 -2
  27. data/lib/feedjira/parser/google_docs_atom_entry.rb +1 -19
  28. data/lib/feedjira/parser/itunes_rss.rb +0 -1
  29. data/lib/feedjira/parser/itunes_rss_category.rb +0 -1
  30. data/lib/feedjira/parser/itunes_rss_item.rb +2 -7
  31. data/lib/feedjira/parser/itunes_rss_owner.rb +0 -1
  32. data/lib/feedjira/parser/podlove_chapter.rb +0 -2
  33. data/lib/feedjira/parser/rss.rb +0 -1
  34. data/lib/feedjira/parser/rss_entry.rb +1 -28
  35. data/lib/feedjira/parser/rss_feed_burner.rb +0 -1
  36. data/lib/feedjira/parser/rss_feed_burner_entry.rb +6 -26
  37. data/lib/feedjira/preprocessor.rb +0 -2
  38. data/lib/feedjira/rss_entry_utilities.rb +45 -0
  39. data/lib/feedjira/version.rb +1 -1
  40. data/spec/feedjira/feed_spec.rb +1 -1
  41. data/spec/feedjira/parser/atom_youtube_entry_spec.rb +2 -2
  42. data/spec/feedjira_spec.rb +11 -1
  43. data/spec/sample_feeds.rb +1 -4
  44. data/spec/sample_feeds/InvalidDateFormat.xml +20 -0
  45. metadata +17 -10
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: b128986f5aba338178d635d04c7c76a6bf4786c9
4
- data.tar.gz: d812e34dcf097bd6cc3d5d6b299337898eb119ec
2
+ SHA256:
3
+ metadata.gz: 49efb7655e500df91c7119e3afbb2b91fab6d0a8282703fecb69f24d15f54357
4
+ data.tar.gz: ea6fc3b58be4968be8c8561b6e8c2720b5732882ff2556fe6966583df9bcf130
5
5
  SHA512:
6
- metadata.gz: e199341333e63c4b11fd65532c41d8efd9d7236bd264e21edd9795f1fcbe8b79e35a122534f1b0ee61a0e76fade5364e70a032e142da63589e3158c4fa78440a
7
- data.tar.gz: bcddaf13bdc6214acb552dadfe23f481a2e0d6e7da99b3e182e9bb067f1e0f70e93f80d62b75e02f52304c5cf4b47252c6a9ee55f6c1c697e9c9800acf230e07
6
+ metadata.gz: dd52aec9d212c0428095c3adee3cbfd1115d53a54ac2281bf7ce7f4142ace6659e658658097e43edb3c90deec583941ed220aea1fe1709f66110adb79c471620
7
+ data.tar.gz: d39bfe14c74a642311d55369498195e588beec99e5bfb897b0f00c5019f1985f89c1280b8c8eaafb3d36f55b9558b04c9037ccc8710c34c4bad2cb6127702d61
@@ -1,4 +1,7 @@
1
+ inherit_from: .rubocop_todo.yml
2
+
1
3
  AllCops:
4
+ TargetRubyVersion: 2.2
2
5
  Exclude:
3
6
  - db/schema.rb
4
7
  - vendor/**/*
@@ -360,7 +363,7 @@ Style/TrailingCommaInArguments:
360
363
  - no_comma
361
364
  Enabled: true
362
365
 
363
- Style/TrailingCommaInLiteral:
366
+ Style/TrailingCommaInArrayLiteral:
364
367
  Description: 'Checks for trailing comma in array and hash literals.'
365
368
  StyleGuide: 'https://github.com/bbatsov/ruby-style-guide#no-trailing-array-commas'
366
369
  EnforcedStyleForMultiline: comma
@@ -527,7 +530,7 @@ Lint/UnderscorePrefixedVariableName:
527
530
  Description: 'Do not use prefix `_` for a variable that is used.'
528
531
  Enabled: false
529
532
 
530
- Lint/UnneededDisable:
533
+ Lint/UnneededCopDisableDirective:
531
534
  Description: >-
532
535
  Checks for rubocop:disable comments that can be removed.
533
536
  Note: this cop is not disabled when disabling all cops.
@@ -0,0 +1,36 @@
1
+ # This configuration was generated by
2
+ # `rubocop --auto-gen-config`
3
+ # on 2018-10-08 04:14:19 +0900 using RuboCop version 0.59.2.
4
+ # The point is for the user to remove these configuration records
5
+ # one by one as the offenses are removed from the code base.
6
+ # Note that changes in the inspected code, or installation of new
7
+ # versions of RuboCop, may require this file to be generated again.
8
+
9
+ # Offense count: 7
10
+ # Cop supports --auto-correct.
11
+ Layout/EmptyLineAfterGuardClause:
12
+ Exclude:
13
+ - 'lib/feedjira/date_time_utilities/date_time_epoch_parser.rb'
14
+ - 'lib/feedjira/feed_utilities.rb'
15
+ - 'lib/feedjira/parser/json_feed_item.rb'
16
+ - 'lib/feedjira/parser/podlove_chapter.rb'
17
+
18
+ # Offense count: 1
19
+ # Cop supports --auto-correct.
20
+ Performance/UnneededSort:
21
+ Exclude:
22
+ - 'lib/feedjira/feed_utilities.rb'
23
+
24
+ # Offense count: 3
25
+ # Configuration parameters: EnforcedStyle.
26
+ # SupportedStyles: inline, group
27
+ Style/AccessModifierDeclarations:
28
+ Exclude:
29
+ - 'lib/feedjira.rb'
30
+ - 'lib/feedjira/parser/itunes_rss.rb'
31
+
32
+ # Offense count: 1
33
+ # Cop supports --auto-correct.
34
+ Style/UnneededCondition:
35
+ Exclude:
36
+ - 'lib/feedjira/feed_utilities.rb'
@@ -32,6 +32,3 @@ script:
32
32
 
33
33
  notifications:
34
34
  email: false
35
- webhooks:
36
- urls:
37
- secure: XjoUz2rPXFHnitw//jN4qA92jq7bH19iOI/5KnuptLzz5HrWq1VAXxAr/Fh0KxYZT29G/9i5szaHX1QacfO7he4xa2tZKudRL70Dw3KRLgqLi70G6kFuZYlh+MgMHZy6KwZ/4/250wO31fpv24PCb2M56iTsev2g2uporeobO0Q=
data/Gemfile CHANGED
@@ -3,6 +3,7 @@ source "https://rubygems.org/"
3
3
  gemspec
4
4
 
5
5
  gem "pry"
6
+ gem "rubocop", "~> 0.59.2"
6
7
 
7
8
  group :test do
8
9
  gem "oga"
data/README.md CHANGED
@@ -143,6 +143,8 @@ add-ons and everything in between. Here are some of them:
143
143
 
144
144
  * [Solve for All][solve]: Solve for All combines search engine and feed parsing
145
145
  while protecting your privacy. It's even extendable by the community!
146
+
147
+ * [Feedi API][feedi]: Feedi simplifies how you handle RSS, Atom, or JSON feeds. You can add and keep track of your favourite feed data with a simple and clean REST API. All entries are enriched by Machine Learning and Semantic engines.
146
148
 
147
149
  [Feedbin]: https://feedbin.com/
148
150
  [Stringer]: https://github.com/swanson/stringer
@@ -150,6 +152,7 @@ add-ons and everything in between. Here are some of them:
150
152
  [Feedbunch]: https://github.com/amatriain/feedbunch
151
153
  [old]: http://theoldreader.com/
152
154
  [solve]: https://solveforall.com/
155
+ [feedi]: https://github.com/davidesantangelo/feedi
153
156
 
154
157
  Note: to get your project on this list, simply [send an email](mailto:feedjira@gmail.com)
155
158
  with your project's details.
@@ -1,6 +1,6 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
- require File.expand_path("../lib/feedjira/version", __FILE__)
3
+ require File.expand_path("lib/feedjira/version", __dir__)
4
4
 
5
5
  # rubocop:disable Metrics/BlockLength
6
6
  Gem::Specification.new do |s|
@@ -20,13 +20,19 @@ Gem::Specification.new do |s|
20
20
  s.summary = "A feed parsing library"
21
21
  s.version = Feedjira::VERSION
22
22
 
23
+ s.metadata = {
24
+ "homepage_uri" => "http://feedjira.com",
25
+ "source_code_uri" => "https://github.com/feedjira/feedjira",
26
+ "changelog_uri" => "https://github.com/feedjira/feedjira/blob/master/CHANGELOG.md"
27
+ }
28
+
23
29
  s.files = `git ls-files`.split("\n")
24
30
  s.require_paths = ["lib"]
25
31
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
26
32
 
27
- s.required_ruby_version = ">=1.9.3"
33
+ s.required_ruby_version = ">=2.2"
28
34
 
29
- s.add_dependency "loofah", ">= 2.0"
35
+ s.add_dependency "loofah", ">= 2.2.1"
30
36
  s.add_dependency "sax-machine", ">= 1.0"
31
37
 
32
38
  s.add_development_dependency "danger"
@@ -14,6 +14,8 @@ require "feedjira/date_time_utilities"
14
14
  require "feedjira/feed_entry_utilities"
15
15
  require "feedjira/feed_utilities"
16
16
  require "feedjira/feed"
17
+ require "feedjira/rss_entry_utilities"
18
+ require "feedjira/atom_entry_utilities"
17
19
  require "feedjira/parser"
18
20
  require "feedjira/parser/rss_entry"
19
21
  require "feedjira/parser/rss_image"
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Feedjira
4
+ module AtomEntryUtilities
5
+ def self.included(mod)
6
+ mod.class_exec do
7
+ element :title
8
+ element :name, as: :author
9
+ element :content
10
+ element :summary
11
+ element :enclosure, as: :image, value: :href
12
+
13
+ element :published
14
+ element :id, as: :entry_id
15
+ element :created, as: :published
16
+ element :issued, as: :published
17
+ element :updated
18
+ element :modified, as: :updated
19
+
20
+ elements :category, as: :categories, value: :term
21
+
22
+ element :link, as: :url, value: :href, with: {
23
+ type: "text/html",
24
+ rel: "alternate"
25
+ }
26
+
27
+ elements :link, as: :links, value: :href
28
+ end
29
+ end
30
+
31
+ def url
32
+ @url ||= links.first
33
+ end
34
+ end
35
+ end
@@ -2,7 +2,6 @@
2
2
  # Ruby Cookbook by Lucas Carlson and Leonard Richardson
3
3
  # Published by O'Reilly
4
4
  # ISBN: 0-596-52369-6
5
- # rubocop:disable Style/DocumentationMethod
6
5
  class Date
7
6
  def feed_utils_to_gm_time
8
7
  feed_utils_to_time(new_offset, :gm)
@@ -1,4 +1,3 @@
1
- # rubocop:disable Style/DocumentationMethod Style/Documentation
2
1
  class String
3
2
  def sanitize!
4
3
  replace(sanitize)
@@ -1,7 +1,6 @@
1
1
  require "time"
2
2
  require "date"
3
3
 
4
- # rubocop:disable Style/DocumentationMethod
5
4
  class Time
6
5
  # Parse a time string and convert it to UTC without raising errors.
7
6
  # Parses a flattened 14-digit time (YYYYmmddHHMMSS) as UTC.
@@ -11,17 +10,16 @@ class Time
11
10
  #
12
11
  # === Returns
13
12
  # A Time instance in UTC or nil if there were errors while parsing.
14
- # rubocop:disable Metrics/MethodLength
15
- def self.parse_safely(dt)
16
- if dt.is_a?(Time)
17
- dt.utc
18
- elsif dt.respond_to?(:to_datetime)
19
- dt.to_datetime.utc
20
- elsif dt.respond_to? :to_s
21
- parse_string_safely dt.to_s
13
+ def self.parse_safely(datetime)
14
+ if datetime.is_a?(Time)
15
+ datetime.utc
16
+ elsif datetime.respond_to?(:to_datetime)
17
+ datetime.to_datetime.utc
18
+ elsif datetime.respond_to? :to_s
19
+ parse_string_safely datetime.to_s
22
20
  end
23
21
  rescue StandardError => e
24
- Feedjira.logger.debug { "Failed to parse time #{dt}" }
22
+ Feedjira.logger.debug { "Failed to parse time #{datetime}" }
25
23
  Feedjira.logger.debug(e)
26
24
  nil
27
25
  end
@@ -1,4 +1,3 @@
1
- # rubocop:disable Style/Documentation
2
1
  module Feedjira
3
2
  module DateTimeUtilities
4
3
  # This is our date parsing heuristic.
@@ -12,7 +11,6 @@ module Feedjira
12
11
 
13
12
  # Parse the given string starting with the most common parser (default ruby)
14
13
  # and going over all other available parsers
15
- # rubocop:disable Metrics/MethodLength
16
14
  def parse_datetime(string)
17
15
  res = DATE_PARSERS.detect do |parser|
18
16
  begin
@@ -1,5 +1,3 @@
1
- # rubocop:disable Style/Documentation
2
- # rubocop:disable Style/DocumentationMethod
3
1
  module Feedjira
4
2
  module DateTimeUtilities
5
3
  class DateTimeEpochParser
@@ -1,5 +1,3 @@
1
- # rubocop:disable Style/Documentation
2
- # rubocop:disable Style/DocumentationMethod
3
1
  module Feedjira
4
2
  module DateTimeUtilities
5
3
  class DateTimeLanguageParser
@@ -1,15 +1,11 @@
1
- # rubocop:disable Style/Documentation
2
- # rubocop:disable Style/DocumentationMethod
3
1
  module Feedjira
4
2
  module DateTimeUtilities
5
3
  class DateTimePatternParser
6
- # rubocop:disable Style/AsciiComments
7
4
  # Japanese Symbols are required for strange Date Strings like
8
5
  # '水, 31 8 2016 07:37:00 PDT'
9
6
  JAPANESE_SYMBOLS = %w(日 月 火 水 木 金 土).freeze
10
7
  PATTERNS = ["%m/%d/%Y %T %p", "%d %m %Y %T %Z"].freeze
11
8
 
12
- # rubocop:disable Metrics/MethodLength
13
9
  def self.parse(string)
14
10
  PATTERNS.each do |p|
15
11
  begin
@@ -1,5 +1,3 @@
1
- # rubocop:disable Style/Documentation
2
- # rubocop:disable Style/DocumentationMethod
3
1
  module Feedjira
4
2
  class Feed
5
3
  class << self
@@ -1,5 +1,3 @@
1
- # rubocop:disable Style/Documentation
2
- # rubocop:disable Style/DocumentationMethod
3
1
  module Feedjira
4
2
  module FeedEntryUtilities
5
3
  include Enumerable
@@ -20,22 +18,24 @@ module Feedjira
20
18
  ##
21
19
  # Returns the id of the entry or its url if no id is present, as some
22
20
  # formats don't support it
21
+ # rubocop:disable Naming/MemoizedInstanceVariableName
23
22
  def id
24
23
  @entry_id ||= @url
25
24
  end
25
+ # rubocop:enable Naming/MemoizedInstanceVariableName
26
26
 
27
27
  ##
28
28
  # Writer for published. By default, we keep the "oldest" publish time found.
29
29
  def published=(val)
30
30
  parsed = parse_datetime(val)
31
- @published = parsed if !@published || parsed < @published
31
+ @published = parsed if parsed && (!@published || parsed < @published)
32
32
  end
33
33
 
34
34
  ##
35
35
  # Writer for updated. By default, we keep the most recent update time found.
36
36
  def updated=(val)
37
37
  parsed = parse_datetime(val)
38
- @updated = parsed if !@updated || parsed > @updated
38
+ @updated = parsed if parsed && (!@updated || parsed > @updated)
39
39
  end
40
40
 
41
41
  def sanitize!
@@ -49,10 +49,15 @@ module Feedjira
49
49
  alias last_modified published
50
50
 
51
51
  def each
52
- @rss_fields ||= instance_variables
52
+ @rss_fields ||= instance_variables.map do |ivar|
53
+ ivar.to_s.sub("@", "")
54
+ end.select do |field|
55
+ # select callable (public) methods only
56
+ respond_to?(field)
57
+ end
53
58
 
54
59
  @rss_fields.each do |field|
55
- yield(field.to_s.sub("@", ""), instance_variable_get(field))
60
+ yield(field, instance_variable_get(:"@#{field}"))
56
61
  end
57
62
  end
58
63
 
@@ -1,5 +1,3 @@
1
- # rubocop:disable Style/Documentation
2
- # rubocop:disable Style/DocumentationMethod
3
1
  module Feedjira
4
2
  module FeedUtilities
5
3
  UPDATABLE_ATTRIBUTES = %w(title feed_url url last_modified etag).freeze
@@ -1,4 +1,3 @@
1
- # rubocop:disable Style/DocumentationMethod
2
1
  module Feedjira
3
2
  module Parser
4
3
  # Parser for dealing with Atom feeds.
@@ -1,32 +1,13 @@
1
- # rubocop:disable Style/DocumentationMethod
2
1
  module Feedjira
3
2
  module Parser
4
3
  # Parser for dealing with Atom feed entries.
5
4
  class AtomEntry
6
5
  include SAXMachine
7
6
  include FeedEntryUtilities
7
+ include AtomEntryUtilities
8
8
 
9
- element :title
10
- element :link, as: :url, value: :href, with: { type: "text/html", rel: "alternate" } # rubocop:disable Metrics/LineLength
11
- element :name, as: :author
12
- element :content
13
- element :summary
14
-
9
+ element :"media:thumbnail", as: :image, value: :url
15
10
  element :"media:content", as: :image, value: :url
16
- element :enclosure, as: :image, value: :href
17
-
18
- element :published
19
- element :id, as: :entry_id
20
- element :created, as: :published
21
- element :issued, as: :published
22
- element :updated
23
- element :modified, as: :updated
24
- elements :category, as: :categories, value: :term
25
- elements :link, as: :links, value: :href
26
-
27
- def url
28
- @url ||= links.first
29
- end
30
11
  end
31
12
  end
32
13
  end
@@ -1,4 +1,3 @@
1
- # rubocop:disable Style/DocumentationMethod
2
1
  module Feedjira
3
2
  module Parser
4
3
  # Parser for dealing with Feedburner Atom feeds.
@@ -20,7 +19,7 @@ module Feedjira
20
19
  attr_writer :url, :feed_url
21
20
 
22
21
  def self.able_to_parse?(xml)
23
- ((/Atom/ =~ xml) && (/feedburner/ =~ xml) && !(/\<rss|\<rdf/ =~ xml)) || false # rubocop:disable Metrics/LineLength
22
+ ((/<feed/ =~ xml) && (/Atom/ =~ xml) && (/feedburner/ =~ xml) && !(/\<rss|\<rdf/ =~ xml)) || false # rubocop:disable Metrics/LineLength
24
23
  end
25
24
 
26
25
  # Feed url is <link> with type="text/html" if present,
@@ -1,32 +1,21 @@
1
- # rubocop:disable Style/DocumentationMethod
2
1
  module Feedjira
3
2
  module Parser
4
3
  # Parser for dealing with Feedburner Atom feed entries.
5
4
  class AtomFeedBurnerEntry
6
5
  include SAXMachine
7
6
  include FeedEntryUtilities
7
+ include AtomEntryUtilities
8
8
 
9
- element :title
10
- element :name, as: :author
11
- element :link, as: :url, value: :href, with: { type: "text/html", rel: "alternate" } # rubocop:disable Metrics/LineLength
12
- element :"feedburner:origLink", as: :url
13
- element :summary
14
- element :content
9
+ element :"feedburner:origLink", as: :orig_link
10
+ # rubocop:disable Style/AccessModifierDeclarations
11
+ private :orig_link
12
+ # rubocop:enable Style/AccessModifierDeclarations
15
13
 
14
+ element :"media:thumbnail", as: :image, value: :url
16
15
  element :"media:content", as: :image, value: :url
17
- element :enclosure, as: :image, value: :href
18
-
19
- element :published
20
- element :id, as: :entry_id
21
- element :issued, as: :published
22
- element :created, as: :published
23
- element :updated
24
- element :modified, as: :updated
25
- elements :category, as: :categories, value: :term
26
- elements :link, as: :links, value: :href
27
16
 
28
17
  def url
29
- @url ||= links.first
18
+ orig_link || super
30
19
  end
31
20
  end
32
21
  end
@@ -1,4 +1,3 @@
1
- # rubocop:disable Style/DocumentationMethod
2
1
  module Feedjira
3
2
  module Parser
4
3
  # Parser for dealing with RSS feeds.
@@ -1,19 +1,18 @@
1
- # rubocop:disable Style/Documentation
2
1
  module Feedjira
3
2
  module Parser
4
3
  class AtomYoutubeEntry
5
4
  include SAXMachine
6
5
  include FeedEntryUtilities
6
+ include AtomEntryUtilities
7
+
8
+ sax_config.top_level_elements["link"].clear
9
+ sax_config.collection_elements["link"].clear
7
10
 
8
- element :title
9
11
  element :link, as: :url, value: :href, with: { rel: "alternate" }
10
- element :name, as: :author
12
+
11
13
  element :"media:description", as: :content
12
- element :summary
13
- element :published
14
- element :id, as: :entry_id
15
- element :updated
16
14
  element :"yt:videoId", as: :youtube_video_id
15
+ element :"yt:channelId", as: :youtube_channel_id
17
16
  element :"media:title", as: :media_title
18
17
  element :"media:content", as: :media_url, value: :url
19
18
  element :"media:content", as: :media_type, value: :type
@@ -1,6 +1,4 @@
1
1
  require File.expand_path("./atom", File.dirname(__FILE__))
2
- # rubocop:disable Style/Documentation
3
- # rubocop:disable Style/DocumentationMethod
4
2
  module Feedjira
5
3
  module Parser
6
4
  class GoogleDocsAtom
@@ -1,31 +1,13 @@
1
- # rubocop:disable Style/Documentation
2
- # rubocop:disable Style/DocumentationMethod
3
1
  module Feedjira
4
2
  module Parser
5
3
  class GoogleDocsAtomEntry
6
4
  include SAXMachine
7
5
  include FeedEntryUtilities
6
+ include AtomEntryUtilities
8
7
 
9
- element :title
10
- element :link, as: :url, value: :href, with: { type: "text/html", rel: "alternate" } # rubocop:disable Metrics/LineLength
11
- element :name, as: :author
12
- element :content
13
- element :summary
14
- element :published
15
- element :id, as: :entry_id
16
- element :created, as: :published
17
- element :issued, as: :published
18
- element :updated
19
- element :modified, as: :updated
20
- elements :category, as: :categories, value: :term
21
- elements :link, as: :links, value: :href
22
8
  element :"docs:md5Checksum", as: :checksum
23
9
  element :"docs:filename", as: :original_filename
24
10
  element :"docs:suggestedFilename", as: :suggested_filename
25
-
26
- def url
27
- @url ||= links.first
28
- end
29
11
  end
30
12
  end
31
13
  end
@@ -1,4 +1,3 @@
1
- # rubocop:disable Style/DocumentationMethod
2
1
  module Feedjira
3
2
  module Parser
4
3
  # iTunes is RSS 2.0 + some apple extensions
@@ -1,4 +1,3 @@
1
- # rubocop:disable Style/DocumentationMethod
2
1
  module Feedjira
3
2
  module Parser
4
3
  # iTunes extensions to the standard RSS2.0 item
@@ -5,14 +5,9 @@ module Feedjira
5
5
  class ITunesRSSItem
6
6
  include SAXMachine
7
7
  include FeedEntryUtilities
8
+ include RSSEntryUtilities
8
9
 
9
- element :author
10
- element :guid, as: :entry_id
11
- element :title
12
- element :link, as: :url
13
- element :description, as: :summary
14
- element :"content:encoded", as: :content
15
- element :pubDate, as: :published
10
+ sax_config.top_level_elements["enclosure"].clear
16
11
 
17
12
  # If author is not present use author tag on the item
18
13
  element :"itunes:author", as: :itunes_author
@@ -1,4 +1,3 @@
1
- # rubocop:disable Style/Documentation
2
1
  module Feedjira
3
2
  module Parser
4
3
  class ITunesRSSOwner
@@ -1,5 +1,3 @@
1
- # rubocop:disable Style/Documentation
2
- # rubocop:disable Style/DocumentationMethod
3
1
  module Feedjira
4
2
  module Parser
5
3
  class PodloveChapter
@@ -1,4 +1,3 @@
1
- # rubocop:disable Style/DocumentationMethod
2
1
  module Feedjira
3
2
  module Parser
4
3
  # Parser for dealing with RSS feeds.
@@ -4,34 +4,7 @@ module Feedjira
4
4
  class RSSEntry
5
5
  include SAXMachine
6
6
  include FeedEntryUtilities
7
-
8
- element :title
9
- element :link, as: :url
10
-
11
- element :"dc:creator", as: :author
12
- element :author, as: :author
13
- element :"content:encoded", as: :content
14
- element :description, as: :summary
15
-
16
- element :"media:content", as: :image, value: :url
17
- element :enclosure, as: :image, value: :url
18
-
19
- element :pubDate, as: :published
20
- element :pubdate, as: :published
21
- element :"dc:date", as: :published
22
- element :"dc:Date", as: :published
23
- element :"dcterms:created", as: :published
24
-
25
- element :"dcterms:modified", as: :updated
26
- element :issued, as: :published
27
- elements :category, as: :categories
28
-
29
- element :guid, as: :entry_id
30
- element :"dc:identifier", as: :dc_identifier
31
-
32
- def id
33
- @entry_id ||= @dc_identifier || @url
34
- end
7
+ include RSSEntryUtilities
35
8
  end
36
9
  end
37
10
  end
@@ -1,4 +1,3 @@
1
- # rubocop:disable Style/DocumentationMethod
2
1
  module Feedjira
3
2
  module Parser
4
3
  # Parser for dealing with RSS feeds.
@@ -1,38 +1,18 @@
1
- # rubocop:disable Style/DocumentationMethod
2
1
  module Feedjira
3
2
  module Parser
4
3
  # Parser for dealing with RDF feed entries.
5
4
  class RSSFeedBurnerEntry
6
5
  include SAXMachine
7
6
  include FeedEntryUtilities
7
+ include RSSEntryUtilities
8
8
 
9
- element :title
10
-
11
- element :"feedburner:origLink", as: :url
12
- element :link, as: :url
13
-
14
- element :"dc:creator", as: :author
15
- element :author, as: :author
16
- element :"content:encoded", as: :content
17
- element :description, as: :summary
18
-
19
- element :"media:content", as: :image, value: :url
20
- element :enclosure, as: :image, value: :url
21
-
22
- element :pubDate, as: :published
23
- element :pubdate, as: :published
24
- element :"dc:date", as: :published
25
- element :"dc:Date", as: :published
26
- element :"dcterms:created", as: :published
27
-
28
- element :"dcterms:modified", as: :updated
29
- element :issued, as: :published
30
- elements :category, as: :categories
31
-
32
- element :guid, as: :entry_id
9
+ element :"feedburner:origLink", as: :orig_link
10
+ # rubocop:disable Style/AccessModifierDeclarations
11
+ private :orig_link
12
+ # rubocop:enable Style/AccessModifierDeclarations
33
13
 
34
14
  def url
35
- @url || @link
15
+ orig_link || super
36
16
  end
37
17
  end
38
18
  end
@@ -1,5 +1,3 @@
1
- # rubocop:disable Style/Documentation
2
- # rubocop:disable Style/DocumentationMethod
3
1
  module Feedjira
4
2
  class Preprocessor
5
3
  def initialize(xml)
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Feedjira
4
+ module RSSEntryUtilities
5
+ def self.included(mod)
6
+ mod.class_exec do
7
+ element :title
8
+
9
+ element :"content:encoded", as: :content
10
+ element :description, as: :summary
11
+
12
+ element :link, as: :url
13
+
14
+ element :author
15
+ element :"dc:creator", as: :author
16
+
17
+ element :pubDate, as: :published
18
+ element :pubdate, as: :published
19
+ element :issued, as: :published
20
+ element :"dc:date", as: :published
21
+ element :"dc:Date", as: :published
22
+ element :"dcterms:created", as: :published
23
+
24
+ element :"dcterms:modified", as: :updated
25
+
26
+ element :guid, as: :entry_id
27
+ element :"dc:identifier", as: :dc_identifier
28
+
29
+ element :"media:thumbnail", as: :image, value: :url
30
+ element :"media:content", as: :image, value: :url
31
+ element :enclosure, as: :image, value: :url
32
+
33
+ elements :category, as: :categories
34
+ end
35
+ end
36
+
37
+ attr_reader :url
38
+
39
+ # rubocop:disable Naming/MemoizedInstanceVariableName
40
+ def id
41
+ @entry_id ||= @dc_identifier || @url
42
+ end
43
+ # rubocop:enable Naming/MemoizedInstanceVariableName
44
+ end
45
+ end
@@ -1,3 +1,3 @@
1
1
  module Feedjira
2
- VERSION = "3.0.0.beta1".freeze
2
+ VERSION = "3.0.0".freeze
3
3
  end
@@ -3,7 +3,7 @@ require "spec_helper"
3
3
  class Hell < StandardError; end
4
4
 
5
5
  class FailParser
6
- def self.parse(_, &on_failure)
6
+ def self.parse(_xml, &on_failure)
7
7
  on_failure.call "this parser always fails."
8
8
  end
9
9
  end
@@ -20,11 +20,11 @@ describe Feedjira::Parser::AtomYoutubeEntry do
20
20
  end
21
21
 
22
22
  it "should have the published date" do
23
- expect(@entry.published).to eq Time.parse_safely("2015-05-04T00:01:27+00:00") # rubocop:disable Metrics/LineLength
23
+ expect(@entry.published).to eq Time.parse_safely("2015-05-04T00:01:27+00:00")
24
24
  end
25
25
 
26
26
  it "should have the updated date" do
27
- expect(@entry.updated).to eq Time.parse_safely("2015-05-13T17:38:30+00:00") # rubocop:disable Metrics/LineLength
27
+ expect(@entry.updated).to eq Time.parse_safely("2015-05-13T17:38:30+00:00")
28
28
  end
29
29
 
30
30
  it "should have the content populated from the media:description element" do
@@ -62,6 +62,16 @@ RSpec.describe Feedjira do
62
62
  expect(feed.entries.first.id).to eq "23246627"
63
63
  expect(feed.entries.last.id.strip).to eq "1"
64
64
  end
65
+
66
+ it "does not fail if multiple published dates exist and some are unparseable" do
67
+ expect(Feedjira.logger).to receive(:warn).twice
68
+
69
+ feed = Feedjira.parse(sample_invalid_date_format_feed)
70
+ expect(feed.title).to eq "Invalid date format feed"
71
+ published = Time.parse_safely "Mon, 16 Oct 2017 15:10:00 GMT"
72
+ expect(feed.entries.first.published).to eq published
73
+ expect(feed.entries.size).to eq 2
74
+ end
65
75
  end
66
76
 
67
77
  context "when there's no available parser" do
@@ -128,7 +138,7 @@ RSpec.describe Feedjira do
128
138
  it "does not use default parsers" do
129
139
  xml = "Atom asdf"
130
140
  new_parser = Class.new do
131
- def self.able_to_parse?(_)
141
+ def self.able_to_parse?(_xml)
132
142
  true
133
143
  end
134
144
  end
@@ -1,5 +1,3 @@
1
- # rubocop:disable Metrics/LineLength
2
-
3
1
  module SampleFeeds
4
2
  FEEDS = {
5
3
  sample_atom_feed: "AmazonWebServicesBlog.xml",
@@ -29,6 +27,7 @@ module SampleFeeds
29
27
  sample_atom_xhtml_with_escpaed_html_in_pre_tag_feed: "AtomEscapedHTMLInPreTag.xml",
30
28
  sample_json_feed: "json_feed.json",
31
29
  sample_rss_feed_huffpost_ca: "HuffPostCanada.xml",
30
+ sample_invalid_date_format_feed: "InvalidDateFormat.xml"
32
31
  }.freeze
33
32
 
34
33
  FEEDS.each do |method, filename|
@@ -39,5 +38,3 @@ module SampleFeeds
39
38
  File.read("#{File.dirname(__FILE__)}/sample_feeds/#{filename}")
40
39
  end
41
40
  end
42
-
43
- # rubocop:enable Metrics/LineLength
@@ -0,0 +1,20 @@
1
+ <?xml version="1.0" encoding="UTF-8"?><rss version="2.0"
2
+ xmlns:dc="http://purl.org/dc/elements/1.1/">
3
+
4
+ <channel>
5
+ <title>Invalid date format feed</title>
6
+ <link>http://example.com/feed</link>
7
+ <language>en-US</language>
8
+ <item>
9
+ <title>Item 0 with an invalid date</title>
10
+ <link>http://example.com/item0</link>
11
+ <pubDate>Mon, 16 Oct 2017 15:10:00 +0000</pubDate>
12
+ <dc:date>1518478934</dc:date>
13
+ </item>
14
+ <item>
15
+ <title>Item 1 with all valid dates</title>
16
+ <link>http://example.com/item1</link>
17
+ <pubDate>Tue, 17 Oct 2017 12:17:00 +0000</pubDate>
18
+ <dc:date>Tue, 17 Oct 2017 22:17:00 +0000</dc:date>
19
+ </item>
20
+ </channel>
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: feedjira
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.0.beta1
4
+ version: 3.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Adam Hess
@@ -13,7 +13,7 @@ authors:
13
13
  autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
- date: 2017-12-22 00:00:00.000000000 Z
16
+ date: 2019-07-10 00:00:00.000000000 Z
17
17
  dependencies:
18
18
  - !ruby/object:Gem::Dependency
19
19
  name: loofah
@@ -21,14 +21,14 @@ dependencies:
21
21
  requirements:
22
22
  - - ">="
23
23
  - !ruby/object:Gem::Version
24
- version: '2.0'
24
+ version: 2.2.1
25
25
  type: :runtime
26
26
  prerelease: false
27
27
  version_requirements: !ruby/object:Gem::Requirement
28
28
  requirements:
29
29
  - - ">="
30
30
  - !ruby/object:Gem::Version
31
- version: '2.0'
31
+ version: 2.2.1
32
32
  - !ruby/object:Gem::Dependency
33
33
  name: sax-machine
34
34
  requirement: !ruby/object:Gem::Requirement
@@ -136,6 +136,7 @@ files:
136
136
  - ".gitignore"
137
137
  - ".rspec"
138
138
  - ".rubocop.yml"
139
+ - ".rubocop_todo.yml"
139
140
  - ".travis.yml"
140
141
  - CHANGELOG.md
141
142
  - CODE_OF_CONDUCT.md
@@ -146,6 +147,7 @@ files:
146
147
  - Rakefile
147
148
  - feedjira.gemspec
148
149
  - lib/feedjira.rb
150
+ - lib/feedjira/atom_entry_utilities.rb
149
151
  - lib/feedjira/configuration.rb
150
152
  - lib/feedjira/core_ext.rb
151
153
  - lib/feedjira/core_ext/date.rb
@@ -180,6 +182,7 @@ files:
180
182
  - lib/feedjira/parser/rss_feed_burner_entry.rb
181
183
  - lib/feedjira/parser/rss_image.rb
182
184
  - lib/feedjira/preprocessor.rb
185
+ - lib/feedjira/rss_entry_utilities.rb
183
186
  - lib/feedjira/version.rb
184
187
  - spec/feedjira/configuration_spec.rb
185
188
  - spec/feedjira/date_time_utilities_spec.rb
@@ -223,6 +226,7 @@ files:
223
226
  - spec/sample_feeds/HuffPostCanada.xml
224
227
  - spec/sample_feeds/ITunesWithSingleQuotedAttributes.xml
225
228
  - spec/sample_feeds/ITunesWithSpacesInAttributes.xml
229
+ - spec/sample_feeds/InvalidDateFormat.xml
226
230
  - spec/sample_feeds/PaulDixExplainsNothing.xml
227
231
  - spec/sample_feeds/PaulDixExplainsNothingAlternate.xml
228
232
  - spec/sample_feeds/PaulDixExplainsNothingFirstEntryContent.xml
@@ -244,7 +248,10 @@ files:
244
248
  homepage: http://feedjira.com
245
249
  licenses:
246
250
  - MIT
247
- metadata: {}
251
+ metadata:
252
+ homepage_uri: http://feedjira.com
253
+ source_code_uri: https://github.com/feedjira/feedjira
254
+ changelog_uri: https://github.com/feedjira/feedjira/blob/master/CHANGELOG.md
248
255
  post_install_message:
249
256
  rdoc_options: []
250
257
  require_paths:
@@ -253,15 +260,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
253
260
  requirements:
254
261
  - - ">="
255
262
  - !ruby/object:Gem::Version
256
- version: 1.9.3
263
+ version: '2.2'
257
264
  required_rubygems_version: !ruby/object:Gem::Requirement
258
265
  requirements:
259
- - - ">"
266
+ - - ">="
260
267
  - !ruby/object:Gem::Version
261
- version: 1.3.1
268
+ version: '0'
262
269
  requirements: []
263
- rubyforge_project:
264
- rubygems_version: 2.6.13
270
+ rubygems_version: 3.0.1
265
271
  signing_key:
266
272
  specification_version: 4
267
273
  summary: A feed parsing library
@@ -308,6 +314,7 @@ test_files:
308
314
  - spec/sample_feeds/HuffPostCanada.xml
309
315
  - spec/sample_feeds/ITunesWithSingleQuotedAttributes.xml
310
316
  - spec/sample_feeds/ITunesWithSpacesInAttributes.xml
317
+ - spec/sample_feeds/InvalidDateFormat.xml
311
318
  - spec/sample_feeds/PaulDixExplainsNothing.xml
312
319
  - spec/sample_feeds/PaulDixExplainsNothingAlternate.xml
313
320
  - spec/sample_feeds/PaulDixExplainsNothingFirstEntryContent.xml