feedjira 3.0.0.beta1 → 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.rubocop.yml +5 -2
- data/.rubocop_todo.yml +36 -0
- data/.travis.yml +0 -3
- data/Gemfile +1 -0
- data/README.md +3 -0
- data/feedjira.gemspec +9 -3
- data/lib/feedjira.rb +2 -0
- data/lib/feedjira/atom_entry_utilities.rb +35 -0
- data/lib/feedjira/core_ext/date.rb +0 -1
- data/lib/feedjira/core_ext/string.rb +0 -1
- data/lib/feedjira/core_ext/time.rb +8 -10
- data/lib/feedjira/date_time_utilities.rb +0 -2
- data/lib/feedjira/date_time_utilities/date_time_epoch_parser.rb +0 -2
- data/lib/feedjira/date_time_utilities/date_time_language_parser.rb +0 -2
- data/lib/feedjira/date_time_utilities/date_time_pattern_parser.rb +0 -4
- data/lib/feedjira/feed.rb +0 -2
- data/lib/feedjira/feed_entry_utilities.rb +11 -6
- data/lib/feedjira/feed_utilities.rb +0 -2
- data/lib/feedjira/parser/atom.rb +0 -1
- data/lib/feedjira/parser/atom_entry.rb +2 -21
- data/lib/feedjira/parser/atom_feed_burner.rb +1 -2
- data/lib/feedjira/parser/atom_feed_burner_entry.rb +7 -18
- data/lib/feedjira/parser/atom_youtube.rb +0 -1
- data/lib/feedjira/parser/atom_youtube_entry.rb +6 -7
- data/lib/feedjira/parser/google_docs_atom.rb +0 -2
- data/lib/feedjira/parser/google_docs_atom_entry.rb +1 -19
- data/lib/feedjira/parser/itunes_rss.rb +0 -1
- data/lib/feedjira/parser/itunes_rss_category.rb +0 -1
- data/lib/feedjira/parser/itunes_rss_item.rb +2 -7
- data/lib/feedjira/parser/itunes_rss_owner.rb +0 -1
- data/lib/feedjira/parser/podlove_chapter.rb +0 -2
- data/lib/feedjira/parser/rss.rb +0 -1
- data/lib/feedjira/parser/rss_entry.rb +1 -28
- data/lib/feedjira/parser/rss_feed_burner.rb +0 -1
- data/lib/feedjira/parser/rss_feed_burner_entry.rb +6 -26
- data/lib/feedjira/preprocessor.rb +0 -2
- data/lib/feedjira/rss_entry_utilities.rb +45 -0
- data/lib/feedjira/version.rb +1 -1
- data/spec/feedjira/feed_spec.rb +1 -1
- data/spec/feedjira/parser/atom_youtube_entry_spec.rb +2 -2
- data/spec/feedjira_spec.rb +11 -1
- data/spec/sample_feeds.rb +1 -4
- data/spec/sample_feeds/InvalidDateFormat.xml +20 -0
- metadata +17 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 49efb7655e500df91c7119e3afbb2b91fab6d0a8282703fecb69f24d15f54357
|
4
|
+
data.tar.gz: ea6fc3b58be4968be8c8561b6e8c2720b5732882ff2556fe6966583df9bcf130
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: dd52aec9d212c0428095c3adee3cbfd1115d53a54ac2281bf7ce7f4142ace6659e658658097e43edb3c90deec583941ed220aea1fe1709f66110adb79c471620
|
7
|
+
data.tar.gz: d39bfe14c74a642311d55369498195e588beec99e5bfb897b0f00c5019f1985f89c1280b8c8eaafb3d36f55b9558b04c9037ccc8710c34c4bad2cb6127702d61
|
data/.rubocop.yml
CHANGED
@@ -1,4 +1,7 @@
|
|
1
|
+
inherit_from: .rubocop_todo.yml
|
2
|
+
|
1
3
|
AllCops:
|
4
|
+
TargetRubyVersion: 2.2
|
2
5
|
Exclude:
|
3
6
|
- db/schema.rb
|
4
7
|
- vendor/**/*
|
@@ -360,7 +363,7 @@ Style/TrailingCommaInArguments:
|
|
360
363
|
- no_comma
|
361
364
|
Enabled: true
|
362
365
|
|
363
|
-
Style/
|
366
|
+
Style/TrailingCommaInArrayLiteral:
|
364
367
|
Description: 'Checks for trailing comma in array and hash literals.'
|
365
368
|
StyleGuide: 'https://github.com/bbatsov/ruby-style-guide#no-trailing-array-commas'
|
366
369
|
EnforcedStyleForMultiline: comma
|
@@ -527,7 +530,7 @@ Lint/UnderscorePrefixedVariableName:
|
|
527
530
|
Description: 'Do not use prefix `_` for a variable that is used.'
|
528
531
|
Enabled: false
|
529
532
|
|
530
|
-
Lint/
|
533
|
+
Lint/UnneededCopDisableDirective:
|
531
534
|
Description: >-
|
532
535
|
Checks for rubocop:disable comments that can be removed.
|
533
536
|
Note: this cop is not disabled when disabling all cops.
|
data/.rubocop_todo.yml
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# This configuration was generated by
|
2
|
+
# `rubocop --auto-gen-config`
|
3
|
+
# on 2018-10-08 04:14:19 +0900 using RuboCop version 0.59.2.
|
4
|
+
# The point is for the user to remove these configuration records
|
5
|
+
# one by one as the offenses are removed from the code base.
|
6
|
+
# Note that changes in the inspected code, or installation of new
|
7
|
+
# versions of RuboCop, may require this file to be generated again.
|
8
|
+
|
9
|
+
# Offense count: 7
|
10
|
+
# Cop supports --auto-correct.
|
11
|
+
Layout/EmptyLineAfterGuardClause:
|
12
|
+
Exclude:
|
13
|
+
- 'lib/feedjira/date_time_utilities/date_time_epoch_parser.rb'
|
14
|
+
- 'lib/feedjira/feed_utilities.rb'
|
15
|
+
- 'lib/feedjira/parser/json_feed_item.rb'
|
16
|
+
- 'lib/feedjira/parser/podlove_chapter.rb'
|
17
|
+
|
18
|
+
# Offense count: 1
|
19
|
+
# Cop supports --auto-correct.
|
20
|
+
Performance/UnneededSort:
|
21
|
+
Exclude:
|
22
|
+
- 'lib/feedjira/feed_utilities.rb'
|
23
|
+
|
24
|
+
# Offense count: 3
|
25
|
+
# Configuration parameters: EnforcedStyle.
|
26
|
+
# SupportedStyles: inline, group
|
27
|
+
Style/AccessModifierDeclarations:
|
28
|
+
Exclude:
|
29
|
+
- 'lib/feedjira.rb'
|
30
|
+
- 'lib/feedjira/parser/itunes_rss.rb'
|
31
|
+
|
32
|
+
# Offense count: 1
|
33
|
+
# Cop supports --auto-correct.
|
34
|
+
Style/UnneededCondition:
|
35
|
+
Exclude:
|
36
|
+
- 'lib/feedjira/feed_utilities.rb'
|
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -143,6 +143,8 @@ add-ons and everything in between. Here are some of them:
|
|
143
143
|
|
144
144
|
* [Solve for All][solve]: Solve for All combines search engine and feed parsing
|
145
145
|
while protecting your privacy. It's even extendable by the community!
|
146
|
+
|
147
|
+
* [Feedi API][feedi]: Feedi simplifies how you handle RSS, Atom, or JSON feeds. You can add and keep track of your favourite feed data with a simple and clean REST API. All entries are enriched by Machine Learning and Semantic engines.
|
146
148
|
|
147
149
|
[Feedbin]: https://feedbin.com/
|
148
150
|
[Stringer]: https://github.com/swanson/stringer
|
@@ -150,6 +152,7 @@ add-ons and everything in between. Here are some of them:
|
|
150
152
|
[Feedbunch]: https://github.com/amatriain/feedbunch
|
151
153
|
[old]: http://theoldreader.com/
|
152
154
|
[solve]: https://solveforall.com/
|
155
|
+
[feedi]: https://github.com/davidesantangelo/feedi
|
153
156
|
|
154
157
|
Note: to get your project on this list, simply [send an email](mailto:feedjira@gmail.com)
|
155
158
|
with your project's details.
|
data/feedjira.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
2
|
|
3
|
-
require File.expand_path("
|
3
|
+
require File.expand_path("lib/feedjira/version", __dir__)
|
4
4
|
|
5
5
|
# rubocop:disable Metrics/BlockLength
|
6
6
|
Gem::Specification.new do |s|
|
@@ -20,13 +20,19 @@ Gem::Specification.new do |s|
|
|
20
20
|
s.summary = "A feed parsing library"
|
21
21
|
s.version = Feedjira::VERSION
|
22
22
|
|
23
|
+
s.metadata = {
|
24
|
+
"homepage_uri" => "http://feedjira.com",
|
25
|
+
"source_code_uri" => "https://github.com/feedjira/feedjira",
|
26
|
+
"changelog_uri" => "https://github.com/feedjira/feedjira/blob/master/CHANGELOG.md"
|
27
|
+
}
|
28
|
+
|
23
29
|
s.files = `git ls-files`.split("\n")
|
24
30
|
s.require_paths = ["lib"]
|
25
31
|
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
26
32
|
|
27
|
-
s.required_ruby_version = ">=
|
33
|
+
s.required_ruby_version = ">=2.2"
|
28
34
|
|
29
|
-
s.add_dependency "loofah", ">= 2.
|
35
|
+
s.add_dependency "loofah", ">= 2.2.1"
|
30
36
|
s.add_dependency "sax-machine", ">= 1.0"
|
31
37
|
|
32
38
|
s.add_development_dependency "danger"
|
data/lib/feedjira.rb
CHANGED
@@ -14,6 +14,8 @@ require "feedjira/date_time_utilities"
|
|
14
14
|
require "feedjira/feed_entry_utilities"
|
15
15
|
require "feedjira/feed_utilities"
|
16
16
|
require "feedjira/feed"
|
17
|
+
require "feedjira/rss_entry_utilities"
|
18
|
+
require "feedjira/atom_entry_utilities"
|
17
19
|
require "feedjira/parser"
|
18
20
|
require "feedjira/parser/rss_entry"
|
19
21
|
require "feedjira/parser/rss_image"
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Feedjira
|
4
|
+
module AtomEntryUtilities
|
5
|
+
def self.included(mod)
|
6
|
+
mod.class_exec do
|
7
|
+
element :title
|
8
|
+
element :name, as: :author
|
9
|
+
element :content
|
10
|
+
element :summary
|
11
|
+
element :enclosure, as: :image, value: :href
|
12
|
+
|
13
|
+
element :published
|
14
|
+
element :id, as: :entry_id
|
15
|
+
element :created, as: :published
|
16
|
+
element :issued, as: :published
|
17
|
+
element :updated
|
18
|
+
element :modified, as: :updated
|
19
|
+
|
20
|
+
elements :category, as: :categories, value: :term
|
21
|
+
|
22
|
+
element :link, as: :url, value: :href, with: {
|
23
|
+
type: "text/html",
|
24
|
+
rel: "alternate"
|
25
|
+
}
|
26
|
+
|
27
|
+
elements :link, as: :links, value: :href
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
def url
|
32
|
+
@url ||= links.first
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
@@ -1,7 +1,6 @@
|
|
1
1
|
require "time"
|
2
2
|
require "date"
|
3
3
|
|
4
|
-
# rubocop:disable Style/DocumentationMethod
|
5
4
|
class Time
|
6
5
|
# Parse a time string and convert it to UTC without raising errors.
|
7
6
|
# Parses a flattened 14-digit time (YYYYmmddHHMMSS) as UTC.
|
@@ -11,17 +10,16 @@ class Time
|
|
11
10
|
#
|
12
11
|
# === Returns
|
13
12
|
# A Time instance in UTC or nil if there were errors while parsing.
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
parse_string_safely dt.to_s
|
13
|
+
def self.parse_safely(datetime)
|
14
|
+
if datetime.is_a?(Time)
|
15
|
+
datetime.utc
|
16
|
+
elsif datetime.respond_to?(:to_datetime)
|
17
|
+
datetime.to_datetime.utc
|
18
|
+
elsif datetime.respond_to? :to_s
|
19
|
+
parse_string_safely datetime.to_s
|
22
20
|
end
|
23
21
|
rescue StandardError => e
|
24
|
-
Feedjira.logger.debug { "Failed to parse time #{
|
22
|
+
Feedjira.logger.debug { "Failed to parse time #{datetime}" }
|
25
23
|
Feedjira.logger.debug(e)
|
26
24
|
nil
|
27
25
|
end
|
@@ -1,4 +1,3 @@
|
|
1
|
-
# rubocop:disable Style/Documentation
|
2
1
|
module Feedjira
|
3
2
|
module DateTimeUtilities
|
4
3
|
# This is our date parsing heuristic.
|
@@ -12,7 +11,6 @@ module Feedjira
|
|
12
11
|
|
13
12
|
# Parse the given string starting with the most common parser (default ruby)
|
14
13
|
# and going over all other available parsers
|
15
|
-
# rubocop:disable Metrics/MethodLength
|
16
14
|
def parse_datetime(string)
|
17
15
|
res = DATE_PARSERS.detect do |parser|
|
18
16
|
begin
|
@@ -1,15 +1,11 @@
|
|
1
|
-
# rubocop:disable Style/Documentation
|
2
|
-
# rubocop:disable Style/DocumentationMethod
|
3
1
|
module Feedjira
|
4
2
|
module DateTimeUtilities
|
5
3
|
class DateTimePatternParser
|
6
|
-
# rubocop:disable Style/AsciiComments
|
7
4
|
# Japanese Symbols are required for strange Date Strings like
|
8
5
|
# '水, 31 8 2016 07:37:00 PDT'
|
9
6
|
JAPANESE_SYMBOLS = %w(日 月 火 水 木 金 土).freeze
|
10
7
|
PATTERNS = ["%m/%d/%Y %T %p", "%d %m %Y %T %Z"].freeze
|
11
8
|
|
12
|
-
# rubocop:disable Metrics/MethodLength
|
13
9
|
def self.parse(string)
|
14
10
|
PATTERNS.each do |p|
|
15
11
|
begin
|
data/lib/feedjira/feed.rb
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
# rubocop:disable Style/Documentation
|
2
|
-
# rubocop:disable Style/DocumentationMethod
|
3
1
|
module Feedjira
|
4
2
|
module FeedEntryUtilities
|
5
3
|
include Enumerable
|
@@ -20,22 +18,24 @@ module Feedjira
|
|
20
18
|
##
|
21
19
|
# Returns the id of the entry or its url if no id is present, as some
|
22
20
|
# formats don't support it
|
21
|
+
# rubocop:disable Naming/MemoizedInstanceVariableName
|
23
22
|
def id
|
24
23
|
@entry_id ||= @url
|
25
24
|
end
|
25
|
+
# rubocop:enable Naming/MemoizedInstanceVariableName
|
26
26
|
|
27
27
|
##
|
28
28
|
# Writer for published. By default, we keep the "oldest" publish time found.
|
29
29
|
def published=(val)
|
30
30
|
parsed = parse_datetime(val)
|
31
|
-
@published = parsed if !@published || parsed < @published
|
31
|
+
@published = parsed if parsed && (!@published || parsed < @published)
|
32
32
|
end
|
33
33
|
|
34
34
|
##
|
35
35
|
# Writer for updated. By default, we keep the most recent update time found.
|
36
36
|
def updated=(val)
|
37
37
|
parsed = parse_datetime(val)
|
38
|
-
@updated = parsed if !@updated || parsed > @updated
|
38
|
+
@updated = parsed if parsed && (!@updated || parsed > @updated)
|
39
39
|
end
|
40
40
|
|
41
41
|
def sanitize!
|
@@ -49,10 +49,15 @@ module Feedjira
|
|
49
49
|
alias last_modified published
|
50
50
|
|
51
51
|
def each
|
52
|
-
@rss_fields ||= instance_variables
|
52
|
+
@rss_fields ||= instance_variables.map do |ivar|
|
53
|
+
ivar.to_s.sub("@", "")
|
54
|
+
end.select do |field|
|
55
|
+
# select callable (public) methods only
|
56
|
+
respond_to?(field)
|
57
|
+
end
|
53
58
|
|
54
59
|
@rss_fields.each do |field|
|
55
|
-
yield(field
|
60
|
+
yield(field, instance_variable_get(:"@#{field}"))
|
56
61
|
end
|
57
62
|
end
|
58
63
|
|
data/lib/feedjira/parser/atom.rb
CHANGED
@@ -1,32 +1,13 @@
|
|
1
|
-
# rubocop:disable Style/DocumentationMethod
|
2
1
|
module Feedjira
|
3
2
|
module Parser
|
4
3
|
# Parser for dealing with Atom feed entries.
|
5
4
|
class AtomEntry
|
6
5
|
include SAXMachine
|
7
6
|
include FeedEntryUtilities
|
7
|
+
include AtomEntryUtilities
|
8
8
|
|
9
|
-
element :
|
10
|
-
element :link, as: :url, value: :href, with: { type: "text/html", rel: "alternate" } # rubocop:disable Metrics/LineLength
|
11
|
-
element :name, as: :author
|
12
|
-
element :content
|
13
|
-
element :summary
|
14
|
-
|
9
|
+
element :"media:thumbnail", as: :image, value: :url
|
15
10
|
element :"media:content", as: :image, value: :url
|
16
|
-
element :enclosure, as: :image, value: :href
|
17
|
-
|
18
|
-
element :published
|
19
|
-
element :id, as: :entry_id
|
20
|
-
element :created, as: :published
|
21
|
-
element :issued, as: :published
|
22
|
-
element :updated
|
23
|
-
element :modified, as: :updated
|
24
|
-
elements :category, as: :categories, value: :term
|
25
|
-
elements :link, as: :links, value: :href
|
26
|
-
|
27
|
-
def url
|
28
|
-
@url ||= links.first
|
29
|
-
end
|
30
11
|
end
|
31
12
|
end
|
32
13
|
end
|
@@ -1,4 +1,3 @@
|
|
1
|
-
# rubocop:disable Style/DocumentationMethod
|
2
1
|
module Feedjira
|
3
2
|
module Parser
|
4
3
|
# Parser for dealing with Feedburner Atom feeds.
|
@@ -20,7 +19,7 @@ module Feedjira
|
|
20
19
|
attr_writer :url, :feed_url
|
21
20
|
|
22
21
|
def self.able_to_parse?(xml)
|
23
|
-
((/Atom/ =~ xml) && (/feedburner/ =~ xml) && !(/\<rss|\<rdf/ =~ xml)) || false # rubocop:disable Metrics/LineLength
|
22
|
+
((/<feed/ =~ xml) && (/Atom/ =~ xml) && (/feedburner/ =~ xml) && !(/\<rss|\<rdf/ =~ xml)) || false # rubocop:disable Metrics/LineLength
|
24
23
|
end
|
25
24
|
|
26
25
|
# Feed url is <link> with type="text/html" if present,
|
@@ -1,32 +1,21 @@
|
|
1
|
-
# rubocop:disable Style/DocumentationMethod
|
2
1
|
module Feedjira
|
3
2
|
module Parser
|
4
3
|
# Parser for dealing with Feedburner Atom feed entries.
|
5
4
|
class AtomFeedBurnerEntry
|
6
5
|
include SAXMachine
|
7
6
|
include FeedEntryUtilities
|
7
|
+
include AtomEntryUtilities
|
8
8
|
|
9
|
-
element :
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
element :summary
|
14
|
-
element :content
|
9
|
+
element :"feedburner:origLink", as: :orig_link
|
10
|
+
# rubocop:disable Style/AccessModifierDeclarations
|
11
|
+
private :orig_link
|
12
|
+
# rubocop:enable Style/AccessModifierDeclarations
|
15
13
|
|
14
|
+
element :"media:thumbnail", as: :image, value: :url
|
16
15
|
element :"media:content", as: :image, value: :url
|
17
|
-
element :enclosure, as: :image, value: :href
|
18
|
-
|
19
|
-
element :published
|
20
|
-
element :id, as: :entry_id
|
21
|
-
element :issued, as: :published
|
22
|
-
element :created, as: :published
|
23
|
-
element :updated
|
24
|
-
element :modified, as: :updated
|
25
|
-
elements :category, as: :categories, value: :term
|
26
|
-
elements :link, as: :links, value: :href
|
27
16
|
|
28
17
|
def url
|
29
|
-
|
18
|
+
orig_link || super
|
30
19
|
end
|
31
20
|
end
|
32
21
|
end
|
@@ -1,19 +1,18 @@
|
|
1
|
-
# rubocop:disable Style/Documentation
|
2
1
|
module Feedjira
|
3
2
|
module Parser
|
4
3
|
class AtomYoutubeEntry
|
5
4
|
include SAXMachine
|
6
5
|
include FeedEntryUtilities
|
6
|
+
include AtomEntryUtilities
|
7
|
+
|
8
|
+
sax_config.top_level_elements["link"].clear
|
9
|
+
sax_config.collection_elements["link"].clear
|
7
10
|
|
8
|
-
element :title
|
9
11
|
element :link, as: :url, value: :href, with: { rel: "alternate" }
|
10
|
-
|
12
|
+
|
11
13
|
element :"media:description", as: :content
|
12
|
-
element :summary
|
13
|
-
element :published
|
14
|
-
element :id, as: :entry_id
|
15
|
-
element :updated
|
16
14
|
element :"yt:videoId", as: :youtube_video_id
|
15
|
+
element :"yt:channelId", as: :youtube_channel_id
|
17
16
|
element :"media:title", as: :media_title
|
18
17
|
element :"media:content", as: :media_url, value: :url
|
19
18
|
element :"media:content", as: :media_type, value: :type
|
@@ -1,31 +1,13 @@
|
|
1
|
-
# rubocop:disable Style/Documentation
|
2
|
-
# rubocop:disable Style/DocumentationMethod
|
3
1
|
module Feedjira
|
4
2
|
module Parser
|
5
3
|
class GoogleDocsAtomEntry
|
6
4
|
include SAXMachine
|
7
5
|
include FeedEntryUtilities
|
6
|
+
include AtomEntryUtilities
|
8
7
|
|
9
|
-
element :title
|
10
|
-
element :link, as: :url, value: :href, with: { type: "text/html", rel: "alternate" } # rubocop:disable Metrics/LineLength
|
11
|
-
element :name, as: :author
|
12
|
-
element :content
|
13
|
-
element :summary
|
14
|
-
element :published
|
15
|
-
element :id, as: :entry_id
|
16
|
-
element :created, as: :published
|
17
|
-
element :issued, as: :published
|
18
|
-
element :updated
|
19
|
-
element :modified, as: :updated
|
20
|
-
elements :category, as: :categories, value: :term
|
21
|
-
elements :link, as: :links, value: :href
|
22
8
|
element :"docs:md5Checksum", as: :checksum
|
23
9
|
element :"docs:filename", as: :original_filename
|
24
10
|
element :"docs:suggestedFilename", as: :suggested_filename
|
25
|
-
|
26
|
-
def url
|
27
|
-
@url ||= links.first
|
28
|
-
end
|
29
11
|
end
|
30
12
|
end
|
31
13
|
end
|
@@ -5,14 +5,9 @@ module Feedjira
|
|
5
5
|
class ITunesRSSItem
|
6
6
|
include SAXMachine
|
7
7
|
include FeedEntryUtilities
|
8
|
+
include RSSEntryUtilities
|
8
9
|
|
9
|
-
|
10
|
-
element :guid, as: :entry_id
|
11
|
-
element :title
|
12
|
-
element :link, as: :url
|
13
|
-
element :description, as: :summary
|
14
|
-
element :"content:encoded", as: :content
|
15
|
-
element :pubDate, as: :published
|
10
|
+
sax_config.top_level_elements["enclosure"].clear
|
16
11
|
|
17
12
|
# If author is not present use author tag on the item
|
18
13
|
element :"itunes:author", as: :itunes_author
|
data/lib/feedjira/parser/rss.rb
CHANGED
@@ -4,34 +4,7 @@ module Feedjira
|
|
4
4
|
class RSSEntry
|
5
5
|
include SAXMachine
|
6
6
|
include FeedEntryUtilities
|
7
|
-
|
8
|
-
element :title
|
9
|
-
element :link, as: :url
|
10
|
-
|
11
|
-
element :"dc:creator", as: :author
|
12
|
-
element :author, as: :author
|
13
|
-
element :"content:encoded", as: :content
|
14
|
-
element :description, as: :summary
|
15
|
-
|
16
|
-
element :"media:content", as: :image, value: :url
|
17
|
-
element :enclosure, as: :image, value: :url
|
18
|
-
|
19
|
-
element :pubDate, as: :published
|
20
|
-
element :pubdate, as: :published
|
21
|
-
element :"dc:date", as: :published
|
22
|
-
element :"dc:Date", as: :published
|
23
|
-
element :"dcterms:created", as: :published
|
24
|
-
|
25
|
-
element :"dcterms:modified", as: :updated
|
26
|
-
element :issued, as: :published
|
27
|
-
elements :category, as: :categories
|
28
|
-
|
29
|
-
element :guid, as: :entry_id
|
30
|
-
element :"dc:identifier", as: :dc_identifier
|
31
|
-
|
32
|
-
def id
|
33
|
-
@entry_id ||= @dc_identifier || @url
|
34
|
-
end
|
7
|
+
include RSSEntryUtilities
|
35
8
|
end
|
36
9
|
end
|
37
10
|
end
|
@@ -1,38 +1,18 @@
|
|
1
|
-
# rubocop:disable Style/DocumentationMethod
|
2
1
|
module Feedjira
|
3
2
|
module Parser
|
4
3
|
# Parser for dealing with Feedburner RSS feed entries.
|
5
4
|
class RSSFeedBurnerEntry
|
6
5
|
include SAXMachine
|
7
6
|
include FeedEntryUtilities
|
7
|
+
include RSSEntryUtilities
|
8
8
|
|
9
|
-
element :
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
element :"dc:creator", as: :author
|
15
|
-
element :author, as: :author
|
16
|
-
element :"content:encoded", as: :content
|
17
|
-
element :description, as: :summary
|
18
|
-
|
19
|
-
element :"media:content", as: :image, value: :url
|
20
|
-
element :enclosure, as: :image, value: :url
|
21
|
-
|
22
|
-
element :pubDate, as: :published
|
23
|
-
element :pubdate, as: :published
|
24
|
-
element :"dc:date", as: :published
|
25
|
-
element :"dc:Date", as: :published
|
26
|
-
element :"dcterms:created", as: :published
|
27
|
-
|
28
|
-
element :"dcterms:modified", as: :updated
|
29
|
-
element :issued, as: :published
|
30
|
-
elements :category, as: :categories
|
31
|
-
|
32
|
-
element :guid, as: :entry_id
|
9
|
+
element :"feedburner:origLink", as: :orig_link
|
10
|
+
# rubocop:disable Style/AccessModifierDeclarations
|
11
|
+
private :orig_link
|
12
|
+
# rubocop:enable Style/AccessModifierDeclarations
|
33
13
|
|
34
14
|
def url
|
35
|
-
|
15
|
+
orig_link || super
|
36
16
|
end
|
37
17
|
end
|
38
18
|
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Feedjira
|
4
|
+
module RSSEntryUtilities
|
5
|
+
def self.included(mod)
|
6
|
+
mod.class_exec do
|
7
|
+
element :title
|
8
|
+
|
9
|
+
element :"content:encoded", as: :content
|
10
|
+
element :description, as: :summary
|
11
|
+
|
12
|
+
element :link, as: :url
|
13
|
+
|
14
|
+
element :author
|
15
|
+
element :"dc:creator", as: :author
|
16
|
+
|
17
|
+
element :pubDate, as: :published
|
18
|
+
element :pubdate, as: :published
|
19
|
+
element :issued, as: :published
|
20
|
+
element :"dc:date", as: :published
|
21
|
+
element :"dc:Date", as: :published
|
22
|
+
element :"dcterms:created", as: :published
|
23
|
+
|
24
|
+
element :"dcterms:modified", as: :updated
|
25
|
+
|
26
|
+
element :guid, as: :entry_id
|
27
|
+
element :"dc:identifier", as: :dc_identifier
|
28
|
+
|
29
|
+
element :"media:thumbnail", as: :image, value: :url
|
30
|
+
element :"media:content", as: :image, value: :url
|
31
|
+
element :enclosure, as: :image, value: :url
|
32
|
+
|
33
|
+
elements :category, as: :categories
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
attr_reader :url
|
38
|
+
|
39
|
+
# rubocop:disable Naming/MemoizedInstanceVariableName
|
40
|
+
def id
|
41
|
+
@entry_id ||= @dc_identifier || @url
|
42
|
+
end
|
43
|
+
# rubocop:enable Naming/MemoizedInstanceVariableName
|
44
|
+
end
|
45
|
+
end
|
data/lib/feedjira/version.rb
CHANGED
data/spec/feedjira/feed_spec.rb
CHANGED
@@ -20,11 +20,11 @@ describe Feedjira::Parser::AtomYoutubeEntry do
|
|
20
20
|
end
|
21
21
|
|
22
22
|
it "should have the published date" do
|
23
|
-
expect(@entry.published).to eq Time.parse_safely("2015-05-04T00:01:27+00:00")
|
23
|
+
expect(@entry.published).to eq Time.parse_safely("2015-05-04T00:01:27+00:00")
|
24
24
|
end
|
25
25
|
|
26
26
|
it "should have the updated date" do
|
27
|
-
expect(@entry.updated).to eq Time.parse_safely("2015-05-13T17:38:30+00:00")
|
27
|
+
expect(@entry.updated).to eq Time.parse_safely("2015-05-13T17:38:30+00:00")
|
28
28
|
end
|
29
29
|
|
30
30
|
it "should have the content populated from the media:description element" do
|
data/spec/feedjira_spec.rb
CHANGED
@@ -62,6 +62,16 @@ RSpec.describe Feedjira do
|
|
62
62
|
expect(feed.entries.first.id).to eq "23246627"
|
63
63
|
expect(feed.entries.last.id.strip).to eq "1"
|
64
64
|
end
|
65
|
+
|
66
|
+
it "does not fail if multiple published dates exist and some are unparseable" do
|
67
|
+
expect(Feedjira.logger).to receive(:warn).twice
|
68
|
+
|
69
|
+
feed = Feedjira.parse(sample_invalid_date_format_feed)
|
70
|
+
expect(feed.title).to eq "Invalid date format feed"
|
71
|
+
published = Time.parse_safely "Mon, 16 Oct 2017 15:10:00 GMT"
|
72
|
+
expect(feed.entries.first.published).to eq published
|
73
|
+
expect(feed.entries.size).to eq 2
|
74
|
+
end
|
65
75
|
end
|
66
76
|
|
67
77
|
context "when there's no available parser" do
|
@@ -128,7 +138,7 @@ RSpec.describe Feedjira do
|
|
128
138
|
it "does not use default parsers" do
|
129
139
|
xml = "Atom asdf"
|
130
140
|
new_parser = Class.new do
|
131
|
-
def self.able_to_parse?(
|
141
|
+
def self.able_to_parse?(_xml)
|
132
142
|
true
|
133
143
|
end
|
134
144
|
end
|
data/spec/sample_feeds.rb
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
# rubocop:disable Metrics/LineLength
|
2
|
-
|
3
1
|
module SampleFeeds
|
4
2
|
FEEDS = {
|
5
3
|
sample_atom_feed: "AmazonWebServicesBlog.xml",
|
@@ -29,6 +27,7 @@ module SampleFeeds
|
|
29
27
|
sample_atom_xhtml_with_escpaed_html_in_pre_tag_feed: "AtomEscapedHTMLInPreTag.xml",
|
30
28
|
sample_json_feed: "json_feed.json",
|
31
29
|
sample_rss_feed_huffpost_ca: "HuffPostCanada.xml",
|
30
|
+
sample_invalid_date_format_feed: "InvalidDateFormat.xml"
|
32
31
|
}.freeze
|
33
32
|
|
34
33
|
FEEDS.each do |method, filename|
|
@@ -39,5 +38,3 @@ module SampleFeeds
|
|
39
38
|
File.read("#{File.dirname(__FILE__)}/sample_feeds/#{filename}")
|
40
39
|
end
|
41
40
|
end
|
42
|
-
|
43
|
-
# rubocop:enable Metrics/LineLength
|
@@ -0,0 +1,20 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?><rss version="2.0"
|
2
|
+
xmlns:dc="http://purl.org/dc/elements/1.1/">
|
3
|
+
|
4
|
+
<channel>
|
5
|
+
<title>Invalid date format feed</title>
|
6
|
+
<link>http://example.com/feed</link>
|
7
|
+
<language>en-US</language>
|
8
|
+
<item>
|
9
|
+
<title>Item 0 with an invalid date</title>
|
10
|
+
<link>http://example.com/item0</link>
|
11
|
+
<pubDate>Mon, 16 Oct 2017 15:10:00 +0000</pubDate>
|
12
|
+
<dc:date>1518478934</dc:date>
|
13
|
+
</item>
|
14
|
+
<item>
|
15
|
+
<title>Item 1 with all valid dates</title>
|
16
|
+
<link>http://example.com/item1</link>
|
17
|
+
<pubDate>Tue, 17 Oct 2017 12:17:00 +0000</pubDate>
|
18
|
+
<dc:date>Tue, 17 Oct 2017 22:17:00 +0000</dc:date>
|
19
|
+
</item>
|
20
|
+
</channel>
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: feedjira
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.0
|
4
|
+
version: 3.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Adam Hess
|
@@ -13,7 +13,7 @@ authors:
|
|
13
13
|
autorequire:
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
|
-
date:
|
16
|
+
date: 2019-07-10 00:00:00.000000000 Z
|
17
17
|
dependencies:
|
18
18
|
- !ruby/object:Gem::Dependency
|
19
19
|
name: loofah
|
@@ -21,14 +21,14 @@ dependencies:
|
|
21
21
|
requirements:
|
22
22
|
- - ">="
|
23
23
|
- !ruby/object:Gem::Version
|
24
|
-
version:
|
24
|
+
version: 2.2.1
|
25
25
|
type: :runtime
|
26
26
|
prerelease: false
|
27
27
|
version_requirements: !ruby/object:Gem::Requirement
|
28
28
|
requirements:
|
29
29
|
- - ">="
|
30
30
|
- !ruby/object:Gem::Version
|
31
|
-
version:
|
31
|
+
version: 2.2.1
|
32
32
|
- !ruby/object:Gem::Dependency
|
33
33
|
name: sax-machine
|
34
34
|
requirement: !ruby/object:Gem::Requirement
|
@@ -136,6 +136,7 @@ files:
|
|
136
136
|
- ".gitignore"
|
137
137
|
- ".rspec"
|
138
138
|
- ".rubocop.yml"
|
139
|
+
- ".rubocop_todo.yml"
|
139
140
|
- ".travis.yml"
|
140
141
|
- CHANGELOG.md
|
141
142
|
- CODE_OF_CONDUCT.md
|
@@ -146,6 +147,7 @@ files:
|
|
146
147
|
- Rakefile
|
147
148
|
- feedjira.gemspec
|
148
149
|
- lib/feedjira.rb
|
150
|
+
- lib/feedjira/atom_entry_utilities.rb
|
149
151
|
- lib/feedjira/configuration.rb
|
150
152
|
- lib/feedjira/core_ext.rb
|
151
153
|
- lib/feedjira/core_ext/date.rb
|
@@ -180,6 +182,7 @@ files:
|
|
180
182
|
- lib/feedjira/parser/rss_feed_burner_entry.rb
|
181
183
|
- lib/feedjira/parser/rss_image.rb
|
182
184
|
- lib/feedjira/preprocessor.rb
|
185
|
+
- lib/feedjira/rss_entry_utilities.rb
|
183
186
|
- lib/feedjira/version.rb
|
184
187
|
- spec/feedjira/configuration_spec.rb
|
185
188
|
- spec/feedjira/date_time_utilities_spec.rb
|
@@ -223,6 +226,7 @@ files:
|
|
223
226
|
- spec/sample_feeds/HuffPostCanada.xml
|
224
227
|
- spec/sample_feeds/ITunesWithSingleQuotedAttributes.xml
|
225
228
|
- spec/sample_feeds/ITunesWithSpacesInAttributes.xml
|
229
|
+
- spec/sample_feeds/InvalidDateFormat.xml
|
226
230
|
- spec/sample_feeds/PaulDixExplainsNothing.xml
|
227
231
|
- spec/sample_feeds/PaulDixExplainsNothingAlternate.xml
|
228
232
|
- spec/sample_feeds/PaulDixExplainsNothingFirstEntryContent.xml
|
@@ -244,7 +248,10 @@ files:
|
|
244
248
|
homepage: http://feedjira.com
|
245
249
|
licenses:
|
246
250
|
- MIT
|
247
|
-
metadata:
|
251
|
+
metadata:
|
252
|
+
homepage_uri: http://feedjira.com
|
253
|
+
source_code_uri: https://github.com/feedjira/feedjira
|
254
|
+
changelog_uri: https://github.com/feedjira/feedjira/blob/master/CHANGELOG.md
|
248
255
|
post_install_message:
|
249
256
|
rdoc_options: []
|
250
257
|
require_paths:
|
@@ -253,15 +260,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
253
260
|
requirements:
|
254
261
|
- - ">="
|
255
262
|
- !ruby/object:Gem::Version
|
256
|
-
version:
|
263
|
+
version: '2.2'
|
257
264
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
258
265
|
requirements:
|
259
|
-
- - "
|
266
|
+
- - ">="
|
260
267
|
- !ruby/object:Gem::Version
|
261
|
-
version:
|
268
|
+
version: '0'
|
262
269
|
requirements: []
|
263
|
-
|
264
|
-
rubygems_version: 2.6.13
|
270
|
+
rubygems_version: 3.0.1
|
265
271
|
signing_key:
|
266
272
|
specification_version: 4
|
267
273
|
summary: A feed parsing library
|
@@ -308,6 +314,7 @@ test_files:
|
|
308
314
|
- spec/sample_feeds/HuffPostCanada.xml
|
309
315
|
- spec/sample_feeds/ITunesWithSingleQuotedAttributes.xml
|
310
316
|
- spec/sample_feeds/ITunesWithSpacesInAttributes.xml
|
317
|
+
- spec/sample_feeds/InvalidDateFormat.xml
|
311
318
|
- spec/sample_feeds/PaulDixExplainsNothing.xml
|
312
319
|
- spec/sample_feeds/PaulDixExplainsNothingAlternate.xml
|
313
320
|
- spec/sample_feeds/PaulDixExplainsNothingFirstEntryContent.xml
|