feedjira 3.0.0.beta1 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.rubocop.yml +5 -2
- data/.rubocop_todo.yml +36 -0
- data/.travis.yml +0 -3
- data/Gemfile +1 -0
- data/README.md +3 -0
- data/feedjira.gemspec +9 -3
- data/lib/feedjira.rb +2 -0
- data/lib/feedjira/atom_entry_utilities.rb +35 -0
- data/lib/feedjira/core_ext/date.rb +0 -1
- data/lib/feedjira/core_ext/string.rb +0 -1
- data/lib/feedjira/core_ext/time.rb +8 -10
- data/lib/feedjira/date_time_utilities.rb +0 -2
- data/lib/feedjira/date_time_utilities/date_time_epoch_parser.rb +0 -2
- data/lib/feedjira/date_time_utilities/date_time_language_parser.rb +0 -2
- data/lib/feedjira/date_time_utilities/date_time_pattern_parser.rb +0 -4
- data/lib/feedjira/feed.rb +0 -2
- data/lib/feedjira/feed_entry_utilities.rb +11 -6
- data/lib/feedjira/feed_utilities.rb +0 -2
- data/lib/feedjira/parser/atom.rb +0 -1
- data/lib/feedjira/parser/atom_entry.rb +2 -21
- data/lib/feedjira/parser/atom_feed_burner.rb +1 -2
- data/lib/feedjira/parser/atom_feed_burner_entry.rb +7 -18
- data/lib/feedjira/parser/atom_youtube.rb +0 -1
- data/lib/feedjira/parser/atom_youtube_entry.rb +6 -7
- data/lib/feedjira/parser/google_docs_atom.rb +0 -2
- data/lib/feedjira/parser/google_docs_atom_entry.rb +1 -19
- data/lib/feedjira/parser/itunes_rss.rb +0 -1
- data/lib/feedjira/parser/itunes_rss_category.rb +0 -1
- data/lib/feedjira/parser/itunes_rss_item.rb +2 -7
- data/lib/feedjira/parser/itunes_rss_owner.rb +0 -1
- data/lib/feedjira/parser/podlove_chapter.rb +0 -2
- data/lib/feedjira/parser/rss.rb +0 -1
- data/lib/feedjira/parser/rss_entry.rb +1 -28
- data/lib/feedjira/parser/rss_feed_burner.rb +0 -1
- data/lib/feedjira/parser/rss_feed_burner_entry.rb +6 -26
- data/lib/feedjira/preprocessor.rb +0 -2
- data/lib/feedjira/rss_entry_utilities.rb +45 -0
- data/lib/feedjira/version.rb +1 -1
- data/spec/feedjira/feed_spec.rb +1 -1
- data/spec/feedjira/parser/atom_youtube_entry_spec.rb +2 -2
- data/spec/feedjira_spec.rb +11 -1
- data/spec/sample_feeds.rb +1 -4
- data/spec/sample_feeds/InvalidDateFormat.xml +20 -0
- metadata +17 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 49efb7655e500df91c7119e3afbb2b91fab6d0a8282703fecb69f24d15f54357
|
4
|
+
data.tar.gz: ea6fc3b58be4968be8c8561b6e8c2720b5732882ff2556fe6966583df9bcf130
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: dd52aec9d212c0428095c3adee3cbfd1115d53a54ac2281bf7ce7f4142ace6659e658658097e43edb3c90deec583941ed220aea1fe1709f66110adb79c471620
|
7
|
+
data.tar.gz: d39bfe14c74a642311d55369498195e588beec99e5bfb897b0f00c5019f1985f89c1280b8c8eaafb3d36f55b9558b04c9037ccc8710c34c4bad2cb6127702d61
|
data/.rubocop.yml
CHANGED
@@ -1,4 +1,7 @@
|
|
1
|
+
inherit_from: .rubocop_todo.yml
|
2
|
+
|
1
3
|
AllCops:
|
4
|
+
TargetRubyVersion: 2.2
|
2
5
|
Exclude:
|
3
6
|
- db/schema.rb
|
4
7
|
- vendor/**/*
|
@@ -360,7 +363,7 @@ Style/TrailingCommaInArguments:
|
|
360
363
|
- no_comma
|
361
364
|
Enabled: true
|
362
365
|
|
363
|
-
Style/
|
366
|
+
Style/TrailingCommaInArrayLiteral:
|
364
367
|
Description: 'Checks for trailing comma in array and hash literals.'
|
365
368
|
StyleGuide: 'https://github.com/bbatsov/ruby-style-guide#no-trailing-array-commas'
|
366
369
|
EnforcedStyleForMultiline: comma
|
@@ -527,7 +530,7 @@ Lint/UnderscorePrefixedVariableName:
|
|
527
530
|
Description: 'Do not use prefix `_` for a variable that is used.'
|
528
531
|
Enabled: false
|
529
532
|
|
530
|
-
Lint/
|
533
|
+
Lint/UnneededCopDisableDirective:
|
531
534
|
Description: >-
|
532
535
|
Checks for rubocop:disable comments that can be removed.
|
533
536
|
Note: this cop is not disabled when disabling all cops.
|
data/.rubocop_todo.yml
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# This configuration was generated by
|
2
|
+
# `rubocop --auto-gen-config`
|
3
|
+
# on 2018-10-08 04:14:19 +0900 using RuboCop version 0.59.2.
|
4
|
+
# The point is for the user to remove these configuration records
|
5
|
+
# one by one as the offenses are removed from the code base.
|
6
|
+
# Note that changes in the inspected code, or installation of new
|
7
|
+
# versions of RuboCop, may require this file to be generated again.
|
8
|
+
|
9
|
+
# Offense count: 7
|
10
|
+
# Cop supports --auto-correct.
|
11
|
+
Layout/EmptyLineAfterGuardClause:
|
12
|
+
Exclude:
|
13
|
+
- 'lib/feedjira/date_time_utilities/date_time_epoch_parser.rb'
|
14
|
+
- 'lib/feedjira/feed_utilities.rb'
|
15
|
+
- 'lib/feedjira/parser/json_feed_item.rb'
|
16
|
+
- 'lib/feedjira/parser/podlove_chapter.rb'
|
17
|
+
|
18
|
+
# Offense count: 1
|
19
|
+
# Cop supports --auto-correct.
|
20
|
+
Performance/UnneededSort:
|
21
|
+
Exclude:
|
22
|
+
- 'lib/feedjira/feed_utilities.rb'
|
23
|
+
|
24
|
+
# Offense count: 3
|
25
|
+
# Configuration parameters: EnforcedStyle.
|
26
|
+
# SupportedStyles: inline, group
|
27
|
+
Style/AccessModifierDeclarations:
|
28
|
+
Exclude:
|
29
|
+
- 'lib/feedjira.rb'
|
30
|
+
- 'lib/feedjira/parser/itunes_rss.rb'
|
31
|
+
|
32
|
+
# Offense count: 1
|
33
|
+
# Cop supports --auto-correct.
|
34
|
+
Style/UnneededCondition:
|
35
|
+
Exclude:
|
36
|
+
- 'lib/feedjira/feed_utilities.rb'
|
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -143,6 +143,8 @@ add-ons and everything in between. Here are some of them:
|
|
143
143
|
|
144
144
|
* [Solve for All][solve]: Solve for All combines search engine and feed parsing
|
145
145
|
while protecting your privacy. It's even extendable by the community!
|
146
|
+
|
147
|
+
* [Feedi API][feedi]: Feedi simplifies how you handle RSS, Atom, or JSON feeds. You can add and keep track of your favourite feed data with a simple and clean REST API. All entries are enriched by Machine Learning and Semantic engines.
|
146
148
|
|
147
149
|
[Feedbin]: https://feedbin.com/
|
148
150
|
[Stringer]: https://github.com/swanson/stringer
|
@@ -150,6 +152,7 @@ add-ons and everything in between. Here are some of them:
|
|
150
152
|
[Feedbunch]: https://github.com/amatriain/feedbunch
|
151
153
|
[old]: http://theoldreader.com/
|
152
154
|
[solve]: https://solveforall.com/
|
155
|
+
[feedi]: https://github.com/davidesantangelo/feedi
|
153
156
|
|
154
157
|
Note: to get your project on this list, simply [send an email](mailto:feedjira@gmail.com)
|
155
158
|
with your project's details.
|
data/feedjira.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
2
|
|
3
|
-
require File.expand_path("
|
3
|
+
require File.expand_path("lib/feedjira/version", __dir__)
|
4
4
|
|
5
5
|
# rubocop:disable Metrics/BlockLength
|
6
6
|
Gem::Specification.new do |s|
|
@@ -20,13 +20,19 @@ Gem::Specification.new do |s|
|
|
20
20
|
s.summary = "A feed parsing library"
|
21
21
|
s.version = Feedjira::VERSION
|
22
22
|
|
23
|
+
s.metadata = {
|
24
|
+
"homepage_uri" => "http://feedjira.com",
|
25
|
+
"source_code_uri" => "https://github.com/feedjira/feedjira",
|
26
|
+
"changelog_uri" => "https://github.com/feedjira/feedjira/blob/master/CHANGELOG.md"
|
27
|
+
}
|
28
|
+
|
23
29
|
s.files = `git ls-files`.split("\n")
|
24
30
|
s.require_paths = ["lib"]
|
25
31
|
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
26
32
|
|
27
|
-
s.required_ruby_version = ">=
|
33
|
+
s.required_ruby_version = ">=2.2"
|
28
34
|
|
29
|
-
s.add_dependency "loofah", ">= 2.
|
35
|
+
s.add_dependency "loofah", ">= 2.2.1"
|
30
36
|
s.add_dependency "sax-machine", ">= 1.0"
|
31
37
|
|
32
38
|
s.add_development_dependency "danger"
|
data/lib/feedjira.rb
CHANGED
@@ -14,6 +14,8 @@ require "feedjira/date_time_utilities"
|
|
14
14
|
require "feedjira/feed_entry_utilities"
|
15
15
|
require "feedjira/feed_utilities"
|
16
16
|
require "feedjira/feed"
|
17
|
+
require "feedjira/rss_entry_utilities"
|
18
|
+
require "feedjira/atom_entry_utilities"
|
17
19
|
require "feedjira/parser"
|
18
20
|
require "feedjira/parser/rss_entry"
|
19
21
|
require "feedjira/parser/rss_image"
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Feedjira
|
4
|
+
module AtomEntryUtilities
|
5
|
+
def self.included(mod)
|
6
|
+
mod.class_exec do
|
7
|
+
element :title
|
8
|
+
element :name, as: :author
|
9
|
+
element :content
|
10
|
+
element :summary
|
11
|
+
element :enclosure, as: :image, value: :href
|
12
|
+
|
13
|
+
element :published
|
14
|
+
element :id, as: :entry_id
|
15
|
+
element :created, as: :published
|
16
|
+
element :issued, as: :published
|
17
|
+
element :updated
|
18
|
+
element :modified, as: :updated
|
19
|
+
|
20
|
+
elements :category, as: :categories, value: :term
|
21
|
+
|
22
|
+
element :link, as: :url, value: :href, with: {
|
23
|
+
type: "text/html",
|
24
|
+
rel: "alternate"
|
25
|
+
}
|
26
|
+
|
27
|
+
elements :link, as: :links, value: :href
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
def url
|
32
|
+
@url ||= links.first
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
@@ -1,7 +1,6 @@
|
|
1
1
|
require "time"
|
2
2
|
require "date"
|
3
3
|
|
4
|
-
# rubocop:disable Style/DocumentationMethod
|
5
4
|
class Time
|
6
5
|
# Parse a time string and convert it to UTC without raising errors.
|
7
6
|
# Parses a flattened 14-digit time (YYYYmmddHHMMSS) as UTC.
|
@@ -11,17 +10,16 @@ class Time
|
|
11
10
|
#
|
12
11
|
# === Returns
|
13
12
|
# A Time instance in UTC or nil if there were errors while parsing.
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
parse_string_safely dt.to_s
|
13
|
+
def self.parse_safely(datetime)
|
14
|
+
if datetime.is_a?(Time)
|
15
|
+
datetime.utc
|
16
|
+
elsif datetime.respond_to?(:to_datetime)
|
17
|
+
datetime.to_datetime.utc
|
18
|
+
elsif datetime.respond_to? :to_s
|
19
|
+
parse_string_safely datetime.to_s
|
22
20
|
end
|
23
21
|
rescue StandardError => e
|
24
|
-
Feedjira.logger.debug { "Failed to parse time #{dt}" }
|
22
|
+
Feedjira.logger.debug { "Failed to parse time #{datetime}" }
|
25
23
|
Feedjira.logger.debug(e)
|
26
24
|
nil
|
27
25
|
end
|
@@ -1,4 +1,3 @@
|
|
1
|
-
# rubocop:disable Style/Documentation
|
2
1
|
module Feedjira
|
3
2
|
module DateTimeUtilities
|
4
3
|
# This is our date parsing heuristic.
|
@@ -12,7 +11,6 @@ module Feedjira
|
|
12
11
|
|
13
12
|
# Parse the given string starting with the most common parser (default ruby)
|
14
13
|
# and going over all other available parsers
|
15
|
-
# rubocop:disable Metrics/MethodLength
|
16
14
|
def parse_datetime(string)
|
17
15
|
res = DATE_PARSERS.detect do |parser|
|
18
16
|
begin
|
@@ -1,15 +1,11 @@
|
|
1
|
-
# rubocop:disable Style/Documentation
|
2
|
-
# rubocop:disable Style/DocumentationMethod
|
3
1
|
module Feedjira
|
4
2
|
module DateTimeUtilities
|
5
3
|
class DateTimePatternParser
|
6
|
-
# rubocop:disable Style/AsciiComments
|
7
4
|
# Japanese Symbols are required for strange Date Strings like
|
8
5
|
# '水, 31 8 2016 07:37:00 PDT'
|
9
6
|
JAPANESE_SYMBOLS = %w(日 月 火 水 木 金 土).freeze
|
10
7
|
PATTERNS = ["%m/%d/%Y %T %p", "%d %m %Y %T %Z"].freeze
|
11
8
|
|
12
|
-
# rubocop:disable Metrics/MethodLength
|
13
9
|
def self.parse(string)
|
14
10
|
PATTERNS.each do |p|
|
15
11
|
begin
|
data/lib/feedjira/feed.rb
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
# rubocop:disable Style/Documentation
|
2
|
-
# rubocop:disable Style/DocumentationMethod
|
3
1
|
module Feedjira
|
4
2
|
module FeedEntryUtilities
|
5
3
|
include Enumerable
|
@@ -20,22 +18,24 @@ module Feedjira
|
|
20
18
|
##
|
21
19
|
# Returns the id of the entry or its url if no id is present, as some
|
22
20
|
# formats don't support it
|
21
|
+
# rubocop:disable Naming/MemoizedInstanceVariableName
|
23
22
|
def id
|
24
23
|
@entry_id ||= @url
|
25
24
|
end
|
25
|
+
# rubocop:enable Naming/MemoizedInstanceVariableName
|
26
26
|
|
27
27
|
##
|
28
28
|
# Writer for published. By default, we keep the "oldest" publish time found.
|
29
29
|
def published=(val)
|
30
30
|
parsed = parse_datetime(val)
|
31
|
-
@published = parsed if !@published || parsed < @published
|
31
|
+
@published = parsed if parsed && (!@published || parsed < @published)
|
32
32
|
end
|
33
33
|
|
34
34
|
##
|
35
35
|
# Writer for updated. By default, we keep the most recent update time found.
|
36
36
|
def updated=(val)
|
37
37
|
parsed = parse_datetime(val)
|
38
|
-
@updated = parsed if !@updated || parsed > @updated
|
38
|
+
@updated = parsed if parsed && (!@updated || parsed > @updated)
|
39
39
|
end
|
40
40
|
|
41
41
|
def sanitize!
|
@@ -49,10 +49,15 @@ module Feedjira
|
|
49
49
|
alias last_modified published
|
50
50
|
|
51
51
|
def each
|
52
|
-
@rss_fields ||= instance_variables
|
52
|
+
@rss_fields ||= instance_variables.map do |ivar|
|
53
|
+
ivar.to_s.sub("@", "")
|
54
|
+
end.select do |field|
|
55
|
+
# select callable (public) methods only
|
56
|
+
respond_to?(field)
|
57
|
+
end
|
53
58
|
|
54
59
|
@rss_fields.each do |field|
|
55
|
-
yield(field
|
60
|
+
yield(field, instance_variable_get(:"@#{field}"))
|
56
61
|
end
|
57
62
|
end
|
58
63
|
|
data/lib/feedjira/parser/atom.rb
CHANGED
@@ -1,32 +1,13 @@
|
|
1
|
-
# rubocop:disable Style/DocumentationMethod
|
2
1
|
module Feedjira
|
3
2
|
module Parser
|
4
3
|
# Parser for dealing with Atom feed entries.
|
5
4
|
class AtomEntry
|
6
5
|
include SAXMachine
|
7
6
|
include FeedEntryUtilities
|
7
|
+
include AtomEntryUtilities
|
8
8
|
|
9
|
-
element :
|
10
|
-
element :link, as: :url, value: :href, with: { type: "text/html", rel: "alternate" } # rubocop:disable Metrics/LineLength
|
11
|
-
element :name, as: :author
|
12
|
-
element :content
|
13
|
-
element :summary
|
14
|
-
|
9
|
+
element :"media:thumbnail", as: :image, value: :url
|
15
10
|
element :"media:content", as: :image, value: :url
|
16
|
-
element :enclosure, as: :image, value: :href
|
17
|
-
|
18
|
-
element :published
|
19
|
-
element :id, as: :entry_id
|
20
|
-
element :created, as: :published
|
21
|
-
element :issued, as: :published
|
22
|
-
element :updated
|
23
|
-
element :modified, as: :updated
|
24
|
-
elements :category, as: :categories, value: :term
|
25
|
-
elements :link, as: :links, value: :href
|
26
|
-
|
27
|
-
def url
|
28
|
-
@url ||= links.first
|
29
|
-
end
|
30
11
|
end
|
31
12
|
end
|
32
13
|
end
|
@@ -1,4 +1,3 @@
|
|
1
|
-
# rubocop:disable Style/DocumentationMethod
|
2
1
|
module Feedjira
|
3
2
|
module Parser
|
4
3
|
# Parser for dealing with Feedburner Atom feeds.
|
@@ -20,7 +19,7 @@ module Feedjira
|
|
20
19
|
attr_writer :url, :feed_url
|
21
20
|
|
22
21
|
def self.able_to_parse?(xml)
|
23
|
-
((/Atom/ =~ xml) && (/feedburner/ =~ xml) && !(/\<rss|\<rdf/ =~ xml)) || false # rubocop:disable Metrics/LineLength
|
22
|
+
((/<feed/ =~ xml) && (/Atom/ =~ xml) && (/feedburner/ =~ xml) && !(/\<rss|\<rdf/ =~ xml)) || false # rubocop:disable Metrics/LineLength
|
24
23
|
end
|
25
24
|
|
26
25
|
# Feed url is <link> with type="text/html" if present,
|
@@ -1,32 +1,21 @@
|
|
1
|
-
# rubocop:disable Style/DocumentationMethod
|
2
1
|
module Feedjira
|
3
2
|
module Parser
|
4
3
|
# Parser for dealing with Feedburner Atom feed entries.
|
5
4
|
class AtomFeedBurnerEntry
|
6
5
|
include SAXMachine
|
7
6
|
include FeedEntryUtilities
|
7
|
+
include AtomEntryUtilities
|
8
8
|
|
9
|
-
element :
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
element :summary
|
14
|
-
element :content
|
9
|
+
element :"feedburner:origLink", as: :orig_link
|
10
|
+
# rubocop:disable Style/AccessModifierDeclarations
|
11
|
+
private :orig_link
|
12
|
+
# rubocop:enable Style/AccessModifierDeclarations
|
15
13
|
|
14
|
+
element :"media:thumbnail", as: :image, value: :url
|
16
15
|
element :"media:content", as: :image, value: :url
|
17
|
-
element :enclosure, as: :image, value: :href
|
18
|
-
|
19
|
-
element :published
|
20
|
-
element :id, as: :entry_id
|
21
|
-
element :issued, as: :published
|
22
|
-
element :created, as: :published
|
23
|
-
element :updated
|
24
|
-
element :modified, as: :updated
|
25
|
-
elements :category, as: :categories, value: :term
|
26
|
-
elements :link, as: :links, value: :href
|
27
16
|
|
28
17
|
def url
|
29
|
-
|
18
|
+
orig_link || super
|
30
19
|
end
|
31
20
|
end
|
32
21
|
end
|
@@ -1,19 +1,18 @@
|
|
1
|
-
# rubocop:disable Style/Documentation
|
2
1
|
module Feedjira
|
3
2
|
module Parser
|
4
3
|
class AtomYoutubeEntry
|
5
4
|
include SAXMachine
|
6
5
|
include FeedEntryUtilities
|
6
|
+
include AtomEntryUtilities
|
7
|
+
|
8
|
+
sax_config.top_level_elements["link"].clear
|
9
|
+
sax_config.collection_elements["link"].clear
|
7
10
|
|
8
|
-
element :title
|
9
11
|
element :link, as: :url, value: :href, with: { rel: "alternate" }
|
10
|
-
|
12
|
+
|
11
13
|
element :"media:description", as: :content
|
12
|
-
element :summary
|
13
|
-
element :published
|
14
|
-
element :id, as: :entry_id
|
15
|
-
element :updated
|
16
14
|
element :"yt:videoId", as: :youtube_video_id
|
15
|
+
element :"yt:channelId", as: :youtube_channel_id
|
17
16
|
element :"media:title", as: :media_title
|
18
17
|
element :"media:content", as: :media_url, value: :url
|
19
18
|
element :"media:content", as: :media_type, value: :type
|
@@ -1,31 +1,13 @@
|
|
1
|
-
# rubocop:disable Style/Documentation
|
2
|
-
# rubocop:disable Style/DocumentationMethod
|
3
1
|
module Feedjira
|
4
2
|
module Parser
|
5
3
|
class GoogleDocsAtomEntry
|
6
4
|
include SAXMachine
|
7
5
|
include FeedEntryUtilities
|
6
|
+
include AtomEntryUtilities
|
8
7
|
|
9
|
-
element :title
|
10
|
-
element :link, as: :url, value: :href, with: { type: "text/html", rel: "alternate" } # rubocop:disable Metrics/LineLength
|
11
|
-
element :name, as: :author
|
12
|
-
element :content
|
13
|
-
element :summary
|
14
|
-
element :published
|
15
|
-
element :id, as: :entry_id
|
16
|
-
element :created, as: :published
|
17
|
-
element :issued, as: :published
|
18
|
-
element :updated
|
19
|
-
element :modified, as: :updated
|
20
|
-
elements :category, as: :categories, value: :term
|
21
|
-
elements :link, as: :links, value: :href
|
22
8
|
element :"docs:md5Checksum", as: :checksum
|
23
9
|
element :"docs:filename", as: :original_filename
|
24
10
|
element :"docs:suggestedFilename", as: :suggested_filename
|
25
|
-
|
26
|
-
def url
|
27
|
-
@url ||= links.first
|
28
|
-
end
|
29
11
|
end
|
30
12
|
end
|
31
13
|
end
|
@@ -5,14 +5,9 @@ module Feedjira
|
|
5
5
|
class ITunesRSSItem
|
6
6
|
include SAXMachine
|
7
7
|
include FeedEntryUtilities
|
8
|
+
include RSSEntryUtilities
|
8
9
|
|
9
|
-
|
10
|
-
element :guid, as: :entry_id
|
11
|
-
element :title
|
12
|
-
element :link, as: :url
|
13
|
-
element :description, as: :summary
|
14
|
-
element :"content:encoded", as: :content
|
15
|
-
element :pubDate, as: :published
|
10
|
+
sax_config.top_level_elements["enclosure"].clear
|
16
11
|
|
17
12
|
# If author is not present use author tag on the item
|
18
13
|
element :"itunes:author", as: :itunes_author
|
data/lib/feedjira/parser/rss.rb
CHANGED
@@ -4,34 +4,7 @@ module Feedjira
|
|
4
4
|
class RSSEntry
|
5
5
|
include SAXMachine
|
6
6
|
include FeedEntryUtilities
|
7
|
-
|
8
|
-
element :title
|
9
|
-
element :link, as: :url
|
10
|
-
|
11
|
-
element :"dc:creator", as: :author
|
12
|
-
element :author, as: :author
|
13
|
-
element :"content:encoded", as: :content
|
14
|
-
element :description, as: :summary
|
15
|
-
|
16
|
-
element :"media:content", as: :image, value: :url
|
17
|
-
element :enclosure, as: :image, value: :url
|
18
|
-
|
19
|
-
element :pubDate, as: :published
|
20
|
-
element :pubdate, as: :published
|
21
|
-
element :"dc:date", as: :published
|
22
|
-
element :"dc:Date", as: :published
|
23
|
-
element :"dcterms:created", as: :published
|
24
|
-
|
25
|
-
element :"dcterms:modified", as: :updated
|
26
|
-
element :issued, as: :published
|
27
|
-
elements :category, as: :categories
|
28
|
-
|
29
|
-
element :guid, as: :entry_id
|
30
|
-
element :"dc:identifier", as: :dc_identifier
|
31
|
-
|
32
|
-
def id
|
33
|
-
@entry_id ||= @dc_identifier || @url
|
34
|
-
end
|
7
|
+
include RSSEntryUtilities
|
35
8
|
end
|
36
9
|
end
|
37
10
|
end
|
@@ -1,38 +1,18 @@
|
|
1
|
-
# rubocop:disable Style/DocumentationMethod
|
2
1
|
module Feedjira
|
3
2
|
module Parser
|
4
3
|
# Parser for dealing with RDF feed entries.
|
5
4
|
class RSSFeedBurnerEntry
|
6
5
|
include SAXMachine
|
7
6
|
include FeedEntryUtilities
|
7
|
+
include RSSEntryUtilities
|
8
8
|
|
9
|
-
element :
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
element :"dc:creator", as: :author
|
15
|
-
element :author, as: :author
|
16
|
-
element :"content:encoded", as: :content
|
17
|
-
element :description, as: :summary
|
18
|
-
|
19
|
-
element :"media:content", as: :image, value: :url
|
20
|
-
element :enclosure, as: :image, value: :url
|
21
|
-
|
22
|
-
element :pubDate, as: :published
|
23
|
-
element :pubdate, as: :published
|
24
|
-
element :"dc:date", as: :published
|
25
|
-
element :"dc:Date", as: :published
|
26
|
-
element :"dcterms:created", as: :published
|
27
|
-
|
28
|
-
element :"dcterms:modified", as: :updated
|
29
|
-
element :issued, as: :published
|
30
|
-
elements :category, as: :categories
|
31
|
-
|
32
|
-
element :guid, as: :entry_id
|
9
|
+
element :"feedburner:origLink", as: :orig_link
|
10
|
+
# rubocop:disable Style/AccessModifierDeclarations
|
11
|
+
private :orig_link
|
12
|
+
# rubocop:enable Style/AccessModifierDeclarations
|
33
13
|
|
34
14
|
def url
|
35
|
-
|
15
|
+
orig_link || super
|
36
16
|
end
|
37
17
|
end
|
38
18
|
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Feedjira
|
4
|
+
module RSSEntryUtilities
|
5
|
+
def self.included(mod)
|
6
|
+
mod.class_exec do
|
7
|
+
element :title
|
8
|
+
|
9
|
+
element :"content:encoded", as: :content
|
10
|
+
element :description, as: :summary
|
11
|
+
|
12
|
+
element :link, as: :url
|
13
|
+
|
14
|
+
element :author
|
15
|
+
element :"dc:creator", as: :author
|
16
|
+
|
17
|
+
element :pubDate, as: :published
|
18
|
+
element :pubdate, as: :published
|
19
|
+
element :issued, as: :published
|
20
|
+
element :"dc:date", as: :published
|
21
|
+
element :"dc:Date", as: :published
|
22
|
+
element :"dcterms:created", as: :published
|
23
|
+
|
24
|
+
element :"dcterms:modified", as: :updated
|
25
|
+
|
26
|
+
element :guid, as: :entry_id
|
27
|
+
element :"dc:identifier", as: :dc_identifier
|
28
|
+
|
29
|
+
element :"media:thumbnail", as: :image, value: :url
|
30
|
+
element :"media:content", as: :image, value: :url
|
31
|
+
element :enclosure, as: :image, value: :url
|
32
|
+
|
33
|
+
elements :category, as: :categories
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
attr_reader :url
|
38
|
+
|
39
|
+
# rubocop:disable Naming/MemoizedInstanceVariableName
|
40
|
+
def id
|
41
|
+
@entry_id ||= @dc_identifier || @url
|
42
|
+
end
|
43
|
+
# rubocop:enable Naming/MemoizedInstanceVariableName
|
44
|
+
end
|
45
|
+
end
|
data/lib/feedjira/version.rb
CHANGED
data/spec/feedjira/feed_spec.rb
CHANGED
@@ -20,11 +20,11 @@ describe Feedjira::Parser::AtomYoutubeEntry do
|
|
20
20
|
end
|
21
21
|
|
22
22
|
it "should have the published date" do
|
23
|
-
expect(@entry.published).to eq Time.parse_safely("2015-05-04T00:01:27+00:00")
|
23
|
+
expect(@entry.published).to eq Time.parse_safely("2015-05-04T00:01:27+00:00")
|
24
24
|
end
|
25
25
|
|
26
26
|
it "should have the updated date" do
|
27
|
-
expect(@entry.updated).to eq Time.parse_safely("2015-05-13T17:38:30+00:00")
|
27
|
+
expect(@entry.updated).to eq Time.parse_safely("2015-05-13T17:38:30+00:00")
|
28
28
|
end
|
29
29
|
|
30
30
|
it "should have the content populated from the media:description element" do
|
data/spec/feedjira_spec.rb
CHANGED
@@ -62,6 +62,16 @@ RSpec.describe Feedjira do
|
|
62
62
|
expect(feed.entries.first.id).to eq "23246627"
|
63
63
|
expect(feed.entries.last.id.strip).to eq "1"
|
64
64
|
end
|
65
|
+
|
66
|
+
it "does not fail if multiple published dates exist and some are unparseable" do
|
67
|
+
expect(Feedjira.logger).to receive(:warn).twice
|
68
|
+
|
69
|
+
feed = Feedjira.parse(sample_invalid_date_format_feed)
|
70
|
+
expect(feed.title).to eq "Invalid date format feed"
|
71
|
+
published = Time.parse_safely "Mon, 16 Oct 2017 15:10:00 GMT"
|
72
|
+
expect(feed.entries.first.published).to eq published
|
73
|
+
expect(feed.entries.size).to eq 2
|
74
|
+
end
|
65
75
|
end
|
66
76
|
|
67
77
|
context "when there's no available parser" do
|
@@ -128,7 +138,7 @@ RSpec.describe Feedjira do
|
|
128
138
|
it "does not use default parsers" do
|
129
139
|
xml = "Atom asdf"
|
130
140
|
new_parser = Class.new do
|
131
|
-
def self.able_to_parse?(
|
141
|
+
def self.able_to_parse?(_xml)
|
132
142
|
true
|
133
143
|
end
|
134
144
|
end
|
data/spec/sample_feeds.rb
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
# rubocop:disable Metrics/LineLength
|
2
|
-
|
3
1
|
module SampleFeeds
|
4
2
|
FEEDS = {
|
5
3
|
sample_atom_feed: "AmazonWebServicesBlog.xml",
|
@@ -29,6 +27,7 @@ module SampleFeeds
|
|
29
27
|
sample_atom_xhtml_with_escpaed_html_in_pre_tag_feed: "AtomEscapedHTMLInPreTag.xml",
|
30
28
|
sample_json_feed: "json_feed.json",
|
31
29
|
sample_rss_feed_huffpost_ca: "HuffPostCanada.xml",
|
30
|
+
sample_invalid_date_format_feed: "InvalidDateFormat.xml"
|
32
31
|
}.freeze
|
33
32
|
|
34
33
|
FEEDS.each do |method, filename|
|
@@ -39,5 +38,3 @@ module SampleFeeds
|
|
39
38
|
File.read("#{File.dirname(__FILE__)}/sample_feeds/#{filename}")
|
40
39
|
end
|
41
40
|
end
|
42
|
-
|
43
|
-
# rubocop:enable Metrics/LineLength
|
@@ -0,0 +1,20 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?><rss version="2.0"
|
2
|
+
xmlns:dc="http://purl.org/dc/elements/1.1/">
|
3
|
+
|
4
|
+
<channel>
|
5
|
+
<title>Invalid date format feed</title>
|
6
|
+
<link>http://example.com/feed</link>
|
7
|
+
<language>en-US</language>
|
8
|
+
<item>
|
9
|
+
<title>Item 0 with an invalid date</title>
|
10
|
+
<link>http://example.com/item0</link>
|
11
|
+
<pubDate>Mon, 16 Oct 2017 15:10:00 +0000</pubDate>
|
12
|
+
<dc:date>1518478934</dc:date>
|
13
|
+
</item>
|
14
|
+
<item>
|
15
|
+
<title>Item 1 with all valid dates</title>
|
16
|
+
<link>http://example.com/item1</link>
|
17
|
+
<pubDate>Tue, 17 Oct 2017 12:17:00 +0000</pubDate>
|
18
|
+
<dc:date>Tue, 17 Oct 2017 22:17:00 +0000</dc:date>
|
19
|
+
</item>
|
20
|
+
</channel>
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: feedjira
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.0.beta1
|
4
|
+
version: 3.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Adam Hess
|
@@ -13,7 +13,7 @@ authors:
|
|
13
13
|
autorequire:
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
|
-
date:
|
16
|
+
date: 2019-07-10 00:00:00.000000000 Z
|
17
17
|
dependencies:
|
18
18
|
- !ruby/object:Gem::Dependency
|
19
19
|
name: loofah
|
@@ -21,14 +21,14 @@ dependencies:
|
|
21
21
|
requirements:
|
22
22
|
- - ">="
|
23
23
|
- !ruby/object:Gem::Version
|
24
|
-
version:
|
24
|
+
version: 2.2.1
|
25
25
|
type: :runtime
|
26
26
|
prerelease: false
|
27
27
|
version_requirements: !ruby/object:Gem::Requirement
|
28
28
|
requirements:
|
29
29
|
- - ">="
|
30
30
|
- !ruby/object:Gem::Version
|
31
|
-
version:
|
31
|
+
version: 2.2.1
|
32
32
|
- !ruby/object:Gem::Dependency
|
33
33
|
name: sax-machine
|
34
34
|
requirement: !ruby/object:Gem::Requirement
|
@@ -136,6 +136,7 @@ files:
|
|
136
136
|
- ".gitignore"
|
137
137
|
- ".rspec"
|
138
138
|
- ".rubocop.yml"
|
139
|
+
- ".rubocop_todo.yml"
|
139
140
|
- ".travis.yml"
|
140
141
|
- CHANGELOG.md
|
141
142
|
- CODE_OF_CONDUCT.md
|
@@ -146,6 +147,7 @@ files:
|
|
146
147
|
- Rakefile
|
147
148
|
- feedjira.gemspec
|
148
149
|
- lib/feedjira.rb
|
150
|
+
- lib/feedjira/atom_entry_utilities.rb
|
149
151
|
- lib/feedjira/configuration.rb
|
150
152
|
- lib/feedjira/core_ext.rb
|
151
153
|
- lib/feedjira/core_ext/date.rb
|
@@ -180,6 +182,7 @@ files:
|
|
180
182
|
- lib/feedjira/parser/rss_feed_burner_entry.rb
|
181
183
|
- lib/feedjira/parser/rss_image.rb
|
182
184
|
- lib/feedjira/preprocessor.rb
|
185
|
+
- lib/feedjira/rss_entry_utilities.rb
|
183
186
|
- lib/feedjira/version.rb
|
184
187
|
- spec/feedjira/configuration_spec.rb
|
185
188
|
- spec/feedjira/date_time_utilities_spec.rb
|
@@ -223,6 +226,7 @@ files:
|
|
223
226
|
- spec/sample_feeds/HuffPostCanada.xml
|
224
227
|
- spec/sample_feeds/ITunesWithSingleQuotedAttributes.xml
|
225
228
|
- spec/sample_feeds/ITunesWithSpacesInAttributes.xml
|
229
|
+
- spec/sample_feeds/InvalidDateFormat.xml
|
226
230
|
- spec/sample_feeds/PaulDixExplainsNothing.xml
|
227
231
|
- spec/sample_feeds/PaulDixExplainsNothingAlternate.xml
|
228
232
|
- spec/sample_feeds/PaulDixExplainsNothingFirstEntryContent.xml
|
@@ -244,7 +248,10 @@ files:
|
|
244
248
|
homepage: http://feedjira.com
|
245
249
|
licenses:
|
246
250
|
- MIT
|
247
|
-
metadata:
|
251
|
+
metadata:
|
252
|
+
homepage_uri: http://feedjira.com
|
253
|
+
source_code_uri: https://github.com/feedjira/feedjira
|
254
|
+
changelog_uri: https://github.com/feedjira/feedjira/blob/master/CHANGELOG.md
|
248
255
|
post_install_message:
|
249
256
|
rdoc_options: []
|
250
257
|
require_paths:
|
@@ -253,15 +260,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
253
260
|
requirements:
|
254
261
|
- - ">="
|
255
262
|
- !ruby/object:Gem::Version
|
256
|
-
version:
|
263
|
+
version: '2.2'
|
257
264
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
258
265
|
requirements:
|
259
|
-
- - "
|
266
|
+
- - ">="
|
260
267
|
- !ruby/object:Gem::Version
|
261
|
-
version:
|
268
|
+
version: '0'
|
262
269
|
requirements: []
|
263
|
-
|
264
|
-
rubygems_version: 2.6.13
|
270
|
+
rubygems_version: 3.0.1
|
265
271
|
signing_key:
|
266
272
|
specification_version: 4
|
267
273
|
summary: A feed parsing library
|
@@ -308,6 +314,7 @@ test_files:
|
|
308
314
|
- spec/sample_feeds/HuffPostCanada.xml
|
309
315
|
- spec/sample_feeds/ITunesWithSingleQuotedAttributes.xml
|
310
316
|
- spec/sample_feeds/ITunesWithSpacesInAttributes.xml
|
317
|
+
- spec/sample_feeds/InvalidDateFormat.xml
|
311
318
|
- spec/sample_feeds/PaulDixExplainsNothing.xml
|
312
319
|
- spec/sample_feeds/PaulDixExplainsNothingAlternate.xml
|
313
320
|
- spec/sample_feeds/PaulDixExplainsNothingFirstEntryContent.xml
|