feedjira 2.1.0 → 2.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.rubocop.yml +9 -2
- data/CHANGELOG.md +4 -0
- data/LICENSE +1 -1
- data/README.md +210 -7
- data/Rakefile +5 -0
- data/feedjira.gemspec +2 -1
- data/lib/feedjira.rb +7 -1
- data/lib/feedjira/configuration.rb +76 -0
- data/lib/feedjira/core_ext/date.rb +1 -0
- data/lib/feedjira/core_ext/string.rb +1 -0
- data/lib/feedjira/core_ext/time.rb +5 -1
- data/lib/feedjira/date_time_utilities.rb +11 -3
- data/lib/feedjira/date_time_utilities/date_time_epoch_parser.rb +13 -0
- data/lib/feedjira/date_time_utilities/date_time_language_parser.rb +2 -0
- data/lib/feedjira/date_time_utilities/date_time_pattern_parser.rb +6 -1
- data/lib/feedjira/feed.rb +87 -69
- data/lib/feedjira/feed_entry_utilities.rb +5 -2
- data/lib/feedjira/feed_utilities.rb +11 -1
- data/lib/feedjira/parser.rb +1 -1
- data/lib/feedjira/parser/atom.rb +1 -0
- data/lib/feedjira/parser/atom_entry.rb +1 -0
- data/lib/feedjira/parser/atom_feed_burner.rb +19 -2
- data/lib/feedjira/parser/atom_feed_burner_entry.rb +1 -0
- data/lib/feedjira/parser/atom_youtube.rb +1 -0
- data/lib/feedjira/parser/atom_youtube_entry.rb +1 -0
- data/lib/feedjira/parser/google_docs_atom.rb +2 -1
- data/lib/feedjira/parser/google_docs_atom_entry.rb +2 -0
- data/lib/feedjira/parser/itunes_rss.rb +1 -0
- data/lib/feedjira/parser/itunes_rss_category.rb +1 -0
- data/lib/feedjira/parser/itunes_rss_owner.rb +1 -0
- data/lib/feedjira/parser/podlove_chapter.rb +2 -0
- data/lib/feedjira/parser/rss.rb +1 -0
- data/lib/feedjira/parser/rss_feed_burner.rb +1 -0
- data/lib/feedjira/parser/rss_feed_burner_entry.rb +1 -0
- data/lib/feedjira/preprocessor.rb +2 -0
- data/lib/feedjira/version.rb +1 -1
- data/spec/feedjira/configuration_spec.rb +25 -0
- data/spec/feedjira/date_time_utilities_spec.rb +6 -0
- data/spec/feedjira/feed_spec.rb +20 -2
- data/spec/feedjira/feed_utilities_spec.rb +18 -0
- data/spec/feedjira/parser/atom_feed_burner_spec.rb +32 -1
- data/spec/sample_feeds.rb +1 -0
- data/spec/sample_feeds/GiantRobotsSmashingIntoOtherGiantRobots.xml +682 -0
- metadata +49 -29
data/lib/feedjira/date_time_utilities.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# rubocop:disable Style/Documentation
|
1
2
|
module Feedjira
|
2
3
|
module DateTimeUtilities
|
3
4
|
# This is our date parsing heuristic.
|
@@ -5,20 +6,27 @@ module Feedjira
|
|
5
6
|
DATE_PARSERS = [
|
6
7
|
DateTimePatternParser,
|
7
8
|
DateTimeLanguageParser,
|
9
|
+
DateTimeEpochParser,
|
8
10
|
DateTime
|
9
11
|
].freeze
|
10
12
|
|
11
13
|
# Parse the given string starting with the most common parser (default ruby)
|
12
14
|
# and going over all other available parsers
|
15
|
+
# rubocop:disable Metrics/MethodLength
|
13
16
|
def parse_datetime(string)
|
14
|
-
DATE_PARSERS.find do |parser|
|
17
|
+
res = DATE_PARSERS.find do |parser|
|
15
18
|
begin
|
16
19
|
return parser.parse(string).feed_utils_to_gm_time
|
17
|
-
rescue
|
20
|
+
rescue StandardError => e
|
21
|
+
Feedjira.logger.debug { "Failed to parse date #{string}" }
|
22
|
+
Feedjira.logger.debug(e)
|
18
23
|
nil
|
19
24
|
end
|
20
25
|
end
|
21
|
-
|
26
|
+
|
27
|
+
Feedjira.logger.warn { "Failed to parse date #{string}" } if res.nil?
|
28
|
+
|
29
|
+
res
|
22
30
|
end
|
23
31
|
end
|
24
32
|
end
|
data/lib/feedjira/date_time_utilities/date_time_epoch_parser.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
# rubocop:disable Style/Documentation
|
2
|
+
# rubocop:disable Style/DocumentationMethod
|
3
|
+
module Feedjira
|
4
|
+
module DateTimeUtilities
|
5
|
+
class DateTimeEpochParser
|
6
|
+
def self.parse(string)
|
7
|
+
epoch_time = string.to_i
|
8
|
+
return Time.at(epoch_time).to_datetime if epoch_time.to_s == string
|
9
|
+
raise "#{string} is not a valid epoch time"
|
10
|
+
end
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
data/lib/feedjira/date_time_utilities/date_time_pattern_parser.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# rubocop:disable Style/Documentation
|
2
|
+
# rubocop:disable Style/DocumentationMethod
|
1
3
|
module Feedjira
|
2
4
|
module DateTimeUtilities
|
3
5
|
class DateTimePatternParser
|
@@ -7,12 +9,15 @@ module Feedjira
|
|
7
9
|
JAPANESE_SYMBOLS = %w(日 月 火 水 木 金 土).freeze
|
8
10
|
PATTERNS = ['%m/%d/%Y %T %p', '%d %m %Y %T %Z'].freeze
|
9
11
|
|
12
|
+
# rubocop:disable Metrics/MethodLength
|
10
13
|
def self.parse(string)
|
11
14
|
PATTERNS.each do |p|
|
12
15
|
begin
|
13
16
|
datetime = DateTime.strptime(prepare(string), p)
|
14
17
|
return datetime
|
15
|
-
rescue
|
18
|
+
rescue StandardError => e
|
19
|
+
Feedjira.logger.debug("Failed to parse date #{string}")
|
20
|
+
Feedjira.logger.debug(e)
|
16
21
|
nil
|
17
22
|
end
|
18
23
|
end
|
data/lib/feedjira/feed.rb
CHANGED
@@ -1,95 +1,113 @@
|
|
1
|
+
# rubocop:disable Style/Documentation
|
2
|
+
# rubocop:disable Style/DocumentationMethod
|
1
3
|
module Feedjira
|
2
4
|
class Feed
|
3
|
-
|
4
|
-
parser
|
5
|
-
|
5
|
+
class << self
|
6
|
+
def parse_with(parser, xml, &block)
|
7
|
+
parser.parse xml, &block
|
8
|
+
end
|
6
9
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
10
|
+
def parse(xml, &block)
|
11
|
+
parser = determine_feed_parser_for_xml(xml)
|
12
|
+
raise NoParserAvailable, 'No valid parser for XML.' unless parser
|
13
|
+
parse_with parser, xml, &block
|
14
|
+
end
|
12
15
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
16
|
+
def determine_feed_parser_for_xml(xml)
|
17
|
+
start_of_doc = xml.slice(0, 2000)
|
18
|
+
feed_classes.detect { |klass| klass.able_to_parse?(start_of_doc) }
|
19
|
+
end
|
17
20
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
+
def add_feed_class(klass)
|
22
|
+
feed_classes.unshift klass
|
23
|
+
end
|
21
24
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
Feedjira::Parser::GoogleDocsAtom,
|
26
|
-
Feedjira::Parser::AtomYoutube,
|
27
|
-
Feedjira::Parser::AtomFeedBurner,
|
28
|
-
Feedjira::Parser::Atom,
|
29
|
-
Feedjira::Parser::ITunesRSS,
|
30
|
-
Feedjira::Parser::RSS
|
31
|
-
]
|
32
|
-
end
|
25
|
+
def feed_classes
|
26
|
+
@feed_classes ||= Feedjira.parsers
|
27
|
+
end
|
33
28
|
|
34
|
-
|
35
|
-
|
36
|
-
k.element element_tag, options
|
29
|
+
def reset_parsers!
|
30
|
+
@feed_classes = nil
|
37
31
|
end
|
38
|
-
end
|
39
32
|
|
40
|
-
|
41
|
-
|
42
|
-
|
33
|
+
def add_common_feed_element(element_tag, options = {})
|
34
|
+
feed_classes.each do |k|
|
35
|
+
k.element element_tag, options
|
36
|
+
end
|
43
37
|
end
|
44
|
-
end
|
45
38
|
|
46
|
-
|
47
|
-
|
48
|
-
|
39
|
+
def add_common_feed_elements(element_tag, options = {})
|
40
|
+
feed_classes.each do |k|
|
41
|
+
k.elements element_tag, options
|
42
|
+
end
|
43
|
+
end
|
49
44
|
|
50
|
-
|
51
|
-
|
52
|
-
|
45
|
+
def add_common_feed_entry_element(element_tag, options = {})
|
46
|
+
call_on_each_feed_entry :element, element_tag, options
|
47
|
+
end
|
53
48
|
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
collection_configs = value.select do |v|
|
58
|
-
v.accessor == 'entries' && v.data_class.class == Class
|
59
|
-
end
|
49
|
+
def add_common_feed_entry_elements(element_tag, options = {})
|
50
|
+
call_on_each_feed_entry :elements, element_tag, options
|
51
|
+
end
|
60
52
|
|
61
|
-
|
62
|
-
|
53
|
+
def call_on_each_feed_entry(method, *parameters)
|
54
|
+
feed_classes.each do |klass|
|
55
|
+
klass.sax_config.collection_elements.each_value do |value|
|
56
|
+
collection_configs = value.select do |v|
|
57
|
+
v.accessor == 'entries' && v.data_class.class == Class
|
58
|
+
end
|
59
|
+
|
60
|
+
collection_configs.each do |config|
|
61
|
+
config.data_class.send(method, *parameters)
|
62
|
+
end
|
63
63
|
end
|
64
64
|
end
|
65
65
|
end
|
66
|
-
end
|
67
66
|
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
67
|
+
def fetch_and_parse(url)
|
68
|
+
response = connection(url).get
|
69
|
+
unless response.success?
|
70
|
+
raise FetchFailure, "Fetch failed - #{response.status}"
|
71
|
+
end
|
72
|
+
feed = parse response.body
|
73
|
+
feed.feed_url = url
|
74
|
+
feed.etag = response.headers['etag'].to_s.delete '"'
|
75
|
+
|
76
|
+
feed.last_modified = parse_last_modified(response)
|
77
|
+
feed
|
72
78
|
end
|
73
|
-
feed = parse response.body
|
74
|
-
feed.feed_url = url
|
75
|
-
feed.etag = response.headers['etag'].to_s.delete '"'
|
76
79
|
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
+
# rubocop:disable LineLength
|
81
|
+
def connection(url)
|
82
|
+
Faraday.new(url: url, headers: headers, request: request_options) do |conn|
|
83
|
+
conn.use FaradayMiddleware::FollowRedirects, limit: Feedjira.follow_redirect_limit
|
84
|
+
conn.adapter :net_http
|
85
|
+
end
|
86
|
+
end
|
87
|
+
# rubocop:enable LineLength
|
88
|
+
|
89
|
+
private
|
80
90
|
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
91
|
+
def headers
|
92
|
+
{
|
93
|
+
user_agent: Feedjira.user_agent
|
94
|
+
}
|
95
|
+
end
|
96
|
+
|
97
|
+
def request_options
|
98
|
+
{
|
99
|
+
timeout: Feedjira.request_timeout
|
100
|
+
}
|
85
101
|
end
|
86
|
-
end
|
87
102
|
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
103
|
+
def parse_last_modified(response)
|
104
|
+
lm = response.headers['last-modified']
|
105
|
+
DateTime.parse(lm).to_time
|
106
|
+
rescue StandardError => e
|
107
|
+
Feedjira.logger.warn { "Failed to parse last modified '#{lm}'" }
|
108
|
+
Feedjira.logger.debug(e)
|
109
|
+
nil
|
110
|
+
end
|
92
111
|
end
|
93
|
-
private_class_method :parse_last_modified
|
94
112
|
end
|
95
113
|
end
|
data/lib/feedjira/feed_entry_utilities.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# rubocop:disable Style/Documentation
|
2
|
+
# rubocop:disable Style/DocumentationMethod
|
1
3
|
module Feedjira
|
2
4
|
module FeedEntryUtilities
|
3
5
|
include Enumerable
|
@@ -9,8 +11,9 @@ module Feedjira
|
|
9
11
|
|
10
12
|
def parse_datetime(string)
|
11
13
|
DateTime.parse(string).feed_utils_to_gm_time
|
12
|
-
rescue
|
13
|
-
warn "Failed to parse date #{string.inspect}"
|
14
|
+
rescue StandardError => e
|
15
|
+
Feedjira.logger.warn { "Failed to parse date #{string.inspect}" }
|
16
|
+
Feedjira.logger.warn(e)
|
14
17
|
nil
|
15
18
|
end
|
16
19
|
|
data/lib/feedjira/feed_utilities.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# rubocop:disable Style/Documentation
|
2
|
+
# rubocop:disable Style/DocumentationMethod
|
1
3
|
module Feedjira
|
2
4
|
module FeedUtilities
|
3
5
|
UPDATABLE_ATTRIBUTES = %w(title feed_url url last_modified etag).freeze
|
@@ -11,7 +13,7 @@ module Feedjira
|
|
11
13
|
|
12
14
|
module ClassMethods
|
13
15
|
def parse(xml, &block)
|
14
|
-
xml = xml
|
16
|
+
xml = strip_whitespace(xml)
|
15
17
|
xml = preprocess(xml) if preprocess_xml
|
16
18
|
super xml, &block
|
17
19
|
end
|
@@ -28,6 +30,14 @@ module Feedjira
|
|
28
30
|
def preprocess_xml
|
29
31
|
@preprocess_xml
|
30
32
|
end
|
33
|
+
|
34
|
+
def strip_whitespace(xml)
|
35
|
+
if Feedjira.strip_whitespace
|
36
|
+
xml.strip
|
37
|
+
else
|
38
|
+
xml.lstrip
|
39
|
+
end
|
40
|
+
end
|
31
41
|
end
|
32
42
|
|
33
43
|
def last_modified
|
data/lib/feedjira/parser.rb
CHANGED
@@ -1 +1 @@
|
|
1
|
-
module Feedjira::Parser; end
|
1
|
+
module Feedjira::Parser; end # rubocop:disable Style/Documentation
|
data/lib/feedjira/parser/atom_feed_burner.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# rubocop:disable Style/DocumentationMethod
|
1
2
|
module Feedjira
|
2
3
|
module Parser
|
3
4
|
# Parser for dealing with Feedburner Atom feeds.
|
@@ -7,8 +8,12 @@ module Feedjira
|
|
7
8
|
|
8
9
|
element :title
|
9
10
|
element :subtitle, as: :description
|
10
|
-
element :link, as: :
|
11
|
-
|
11
|
+
element :link, as: :url_text_html, value: :href,
|
12
|
+
with: { type: 'text/html' }
|
13
|
+
element :link, as: :url_notype, value: :href, with: { type: nil }
|
14
|
+
element :link, as: :feed_url_link, value: :href, with: { type: 'application/atom+xml' } # rubocop:disable Metrics/LineLength
|
15
|
+
element :"atom10:link", as: :feed_url_atom10_link, value: :href,
|
16
|
+
with: { type: 'application/atom+xml' }
|
12
17
|
elements :"atom10:link", as: :hubs, value: :href, with: { rel: 'hub' }
|
13
18
|
elements :entry, as: :entries, class: AtomFeedBurnerEntry
|
14
19
|
|
@@ -16,6 +21,18 @@ module Feedjira
|
|
16
21
|
((/Atom/ =~ xml) && (/feedburner/ =~ xml) && !(/\<rss|\<rdf/ =~ xml)) || false # rubocop:disable Metrics/LineLength
|
17
22
|
end
|
18
23
|
|
24
|
+
# Feed url is <link> with type="text/html" if present,
|
25
|
+
# <link> with no type attribute otherwise
|
26
|
+
def url
|
27
|
+
@url_text_html || url_notype
|
28
|
+
end
|
29
|
+
|
30
|
+
# Feed feed_url is <link> with type="application/atom+xml" if present,
|
31
|
+
# <atom10:link> with type="application/atom+xml" otherwise
|
32
|
+
def feed_url
|
33
|
+
@feed_url_link || feed_url_atom10_link
|
34
|
+
end
|
35
|
+
|
19
36
|
def self.preprocess(xml)
|
20
37
|
Preprocessor.new(xml).to_xml
|
21
38
|
end
|
data/lib/feedjira/parser/rss.rb
CHANGED