feedjira 2.2.0 → 3.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/ISSUE_TEMPLATE/feed-parsing.md +15 -0
- data/.rubocop.yml +32 -8
- data/.rubocop_todo.yml +11 -0
- data/.travis.yml +3 -7
- data/CHANGELOG.md +18 -9
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +8 -5
- data/README.md +46 -99
- data/Rakefile +8 -6
- data/feedjira.gemspec +31 -20
- data/lib/feedjira.rb +75 -41
- data/lib/feedjira/atom_entry_utilities.rb +51 -0
- data/lib/feedjira/configuration.rb +8 -10
- data/lib/feedjira/core_ext.rb +5 -3
- data/lib/feedjira/core_ext/date.rb +2 -1
- data/lib/feedjira/core_ext/string.rb +2 -1
- data/lib/feedjira/core_ext/time.rb +12 -12
- data/lib/feedjira/date_time_utilities.rb +8 -10
- data/lib/feedjira/date_time_utilities/date_time_epoch_parser.rb +3 -2
- data/lib/feedjira/date_time_utilities/date_time_language_parser.rb +4 -4
- data/lib/feedjira/date_time_utilities/date_time_pattern_parser.rb +11 -15
- data/lib/feedjira/feed.rb +12 -82
- data/lib/feedjira/feed_entry_utilities.rb +14 -7
- data/lib/feedjira/feed_utilities.rb +5 -4
- data/lib/feedjira/parser.rb +6 -1
- data/lib/feedjira/parser/atom.rb +6 -5
- data/lib/feedjira/parser/atom_entry.rb +4 -21
- data/lib/feedjira/parser/atom_feed_burner.rb +7 -6
- data/lib/feedjira/parser/atom_feed_burner_entry.rb +7 -18
- data/lib/feedjira/parser/atom_google_alerts.rb +26 -0
- data/lib/feedjira/parser/atom_google_alerts_entry.rb +21 -0
- data/lib/feedjira/parser/atom_youtube.rb +4 -3
- data/lib/feedjira/parser/atom_youtube_entry.rb +9 -8
- data/lib/feedjira/parser/globally_unique_identifier.rb +21 -0
- data/lib/feedjira/parser/google_docs_atom.rb +6 -6
- data/lib/feedjira/parser/google_docs_atom_entry.rb +3 -19
- data/lib/feedjira/parser/itunes_rss.rb +4 -3
- data/lib/feedjira/parser/itunes_rss_category.rb +6 -5
- data/lib/feedjira/parser/itunes_rss_item.rb +5 -8
- data/lib/feedjira/parser/itunes_rss_owner.rb +2 -1
- data/lib/feedjira/parser/json_feed.rb +41 -0
- data/lib/feedjira/parser/json_feed_item.rb +57 -0
- data/lib/feedjira/parser/podlove_chapter.rb +4 -3
- data/lib/feedjira/parser/rss.rb +5 -3
- data/lib/feedjira/parser/rss_entry.rb +3 -24
- data/lib/feedjira/parser/rss_feed_burner.rb +4 -3
- data/lib/feedjira/parser/rss_feed_burner_entry.rb +6 -26
- data/lib/feedjira/parser/rss_image.rb +2 -0
- data/lib/feedjira/preprocessor.rb +4 -4
- data/lib/feedjira/rss_entry_utilities.rb +53 -0
- data/lib/feedjira/version.rb +3 -1
- data/spec/feedjira/configuration_spec.rb +11 -16
- data/spec/feedjira/date_time_utilities_spec.rb +22 -20
- data/spec/feedjira/feed_entry_utilities_spec.rb +20 -18
- data/spec/feedjira/feed_spec.rb +17 -229
- data/spec/feedjira/feed_utilities_spec.rb +75 -73
- data/spec/feedjira/parser/atom_entry_spec.rb +41 -38
- data/spec/feedjira/parser/atom_feed_burner_entry_spec.rb +22 -20
- data/spec/feedjira/parser/atom_feed_burner_spec.rb +122 -118
- data/spec/feedjira/parser/atom_google_alerts_entry_spec.rb +34 -0
- data/spec/feedjira/parser/atom_google_alerts_spec.rb +62 -0
- data/spec/feedjira/parser/atom_spec.rb +83 -77
- data/spec/feedjira/parser/atom_youtube_entry_spec.rb +41 -39
- data/spec/feedjira/parser/atom_youtube_spec.rb +21 -19
- data/spec/feedjira/parser/google_docs_atom_entry_spec.rb +10 -8
- data/spec/feedjira/parser/google_docs_atom_spec.rb +25 -21
- data/spec/feedjira/parser/itunes_rss_item_spec.rb +39 -37
- data/spec/feedjira/parser/itunes_rss_owner_spec.rb +7 -5
- data/spec/feedjira/parser/itunes_rss_spec.rb +120 -116
- data/spec/feedjira/parser/json_feed_item_spec.rb +81 -0
- data/spec/feedjira/parser/json_feed_spec.rb +55 -0
- data/spec/feedjira/parser/podlove_chapter_spec.rb +14 -12
- data/spec/feedjira/parser/rss_entry_spec.rb +56 -34
- data/spec/feedjira/parser/rss_feed_burner_entry_spec.rb +36 -34
- data/spec/feedjira/parser/rss_feed_burner_spec.rb +49 -45
- data/spec/feedjira/parser/rss_spec.rb +38 -36
- data/spec/feedjira/preprocessor_spec.rb +9 -7
- data/spec/feedjira_spec.rb +166 -0
- data/spec/sample_feeds.rb +32 -29
- data/spec/sample_feeds/HuffPostCanada.xml +279 -0
- data/spec/sample_feeds/Permalinks.xml +22 -0
- data/spec/sample_feeds/a10.xml +72 -0
- data/spec/sample_feeds/google_alerts_atom.xml +1 -0
- data/spec/sample_feeds/json_feed.json +156 -0
- data/spec/spec_helper.rb +7 -5
- metadata +59 -70
- data/Dangerfile +0 -1
- data/fixtures/vcr_cassettes/fetch_failure.yml +0 -62
- data/fixtures/vcr_cassettes/parse_error.yml +0 -222
- data/fixtures/vcr_cassettes/success.yml +0 -281
|
@@ -1,32 +1,28 @@
|
|
|
1
|
-
#
|
|
2
|
-
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
3
|
module Feedjira
|
|
4
4
|
module DateTimeUtilities
|
|
5
5
|
class DateTimePatternParser
|
|
6
|
-
# rubocop:disable Style/AsciiComments
|
|
7
6
|
# Japanese Symbols are required for strange Date Strings like
|
|
8
7
|
# '水, 31 8 2016 07:37:00 PDT'
|
|
9
|
-
JAPANESE_SYMBOLS = %w
|
|
10
|
-
PATTERNS = [
|
|
8
|
+
JAPANESE_SYMBOLS = %w[日 月 火 水 木 金 土].freeze
|
|
9
|
+
PATTERNS = ["%m/%d/%Y %T %p", "%d %m %Y %T %Z"].freeze
|
|
11
10
|
|
|
12
|
-
# rubocop:disable Metrics/MethodLength
|
|
13
11
|
def self.parse(string)
|
|
14
12
|
PATTERNS.each do |p|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
nil
|
|
22
|
-
end
|
|
13
|
+
datetime = DateTime.strptime(prepare(string), p)
|
|
14
|
+
return datetime
|
|
15
|
+
rescue StandardError => e
|
|
16
|
+
Feedjira.logger.debug("Failed to parse date #{string}")
|
|
17
|
+
Feedjira.logger.debug(e)
|
|
18
|
+
nil
|
|
23
19
|
end
|
|
24
20
|
raise "No pattern matched #{string}"
|
|
25
21
|
end
|
|
26
22
|
|
|
27
23
|
def self.prepare(string)
|
|
28
24
|
rgx = Regexp.new("^(#{JAPANESE_SYMBOLS.join('|')}),\s")
|
|
29
|
-
string.gsub(rgx,
|
|
25
|
+
string.gsub(rgx, "")
|
|
30
26
|
end
|
|
31
27
|
private_class_method :prepare
|
|
32
28
|
end
|
data/lib/feedjira/feed.rb
CHANGED
|
@@ -1,60 +1,35 @@
|
|
|
1
|
-
#
|
|
2
|
-
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
3
|
module Feedjira
|
|
4
4
|
class Feed
|
|
5
5
|
class << self
|
|
6
|
-
def parse_with(parser, xml, &block)
|
|
7
|
-
parser.parse xml, &block
|
|
8
|
-
end
|
|
9
|
-
|
|
10
|
-
def parse(xml, &block)
|
|
11
|
-
parser = determine_feed_parser_for_xml(xml)
|
|
12
|
-
raise NoParserAvailable, 'No valid parser for XML.' unless parser
|
|
13
|
-
parse_with parser, xml, &block
|
|
14
|
-
end
|
|
15
|
-
|
|
16
|
-
def determine_feed_parser_for_xml(xml)
|
|
17
|
-
start_of_doc = xml.slice(0, 2000)
|
|
18
|
-
feed_classes.detect { |klass| klass.able_to_parse?(start_of_doc) }
|
|
19
|
-
end
|
|
20
|
-
|
|
21
|
-
def add_feed_class(klass)
|
|
22
|
-
feed_classes.unshift klass
|
|
23
|
-
end
|
|
24
|
-
|
|
25
|
-
def feed_classes
|
|
26
|
-
@feed_classes ||= Feedjira.parsers
|
|
27
|
-
end
|
|
28
|
-
|
|
29
|
-
def reset_parsers!
|
|
30
|
-
@feed_classes = nil
|
|
31
|
-
end
|
|
32
|
-
|
|
33
6
|
def add_common_feed_element(element_tag, options = {})
|
|
34
|
-
|
|
35
|
-
k.element
|
|
7
|
+
Feedjira.parsers.each do |k|
|
|
8
|
+
k.element(element_tag, options)
|
|
36
9
|
end
|
|
37
10
|
end
|
|
38
11
|
|
|
39
12
|
def add_common_feed_elements(element_tag, options = {})
|
|
40
|
-
|
|
41
|
-
k.elements
|
|
13
|
+
Feedjira.parsers.each do |k|
|
|
14
|
+
k.elements(element_tag, options)
|
|
42
15
|
end
|
|
43
16
|
end
|
|
44
17
|
|
|
45
18
|
def add_common_feed_entry_element(element_tag, options = {})
|
|
46
|
-
call_on_each_feed_entry
|
|
19
|
+
call_on_each_feed_entry(:element, element_tag, options)
|
|
47
20
|
end
|
|
48
21
|
|
|
49
22
|
def add_common_feed_entry_elements(element_tag, options = {})
|
|
50
|
-
call_on_each_feed_entry
|
|
23
|
+
call_on_each_feed_entry(:elements, element_tag, options)
|
|
51
24
|
end
|
|
52
25
|
|
|
26
|
+
private
|
|
27
|
+
|
|
53
28
|
def call_on_each_feed_entry(method, *parameters)
|
|
54
|
-
|
|
29
|
+
Feedjira.parsers.each do |klass|
|
|
55
30
|
klass.sax_config.collection_elements.each_value do |value|
|
|
56
31
|
collection_configs = value.select do |v|
|
|
57
|
-
v.accessor ==
|
|
32
|
+
v.accessor == "entries" && v.data_class.class == Class
|
|
58
33
|
end
|
|
59
34
|
|
|
60
35
|
collection_configs.each do |config|
|
|
@@ -63,51 +38,6 @@ module Feedjira
|
|
|
63
38
|
end
|
|
64
39
|
end
|
|
65
40
|
end
|
|
66
|
-
|
|
67
|
-
def fetch_and_parse(url)
|
|
68
|
-
response = connection(url).get
|
|
69
|
-
unless response.success?
|
|
70
|
-
raise FetchFailure, "Fetch failed - #{response.status}"
|
|
71
|
-
end
|
|
72
|
-
feed = parse response.body
|
|
73
|
-
feed.feed_url = url
|
|
74
|
-
feed.etag = response.headers['etag'].to_s.delete '"'
|
|
75
|
-
|
|
76
|
-
feed.last_modified = parse_last_modified(response)
|
|
77
|
-
feed
|
|
78
|
-
end
|
|
79
|
-
|
|
80
|
-
# rubocop:disable LineLength
|
|
81
|
-
def connection(url)
|
|
82
|
-
Faraday.new(url: url, headers: headers, request: request_options) do |conn|
|
|
83
|
-
conn.use FaradayMiddleware::FollowRedirects, limit: Feedjira.follow_redirect_limit
|
|
84
|
-
conn.adapter(*Faraday.default_adapter)
|
|
85
|
-
end
|
|
86
|
-
end
|
|
87
|
-
# rubocop:enable LineLength
|
|
88
|
-
|
|
89
|
-
private
|
|
90
|
-
|
|
91
|
-
def headers
|
|
92
|
-
{
|
|
93
|
-
user_agent: Feedjira.user_agent
|
|
94
|
-
}
|
|
95
|
-
end
|
|
96
|
-
|
|
97
|
-
def request_options
|
|
98
|
-
{
|
|
99
|
-
timeout: Feedjira.request_timeout
|
|
100
|
-
}
|
|
101
|
-
end
|
|
102
|
-
|
|
103
|
-
def parse_last_modified(response)
|
|
104
|
-
lm = response.headers['last-modified']
|
|
105
|
-
DateTime.parse(lm).to_time
|
|
106
|
-
rescue StandardError => e
|
|
107
|
-
Feedjira.logger.warn { "Failed to parse last modified '#{lm}'" }
|
|
108
|
-
Feedjira.logger.debug(e)
|
|
109
|
-
nil
|
|
110
|
-
end
|
|
111
41
|
end
|
|
112
42
|
end
|
|
113
43
|
end
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
#
|
|
2
|
-
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
3
|
module Feedjira
|
|
4
4
|
module FeedEntryUtilities
|
|
5
5
|
include Enumerable
|
|
@@ -13,16 +13,18 @@ module Feedjira
|
|
|
13
13
|
DateTime.parse(string).feed_utils_to_gm_time
|
|
14
14
|
rescue StandardError => e
|
|
15
15
|
Feedjira.logger.warn { "Failed to parse date #{string.inspect}" }
|
|
16
|
-
Feedjira.logger.
|
|
16
|
+
Feedjira.logger.warn(e)
|
|
17
17
|
nil
|
|
18
18
|
end
|
|
19
19
|
|
|
20
20
|
##
|
|
21
21
|
# Returns the id of the entry or its url if not id is present, as some
|
|
22
22
|
# formats don't support it
|
|
23
|
+
# rubocop:disable Naming/MemoizedInstanceVariableName
|
|
23
24
|
def id
|
|
24
25
|
@entry_id ||= @url
|
|
25
26
|
end
|
|
27
|
+
# rubocop:enable Naming/MemoizedInstanceVariableName
|
|
26
28
|
|
|
27
29
|
##
|
|
28
30
|
# Writer for published. By default, we keep the "oldest" publish time found.
|
|
@@ -39,9 +41,9 @@ module Feedjira
|
|
|
39
41
|
end
|
|
40
42
|
|
|
41
43
|
def sanitize!
|
|
42
|
-
%w
|
|
44
|
+
%w[title author summary content image].each do |name|
|
|
43
45
|
if respond_to?(name) && send(name).respond_to?(:sanitize!)
|
|
44
|
-
send(name).send
|
|
46
|
+
send(name).send(:sanitize!)
|
|
45
47
|
end
|
|
46
48
|
end
|
|
47
49
|
end
|
|
@@ -49,10 +51,15 @@ module Feedjira
|
|
|
49
51
|
alias last_modified published
|
|
50
52
|
|
|
51
53
|
def each
|
|
52
|
-
@rss_fields ||= instance_variables
|
|
54
|
+
@rss_fields ||= instance_variables.map do |ivar|
|
|
55
|
+
ivar.to_s.sub("@", "")
|
|
56
|
+
end.select do |field| # rubocop:disable Style/MultilineBlockChain
|
|
57
|
+
# select callable (public) methods only
|
|
58
|
+
respond_to?(field)
|
|
59
|
+
end
|
|
53
60
|
|
|
54
61
|
@rss_fields.each do |field|
|
|
55
|
-
yield(field
|
|
62
|
+
yield(field, instance_variable_get(:"@#{field}"))
|
|
56
63
|
end
|
|
57
64
|
end
|
|
58
65
|
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
#
|
|
2
|
-
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
3
|
module Feedjira
|
|
4
4
|
module FeedUtilities
|
|
5
|
-
UPDATABLE_ATTRIBUTES = %w
|
|
5
|
+
UPDATABLE_ATTRIBUTES = %w[title feed_url url last_modified etag].freeze
|
|
6
6
|
|
|
7
7
|
attr_writer :new_entries, :updated, :last_modified
|
|
8
8
|
attr_accessor :etag
|
|
@@ -43,7 +43,7 @@ module Feedjira
|
|
|
43
43
|
def last_modified
|
|
44
44
|
@last_modified ||= begin
|
|
45
45
|
published = entries.reject { |e| e.published.nil? }
|
|
46
|
-
entry = published.
|
|
46
|
+
entry = published.max_by(&:published)
|
|
47
47
|
entry ? entry.published : nil
|
|
48
48
|
end
|
|
49
49
|
end
|
|
@@ -102,6 +102,7 @@ module Feedjira
|
|
|
102
102
|
|
|
103
103
|
feed.entries.each do |entry|
|
|
104
104
|
break unless new_entry?(entry, latest_entry)
|
|
105
|
+
|
|
105
106
|
found_new_entries << entry
|
|
106
107
|
end
|
|
107
108
|
|
data/lib/feedjira/parser.rb
CHANGED
data/lib/feedjira/parser/atom.rb
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
#
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
2
3
|
module Feedjira
|
|
3
4
|
module Parser
|
|
4
5
|
# Parser for dealing with Atom feeds.
|
|
@@ -8,14 +9,14 @@ module Feedjira
|
|
|
8
9
|
|
|
9
10
|
element :title
|
|
10
11
|
element :subtitle, as: :description
|
|
11
|
-
element :link, as: :url, value: :href, with: { type:
|
|
12
|
-
element :link, as: :feed_url, value: :href, with: { rel:
|
|
12
|
+
element :link, as: :url, value: :href, with: { type: "text/html" }
|
|
13
|
+
element :link, as: :feed_url, value: :href, with: { rel: "self" }
|
|
13
14
|
elements :link, as: :links, value: :href
|
|
14
|
-
elements :link, as: :hubs, value: :href, with: { rel:
|
|
15
|
+
elements :link, as: :hubs, value: :href, with: { rel: "hub" }
|
|
15
16
|
elements :entry, as: :entries, class: AtomEntry
|
|
16
17
|
|
|
17
18
|
def self.able_to_parse?(xml)
|
|
18
|
-
%r{
|
|
19
|
+
%r{<feed[^>]+xmlns\s?=\s?["'](http://www\.w3\.org/2005/Atom|http://purl\.org/atom/ns\#)["'][^>]*>} =~ xml
|
|
19
20
|
end
|
|
20
21
|
|
|
21
22
|
def url
|
|
@@ -1,32 +1,15 @@
|
|
|
1
|
-
#
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
2
3
|
module Feedjira
|
|
3
4
|
module Parser
|
|
4
5
|
# Parser for dealing with Atom feed entries.
|
|
5
6
|
class AtomEntry
|
|
6
7
|
include SAXMachine
|
|
7
8
|
include FeedEntryUtilities
|
|
9
|
+
include AtomEntryUtilities
|
|
8
10
|
|
|
9
|
-
element :
|
|
10
|
-
element :link, as: :url, value: :href, with: { type: 'text/html', rel: 'alternate' } # rubocop:disable Metrics/LineLength
|
|
11
|
-
element :name, as: :author
|
|
12
|
-
element :content
|
|
13
|
-
element :summary
|
|
14
|
-
|
|
11
|
+
element :"media:thumbnail", as: :image, value: :url
|
|
15
12
|
element :"media:content", as: :image, value: :url
|
|
16
|
-
element :enclosure, as: :image, value: :href
|
|
17
|
-
|
|
18
|
-
element :published
|
|
19
|
-
element :id, as: :entry_id
|
|
20
|
-
element :created, as: :published
|
|
21
|
-
element :issued, as: :published
|
|
22
|
-
element :updated
|
|
23
|
-
element :modified, as: :updated
|
|
24
|
-
elements :category, as: :categories, value: :term
|
|
25
|
-
elements :link, as: :links, value: :href
|
|
26
|
-
|
|
27
|
-
def url
|
|
28
|
-
@url ||= links.first
|
|
29
|
-
end
|
|
30
13
|
end
|
|
31
14
|
end
|
|
32
15
|
end
|
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
#
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
2
3
|
module Feedjira
|
|
3
4
|
module Parser
|
|
4
5
|
# Parser for dealing with Feedburner Atom feeds.
|
|
@@ -9,18 +10,18 @@ module Feedjira
|
|
|
9
10
|
element :title
|
|
10
11
|
element :subtitle, as: :description
|
|
11
12
|
element :link, as: :url_text_html, value: :href,
|
|
12
|
-
with: { type:
|
|
13
|
+
with: { type: "text/html" }
|
|
13
14
|
element :link, as: :url_notype, value: :href, with: { type: nil }
|
|
14
|
-
element :link, as: :feed_url_link, value: :href, with: { type:
|
|
15
|
+
element :link, as: :feed_url_link, value: :href, with: { type: "application/atom+xml" }
|
|
15
16
|
element :"atom10:link", as: :feed_url_atom10_link, value: :href,
|
|
16
|
-
with: { type:
|
|
17
|
-
elements :"atom10:link", as: :hubs, value: :href, with: { rel:
|
|
17
|
+
with: { type: "application/atom+xml" }
|
|
18
|
+
elements :"atom10:link", as: :hubs, value: :href, with: { rel: "hub" }
|
|
18
19
|
elements :entry, as: :entries, class: AtomFeedBurnerEntry
|
|
19
20
|
|
|
20
21
|
attr_writer :url, :feed_url
|
|
21
22
|
|
|
22
23
|
def self.able_to_parse?(xml)
|
|
23
|
-
((/Atom/ =~ xml) && (/feedburner/ =~ xml) && !(
|
|
24
|
+
((/<feed/ =~ xml) && (/Atom/ =~ xml) && (/feedburner/ =~ xml) && !(/<rss|<rdf/ =~ xml)) || false
|
|
24
25
|
end
|
|
25
26
|
|
|
26
27
|
# Feed url is <link> with type="text/html" if present,
|
|
@@ -1,32 +1,21 @@
|
|
|
1
|
-
#
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
2
3
|
module Feedjira
|
|
3
4
|
module Parser
|
|
4
5
|
# Parser for dealing with Feedburner Atom feed entries.
|
|
5
6
|
class AtomFeedBurnerEntry
|
|
6
7
|
include SAXMachine
|
|
7
8
|
include FeedEntryUtilities
|
|
9
|
+
include AtomEntryUtilities
|
|
8
10
|
|
|
9
|
-
element :
|
|
10
|
-
|
|
11
|
-
element :link, as: :url, value: :href, with: { type: 'text/html', rel: 'alternate' } # rubocop:disable Metrics/LineLength
|
|
12
|
-
element :"feedburner:origLink", as: :url
|
|
13
|
-
element :summary
|
|
14
|
-
element :content
|
|
11
|
+
element :"feedburner:origLink", as: :orig_link
|
|
12
|
+
private :orig_link
|
|
15
13
|
|
|
14
|
+
element :"media:thumbnail", as: :image, value: :url
|
|
16
15
|
element :"media:content", as: :image, value: :url
|
|
17
|
-
element :enclosure, as: :image, value: :href
|
|
18
|
-
|
|
19
|
-
element :published
|
|
20
|
-
element :id, as: :entry_id
|
|
21
|
-
element :issued, as: :published
|
|
22
|
-
element :created, as: :published
|
|
23
|
-
element :updated
|
|
24
|
-
element :modified, as: :updated
|
|
25
|
-
elements :category, as: :categories, value: :term
|
|
26
|
-
elements :link, as: :links, value: :href
|
|
27
16
|
|
|
28
17
|
def url
|
|
29
|
-
|
|
18
|
+
orig_link || super
|
|
30
19
|
end
|
|
31
20
|
end
|
|
32
21
|
end
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Feedjira
|
|
4
|
+
module Parser
|
|
5
|
+
# Parser for dealing with Feedburner Atom feeds.
|
|
6
|
+
class AtomGoogleAlerts
|
|
7
|
+
include SAXMachine
|
|
8
|
+
include FeedUtilities
|
|
9
|
+
|
|
10
|
+
element :title
|
|
11
|
+
element :subtitle, as: :description
|
|
12
|
+
element :link, as: :feed_url, value: :href, with: { rel: "self" }
|
|
13
|
+
element :link, as: :url, value: :href, with: { rel: "self" }
|
|
14
|
+
elements :link, as: :links, value: :href
|
|
15
|
+
elements :entry, as: :entries, class: AtomGoogleAlertsEntry
|
|
16
|
+
|
|
17
|
+
def self.able_to_parse?(xml)
|
|
18
|
+
Atom.able_to_parse?(xml) && (%r{<id>tag:google\.com,2005:[^<]+/com\.google/alerts/} === xml) # rubocop:disable Style/CaseEquality
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
def self.preprocess(xml)
|
|
22
|
+
Preprocessor.new(xml).to_xml
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
end
|
|
26
|
+
end
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Feedjira
|
|
4
|
+
module Parser
|
|
5
|
+
# Parser for dealing with Feedburner Atom feed entries.
|
|
6
|
+
class AtomGoogleAlertsEntry
|
|
7
|
+
include SAXMachine
|
|
8
|
+
include FeedEntryUtilities
|
|
9
|
+
include AtomEntryUtilities
|
|
10
|
+
|
|
11
|
+
def url
|
|
12
|
+
url = super
|
|
13
|
+
return unless url&.start_with?("https://www.google.com/url?")
|
|
14
|
+
|
|
15
|
+
uri = URI(url)
|
|
16
|
+
cons = URI.decode_www_form(uri.query).assoc("url")
|
|
17
|
+
cons && cons[1]
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|