rss_feed_plus 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +4 -1
- data/lib/rss_feed/feed/base.rb +0 -2
- data/lib/rss_feed/feed/channel.rb +3 -2
- data/lib/rss_feed/feed/item.rb +1 -1
- data/lib/rss_feed/feed/namespace.rb +7 -4
- data/lib/rss_feed/parser.rb +3 -3
- data/lib/rss_feed/version.rb +1 -1
- data/parser.rb +44 -8
- data/sig/rss_feed/feed/base.rbs +0 -2
- data/sig/rss_feed/feed/namespace.rbs +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ed3bccb810245ef55c8098543c200b38d82a3e42db7c7740a7a330c15729c518
|
4
|
+
data.tar.gz: 4ffdfd85aedd999af87a4053064a2c50a76ff93772c4910ca83ef802518be77e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7a0c323043a3d4c014e9f840f364a9db8ab27c14c231a157496ea2e824b614e9f0121318ba400e891c111c89e89ac990b8d381d8d03b19739c093a48717e8611
|
7
|
+
data.tar.gz: a1c4dcb037b7d3c43e20a1d4403e239238cbb94d192a31a81270f7e498fd80b50ca2cd1ef081973d3bc7a7b742d5373941d6613c8ed7bc38f81b1b00957a45be
|
data/README.md
CHANGED
@@ -49,7 +49,10 @@ parser = RssFeed::Parser.new(feed_urls, xml_parser: xml_parser, uri_parser: uri_
|
|
49
49
|
# or
|
50
50
|
parser = RssFeed::Parser.new(feed_urls)
|
51
51
|
# Parse the RSS feeds
|
52
|
-
parsed_data = parser.parse_as_object
|
52
|
+
parsed_data = parser.parse_as_object
|
53
|
+
|
54
|
+
# Or parse the RSS feeds as JSON
|
55
|
+
parsed_data = parser.parse
|
53
56
|
|
54
57
|
# Process the parsed data
|
55
58
|
puts parsed_data.inspect
|
data/lib/rss_feed/feed/base.rb
CHANGED
@@ -17,7 +17,8 @@ module RssFeed
|
|
17
17
|
image logo icon rating
|
18
18
|
rights copyright
|
19
19
|
textInput feedburner:browserFriendly
|
20
|
-
itunes:author itunes:category
|
20
|
+
itunes:author itunes:category category itunes:explicit itunes:image itunes:keywords itunes:owner itunes:subtitle
|
21
|
+
itunes:summary
|
21
22
|
].freeze
|
22
23
|
|
23
24
|
# XPath expression for selecting the RSS channel.
|
@@ -31,7 +32,7 @@ module RssFeed
|
|
31
32
|
def atom
|
32
33
|
return nil if document.blank?
|
33
34
|
|
34
|
-
'//feed'
|
35
|
+
'//xmlns:feed'
|
35
36
|
end
|
36
37
|
|
37
38
|
alias feed atom
|
data/lib/rss_feed/feed/item.rb
CHANGED
@@ -12,7 +12,9 @@ module RssFeed
|
|
12
12
|
'feedburner' => 'http://rssnamespace.org/feedburner/ext/1.0',
|
13
13
|
'content' => 'http://purl.org/rss/1.0/modules/content/',
|
14
14
|
'trackback' => 'http://example.com/trackback',
|
15
|
-
'media' => 'http://search.yahoo.com/mrss/'
|
15
|
+
'media' => 'http://search.yahoo.com/mrss/',
|
16
|
+
'atom' => 'http://www.w3.org/2005/Atom',
|
17
|
+
'xmlns' => 'http://www.w3.org/2005/Atom'
|
16
18
|
}.freeze
|
17
19
|
|
18
20
|
class << self
|
@@ -21,8 +23,9 @@ module RssFeed
|
|
21
23
|
# @param tag [String] The XML tag to access.
|
22
24
|
# @param doc [Nokogiri::XML::Document] The XML document.
|
23
25
|
# @return [Hash] The tag data including text, nested elements flag, nested attributes flag, and the document.
|
24
|
-
def access_tag(tag, doc)
|
25
|
-
|
26
|
+
def access_tag(tag, doc, feed)
|
27
|
+
feed_tag = %w[atom feed].include?(feed.detect_feed_type) && namespace(tag).blank? ? "xmlns:#{tag}" : tag
|
28
|
+
doc = doc.xpath(feed_tag, namespace(tag))
|
26
29
|
nested_elements = nested_elements?(doc)
|
27
30
|
{ text: doc.to_s, nested_elements: nested_elements, nested_attributes: nested_attributes?(doc), docs: doc }
|
28
31
|
end
|
@@ -33,7 +36,7 @@ module RssFeed
|
|
33
36
|
# @return [Hash] The namespace declaration.
|
34
37
|
def namespace(tag)
|
35
38
|
namespace_key = tag.split(':').first
|
36
|
-
{ namespace_key.to_s => NAMESPACES[namespace_key] }.compact
|
39
|
+
NAMESPACES[namespace_key].blank? ? nil : { namespace_key.to_s => NAMESPACES[namespace_key] }.compact
|
37
40
|
end
|
38
41
|
|
39
42
|
# Removes HTML tags from the given content.
|
data/lib/rss_feed/parser.rb
CHANGED
@@ -93,7 +93,7 @@ module RssFeed
|
|
93
93
|
item_data = {}
|
94
94
|
|
95
95
|
feed.class::TAGS.each do |tag|
|
96
|
-
tag_data = extract_tag_data(tag, feed_parse)
|
96
|
+
tag_data = extract_tag_data(tag, feed_parse, feed)
|
97
97
|
next if skip_extraction?(tag_data)
|
98
98
|
|
99
99
|
items = extract_items(tag_data)
|
@@ -136,8 +136,8 @@ module RssFeed
|
|
136
136
|
# @param tag [String] The tag to extract.
|
137
137
|
# @param feed_parse [Hash] The parsed XML data.
|
138
138
|
# @return [Hash] The extracted tag data.
|
139
|
-
def extract_tag_data(tag, feed_parse)
|
140
|
-
value = RssFeed::Feed::Namespace.access_tag(tag, feed_parse)
|
139
|
+
def extract_tag_data(tag, feed_parse, feed)
|
140
|
+
value = RssFeed::Feed::Namespace.access_tag(tag, feed_parse, feed)
|
141
141
|
value[:attributes] = extract_attributes(value[:docs]) if value[:nested_attributes]
|
142
142
|
value
|
143
143
|
end
|
data/lib/rss_feed/version.rb
CHANGED
data/parser.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require 'rss_feed'
|
2
2
|
require 'nokogiri'
|
3
3
|
|
4
|
-
|
4
|
+
require 'open-uri'
|
5
5
|
#
|
6
6
|
# # url = 'https://feeds.nbcnews.com/nbcnews/public/news'
|
7
7
|
# # url = 'https://abcnews.go.com/abcnews/usheadlines'
|
@@ -11,7 +11,9 @@ require 'nokogiri'
|
|
11
11
|
# p rss
|
12
12
|
|
13
13
|
# Define your custom options
|
14
|
-
feed_urls = 'https://
|
14
|
+
# feed_urls = 'https://rubygems.org/gems/rss_feed_plus/versions.atom'
|
15
|
+
feed_urls = 'http://dev.fyicenter.com/atom_xml.php'
|
16
|
+
# feed_urls = 'https://feeds.nbcnews.com/nbcnews/public/news'
|
15
17
|
xml_parser = Nokogiri
|
16
18
|
uri_parser = URI
|
17
19
|
timeout = 10
|
@@ -19,11 +21,45 @@ timeout = 10
|
|
19
21
|
# Initialize the Parser class with custom options
|
20
22
|
parser = RssFeed::Parser.new(feed_urls, xml_parser: xml_parser, uri_parser: uri_parser, timeout: timeout)
|
21
23
|
|
22
|
-
# Parse the RSS feeds
|
23
|
-
# parsed_data = parser.parse
|
24
24
|
|
25
|
-
|
26
|
-
# puts parsed_data.inspect
|
27
|
-
|
28
|
-
parsed_data = parser.parse_as_object
|
25
|
+
parsed_data = parser.parse
|
29
26
|
puts parsed_data.inspect
|
27
|
+
|
28
|
+
# xml = URI.parse(feed_urls).open
|
29
|
+
# doc = Nokogiri::XML(xml)
|
30
|
+
# namespaces = doc.root.namespaces
|
31
|
+
# spaces = {}
|
32
|
+
# namespaces.each do |prefix, uri|
|
33
|
+
# puts "Namespace Prefix: #{prefix}"
|
34
|
+
# puts "Namespace URI: #{uri}"
|
35
|
+
# spaces[prefix] = uri
|
36
|
+
# end
|
37
|
+
# title = doc.xpath("//xmlns:entry").map { |entry| entry.at_xpath("dc:date", 'dc' => 'http://purl.org/dc/elements/1.1/') }
|
38
|
+
# title = doc.xpath('//xmlns:entry').map { |entry| entry.xpath('//xmlns:title', 'xmlns' => 'http://www.w3.org/2005/Atom').text }
|
39
|
+
|
40
|
+
# p title
|
41
|
+
# tags = doc.xpath('//*[not(self::text() or self::comment())]').map(&:name).uniq
|
42
|
+
# feed_tags = doc.xpath('/rss/channel/*').map(&:name).uniq
|
43
|
+
#
|
44
|
+
# # Get tags under <item> (entry)
|
45
|
+
# entry_tags = doc.xpath('/rss/channel/item/*').map(&:name).uniq
|
46
|
+
#
|
47
|
+
# # Print tags under <channel> (feed)
|
48
|
+
# puts "Tags under <channel> (feed):"
|
49
|
+
# puts feed_tags
|
50
|
+
#
|
51
|
+
# puts "\nTags under <item> (entry):"
|
52
|
+
# puts entry_tags
|
53
|
+
|
54
|
+
# feed_tags = doc.xpath('/xmlns:feed/*').map(&:name).uniq
|
55
|
+
#
|
56
|
+
# # Get tags under <entry>
|
57
|
+
# entry_tags = doc.xpath('/xmlns:feed/xmlns:entry/*').map(&:name).uniq
|
58
|
+
#
|
59
|
+
# # Print tags under <feed>
|
60
|
+
# puts "Tags under <feed> (channel):"
|
61
|
+
# puts feed_tags
|
62
|
+
#
|
63
|
+
# # Print tags under <entry>
|
64
|
+
# puts "\nTags under <entry>:"
|
65
|
+
# puts entry_tags
|
data/sig/rss_feed/feed/base.rbs
CHANGED
@@ -5,7 +5,7 @@ module RssFeed
|
|
5
5
|
|
6
6
|
def self.access_tag: (tag: String, doc: Nokogiri::XML::NodeSet)-> Hash[Symbol, bool | String | Nokogiri::XML::Document ]
|
7
7
|
|
8
|
-
def self.namespace: (tag: String)-> Hash[String, String]
|
8
|
+
def self.namespace: (tag: String)-> Hash[String, String] | nil
|
9
9
|
|
10
10
|
def self.nested_attributes?: (node: Nokogiri::XML::NodeSet) -> bool
|
11
11
|
|