rss_feed_plus 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +4 -1
- data/lib/rss_feed/feed/base.rb +0 -2
- data/lib/rss_feed/feed/channel.rb +3 -2
- data/lib/rss_feed/feed/item.rb +1 -1
- data/lib/rss_feed/feed/namespace.rb +7 -4
- data/lib/rss_feed/parser.rb +3 -3
- data/lib/rss_feed/version.rb +1 -1
- data/parser.rb +44 -8
- data/sig/rss_feed/feed/base.rbs +0 -2
- data/sig/rss_feed/feed/namespace.rbs +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ed3bccb810245ef55c8098543c200b38d82a3e42db7c7740a7a330c15729c518
|
4
|
+
data.tar.gz: 4ffdfd85aedd999af87a4053064a2c50a76ff93772c4910ca83ef802518be77e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7a0c323043a3d4c014e9f840f364a9db8ab27c14c231a157496ea2e824b614e9f0121318ba400e891c111c89e89ac990b8d381d8d03b19739c093a48717e8611
|
7
|
+
data.tar.gz: a1c4dcb037b7d3c43e20a1d4403e239238cbb94d192a31a81270f7e498fd80b50ca2cd1ef081973d3bc7a7b742d5373941d6613c8ed7bc38f81b1b00957a45be
|
data/README.md
CHANGED
@@ -49,7 +49,10 @@ parser = RssFeed::Parser.new(feed_urls, xml_parser: xml_parser, uri_parser: uri_
|
|
49
49
|
# or
|
50
50
|
parser = RssFeed::Parser.new(feed_urls)
|
51
51
|
# Parse the RSS feeds
|
52
|
-
parsed_data = parser.parse_as_object
|
52
|
+
parsed_data = parser.parse_as_object
|
53
|
+
|
54
|
+
# OR Parse the RSS feed as a JSON
|
55
|
+
parsed_data = parser.parse
|
53
56
|
|
54
57
|
# Process the parsed data
|
55
58
|
puts parsed_data.inspect
|
data/lib/rss_feed/feed/base.rb
CHANGED
data/lib/rss_feed/feed/channel.rb
CHANGED
@@ -17,7 +17,8 @@ module RssFeed
|
|
17
17
|
image logo icon rating
|
18
18
|
rights copyright
|
19
19
|
textInput feedburner:browserFriendly
|
20
|
-
itunes:author itunes:category
|
20
|
+
itunes:author itunes:category category itunes:explicit itunes:image itunes:keywords itunes:owner itunes:subtitle
|
21
|
+
itunes:summary
|
21
22
|
].freeze
|
22
23
|
|
23
24
|
# XPath expression for selecting the RSS channel.
|
@@ -31,7 +32,7 @@ module RssFeed
|
|
31
32
|
def atom
|
32
33
|
return nil if document.blank?
|
33
34
|
|
34
|
-
'//feed'
|
35
|
+
'//xmlns:feed'
|
35
36
|
end
|
36
37
|
|
37
38
|
alias feed atom
|
data/lib/rss_feed/feed/item.rb
CHANGED
data/lib/rss_feed/feed/namespace.rb
CHANGED
@@ -12,7 +12,9 @@ module RssFeed
|
|
12
12
|
'feedburner' => 'http://rssnamespace.org/feedburner/ext/1.0',
|
13
13
|
'content' => 'http://purl.org/rss/1.0/modules/content/',
|
14
14
|
'trackback' => 'http://example.com/trackback',
|
15
|
-
'media' => 'http://search.yahoo.com/mrss/'
|
15
|
+
'media' => 'http://search.yahoo.com/mrss/',
|
16
|
+
'atom' => 'http://www.w3.org/2005/Atom',
|
17
|
+
'xmlns' => 'http://www.w3.org/2005/Atom'
|
16
18
|
}.freeze
|
17
19
|
|
18
20
|
class << self
|
@@ -21,8 +23,9 @@ module RssFeed
|
|
21
23
|
# @param tag [String] The XML tag to access.
|
22
24
|
# @param doc [Nokogiri::XML::Document] The XML document.
|
23
25
|
# @return [Hash] The tag data including text, nested elements flag, nested attributes flag, and the document.
|
24
|
-
def access_tag(tag, doc)
|
25
|
-
|
26
|
+
def access_tag(tag, doc, feed)
|
27
|
+
feed_tag = %w[atom feed].include?(feed.detect_feed_type) && namespace(tag).blank? ? "xmlns:#{tag}" : tag
|
28
|
+
doc = doc.xpath(feed_tag, namespace(tag))
|
26
29
|
nested_elements = nested_elements?(doc)
|
27
30
|
{ text: doc.to_s, nested_elements: nested_elements, nested_attributes: nested_attributes?(doc), docs: doc }
|
28
31
|
end
|
@@ -33,7 +36,7 @@ module RssFeed
|
|
33
36
|
# @return [Hash] The namespace declaration.
|
34
37
|
def namespace(tag)
|
35
38
|
namespace_key = tag.split(':').first
|
36
|
-
{ namespace_key.to_s => NAMESPACES[namespace_key] }.compact
|
39
|
+
NAMESPACES[namespace_key].blank? ? nil : { namespace_key.to_s => NAMESPACES[namespace_key] }.compact
|
37
40
|
end
|
38
41
|
|
39
42
|
# Removes HTML tags from the given content.
|
data/lib/rss_feed/parser.rb
CHANGED
@@ -93,7 +93,7 @@ module RssFeed
|
|
93
93
|
item_data = {}
|
94
94
|
|
95
95
|
feed.class::TAGS.each do |tag|
|
96
|
-
tag_data = extract_tag_data(tag, feed_parse)
|
96
|
+
tag_data = extract_tag_data(tag, feed_parse, feed)
|
97
97
|
next if skip_extraction?(tag_data)
|
98
98
|
|
99
99
|
items = extract_items(tag_data)
|
@@ -136,8 +136,8 @@ module RssFeed
|
|
136
136
|
# @param tag [String] The tag to extract.
|
137
137
|
# @param feed_parse [Hash] The parsed XML data.
|
138
138
|
# @return [Hash] The extracted tag data.
|
139
|
-
def extract_tag_data(tag, feed_parse)
|
140
|
-
value = RssFeed::Feed::Namespace.access_tag(tag, feed_parse)
|
139
|
+
def extract_tag_data(tag, feed_parse, feed)
|
140
|
+
value = RssFeed::Feed::Namespace.access_tag(tag, feed_parse, feed)
|
141
141
|
value[:attributes] = extract_attributes(value[:docs]) if value[:nested_attributes]
|
142
142
|
value
|
143
143
|
end
|
data/lib/rss_feed/version.rb
CHANGED
data/parser.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require 'rss_feed'
|
2
2
|
require 'nokogiri'
|
3
3
|
|
4
|
-
|
4
|
+
require 'open-uri'
|
5
5
|
#
|
6
6
|
# # url = 'https://feeds.nbcnews.com/nbcnews/public/news'
|
7
7
|
# # url = 'https://abcnews.go.com/abcnews/usheadlines'
|
@@ -11,7 +11,9 @@ require 'nokogiri'
|
|
11
11
|
# p rss
|
12
12
|
|
13
13
|
# Define your custom options
|
14
|
-
feed_urls = 'https://
|
14
|
+
# feed_urls = 'https://rubygems.org/gems/rss_feed_plus/versions.atom'
|
15
|
+
feed_urls = 'http://dev.fyicenter.com/atom_xml.php'
|
16
|
+
# feed_urls = 'https://feeds.nbcnews.com/nbcnews/public/news'
|
15
17
|
xml_parser = Nokogiri
|
16
18
|
uri_parser = URI
|
17
19
|
timeout = 10
|
@@ -19,11 +21,45 @@ timeout = 10
|
|
19
21
|
# Initialize the Parser class with custom options
|
20
22
|
parser = RssFeed::Parser.new(feed_urls, xml_parser: xml_parser, uri_parser: uri_parser, timeout: timeout)
|
21
23
|
|
22
|
-
# Parse the RSS feeds
|
23
|
-
# parsed_data = parser.parse
|
24
24
|
|
25
|
-
|
26
|
-
# puts parsed_data.inspect
|
27
|
-
|
28
|
-
parsed_data = parser.parse_as_object
|
25
|
+
parsed_data = parser.parse
|
29
26
|
puts parsed_data.inspect
|
27
|
+
|
28
|
+
# xml = URI.parse(feed_urls).open
|
29
|
+
# doc = Nokogiri::XML(xml)
|
30
|
+
# namespaces = doc.root.namespaces
|
31
|
+
# spaces = {}
|
32
|
+
# namespaces.each do |prefix, uri|
|
33
|
+
# puts "Namespace Prefix: #{prefix}"
|
34
|
+
# puts "Namespace URI: #{uri}"
|
35
|
+
# spaces[prefix] = uri
|
36
|
+
# end
|
37
|
+
# title = doc.xpath("//xmlns:entry").map { |entry| entry.at_xpath("dc:date", 'dc' => 'http://purl.org/dc/elements/1.1/') }
|
38
|
+
# title = doc.xpath('//xmlns:entry').map { |entry| entry.xpath('//xmlns:title', 'xmlns' => 'http://www.w3.org/2005/Atom').text }
|
39
|
+
|
40
|
+
# p title
|
41
|
+
# tags = doc.xpath('//*[not(self::text() or self::comment())]').map(&:name).uniq
|
42
|
+
# feed_tags = doc.xpath('/rss/channel/*').map(&:name).uniq
|
43
|
+
#
|
44
|
+
# # Get tags under <item> (entry)
|
45
|
+
# entry_tags = doc.xpath('/rss/channel/item/*').map(&:name).uniq
|
46
|
+
#
|
47
|
+
# # Print tags under <channel> (feed)
|
48
|
+
# puts "Tags under <channel> (feed):"
|
49
|
+
# puts feed_tags
|
50
|
+
#
|
51
|
+
# puts "\nTags under <item> (entry):"
|
52
|
+
# puts entry_tags
|
53
|
+
|
54
|
+
# feed_tags = doc.xpath('/xmlns:feed/*').map(&:name).uniq
|
55
|
+
#
|
56
|
+
# # Get tags under <entry>
|
57
|
+
# entry_tags = doc.xpath('/xmlns:feed/xmlns:entry/*').map(&:name).uniq
|
58
|
+
#
|
59
|
+
# # Print tags under <feed>
|
60
|
+
# puts "Tags under <feed> (channel):"
|
61
|
+
# puts feed_tags
|
62
|
+
#
|
63
|
+
# # Print tags under <entry>
|
64
|
+
# puts "\nTags under <entry>:"
|
65
|
+
# puts entry_tags
|
data/sig/rss_feed/feed/base.rbs
CHANGED
data/sig/rss_feed/feed/namespace.rbs
CHANGED
@@ -5,7 +5,7 @@ module RssFeed
|
|
5
5
|
|
6
6
|
def self.access_tag: (tag: String, doc: Nokogiri::XML::NodeSet)-> Hash[Symbol, bool | String | Nokogiri::XML::Document ]
|
7
7
|
|
8
|
-
def self.namespace: (tag: String)-> Hash[String, String]
|
8
|
+
def self.namespace: (tag: String)-> Hash[String, String] | nil
|
9
9
|
|
10
10
|
def self.nested_attributes?: (node: Nokogiri::XML::NodeSet) -> bool
|
11
11
|
|