rss_feed_plus 0.1.0 → 0.1.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects those package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ec579256585d8f4a9ddf84497d214779d61c605cbf443bd1e55e4f094204cdc5
4
- data.tar.gz: 0bf2827e8bfab1aa2806578303ff04a570991c97a61673dc0edd8c9bb82c57e1
3
+ metadata.gz: ed3bccb810245ef55c8098543c200b38d82a3e42db7c7740a7a330c15729c518
4
+ data.tar.gz: 4ffdfd85aedd999af87a4053064a2c50a76ff93772c4910ca83ef802518be77e
5
5
  SHA512:
6
- metadata.gz: 566a7978173b0d7527a6413d80f6f0a2245633c51b7dc2950346e0b1b81ba10d465f906887f5e0b1fa5b06870c657e7426f47a3ac7e1bd989aa739daf3dfac7f
7
- data.tar.gz: 1f0c46d97d326cbb2aa65dced535beee715b58b9570e7ced3f2d4cb36d5d876ac25c8d16cd17884b383171cdcd0dae34927c90d20e40cc1f1952b17c4bba3f23
6
+ metadata.gz: 7a0c323043a3d4c014e9f840f364a9db8ab27c14c231a157496ea2e824b614e9f0121318ba400e891c111c89e89ac990b8d381d8d03b19739c093a48717e8611
7
+ data.tar.gz: a1c4dcb037b7d3c43e20a1d4403e239238cbb94d192a31a81270f7e498fd80b50ca2cd1ef081973d3bc7a7b742d5373941d6613c8ed7bc38f81b1b00957a45be
data/README.md CHANGED
@@ -49,7 +49,10 @@ parser = RssFeed::Parser.new(feed_urls, xml_parser: xml_parser, uri_parser: uri_
49
49
  # or
50
50
  parser = RssFeed::Parser.new(feed_urls)
51
51
  # Parse the RSS feeds
52
- parsed_data = parser.parse_as_object
52
+ parsed_data = parser.parse_as_object
53
+
54
+ # OR Parse the RSS feed as a JSON
55
+ parsed_data = parser.parse
53
56
 
54
57
  # Process the parsed data
55
58
  puts parsed_data.inspect
@@ -18,8 +18,6 @@ module RssFeed
18
18
  raise NotImplementedError
19
19
  end
20
20
 
21
- private
22
-
23
21
  # Detects the type of the feed based on the root element of the XML document.
24
22
  #
25
23
  # @return [String] The name of the root element.
@@ -17,7 +17,8 @@ module RssFeed
17
17
  image logo icon rating
18
18
  rights copyright
19
19
  textInput feedburner:browserFriendly
20
- itunes:author itunes:category
20
+ itunes:author itunes:category category itunes:explicit itunes:image itunes:keywords itunes:owner itunes:subtitle
21
+ itunes:summary
21
22
  ].freeze
22
23
 
23
24
  # XPath expression for selecting the RSS channel.
@@ -31,7 +32,7 @@ module RssFeed
31
32
  def atom
32
33
  return nil if document.blank?
33
34
 
34
- '//feed'
35
+ '//xmlns:feed'
35
36
  end
36
37
 
37
38
  alias feed atom
@@ -27,7 +27,7 @@ module RssFeed
27
27
 
28
28
  # XPath expression for selecting the Atom entry.
29
29
  def atom
30
- '//entry'
30
+ '//xmlns:entry'
31
31
  end
32
32
 
33
33
  alias feed atom
@@ -12,7 +12,9 @@ module RssFeed
12
12
  'feedburner' => 'http://rssnamespace.org/feedburner/ext/1.0',
13
13
  'content' => 'http://purl.org/rss/1.0/modules/content/',
14
14
  'trackback' => 'http://example.com/trackback',
15
- 'media' => 'http://search.yahoo.com/mrss/'
15
+ 'media' => 'http://search.yahoo.com/mrss/',
16
+ 'atom' => 'http://www.w3.org/2005/Atom',
17
+ 'xmlns' => 'http://www.w3.org/2005/Atom'
16
18
  }.freeze
17
19
 
18
20
  class << self
@@ -21,8 +23,9 @@ module RssFeed
21
23
  # @param tag [String] The XML tag to access.
22
24
  # @param doc [Nokogiri::XML::Document] The XML document.
23
25
  # @return [Hash] The tag data including text, nested elements flag, nested attributes flag, and the document.
24
- def access_tag(tag, doc)
25
- doc = doc.xpath(tag, namespace(tag))
26
+ def access_tag(tag, doc, feed)
27
+ feed_tag = %w[atom feed].include?(feed.detect_feed_type) && namespace(tag).blank? ? "xmlns:#{tag}" : tag
28
+ doc = doc.xpath(feed_tag, namespace(tag))
26
29
  nested_elements = nested_elements?(doc)
27
30
  { text: doc.to_s, nested_elements: nested_elements, nested_attributes: nested_attributes?(doc), docs: doc }
28
31
  end
@@ -33,7 +36,7 @@ module RssFeed
33
36
  # @return [Hash] The namespace declaration.
34
37
  def namespace(tag)
35
38
  namespace_key = tag.split(':').first
36
- { namespace_key.to_s => NAMESPACES[namespace_key] }.compact
39
+ NAMESPACES[namespace_key].blank? ? nil : { namespace_key.to_s => NAMESPACES[namespace_key] }.compact
37
40
  end
38
41
 
39
42
  # Removes HTML tags from the given content.
@@ -93,7 +93,7 @@ module RssFeed
93
93
  item_data = {}
94
94
 
95
95
  feed.class::TAGS.each do |tag|
96
- tag_data = extract_tag_data(tag, feed_parse)
96
+ tag_data = extract_tag_data(tag, feed_parse, feed)
97
97
  next if skip_extraction?(tag_data)
98
98
 
99
99
  items = extract_items(tag_data)
@@ -136,8 +136,8 @@ module RssFeed
136
136
  # @param tag [String] The tag to extract.
137
137
  # @param feed_parse [Hash] The parsed XML data.
138
138
  # @return [Hash] The extracted tag data.
139
- def extract_tag_data(tag, feed_parse)
140
- value = RssFeed::Feed::Namespace.access_tag(tag, feed_parse)
139
+ def extract_tag_data(tag, feed_parse, feed)
140
+ value = RssFeed::Feed::Namespace.access_tag(tag, feed_parse, feed)
141
141
  value[:attributes] = extract_attributes(value[:docs]) if value[:nested_attributes]
142
142
  value
143
143
  end
@@ -1,3 +1,3 @@
1
1
  module RssFeed
2
- VERSION = '0.1.0'.freeze
2
+ VERSION = '0.1.1'.freeze
3
3
  end
data/parser.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  require 'rss_feed'
2
2
  require 'nokogiri'
3
3
 
4
- # require 'open-uri'
4
+ require 'open-uri'
5
5
  #
6
6
  # # url = 'https://feeds.nbcnews.com/nbcnews/public/news'
7
7
  # # url = 'https://abcnews.go.com/abcnews/usheadlines'
@@ -11,7 +11,9 @@ require 'nokogiri'
11
11
  # p rss
12
12
 
13
13
  # Define your custom options
14
- feed_urls = 'https://feeds.nbcnews.com/nbcnews/public/news'
14
+ # feed_urls = 'https://rubygems.org/gems/rss_feed_plus/versions.atom'
15
+ feed_urls = 'http://dev.fyicenter.com/atom_xml.php'
16
+ # feed_urls = 'https://feeds.nbcnews.com/nbcnews/public/news'
15
17
  xml_parser = Nokogiri
16
18
  uri_parser = URI
17
19
  timeout = 10
@@ -19,11 +21,45 @@ timeout = 10
19
21
  # Initialize the Parser class with custom options
20
22
  parser = RssFeed::Parser.new(feed_urls, xml_parser: xml_parser, uri_parser: uri_parser, timeout: timeout)
21
23
 
22
- # Parse the RSS feeds
23
- # parsed_data = parser.parse
24
24
 
25
- # Process the parsed data
26
- # puts parsed_data.inspect
27
-
28
- parsed_data = parser.parse_as_object
25
+ parsed_data = parser.parse
29
26
  puts parsed_data.inspect
27
+
28
+ # xml = URI.parse(feed_urls).open
29
+ # doc = Nokogiri::XML(xml)
30
+ # namespaces = doc.root.namespaces
31
+ # spaces = {}
32
+ # namespaces.each do |prefix, uri|
33
+ # puts "Namespace Prefix: #{prefix}"
34
+ # puts "Namespace URI: #{uri}"
35
+ # spaces[prefix] = uri
36
+ # end
37
+ # title = doc.xpath("//xmlns:entry").map { |entry| entry.at_xpath("dc:date", 'dc' => 'http://purl.org/dc/elements/1.1/') }
38
+ # title = doc.xpath('//xmlns:entry').map { |entry| entry.xpath('//xmlns:title', 'xmlns' => 'http://www.w3.org/2005/Atom').text }
39
+
40
+ # p title
41
+ # tags = doc.xpath('//*[not(self::text() or self::comment())]').map(&:name).uniq
42
+ # feed_tags = doc.xpath('/rss/channel/*').map(&:name).uniq
43
+ #
44
+ # # Get tags under <item> (entry)
45
+ # entry_tags = doc.xpath('/rss/channel/item/*').map(&:name).uniq
46
+ #
47
+ # # Print tags under <channel> (feed)
48
+ # puts "Tags under <channel> (feed):"
49
+ # puts feed_tags
50
+ #
51
+ # puts "\nTags under <item> (entry):"
52
+ # puts entry_tags
53
+
54
+ # feed_tags = doc.xpath('/xmlns:feed/*').map(&:name).uniq
55
+ #
56
+ # # Get tags under <entry>
57
+ # entry_tags = doc.xpath('/xmlns:feed/xmlns:entry/*').map(&:name).uniq
58
+ #
59
+ # # Print tags under <feed>
60
+ # puts "Tags under <feed> (channel):"
61
+ # puts feed_tags
62
+ #
63
+ # # Print tags under <entry>
64
+ # puts "\nTags under <entry>:"
65
+ # puts entry_tags
@@ -7,8 +7,6 @@ module RssFeed
7
7
 
8
8
  def parser: -> Nokogiri::XML::Document
9
9
 
10
- private
11
-
12
10
  def detect_feed_type: -> String
13
11
 
14
12
 
@@ -5,7 +5,7 @@ module RssFeed
5
5
 
6
6
  def self.access_tag: (tag: String, doc: Nokogiri::XML::NodeSet)-> Hash[Symbol, bool | String | Nokogiri::XML::Document ]
7
7
 
8
- def self.namespace: (tag: String)-> Hash[String, String]
8
+ def self.namespace: (tag: String)-> Hash[String, String] | nil
9
9
 
10
10
  def self.nested_attributes?: (node: Nokogiri::XML::NodeSet) -> bool
11
11
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rss_feed_plus
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - talaatmagdyx