rss_feed_plus 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ec579256585d8f4a9ddf84497d214779d61c605cbf443bd1e55e4f094204cdc5
4
- data.tar.gz: 0bf2827e8bfab1aa2806578303ff04a570991c97a61673dc0edd8c9bb82c57e1
3
+ metadata.gz: ed3bccb810245ef55c8098543c200b38d82a3e42db7c7740a7a330c15729c518
4
+ data.tar.gz: 4ffdfd85aedd999af87a4053064a2c50a76ff93772c4910ca83ef802518be77e
5
5
  SHA512:
6
- metadata.gz: 566a7978173b0d7527a6413d80f6f0a2245633c51b7dc2950346e0b1b81ba10d465f906887f5e0b1fa5b06870c657e7426f47a3ac7e1bd989aa739daf3dfac7f
7
- data.tar.gz: 1f0c46d97d326cbb2aa65dced535beee715b58b9570e7ced3f2d4cb36d5d876ac25c8d16cd17884b383171cdcd0dae34927c90d20e40cc1f1952b17c4bba3f23
6
+ metadata.gz: 7a0c323043a3d4c014e9f840f364a9db8ab27c14c231a157496ea2e824b614e9f0121318ba400e891c111c89e89ac990b8d381d8d03b19739c093a48717e8611
7
+ data.tar.gz: a1c4dcb037b7d3c43e20a1d4403e239238cbb94d192a31a81270f7e498fd80b50ca2cd1ef081973d3bc7a7b742d5373941d6613c8ed7bc38f81b1b00957a45be
data/README.md CHANGED
@@ -49,7 +49,10 @@ parser = RssFeed::Parser.new(feed_urls, xml_parser: xml_parser, uri_parser: uri_
49
49
  # or
50
50
  parser = RssFeed::Parser.new(feed_urls)
51
51
  # Parse the RSS feeds
52
- parsed_data = parser.parse_as_object
52
+ parsed_data = parser.parse_as_object
53
+
54
+ # Or parse the RSS feed as JSON
55
+ parsed_data = parser.parse
53
56
 
54
57
  # Process the parsed data
55
58
  puts parsed_data.inspect
@@ -18,8 +18,6 @@ module RssFeed
18
18
  raise NotImplementedError
19
19
  end
20
20
 
21
- private
22
-
23
21
  # Detects the type of the feed based on the root element of the XML document.
24
22
  #
25
23
  # @return [String] The name of the root element.
@@ -17,7 +17,8 @@ module RssFeed
17
17
  image logo icon rating
18
18
  rights copyright
19
19
  textInput feedburner:browserFriendly
20
- itunes:author itunes:category
20
+ itunes:author itunes:category category itunes:explicit itunes:image itunes:keywords itunes:owner itunes:subtitle
21
+ itunes:summary
21
22
  ].freeze
22
23
 
23
24
  # XPath expression for selecting the RSS channel.
@@ -31,7 +32,7 @@ module RssFeed
31
32
  def atom
32
33
  return nil if document.blank?
33
34
 
34
- '//feed'
35
+ '//xmlns:feed'
35
36
  end
36
37
 
37
38
  alias feed atom
@@ -27,7 +27,7 @@ module RssFeed
27
27
 
28
28
  # XPath expression for selecting the Atom entry.
29
29
  def atom
30
- '//entry'
30
+ '//xmlns:entry'
31
31
  end
32
32
 
33
33
  alias feed atom
@@ -12,7 +12,9 @@ module RssFeed
12
12
  'feedburner' => 'http://rssnamespace.org/feedburner/ext/1.0',
13
13
  'content' => 'http://purl.org/rss/1.0/modules/content/',
14
14
  'trackback' => 'http://example.com/trackback',
15
- 'media' => 'http://search.yahoo.com/mrss/'
15
+ 'media' => 'http://search.yahoo.com/mrss/',
16
+ 'atom' => 'http://www.w3.org/2005/Atom',
17
+ 'xmlns' => 'http://www.w3.org/2005/Atom'
16
18
  }.freeze
17
19
 
18
20
  class << self
@@ -21,8 +23,9 @@ module RssFeed
21
23
  # @param tag [String] The XML tag to access.
22
24
  # @param doc [Nokogiri::XML::Document] The XML document.
23
25
  # @return [Hash] The tag data including text, nested elements flag, nested attributes flag, and the document.
24
- def access_tag(tag, doc)
25
- doc = doc.xpath(tag, namespace(tag))
26
+ def access_tag(tag, doc, feed)
27
+ feed_tag = %w[atom feed].include?(feed.detect_feed_type) && namespace(tag).blank? ? "xmlns:#{tag}" : tag
28
+ doc = doc.xpath(feed_tag, namespace(tag))
26
29
  nested_elements = nested_elements?(doc)
27
30
  { text: doc.to_s, nested_elements: nested_elements, nested_attributes: nested_attributes?(doc), docs: doc }
28
31
  end
@@ -33,7 +36,7 @@ module RssFeed
33
36
  # @return [Hash] The namespace declaration.
34
37
  def namespace(tag)
35
38
  namespace_key = tag.split(':').first
36
- { namespace_key.to_s => NAMESPACES[namespace_key] }.compact
39
+ NAMESPACES[namespace_key].blank? ? nil : { namespace_key.to_s => NAMESPACES[namespace_key] }.compact
37
40
  end
38
41
 
39
42
  # Removes HTML tags from the given content.
@@ -93,7 +93,7 @@ module RssFeed
93
93
  item_data = {}
94
94
 
95
95
  feed.class::TAGS.each do |tag|
96
- tag_data = extract_tag_data(tag, feed_parse)
96
+ tag_data = extract_tag_data(tag, feed_parse, feed)
97
97
  next if skip_extraction?(tag_data)
98
98
 
99
99
  items = extract_items(tag_data)
@@ -136,8 +136,8 @@ module RssFeed
136
136
  # @param tag [String] The tag to extract.
137
137
  # @param feed_parse [Hash] The parsed XML data.
138
138
  # @return [Hash] The extracted tag data.
139
- def extract_tag_data(tag, feed_parse)
140
- value = RssFeed::Feed::Namespace.access_tag(tag, feed_parse)
139
+ def extract_tag_data(tag, feed_parse, feed)
140
+ value = RssFeed::Feed::Namespace.access_tag(tag, feed_parse, feed)
141
141
  value[:attributes] = extract_attributes(value[:docs]) if value[:nested_attributes]
142
142
  value
143
143
  end
@@ -1,3 +1,3 @@
1
1
  module RssFeed
2
- VERSION = '0.1.0'.freeze
2
+ VERSION = '0.1.1'.freeze
3
3
  end
data/parser.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  require 'rss_feed'
2
2
  require 'nokogiri'
3
3
 
4
- # require 'open-uri'
4
+ require 'open-uri'
5
5
  #
6
6
  # # url = 'https://feeds.nbcnews.com/nbcnews/public/news'
7
7
  # # url = 'https://abcnews.go.com/abcnews/usheadlines'
@@ -11,7 +11,9 @@ require 'nokogiri'
11
11
  # p rss
12
12
 
13
13
  # Define your custom options
14
- feed_urls = 'https://feeds.nbcnews.com/nbcnews/public/news'
14
+ # feed_urls = 'https://rubygems.org/gems/rss_feed_plus/versions.atom'
15
+ feed_urls = 'http://dev.fyicenter.com/atom_xml.php'
16
+ # feed_urls = 'https://feeds.nbcnews.com/nbcnews/public/news'
15
17
  xml_parser = Nokogiri
16
18
  uri_parser = URI
17
19
  timeout = 10
@@ -19,11 +21,45 @@ timeout = 10
19
21
  # Initialize the Parser class with custom options
20
22
  parser = RssFeed::Parser.new(feed_urls, xml_parser: xml_parser, uri_parser: uri_parser, timeout: timeout)
21
23
 
22
- # Parse the RSS feeds
23
- # parsed_data = parser.parse
24
24
 
25
- # Process the parsed data
26
- # puts parsed_data.inspect
27
-
28
- parsed_data = parser.parse_as_object
25
+ parsed_data = parser.parse
29
26
  puts parsed_data.inspect
27
+
28
+ # xml = URI.parse(feed_urls).open
29
+ # doc = Nokogiri::XML(xml)
30
+ # namespaces = doc.root.namespaces
31
+ # spaces = {}
32
+ # namespaces.each do |prefix, uri|
33
+ # puts "Namespace Prefix: #{prefix}"
34
+ # puts "Namespace URI: #{uri}"
35
+ # spaces[prefix] = uri
36
+ # end
37
+ # title = doc.xpath("//xmlns:entry").map { |entry| entry.at_xpath("dc:date", 'dc' => 'http://purl.org/dc/elements/1.1/') }
38
+ # title = doc.xpath('//xmlns:entry').map { |entry| entry.xpath('//xmlns:title', 'xmlns' => 'http://www.w3.org/2005/Atom').text }
39
+
40
+ # p title
41
+ # tags = doc.xpath('//*[not(self::text() or self::comment())]').map(&:name).uniq
42
+ # feed_tags = doc.xpath('/rss/channel/*').map(&:name).uniq
43
+ #
44
+ # # Get tags under <item> (entry)
45
+ # entry_tags = doc.xpath('/rss/channel/item/*').map(&:name).uniq
46
+ #
47
+ # # Print tags under <channel> (feed)
48
+ # puts "Tags under <channel> (feed):"
49
+ # puts feed_tags
50
+ #
51
+ # puts "\nTags under <item> (entry):"
52
+ # puts entry_tags
53
+
54
+ # feed_tags = doc.xpath('/xmlns:feed/*').map(&:name).uniq
55
+ #
56
+ # # Get tags under <entry>
57
+ # entry_tags = doc.xpath('/xmlns:feed/xmlns:entry/*').map(&:name).uniq
58
+ #
59
+ # # Print tags under <feed>
60
+ # puts "Tags under <feed> (channel):"
61
+ # puts feed_tags
62
+ #
63
+ # # Print tags under <entry>
64
+ # puts "\nTags under <entry>:"
65
+ # puts entry_tags
@@ -7,8 +7,6 @@ module RssFeed
7
7
 
8
8
  def parser: -> Nokogiri::XML::Document
9
9
 
10
- private
11
-
12
10
  def detect_feed_type: -> String
13
11
 
14
12
 
@@ -5,7 +5,7 @@ module RssFeed
5
5
 
6
6
  def self.access_tag: (tag: String, doc: Nokogiri::XML::NodeSet)-> Hash[Symbol, bool | String | Nokogiri::XML::Document ]
7
7
 
8
- def self.namespace: (tag: String)-> Hash[String, String]
8
+ def self.namespace: (tag: String)-> Hash[String, String] | nil
9
9
 
10
10
  def self.nested_attributes?: (node: Nokogiri::XML::NodeSet) -> bool
11
11
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rss_feed_plus
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - talaatmagdyx