yarss 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: fb5a8eb19140ed00473aa01207c73ed9f9d7eb46
4
+ data.tar.gz: ec2f4a8d882acf25c80c6b9a1b90b97b08ee65ac
5
+ SHA512:
6
+ metadata.gz: 894f9cda3adfb3e109350028d1ba7b825c3871bb94ec265a13581728cbe54bd6f8d548bcec1f8b3f420fc99fb1624590fd0333eceac9021f60deb09702a91d23
7
+ data.tar.gz: 2f679583c97b1c728c2da37d36f70f98ef243b9d63c6f4fb23373e2dc02cd61c8cdafdc20f2d454640aa963f91772bef80f5d5c5704fbdcaabb9f8ef84c66fd1
data/.gitignore ADDED
@@ -0,0 +1,13 @@
1
+ *.gem
2
+ /.bundle/
3
+ /.yardoc
4
+ /data/
5
+ /dump/
6
+ /examples/
7
+ /Gemfile.lock
8
+ /_yardoc/
9
+ /coverage/
10
+ /doc/
11
+ /pkg/
12
+ /spec/reports/
13
+ /tmp/
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper.rb
data/.rubocop.yml ADDED
@@ -0,0 +1,8 @@
1
+ AllCops:
2
+ TargetRubyVersion: 2.3
3
+ Exclude:
4
+ - 'bin/console'
5
+ - 'examples/rss.rb'
6
+
7
+ Style/FrozenStringLiteralComment:
8
+ EnforcedStyle: always
data/.travis.yml ADDED
@@ -0,0 +1,18 @@
1
+ language: ruby
2
+
3
+ rvm:
4
+ - 2.3.0
5
+ - jruby-19mode
6
+
7
+ branches:
8
+ only:
9
+ - master
10
+
11
+ notifications:
12
+ email:
13
+ on_success: change
14
+ on_failure: always
15
+
16
+ before_install: "gem install bundler -v 1.11.2"
17
+ install: "bundle --jobs 4"
18
+ script: "bundle exec rspec"
data/.yardopts ADDED
@@ -0,0 +1,7 @@
1
+ --protected
2
+ --private
3
+ --embed-mixins
4
+ lib/**/*.rb
5
+ -
6
+ README.md
7
+ LICENSE.txt
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ source 'https://rubygems.org'
4
+
5
+ # Specify your gem's dependencies in yarss.gemspec
6
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2016 Oldrich Vetesnik
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,60 @@
1
+ # Yarss – Yet Another RSS Feed Normalizer. [![Build Status](https://travis-ci.org/ollie/yarss.svg?branch=master)](https://travis-ci.org/ollie/yarss) [![Gem Version](https://img.shields.io/gem/v/yarss.svg)](https://rubygems.org/gems/yarss)
2
+
3
+ Parse and access RSS/RDF/Atom feeds with a uniform interface. Yarss uses
4
+ [MultiXml](https://rubygems.org/gems/multi_xml) behind the scenes so you may
5
+ want to drop in your favourite XML parser.
6
+
7
+ For MRI users [Ox](https://rubygems.org/gems/ox) is highly recommended as it is
8
+ the fastest XML parser I know of. JRuby users should probably use
9
+ [Nokogiri](https://rubygems.org/gems/nokogiri).
10
+
11
+ ## Usage
12
+
13
+ ```ruby
14
+ ['path/to/feed.rss', 'path/to/feed.atom', 'path/to/feed.rdf'].each do |file_path|
15
+ feed = Yarss.new(file_path)
16
+
17
+ puts "#{feed.title}, #{feed.link}, #{feed.description}"
18
+
19
+ feed.items.each do |item|
20
+ puts "#{item.id}, #{item.title}, #{item.updated_at}, #{item.link}"
21
+ puts item.content
22
+ end
23
+ end
24
+ ```
25
+
26
+ ## Installation
27
+
28
+ Add this line to your application's Gemfile:
29
+
30
+ ```ruby
31
+ gem 'yarss'
32
+
33
+ # To increase performance, add one of these gems:
34
+ # gem 'ox' # MRI compatible.
35
+ # gem 'nokogiri' # MRI and JRuby compatible.
36
+ # gem 'oga' # MRI, JRuby, Rubinius compatible.
37
+ ```
38
+
39
+ And then execute:
40
+
41
+ $ bundle
42
+
43
+ Or install it yourself as:
44
+
45
+ $ gem install yarss
46
+
47
+ ## Development
48
+
49
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
50
+
51
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
52
+
53
+ ## Contributing
54
+
55
+ Bug reports and pull requests are welcome on GitHub at https://github.com/ollie/yarss.
56
+
57
+ ## License
58
+
59
+ The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
60
+
data/Rakefile ADDED
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'bundler/gem_tasks'
4
+
5
+ task default: :combo
6
+
7
+ desc 'Run tests, rubocop and generate documentation'
8
+ task :combo do
9
+ sh 'bundle exec rspec'
10
+ sh('bundle exec rubocop') {} # ignore status > 0
11
+ sh 'bundle exec yardoc'
12
+ end
13
+
14
+ desc 'Same as :combo but build a gem, too'
15
+ task mega_combo: :combo do
16
+ sh 'gem build yarss.gemspec'
17
+ end
data/bin/console ADDED
@@ -0,0 +1,11 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'bundler/setup'
4
+ require 'multi_xml'
5
+ require 'yarss'
6
+
7
+ # You can add fixtures and/or initialization code here to make experimenting
8
+ # with your gem easier. You can also use a different console, if you like.
9
+
10
+ require 'pry'
11
+ Pry.start
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,87 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'item_parser'
4
+
5
+ module Yarss
6
+ # Contains feed and item parsers.
7
+ module Atom
8
+ # Extract title, link, description and items from a parsed Atom feed.
9
+ #
10
+ # @see http://atomenabled.org/developers/syndication/#requiredFeedElements
11
+ class FeedParser
12
+ # Parsed Atom feed.
13
+ #
14
+ # @return [Hash]
15
+ attr_accessor :data
16
+
17
+ # @param data [Hash] Parsed Atom feed.
18
+ def initialize(data)
19
+ self.data = data
20
+ end
21
+
22
+ # Parse out the feed title, link, description and items and wrap them
23
+ # in a data object.
24
+ #
25
+ # @raise [ParseError] If a required field is not found.
26
+ #
27
+ # @return [Feed]
28
+ def parse
29
+ Feed.new(
30
+ title: title,
31
+ link: link,
32
+ description: description,
33
+ items: items
34
+ )
35
+ end
36
+
37
+ # Extract the feed data.
38
+ #
39
+ # @raise [ParseError] If not found.
40
+ #
41
+ # @return [Hash]
42
+ def feed
43
+ @feed ||= data.fetch('feed')
44
+ rescue KeyError => e
45
+ raise ParseError, e
46
+ end
47
+
48
+ # Extract the title.
49
+ #
50
+ # @raise [ParseError] If not found.
51
+ #
52
+ # @return [String]
53
+ def title
54
+ Attribute.value(feed.fetch('title'))
55
+ rescue KeyError => e
56
+ raise ParseError, e
57
+ end
58
+
59
+ # Extract the link.
60
+ #
61
+ # @return [String]
62
+ def link
63
+ Attribute.link_value(feed['link'] || '')
64
+ end
65
+
66
+ # Extract the description.
67
+ #
68
+ # @return [String]
69
+ def description
70
+ Attribute.value(feed['subtitle'] || '')
71
+ end
72
+
73
+ # Extract and parse the items.
74
+ #
75
+ # @raise [ParseError] If not found.
76
+ #
77
+ # @return [Array<Item>]
78
+ def items
79
+ items = feed.fetch('entry')
80
+ items = [items] unless items.is_a?(Array)
81
+ items.map { |d| ItemParser.new(d).parse }
82
+ rescue KeyError => e
83
+ raise ParseError, e
84
+ end
85
+ end
86
+ end
87
+ end
@@ -0,0 +1,90 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Yarss
4
+ module Atom
5
+ # Extract id, title, updated, link and content from a feed item.
6
+ #
7
+ # http://atomenabled.org/developers/syndication/#requiredEntryElements
8
+ class ItemParser
9
+ # Parsed Atom feed item.
10
+ #
11
+ # @return [Hash]
12
+ attr_accessor :data
13
+
14
+ # @param data [Hash] Parsed Atom feed item.
15
+ def initialize(data)
16
+ self.data = data
17
+ end
18
+
19
+ # Parse out the feed item id, title, updated, link and content and wrap
20
+ # them in a data object.
21
+ #
22
+ # @raise [ParseError] If a required field is not found.
23
+ #
24
+ # @return [Item]
25
+ def parse
26
+ Item.new(
27
+ id: id,
28
+ title: title,
29
+ updated_at: updated,
30
+ link: link,
31
+ content: content
32
+ )
33
+ end
34
+
35
+ # Extract the ID.
36
+ #
37
+ # @raise [ParseError] If not found.
38
+ #
39
+ # @return [String]
40
+ def id
41
+ data.fetch('id')
42
+ rescue KeyError => e
43
+ raise ParseError, e
44
+ end
45
+
46
+ # Extract the title.
47
+ #
48
+ # @raise [ParseError] If not found.
49
+ #
50
+ # @return [String]
51
+ def title
52
+ Attribute.value(data.fetch('title'))
53
+ rescue KeyError => e
54
+ raise ParseError, e
55
+ end
56
+
57
+ # Extract the updated date.
58
+ #
59
+ # @raise [ParseError] If not found.
60
+ #
61
+ # @return [DateTime]
62
+ def updated
63
+ DateTime.parse(data.fetch('updated'))
64
+ rescue KeyError, ArgumentError => e
65
+ raise ParseError, e
66
+ end
67
+
68
+ # Extract the link.
69
+ #
70
+ # @raise [ParseError] If not found.
71
+ #
72
+ # @return [String]
73
+ def link
74
+ Attribute.link_value(data.fetch('link'))
75
+ rescue KeyError => e
76
+ raise ParseError, e
77
+ end
78
+
79
+ # Extract the content.
80
+ #
81
+ # @return [String]
82
+ def content
83
+ summary = Attribute.value(data['summary'] || '')
84
+ content = Attribute.value(data['content'] || '')
85
+ return content unless content.empty?
86
+ summary
87
+ end
88
+ end
89
+ end
90
+ end
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Yarss
4
+ # A bunch of helpers to extract a +String+ value out of a +Hash+, +Array+,
5
+ # etc.
6
+ module Attribute
7
+ # Extract a +String+ value from a given attribute.
8
+ #
9
+ # @raise [ParseError] If type of value is not known.
10
+ #
11
+ # @example
12
+ # Yarss::Attribute.value('Foo') # => 'Foo'
13
+ # Yarss::Attribute.value('__content__' => 'Foo') # => 'Foo'
14
+ #
15
+ # @param value [String, Hash] An attribute.
16
+ #
17
+ # @return [String]
18
+ def self.value(value)
19
+ value ||= ''
20
+
21
+ case value
22
+ when Hash
23
+ value(value.fetch('__content__'))
24
+ when String
25
+ value.strip
26
+ else
27
+ raise ParseError, "Unknown #{value.class} attribute: #{value.inspect}"
28
+ end
29
+ rescue KeyError => e
30
+ raise ParseError, e
31
+ end
32
+
33
+ # Extract a +String+ value from a given link attribute.
34
+ #
35
+ # @raise [ParseError] If type of value is not known or no link is found.
36
+ #
37
+ # @example
38
+ # Yarss::Attribute.link_value('Foo') # => 'Foo'
39
+ # Yarss::Attribute.link_value('href' => 'Foo') # => 'Foo'
40
+ # Yarss::Attribute.link_value([{ 'rel' => 'self', 'href' => 'Foo' }])
41
+ # # => 'Foo'
42
+ # Yarss::Attribute.link_value([{ 'rel' => 'alternate', 'href' => 'Foo' }])
43
+ # # => 'Foo'
44
+ #
45
+ # @param value [String, Hash, Array] A link attribute.
46
+ #
47
+ # @return [String]
48
+ def self.link_value(value)
49
+ value ||= ''
50
+
51
+ case value
52
+ when Hash
53
+ link_value(value.fetch('href'))
54
+ when Array
55
+ item = value.find { |l| l.is_a?(String) } ||
56
+ value.find { |l| l['rel'] && l['rel'] == 'self' } ||
57
+ value.find { |l| l['rel'] && l['rel'] == 'alternate' }
58
+ raise KeyError unless item
59
+ link_value(item)
60
+ when String
61
+ value.strip
62
+ else
63
+ raise ParseError, "Unknown #{value.class} attribute: #{value.inspect}"
64
+ end
65
+ rescue KeyError => e
66
+ raise ParseError, e
67
+ end
68
+ end
69
+ end
data/lib/yarss/feed.rb ADDED
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Yarss
4
+ # Feed data holder.
5
+ class Feed
6
+ # Title.
7
+ #
8
+ # @return [String]
9
+ attr_accessor :title
10
+
11
+ # URL to the related Web page.
12
+ #
13
+ # @return [String]
14
+ attr_accessor :link
15
+
16
+ # Description.
17
+ #
18
+ # @return [String]
19
+ attr_accessor :description
20
+
21
+ # Feed items.
22
+ #
23
+ # @return [Array<Item>]
24
+ attr_accessor :items
25
+
26
+ # @param attributes [Hash, nil] Data to set.
27
+ def initialize(attributes = nil)
28
+ attributes.each do |attribute, value|
29
+ setter = "#{attribute}="
30
+ send(setter, value)
31
+ end if attributes
32
+ end
33
+
34
+ # Treat this class as a value object.
35
+ #
36
+ # @param other [Feed]
37
+ #
38
+ # @return [Bool]
39
+ def ==(other)
40
+ return false unless other.is_a?(self.class)
41
+
42
+ title == other.title &&
43
+ link == other.link &&
44
+ description == other.description &&
45
+ items == other.items
46
+ end
47
+ end
48
+ end
data/lib/yarss/item.rb ADDED
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Yarss
4
+ # Feed item data holder.
5
+ class Item
6
+ # ID.
7
+ #
8
+ # @return [String]
9
+ attr_accessor :id
10
+
11
+ # Title.
12
+ #
13
+ # @return [String]
14
+ attr_accessor :title
15
+
16
+ # Date and time of the last modification.
17
+ #
18
+ # @return [DateTime]
19
+ attr_accessor :updated_at
20
+
21
+ # URL to the related Web page.
22
+ #
23
+ # @return [String]
24
+ attr_accessor :link
25
+
26
+ # Content.
27
+ #
28
+ # @return [String]
29
+ attr_accessor :content
30
+
31
+ # @param attributes [Hash, nil] Data to set.
32
+ def initialize(attributes = nil)
33
+ attributes.each do |attribute, value|
34
+ setter = "#{attribute}="
35
+ send(setter, value)
36
+ end if attributes
37
+ end
38
+
39
+ # Treat this class as a value object.
40
+ #
41
+ # @param other [Item]
42
+ #
43
+ # @return [Bool]
44
+ def ==(other) # rubocop:disable Metrics/AbcSize
45
+ return false unless other.is_a?(self.class)
46
+
47
+ id == other.id &&
48
+ title == other.title &&
49
+ updated_at == other.updated_at &&
50
+ link == other.link &&
51
+ content == other.content
52
+ end
53
+ end
54
+ end
@@ -0,0 +1,110 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'item_parser'
4
+
5
+ module Yarss
6
+ # Contains feed and item parsers.
7
+ module Rdf
8
+ # Extract title, link, description and items from a parsed Rdf feed.
9
+ #
10
+ # @see http://web.resource.org/rss/1.0/spec
11
+ class FeedParser
12
+ # Parsed Rdf feed.
13
+ #
14
+ # @return [Hash]
15
+ attr_accessor :data
16
+
17
+ # @param data [Hash] Parsed Rdf feed.
18
+ def initialize(data)
19
+ self.data = data
20
+ end
21
+
22
+ # Parse out the feed title, link, description and items and wrap them
23
+ # in a data object.
24
+ #
25
+ # @raise [ParseError] If a required field is not found.
26
+ #
27
+ # @return [Feed]
28
+ def parse
29
+ Feed.new(
30
+ title: title,
31
+ link: link,
32
+ description: description,
33
+ items: items
34
+ )
35
+ end
36
+
37
+ # Extract the feed data.
38
+ #
39
+ # @raise [ParseError] If not found.
40
+ #
41
+ # @return [Hash]
42
+ def feed
43
+ @feed ||= begin
44
+ if data['RDF']
45
+ data['RDF']
46
+ else
47
+ data.fetch('rdf:RDF')
48
+ end
49
+ end
50
+ rescue KeyError => e
51
+ raise ParseError, e
52
+ end
53
+
54
+ # Extract the channel data.
55
+ #
56
+ # @raise [ParseError] If not found.
57
+ #
58
+ # @return [Hash]
59
+ def channel
60
+ @channel ||= feed.fetch('channel')
61
+ rescue KeyError => e
62
+ raise ParseError, e
63
+ end
64
+
65
+ # Extract the title.
66
+ #
67
+ # @raise [ParseError] If not found.
68
+ #
69
+ # @return [String]
70
+ def title
71
+ Attribute.value(channel.fetch('title'))
72
+ rescue KeyError => e
73
+ raise ParseError, e
74
+ end
75
+
76
+ # Extract the link.
77
+ #
78
+ # @raise [ParseError] If not found.
79
+ #
80
+ # @return [String]
81
+ def link
82
+ Attribute.link_value(channel.fetch('link'))
83
+ rescue KeyError => e
84
+ raise ParseError, e
85
+ end
86
+
87
+ # Extract the description.
88
+ #
89
+ # @return [String]
90
+ def description
91
+ Attribute.value(channel.fetch('description'))
92
+ rescue KeyError => e
93
+ raise ParseError, e
94
+ end
95
+
96
+ # Extract and parse the items.
97
+ #
98
+ # @raise [ParseError] If not found.
99
+ #
100
+ # @return [Array<Item>]
101
+ def items
102
+ items = feed.fetch('item')
103
+ items = [items] unless items.is_a?(Array)
104
+ items.map { |d| ItemParser.new(d).parse }
105
+ rescue KeyError => e
106
+ raise ParseError, e
107
+ end
108
+ end
109
+ end
110
+ end
@@ -0,0 +1,96 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Yarss
4
+ module Rdf
5
+ # Extract id, title, updated, link and content from a feed item.
6
+ class ItemParser
7
+ # Parsed Rdf feed item.
8
+ #
9
+ # @return [Hash]
10
+ attr_accessor :data
11
+
12
+ # @param data [Hash] Parsed Rdf feed item.
13
+ def initialize(data)
14
+ self.data = data
15
+ end
16
+
17
+ # Parse out the feed item id, title, updated, link and content and wrap
18
+ # them in a data object.
19
+ #
20
+ # @raise [ParseError] If a required field is not found.
21
+ #
22
+ # @return [Item]
23
+ def parse
24
+ Item.new(
25
+ id: id,
26
+ title: title,
27
+ updated_at: updated,
28
+ link: link,
29
+ content: description
30
+ )
31
+ end
32
+
33
+ # Extract the ID.
34
+ #
35
+ # @raise [ParseError] If not found.
36
+ #
37
+ # @return [String]
38
+ def id
39
+ if data['about']
40
+ data['about']
41
+ else
42
+ data.fetch('rdf:about')
43
+ end
44
+ rescue KeyError => e
45
+ raise ParseError, e
46
+ end
47
+
48
+ # Extract the title.
49
+ #
50
+ # @raise [ParseError] If not found.
51
+ #
52
+ # @return [String]
53
+ def title
54
+ Attribute.value(data.fetch('title'))
55
+ rescue KeyError => e
56
+ raise ParseError, e
57
+ end
58
+
59
+ # Extract the updated date.
60
+ #
61
+ # @raise [ParseError] If not found.
62
+ #
63
+ # @return [DateTime]
64
+ def updated
65
+ date = if data['date']
66
+ data['date']
67
+ else
68
+ data.fetch('dc:date')
69
+ end
70
+ DateTime.parse(date)
71
+ rescue KeyError, ArgumentError => e
72
+ raise ParseError, e
73
+ end
74
+
75
+ # Extract the link.
76
+ #
77
+ # @raise [ParseError] If not found.
78
+ #
79
+ # @return [String]
80
+ def link
81
+ Attribute.link_value(data.fetch('link'))
82
+ rescue KeyError => e
83
+ raise ParseError, e
84
+ end
85
+
86
+ # Extract the description.
87
+ #
88
+ # @return [String]
89
+ def description
90
+ Attribute.value(data.fetch('description'))
91
+ rescue KeyError => e
92
+ raise ParseError, e
93
+ end
94
+ end
95
+ end
96
+ end
@@ -0,0 +1,93 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'item_parser'
4
+
5
+ module Yarss
6
+ # Contains feed and item parsers.
7
+ module Rss
8
+ # Extract title, link, description and items from a parsed RSS feed.
9
+ #
10
+ # @see https://validator.w3.org/feed/docs/rss2.html#requiredChannelElements
11
+ class FeedParser
12
+ # Parsed RSS feed.
13
+ #
14
+ # @return [Hash]
15
+ attr_accessor :data
16
+
17
+ # @param data [Hash] Parsed RSS feed.
18
+ def initialize(data)
19
+ self.data = data
20
+ end
21
+
22
+ # Parse out the feed title, link, description and items and wrap them
23
+ # in a data object.
24
+ #
25
+ # @raise [ParseError] If a required field is not found.
26
+ #
27
+ # @return [Feed]
28
+ def parse
29
+ Feed.new(
30
+ title: title,
31
+ link: link,
32
+ description: description,
33
+ items: items
34
+ )
35
+ end
36
+
37
+ # Extract the channel data.
38
+ #
39
+ # @raise [ParseError] If not found.
40
+ #
41
+ # @return [Hash]
42
+ def feed
43
+ @feed ||= data.fetch('rss').fetch('channel')
44
+ rescue KeyError => e
45
+ raise ParseError, e
46
+ end
47
+
48
+ # Extract the title.
49
+ #
50
+ # @raise [ParseError] If not found.
51
+ #
52
+ # @return [String]
53
+ def title
54
+ Attribute.value(feed.fetch('title'))
55
+ rescue KeyError => e
56
+ raise ParseError, e
57
+ end
58
+
59
+ # Extract the link.
60
+ #
61
+ # @raise [ParseError] If not found.
62
+ #
63
+ # @return [String]
64
+ def link
65
+ Attribute.link_value(feed.fetch('link'))
66
+ rescue KeyError => e
67
+ raise ParseError, e
68
+ end
69
+
70
+ # Extract the description.
71
+ #
72
+ # @return [String]
73
+ def description
74
+ Attribute.value(feed.fetch('description'))
75
+ rescue KeyError => e
76
+ raise ParseError, e
77
+ end
78
+
79
+ # Extract and parse the items.
80
+ #
81
+ # @raise [ParseError] If not found.
82
+ #
83
+ # @return [Array<Item>]
84
+ def items
85
+ items = feed.fetch('item')
86
+ items = [items] unless items.is_a?(Array)
87
+ items.map { |d| ItemParser.new(d).parse }
88
+ rescue KeyError => e
89
+ raise ParseError, e
90
+ end
91
+ end
92
+ end
93
+ end
@@ -0,0 +1,94 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Yarss
4
+ module Rss
5
+ # Extract id, title, updated, link and content from a feed item.
6
+ #
7
+ # https://validator.w3.org/feed/docs/rss2.html#hrelementsOfLtitemgt
8
+ class ItemParser
9
+ # Parsed RSS feed item.
10
+ #
11
+ # @return [Hash]
12
+ attr_accessor :data
13
+
14
+ # @param data [Hash] Parsed RSS feed item.
15
+ def initialize(data)
16
+ self.data = data
17
+ end
18
+
19
+ # Parse out the feed item id, title, updated, link and content and wrap
20
+ # them in a data object.
21
+ #
22
+ # @raise [ParseError] If a required field is not found.
23
+ #
24
+ # @return [Item]
25
+ def parse
26
+ Item.new(
27
+ id: guid,
28
+ title: title,
29
+ updated_at: pub_date,
30
+ link: link,
31
+ content: description
32
+ )
33
+ end
34
+
35
+ # Extract the ID. Use the title if guid is not present and title is.
36
+ #
37
+ # @raise [ParseError] If not found.
38
+ #
39
+ # @return [String]
40
+ def guid
41
+ Attribute.value(data.fetch('guid'))
42
+ rescue KeyError => e
43
+ return Digest::MD5.hexdigest(data['title']) if data['title']
44
+ raise ParseError, e
45
+ end
46
+
47
+ # Extract the title. Use guid if title is not present and guid is.
48
+ #
49
+ # @raise [ParseError] If not found.
50
+ #
51
+ # @return [String]
52
+ def title
53
+ Attribute.value(data.fetch('title'))
54
+ rescue KeyError => e
55
+ return Attribute.value(data['guid']) if data['guid']
56
+ raise ParseError, e
57
+ end
58
+
59
+ # Extract the updated date.
60
+ #
61
+ # @return [DateTime]
62
+ def pub_date
63
+ DateTime.parse(data.fetch('pubDate'))
64
+ rescue ArgumentError => e
65
+ raise ParseError, e
66
+ rescue KeyError
67
+ DateTime.now
68
+ end
69
+
70
+ # Extract the link.
71
+ #
72
+ # @raise [ParseError] If not found.
73
+ #
74
+ # @return [String]
75
+ def link
76
+ Attribute.link_value(data.fetch('link'))
77
+ rescue KeyError => e
78
+ raise ParseError, e
79
+ end
80
+
81
+ # Extract the content.
82
+ #
83
+ # @return [String]
84
+ def description
85
+ description = Attribute.value(data['description'] || '')
86
+
87
+ return Attribute.value(data['content:encoded']) if
88
+ description.empty? && data['content:encoded']
89
+
90
+ description
91
+ end
92
+ end
93
+ end
94
+ end
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Yarss
4
+ # Version number, happy now?
5
+ VERSION = '0.0.1'
6
+ end
data/lib/yarss.rb ADDED
@@ -0,0 +1,127 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'yarss/version'
4
+ require 'yarss/feed'
5
+ require 'yarss/item'
6
+ require 'yarss/attribute'
7
+ require 'yarss/rss/feed_parser'
8
+ require 'yarss/atom/feed_parser'
9
+ require 'yarss/rdf/feed_parser'
10
+
11
+ # RSS, RDF and Atom feeds parser.
12
+ module Yarss
13
+ # Generic Yarss error.
14
+ class Error < StandardError; end
15
+
16
+ # Parsing of the XML failed or a required field is not present.
17
+ class ParseError < Error; end
18
+
19
+ # Not a RSS, RDF or Atom feed.
20
+ class UnknownParserError < Error; end
21
+
22
+ # Parse a {Feed} out of a path to a XML or an IO (or whatever responds
23
+ # to +read+).
24
+ #
25
+ # @raise [UnknownParserError] If no corresponding parser was found.
26
+ # @raise [ParseError] If XML parsing or field extracting failed.
27
+ #
28
+ # @param path_or_io [String, #read] A path to the XML file or an IO, a
29
+ # Pathname, something that can be read.
30
+ #
31
+ # @example
32
+ # feed = Yarss.new('path/to/feed.rss')
33
+ #
34
+ # puts "#{feed.title}, #{feed.link}, #{feed.description}"
35
+ #
36
+ # feed.items.each do |item|
37
+ # puts "#{item.id}, #{item.title}, #{item.updated_at}, #{item.link}"
38
+ # puts item.content
39
+ # end
40
+ #
41
+ # @return [Feed]
42
+ def self.new(path_or_io)
43
+ if path_or_io.respond_to?(:read)
44
+ from_io(path_or_io)
45
+ else
46
+ from_file(path_or_io)
47
+ end
48
+ end
49
+
50
+ # Parse a {Feed} out of an IO (or whatever responds to +read+).
51
+ #
52
+ # @raise [UnknownParserError] If no corresponding parser was found.
53
+ # @raise [ParseError] If XML parsing or field extracting failed.
54
+ #
55
+ # @param io [#read] An IO, a Pathname, something that can be read.
56
+ #
57
+ # @example
58
+ # feed = Yarss.from_io(Pathname.new('path/to/feed.rss'))
59
+ # feed = Yarss.from_io(File.open('path/to/feed.rss', 'rb'))
60
+ #
61
+ # puts "#{feed.title}, #{feed.link}, #{feed.description}"
62
+ #
63
+ # feed.items.each do |item|
64
+ # puts "#{item.id}, #{item.title}, #{item.updated_at}, #{item.link}"
65
+ # puts item.content
66
+ # end
67
+ #
68
+ # @return [Feed]
69
+ def self.from_io(io)
70
+ data = io.read
71
+ from_string(data, io)
72
+ end
73
+
74
+ # Parse a {Feed} out of a path to a XML.
75
+ #
76
+ # @raise [UnknownParserError] If no corresponding parser was found.
77
+ # @raise [ParseError] If XML parsing or field extracting failed.
78
+ #
79
+ # @param path [String] Path to a XML.
80
+ #
81
+ # @example
82
+ # feed = Yarss.from_file('path/to/feed.rss')
83
+ #
84
+ # puts "#{feed.title}, #{feed.link}, #{feed.description}"
85
+ #
86
+ # feed.items.each do |item|
87
+ # puts "#{item.id}, #{item.title}, #{item.updated_at}, #{item.link}"
88
+ # puts item.content
89
+ # end
90
+ #
91
+ # @return [Feed]
92
+ def self.from_file(path)
93
+ data = File.read(path)
94
+ from_string(data, path)
95
+ end
96
+
97
+ # Parse a {Feed} out of raw XML.
98
+ #
99
+ # @raise [UnknownParserError] If no corresponding parser was found.
100
+ # @raise [ParseError] If XML parsing or field extracting failed.
101
+ #
102
+ # @param data [String] Raw RSS, RDF or Atom XML data.
103
+ # @param path_or_io [String, #read] Path to a file or an IO.
104
+ #
105
+ # @example
106
+ # feed = Yarss.from_string('...)
107
+ #
108
+ # puts "#{feed.title}, #{feed.link}, #{feed.description}"
109
+ #
110
+ # feed.items.each do |item|
111
+ # puts "#{item.id}, #{item.title}, #{item.updated_at}, #{item.link}"
112
+ # puts item.content
113
+ # end
114
+ #
115
+ # @return [Feed]
116
+ def self.from_string(data, path_or_io)
117
+ data = MultiXml.parse(data)
118
+
119
+ return Rss::FeedParser.new(data).parse if data['rss']
120
+ return Atom::FeedParser.new(data).parse if data['feed']
121
+ return Rdf::FeedParser.new(data).parse if data['rdf:RDF'] || data['RDF']
122
+
123
+ raise UnknownParserError, "Cannot find parser for #{path_or_io}"
124
+ rescue MultiXml::ParseError => e
125
+ raise ParseError, e
126
+ end
127
+ end
data/yarss.gemspec ADDED
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+ # coding: utf-8
3
+ lib = File.expand_path('../lib', __FILE__)
4
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
+ require 'yarss/version'
6
+
7
+ Gem::Specification.new do |spec|
8
+ spec.name = 'yarss'
9
+ spec.version = Yarss::VERSION
10
+ spec.authors = ['Oldrich Vetesnik']
11
+ spec.email = ['oldrich.vetesnik@gmail.com']
12
+
13
+ spec.summary = 'Yet Another RSS Feed Normalizer.'
14
+ spec.description = 'Parse and access RSS/RDF/Atom feeds with ' \
15
+ 'a uniform interface.'
16
+ spec.homepage = 'https://github.com/ollie/yarss'
17
+ spec.license = 'MIT'
18
+
19
+ # rubocop:disable Metrics/LineLength
20
+ spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
21
+ spec.bindir = 'exe'
22
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
23
+ spec.require_paths = ['lib']
24
+
25
+ # System
26
+ spec.add_development_dependency 'bundler', '~> 1.11'
27
+ # spec.add_development_dependency 'ox', '~> 2.2'
28
+ # spec.add_development_dependency 'nokogiri', '~> 1.6'
29
+ # spec.add_development_dependency 'oga', '~> 2.0'
30
+
31
+ # Test
32
+ spec.add_development_dependency 'rspec', '~> 3.4'
33
+ spec.add_development_dependency 'simplecov', '~> 0.11'
34
+
35
+ # Code style, debugging, docs
36
+ spec.add_development_dependency 'rubocop', '~> 0.37'
37
+ spec.add_development_dependency 'pry', '~> 0.10'
38
+ # spec.add_development_dependency 'pry-doc', '~> 0.8'
39
+ # spec.add_development_dependency 'pry-byebug', '~> 3.3'
40
+ spec.add_development_dependency 'yard', '~> 0.8'
41
+ spec.add_development_dependency 'rake', '~> 10.5'
42
+
43
+ # Provides swappable XML backends utilizing LibXML, Nokogiri, Ox, Oga or REXML.
44
+ spec.add_runtime_dependency 'multi_xml', '~> 0.5'
45
+ end
metadata ADDED
@@ -0,0 +1,180 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: yarss
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Oldrich Vetesnik
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2016-02-04 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.11'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.11'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rspec
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '3.4'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '3.4'
41
+ - !ruby/object:Gem::Dependency
42
+ name: simplecov
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '0.11'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '0.11'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rubocop
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '0.37'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '0.37'
69
+ - !ruby/object:Gem::Dependency
70
+ name: pry
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '0.10'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '0.10'
83
+ - !ruby/object:Gem::Dependency
84
+ name: yard
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '0.8'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '0.8'
97
+ - !ruby/object:Gem::Dependency
98
+ name: rake
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '10.5'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '10.5'
111
+ - !ruby/object:Gem::Dependency
112
+ name: multi_xml
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: '0.5'
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: '0.5'
125
+ description: Parse and access RSS/RDF/Atom feeds with a uniform interface.
126
+ email:
127
+ - oldrich.vetesnik@gmail.com
128
+ executables: []
129
+ extensions: []
130
+ extra_rdoc_files: []
131
+ files:
132
+ - ".gitignore"
133
+ - ".rspec"
134
+ - ".rubocop.yml"
135
+ - ".travis.yml"
136
+ - ".yardopts"
137
+ - Gemfile
138
+ - LICENSE.txt
139
+ - README.md
140
+ - Rakefile
141
+ - bin/console
142
+ - bin/setup
143
+ - lib/yarss.rb
144
+ - lib/yarss/atom/feed_parser.rb
145
+ - lib/yarss/atom/item_parser.rb
146
+ - lib/yarss/attribute.rb
147
+ - lib/yarss/feed.rb
148
+ - lib/yarss/item.rb
149
+ - lib/yarss/rdf/feed_parser.rb
150
+ - lib/yarss/rdf/item_parser.rb
151
+ - lib/yarss/rss/feed_parser.rb
152
+ - lib/yarss/rss/item_parser.rb
153
+ - lib/yarss/version.rb
154
+ - yarss.gemspec
155
+ homepage: https://github.com/ollie/yarss
156
+ licenses:
157
+ - MIT
158
+ metadata: {}
159
+ post_install_message:
160
+ rdoc_options: []
161
+ require_paths:
162
+ - lib
163
+ required_ruby_version: !ruby/object:Gem::Requirement
164
+ requirements:
165
+ - - ">="
166
+ - !ruby/object:Gem::Version
167
+ version: '0'
168
+ required_rubygems_version: !ruby/object:Gem::Requirement
169
+ requirements:
170
+ - - ">="
171
+ - !ruby/object:Gem::Version
172
+ version: '0'
173
+ requirements: []
174
+ rubyforge_project:
175
+ rubygems_version: 2.5.2
176
+ signing_key:
177
+ specification_version: 4
178
+ summary: Yet Another RSS Feed Normalizer.
179
+ test_files: []
180
+ has_rdoc: