yarss 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: fb5a8eb19140ed00473aa01207c73ed9f9d7eb46
4
+ data.tar.gz: ec2f4a8d882acf25c80c6b9a1b90b97b08ee65ac
5
+ SHA512:
6
+ metadata.gz: 894f9cda3adfb3e109350028d1ba7b825c3871bb94ec265a13581728cbe54bd6f8d548bcec1f8b3f420fc99fb1624590fd0333eceac9021f60deb09702a91d23
7
+ data.tar.gz: 2f679583c97b1c728c2da37d36f70f98ef243b9d63c6f4fb23373e2dc02cd61c8cdafdc20f2d454640aa963f91772bef80f5d5c5704fbdcaabb9f8ef84c66fd1
data/.gitignore ADDED
@@ -0,0 +1,13 @@
1
+ *.gem
2
+ /.bundle/
3
+ /.yardoc
4
+ /data/
5
+ /dump/
6
+ /examples/
7
+ /Gemfile.lock
8
+ /_yardoc/
9
+ /coverage/
10
+ /doc/
11
+ /pkg/
12
+ /spec/reports/
13
+ /tmp/
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper.rb
data/.rubocop.yml ADDED
@@ -0,0 +1,8 @@
1
+ AllCops:
2
+ TargetRubyVersion: 2.3
3
+ Exclude:
4
+ - 'bin/console'
5
+ - 'examples/rss.rb'
6
+
7
+ Style/FrozenStringLiteralComment:
8
+ EnforcedStyle: always
data/.travis.yml ADDED
@@ -0,0 +1,18 @@
1
+ language: ruby
2
+
3
+ rvm:
4
+ - 2.3.0
5
+ - jruby-19mode
6
+
7
+ branches:
8
+ only:
9
+ - master
10
+
11
+ notifications:
12
+ email:
13
+ on_success: change
14
+ on_failure: always
15
+
16
+ before_install: "gem install bundler -v 1.11.2"
17
+ install: "bundle --jobs 4"
18
+ script: "bundle exec rspec"
data/.yardopts ADDED
@@ -0,0 +1,7 @@
1
+ --protected
2
+ --private
3
+ --embed-mixins
4
+ lib/**/*.rb
5
+ -
6
+ README.md
7
+ LICENSE.txt
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ source 'https://rubygems.org'
4
+
5
+ # Specify your gem's dependencies in yarss.gemspec
6
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2016 Oldrich Vetesnik
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,60 @@
1
+ # Yarss – Yet Another RSS Feed Normalizer. [![Build Status](https://travis-ci.org/ollie/yarss.svg?branch=master)](https://travis-ci.org/ollie/yarss) [![Gem Version](https://img.shields.io/gem/v/yarss.svg)](https://rubygems.org/gems/yarss)
2
+
3
+ Parse and access RSS/RDF/Atom feeds with a uniform interface. Yarss uses
4
+ [MultiXml](https://rubygems.org/gems/multi_xml) behind the scenes so you may
5
+ want to drop in your favourite XML parser.
6
+
7
+ For MRI users [Ox](https://rubygems.org/gems/ox) is highly recommended as it is
8
+ the fastest XML parser I know of. JRuby users should probably use
9
+ [Nokogiri](https://rubygems.org/gems/nokogiri).
10
+
11
+ ## Usage
12
+
13
+ ```ruby
14
+ ['path/to/feed.rss', 'path/to/feed.atom', 'path/to/feed.rdf'].each do |file_path|
15
+ feed = Yarss.new(file_path)
16
+
17
+ puts "#{feed.title}, #{feed.link}, #{feed.description}"
18
+
19
+ feed.items.each do |item|
20
+ puts "#{item.id}, #{item.title}, #{item.updated_at}, #{item.link}"
21
+ puts item.content
22
+ end
23
+ end
24
+ ```
25
+
26
+ ## Installation
27
+
28
+ Add this line to your application's Gemfile:
29
+
30
+ ```ruby
31
+ gem 'yarss'
32
+
33
+ # To increase performance, add one of these gems:
34
+ # gem 'ox' # MRI compatible.
35
+ # gem 'nokogiri' # MRI and JRuby compatible.
36
+ # gem 'oga' # MRI, JRuby, Rubinius compatible.
37
+ ```
38
+
39
+ And then execute:
40
+
41
+ $ bundle
42
+
43
+ Or install it yourself as:
44
+
45
+ $ gem install yarss
46
+
47
+ ## Development
48
+
49
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
50
+
51
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
52
+
53
+ ## Contributing
54
+
55
+ Bug reports and pull requests are welcome on GitHub at https://github.com/ollie/yarss.
56
+
57
+ ## License
58
+
59
+ The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
60
+
data/Rakefile ADDED
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'bundler/gem_tasks'
4
+
5
+ task default: :combo
6
+
7
+ desc 'Run tests, rubocop and generate documentation'
8
+ task :combo do
9
+ sh 'bundle exec rspec'
10
+ sh('bundle exec rubocop') {} # ignore status > 0
11
+ sh 'bundle exec yardoc'
12
+ end
13
+
14
+ desc 'Same as :combo but build a gem, too'
15
+ task mega_combo: :combo do
16
+ sh 'gem build yarss.gemspec'
17
+ end
data/bin/console ADDED
@@ -0,0 +1,11 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'bundler/setup'
4
+ require 'multi_xml'
5
+ require 'yarss'
6
+
7
+ # You can add fixtures and/or initialization code here to make experimenting
8
+ # with your gem easier. You can also use a different console, if you like.
9
+
10
+ require 'pry'
11
+ Pry.start
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,87 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'item_parser'
4
+
5
module Yarss
  # Contains feed and item parsers.
  module Atom
    # Turns an already-parsed Atom document (a +Hash+) into a {Feed}.
    #
    # @see http://atomenabled.org/developers/syndication/#requiredFeedElements
    class FeedParser
      # The parsed Atom document.
      #
      # @return [Hash]
      attr_accessor :data

      # @param data [Hash] The parsed Atom document.
      def initialize(data)
        self.data = data
      end

      # Gather the title, link, description and items and hand them over to
      # a new {Feed}.
      #
      # @raise [ParseError] If a required field is not found.
      #
      # @return [Feed]
      def parse
        attributes = {
          title: title,
          link: link,
          description: description,
          items: items
        }

        Feed.new(attributes)
      end

      # The content of the root +feed+ element (memoized).
      #
      # @raise [ParseError] If the document has no +feed+ key.
      #
      # @return [Hash]
      def feed
        @feed ||= data.fetch('feed')
      rescue KeyError => error
        raise ParseError, error
      end

      # The feed title.
      #
      # @raise [ParseError] If the feed has no +title+ key.
      #
      # @return [String]
      def title
        raw_title = feed.fetch('title')
        Attribute.value(raw_title)
      rescue KeyError => error
        raise ParseError, error
      end

      # The feed link; an empty string is used when there is none.
      #
      # @return [String]
      def link
        raw_link = feed['link'] || ''
        Attribute.link_value(raw_link)
      end

      # The feed description, read from +subtitle+; an empty string is used
      # when there is none.
      #
      # @return [String]
      def description
        raw_subtitle = feed['subtitle'] || ''
        Attribute.value(raw_subtitle)
      end

      # Parse every +entry+ of the feed. A lone entry (not wrapped in an
      # +Array+) is treated as a one-element list.
      #
      # @raise [ParseError] If the feed has no +entry+ key.
      #
      # @return [Array<Item>]
      def items
        entries = feed.fetch('entry')
        entries = entries.is_a?(Array) ? entries : [entries]
        entries.map { |entry| ItemParser.new(entry).parse }
      rescue KeyError => error
        raise ParseError, error
      end
    end
  end
end
@@ -0,0 +1,90 @@
1
+ # frozen_string_literal: true
2
+
3
require 'date'

module Yarss
  module Atom
    # Extract id, title, updated, link and content from a feed item.
    #
    # http://atomenabled.org/developers/syndication/#requiredEntryElements
    class ItemParser
      # Parsed Atom feed item.
      #
      # @return [Hash]
      attr_accessor :data

      # @param data [Hash] Parsed Atom feed item.
      def initialize(data)
        self.data = data
      end

      # Parse out the feed item id, title, updated, link and content and wrap
      # them in a data object.
      #
      # @raise [ParseError] If a required field is not found.
      #
      # @return [Item]
      def parse
        Item.new(
          id:         id,
          title:      title,
          updated_at: updated,
          link:       link,
          content:    content
        )
      end

      # Extract the ID.
      #
      # The raw value goes through {Attribute.value} so that an +id+ element
      # carrying XML attributes (which the XML parser represents as a +Hash+)
      # still yields a plain, stripped +String+.
      #
      # @raise [ParseError] If not found or not convertible to a +String+.
      #
      # @return [String]
      def id
        Attribute.value(data.fetch('id'))
      rescue KeyError => e
        raise ParseError, e
      end

      # Extract the title.
      #
      # @raise [ParseError] If not found.
      #
      # @return [String]
      def title
        Attribute.value(data.fetch('title'))
      rescue KeyError => e
        raise ParseError, e
      end

      # Extract the updated date.
      #
      # The raw value is normalized with {Attribute.value} first: an empty
      # element (+nil+) becomes an empty string and an element with XML
      # attributes (a +Hash+) is reduced to its text, so +DateTime.parse+
      # always receives a +String+ and can only fail with +ArgumentError+.
      #
      # @raise [ParseError] If not found or not a parsable date.
      #
      # @return [DateTime]
      def updated
        DateTime.parse(Attribute.value(data.fetch('updated')))
      rescue KeyError, ArgumentError => e
        raise ParseError, e
      end

      # Extract the link.
      #
      # @raise [ParseError] If not found.
      #
      # @return [String]
      def link
        Attribute.link_value(data.fetch('link'))
      rescue KeyError => e
        raise ParseError, e
      end

      # Extract the content. The +content+ element wins; +summary+ is used
      # only when +content+ is missing or empty.
      #
      # @return [String]
      def content
        summary = Attribute.value(data['summary'] || '')
        content = Attribute.value(data['content'] || '')
        return content unless content.empty?
        summary
      end
    end
  end
end
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+
3
module Yarss
  # A bunch of helpers to extract a +String+ value out of a +Hash+, +Array+,
  # etc.
  module Attribute
    # Extract a +String+ value from a given attribute.
    #
    # @raise [ParseError] If type of value is not known or a +Hash+ has no
    #   +__content__+ key.
    #
    # @example
    #   Yarss::Attribute.value('Foo')                  # => 'Foo'
    #   Yarss::Attribute.value('__content__' => 'Foo') # => 'Foo'
    #   Yarss::Attribute.value(nil)                    # => ''
    #
    # @param value [String, Hash, nil] An attribute.
    #
    # @return [String]
    def self.value(value)
      value ||= ''

      case value
      when Hash
        value(value.fetch('__content__'))
      when String
        value.strip
      else
        raise ParseError, "Unknown #{value.class} attribute: #{value.inspect}"
      end
    rescue KeyError => e
      raise ParseError, e
    end

    # Extract a +String+ value from a given link attribute.
    #
    # When several links are given, a plain +String+ wins, then a link with
    # +rel="self"+, then an alternate link. A link with no +rel+ at all counts
    # as an alternate link, which is how Atom defines a missing +rel+.
    #
    # @raise [ParseError] If type of value is not known or no link is found.
    #
    # @example
    #   Yarss::Attribute.link_value('Foo')           # => 'Foo'
    #   Yarss::Attribute.link_value('href' => 'Foo') # => 'Foo'
    #   Yarss::Attribute.link_value([{ 'rel' => 'self', 'href' => 'Foo' }])
    #   # => 'Foo'
    #   Yarss::Attribute.link_value([{ 'rel' => 'alternate', 'href' => 'Foo' }])
    #   # => 'Foo'
    #   Yarss::Attribute.link_value([{ 'href' => 'Foo' }])
    #   # => 'Foo'
    #
    # @param value [String, Hash, Array, nil] A link attribute.
    #
    # @return [String]
    def self.link_value(value)
      value ||= ''

      case value
      when Hash
        link_value(value.fetch('href'))
      when Array
        link_value(pick_link(value))
      when String
        value.strip
      else
        raise ParseError, "Unknown #{value.class} attribute: #{value.inspect}"
      end
    rescue KeyError => e
      raise ParseError, e
    end

    # Choose the most suitable entry out of a list of links.
    #
    # Entries that are neither a +String+ nor a +Hash+ are ignored instead of
    # being indexed blindly.
    #
    # @raise [KeyError] If no usable link is present.
    #
    # @param links [Array] Candidate links.
    #
    # @return [String, Hash]
    def self.pick_link(links)
      text = links.find { |l| l.is_a?(String) }
      return text if text

      hashes = links.select { |l| l.is_a?(Hash) }

      hashes.find { |l| l['rel'] == 'self' } ||
        hashes.find { |l| l['rel'].nil? || l['rel'] == 'alternate' } ||
        raise(KeyError, "no usable link found in #{links.inspect}")
    end
    private_class_method :pick_link
  end
end
data/lib/yarss/feed.rb ADDED
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
module Yarss
  # Plain value object holding the normalized feed fields.
  class Feed
    # Feed title.
    #
    # @return [String]
    attr_accessor :title

    # URL of the Web page the feed belongs to.
    #
    # @return [String]
    attr_accessor :link

    # Feed description.
    #
    # @return [String]
    attr_accessor :description

    # Items contained in the feed.
    #
    # @return [Array<Item>]
    attr_accessor :items

    # Assign every given attribute through its writer method.
    #
    # @param attributes [Hash, nil] Attribute names mapped to their values.
    def initialize(attributes = nil)
      return unless attributes

      attributes.each { |name, value| send("#{name}=", value) }
    end

    # Two feeds are equal when all of their fields are equal.
    #
    # @param other [Feed]
    #
    # @return [Bool]
    def ==(other)
      return false unless other.is_a?(self.class)

      %i(title link description items).all? do |field|
        public_send(field) == other.public_send(field)
      end
    end
  end
end
data/lib/yarss/item.rb ADDED
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
module Yarss
  # Plain value object holding the normalized fields of one feed item.
  class Item
    # Item ID.
    #
    # @return [String]
    attr_accessor :id

    # Item title.
    #
    # @return [String]
    attr_accessor :title

    # Date and time the item was last modified.
    #
    # @return [DateTime]
    attr_accessor :updated_at

    # URL of the Web page the item belongs to.
    #
    # @return [String]
    attr_accessor :link

    # Item content.
    #
    # @return [String]
    attr_accessor :content

    # Assign every given attribute through its writer method.
    #
    # @param attributes [Hash, nil] Attribute names mapped to their values.
    def initialize(attributes = nil)
      return unless attributes

      attributes.each { |name, value| send("#{name}=", value) }
    end

    # Two items are equal when all of their fields are equal.
    #
    # @param other [Item]
    #
    # @return [Bool]
    def ==(other)
      return false unless other.is_a?(self.class)

      %i(id title updated_at link content).all? do |field|
        public_send(field) == other.public_send(field)
      end
    end
  end
end
@@ -0,0 +1,110 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'item_parser'
4
+
5
module Yarss
  # Contains feed and item parsers.
  module Rdf
    # Turns an already-parsed RDF (RSS 1.0) document (a +Hash+) into a {Feed}.
    #
    # @see http://web.resource.org/rss/1.0/spec
    class FeedParser
      # The parsed RDF document.
      #
      # @return [Hash]
      attr_accessor :data

      # @param data [Hash] The parsed RDF document.
      def initialize(data)
        self.data = data
      end

      # Gather the title, link, description and items and hand them over to
      # a new {Feed}.
      #
      # @raise [ParseError] If a required field is not found.
      #
      # @return [Feed]
      def parse
        attributes = {
          title: title,
          link: link,
          description: description,
          items: items
        }

        Feed.new(attributes)
      end

      # The content of the root element (memoized). The root is looked up
      # as +RDF+ first and as +rdf:RDF+ second.
      #
      # @raise [ParseError] If neither key is present.
      #
      # @return [Hash]
      def feed
        @feed ||= data['RDF'] || data.fetch('rdf:RDF')
      rescue KeyError => error
        raise ParseError, error
      end

      # The content of the +channel+ element (memoized).
      #
      # @raise [ParseError] If the root has no +channel+ key.
      #
      # @return [Hash]
      def channel
        @channel ||= feed.fetch('channel')
      rescue KeyError => error
        raise ParseError, error
      end

      # The channel title.
      #
      # @raise [ParseError] If the channel has no +title+ key.
      #
      # @return [String]
      def title
        raw_title = channel.fetch('title')
        Attribute.value(raw_title)
      rescue KeyError => error
        raise ParseError, error
      end

      # The channel link.
      #
      # @raise [ParseError] If the channel has no +link+ key.
      #
      # @return [String]
      def link
        raw_link = channel.fetch('link')
        Attribute.link_value(raw_link)
      rescue KeyError => error
        raise ParseError, error
      end

      # The channel description.
      #
      # @raise [ParseError] If the channel has no +description+ key.
      #
      # @return [String]
      def description
        raw_description = channel.fetch('description')
        Attribute.value(raw_description)
      rescue KeyError => error
        raise ParseError, error
      end

      # Parse every +item+ found directly under the root element. A lone
      # item (not wrapped in an +Array+) is treated as a one-element list.
      #
      # @raise [ParseError] If the root has no +item+ key.
      #
      # @return [Array<Item>]
      def items
        entries = feed.fetch('item')
        entries = entries.is_a?(Array) ? entries : [entries]
        entries.map { |entry| ItemParser.new(entry).parse }
      rescue KeyError => error
        raise ParseError, error
      end
    end
  end
end
@@ -0,0 +1,96 @@
1
+ # frozen_string_literal: true
2
+
3
require 'date'

module Yarss
  module Rdf
    # Extract id, title, updated, link and content from a feed item.
    class ItemParser
      # Parsed Rdf feed item.
      #
      # @return [Hash]
      attr_accessor :data

      # @param data [Hash] Parsed Rdf feed item.
      def initialize(data)
        self.data = data
      end

      # Parse out the feed item id, title, updated, link and content and wrap
      # them in a data object.
      #
      # @raise [ParseError] If a required field is not found.
      #
      # @return [Item]
      def parse
        Item.new(
          id:         id,
          title:      title,
          updated_at: updated,
          link:       link,
          content:    description
        )
      end

      # Extract the ID from the +about+ attribute, falling back to
      # +rdf:about+.
      #
      # @raise [ParseError] If not found.
      #
      # @return [String]
      def id
        data['about'] || data.fetch('rdf:about')
      rescue KeyError => e
        raise ParseError, e
      end

      # Extract the title.
      #
      # @raise [ParseError] If not found.
      #
      # @return [String]
      def title
        Attribute.value(data.fetch('title'))
      rescue KeyError => e
        raise ParseError, e
      end

      # Extract the updated date from +date+, falling back to +dc:date+.
      #
      # The raw value is normalized with {Attribute.value} first, so an
      # element carrying XML attributes (a +Hash+) is reduced to its text and
      # +DateTime.parse+ always receives a +String+.
      #
      # @raise [ParseError] If not found or not a parsable date.
      #
      # @return [DateTime]
      def updated
        raw_date = data['date'] || data.fetch('dc:date')
        DateTime.parse(Attribute.value(raw_date))
      rescue KeyError, ArgumentError => e
        raise ParseError, e
      end

      # Extract the link.
      #
      # @raise [ParseError] If not found.
      #
      # @return [String]
      def link
        Attribute.link_value(data.fetch('link'))
      rescue KeyError => e
        raise ParseError, e
      end

      # Extract the description.
      #
      # @raise [ParseError] If not found.
      #
      # @return [String]
      def description
        Attribute.value(data.fetch('description'))
      rescue KeyError => e
        raise ParseError, e
      end
    end
  end
end
@@ -0,0 +1,93 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'item_parser'
4
+
5
module Yarss
  # Contains feed and item parsers.
  module Rss
    # Turns an already-parsed RSS document (a +Hash+) into a {Feed}.
    #
    # @see https://validator.w3.org/feed/docs/rss2.html#requiredChannelElements
    class FeedParser
      # The parsed RSS document.
      #
      # @return [Hash]
      attr_accessor :data

      # @param data [Hash] The parsed RSS document.
      def initialize(data)
        self.data = data
      end

      # Gather the title, link, description and items and hand them over to
      # a new {Feed}.
      #
      # @raise [ParseError] If a required field is not found.
      #
      # @return [Feed]
      def parse
        attributes = {
          title: title,
          link: link,
          description: description,
          items: items
        }

        Feed.new(attributes)
      end

      # The content of the +channel+ element nested in +rss+ (memoized).
      #
      # @raise [ParseError] If +rss+ or +channel+ is missing.
      #
      # @return [Hash]
      def feed
        @feed ||= data.fetch('rss').fetch('channel')
      rescue KeyError => error
        raise ParseError, error
      end

      # The channel title.
      #
      # @raise [ParseError] If the channel has no +title+ key.
      #
      # @return [String]
      def title
        raw_title = feed.fetch('title')
        Attribute.value(raw_title)
      rescue KeyError => error
        raise ParseError, error
      end

      # The channel link.
      #
      # @raise [ParseError] If the channel has no +link+ key.
      #
      # @return [String]
      def link
        raw_link = feed.fetch('link')
        Attribute.link_value(raw_link)
      rescue KeyError => error
        raise ParseError, error
      end

      # The channel description.
      #
      # @raise [ParseError] If the channel has no +description+ key.
      #
      # @return [String]
      def description
        raw_description = feed.fetch('description')
        Attribute.value(raw_description)
      rescue KeyError => error
        raise ParseError, error
      end

      # Parse every +item+ of the channel. A lone item (not wrapped in an
      # +Array+) is treated as a one-element list.
      #
      # @raise [ParseError] If the channel has no +item+ key.
      #
      # @return [Array<Item>]
      def items
        entries = feed.fetch('item')
        entries = entries.is_a?(Array) ? entries : [entries]
        entries.map { |entry| ItemParser.new(entry).parse }
      rescue KeyError => error
        raise ParseError, error
      end
    end
  end
end
@@ -0,0 +1,94 @@
1
+ # frozen_string_literal: true
2
+
3
require 'date'
require 'digest'

module Yarss
  module Rss
    # Extract id, title, updated, link and content from a feed item.
    #
    # https://validator.w3.org/feed/docs/rss2.html#hrelementsOfLtitemgt
    class ItemParser
      # Parsed RSS feed item.
      #
      # @return [Hash]
      attr_accessor :data

      # @param data [Hash] Parsed RSS feed item.
      def initialize(data)
        self.data = data
      end

      # Parse out the feed item id, title, updated, link and content and wrap
      # them in a data object.
      #
      # @raise [ParseError] If a required field is not found.
      #
      # @return [Item]
      def parse
        Item.new(
          id:         guid,
          title:      title,
          updated_at: pub_date,
          link:       link,
          content:    description
        )
      end

      # Extract the ID. When +guid+ is absent, the MD5 hex digest of the
      # title is used instead.
      #
      # @raise [ParseError] If neither a guid nor a usable title is found.
      #
      # @return [String]
      def guid
        Attribute.value(data.fetch('guid'))
      rescue KeyError => e
        text = title_text
        raise ParseError, e unless text
        Digest::MD5.hexdigest(text)
      end

      # Extract the title. Use guid if title is not present and guid is.
      #
      # @raise [ParseError] If not found.
      #
      # @return [String]
      def title
        Attribute.value(data.fetch('title'))
      rescue KeyError => e
        return Attribute.value(data['guid']) if data['guid']
        raise ParseError, e
      end

      # Extract the updated date. The current date and time is used when
      # the item has no +pubDate+ key at all.
      #
      # The raw value is normalized with {Attribute.value} first, so an empty
      # element (+nil+) or one carrying XML attributes (a +Hash+) reaches
      # +DateTime.parse+ as a +String+ and fails with a {ParseError} rather
      # than an unrescued +TypeError+.
      #
      # @raise [ParseError] If the value is present but not a parsable date.
      #
      # @return [DateTime]
      def pub_date
        DateTime.parse(Attribute.value(data.fetch('pubDate')))
      rescue ArgumentError => e
        raise ParseError, e
      rescue KeyError
        DateTime.now
      end

      # Extract the link.
      #
      # @raise [ParseError] If not found.
      #
      # @return [String]
      def link
        Attribute.link_value(data.fetch('link'))
      rescue KeyError => e
        raise ParseError, e
      end

      # Extract the content. +content:encoded+ is used only when
      # +description+ is missing or empty.
      #
      # @return [String]
      def description
        description = Attribute.value(data['description'] || '')

        return Attribute.value(data['content:encoded']) if
          description.empty? && data['content:encoded']

        description
      end

      private

      # Raw title text used for the guid fallback.
      #
      # A +String+ title is returned untouched (not stripped) so that IDs
      # derived from it stay the same as before; a +Hash+ title (an element
      # with XML attributes) is reduced to its +__content__+.
      #
      # @return [String, nil] +nil+ when there is no textual title.
      def title_text
        raw = data['title']
        raw = raw['__content__'] if raw.is_a?(Hash)
        raw if raw.is_a?(String)
      end
    end
  end
end
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Yarss
4
+ # Version number, happy now?
5
+ VERSION = '0.0.1'
6
+ end
data/lib/yarss.rb ADDED
@@ -0,0 +1,127 @@
1
+ # frozen_string_literal: true
2
+
3
require 'multi_xml'

require 'yarss/version'
require 'yarss/feed'
require 'yarss/item'
require 'yarss/attribute'
require 'yarss/rss/feed_parser'
require 'yarss/atom/feed_parser'
require 'yarss/rdf/feed_parser'
10
+
11
+ # RSS, RDF and Atom feeds parser.
12
+ module Yarss
13
+ # Generic Yarss error.
14
+ class Error < StandardError; end
15
+
16
+ # Parsing of the XML failed or a required field is not present.
17
+ class ParseError < Error; end
18
+
19
+ # Not a RSS, RDF or Atom feed.
20
+ class UnknownParserError < Error; end
21
+
22
+ # Parse a {Feed} out of a path to a XML or an IO (or whatever responds
23
+ # to +read+).
24
+ #
25
+ # @raise [UnknownParserError] If no corresponding parser was found.
26
+ # @raise [ParseError] If XML parsing or field extracting failed.
27
+ #
28
+ # @param path_or_io [String, #read] A path to the XML file or an IO, a
29
+ # Pathname, something that can be read.
30
+ #
31
+ # @example
32
+ # feed = Yarss.new('path/to/feed.rss')
33
+ #
34
+ # puts "#{feed.title}, #{feed.link}, #{feed.description}"
35
+ #
36
+ # feed.items.each do |item|
37
+ # puts "#{item.id}, #{item.title}, #{item.updated_at}, #{item.link}"
38
+ # puts item.content
39
+ # end
40
+ #
41
+ # @return [Feed]
42
+ def self.new(path_or_io)
43
+ if path_or_io.respond_to?(:read)
44
+ from_io(path_or_io)
45
+ else
46
+ from_file(path_or_io)
47
+ end
48
+ end
49
+
50
+ # Parse a {Feed} out of an IO (or whatever responds to +read+).
51
+ #
52
+ # @raise [UnknownParserError] If no corresponding parser was found.
53
+ # @raise [ParseError] If XML parsing or field extracting failed.
54
+ #
55
+ # @param io [#read] An IO, a Pathname, something that can be read.
56
+ #
57
+ # @example
58
+ # feed = Yarss.from_io(Pathname.new('path/to/feed.rss'))
59
+ # feed = Yarss.from_io(File.open('path/to/feed.rss', 'rb'))
60
+ #
61
+ # puts "#{feed.title}, #{feed.link}, #{feed.description}"
62
+ #
63
+ # feed.items.each do |item|
64
+ # puts "#{item.id}, #{item.title}, #{item.updated_at}, #{item.link}"
65
+ # puts item.content
66
+ # end
67
+ #
68
+ # @return [Feed]
69
+ def self.from_io(io)
70
+ data = io.read
71
+ from_string(data, io)
72
+ end
73
+
74
+ # Parse a {Feed} out of a path to a XML.
75
+ #
76
+ # @raise [UnknownParserError] If no corresponding parser was found.
77
+ # @raise [ParseError] If XML parsing or field extracting failed.
78
+ #
79
+ # @param path [String] Path to a XML.
80
+ #
81
+ # @example
82
+ # feed = Yarss.from_file('path/to/feed.rss')
83
+ #
84
+ # puts "#{feed.title}, #{feed.link}, #{feed.description}"
85
+ #
86
+ # feed.items.each do |item|
87
+ # puts "#{item.id}, #{item.title}, #{item.updated_at}, #{item.link}"
88
+ # puts item.content
89
+ # end
90
+ #
91
+ # @return [Feed]
92
+ def self.from_file(path)
93
+ data = File.read(path)
94
+ from_string(data, path)
95
+ end
96
+
97
+ # Parse a {Feed} out of raw XML.
98
+ #
99
+ # @raise [UnknownParserError] If no corresponding parser was found.
100
+ # @raise [ParseError] If XML parsing or field extracting failed.
101
+ #
102
+ # @param data [String] Raw RSS, RDF or Atom XML data.
103
+ # @param path_or_io [String, #read] Path to a file or an IO.
104
+ #
105
+ # @example
106
+ # feed = Yarss.from_string('...)
107
+ #
108
+ # puts "#{feed.title}, #{feed.link}, #{feed.description}"
109
+ #
110
+ # feed.items.each do |item|
111
+ # puts "#{item.id}, #{item.title}, #{item.updated_at}, #{item.link}"
112
+ # puts item.content
113
+ # end
114
+ #
115
+ # @return [Feed]
116
+ def self.from_string(data, path_or_io)
117
+ data = MultiXml.parse(data)
118
+
119
+ return Rss::FeedParser.new(data).parse if data['rss']
120
+ return Atom::FeedParser.new(data).parse if data['feed']
121
+ return Rdf::FeedParser.new(data).parse if data['rdf:RDF'] || data['RDF']
122
+
123
+ raise UnknownParserError, "Cannot find parser for #{path_or_io}"
124
+ rescue MultiXml::ParseError => e
125
+ raise ParseError, e
126
+ end
127
+ end
data/yarss.gemspec ADDED
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+ # coding: utf-8
3
+ lib = File.expand_path('../lib', __FILE__)
4
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
+ require 'yarss/version'
6
+
7
+ Gem::Specification.new do |spec|
8
+ spec.name = 'yarss'
9
+ spec.version = Yarss::VERSION
10
+ spec.authors = ['Oldrich Vetesnik']
11
+ spec.email = ['oldrich.vetesnik@gmail.com']
12
+
13
+ spec.summary = 'Yet Another RSS Feed Normalizer.'
14
+ spec.description = 'Parse and access RSS/RDF/Atom feeds with ' \
15
+ 'a uniform interface.'
16
+ spec.homepage = 'https://github.com/ollie/yarss'
17
+ spec.license = 'MIT'
18
+
19
+ # rubocop:disable Metrics/LineLength
20
+ spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
21
+ spec.bindir = 'exe'
22
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
23
+ spec.require_paths = ['lib']
24
+
25
+ # System
26
+ spec.add_development_dependency 'bundler', '~> 1.11'
27
+ # spec.add_development_dependency 'ox', '~> 2.2'
28
+ # spec.add_development_dependency 'nokogiri', '~> 1.6'
29
+ # spec.add_development_dependency 'oga', '~> 2.0'
30
+
31
+ # Test
32
+ spec.add_development_dependency 'rspec', '~> 3.4'
33
+ spec.add_development_dependency 'simplecov', '~> 0.11'
34
+
35
+ # Code style, debugging, docs
36
+ spec.add_development_dependency 'rubocop', '~> 0.37'
37
+ spec.add_development_dependency 'pry', '~> 0.10'
38
+ # spec.add_development_dependency 'pry-doc', '~> 0.8'
39
+ # spec.add_development_dependency 'pry-byebug', '~> 3.3'
40
+ spec.add_development_dependency 'yard', '~> 0.8'
41
+ spec.add_development_dependency 'rake', '~> 10.5'
42
+
43
+ # Provides swappable XML backends utilizing LibXML, Nokogiri, Ox, Oga or REXML.
44
+ spec.add_runtime_dependency 'multi_xml', '~> 0.5'
45
+ end
metadata ADDED
@@ -0,0 +1,180 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: yarss
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Oldrich Vetesnik
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2016-02-04 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.11'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.11'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rspec
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '3.4'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '3.4'
41
+ - !ruby/object:Gem::Dependency
42
+ name: simplecov
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '0.11'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '0.11'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rubocop
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '0.37'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '0.37'
69
+ - !ruby/object:Gem::Dependency
70
+ name: pry
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '0.10'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '0.10'
83
+ - !ruby/object:Gem::Dependency
84
+ name: yard
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '0.8'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '0.8'
97
+ - !ruby/object:Gem::Dependency
98
+ name: rake
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '10.5'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '10.5'
111
+ - !ruby/object:Gem::Dependency
112
+ name: multi_xml
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: '0.5'
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: '0.5'
125
+ description: Parse and access RSS/RDF/Atom feeds with a uniform interface.
126
+ email:
127
+ - oldrich.vetesnik@gmail.com
128
+ executables: []
129
+ extensions: []
130
+ extra_rdoc_files: []
131
+ files:
132
+ - ".gitignore"
133
+ - ".rspec"
134
+ - ".rubocop.yml"
135
+ - ".travis.yml"
136
+ - ".yardopts"
137
+ - Gemfile
138
+ - LICENSE.txt
139
+ - README.md
140
+ - Rakefile
141
+ - bin/console
142
+ - bin/setup
143
+ - lib/yarss.rb
144
+ - lib/yarss/atom/feed_parser.rb
145
+ - lib/yarss/atom/item_parser.rb
146
+ - lib/yarss/attribute.rb
147
+ - lib/yarss/feed.rb
148
+ - lib/yarss/item.rb
149
+ - lib/yarss/rdf/feed_parser.rb
150
+ - lib/yarss/rdf/item_parser.rb
151
+ - lib/yarss/rss/feed_parser.rb
152
+ - lib/yarss/rss/item_parser.rb
153
+ - lib/yarss/version.rb
154
+ - yarss.gemspec
155
+ homepage: https://github.com/ollie/yarss
156
+ licenses:
157
+ - MIT
158
+ metadata: {}
159
+ post_install_message:
160
+ rdoc_options: []
161
+ require_paths:
162
+ - lib
163
+ required_ruby_version: !ruby/object:Gem::Requirement
164
+ requirements:
165
+ - - ">="
166
+ - !ruby/object:Gem::Version
167
+ version: '0'
168
+ required_rubygems_version: !ruby/object:Gem::Requirement
169
+ requirements:
170
+ - - ">="
171
+ - !ruby/object:Gem::Version
172
+ version: '0'
173
+ requirements: []
174
+ rubyforge_project:
175
+ rubygems_version: 2.5.2
176
+ signing_key:
177
+ specification_version: 4
178
+ summary: Yet Another RSS Feed Normalizer.
179
+ test_files: []
180
+ has_rdoc: