rssable 0.0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/rssable/detection/engine_detector.rb +47 -0
- data/lib/rssable/detection/feed_finder.rb +15 -0
- data/lib/rssable/parsers/channel.rb +53 -0
- data/lib/rssable/parsers/item_class_builder.rb +21 -0
- data/lib/rssable/parsers/items/base.rb +66 -0
- data/lib/rssable/parsers/items/blogger.rb +18 -0
- data/lib/rssable/parsers/items/medium.rb +23 -0
- data/lib/rssable/parsers/items/wordpress.rb +25 -0
- data/lib/rssable/processor.rb +22 -0
- data/lib/rssable/version.rb +12 -0
- data/lib/rssable.rb +23 -0
- metadata +96 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: cd99535f844b27a604a0ae97f648807976045c7350dc536312a50f77ed438446
|
4
|
+
data.tar.gz: 0a6d02f9f66a54959f41d3e912c40e7ee7b91b73039113cdcde1707e1b9be682
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 9a19230d6acf341655b659f49b8bc0d8421a2e2f487174877db6e56fc587fc9303c559d3bd0be4c8a33033592dfc1d55d51d30e15dc47e22f4d9b2897f256be0
|
7
|
+
data.tar.gz: 77af58376a4f40ad9fdfadcf4ec2a9f78c01336bf97940945b10cf5086e5929a94763b0209c0463d37b1970c23c3342b484872915a970cd0e4486d5a6f273976
|
@@ -0,0 +1,47 @@
|
|
1
|
+
module RSSable
  module Detection
    # Chooses which of the feed links found on a page should be used and
    # names the driver (blog engine) that matches the shape of that link.
    class EngineDetector
      # URL patterns of the engines that are recognised by their feed address.
      MAPPINGS = {
        blogger: /\/feeds\/posts\/default\?alt\=rss$/,
        wordpress: /\/feed\/$/,
        medium: /medium\.com\/feed\//
      }.freeze

      # Scans the links in the given order and returns the first one that
      # matches a known engine, paired with the driver name.
      #
      # If no link matches a known engine, the first link is returned with
      # the :default driver. If there is no link at all, an empty array is
      # returned.
      #
      # @param urls [Array<String, nil>] candidate feed links; nil entries
      #   (e.g. link tags that had no href) are ignored
      # @param source_url [String] address of the page the links came from,
      #   used as the base when the link is the relative "/feed.xml"
      # @return [Array] [feed_url, driver] or []
      def self.call(urls:, source_url:)
        # A nil entry would otherwise blow up on nil.match below.
        candidates = urls.compact

        candidates.each do |url|
          detected = detect(url, source_url)
          return detected unless detected.nil?
        end

        candidates.empty? ? [] : [candidates.first, :default]
      end

      # Returns [feed_url, driver] when the link matches a known engine,
      # nil otherwise.
      def self.detect(url, source_url)
        if url.match(MAPPINGS[:blogger])
          [url, :blogger]
        elsif url.match(MAPPINGS[:wordpress]) && url.match(/\/comments\//).nil?
          # ".../comments/feed/" also ends with "/feed/" but is not the posts feed.
          [url, :wordpress]
        elsif url.match(MAPPINGS[:medium])
          [url, :medium]
        elsif url == "/feed.xml"
          [URI.join(source_url, "/feed.xml").to_s, :jekyll]
        end
      end
      private_class_method :detect
    end
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module RSSable
  module Detection
    # Discovers the RSS feed links advertised by a web page.
    class FeedFinder
      # Downloads the page at the given URL and collects the href of every
      # link element whose type is "application/rss+xml".
      #
      # Link elements without an href attribute are skipped, so the result
      # contains only strings. The hrefs are returned exactly as written in
      # the page.
      #
      # @param url [String] address of the page to inspect
      # @return [Array<String>]
      def self.call(url:)
        response = RestClient.get(url)
        html = Nokogiri::HTML(response.body)

        html.css("link[type='application/rss+xml']").map { |node| node[:href] }.compact
      end
    end
  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
module RSSable
  module Parsers
    # Read-only view of an RSS channel built from the raw feed XML.
    class Channel
      # @param feed [String] raw XML of the feed
      # @param driver [Symbol] engine name used to pick the item parser class
      def initialize(feed:, driver:)
        @feed = feed
        @driver = driver
      end

      # Returns the RSS feed title
      # If the channel has no title element it returns nil
      #
      # @return [String, nil]
      def title
        channel_text("title")
      end

      # Returns the RSS feed description
      # If the channel has no description element it returns nil
      #
      # @return [String, nil]
      def description
        channel_text("description")
      end

      # Returns the website link
      # If the channel has no link element it returns nil
      #
      # @return [String, nil]
      def link
        channel_text("link")
      end

      # Returns a collection of the RSS feed items, each wrapped in the item
      # parser class selected for the driver
      #
      # @return [Array]
      def items
        xml.css("item").map { |node| item_class.new(node) }
      end

      private

      attr_reader :feed, :driver

      # Parsed feed document, built on first use.
      def xml
        @xml ||= Nokogiri::XML(feed)
      end

      def item_class
        @item_class ||= ::RSSable::Parsers::ItemClassBuilder.call(
          driver: driver
        )
      end

      # Text of the named direct child of the channel element, or nil when
      # the element is absent. Only direct children are considered so that
      # same-named elements nested deeper (inside an item or an image) are
      # never returned in place of the channel's own value.
      def channel_text(name)
        element = xml.at_xpath("//channel/#{name}")
        element.text unless element.nil?
      end
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module RSSable
  module Parsers
    # Maps a driver name to the class used to parse the feed items.
    class ItemClassBuilder
      # Looks up the item parser class for the given driver. Drivers without
      # a dedicated parser get the generic Items::Base class.
      #
      # @param driver [Symbol] one of :wordpress, :blogger, :medium or any
      #   other value for the generic parser
      # @return [Class]
      def self.call(driver:)
        parser_names = { wordpress: :Wordpress, blogger: :Blogger, medium: :Medium }

        RSSable::Parsers::Items.const_get(parser_names.fetch(driver, :Base), false)
      end
    end
  end
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
module RSSable
  module Parsers
    module Items
      # Generic feed item parser. Every text accessor returns nil when the
      # corresponding element is missing from the item node.
      class Base
        # @param node [Object] the item node; it is queried with #at and #css
        def initialize(node)
          @node = node
        end

        # Title of the item
        #
        # @return [String]
        def title
          text_at("title")
        end

        # Link of the item
        #
        # @return [String]
        def link
          text_at("link")
        end

        # Publication date of the item, as the raw text of pubDate
        #
        # @return [String]
        def published_at
          text_at("pubDate")
        end

        # Description of the item
        #
        # @return [String]
        def description
          text_at("description")
        end

        # Texts of all category elements of the item
        #
        # @return [Array<String>]
        def tags
          node.css("category").map { |category| category.text }
        end

        # Author of the article
        #
        # @return [String]
        def author
          text_at("author")
        end

        # The generic parser has no comments count, so this is always nil
        #
        # @return [NilClass]
        def comments_count
          nil
        end

        private

        attr_reader :node

        # Text of the given element, or nil when the element is nil.
        def node_text(element)
          element.nil? ? nil : element.text
        end

        # Text of the first element matching the selector, or nil.
        def text_at(selector)
          node_text(node.at(selector))
        end
      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module RSSable
  module Parsers
    module Items
      # Item parser for Blogger feeds.
      class Blogger < RSSable::Parsers::Items::Base
        # Returns the comments count
        # If the comments count node does not exist it returns nil
        #
        # @return [Integer, nil]
        def comments_count
          # The leading dot keeps the search inside this item. A bare "//"
          # starts at the document root and would return the first item's
          # value for every item.
          count = node_text(node.at_xpath(".//thr:total"))
          count.to_i unless count.nil?
        end
      end
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module RSSable
  module Parsers
    module Items
      # Item parser for Medium feeds.
      class Medium < RSSable::Parsers::Items::Base
        # Returns the article author, taken from the item's child element
        # named "creator"
        # If the creator node does not exist it returns nil
        #
        # @return [String, nil]
        def author
          creator_node = node.elements.find { |c| c.name == "creator" }
          node_text(creator_node)
        end

        # Returns the article description, taken from content:encoded
        # If the content node does not exist it returns nil
        #
        # @return [String, nil]
        def description
          # The leading dot keeps the search inside this item. A bare "//"
          # starts at the document root and would return the first item's
          # content for every item.
          node_text(node.at_xpath(".//content:encoded"))
        end
      end
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module RSSable
  module Parsers
    module Items
      # Item parser for WordPress feeds.
      class Wordpress < RSSable::Parsers::Items::Base
        # Returns the article author, taken from the item's child element
        # named "creator"
        # If the creator node does not exist it returns nil
        #
        # @return [String, nil]
        def author
          creator_node = node.elements.find { |c| c.name == "creator" }
          node_text(creator_node)
        end

        # Returns the comments count
        # If the comments count node does not exist it returns nil
        #
        # @return [Integer, nil]
        def comments_count
          # The leading dot keeps the search inside this item. A bare "//"
          # starts at the document root and would return the first item's
          # value for every item.
          count = node_text(node.at_xpath(".//slash:comments"))
          count.to_i unless count.nil?
        end
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module RSSable
  # Ties the pipeline together: find the feed links on a page, pick one,
  # download it and wrap it in a channel parser.
  class Processor
    # Returns the RSSable::Parsers::Channel instance for the feed advertised
    # by the page at the given URL
    # If the feed URL is not detected it returns nil
    #
    # @param url [String] address of the website
    # @return [RSSable::Parsers::Channel]
    def self.call(url:)
      feed_url, driver = RSSable::Detection::EngineDetector.call(
        urls: RSSable::Detection::FeedFinder.call(url: url),
        source_url: url
      )

      return nil if feed_url.nil?

      feed_body = RestClient.get(feed_url).body
      RSSable::Parsers::Channel.new(feed: feed_body, driver: driver)
    end
  end
end
|
data/lib/rssable.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'nokogiri'
require 'rest-client'
# The standard library file name is lowercase; 'URI' fails to load on
# case-sensitive file systems.
require 'uri'
require 'rssable/detection/feed_finder'
require 'rssable/detection/engine_detector'
require 'rssable/parsers/items/base'
require 'rssable/parsers/items/wordpress'
require 'rssable/parsers/items/blogger'
require 'rssable/parsers/items/medium'
require 'rssable/parsers/item_class_builder'
require 'rssable/parsers/channel'
require 'rssable/processor'

module RSSable
  # Returns the channel driver instance for the given website URL.
  # The work is delegated to RSSable::Processor.
  #
  # @param url [String] address of the website
  # @return [RSSable::Parsers::Channel]
  def self.subscribe(url)
    RSSable::Processor.call(
      url: url
    )
  end
end
|
metadata
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: rssable
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Paweł Dąbrowski
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2018-04-25 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rest-client
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 2.0.2
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 2.0.2
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: nokogiri
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 1.8.2
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 1.8.2
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
description: Access the RSS channel of any website without worrying about the engine
|
56
|
+
email: dziamber@gmail.com
|
57
|
+
executables: []
|
58
|
+
extensions: []
|
59
|
+
extra_rdoc_files: []
|
60
|
+
files:
|
61
|
+
- lib/rssable.rb
|
62
|
+
- lib/rssable/detection/engine_detector.rb
|
63
|
+
- lib/rssable/detection/feed_finder.rb
|
64
|
+
- lib/rssable/parsers/channel.rb
|
65
|
+
- lib/rssable/parsers/item_class_builder.rb
|
66
|
+
- lib/rssable/parsers/items/base.rb
|
67
|
+
- lib/rssable/parsers/items/blogger.rb
|
68
|
+
- lib/rssable/parsers/items/medium.rb
|
69
|
+
- lib/rssable/parsers/items/wordpress.rb
|
70
|
+
- lib/rssable/processor.rb
|
71
|
+
- lib/rssable/version.rb
|
72
|
+
homepage: http://github.com/rubyhero/rssable
|
73
|
+
licenses:
|
74
|
+
- MIT
|
75
|
+
metadata: {}
|
76
|
+
post_install_message:
|
77
|
+
rdoc_options: []
|
78
|
+
require_paths:
|
79
|
+
- lib
|
80
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
81
|
+
requirements:
|
82
|
+
- - ">="
|
83
|
+
- !ruby/object:Gem::Version
|
84
|
+
version: 2.0.0
|
85
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
requirements: []
|
91
|
+
rubyforge_project:
|
92
|
+
rubygems_version: 2.7.3
|
93
|
+
signing_key:
|
94
|
+
specification_version: 4
|
95
|
+
summary: Access the RSS channel of any website without worrying about the engine
|
96
|
+
test_files: []
|