arssene 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.ruby-version +1 -0
- data/.travis.yml +1 -1
- data/Gemfile.lock +53 -42
- data/README.md +27 -20
- data/arssene.gemspec +25 -24
- data/lib/arssene/lib/interactions/feed.rb +15 -0
- data/lib/arssene/lib/interactions/fetch.rb +28 -0
- data/lib/arssene/lib/interactions/ping.rb +25 -0
- data/lib/arssene/lib/models/channel.rb +8 -0
- data/lib/arssene/lib/models/entry.rb +7 -0
- data/lib/arssene/lib/presenters/channel_presenter.rb +55 -0
- data/lib/arssene/lib/presenters/entry_presenter.rb +30 -0
- data/lib/arssene/lib/repositories/channel_repository.rb +39 -0
- data/lib/arssene/lib/repositories/feed_repository.rb +32 -0
- data/lib/arssene/version.rb +1 -1
- data/lib/arssene.rb +16 -4
- metadata +43 -7
- data/lib/arssene/lib/rss.rb +0 -175
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0b0c8aeb8a061cf07d89e8127c3bb361403a072c6dcfce8ca94e81e23230f877
|
4
|
+
data.tar.gz: 6813554d7301928467699b5153b36fc2278ba4f51a058223b0cb9d0972e5d4d6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4695f325f392c8ac1a3f74fdf5835d4eecab16790cdca081e37096b0fea7d4342369969b5c275372bcaff34d539f3619a74a13cb827e27a86f2082e08f213164
|
7
|
+
data.tar.gz: a03516b3730e72de5c82da56eb8d91b3263315ad6bfe6e5552556e0334196a34393f4a83b63c1003a5b263e18a7096533f31b70db2280fc0e8718e5910a8b715
|
data/.gitignore
CHANGED
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.7.2
|
data/.travis.yml
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,62 +1,73 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
arssene (0.1.
|
4
|
+
arssene (0.1.4)
|
5
|
+
faraday
|
5
6
|
mechanize
|
7
|
+
parallel
|
6
8
|
sanitize
|
7
9
|
|
8
10
|
GEM
|
9
11
|
remote: https://rubygems.org/
|
10
12
|
specs:
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
13
|
+
addressable (2.8.1)
|
14
|
+
public_suffix (>= 2.0.2, < 6.0)
|
15
|
+
connection_pool (2.3.0)
|
16
|
+
crass (1.0.6)
|
17
|
+
diff-lcs (1.5.0)
|
18
|
+
domain_name (0.5.20190701)
|
15
19
|
unf (>= 0.0.5, < 1.0.0)
|
16
|
-
|
20
|
+
faraday (2.5.2)
|
21
|
+
faraday-net_http (>= 2.0, < 3.1)
|
22
|
+
ruby2_keywords (>= 0.0.4)
|
23
|
+
faraday-net_http (3.0.0)
|
24
|
+
http-cookie (1.0.5)
|
17
25
|
domain_name (~> 0.5)
|
18
|
-
mechanize (2.
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
net-http-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
26
|
+
mechanize (2.8.5)
|
27
|
+
addressable (~> 2.8)
|
28
|
+
domain_name (~> 0.5, >= 0.5.20190701)
|
29
|
+
http-cookie (~> 1.0, >= 1.0.3)
|
30
|
+
mime-types (~> 3.0)
|
31
|
+
net-http-digest_auth (~> 1.4, >= 1.4.1)
|
32
|
+
net-http-persistent (>= 2.5.2, < 5.0.dev)
|
33
|
+
nokogiri (~> 1.11, >= 1.11.2)
|
34
|
+
rubyntlm (~> 0.6, >= 0.6.3)
|
35
|
+
webrick (~> 1.7)
|
36
|
+
webrobots (~> 0.1.2)
|
37
|
+
mime-types (3.4.1)
|
28
38
|
mime-types-data (~> 3.2015)
|
29
|
-
mime-types-data (3.
|
30
|
-
mini_portile2 (2.4.0)
|
39
|
+
mime-types-data (3.2022.0105)
|
31
40
|
net-http-digest_auth (1.4.1)
|
32
|
-
net-http-persistent (
|
41
|
+
net-http-persistent (4.0.1)
|
33
42
|
connection_pool (~> 2.2)
|
34
|
-
nokogiri (1.
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
rake (
|
40
|
-
rspec (3.
|
41
|
-
rspec-core (~> 3.
|
42
|
-
rspec-expectations (~> 3.
|
43
|
-
rspec-mocks (~> 3.
|
44
|
-
rspec-core (3.
|
45
|
-
rspec-support (~> 3.
|
46
|
-
rspec-expectations (3.
|
43
|
+
nokogiri (1.13.8-arm64-darwin)
|
44
|
+
racc (~> 1.4)
|
45
|
+
parallel (1.21.0)
|
46
|
+
public_suffix (5.0.0)
|
47
|
+
racc (1.6.0)
|
48
|
+
rake (13.0.6)
|
49
|
+
rspec (3.11.0)
|
50
|
+
rspec-core (~> 3.11.0)
|
51
|
+
rspec-expectations (~> 3.11.0)
|
52
|
+
rspec-mocks (~> 3.11.0)
|
53
|
+
rspec-core (3.11.0)
|
54
|
+
rspec-support (~> 3.11.0)
|
55
|
+
rspec-expectations (3.11.1)
|
47
56
|
diff-lcs (>= 1.2.0, < 2.0)
|
48
|
-
rspec-support (~> 3.
|
49
|
-
rspec-mocks (3.
|
57
|
+
rspec-support (~> 3.11.0)
|
58
|
+
rspec-mocks (3.11.1)
|
50
59
|
diff-lcs (>= 1.2.0, < 2.0)
|
51
|
-
rspec-support (~> 3.
|
52
|
-
rspec-support (3.
|
53
|
-
|
60
|
+
rspec-support (~> 3.11.0)
|
61
|
+
rspec-support (3.11.1)
|
62
|
+
ruby2_keywords (0.0.5)
|
63
|
+
rubyntlm (0.6.3)
|
64
|
+
sanitize (6.0.0)
|
54
65
|
crass (~> 1.0.2)
|
55
|
-
nokogiri (>= 1.
|
56
|
-
nokogumbo (~> 2.0)
|
66
|
+
nokogiri (>= 1.12.0)
|
57
67
|
unf (0.1.4)
|
58
68
|
unf_ext
|
59
|
-
unf_ext (0.0.
|
69
|
+
unf_ext (0.0.8.2)
|
70
|
+
webrick (1.7.0)
|
60
71
|
webrobots (0.1.2)
|
61
72
|
|
62
73
|
PLATFORMS
|
@@ -65,8 +76,8 @@ PLATFORMS
|
|
65
76
|
DEPENDENCIES
|
66
77
|
arssene!
|
67
78
|
bundler (~> 2.0)
|
68
|
-
rake (~>
|
79
|
+
rake (~> 13.0)
|
69
80
|
rspec (~> 3.0)
|
70
81
|
|
71
82
|
BUNDLED WITH
|
72
|
-
2.
|
83
|
+
2.1.4
|
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# Arssene
|
2
2
|
|
3
|
-
Simple RSS solution for rails.
|
3
|
+
Simple RSS solution for rails.
|
4
4
|
|
5
5
|
## Installation
|
6
6
|
|
@@ -21,6 +21,7 @@ Or install it yourself as:
|
|
21
21
|
## Usage
|
22
22
|
|
23
23
|
### Ping a website
|
24
|
+
|
24
25
|
To obtain the rss feed of a website, first you should ping the website.
|
25
26
|
|
26
27
|
```ruby
|
@@ -28,20 +29,20 @@ url = "https://www.theonion.com/"
|
|
28
29
|
rss = Arssene::Feed.ping(url)
|
29
30
|
|
30
31
|
puts rss
|
31
|
-
# => [ {:
|
32
|
+
# => [ { feed: "https://www.theonion.com/rss"} ]
|
32
33
|
```
|
33
34
|
|
34
|
-
You can also send an array of urls
|
35
|
+
You can also send an array of urls.
|
35
36
|
|
36
37
|
```ruby
|
37
38
|
urls = ["http://www.lifehacker.com", "http://www.deadspin.com", "https://www.kotaku.com"]
|
38
39
|
rss = Arssene::Feed.ping(urls)
|
39
40
|
puts rss
|
40
|
-
# =>
|
41
|
-
# [
|
42
|
-
# { :
|
43
|
-
# { :
|
44
|
-
# { :
|
41
|
+
# =>
|
42
|
+
# [
|
43
|
+
# { feed: "https://www.lifehacker.com/rss" },
|
44
|
+
# { feed: "http://www.deadspin.com/rss" },
|
45
|
+
# { feed: "http://www.kotaku.com/rss"}
|
45
46
|
# ]
|
46
47
|
|
47
48
|
```
|
@@ -70,8 +71,8 @@ puts rss
|
|
70
71
|
|
71
72
|
# =>
|
72
73
|
# [
|
73
|
-
# { :
|
74
|
-
# { :
|
74
|
+
# { feed: "https://lifehacker.com/rss" } ,
|
75
|
+
# { error: 500 => Net::HTTPInternalServerError for http://www.anime-town.com/ }
|
75
76
|
# ]
|
76
77
|
```
|
77
78
|
|
@@ -82,10 +83,10 @@ Once you have the correct URL for the feed, you can request the website's feed.
|
|
82
83
|
```ruby
|
83
84
|
url = "https://www.lifehacker.com/rss"
|
84
85
|
rss = Arssene::Feed.request(url)
|
85
|
-
# =>
|
86
|
+
# =>
|
86
87
|
# {
|
87
|
-
# :
|
88
|
-
# :
|
88
|
+
# feed: "https://www.lifehacker.com/rss",
|
89
|
+
# channel: <Arssene::Channel:0x00007f0dbc011500>
|
89
90
|
# }
|
90
91
|
|
91
92
|
# Where if your feed is rss[:channel], you could:
|
@@ -98,7 +99,7 @@ puts rss.link
|
|
98
99
|
# => https://www.lifehacker.com
|
99
100
|
|
100
101
|
puts rss.feed_type
|
101
|
-
# => rss
|
102
|
+
# => rss
|
102
103
|
|
103
104
|
puts rss.feed_version
|
104
105
|
# => 2.0
|
@@ -118,22 +119,25 @@ puts rss.entries[0] # Array of type Entry
|
|
118
119
|
# link: https://theinventory.com/ravpower-struck-a-61-watt-blow-in-the-usb-c-gan-wars-1834586407
|
119
120
|
# description: <p> Description in html </p>
|
120
121
|
# publication_date: 2019-05-13 16:15:00.000000000 +00:00
|
121
|
-
# author:
|
122
|
-
# content:
|
122
|
+
# author:
|
123
|
+
# content:
|
123
124
|
```
|
124
125
|
|
125
126
|
## Options
|
127
|
+
|
126
128
|
You can send an additional parameter to the request method with a hash of options to filter the response of the feed.
|
127
129
|
|
128
130
|
### :ignore parameter
|
131
|
+
|
129
132
|
If you'd like to filter feeds that include the following words in the title, you can by doing the following:
|
130
133
|
|
131
134
|
```ruby
|
132
135
|
ignore = ["comment", "comments", "store", "corporate"]
|
133
136
|
|
134
137
|
url = "https://ignore-feed-website.com/rss"
|
135
|
-
rss = Arssene::Feed.request(url, { :
|
138
|
+
rss = Arssene::Feed.request(url, { ignore: ignore })
|
136
139
|
```
|
140
|
+
|
137
141
|
If Arssene finds that the feed is not relevant according to your parameters, it will set the 'relevant' property to false. Otherwise, by default all feeds return true for the 'relevant' property.
|
138
142
|
|
139
143
|
```ruby
|
@@ -144,6 +148,7 @@ puts feed.relevant
|
|
144
148
|
```
|
145
149
|
|
146
150
|
### :from_date parameter
|
151
|
+
|
147
152
|
You can specify the date from which you'd like to include entries. The :from_date parameter does NOT include the entries of the date sent.
|
148
153
|
|
149
154
|
```ruby
|
@@ -151,22 +156,24 @@ last_days = DateTime.now - 2
|
|
151
156
|
# => 2019-05-12T15:45:49+02:00
|
152
157
|
|
153
158
|
url = "https://www.kotaku.com/rss"
|
154
|
-
rss = Arssene::Feed.request(url, { :
|
159
|
+
rss = Arssene::Feed.request(url, { from_date: last_days })
|
155
160
|
```
|
161
|
+
|
156
162
|
Entries will include only those from the date specified all the way up to the newest. If you'd like to include the day you need, you can send an additional day to the :from_date parameter.
|
157
163
|
|
158
164
|
### :limit parameter
|
165
|
+
|
159
166
|
You can also specify a limit of entries that you'd like to receive for a given result.
|
160
167
|
|
161
168
|
```ruby
|
162
169
|
url = "https://www.kotaku.com/rss"
|
163
|
-
rss = Arssene::Feed.request(url, { :
|
170
|
+
rss = Arssene::Feed.request(url, { limit: 5 })
|
164
171
|
|
165
172
|
feed = rss[:channel]
|
166
173
|
|
167
174
|
# Should be the latest 5
|
168
175
|
puts feed.entries.length
|
169
|
-
# => 5
|
176
|
+
# => 5
|
170
177
|
```
|
171
178
|
|
172
179
|
You can also combine any of the three specified parameters to suit your request.
|
data/arssene.gemspec
CHANGED
@@ -1,44 +1,45 @@
|
|
1
|
-
|
2
|
-
lib = File.expand_path("../lib", __FILE__)
|
1
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
2
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
-
require
|
3
|
+
require 'arssene/version'
|
5
4
|
|
6
5
|
Gem::Specification.new do |spec|
|
7
|
-
spec.name =
|
6
|
+
spec.name = 'arssene'
|
8
7
|
spec.version = Arssene::VERSION
|
9
|
-
spec.authors = [
|
10
|
-
spec.email = [
|
8
|
+
spec.authors = ['Vic Carrasco']
|
9
|
+
spec.email = ['vic@viccarrasco.com']
|
11
10
|
|
12
|
-
spec.summary =
|
13
|
-
spec.description =
|
14
|
-
spec.homepage =
|
15
|
-
spec.license =
|
11
|
+
spec.summary = 'Simple RSS solution for rails'
|
12
|
+
spec.description = 'Gem for retrieving entries from RSS feeds given the URL of a feed or website'
|
13
|
+
spec.homepage = 'https://github.com/viccarrasco/arssene'
|
14
|
+
spec.license = 'MIT'
|
16
15
|
|
17
16
|
# Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
|
18
17
|
# to allow pushing to a single host or delete this section to allow pushing to any host.
|
19
18
|
if spec.respond_to?(:metadata)
|
20
|
-
spec.metadata[
|
19
|
+
spec.metadata['allowed_push_host'] = 'https://rubygems.org'
|
21
20
|
|
22
|
-
spec.metadata[
|
23
|
-
spec.metadata[
|
24
|
-
spec.metadata[
|
21
|
+
spec.metadata['homepage_uri'] = spec.homepage
|
22
|
+
spec.metadata['source_code_uri'] = 'https://github.com/viccarrasco/arssene'
|
23
|
+
spec.metadata['changelog_uri'] = 'https://github.com/viccarrasco/arssene/blob/master/CHANGE_LOG.md'
|
25
24
|
else
|
26
|
-
raise
|
27
|
-
|
25
|
+
raise 'RubyGems 2.0 or newer is required to protect against ' \
|
26
|
+
'public gem pushes.'
|
28
27
|
end
|
29
28
|
|
30
29
|
# Specify which files should be added to the gem when it is released.
|
31
30
|
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
32
|
-
spec.files
|
31
|
+
spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
|
33
32
|
`git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
34
33
|
end
|
35
|
-
spec.bindir =
|
34
|
+
spec.bindir = 'exe'
|
36
35
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
37
|
-
spec.require_paths = [
|
36
|
+
spec.require_paths = ['lib']
|
38
37
|
|
39
|
-
spec.add_development_dependency
|
40
|
-
spec.add_development_dependency
|
41
|
-
spec.add_development_dependency
|
42
|
-
spec.add_dependency
|
43
|
-
spec.add_dependency
|
38
|
+
spec.add_development_dependency 'bundler', '~> 2.0'
|
39
|
+
spec.add_development_dependency 'rake', '~> 13.0'
|
40
|
+
spec.add_development_dependency 'rspec', '~> 3.0'
|
41
|
+
spec.add_dependency 'faraday'
|
42
|
+
spec.add_dependency 'mechanize'
|
43
|
+
spec.add_dependency 'parallel'
|
44
|
+
spec.add_dependency 'sanitize'
|
44
45
|
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Arssene
  # Fetches one or many feeds through ChannelRepository and runs the result
  # through ChannelPresenter#filter.
  class Fetch
    def initialize
      @channel_repository = Arssene::ChannelRepository.new
    end

    # Fetches the feed(s) located at +urls+ and applies the given filters.
    #
    # @param urls [String, Enumerable<String>] one feed URL, or a collection
    #   of feed URLs that is processed with Parallel.map
    # @param filters [Hash] keyword options handed to ChannelPresenter#filter
    # @return [Object, Array] the filtered result for a single String URL,
    #   otherwise one result per URL
    def request(urls, **filters)
      return fetch_channel(urls, **filters) if urls.is_a?(String)

      Parallel.map(urls) { |url| fetch_channel(url, **filters) }
    end

    private

    attr_reader :channel_repository

    # Fetches a single feed and filters it.
    #
    # The filters must be forwarded with a double splat: passing the Hash
    # positionally to a method that only declares **filters raises
    # ArgumentError on Ruby 3.
    def fetch_channel(url, **filters)
      feed = channel_repository.fetch_as_channel(url)

      # A Hash result is the repository's error report, not a channel; the
      # presenter would call channel-only methods on it, so return it as is.
      return feed if feed.is_a?(Hash)

      Arssene::ChannelPresenter.new(feed).filter(filters)
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Arssene
  # Asks FeedRepository for the feed links advertised by one or more sites.
  class Ping
    def initialize
      @feed_repository = Arssene::FeedRepository.new
    end

    # Looks up feed links for +urls+.
    #
    # @param urls [String, Enumerable<String>] a single site URL or a
    #   collection of site URLs
    # @return [Object, Array] for a String, whatever the repository returns;
    #   otherwise the first repository result for each URL (via Parallel.map)
    def request(urls)
      unless urls.instance_of?(String)
        return Parallel.map(urls) { |site| retrieve_feed_urls(site).first }
      end

      retrieve_feed_urls(urls)
    end

    private

    attr_reader :feed_repository

    # Thin delegation to the repository.
    def retrieve_feed_urls(target)
      feed_repository.retrieve_feed_urls(target)
    end
  end
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Arssene
  # Applies the optional :ignore, :from_date and :limit filters to a channel.
  # The channel is mutated in place and also returned.
  class ChannelPresenter
    attr_accessor :channel

    # @param channel [Object] object exposing #title, #entries, #entries=
    #   and #relevant=
    def initialize(channel)
      @channel = channel
    end

    # Runs every filter whose option is present.
    #
    # @param options [Hash, nil] may contain :ignore, :from_date and :limit
    # @return [Object] the (mutated) channel
    def filter(options)
      return channel unless options

      filter_by_ignore(options) if options[:ignore]
      filter_by_date(options) if options[:from_date]
      limit(options) if options[:limit]

      channel
    end

    private

    # Marks the channel as not relevant when its title (lower-cased, with all
    # whitespace removed) matches any of the ignore words.
    def filter_by_ignore(options)
      return if channel.entries.empty?

      # Blank words are dropped: an empty alternative would make the pattern
      # match every title and flag every channel as irrelevant.
      words = Array(options[:ignore]).map(&:to_s).reject(&:empty?)
      return if words.empty?

      title = channel.title.downcase.split.join
      # Words are interpolated as regular-expression source, not escaped.
      pattern = /.?(#{words.join('|')}).?/

      channel.relevant = !pattern.match?(title)
    end

    # Removes every entry from the start of the list up to and including the
    # first entry whose publication_date equals options[:from_date].
    # Nothing is removed when no entry has exactly that date.
    # NOTE(review): this keeps the entries positioned *after* the match;
    # confirm that this matches the feed ordering the README describes.
    def filter_by_date(options)
      return if channel.entries.empty?

      from_date = options[:from_date]
      index = channel.entries.index { |entry| entry.publication_date == from_date }

      channel.entries.slice!(0..index) if index
    end

    # Keeps only the last options[:limit] entries when there are more.
    # NOTE(review): takes entries from the end of the list; confirm this is
    # the intended side for "latest" entries.
    def limit(options)
      return if channel.entries.empty?

      quantity = options[:limit]

      channel.entries = channel.entries.last(quantity) if channel.entries.length > quantity
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Arssene
  # Converts parsed feed items into entry objects.
  class EntryPresenter
    # Builds one entry per item.
    #
    # @param handler [Object, Class] an entry prototype (duplicated for every
    #   item) or an entry class (instantiated for every item); must expose
    #   writers for title, link, description, publication_date, author and
    #   content
    # @param items [Enumerable] objects responding to #title, #link,
    #   #description, #pubDate and #author
    # @return [Array] one distinct entry per item, in item order
    def as_entries(handler:, items:)
      items.map do |item|
        # A fresh object per item: assigning to the handler itself would make
        # every element of the result the same object, holding only the last
        # item's values.
        entry = handler.is_a?(Class) ? handler.new : handler.dup
        entry.title = item.title
        entry.link = item.link
        entry.description = item.description
        entry.publication_date = item.pubDate
        entry.author = item.author
        entry.content = generate_content(item)
        entry
      end
    end

    private

    # Picks the richest body available on the item: #content, then
    # #content_encoded, falling back to the item's description.
    def generate_content(item)
      if item.respond_to?(:content)
        item.content
      elsif item.respond_to?(:content_encoded)
        item.content_encoded
      else
        item.description
      end
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Arssene
  # Downloads a feed over HTTP and maps it onto an Arssene::Channel.
  class ChannelRepository
    # Fetches and parses the feed at +url+.
    #
    # @param url [String] feed URL
    # @return [Arssene::Channel, Hash] the populated channel, or
    #   { error: message } when any StandardError is raised on the way
    #   (non-200 response, parse failure, ...)
    def fetch_as_channel(url)
      response = Faraday.get(url)
      raise "Unable to reach feed: #{url} | error code: #{response.status}" unless response.status == 200

      feed = RSS::Parser.parse(response.body)
      # Without this guard a nil parse result would surface as an opaque
      # NoMethodError message in the error hash.
      raise "Unable to parse feed: #{url}" unless feed

      build_channel(feed)
    rescue StandardError => e
      { error: e.to_s }
    end

    private

    # Copies the feed's channel metadata and items onto a new Channel.
    def build_channel(feed)
      source = feed.channel

      channel = Arssene::Channel.new
      channel.title = source.title
      channel.link = source.link
      channel.host = URI.parse(source.link).host
      channel.feed_type = feed.feed_type
      channel.feed_version = feed.feed_version
      channel.description = source.description
      channel.publication_date = source.pubDate
      channel.language = source.language&.downcase
      channel.copyright = source.copyright
      channel.entries = extract_items(feed)
      channel.meta = feed
      channel.relevant = true
      channel
    end

    # Converts the feed items into entries, handing the presenter a new Entry
    # for each item so no two items share one entry object.
    def extract_items(feed)
      presenter = Arssene::EntryPresenter.new
      feed.items.flat_map do |item|
        presenter.as_entries(handler: Arssene::Entry.new, items: [item])
      end
    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Arssene
  # Scrapes a web page for the RSS feed links it advertises.
  class FeedRepository
    def initialize
      @agent = Mechanize.new
    end

    # Collects the href of every RSS <link> element found on the page.
    #
    # @param urls [String] address of the page to inspect
    # @return [Array<Hash>] one { feed: href } per link with all whitespace
    #   stripped from the href, or a single { error: message } element when
    #   the page has no such link or any StandardError is raised
    def retrieve_feed_urls(urls)
      links = embeded_html_source_links(urls)
      raise 'Non existing feeds' if links.empty?

      links.map { |link| { feed: link.attr('href').split.join } }
    rescue StandardError => failure
      [{ error: failure.to_s }]
    end

    private

    attr_reader :agent

    # Loads the page and returns its link elements typed application/rss+xml.
    def embeded_html_source_links(url)
      page = agent.get(URI.parse(url))
      page.search(".//link[@type='application/rss+xml']")
    end
  end
end
|
data/lib/arssene/version.rb
CHANGED
data/lib/arssene.rb
CHANGED
@@ -1,9 +1,21 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'arssene/version'
|
3
4
|
require 'rss'
|
4
|
-
require '
|
5
|
-
require 'sanitize'
|
5
|
+
require 'faraday'
|
6
6
|
require 'mechanize'
|
7
|
+
require 'parallel'
|
8
|
+
require 'sanitize'
|
9
|
+
|
10
|
+
require 'arssene/lib/models/channel'
|
11
|
+
require 'arssene/lib/models/entry'
|
12
|
+
require 'arssene/lib/interactions/feed'
|
13
|
+
require 'arssene/lib/interactions/ping'
|
14
|
+
require 'arssene/lib/interactions/fetch'
|
15
|
+
require 'arssene/lib/repositories/feed_repository'
|
16
|
+
require 'arssene/lib/repositories/channel_repository'
|
17
|
+
require 'arssene/lib/presenters/entry_presenter'
|
18
|
+
require 'arssene/lib/presenters/channel_presenter'
|
7
19
|
module Arssene
|
8
20
|
class Error < StandardError; end
|
9
21
|
# Your code goes here...
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: arssene
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Vic Carrasco
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-10-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -30,14 +30,14 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '13.0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
40
|
+
version: '13.0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: rspec
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -52,6 +52,20 @@ dependencies:
|
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '3.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: faraday
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
55
69
|
- !ruby/object:Gem::Dependency
|
56
70
|
name: mechanize
|
57
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -66,6 +80,20 @@ dependencies:
|
|
66
80
|
- - ">="
|
67
81
|
- !ruby/object:Gem::Version
|
68
82
|
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: parallel
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
69
97
|
- !ruby/object:Gem::Dependency
|
70
98
|
name: sanitize
|
71
99
|
requirement: !ruby/object:Gem::Requirement
|
@@ -90,6 +118,7 @@ extra_rdoc_files: []
|
|
90
118
|
files:
|
91
119
|
- ".gitignore"
|
92
120
|
- ".rspec"
|
121
|
+
- ".ruby-version"
|
93
122
|
- ".travis.yml"
|
94
123
|
- CHANGE_LOG.md
|
95
124
|
- CODE_OF_CONDUCT.md
|
@@ -102,7 +131,15 @@ files:
|
|
102
131
|
- bin/console
|
103
132
|
- bin/setup
|
104
133
|
- lib/arssene.rb
|
105
|
-
- lib/arssene/lib/
|
134
|
+
- lib/arssene/lib/interactions/feed.rb
|
135
|
+
- lib/arssene/lib/interactions/fetch.rb
|
136
|
+
- lib/arssene/lib/interactions/ping.rb
|
137
|
+
- lib/arssene/lib/models/channel.rb
|
138
|
+
- lib/arssene/lib/models/entry.rb
|
139
|
+
- lib/arssene/lib/presenters/channel_presenter.rb
|
140
|
+
- lib/arssene/lib/presenters/entry_presenter.rb
|
141
|
+
- lib/arssene/lib/repositories/channel_repository.rb
|
142
|
+
- lib/arssene/lib/repositories/feed_repository.rb
|
106
143
|
- lib/arssene/version.rb
|
107
144
|
homepage: https://github.com/viccarrasco/arssene
|
108
145
|
licenses:
|
@@ -127,8 +164,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
127
164
|
- !ruby/object:Gem::Version
|
128
165
|
version: '0'
|
129
166
|
requirements: []
|
130
|
-
|
131
|
-
rubygems_version: 2.7.7
|
167
|
+
rubygems_version: 3.1.4
|
132
168
|
signing_key:
|
133
169
|
specification_version: 4
|
134
170
|
summary: Simple RSS solution for rails
|
data/lib/arssene/lib/rss.rb
DELETED
@@ -1,175 +0,0 @@
|
|
1
|
-
module Arssene
|
2
|
-
class Channel
|
3
|
-
attr_accessor :title
|
4
|
-
attr_accessor :link
|
5
|
-
attr_accessor :host
|
6
|
-
attr_accessor :feed_type
|
7
|
-
attr_accessor :feed_version
|
8
|
-
attr_accessor :description
|
9
|
-
attr_accessor :publication_date
|
10
|
-
attr_accessor :language
|
11
|
-
attr_accessor :copyright
|
12
|
-
attr_accessor :entries
|
13
|
-
attr_accessor :meta
|
14
|
-
attr_accessor :relevant
|
15
|
-
end
|
16
|
-
|
17
|
-
class Entry
|
18
|
-
attr_accessor :title
|
19
|
-
attr_accessor :link
|
20
|
-
attr_accessor :description
|
21
|
-
attr_accessor :content
|
22
|
-
attr_accessor :publication_date
|
23
|
-
attr_accessor :author
|
24
|
-
end
|
25
|
-
|
26
|
-
class Feed
|
27
|
-
def self.ping(uri)
|
28
|
-
response = []
|
29
|
-
begin
|
30
|
-
if uri.is_a?(String)
|
31
|
-
feed_uris = Feed.retrieve(uri)
|
32
|
-
if feed_uris
|
33
|
-
feed_uris.each do |feed|
|
34
|
-
link = (feed.attr('href')).split.join
|
35
|
-
response.push({:feed => link})
|
36
|
-
end
|
37
|
-
end
|
38
|
-
elsif uri.is_a?(Array)
|
39
|
-
uri.each do |url|
|
40
|
-
Thread.new {
|
41
|
-
begin
|
42
|
-
feed_uris = Feed.retrieve(url)
|
43
|
-
if feed_uris
|
44
|
-
feed_uris.each do |feed|
|
45
|
-
link = (feed.attr('href')).split.join
|
46
|
-
response.push({:feed => link})
|
47
|
-
end
|
48
|
-
end
|
49
|
-
rescue => exception
|
50
|
-
response.push({:error => exception.to_s})
|
51
|
-
end
|
52
|
-
}.join
|
53
|
-
end
|
54
|
-
else
|
55
|
-
[]
|
56
|
-
end
|
57
|
-
response
|
58
|
-
rescue => exception
|
59
|
-
response.push({:error => exception})
|
60
|
-
end
|
61
|
-
response
|
62
|
-
end
|
63
|
-
|
64
|
-
def self.request(uri, options = {})
|
65
|
-
response = []
|
66
|
-
|
67
|
-
begin
|
68
|
-
if uri.is_a?(String)
|
69
|
-
channel = fetch(uri, options)
|
70
|
-
if (channel.is_a?(Hash) && channel.has_key?(:error))
|
71
|
-
return ({:error => channel[:error]})
|
72
|
-
else
|
73
|
-
channel = self.filter_by_options(channel, options)
|
74
|
-
return ({:feed => uri, :channel => channel})
|
75
|
-
end
|
76
|
-
elsif uri.is_a?(Array)
|
77
|
-
uri.each do |url|
|
78
|
-
Thread.new {
|
79
|
-
channel = fetch(url, options)
|
80
|
-
if (channel.is_a?(Hash) && channel.has_key?(:error))
|
81
|
-
response.push({:error => channel[:error]})
|
82
|
-
else
|
83
|
-
channel = self.filter_by_options(channel, options)
|
84
|
-
response.push({:feed => url, :channel => channel})
|
85
|
-
end
|
86
|
-
}.join
|
87
|
-
end
|
88
|
-
else
|
89
|
-
[]
|
90
|
-
end
|
91
|
-
rescue => exception
|
92
|
-
response.push({:error => exception.to_s})
|
93
|
-
end
|
94
|
-
response
|
95
|
-
end
|
96
|
-
|
97
|
-
private
|
98
|
-
def self.fetch(uri, options)
|
99
|
-
begin
|
100
|
-
channel = Channel.new
|
101
|
-
open(uri) do |rss|
|
102
|
-
feed = RSS::Parser.parse(rss)
|
103
|
-
uri = URI.parse(feed.channel.link)
|
104
|
-
channel.title = feed.channel.title
|
105
|
-
channel.link = feed.channel.link
|
106
|
-
channel.host = uri.host
|
107
|
-
channel.feed_type = feed.feed_type
|
108
|
-
channel.feed_version= feed.feed_version
|
109
|
-
channel.description = feed.channel.description
|
110
|
-
channel.publication_date = feed.channel.pubDate
|
111
|
-
channel.language = (feed.channel.language) ? feed.channel.language.downcase : nil
|
112
|
-
channel.copyright = feed.channel.copyright
|
113
|
-
channel.entries = extract_items feed
|
114
|
-
channel.meta = feed
|
115
|
-
channel.relevant = true
|
116
|
-
end
|
117
|
-
rescue => exception
|
118
|
-
return ({ :error => exception.to_s })
|
119
|
-
end
|
120
|
-
channel
|
121
|
-
end
|
122
|
-
|
123
|
-
def self.filter_by_options(channel, options)
|
124
|
-
if options.has_key?(:ignore)
|
125
|
-
title = channel.title.downcase.split.join
|
126
|
-
ignore = options[:ignore]
|
127
|
-
ignore = (ignore.is_a?(Array)) ? (ignore = ignore.join('|')) : ignore
|
128
|
-
rxp = /.?(#{ignore}).?/
|
129
|
-
|
130
|
-
channel.relevant = (rxp.match(title) == false || rxp.match(title) == nil)
|
131
|
-
end
|
132
|
-
|
133
|
-
if options.has_key?(:from_date)
|
134
|
-
if channel.entries.length > 0
|
135
|
-
index = channel.entries.index {|entry| entry.publication_date == options[:from_date]}
|
136
|
-
channel.entries = (index) ? channel.entries.slice(0..index) : channel.entries
|
137
|
-
end
|
138
|
-
end
|
139
|
-
|
140
|
-
if options.has_key?(:limit)
|
141
|
-
limit = (options[:limit]-1)
|
142
|
-
channel.entries = channel.entries.slice(0..limit) if (channel.entries.length > limit)
|
143
|
-
end
|
144
|
-
channel
|
145
|
-
end
|
146
|
-
|
147
|
-
def self.retrieve(uri)
|
148
|
-
agent = Mechanize.new
|
149
|
-
uri = URI.parse(uri)
|
150
|
-
site = agent.get(uri)
|
151
|
-
site.search(".//link[@type='application/rss+xml']")
|
152
|
-
end
|
153
|
-
|
154
|
-
def self.extract_items(feed)
|
155
|
-
items = []
|
156
|
-
feed.items.each do |i|
|
157
|
-
entry = Entry.new
|
158
|
-
entry.title = i.title
|
159
|
-
entry.link = i.link
|
160
|
-
entry.description = i.description
|
161
|
-
entry.publication_date = i.pubDate
|
162
|
-
entry.author = i.author
|
163
|
-
if i.respond_to?("content")
|
164
|
-
entry.content = i.content
|
165
|
-
elsif i.respond_to?("content_encoded")
|
166
|
-
entry.content = i.content_encoded
|
167
|
-
else
|
168
|
-
entry.content = entry.description
|
169
|
-
end
|
170
|
-
items.push(entry)
|
171
|
-
end
|
172
|
-
items
|
173
|
-
end
|
174
|
-
end
|
175
|
-
end
|