logstash-input-multirss 1.0.1 → 1.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +43 -0
- data/lib/logstash/inputs/multirss.rb +11 -10
- data/logstash-input-multirss.gemspec +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8e9c37035e249154e18a6791d97ed293dae87c8fe7c50dfe615362df6271be84
|
4
|
+
data.tar.gz: be4e1b502d9c7d6d502e1c56f01e339e250a402885fd03e4824a39f467366682
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 25d8d59b55f0f40faab1d0b65bbe06811875516a8ef6547879b188622267983c81ce47c9408c2af2a120b5f3312b2398dc10684dffaef19593e8cf884e813a8e
|
7
|
+
data.tar.gz: 4ffc47877ab4d21928b3d3cb4a96531f1d660d6bb113ac72010a5ee3ee99f647bfeea02398c548a7c3993acc7e77cbc0ad34cef44cea23dcd053a5bbb42400fc
|
data/README.md
CHANGED
@@ -4,6 +4,49 @@ This is a plugin for [Logstash](https://github.com/elastic/logstash).
|
|
4
4
|
|
5
5
|
It is fully free and fully open source. The license is Apache 2.0, meaning you are pretty much free to use it however you want in whatever way.
|
6
6
|
|
7
|
+
# Install
|
8
|
+
|
9
|
+
You can install the plugin from https://rubygems.org/gems/logstash-input-multirss, or build it yourself inside a Logstash service or container with:
|
10
|
+
|
11
|
+
git clone https://github.com/felixramirezgarcia/logstash-input-multirss
|
12
|
+
rm logstash-input-multirss-[nº_version].gem
|
13
|
+
ruby -S gem build logstash-input-multirss.gemspec
|
14
|
+
logstash-plugin install logstash-input-multirss-[nº_version].gem
|
15
|
+
|
16
|
+
# Pipeline Example
|
17
|
+
|
18
|
+
input {
|
19
|
+
multirss
|
20
|
+
{
|
21
|
+
multi_feed => ['https://www.diaridegirona.cat/servicios/rss/rss.jsp','https://www.elconfidencial.com/rss/','https://www.20minutos.es/sindicacion/','http://www.europapress.es/contenidosrss/','https://www.abc.es/rss/','https://servicios.elpais.com/rss/','http://www.expansion.com/rss/','https://cronicaglobal.elespanol.com/rss.html','https://www.diarioinformacion.com/servicios/rss/rss.jsp']
|
22
|
+
|
23
|
+
one_feed => ['http://www.finanzas.com/rss/noticiasportada.xml','https://www.invertia.com/es/rss/-/rss/getHome','https://www.invertia.com/es/rss/-/rss/getHomeExtra','https://www.invertia.com/es/rss/-/rss/getNews?_rss_WAR_ivrssportlet_category=mercados']
|
24
|
+
|
25
|
+
interval => 1000
|
26
|
+
|
27
|
+
blacklist => ['google.com','yahoo.com','live.com','netvibes.com','bloglines.com','feedly.com','/atom']
|
28
|
+
}
|
29
|
+
}
|
30
|
+
|
31
|
+
filter {
|
32
|
+
|
33
|
+
}
|
34
|
+
|
35
|
+
|
36
|
+
output {
|
37
|
+
file {
|
38
|
+
path => "/tmp/rss_links.json"
|
39
|
+
codec => "json"
|
40
|
+
}
|
41
|
+
}
|
42
|
+
|
43
|
+
|
44
|
+
|
45
|
+
The difference between the multi_feed and one_feed attributes is that multi_feed takes the URI of a parent page on which several RSS (XML) feeds are linked, whereas one_feed takes the URI of a single feed. Use one_feed when you want to read only one of those links. A visual example can be seen by visiting the following links:
|
46
|
+
|
47
|
+
Parent (multi_feed) => http://rss.elmundo.es/rss/
|
48
|
+
Child (one_feed) => http://estaticos.elmundo.es/elmundo/rss/portada.xml
|
49
|
+
|
7
50
|
## Documentation
|
8
51
|
|
9
52
|
Logstash provides infrastructure to automatically generate documentation for this plugin. We use the asciidoc format to write documentation so any comments in the source code will be first converted into asciidoc and then into html. All plugin documentation are placed under one [central location](http://www.elastic.co/guide/en/logstash/current/).
|
@@ -5,9 +5,10 @@ require "stud/interval"
|
|
5
5
|
require "net/http"
|
6
6
|
require "uri"
|
7
7
|
require "mechanize"
|
8
|
-
require "rss"
|
9
8
|
require "nokogiri"
|
10
9
|
|
10
|
+
#if you want to debug it you just have to uncomment the puts
|
11
|
+
|
11
12
|
class LogStash::Inputs::Multirss < LogStash::Inputs::Base
|
12
13
|
config_name "multirss"
|
13
14
|
|
@@ -22,7 +23,7 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
|
|
22
23
|
#Set de interval for stoppable_sleep
|
23
24
|
config :interval, :validate => :number, :default => 3600
|
24
25
|
|
25
|
-
#Set de black list to forget
|
26
|
+
#Set de black list to forget read
|
26
27
|
config :blacklist, :validate => :array, :default => []
|
27
28
|
|
28
29
|
public
|
@@ -39,7 +40,7 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
|
|
39
40
|
while !stop?
|
40
41
|
|
41
42
|
@multi_feed.each do |rss|
|
42
|
-
puts "Read parent: " + rss
|
43
|
+
#puts "Read parent: " + rss
|
43
44
|
begin
|
44
45
|
page = @agent.get(rss)
|
45
46
|
page.links.each do |link|
|
@@ -48,16 +49,16 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
|
|
48
49
|
end
|
49
50
|
end
|
50
51
|
rescue
|
51
|
-
puts "Fail to get " + rss + "
|
52
|
+
#puts "Fail to get " + rss + " childrens links"
|
52
53
|
end
|
53
54
|
|
54
55
|
links = urls.uniq
|
55
56
|
links.each do |link|
|
56
57
|
begin
|
57
58
|
response_link(link,queue)
|
58
|
-
puts "Read clidren: " + link
|
59
|
+
#puts "Read clidren: " + link
|
59
60
|
rescue
|
60
|
-
puts "Fail to get " + link + " children"
|
61
|
+
#puts "Fail to get " + link + " children"
|
61
62
|
next
|
62
63
|
end
|
63
64
|
end
|
@@ -73,9 +74,9 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
|
|
73
74
|
all_links.each do |link|
|
74
75
|
begin
|
75
76
|
response_link(link,queue)
|
76
|
-
puts "Read clidren: " + link
|
77
|
+
#puts "Read clidren: " + link
|
77
78
|
rescue
|
78
|
-
puts "Fail to get " + link
|
79
|
+
#puts "Fail to get " + link
|
79
80
|
next
|
80
81
|
end # begin
|
81
82
|
end # all_links loop
|
@@ -115,8 +116,8 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
|
|
115
116
|
|
116
117
|
@logger.error("Error : ", :exception => ex)
|
117
118
|
rescue => exc
|
118
|
-
puts "ERROR"
|
119
|
-
|
119
|
+
#puts "ERROR"
|
120
|
+
#@logger.error("Uknown error while parsing the feed", :exception => exc)
|
120
121
|
end # begin
|
121
122
|
end # def response_link
|
122
123
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = 'logstash-input-multirss'
|
3
|
-
s.version = '1.0.
|
3
|
+
s.version = '1.0.2'
|
4
4
|
s.licenses = ['Apache-2.0']
|
5
5
|
s.summary = 'Simple multi rss plugin'
|
6
6
|
s.description = 'This plugin needs a list of links of different rss. Get all the links of the main feed pages and get all the content of each of the links.'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: logstash-input-multirss
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Felix R G
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-08-
|
11
|
+
date: 2018-08-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|