logstash-input-multirss 1.0.2 → 1.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +4 -26
- data/lib/logstash/inputs/multirss.rb +15 -9
- data/logstash-input-multirss.gemspec +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 13067ead10a7d5f0f79e4917f438fd4ab6bb94dd52ee37d216cecd57fd2b9c01
|
4
|
+
data.tar.gz: 17c13ebebe0b913e9e79d77e1ef3093dc4cc2d3af16ea122e2c9ac6a61315d89
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b37d8389d3b92271e53160ed809695fb9dcdc3158f04270f6bf722640837b9982d9933db17a6147c2d8704b25778d752874e4cdcb5c9c58483b41638c8846e7b
|
7
|
+
data.tar.gz: b79f50fad25ebe755440e8b421d47ef6b2dc38b770fbabb03fb13a9d8411d33681995ba7d2e3ab1d3c0820bd1190ab3c9258899e7abfb6f4b14f6d848398f087
|
data/README.md
CHANGED
@@ -9,38 +9,16 @@ It is fully free and fully open source. The license is Apache 2.0, meaning you a
|
|
9
9
|
You can install the plugin from https://rubygems.org/gems/logstash-input-multirss, or build it yourself in a Logstash service or container with:
|
10
10
|
|
11
11
|
git clone https://github.com/felixramirezgarcia/logstash-input-multirss
|
12
|
+
|
12
13
|
rm logstash-input-multirss-[nº_version].gem
|
14
|
+
|
13
15
|
ruby -S gem build logstash-input-multirss.gemspec
|
16
|
+
|
14
17
|
logstash-plugin install logstash-input-multirss-[nº_version].gem
|
15
18
|
|
16
19
|
# Pipeline Example
|
17
20
|
|
18
|
-
input
|
19
|
-
multirss
|
20
|
-
{
|
21
|
-
multi_feed => ['https://www.diaridegirona.cat/servicios/rss/rss.jsp','https://www.elconfidencial.com/rss/','https://www.20minutos.es/sindicacion/','http://www.europapress.es/contenidosrss/','https://www.abc.es/rss/','https://servicios.elpais.com/rss/','http://www.expansion.com/rss/','https://cronicaglobal.elespanol.com/rss.html','https://www.diarioinformacion.com/servicios/rss/rss.jsp']
|
22
|
-
|
23
|
-
one_feed => ['http://www.finanzas.com/rss/noticiasportada.xml','https://www.invertia.com/es/rss/-/rss/getHome','https://www.invertia.com/es/rss/-/rss/getHomeExtra','https://www.invertia.com/es/rss/-/rss/getNews?_rss_WAR_ivrssportlet_category=mercados']
|
24
|
-
|
25
|
-
interval => 1000
|
26
|
-
|
27
|
-
blacklist => ['google.com','yahoo.com','live.com','netvibes.com','bloglines.com','feedly.com','/atom']
|
28
|
-
}
|
29
|
-
}
|
30
|
-
|
31
|
-
filter {
|
32
|
-
|
33
|
-
}
|
34
|
-
|
35
|
-
|
36
|
-
output {
|
37
|
-
file {
|
38
|
-
path => "/tmp/rss_links.json"
|
39
|
-
codec => "json"
|
40
|
-
}
|
41
|
-
}
|
42
|
-
|
43
|
-
|
21
|
+
You can see an example at https://github.com/felixramirezgarcia/logstash-input-multirss/blob/master/example-pipeline.conf
|
44
22
|
|
45
23
|
The difference between the multi_feed and one_feed attributes is that multi_feed takes the URI of a parent page where several RSS (XML) feeds are linked, whereas one_feed is for the case where you want to read only one of those links directly. A visual example can be seen by visiting the following links:
|
46
24
|
|
@@ -5,6 +5,7 @@ require "stud/interval"
|
|
5
5
|
require "net/http"
|
6
6
|
require "uri"
|
7
7
|
require "mechanize"
|
8
|
+
require "rss"
|
8
9
|
require "nokogiri"
|
9
10
|
|
10
11
|
#if you want to debug it you just have to uncomment the puts
|
@@ -23,7 +24,7 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
|
|
23
24
|
#Set de interval for stoppable_sleep
|
24
25
|
config :interval, :validate => :number, :default => 3600
|
25
26
|
|
26
|
-
#Set de black list to forget read
|
27
|
+
#Set de black list to forget read
|
27
28
|
config :blacklist, :validate => :array, :default => []
|
28
29
|
|
29
30
|
public
|
@@ -40,7 +41,8 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
|
|
40
41
|
while !stop?
|
41
42
|
|
42
43
|
@multi_feed.each do |rss|
|
43
|
-
|
44
|
+
str = "Read parent: " + rss
|
45
|
+
#puts str
|
44
46
|
begin
|
45
47
|
page = @agent.get(rss)
|
46
48
|
page.links.each do |link|
|
@@ -49,16 +51,19 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
|
|
49
51
|
end
|
50
52
|
end
|
51
53
|
rescue
|
52
|
-
|
54
|
+
str = "Fail to get " + rss + " childrens links"
|
55
|
+
#puts str
|
53
56
|
end
|
54
57
|
|
55
58
|
links = urls.uniq
|
56
59
|
links.each do |link|
|
57
60
|
begin
|
58
61
|
response_link(link,queue)
|
59
|
-
|
62
|
+
str = "Read clidren: " + link
|
63
|
+
#puts str
|
60
64
|
rescue
|
61
|
-
|
65
|
+
str = "Fail to get " + link + " children"
|
66
|
+
#puts str
|
62
67
|
next
|
63
68
|
end
|
64
69
|
end
|
@@ -74,9 +79,11 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
|
|
74
79
|
all_links.each do |link|
|
75
80
|
begin
|
76
81
|
response_link(link,queue)
|
77
|
-
|
82
|
+
str = "Read clidren: " + link
|
83
|
+
#puts str
|
78
84
|
rescue
|
79
|
-
|
85
|
+
str = "Fail to get " + link
|
86
|
+
#puts str
|
80
87
|
next
|
81
88
|
end # begin
|
82
89
|
end # all_links loop
|
@@ -116,8 +123,7 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
|
|
116
123
|
|
117
124
|
@logger.error("Error : ", :exception => ex)
|
118
125
|
rescue => exc
|
119
|
-
|
120
|
-
#@logger.error("Uknown error while parsing the feed", :exception => exc)
|
126
|
+
@logger.error("Uknown error while parsing the feed", :exception => exc)
|
121
127
|
end # begin
|
122
128
|
end # def response_link
|
123
129
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = 'logstash-input-multirss'
|
3
|
-
s.version = '1.0.2'
|
3
|
+
s.version = '1.0.3'
|
4
4
|
s.licenses = ['Apache-2.0']
|
5
5
|
s.summary = 'Simple multi rss plugin'
|
6
6
|
s.description = 'This plugin needs a list of links of different rss. Get all the links of the main feed pages and get all the content of each of the links.'
|