logstash-input-multirss 1.0.2 → 1.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8e9c37035e249154e18a6791d97ed293dae87c8fe7c50dfe615362df6271be84
4
- data.tar.gz: be4e1b502d9c7d6d502e1c56f01e339e250a402885fd03e4824a39f467366682
3
+ metadata.gz: 13067ead10a7d5f0f79e4917f438fd4ab6bb94dd52ee37d216cecd57fd2b9c01
4
+ data.tar.gz: 17c13ebebe0b913e9e79d77e1ef3093dc4cc2d3af16ea122e2c9ac6a61315d89
5
5
  SHA512:
6
- metadata.gz: 25d8d59b55f0f40faab1d0b65bbe06811875516a8ef6547879b188622267983c81ce47c9408c2af2a120b5f3312b2398dc10684dffaef19593e8cf884e813a8e
7
- data.tar.gz: 4ffc47877ab4d21928b3d3cb4a96531f1d660d6bb113ac72010a5ee3ee99f647bfeea02398c548a7c3993acc7e77cbc0ad34cef44cea23dcd053a5bbb42400fc
6
+ metadata.gz: b37d8389d3b92271e53160ed809695fb9dcdc3158f04270f6bf722640837b9982d9933db17a6147c2d8704b25778d752874e4cdcb5c9c58483b41638c8846e7b
7
+ data.tar.gz: b79f50fad25ebe755440e8b421d47ef6b2dc38b770fbabb03fb13a9d8411d33681995ba7d2e3ab1d3c0820bd1190ab3c9258899e7abfb6f4b14f6d848398f087
data/README.md CHANGED
@@ -9,38 +9,16 @@ It is fully free and fully open source. The license is Apache 2.0, meaning you a
9
9
  You can install the plugin from https://rubygems.org/gems/logstash-input-multirss, or build it yourself in a Logstash service or container with:
10
10
 
11
11
  git clone https://github.com/felixramirezgarcia/logstash-input-multirss
12
+
12
13
  rm logstash-input-multirss-[nº_version].gem
14
+
13
15
  ruby -S gem build logstash-input-multirss.gemspec
16
+
14
17
  logstash-plugin install logstash-input-multirss-[nº_version].gem
15
18
 
16
19
  # Pipeline Example
17
20
 
18
- input {
19
- multirss
20
- {
21
- multi_feed => ['https://www.diaridegirona.cat/servicios/rss/rss.jsp','https://www.elconfidencial.com/rss/','https://www.20minutos.es/sindicacion/','http://www.europapress.es/contenidosrss/','https://www.abc.es/rss/','https://servicios.elpais.com/rss/','http://www.expansion.com/rss/','https://cronicaglobal.elespanol.com/rss.html','https://www.diarioinformacion.com/servicios/rss/rss.jsp']
22
-
23
- one_feed => ['http://www.finanzas.com/rss/noticiasportada.xml','https://www.invertia.com/es/rss/-/rss/getHome','https://www.invertia.com/es/rss/-/rss/getHomeExtra','https://www.invertia.com/es/rss/-/rss/getNews?_rss_WAR_ivrssportlet_category=mercados']
24
-
25
- interval => 1000
26
-
27
- blacklist => ['google.com','yahoo.com','live.com','netvibes.com','bloglines.com','feedly.com','/atom']
28
- }
29
- }
30
-
31
- filter {
32
-
33
- }
34
-
35
-
36
- output {
37
- file {
38
- path => "/tmp/rss_links.json"
39
- codec => "json"
40
- }
41
- }
42
-
43
-
21
+ You can see an example at https://github.com/felixramirezgarcia/logstash-input-multirss/blob/master/example-pipeline.conf
44
22
 
45
23
  The difference between the multi_feed and one_feed attributes is that multi_feed takes the URI of a parent page where several RSS (XML) feeds are found, whereas one_feed is for the case where you want to explore only one of those feed links. A visual example can be seen by visiting the following links:
46
24
 
@@ -5,6 +5,7 @@ require "stud/interval"
5
5
  require "net/http"
6
6
  require "uri"
7
7
  require "mechanize"
8
+ require "rss"
8
9
  require "nokogiri"
9
10
 
10
11
  #if you want to debug it you just have to uncomment the puts
@@ -23,7 +24,7 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
23
24
  #Set de interval for stoppable_sleep
24
25
  config :interval, :validate => :number, :default => 3600
25
26
 
26
- #Set de black list to forget read
27
+ #Set de black list to forget read
27
28
  config :blacklist, :validate => :array, :default => []
28
29
 
29
30
  public
@@ -40,7 +41,8 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
40
41
  while !stop?
41
42
 
42
43
  @multi_feed.each do |rss|
43
- #puts "Read parent: " + rss
44
+ str = "Read parent: " + rss
45
+ #puts str
44
46
  begin
45
47
  page = @agent.get(rss)
46
48
  page.links.each do |link|
@@ -49,16 +51,19 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
49
51
  end
50
52
  end
51
53
  rescue
52
- #puts "Fail to get " + rss + " childrens links"
54
+ str = "Fail to get " + rss + " childrens links"
55
+ #puts str
53
56
  end
54
57
 
55
58
  links = urls.uniq
56
59
  links.each do |link|
57
60
  begin
58
61
  response_link(link,queue)
59
- #puts "Read clidren: " + link
62
+ str = "Read clidren: " + link
63
+ #puts str
60
64
  rescue
61
- #puts "Fail to get " + link + " children"
65
+ str = "Fail to get " + link + " children"
66
+ #puts str
62
67
  next
63
68
  end
64
69
  end
@@ -74,9 +79,11 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
74
79
  all_links.each do |link|
75
80
  begin
76
81
  response_link(link,queue)
77
- #puts "Read clidren: " + link
82
+ str = "Read clidren: " + link
83
+ #puts str
78
84
  rescue
79
- #puts "Fail to get " + link
85
+ str = "Fail to get " + link
86
+ #puts str
80
87
  next
81
88
  end # begin
82
89
  end # all_links loop
@@ -116,8 +123,7 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
116
123
 
117
124
  @logger.error("Error : ", :exception => ex)
118
125
  rescue => exc
119
- #puts "ERROR"
120
- #@logger.error("Uknown error while parsing the feed", :exception => exc)
126
+ @logger.error("Uknown error while parsing the feed", :exception => exc)
121
127
  end # begin
122
128
  end # def response_link
123
129
 
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'logstash-input-multirss'
3
- s.version = '1.0.2'
3
+ s.version = '1.0.3'
4
4
  s.licenses = ['Apache-2.0']
5
5
  s.summary = 'Simple multi rss plugin'
6
6
  s.description = 'This plugin needs a list of links of different rss. Get all the links of the main feed pages and get all the content of each of the links.'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-input-multirss
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.2
4
+ version: 1.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Felix R G