logstash-input-multirss 1.0.1 → 1.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d3e3215896069197c28eeaf2bc1d97b8a7b922b5f629d33f15c82b6cbe84c20a
4
- data.tar.gz: d1391c39b3f2bf48888f93ca26633c821da10f439772408ac61de4ba9a561681
3
+ metadata.gz: 8e9c37035e249154e18a6791d97ed293dae87c8fe7c50dfe615362df6271be84
4
+ data.tar.gz: be4e1b502d9c7d6d502e1c56f01e339e250a402885fd03e4824a39f467366682
5
5
  SHA512:
6
- metadata.gz: acbfa422e853ab42d439c960064f45df5277e372da471b20aa60f6a78d508fd872d9674fb009413fd0aee88b774d88eebe4e68db355c1ae6b7fee21f1d17d0c8
7
- data.tar.gz: 0f1cf2179aa9722c65d610b6ea7c266866ad1719984cb89b280607c5fe3f6003688b55fcbbab33cb96255f517200ce5f637d8377c47600de3286b010a9597b7d
6
+ metadata.gz: 25d8d59b55f0f40faab1d0b65bbe06811875516a8ef6547879b188622267983c81ce47c9408c2af2a120b5f3312b2398dc10684dffaef19593e8cf884e813a8e
7
+ data.tar.gz: 4ffc47877ab4d21928b3d3cb4a96531f1d660d6bb113ac72010a5ee3ee99f647bfeea02398c548a7c3993acc7e77cbc0ad34cef44cea23dcd053a5bbb42400fc
data/README.md CHANGED
@@ -4,6 +4,49 @@ This is a plugin for [Logstash](https://github.com/elastic/logstash).
4
4
 
5
5
  It is fully free and fully open source. The license is Apache 2.0, meaning you are pretty much free to use it however you want in whatever way.
6
6
 
7
+ # Install
8
+
9
+ You can install the plugin from https://rubygems.org/gems/logstash-input-multirss, or build it yourself in a Logstash service or container with:
10
+
11
+ git clone https://github.com/felixramirezgarcia/logstash-input-multirss
12
+ rm logstash-input-multirss-[nº_version].gem
13
+ ruby -S gem build logstash-input-multirss.gemspec
14
+ logstash-plugin install logstash-input-multirss-[nº_version].gem
15
+
16
+ # Pipeline Example
17
+
18
+ input {
19
+ multirss
20
+ {
21
+ multi_feed => ['https://www.diaridegirona.cat/servicios/rss/rss.jsp','https://www.elconfidencial.com/rss/','https://www.20minutos.es/sindicacion/','http://www.europapress.es/contenidosrss/','https://www.abc.es/rss/','https://servicios.elpais.com/rss/','http://www.expansion.com/rss/','https://cronicaglobal.elespanol.com/rss.html','https://www.diarioinformacion.com/servicios/rss/rss.jsp']
22
+
23
+ one_feed => ['http://www.finanzas.com/rss/noticiasportada.xml','https://www.invertia.com/es/rss/-/rss/getHome','https://www.invertia.com/es/rss/-/rss/getHomeExtra','https://www.invertia.com/es/rss/-/rss/getNews?_rss_WAR_ivrssportlet_category=mercados']
24
+
25
+ interval => 1000
26
+
27
+ blacklist => ['google.com','yahoo.com','live.com','netvibes.com','bloglines.com','feedly.com','/atom']
28
+ }
29
+ }
30
+
31
+ filter {
32
+
33
+ }
34
+
35
+
36
+ output {
37
+ file {
38
+ path => "/tmp/rss_links.json"
39
+ codec => "json"
40
+ }
41
+ }
42
+
43
+
44
+
45
+ The difference between the multi_feed and one_feed attributes is that multi_feed is the URI of a parent page where several RSS (XML) feeds are listed. If you want to explore only one of those links, use the one_feed attribute instead. You can see an example by visiting the following links:
46
+
47
+ Parent (multi_feed) => http://rss.elmundo.es/rss/
48
+ Child (one_feed) => http://estaticos.elmundo.es/elmundo/rss/portada.xml
49
+
7
50
  ## Documentation
8
51
 
9
52
  Logstash provides infrastructure to automatically generate documentation for this plugin. We use the asciidoc format to write documentation so any comments in the source code will be first converted into asciidoc and then into html. All plugin documentation is placed under one [central location](http://www.elastic.co/guide/en/logstash/current/).
@@ -5,9 +5,10 @@ require "stud/interval"
5
5
  require "net/http"
6
6
  require "uri"
7
7
  require "mechanize"
8
- require "rss"
9
8
  require "nokogiri"
10
9
 
10
+ # To debug the plugin, uncomment the puts statements below.
11
+
11
12
  class LogStash::Inputs::Multirss < LogStash::Inputs::Base
12
13
  config_name "multirss"
13
14
 
@@ -22,7 +23,7 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
22
23
  # Set the interval for stoppable_sleep
23
24
  config :interval, :validate => :number, :default => 3600
24
25
 
25
- #Set de black list to forget
26
+ # Set the blacklist of links that should not be read
26
27
  config :blacklist, :validate => :array, :default => []
27
28
 
28
29
  public
@@ -39,7 +40,7 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
39
40
  while !stop?
40
41
 
41
42
  @multi_feed.each do |rss|
42
- puts "Read parent: " + rss
43
+ #puts "Read parent: " + rss
43
44
  begin
44
45
  page = @agent.get(rss)
45
46
  page.links.each do |link|
@@ -48,16 +49,16 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
48
49
  end
49
50
  end
50
51
  rescue
51
- puts "Fail to get " + rss + " feed"
52
+ #puts "Fail to get " + rss + " childrens links"
52
53
  end
53
54
 
54
55
  links = urls.uniq
55
56
  links.each do |link|
56
57
  begin
57
58
  response_link(link,queue)
58
- puts "Read clidren: " + link
59
+ #puts "Read clidren: " + link
59
60
  rescue
60
- puts "Fail to get " + link + " children"
61
+ #puts "Fail to get " + link + " children"
61
62
  next
62
63
  end
63
64
  end
@@ -73,9 +74,9 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
73
74
  all_links.each do |link|
74
75
  begin
75
76
  response_link(link,queue)
76
- puts "Read clidren: " + link
77
+ #puts "Read clidren: " + link
77
78
  rescue
78
- puts "Fail to get " + link
79
+ #puts "Fail to get " + link
79
80
  next
80
81
  end # begin
81
82
  end # all_links loop
@@ -115,8 +116,8 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
115
116
 
116
117
  @logger.error("Error : ", :exception => ex)
117
118
  rescue => exc
118
- puts "ERROR"
119
- @logger.error("Uknown error while parsing the feed", :exception => exc)
119
+ #puts "ERROR"
120
+ #@logger.error("Uknown error while parsing the feed", :exception => exc)
120
121
  end # begin
121
122
  end # def response_link
122
123
 
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'logstash-input-multirss'
3
- s.version = '1.0.1'
3
+ s.version = '1.0.2'
4
4
  s.licenses = ['Apache-2.0']
5
5
  s.summary = 'Simple multi rss plugin'
6
6
  s.description = 'This plugin needs a list of links of different rss. Get all the links of the main feed pages and get all the content of each of the links.'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-input-multirss
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Felix R G
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-08-16 00:00:00.000000000 Z
11
+ date: 2018-08-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement