logstash-input-multirss 1.0.5 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 39f775e02e37041ac3235927b046b8c20633cfcda938336bcfcfe22d30832352
4
- data.tar.gz: a95dd45d23116dfa3aeccb1f699cbfb84b6bdd70e39321f215a6836323a44a38
3
+ metadata.gz: 48be80f785a5000cf522c4717ba16872e8547082844d38d032698b2813ddf2af
4
+ data.tar.gz: 3e37d5fcddafc3cff0cb9dd6c4d488ae6a1fd40c9aad1219f8938771f7fd1ed2
5
5
  SHA512:
6
- metadata.gz: f4e00ebcbb6b1652efcc4ed5c1c913c57cc3fb044482c03d79620373e6a54df7ae2f160341b960baad5483b6bc3654f42d35eb449a3cac0cf5f30b5de60a63fa
7
- data.tar.gz: a76cee32268fd1bfefdbd8d7db4584d2c280efd2c95493abd92fa0e805d54c43fbfeaa6be10005dbd89768e32295de96c6307ba95995c9f5dcb4588d899b434c
6
+ metadata.gz: b2795603d6db7056798272912fc7986e410aad8cabdfda6fdce74a5e7dc26ccbfe621a13817f191ff8a04bfa50ffe193b060a661e80ec9d6ba53cf70acc8f38c
7
+ data.tar.gz: 1b83d04df5d8f0a35003f4c1287f7f800e1a4a3792c87ac58b7a6ca151f60ef0a31c20d940a663468aae62327820a2540995756e63684d3ad302d7e4e9310ab7
data/README.md CHANGED
@@ -23,7 +23,6 @@ You can see a example in https://github.com/felixramirezgarcia/logstash-input-mu
23
23
  The difference between the multi_feed and one_feed attributes is that multi_feed is the URI of a parent page on which several RSS (XML) links are found, whereas one_feed lets you explore just one of those links directly. A concrete example of each can be seen by visiting the following links:
24
24
 
25
25
  Father (multi_feed) => http://rss.elmundo.es/rss/
26
-
27
26
  Son (one_feed) => http://estaticos.elmundo.es/elmundo/rss/portada.xml
28
27
 
29
28
  ## Documentation
@@ -8,7 +8,10 @@ require "mechanize"
8
8
  require "rss"
9
9
  require "nokogiri"
10
10
 
11
- #if you want to debug it you just have to uncomment the puts
11
+ # To debug, uncomment the puts calls below, then build the gem with
12
+ # ruby -S gem build logstash-input-multirss.gemspec
13
+ # and install the gem in a logstash service with
14
+ # logstash-plugin install logstash-input-multirss-x.x.x.gem
12
15
 
13
16
  class LogStash::Inputs::Multirss < LogStash::Inputs::Base
14
17
  config_name "multirss"
@@ -41,7 +44,8 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
41
44
  while !stop?
42
45
 
43
46
  @multi_feed.each do |rss|
44
- puts "Read parent: " + rss
47
+ str = "Read parent: " + rss
48
+ #puts str
45
49
  begin
46
50
  page = @agent.get(rss)
47
51
  page.links.each do |link|
@@ -49,22 +53,25 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
49
53
  urls << link.href
50
54
  end
51
55
  end
52
- rescue
53
- puts "Fail to get " + rss + " childrens links"
54
- end
56
+ rescue
57
+ str = "Fail to get " + rss + " childrens links"
58
+ #puts str
59
+ end # end begin
55
60
 
56
61
  links = urls.uniq
57
62
  links.each do |link|
58
63
  begin
59
64
  response_link(link,queue)
60
- puts "Read clidren: " + link
65
+ str = "Read clidren: " + link
66
+ #puts str
61
67
  rescue
62
- puts "Fail to get " + link + " children"
68
+ str = "Fail to get " + link + " children"
69
+ #puts str
63
70
  next
64
- end
65
- end
66
- urls.clear
71
+ end # end begin
72
+ end # end each links
67
73
 
74
+ urls.clear
68
75
  end # multi_feed loop
69
76
 
70
77
  @one_feed.each do |feed|
@@ -75,9 +82,11 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
75
82
  all_links.each do |link|
76
83
  begin
77
84
  response_link(link,queue)
78
- puts "Read clidren: " + link
85
+ str = "Read clidren: " + link
86
+ #puts str
79
87
  rescue
80
- puts "Fail to get " + link
88
+ str = "Fail to get " + link
89
+ #puts str
81
90
  next
82
91
  end # begin
83
92
  end # all_links loop
@@ -85,9 +94,8 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
85
94
  urls.clear
86
95
 
87
96
  Stud.stoppable_sleep(@interval) { stop? }
88
-
89
- end # loop while
90
- end # def run
97
+ end # end while
98
+ end # end def run
91
99
 
92
100
 
93
101
  def stop
@@ -96,10 +104,10 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
96
104
  def response_link(link, queue)
97
105
  tried = 2
98
106
  begin
99
- page = Nokogiri::XML(open(link,&:read))
107
+ page = Nokogiri::XML(open(link,&:read)) # [&:read] -> no OpenURI outputs in /tmp
100
108
  page.search('item').each do |item|
101
109
  link_rss_response(queue, item)
102
- end
110
+ end # end each page
103
111
  rescue => ex
104
112
  if link.chars.first(1).join == "/" && link.chars.first(2).join != "//"
105
113
  link = "http:/" + link
@@ -107,19 +115,17 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
107
115
  elsif link.chars.first(1).join == "/" && link.chars.first(2).join == "//"
108
116
  link = "http:" + link
109
117
  retry
110
- end
111
-
118
+ end # end if elsif
112
119
  if link.chars.first(4).join == "http" && link.chars.first(5).join != "https"
113
120
  link = link.sub('http','https')
114
121
  tried = tried - 1
115
122
  retry if (tried > 0)
116
- end
117
-
118
- @logger.error("Error with #{link}: ", :exception => ex)
123
+ end # end if
124
+ #@logger.error("Error : ", :exception => ex)
119
125
  rescue => exc
120
- @logger.error("Uknown error while parsing the #{link} feed", :exception => exc)
121
- end # begin
122
- end # def response_link
126
+ @logger.error("Uknown error while parsing the feed", :exception => exc)
127
+ end # end begin
128
+ end # end def response_link
123
129
 
124
130
  def link_rss_response(queue, item)
125
131
  event = LogStash::Event.new()
@@ -130,8 +136,8 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
130
136
  else
131
137
  eve = LogStash::Event.new( x.name => x.inner_html.to_s )
132
138
  event.append( eve )
133
- end # if
134
- end # loop
139
+ end # end if
140
+ end # end loop
135
141
  decorate(event)
136
142
  queue << event
137
143
  end # def link_rss_response
@@ -140,8 +146,8 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
140
146
  for i in 0..@blacklist.length-1
141
147
  if link.href.include?(@blacklist[i])
142
148
  return false
143
- end
144
- end
149
+ end # end if
150
+ end # end for
145
151
  return true
146
152
  end # def not_include_blacklist
147
153
 
@@ -1,12 +1,9 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'logstash-input-multirss'
3
- s.version = '1.0.5'
3
+ s.version = '1.1.0'
4
4
  s.licenses = ['Apache-2.0']
5
5
  s.summary = 'Simple multi rss plugin'
6
- s.description = 'This plugin get the feed content and works with :
7
- 1) multi_feed => [array] URI parent with more rss links inside , something like this: http://rss.elmundo.es/rss/
8
- 2) one_feed => [array] (optionally) childs URIS with XML content inside , something like this: http://estaticos.elmundo.es/elmundo/rss/portada.xml
9
- 3) blacklist => [array] (optionally) strings , links, text ... what you dont want explored'
6
+ s.description = 'This plugin needs a list of links of different rss. Get all the links of the main feed pages and get all the content of each of the links.'
10
7
  s.homepage = 'https://github.com/felixramirezgarcia/logstash-input-multirss'
11
8
  s.authors = ['Felix R G']
12
9
  s.email = 'felixramirezgarcia@correo.ugr.es'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-input-multirss
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.5
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Felix R G
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-08-24 00:00:00.000000000 Z
11
+ date: 2018-08-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -94,13 +94,8 @@ dependencies:
94
94
  - - ">="
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
- description: "This plugin get the feed content and works with : \n \
98
- \ 1) multi_feed => [array] URI parent with more rss links inside , something\
99
- \ like this: http://rss.elmundo.es/rss/ \n 2) one_feed =>\
100
- \ [array] (optionally) childs URIS with XML content inside , something like this:\
101
- \ http://estaticos.elmundo.es/elmundo/rss/portada.xml \n 3)\
102
- \ blacklist => [array] (optionally) strings , links, text ... what you dont want\
103
- \ explored"
97
+ description: This plugin needs a list of links of different rss. Get all the links
98
+ of the main feed pages and get all the content of each of the links.
104
99
  email: felixramirezgarcia@correo.ugr.es
105
100
  executables: []
106
101
  extensions: []