logstash-input-multirss 1.0.5 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +0 -1
- data/lib/logstash/inputs/multirss.rb +35 -29
- data/logstash-input-multirss.gemspec +2 -5
- metadata +4 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 48be80f785a5000cf522c4717ba16872e8547082844d38d032698b2813ddf2af
|
4
|
+
data.tar.gz: 3e37d5fcddafc3cff0cb9dd6c4d488ae6a1fd40c9aad1219f8938771f7fd1ed2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b2795603d6db7056798272912fc7986e410aad8cabdfda6fdce74a5e7dc26ccbfe621a13817f191ff8a04bfa50ffe193b060a661e80ec9d6ba53cf70acc8f38c
|
7
|
+
data.tar.gz: 1b83d04df5d8f0a35003f4c1287f7f800e1a4a3792c87ac58b7a6ca151f60ef0a31c20d940a663468aae62327820a2540995756e63684d3ad302d7e4e9310ab7
|
data/README.md
CHANGED
@@ -23,7 +23,6 @@ You can see a example in https://github.com/felixramirezgarcia/logstash-input-mu
|
|
23
23
|
The difference between the multi_feed and one_feed attributes is that multi_feed takes the URI of a parent page on which several RSS (XML) feeds are linked, whereas one_feed takes the URI of a single feed. Use one_feed when you want to explore only one of those links. A visual example of each can be seen by visiting the following links:
|
24
24
|
|
25
25
|
Father (multi_feed) => http://rss.elmundo.es/rss/
|
26
|
-
|
27
26
|
Son (one_feed) => http://estaticos.elmundo.es/elmundo/rss/portada.xml
|
28
27
|
|
29
28
|
## Documentation
|
@@ -8,7 +8,10 @@ require "mechanize"
|
|
8
8
|
require "rss"
|
9
9
|
require "nokogiri"
|
10
10
|
|
11
|
-
#if you want to debug it you just have to uncomment the puts
|
11
|
+
# To debug the plugin, uncomment the puts calls below, then build the gem with
|
12
|
+
# ruby -S gem build logstash-input-multirss.gemspec
|
13
|
+
# and install the built gem into a Logstash instance with
|
14
|
+
# logstash-plugin install logstash-input-multirss-x.x.x.gem
|
12
15
|
|
13
16
|
class LogStash::Inputs::Multirss < LogStash::Inputs::Base
|
14
17
|
config_name "multirss"
|
@@ -41,7 +44,8 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
|
|
41
44
|
while !stop?
|
42
45
|
|
43
46
|
@multi_feed.each do |rss|
|
44
|
-
|
47
|
+
str = "Read parent: " + rss
|
48
|
+
#puts str
|
45
49
|
begin
|
46
50
|
page = @agent.get(rss)
|
47
51
|
page.links.each do |link|
|
@@ -49,22 +53,25 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
|
|
49
53
|
urls << link.href
|
50
54
|
end
|
51
55
|
end
|
52
|
-
rescue
|
53
|
-
|
54
|
-
|
56
|
+
rescue
|
57
|
+
str = "Fail to get " + rss + " childrens links"
|
58
|
+
#puts str
|
59
|
+
end # end begin
|
55
60
|
|
56
61
|
links = urls.uniq
|
57
62
|
links.each do |link|
|
58
63
|
begin
|
59
64
|
response_link(link,queue)
|
60
|
-
|
65
|
+
str = "Read clidren: " + link
|
66
|
+
#puts str
|
61
67
|
rescue
|
62
|
-
|
68
|
+
str = "Fail to get " + link + " children"
|
69
|
+
#puts str
|
63
70
|
next
|
64
|
-
end
|
65
|
-
end
|
66
|
-
urls.clear
|
71
|
+
end # end begin
|
72
|
+
end # end each links
|
67
73
|
|
74
|
+
urls.clear
|
68
75
|
end # multi_feed loop
|
69
76
|
|
70
77
|
@one_feed.each do |feed|
|
@@ -75,9 +82,11 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
|
|
75
82
|
all_links.each do |link|
|
76
83
|
begin
|
77
84
|
response_link(link,queue)
|
78
|
-
|
85
|
+
str = "Read clidren: " + link
|
86
|
+
#puts str
|
79
87
|
rescue
|
80
|
-
|
88
|
+
str = "Fail to get " + link
|
89
|
+
#puts str
|
81
90
|
next
|
82
91
|
end # begin
|
83
92
|
end # all_links loop
|
@@ -85,9 +94,8 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
|
|
85
94
|
urls.clear
|
86
95
|
|
87
96
|
Stud.stoppable_sleep(@interval) { stop? }
|
88
|
-
|
89
|
-
|
90
|
-
end # def run
|
97
|
+
end # end while
|
98
|
+
end # end def run
|
91
99
|
|
92
100
|
|
93
101
|
def stop
|
@@ -96,10 +104,10 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
|
|
96
104
|
def response_link(link, queue)
|
97
105
|
tried = 2
|
98
106
|
begin
|
99
|
-
page = Nokogiri::XML(open(link,&:read))
|
107
|
+
page = Nokogiri::XML(open(link,&:read)) # [&:read] -> no OpenURI outputs in /tmp
|
100
108
|
page.search('item').each do |item|
|
101
109
|
link_rss_response(queue, item)
|
102
|
-
end
|
110
|
+
end # end each page
|
103
111
|
rescue => ex
|
104
112
|
if link.chars.first(1).join == "/" && link.chars.first(2).join != "//"
|
105
113
|
link = "http:/" + link
|
@@ -107,19 +115,17 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
|
|
107
115
|
elsif link.chars.first(1).join == "/" && link.chars.first(2).join == "//"
|
108
116
|
link = "http:" + link
|
109
117
|
retry
|
110
|
-
end
|
111
|
-
|
118
|
+
end # end if elsif
|
112
119
|
if link.chars.first(4).join == "http" && link.chars.first(5).join != "https"
|
113
120
|
link = link.sub('http','https')
|
114
121
|
tried = tried - 1
|
115
122
|
retry if (tried > 0)
|
116
|
-
end
|
117
|
-
|
118
|
-
@logger.error("Error with #{link}: ", :exception => ex)
|
123
|
+
end # end if
|
124
|
+
#@logger.error("Error : ", :exception => ex)
|
119
125
|
rescue => exc
|
120
|
-
@logger.error("Uknown error while parsing the
|
121
|
-
end # begin
|
122
|
-
end # def response_link
|
126
|
+
@logger.error("Uknown error while parsing the feed", :exception => exc)
|
127
|
+
end # end begin
|
128
|
+
end # end def response_link
|
123
129
|
|
124
130
|
def link_rss_response(queue, item)
|
125
131
|
event = LogStash::Event.new()
|
@@ -130,8 +136,8 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
|
|
130
136
|
else
|
131
137
|
eve = LogStash::Event.new( x.name => x.inner_html.to_s )
|
132
138
|
event.append( eve )
|
133
|
-
end # if
|
134
|
-
end # loop
|
139
|
+
end # end if
|
140
|
+
end # end loop
|
135
141
|
decorate(event)
|
136
142
|
queue << event
|
137
143
|
end # def link_rss_response
|
@@ -140,8 +146,8 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
|
|
140
146
|
for i in 0..@blacklist.length-1
|
141
147
|
if link.href.include?(@blacklist[i])
|
142
148
|
return false
|
143
|
-
end
|
144
|
-
end
|
149
|
+
end # end if
|
150
|
+
end # end for
|
145
151
|
return true
|
146
152
|
end # def not_include_blacklist
|
147
153
|
|
@@ -1,12 +1,9 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = 'logstash-input-multirss'
|
3
|
-
s.version = '1.0
|
3
|
+
s.version = '1.1.0'
|
4
4
|
s.licenses = ['Apache-2.0']
|
5
5
|
s.summary = 'Simple multi rss plugin'
|
6
|
-
s.description = 'This plugin
|
7
|
-
1) multi_feed => [array] URI parent with more rss links inside , something like this: http://rss.elmundo.es/rss/
|
8
|
-
2) one_feed => [array] (optionally) childs URIS with XML content inside , something like this: http://estaticos.elmundo.es/elmundo/rss/portada.xml
|
9
|
-
3) blacklist => [array] (optionally) strings , links, text ... what you dont want explored'
|
6
|
+
s.description = 'This plugin needs a list of links of different rss. Get all the links of the main feed pages and get all the content of each of the links.'
|
10
7
|
s.homepage = 'https://github.com/felixramirezgarcia/logstash-input-multirss'
|
11
8
|
s.authors = ['Felix R G']
|
12
9
|
s.email = 'felixramirezgarcia@correo.ugr.es'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: logstash-input-multirss
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Felix R G
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-08-
|
11
|
+
date: 2018-08-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -94,13 +94,8 @@ dependencies:
|
|
94
94
|
- - ">="
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
|
-
description:
|
98
|
-
|
99
|
-
\ like this: http://rss.elmundo.es/rss/ \n 2) one_feed =>\
|
100
|
-
\ [array] (optionally) childs URIS with XML content inside , something like this:\
|
101
|
-
\ http://estaticos.elmundo.es/elmundo/rss/portada.xml \n 3)\
|
102
|
-
\ blacklist => [array] (optionally) strings , links, text ... what you dont want\
|
103
|
-
\ explored"
|
97
|
+
description: This plugin needs a list of links of different rss. Get all the links
|
98
|
+
of the main feed pages and get all the content of each of the links.
|
104
99
|
email: felixramirezgarcia@correo.ugr.es
|
105
100
|
executables: []
|
106
101
|
extensions: []
|