logstash-input-multirss 1.0.0 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/logstash/inputs/multirss.rb +48 -29
- data/logstash-input-multirss.gemspec +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d3e3215896069197c28eeaf2bc1d97b8a7b922b5f629d33f15c82b6cbe84c20a
|
4
|
+
data.tar.gz: d1391c39b3f2bf48888f93ca26633c821da10f439772408ac61de4ba9a561681
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: acbfa422e853ab42d439c960064f45df5277e372da471b20aa60f6a78d508fd872d9674fb009413fd0aee88b774d88eebe4e68db355c1ae6b7fee21f1d17d0c8
|
7
|
+
data.tar.gz: 0f1cf2179aa9722c65d610b6ea7c266866ad1719984cb89b280607c5fe3f6003688b55fcbbab33cb96255f517200ce5f637d8377c47600de3286b010a9597b7d
|
@@ -20,21 +20,22 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
|
|
20
20
|
config :one_feed, :validate => :array, :default => []
|
21
21
|
|
22
22
|
# Set the interval for stoppable_sleep
|
23
|
-
config :interval, :validate => :number, :default =>
|
23
|
+
config :interval, :validate => :number, :default => 3600
|
24
24
|
|
25
25
|
# Set the blacklist of links to skip
|
26
|
-
config :blacklist, :validate => :array, :default => [
|
26
|
+
config :blacklist, :validate => :array, :default => []
|
27
27
|
|
28
28
|
public
|
29
29
|
def register
|
30
|
-
@urls = []
|
31
30
|
@agent = Mechanize.new
|
32
31
|
@agent.agent.http.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
33
32
|
end # def register
|
34
33
|
|
35
34
|
|
36
|
-
|
35
|
+
def run(queue)
|
37
36
|
# we can abort the loop if stop? becomes true
|
37
|
+
urls = []
|
38
|
+
|
38
39
|
while !stop?
|
39
40
|
|
40
41
|
@multi_feed.each do |rss|
|
@@ -42,64 +43,82 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
|
|
42
43
|
begin
|
43
44
|
page = @agent.get(rss)
|
44
45
|
page.links.each do |link|
|
45
|
-
if link.href.chars.last(3).join == "xml" && not_include_blacklist(link)
|
46
|
-
|
46
|
+
if (link.href.chars.last(3).join == "xml" || link.href.include?('/rss') || link.href.include?('/feed')) && not_include_blacklist(link)
|
47
|
+
urls << link.href
|
47
48
|
end
|
48
49
|
end
|
49
50
|
rescue
|
50
51
|
puts "Fail to get " + rss + " feed"
|
51
52
|
end
|
52
53
|
|
53
|
-
links =
|
54
|
+
links = urls.uniq
|
54
55
|
links.each do |link|
|
55
56
|
begin
|
56
57
|
response_link(link,queue)
|
57
58
|
puts "Read clidren: " + link
|
58
59
|
rescue
|
59
|
-
puts "Fail to get " + link
|
60
|
+
puts "Fail to get " + link + " children"
|
60
61
|
next
|
61
62
|
end
|
62
63
|
end
|
63
|
-
|
64
|
+
urls.clear
|
64
65
|
|
65
|
-
end
|
66
|
+
end # multi_feed loop
|
66
67
|
|
67
68
|
@one_feed.each do |feed|
|
68
|
-
|
69
|
-
end
|
70
|
-
|
71
|
-
|
69
|
+
urls << feed
|
70
|
+
end # one_feed loop
|
71
|
+
|
72
|
+
all_links = urls.uniq
|
73
|
+
all_links.each do |link|
|
72
74
|
begin
|
73
75
|
response_link(link,queue)
|
74
76
|
puts "Read clidren: " + link
|
75
77
|
rescue
|
76
78
|
puts "Fail to get " + link
|
77
79
|
next
|
78
|
-
end
|
79
|
-
end
|
80
|
+
end # begin
|
81
|
+
end # all_links loop
|
80
82
|
|
81
|
-
|
83
|
+
urls.clear
|
82
84
|
|
83
85
|
Stud.stoppable_sleep(@interval) { stop? }
|
84
|
-
end # loop
|
85
|
-
end # def run
|
86
86
|
|
87
|
+
end # loop while
|
88
|
+
end # def run
|
87
89
|
|
88
|
-
|
89
|
-
|
90
|
-
|
90
|
+
|
91
|
+
def stop
|
92
|
+
end #def stop
|
91
93
|
|
92
94
|
def response_link(link, queue)
|
95
|
+
tried = 2
|
93
96
|
begin
|
94
|
-
page = Nokogiri::XML(open(link))
|
97
|
+
page = Nokogiri::XML(open(link,&:read))
|
95
98
|
page.search('item').each do |item|
|
96
99
|
link_rss_response(queue, item)
|
97
100
|
end
|
101
|
+
rescue => ex
|
102
|
+
if link.chars.first(1).join == "/" && link.chars.first(2).join != "//"
|
103
|
+
link = "http:/" + link
|
104
|
+
retry
|
105
|
+
elsif link.chars.first(1).join == "/" && link.chars.first(2).join == "//"
|
106
|
+
link = "http:" + link
|
107
|
+
retry
|
108
|
+
end
|
109
|
+
|
110
|
+
if link.chars.first(4).join == "http" && link.chars.first(5).join != "https"
|
111
|
+
link = link.sub('http','https')
|
112
|
+
tried = tried - 1
|
113
|
+
retry if (tried > 0)
|
114
|
+
end
|
115
|
+
|
116
|
+
@logger.error("Error : ", :exception => ex)
|
98
117
|
rescue => exc
|
99
118
|
puts "ERROR"
|
100
119
|
@logger.error("Uknown error while parsing the feed", :exception => exc)
|
101
|
-
end
|
102
|
-
end
|
120
|
+
end # begin
|
121
|
+
end # def response_link
|
103
122
|
|
104
123
|
def link_rss_response(queue, item)
|
105
124
|
event = LogStash::Event.new()
|
@@ -110,11 +129,11 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
|
|
110
129
|
else
|
111
130
|
eve = LogStash::Event.new( x.name => x.inner_html.to_s )
|
112
131
|
event.append( eve )
|
113
|
-
end
|
114
|
-
end
|
132
|
+
end # if
|
133
|
+
end # loop
|
115
134
|
decorate(event)
|
116
135
|
queue << event
|
117
|
-
end
|
136
|
+
end # def link_rss_response
|
118
137
|
|
119
138
|
def not_include_blacklist(link)
|
120
139
|
for i in 0..@blacklist.length-1
|
@@ -123,7 +142,7 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
|
|
123
142
|
end
|
124
143
|
end
|
125
144
|
return true
|
126
|
-
end
|
145
|
+
end # def not_include_blacklist
|
127
146
|
|
128
147
|
|
129
148
|
end # class LogStash::Inputs::Multirss
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = 'logstash-input-multirss'
|
3
|
-
s.version = '1.0.
|
3
|
+
s.version = '1.0.1'
|
4
4
|
s.licenses = ['Apache-2.0']
|
5
5
|
s.summary = 'Simple multi rss plugin'
|
6
6
|
s.description = 'This plugin needs a list of links of different rss. Get all the links of the main feed pages and get all the content of each of the links.'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: logstash-input-multirss
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Felix R G
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-08-
|
11
|
+
date: 2018-08-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|