logstash-input-multirss 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/logstash/inputs/multirss.rb +48 -29
- data/logstash-input-multirss.gemspec +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d3e3215896069197c28eeaf2bc1d97b8a7b922b5f629d33f15c82b6cbe84c20a
|
4
|
+
data.tar.gz: d1391c39b3f2bf48888f93ca26633c821da10f439772408ac61de4ba9a561681
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: acbfa422e853ab42d439c960064f45df5277e372da471b20aa60f6a78d508fd872d9674fb009413fd0aee88b774d88eebe4e68db355c1ae6b7fee21f1d17d0c8
|
7
|
+
data.tar.gz: 0f1cf2179aa9722c65d610b6ea7c266866ad1719984cb89b280607c5fe3f6003688b55fcbbab33cb96255f517200ce5f637d8377c47600de3286b010a9597b7d
|
@@ -20,21 +20,22 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
|
|
20
20
|
config :one_feed, :validate => :array, :default => []
|
21
21
|
|
22
22
|
#Set de interval for stoppable_sleep
|
23
|
-
config :interval, :validate => :number, :default =>
|
23
|
+
config :interval, :validate => :number, :default => 3600
|
24
24
|
|
25
25
|
#Set de black list to forget
|
26
|
-
config :blacklist, :validate => :array, :default => [
|
26
|
+
config :blacklist, :validate => :array, :default => []
|
27
27
|
|
28
28
|
public
|
29
29
|
def register
|
30
|
-
@urls = []
|
31
30
|
@agent = Mechanize.new
|
32
31
|
@agent.agent.http.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
33
32
|
end # def register
|
34
33
|
|
35
34
|
|
36
|
-
|
35
|
+
def run(queue)
|
37
36
|
# we can abort the loop if stop? becomes true
|
37
|
+
urls = []
|
38
|
+
|
38
39
|
while !stop?
|
39
40
|
|
40
41
|
@multi_feed.each do |rss|
|
@@ -42,64 +43,82 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
|
|
42
43
|
begin
|
43
44
|
page = @agent.get(rss)
|
44
45
|
page.links.each do |link|
|
45
|
-
if link.href.chars.last(3).join == "xml" && not_include_blacklist(link)
|
46
|
-
|
46
|
+
if (link.href.chars.last(3).join == "xml" || link.href.include?('/rss') || link.href.include?('/feed')) && not_include_blacklist(link)
|
47
|
+
urls << link.href
|
47
48
|
end
|
48
49
|
end
|
49
50
|
rescue
|
50
51
|
puts "Fail to get " + rss + " feed"
|
51
52
|
end
|
52
53
|
|
53
|
-
links =
|
54
|
+
links = urls.uniq
|
54
55
|
links.each do |link|
|
55
56
|
begin
|
56
57
|
response_link(link,queue)
|
57
58
|
puts "Read clidren: " + link
|
58
59
|
rescue
|
59
|
-
puts "Fail to get " + link
|
60
|
+
puts "Fail to get " + link + " children"
|
60
61
|
next
|
61
62
|
end
|
62
63
|
end
|
63
|
-
|
64
|
+
urls.clear
|
64
65
|
|
65
|
-
end
|
66
|
+
end # multi_feed loop
|
66
67
|
|
67
68
|
@one_feed.each do |feed|
|
68
|
-
|
69
|
-
end
|
70
|
-
|
71
|
-
|
69
|
+
urls << feed
|
70
|
+
end # one_feed loop
|
71
|
+
|
72
|
+
all_links = urls.uniq
|
73
|
+
all_links.each do |link|
|
72
74
|
begin
|
73
75
|
response_link(link,queue)
|
74
76
|
puts "Read clidren: " + link
|
75
77
|
rescue
|
76
78
|
puts "Fail to get " + link
|
77
79
|
next
|
78
|
-
end
|
79
|
-
end
|
80
|
+
end # begin
|
81
|
+
end # all_links loop
|
80
82
|
|
81
|
-
|
83
|
+
urls.clear
|
82
84
|
|
83
85
|
Stud.stoppable_sleep(@interval) { stop? }
|
84
|
-
end # loop
|
85
|
-
end # def run
|
86
86
|
|
87
|
+
end # loop while
|
88
|
+
end # def run
|
87
89
|
|
88
|
-
|
89
|
-
|
90
|
-
|
90
|
+
|
91
|
+
def stop
|
92
|
+
end #def stop
|
91
93
|
|
92
94
|
def response_link(link, queue)
|
95
|
+
tried = 2
|
93
96
|
begin
|
94
|
-
page = Nokogiri::XML(open(link))
|
97
|
+
page = Nokogiri::XML(open(link,&:read))
|
95
98
|
page.search('item').each do |item|
|
96
99
|
link_rss_response(queue, item)
|
97
100
|
end
|
101
|
+
rescue => ex
|
102
|
+
if link.chars.first(1).join == "/" && link.chars.first(2).join != "//"
|
103
|
+
link = "http:/" + link
|
104
|
+
retry
|
105
|
+
elsif link.chars.first(1).join == "/" && link.chars.first(2).join == "//"
|
106
|
+
link = "http:" + link
|
107
|
+
retry
|
108
|
+
end
|
109
|
+
|
110
|
+
if link.chars.first(4).join == "http" && link.chars.first(5).join != "https"
|
111
|
+
link = link.sub('http','https')
|
112
|
+
tried = tried - 1
|
113
|
+
retry if (tried > 0)
|
114
|
+
end
|
115
|
+
|
116
|
+
@logger.error("Error : ", :exception => ex)
|
98
117
|
rescue => exc
|
99
118
|
puts "ERROR"
|
100
119
|
@logger.error("Uknown error while parsing the feed", :exception => exc)
|
101
|
-
end
|
102
|
-
end
|
120
|
+
end # begin
|
121
|
+
end # def response_link
|
103
122
|
|
104
123
|
def link_rss_response(queue, item)
|
105
124
|
event = LogStash::Event.new()
|
@@ -110,11 +129,11 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
|
|
110
129
|
else
|
111
130
|
eve = LogStash::Event.new( x.name => x.inner_html.to_s )
|
112
131
|
event.append( eve )
|
113
|
-
end
|
114
|
-
end
|
132
|
+
end # if
|
133
|
+
end # loop
|
115
134
|
decorate(event)
|
116
135
|
queue << event
|
117
|
-
end
|
136
|
+
end # def link_rss_response
|
118
137
|
|
119
138
|
def not_include_blacklist(link)
|
120
139
|
for i in 0..@blacklist.length-1
|
@@ -123,7 +142,7 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
|
|
123
142
|
end
|
124
143
|
end
|
125
144
|
return true
|
126
|
-
end
|
145
|
+
end # def not_include_blacklist
|
127
146
|
|
128
147
|
|
129
148
|
end # class LogStash::Inputs::Crawler
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = 'logstash-input-multirss'
|
3
|
-
s.version = '1.0.0'
|
3
|
+
s.version = '1.0.1'
|
4
4
|
s.licenses = ['Apache-2.0']
|
5
5
|
s.summary = 'Simple multi rss plugin'
|
6
6
|
s.description = 'This plugin needs a list of links of different rss. Get all the links of the main feed pages and get all the content of each of the links.'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: logstash-input-multirss
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Felix R G
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-08-
|
11
|
+
date: 2018-08-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|