logstash-input-multirss 0.1.1 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/lib/logstash/inputs/multirss.rb +54 -62
- data/logstash-input-multirss.gemspec +1 -1
- metadata +19 -21
- data/lib/logstash/inputs/multirss.rb.bk +0 -51
- data/lib/logstash/inputs/multirss.rb.bk2 +0 -117
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: cad07458c379cfcd9f3a7cbb9608f693a33efe5dfee0a2087387df491c135218
|
4
|
+
data.tar.gz: a5956c590a9e0d9667b278de7f690d3a55ce9eaf886919f2b355c9e12fff8675
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6611bda686c887dbaebba928718cd6521e4c2076e0c44feabdd1e13e89aef3aeb6a80cf8861dfba40b8b9a442f381262bc72023e6d3b3a14c86b94aadf3611ea
|
7
|
+
data.tar.gz: d73bfa18ec57247ff7b566f94e2d6e6a9e710bd09b5b5563034bb9cc74c46ddc2bc5c9f8e13f268ceb259913195922388f3efe2f0c952689e88cee932cb6a453
|
@@ -6,6 +6,7 @@ require "net/http"
|
|
6
6
|
require "uri"
|
7
7
|
require "mechanize"
|
8
8
|
require "rss"
|
9
|
+
require "nokogiri"
|
9
10
|
|
10
11
|
class LogStash::Inputs::Multirss < LogStash::Inputs::Base
|
11
12
|
config_name "multirss"
|
@@ -13,7 +14,10 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
|
|
13
14
|
default :codec, "plain"
|
14
15
|
|
15
16
|
# The rss array list to use in the pipe
|
16
|
-
config :
|
17
|
+
config :multi_feed, :validate => :array, :required => true
|
18
|
+
|
19
|
+
# Individual RSS feed URLs to fetch directly, without scanning a parent page for feed links
|
20
|
+
config :one_feed, :validate => :array, :default => []
|
17
21
|
|
18
22
|
# Set the interval for stoppable_sleep
|
19
23
|
config :interval, :validate => :number, :default => 200
|
@@ -24,9 +28,8 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
|
|
24
28
|
public
|
25
29
|
def register
|
26
30
|
@urls = []
|
27
|
-
@list_rss = @rss_list
|
28
31
|
@agent = Mechanize.new
|
29
|
-
|
32
|
+
@agent.agent.http.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
30
33
|
end # def register
|
31
34
|
|
32
35
|
|
@@ -34,104 +37,93 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
|
|
34
37
|
# we can abort the loop if stop? becomes true
|
35
38
|
while !stop?
|
36
39
|
|
37
|
-
@
|
38
|
-
|
39
|
-
puts "Read parent: " + @actual_rss
|
40
|
+
@multi_feed.each do |rss|
|
41
|
+
puts "Read parent: " + rss
|
40
42
|
begin
|
41
|
-
page = @agent.get(
|
43
|
+
page = @agent.get(rss)
|
42
44
|
page.links.each do |link|
|
43
45
|
if link.href.chars.last(3).join == "xml" && not_include_blacklist(link)
|
44
46
|
@urls << link.href
|
45
47
|
end
|
46
48
|
end
|
47
49
|
rescue
|
48
|
-
puts "Fail to get " +
|
50
|
+
puts "Fail to get " + rss + " feed"
|
49
51
|
end
|
50
52
|
|
51
53
|
links = @urls.uniq
|
52
|
-
|
53
54
|
links.each do |link|
|
54
55
|
begin
|
55
|
-
|
56
|
-
response = @agente.get(link)
|
57
|
-
@agente.agent.http.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
58
|
-
handle_response(response, queue)
|
56
|
+
response_link(link,queue)
|
59
57
|
puts "Read clidren: " + link
|
60
58
|
rescue
|
61
59
|
puts "Fail to get " + link
|
60
|
+
next
|
62
61
|
end
|
63
62
|
end
|
64
|
-
|
65
63
|
@urls.clear
|
66
64
|
|
67
65
|
end
|
68
66
|
|
67
|
+
@one_feed.each do |feed|
|
68
|
+
@urls << feed
|
69
|
+
end
|
70
|
+
links_o = @urls.uniq
|
71
|
+
links_o.each do |link|
|
72
|
+
begin
|
73
|
+
response_link(link,queue)
|
74
|
+
puts "Read clidren: " + link
|
75
|
+
rescue
|
76
|
+
puts "Fail to get " + link
|
77
|
+
next
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
@urls.clear
|
82
|
+
|
69
83
|
Stud.stoppable_sleep(@interval) { stop? }
|
70
84
|
end # loop
|
71
85
|
end # def run
|
72
86
|
|
73
87
|
|
74
88
|
def stop
|
75
|
-
|
76
|
-
# examples of common "stop" tasks:
|
77
|
-
# * close sockets (unblocking blocking reads/accets)
|
78
|
-
# * cleanup temporary files
|
79
|
-
# * terminate spawned threads
|
89
|
+
|
80
90
|
end
|
81
91
|
|
82
|
-
|
83
|
-
for i in 0..@blacklist.length-1
|
84
|
-
if link.href.include?(@blacklist[i])
|
85
|
-
return false
|
86
|
-
end
|
87
|
-
end
|
88
|
-
return true
|
89
|
-
end
|
90
|
-
|
91
|
-
|
92
|
-
def handle_response(response, queue)
|
93
|
-
body = response.body
|
92
|
+
def response_link(link, queue)
|
94
93
|
begin
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
puts "item have a enclosure field"
|
99
|
-
next
|
100
|
-
else
|
101
|
-
handle_rss_response(queue, item)
|
102
|
-
end
|
94
|
+
page = Nokogiri::XML(open(link))
|
95
|
+
page.search('item').each do |item|
|
96
|
+
link_rss_response(queue, item)
|
103
97
|
end
|
104
|
-
rescue RSS::MissingTagError => e
|
105
|
-
next
|
106
|
-
@logger.error("Invalid RSS feed", :exception => e)
|
107
|
-
rescue RSS::TooMuchTagError => ex
|
108
|
-
next
|
109
|
-
@logger.error("TooMuchTagError feed (have enclosure tag)", :exception => ex)
|
110
98
|
rescue => exc
|
111
|
-
|
99
|
+
puts "ERROR"
|
112
100
|
@logger.error("Uknown error while parsing the feed", :exception => exc)
|
113
101
|
end
|
114
102
|
end
|
115
103
|
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
event.
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
queue << event
|
104
|
+
def link_rss_response(queue, item)
|
105
|
+
event = LogStash::Event.new()
|
106
|
+
item.element_children.each do |x|
|
107
|
+
if x.inner_html.to_s.chars.first(9).join == "<![CDATA["
|
108
|
+
eve = LogStash::Event.new( x.name => x.inner_html.to_s[9..x.inner_html.to_s.length-4])
|
109
|
+
event.append( eve )
|
110
|
+
else
|
111
|
+
eve = LogStash::Event.new( x.name => x.inner_html.to_s )
|
112
|
+
event.append( eve )
|
126
113
|
end
|
114
|
+
end
|
115
|
+
decorate(event)
|
116
|
+
queue << event
|
127
117
|
end
|
128
118
|
|
129
|
-
def
|
130
|
-
|
119
|
+
def not_include_blacklist(link)
|
120
|
+
for i in 0..@blacklist.length-1
|
121
|
+
if link.href.include?(@blacklist[i])
|
122
|
+
return false
|
123
|
+
end
|
124
|
+
end
|
131
125
|
return true
|
132
|
-
|
133
|
-
|
134
|
-
end
|
135
|
-
end
|
126
|
+
end
|
127
|
+
|
136
128
|
|
137
129
|
end # class LogStash::Inputs::Multirss
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = 'logstash-input-multirss'
|
3
|
-
s.version = '0.
|
3
|
+
s.version = '1.0.0'
|
4
4
|
s.licenses = ['Apache-2.0']
|
5
5
|
s.summary = 'Simple multi rss plugin'
|
6
6
|
s.description = 'This plugin needs a list of links of different rss. Get all the links of the main feed pages and get all the content of each of the links.'
|
metadata
CHANGED
@@ -1,94 +1,94 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: logstash-input-multirss
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Felix R G
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-08-
|
11
|
+
date: 2018-08-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name: logstash-core
|
15
14
|
requirement: !ruby/object:Gem::Requirement
|
16
15
|
requirements:
|
17
16
|
- - ">="
|
18
17
|
- !ruby/object:Gem::Version
|
19
18
|
version: '0'
|
20
|
-
|
19
|
+
name: logstash-core
|
21
20
|
prerelease: false
|
21
|
+
type: :runtime
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name: logstash-codec-plain
|
29
28
|
requirement: !ruby/object:Gem::Requirement
|
30
29
|
requirements:
|
31
30
|
- - ">="
|
32
31
|
- !ruby/object:Gem::Version
|
33
32
|
version: '0'
|
34
|
-
|
33
|
+
name: logstash-codec-plain
|
35
34
|
prerelease: false
|
35
|
+
type: :runtime
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
-
name: stud
|
43
42
|
requirement: !ruby/object:Gem::Requirement
|
44
43
|
requirements:
|
45
44
|
- - ">="
|
46
45
|
- !ruby/object:Gem::Version
|
47
46
|
version: 0.0.22
|
48
|
-
|
47
|
+
name: stud
|
49
48
|
prerelease: false
|
49
|
+
type: :runtime
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: 0.0.22
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
|
-
name: logstash-devutils
|
57
56
|
requirement: !ruby/object:Gem::Requirement
|
58
57
|
requirements:
|
59
58
|
- - ">="
|
60
59
|
- !ruby/object:Gem::Version
|
61
60
|
version: 0.0.16
|
62
|
-
|
61
|
+
name: logstash-devutils
|
63
62
|
prerelease: false
|
63
|
+
type: :development
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
66
|
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: 0.0.16
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
|
-
name: mechanize
|
71
70
|
requirement: !ruby/object:Gem::Requirement
|
72
71
|
requirements:
|
73
72
|
- - ">="
|
74
73
|
- !ruby/object:Gem::Version
|
75
74
|
version: '0'
|
76
|
-
|
75
|
+
name: mechanize
|
77
76
|
prerelease: false
|
77
|
+
type: :runtime
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
80
|
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
|
-
name: nokogiri
|
85
84
|
requirement: !ruby/object:Gem::Requirement
|
86
85
|
requirements:
|
87
86
|
- - ">="
|
88
87
|
- !ruby/object:Gem::Version
|
89
88
|
version: '0'
|
90
|
-
|
89
|
+
name: nokogiri
|
91
90
|
prerelease: false
|
91
|
+
type: :runtime
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
94
|
- - ">="
|
@@ -108,8 +108,6 @@ files:
|
|
108
108
|
- LICENSE
|
109
109
|
- README.md
|
110
110
|
- lib/logstash/inputs/multirss.rb
|
111
|
-
- lib/logstash/inputs/multirss.rb.bk
|
112
|
-
- lib/logstash/inputs/multirss.rb.bk2
|
113
111
|
- logstash-input-multirss.gemspec
|
114
112
|
- spec/inputs/multirss_spec.rb
|
115
113
|
homepage: https://github.com/felixramirezgarcia/logstash-input-multirss
|
@@ -118,7 +116,7 @@ licenses:
|
|
118
116
|
metadata:
|
119
117
|
logstash_plugin: 'true'
|
120
118
|
logstash_group: input
|
121
|
-
post_install_message:
|
119
|
+
post_install_message:
|
122
120
|
rdoc_options: []
|
123
121
|
require_paths:
|
124
122
|
- lib
|
@@ -133,9 +131,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
133
131
|
- !ruby/object:Gem::Version
|
134
132
|
version: '0'
|
135
133
|
requirements: []
|
136
|
-
rubyforge_project:
|
137
|
-
rubygems_version: 2.
|
138
|
-
signing_key:
|
134
|
+
rubyforge_project:
|
135
|
+
rubygems_version: 2.6.13
|
136
|
+
signing_key:
|
139
137
|
specification_version: 4
|
140
138
|
summary: Simple multi rss plugin
|
141
139
|
test_files:
|
@@ -1,51 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
require "logstash/inputs/base"
|
3
|
-
require "logstash/namespace"
|
4
|
-
require "stud/interval"
|
5
|
-
require "socket" # for Socket.gethostname
|
6
|
-
|
7
|
-
# Generate a repeating message.
|
8
|
-
#
|
9
|
-
# This plugin is intented only as an example.
|
10
|
-
|
11
|
-
class LogStash::Inputs::Multirss < LogStash::Inputs::Base
|
12
|
-
config_name "multirss"
|
13
|
-
|
14
|
-
# If undefined, Logstash will complain, even if codec is unused.
|
15
|
-
default :codec, "plain"
|
16
|
-
|
17
|
-
# The message string to use in the event.
|
18
|
-
config :message, :validate => :string, :default => "Hello World!"
|
19
|
-
|
20
|
-
# Set how frequently messages should be sent.
|
21
|
-
#
|
22
|
-
# The default, `1`, means send a message every second.
|
23
|
-
config :interval, :validate => :number, :default => 1
|
24
|
-
|
25
|
-
public
|
26
|
-
def register
|
27
|
-
@host = Socket.gethostname
|
28
|
-
end # def register
|
29
|
-
|
30
|
-
def run(queue)
|
31
|
-
# we can abort the loop if stop? becomes true
|
32
|
-
while !stop?
|
33
|
-
event = LogStash::Event.new("message" => @message, "host" => @host)
|
34
|
-
decorate(event)
|
35
|
-
queue << event
|
36
|
-
# because the sleep interval can be big, when shutdown happens
|
37
|
-
# we want to be able to abort the sleep
|
38
|
-
# Stud.stoppable_sleep will frequently evaluate the given block
|
39
|
-
# and abort the sleep(@interval) if the return value is true
|
40
|
-
Stud.stoppable_sleep(@interval) { stop? }
|
41
|
-
end # loop
|
42
|
-
end # def run
|
43
|
-
|
44
|
-
def stop
|
45
|
-
# nothing to do in this case so it is not necessary to define stop
|
46
|
-
# examples of common "stop" tasks:
|
47
|
-
# * close sockets (unblocking blocking reads/accepts)
|
48
|
-
# * cleanup temporary files
|
49
|
-
# * terminate spawned threads
|
50
|
-
end
|
51
|
-
end # class LogStash::Inputs::Multirss
|
@@ -1,117 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
require "logstash/inputs/base"
|
3
|
-
require "logstash/namespace"
|
4
|
-
require "stud/interval"
|
5
|
-
require "net/http"
|
6
|
-
require "uri"
|
7
|
-
require "mechanize"
|
8
|
-
require "faraday"
|
9
|
-
require "rss"
|
10
|
-
|
11
|
-
class LogStash::Inputs::Crawler < LogStash::Inputs::Base
|
12
|
-
config_name "multirss"
|
13
|
-
|
14
|
-
# If undefined, Logstash will complain, even if codec is unused.
|
15
|
-
default :codec, "plain"
|
16
|
-
|
17
|
-
# The message string to use in the event.
|
18
|
-
config :urls, :validate => :array, :required => true
|
19
|
-
|
20
|
-
#Set de interval for stoppable_sleep
|
21
|
-
config :interval, :validate => :number, :default => 86400
|
22
|
-
|
23
|
-
#Domains to exclude
|
24
|
-
config :blacklist, :validate => :array , :default => ['http://fusion.google.com/','yahoo.com','live.com','netvibes.com']
|
25
|
-
|
26
|
-
public
|
27
|
-
def register
|
28
|
-
@urls = []
|
29
|
-
puts "**********************************************************"
|
30
|
-
puts "STARTING MULTI-RSS"
|
31
|
-
puts "*******************************************************"
|
32
|
-
@agent = Mechanize.new
|
33
|
-
@agent.agent.http.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
34
|
-
end # def register
|
35
|
-
|
36
|
-
|
37
|
-
def run(queue)
|
38
|
-
# we can abort the loop if stop? becomes true
|
39
|
-
while !stop?
|
40
|
-
puts "********************ENTRA1"
|
41
|
-
@urls.each do |url|
|
42
|
-
page = @agent.get(@url)
|
43
|
-
page.links.each do |link|
|
44
|
-
if link.href.chars.last(3).join == "xml" && valid_link?(link)
|
45
|
-
puts "*******************************************************"
|
46
|
-
puts link.href
|
47
|
-
@new_urls << link.href
|
48
|
-
end
|
49
|
-
end
|
50
|
-
|
51
|
-
links = @new_urls.uniq
|
52
|
-
|
53
|
-
links.each do |link|
|
54
|
-
puts "********************ENTRA2"
|
55
|
-
response = @agent.get(link)
|
56
|
-
handle_response(response, queue)
|
57
|
-
end
|
58
|
-
|
59
|
-
end #urls.each
|
60
|
-
Stud.stoppable_sleep(@interval) { stop? }
|
61
|
-
end # loop
|
62
|
-
end # def run
|
63
|
-
|
64
|
-
|
65
|
-
def stop
|
66
|
-
# nothing to do in this case so it is not necessary to define stop
|
67
|
-
# examples of common "stop" tasks:
|
68
|
-
# * close sockets (unblocking blocking reads/accets)
|
69
|
-
# * cleanup temporary files
|
70
|
-
# * terminate spawned threads
|
71
|
-
end
|
72
|
-
|
73
|
-
def valid_link?(link)
|
74
|
-
puts "********************ENTRA3"
|
75
|
-
@blacklist.each do |black_link|
|
76
|
-
if link.href.include?(black_link)
|
77
|
-
return false
|
78
|
-
end
|
79
|
-
end
|
80
|
-
return true
|
81
|
-
end
|
82
|
-
|
83
|
-
|
84
|
-
def handle_response(response, queue)
|
85
|
-
puts "********************ENTRA4"
|
86
|
-
body = response.body
|
87
|
-
begin
|
88
|
-
feed = RSS::Parser.parse(body)
|
89
|
-
feed.items.each do |item|
|
90
|
-
# Put each item into an event
|
91
|
-
@logger.debug("Item", :item => item.author)
|
92
|
-
handle_rss_response(queue, item)
|
93
|
-
end
|
94
|
-
rescue RSS::MissingTagError => e
|
95
|
-
@logger.error("Invalid RSS feed", :exception => e)
|
96
|
-
rescue => e
|
97
|
-
@logger.error("Uknown error while parsing the feed", :exception => e)
|
98
|
-
end
|
99
|
-
end
|
100
|
-
|
101
|
-
|
102
|
-
def handle_rss_response(queue, item)
|
103
|
-
puts "********************ENTRA5"
|
104
|
-
@codec.decode(item.description) do |event|
|
105
|
-
event.set("Feed", @url)
|
106
|
-
event.set("published", item.pubDate)
|
107
|
-
event.set("title", item.title)
|
108
|
-
event.set("link", item.link)
|
109
|
-
event.set("author", item.author)
|
110
|
-
decorate(event)
|
111
|
-
queue << event
|
112
|
-
end
|
113
|
-
end
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
end # class LogStash::Inputs::Crawler
|