logstash-input-multirss 0.1.1 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: efd60b980a94b246bcc89d1bda92d59e25d59734
4
- data.tar.gz: 8f92650a28b9d7af40036a3a0df5d9fd3f5f339e
2
+ SHA256:
3
+ metadata.gz: cad07458c379cfcd9f3a7cbb9608f693a33efe5dfee0a2087387df491c135218
4
+ data.tar.gz: a5956c590a9e0d9667b278de7f690d3a55ce9eaf886919f2b355c9e12fff8675
5
5
  SHA512:
6
- metadata.gz: 7f4d76bf1c671c441a01b0c7b1b6bb64a21ead59bea3d3625a594b02c595688efdf14577350ca5a209fe089a9e4bd01d686e07c99e24b32a06e4173fdc99d033
7
- data.tar.gz: e6df04af7ec7b0882ac5882f7692339fbca247b79cc562120d2a672a03331e52a5fa528922d91c267b3eb1cc98d1ac97ac3dced91035110af8052de281461beb
6
+ metadata.gz: 6611bda686c887dbaebba928718cd6521e4c2076e0c44feabdd1e13e89aef3aeb6a80cf8861dfba40b8b9a442f381262bc72023e6d3b3a14c86b94aadf3611ea
7
+ data.tar.gz: d73bfa18ec57247ff7b566f94e2d6e6a9e710bd09b5b5563034bb9cc74c46ddc2bc5c9f8e13f268ceb259913195922388f3efe2f0c952689e88cee932cb6a453
@@ -6,6 +6,7 @@ require "net/http"
6
6
  require "uri"
7
7
  require "mechanize"
8
8
  require "rss"
9
+ require "nokogiri"
9
10
 
10
11
  class LogStash::Inputs::Multirss < LogStash::Inputs::Base
11
12
  config_name "multirss"
@@ -13,7 +14,10 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
13
14
  default :codec, "plain"
14
15
 
15
16
  # The rss array list to use in the pipe
16
- config :rss_list, :validate => :array, :required => true
17
+ config :multi_feed, :validate => :array, :required => true
18
+
19
+ # The rss array list to use in the pipe
20
+ config :one_feed, :validate => :array, :default => []
17
21
 
18
22
  #Set de interval for stoppable_sleep
19
23
  config :interval, :validate => :number, :default => 200
@@ -24,9 +28,8 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
24
28
  public
25
29
  def register
26
30
  @urls = []
27
- @list_rss = @rss_list
28
31
  @agent = Mechanize.new
29
- @agent.agent.http.verify_mode = OpenSSL::SSL::VERIFY_NONE
32
+ @agent.agent.http.verify_mode = OpenSSL::SSL::VERIFY_NONE
30
33
  end # def register
31
34
 
32
35
 
@@ -34,104 +37,93 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
34
37
  # we can abort the loop if stop? becomes true
35
38
  while !stop?
36
39
 
37
- @rss_list.each do |rss|
38
- @actual_rss = rss
39
- puts "Read parent: " + @actual_rss
40
+ @multi_feed.each do |rss|
41
+ puts "Read parent: " + rss
40
42
  begin
41
- page = @agent.get(@actual_rss)
43
+ page = @agent.get(rss)
42
44
  page.links.each do |link|
43
45
  if link.href.chars.last(3).join == "xml" && not_include_blacklist(link)
44
46
  @urls << link.href
45
47
  end
46
48
  end
47
49
  rescue
48
- puts "Fail to get " + @actual_rss + "feed"
50
+ puts "Fail to get " + rss + " feed"
49
51
  end
50
52
 
51
53
  links = @urls.uniq
52
-
53
54
  links.each do |link|
54
55
  begin
55
- @agente = Mechanize.new
56
- response = @agente.get(link)
57
- @agente.agent.http.verify_mode = OpenSSL::SSL::VERIFY_NONE
58
- handle_response(response, queue)
56
+ response_link(link,queue)
59
57
  puts "Read clidren: " + link
60
58
  rescue
61
59
  puts "Fail to get " + link
60
+ next
62
61
  end
63
62
  end
64
-
65
63
  @urls.clear
66
64
 
67
65
  end
68
66
 
67
+ @one_feed.each do |feed|
68
+ @urls << feed
69
+ end
70
+ links_o = @urls.uniq
71
+ links_o.each do |link|
72
+ begin
73
+ response_link(link,queue)
74
+ puts "Read clidren: " + link
75
+ rescue
76
+ puts "Fail to get " + link
77
+ next
78
+ end
79
+ end
80
+
81
+ @urls.clear
82
+
69
83
  Stud.stoppable_sleep(@interval) { stop? }
70
84
  end # loop
71
85
  end # def run
72
86
 
73
87
 
74
88
  def stop
75
- # nothing to do in this case so it is not necessary to define stop
76
- # examples of common "stop" tasks:
77
- # * close sockets (unblocking blocking reads/accets)
78
- # * cleanup temporary files
79
- # * terminate spawned threads
89
+
80
90
  end
81
91
 
82
- def not_include_blacklist(link)
83
- for i in 0..@blacklist.length-1
84
- if link.href.include?(@blacklist[i])
85
- return false
86
- end
87
- end
88
- return true
89
- end
90
-
91
-
92
- def handle_response(response, queue)
93
- body = response.body
92
+ def response_link(link, queue)
94
93
  begin
95
- feed = RSS::Parser.parse(body)
96
- feed.items.each do |item|
97
- if has_enclosure?(item)
98
- puts "item have a enclosure field"
99
- next
100
- else
101
- handle_rss_response(queue, item)
102
- end
94
+ page = Nokogiri::XML(open(link))
95
+ page.search('item').each do |item|
96
+ link_rss_response(queue, item)
103
97
  end
104
- rescue RSS::MissingTagError => e
105
- next
106
- @logger.error("Invalid RSS feed", :exception => e)
107
- rescue RSS::TooMuchTagError => ex
108
- next
109
- @logger.error("TooMuchTagError feed (have enclosure tag)", :exception => ex)
110
98
  rescue => exc
111
- next
99
+ puts "ERROR"
112
100
  @logger.error("Uknown error while parsing the feed", :exception => exc)
113
101
  end
114
102
  end
115
103
 
116
-
117
- def handle_rss_response(queue, item)
118
- @codec.decode(item.description) do |event|
119
- event.set("Feed", @actual_rss)
120
- event.set("published", item.pubDate)
121
- event.set("title", item.title)
122
- event.set("link", item.link)
123
- event.set("author", item.author)
124
- decorate(event)
125
- queue << event
104
+ def link_rss_response(queue, item)
105
+ event = LogStash::Event.new()
106
+ item.element_children.each do |x|
107
+ if x.inner_html.to_s.chars.first(9).join == "<![CDATA["
108
+ eve = LogStash::Event.new( x.name => x.inner_html.to_s[9..x.inner_html.to_s.length-4])
109
+ event.append( eve )
110
+ else
111
+ eve = LogStash::Event.new( x.name => x.inner_html.to_s )
112
+ event.append( eve )
126
113
  end
114
+ end
115
+ decorate(event)
116
+ queue << event
127
117
  end
128
118
 
129
- def has_enclosure?(item)
130
- if item.enclosure
119
+ def not_include_blacklist(link)
120
+ for i in 0..@blacklist.length-1
121
+ if link.href.include?(@blacklist[i])
122
+ return false
123
+ end
124
+ end
131
125
  return true
132
- else
133
- return false
134
- end
135
- end
126
+ end
127
+
136
128
 
137
129
  end # class LogStash::Inputs::Crawler
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'logstash-input-multirss'
3
- s.version = '0.1.1'
3
+ s.version = '1.0.0'
4
4
  s.licenses = ['Apache-2.0']
5
5
  s.summary = 'Simple multi rss plugin'
6
6
  s.description = 'This plugin needs a list of links of different rss. Get all the links of the main feed pages and get all the content of each of the links.'
metadata CHANGED
@@ -1,94 +1,94 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-input-multirss
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Felix R G
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-08-07 00:00:00.000000000 Z
11
+ date: 2018-08-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: logstash-core
15
14
  requirement: !ruby/object:Gem::Requirement
16
15
  requirements:
17
16
  - - ">="
18
17
  - !ruby/object:Gem::Version
19
18
  version: '0'
20
- type: :runtime
19
+ name: logstash-core
21
20
  prerelease: false
21
+ type: :runtime
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
27
  - !ruby/object:Gem::Dependency
28
- name: logstash-codec-plain
29
28
  requirement: !ruby/object:Gem::Requirement
30
29
  requirements:
31
30
  - - ">="
32
31
  - !ruby/object:Gem::Version
33
32
  version: '0'
34
- type: :runtime
33
+ name: logstash-codec-plain
35
34
  prerelease: false
35
+ type: :runtime
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
- name: stud
43
42
  requirement: !ruby/object:Gem::Requirement
44
43
  requirements:
45
44
  - - ">="
46
45
  - !ruby/object:Gem::Version
47
46
  version: 0.0.22
48
- type: :runtime
47
+ name: stud
49
48
  prerelease: false
49
+ type: :runtime
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - ">="
53
53
  - !ruby/object:Gem::Version
54
54
  version: 0.0.22
55
55
  - !ruby/object:Gem::Dependency
56
- name: logstash-devutils
57
56
  requirement: !ruby/object:Gem::Requirement
58
57
  requirements:
59
58
  - - ">="
60
59
  - !ruby/object:Gem::Version
61
60
  version: 0.0.16
62
- type: :development
61
+ name: logstash-devutils
63
62
  prerelease: false
63
+ type: :development
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - ">="
67
67
  - !ruby/object:Gem::Version
68
68
  version: 0.0.16
69
69
  - !ruby/object:Gem::Dependency
70
- name: mechanize
71
70
  requirement: !ruby/object:Gem::Requirement
72
71
  requirements:
73
72
  - - ">="
74
73
  - !ruby/object:Gem::Version
75
74
  version: '0'
76
- type: :runtime
75
+ name: mechanize
77
76
  prerelease: false
77
+ type: :runtime
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
80
  - - ">="
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0'
83
83
  - !ruby/object:Gem::Dependency
84
- name: nokogiri
85
84
  requirement: !ruby/object:Gem::Requirement
86
85
  requirements:
87
86
  - - ">="
88
87
  - !ruby/object:Gem::Version
89
88
  version: '0'
90
- type: :runtime
89
+ name: nokogiri
91
90
  prerelease: false
91
+ type: :runtime
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
94
  - - ">="
@@ -108,8 +108,6 @@ files:
108
108
  - LICENSE
109
109
  - README.md
110
110
  - lib/logstash/inputs/multirss.rb
111
- - lib/logstash/inputs/multirss.rb.bk
112
- - lib/logstash/inputs/multirss.rb.bk2
113
111
  - logstash-input-multirss.gemspec
114
112
  - spec/inputs/multirss_spec.rb
115
113
  homepage: https://github.com/felixramirezgarcia/logstash-input-multirss
@@ -118,7 +116,7 @@ licenses:
118
116
  metadata:
119
117
  logstash_plugin: 'true'
120
118
  logstash_group: input
121
- post_install_message:
119
+ post_install_message:
122
120
  rdoc_options: []
123
121
  require_paths:
124
122
  - lib
@@ -133,9 +131,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
133
131
  - !ruby/object:Gem::Version
134
132
  version: '0'
135
133
  requirements: []
136
- rubyforge_project:
137
- rubygems_version: 2.5.2.1
138
- signing_key:
134
+ rubyforge_project:
135
+ rubygems_version: 2.6.13
136
+ signing_key:
139
137
  specification_version: 4
140
138
  summary: Simple multi rss plugin
141
139
  test_files:
@@ -1,51 +0,0 @@
1
- # encoding: utf-8
2
- require "logstash/inputs/base"
3
- require "logstash/namespace"
4
- require "stud/interval"
5
- require "socket" # for Socket.gethostname
6
-
7
- # Generate a repeating message.
8
- #
9
- # This plugin is intented only as an example.
10
-
11
- class LogStash::Inputs::Multirss < LogStash::Inputs::Base
12
- config_name "multirss"
13
-
14
- # If undefined, Logstash will complain, even if codec is unused.
15
- default :codec, "plain"
16
-
17
- # The message string to use in the event.
18
- config :message, :validate => :string, :default => "Hello World!"
19
-
20
- # Set how frequently messages should be sent.
21
- #
22
- # The default, `1`, means send a message every second.
23
- config :interval, :validate => :number, :default => 1
24
-
25
- public
26
- def register
27
- @host = Socket.gethostname
28
- end # def register
29
-
30
- def run(queue)
31
- # we can abort the loop if stop? becomes true
32
- while !stop?
33
- event = LogStash::Event.new("message" => @message, "host" => @host)
34
- decorate(event)
35
- queue << event
36
- # because the sleep interval can be big, when shutdown happens
37
- # we want to be able to abort the sleep
38
- # Stud.stoppable_sleep will frequently evaluate the given block
39
- # and abort the sleep(@interval) if the return value is true
40
- Stud.stoppable_sleep(@interval) { stop? }
41
- end # loop
42
- end # def run
43
-
44
- def stop
45
- # nothing to do in this case so it is not necessary to define stop
46
- # examples of common "stop" tasks:
47
- # * close sockets (unblocking blocking reads/accepts)
48
- # * cleanup temporary files
49
- # * terminate spawned threads
50
- end
51
- end # class LogStash::Inputs::Multirss
@@ -1,117 +0,0 @@
1
- # encoding: utf-8
2
- require "logstash/inputs/base"
3
- require "logstash/namespace"
4
- require "stud/interval"
5
- require "net/http"
6
- require "uri"
7
- require "mechanize"
8
- require "faraday"
9
- require "rss"
10
-
11
- class LogStash::Inputs::Crawler < LogStash::Inputs::Base
12
- config_name "multirss"
13
-
14
- # If undefined, Logstash will complain, even if codec is unused.
15
- default :codec, "plain"
16
-
17
- # The message string to use in the event.
18
- config :urls, :validate => :array, :required => true
19
-
20
- #Set de interval for stoppable_sleep
21
- config :interval, :validate => :number, :default => 86400
22
-
23
- #Domains to exclude
24
- config :blacklist, :validate => :array , :default => ['http://fusion.google.com/','yahoo.com','live.com','netvibes.com']
25
-
26
- public
27
- def register
28
- @urls = []
29
- puts "**********************************************************"
30
- puts "STARTING MULTI-RSS"
31
- puts "*******************************************************"
32
- @agent = Mechanize.new
33
- @agent.agent.http.verify_mode = OpenSSL::SSL::VERIFY_NONE
34
- end # def register
35
-
36
-
37
- def run(queue)
38
- # we can abort the loop if stop? becomes true
39
- while !stop?
40
- puts "********************ENTRA1"
41
- @urls.each do |url|
42
- page = @agent.get(@url)
43
- page.links.each do |link|
44
- if link.href.chars.last(3).join == "xml" && valid_link?(link)
45
- puts "*******************************************************"
46
- puts link.href
47
- @new_urls << link.href
48
- end
49
- end
50
-
51
- links = @new_urls.uniq
52
-
53
- links.each do |link|
54
- puts "********************ENTRA2"
55
- response = @agent.get(link)
56
- handle_response(response, queue)
57
- end
58
-
59
- end #urls.each
60
- Stud.stoppable_sleep(@interval) { stop? }
61
- end # loop
62
- end # def run
63
-
64
-
65
- def stop
66
- # nothing to do in this case so it is not necessary to define stop
67
- # examples of common "stop" tasks:
68
- # * close sockets (unblocking blocking reads/accets)
69
- # * cleanup temporary files
70
- # * terminate spawned threads
71
- end
72
-
73
- def valid_link?(link)
74
- puts "********************ENTRA3"
75
- @blacklist.each do |black_link|
76
- if link.href.include?(black_link)
77
- return false
78
- end
79
- end
80
- return true
81
- end
82
-
83
-
84
- def handle_response(response, queue)
85
- puts "********************ENTRA4"
86
- body = response.body
87
- begin
88
- feed = RSS::Parser.parse(body)
89
- feed.items.each do |item|
90
- # Put each item into an event
91
- @logger.debug("Item", :item => item.author)
92
- handle_rss_response(queue, item)
93
- end
94
- rescue RSS::MissingTagError => e
95
- @logger.error("Invalid RSS feed", :exception => e)
96
- rescue => e
97
- @logger.error("Uknown error while parsing the feed", :exception => e)
98
- end
99
- end
100
-
101
-
102
- def handle_rss_response(queue, item)
103
- puts "********************ENTRA5"
104
- @codec.decode(item.description) do |event|
105
- event.set("Feed", @url)
106
- event.set("published", item.pubDate)
107
- event.set("title", item.title)
108
- event.set("link", item.link)
109
- event.set("author", item.author)
110
- decorate(event)
111
- queue << event
112
- end
113
- end
114
-
115
-
116
-
117
- end # class LogStash::Inputs::Crawler