logstash-input-multirss 0.1.1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: efd60b980a94b246bcc89d1bda92d59e25d59734
4
- data.tar.gz: 8f92650a28b9d7af40036a3a0df5d9fd3f5f339e
2
+ SHA256:
3
+ metadata.gz: cad07458c379cfcd9f3a7cbb9608f693a33efe5dfee0a2087387df491c135218
4
+ data.tar.gz: a5956c590a9e0d9667b278de7f690d3a55ce9eaf886919f2b355c9e12fff8675
5
5
  SHA512:
6
- metadata.gz: 7f4d76bf1c671c441a01b0c7b1b6bb64a21ead59bea3d3625a594b02c595688efdf14577350ca5a209fe089a9e4bd01d686e07c99e24b32a06e4173fdc99d033
7
- data.tar.gz: e6df04af7ec7b0882ac5882f7692339fbca247b79cc562120d2a672a03331e52a5fa528922d91c267b3eb1cc98d1ac97ac3dced91035110af8052de281461beb
6
+ metadata.gz: 6611bda686c887dbaebba928718cd6521e4c2076e0c44feabdd1e13e89aef3aeb6a80cf8861dfba40b8b9a442f381262bc72023e6d3b3a14c86b94aadf3611ea
7
+ data.tar.gz: d73bfa18ec57247ff7b566f94e2d6e6a9e710bd09b5b5563034bb9cc74c46ddc2bc5c9f8e13f268ceb259913195922388f3efe2f0c952689e88cee932cb6a453
@@ -6,6 +6,7 @@ require "net/http"
6
6
  require "uri"
7
7
  require "mechanize"
8
8
  require "rss"
9
+ require "nokogiri"
9
10
 
10
11
  class LogStash::Inputs::Multirss < LogStash::Inputs::Base
11
12
  config_name "multirss"
@@ -13,7 +14,10 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
13
14
  default :codec, "plain"
14
15
 
15
16
  # The rss array list to use in the pipe
16
- config :rss_list, :validate => :array, :required => true
17
+ config :multi_feed, :validate => :array, :required => true
18
+
19
+ # The rss array list to use in the pipe
20
+ config :one_feed, :validate => :array, :default => []
17
21
 
18
22
  #Set de interval for stoppable_sleep
19
23
  config :interval, :validate => :number, :default => 200
@@ -24,9 +28,8 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
24
28
  public
25
29
  def register
26
30
  @urls = []
27
- @list_rss = @rss_list
28
31
  @agent = Mechanize.new
29
- @agent.agent.http.verify_mode = OpenSSL::SSL::VERIFY_NONE
32
+ @agent.agent.http.verify_mode = OpenSSL::SSL::VERIFY_NONE
30
33
  end # def register
31
34
 
32
35
 
@@ -34,104 +37,93 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
34
37
  # we can abort the loop if stop? becomes true
35
38
  while !stop?
36
39
 
37
- @rss_list.each do |rss|
38
- @actual_rss = rss
39
- puts "Read parent: " + @actual_rss
40
+ @multi_feed.each do |rss|
41
+ puts "Read parent: " + rss
40
42
  begin
41
- page = @agent.get(@actual_rss)
43
+ page = @agent.get(rss)
42
44
  page.links.each do |link|
43
45
  if link.href.chars.last(3).join == "xml" && not_include_blacklist(link)
44
46
  @urls << link.href
45
47
  end
46
48
  end
47
49
  rescue
48
- puts "Fail to get " + @actual_rss + "feed"
50
+ puts "Fail to get " + rss + " feed"
49
51
  end
50
52
 
51
53
  links = @urls.uniq
52
-
53
54
  links.each do |link|
54
55
  begin
55
- @agente = Mechanize.new
56
- response = @agente.get(link)
57
- @agente.agent.http.verify_mode = OpenSSL::SSL::VERIFY_NONE
58
- handle_response(response, queue)
56
+ response_link(link,queue)
59
57
  puts "Read clidren: " + link
60
58
  rescue
61
59
  puts "Fail to get " + link
60
+ next
62
61
  end
63
62
  end
64
-
65
63
  @urls.clear
66
64
 
67
65
  end
68
66
 
67
+ @one_feed.each do |feed|
68
+ @urls << feed
69
+ end
70
+ links_o = @urls.uniq
71
+ links_o.each do |link|
72
+ begin
73
+ response_link(link,queue)
74
+ puts "Read clidren: " + link
75
+ rescue
76
+ puts "Fail to get " + link
77
+ next
78
+ end
79
+ end
80
+
81
+ @urls.clear
82
+
69
83
  Stud.stoppable_sleep(@interval) { stop? }
70
84
  end # loop
71
85
  end # def run
72
86
 
73
87
 
74
88
  def stop
75
- # nothing to do in this case so it is not necessary to define stop
76
- # examples of common "stop" tasks:
77
- # * close sockets (unblocking blocking reads/accets)
78
- # * cleanup temporary files
79
- # * terminate spawned threads
89
+
80
90
  end
81
91
 
82
- def not_include_blacklist(link)
83
- for i in 0..@blacklist.length-1
84
- if link.href.include?(@blacklist[i])
85
- return false
86
- end
87
- end
88
- return true
89
- end
90
-
91
-
92
- def handle_response(response, queue)
93
- body = response.body
92
+ def response_link(link, queue)
94
93
  begin
95
- feed = RSS::Parser.parse(body)
96
- feed.items.each do |item|
97
- if has_enclosure?(item)
98
- puts "item have a enclosure field"
99
- next
100
- else
101
- handle_rss_response(queue, item)
102
- end
94
+ page = Nokogiri::XML(open(link))
95
+ page.search('item').each do |item|
96
+ link_rss_response(queue, item)
103
97
  end
104
- rescue RSS::MissingTagError => e
105
- next
106
- @logger.error("Invalid RSS feed", :exception => e)
107
- rescue RSS::TooMuchTagError => ex
108
- next
109
- @logger.error("TooMuchTagError feed (have enclosure tag)", :exception => ex)
110
98
  rescue => exc
111
- next
99
+ puts "ERROR"
112
100
  @logger.error("Uknown error while parsing the feed", :exception => exc)
113
101
  end
114
102
  end
115
103
 
116
-
117
- def handle_rss_response(queue, item)
118
- @codec.decode(item.description) do |event|
119
- event.set("Feed", @actual_rss)
120
- event.set("published", item.pubDate)
121
- event.set("title", item.title)
122
- event.set("link", item.link)
123
- event.set("author", item.author)
124
- decorate(event)
125
- queue << event
104
+ def link_rss_response(queue, item)
105
+ event = LogStash::Event.new()
106
+ item.element_children.each do |x|
107
+ if x.inner_html.to_s.chars.first(9).join == "<![CDATA["
108
+ eve = LogStash::Event.new( x.name => x.inner_html.to_s[9..x.inner_html.to_s.length-4])
109
+ event.append( eve )
110
+ else
111
+ eve = LogStash::Event.new( x.name => x.inner_html.to_s )
112
+ event.append( eve )
126
113
  end
114
+ end
115
+ decorate(event)
116
+ queue << event
127
117
  end
128
118
 
129
- def has_enclosure?(item)
130
- if item.enclosure
119
+ def not_include_blacklist(link)
120
+ for i in 0..@blacklist.length-1
121
+ if link.href.include?(@blacklist[i])
122
+ return false
123
+ end
124
+ end
131
125
  return true
132
- else
133
- return false
134
- end
135
- end
126
+ end
127
+
136
128
 
137
129
  end # class LogStash::Inputs::Crawler
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'logstash-input-multirss'
3
- s.version = '0.1.1'
3
+ s.version = '1.0.0'
4
4
  s.licenses = ['Apache-2.0']
5
5
  s.summary = 'Simple multi rss plugin'
6
6
  s.description = 'This plugin needs a list of links of different rss. Get all the links of the main feed pages and get all the content of each of the links.'
metadata CHANGED
@@ -1,94 +1,94 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-input-multirss
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Felix R G
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-08-07 00:00:00.000000000 Z
11
+ date: 2018-08-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: logstash-core
15
14
  requirement: !ruby/object:Gem::Requirement
16
15
  requirements:
17
16
  - - ">="
18
17
  - !ruby/object:Gem::Version
19
18
  version: '0'
20
- type: :runtime
19
+ name: logstash-core
21
20
  prerelease: false
21
+ type: :runtime
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
27
  - !ruby/object:Gem::Dependency
28
- name: logstash-codec-plain
29
28
  requirement: !ruby/object:Gem::Requirement
30
29
  requirements:
31
30
  - - ">="
32
31
  - !ruby/object:Gem::Version
33
32
  version: '0'
34
- type: :runtime
33
+ name: logstash-codec-plain
35
34
  prerelease: false
35
+ type: :runtime
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
- name: stud
43
42
  requirement: !ruby/object:Gem::Requirement
44
43
  requirements:
45
44
  - - ">="
46
45
  - !ruby/object:Gem::Version
47
46
  version: 0.0.22
48
- type: :runtime
47
+ name: stud
49
48
  prerelease: false
49
+ type: :runtime
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - ">="
53
53
  - !ruby/object:Gem::Version
54
54
  version: 0.0.22
55
55
  - !ruby/object:Gem::Dependency
56
- name: logstash-devutils
57
56
  requirement: !ruby/object:Gem::Requirement
58
57
  requirements:
59
58
  - - ">="
60
59
  - !ruby/object:Gem::Version
61
60
  version: 0.0.16
62
- type: :development
61
+ name: logstash-devutils
63
62
  prerelease: false
63
+ type: :development
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - ">="
67
67
  - !ruby/object:Gem::Version
68
68
  version: 0.0.16
69
69
  - !ruby/object:Gem::Dependency
70
- name: mechanize
71
70
  requirement: !ruby/object:Gem::Requirement
72
71
  requirements:
73
72
  - - ">="
74
73
  - !ruby/object:Gem::Version
75
74
  version: '0'
76
- type: :runtime
75
+ name: mechanize
77
76
  prerelease: false
77
+ type: :runtime
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
80
  - - ">="
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0'
83
83
  - !ruby/object:Gem::Dependency
84
- name: nokogiri
85
84
  requirement: !ruby/object:Gem::Requirement
86
85
  requirements:
87
86
  - - ">="
88
87
  - !ruby/object:Gem::Version
89
88
  version: '0'
90
- type: :runtime
89
+ name: nokogiri
91
90
  prerelease: false
91
+ type: :runtime
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
94
  - - ">="
@@ -108,8 +108,6 @@ files:
108
108
  - LICENSE
109
109
  - README.md
110
110
  - lib/logstash/inputs/multirss.rb
111
- - lib/logstash/inputs/multirss.rb.bk
112
- - lib/logstash/inputs/multirss.rb.bk2
113
111
  - logstash-input-multirss.gemspec
114
112
  - spec/inputs/multirss_spec.rb
115
113
  homepage: https://github.com/felixramirezgarcia/logstash-input-multirss
@@ -118,7 +116,7 @@ licenses:
118
116
  metadata:
119
117
  logstash_plugin: 'true'
120
118
  logstash_group: input
121
- post_install_message:
119
+ post_install_message:
122
120
  rdoc_options: []
123
121
  require_paths:
124
122
  - lib
@@ -133,9 +131,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
133
131
  - !ruby/object:Gem::Version
134
132
  version: '0'
135
133
  requirements: []
136
- rubyforge_project:
137
- rubygems_version: 2.5.2.1
138
- signing_key:
134
+ rubyforge_project:
135
+ rubygems_version: 2.6.13
136
+ signing_key:
139
137
  specification_version: 4
140
138
  summary: Simple multi rss plugin
141
139
  test_files:
@@ -1,51 +0,0 @@
1
- # encoding: utf-8
2
- require "logstash/inputs/base"
3
- require "logstash/namespace"
4
- require "stud/interval"
5
- require "socket" # for Socket.gethostname
6
-
7
- # Generate a repeating message.
8
- #
9
- # This plugin is intented only as an example.
10
-
11
- class LogStash::Inputs::Multirss < LogStash::Inputs::Base
12
- config_name "multirss"
13
-
14
- # If undefined, Logstash will complain, even if codec is unused.
15
- default :codec, "plain"
16
-
17
- # The message string to use in the event.
18
- config :message, :validate => :string, :default => "Hello World!"
19
-
20
- # Set how frequently messages should be sent.
21
- #
22
- # The default, `1`, means send a message every second.
23
- config :interval, :validate => :number, :default => 1
24
-
25
- public
26
- def register
27
- @host = Socket.gethostname
28
- end # def register
29
-
30
- def run(queue)
31
- # we can abort the loop if stop? becomes true
32
- while !stop?
33
- event = LogStash::Event.new("message" => @message, "host" => @host)
34
- decorate(event)
35
- queue << event
36
- # because the sleep interval can be big, when shutdown happens
37
- # we want to be able to abort the sleep
38
- # Stud.stoppable_sleep will frequently evaluate the given block
39
- # and abort the sleep(@interval) if the return value is true
40
- Stud.stoppable_sleep(@interval) { stop? }
41
- end # loop
42
- end # def run
43
-
44
- def stop
45
- # nothing to do in this case so it is not necessary to define stop
46
- # examples of common "stop" tasks:
47
- # * close sockets (unblocking blocking reads/accepts)
48
- # * cleanup temporary files
49
- # * terminate spawned threads
50
- end
51
- end # class LogStash::Inputs::Multirss
@@ -1,117 +0,0 @@
1
- # encoding: utf-8
2
- require "logstash/inputs/base"
3
- require "logstash/namespace"
4
- require "stud/interval"
5
- require "net/http"
6
- require "uri"
7
- require "mechanize"
8
- require "faraday"
9
- require "rss"
10
-
11
- class LogStash::Inputs::Crawler < LogStash::Inputs::Base
12
- config_name "multirss"
13
-
14
- # If undefined, Logstash will complain, even if codec is unused.
15
- default :codec, "plain"
16
-
17
- # The message string to use in the event.
18
- config :urls, :validate => :array, :required => true
19
-
20
- #Set de interval for stoppable_sleep
21
- config :interval, :validate => :number, :default => 86400
22
-
23
- #Domains to exclude
24
- config :blacklist, :validate => :array , :default => ['http://fusion.google.com/','yahoo.com','live.com','netvibes.com']
25
-
26
- public
27
- def register
28
- @urls = []
29
- puts "**********************************************************"
30
- puts "STARTING MULTI-RSS"
31
- puts "*******************************************************"
32
- @agent = Mechanize.new
33
- @agent.agent.http.verify_mode = OpenSSL::SSL::VERIFY_NONE
34
- end # def register
35
-
36
-
37
- def run(queue)
38
- # we can abort the loop if stop? becomes true
39
- while !stop?
40
- puts "********************ENTRA1"
41
- @urls.each do |url|
42
- page = @agent.get(@url)
43
- page.links.each do |link|
44
- if link.href.chars.last(3).join == "xml" && valid_link?(link)
45
- puts "*******************************************************"
46
- puts link.href
47
- @new_urls << link.href
48
- end
49
- end
50
-
51
- links = @new_urls.uniq
52
-
53
- links.each do |link|
54
- puts "********************ENTRA2"
55
- response = @agent.get(link)
56
- handle_response(response, queue)
57
- end
58
-
59
- end #urls.each
60
- Stud.stoppable_sleep(@interval) { stop? }
61
- end # loop
62
- end # def run
63
-
64
-
65
- def stop
66
- # nothing to do in this case so it is not necessary to define stop
67
- # examples of common "stop" tasks:
68
- # * close sockets (unblocking blocking reads/accets)
69
- # * cleanup temporary files
70
- # * terminate spawned threads
71
- end
72
-
73
- def valid_link?(link)
74
- puts "********************ENTRA3"
75
- @blacklist.each do |black_link|
76
- if link.href.include?(black_link)
77
- return false
78
- end
79
- end
80
- return true
81
- end
82
-
83
-
84
- def handle_response(response, queue)
85
- puts "********************ENTRA4"
86
- body = response.body
87
- begin
88
- feed = RSS::Parser.parse(body)
89
- feed.items.each do |item|
90
- # Put each item into an event
91
- @logger.debug("Item", :item => item.author)
92
- handle_rss_response(queue, item)
93
- end
94
- rescue RSS::MissingTagError => e
95
- @logger.error("Invalid RSS feed", :exception => e)
96
- rescue => e
97
- @logger.error("Uknown error while parsing the feed", :exception => e)
98
- end
99
- end
100
-
101
-
102
- def handle_rss_response(queue, item)
103
- puts "********************ENTRA5"
104
- @codec.decode(item.description) do |event|
105
- event.set("Feed", @url)
106
- event.set("published", item.pubDate)
107
- event.set("title", item.title)
108
- event.set("link", item.link)
109
- event.set("author", item.author)
110
- decorate(event)
111
- queue << event
112
- end
113
- end
114
-
115
-
116
-
117
- end # class LogStash::Inputs::Crawler