logstash-filter-collate 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ YTA3MjQ5ZGZjYTM1NTdmZGFiNjViNzI1NDllMmI2YjM2YzE1NmQ2ZQ==
5
+ data.tar.gz: !binary |-
6
+ MmZiNzk5MmM4NmM3ZDMxOGMzYzBjZWE1MjQ1YjI2ZjQ1YzRkOTVlOA==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ ODk2YzZlMGM4NjM0NWY1OTk3ZTMyNzY0OGE4OTBhMmQ3NTQzOTQ0NzJhYTgw
10
+ MjQ3YzBiZmJmODJiODliYjNkYmUzNmMzNTZiM2M1YzM4ZjQwOWFjZGJlMDM1
11
+ NDlhZDdkMDIwNmFkODY5YmZhNmExOThjMzI3YjBhMmUwMmQ2MGY=
12
+ data.tar.gz: !binary |-
13
+ MTRlYTFjYTYwNzg5MDZlNjE2ZGMzMDI2YWQwZTM2YTEzOGZiMDQ2NWQ0MWI1
14
+ NjY2ZWMxNzY4Mjg0OGVmNzJmNDdjNTJjOGY1ZmMyZTg1OTc3MTI3OTNhMmY4
15
+ MmNiNjFmMTE4MmZkMGFmNTNhNjBkMTUxMzQwMzI0ODA1OWFiNGQ=
data/.gitignore ADDED
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ Gemfile.lock
3
+ .bundle
4
+ vendor
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
# Development dependencies for building and publishing this plugin.
# Gems are fetched over HTTPS so they cannot be tampered with in transit.
source 'https://rubygems.org'
gem 'rake'
gem 'gem_publisher'
data/LICENSE ADDED
@@ -0,0 +1,13 @@
1
+ Copyright (c) 2012-2014 Elasticsearch <http://www.elasticsearch.org>
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
# Manifest of files to vendor; the "vendor" task iterates over this list.
# Empty here: this plugin has nothing to vendor.
@files = []

# Bare `rake` just lists the available tasks.
task(:default) { system("rake -T") }
6
+
@@ -0,0 +1,116 @@
1
+ # encoding: utf-8
2
+ require "logstash/filters/base"
3
+ require "logstash/namespace"
4
+
5
+
6
# Collate events by time or count.
#
# The original goal of this filter was to merge the logs from different sources
# by the time of log. For example, in real-time log collection, logs can be
# collated once 3000 of them have been buffered, or every 30 seconds.
#
# The config looks like this:
# [source,ruby]
#     filter {
#       collate {
#         count => 3000
#         interval => "30s"
#         order => "ascending"
#       }
#     }
class LogStash::Filters::Collate < LogStash::Filters::Base

  config_name "collate"
  milestone 1

  # How many logs should be collated.
  config :count, :validate => :number, :default => 1000

  # The `interval` is the time window over which logs are collated. (default `1m`)
  config :interval, :validate => :string, :default => "1m"

  # The `order` collated events should appear in.
  config :order, :validate => ["ascending", "descending"], :default => "ascending"

  # Create the event buffer and its lock, then schedule a job that sorts the
  # buffer every `interval`.
  public
  def register
    require "thread"
    require "rufus/scheduler"

    @mutex = Mutex.new
    @collatingDone = false
    @collatingArray = Array.new
    @scheduler = Rufus::Scheduler.start_new
    @job = @scheduler.every @interval do
      @logger.info("Scheduler Activated")
      @mutex.synchronize{
        collate
      }
    end
  end # def register

  # Buffer a clone of each incoming event and cancel the original. Once the
  # buffer has been sorted (by reaching `count` here, or by the scheduler job),
  # drain it and yield every buffered event tagged "collated".
  #
  # Events that already carry the "collated" tag pass through untouched.
  public
  def filter(event)
    @logger.info("do collate filter")
    if event == LogStash::SHUTDOWN
      @job.trigger()
      @job.unschedule()
      @logger.info("collate filter thread shutdown.")
      return
    end

    # A collated event is marked with a "collated" tag, so anything without it
    # has not been through the buffer yet: cancel it and buffer a clone.
    # Tags are read through event["tags"] only, matching the nil check.
    tags = event["tags"]
    if tags.nil? || !tags.include?("collated")
      event.cancel
    else
      return
    end

    @mutex.synchronize{
      @collatingArray.push(event.clone)

      # >= rather than ==: `count` is validated as a number, so a non-integer
      # value would never compare equal to an array length.
      collate if @collatingArray.length >= @count

      if @collatingDone
        while collatedEvent = @collatingArray.pop
          tag_collated(collatedEvent)
          filter_matched(collatedEvent)
          yield collatedEvent
        end # while @collatingArray.pop
        # reset collatingDone flag
        @collatingDone = false
      end
    }
  end # def filter

  # Sort the buffer so that popping from its end yields events in the
  # configured order, then mark the buffer as ready to drain.
  private
  def collate
    if @order == "ascending"
      # Sorted newest-first because the buffer is drained with Array#pop.
      # call .to_i for now until https://github.com/elasticsearch/logstash/issues/2052 is fixed
      @collatingArray.sort! { |eventA, eventB| eventB.timestamp.to_i <=> eventA.timestamp.to_i }
    else
      @collatingArray.sort! { |eventA, eventB| eventA.timestamp.to_i <=> eventB.timestamp.to_i }
    end
    @collatingDone = true
  end # def collate

  # Append the "collated" tag to an event, creating the tags array when the
  # event has none.
  def tag_collated(event)
    event["tags"] = Array.new if event["tags"].nil?
    event["tags"] << "collated"
  end # def tag_collated

  # Flush any pending messages.
  #
  # Returns the sorted, "collated"-tagged events when the buffer is ready to
  # drain, otherwise an empty Array. `options` is accepted but not used.
  public
  def flush(options = {})
    events = []
    # The ready flag is checked and reset under the lock, the same way
    # `filter` does it, so a concurrent sort cannot be lost in between.
    @mutex.synchronize{
      if @collatingDone
        while collatedEvent = @collatingArray.pop
          # Untagged events have nil tags; create the array before appending.
          tag_collated(collatedEvent)
          events << collatedEvent
        end # while @collatingArray.pop
        # reset collatingDone flag.
        @collatingDone = false
      end
    }
    return events
  end # def flush
end # class LogStash::Filters::Collate
@@ -0,0 +1,27 @@
1
# Gem specification for the collate filter plugin.
Gem::Specification.new do |spec|

  # Identity and descriptive metadata
  spec.name          = 'logstash-filter-collate'
  spec.version       = '0.1.0'
  spec.summary       = "Collate events by time or count"
  spec.description   = "The original goal of this filter was to merge the logs from different sources by the time of log."
  spec.licenses      = ['Apache License (2.0)']
  spec.authors       = ["Elasticsearch"]
  spec.email         = 'richard.pijnenburg@elasticsearch.com'
  spec.homepage      = "http://logstash.net/"
  spec.require_paths = ["lib"]

  # Packaged files: everything tracked by git plus top-level vendor entries
  tracked_files = `git ls-files`.split($\)
  spec.files = tracked_files + ::Dir.glob('vendor/*')

  # Tests are the packaged files under test/, spec/ or features/
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})

  # Special flag to let us know this is actually a logstash plugin
  spec.metadata = { "logstash_plugin" => "true", "group" => "filter" }

  # Runtime gem dependencies
  spec.add_runtime_dependency 'logstash', '>= 1.4.0', '< 2.0.0'
  spec.add_runtime_dependency 'rufus-scheduler', ['~> 2.0.24']

end
27
+
@@ -0,0 +1,9 @@
1
+ require "gem_publisher"
2
+
3
# Publishes the gem described by the first *.gemspec found one directory
# above this rake file, and reports the result when something was published.
desc "Publish gem to RubyGems.org"
task :publish_gem do |_task|
  rakelib_dir = File.dirname(__FILE__)
  gemspec_path = Dir.glob(File.expand_path('../*.gemspec', rakelib_dir)).first
  published = GemPublisher.publish_if_updated(gemspec_path, :rubygems)
  puts "Published #{published}" if published
end
9
+
@@ -0,0 +1,169 @@
1
+ require "net/http"
2
+ require "uri"
3
+ require "digest/sha1"
4
+
5
# Build a path below the "vendor" directory from the given path segments.
#
# Returns the joined path String ("vendor" alone when no segments are given).
def vendor(*args)
  File.join("vendor", *args)
end
8
+
9
# Directory task for "vendor/": creates the directory named by the task.
directory("vendor/" => ["vendor"]) { |dir_task, _args| mkdir dir_task.name }
12
+
13
# Download +url+ to +output+ and verify the result against +sha1+.
#
# url    - String URL to download.
# sha1   - String expected SHA1 hex digest.
# output - String path the download is written to.
#
# Returns nil when the digest matches; fails (RuntimeError) when it does not.
def fetch(url, sha1, output)
  puts "Downloading #{url}"
  computed = download(url, output)
  return if computed == sha1

  fail "SHA1 does not match (expected '#{sha1}' but got '#{computed}')"
end # def fetch
22
+
23
# Ensure the file behind +url+ is present under vendor/ with the expected
# digest, downloading it when it is missing or its SHA1 differs.
#
# url  - String URL; the basename of its path becomes the local file name.
# sha1 - String expected SHA1 hex digest.
#
# Returns the String path "vendor/<basename>".
def file_fetch(url, sha1)
  filename = File.basename(URI(url).path)
  output = "vendor/#{filename}"

  # Defined as a rake task (depending on "vendor/") and invoked right away.
  fetch_task = task output => ["vendor/"] do
    begin
      # Re-download only when the cached copy has a different digest.
      fetch(url, sha1, output) unless file_sha1(output) == sha1
    rescue Errno::ENOENT
      fetch(url, sha1, output)
    end
  end
  fetch_task.invoke

  output
end
39
+
40
# Compute the SHA1 hex digest of the file at +path+.
#
# path - String path of the file to hash.
#
# Returns the hex-encoded SHA1 String.
# Raises Errno::ENOENT when +path+ does not exist; file_fetch rescues that
# error to trigger a download.
def file_sha1(path)
  # Digest::SHA1.file streams the file in binary mode, so the digest is the
  # same on every platform (the previous text-mode read was not binary-safe)
  # and the file handle is always closed.
  Digest::SHA1.file(path).hexdigest
end
54
+
55
# Download +url+ to +output+ and return the SHA1 hex digest of the body.
#
# The body is streamed into "#{output}.tmp" and renamed to +output+ only after
# the transfer finished, so a failed transfer does not leave a partial file at
# +output+. A progress percentage is printed when stdout is a terminal and the
# server sent a Content-Length.
#
# url    - String http:// or https:// URL to fetch.
# output - String path of the file to create.
#
# Returns the hex-encoded SHA1 String of the downloaded bytes.
# Raises RuntimeError when the response status is neither 200 nor 301, and
# re-raises SocketError after printing a message.
def download(url, output)
  uri = URI(url)
  digest = Digest::SHA1.new
  tmp = "#{output}.tmp"
  Net::HTTP.start(uri.host, uri.port, :use_ssl => (uri.scheme == "https")) do |http|
    # request_uri keeps the query string and is "/" for a URL without a path.
    request = Net::HTTP::Get.new(uri.request_uri)
    http.request(request) do |response|
      # response.code is a String, so it is compared against Strings. The old
      # check compared it with Integers and was inverted, so it never fired.
      # 301 stays accepted as before; redirects are not followed here.
      unless ["200", "301"].include?(response.code)
        fail "HTTP fetch failed for #{url}. #{response}"
      end
      # A missing Content-Length yields 0.0, which disables the progress output.
      size = response["content-length"].to_i.to_f
      count = 0
      # Binary mode: the payload is written byte-for-byte on every platform.
      File.open(tmp, "wb") do |fd|
        response.read_body do |chunk|
          fd.write(chunk)
          digest << chunk
          if size > 0 && $stdout.tty?
            count += chunk.bytesize
            $stdout.write(sprintf("\r%0.2f%%", count/size * 100))
          end
        end
      end
      $stdout.write("\r \r") if $stdout.tty?
    end
  end

  File.rename(tmp, output)

  return digest.hexdigest
rescue SocketError => e
  puts "Failure while downloading #{url}: #{e}"
  raise
ensure
  # Remove a partial download; after a successful rename tmp no longer exists.
  File.unlink(tmp) if File.exist?(tmp)
end # def download
88
+
89
# Extract entries from the gzipped tar archive +tarball+.
#
# Every entry is passed to +block+, which returns the destination path for the
# entry, or nil to skip it. Parent directories are created as needed. A regular
# file is not rewritten when a file of the same size and permission bits is
# already at the destination. The tarball is deleted after a successful
# extraction.
#
# tarball - String path of a .tar.gz file.
# block   - called with each archive entry; returns a String path or nil.
def untar(tarball, &block)
  require "archive/tar/minitar"
  require "zlib"
  # Binary mode: the archive must be read byte-for-byte on every platform.
  tgz = Zlib::GzipReader.new(File.open(tarball, "rb"))
  begin
    tar = Archive::Tar::Minitar::Input.open(tgz)
    tar.each do |entry|
      path = block.call(entry)
      next if path.nil?
      parent = File.dirname(path)

      mkdir_p parent unless File.directory?(parent)

      if entry.directory?
        mkdir path unless File.directory?(path)
      else
        entry_mode = entry.instance_eval { @mode } & 0777
        # File.exist? rather than File.exists?, which Ruby 3.2 removed.
        if File.exist?(path)
          stat = File.stat(path)
          # TODO(sissel): Submit a patch to archive-tar-minitar upstream to
          # expose headers in the entry.
          entry_size = entry.instance_eval { @size }
          # If size and permission bits are the same, skip writing.
          next if stat.size == entry_size && (stat.mode & 0777) == entry_mode
        end
        puts "Extracting #{entry.full_name} from #{tarball} #{entry_mode.to_s(8)}"
        File.open(path, "wb") do |fd|
          # eof? check lets us skip empty files. Necessary because the API provided by
          # Archive::Tar::Minitar::Reader::EntryStream only mostly acts like an
          # IO object. Something about empty files in this EntryStream causes
          # IO.copy_stream to throw "can't convert nil into String" on JRuby
          # TODO(sissel): File a bug about this.
          while !entry.eof?
            chunk = entry.read(16384)
            fd.write(chunk)
          end
          #IO.copy_stream(entry, fd)
        end
        File.chmod(entry_mode, path)
      end
    end
    tar.close
  ensure
    # Release the archive handle even when extraction raised part-way through.
    tgz.close unless tgz.closed?
  end
  File.unlink(tarball) if File.file?(tarball)
end # def untar
133
+
134
# Decompress the gzip file +file+ next to itself and delete the original.
#
# The output path is +file+ with its trailing ".gz" removed. When writing the
# output fails, the partial output is removed and the error is re-raised.
#
# file - String path of a gzip file whose name ends in ".gz".
#
# Raises ArgumentError when +file+ does not end in ".gz", because the output
# path would then be the input file itself.
def ungz(file)
  require "zlib"

  # Strip only the trailing extension; gsub('.gz', '') also removed any
  # earlier ".gz" in the path.
  outpath = file.sub(/\.gz\z/, '')
  raise ArgumentError, "#{file} does not end in .gz" if outpath == file

  tgz = Zlib::GzipReader.new(File.open(file, "rb"))
  begin
    # Binary mode so the decompressed bytes are written unchanged.
    File.open(outpath, "wb") do |out|
      IO::copy_stream(tgz, out)
    end
    # Close the reader before deleting the file it reads from.
    tgz.close
    File.unlink(file)
  rescue
    File.unlink(outpath) if File.file?(outpath)
    raise
  ensure
    tgz.close unless tgz.closed?
  end
end
149
+
150
# Fetches every entry of @files (hashes with 'url', 'sha1' and an optional
# 'files' whitelist) into vendor/ and unpacks it:
#   * "*.tar.gz" archives are extracted into vendor/, flattened to each
#     entry's basename; with a 'files' whitelist only the listed entries
#     (archive-prefix-relative names) are extracted.
#   * other "*.gz" files are decompressed in place.
desc "Process any vendor files required for this plugin"
task "vendor" do |task, args|

  # Array() tolerates @files not having been set by the Rakefile.
  Array(@files).each do |file|
    download = file_fetch(file['url'], file['sha1'])
    # Match on the real file extension; the old /.tar.gz/ and /.gz/ patterns
    # had unescaped dots and matched anywhere in the path.
    if download.end_with?('.tar.gz')
      prefix = File.basename(download, '.tar.gz')
      wanted = file['files']
      untar(download) do |entry|
        # Without a whitelist every entry is extracted; previously the
        # destination name was nil in that case and File.join raised.
        next unless wanted.nil? || wanted.include?(entry.full_name.gsub(prefix, ''))
        File.join('vendor', entry.full_name.split("/").last)
      end
    elsif download.end_with?('.gz')
      ungz(download)
    end
  end

end
@@ -0,0 +1,122 @@
1
+ require "spec_helper"
2
+ require "logstash/filters/collate"
3
+
4
describe LogStash::Filters::Collate do

  # Two events arrive newest-first. With `count => 2` the buffer is full after
  # the second one, and the events must come out oldest-first (the default
  # "ascending" order).
  describe "collate when count is full" do
    config <<-CONFIG
      filter {
        collate {
          count => 2
        }
      }
    CONFIG

    events = [
      { "@timestamp" => Time.iso8601("2013-01-02T00:00:00.000Z"), "message" => "later message" },
      { "@timestamp" => Time.iso8601("2013-01-01T00:00:00.000Z"), "message" => "earlier message" }
    ]

    sample(events) do
      insist { subject }.is_a? Array
      insist { subject.length } == 2
      subject.each_with_index do |s, i|
        case i
        when 0 # first one should be the earlier message
          insist { s["message"] } == "earlier message"
        when 1 # second one should be the later message
          insist { s["message"]} == "later message"
        end
      end
    end
  end

  # Three shuffled events with `order => "descending"` must come out
  # newest-first.
  describe "collate by desc" do
    config <<-CONFIG
      filter {
        collate {
          count => 3
          order => "descending"
        }
      }
    CONFIG

    events = [
      { "@timestamp" => Time.iso8601("2013-01-03T00:00:00.000Z"), "message" => "third message" },
      { "@timestamp" => Time.iso8601("2013-01-01T00:00:00.000Z"), "message" => "first message" },
      { "@timestamp" => Time.iso8601("2013-01-02T00:00:00.000Z"), "message" => "second message" }
    ]

    sample(events) do
      insist { subject }.is_a? Array
      insist { subject.length } == 3
      subject.each_with_index do |s, i|
        case i
        when 0 # first one should be the third message
          insist { s["message"] } == "third message"
        when 1 # second one should be the second message
          insist { s["message"]} == "second message"
        when 2 # third one should be the first message
          insist { s["message"]} == "first message"
        end
      end
    end
  end

  # (Ignored) This case cannot pass yet because it depends on the filter's
  # flush function being called by the test harness, which is still a TODO
  # (spec_helper.rb: "TODO(sissel): pipeline flush needs to be implemented.").
  # It covers collation triggered by the scheduler, so it has to sleep for a
  # few seconds until the scheduler fires, and can only check the result after
  # the events have been flushed.

  # describe "collate when interval reached" do
  #   config <<-CONFIG
  #     filter {
  #       collate {
  #         interval => "1s"
  #       }
  #     }
  #   CONFIG

  #   events = [
  #     {
  #       "@timestamp" => Time.iso8601("2013-01-02T00:00:00.000Z"),
  #       "message" => "later message"
  #     },
  #     {
  #       "@timestamp" => Time.iso8601("2013-01-01T00:00:00.000Z"),
  #       "message" => "earlier message"
  #     }
  #   ]

  #   sample(events) do
  #     sleep(2)
  #     insist { subject }.is_a? Array
  #     insist { subject.length } == 2
  #     subject.each_with_index do |s,i|
  #       if i == 0 # first one should be the earlier message
  #         insist { s["message"] } == "earlier message"
  #       end
  #       if i == 1 # second one should be the later message
  #         insist { s["message"]} == "later message"
  #       end
  #     end
  #   end
  # end
end
metadata ADDED
@@ -0,0 +1,90 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: logstash-filter-collate
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Elasticsearch
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-11-11 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: logstash
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ! '>='
18
+ - !ruby/object:Gem::Version
19
+ version: 1.4.0
20
+ - - <
21
+ - !ruby/object:Gem::Version
22
+ version: 2.0.0
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: 1.4.0
30
+ - - <
31
+ - !ruby/object:Gem::Version
32
+ version: 2.0.0
33
+ - !ruby/object:Gem::Dependency
34
+ name: rufus-scheduler
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ~>
38
+ - !ruby/object:Gem::Version
39
+ version: 2.0.24
40
+ type: :runtime
41
+ prerelease: false
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ~>
45
+ - !ruby/object:Gem::Version
46
+ version: 2.0.24
47
+ description: The original goal of this filter was to merge the logs from different
48
+ sources by the time of log.
49
+ email: richard.pijnenburg@elasticsearch.com
50
+ executables: []
51
+ extensions: []
52
+ extra_rdoc_files: []
53
+ files:
54
+ - .gitignore
55
+ - Gemfile
56
+ - LICENSE
57
+ - Rakefile
58
+ - lib/logstash/filters/collate.rb
59
+ - logstash-filter-collate.gemspec
60
+ - rakelib/publish.rake
61
+ - rakelib/vendor.rake
62
+ - spec/filters/collate_spec.rb
63
+ homepage: http://logstash.net/
64
+ licenses:
65
+ - Apache License (2.0)
66
+ metadata:
67
+ logstash_plugin: 'true'
68
+ group: filter
69
+ post_install_message:
70
+ rdoc_options: []
71
+ require_paths:
72
+ - lib
73
+ required_ruby_version: !ruby/object:Gem::Requirement
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ required_rubygems_version: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ! '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ requirements: []
84
+ rubyforge_project:
85
+ rubygems_version: 2.4.1
86
+ signing_key:
87
+ specification_version: 4
88
+ summary: Collate events by time or count
89
+ test_files:
90
+ - spec/filters/collate_spec.rb