logstash-filter-collate 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ YTA3MjQ5ZGZjYTM1NTdmZGFiNjViNzI1NDllMmI2YjM2YzE1NmQ2ZQ==
5
+ data.tar.gz: !binary |-
6
+ MmZiNzk5MmM4NmM3ZDMxOGMzYzBjZWE1MjQ1YjI2ZjQ1YzRkOTVlOA==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ ODk2YzZlMGM4NjM0NWY1OTk3ZTMyNzY0OGE4OTBhMmQ3NTQzOTQ0NzJhYTgw
10
+ MjQ3YzBiZmJmODJiODliYjNkYmUzNmMzNTZiM2M1YzM4ZjQwOWFjZGJlMDM1
11
+ NDlhZDdkMDIwNmFkODY5YmZhNmExOThjMzI3YjBhMmUwMmQ2MGY=
12
+ data.tar.gz: !binary |-
13
+ MTRlYTFjYTYwNzg5MDZlNjE2ZGMzMDI2YWQwZTM2YTEzOGZiMDQ2NWQ0MWI1
14
+ NjY2ZWMxNzY4Mjg0OGVmNzJmNDdjNTJjOGY1ZmMyZTg1OTc3MTI3OTNhMmY4
15
+ MmNiNjFmMTE4MmZkMGFmNTNhNjBkMTUxMzQwMzI0ODA1OWFiNGQ=
data/.gitignore ADDED
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ Gemfile.lock
3
+ .bundle
4
+ vendor
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source 'http://rubygems.org'
2
+ gem 'rake'
3
+ gem 'gem_publisher'
data/LICENSE ADDED
@@ -0,0 +1,13 @@
1
+ Copyright (c) 2012-2014 Elasticsearch <http://www.elasticsearch.org>
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ @files=[]
2
+
3
+ task :default do
4
+ system("rake -T")
5
+ end
6
+
data/lib/logstash/filters/collate.rb ADDED
@@ -0,0 +1,116 @@
1
+ # encoding: utf-8
2
+ require "logstash/filters/base"
3
+ require "logstash/namespace"
4
+
5
+
6
+ # Collate events by time or count.
7
+ #
8
+ # The original goal of this filter was to merge the logs from different sources
9
+ # by the time of log, for example, in real-time log collection, logs can be
10
+ # collated by amount of 3000 logs or can be collated in 30 seconds.
11
+ #
12
+ # The config looks like this:
13
+ # [source,ruby]
14
+ # filter {
15
+ # collate {
16
+ # size => 3000
17
+ # interval => "30s"
18
+ # order => "ascending"
19
+ # }
20
+ # }
21
+ class LogStash::Filters::Collate < LogStash::Filters::Base
22
+
23
+ config_name "collate"
24
+ milestone 1
25
+
26
+ # How many logs should be collated.
27
+ config :count, :validate => :number, :default => 1000
28
+
29
+ # The `interval` is the time window which how long the logs should be collated. (default `1m`)
30
+ config :interval, :validate => :string, :default => "1m"
31
+
32
+ # The `order` collated events should appear in.
33
+ config :order, :validate => ["ascending", "descending"], :default => "ascending"
34
+
35
+ public
36
+ def register
37
+ require "thread"
38
+ require "rufus/scheduler"
39
+
40
+ @mutex = Mutex.new
41
+ @collatingDone = false
42
+ @collatingArray = Array.new
43
+ @scheduler = Rufus::Scheduler.start_new
44
+ @job = @scheduler.every @interval do
45
+ @logger.info("Scheduler Activated")
46
+ @mutex.synchronize{
47
+ collate
48
+ }
49
+ end
50
+ end # def register
51
+
52
+ public
53
+ def filter(event)
54
+ @logger.info("do collate filter")
55
+ if event == LogStash::SHUTDOWN
56
+ @job.trigger()
57
+ @job.unschedule()
58
+ @logger.info("collate filter thread shutdown.")
59
+ return
60
+ end
61
+
62
+ # if the event is collated, a "collated" tag will be marked, so for those uncollated event, cancel them first.
63
+ if event["tags"].nil? || !event.tags.include?("collated")
64
+ event.cancel
65
+ else
66
+ return
67
+ end
68
+
69
+ @mutex.synchronize{
70
+ @collatingArray.push(event.clone)
71
+
72
+ if (@collatingArray.length == @count)
73
+ collate
74
+ end
75
+
76
+ if (@collatingDone)
77
+ while collatedEvent = @collatingArray.pop
78
+ collatedEvent["tags"] = Array.new if collatedEvent["tags"].nil?
79
+ collatedEvent["tags"] << "collated"
80
+ filter_matched(collatedEvent)
81
+ yield collatedEvent
82
+ end # while @collatingArray.pop
83
+ # reset collatingDone flag
84
+ @collatingDone = false
85
+ end
86
+ }
87
+ end # def filter
88
+
89
+ private
90
+ def collate
91
+ if (@order == "ascending")
92
+ # call .to_i for now until https://github.com/elasticsearch/logstash/issues/2052 is fixed
93
+ @collatingArray.sort! { |eventA, eventB| eventB.timestamp.to_i <=> eventA.timestamp.to_i }
94
+ else
95
+ @collatingArray.sort! { |eventA, eventB| eventA.timestamp.to_i <=> eventB.timestamp.to_i }
96
+ end
97
+ @collatingDone = true
98
+ end # def collate
99
+
100
+ # Flush any pending messages.
101
+ public
102
+ def flush(options = {})
103
+ events = []
104
+ if (@collatingDone)
105
+ @mutex.synchronize{
106
+ while collatedEvent = @collatingArray.pop
107
+ collatedEvent["tags"] << "collated"
108
+ events << collatedEvent
109
+ end # while @collatingArray.pop
110
+ }
111
+ # reset collatingDone flag.
112
+ @collatingDone = false
113
+ end
114
+ return events
115
+ end # def flush
116
+ end # class LogStash::Filters::Collate
data/logstash-filter-collate.gemspec ADDED
@@ -0,0 +1,27 @@
1
+ Gem::Specification.new do |s|
2
+
3
+ s.name = 'logstash-filter-collate'
4
+ s.version = '0.1.0'
5
+ s.licenses = ['Apache License (2.0)']
6
+ s.summary = "Collate events by time or count"
7
+ s.description = "The original goal of this filter was to merge the logs from different sources by the time of log."
8
+ s.authors = ["Elasticsearch"]
9
+ s.email = 'richard.pijnenburg@elasticsearch.com'
10
+ s.homepage = "http://logstash.net/"
11
+ s.require_paths = ["lib"]
12
+
13
+ # Files
14
+ s.files = `git ls-files`.split($\)+::Dir.glob('vendor/*')
15
+
16
+ # Tests
17
+ s.test_files = s.files.grep(%r{^(test|spec|features)/})
18
+
19
+ # Special flag to let us know this is actually a logstash plugin
20
+ s.metadata = { "logstash_plugin" => "true", "group" => "filter" }
21
+
22
+ # Gem dependencies
23
+ s.add_runtime_dependency 'logstash', '>= 1.4.0', '< 2.0.0'
24
+ s.add_runtime_dependency 'rufus-scheduler', ['~> 2.0.24']
25
+
26
+ end
27
+
data/rakelib/publish.rake ADDED
@@ -0,0 +1,9 @@
1
+ require "gem_publisher"
2
+
3
+ desc "Publish gem to RubyGems.org"
4
+ task :publish_gem do |t|
5
+ gem_file = Dir.glob(File.expand_path('../*.gemspec',File.dirname(__FILE__))).first
6
+ gem = GemPublisher.publish_if_updated(gem_file, :rubygems)
7
+ puts "Published #{gem}" if gem
8
+ end
9
+
data/rakelib/vendor.rake ADDED
@@ -0,0 +1,169 @@
1
+ require "net/http"
2
+ require "uri"
3
+ require "digest/sha1"
4
+
5
+ def vendor(*args)
6
+ return File.join("vendor", *args)
7
+ end
8
+
9
+ directory "vendor/" => ["vendor"] do |task, args|
10
+ mkdir task.name
11
+ end
12
+
13
+ def fetch(url, sha1, output)
14
+
15
+ puts "Downloading #{url}"
16
+ actual_sha1 = download(url, output)
17
+
18
+ if actual_sha1 != sha1
19
+ fail "SHA1 does not match (expected '#{sha1}' but got '#{actual_sha1}')"
20
+ end
21
+ end # def fetch
22
+
23
+ def file_fetch(url, sha1)
24
+ filename = File.basename( URI(url).path )
25
+ output = "vendor/#{filename}"
26
+ task output => [ "vendor/" ] do
27
+ begin
28
+ actual_sha1 = file_sha1(output)
29
+ if actual_sha1 != sha1
30
+ fetch(url, sha1, output)
31
+ end
32
+ rescue Errno::ENOENT
33
+ fetch(url, sha1, output)
34
+ end
35
+ end.invoke
36
+
37
+ return output
38
+ end
39
+
40
+ def file_sha1(path)
41
+ digest = Digest::SHA1.new
42
+ fd = File.new(path, "r")
43
+ while true
44
+ begin
45
+ digest << fd.sysread(16384)
46
+ rescue EOFError
47
+ break
48
+ end
49
+ end
50
+ return digest.hexdigest
51
+ ensure
52
+ fd.close if fd
53
+ end
54
+
55
+ def download(url, output)
56
+ uri = URI(url)
57
+ digest = Digest::SHA1.new
58
+ tmp = "#{output}.tmp"
59
+ Net::HTTP.start(uri.host, uri.port, :use_ssl => (uri.scheme == "https")) do |http|
60
+ request = Net::HTTP::Get.new(uri.path)
61
+ http.request(request) do |response|
62
+ fail "HTTP fetch failed for #{url}. #{response}" if [200, 301].include?(response.code)
63
+ size = (response["content-length"].to_i || -1).to_f
64
+ count = 0
65
+ File.open(tmp, "w") do |fd|
66
+ response.read_body do |chunk|
67
+ fd.write(chunk)
68
+ digest << chunk
69
+ if size > 0 && $stdout.tty?
70
+ count += chunk.bytesize
71
+ $stdout.write(sprintf("\r%0.2f%%", count/size * 100))
72
+ end
73
+ end
74
+ end
75
+ $stdout.write("\r \r") if $stdout.tty?
76
+ end
77
+ end
78
+
79
+ File.rename(tmp, output)
80
+
81
+ return digest.hexdigest
82
+ rescue SocketError => e
83
+ puts "Failure while downloading #{url}: #{e}"
84
+ raise
85
+ ensure
86
+ File.unlink(tmp) if File.exist?(tmp)
87
+ end # def download
88
+
89
+ def untar(tarball, &block)
90
+ require "archive/tar/minitar"
91
+ tgz = Zlib::GzipReader.new(File.open(tarball))
92
+ # Pull out typesdb
93
+ tar = Archive::Tar::Minitar::Input.open(tgz)
94
+ tar.each do |entry|
95
+ path = block.call(entry)
96
+ next if path.nil?
97
+ parent = File.dirname(path)
98
+
99
+ mkdir_p parent unless File.directory?(parent)
100
+
101
+ # Skip this file if the output file is the same size
102
+ if entry.directory?
103
+ mkdir path unless File.directory?(path)
104
+ else
105
+ entry_mode = entry.instance_eval { @mode } & 0777
106
+ if File.exists?(path)
107
+ stat = File.stat(path)
108
+ # TODO(sissel): Submit a patch to archive-tar-minitar upstream to
109
+ # expose headers in the entry.
110
+ entry_size = entry.instance_eval { @size }
111
+ # If file sizes are same, skip writing.
112
+ next if stat.size == entry_size && (stat.mode & 0777) == entry_mode
113
+ end
114
+ puts "Extracting #{entry.full_name} from #{tarball} #{entry_mode.to_s(8)}"
115
+ File.open(path, "w") do |fd|
116
+ # eof? check lets us skip empty files. Necessary because the API provided by
117
+ # Archive::Tar::Minitar::Reader::EntryStream only mostly acts like an
118
+ # IO object. Something about empty files in this EntryStream causes
119
+ # IO.copy_stream to throw "can't convert nil into String" on JRuby
120
+ # TODO(sissel): File a bug about this.
121
+ while !entry.eof?
122
+ chunk = entry.read(16384)
123
+ fd.write(chunk)
124
+ end
125
+ #IO.copy_stream(entry, fd)
126
+ end
127
+ File.chmod(entry_mode, path)
128
+ end
129
+ end
130
+ tar.close
131
+ File.unlink(tarball) if File.file?(tarball)
132
+ end # def untar
133
+
134
+ def ungz(file)
135
+
136
+ outpath = file.gsub('.gz', '')
137
+ tgz = Zlib::GzipReader.new(File.open(file))
138
+ begin
139
+ File.open(outpath, "w") do |out|
140
+ IO::copy_stream(tgz, out)
141
+ end
142
+ File.unlink(file)
143
+ rescue
144
+ File.unlink(outpath) if File.file?(outpath)
145
+ raise
146
+ end
147
+ tgz.close
148
+ end
149
+
150
+ desc "Process any vendor files required for this plugin"
151
+ task "vendor" do |task, args|
152
+
153
+ @files.each do |file|
154
+ download = file_fetch(file['url'], file['sha1'])
155
+ if download =~ /.tar.gz/
156
+ prefix = download.gsub('.tar.gz', '').gsub('vendor/', '')
157
+ untar(download) do |entry|
158
+ if !file['files'].nil?
159
+ next unless file['files'].include?(entry.full_name.gsub(prefix, ''))
160
+ out = entry.full_name.split("/").last
161
+ end
162
+ File.join('vendor', out)
163
+ end
164
+ elsif download =~ /.gz/
165
+ ungz(download)
166
+ end
167
+ end
168
+
169
+ end
data/spec/filters/collate_spec.rb ADDED
@@ -0,0 +1,122 @@
1
+ require "spec_helper"
2
+ require "logstash/filters/collate"
3
+
4
+ describe LogStash::Filters::Collate do
5
+
6
+
7
+ describe "collate when count is full" do
8
+ config <<-CONFIG
9
+ filter {
10
+ collate {
11
+ count => 2
12
+ }
13
+ }
14
+ CONFIG
15
+
16
+ events = [
17
+ {
18
+ "@timestamp" => Time.iso8601("2013-01-02T00:00:00.000Z"),
19
+ "message" => "later message"
20
+ },
21
+ {
22
+ "@timestamp" => Time.iso8601("2013-01-01T00:00:00.000Z"),
23
+ "message" => "earlier message"
24
+ }
25
+ ]
26
+
27
+ sample(events) do
28
+ insist { subject }.is_a? Array
29
+ insist { subject.length } == 2
30
+ subject.each_with_index do |s,i|
31
+ if i == 0 # first one should be the earlier message
32
+ insist { s["message"] } == "earlier message"
33
+ end
34
+ if i == 1 # second one should be the later message
35
+ insist { s["message"]} == "later message"
36
+ end
37
+ end
38
+ end
39
+ end
40
+
41
+ describe "collate by desc" do
42
+ config <<-CONFIG
43
+ filter {
44
+ collate {
45
+ count => 3
46
+ order => "descending"
47
+ }
48
+ }
49
+ CONFIG
50
+
51
+ events = [
52
+ {
53
+ "@timestamp" => Time.iso8601("2013-01-03T00:00:00.000Z"),
54
+ "message" => "third message"
55
+ },
56
+ {
57
+ "@timestamp" => Time.iso8601("2013-01-01T00:00:00.000Z"),
58
+ "message" => "first message"
59
+ },
60
+ {
61
+ "@timestamp" => Time.iso8601("2013-01-02T00:00:00.000Z"),
62
+ "message" => "second message"
63
+ }
64
+ ]
65
+
66
+ sample(events) do
67
+ insist { subject }.is_a? Array
68
+ insist { subject.length } == 3
69
+ subject.each_with_index do |s,i|
70
+ if i == 0 # first one should be the third message
71
+ insist { s["message"] } == "third message"
72
+ end
73
+ if i == 1 # second one should be the second message
74
+ insist { s["message"]} == "second message"
75
+ end
76
+ if i == 2 # third one should be the first message
77
+ insist { s["message"]} == "first message"
78
+ end
79
+ end
80
+ end
81
+ end
82
+
83
+ # (Ignored) Currently this case can't pass because of the case depends on the flush function of the filter in the test,
84
+ # there was a TODO marked in the code (spec_helper.rb, # TODO(sissel): pipeline flush needs to be implemented.),
85
+ # and the case wants to test the scenario which collate was triggered by a scheduler, so in this case, it needs to sleep few seconds
86
+ # waiting the scheduler triggered, and after the events were flushed, then the result can be checked.
87
+
88
+ # describe "collate when interval reached" do
89
+ # config <<-CONFIG
90
+ # filter {
91
+ # collate {
92
+ # interval => "1s"
93
+ # }
94
+ # }
95
+ # CONFIG
96
+
97
+ # events = [
98
+ # {
99
+ # "@timestamp" => Time.iso8601("2013-01-02T00:00:00.000Z"),
100
+ # "message" => "later message"
101
+ # },
102
+ # {
103
+ # "@timestamp" => Time.iso8601("2013-01-01T00:00:00.000Z"),
104
+ # "message" => "earlier message"
105
+ # }
106
+ # ]
107
+
108
+ # sample(events) do
109
+ # sleep(2)
110
+ # insist { subject }.is_a? Array
111
+ # insist { subject.length } == 2
112
+ # subject.each_with_index do |s,i|
113
+ # if i == 0 # first one should be the earlier message
114
+ # insist { s["message"] } == "earlier message"
115
+ # end
116
+ # if i == 1 # second one should be the later message
117
+ # insist { s["message"]} == "later message"
118
+ # end
119
+ # end
120
+ # end
121
+ # end
122
+ end
metadata ADDED
@@ -0,0 +1,90 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: logstash-filter-collate
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Elasticsearch
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-11-11 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: logstash
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ! '>='
18
+ - !ruby/object:Gem::Version
19
+ version: 1.4.0
20
+ - - <
21
+ - !ruby/object:Gem::Version
22
+ version: 2.0.0
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: 1.4.0
30
+ - - <
31
+ - !ruby/object:Gem::Version
32
+ version: 2.0.0
33
+ - !ruby/object:Gem::Dependency
34
+ name: rufus-scheduler
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ~>
38
+ - !ruby/object:Gem::Version
39
+ version: 2.0.24
40
+ type: :runtime
41
+ prerelease: false
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ~>
45
+ - !ruby/object:Gem::Version
46
+ version: 2.0.24
47
+ description: The original goal of this filter was to merge the logs from different
48
+ sources by the time of log.
49
+ email: richard.pijnenburg@elasticsearch.com
50
+ executables: []
51
+ extensions: []
52
+ extra_rdoc_files: []
53
+ files:
54
+ - .gitignore
55
+ - Gemfile
56
+ - LICENSE
57
+ - Rakefile
58
+ - lib/logstash/filters/collate.rb
59
+ - logstash-filter-collate.gemspec
60
+ - rakelib/publish.rake
61
+ - rakelib/vendor.rake
62
+ - spec/filters/collate_spec.rb
63
+ homepage: http://logstash.net/
64
+ licenses:
65
+ - Apache License (2.0)
66
+ metadata:
67
+ logstash_plugin: 'true'
68
+ group: filter
69
+ post_install_message:
70
+ rdoc_options: []
71
+ require_paths:
72
+ - lib
73
+ required_ruby_version: !ruby/object:Gem::Requirement
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ required_rubygems_version: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ! '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ requirements: []
84
+ rubyforge_project:
85
+ rubygems_version: 2.4.1
86
+ signing_key:
87
+ specification_version: 4
88
+ summary: Collate events by time or count
89
+ test_files:
90
+ - spec/filters/collate_spec.rb