logstash-filter-collate 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ YTA3MjQ5ZGZjYTM1NTdmZGFiNjViNzI1NDllMmI2YjM2YzE1NmQ2ZQ==
5
+ data.tar.gz: !binary |-
6
+ MmZiNzk5MmM4NmM3ZDMxOGMzYzBjZWE1MjQ1YjI2ZjQ1YzRkOTVlOA==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ ODk2YzZlMGM4NjM0NWY1OTk3ZTMyNzY0OGE4OTBhMmQ3NTQzOTQ0NzJhYTgw
10
+ MjQ3YzBiZmJmODJiODliYjNkYmUzNmMzNTZiM2M1YzM4ZjQwOWFjZGJlMDM1
11
+ NDlhZDdkMDIwNmFkODY5YmZhNmExOThjMzI3YjBhMmUwMmQ2MGY=
12
+ data.tar.gz: !binary |-
13
+ MTRlYTFjYTYwNzg5MDZlNjE2ZGMzMDI2YWQwZTM2YTEzOGZiMDQ2NWQ0MWI1
14
+ NjY2ZWMxNzY4Mjg0OGVmNzJmNDdjNTJjOGY1ZmMyZTg1OTc3MTI3OTNhMmY4
15
+ MmNiNjFmMTE4MmZkMGFmNTNhNjBkMTUxMzQwMzI0ODA1OWFiNGQ=
data/.gitignore ADDED
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ Gemfile.lock
3
+ .bundle
4
+ vendor
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source 'http://rubygems.org'
2
+ gem 'rake'
3
+ gem 'gem_publisher'
data/LICENSE ADDED
@@ -0,0 +1,13 @@
1
+ Copyright (c) 2012-2014 Elasticsearch <http://www.elasticsearch.org>
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ @files=[]
2
+
3
+ task :default do
4
+ system("rake -T")
5
+ end
6
+
data/lib/logstash/filters/collate.rb ADDED
@@ -0,0 +1,116 @@
1
+ # encoding: utf-8
2
+ require "logstash/filters/base"
3
+ require "logstash/namespace"
4
+
5
+
6
+ # Collate events by time or count.
7
+ #
8
+ # The original goal of this filter was to merge the logs from different sources
9
+ # by the time of log, for example, in real-time log collection, logs can be
10
+ # collated by amount of 3000 logs or can be collated in 30 seconds.
11
+ #
12
+ # The config looks like this:
13
+ # [source,ruby]
14
+ # filter {
15
+ # collate {
16
+ # size => 3000
17
+ # interval => "30s"
18
+ # order => "ascending"
19
+ # }
20
+ # }
21
+ class LogStash::Filters::Collate < LogStash::Filters::Base
22
+
23
+ config_name "collate"
24
+ milestone 1
25
+
26
+ # How many logs should be collated.
27
+ config :count, :validate => :number, :default => 1000
28
+
29
+ # The `interval` is the time window which how long the logs should be collated. (default `1m`)
30
+ config :interval, :validate => :string, :default => "1m"
31
+
32
+ # The `order` collated events should appear in.
33
+ config :order, :validate => ["ascending", "descending"], :default => "ascending"
34
+
35
+ public
36
+ def register
37
+ require "thread"
38
+ require "rufus/scheduler"
39
+
40
+ @mutex = Mutex.new
41
+ @collatingDone = false
42
+ @collatingArray = Array.new
43
+ @scheduler = Rufus::Scheduler.start_new
44
+ @job = @scheduler.every @interval do
45
+ @logger.info("Scheduler Activated")
46
+ @mutex.synchronize{
47
+ collate
48
+ }
49
+ end
50
+ end # def register
51
+
52
+ public
53
+ def filter(event)
54
+ @logger.info("do collate filter")
55
+ if event == LogStash::SHUTDOWN
56
+ @job.trigger()
57
+ @job.unschedule()
58
+ @logger.info("collate filter thread shutdown.")
59
+ return
60
+ end
61
+
62
+ # if the event is collated, a "collated" tag will be marked, so for those uncollated event, cancel them first.
63
+ if event["tags"].nil? || !event.tags.include?("collated")
64
+ event.cancel
65
+ else
66
+ return
67
+ end
68
+
69
+ @mutex.synchronize{
70
+ @collatingArray.push(event.clone)
71
+
72
+ if (@collatingArray.length == @count)
73
+ collate
74
+ end
75
+
76
+ if (@collatingDone)
77
+ while collatedEvent = @collatingArray.pop
78
+ collatedEvent["tags"] = Array.new if collatedEvent["tags"].nil?
79
+ collatedEvent["tags"] << "collated"
80
+ filter_matched(collatedEvent)
81
+ yield collatedEvent
82
+ end # while @collatingArray.pop
83
+ # reset collatingDone flag
84
+ @collatingDone = false
85
+ end
86
+ }
87
+ end # def filter
88
+
89
+ private
90
+ def collate
91
+ if (@order == "ascending")
92
+ # call .to_i for now until https://github.com/elasticsearch/logstash/issues/2052 is fixed
93
+ @collatingArray.sort! { |eventA, eventB| eventB.timestamp.to_i <=> eventA.timestamp.to_i }
94
+ else
95
+ @collatingArray.sort! { |eventA, eventB| eventA.timestamp.to_i <=> eventB.timestamp.to_i }
96
+ end
97
+ @collatingDone = true
98
+ end # def collate
99
+
100
+ # Flush any pending messages.
101
+ public
102
+ def flush(options = {})
103
+ events = []
104
+ if (@collatingDone)
105
+ @mutex.synchronize{
106
+ while collatedEvent = @collatingArray.pop
107
+ collatedEvent["tags"] << "collated"
108
+ events << collatedEvent
109
+ end # while @collatingArray.pop
110
+ }
111
+ # reset collatingDone flag.
112
+ @collatingDone = false
113
+ end
114
+ return events
115
+ end # def flush
116
+ end # class LogStash::Filters::Collate
data/logstash-filter-collate.gemspec ADDED
@@ -0,0 +1,27 @@
1
+ Gem::Specification.new do |s|
2
+
3
+ s.name = 'logstash-filter-collate'
4
+ s.version = '0.1.0'
5
+ s.licenses = ['Apache License (2.0)']
6
+ s.summary = "Collate events by time or count"
7
+ s.description = "The original goal of this filter was to merge the logs from different sources by the time of log."
8
+ s.authors = ["Elasticsearch"]
9
+ s.email = 'richard.pijnenburg@elasticsearch.com'
10
+ s.homepage = "http://logstash.net/"
11
+ s.require_paths = ["lib"]
12
+
13
+ # Files
14
+ s.files = `git ls-files`.split($\)+::Dir.glob('vendor/*')
15
+
16
+ # Tests
17
+ s.test_files = s.files.grep(%r{^(test|spec|features)/})
18
+
19
+ # Special flag to let us know this is actually a logstash plugin
20
+ s.metadata = { "logstash_plugin" => "true", "group" => "filter" }
21
+
22
+ # Gem dependencies
23
+ s.add_runtime_dependency 'logstash', '>= 1.4.0', '< 2.0.0'
24
+ s.add_runtime_dependency 'rufus-scheduler', ['~> 2.0.24']
25
+
26
+ end
27
+
data/rakelib/publish.rake ADDED
@@ -0,0 +1,9 @@
1
+ require "gem_publisher"
2
+
3
+ desc "Publish gem to RubyGems.org"
4
+ task :publish_gem do |t|
5
+ gem_file = Dir.glob(File.expand_path('../*.gemspec',File.dirname(__FILE__))).first
6
+ gem = GemPublisher.publish_if_updated(gem_file, :rubygems)
7
+ puts "Published #{gem}" if gem
8
+ end
9
+
data/rakelib/vendor.rake ADDED
@@ -0,0 +1,169 @@
1
+ require "net/http"
2
+ require "uri"
3
+ require "digest/sha1"
4
+
5
+ def vendor(*args)
6
+ return File.join("vendor", *args)
7
+ end
8
+
9
+ directory "vendor/" => ["vendor"] do |task, args|
10
+ mkdir task.name
11
+ end
12
+
13
+ def fetch(url, sha1, output)
14
+
15
+ puts "Downloading #{url}"
16
+ actual_sha1 = download(url, output)
17
+
18
+ if actual_sha1 != sha1
19
+ fail "SHA1 does not match (expected '#{sha1}' but got '#{actual_sha1}')"
20
+ end
21
+ end # def fetch
22
+
23
+ def file_fetch(url, sha1)
24
+ filename = File.basename( URI(url).path )
25
+ output = "vendor/#{filename}"
26
+ task output => [ "vendor/" ] do
27
+ begin
28
+ actual_sha1 = file_sha1(output)
29
+ if actual_sha1 != sha1
30
+ fetch(url, sha1, output)
31
+ end
32
+ rescue Errno::ENOENT
33
+ fetch(url, sha1, output)
34
+ end
35
+ end.invoke
36
+
37
+ return output
38
+ end
39
+
40
+ def file_sha1(path)
41
+ digest = Digest::SHA1.new
42
+ fd = File.new(path, "r")
43
+ while true
44
+ begin
45
+ digest << fd.sysread(16384)
46
+ rescue EOFError
47
+ break
48
+ end
49
+ end
50
+ return digest.hexdigest
51
+ ensure
52
+ fd.close if fd
53
+ end
54
+
55
+ def download(url, output)
56
+ uri = URI(url)
57
+ digest = Digest::SHA1.new
58
+ tmp = "#{output}.tmp"
59
+ Net::HTTP.start(uri.host, uri.port, :use_ssl => (uri.scheme == "https")) do |http|
60
+ request = Net::HTTP::Get.new(uri.path)
61
+ http.request(request) do |response|
62
+ fail "HTTP fetch failed for #{url}. #{response}" if [200, 301].include?(response.code)
63
+ size = (response["content-length"].to_i || -1).to_f
64
+ count = 0
65
+ File.open(tmp, "w") do |fd|
66
+ response.read_body do |chunk|
67
+ fd.write(chunk)
68
+ digest << chunk
69
+ if size > 0 && $stdout.tty?
70
+ count += chunk.bytesize
71
+ $stdout.write(sprintf("\r%0.2f%%", count/size * 100))
72
+ end
73
+ end
74
+ end
75
+ $stdout.write("\r \r") if $stdout.tty?
76
+ end
77
+ end
78
+
79
+ File.rename(tmp, output)
80
+
81
+ return digest.hexdigest
82
+ rescue SocketError => e
83
+ puts "Failure while downloading #{url}: #{e}"
84
+ raise
85
+ ensure
86
+ File.unlink(tmp) if File.exist?(tmp)
87
+ end # def download
88
+
89
+ def untar(tarball, &block)
90
+ require "archive/tar/minitar"
91
+ tgz = Zlib::GzipReader.new(File.open(tarball))
92
+ # Pull out typesdb
93
+ tar = Archive::Tar::Minitar::Input.open(tgz)
94
+ tar.each do |entry|
95
+ path = block.call(entry)
96
+ next if path.nil?
97
+ parent = File.dirname(path)
98
+
99
+ mkdir_p parent unless File.directory?(parent)
100
+
101
+ # Skip this file if the output file is the same size
102
+ if entry.directory?
103
+ mkdir path unless File.directory?(path)
104
+ else
105
+ entry_mode = entry.instance_eval { @mode } & 0777
106
+ if File.exists?(path)
107
+ stat = File.stat(path)
108
+ # TODO(sissel): Submit a patch to archive-tar-minitar upstream to
109
+ # expose headers in the entry.
110
+ entry_size = entry.instance_eval { @size }
111
+ # If file sizes are same, skip writing.
112
+ next if stat.size == entry_size && (stat.mode & 0777) == entry_mode
113
+ end
114
+ puts "Extracting #{entry.full_name} from #{tarball} #{entry_mode.to_s(8)}"
115
+ File.open(path, "w") do |fd|
116
+ # eof? check lets us skip empty files. Necessary because the API provided by
117
+ # Archive::Tar::Minitar::Reader::EntryStream only mostly acts like an
118
+ # IO object. Something about empty files in this EntryStream causes
119
+ # IO.copy_stream to throw "can't convert nil into String" on JRuby
120
+ # TODO(sissel): File a bug about this.
121
+ while !entry.eof?
122
+ chunk = entry.read(16384)
123
+ fd.write(chunk)
124
+ end
125
+ #IO.copy_stream(entry, fd)
126
+ end
127
+ File.chmod(entry_mode, path)
128
+ end
129
+ end
130
+ tar.close
131
+ File.unlink(tarball) if File.file?(tarball)
132
+ end # def untar
133
+
134
+ def ungz(file)
135
+
136
+ outpath = file.gsub('.gz', '')
137
+ tgz = Zlib::GzipReader.new(File.open(file))
138
+ begin
139
+ File.open(outpath, "w") do |out|
140
+ IO::copy_stream(tgz, out)
141
+ end
142
+ File.unlink(file)
143
+ rescue
144
+ File.unlink(outpath) if File.file?(outpath)
145
+ raise
146
+ end
147
+ tgz.close
148
+ end
149
+
150
+ desc "Process any vendor files required for this plugin"
151
+ task "vendor" do |task, args|
152
+
153
+ @files.each do |file|
154
+ download = file_fetch(file['url'], file['sha1'])
155
+ if download =~ /.tar.gz/
156
+ prefix = download.gsub('.tar.gz', '').gsub('vendor/', '')
157
+ untar(download) do |entry|
158
+ if !file['files'].nil?
159
+ next unless file['files'].include?(entry.full_name.gsub(prefix, ''))
160
+ out = entry.full_name.split("/").last
161
+ end
162
+ File.join('vendor', out)
163
+ end
164
+ elsif download =~ /.gz/
165
+ ungz(download)
166
+ end
167
+ end
168
+
169
+ end
data/spec/filters/collate_spec.rb ADDED
@@ -0,0 +1,122 @@
1
+ require "spec_helper"
2
+ require "logstash/filters/collate"
3
+
4
+ describe LogStash::Filters::Collate do
5
+
6
+
7
+ describe "collate when count is full" do
8
+ config <<-CONFIG
9
+ filter {
10
+ collate {
11
+ count => 2
12
+ }
13
+ }
14
+ CONFIG
15
+
16
+ events = [
17
+ {
18
+ "@timestamp" => Time.iso8601("2013-01-02T00:00:00.000Z"),
19
+ "message" => "later message"
20
+ },
21
+ {
22
+ "@timestamp" => Time.iso8601("2013-01-01T00:00:00.000Z"),
23
+ "message" => "earlier message"
24
+ }
25
+ ]
26
+
27
+ sample(events) do
28
+ insist { subject }.is_a? Array
29
+ insist { subject.length } == 2
30
+ subject.each_with_index do |s,i|
31
+ if i == 0 # first one should be the earlier message
32
+ insist { s["message"] } == "earlier message"
33
+ end
34
+ if i == 1 # second one should be the later message
35
+ insist { s["message"]} == "later message"
36
+ end
37
+ end
38
+ end
39
+ end
40
+
41
+ describe "collate by desc" do
42
+ config <<-CONFIG
43
+ filter {
44
+ collate {
45
+ count => 3
46
+ order => "descending"
47
+ }
48
+ }
49
+ CONFIG
50
+
51
+ events = [
52
+ {
53
+ "@timestamp" => Time.iso8601("2013-01-03T00:00:00.000Z"),
54
+ "message" => "third message"
55
+ },
56
+ {
57
+ "@timestamp" => Time.iso8601("2013-01-01T00:00:00.000Z"),
58
+ "message" => "first message"
59
+ },
60
+ {
61
+ "@timestamp" => Time.iso8601("2013-01-02T00:00:00.000Z"),
62
+ "message" => "second message"
63
+ }
64
+ ]
65
+
66
+ sample(events) do
67
+ insist { subject }.is_a? Array
68
+ insist { subject.length } == 3
69
+ subject.each_with_index do |s,i|
70
+ if i == 0 # first one should be the third message
71
+ insist { s["message"] } == "third message"
72
+ end
73
+ if i == 1 # second one should be the second message
74
+ insist { s["message"]} == "second message"
75
+ end
76
+ if i == 2 # third one should be the first message
77
+ insist { s["message"]} == "first message"
78
+ end
79
+ end
80
+ end
81
+ end
82
+
83
+ # (Ignored) Currently this case can't pass because of the case depends on the flush function of the filter in the test,
84
+ # there was a TODO marked in the code (spec_helper.rb, # TODO(sissel): pipeline flush needs to be implemented.),
85
+ # and the case wants to test the scenario which collate was triggered by a scheduler, so in this case, it needs to sleep few seconds
86
+ # waiting the scheduler triggered, and after the events were flushed, then the result can be checked.
87
+
88
+ # describe "collate when interval reached" do
89
+ # config <<-CONFIG
90
+ # filter {
91
+ # collate {
92
+ # interval => "1s"
93
+ # }
94
+ # }
95
+ # CONFIG
96
+
97
+ # events = [
98
+ # {
99
+ # "@timestamp" => Time.iso8601("2013-01-02T00:00:00.000Z"),
100
+ # "message" => "later message"
101
+ # },
102
+ # {
103
+ # "@timestamp" => Time.iso8601("2013-01-01T00:00:00.000Z"),
104
+ # "message" => "earlier message"
105
+ # }
106
+ # ]
107
+
108
+ # sample(events) do
109
+ # sleep(2)
110
+ # insist { subject }.is_a? Array
111
+ # insist { subject.length } == 2
112
+ # subject.each_with_index do |s,i|
113
+ # if i == 0 # first one should be the earlier message
114
+ # insist { s["message"] } == "earlier message"
115
+ # end
116
+ # if i == 1 # second one should be the later message
117
+ # insist { s["message"]} == "later message"
118
+ # end
119
+ # end
120
+ # end
121
+ # end
122
+ end
metadata ADDED
@@ -0,0 +1,90 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: logstash-filter-collate
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Elasticsearch
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-11-11 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: logstash
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ! '>='
18
+ - !ruby/object:Gem::Version
19
+ version: 1.4.0
20
+ - - <
21
+ - !ruby/object:Gem::Version
22
+ version: 2.0.0
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: 1.4.0
30
+ - - <
31
+ - !ruby/object:Gem::Version
32
+ version: 2.0.0
33
+ - !ruby/object:Gem::Dependency
34
+ name: rufus-scheduler
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ~>
38
+ - !ruby/object:Gem::Version
39
+ version: 2.0.24
40
+ type: :runtime
41
+ prerelease: false
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ~>
45
+ - !ruby/object:Gem::Version
46
+ version: 2.0.24
47
+ description: The original goal of this filter was to merge the logs from different
48
+ sources by the time of log.
49
+ email: richard.pijnenburg@elasticsearch.com
50
+ executables: []
51
+ extensions: []
52
+ extra_rdoc_files: []
53
+ files:
54
+ - .gitignore
55
+ - Gemfile
56
+ - LICENSE
57
+ - Rakefile
58
+ - lib/logstash/filters/collate.rb
59
+ - logstash-filter-collate.gemspec
60
+ - rakelib/publish.rake
61
+ - rakelib/vendor.rake
62
+ - spec/filters/collate_spec.rb
63
+ homepage: http://logstash.net/
64
+ licenses:
65
+ - Apache License (2.0)
66
+ metadata:
67
+ logstash_plugin: 'true'
68
+ group: filter
69
+ post_install_message:
70
+ rdoc_options: []
71
+ require_paths:
72
+ - lib
73
+ required_ruby_version: !ruby/object:Gem::Requirement
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ required_rubygems_version: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ! '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ requirements: []
84
+ rubyforge_project:
85
+ rubygems_version: 2.4.1
86
+ signing_key:
87
+ specification_version: 4
88
+ summary: Collate events by time or count
89
+ test_files:
90
+ - spec/filters/collate_spec.rb