logstash-filter-translate 3.1.0 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -0
- data/LICENSE +199 -10
- data/README.md +1 -1
- data/docs/index.asciidoc +257 -56
- data/lib/logstash/filters/array_of_maps_value_update.rb +44 -0
- data/lib/logstash/filters/array_of_values_update.rb +47 -0
- data/lib/logstash/filters/dictionary/csv_file.rb +25 -0
- data/lib/logstash/filters/dictionary/file.rb +143 -0
- data/lib/logstash/filters/dictionary/json_file.rb +87 -0
- data/lib/logstash/filters/dictionary/memory.rb +32 -0
- data/lib/logstash/filters/dictionary/yaml_file.rb +24 -0
- data/lib/logstash/filters/dictionary/yaml_visitor.rb +42 -0
- data/lib/logstash/filters/fetch_strategy/file.rb +81 -0
- data/lib/logstash/filters/fetch_strategy/memory.rb +52 -0
- data/lib/logstash/filters/single_value_update.rb +49 -0
- data/lib/logstash/filters/translate.rb +104 -158
- data/logstash-filter-translate.gemspec +8 -1
- data/spec/filters/benchmark_rspec.rb +69 -0
- data/spec/filters/scheduling_spec.rb +201 -0
- data/spec/filters/translate_spec.rb +463 -73
- data/spec/filters/yaml_visitor_spec.rb +16 -0
- data/spec/fixtures/regex_dict.csv +4 -0
- data/spec/fixtures/regex_union_dict.csv +4 -0
- data/spec/fixtures/tag-map-dict.yml +21 -0
- data/spec/fixtures/tag-omap-dict.yml +21 -0
- data/spec/support/build_huge_dictionaries.rb +33 -0
- data/spec/support/rspec_wait_handler_helper.rb +38 -0
- metadata +129 -2
--- /dev/null
+++ data/lib/logstash/filters/array_of_maps_value_update.rb
@@ -0,0 +1,44 @@
+# encoding: utf-8
+
+module LogStash module Filters
+  class ArrayOfMapsValueUpdate
+    def initialize(iterate_on, field, destination, fallback, lookup)
+      @iterate_on = ensure_reference_format(iterate_on)
+      @field = ensure_reference_format(field)
+      @destination = ensure_reference_format(destination)
+      @fallback = fallback
+      @use_fallback = !fallback.nil? # fallback is not nil, the user set a value in the config
+      @lookup = lookup
+    end
+
+    def test_for_inclusion(event, override)
+      event.include?(@iterate_on)
+    end
+
+    def update(event)
+      val = event.get(@iterate_on) # should be an array of hashes
+      source = Array(val)
+      matches = Array.new(source.size)
+      source.size.times do |index|
+        nested_field = "#{@iterate_on}[#{index}]#{@field}"
+        nested_destination = "#{@iterate_on}[#{index}]#{@destination}"
+        inner = event.get(nested_field)
+        next if inner.nil?
+        matched = [true, nil]
+        @lookup.fetch_strategy.fetch(inner.to_s, matched)
+        if matched.first
+          event.set(nested_destination, matched.last)
+          matches[index] = true
+        elsif @use_fallback
+          event.set(nested_destination, event.sprintf(@fallback))
+          matches[index] = true
+        end
+      end
+      return matches.any?
+    end
+
+    def ensure_reference_format(field)
+      field.start_with?("[") && field.end_with?("]") ? field : "[#{field}]"
+    end
+  end
+end end
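The `matched = [true, nil]` two-slot array above is an out-parameter: `fetch` mutates it in place rather than allocating a result object per array element. A minimal sketch of that contract, using a hypothetical exact-match strategy (not code from this diff):

    class SketchExactStrategy
      def initialize(dictionary)
        @dictionary = dictionary
      end

      # matched[0] <- did the key hit?; matched[1] <- the translated value
      def fetch(source, matched)
        if @dictionary.include?(source)
          matched[0] = true
          matched[1] = @dictionary[source]
        else
          matched[0] = false
        end
      end
    end

    strategy = SketchExactStrategy.new("200" => "OK")
    matched = [true, nil]
    strategy.fetch("200", matched)
    matched # => [true, "OK"]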
--- /dev/null
+++ data/lib/logstash/filters/array_of_values_update.rb
@@ -0,0 +1,47 @@
+# encoding: utf-8
+
+module LogStash module Filters
+  class ArrayOfValuesUpdate
+    class CoerceArray
+      def call(source) source; end
+    end
+    class CoerceOther
+      def call(source) Array(source); end
+    end
+
+    def initialize(iterate_on, destination, fallback, lookup)
+      @iterate_on = iterate_on
+      @destination = destination
+      @fallback = fallback
+      @use_fallback = !fallback.nil? # fallback is not nil, the user set a value in the config
+      @lookup = lookup
+      @coercers_table = {}
+      @coercers_table.default = CoerceOther.new
+      @coercers_table[Array] = CoerceArray.new
+    end
+
+    def test_for_inclusion(event, override)
+      # Skip translation in case @destination iterate_on already exists and @override is disabled.
+      return false if !override && event.include?(@destination)
+      event.include?(@iterate_on)
+    end
+
+    def update(event)
+      val = event.get(@iterate_on)
+      source = @coercers_table[val.class].call(val)
+      target = Array.new(source.size)
+      if @use_fallback
+        target.fill(event.sprintf(@fallback))
+      end
+      source.each_with_index do |inner, index|
+        matched = [true, nil]
+        @lookup.fetch_strategy.fetch(inner.to_s, matched)
+        if matched.first
+          target[index] = matched.last
+        end
+      end
+      event.set(@destination, target)
+      return target.any?
+    end
+  end
+end end
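The `@coercers_table` here is a class-keyed dispatch table: `Hash#default` supplies the `Array(...)` coercion for every class except `Array`, which passes through untouched, so `update` avoids a per-event type check. A quick illustration of the idiom, with lambdas standing in for the coercer classes:

    coercers = {}
    coercers.default = ->(v) { Array(v) } # CoerceOther: wrap scalars, nil => []
    coercers[Array]  = ->(v) { v }        # CoerceArray: pass through, no re-wrap

    coercers["a".class].call("a")     # => ["a"]
    coercers[Array].call(["a", "b"])  # => ["a", "b"]
    coercers[NilClass].call(nil)      # => []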
--- /dev/null
+++ data/lib/logstash/filters/dictionary/csv_file.rb
@@ -0,0 +1,25 @@
+# encoding: utf-8
+require "csv"
+
+module LogStash module Filters module Dictionary
+  class CsvFile < File
+
+    protected
+
+    def initialize_for_file_type
+      @io = StringIO.new("")
+      @csv = ::CSV.new(@io)
+    end
+
+    def read_file_into_dictionary
+      # low level CSV read that tries to create as
+      # few intermediate objects as possible
+      # this overwrites the value at key
+      IO.foreach(@dictionary_path, :mode => 'r:bom|utf-8') do |line|
+        @io.string = line
+        k,v = @csv.shift
+        @dictionary[k] = v
+      end
+    end
+  end
+end end end
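The pattern is worth spelling out: one `StringIO` and one `CSV` parser are allocated once in `initialize_for_file_type`, and each file line is swapped into the parser via `StringIO#string=` (which also rewinds the buffer) before `shift` parses a single row. A standalone sketch of the same trick with inline data instead of `@dictionary_path`; note this relies on the csv library's behavior at the time (newer csv releases reworked the parser internals, so treat it as illustrative):

    require "csv"
    require "stringio"

    io  = StringIO.new("")
    csv = CSV.new(io)
    dictionary = {}

    ["200,OK", "404,Not Found"].each do |line|
      io.string = line       # rebind and rewind the parser's buffer
      key, value = csv.shift # parse exactly one row, no new parser objects
      dictionary[key] = value
    end
    dictionary # => {"200"=>"OK", "404"=>"Not Found"}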
--- /dev/null
+++ data/lib/logstash/filters/dictionary/file.rb
@@ -0,0 +1,143 @@
+# encoding: utf-8
+require 'concurrent/atomic/atomic_boolean'
+require 'rufus-scheduler'
+require "logstash/util/loggable"
+require "logstash/filters/fetch_strategy/file"
+
+java_import 'java.util.concurrent.locks.ReentrantReadWriteLock'
+
+module LogStash module Filters module Dictionary
+  class DictionaryFileError < StandardError; end
+
+  class File
+    def self.create(path, refresh_interval, refresh_behaviour, exact, regex)
+      if /\.y[a]?ml$/.match(path)
+        instance = YamlFile.new(path, refresh_interval, exact, regex)
+      elsif path.end_with?(".json")
+        instance = JsonFile.new(path, refresh_interval, exact, regex)
+      elsif path.end_with?(".csv")
+        instance = CsvFile.new(path, refresh_interval, exact, regex)
+      else
+        raise "Translate: Dictionary #{path} has a non valid format"
+      end
+      if refresh_behaviour == 'merge'
+        instance.set_update_strategy(:merge_dictionary)
+      elsif refresh_behaviour == 'replace'
+        instance.set_update_strategy(:replace_dictionary)
+      else
+        # we really should never get here
+        raise(LogStash::ConfigurationError, "Unknown value for refresh_behaviour=#{refresh_behaviour.to_s}")
+      end
+    end
+
+    include LogStash::Util::Loggable
+    attr_reader :dictionary, :fetch_strategy
+
+    def initialize(path, refresh_interval, exact, regex)
+      @dictionary_path = path
+      @refresh_interval = refresh_interval
+      @short_refresh = @refresh_interval <= 300
+      @stopping = Concurrent::AtomicBoolean.new # ported from jdbc_static, need a way to prevent a scheduled execution from running a load.
+      rw_lock = java.util.concurrent.locks.ReentrantReadWriteLock.new
+      @write_lock = rw_lock.writeLock
+      @dictionary = Hash.new
+      @update_method = method(:merge_dictionary)
+      initialize_for_file_type
+      args = [@dictionary, rw_lock]
+      klass = case
+              when exact && regex then FetchStrategy::File::ExactRegex
+              when exact then FetchStrategy::File::Exact
+              else FetchStrategy::File::RegexUnion
+              end
+      @fetch_strategy = klass.new(*args)
+      load_dictionary(raise_exception = true)
+      stop_scheduler(initial = true)
+      start_scheduler unless @refresh_interval <= 0 # disabled, a scheduler interval of zero makes no sense
+    end
+
+    def stop_scheduler(initial = false)
+      @stopping.make_true unless initial
+      @scheduler.shutdown(:wait) if @scheduler
+    end
+
+    def load_dictionary(raise_exception=false)
+      begin
+        @dictionary_mtime = ::File.mtime(@dictionary_path).to_f
+        @update_method.call
+      rescue Errno::ENOENT
+        @logger.warn("dictionary file read failure, continuing with old dictionary", :path => @dictionary_path)
+      rescue => e
+        loading_exception(e, raise_exception)
+      end
+    end
+
+    def set_update_strategy(method_sym)
+      @update_method = method(method_sym)
+      self
+    end
+
+    protected
+
+    def initialize_for_file_type
+      # sub class specific initializer
+    end
+
+    def read_file_into_dictionary
+      # defined in csv_file, yaml_file and json_file
+    end
+
+    private
+
+    def start_scheduler
+      @scheduler = Rufus::Scheduler.new
+      @scheduler.interval("#{@refresh_interval}s", :overlap => false) do
+        reload_dictionary
+      end
+    end
+
+    def merge_dictionary
+      @write_lock.lock
+      begin
+        read_file_into_dictionary
+        @fetch_strategy.dictionary_updated
+      ensure
+        @write_lock.unlock
+      end
+    end
+
+    def replace_dictionary
+      @write_lock.lock
+      begin
+        @dictionary.clear
+        read_file_into_dictionary
+        @fetch_strategy.dictionary_updated
+      ensure
+        @write_lock.unlock
+      end
+    end
+
+    def reload_dictionary
+      return if @stopping.true?
+      if @short_refresh
+        load_dictionary if needs_refresh?
+      else
+        load_dictionary
+      end
+    end
+
+    def needs_refresh?
+      @dictionary_mtime != ::File.mtime(@dictionary_path).to_f
+    end
+
+    def loading_exception(e, raise_exception)
+      msg = "Translate: #{e.message} when loading dictionary file at #{@dictionary_path}"
+      if raise_exception
+        dfe = DictionaryFileError.new(msg)
+        dfe.set_backtrace(e.backtrace)
+        raise dfe
+      else
+        @logger.warn("#{msg}, continuing with old dictionary", :dictionary_path => @dictionary_path)
+      end
+    end
+  end
+end end end
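Tying `create` together: the file extension selects the subclass, `refresh_behaviour` selects the update method, and `exact`/`regex` select the fetch strategy, which then reads under the shared ReentrantReadWriteLock while the scheduler reloads under the write lock. A hypothetical wiring (the path and option values below are made up; the names come from this diff):

    dict = LogStash::Filters::Dictionary::File.create(
      "/tmp/map.yml", # matches /\.y[a]?ml$/        => YamlFile
      300,            # refresh_interval in seconds; <= 0 disables the scheduler
      "merge",        # => set_update_strategy(:merge_dictionary)
      true,           # exact
      false           # regex                       => FetchStrategy::File::Exact
    )
    matched = [true, nil]
    dict.fetch_strategy.fetch("200", matched) # read side takes the read lock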
--- /dev/null
+++ data/lib/logstash/filters/dictionary/json_file.rb
@@ -0,0 +1,87 @@
+# encoding: utf-8
+require "json"
+
+module LogStash module Filters module Dictionary
+  class JsonFile < File
+
+    protected
+
+    def initialize_for_file_type
+    end
+
+    def read_file_into_dictionary
+      content = IO.read(@dictionary_path, :mode => 'r:bom|utf-8')
+      @dictionary.update(LogStash::Json.load(content)) unless content.nil? || content.empty?
+    end
+  end
+end end end
+
+__END__
+Preserving the text below for near term prosperity...
+
+I tried hard to find a stream parsing solution with JrJackson and sc_load
+but it was no faster than the above code.
+The idea is for each line to be read into the streaming parse that will update
+the @dictionary as each key/value is found.
+It will be lower on memory consumption because the JSON string is not read into memory
+and then a Ruby Hash created and merged into @dictionary.
+I decided to trade speed for memory. Side Note, it seems that
+the json gem has become quite speedy lately.
+
+e.g.
+require_relative 'json_handler'
+...
+def initialize_for_file_type
+  @handler = JsonHandler.new(@dictionary)
+end
+
+def read_file_into_dictionary
+  ::File.open(@dictionary_path, "r:bom|utf-8") do |io|
+    JrJackson::Json.sc_load(@handler, io, {raw: true})
+  end
+end
+...
+where JsonHandler is:
+
+require 'jrjackson'
+
+module LogStash module Filters module Dictionary
+  class JsonHandler
+    def initialize(dictionary)
+      @dictionary = dictionary
+      @map_depth = 0
+    end
+
+    def hash_start()
+      @map_depth = @map_depth.succ
+      @map_depth == 1 ? @dictionary : {}
+    end
+
+    def hash_end()
+      @map_depth = @map_depth.pred
+    end
+
+    def hash_key(key)
+      key
+    end
+
+    def array_start()
+      []
+    end
+
+    def array_end()
+    end
+
+    def add_value(value)
+      # @result = value
+    end
+
+    def hash_set(h, key, value)
+      h[key] = value
+    end
+
+    def array_append(a, value)
+      a.push(value)
+    end
+  end
+end end end
--- /dev/null
+++ data/lib/logstash/filters/dictionary/memory.rb
@@ -0,0 +1,32 @@
+# encoding: utf-8
+require "logstash/filters/fetch_strategy/memory"
+
+module LogStash module Filters module Dictionary
+  class Memory
+
+    attr_reader :dictionary, :fetch_strategy
+
+    def initialize(hash, exact, regex)
+      klass = case
+              when exact && regex then FetchStrategy::Memory::ExactRegex
+              when exact then FetchStrategy::Memory::Exact
+              else FetchStrategy::Memory::RegexUnion
+              end
+      @fetch_strategy = klass.new(hash)
+    end
+
+    def stop_scheduler
+      # noop
+    end
+
+    private
+
+    def needs_refresh?
+      false
+    end
+
+    def load_dictionary(raise_exception=false)
+      # noop
+    end
+  end
+end end end
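The in-memory variant mirrors the file-backed one but wraps the inline hash from the filter's `dictionary` option; `stop_scheduler` and `load_dictionary` are no-ops because there is nothing to reload. A hedged usage sketch with made-up data:

    memory = LogStash::Filters::Dictionary::Memory.new(
      { "200" => "OK", "404" => "Not Found" }, # the filter's `dictionary` option
      true,  # exact
      false  # regex => FetchStrategy::Memory::Exact
    )
    matched = [true, nil]
    memory.fetch_strategy.fetch("404", matched)
    matched # => [true, "Not Found"]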
--- /dev/null
+++ data/lib/logstash/filters/dictionary/yaml_file.rb
@@ -0,0 +1,24 @@
+# encoding: utf-8
+
+require_relative "yaml_visitor"
+
+module LogStash module Filters module Dictionary
+  class YamlFile < File
+
+    protected
+
+    def initialize_for_file_type
+      @visitor = YamlVisitor.create
+    end
+
+    def read_file_into_dictionary
+      # low level YAML read that tries to create as
+      # few intermediate objects as possible
+      # this overwrites the value at key
+      @visitor.accept_with_dictionary(
+        @dictionary, Psych.parse_stream(
+          IO.read(@dictionary_path, :mode => 'r:bom|utf-8')
+      ))
+    end
+  end
+end end end
--- /dev/null
+++ data/lib/logstash/filters/dictionary/yaml_visitor.rb
@@ -0,0 +1,42 @@
+# encoding: utf-8
+
+require 'psych/visitors/to_ruby'
+require 'psych/exception'
+
+unless defined?(Regexp::NOENCODING)
+  Regexp::NOENCODING = 32
+end
+
+module LogStash module Filters module Dictionary
+  class YamlVisitor < Psych::Visitors::ToRuby
+
+    TAG_MAP_TABLE = Hash.new(false)
+    TAG_MAP_TABLE[nil] = true
+    TAG_MAP_TABLE["tag:yaml.org,2002:map"] = true
+    TAG_MAP_TABLE["tag:yaml.org,2002:omap"] = true
+
+    def accept_with_dictionary(dictionary, target)
+      @dictionary = dictionary
+      @map_depth = 0
+      accept(target)
+    end
+
+    def visit_Psych_Nodes_Mapping(o)
+      if Psych.load_tags[o.tag]
+        return revive(resolve_class(Psych.load_tags[o.tag]), o)
+      end
+
+      target_hash = @map_depth == 0 ? @dictionary : {}
+      @map_depth = @map_depth.succ
+
+      if TAG_MAP_TABLE[o.tag]
+        result = revive_hash(register(o, target_hash), o)
+      else
+        result = super(o)
+      end
+
+      @map_depth = @map_depth.pred
+      result
+    end
+  end
+end end end
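A usage sketch for the visitor, assuming it behaves as `yaml_file.rb` above uses it: the depth counter routes only the top-level mapping into the caller's hash, so a reload revives key/value pairs straight into the live dictionary instead of building a throwaway `Hash` and merging it afterwards. The sample data here is made up:

    require "psych"

    dictionary = { "100" => "Continue" }
    visitor = LogStash::Filters::Dictionary::YamlVisitor.create
    visitor.accept_with_dictionary(
      dictionary,
      Psych.parse_stream("'200': OK\n'404': Not Found\n")
    )
    dictionary # => {"100"=>"Continue", "200"=>"OK", "404"=>"Not Found"}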