logstash-filter-translate 3.1.0 → 3.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -0
- data/LICENSE +199 -10
- data/README.md +1 -1
- data/docs/index.asciidoc +257 -56
- data/lib/logstash/filters/array_of_maps_value_update.rb +44 -0
- data/lib/logstash/filters/array_of_values_update.rb +47 -0
- data/lib/logstash/filters/dictionary/csv_file.rb +25 -0
- data/lib/logstash/filters/dictionary/file.rb +143 -0
- data/lib/logstash/filters/dictionary/json_file.rb +87 -0
- data/lib/logstash/filters/dictionary/memory.rb +32 -0
- data/lib/logstash/filters/dictionary/yaml_file.rb +24 -0
- data/lib/logstash/filters/dictionary/yaml_visitor.rb +42 -0
- data/lib/logstash/filters/fetch_strategy/file.rb +81 -0
- data/lib/logstash/filters/fetch_strategy/memory.rb +52 -0
- data/lib/logstash/filters/single_value_update.rb +49 -0
- data/lib/logstash/filters/translate.rb +104 -158
- data/logstash-filter-translate.gemspec +8 -1
- data/spec/filters/benchmark_rspec.rb +69 -0
- data/spec/filters/scheduling_spec.rb +201 -0
- data/spec/filters/translate_spec.rb +463 -73
- data/spec/filters/yaml_visitor_spec.rb +16 -0
- data/spec/fixtures/regex_dict.csv +4 -0
- data/spec/fixtures/regex_union_dict.csv +4 -0
- data/spec/fixtures/tag-map-dict.yml +21 -0
- data/spec/fixtures/tag-omap-dict.yml +21 -0
- data/spec/support/build_huge_dictionaries.rb +33 -0
- data/spec/support/rspec_wait_handler_helper.rb +38 -0
- metadata +129 -2
@@ -0,0 +1,44 @@
|
|
1
|
+
# encoding: utf-8

module LogStash module Filters
  # Applies a dictionary lookup to a field nested inside each element of an
  # array of hashes, writing the translated value (or the configured
  # fallback) into a per-element destination field.
  class ArrayOfMapsValueUpdate
    # @param iterate_on [String] field holding the array of hashes
    # @param field [String] source field within each array element
    # @param destination [String] destination field within each array element
    # @param fallback [String, nil] value used when no dictionary entry matches
    # @param lookup [Object] dictionary wrapper exposing #fetch_strategy
    def initialize(iterate_on, field, destination, fallback, lookup)
      @iterate_on = ensure_reference_format(iterate_on)
      @field = ensure_reference_format(field)
      @destination = ensure_reference_format(destination)
      @fallback = fallback
      @use_fallback = !fallback.nil? # a non-nil fallback means the user configured one
      @lookup = lookup
    end

    # True when the event carries the array this updater iterates on.
    # `override` is accepted for interface parity with the other updaters
    # but is not consulted here (destinations are per element).
    def test_for_inclusion(event, override)
      event.include?(@iterate_on)
    end

    # Translates each array element in place.
    # @return [Boolean] true if at least one element was updated
    #   (either via a dictionary match or via the fallback)
    def update(event)
      entries = Array(event.get(@iterate_on)) # expected to be an array of hashes
      hits = Array.new(entries.size)
      entries.each_index do |idx|
        source_ref = "#{@iterate_on}[#{idx}]#{@field}"
        target_ref = "#{@iterate_on}[#{idx}]#{@destination}"
        value = event.get(source_ref)
        next if value.nil?
        # two-slot result mutated by the fetch strategy: [matched?, value]
        result = [true, nil]
        @lookup.fetch_strategy.fetch(value.to_s, result)
        if result.first
          event.set(target_ref, result.last)
          hits[idx] = true
        elsif @use_fallback
          event.set(target_ref, event.sprintf(@fallback))
          hits[idx] = true
        end
      end
      hits.any?
    end

    # Wraps a bare field name in LogStash reference brackets if needed.
    def ensure_reference_format(field)
      field.start_with?("[") && field.end_with?("]") ? field : "[#{field}]"
    end
  end
end end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
# encoding: utf-8

module LogStash module Filters
  # Translates every element of an array-valued field and writes the
  # resulting array to a destination field, optionally pre-filling every
  # slot with a fallback value for elements that do not match.
  class ArrayOfValuesUpdate
    # Identity coercion for values that are already arrays.
    class CoerceArray
      def call(source) source; end
    end
    # Wraps any other value (including nil) via Kernel#Array.
    class CoerceOther
      def call(source) Array(source); end
    end

    # @param iterate_on [String] field holding the values to translate
    # @param destination [String] field receiving the translated array
    # @param fallback [String, nil] value used when no dictionary entry matches
    # @param lookup [Object] dictionary wrapper exposing #fetch_strategy
    def initialize(iterate_on, destination, fallback, lookup)
      @iterate_on = iterate_on
      @destination = destination
      @fallback = fallback
      @use_fallback = !fallback.nil? # a non-nil fallback means the user configured one
      @lookup = lookup
      # class-keyed dispatch table: arrays pass through, anything else is coerced
      @coercers_table = {}
      @coercers_table.default = CoerceOther.new
      @coercers_table[Array] = CoerceArray.new
    end

    # Skip translation when the destination already exists and override is off.
    def test_for_inclusion(event, override)
      return false if !override && event.include?(@destination)
      event.include?(@iterate_on)
    end

    # Builds the translated array and stores it at the destination field.
    # @return [Boolean] true when the result contains at least one truthy entry
    def update(event)
      raw = event.get(@iterate_on)
      values = @coercers_table[raw.class].call(raw)
      translated = Array.new(values.size)
      translated.fill(event.sprintf(@fallback)) if @use_fallback
      values.each_with_index do |item, idx|
        # two-slot result mutated by the fetch strategy: [matched?, value]
        outcome = [true, nil]
        @lookup.fetch_strategy.fetch(item.to_s, outcome)
        translated[idx] = outcome.last if outcome.first
      end
      event.set(@destination, translated)
      translated.any?
    end
  end
end end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require "csv"
|
3
|
+
|
4
|
+
module LogStash module Filters module Dictionary
|
5
|
+
class CsvFile < File
|
6
|
+
|
7
|
+
protected
|
8
|
+
|
9
|
+
def initialize_for_file_type
|
10
|
+
@io = StringIO.new("")
|
11
|
+
@csv = ::CSV.new(@io)
|
12
|
+
end
|
13
|
+
|
14
|
+
def read_file_into_dictionary
|
15
|
+
# low level CSV read that tries to create as
|
16
|
+
# few intermediate objects as possible
|
17
|
+
# this overwrites the value at key
|
18
|
+
IO.foreach(@dictionary_path, :mode => 'r:bom|utf-8') do |line|
|
19
|
+
@io.string = line
|
20
|
+
k,v = @csv.shift
|
21
|
+
@dictionary[k] = v
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end end end
|
@@ -0,0 +1,143 @@
|
|
1
|
+
# encoding: utf-8
require 'concurrent/atomic/atomic_boolean'
require 'rufus-scheduler'
require "logstash/util/loggable"
require "logstash/filters/fetch_strategy/file"

java_import 'java.util.concurrent.locks.ReentrantReadWriteLock'

module LogStash module Filters module Dictionary
  class DictionaryFileError < StandardError; end

  # Base class for file-backed dictionaries (CSV, JSON, YAML).
  # Handles scheduled refresh, write locking and the merge/replace update
  # strategies; subclasses implement the format-specific reading via
  # initialize_for_file_type and read_file_into_dictionary.
  class File
    # Factory: picks the subclass from the file extension and applies the
    # configured refresh behaviour. Returns the configured instance.
    def self.create(path, refresh_interval, refresh_behaviour, exact, regex)
      if /\.y[a]?ml$/.match(path)
        instance = YamlFile.new(path, refresh_interval, exact, regex)
      elsif path.end_with?(".json")
        instance = JsonFile.new(path, refresh_interval, exact, regex)
      elsif path.end_with?(".csv")
        instance = CsvFile.new(path, refresh_interval, exact, regex)
      else
        raise "Translate: Dictionary #{path} has a non valid format"
      end
      if refresh_behaviour == 'merge'
        instance.set_update_strategy(:merge_dictionary)
      elsif refresh_behaviour == 'replace'
        instance.set_update_strategy(:replace_dictionary)
      else
        # we really should never get here
        raise(LogStash::ConfigurationError, "Unknown value for refresh_behaviour=#{refresh_behaviour.to_s}")
      end
    end

    include LogStash::Util::Loggable
    attr_reader :dictionary, :fetch_strategy

    def initialize(path, refresh_interval, exact, regex)
      @dictionary_path = path
      @refresh_interval = refresh_interval
      # at 5 minutes or less we only reload when the file mtime actually changed
      @short_refresh = @refresh_interval <= 300
      @stopping = Concurrent::AtomicBoolean.new # ported from jdbc_static, need a way to prevent a scheduled execution from running a load.
      rw_lock = java.util.concurrent.locks.ReentrantReadWriteLock.new
      @write_lock = rw_lock.writeLock
      @dictionary = Hash.new
      @update_method = method(:merge_dictionary)
      initialize_for_file_type
      args = [@dictionary, rw_lock]
      klass = case
              when exact && regex then FetchStrategy::File::ExactRegex
              when exact          then FetchStrategy::File::Exact
              else                     FetchStrategy::File::RegexUnion
              end
      @fetch_strategy = klass.new(*args)
      load_dictionary(true) # raise on a broken dictionary at startup
      stop_scheduler(true)  # initial call: don't flip the stopping flag
      start_scheduler unless @refresh_interval <= 0 # disabled, a scheduler interval of zero makes no sense
    end

    # Shut down the refresh scheduler; when not the initial (constructor)
    # call, also flag @stopping so an in-flight interval won't reload.
    def stop_scheduler(initial = false)
      @stopping.make_true unless initial
      @scheduler.shutdown(:wait) if @scheduler
    end

    # (Re)load the dictionary from disk using the configured update strategy.
    # @param raise_exception [Boolean] when true, wrap and re-raise load
    #   errors as DictionaryFileError; otherwise log and keep the old data.
    def load_dictionary(raise_exception=false)
      begin
        @dictionary_mtime = ::File.mtime(@dictionary_path).to_f
        @update_method.call
      rescue Errno::ENOENT
        # Fix: use the `logger` method provided by LogStash::Util::Loggable;
        # the previous `@logger` ivar was never assigned, so this rescue
        # path raised NoMethodError on nil instead of warning.
        logger.warn("dictionary file read failure, continuing with old dictionary", :path => @dictionary_path)
      rescue => e
        loading_exception(e, raise_exception)
      end
    end

    # Select :merge_dictionary or :replace_dictionary; returns self.
    def set_update_strategy(method_sym)
      @update_method = method(method_sym)
      self
    end

    protected

    def initialize_for_file_type
      # sub class specific initializer
    end

    def read_file_into_dictionary
      # defined in csv_file, yaml_file and json_file
    end

    private

    # Schedule periodic reloads; :overlap => false prevents concurrent runs.
    def start_scheduler
      @scheduler = Rufus::Scheduler.new
      @scheduler.interval("#{@refresh_interval}s", :overlap => false) do
        reload_dictionary
      end
    end

    # Read the file into the existing dictionary (existing keys not present
    # in the file are kept), under the write lock.
    def merge_dictionary
      @write_lock.lock
      begin
        read_file_into_dictionary
        @fetch_strategy.dictionary_updated
      ensure
        @write_lock.unlock
      end
    end

    # Clear the dictionary then read the file, under the write lock.
    def replace_dictionary
      @write_lock.lock
      begin
        @dictionary.clear
        read_file_into_dictionary
        @fetch_strategy.dictionary_updated
      ensure
        @write_lock.unlock
      end
    end

    # Scheduler callback: honour the stopping flag; for short intervals
    # only reload when the file's mtime changed.
    def reload_dictionary
      return if @stopping.true?
      if @short_refresh
        load_dictionary if needs_refresh?
      else
        load_dictionary
      end
    end

    def needs_refresh?
      @dictionary_mtime != ::File.mtime(@dictionary_path).to_f
    end

    # Either re-raise the load error as DictionaryFileError (preserving the
    # backtrace) or log a warning and continue with the old dictionary.
    def loading_exception(e, raise_exception)
      msg = "Translate: #{e.message} when loading dictionary file at #{@dictionary_path}"
      if raise_exception
        dfe = DictionaryFileError.new(msg)
        dfe.set_backtrace(e.backtrace)
        raise dfe
      else
        # Fix: `logger` (Loggable mixin), not the never-assigned `@logger`.
        logger.warn("#{msg}, continuing with old dictionary", :dictionary_path => @dictionary_path)
      end
    end
  end
end end end
|
@@ -0,0 +1,87 @@
|
|
1
|
+
# encoding: utf-8
require "json"

module LogStash module Filters module Dictionary
  # JSON flavour of the dictionary file reader: the whole document is read
  # into memory and merged into @dictionary, overwriting existing keys.
  class JsonFile < File

    protected

    def initialize_for_file_type
      # no per-instance state needed for JSON
    end

    def read_file_into_dictionary
      # BOM-aware UTF-8 read; empty or missing content leaves the dictionary untouched.
      content = IO.read(@dictionary_path, :mode => 'r:bom|utf-8')
      return if content.nil? || content.empty?
      @dictionary.update(LogStash::Json.load(content))
    end
  end
end end end
|
18
|
+
|
19
|
+
__END__
|
20
|
+
Preserving the text below for near-term posterity...
|
21
|
+
|
22
|
+
I tried hard to find a stream parsing solution with JrJackson and sc_load
|
23
|
+
but it was no faster than the above code.
|
24
|
+
The idea is for each line to be read into the streaming parse that will update
|
25
|
+
the @dictionary as each key/value is found.
|
26
|
+
It will be lower on memory consumption because the JSON string is not read into memory
|
27
|
+
and then a Ruby Hash created and merged into @dictionary.
|
28
|
+
I decided to trade speed for memory. Side Note, it seems that
|
29
|
+
the json gem has become quite speedy lately.
|
30
|
+
|
31
|
+
e.g.
|
32
|
+
require_relative 'json_handler'
|
33
|
+
...
|
34
|
+
def initialize_for_file_type
|
35
|
+
@handler = JsonHandler.new(@dictionary)
|
36
|
+
end
|
37
|
+
|
38
|
+
def read_file_into_dictionary
|
39
|
+
::File.open(@dictionary_path, "r:bom|utf-8") do |io|
|
40
|
+
JrJackson::Json.sc_load(@handler, io, {raw: true})
|
41
|
+
end
|
42
|
+
end
|
43
|
+
...
|
44
|
+
where JsonHandler is:
|
45
|
+
|
46
|
+
require 'jrjackson'
|
47
|
+
|
48
|
+
module LogStash module Filters module Dictionary
|
49
|
+
class JsonHandler
|
50
|
+
def initialize(dictionary)
|
51
|
+
@dictionary = dictionary
|
52
|
+
@map_depth = 0
|
53
|
+
end
|
54
|
+
|
55
|
+
def hash_start()
|
56
|
+
@map_depth = @map_depth.succ
|
57
|
+
@map_depth == 1 ? @dictionary : {}
|
58
|
+
end
|
59
|
+
|
60
|
+
def hash_end()
|
61
|
+
@map_depth = @map_depth.pred
|
62
|
+
end
|
63
|
+
|
64
|
+
def hash_key(key)
|
65
|
+
key
|
66
|
+
end
|
67
|
+
|
68
|
+
def array_start()
|
69
|
+
[]
|
70
|
+
end
|
71
|
+
|
72
|
+
def array_end()
|
73
|
+
end
|
74
|
+
|
75
|
+
def add_value(value)
|
76
|
+
# @result = value
|
77
|
+
end
|
78
|
+
|
79
|
+
def hash_set(h, key, value)
|
80
|
+
h[key] = value
|
81
|
+
end
|
82
|
+
|
83
|
+
def array_append(a, value)
|
84
|
+
a.push(value)
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end end end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# encoding: utf-8
require "logstash/filters/fetch_strategy/memory"

module LogStash module Filters module Dictionary
  # In-memory dictionary built from the filter's `dictionary` setting.
  # Presents the same public surface as the file-backed dictionaries
  # (fetch_strategy, stop_scheduler) but never refreshes.
  class Memory

    attr_reader :dictionary, :fetch_strategy

    def initialize(hash, exact, regex)
      strategy_class =
        if exact && regex
          FetchStrategy::Memory::ExactRegex
        elsif exact
          FetchStrategy::Memory::Exact
        else
          FetchStrategy::Memory::RegexUnion
        end
      @fetch_strategy = strategy_class.new(hash)
    end

    def stop_scheduler
      # noop — nothing is scheduled for an in-memory dictionary
    end

    private

    def needs_refresh?
      false
    end

    def load_dictionary(raise_exception=false)
      # noop — the dictionary is fixed at construction time
    end
  end
end end end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# encoding: utf-8

require_relative "yaml_visitor"

module LogStash module Filters module Dictionary
  # YAML flavour of the dictionary file reader. Uses a custom Psych visitor
  # so top-level mappings are revived straight into @dictionary (overwriting
  # the value at an existing key) instead of building a temporary hash and
  # merging it afterwards.
  class YamlFile < File

    protected

    def initialize_for_file_type
      @visitor = YamlVisitor.create
    end

    def read_file_into_dictionary
      # Low level YAML read that tries to create as few intermediate
      # objects as possible; BOM-aware UTF-8 read.
      parsed = Psych.parse_stream(
        IO.read(@dictionary_path, :mode => 'r:bom|utf-8')
      )
      @visitor.accept_with_dictionary(@dictionary, parsed)
    end
  end
end end end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# encoding: utf-8

require 'psych/visitors/to_ruby'
require 'psych/exception'

# Older Rubies lack Regexp::NOENCODING, which Psych's ToRuby references.
unless defined?(Regexp::NOENCODING)
  Regexp::NOENCODING = 32
end

module LogStash module Filters module Dictionary
  # Psych visitor that revives the outermost YAML mapping directly into a
  # caller-supplied hash, so existing dictionary keys are overwritten in
  # place rather than the whole dictionary being replaced.
  class YamlVisitor < Psych::Visitors::ToRuby

    # Tags treated as plain mappings: untagged, map, and ordered map.
    TAG_MAP_TABLE = Hash.new(false)
    TAG_MAP_TABLE[nil] = true
    TAG_MAP_TABLE["tag:yaml.org,2002:map"] = true
    TAG_MAP_TABLE["tag:yaml.org,2002:omap"] = true

    # Entry point: revive the parsed node tree `target`, writing the
    # outermost mapping's pairs into `dictionary`.
    def accept_with_dictionary(dictionary, target)
      @dictionary = dictionary
      @map_depth = 0
      accept(target)
    end

    def visit_Psych_Nodes_Mapping(o)
      load_tag = Psych.load_tags[o.tag]
      return revive(resolve_class(load_tag), o) if load_tag

      # Only the depth-0 mapping is materialised into @dictionary;
      # nested mappings get fresh hashes as usual.
      target_hash = @map_depth.zero? ? @dictionary : {}
      @map_depth += 1
      begin
        if TAG_MAP_TABLE[o.tag]
          revive_hash(register(o, target_hash), o)
        else
          super(o)
        end
      ensure
        @map_depth -= 1
      end
    end
  end
end end end
|