logstash-filter-translate 3.1.0 → 3.2.0
This diff shows the changes between these two publicly released package versions, as published to their public registry. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/docs/index.asciidoc +173 -33
- data/lib/logstash/filters/array_of_maps_value_update.rb +44 -0
- data/lib/logstash/filters/array_of_values_update.rb +37 -0
- data/lib/logstash/filters/dictionary/csv_file.rb +25 -0
- data/lib/logstash/filters/dictionary/file.rb +140 -0
- data/lib/logstash/filters/dictionary/json_file.rb +87 -0
- data/lib/logstash/filters/dictionary/memory.rb +31 -0
- data/lib/logstash/filters/dictionary/yaml_file.rb +24 -0
- data/lib/logstash/filters/dictionary/yaml_visitor.rb +42 -0
- data/lib/logstash/filters/fetch_strategy/file.rb +81 -0
- data/lib/logstash/filters/fetch_strategy/memory.rb +52 -0
- data/lib/logstash/filters/single_value_update.rb +33 -0
- data/lib/logstash/filters/translate.rb +54 -155
- data/logstash-filter-translate.gemspec +5 -1
- data/spec/filters/benchmark_rspec.rb +69 -0
- data/spec/filters/scheduling_spec.rb +200 -0
- data/spec/filters/translate_spec.rb +238 -45
- data/spec/filters/yaml_visitor_spec.rb +16 -0
- data/spec/fixtures/regex_dict.csv +4 -0
- data/spec/fixtures/regex_union_dict.csv +4 -0
- data/spec/fixtures/tag-map-dict.yml +21 -0
- data/spec/fixtures/tag-omap-dict.yml +21 -0
- data/spec/support/build_huge_dictionaries.rb +33 -0
- data/spec/support/rspec_wait_handler_helper.rb +38 -0
- metadata +87 -2
data/lib/logstash/filters/dictionary/file.rb
@@ -0,0 +1,140 @@
+# encoding: utf-8
+require 'concurrent/atomic/atomic_boolean'
+require 'rufus-scheduler'
+require "logstash/util/loggable"
+require "logstash/filters/fetch_strategy/file"
+
+java_import 'java.util.concurrent.locks.ReentrantReadWriteLock'
+
+module LogStash module Filters module Dictionary
+  class DictionaryFileError < StandardError; end
+
+  class File
+    def self.create(path, refresh_interval, refresh_behaviour, exact, regex)
+      if /\.y[a]?ml$/.match(path)
+        instance = YamlFile.new(path, refresh_interval, exact, regex)
+      elsif path.end_with?(".json")
+        instance = JsonFile.new(path, refresh_interval, exact, regex)
+      elsif path.end_with?(".csv")
+        instance = CsvFile.new(path, refresh_interval, exact, regex)
+      else
+        raise "Translate: Dictionary #{path} has a non valid format"
+      end
+      if refresh_behaviour == 'merge'
+        instance.set_update_strategy(:merge_dictionary)
+      elsif refresh_behaviour == 'replace'
+        instance.set_update_strategy(:replace_dictionary)
+      else
+        # we really should never get here
+        raise(LogStash::ConfigurationError, "Unknown value for refresh_behaviour=#{refresh_behaviour.to_s}")
+      end
+    end
+
+    include LogStash::Util::Loggable
+    attr_reader :dictionary, :fetch_strategy
+
+    def initialize(path, refresh_interval, exact, regex)
+      @dictionary_path = path
+      @refresh_interval = refresh_interval
+      @short_refresh = @refresh_interval <= 300
+      @stopping = Concurrent::AtomicBoolean.new # ported from jdbc_static, need a way to prevent a scheduled execution from running a load.
+      rw_lock = java.util.concurrent.locks.ReentrantReadWriteLock.new
+      @write_lock = rw_lock.writeLock
+      @dictionary = Hash.new
+      @update_method = method(:merge_dictionary)
+      initialize_for_file_type
+      args = [@dictionary, rw_lock]
+      if exact
+        @fetch_strategy = regex ? FetchStrategy::File::ExactRegex.new(*args) : FetchStrategy::File::Exact.new(*args)
+      else
+        @fetch_strategy = FetchStrategy::File::RegexUnion.new(*args)
+      end
+      load_dictionary(raise_exception = true)
+      stop_scheduler(initial = true)
+      start_scheduler unless @refresh_interval <= 0 # disabled, a scheduler interval of zero makes no sense
+    end
+
+    def stop_scheduler(initial = false)
+      @stopping.make_true unless initial
+      @scheduler.shutdown(:wait) if @scheduler
+    end
+
+    def load_dictionary(raise_exception=false)
+      begin
+        @dictionary_mtime = ::File.mtime(@dictionary_path).to_f
+        @update_method.call
+      rescue Errno::ENOENT
+        @logger.warn("dictionary file read failure, continuing with old dictionary", :path => @dictionary_path)
+      rescue => e
+        loading_exception(e, raise_exception)
+      end
+    end
+
+    def set_update_strategy(method_sym)
+      @update_method = method(method_sym)
+      self
+    end
+
+    protected
+
+    def initialize_for_file_type
+      # sub class specific initializer
+    end
+
+    def read_file_into_dictionary
+      # defined in csv_file, yaml_file and json_file
+    end
+
+    private
+
+    def start_scheduler
+      @scheduler = Rufus::Scheduler.new
+      @scheduler.interval("#{@refresh_interval}s", :overlap => false) do
+        reload_dictionary
+      end
+    end
+
+    def merge_dictionary
+      @write_lock.lock
+      begin
+        read_file_into_dictionary
+        @fetch_strategy.dictionary_updated
+      ensure
+        @write_lock.unlock
+      end
+    end
+
+    def replace_dictionary
+      @write_lock.lock
+      begin
+        @dictionary.clear
+        read_file_into_dictionary
+        @fetch_strategy.dictionary_updated
+      ensure
+        @write_lock.unlock
+      end
+    end
+
+    def reload_dictionary
+      return if @stopping.true?
+      if @short_refresh
+        load_dictionary if needs_refresh?
+      else
+        load_dictionary
+      end
+    end
+
+    def needs_refresh?
+      @dictionary_mtime != ::File.mtime(@dictionary_path).to_f
+    end
+
+    def loading_exception(e, raise_exception)
+      msg = "Translate: #{e.message} when loading dictionary file at #{@dictionary_path}"
+      if raise_exception
+        raise DictionaryFileError.new(msg)
+      else
+        @logger.warn("#{msg}, continuing with old dictionary", :dictionary_path => @dictionary_path)
+      end
+    end
+  end
+end end end
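`File.create` is a factory: the file extension picks the subclass (`YamlFile`, `JsonFile`, `CsvFile`), `refresh_behaviour` picks the update strategy, and the constructor wires up a fetch strategy plus an optional rufus-scheduler interval that reloads the file in the background. A minimal usage sketch, assuming a Logstash runtime with the plugin on the load path; the path, dictionary contents, and option values are illustrative, not from the diff:

```ruby
require "logstash/filters/dictionary/file"

# Suppose /tmp/dict.yml contains lines such as "error: alert".
# Reload every 300 s, merging new entries over old ones,
# with exact (non-regex) key lookups.
dict = LogStash::Filters::Dictionary::File.create(
  "/tmp/dict.yml", # extension dispatch -> YamlFile
  300,             # refresh_interval in seconds
  "merge",         # refresh_behaviour -> set_update_strategy(:merge_dictionary)
  true,            # exact
  false            # regex
)

# Callers go through the strategy, never the hash directly:
results = [true, nil]                 # [matched?, translated value]
dict.fetch_strategy.fetch("error", results)
puts results[1] if results[0]         # => "alert"
dict.stop_scheduler                   # stop the background reload thread
```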
data/lib/logstash/filters/dictionary/json_file.rb
@@ -0,0 +1,87 @@
+# encoding: utf-8
+require "json"
+
+module LogStash module Filters module Dictionary
+  class JsonFile < File
+
+    protected
+
+    def initialize_for_file_type
+    end
+
+    def read_file_into_dictionary
+      content = IO.read(@dictionary_path, :mode => 'r:bom|utf-8')
+      @dictionary.update(LogStash::Json.load(content)) unless content.nil? || content.empty?
+    end
+  end
+end end end
+
+__END__
+Preserving the text below for near term prosperity...
+
+I tried hard to find a stream parsing solution with JrJackson and sc_load
+but it was no faster than the above code.
+The idea is for each line to be read into the streaming parse that will update
+the @dictionary as each key/value is found.
+It will be lower on memory consumption because the JSON string is not read into memory
+and then a Ruby Hash created and merged into @dictionary.
+I decided to trade speed for memory. Side Note, it seems that
+the json gem has become quite speedy lately.
+
+e.g.
+require_relative 'json_handler'
+...
+def initialize_for_file_type
+  @handler = JsonHandler.new(@dictionary)
+end
+
+def read_file_into_dictionary
+  ::File.open(@dictionary_path, "r:bom|utf-8") do |io|
+    JrJackson::Json.sc_load(@handler, io, {raw: true})
+  end
+end
+...
+where JsonHandler is:
+
+require 'jrjackson'
+
+module LogStash module Filters module Dictionary
+  class JsonHandler
+    def initialize(dictionary)
+      @dictionary = dictionary
+      @map_depth = 0
+    end
+
+    def hash_start()
+      @map_depth = @map_depth.succ
+      @map_depth == 1 ? @dictionary : {}
+    end
+
+    def hash_end()
+      @map_depth = @map_depth.pred
+    end
+
+    def hash_key(key)
+      key
+    end
+
+    def array_start()
+      []
+    end
+
+    def array_end()
+    end
+
+    def add_value(value)
+      # @result = value
+    end
+
+    def hash_set(h, key, value)
+      h[key] = value
+    end
+
+    def array_append(a, value)
+      a.push(value)
+    end
+  end
+end end end
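`read_file_into_dictionary` above is deliberately simple: parse the whole document, then merge it with `Hash#update`, so existing keys are overwritten and unrelated keys survive. The same semantics in plain Ruby, with the stdlib `JSON` standing in for `LogStash::Json` and illustrative data:

```ruby
require "json"

dictionary = { "old" => "kept", "code" => "stale" }
content = '{"code": "fresh", "extra": "added"}' # the on-disk dictionary

# Whole-document parse, single merge; guarded like the plugin code.
dictionary.update(JSON.parse(content)) unless content.nil? || content.empty?
dictionary # => {"old"=>"kept", "code"=>"fresh", "extra"=>"added"}
```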
data/lib/logstash/filters/dictionary/memory.rb
@@ -0,0 +1,31 @@
+# encoding: utf-8
+require "logstash/filters/fetch_strategy/memory"
+
+module LogStash module Filters module Dictionary
+  class Memory
+
+    attr_reader :dictionary, :fetch_strategy
+
+    def initialize(hash, exact, regex)
+      if exact
+        @fetch_strategy = regex ? FetchStrategy::Memory::ExactRegex.new(hash) : FetchStrategy::Memory::Exact.new(hash)
+      else
+        @fetch_strategy = FetchStrategy::Memory::RegexUnion.new(hash)
+      end
+    end
+
+    def stop_scheduler
+      # noop
+    end
+
+    private
+
+    def needs_refresh?
+      false
+    end
+
+    def load_dictionary(raise_exception=false)
+      # noop
+    end
+  end
+end end end
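`Memory` deliberately mirrors the `File` interface (`fetch_strategy`, `stop_scheduler`, a no-op `load_dictionary`), so the filter can drive an inline dictionary and a file-backed one through the same calls. A usage sketch, again assuming a Logstash runtime and illustrative data:

```ruby
require "logstash/filters/dictionary/memory"

dict = LogStash::Filters::Dictionary::Memory.new(
  { "200" => "OK", "404" => "Not Found" }, # inline `dictionary` setting
  true,                                    # exact
  false                                    # regex
)

results = [true, nil]
dict.fetch_strategy.fetch("404", results) # results => [true, "Not Found"]
dict.stop_scheduler                       # no-op; nothing to reload
```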
data/lib/logstash/filters/dictionary/yaml_file.rb
@@ -0,0 +1,24 @@
+# encoding: utf-8
+
+require_relative "yaml_visitor"
+
+module LogStash module Filters module Dictionary
+  class YamlFile < File
+
+    protected
+
+    def initialize_for_file_type
+      @visitor = YamlVisitor.create
+    end
+
+    def read_file_into_dictionary
+      # low level YAML read that tries to create as
+      # few intermediate objects as possible
+      # this overwrites the value at key
+      @visitor.accept_with_dictionary(
+        @dictionary, Psych.parse_stream(
+          IO.read(@dictionary_path, :mode => 'r:bom|utf-8')
+      ))
+    end
+  end
+end end end
data/lib/logstash/filters/dictionary/yaml_visitor.rb
@@ -0,0 +1,42 @@
+# encoding: utf-8
+
+require 'psych/visitors/to_ruby'
+require 'psych/exception'
+
+unless defined?(Regexp::NOENCODING)
+  Regexp::NOENCODING = 32
+end
+
+module LogStash module Filters module Dictionary
+  class YamlVisitor < Psych::Visitors::ToRuby
+
+    TAG_MAP_TABLE = Hash.new(false)
+    TAG_MAP_TABLE[nil] = true
+    TAG_MAP_TABLE["tag:yaml.org,2002:map"] = true
+    TAG_MAP_TABLE["tag:yaml.org,2002:omap"] = true
+
+    def accept_with_dictionary(dictionary, target)
+      @dictionary = dictionary
+      @map_depth = 0
+      accept(target)
+    end
+
+    def visit_Psych_Nodes_Mapping(o)
+      if Psych.load_tags[o.tag]
+        return revive(resolve_class(Psych.load_tags[o.tag]), o)
+      end
+
+      target_hash = @map_depth == 0 ? @dictionary : {}
+      @map_depth = @map_depth.succ
+
+      if TAG_MAP_TABLE[o.tag]
+        result = revive_hash(register(o, target_hash), o)
+      else
+        result = super(o)
+      end
+
+      @map_depth = @map_depth.pred
+      result
+    end
+  end
+end end end
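The point of the visitor is the depth counter: the mapping at depth 0 is revived straight into the long-lived `@dictionary` (via `register(o, target_hash)`), so a reload mutates the shared hash in place rather than building a new one; nested mappings still get fresh `{}` hashes. A standalone sketch of walking the same `Psych.parse_stream` AST with plain Psych, handling only flat scalar mappings:

```ruby
require "psych"

yaml = "200: OK\n404: Not Found\n"
tree = Psych.parse_stream(yaml)        # AST; no Ruby objects revived yet

target = {}                            # plays the role of @dictionary
tree.children.each do |doc|            # one node per YAML document
  doc.root.children.each_slice(2) do |key_node, value_node|
    target[key_node.value] = value_node.value # scalars only, as strings
  end
end
target # => {"200"=>"OK", "404"=>"Not Found"}
```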
data/lib/logstash/filters/fetch_strategy/file.rb
@@ -0,0 +1,81 @@
+# encoding: utf-8
+
+module LogStash module Filters module FetchStrategy module File
+  class Exact
+    def initialize(dictionary, rw_lock)
+      @dictionary = dictionary
+      @read_lock = rw_lock.readLock
+    end
+
+    def dictionary_updated
+    end
+
+    def fetch(source, results)
+      @read_lock.lock
+      begin
+        if @dictionary.include?(source)
+          results[1] = LogStash::Util.deep_clone(@dictionary[source])
+        else
+          results[0] = false
+        end
+      ensure
+        @read_lock.unlock
+      end
+    end
+  end
+
+  class ExactRegex
+    def initialize(dictionary, rw_lock)
+      @keys_regex = Hash.new()
+      @dictionary = dictionary
+      @read_lock = rw_lock.readLock
+    end
+
+    def dictionary_updated
+      @keys_regex.clear
+      # rebuilding the regex map is time expensive
+      # 100 000 keys takes 0.5 seconds on a high spec Macbook Pro
+      # at least we are not doing it for every event like before
+      @dictionary.keys.each{|k| @keys_regex[k] = Regexp.new(k)}
+    end
+
+    def fetch(source, results)
+      @read_lock.lock
+      begin
+        key = @dictionary.keys.detect{|k| source.match(@keys_regex[k])}
+        if key.nil?
+          results[0] = false
+        else
+          results[1] = LogStash::Util.deep_clone(@dictionary[key])
+        end
+      ensure
+        @read_lock.unlock
+      end
+    end
+  end
+
+  class RegexUnion
+    def initialize(dictionary, rw_lock)
+      @dictionary = dictionary
+      @read_lock = rw_lock.readLock
+    end
+
+    def dictionary_updated
+      @union_regex_keys = Regexp.union(@dictionary.keys)
+    end
+
+    def fetch(source, results)
+      @read_lock.lock
+      begin
+        value = source.gsub(@union_regex_keys, @dictionary)
+        if source == value
+          results[0] = false
+        else
+          results[1] = LogStash::Util.deep_clone(value)
+        end
+      ensure
+        @read_lock.unlock
+      end
+    end
+  end
+end end end end
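All strategies share one contract: `results` is a two-slot array where `results[0]` is flipped to `false` on a miss and `results[1]` receives the (deep-cloned) translation on a hit; the read lock only pins the dictionary during the lookup. `RegexUnion` additionally leans on Ruby's `String#gsub(pattern, hash)`, which replaces each match with `hash[matched_text]`. A standalone sketch with illustrative data:

```ruby
dictionary = { "404" => "Not Found", "500" => "Server Error" }
union = Regexp.union(dictionary.keys)   # rebuilt once per dictionary update

source = "status 404 then 500"
value  = source.gsub(union, dictionary) # each match looked up in the hash
# => "status Not Found then Server Error"

results = [true, nil]
if source == value                      # no key matched anywhere -> miss
  results[0] = false
else
  results[1] = value
end
```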
data/lib/logstash/filters/fetch_strategy/memory.rb
@@ -0,0 +1,52 @@
+# encoding: utf-8
+
+module LogStash module Filters module FetchStrategy module Memory
+  class Exact
+    def initialize(dictionary)
+      @dictionary = dictionary
+    end
+
+    def fetch(source, results)
+      if @dictionary.include?(source)
+        results[1] = LogStash::Util.deep_clone(@dictionary[source])
+      else
+        results[0] = false
+      end
+    end
+  end
+
+  class ExactRegex
+    def initialize(dictionary)
+      @keys_regex = Hash.new()
+      @dictionary = dictionary
+      @dictionary.keys.each{|k| @keys_regex[k] = Regexp.new(k)}
+    end
+
+    def fetch(source, results)
+      key = @dictionary.keys.detect{|k| source.match(@keys_regex[k])}
+      if key.nil?
+        results[0] = false
+      else
+        results[1] = LogStash::Util.deep_clone(@dictionary[key])
+      end
+    end
+  end
+
+  class RegexUnion
+    def initialize(dictionary)
+      @dictionary = dictionary
+      @union_regex_keys = Regexp.union(@dictionary.keys)
+    end
+
+    def fetch(source, results)
+      value = source.gsub(@union_regex_keys, @dictionary)
+      if source == value
+        results[0] = false
+      else
+        results[1] = LogStash::Util.deep_clone(value)
+      end
+    end
+  end
+end end end end
+
+
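The memory variants are the same lookups without locks, since an inline dictionary never changes after startup. One behaviour worth noting in `ExactRegex`: `detect` returns the first key whose compiled pattern matches, so when several patterns match, dictionary order wins. A standalone sketch:

```ruby
dictionary = { "^4\\d\\d$" => "client error", "^404$" => "not found" }
keys_regex = {}
dictionary.keys.each { |k| keys_regex[k] = Regexp.new(k) }

source = "404"
key = dictionary.keys.detect { |k| source.match(keys_regex[k]) }
key && dictionary[key] # => "client error" -- the first pattern wins
```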