logstash-filter-translate 3.1.0 → 3.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/docs/index.asciidoc +173 -33
- data/lib/logstash/filters/array_of_maps_value_update.rb +44 -0
- data/lib/logstash/filters/array_of_values_update.rb +37 -0
- data/lib/logstash/filters/dictionary/csv_file.rb +25 -0
- data/lib/logstash/filters/dictionary/file.rb +140 -0
- data/lib/logstash/filters/dictionary/json_file.rb +87 -0
- data/lib/logstash/filters/dictionary/memory.rb +31 -0
- data/lib/logstash/filters/dictionary/yaml_file.rb +24 -0
- data/lib/logstash/filters/dictionary/yaml_visitor.rb +42 -0
- data/lib/logstash/filters/fetch_strategy/file.rb +81 -0
- data/lib/logstash/filters/fetch_strategy/memory.rb +52 -0
- data/lib/logstash/filters/single_value_update.rb +33 -0
- data/lib/logstash/filters/translate.rb +54 -155
- data/logstash-filter-translate.gemspec +5 -1
- data/spec/filters/benchmark_rspec.rb +69 -0
- data/spec/filters/scheduling_spec.rb +200 -0
- data/spec/filters/translate_spec.rb +238 -45
- data/spec/filters/yaml_visitor_spec.rb +16 -0
- data/spec/fixtures/regex_dict.csv +4 -0
- data/spec/fixtures/regex_union_dict.csv +4 -0
- data/spec/fixtures/tag-map-dict.yml +21 -0
- data/spec/fixtures/tag-omap-dict.yml +21 -0
- data/spec/support/build_huge_dictionaries.rb +33 -0
- data/spec/support/rspec_wait_handler_helper.rb +38 -0
- metadata +87 -2
@@ -0,0 +1,140 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'concurrent/atomic/atomic_boolean'
|
3
|
+
require 'rufus-scheduler'
|
4
|
+
require "logstash/util/loggable"
|
5
|
+
require "logstash/filters/fetch_strategy/file"
|
6
|
+
|
7
|
+
java_import 'java.util.concurrent.locks.ReentrantReadWriteLock'
|
8
|
+
|
9
|
+
module LogStash module Filters module Dictionary
  # Raised when a dictionary file cannot be loaded and the caller requested
  # that load failures be fatal (the initial load at filter registration).
  class DictionaryFileError < StandardError; end

  # Base class for file-backed dictionaries (YAML, JSON, CSV).
  # Owns the scheduled refresh of the in-memory @dictionary hash and shares a
  # ReentrantReadWriteLock with the fetch strategy: reloads take the write
  # lock, per-event lookups take the read lock.
  class File
    # Factory: picks the concrete subclass from the file extension and the
    # update strategy from `refresh_behaviour`.
    #
    # @param path [String] dictionary file path
    # @param refresh_interval [Integer] seconds between reloads; <= 0 disables
    # @param refresh_behaviour [String] 'merge' or 'replace'
    # @param exact [Boolean] exact key lookup vs regex-union substitution
    # @param regex [Boolean] with exact, treat dictionary keys as regexes
    # @return [File] configured subclass instance
    # @raise [RuntimeError] when the extension is not yml/yaml/json/csv
    # @raise [LogStash::ConfigurationError] on an unknown refresh_behaviour
    def self.create(path, refresh_interval, refresh_behaviour, exact, regex)
      if /\.ya?ml$/.match(path)
        instance = YamlFile.new(path, refresh_interval, exact, regex)
      elsif path.end_with?(".json")
        instance = JsonFile.new(path, refresh_interval, exact, regex)
      elsif path.end_with?(".csv")
        instance = CsvFile.new(path, refresh_interval, exact, regex)
      else
        raise "Translate: Dictionary #{path} has a non valid format"
      end
      if refresh_behaviour == 'merge'
        instance.set_update_strategy(:merge_dictionary)
      elsif refresh_behaviour == 'replace'
        instance.set_update_strategy(:replace_dictionary)
      else
        # we really should never get here
        raise(LogStash::ConfigurationError, "Unknown value for refresh_behaviour=#{refresh_behaviour.to_s}")
      end
    end

    include LogStash::Util::Loggable
    attr_reader :dictionary, :fetch_strategy

    def initialize(path, refresh_interval, exact, regex)
      @dictionary_path = path
      @refresh_interval = refresh_interval
      # For short intervals only reload when the file mtime changed; longer
      # intervals always reload (see reload_dictionary).
      @short_refresh = @refresh_interval <= 300
      @stopping = Concurrent::AtomicBoolean.new # ported from jdbc_static, need a way to prevent a scheduled execution from running a load.
      rw_lock = java.util.concurrent.locks.ReentrantReadWriteLock.new
      @write_lock = rw_lock.writeLock
      @dictionary = Hash.new
      @update_method = method(:merge_dictionary)
      initialize_for_file_type
      args = [@dictionary, rw_lock]
      if exact
        # BUGFIX: both ternary arms previously built ExactRegex, so the plain
        # Exact strategy was unreachable and `regex => false` still paid the
        # per-key regex cost.
        @fetch_strategy = regex ? FetchStrategy::File::ExactRegex.new(*args) : FetchStrategy::File::Exact.new(*args)
      else
        @fetch_strategy = FetchStrategy::File::RegexUnion.new(*args)
      end
      load_dictionary(true) # initial load: raise on failure
      stop_scheduler(true)  # initial call: do not flag @stopping
      start_scheduler unless @refresh_interval <= 0 # disabled, a scheduler interval of zero makes no sense
    end

    # Shuts the refresh scheduler down. When `initial` is false (plugin
    # close), also flags @stopping so an in-flight interval job skips its load.
    def stop_scheduler(initial = false)
      @stopping.make_true unless initial
      @scheduler.shutdown(:wait) if @scheduler
    end

    # Records the file mtime then delegates to the configured update strategy
    # (merge_dictionary or replace_dictionary). A missing file is logged and
    # the old dictionary kept; other errors go through loading_exception.
    def load_dictionary(raise_exception=false)
      begin
        @dictionary_mtime = ::File.mtime(@dictionary_path).to_f
        @update_method.call
      rescue Errno::ENOENT
        # BUGFIX: was `@logger.warn` but @logger is never assigned; the
        # Loggable mixin provides the `logger` method, so the old code raised
        # NoMethodError inside this rescue.
        logger.warn("dictionary file read failure, continuing with old dictionary", :path => @dictionary_path)
      rescue => e
        loading_exception(e, raise_exception)
      end
    end

    # Selects :merge_dictionary or :replace_dictionary; returns self for
    # chaining from the factory.
    def set_update_strategy(method_sym)
      @update_method = method(method_sym)
      self
    end

    protected

    def initialize_for_file_type
      # sub class specific initializer
    end

    def read_file_into_dictionary
      # defined in csv_file, yaml_file and json_file
    end

    private

    # Starts a rufus-scheduler interval job; :overlap => false prevents a
    # slow reload from stacking up behind itself.
    def start_scheduler
      @scheduler = Rufus::Scheduler.new
      @scheduler.interval("#{@refresh_interval}s", :overlap => false) do
        reload_dictionary
      end
    end

    # Adds/overwrites file entries into the existing dictionary under the
    # write lock, then lets the fetch strategy rebuild its caches.
    def merge_dictionary
      @write_lock.lock
      begin
        read_file_into_dictionary
        @fetch_strategy.dictionary_updated
      ensure
        @write_lock.unlock
      end
    end

    # Clears the dictionary before re-reading, so keys removed from the file
    # disappear; done under the write lock.
    def replace_dictionary
      @write_lock.lock
      begin
        @dictionary.clear
        read_file_into_dictionary
        @fetch_strategy.dictionary_updated
      ensure
        @write_lock.unlock
      end
    end

    # Scheduler callback: skip entirely while stopping; with short refresh
    # intervals, avoid redundant reloads via the mtime check.
    def reload_dictionary
      return if @stopping.true?
      if @short_refresh
        load_dictionary if needs_refresh?
      else
        load_dictionary
      end
    end

    def needs_refresh?
      @dictionary_mtime != ::File.mtime(@dictionary_path).to_f
    end

    # Wraps a load error: fatal (DictionaryFileError) on the initial load,
    # otherwise a warning that keeps the previous dictionary.
    def loading_exception(e, raise_exception)
      msg = "Translate: #{e.message} when loading dictionary file at #{@dictionary_path}"
      if raise_exception
        raise DictionaryFileError.new(msg)
      else
        logger.warn("#{msg}, continuing with old dictionary", :dictionary_path => @dictionary_path)
      end
    end
  end
end end end
|
@@ -0,0 +1,87 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require "json"
|
3
|
+
|
4
|
+
module LogStash module Filters module Dictionary
  # File-backed dictionary stored as a single JSON document.
  class JsonFile < File

    protected

    # JSON needs no per-file-type state.
    def initialize_for_file_type
    end

    # Reads the whole file (honouring a UTF-8 BOM) and merges the parsed
    # hash into @dictionary, overwriting values at existing keys. An empty
    # or missing body leaves the dictionary untouched.
    def read_file_into_dictionary
      content = IO.read(@dictionary_path, :mode => 'r:bom|utf-8')
      return if content.nil? || content.empty?
      @dictionary.update(LogStash::Json.load(content))
    end
  end
end end end
|
18
|
+
|
19
|
+
__END__
|
20
|
+
Preserving the text below for near term posterity...
|
21
|
+
|
22
|
+
I tried hard to find a stream parsing solution with JrJackson and sc_load
|
23
|
+
but it was no faster than the above code.
|
24
|
+
The idea is for each line to be read into the streaming parse that will update
|
25
|
+
the @dictionary as each key/value is found.
|
26
|
+
It will be lower on memory consumption because the JSON string is not read into memory
|
27
|
+
and then a Ruby Hash created and merged into @dictionary.
|
28
|
+
I decided to trade speed for memory. Side Note, it seems that
|
29
|
+
the json gem has become quite speedy lately.
|
30
|
+
|
31
|
+
e.g.
|
32
|
+
require_relative 'json_handler'
|
33
|
+
...
|
34
|
+
def initialize_for_file_type
|
35
|
+
@handler = JsonHandler.new(@dictionary)
|
36
|
+
end
|
37
|
+
|
38
|
+
def read_file_into_dictionary
|
39
|
+
::File.open(@dictionary_path, "r:bom|utf-8") do |io|
|
40
|
+
JrJackson::Json.sc_load(@handler, io, {raw: true})
|
41
|
+
end
|
42
|
+
end
|
43
|
+
...
|
44
|
+
where JsonHandler is:
|
45
|
+
|
46
|
+
require 'jrjackson'
|
47
|
+
|
48
|
+
module LogStash module Filters module Dictionary
|
49
|
+
class JsonHandler
|
50
|
+
def initialize(dictionary)
|
51
|
+
@dictionary = dictionary
|
52
|
+
@map_depth = 0
|
53
|
+
end
|
54
|
+
|
55
|
+
def hash_start()
|
56
|
+
@map_depth = @map_depth.succ
|
57
|
+
@map_depth == 1 ? @dictionary : {}
|
58
|
+
end
|
59
|
+
|
60
|
+
def hash_end()
|
61
|
+
@map_depth = @map_depth.pred
|
62
|
+
end
|
63
|
+
|
64
|
+
def hash_key(key)
|
65
|
+
key
|
66
|
+
end
|
67
|
+
|
68
|
+
def array_start()
|
69
|
+
[]
|
70
|
+
end
|
71
|
+
|
72
|
+
def array_end()
|
73
|
+
end
|
74
|
+
|
75
|
+
def add_value(value)
|
76
|
+
# @result = value
|
77
|
+
end
|
78
|
+
|
79
|
+
def hash_set(h, key, value)
|
80
|
+
h[key] = value
|
81
|
+
end
|
82
|
+
|
83
|
+
def array_append(a, value)
|
84
|
+
a.push(value)
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end end end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require "logstash/filters/fetch_strategy/memory"
|
3
|
+
|
4
|
+
module LogStash module Filters module Dictionary
  # Dictionary supplied inline in the filter config. The hash is handed
  # straight to a memory fetch strategy and is never refreshed.
  class Memory

    attr_reader :dictionary, :fetch_strategy

    # Chooses the lookup strategy:
    #   exact + regex  -> every key compiled to its own Regexp
    #   exact          -> plain hash lookup
    #   otherwise      -> single union regex used for substitution
    def initialize(hash, exact, regex)
      @fetch_strategy =
        if !exact
          FetchStrategy::Memory::RegexUnion.new(hash)
        elsif regex
          FetchStrategy::Memory::ExactRegex.new(hash)
        else
          FetchStrategy::Memory::Exact.new(hash)
        end
    end

    # In-memory dictionaries have no scheduler to stop.
    def stop_scheduler
    end

    private

    # The config-supplied hash can never go stale.
    def needs_refresh?
      false
    end

    # Nothing to load; the hash was given at construction time.
    def load_dictionary(raise_exception=false)
    end
  end
end end end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require_relative "yaml_visitor"
|
4
|
+
|
5
|
+
module LogStash module Filters module Dictionary
  # File-backed dictionary in YAML format.
  class YamlFile < File

    protected

    # The visitor is reused across reloads.
    def initialize_for_file_type
      @visitor = YamlVisitor.create
    end

    # Low-level YAML read that tries to create as few intermediate objects
    # as possible: the visitor writes parsed pairs straight into @dictionary,
    # overwriting the value at each key.
    def read_file_into_dictionary
      content = IO.read(@dictionary_path, :mode => 'r:bom|utf-8')
      parsed_stream = Psych.parse_stream(content)
      @visitor.accept_with_dictionary(@dictionary, parsed_stream)
    end
  end
end end end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'psych/visitors/to_ruby'
|
4
|
+
require 'psych/exception'
|
5
|
+
|
6
|
+
# Psych's ToRuby visitor references Regexp::NOENCODING; define it when the
# runtime lacks the constant (NOTE(review): presumably for older JRuby —
# confirm which interpreter this guards against).
unless defined?(Regexp::NOENCODING)
  Regexp::NOENCODING = 32
end

module LogStash module Filters module Dictionary
  # Custom Psych visitor that materializes a parsed YAML mapping directly
  # into a caller-supplied hash, instead of building a new root hash and
  # copying it. Only the top-level mapping is redirected; nested mappings
  # still become fresh hashes.
  class YamlVisitor < Psych::Visitors::ToRuby

    # Tags whose nodes should be revived as plain hashes into the target:
    # untagged mappings (nil), explicit maps, and ordered maps. Default of
    # false routes every other tag to the stock ToRuby behavior.
    TAG_MAP_TABLE = Hash.new(false)
    TAG_MAP_TABLE[nil] = true
    TAG_MAP_TABLE["tag:yaml.org,2002:map"] = true
    TAG_MAP_TABLE["tag:yaml.org,2002:omap"] = true

    # Entry point: walk `target` (a Psych AST, e.g. from Psych.parse_stream)
    # and populate `dictionary` in place. @map_depth tracks mapping nesting
    # so only the outermost mapping writes into `dictionary`.
    def accept_with_dictionary(dictionary, target)
      @dictionary = dictionary
      @map_depth = 0
      accept(target)
    end

    # Override of ToRuby's mapping hook. Tagged classes registered with
    # Psych.load_tags keep their normal revive path; map-like tags are
    # revived into either @dictionary (depth 0) or a new hash; anything else
    # falls through to super. Depth is restored on the way out so sibling
    # mappings at the top level also target @dictionary.
    def visit_Psych_Nodes_Mapping(o)
      if Psych.load_tags[o.tag]
        return revive(resolve_class(Psych.load_tags[o.tag]), o)
      end

      target_hash = @map_depth == 0 ? @dictionary : {}
      @map_depth = @map_depth.succ

      if TAG_MAP_TABLE[o.tag]
        # register() records the node for alias resolution before reviving.
        result = revive_hash(register(o, target_hash), o)
      else
        result = super(o)
      end

      @map_depth = @map_depth.pred
      result
    end
  end
end end end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module LogStash module Filters module FetchStrategy module File
  # Lookup strategies for file-backed dictionaries. All share one contract:
  # fetch(source, results) writes a deep-cloned hit into results[1], or sets
  # results[0] = false on a miss. Reads are guarded by the read side of the
  # ReentrantReadWriteLock whose write side the dictionary reloader holds.

  # Exact string-key lookup.
  class Exact
    def initialize(dictionary, rw_lock)
      @dictionary = dictionary
      @read_lock = rw_lock.readLock
    end

    # Nothing cached; lookups read @dictionary directly.
    def dictionary_updated
    end

    def fetch(source, results)
      @read_lock.lock
      begin
        if @dictionary.include?(source)
          results[1] = LogStash::Util.deep_clone(@dictionary[source])
        else
          results[0] = false
        end
      ensure
        @read_lock.unlock
      end
    end
  end

  # Each dictionary key is treated as a regex; the first key whose regex
  # matches the source wins.
  class ExactRegex
    def initialize(dictionary, rw_lock)
      @keys_regex = Hash.new
      @dictionary = dictionary
      @read_lock = rw_lock.readLock
    end

    def dictionary_updated
      @keys_regex.clear
      # rebuilding the regex map is time expensive
      # 100 000 keys takes 0.5 seconds on a high spec Macbook Pro
      # at least we are not doing it for every event like before
      @dictionary.keys.each{|k| @keys_regex[k] = Regexp.new(k)}
    end

    def fetch(source, results)
      @read_lock.lock
      begin
        # PERF: iterate the prebuilt key=>regex map instead of materializing
        # @dictionary.keys on every event; this also avoids matching against
        # a nil regex (TypeError) if the two maps ever disagree.
        key, _regex = @keys_regex.find { |_k, regex| source.match(regex) }
        if key.nil?
          results[0] = false
        else
          results[1] = LogStash::Util.deep_clone(@dictionary[key])
        end
      ensure
        @read_lock.unlock
      end
    end
  end

  # Single union regex over all keys; every occurrence of any key in the
  # source is substituted with its dictionary value via gsub(regex, hash).
  class RegexUnion
    def initialize(dictionary, rw_lock)
      @dictionary = dictionary
      @read_lock = rw_lock.readLock
    end

    def dictionary_updated
      @union_regex_keys = Regexp.union(@dictionary.keys)
    end

    def fetch(source, results)
      @read_lock.lock
      begin
        value = source.gsub(@union_regex_keys, @dictionary)
        # gsub returning the source unchanged means no key matched.
        if source == value
          results[0] = false
        else
          results[1] = LogStash::Util.deep_clone(value)
        end
      ensure
        @read_lock.unlock
      end
    end
  end
end end end end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module LogStash module Filters module FetchStrategy module Memory
  # Lookup strategies for the config-supplied (static) dictionary. Shared
  # contract: fetch(source, results) writes a deep-cloned hit into
  # results[1], or flags a miss by setting results[0] = false. No locking is
  # needed — the hash never changes after construction.

  # Plain hash lookup on the exact source string.
  class Exact
    def initialize(dictionary)
      @dictionary = dictionary
    end

    def fetch(source, results)
      unless @dictionary.include?(source)
        results[0] = false
        return
      end
      results[1] = LogStash::Util.deep_clone(@dictionary[source])
    end
  end

  # Every key is compiled to a Regexp once, up front; the first key whose
  # regex matches the source wins.
  class ExactRegex
    def initialize(dictionary)
      @keys_regex = Hash.new
      @dictionary = dictionary
      @dictionary.keys.each { |key| @keys_regex[key] = Regexp.new(key) }
    end

    def fetch(source, results)
      matched_key = @dictionary.keys.detect { |key| source.match(@keys_regex[key]) }
      if matched_key
        results[1] = LogStash::Util.deep_clone(@dictionary[matched_key])
      else
        results[0] = false
      end
    end
  end

  # One union regex over all keys; every occurrence of any key in the source
  # is replaced with its dictionary value via gsub(regex, hash).
  class RegexUnion
    def initialize(dictionary)
      @dictionary = dictionary
      @union_regex_keys = Regexp.union(@dictionary.keys)
    end

    def fetch(source, results)
      replaced = source.gsub(@union_regex_keys, @dictionary)
      # An unchanged string means no key matched anywhere.
      if replaced == source
        results[0] = false
      else
        results[1] = LogStash::Util.deep_clone(replaced)
      end
    end
  end
end end end end
|
51
|
+
|
52
|
+
|