logstash-filter-translate 3.1.0 → 3.2.0

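All changes in this release are new files: dictionary loading and lookup move out of the filter class into dedicated Dictionary and FetchStrategy classes, with scheduled file refresh via rufus-scheduler. The new files follow, each with a brief usage note after its hunk.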
lib/logstash/filters/dictionary/file.rb (new file)
@@ -0,0 +1,140 @@
+# encoding: utf-8
+require 'concurrent/atomic/atomic_boolean'
+require 'rufus-scheduler'
+require "logstash/util/loggable"
+require "logstash/filters/fetch_strategy/file"
+
+java_import 'java.util.concurrent.locks.ReentrantReadWriteLock'
+
+module LogStash module Filters module Dictionary
+  class DictionaryFileError < StandardError; end
+
+  class File
+    def self.create(path, refresh_interval, refresh_behaviour, exact, regex)
+      if /\.y[a]?ml$/.match(path)
+        instance = YamlFile.new(path, refresh_interval, exact, regex)
+      elsif path.end_with?(".json")
+        instance = JsonFile.new(path, refresh_interval, exact, regex)
+      elsif path.end_with?(".csv")
+        instance = CsvFile.new(path, refresh_interval, exact, regex)
+      else
+        raise "Translate: Dictionary #{path} has a non valid format"
+      end
+      if refresh_behaviour == 'merge'
+        instance.set_update_strategy(:merge_dictionary)
+      elsif refresh_behaviour == 'replace'
+        instance.set_update_strategy(:replace_dictionary)
+      else
+        # we really should never get here
+        raise(LogStash::ConfigurationError, "Unknown value for refresh_behaviour=#{refresh_behaviour.to_s}")
+      end
+    end
+
+    include LogStash::Util::Loggable
+    attr_reader :dictionary, :fetch_strategy
+
+    def initialize(path, refresh_interval, exact, regex)
+      @dictionary_path = path
+      @refresh_interval = refresh_interval
+      @short_refresh = @refresh_interval <= 300
+      @stopping = Concurrent::AtomicBoolean.new # ported from jdbc_static, need a way to prevent a scheduled execution from running a load.
+      rw_lock = java.util.concurrent.locks.ReentrantReadWriteLock.new
+      @write_lock = rw_lock.writeLock
+      @dictionary = Hash.new
+      @update_method = method(:merge_dictionary)
+      initialize_for_file_type
+      args = [@dictionary, rw_lock]
+      if exact
+        @fetch_strategy = regex ? FetchStrategy::File::ExactRegex.new(*args) : FetchStrategy::File::Exact.new(*args)
+      else
+        @fetch_strategy = FetchStrategy::File::RegexUnion.new(*args)
+      end
+      load_dictionary(raise_exception = true)
+      stop_scheduler(initial = true)
+      start_scheduler unless @refresh_interval <= 0 # disabled, a scheduler interval of zero makes no sense
+    end
+
+    def stop_scheduler(initial = false)
+      @stopping.make_true unless initial
+      @scheduler.shutdown(:wait) if @scheduler
+    end
+
+    def load_dictionary(raise_exception=false)
+      begin
+        @dictionary_mtime = ::File.mtime(@dictionary_path).to_f
+        @update_method.call
+      rescue Errno::ENOENT
+        logger.warn("dictionary file read failure, continuing with old dictionary", :path => @dictionary_path)
+      rescue => e
+        loading_exception(e, raise_exception)
+      end
+    end
+
+    def set_update_strategy(method_sym)
+      @update_method = method(method_sym)
+      self
+    end
+
+    protected
+
+    def initialize_for_file_type
+      # sub class specific initializer
+    end
+
+    def read_file_into_dictionary
+      # defined in csv_file, yaml_file and json_file
+    end
+
+    private
+
+    def start_scheduler
+      @scheduler = Rufus::Scheduler.new
+      @scheduler.interval("#{@refresh_interval}s", :overlap => false) do
+        reload_dictionary
+      end
+    end
+
+    def merge_dictionary
+      @write_lock.lock
+      begin
+        read_file_into_dictionary
+        @fetch_strategy.dictionary_updated
+      ensure
+        @write_lock.unlock
+      end
+    end
+
+    def replace_dictionary
+      @write_lock.lock
+      begin
+        @dictionary.clear
+        read_file_into_dictionary
+        @fetch_strategy.dictionary_updated
+      ensure
+        @write_lock.unlock
+      end
+    end
+
+    def reload_dictionary
+      return if @stopping.true?
+      if @short_refresh
+        load_dictionary if needs_refresh?
+      else
+        load_dictionary
+      end
+    end
+
+    def needs_refresh?
+      @dictionary_mtime != ::File.mtime(@dictionary_path).to_f
+    end
+
+    def loading_exception(e, raise_exception)
+      msg = "Translate: #{e.message} when loading dictionary file at #{@dictionary_path}"
+      if raise_exception
+        raise DictionaryFileError.new(msg)
+      else
+        logger.warn("#{msg}, continuing with old dictionary", :dictionary_path => @dictionary_path)
+      end
+    end
+  end
+end end end
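
A minimal sketch of how this factory is driven (the call shape follows `self.create` above; the path and option values are hypothetical, and running it requires the Logstash/JRuby runtime this plugin loads into):

    # Hypothetical caller, roughly what the translate filter would do at register time.
    dictionary = LogStash::Filters::Dictionary::File.create(
      "/tmp/lookup.yml",  # extension selects YamlFile, JsonFile or CsvFile
      300,                # refresh_interval in seconds; <= 0 disables the scheduler
      "merge",            # refresh_behaviour: 'merge' or 'replace'
      true,               # exact lookup
      false               # keys are not treated as regexes
    )
    # ...and at close time:
    dictionary.stop_scheduler

Note that `create` returns the instance via `set_update_strategy` (which ends in `self`), and the constructor runs the first `load_dictionary` with `raise_exception = true`, so a broken dictionary file fails fast at startup instead of being silently skipped.
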
lib/logstash/filters/dictionary/json_file.rb (new file)
@@ -0,0 +1,87 @@
+# encoding: utf-8
+require "json"
+
+module LogStash module Filters module Dictionary
+  class JsonFile < File
+
+    protected
+
+    def initialize_for_file_type
+    end
+
+    def read_file_into_dictionary
+      content = IO.read(@dictionary_path, :mode => 'r:bom|utf-8')
+      @dictionary.update(LogStash::Json.load(content)) unless content.nil? || content.empty?
+    end
+  end
+end end end
+
+__END__
+Preserving the text below for near term posterity...
+
+I tried hard to find a stream parsing solution with JrJackson and sc_load
+but it was no faster than the above code.
+The idea is for each line to be read into the streaming parser, which updates
+@dictionary as each key/value pair is found.
+It would be lower on memory consumption because the JSON string is not read into memory
+and then a Ruby Hash created and merged into @dictionary.
+I decided to trade speed for memory. Side note: it seems that
+the json gem has become quite speedy lately.
+
+e.g.
+require_relative 'json_handler'
+...
+def initialize_for_file_type
+  @handler = JsonHandler.new(@dictionary)
+end
+
+def read_file_into_dictionary
+  ::File.open(@dictionary_path, "r:bom|utf-8") do |io|
+    JrJackson::Json.sc_load(@handler, io, {raw: true})
+  end
+end
+...
+where JsonHandler is:
+
+require 'jrjackson'
+
+module LogStash module Filters module Dictionary
+  class JsonHandler
+    def initialize(dictionary)
+      @dictionary = dictionary
+      @map_depth = 0
+    end
+
+    def hash_start()
+      @map_depth = @map_depth.succ
+      @map_depth == 1 ? @dictionary : {}
+    end
+
+    def hash_end()
+      @map_depth = @map_depth.pred
+    end
+
+    def hash_key(key)
+      key
+    end
+
+    def array_start()
+      []
+    end
+
+    def array_end()
+    end
+
+    def add_value(value)
+      # @result = value
+    end
+
+    def hash_set(h, key, value)
+      h[key] = value
+    end
+
+    def array_append(a, value)
+      a.push(value)
+    end
+  end
+end end end
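
`read_file_into_dictionary` expects the whole file to be a single JSON object whose top-level pairs become dictionary entries. A hypothetical dictionary file:

    {
      "200": "OK",
      "404": "Not Found",
      "500": "Internal Server Error"
    }

Because it merges with `Hash#update`, keys removed from the file survive a reload under the 'merge' refresh behaviour; only `replace_dictionary` (the 'replace' behaviour) clears them first.
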
lib/logstash/filters/dictionary/memory.rb (new file)
@@ -0,0 +1,31 @@
+# encoding: utf-8
+require "logstash/filters/fetch_strategy/memory"
+
+module LogStash module Filters module Dictionary
+  class Memory
+
+    attr_reader :dictionary, :fetch_strategy
+
+    def initialize(hash, exact, regex)
+      if exact
+        @fetch_strategy = regex ? FetchStrategy::Memory::ExactRegex.new(hash) : FetchStrategy::Memory::Exact.new(hash)
+      else
+        @fetch_strategy = FetchStrategy::Memory::RegexUnion.new(hash)
+      end
+    end
+
+    def stop_scheduler
+      # noop
+    end
+
+    private
+
+    def needs_refresh?
+      false
+    end
+
+    def load_dictionary(raise_exception=false)
+      # noop
+    end
+  end
+end end end
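
A sketch of the in-memory variant exercised directly. The two-slot `results` convention is inferred from the fetch strategies below: the caller seeds `[true, nil]`, a miss flips slot 0 to false, a hit writes the deep-cloned value into slot 1. `LogStash::Util.deep_clone` assumes a Logstash runtime:

    memory = LogStash::Filters::Dictionary::Memory.new(
      { "200" => "OK", "404" => "Not Found" }, # the filter's inline `dictionary` option
      true,   # exact
      false   # regex
    )

    results = [true, nil]
    memory.fetch_strategy.fetch("404", results)
    results # => [true, "Not Found"]

    results = [true, nil]
    memory.fetch_strategy.fetch("301", results)
    results # => [false, nil]
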
lib/logstash/filters/dictionary/yaml_file.rb (new file)
@@ -0,0 +1,24 @@
+# encoding: utf-8
+
+require_relative "yaml_visitor"
+
+module LogStash module Filters module Dictionary
+  class YamlFile < File
+
+    protected
+
+    def initialize_for_file_type
+      @visitor = YamlVisitor.create
+    end
+
+    def read_file_into_dictionary
+      # low level YAML read that tries to create as
+      # few intermediate objects as possible
+      # this overwrites the value at key
+      @visitor.accept_with_dictionary(
+        @dictionary, Psych.parse_stream(
+          IO.read(@dictionary_path, :mode => 'r:bom|utf-8')
+      ))
+    end
+  end
+end end end
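
The YAML file, like the JSON one, must be a top-level mapping; the visitor revives each top-level pair straight into `@dictionary`, overwriting the previous value at each key. A hypothetical dictionary file:

    "200": OK
    "404": Not Found
    nested:
      key: value   # mappings below the top level become ordinary new hashes
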
lib/logstash/filters/dictionary/yaml_visitor.rb (new file)
@@ -0,0 +1,42 @@
+# encoding: utf-8
+
+require 'psych/visitors/to_ruby'
+require 'psych/exception'
+
+unless defined?(Regexp::NOENCODING)
+  Regexp::NOENCODING = 32
+end
+
+module LogStash module Filters module Dictionary
+  class YamlVisitor < Psych::Visitors::ToRuby
+
+    TAG_MAP_TABLE = Hash.new(false)
+    TAG_MAP_TABLE[nil] = true
+    TAG_MAP_TABLE["tag:yaml.org,2002:map"] = true
+    TAG_MAP_TABLE["tag:yaml.org,2002:omap"] = true
+
+    def accept_with_dictionary(dictionary, target)
+      @dictionary = dictionary
+      @map_depth = 0
+      accept(target)
+    end
+
+    def visit_Psych_Nodes_Mapping(o)
+      if Psych.load_tags[o.tag]
+        return revive(resolve_class(Psych.load_tags[o.tag]), o)
+      end
+
+      target_hash = @map_depth == 0 ? @dictionary : {}
+      @map_depth = @map_depth.succ
+
+      if TAG_MAP_TABLE[o.tag]
+        result = revive_hash(register(o, target_hash), o)
+      else
+        result = super(o)
+      end
+
+      @map_depth = @map_depth.pred
+      result
+    end
+  end
+end end end
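
A sketch of the visitor in isolation: when `@map_depth` is 0, the hash handed to `revive_hash` is the caller's dictionary itself, so the top-level mapping is materialised into it without building and merging an intermediate hash. Plain Psych, no Logstash runtime needed:

    require 'psych'

    dictionary = { "stale" => "old" }
    visitor = LogStash::Filters::Dictionary::YamlVisitor.create
    visitor.accept_with_dictionary(dictionary, Psych.parse_stream("a: 1\nb: 2\n"))
    dictionary # => {"stale"=>"old", "a"=>1, "b"=>2}
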
lib/logstash/filters/fetch_strategy/file.rb (new file)
@@ -0,0 +1,81 @@
+# encoding: utf-8
+
+module LogStash module Filters module FetchStrategy module File
+  class Exact
+    def initialize(dictionary, rw_lock)
+      @dictionary = dictionary
+      @read_lock = rw_lock.readLock
+    end
+
+    def dictionary_updated
+    end
+
+    def fetch(source, results)
+      @read_lock.lock
+      begin
+        if @dictionary.include?(source)
+          results[1] = LogStash::Util.deep_clone(@dictionary[source])
+        else
+          results[0] = false
+        end
+      ensure
+        @read_lock.unlock
+      end
+    end
+  end
+
+  class ExactRegex
+    def initialize(dictionary, rw_lock)
+      @keys_regex = Hash.new()
+      @dictionary = dictionary
+      @read_lock = rw_lock.readLock
+    end
+
+    def dictionary_updated
+      @keys_regex.clear
+      # rebuilding the regex map is time expensive
+      # 100 000 keys takes 0.5 seconds on a high spec Macbook Pro
+      # at least we are not doing it for every event like before
+      @dictionary.keys.each{|k| @keys_regex[k] = Regexp.new(k)}
+    end
+
+    def fetch(source, results)
+      @read_lock.lock
+      begin
+        key = @dictionary.keys.detect{|k| source.match(@keys_regex[k])}
+        if key.nil?
+          results[0] = false
+        else
+          results[1] = LogStash::Util.deep_clone(@dictionary[key])
+        end
+      ensure
+        @read_lock.unlock
+      end
+    end
+  end
+
+  class RegexUnion
+    def initialize(dictionary, rw_lock)
+      @dictionary = dictionary
+      @read_lock = rw_lock.readLock
+    end
+
+    def dictionary_updated
+      @union_regex_keys = Regexp.union(@dictionary.keys)
+    end
+
+    def fetch(source, results)
+      @read_lock.lock
+      begin
+        value = source.gsub(@union_regex_keys, @dictionary)
+        if source == value
+          results[0] = false
+        else
+          results[1] = LogStash::Util.deep_clone(value)
+        end
+      ensure
+        @read_lock.unlock
+      end
+    end
+  end
+end end end end
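
These strategies pair with the scheduler in `Dictionary::File`: `fetch` holds the read lock while `merge_dictionary`/`replace_dictionary` hold the write lock, so an event never reads a half-reloaded dictionary. A sketch against `ExactRegex` (JRuby plus a Logstash runtime assumed; the dictionary contents are hypothetical):

    rw_lock  = java.util.concurrent.locks.ReentrantReadWriteLock.new
    dict     = { "quick" => "fast" }
    strategy = LogStash::Filters::FetchStrategy::File::ExactRegex.new(dict, rw_lock)
    strategy.dictionary_updated         # compiles one Regexp per key, once per reload

    results = [true, nil]
    strategy.fetch("quicker", results)  # "quicker" matches /quick/
    results # => [true, "fast"]
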
lib/logstash/filters/fetch_strategy/memory.rb (new file)
@@ -0,0 +1,52 @@
+# encoding: utf-8
+
+module LogStash module Filters module FetchStrategy module Memory
+  class Exact
+    def initialize(dictionary)
+      @dictionary = dictionary
+    end
+
+    def fetch(source, results)
+      if @dictionary.include?(source)
+        results[1] = LogStash::Util.deep_clone(@dictionary[source])
+      else
+        results[0] = false
+      end
+    end
+  end
+
+  class ExactRegex
+    def initialize(dictionary)
+      @keys_regex = Hash.new()
+      @dictionary = dictionary
+      @dictionary.keys.each{|k| @keys_regex[k] = Regexp.new(k)}
+    end
+
+    def fetch(source, results)
+      key = @dictionary.keys.detect{|k| source.match(@keys_regex[k])}
+      if key.nil?
+        results[0] = false
+      else
+        results[1] = LogStash::Util.deep_clone(@dictionary[key])
+      end
+    end
+  end
+
+  class RegexUnion
+    def initialize(dictionary)
+      @dictionary = dictionary
+      @union_regex_keys = Regexp.union(@dictionary.keys)
+    end
+
+    def fetch(source, results)
+      value = source.gsub(@union_regex_keys, @dictionary)
+      if source == value
+        results[0] = false
+      else
+        results[1] = LogStash::Util.deep_clone(value)
+      end
+    end
+  end
+end end end end
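
The `RegexUnion` variants rely on `String#gsub` accepting a Hash as the replacement: every key that matches is swapped for its dictionary value in one pass, and a miss is detected by the output equalling the input. A sketch (Logstash runtime assumed for `deep_clone`):

    dictionary = { "foo" => "bar", "baz" => "qux" }
    strategy = LogStash::Filters::FetchStrategy::Memory::RegexUnion.new(dictionary)

    results = [true, nil]
    strategy.fetch("say foo and baz", results)
    results # => [true, "say bar and qux"]

By contrast, `ExactRegex` scans `@dictionary.keys` linearly per event, while `Exact` is a single hash lookup.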