logstash-filter-translate 3.1.0 → 3.2.0

This diff shows the content of publicly released package versions as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
@@ -0,0 +1,140 @@
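New file, most likely lib/logstash/filters/dictionary/file.rb (the diff omits file names; paths here are inferred from the require statements, not stated in the diff).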
+ # encoding: utf-8
+ require 'concurrent/atomic/atomic_boolean'
+ require 'rufus-scheduler'
+ require "logstash/util/loggable"
+ require "logstash/filters/fetch_strategy/file"
+
+ java_import 'java.util.concurrent.locks.ReentrantReadWriteLock'
+
+ module LogStash module Filters module Dictionary
+   class DictionaryFileError < StandardError; end
+
+   class File
+     def self.create(path, refresh_interval, refresh_behaviour, exact, regex)
+       if /\.y[a]?ml$/.match(path)
+         instance = YamlFile.new(path, refresh_interval, exact, regex)
+       elsif path.end_with?(".json")
+         instance = JsonFile.new(path, refresh_interval, exact, regex)
+       elsif path.end_with?(".csv")
+         instance = CsvFile.new(path, refresh_interval, exact, regex)
+       else
+         raise "Translate: Dictionary #{path} has an invalid format"
+       end
+       if refresh_behaviour == 'merge'
+         instance.set_update_strategy(:merge_dictionary)
+       elsif refresh_behaviour == 'replace'
+         instance.set_update_strategy(:replace_dictionary)
+       else
+         # we really should never get here
+         raise(LogStash::ConfigurationError, "Unknown value for refresh_behaviour=#{refresh_behaviour}")
+       end
+     end
+
+     include LogStash::Util::Loggable
+     attr_reader :dictionary, :fetch_strategy
+
+     def initialize(path, refresh_interval, exact, regex)
+       @dictionary_path = path
+       @refresh_interval = refresh_interval
+       @short_refresh = @refresh_interval <= 300
+       @stopping = Concurrent::AtomicBoolean.new # ported from jdbc_static; prevents a scheduled execution from running a load during shutdown
+       rw_lock = java.util.concurrent.locks.ReentrantReadWriteLock.new
+       @write_lock = rw_lock.writeLock
+       @dictionary = Hash.new
+       @update_method = method(:merge_dictionary)
+       initialize_for_file_type
+       args = [@dictionary, rw_lock]
+       if exact
+         @fetch_strategy = regex ? FetchStrategy::File::ExactRegex.new(*args) : FetchStrategy::File::Exact.new(*args)
+       else
+         @fetch_strategy = FetchStrategy::File::RegexUnion.new(*args)
+       end
+       load_dictionary(raise_exception = true)
+       stop_scheduler(initial = true)
+       start_scheduler unless @refresh_interval <= 0 # an interval of zero or less disables scheduled reloads
+     end
+
+     def stop_scheduler(initial = false)
+       @stopping.make_true unless initial
+       @scheduler.shutdown(:wait) if @scheduler
+     end
+
+     def load_dictionary(raise_exception = false)
+       begin
+         @dictionary_mtime = ::File.mtime(@dictionary_path).to_f
+         @update_method.call
+       rescue Errno::ENOENT
+         logger.warn("dictionary file read failure, continuing with old dictionary", :path => @dictionary_path)
+       rescue => e
+         loading_exception(e, raise_exception)
+       end
+     end
+
+     def set_update_strategy(method_sym)
+       @update_method = method(method_sym)
+       self
+     end
+
+     protected
+
+     def initialize_for_file_type
+       # subclass-specific initializer
+     end
+
+     def read_file_into_dictionary
+       # defined in csv_file, yaml_file and json_file
+     end
+
+     private
+
+     def start_scheduler
+       @scheduler = Rufus::Scheduler.new
+       @scheduler.interval("#{@refresh_interval}s", :overlap => false) do
+         reload_dictionary
+       end
+     end
+
+     def merge_dictionary
+       @write_lock.lock
+       begin
+         read_file_into_dictionary
+         @fetch_strategy.dictionary_updated
+       ensure
+         @write_lock.unlock
+       end
+     end
+
+     def replace_dictionary
+       @write_lock.lock
+       begin
+         @dictionary.clear
+         read_file_into_dictionary
+         @fetch_strategy.dictionary_updated
+       ensure
+         @write_lock.unlock
+       end
+     end
+
+     def reload_dictionary
+       return if @stopping.true?
+       if @short_refresh
+         load_dictionary if needs_refresh?
+       else
+         load_dictionary
+       end
+     end
+
+     def needs_refresh?
+       @dictionary_mtime != ::File.mtime(@dictionary_path).to_f
+     end
+
+     def loading_exception(e, raise_exception)
+       msg = "Translate: #{e.message} when loading dictionary file at #{@dictionary_path}"
+       if raise_exception
+         raise DictionaryFileError.new(msg)
+       else
+         logger.warn("#{msg}, continuing with old dictionary", :dictionary_path => @dictionary_path)
+       end
+     end
+   end
+ end end end
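File.create above acts as a factory: the file extension selects the subclass, refresh_behaviour selects the update strategy, and the constructor performs the first load and schedules refreshes when the interval is positive. A minimal usage sketch follows; the require path, file path and settings are illustrative assumptions, not taken from this diff.

    require "logstash/filters/dictionary/file" # assumed load path

    lookup = LogStash::Filters::Dictionary::File.create(
      "/etc/logstash/mappings.yml", # hypothetical file; extension selects YamlFile
      300,                          # refresh_interval, in seconds
      "replace",                    # refresh_behaviour: "merge" or "replace"
      true,                         # exact: match the whole source value
      false                         # regex: keys are not treated as patterns
    )
    lookup.fetch_strategy           # => a FetchStrategy::File::Exact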
@@ -0,0 +1,87 @@
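New file, most likely lib/logstash/filters/dictionary/json_file.rb (inferred).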
+ # encoding: utf-8
+ require "json"
+
+ module LogStash module Filters module Dictionary
+   class JsonFile < File
+
+     protected
+
+     def initialize_for_file_type
+     end
+
+     def read_file_into_dictionary
+       content = IO.read(@dictionary_path, :mode => 'r:bom|utf-8')
+       @dictionary.update(LogStash::Json.load(content)) unless content.nil? || content.empty? # LogStash::Json comes from logstash-core
+     end
+   end
+ end end end
+
+ __END__
+ Preserving the text below for posterity...
+
+ I tried hard to find a stream-parsing solution with JrJackson and sc_load,
+ but it was no faster than the code above.
+ The idea is for each line to be read into a streaming parser that updates
+ @dictionary as each key/value pair is found.
+ It consumes less memory because the JSON string is not read into memory
+ whole and then built into a Ruby Hash that is merged into @dictionary.
+ I decided to trade speed for memory. Side note: the json gem seems to
+ have become quite speedy lately.
+
+ e.g.
+ require_relative 'json_handler'
+ ...
+ def initialize_for_file_type
+   @handler = JsonHandler.new(@dictionary)
+ end
+
+ def read_file_into_dictionary
+   ::File.open(@dictionary_path, "r:bom|utf-8") do |io|
+     JrJackson::Json.sc_load(@handler, io, {raw: true})
+   end
+ end
+ ...
+ where JsonHandler is:
+
+ require 'jrjackson'
+
+ module LogStash module Filters module Dictionary
+   class JsonHandler
+     def initialize(dictionary)
+       @dictionary = dictionary
+       @map_depth = 0
+     end
+
+     def hash_start()
+       @map_depth = @map_depth.succ
+       @map_depth == 1 ? @dictionary : {}
+     end
+
+     def hash_end()
+       @map_depth = @map_depth.pred
+     end
+
+     def hash_key(key)
+       key
+     end
+
+     def array_start()
+       []
+     end
+
+     def array_end()
+     end
+
+     def add_value(value)
+       # @result = value
+     end
+
+     def hash_set(h, key, value)
+       h[key] = value
+     end
+
+     def array_append(a, value)
+       a.push(value)
+     end
+   end
+ end end end
@@ -0,0 +1,31 @@
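New file, most likely lib/logstash/filters/dictionary/memory.rb (inferred).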
+ # encoding: utf-8
+ require "logstash/filters/fetch_strategy/memory"
+
+ module LogStash module Filters module Dictionary
+   class Memory
+
+     attr_reader :dictionary, :fetch_strategy
+
+     def initialize(hash, exact, regex)
+       if exact
+         @fetch_strategy = regex ? FetchStrategy::Memory::ExactRegex.new(hash) : FetchStrategy::Memory::Exact.new(hash)
+       else
+         @fetch_strategy = FetchStrategy::Memory::RegexUnion.new(hash)
+       end
+     end
+
+     def stop_scheduler
+       # noop
+     end
+
+     private
+
+     def needs_refresh?
+       false
+     end
+
+     def load_dictionary(raise_exception = false)
+       # noop
+     end
+   end
+ end end end
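The in-memory variant shares the fetch-strategy interface with the file-backed one but never refreshes. The strategies report results through a caller-supplied two-slot array: slot 0 is the match flag (presumably initialized truthy by the caller), slot 1 receives the translated value. A hedged sketch, assuming the Logstash runtime is loaded (the hash and inputs are invented):

    dict = LogStash::Filters::Dictionary::Memory.new({ "200" => "OK" }, true, false)

    results = [true, nil]              # [matched?, value]
    dict.fetch_strategy.fetch("200", results)
    results                            # => [true, "OK"]

    miss = [true, nil]
    dict.fetch_strategy.fetch("404", miss)
    miss                               # => [false, nil]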
@@ -0,0 +1,24 @@
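New file, most likely lib/logstash/filters/dictionary/yaml_file.rb (inferred).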
+ # encoding: utf-8
+
+ require_relative "yaml_visitor"
+
+ module LogStash module Filters module Dictionary
+   class YamlFile < File
+
+     protected
+
+     def initialize_for_file_type
+       @visitor = YamlVisitor.create
+     end
+
+     def read_file_into_dictionary
+       # low-level YAML read that tries to create
+       # as few intermediate objects as possible;
+       # it overwrites the value at an existing key
+       @visitor.accept_with_dictionary(
+         @dictionary, Psych.parse_stream(
+           IO.read(@dictionary_path, :mode => 'r:bom|utf-8')
+         ))
+     end
+   end
+ end end end
@@ -0,0 +1,42 @@
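New file, most likely lib/logstash/filters/dictionary/yaml_visitor.rb (inferred).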
+ # encoding: utf-8
+
+ require 'psych/visitors/to_ruby'
+ require 'psych/exception'
+
+ unless defined?(Regexp::NOENCODING)
+   Regexp::NOENCODING = 32
+ end
+
+ module LogStash module Filters module Dictionary
+   class YamlVisitor < Psych::Visitors::ToRuby
+
+     TAG_MAP_TABLE = Hash.new(false)
+     TAG_MAP_TABLE[nil] = true
+     TAG_MAP_TABLE["tag:yaml.org,2002:map"] = true
+     TAG_MAP_TABLE["tag:yaml.org,2002:omap"] = true
+
+     def accept_with_dictionary(dictionary, target)
+       @dictionary = dictionary
+       @map_depth = 0
+       accept(target)
+     end
+
+     def visit_Psych_Nodes_Mapping(o)
+       if Psych.load_tags[o.tag]
+         return revive(resolve_class(Psych.load_tags[o.tag]), o)
+       end
+
+       target_hash = @map_depth == 0 ? @dictionary : {}
+       @map_depth = @map_depth.succ
+
+       if TAG_MAP_TABLE[o.tag]
+         result = revive_hash(register(o, target_hash), o)
+       else
+         result = super(o)
+       end
+
+       @map_depth = @map_depth.pred
+       result
+     end
+   end
+ end end end
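The visitor's trick is the @map_depth counter: the top-level YAML mapping is revived directly into the shared dictionary hash, so no intermediate top-level hash is built and merged, while nested mappings still get fresh hashes. A small illustration, assuming the Logstash runtime is loaded (the YAML content is invented):

    target = {}
    visitor = LogStash::Filters::Dictionary::YamlVisitor.create
    visitor.accept_with_dictionary(
      target, Psych.parse_stream("a: 1\nb:\n  c: 2\n"))
    target # => {"a" => 1, "b" => {"c" => 2}}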
@@ -0,0 +1,81 @@
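New file, most likely lib/logstash/filters/fetch_strategy/file.rb (inferred).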
+ # encoding: utf-8
+
+ module LogStash module Filters module FetchStrategy module File
+   class Exact
+     def initialize(dictionary, rw_lock)
+       @dictionary = dictionary
+       @read_lock = rw_lock.readLock
+     end
+
+     def dictionary_updated
+     end
+
+     def fetch(source, results)
+       @read_lock.lock
+       begin
+         if @dictionary.include?(source)
+           results[1] = LogStash::Util.deep_clone(@dictionary[source])
+         else
+           results[0] = false
+         end
+       ensure
+         @read_lock.unlock
+       end
+     end
+   end
+
+   class ExactRegex
+     def initialize(dictionary, rw_lock)
+       @keys_regex = Hash.new
+       @dictionary = dictionary
+       @read_lock = rw_lock.readLock
+     end
+
+     def dictionary_updated
+       @keys_regex.clear
+       # rebuilding the regex map is expensive in time:
+       # 100 000 keys take 0.5 seconds on a high-spec MacBook Pro;
+       # at least we are not doing it for every event as before
+       @dictionary.keys.each { |k| @keys_regex[k] = Regexp.new(k) }
+     end
+
+     def fetch(source, results)
+       @read_lock.lock
+       begin
+         key = @dictionary.keys.detect { |k| source.match(@keys_regex[k]) }
+         if key.nil?
+           results[0] = false
+         else
+           results[1] = LogStash::Util.deep_clone(@dictionary[key])
+         end
+       ensure
+         @read_lock.unlock
+       end
+     end
+   end
+
+   class RegexUnion
+     def initialize(dictionary, rw_lock)
+       @dictionary = dictionary
+       @read_lock = rw_lock.readLock
+     end
+
+     def dictionary_updated
+       @union_regex_keys = Regexp.union(@dictionary.keys)
+     end
+
+     def fetch(source, results)
+       @read_lock.lock
+       begin
+         value = source.gsub(@union_regex_keys, @dictionary)
+         if source == value
+           results[0] = false
+         else
+           results[1] = LogStash::Util.deep_clone(value)
+         end
+       ensure
+         @read_lock.unlock
+       end
+     end
+   end
+ end end end end
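RegexUnion leans on two stock Ruby behaviours: Regexp.union builds one alternation from all keys, and String#gsub accepts a Hash replacement that maps each matched substring to its value. A miss is detected by comparing the result with the input, since gsub returns an equal string when nothing matched. In plain Ruby, outside Logstash (values invented):

    dictionary = { "foo" => "bar", "baz" => "qux" }
    union = Regexp.union(dictionary.keys)   # => /foo|baz/
    "say foo, baz".gsub(union, dictionary)  # => "say bar, qux"
    "no match".gsub(union, dictionary)      # => "no match" (== input, so a miss)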
@@ -0,0 +1,52 @@
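New file, most likely lib/logstash/filters/fetch_strategy/memory.rb (inferred).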
+ # encoding: utf-8
+
+ module LogStash module Filters module FetchStrategy module Memory
+   class Exact
+     def initialize(dictionary)
+       @dictionary = dictionary
+     end
+
+     def fetch(source, results)
+       if @dictionary.include?(source)
+         results[1] = LogStash::Util.deep_clone(@dictionary[source])
+       else
+         results[0] = false
+       end
+     end
+   end
+
+   class ExactRegex
+     def initialize(dictionary)
+       @keys_regex = Hash.new
+       @dictionary = dictionary
+       @dictionary.keys.each { |k| @keys_regex[k] = Regexp.new(k) }
+     end
+
+     def fetch(source, results)
+       key = @dictionary.keys.detect { |k| source.match(@keys_regex[k]) }
+       if key.nil?
+         results[0] = false
+       else
+         results[1] = LogStash::Util.deep_clone(@dictionary[key])
+       end
+     end
+   end
+
+   class RegexUnion
+     def initialize(dictionary)
+       @dictionary = dictionary
+       @union_regex_keys = Regexp.union(@dictionary.keys)
+     end
+
+     def fetch(source, results)
+       value = source.gsub(@union_regex_keys, @dictionary)
+       if source == value
+         results[0] = false
+       else
+         results[1] = LogStash::Util.deep_clone(value)
+       end
+     end
+   end
+ end end end end
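For completeness, a sketch of the ExactRegex semantics in the memory strategies: every dictionary key is compiled once as a Regexp at construction (a memory dictionary never changes), and lookup returns the value of the first key, in hash insertion order, whose pattern matches the source. Assumes the Logstash runtime for LogStash::Util.deep_clone; the patterns and values are invented:

    strategy = LogStash::Filters::FetchStrategy::Memory::ExactRegex.new(
      { "^5\\d\\d$" => "server error", "^2\\d\\d$" => "success" }
    )
    results = [true, nil]
    strategy.fetch("503", results)
    results # => [true, "server error"]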