logstash-filter-translate 3.1.0 → 3.3.0

lib/logstash/filters/array_of_maps_value_update.rb
@@ -0,0 +1,44 @@
+ # encoding: utf-8
+
+ module LogStash module Filters
+   class ArrayOfMapsValueUpdate
+     def initialize(iterate_on, field, destination, fallback, lookup)
+       @iterate_on = ensure_reference_format(iterate_on)
+       @field = ensure_reference_format(field)
+       @destination = ensure_reference_format(destination)
+       @fallback = fallback
+       @use_fallback = !fallback.nil? # fallback is not nil, the user set a value in the config
+       @lookup = lookup
+     end
+
+     def test_for_inclusion(event, override)
+       event.include?(@iterate_on)
+     end
+
+     def update(event)
+       val = event.get(@iterate_on) # should be an array of hashes
+       source = Array(val)
+       matches = Array.new(source.size)
+       source.size.times do |index|
+         nested_field = "#{@iterate_on}[#{index}]#{@field}"
+         nested_destination = "#{@iterate_on}[#{index}]#{@destination}"
+         inner = event.get(nested_field)
+         next if inner.nil?
+         matched = [true, nil]
+         @lookup.fetch_strategy.fetch(inner.to_s, matched)
+         if matched.first
+           event.set(nested_destination, matched.last)
+           matches[index] = true
+         elsif @use_fallback
+           event.set(nested_destination, event.sprintf(@fallback))
+           matches[index] = true
+         end
+       end
+       return matches.any?
+     end
+
+     def ensure_reference_format(field)
+       field.start_with?("[") && field.end_with?("]") ? field : "[#{field}]"
+     end
+   end
+ end end
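
The `matched = [true, nil]` pattern above is the fetch-strategy calling convention used throughout this release: the strategy mutates the two-element array in place, leaving `matched[0]` as the found/not-found flag and `matched[1]` as the translated value, so no per-lookup result object is allocated. A minimal sketch of a strategy honoring that contract (the `TinyStrategy` class and its dictionary are illustrative, not part of the plugin):

  class TinyStrategy
    def initialize(dictionary)
      @dictionary = dictionary
    end

    # Mutates `result` in place: result[0] => found?, result[1] => value.
    def fetch(source, result)
      if @dictionary.key?(source)
        result[1] = @dictionary[source]
      else
        result[0] = false
      end
    end
  end

  strategy = TinyStrategy.new("200" => "OK")
  matched = [true, nil]
  strategy.fetch("200", matched) # matched == [true, "OK"]
  matched = [true, nil]
  strategy.fetch("404", matched) # matched == [false, nil]
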
lib/logstash/filters/array_of_values_update.rb
@@ -0,0 +1,47 @@
+ # encoding: utf-8
+
+ module LogStash module Filters
+   class ArrayOfValuesUpdate
+     class CoerceArray
+       def call(source) source; end
+     end
+     class CoerceOther
+       def call(source) Array(source); end
+     end
+
+     def initialize(iterate_on, destination, fallback, lookup)
+       @iterate_on = iterate_on
+       @destination = destination
+       @fallback = fallback
+       @use_fallback = !fallback.nil? # fallback is not nil, the user set a value in the config
+       @lookup = lookup
+       @coercers_table = {}
+       @coercers_table.default = CoerceOther.new
+       @coercers_table[Array] = CoerceArray.new
+     end
+
+     def test_for_inclusion(event, override)
+       # Skip translation in case @destination already exists and @override is disabled.
+       return false if !override && event.include?(@destination)
+       event.include?(@iterate_on)
+     end
+
+     def update(event)
+       val = event.get(@iterate_on)
+       source = @coercers_table[val.class].call(val)
+       target = Array.new(source.size)
+       if @use_fallback
+         target.fill(event.sprintf(@fallback))
+       end
+       source.each_with_index do |inner, index|
+         matched = [true, nil]
+         @lookup.fetch_strategy.fetch(inner.to_s, matched)
+         if matched.first
+           target[index] = matched.last
+         end
+       end
+       event.set(@destination, target)
+       return target.any?
+     end
+   end
+ end end
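
`@coercers_table` is a small class-keyed dispatch table: an Array held by `iterate_on` passes through untouched, while any other value (a scalar, or nil for a missing field) is wrapped via `Kernel#Array`, so `update` always iterates over a uniform source. The same dispatch, sketched with lambdas on hypothetical values:

  coercers = {}
  coercers.default = ->(v) { Array(v) } # CoerceOther
  coercers[Array]  = ->(v) { v }        # CoerceArray

  coercers[[1, 2].class].call([1, 2]) # => [1, 2], untouched
  coercers["x".class].call("x")       # => ["x"]
  coercers[NilClass].call(nil)        # => []
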
lib/logstash/filters/dictionary/csv_file.rb
@@ -0,0 +1,25 @@
+ # encoding: utf-8
+ require "csv"
+
+ module LogStash module Filters module Dictionary
+   class CsvFile < File
+
+     protected
+
+     def initialize_for_file_type
+       @io = StringIO.new("")
+       @csv = ::CSV.new(@io)
+     end
+
+     def read_file_into_dictionary
+       # low level CSV read that tries to create as
+       # few intermediate objects as possible
+       # this overwrites the value at key
+       IO.foreach(@dictionary_path, :mode => 'r:bom|utf-8') do |line|
+         @io.string = line
+         k, v = @csv.shift
+         @dictionary[k] = v
+       end
+     end
+   end
+ end end end
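
The point of the `StringIO` held in `@io` is allocation avoidance: one `CSV` parser is built once, and each line read by `IO.foreach` is swapped into the existing buffer with `string=` (which also rewinds it), so `@csv.shift` can parse row after row without constructing a new parser or buffer per line. A standalone sketch of the same trick, with the rows inlined instead of read from disk:

  require "csv"
  require "stringio"

  io = StringIO.new("")
  csv = ::CSV.new(io)
  dictionary = {}

  ["200,OK", "404,Not Found"].each do |line|
    io.string = line    # rebind and rewind the shared buffer
    k, v = csv.shift    # parse the single row
    dictionary[k] = v
  end

  dictionary # => {"200"=>"OK", "404"=>"Not Found"}
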
lib/logstash/filters/dictionary/file.rb
@@ -0,0 +1,143 @@
+ # encoding: utf-8
+ require 'concurrent/atomic/atomic_boolean'
+ require 'rufus-scheduler'
+ require "logstash/util/loggable"
+ require "logstash/filters/fetch_strategy/file"
+
+ java_import 'java.util.concurrent.locks.ReentrantReadWriteLock'
+
+ module LogStash module Filters module Dictionary
+   class DictionaryFileError < StandardError; end
+
+   class File
+     def self.create(path, refresh_interval, refresh_behaviour, exact, regex)
+       if /\.y[a]?ml$/.match(path)
+         instance = YamlFile.new(path, refresh_interval, exact, regex)
+       elsif path.end_with?(".json")
+         instance = JsonFile.new(path, refresh_interval, exact, regex)
+       elsif path.end_with?(".csv")
+         instance = CsvFile.new(path, refresh_interval, exact, regex)
+       else
+         raise "Translate: Dictionary #{path} has an invalid format"
+       end
+       if refresh_behaviour == 'merge'
+         instance.set_update_strategy(:merge_dictionary)
+       elsif refresh_behaviour == 'replace'
+         instance.set_update_strategy(:replace_dictionary)
+       else
+         # we really should never get here
+         raise(LogStash::ConfigurationError, "Unknown value for refresh_behaviour=#{refresh_behaviour.to_s}")
+       end
+     end
+
+     include LogStash::Util::Loggable
+     attr_reader :dictionary, :fetch_strategy
+
+     def initialize(path, refresh_interval, exact, regex)
+       @dictionary_path = path
+       @refresh_interval = refresh_interval
+       @short_refresh = @refresh_interval <= 300
+       @stopping = Concurrent::AtomicBoolean.new # ported from jdbc_static, need a way to prevent a scheduled execution from running a load.
+       rw_lock = java.util.concurrent.locks.ReentrantReadWriteLock.new
+       @write_lock = rw_lock.writeLock
+       @dictionary = Hash.new
+       @update_method = method(:merge_dictionary)
+       initialize_for_file_type
+       args = [@dictionary, rw_lock]
+       klass = case
+               when exact && regex then FetchStrategy::File::ExactRegex
+               when exact          then FetchStrategy::File::Exact
+               else                     FetchStrategy::File::RegexUnion
+               end
+       @fetch_strategy = klass.new(*args)
+       load_dictionary(raise_exception = true)
+       stop_scheduler(initial = true)
+       start_scheduler unless @refresh_interval <= 0 # disabled, a scheduler interval of zero makes no sense
+     end
+
+     def stop_scheduler(initial = false)
+       @stopping.make_true unless initial
+       @scheduler.shutdown(:wait) if @scheduler
+     end
+
+     def load_dictionary(raise_exception=false)
+       begin
+         @dictionary_mtime = ::File.mtime(@dictionary_path).to_f
+         @update_method.call
+       rescue Errno::ENOENT
+         logger.warn("dictionary file read failure, continuing with old dictionary", :path => @dictionary_path)
+       rescue => e
+         loading_exception(e, raise_exception)
+       end
+     end
+
+     def set_update_strategy(method_sym)
+       @update_method = method(method_sym)
+       self
+     end
+
+     protected
+
+     def initialize_for_file_type
+       # sub class specific initializer
+     end
+
+     def read_file_into_dictionary
+       # defined in csv_file, yaml_file and json_file
+     end
+
+     private
+
+     def start_scheduler
+       @scheduler = Rufus::Scheduler.new
+       @scheduler.interval("#{@refresh_interval}s", :overlap => false) do
+         reload_dictionary
+       end
+     end
+
+     def merge_dictionary
+       @write_lock.lock
+       begin
+         read_file_into_dictionary
+         @fetch_strategy.dictionary_updated
+       ensure
+         @write_lock.unlock
+       end
+     end
+
+     def replace_dictionary
+       @write_lock.lock
+       begin
+         @dictionary.clear
+         read_file_into_dictionary
+         @fetch_strategy.dictionary_updated
+       ensure
+         @write_lock.unlock
+       end
+     end
+
+     def reload_dictionary
+       return if @stopping.true?
+       if @short_refresh
+         load_dictionary if needs_refresh?
+       else
+         load_dictionary
+       end
+     end
+
+     def needs_refresh?
+       @dictionary_mtime != ::File.mtime(@dictionary_path).to_f
+     end
+
+     def loading_exception(e, raise_exception)
+       msg = "Translate: #{e.message} when loading dictionary file at #{@dictionary_path}"
+       if raise_exception
+         dfe = DictionaryFileError.new(msg)
+         dfe.set_backtrace(e.backtrace)
+         raise dfe
+       else
+         logger.warn("#{msg}, continuing with old dictionary", :dictionary_path => @dictionary_path)
+       end
+     end
+   end
+ end end end
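
`File.create` is the factory entry point: the file extension picks the concrete subclass, `refresh_behaviour` picks the update strategy (whose setter returns `self`, so `create` hands back the instance), and the constructor performs the first load before optionally starting the Rufus scheduler. A hedged usage sketch; the path is hypothetical and must exist at construction time, since the first load runs with raise_exception = true:

  dictionary = LogStash::Filters::Dictionary::File.create(
    "/tmp/http_codes.csv", # ".csv" selects CsvFile
    300,                   # refresh_interval in seconds; <= 0 disables the scheduler
    "merge",               # refresh_behaviour => :merge_dictionary
    true,                  # exact lookup
    false                  # regex disabled
  )

  matched = [true, nil]
  dictionary.fetch_strategy.fetch("200", matched)
  dictionary.stop_scheduler # shuts down the background refresh
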
lib/logstash/filters/dictionary/json_file.rb
@@ -0,0 +1,87 @@
+ # encoding: utf-8
+ require "logstash/json"
+
+ module LogStash module Filters module Dictionary
+   class JsonFile < File
+
+     protected
+
+     def initialize_for_file_type
+     end
+
+     def read_file_into_dictionary
+       content = IO.read(@dictionary_path, :mode => 'r:bom|utf-8')
+       @dictionary.update(LogStash::Json.load(content)) unless content.nil? || content.empty?
+     end
+   end
+ end end end
+
+ __END__
+ Preserving the text below for near-term posterity...
+
+ I tried hard to find a stream-parsing solution with JrJackson and sc_load,
+ but it was no faster than the above code.
+ The idea is for each line to be read into the streaming parser, which updates
+ the @dictionary as each key/value pair is found.
+ It is lower on memory consumption because the JSON string is not read into memory
+ and then a Ruby Hash created and merged into @dictionary.
+ I decided to trade speed for memory. Side note: it seems that
+ the json gem has become quite speedy lately.
+
+ e.g.
+   require_relative 'json_handler'
+   ...
+   def initialize_for_file_type
+     @handler = JsonHandler.new(@dictionary)
+   end
+
+   def read_file_into_dictionary
+     ::File.open(@dictionary_path, "r:bom|utf-8") do |io|
+       JrJackson::Json.sc_load(@handler, io, {raw: true})
+     end
+   end
+   ...
+ where JsonHandler is:
+
+ require 'jrjackson'
+
+ module LogStash module Filters module Dictionary
+   class JsonHandler
+     def initialize(dictionary)
+       @dictionary = dictionary
+       @map_depth = 0
+     end
+
+     def hash_start()
+       @map_depth = @map_depth.succ
+       @map_depth == 1 ? @dictionary : {}
+     end
+
+     def hash_end()
+       @map_depth = @map_depth.pred
+     end
+
+     def hash_key(key)
+       key
+     end
+
+     def array_start()
+       []
+     end
+
+     def array_end()
+     end
+
+     def add_value(value)
+       # @result = value
+     end
+
+     def hash_set(h, key, value)
+       h[key] = value
+     end
+
+     def array_append(a, value)
+       a.push(value)
+     end
+   end
+ end end end
lib/logstash/filters/dictionary/memory.rb
@@ -0,0 +1,32 @@
+ # encoding: utf-8
+ require "logstash/filters/fetch_strategy/memory"
+
+ module LogStash module Filters module Dictionary
+   class Memory
+
+     attr_reader :dictionary, :fetch_strategy
+
+     def initialize(hash, exact, regex)
+       klass = case
+               when exact && regex then FetchStrategy::Memory::ExactRegex
+               when exact          then FetchStrategy::Memory::Exact
+               else                     FetchStrategy::Memory::RegexUnion
+               end
+       @fetch_strategy = klass.new(hash)
+     end
+
+     def stop_scheduler
+       # noop
+     end
+
+     private
+
+     def needs_refresh?
+       false
+     end
+
+     def load_dictionary(raise_exception=false)
+       # noop
+     end
+   end
+ end end end
lib/logstash/filters/dictionary/yaml_file.rb
@@ -0,0 +1,24 @@
+ # encoding: utf-8
+
+ require_relative "yaml_visitor"
+
+ module LogStash module Filters module Dictionary
+   class YamlFile < File
+
+     protected
+
+     def initialize_for_file_type
+       @visitor = YamlVisitor.create
+     end
+
+     def read_file_into_dictionary
+       # low level YAML read that tries to create as
+       # few intermediate objects as possible
+       # this overwrites the value at key
+       @visitor.accept_with_dictionary(
+         @dictionary, Psych.parse_stream(
+           IO.read(@dictionary_path, :mode => 'r:bom|utf-8')
+       ))
+     end
+   end
+ end end end
lib/logstash/filters/dictionary/yaml_visitor.rb
@@ -0,0 +1,42 @@
+ # encoding: utf-8
+
+ require 'psych/visitors/to_ruby'
+ require 'psych/exception'
+
+ unless defined?(Regexp::NOENCODING)
+   Regexp::NOENCODING = 32
+ end
+
+ module LogStash module Filters module Dictionary
+   class YamlVisitor < Psych::Visitors::ToRuby
+
+     TAG_MAP_TABLE = Hash.new(false)
+     TAG_MAP_TABLE[nil] = true
+     TAG_MAP_TABLE["tag:yaml.org,2002:map"] = true
+     TAG_MAP_TABLE["tag:yaml.org,2002:omap"] = true
+
+     def accept_with_dictionary(dictionary, target)
+       @dictionary = dictionary
+       @map_depth = 0
+       accept(target)
+     end
+
+     def visit_Psych_Nodes_Mapping(o)
+       if Psych.load_tags[o.tag]
+         return revive(resolve_class(Psych.load_tags[o.tag]), o)
+       end
+
+       target_hash = @map_depth == 0 ? @dictionary : {}
+       @map_depth = @map_depth.succ
+
+       if TAG_MAP_TABLE[o.tag]
+         result = revive_hash(register(o, target_hash), o)
+       else
+         result = super(o)
+       end
+
+       @map_depth = @map_depth.pred
+       result
+     end
+   end
+ end end end
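
The depth bookkeeping is the whole trick: at depth zero, `visit_Psych_Nodes_Mapping` revives the document's root mapping directly into the caller-supplied `@dictionary` (via `register`, which also keeps YAML anchors working), so no intermediate root hash is built and then merged. A minimal usage sketch, assuming the file above is loaded; the YAML content here is hypothetical:

  dictionary = {}
  visitor = LogStash::Filters::Dictionary::YamlVisitor.create
  visitor.accept_with_dictionary(
    dictionary, Psych.parse_stream("'200': OK\n'404': Not Found\n")
  )
  dictionary # => {"200"=>"OK", "404"=>"Not Found"}
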