logstash-filter-translate 3.1.0 → 3.3.0

This diff shows the content of publicly released package versions as published to their public registries. It is provided for informational purposes only and reflects the changes between those versions.
--- /dev/null
+++ b/lib/logstash/filters/array_of_maps_value_update.rb
@@ -0,0 +1,44 @@
+# encoding: utf-8
+
+module LogStash module Filters
+  class ArrayOfMapsValueUpdate
+    def initialize(iterate_on, field, destination, fallback, lookup)
+      @iterate_on = ensure_reference_format(iterate_on)
+      @field = ensure_reference_format(field)
+      @destination = ensure_reference_format(destination)
+      @fallback = fallback
+      @use_fallback = !fallback.nil? # fallback is not nil, the user set a value in the config
+      @lookup = lookup
+    end
+
+    def test_for_inclusion(event, override)
+      event.include?(@iterate_on)
+    end
+
+    def update(event)
+      val = event.get(@iterate_on) # should be an array of hashes
+      source = Array(val)
+      matches = Array.new(source.size)
+      source.size.times do |index|
+        nested_field = "#{@iterate_on}[#{index}]#{@field}"
+        nested_destination = "#{@iterate_on}[#{index}]#{@destination}"
+        inner = event.get(nested_field)
+        next if inner.nil?
+        matched = [true, nil]
+        @lookup.fetch_strategy.fetch(inner.to_s, matched)
+        if matched.first
+          event.set(nested_destination, matched.last)
+          matches[index] = true
+        elsif @use_fallback
+          event.set(nested_destination, event.sprintf(@fallback))
+          matches[index] = true
+        end
+      end
+      return matches.any?
+    end
+
+    def ensure_reference_format(field)
+      field.start_with?("[") && field.end_with?("]") ? field : "[#{field}]"
+    end
+  end
+end end
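A minimal standalone sketch (plain Ruby, no Logstash Event; the field names are hypothetical) of how update composes a nested field reference for each element of the iterated array:

  iterate_on  = "[answers]"  # an array-of-hashes field
  field       = "[code]"     # source key inside each hash
  destination = "[label]"    # target key inside each hash

  3.times do |index|
    puts "#{iterate_on}[#{index}]#{field} -> #{iterate_on}[#{index}]#{destination}"
  end
  # [answers][0][code] -> [answers][0][label]
  # [answers][1][code] -> [answers][1][label]
  # [answers][2][code] -> [answers][2][label]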
--- /dev/null
+++ b/lib/logstash/filters/array_of_values_update.rb
@@ -0,0 +1,47 @@
+# encoding: utf-8
+
+module LogStash module Filters
+  class ArrayOfValuesUpdate
+    class CoerceArray
+      def call(source) source; end
+    end
+    class CoerceOther
+      def call(source) Array(source); end
+    end
+
+    def initialize(iterate_on, destination, fallback, lookup)
+      @iterate_on = iterate_on
+      @destination = destination
+      @fallback = fallback
+      @use_fallback = !fallback.nil? # fallback is not nil, the user set a value in the config
+      @lookup = lookup
+      @coercers_table = {}
+      @coercers_table.default = CoerceOther.new
+      @coercers_table[Array] = CoerceArray.new
+    end
+
+    def test_for_inclusion(event, override)
+      # Skip translation in case the @destination field already exists and @override is disabled.
+      return false if !override && event.include?(@destination)
+      event.include?(@iterate_on)
+    end
+
+    def update(event)
+      val = event.get(@iterate_on)
+      source = @coercers_table[val.class].call(val)
+      target = Array.new(source.size)
+      if @use_fallback
+        target.fill(event.sprintf(@fallback))
+      end
+      source.each_with_index do |inner, index|
+        matched = [true, nil]
+        @lookup.fetch_strategy.fetch(inner.to_s, matched)
+        if matched.first
+          target[index] = matched.last
+        end
+      end
+      event.set(@destination, target)
+      return target.any?
+    end
+  end
+end end
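The @coercers_table is a class-keyed dispatch table: a Hash whose default entry handles every non-Array input, so there is no is_a?(Array) branch per event. A sketch of the same pattern using lambdas in place of the CoerceArray/CoerceOther classes above:

  coercers = Hash.new(->(v) { Array(v) })  # CoerceOther equivalent (default entry)
  coercers[Array] = ->(v) { v }            # CoerceArray equivalent

  val = "GET"
  p coercers[val.class].call(val)   # => ["GET"]
  val = [1, 2]
  p coercers[val.class].call(val)   # => [1, 2]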
--- /dev/null
+++ b/lib/logstash/filters/dictionary/csv_file.rb
@@ -0,0 +1,25 @@
+# encoding: utf-8
+require "csv"
+
+module LogStash module Filters module Dictionary
+  class CsvFile < File
+
+    protected
+
+    def initialize_for_file_type
+      @io = StringIO.new("")
+      @csv = ::CSV.new(@io)
+    end
+
+    def read_file_into_dictionary
+      # low level CSV read that tries to create as
+      # few intermediate objects as possible
+      # this overwrites the value at key
+      IO.foreach(@dictionary_path, :mode => 'r:bom|utf-8') do |line|
+        @io.string = line
+        k, v = @csv.shift
+        @dictionary[k] = v
+      end
+    end
+  end
+end end end
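For comparison, a simpler per-line read with the same overwrite-at-key behavior; the class above instead reuses one StringIO and one CSV parser across all lines to cut per-line allocations. The dictionary path here is hypothetical:

  require "csv"

  dictionary = {}
  IO.foreach("dict.csv", :mode => 'r:bom|utf-8') do |line|
    k, v = CSV.parse_line(line)
    dictionary[k] = v   # later rows overwrite earlier ones
  end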
--- /dev/null
+++ b/lib/logstash/filters/dictionary/file.rb
@@ -0,0 +1,143 @@
+# encoding: utf-8
+require 'concurrent/atomic/atomic_boolean'
+require 'rufus-scheduler'
+require "logstash/util/loggable"
+require "logstash/filters/fetch_strategy/file"
+
+java_import 'java.util.concurrent.locks.ReentrantReadWriteLock'
+
+module LogStash module Filters module Dictionary
+  class DictionaryFileError < StandardError; end
+
+  class File
+    def self.create(path, refresh_interval, refresh_behaviour, exact, regex)
+      if /\.y[a]?ml$/.match(path)
+        instance = YamlFile.new(path, refresh_interval, exact, regex)
+      elsif path.end_with?(".json")
+        instance = JsonFile.new(path, refresh_interval, exact, regex)
+      elsif path.end_with?(".csv")
+        instance = CsvFile.new(path, refresh_interval, exact, regex)
+      else
+        raise "Translate: Dictionary #{path} has an invalid format"
+      end
+      if refresh_behaviour == 'merge'
+        instance.set_update_strategy(:merge_dictionary)
+      elsif refresh_behaviour == 'replace'
+        instance.set_update_strategy(:replace_dictionary)
+      else
+        # we really should never get here
+        raise(LogStash::ConfigurationError, "Unknown value for refresh_behaviour=#{refresh_behaviour}")
+      end
+    end
+
+    include LogStash::Util::Loggable
+    attr_reader :dictionary, :fetch_strategy
+
+    def initialize(path, refresh_interval, exact, regex)
+      @dictionary_path = path
+      @refresh_interval = refresh_interval
+      @short_refresh = @refresh_interval <= 300
+      @stopping = Concurrent::AtomicBoolean.new # ported from jdbc_static, needed to prevent a scheduled execution from running a load
+      rw_lock = java.util.concurrent.locks.ReentrantReadWriteLock.new
+      @write_lock = rw_lock.writeLock
+      @dictionary = Hash.new
+      @update_method = method(:merge_dictionary)
+      initialize_for_file_type
+      args = [@dictionary, rw_lock]
+      klass = case
+              when exact && regex then FetchStrategy::File::ExactRegex
+              when exact          then FetchStrategy::File::Exact
+              else                     FetchStrategy::File::RegexUnion
+              end
+      @fetch_strategy = klass.new(*args)
+      load_dictionary(raise_exception = true)
+      stop_scheduler(initial = true)
+      start_scheduler unless @refresh_interval <= 0 # disabled, a scheduler interval of zero makes no sense
+    end
+
+    def stop_scheduler(initial = false)
+      @stopping.make_true unless initial
+      @scheduler.shutdown(:wait) if @scheduler
+    end
+
+    def load_dictionary(raise_exception=false)
+      begin
+        @dictionary_mtime = ::File.mtime(@dictionary_path).to_f
+        @update_method.call
+      rescue Errno::ENOENT
+        logger.warn("dictionary file read failure, continuing with old dictionary", :path => @dictionary_path)
+      rescue => e
+        loading_exception(e, raise_exception)
+      end
+    end
+
+    def set_update_strategy(method_sym)
+      @update_method = method(method_sym)
+      self
+    end
+
+    protected
+
+    def initialize_for_file_type
+      # sub class specific initializer
+    end
+
+    def read_file_into_dictionary
+      # defined in csv_file, yaml_file and json_file
+    end
+
+    private
+
+    def start_scheduler
+      @scheduler = Rufus::Scheduler.new
+      @scheduler.interval("#{@refresh_interval}s", :overlap => false) do
+        reload_dictionary
+      end
+    end
+
+    def merge_dictionary
+      @write_lock.lock
+      begin
+        read_file_into_dictionary
+        @fetch_strategy.dictionary_updated
+      ensure
+        @write_lock.unlock
+      end
+    end
+
+    def replace_dictionary
+      @write_lock.lock
+      begin
+        @dictionary.clear
+        read_file_into_dictionary
+        @fetch_strategy.dictionary_updated
+      ensure
+        @write_lock.unlock
+      end
+    end
+
+    def reload_dictionary
+      return if @stopping.true?
+      if @short_refresh
+        load_dictionary if needs_refresh?
+      else
+        load_dictionary
+      end
+    end
+
+    def needs_refresh?
+      @dictionary_mtime != ::File.mtime(@dictionary_path).to_f
+    end
+
+    def loading_exception(e, raise_exception)
+      msg = "Translate: #{e.message} when loading dictionary file at #{@dictionary_path}"
+      if raise_exception
+        dfe = DictionaryFileError.new(msg)
+        dfe.set_backtrace(e.backtrace)
+        raise dfe
+      else
+        logger.warn("#{msg}, continuing with old dictionary", :dictionary_path => @dictionary_path)
+      end
+    end
+  end
+end end end
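The reload paths and the fetch strategies share one ReentrantReadWriteLock: merge_dictionary and replace_dictionary take the write lock while lookups take the read lock (inside the strategy classes, not shown here), so a fetch never observes a half-built dictionary. A JRuby-only sketch of the write-locked replace, with hypothetical data:

  require 'java'
  java_import 'java.util.concurrent.locks.ReentrantReadWriteLock'

  rw_lock    = ReentrantReadWriteLock.new
  write_lock = rw_lock.write_lock
  dictionary = {}

  write_lock.lock
  begin
    dictionary.clear                  # replace_dictionary semantics
    dictionary.update("200" => "OK")  # stand-in for read_file_into_dictionary
  ensure
    write_lock.unlock
  end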
--- /dev/null
+++ b/lib/logstash/filters/dictionary/json_file.rb
@@ -0,0 +1,87 @@
+# encoding: utf-8
+require "json"
+
+module LogStash module Filters module Dictionary
+  class JsonFile < File
+
+    protected
+
+    def initialize_for_file_type
+    end
+
+    def read_file_into_dictionary
+      content = IO.read(@dictionary_path, :mode => 'r:bom|utf-8')
+      @dictionary.update(LogStash::Json.load(content)) unless content.nil? || content.empty?
+    end
+  end
+end end end
+
+__END__
+Preserving the text below for near term posterity...
+
+I tried hard to find a stream parsing solution with JrJackson and sc_load
+but it was no faster than the above code.
+The idea is for each line to be read into the streaming parser, which updates
+the @dictionary as each key/value pair is found.
+It is lower on memory consumption because the JSON string is not read into memory
+and then built into a Ruby Hash that is merged into @dictionary.
+I decided to trade speed for memory. Side note: it seems that
+the json gem has become quite speedy lately.
+
+e.g.
+require_relative 'json_handler'
+...
+def initialize_for_file_type
+  @handler = JsonHandler.new(@dictionary)
+end
+
+def read_file_into_dictionary
+  ::File.open(@dictionary_path, "r:bom|utf-8") do |io|
+    JrJackson::Json.sc_load(@handler, io, {raw: true})
+  end
+end
+...
+where JsonHandler is:
+
+require 'jrjackson'
+
+module LogStash module Filters module Dictionary
+  class JsonHandler
+    def initialize(dictionary)
+      @dictionary = dictionary
+      @map_depth = 0
+    end
+
+    def hash_start()
+      @map_depth = @map_depth.succ
+      @map_depth == 1 ? @dictionary : {}
+    end
+
+    def hash_end()
+      @map_depth = @map_depth.pred
+    end
+
+    def hash_key(key)
+      key
+    end
+
+    def array_start()
+      []
+    end
+
+    def array_end()
+    end
+
+    def add_value(value)
+      # @result = value
+    end
+
+    def hash_set(h, key, value)
+      h[key] = value
+    end
+
+    def array_append(a, value)
+      a.push(value)
+    end
+  end
+end end end
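A plain-stdlib equivalent of read_file_into_dictionary, for reference (the plugin itself goes through LogStash::Json.load, which is Jackson-backed on JRuby); the path is hypothetical:

  require "json"

  content = IO.read("dict.json", :mode => 'r:bom|utf-8')
  dictionary = {}
  dictionary.update(JSON.parse(content)) unless content.nil? || content.empty?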
--- /dev/null
+++ b/lib/logstash/filters/dictionary/memory.rb
@@ -0,0 +1,32 @@
+# encoding: utf-8
+require "logstash/filters/fetch_strategy/memory"
+
+module LogStash module Filters module Dictionary
+  class Memory
+
+    attr_reader :dictionary, :fetch_strategy
+
+    def initialize(hash, exact, regex)
+      klass = case
+              when exact && regex then FetchStrategy::Memory::ExactRegex
+              when exact          then FetchStrategy::Memory::Exact
+              else                     FetchStrategy::Memory::RegexUnion
+              end
+      @fetch_strategy = klass.new(hash)
+    end
+
+    def stop_scheduler
+      # noop
+    end
+
+    private
+
+    def needs_refresh?
+      false
+    end
+
+    def load_dictionary(raise_exception=false)
+      # noop
+    end
+  end
+end end end
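Both Memory and File pick their fetch strategy with an argument-less case, which is simply an if/elsif chain over the two booleans; a trivial sketch with placeholder strategy names:

  exact, regex = true, false
  klass = case
          when exact && regex then "ExactRegex"
          when exact          then "Exact"
          else                     "RegexUnion"
          end
  p klass  # => "Exact"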
--- /dev/null
+++ b/lib/logstash/filters/dictionary/yaml_file.rb
@@ -0,0 +1,24 @@
+# encoding: utf-8
+
+require_relative "yaml_visitor"
+
+module LogStash module Filters module Dictionary
+  class YamlFile < File
+
+    protected
+
+    def initialize_for_file_type
+      @visitor = YamlVisitor.create
+    end
+
+    def read_file_into_dictionary
+      # low level YAML read that tries to create as
+      # few intermediate objects as possible
+      # this overwrites the value at key
+      @visitor.accept_with_dictionary(
+        @dictionary, Psych.parse_stream(
+          IO.read(@dictionary_path, :mode => 'r:bom|utf-8')
+      ))
+    end
+  end
+end end end
--- /dev/null
+++ b/lib/logstash/filters/dictionary/yaml_visitor.rb
@@ -0,0 +1,42 @@
+# encoding: utf-8
+
+require 'psych/visitors/to_ruby'
+require 'psych/exception'
+
+unless defined?(Regexp::NOENCODING)
+  Regexp::NOENCODING = 32
+end
+
+module LogStash module Filters module Dictionary
+  class YamlVisitor < Psych::Visitors::ToRuby
+
+    TAG_MAP_TABLE = Hash.new(false)
+    TAG_MAP_TABLE[nil] = true
+    TAG_MAP_TABLE["tag:yaml.org,2002:map"] = true
+    TAG_MAP_TABLE["tag:yaml.org,2002:omap"] = true
+
+    def accept_with_dictionary(dictionary, target)
+      @dictionary = dictionary
+      @map_depth = 0
+      accept(target)
+    end
+
+    def visit_Psych_Nodes_Mapping(o)
+      if Psych.load_tags[o.tag]
+        return revive(resolve_class(Psych.load_tags[o.tag]), o)
+      end
+
+      target_hash = @map_depth == 0 ? @dictionary : {}
+      @map_depth = @map_depth.succ
+
+      if TAG_MAP_TABLE[o.tag]
+        result = revive_hash(register(o, target_hash), o)
+      else
+        result = super(o)
+      end
+
+      @map_depth = @map_depth.pred
+      result
+    end
+  end
+end end end
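The visitor writes the document's top-level mapping straight into @dictionary instead of building a separate Hash and merging it. The observable result is the same as the plain-Psych version below, minus one intermediate Hash allocation per reload; the YAML content here is hypothetical:

  require 'yaml'

  dictionary = {}
  dictionary.update(YAML.safe_load("oops: 500\nfine: 200\n"))
  p dictionary  # => {"oops"=>500, "fine"=>200}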