logstash-codec-protobuf 1.2.8-jruby
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +45 -0
- data/CONTRIBUTORS +12 -0
- data/DEVELOPER.md +2 -0
- data/Gemfile +11 -0
- data/LICENSE +202 -0
- data/NOTICE.TXT +4 -0
- data/README.md +184 -0
- data/docs/index.asciidoc +241 -0
- data/google-protobuf-lib-update.md +57 -0
- data/lib/logstash/codecs/protobuf.rb +735 -0
- data/logstash-codec-protobuf.gemspec +28 -0
- data/spec/codecs/pb2_spec.rb +236 -0
- data/spec/codecs/pb3_decode_spec.rb +445 -0
- data/spec/codecs/pb3_encode_spec.rb +243 -0
- data/spec/helpers/pb2/ColourTestcase.pb.rb +35 -0
- data/spec/helpers/pb2/ColourTestcase.proto +24 -0
- data/spec/helpers/pb2/event.pb.rb +19 -0
- data/spec/helpers/pb2/event.proto +12 -0
- data/spec/helpers/pb2/header/header.pb.rb +16 -0
- data/spec/helpers/pb2/header/header.proto +8 -0
- data/spec/helpers/pb2/human.pb.rb +26 -0
- data/spec/helpers/pb2/unicorn.pb.rb +19 -0
- data/spec/helpers/pb2/unicorn_event.pb.rb +24 -0
- data/spec/helpers/pb3/FantasyHorse_pb.rb +44 -0
- data/spec/helpers/pb3/ProbeResult_pb.rb +26 -0
- data/spec/helpers/pb3/dnsmessage_pb.rb +82 -0
- data/spec/helpers/pb3/events.proto3 +10 -0
- data/spec/helpers/pb3/events_pb.rb +17 -0
- data/spec/helpers/pb3/header/header.proto3 +7 -0
- data/spec/helpers/pb3/header/header_pb.rb +12 -0
- data/spec/helpers/pb3/integertest_pb.rb +20 -0
- data/spec/helpers/pb3/messageA.proto3 +12 -0
- data/spec/helpers/pb3/messageA_pb.rb +16 -0
- data/spec/helpers/pb3/messageB.proto3 +12 -0
- data/spec/helpers/pb3/messageB_pb.rb +16 -0
- data/spec/helpers/pb3/rum2_pb.rb +87 -0
- data/spec/helpers/pb3/rum3_pb.rb +87 -0
- data/spec/helpers/pb3/rum_pb.rb +87 -0
- data/spec/helpers/pb3/unicorn.proto3 +31 -0
- data/spec/helpers/pb3/unicorn_pb.rb +31 -0
- metadata +177 -0
data/lib/logstash/codecs/protobuf.rb
@@ -0,0 +1,735 @@
# encoding: utf-8
require 'logstash/codecs/base'
require 'logstash/util/charset'
require 'pathname' # for the Pathname checks in register (added; usually already loaded by Logstash core)
require 'google/protobuf' # for protobuf3
require 'protocol_buffers' # https://github.com/codekitchen/ruby-protocol-buffers, for protobuf2

# Monkey-patch the `Google::Protobuf::DescriptorPool` with a mutex for exclusive
# access.
#
# The DescriptorPool instance is not thread-safe when loading protobuf
# definitions. This can cause unrecoverable errors when registering multiple
# concurrent pipelines that try to register the same dependency. The
# DescriptorPool instance is global to the JVM and shared among all pipelines.
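#
# Usage sketch (mirroring what `register` does further below):
# [source,ruby]
# Google::Protobuf::DescriptorPool.with_lock.synchronize do
#   # load protobuf definitions with exclusive access
# end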
class << Google::Protobuf::DescriptorPool
  def with_lock
    @mutex ||= Mutex.new
  end
end

# This codec converts protobuf-encoded messages into Logstash events and vice versa.
#
# It requires the protobuf definitions as ruby files. You can create those using the [ruby-protoc compiler](https://github.com/codekitchen/ruby-protocol-buffers).
#
# The following shows a usage example for decoding protobuf 2 encoded events from a kafka stream:
# [source,ruby]
# kafka
# {
#   zk_connect => "127.0.0.1"
#   topic_id => "your_topic_goes_here"
#   key_deserializer_class => "org.apache.kafka.common.serialization.ByteArrayDeserializer"
#   value_deserializer_class => "org.apache.kafka.common.serialization.ByteArrayDeserializer"
#   codec => protobuf
#   {
#     class_name => "Animal::Unicorn"
#     include_path => ['/path/to/protobuf/definitions/UnicornProtobuf.pb.rb']
#   }
# }
#
# The same example for protobuf 3:
# [source,ruby]
# kafka
# {
#   zk_connect => "127.0.0.1"
#   topic_id => "your_topic_goes_here"
#   key_deserializer_class => "org.apache.kafka.common.serialization.ByteArrayDeserializer"
#   value_deserializer_class => "org.apache.kafka.common.serialization.ByteArrayDeserializer"
#   codec => protobuf
#   {
#     class_name => "Animal.Unicorn"
#     include_path => ['/path/to/protobuf/definitions/UnicornProtobuf_pb.rb']
#     protobuf_version => 3
#   }
# }
#
# Specifically for the kafka input: please set the deserializer classes as shown above.

class LogStash::Codecs::Protobuf < LogStash::Codecs::Base
  config_name 'protobuf'

  # Name of the class to decode.
  # If your protobuf 2 definition contains modules, prepend them to the class name with double colons like so:
  # [source,ruby]
  # class_name => "Animal::Horse::Unicorn"
  #
  # This corresponds to a protobuf definition starting as follows:
  # [source,ruby]
  # module Animal
  #   module Horse
  #     class Unicorn
  #       # here are your field definitions.
  #
  # For protobuf 3, separate the modules with single dots:
  # [source,ruby]
  # class_name => "Animal.Horse.Unicorn"
  # Check the bottom of the generated protobuf ruby file. It contains lines like this:
  # [source,ruby]
  # Animals.Unicorn = Google::Protobuf::DescriptorPool.generated_pool.lookup("Animals.Unicorn").msgclass
  # Use the parameter of the lookup call as the class_name for the codec config.
  #
  # If your class references other definitions, you only have to add the main class here.
  config :class_name, :validate => :string, :required => true

  # Relative path to the ruby file that contains the definition of `class_name`.
  #
  # The path is resolved relative to `protobuf_root_directory`.
  #
  # `class_file` and `include_path` cannot be used at the same time.
  config :class_file, :validate => :string, :default => '', :required => false

  # Absolute path to the root directory that contains all referenced/used dependencies
  # of the main class (`class_name`) or any of its dependencies.
  #
  # For instance:
  #
  # pb3
  # ├── header
  # │   └── header_pb.rb
  # ├── messageA_pb.rb
  #
  # In this case `messageA_pb.rb` has an embedded message from `header/header_pb.rb`.
  # If `class_file` is set to `messageA_pb.rb` and `class_name` to
  # `MessageA`, `protobuf_root_directory` must be set to `/path/to/pb3`, which includes
  # both definitions.
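  #
  # A matching configuration for this layout could look like this (paths are hypothetical):
  # [source,ruby]
  # codec => protobuf
  # {
  #   class_name => "MessageA"
  #   class_file => 'messageA_pb.rb'
  #   protobuf_root_directory => '/path/to/pb3'
  #   protobuf_version => 3
  # }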
  config :protobuf_root_directory, :validate => :string, :required => false

  # List of absolute paths to files with protobuf definitions.
  # When using more than one file, list dependencies before the classes that use them,
  # so that each class is loaded before it is referred to by another.
  #
  # Example: a class _Unicorn_ referencing another protobuf class _Wings_
  # [source,ruby]
  # module Animal
  #   module Horse
  #     class Unicorn
  #       set_fully_qualified_name "Animal.Horse.Unicorn"
  #       optional ::Animal::Bodypart::Wings, :wings, 1
  #       optional :string, :name, 2
  #       # here be more field definitions
  #
  # would be configured as
  # [source,ruby]
  # include_path => ['/path/to/protobuf/definitions/Wings.pb.rb','/path/to/protobuf/definitions/Unicorn.pb.rb']
  #
  # `class_file` and `include_path` cannot be used at the same time.
  config :include_path, :validate => :array, :default => [], :required => false

  # Protocol buffer version switch. Defaults to version 2. Please note that the behaviour for enums varies between the versions:
  # for protobuf 2 you will get integer representations for enums, for protobuf 3 you will get string representations due to a different converter library.
  # Recommendation: use the translate plugin to restore the previous behaviour when upgrading.
  config :protobuf_version, :validate => [2,3], :default => 2, :required => true

  # To tolerate faulty messages that cannot be encoded or decoded, set this to false. Otherwise the pipeline will stop upon encountering a non-decipherable message.
  config :stop_on_error, :validate => :boolean, :default => false, :required => false

  # Instruct the encoder to attempt converting data types to match the protobuf definitions. Available only for protobuf version 3.
  config :pb3_encoder_autoconvert_types, :validate => :boolean, :default => true, :required => false

  # Add meta information to `[@metadata][pb_oneof]` about which classes were chosen for [oneof](https://developers.google.com/protocol-buffers/docs/proto3#oneof) fields.
  # Example: for the protobuf definition
  # [source,ruby]
  # oneof :horse_type do
  #   optional :unicorn, :message, 2, "FantasyUnicorn"
  #   optional :pegasus, :message, 3, "FantasyPegasus"
  # end
  #
  # the field `[@metadata][pb_oneof][horse_type]` will be set to either `pegasus` or `unicorn`.
  # Available only for protobuf version 3.
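  #
  # A decoded event can then be inspected like this (the value is illustrative):
  # [source,ruby]
  # event.get("[@metadata][pb_oneof][horse_type]") # => "unicorn"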
  config :pb3_set_oneof_metainfo, :validate => :boolean, :default => false, :required => false


  attr_reader :execution_context

  # Returns the id of the pipeline this codec instance runs in, defaulting to "main".
  def pipeline_id
    respond_to?(:execution_context) && !execution_context.nil? ? execution_context.pipeline_id : "main"
  end

  def register
    @metainfo_messageclasses = {}
    @metainfo_enumclasses = {}
    @metainfo_pb2_enumlist = []
    @pb3_typeconversion_tag = "_protobuf_type_converted"

    if @include_path.length > 0 and not class_file.strip.empty?
      raise LogStash::ConfigurationError, "Cannot use `include_path` and `class_file` at the same time"
    end

    if @include_path.length == 0 and class_file.strip.empty?
      raise LogStash::ConfigurationError, "Need to specify `include_path` or `class_file`"
    end

    should_register = Google::Protobuf::DescriptorPool.generated_pool.lookup(class_name).nil?

    unless @protobuf_root_directory.nil? or @protobuf_root_directory.strip.empty?
      if !$LOAD_PATH.include? @protobuf_root_directory and should_register
        $LOAD_PATH.unshift(@protobuf_root_directory)
      end
    end

    @class_file = "#{@protobuf_root_directory}/#{@class_file}" unless (Pathname.new @class_file).absolute? or @class_file.empty?
    # exclusive access while loading protobuf definitions
    Google::Protobuf::DescriptorPool.with_lock.synchronize do
      # load from `class_file`
      load_protobuf_definition(@class_file) if should_register and !@class_file.empty?
      # load from `include_path`
      include_path.each { |path| load_protobuf_definition(path) } if include_path.length > 0 and should_register

      if @protobuf_version == 3
        @pb_builder = Google::Protobuf::DescriptorPool.generated_pool.lookup(class_name).msgclass
      else
        @pb_builder = pb2_create_instance(class_name)
      end
    end
  end

  # Pipelines using this plugin cannot be reloaded.
  # https://github.com/elastic/logstash/pull/6499
  #
  # The DescriptorPool instance registers the protobuf classes (and
  # dependencies) as global objects. This makes it very difficult to reload a
  # pipeline, because `class_name` and all of its dependencies are already
  # registered.
  def reloadable?
    return false
  end

  def decode(data)
    if @protobuf_version == 3
      decoded = @pb_builder.decode(data.to_s)
      if @pb3_set_oneof_metainfo
        meta = pb3_get_oneof_metainfo(decoded, @class_name)
      end
      h = pb3_deep_to_hash(decoded)
    else
      decoded = @pb_builder.parse(data.to_s)
      h = decoded.to_hash
    end
    e = LogStash::Event.new(h)
    if @protobuf_version == 3 and @pb3_set_oneof_metainfo
      e.set("[@metadata][pb_oneof]", meta)
    end
    yield e if block_given?
  rescue => ex
    @logger.warn("Couldn't decode protobuf: #{ex.inspect}.")
    if stop_on_error
      raise ex
    else # keep the original message so that the user can debug it.
      yield LogStash::Event.new("message" => data, "tags" => ["_protobufdecodefailure"])
    end
  end # def decode

  def encode(event)
    if @protobuf_version == 3
      protobytes = pb3_encode(event)
    else
      protobytes = pb2_encode(event)
    end
    unless protobytes.nil? or protobytes.empty?
      @on_event.call(event, protobytes)
    end
  end # def encode


  private
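
  # Recursively converts a protobuf 3 object into a plain hash. Enum values come in
  # as ruby symbols and are rendered as strings. A sketch with made-up values: a message
  # with name: "Glitter" and colour: :BLUE becomes {:name => "Glitter", :colour => "BLUE"}.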
  def pb3_deep_to_hash(input)
    case input
    when Google::Protobuf::MessageExts # it's a protobuf class
      result = Hash.new
      input.to_h.each {|key, value|
        result[key] = pb3_deep_to_hash(value) # the key is required for the class lookup of enums.
      }
    when ::Array
      result = []
      input.each {|value|
        result << pb3_deep_to_hash(value)
      }
    when ::Hash
      result = {}
      input.each {|key, value|
        result[key] = pb3_deep_to_hash(value)
      }
    when Symbol # it's an enum
      result = input.to_s # Symbol#to_s carries no leading colon
    else
      result = input
    end
    result
  end

  def pb3_encode(event)
    datahash = event.to_hash

    # use && (not `and`) so that the whole boolean expression is assigned
    is_recursive_call = !event.get('tags').nil? && event.get('tags').include?(@pb3_typeconversion_tag)
    if is_recursive_call
      datahash = pb3_remove_typeconversion_tag(datahash)
    end
    datahash = pb3_prepare_for_encoding(datahash)
    if datahash.nil?
      @logger.warn("Protobuf encoding error 4: empty data for event #{event.to_hash}")
    end
    if @pb_builder.nil?
      @logger.warn("Protobuf encoding error 5: empty protobuf builder for class #{@class_name}")
    end
    pb_obj = @pb_builder.new(datahash)
    @pb_builder.encode(pb_obj)

  rescue ArgumentError => e
    k = event.to_hash.keys.join(", ")
    @logger.warn("Protobuf encoding error 1: Argument error (#{e.inspect}). Reason: probably a mismatching protobuf definition. Required fields in the protobuf definition are: #{k}, and fields must not begin with an @ sign. The event has been discarded.")
    nil
  rescue TypeError => e
    pb3_handle_type_errors(event, e, is_recursive_call, datahash)
    nil
  rescue => e
    @logger.warn("Protobuf encoding error 3: #{e.inspect}. Input data: #{datahash}. The event has been discarded. Backtrace: #{e.backtrace}")
    nil
  end



  def pb3_handle_type_errors(event, e, is_recursive_call, datahash)
    begin
      if is_recursive_call
        mismatches = pb3_get_type_mismatches(datahash, "", @class_name)
        @logger.warn("Protobuf encoding error 2.1: Type error (#{e.inspect}). Some types could not be converted. The event has been discarded. Type mismatches: #{mismatches}.")
      else
        if @pb3_encoder_autoconvert_types
          msg = "Protobuf encoding error 2.2: Type error (#{e.inspect}). Will try to convert the data types. Original data: #{datahash}"
          @logger.warn(msg)
          mismatches = pb3_get_type_mismatches(datahash, "", @class_name)

          event = pb3_convert_mismatched_types(event, mismatches)
          # Add a (temporary) tag to stop the recursion
          pb3_add_tag(event, @pb3_typeconversion_tag)
          pb3_encode(event)
        else
          @logger.warn("Protobuf encoding error 2.3: Type error (#{e.inspect}). The event has been discarded. Try setting pb3_encoder_autoconvert_types => true for automatic type conversion.")
        end
      end
    rescue TypeError => e
      if @pb3_encoder_autoconvert_types
        @logger.warn("Protobuf encoding error 2.4.1: (#{e.inspect}). Failed to convert data types. The event has been discarded. Original data: #{datahash}")
      else
        @logger.warn("Protobuf encoding error 2.4.2: (#{e.inspect}). The event has been discarded.")
      end
      if @stop_on_error
        raise e
      end
      nil
    rescue => ex
      @logger.warn("Protobuf encoding error 2.5: (#{ex.inspect}). The event has been discarded. Auto-typecasting was on: #{@pb3_encoder_autoconvert_types}")
      if @stop_on_error
        raise ex
      end
      nil
    end
  end # pb3_handle_type_errors


  def pb3_get_type_mismatches(data, key_prefix, pb_class)
    mismatches = []
    data.to_h.each do |key, value|
      expected_type = pb3_get_expected_type(key, pb_class)
      r = pb3_compare_datatypes(value, key, key_prefix, pb_class, expected_type)
      mismatches.concat(r)
    end # data.each
    mismatches
  end


  def pb3_get_expected_type(key, pb_class)
    pb_descriptor = Google::Protobuf::DescriptorPool.generated_pool.lookup(pb_class)

    if !pb_descriptor.nil?
      pb_builder = pb_descriptor.msgclass
      pb_obj = pb_builder.new({})
      v = pb_obj.send(key)

      if !v.nil?
        v.class
      else
        nil
      end
    end
  end

  def pb3_compare_datatypes(value, key, key_prefix, pb_class, expected_type)
    mismatches = []

    if value.nil?
      is_mismatch = false
    else
      case value
      when ::Hash, Google::Protobuf::MessageExts
        is_mismatch = false
        descriptor = Google::Protobuf::DescriptorPool.generated_pool.lookup(pb_class).lookup(key)
        if !descriptor.subtype.nil?
          class_of_nested_object = pb3_get_descriptorpool_name(descriptor.subtype.msgclass)
          new_prefix = "#{key}."
          recursive_mismatches = pb3_get_type_mismatches(value, new_prefix, class_of_nested_object)
          mismatches.concat(recursive_mismatches)
        end
      when ::Array
        expected_type = pb3_get_expected_type(key, pb_class)
        is_mismatch = (expected_type != Google::Protobuf::RepeatedField)
        child_type = Google::Protobuf::DescriptorPool.generated_pool.lookup(pb_class).lookup(key).type
        value.each_with_index do | v, i |
          new_prefix = "#{key}."
          recursive_mismatches = pb3_compare_datatypes(v, i.to_s, new_prefix, pb_class, child_type)
          mismatches.concat(recursive_mismatches)
          is_mismatch |= recursive_mismatches.any?
        end # do
      else # scalar data type
        is_mismatch = ! pb3_is_scalar_datatype_match(expected_type, value.class)
      end # case
    end # if value.nil?

    if is_mismatch
      mismatches << {"key" => "#{key_prefix}#{key}", "actual_type" => value.class, "expected_type" => expected_type, "value" => value}
    end
    mismatches
  end

  def pb3_remove_typeconversion_tag(data)
    # remove the tag that we added to the event because
    # the protobuf definition might not have a field for tags
    data['tags'].delete(@pb3_typeconversion_tag)
    if data['tags'].length == 0
      data.delete('tags')
    end
    data
  end

  def pb3_get_descriptorpool_name(child_class)
    # make an instance
    inst = child_class.new
    # get the lookup name for the DescriptorPool
    inst.class.descriptor.name
  end

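  # Scalar type leniency: an Integer is accepted where a Float is expected, but
  # not the other way around. For example:
  #   pb3_is_scalar_datatype_match(Float, Integer)  # => true
  #   pb3_is_scalar_datatype_match(Integer, Float)  # => false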
  def pb3_is_scalar_datatype_match(expected_type, actual_type)
    if expected_type == actual_type
      true
    else
      e = expected_type.to_s.downcase.to_sym
      a = actual_type.to_s.downcase.to_sym
      case e
      when :string, :integer
        a == e
      when :float
        a == :float || a == :integer
      end
    end
  end


  def pb3_convert_mismatched_types_getter(struct, key)
    if struct.is_a? ::Hash
      struct[key]
    else
      struct.get(key)
    end
  end

  def pb3_convert_mismatched_types_setter(struct, key, value)
    if struct.is_a? ::Hash
      struct[key] = value
    else
      struct.set(key, value)
    end
    struct
  end

  def pb3_add_tag(event, tag)
    if event.get('tags').nil?
      event.set('tags', [tag])
    else
      existing_tags = event.get('tags')
      event.set("tags", existing_tags << tag)
    end
  end

  # Due to recursion on nested fields in the event object, this method might be given an event (1st call) or a hash (2nd .. nth call).
  # The first call will receive the event object; child objects will be hashes.
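  # Each entry of `mismatches` has the shape produced by pb3_compare_datatypes
  # (the values here are made up):
  #   {"key" => "http_user_agent.minor_version", "actual_type" => String, "expected_type" => Integer, "value" => "10"}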
  def pb3_convert_mismatched_types(struct, mismatches)
    mismatches.each do | m |
      key = m['key']
      expected_type = m['expected_type']
      actual_type = m['actual_type']
      if key.include? "." # the mismatch is in a child object
        levels = key.split(/\./) # key is something like http_user_agent.minor_version and needs to be split.
        key = levels[0]
        sub_levels = levels.drop(1).join(".")
        new_mismatches = [{"key"=>sub_levels, "actual_type"=>m["actual_type"], "expected_type"=>m["expected_type"]}]
        value = pb3_convert_mismatched_types_getter(struct, key)
        new_value = pb3_convert_mismatched_types(value, new_mismatches)
        struct = pb3_convert_mismatched_types_setter(struct, key, new_value)
      else
        value = pb3_convert_mismatched_types_getter(struct, key)
        begin
          case expected_type.to_s
          when "Integer"
            case actual_type.to_s
            when "String"
              new_value = value.to_i
            when "Float"
              if value.floor == value # convert values like 2.0 to 2, but not 2.1
                new_value = value.to_i
              end
            end
          when "String"
            new_value = value.to_s
          when "Float"
            new_value = value.to_f
          when "Boolean","TrueClass", "FalseClass"
            new_value = value.to_s.downcase == "true"
          end
          if !new_value.nil?
            struct = pb3_convert_mismatched_types_setter(struct, key, new_value)
          end
        rescue Exception => ex
          @logger.debug("Protobuf encoding error 6: Could not convert types for protobuf encoding: #{ex}")
        end
      end # if key contains .
    end # mismatches.each
    struct
  end
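  # Sketch of the transformation below, with made-up fields: an event hash like
  #   {"@timestamp" => <timestamp object>, "bytes" => 42, "host" => nil}
  # becomes
  #   {:timestamp => "2024-01-01T00:00:00.000Z", :bytes => 42}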
  def pb3_prepare_for_encoding(datahash)
    # 0) Remove empty fields.
    datahash = datahash.select { |key, value| !value.nil? }

    # Preparation: the data cannot be encoded until certain criteria are met:
    # 1) remove @ signs from keys.
    # 2) convert timestamps and other objects to strings.
    datahash = datahash.inject({}){|x,(k,v)| x[k.gsub(/@/,'').to_sym] = (should_convert_to_string?(v) ? v.to_s : v); x}

    datahash.each do |key, value|
      datahash[key] = pb3_prepare_for_encoding(value) if value.is_a?(Hash)
    end

    datahash
  end

  def pb3_get_oneof_metainfo(pb_object, pb_class_name)
    meta = {}
    pb_class = Google::Protobuf::DescriptorPool.generated_pool.lookup(pb_class_name).msgclass

    pb_class.descriptor.each_oneof { |field|
      field.each { | group_option |
        if !pb_object.send(group_option.name).nil?
          meta[field.name] = group_option.name
        end
      }
    }

    pb_class.descriptor.select{ |field| field.type == :message }.each { | field |
      # recurse over nested protobuf classes
      pb_sub_object = pb_object.send(field.name)
      if !pb_sub_object.nil? and !field.subtype.nil?
        pb_sub_class = pb3_get_descriptorpool_name(field.subtype.msgclass)
        meta[field.name] = pb3_get_oneof_metainfo(pb_sub_object, pb_sub_class)
      end
    }

    meta
  end


  def pb2_encode(event)
    data = pb2_prepare_for_encoding(event.to_hash, @class_name)
    msg = @pb_builder.new(data)
    msg.serialize_to_string
  rescue NoMethodError => e
    @logger.warn("Encoding error 2. Probably a mismatching protobuf definition. Required fields in the protobuf definition are: " + event.to_hash.keys.join(", ") + " and the timestamp field name must not include a @.")
    raise e
  rescue => e
    @logger.warn("Encoding error 1: #{e.inspect}")
    raise e
  end


  def pb2_prepare_for_encoding(datahash, class_name)
    if datahash.is_a?(::Hash)
      # Preparation: the data cannot be encoded until certain criteria are met:
      # 1) remove @ signs from keys.
      # 2) convert timestamps and other objects to strings.
      datahash = ::Hash[datahash.map{|(k,v)| [k.to_s.dup.gsub(/@/,''), (should_convert_to_string?(v) ? v.to_s : v)] }]

      # Check if any of the fields in this hash are protobuf classes and if so, create a builder for them.
      meta = @metainfo_messageclasses[class_name]
      if meta
        meta.map do | (k,c) |
          if datahash.include?(k)
            original_value = datahash[k]
            datahash[k] =
              if original_value.is_a?(::Array)
                # make this field an array/list of protobuf objects:
                # the value is a list of hashed complex objects, each of which needs to be protobuffed and
                # put back into the list.
                original_value.map { |x| pb2_prepare_for_encoding(x, c) }
              else
                proto_obj = pb2_create_instance(c)
                proto_obj.new(pb2_prepare_for_encoding(original_value, c)) # this line is reached in the colourtest for an enum. Enums should not be instantiated. Should enums even be in the messageclasses? I don't think so! TODO bug
              end # if is array
          end # if datahash_include
        end # do
      end # if meta
    end
    datahash
  end

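  # Objects the protobuf builders cannot take directly (timestamps etc.) are
  # stringified; scalar types are kept as they are:
  #   should_convert_to_string?(Time.now)  # => true
  #   should_convert_to_string?(42)        # => false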
  def should_convert_to_string?(v)
    !(v.is_a?(Integer) || v.is_a?(Float) || v.is_a?(::Hash) || v.is_a?(::Array) || [true, false].include?(v))
  end
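  # Resolves a pb2 class name to its class object, e.g.
  #   pb2_create_instance("Animal::Unicorn")  # => Animal::Unicorn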
  def pb2_create_instance(name)
    @logger.debug("Creating instance of " + name)
    name.split('::').inject(Object) { |n,c| n.const_get c }
  end
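  # Extracts message and enum metadata from a generated pb3 ruby file. The regexes
  # below match definition lines as found in such files, for example (names are
  # illustrative):
  #   add_message "Animals.Unicorn" do
  #     optional :name, :string, 1
  #     optional :colour, :enum, 2, "Animals.Colour"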
  def pb3_metadata_analyis(filename)
    regex_class_name = /\s*add_message "(?<name>.+?)" do\s+/ # TODO optimize both regexes for speed (negative lookahead)
    regex_pbdefs = /\s*(optional|repeated)(\s*):(?<name>.+),(\s*):(?<type>\w+),(\s*)(?<position>\d+)(, \"(?<enum_class>.*?)\")?/
    class_name = ""
    type = ""
    field_name = ""
    File.readlines(filename).each do |line|
      if ! (line =~ regex_class_name).nil?
        class_name = $1
        @metainfo_messageclasses[class_name] = {}
        @metainfo_enumclasses[class_name] = {}
      end # if
      if ! (line =~ regex_pbdefs).nil?
        field_name = $1
        type = $2
        field_class_name = $4
        if type == "message"
          @metainfo_messageclasses[class_name][field_name] = field_class_name
        elsif type == "enum"
          @metainfo_enumclasses[class_name][field_name] = field_class_name
        end
      end # if
    end # readlines
    if class_name.empty? # class_name is initialized to "", so check for empty, not nil
      @logger.warn("Error 4: class name not found in file " + filename)
      raise ArgumentError, "Invalid protobuf file: " + filename
    end
  rescue Exception => e
    @logger.warn("Error 3: unable to read pb definition from file " + filename + ". Reason: #{e.inspect}. Last settings were: class #{class_name} field #{field_name} type #{type}. Backtrace: " + e.backtrace.inspect.to_s)
    raise e
  end



  def pb2_metadata_analyis(filename)
    regex_class_start = /\s*set_fully_qualified_name \"(?<name>.+)\".*?/
    regex_enum_name = /\s*include ..ProtocolBuffers..Enum\s*/
    regex_pbdefs = /\s*(optional|repeated)(\s*):(?<type>.+),(\s*):(?<name>\w+),(\s*)(?<position>\d+)/
    # We also need to find out which class the file contains and the protobuf definitions in it.
    # We will unfortunately need that later so that we can create nested objects.

    class_name = ""
    type = ""
    field_name = ""
    is_enum_class = false

    File.readlines(filename).each do |line|
      if ! (line =~ regex_enum_name).nil?
        is_enum_class = true
      end

      if ! (line =~ regex_class_start).nil?
        class_name = $1.gsub('.',"::").split('::').map {|word| word.capitalize}.join('::')
        if is_enum_class
          @metainfo_pb2_enumlist << class_name.downcase
        end
        is_enum_class = false # reset when the next class starts
      end
      if ! (line =~ regex_pbdefs).nil?
        type = $1
        field_name = $2
        if type =~ /::/
          clean_type = type.gsub(/^:/,"")
          e = @metainfo_pb2_enumlist.include? clean_type.downcase

          if e
            if not @metainfo_enumclasses.key? class_name
              @metainfo_enumclasses[class_name] = {}
            end
            @metainfo_enumclasses[class_name][field_name] = clean_type
          else
            if not @metainfo_messageclasses.key? class_name
              @metainfo_messageclasses[class_name] = {}
            end
            @metainfo_messageclasses[class_name][field_name] = clean_type
          end
        end
      end
    end
    if class_name.empty? # class_name is initialized to "", so check for empty, not nil
      @logger.warn("Error 4: class name not found in file " + filename)
      raise ArgumentError, "Invalid protobuf file: " + filename
    end
  rescue LoadError => e
    raise ArgumentError.new("Could not load file: " + filename + ". Please try to use absolute paths. Current working dir: " + Dir.pwd + ", loadpath: " + $LOAD_PATH.join(" "))
  rescue => e
    @logger.warn("Error 3: unable to read pb definition from file " + filename + ". Reason: #{e.inspect}. Last settings were: class #{class_name} field #{field_name} type #{type}. Backtrace: " + e.backtrace.inspect.to_s)
    raise e
  end


  def load_protobuf_definition(filename)
    if filename.end_with?('.rb')
      # require the protobuf definition (only absolute paths can be required directly;
      # relative paths rely on `protobuf_root_directory` having been added to the load path)
      if (Pathname.new filename).absolute?
        begin
          require filename
        rescue Exception => e
          @logger.error("Unable to load file: #{filename}. Reason: #{e.inspect}")
          raise e
        end
      end

      if @protobuf_version == 3
        pb3_metadata_analyis(filename)
      else
        pb2_metadata_analyis(filename)
      end

    else
      @logger.warn("Not a ruby file: " + filename)
    end
  end

end # class LogStash::Codecs::Protobuf