logstash-codec-protobuf 1.3.0-java

Files changed (41)
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +56 -0
  3. data/CONTRIBUTORS +12 -0
  4. data/DEVELOPER.md +2 -0
  5. data/Gemfile +11 -0
  6. data/LICENSE +202 -0
  7. data/NOTICE.TXT +4 -0
  8. data/README.md +184 -0
  9. data/docs/index.asciidoc +241 -0
  10. data/google-protobuf-lib-update.md +57 -0
  11. data/lib/logstash/codecs/protobuf.rb +804 -0
  12. data/logstash-codec-protobuf.gemspec +33 -0
  13. data/spec/codecs/pb2_spec.rb +236 -0
  14. data/spec/codecs/pb3_decode_spec.rb +665 -0
  15. data/spec/codecs/pb3_encode_spec.rb +243 -0
  16. data/spec/helpers/pb2/ColourTestcase.pb.rb +35 -0
  17. data/spec/helpers/pb2/ColourTestcase.proto +24 -0
  18. data/spec/helpers/pb2/event.pb.rb +19 -0
  19. data/spec/helpers/pb2/event.proto +12 -0
  20. data/spec/helpers/pb2/header/header.pb.rb +16 -0
  21. data/spec/helpers/pb2/header/header.proto +8 -0
  22. data/spec/helpers/pb2/human.pb.rb +26 -0
  23. data/spec/helpers/pb2/unicorn.pb.rb +19 -0
  24. data/spec/helpers/pb2/unicorn_event.pb.rb +24 -0
  25. data/spec/helpers/pb3/FantasyHorse_pb.rb +48 -0
  26. data/spec/helpers/pb3/PhoneDirectory_pb.rb +37 -0
  27. data/spec/helpers/pb3/ProbeResult_pb.rb +26 -0
  28. data/spec/helpers/pb3/ResultListComposerRequest_pb.rb +25 -0
  29. data/spec/helpers/pb3/dnsmessage_pb.rb +82 -0
  30. data/spec/helpers/pb3/events_pb.rb +17 -0
  31. data/spec/helpers/pb3/header/header.proto3 +7 -0
  32. data/spec/helpers/pb3/header/header_pb.rb +12 -0
  33. data/spec/helpers/pb3/integertest_pb.rb +18 -0
  34. data/spec/helpers/pb3/messageA_pb.rb +16 -0
  35. data/spec/helpers/pb3/messageB_pb.rb +15 -0
  36. data/spec/helpers/pb3/rum2_pb.rb +87 -0
  37. data/spec/helpers/pb3/rum3_pb.rb +87 -0
  38. data/spec/helpers/pb3/rum_pb.rb +87 -0
  39. data/spec/helpers/pb3/struct_test_pb.rb +21 -0
  40. data/spec/helpers/pb3/unicorn_pb.rb +31 -0
  41. metadata +175 -0
data/lib/logstash/codecs/protobuf.rb: @@ -0,0 +1,804 @@
# encoding: utf-8
require 'logstash/codecs/base'
require 'logstash/util/charset'
require 'google/protobuf' # for protobuf3
require 'google/protobuf/struct_pb'
require 'protocol_buffers' # https://github.com/codekitchen/ruby-protocol-buffers, for protobuf2
require 'pathname' # for the absolute-path checks below (usually already loaded in Logstash)
require 'json'     # for the Google::Protobuf::Struct conversion below


# Monkey-patch the `Google::Protobuf::DescriptorPool` with a mutex for exclusive
# access.
#
# The DescriptorPool instance is not thread-safe when loading protobuf
# definitions. This can cause unrecoverable errors when registering multiple
# concurrent pipelines that try to register the same dependency. The
# DescriptorPool instance is global to the JVM and shared among all pipelines.
class << Google::Protobuf::DescriptorPool
  def with_lock
    @mutex ||= Mutex.new
  end
end
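
# Usage sketch (illustrative, not part of the plugin's public API): any code
# that loads definitions into the shared pool should serialize through this
# mutex, for example:
#
#   Google::Protobuf::DescriptorPool.with_lock.synchronize do
#     require '/path/to/some_generated_pb.rb' # hypothetical path
#   end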

# This codec converts protobuf encoded messages into logstash events and vice versa.
#
# Requires the protobuf definitions as ruby files. You can create those using the [ruby-protoc compiler](https://github.com/codekitchen/ruby-protocol-buffers).
#
# The following shows a usage example for decoding protobuf 2 encoded events from a kafka stream:
# [source,ruby]
# kafka
# {
#   zk_connect => "127.0.0.1"
#   topic_id => "your_topic_goes_here"
#   key_deserializer_class => "org.apache.kafka.common.serialization.ByteArrayDeserializer"
#   value_deserializer_class => "org.apache.kafka.common.serialization.ByteArrayDeserializer"
#   codec => protobuf
#   {
#     class_name => "Animal::Unicorn"
#     include_path => ['/path/to/protobuf/definitions/UnicornProtobuf.pb.rb']
#   }
# }
#
# The same example for protobuf 3:
# [source,ruby]
# kafka
# {
#   zk_connect => "127.0.0.1"
#   topic_id => "your_topic_goes_here"
#   key_deserializer_class => "org.apache.kafka.common.serialization.ByteArrayDeserializer"
#   value_deserializer_class => "org.apache.kafka.common.serialization.ByteArrayDeserializer"
#   codec => protobuf
#   {
#     class_name => "Animal.Unicorn"
#     include_path => ['/path/to/protobuf/definitions/UnicornProtobuf_pb.rb']
#     protobuf_version => 3
#   }
# }
#
# Specifically for the kafka input: please set the deserializer classes as shown above.

class LogStash::Codecs::Protobuf < LogStash::Codecs::Base
  config_name 'protobuf'

  # Name of the class to decode.
  # If your protobuf 2 definition contains modules, prepend them to the class name with double colons like so:
  # [source,ruby]
  # class_name => "Animal::Horse::Unicorn"
  #
  # This corresponds to a protobuf definition starting as follows:
  # [source,ruby]
  # module Animal
  #   module Horse
  #     class Unicorn
  #       # here are your field definitions.
  #
  # For protobuf 3, separate the modules with single dots.
  # [source,ruby]
  # class_name => "Animal.Horse.Unicorn"
  # Check the bottom of the generated protobuf ruby file. It contains lines like this:
  # [source,ruby]
  # Animals.Unicorn = Google::Protobuf::DescriptorPool.generated_pool.lookup("Animals.Unicorn").msgclass
  # Use the parameter of the lookup call as the class_name for the codec config.
  #
  # If your class references other definitions: you only have to add the main class here.
  config :class_name, :validate => :string, :required => true

  # Relative path (from `protobuf_root_directory`) to the ruby file that holds
  # the definition of the class specified in `class_name`.
  #
  # `class_file` and `include_path` cannot be used at the same time.
  config :class_file, :validate => :string, :default => '', :required => false

  # Absolute path to the root directory that contains all referenced/used dependencies
  # of the main class (`class_name`) or any of its dependencies.
  #
  # For instance:
  #
  # pb3
  # ├── header
  # │   └── header_pb.rb
  # ├── messageA_pb.rb
  #
  # In this case `messageA_pb.rb` has an embedded message from `header/header_pb.rb`.
  # If `class_file` is set to `messageA_pb.rb`, and `class_name` to
  # `MessageA`, `protobuf_root_directory` must be set to `/path/to/pb3`, which includes
  # both definitions.
  config :protobuf_root_directory, :validate => :string, :required => false
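
  # A minimal decoder config for the directory layout above (a sketch; paths
  # and the class name are illustrative):
  # [source,ruby]
  # codec => protobuf
  # {
  #   class_name => "MessageA"
  #   class_file => 'messageA_pb.rb'
  #   protobuf_root_directory => "/path/to/pb3"
  #   protobuf_version => 3
  # }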

  # List of absolute paths to files with protobuf definitions.
  # When using more than one file, make sure to arrange the files in reverse order of dependency so that each class is loaded before it is
  # referred to by another.
  #
  # Example: a class _Unicorn_ referencing another protobuf class _Wings_
  # [source,ruby]
  # module Animal
  #   module Horse
  #     class Unicorn
  #       set_fully_qualified_name "Animal.Horse.Unicorn"
  #       optional ::Animal::Bodypart::Wings, :wings, 1
  #       optional :string, :name, 2
  #       # here be more field definitions
  #
  # would be configured as
  # [source,ruby]
  # include_path => ['/path/to/protobuf/definitions/Wings.pb.rb','/path/to/protobuf/definitions/Unicorn.pb.rb']
  #
  # `class_file` and `include_path` cannot be used at the same time.
  config :include_path, :validate => :array, :default => [], :required => false

  # Protocol buffer version switch. Defaults to version 2. Please note that the behaviour for enums varies between the versions.
  # For protobuf 2 you will get integer representations for enums, for protobuf 3 you'll get string representations due to a different converter library.
  # Recommendation: use the translate plugin to restore the previous behaviour when upgrading.
  config :protobuf_version, :validate => [2,3], :default => 2, :required => true
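
  # Illustration (assumed definition with an enum field `colour` where
  # GREEN = 2): decoding the same message yields `"colour" => 2` under
  # protobuf 2 but `"colour" => "GREEN"` under protobuf 3.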

  # To tolerate faulty messages that cannot be en/decoded, set this to false. Otherwise the pipeline will stop upon encountering an undecipherable message.
  config :stop_on_error, :validate => :boolean, :default => false, :required => false

  # Instruct the encoder to attempt converting data types to match the protobuf definitions. Available only for protobuf version 3.
  config :pb3_encoder_autoconvert_types, :validate => :boolean, :default => true, :required => false

  # Add meta information to `[@metadata][pb_oneof]` about which classes were chosen for [oneof](https://developers.google.com/protocol-buffers/docs/proto3#oneof) fields.
  # Example: for the protobuf definition
  # [source,ruby]
  # oneof :horse_type do
  #   optional :unicorn, :message, 2, "FantasyUnicorn"
  #   optional :pegasus, :message, 3, "FantasyPegasus"
  # end
  # the field `[@metadata][pb_oneof][horse_type]` will be set to either `pegasus` or `unicorn`.
  # Available only for protobuf version 3.
  config :pb3_set_oneof_metainfo, :validate => :boolean, :default => false, :required => false


  attr_reader :execution_context

  # ID of the pipeline that this codec instance is running in.
  def pipeline_id
    respond_to?(:execution_context) && !execution_context.nil? ? execution_context.pipeline_id : "main"
  end

  def register
    @metainfo_messageclasses = {}
    @metainfo_enumclasses = {}
    @metainfo_pb2_enumlist = []
    @pb3_typeconversion_tag = "_protobuf_type_converted"

    if @include_path.length > 0 and not class_file.strip.empty?
      raise LogStash::ConfigurationError, "Cannot use `include_path` and `class_file` at the same time"
    end

    if @include_path.length == 0 and class_file.strip.empty?
      raise LogStash::ConfigurationError, "Need to specify `include_path` or `class_file`"
    end

    should_register = Google::Protobuf::DescriptorPool.generated_pool.lookup(class_name).nil?

    unless @protobuf_root_directory.nil? or @protobuf_root_directory.strip.empty?
      if !$LOAD_PATH.include? @protobuf_root_directory and should_register
        $LOAD_PATH.unshift(@protobuf_root_directory)
      end
    end

    @class_file = "#{@protobuf_root_directory}/#{@class_file}" unless (Pathname.new @class_file).absolute? or @class_file.empty?
    # exclusive access while loading protobuf definitions
    Google::Protobuf::DescriptorPool.with_lock.synchronize do
      # load from `class_file`
      load_protobuf_definition(@class_file) if should_register and !@class_file.empty?
      # load from `include_path`
      include_path.each { |path| load_protobuf_definition(path) } if include_path.length > 0 and should_register
      if @protobuf_version == 3
        @pb_builder = Google::Protobuf::DescriptorPool.generated_pool.lookup(class_name).msgclass
      else
        @pb_builder = pb2_create_instance(class_name)
      end
    end
  end

  # Pipelines using this plugin cannot be reloaded.
  # https://github.com/elastic/logstash/pull/6499
  #
  # The DescriptorPool instance registers the protobuf classes (and
  # dependencies) as global objects. This makes it very difficult to reload a
  # pipeline, because `class_name` and all of its dependencies are already
  # registered.
  def reloadable?
    return false
  end

  def decode(data)
    if @protobuf_version == 3
      decoded = @pb_builder.decode(data.to_s)
      hashed, meta = pb3_to_hash(decoded)
    else # version = 2
      decoded = @pb_builder.parse(data.to_s)
      hashed = decoded.to_hash
    end
    e = LogStash::Event.new(hashed)
    if @protobuf_version == 3 and @pb3_set_oneof_metainfo
      e.set("[@metadata][pb_oneof]", meta)
    end
    yield e if block_given?
  rescue => ex
    @logger.warn("Couldn't decode protobuf: #{ex.inspect}")
    if @stop_on_error
      raise ex
    else # keep the original message so that the user can debug it.
      yield LogStash::Event.new(
        "message" => data, "tags" => ["_protobufdecodefailure"],
        "decoder_exception" => "#{ex.inspect}")
    end
  end # def decode
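
  # Decode sketch (illustrative; assumes the pb2 `Animal::Unicorn` class from
  # the examples above is registered):
  #
  #   bytes = Animal::Unicorn.new(:name => 'Glitter').serialize_to_string
  #   codec.decode(bytes) do |event|
  #     event.get('name') # => "Glitter"
  #   end
  #
  # Undecodable input is passed on as an event tagged `_protobufdecodefailure`
  # unless `stop_on_error` is set.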


  def encode(event)
    if @protobuf_version == 3
      protobytes = pb3_encode(event)
    else
      protobytes = pb2_encode(event)
    end
    unless protobytes.nil? or protobytes.empty?
      @on_event.call(event, protobytes)
    end
  end # def encode
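
  # Encode sketch (illustrative): in an output such as
  # [source,ruby]
  # kafka
  # {
  #   codec => protobuf
  #   {
  #     class_name => "Animal.Unicorn"
  #     include_path => ['/path/to/protobuf/definitions/UnicornProtobuf_pb.rb']
  #     protobuf_version => 3
  #   }
  # }
  # each event is serialized into the registered message class and the bytes
  # are handed to the output via `@on_event`.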


  # Get the builder class for any given protobuf object from the descriptor pool
  # Exposed for testing
  # @param [Object] pb_obj The pb object instance to do the lookup for
  # @return [Object] The pb builder class
  def pb3_class_for_name(pb_obj)
    Google::Protobuf::DescriptorPool.generated_pool.lookup(pb_obj.class.descriptor.name)
  end

  private

  # Helper function for debugging: print data types for fields of a hash
  def print_types(hashy, i = 0)
    hashy.each do |key, value|
      puts ws(i) + "#{key} " + value.class.name
      if value.is_a? ::Hash
        print_types(value, i + 1)
      end
      if value.is_a? ::Array
        value.each do |v|
          puts ws(i + 1) + v.class.name
          if v.is_a? ::Hash
            print_types(v, i + 2)
          end
        end
      end
    end
  end

  # Helper function for debugging: indent print statements based on recursion level
  def ws(i)
    " " * i
  end


  # Converts the pb class to a hash, including its nested objects.
  # @param [Object] input The pb class or any of its nested data structures
  # @param [Numeric] i Level of recursion, needed only for whitespace indentation in debug output
  # @return [Hash, Hash] The converted data as a hash + meta information about the one-of choices.
  def pb3_to_hash(input, i = 0)
    meta = {}
    case input
    when Google::Protobuf::Struct
      result = JSON.parse input.to_json({
        :preserve_proto_fieldnames => true,
        :emit_defaults => true
      })
    when Google::Protobuf::MessageExts # it's a protobuf class
      result = Hash.new
      input.clone().to_h.keys.each { |key|
        # 'class' is a reserved word so we cannot send() it to the pb object.
        # It would give the pb definition class instead of the value of a field of such name.
        if key.to_s == "class"
          value = input[key]
        else
          value = input.send(key)
        end
        unless value.nil?
          r, m = pb3_to_hash(value, 1 + i)
          result[key.to_s] = r unless r.nil?
          meta[key] = m unless m.empty?
        end
      }
      result, m = oneof_clean(result, input, i)
      meta = meta.merge(m) unless m.empty?
    when ::Array, Google::Protobuf::RepeatedField
      # Both clauses must share one body: Ruby case statements do not fall
      # through, so a separate empty `when ::Array` would return nil for arrays.
      result = []
      meta = []
      input.each { |value|
        r, m = pb3_to_hash(value, 1 + i)
        result << r unless r.nil?
        meta << m unless r.nil?
      }
    when ::Hash, Google::Protobuf::Map
      result = {}
      input.each { |key, value|
        r, m = pb3_to_hash(value, 1 + i)
        result[key.to_s] = r unless r.nil?
        meta[key] = m unless m.empty?
      }
    when Symbol # is an Enum
      result = input.to_s.sub(':', '')
    else # any other scalar
      result = input
    end
    return result, meta
  end
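
  # Conversion sketch (names assumed): a message such as
  #   FantasyUnicorn(name: "Glitter", wings: Wings(count: 2))
  # becomes
  #   { "name" => "Glitter", "wings" => { "count" => 2 } }
  # and the second return value records the one-of choices, e.g.
  #   { "horse_type" => "unicorn" }.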


  # For one-of options, remove the non-chosen options.
  # @param [Hash] datahash The data hash including all options for each one-of field
  # @param [Object] pb_obj The protobuf class from which datahash was created
  # @param [Numeric] i Level of recursion, needed only for whitespace indentation in debug output
  # @return [Hash, Hash] The reduced data as a hash + meta information about the one-of choices.
  def oneof_clean(datahash, pb_obj, i = 0)
    # If a field is part of a one-of then it must only be set if it's the selected option.
    # In codec versions <= 1.2.x this was not the case. The .to_h delivered default values
    # for every one-of option regardless of which one had been chosen, instead of respecting the XOR relation between them.
    # The selected option's field name can be queried from input[parent_field]
    # where parent_field is the name of the one-of field outside the option list.
    # It's unclear though how to identify a) if a field is part of a one-of struct
    # because the class of the chosen option will always be a scalar,
    # and b) the name of the parent field.
    # As a workaround we look up the names of the 'parent fields' for this class and then the chosen options for those.
    # Then we remove the other options which weren't set by the producer.
    pb_class = pb3_class_for_name(pb_obj)
    meta = {}
    unless pb_class.nil?
      pb_class.msgclass.descriptor.each_oneof { |field|
        # Find out which one-of option has been set
        chosen = pb_obj.send(field.name).to_s
        # Go through the options and remove the names of the non-chosen fields from the hash
        # Whacky solution, better ideas are welcome.
        field.each { |group_option|
          if group_option.name != chosen
            key = group_option.name
            datahash.delete(key)
          end
        }
        meta[field.name.to_s] = chosen
      }
    end # unless
    return datahash, meta
  end


  def pb3_encode(event)
    datahash = event.to_hash
    # && instead of `and`: with the low-precedence `and`, only the first clause
    # would have been assigned to is_recursive_call.
    is_recursive_call = !event.get('tags').nil? && event.get('tags').include?(@pb3_typeconversion_tag)
    if is_recursive_call
      datahash = pb3_remove_typeconversion_tag(datahash)
    end
    datahash = pb3_prepare_for_encoding(datahash)
    if datahash.nil?
      @logger.warn("Protobuf encoding error 4: empty data for event #{event.to_hash}")
    end
    if @pb_builder.nil?
      @logger.warn("Protobuf encoding error 5: empty protobuf builder for class #{@class_name}")
    end
    pb_obj = @pb_builder.new(datahash)
    @pb_builder.encode(pb_obj)
  rescue ArgumentError => e
    k = event.to_hash.keys.join(", ")
    @logger.warn("Protobuf encoding error 1: Argument error (#{e.inspect}). Reason: probably mismatching protobuf definition. \
      Required fields in the protobuf definition are: #{k} and fields must not begin with @ sign. The event has been discarded.")
    nil
  rescue TypeError => e
    pb3_handle_type_errors(event, e, is_recursive_call, datahash)
    nil
  rescue => e
    @logger.warn("Protobuf encoding error 3: #{e.inspect}. Event discarded. Input data: #{datahash}. The event has been discarded. Backtrace: #{e.backtrace}")
    nil
  end


  def pb3_handle_type_errors(event, e, is_recursive_call, datahash)
    begin
      if is_recursive_call
        # The types were already converted once; a second type error means the
        # conversion failed. Compute the mismatches for the log statement.
        mismatches = pb3_get_type_mismatches(datahash, "", @class_name)
        @logger.warn("Protobuf encoding error 2.1: Type error (#{e.inspect}). Some types could not be converted. The event has been discarded. Type mismatches: #{mismatches}.")
      else
        if @pb3_encoder_autoconvert_types
          msg = "Protobuf encoding error 2.2: Type error (#{e.inspect}). Will try to convert the data types. Original data: #{datahash}"
          @logger.warn(msg)
          mismatches = pb3_get_type_mismatches(datahash, "", @class_name)

          event = pb3_convert_mismatched_types(event, mismatches)
          # Add a (temporary) tag to handle the recursion stop
          pb3_add_tag(event, @pb3_typeconversion_tag)
          pb3_encode(event)
        else
          @logger.warn("Protobuf encoding error 2.3: Type error (#{e.inspect}). The event has been discarded. Try setting pb3_encoder_autoconvert_types => true for automatic type conversion.")
        end
      end
    rescue TypeError => e
      if @pb3_encoder_autoconvert_types
        @logger.warn("Protobuf encoding error 2.4.1: (#{e.inspect}). Failed to convert data types. The event has been discarded. Original data: #{datahash}")
      else
        @logger.warn("Protobuf encoding error 2.4.2: (#{e.inspect}). The event has been discarded.")
      end
      if @stop_on_error
        raise e
      end
      nil
    rescue => ex
      @logger.warn("Protobuf encoding error 2.5: (#{ex.inspect}). The event has been discarded. Auto-typecasting was on: #{@pb3_encoder_autoconvert_types}")
      if @stop_on_error
        raise ex
      end
      nil
    end
  end # pb3_handle_type_errors


  def pb3_get_type_mismatches(data, key_prefix, pb_class)
    mismatches = []
    data.to_h.each do |key, value|
      expected_type = pb3_get_expected_type(key, pb_class)
      r = pb3_compare_datatypes(value, key, key_prefix, pb_class, expected_type)
      mismatches.concat(r)
    end # data.each
    mismatches
  end


  def pb3_get_expected_type(key, pb_class)
    pb_descriptor = Google::Protobuf::DescriptorPool.generated_pool.lookup(pb_class)
    if !pb_descriptor.nil?
      pb_builder = pb_descriptor.msgclass
      pb_obj = pb_builder.new({})
      v = pb_obj.send(key)
      if !v.nil?
        v.class
      else
        nil
      end
    end
  end


  def pb3_compare_datatypes(value, key, key_prefix, pb_class, expected_type)
    mismatches = []
    if value.nil?
      is_mismatch = false
    else
      case value
      when ::Hash, Google::Protobuf::MessageExts
        is_mismatch = false
        descriptor = Google::Protobuf::DescriptorPool.generated_pool.lookup(pb_class).lookup(key)
        if !descriptor.subtype.nil?
          class_of_nested_object = pb3_get_descriptorpool_name(descriptor.subtype.msgclass)
          new_prefix = "#{key}."
          recursive_mismatches = pb3_get_type_mismatches(value, new_prefix, class_of_nested_object)
          mismatches.concat(recursive_mismatches)
        end
      when ::Array
        expected_type = pb3_get_expected_type(key, pb_class)
        is_mismatch = (expected_type != Google::Protobuf::RepeatedField)
        child_type = Google::Protobuf::DescriptorPool.generated_pool.lookup(pb_class).lookup(key).type
        value.each_with_index do |v, i|
          new_prefix = "#{key}."
          recursive_mismatches = pb3_compare_datatypes(v, i.to_s, new_prefix, pb_class, child_type)
          mismatches.concat(recursive_mismatches)
          is_mismatch |= recursive_mismatches.any?
        end # do
      else # is a scalar data type
        is_mismatch = !pb3_is_scalar_datatype_match(expected_type, value.class)
      end # case
    end # if value.nil?

    if is_mismatch
      mismatches << {"key" => "#{key_prefix}#{key}", "actual_type" => value.class, "expected_type" => expected_type, "value" => value}
    end
    mismatches
  end
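
  # Example (illustrative): for a definition expecting integer `count` and an
  # event carrying `"count" => "4"`, the result would contain
  #   {"key" => "count", "actual_type" => String, "expected_type" => Integer, "value" => "4"}
  # which pb3_convert_mismatched_types below uses to cast the value to 4.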

  def pb3_remove_typeconversion_tag(data)
    # remove the tag that we added to the event because
    # the protobuf definition might not have a field for tags
    data['tags'].delete(@pb3_typeconversion_tag)
    if data['tags'].length == 0
      data.delete('tags')
    end
    data
  end

  def pb3_get_descriptorpool_name(child_class)
    # make instance
    inst = child_class.new
    # get the lookup name for the DescriptorPool
    inst.class.descriptor.name
  end

  def pb3_is_scalar_datatype_match(expected_type, actual_type)
    if expected_type == actual_type
      true
    else
      e = expected_type.to_s.downcase.to_sym
      a = actual_type.to_s.downcase.to_sym
      case e
      when :string, :integer
        a == e
      when :float
        a == :float || a == :integer
      end
    end
  end


  def pb3_convert_mismatched_types_getter(struct, key)
    if struct.is_a? ::Hash
      struct[key]
    else
      struct.get(key)
    end
  end

  def pb3_convert_mismatched_types_setter(struct, key, value)
    if struct.is_a? ::Hash
      struct[key] = value
    else
      struct.set(key, value)
    end
    struct
  end

  def pb3_add_tag(event, tag)
    if event.get('tags').nil?
      event.set('tags', [tag])
    else
      existing_tags = event.get('tags')
      event.set("tags", existing_tags << tag)
    end
  end


  # Due to recursion on nested fields in the event object this method might be given an event (1st call) or a hash (2nd .. nth call).
  # The first call will be the event object, child objects will be hashes.
  def pb3_convert_mismatched_types(struct, mismatches)
    mismatches.each do |m|
      key = m['key']
      expected_type = m['expected_type']
      actual_type = m['actual_type']
      if key.include? "." # the mismatch is in a child object
        levels = key.split(/\./) # key is something like http_user_agent.minor_version and needs to be split.
        key = levels[0]
        sub_levels = levels.drop(1).join(".")
        new_mismatches = [{"key" => sub_levels, "actual_type" => m["actual_type"], "expected_type" => m["expected_type"]}]
        value = pb3_convert_mismatched_types_getter(struct, key)
        new_value = pb3_convert_mismatched_types(value, new_mismatches)
        struct = pb3_convert_mismatched_types_setter(struct, key, new_value)
      else
        value = pb3_convert_mismatched_types_getter(struct, key)
        begin
          new_value = nil # reset between iterations so a stale value is never reused
          case expected_type.to_s
          when "Integer"
            case actual_type.to_s
            when "String"
              new_value = value.to_i
            when "Float"
              if value.floor == value # convert values like 2.0 to 2, but not 2.1
                new_value = value.to_i
              end
            end
          when "String"
            new_value = value.to_s
          when "Float"
            new_value = value.to_f
          when "Boolean", "TrueClass", "FalseClass"
            new_value = value.to_s.downcase == "true"
          end
          if !new_value.nil?
            struct = pb3_convert_mismatched_types_setter(struct, key, new_value)
          end
        rescue Exception => ex
          @logger.debug("Protobuf encoding error 5: Could not convert types for protobuf encoding: #{ex}")
        end
      end # if key contains .
    end # mismatches.each
    struct
  end


  def pb3_prepare_for_encoding(datahash)
    # 0) Remove empty fields.
    datahash = datahash.select { |key, value| !value.nil? }

    # Preparation: the data cannot be encoded until certain criteria are met:
    # 1) remove @ signs from keys.
    # 2) convert timestamps and other objects to strings
    datahash = datahash.inject({}) { |x, (k, v)| x[k.gsub(/@/, '').to_sym] = (should_convert_to_string?(v) ? v.to_s : v); x }

    datahash.each do |key, value|
      datahash[key] = pb3_prepare_for_encoding(value) if value.is_a?(Hash)
    end

    datahash
  end


  def pb2_encode(event)
    data = pb2_prepare_for_encoding(event.to_hash, @class_name)
    msg = @pb_builder.new(data)
    msg.serialize_to_string
  rescue NoMethodError => e
    @logger.warn("Encoding error 2. Probably mismatching protobuf definition. Required fields in the protobuf definition are: " + event.to_hash.keys.join(", ") + " and the timestamp field name must not include a @.")
    raise e
  rescue => e
    @logger.warn("Encoding error 1: #{e.inspect}")
    raise e
  end


  def pb2_prepare_for_encoding(datahash, class_name)
    if datahash.is_a?(::Hash)
      # Preparation: the data cannot be encoded until certain criteria are met:
      # 1) remove @ signs from keys.
      # 2) convert timestamps and other objects to strings
      datahash = ::Hash[datahash.map { |(k, v)| [k.to_s.dup.gsub(/@/, ''), (should_convert_to_string?(v) ? v.to_s : v)] }]

      # Check if any of the fields in this hash are protobuf classes and if so, create a builder for them.
      meta = @metainfo_messageclasses[class_name]
      if meta
        meta.map do |(k, c)|
          if datahash.include?(k)
            original_value = datahash[k]
            datahash[k] =
              if original_value.is_a?(::Array)
                # make this field an array/list of protobuf objects:
                # value is a list of hashed complex objects, each of which needs to be protobuffed and
                # put back into the list.
                original_value.map { |x| pb2_prepare_for_encoding(x, c) }
              else
                proto_obj = pb2_create_instance(c)
                proto_obj.new(pb2_prepare_for_encoding(original_value, c)) # this line is reached in the colourtest for an enum.
                # Enums should not be instantiated. Should enums even be in the messageclasses? I don't think so!
              end # if is array
          end # if datahash_include
        end # do
      end # if meta
    end
    datahash
  end


  def should_convert_to_string?(v)
    !(v.is_a?(Integer) || v.is_a?(Float) || v.is_a?(::Hash) || v.is_a?(::Array) || [true, false].include?(v))
  end
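
  # For example, numbers, booleans, hashes and arrays pass through unchanged,
  # while objects such as a LogStash::Timestamp are serialized via to_s.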


  def pb2_create_instance(name)
    @logger.debug("Creating instance of " + name)
    name.split('::').inject(Object) { |n, c| n.const_get c }
  end


  def pb3_metadata_analysis(filename)
    regex_class_name = /\s*add_message "(?<name>.+?)" do\s+/
    regex_pbdefs = /\s*(optional|repeated)(\s*):(?<name>.+),(\s*):(?<type>\w+),(\s*)(?<position>\d+)(, \"(?<enum_class>.*?)\")?/
    class_name = ""
    type = ""
    field_name = ""
    File.readlines(filename).each do |line|
      if !(line =~ regex_class_name).nil?
        class_name = $1
        @metainfo_messageclasses[class_name] = {}
        @metainfo_enumclasses[class_name] = {}
      end # if
      if !(line =~ regex_pbdefs).nil?
        # named captures in order: $1 = name, $2 = type, $4 = enum_class
        field_name = $1
        type = $2
        field_class_name = $4
        if type == "message"
          @metainfo_messageclasses[class_name][field_name] = field_class_name
        elsif type == "enum"
          @metainfo_enumclasses[class_name][field_name] = field_class_name
        end
      end # if
    end # readlines
    if class_name.empty? # initialized to "", so a nil check could never fire
      @logger.error("Error 4: class name not found in file " + filename)
      raise ArgumentError, "Invalid protobuf file: " + filename
    end
  rescue Exception => e
    @logger.error("Error 3: unable to read pb definition from file " + filename + ". Reason: #{e.inspect}. Last settings were: class #{class_name} field #{field_name} type #{type}. Backtrace: " + e.backtrace.inspect.to_s)
    raise e
  end


  def pb2_metadata_analysis(filename)
    regex_class_start = /\s*set_fully_qualified_name \"(?<name>.+)\".*?/
    regex_enum_name = /\s*include ..ProtocolBuffers..Enum\s*/
    regex_pbdefs = /\s*(optional|repeated)(\s*):(?<type>.+),(\s*):(?<name>\w+),(\s*)(?<position>\d+)/
    # now we also need to find out which class it contains and the protobuf definitions in it.
    # We'll unfortunately need that later so that we can create nested objects.

    class_name = ""
    type = ""
    field_name = ""
    is_enum_class = false

    File.readlines(filename).each do |line|
      if !(line =~ regex_enum_name).nil?
        is_enum_class = true
      end

      if !(line =~ regex_class_start).nil?
        class_name = $1.gsub('.', "::").split('::').map { |word| word.capitalize }.join('::')
        if is_enum_class
          @metainfo_pb2_enumlist << class_name.downcase
        end
        is_enum_class = false # reset when the next class starts
      end
      if !(line =~ regex_pbdefs).nil?
        type = $1
        field_name = $2
        if type =~ /::/
          clean_type = type.gsub(/^:/, "")
          e = @metainfo_pb2_enumlist.include? clean_type.downcase

          if e
            if not @metainfo_enumclasses.key? class_name
              @metainfo_enumclasses[class_name] = {}
            end
            @metainfo_enumclasses[class_name][field_name] = clean_type
          else
            if not @metainfo_messageclasses.key? class_name
              @metainfo_messageclasses[class_name] = {}
            end
            @metainfo_messageclasses[class_name][field_name] = clean_type
          end
        end
      end
    end
    if class_name.empty? # initialized to "", so a nil check could never fire
      @logger.warn("Error 4: class name not found in file " + filename)
      raise ArgumentError, "Invalid protobuf file: " + filename
    end
  rescue LoadError => e
    raise ArgumentError.new("Could not load file: " + filename + ". Please try to use absolute paths. Current working dir: " + Dir.pwd + ", loadpath: " + $LOAD_PATH.join(" "))
  rescue => e
    @logger.warn("Error 3: unable to read pb definition from file " + filename + ". Reason: #{e.inspect}. Last settings were: class #{class_name} field #{field_name} type #{type}. Backtrace: " + e.backtrace.inspect.to_s)
    raise e
  end


  def load_protobuf_definition(filename)
    if filename.end_with?('.rb')
      # Add to the loading path of the protobuf definitions
      if (Pathname.new filename).absolute?
        begin
          require filename
        rescue Exception => e
          @logger.error("Unable to load file: #{filename}. Reason: #{e.inspect}")
          raise e
        end
      end

      if @protobuf_version == 3
        pb3_metadata_analysis(filename)
      else
        pb2_metadata_analysis(filename)
      end
    else
      @logger.warn("Not a ruby file: " + filename)
    end
  end

end # class LogStash::Codecs::Protobuf