logstash-codec-protobuf 1.3.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +56 -0
  3. data/CONTRIBUTORS +12 -0
  4. data/DEVELOPER.md +2 -0
  5. data/Gemfile +11 -0
  6. data/LICENSE +202 -0
  7. data/NOTICE.TXT +4 -0
  8. data/README.md +184 -0
  9. data/docs/index.asciidoc +241 -0
  10. data/google-protobuf-lib-update.md +57 -0
  11. data/lib/logstash/codecs/protobuf.rb +804 -0
  12. data/logstash-codec-protobuf.gemspec +33 -0
  13. data/spec/codecs/pb2_spec.rb +236 -0
  14. data/spec/codecs/pb3_decode_spec.rb +665 -0
  15. data/spec/codecs/pb3_encode_spec.rb +243 -0
  16. data/spec/helpers/pb2/ColourTestcase.pb.rb +35 -0
  17. data/spec/helpers/pb2/ColourTestcase.proto +24 -0
  18. data/spec/helpers/pb2/event.pb.rb +19 -0
  19. data/spec/helpers/pb2/event.proto +12 -0
  20. data/spec/helpers/pb2/header/header.pb.rb +16 -0
  21. data/spec/helpers/pb2/header/header.proto +8 -0
  22. data/spec/helpers/pb2/human.pb.rb +26 -0
  23. data/spec/helpers/pb2/unicorn.pb.rb +19 -0
  24. data/spec/helpers/pb2/unicorn_event.pb.rb +24 -0
  25. data/spec/helpers/pb3/FantasyHorse_pb.rb +48 -0
  26. data/spec/helpers/pb3/PhoneDirectory_pb.rb +37 -0
  27. data/spec/helpers/pb3/ProbeResult_pb.rb +26 -0
  28. data/spec/helpers/pb3/ResultListComposerRequest_pb.rb +25 -0
  29. data/spec/helpers/pb3/dnsmessage_pb.rb +82 -0
  30. data/spec/helpers/pb3/events_pb.rb +17 -0
  31. data/spec/helpers/pb3/header/header.proto3 +7 -0
  32. data/spec/helpers/pb3/header/header_pb.rb +12 -0
  33. data/spec/helpers/pb3/integertest_pb.rb +18 -0
  34. data/spec/helpers/pb3/messageA_pb.rb +16 -0
  35. data/spec/helpers/pb3/messageB_pb.rb +15 -0
  36. data/spec/helpers/pb3/rum2_pb.rb +87 -0
  37. data/spec/helpers/pb3/rum3_pb.rb +87 -0
  38. data/spec/helpers/pb3/rum_pb.rb +87 -0
  39. data/spec/helpers/pb3/struct_test_pb.rb +21 -0
  40. data/spec/helpers/pb3/unicorn_pb.rb +31 -0
  41. metadata +175 -0
data/lib/logstash/codecs/protobuf.rb
@@ -0,0 +1,804 @@
+ # encoding: utf-8
+ require 'logstash/codecs/base'
+ require 'logstash/util/charset'
+ require 'google/protobuf' # for protobuf3
+ require 'google/protobuf/struct_pb'
+ require 'protocol_buffers' # https://github.com/codekitchen/ruby-protocol-buffers, for protobuf2
+ require 'json'     # stdlib, used below for Google::Protobuf::Struct conversion
+ require 'pathname' # stdlib, used below for absolute/relative path checks
+
+
+ # Monkey-patch the `Google::Protobuf::DescriptorPool` with a mutex for exclusive
+ # access.
+ #
+ # The DescriptorPool instance is not thread-safe when loading protobuf
+ # definitions. This can cause unrecoverable errors when registering multiple
+ # concurrent pipelines that try to register the same dependency. The
+ # DescriptorPool instance is global to the JVM and shared among all pipelines.
+ class << Google::Protobuf::DescriptorPool
+   def with_lock
+     if !@mutex
+       @mutex = Mutex.new
+     end
+
+     return @mutex
+   end
+ end
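The lock is consumed via `synchronize`; a minimal sketch of the pattern that `register` below relies on (the required path is a placeholder):

    Google::Protobuf::DescriptorPool.with_lock.synchronize do
      # only one pipeline at a time may load definitions into the global pool
      require '/path/to/protobuf/definitions/UnicornProtobuf_pb.rb'
    end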
+
+ # This codec converts protobuf encoded messages into logstash events and vice versa.
+ #
+ # Requires the protobuf definitions as ruby files. You can create those using the [ruby-protoc compiler](https://github.com/codekitchen/ruby-protocol-buffers).
+ #
+ # The following shows a usage example for decoding protobuf 2 encoded events from a kafka stream:
+ # [source,ruby]
+ #   kafka
+ #   {
+ #     zk_connect => "127.0.0.1"
+ #     topic_id => "your_topic_goes_here"
+ #     key_deserializer_class => "org.apache.kafka.common.serialization.ByteArrayDeserializer"
+ #     value_deserializer_class => "org.apache.kafka.common.serialization.ByteArrayDeserializer"
+ #     codec => protobuf
+ #     {
+ #       class_name => "Animal::Unicorn"
+ #       include_path => ['/path/to/protobuf/definitions/UnicornProtobuf.pb.rb']
+ #     }
+ #   }
+ #
+ # Same example for protobuf 3:
+ # [source,ruby]
+ #   kafka
+ #   {
+ #     zk_connect => "127.0.0.1"
+ #     topic_id => "your_topic_goes_here"
+ #     key_deserializer_class => "org.apache.kafka.common.serialization.ByteArrayDeserializer"
+ #     value_deserializer_class => "org.apache.kafka.common.serialization.ByteArrayDeserializer"
+ #     codec => protobuf
+ #     {
+ #       class_name => "Animal.Unicorn"
+ #       include_path => ['/path/to/protobuf/definitions/UnicornProtobuf_pb.rb']
+ #       protobuf_version => 3
+ #     }
+ #   }
+ #
+ # Specifically for the kafka input: please set the deserializer classes as shown above.
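The codec works in the encoding direction as well; a hypothetical output-side configuration (class name and paths are placeholders) could look like this, with the value serializer set to byte arrays to match the binary payload:

    kafka
    {
      codec => protobuf
      {
        class_name => "Animal.Unicorn"
        include_path => ['/path/to/protobuf/definitions/UnicornProtobuf_pb.rb']
        protobuf_version => 3
      }
      value_serializer => "org.apache.kafka.common.serialization.ByteArraySerializer"
      topic_id => "your_topic_goes_here"
    }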
+
+ class LogStash::Codecs::Protobuf < LogStash::Codecs::Base
+   config_name 'protobuf'
+
+   # Name of the class to decode.
+   # If your protobuf 2 definition contains modules, prepend them to the class name with double colons like so:
+   # [source,ruby]
+   #   class_name => "Animal::Horse::Unicorn"
+   #
+   # This corresponds to a protobuf definition starting as follows:
+   # [source,ruby]
+   #   module Animal
+   #     module Horse
+   #       class Unicorn
+   #         # here are your field definitions.
+   #
+   # For protobuf 3, separate the modules with single dots.
+   # [source,ruby]
+   #   class_name => "Animal.Horse.Unicorn"
+   # Check the bottom of the generated protobuf ruby file. It contains lines like this:
+   # [source,ruby]
+   #   Animals.Unicorn = Google::Protobuf::DescriptorPool.generated_pool.lookup("Animals.Unicorn").msgclass
+   # Use the argument of the lookup call as the class_name for the codec config.
+   #
+   # If your class references other definitions, you only need to add the main class here.
+   config :class_name, :validate => :string, :required => true
+
+   # Relative path to the ruby file that contains the definition of `class_name`.
+   #
+   # The path is relative to `protobuf_root_directory`.
+   #
+   # `class_file` and `include_path` cannot be used at the same time.
+   config :class_file, :validate => :string, :default => '', :required => false
+
+   # Absolute path to the root directory that contains all referenced/used dependencies
+   # of the main class (`class_name`), including transitive dependencies.
+   #
+   # For instance:
+   #
+   #   pb3
+   #   ├── header
+   #   │   └── header_pb.rb
+   #   ├── messageA_pb.rb
+   #
+   # In this case `messageA_pb.rb` has an embedded message from `header/header_pb.rb`.
+   # If `class_file` is set to `messageA_pb.rb`, and `class_name` to
+   # `MessageA`, `protobuf_root_directory` must be set to `/path/to/pb3`, which includes
+   # both definitions.
+   config :protobuf_root_directory, :validate => :string, :required => false
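Following the layout above, a codec configuration using `class_file` together with `protobuf_root_directory` (paths are illustrative) would be:

    codec => protobuf
    {
      class_name => "MessageA"
      class_file => 'messageA_pb.rb'
      protobuf_root_directory => '/path/to/pb3'
      protobuf_version => 3
    }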
+
+   # List of absolute paths to files with protobuf definitions.
+   # When using more than one file, make sure to arrange the files in reverse order of dependency, so that each class is loaded before it is
+   # referred to by another.
+   #
+   # Example: a class _Unicorn_ referencing another protobuf class _Wings_
+   # [source,ruby]
+   #   module Animal
+   #     module Horse
+   #       class Unicorn
+   #         set_fully_qualified_name "Animal.Horse.Unicorn"
+   #         optional ::Animal::Bodypart::Wings, :wings, 1
+   #         optional :string, :name, 2
+   #         # here be more field definitions
+   #
+   # would be configured as
+   # [source,ruby]
+   #   include_path => ['/path/to/protobuf/definitions/Wings.pb.rb','/path/to/protobuf/definitions/Unicorn.pb.rb']
+   #
+   # `class_file` and `include_path` cannot be used at the same time.
+   config :include_path, :validate => :array, :default => [], :required => false
+
+   # Protocol buffer version switch. Defaults to version 2. Please note that the behaviour for enums varies between the versions.
+   # For protobuf 2 you will get integer representations for enums; for protobuf 3 you will get string representations due to a different converter library.
+   # Recommendation: use the translate plugin to restore the previous behaviour when upgrading.
+   config :protobuf_version, :validate => [2,3], :default => 2, :required => true
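As a sketch of that recommendation (the field name and mapping are hypothetical, and a recent logstash-filter-translate with `source`/`target` options is assumed), the protobuf 3 string representation can be mapped back to the integers produced under protobuf 2:

    filter {
      translate {
        source => "[fur_colour]"
        target => "[fur_colour]"
        override => true
        dictionary => {
          "BLUE"  => 1
          "GREEN" => 2
        }
      }
    }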
+
+   # To tolerate faulty messages that cannot be encoded or decoded, leave this set to false. If set to true, the pipeline will stop upon encountering an undecipherable message.
+   config :stop_on_error, :validate => :boolean, :default => false, :required => false
+
+   # Instruct the encoder to attempt converting data types to match the protobuf definitions. Available only for protobuf version 3.
+   config :pb3_encoder_autoconvert_types, :validate => :boolean, :default => true, :required => false
+
+   # Add meta information to `[@metadata][pb_oneof]` about which classes were chosen for [oneof](https://developers.google.com/protocol-buffers/docs/proto3#oneof) fields.
+   # Example values: for the protobuf definition
+   # ```
+   #   oneof :horse_type do
+   #     optional :unicorn, :message, 2, "FantasyUnicorn"
+   #     optional :pegasus, :message, 3, "FantasyPegasus"
+   #   end
+   # ```
+   # the field `[@metadata][pb_oneof][horse_type]` will be set to either `pegasus` or `unicorn`.
+   # Available only for protobuf version 3.
+   config :pb3_set_oneof_metainfo, :validate => :boolean, :default => false, :required => false
+
+
+   attr_reader :execution_context
+
+   # ID of the pipeline that this codec instance runs in.
+   def pipeline_id
+     respond_to?(:execution_context) && !execution_context.nil? ? execution_context.pipeline_id : "main"
+   end
+
+   def register
+     @metainfo_messageclasses = {}
+     @metainfo_enumclasses = {}
+     @metainfo_pb2_enumlist = []
+     @pb3_typeconversion_tag = "_protobuf_type_converted"
+
+     if @include_path.length > 0 and not class_file.strip.empty?
+       raise LogStash::ConfigurationError, "Cannot use `include_path` and `class_file` at the same time"
+     end
+
+     if @include_path.length == 0 and class_file.strip.empty?
+       raise LogStash::ConfigurationError, "Need to specify `include_path` or `class_file`"
+     end
+
+     should_register = Google::Protobuf::DescriptorPool.generated_pool.lookup(class_name).nil?
+
+     unless @protobuf_root_directory.nil? or @protobuf_root_directory.strip.empty?
+       if !$LOAD_PATH.include? @protobuf_root_directory and should_register
+         $LOAD_PATH.unshift(@protobuf_root_directory)
+       end
+     end
+
+     @class_file = "#{@protobuf_root_directory}/#{@class_file}" unless (Pathname.new @class_file).absolute? or @class_file.empty?
+     # exclusive access while loading protobuf definitions
+     Google::Protobuf::DescriptorPool.with_lock.synchronize do
+       # load from `class_file`
+       load_protobuf_definition(@class_file) if should_register and !@class_file.empty?
+       # load from `include_path`
+       include_path.each { |path| load_protobuf_definition(path) } if include_path.length > 0 and should_register
+       if @protobuf_version == 3
+         @pb_builder = Google::Protobuf::DescriptorPool.generated_pool.lookup(class_name).msgclass
+       else
+         @pb_builder = pb2_create_instance(class_name)
+       end
+     end
+   end
+
+   # Pipelines using this plugin cannot be reloaded.
+   # https://github.com/elastic/logstash/pull/6499
+   #
+   # The DescriptorPool instance registers the protobuf classes (and
+   # dependencies) as global objects. This makes it very difficult to reload a
+   # pipeline, because `class_name` and all of its dependencies are already
+   # registered.
+   def reloadable?
+     return false
+   end
+
+   def decode(data)
+     if @protobuf_version == 3
+       decoded = @pb_builder.decode(data.to_s)
+       hashed, meta = pb3_to_hash(decoded)
+     else # version = 2
+       decoded = @pb_builder.parse(data.to_s)
+       hashed = decoded.to_hash
+     end
+     e = LogStash::Event.new(hashed)
+     if @protobuf_version == 3 and @pb3_set_oneof_metainfo
+       e.set("[@metadata][pb_oneof]", meta)
+     end
+     yield e if block_given?
+   rescue => ex
+     @logger.warn("Couldn't decode protobuf: #{ex.inspect}")
+     if @stop_on_error
+       raise ex
+     else # keep the original message so that the user can debug it.
+       yield LogStash::Event.new(
+         "message" => data, "tags" => ["_protobufdecodefailure"],
+         "decoder_exception" => "#{ex.inspect}")
+     end
+   end # def decode
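Because failed events are tagged rather than dropped silently (when `stop_on_error` is false), downstream configuration can route them for inspection; a minimal sketch:

    filter {
      if "_protobufdecodefailure" in [tags] {
        # the original payload is in [message], the error in [decoder_exception];
        # e.g. route these events to a dead-letter index here
      }
    }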
+
+
+   def encode(event)
+     if @protobuf_version == 3
+       protobytes = pb3_encode(event)
+     else
+       protobytes = pb2_encode(event)
+     end
+     unless protobytes.nil? or protobytes.empty?
+       @on_event.call(event, protobytes)
+     end
+   end # def encode
+
+
+   # Get the builder class for any given protobuf object from the descriptor pool.
+   # Exposed for testing.
+   # @param [Object] pb_obj The pb object instance to do the lookup for
+   # @return [Object] The pb builder class
+   def pb3_class_for_name(pb_obj)
+     Google::Protobuf::DescriptorPool.generated_pool.lookup(pb_obj.class.descriptor.name)
+   end
+
+   private
+
+   # Helper function for debugging: print data types for fields of a hash
+   def print_types(hashy, i = 0)
+     hashy.each do |key, value|
+       puts ws(i) + "#{key} " + value.class.name
+       if value.is_a? ::Hash
+         print_types(value, i + 1)
+       end
+       if value.is_a? ::Array
+         value.each do |v|
+           puts ws(i + 1) + v.class.name
+           if v.is_a? ::Hash
+             print_types(v, i + 2)
+           end
+         end
+       end
+     end
+   end
+
+   # Helper function for debugging: indent print statements based on recursion level
+   def ws(i)
+     " " * i
+   end
+
+
+   # Converts the pb class to a hash, including its nested objects.
+   # @param [Object] input The pb class or any of its nested data structures
+   # @param [Numeric] i Level of recursion, needed only for whitespace indentation in debug output
+   # @return [Hash, Hash] The converted data as a hash + meta information about the one-of choices.
+   def pb3_to_hash(input, i = 0)
+     meta = {}
+     case input
+     when Google::Protobuf::Struct
+       result = JSON.parse input.to_json({
+         :preserve_proto_fieldnames => true,
+         :emit_defaults => true
+       })
+     when Google::Protobuf::MessageExts # it's a protobuf class
+       result = Hash.new
+       input.clone.to_h.keys.each {|key|
+         # 'class' is a reserved word so we cannot send() it to the pb object.
+         # It would give the pb definition class instead of the value of a field of such name.
+         if key.to_s == "class"
+           value = input[key]
+         else
+           value = input.send(key)
+         end
+         unless value.nil?
+           r, m = pb3_to_hash(value, 1 + i)
+           result[key.to_s] = r unless r.nil?
+           meta[key] = m unless m.empty?
+         end
+       }
+       result, m = oneof_clean(result, input, i)
+       meta = meta.merge(m) unless m.empty?
+     when ::Array, Google::Protobuf::RepeatedField # repeated fields and plain arrays
+       result = []
+       meta = []
+       input.each {|value|
+         r, m = pb3_to_hash(value, 1 + i)
+         result << r unless r.nil?
+         meta << m unless r.nil?
+       }
+     when ::Hash, Google::Protobuf::Map # map fields and plain hashes
+       result = {}
+       input.each {|key, value|
+         r, m = pb3_to_hash(value, 1 + i)
+         result[key.to_s] = r unless r.nil?
+         meta[key] = m unless m.empty?
+       }
+     when Symbol # is an Enum
+       result = input.to_s.sub(':','')
+     else # any other scalar
+       result = input
+     end
+     return result, meta
+   end
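To illustrate the return contract: decoding a `FantasyHorse` message (from the spec helpers; the field names here are hypothetical) whose `horse_type` oneof was set to `pegasus` might produce:

    hashed, meta = pb3_to_hash(decoded)
    # hashed => {"name" => "Winged Victory", "pegasus" => {"wingspan" => 120}}
    # meta   => {"horse_type" => "pegasus"}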
+
+
+   # For one-of options, remove the non-chosen options.
+   # @param [Hash] datahash The data hash including all options for each one-of field
+   # @param [Object] pb_obj The protobuf class from which datahash was created
+   # @param [Numeric] i Level of recursion, needed only for whitespace indentation in debug output
+   # @return [Hash, Hash] The reduced data as a hash + meta information about the one-of choices.
+   def oneof_clean(datahash, pb_obj, i = 0)
+     # If a field is part of a one-of then it must only be set if it's the selected option.
+     # In codec versions <= 1.2.x this was not the case. The .to_h delivered default values
+     # for every one-of option regardless of which one had been chosen, instead of respecting the XOR relation between them.
+     # The selected option's field name can be queried from input[parent_field]
+     # where parent_field is the name of the one-of field outside the option list.
+     # It's unclear though how to identify a) if a field is part of a one-of struct,
+     # because the class of the chosen option will always be a scalar,
+     # and b) the name of the parent field.
+     # As a workaround we look up the names of the 'parent fields' for this class and then the chosen options for those.
+     # Then we remove the other options which weren't set by the producer.
+     pb_class = pb3_class_for_name(pb_obj)
+     meta = {}
+     unless pb_class.nil?
+       pb_class.msgclass.descriptor.each_oneof { |field|
+         # Find out which one-of option has been set
+         chosen = pb_obj.send(field.name).to_s
+         # Go through the options and remove the names of the non-chosen fields from the hash.
+         # Whacky solution, better ideas are welcome.
+         field.each { |group_option|
+           if group_option.name != chosen
+             key = group_option.name
+             datahash.delete(key)
+           end
+         }
+         meta[field.name.to_s] = chosen
+       }
+     end # unless
+     return datahash, meta
+   end
+
+
+   def pb3_encode(event)
+     datahash = event.to_hash
+     # && instead of `and`: with `and`, only the nil check would be assigned due to operator precedence
+     is_recursive_call = !event.get('tags').nil? && event.get('tags').include?(@pb3_typeconversion_tag)
+     if is_recursive_call
+       datahash = pb3_remove_typeconversion_tag(datahash)
+     end
+     datahash = pb3_prepare_for_encoding(datahash)
+     if datahash.nil?
+       @logger.warn("Protobuf encoding error 4: empty data for event #{event.to_hash}")
+     end
+     if @pb_builder.nil?
+       @logger.warn("Protobuf encoding error 5: empty protobuf builder for class #{@class_name}")
+     end
+     pb_obj = @pb_builder.new(datahash)
+     @pb_builder.encode(pb_obj)
+   rescue ArgumentError => e
+     k = event.to_hash.keys.join(", ")
+     @logger.warn("Protobuf encoding error 1: Argument error (#{e.inspect}). Reason: probably a mismatching protobuf definition. \
+       The event's fields were: #{k}. Note that field names must not begin with an @ sign. The event has been discarded.")
+     nil
+   rescue TypeError => e
+     pb3_handle_type_errors(event, e, is_recursive_call, datahash)
+     nil
+   rescue => e
+     @logger.warn("Protobuf encoding error 3: #{e.inspect}. Input data: #{datahash}. The event has been discarded. Backtrace: #{e.backtrace}")
+     nil
+   end
+
+
+   def pb3_handle_type_errors(event, e, is_recursive_call, datahash)
+     begin
+       if is_recursive_call
+         mismatches = pb3_get_type_mismatches(datahash, "", @class_name) # computed here so that the log message can report them
+         @logger.warn("Protobuf encoding error 2.1: Type error (#{e.inspect}). Some types could not be converted. The event has been discarded. Type mismatches: #{mismatches}.")
+       else
+         if @pb3_encoder_autoconvert_types
+           msg = "Protobuf encoding error 2.2: Type error (#{e.inspect}). Will try to convert the data types. Original data: #{datahash}"
+           @logger.warn(msg)
+           mismatches = pb3_get_type_mismatches(datahash, "", @class_name)
+
+           event = pb3_convert_mismatched_types(event, mismatches)
+           # Add a (temporary) tag to handle the recursion stop
+           pb3_add_tag(event, @pb3_typeconversion_tag)
+           pb3_encode(event)
+         else
+           @logger.warn("Protobuf encoding error 2.3: Type error (#{e.inspect}). The event has been discarded. Try setting pb3_encoder_autoconvert_types => true for automatic type conversion.")
+         end
+       end
+     rescue TypeError => e
+       if @pb3_encoder_autoconvert_types
+         @logger.warn("Protobuf encoding error 2.4.1: (#{e.inspect}). Failed to convert data types. The event has been discarded. Original data: #{datahash}")
+       else
+         @logger.warn("Protobuf encoding error 2.4.2: (#{e.inspect}). The event has been discarded.")
+       end
+       if @stop_on_error
+         raise e
+       end
+       nil
+     rescue => ex
+       @logger.warn("Protobuf encoding error 2.5: (#{ex.inspect}). The event has been discarded. Auto-typecasting was on: #{@pb3_encoder_autoconvert_types}")
+       if @stop_on_error
+         raise ex
+       end
+       nil
+     end
+   end # pb3_handle_type_errors
+
+
+   def pb3_get_type_mismatches(data, key_prefix, pb_class)
+     mismatches = []
+     data.to_h.each do |key, value|
+       expected_type = pb3_get_expected_type(key, pb_class)
+       r = pb3_compare_datatypes(value, key, key_prefix, pb_class, expected_type)
+       mismatches.concat(r)
+     end # data.each
+     mismatches
+   end
+
+
+   def pb3_get_expected_type(key, pb_class)
+     pb_descriptor = Google::Protobuf::DescriptorPool.generated_pool.lookup(pb_class)
+     if !pb_descriptor.nil?
+       pb_builder = pb_descriptor.msgclass
+       pb_obj = pb_builder.new({})
+       v = pb_obj.send(key)
+       if !v.nil?
+         v.class
+       else
+         nil
+       end
+     end
+   end
+
+
+   def pb3_compare_datatypes(value, key, key_prefix, pb_class, expected_type)
+     mismatches = []
+     if value.nil?
+       is_mismatch = false
+     else
+       case value
+       when ::Hash, Google::Protobuf::MessageExts
+         is_mismatch = false
+         descriptor = Google::Protobuf::DescriptorPool.generated_pool.lookup(pb_class).lookup(key)
+         if !descriptor.subtype.nil?
+           class_of_nested_object = pb3_get_descriptorpool_name(descriptor.subtype.msgclass)
+           new_prefix = "#{key}."
+           recursive_mismatches = pb3_get_type_mismatches(value, new_prefix, class_of_nested_object)
+           mismatches.concat(recursive_mismatches)
+         end
+       when ::Array
+         expected_type = pb3_get_expected_type(key, pb_class)
+         is_mismatch = (expected_type != Google::Protobuf::RepeatedField)
+         child_type = Google::Protobuf::DescriptorPool.generated_pool.lookup(pb_class).lookup(key).type
+         value.each_with_index do |v, i|
+           new_prefix = "#{key}."
+           recursive_mismatches = pb3_compare_datatypes(v, i.to_s, new_prefix, pb_class, child_type)
+           mismatches.concat(recursive_mismatches)
+           is_mismatch |= recursive_mismatches.any?
+         end # do
+       else # is scalar data type
+         is_mismatch = ! pb3_is_scalar_datatype_match(expected_type, value.class)
+       end # case
+     end # if value.nil?
+
+     if is_mismatch
+       mismatches << {"key" => "#{key_prefix}#{key}", "actual_type" => value.class, "expected_type" => expected_type, "value" => value}
+     end
+     mismatches
+   end
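For illustration: given a hypothetical message class `Demo.Person` whose `age` field is an int32, an event carrying a string value would be reported like this:

    pb3_get_type_mismatches({"age" => "42"}, "", "Demo.Person")
    # => [{"key" => "age", "actual_type" => String, "expected_type" => Integer, "value" => "42"}]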
+
+   def pb3_remove_typeconversion_tag(data)
+     # remove the tag that we added to the event because
+     # the protobuf definition might not have a field for tags
+     data['tags'].delete(@pb3_typeconversion_tag)
+     if data['tags'].length == 0
+       data.delete('tags')
+     end
+     data
+   end
+
+   def pb3_get_descriptorpool_name(child_class)
+     # make an instance
+     inst = child_class.new
+     # get the lookup name for the DescriptorPool
+     inst.class.descriptor.name
+   end
+
+   def pb3_is_scalar_datatype_match(expected_type, actual_type)
+     if expected_type == actual_type
+       true
+     else
+       e = expected_type.to_s.downcase.to_sym
+       a = actual_type.to_s.downcase.to_sym
+       case e
+       when :string, :integer
+         a == e
+       when :float
+         a == :float || a == :integer
+       end
+     end
+   end
+
+
+   def pb3_convert_mismatched_types_getter(struct, key)
+     if struct.is_a? ::Hash
+       struct[key]
+     else
+       struct.get(key)
+     end
+   end
+
+   def pb3_convert_mismatched_types_setter(struct, key, value)
+     if struct.is_a? ::Hash
+       struct[key] = value
+     else
+       struct.set(key, value)
+     end
+     struct
+   end
+
+   def pb3_add_tag(event, tag)
+     if event.get('tags').nil?
+       event.set('tags', [tag])
+     else
+       existing_tags = event.get('tags')
+       event.set("tags", existing_tags << tag)
+     end
+   end
+
+
+   # Due to recursion on nested fields in the event object this method might be given an event (1st call) or a hash (2nd .. nth call).
+   # The first call will be the event object, child objects will be hashes.
+   def pb3_convert_mismatched_types(struct, mismatches)
+     mismatches.each do |m|
+       key = m['key']
+       expected_type = m['expected_type']
+       actual_type = m['actual_type']
+       if key.include? "." # the mismatch is in a child object
+         levels = key.split(/\./) # key is something like http_user_agent.minor_version and needs to be split.
+         key = levels[0]
+         sub_levels = levels.drop(1).join(".")
+         new_mismatches = [{"key"=>sub_levels, "actual_type"=>m["actual_type"], "expected_type"=>m["expected_type"]}]
+         value = pb3_convert_mismatched_types_getter(struct, key)
+         new_value = pb3_convert_mismatched_types(value, new_mismatches)
+         struct = pb3_convert_mismatched_types_setter(struct, key, new_value)
+       else
+         value = pb3_convert_mismatched_types_getter(struct, key)
+         new_value = nil # reset between iterations so that a stale value from a previous mismatch is never reused
+         begin
+           case expected_type.to_s
+           when "Integer"
+             case actual_type.to_s
+             when "String"
+               new_value = value.to_i
+             when "Float"
+               if value.floor == value # convert values like 2.0 to 2, but not 2.1
+                 new_value = value.to_i
+               end
+             end
+           when "String"
+             new_value = value.to_s
+           when "Float"
+             new_value = value.to_f
+           when "Boolean","TrueClass", "FalseClass"
+             new_value = value.to_s.downcase == "true"
+           end
+           if !new_value.nil?
+             struct = pb3_convert_mismatched_types_setter(struct, key, new_value)
+           end
+         rescue Exception => ex
+           @logger.debug("Protobuf encoding error 5: Could not convert types for protobuf encoding: #{ex}")
+         end
+       end # if key contains .
+     end # mismatches.each
+     struct
+   end
+
+
+   def pb3_prepare_for_encoding(datahash)
+     # 0) Remove empty fields.
+     datahash = datahash.select { |key, value| !value.nil? }
+
+     # Preparation: the data cannot be encoded until certain criteria are met:
+     # 1) remove @ signs from keys.
+     # 2) convert timestamps and other objects to strings.
+     datahash = datahash.inject({}){|x,(k,v)| x[k.gsub(/@/,'').to_sym] = (should_convert_to_string?(v) ? v.to_s : v); x}
+
+     datahash.each do |key, value|
+       datahash[key] = pb3_prepare_for_encoding(value) if value.is_a?(Hash)
+     end
+
+     datahash
+   end
+
+
+   def pb2_encode(event)
+     data = pb2_prepare_for_encoding(event.to_hash, @class_name)
+     msg = @pb_builder.new(data)
+     msg.serialize_to_string
+   rescue NoMethodError => e
+     @logger.warn("Encoding error 2. Probably a mismatching protobuf definition. The event's fields were: " + event.to_hash.keys.join(", ") + ". Note that the timestamp field name must not include an @.")
+     raise e
+   rescue => e
+     @logger.warn("Encoding error 1: #{e.inspect}")
+     raise e
+   end
+
+
+   def pb2_prepare_for_encoding(datahash, class_name)
+     if datahash.is_a?(::Hash)
+       # Preparation: the data cannot be encoded until certain criteria are met:
+       # 1) remove @ signs from keys.
+       # 2) convert timestamps and other objects to strings.
+       datahash = ::Hash[datahash.map{|(k,v)| [k.to_s.dup.gsub(/@/,''), (should_convert_to_string?(v) ? v.to_s : v)] }]
+
+       # Check if any of the fields in this hash are protobuf classes and if so, create a builder for them.
+       meta = @metainfo_messageclasses[class_name]
+       if meta
+         meta.map do |(k,c)|
+           if datahash.include?(k)
+             original_value = datahash[k]
+             datahash[k] =
+               if original_value.is_a?(::Array)
+                 # make this field an array/list of protobuf objects
+                 # value is a list of hashed complex objects, each of which needs to be protobuffed and
+                 # put back into the list. The mapped list is the branch value, so the prepared elements are not discarded.
+                 original_value.map { |x| pb2_prepare_for_encoding(x, c) }
+               else
+                 proto_obj = pb2_create_instance(c)
+                 proto_obj.new(pb2_prepare_for_encoding(original_value, c)) # this line is reached in the colourtest for an enum.
+                 # Enums should not be instantiated. Should enums even be in the messageclasses? I don't think so!
+               end # if is array
+           end # if datahash.include?
+         end # do
+       end # if meta
+     end
+     datahash
+   end
+
+
+   def should_convert_to_string?(v)
+     !(v.is_a?(Integer) || v.is_a?(Float) || v.is_a?(::Hash) || v.is_a?(::Array) || [true, false].include?(v))
+   end
+
+
+   def pb2_create_instance(name)
+     @logger.debug("Creating instance of " + name)
+     name.split('::').inject(Object) { |n,c| n.const_get c }
+   end
+
+
+   def pb3_metadata_analysis(filename)
+     regex_class_name = /\s*add_message "(?<name>.+?)" do\s+/
+     regex_pbdefs = /\s*(optional|repeated)(\s*):(?<name>.+),(\s*):(?<type>\w+),(\s*)(?<position>\d+)(, \"(?<enum_class>.*?)\")?/
+     class_name = ""
+     type = ""
+     field_name = ""
+     File.readlines(filename).each do |line|
+       if ! (line =~ regex_class_name).nil?
+         class_name = $1
+         @metainfo_messageclasses[class_name] = {}
+         @metainfo_enumclasses[class_name] = {}
+       end # if
+       if ! (line =~ regex_pbdefs).nil?
+         field_name = $1
+         type = $2
+         field_class_name = $4
+         if type == "message"
+           @metainfo_messageclasses[class_name][field_name] = field_class_name
+         elsif type == "enum"
+           @metainfo_enumclasses[class_name][field_name] = field_class_name
+         end
+       end # if
+     end # readlines
+     if class_name.nil?
+       @logger.error("Error 4: class name not found in file " + filename)
+       raise ArgumentError, "Invalid protobuf file: " + filename
+     end
+   rescue Exception => e
+     @logger.error("Error 3: unable to read pb definition from file " + filename + ". Reason: #{e.inspect}. Last settings were: class #{class_name} field #{field_name} type #{type}. Backtrace: " + e.backtrace.inspect)
+     raise e
+   end
+
+
+   def pb2_metadata_analysis(filename)
+     regex_class_start = /\s*set_fully_qualified_name \"(?<name>.+)\".*?/
+     regex_enum_name = /\s*include ..ProtocolBuffers..Enum\s*/
+     regex_pbdefs = /\s*(optional|repeated)(\s*):(?<type>.+),(\s*):(?<name>\w+),(\s*)(?<position>\d+)/
+     # now we also need to find out which class the file contains and the protobuf definitions in it.
+     # We'll unfortunately need that later so that we can create nested objects.
+
+     class_name = ""
+     type = ""
+     field_name = ""
+     is_enum_class = false
+
+     File.readlines(filename).each do |line|
+       if ! (line =~ regex_enum_name).nil?
+         is_enum_class = true
+       end
+
+       if ! (line =~ regex_class_start).nil?
+         class_name = $1.gsub('.',"::").split('::').map {|word| word.capitalize}.join('::')
+         if is_enum_class
+           @metainfo_pb2_enumlist << class_name.downcase
+         end
+         is_enum_class = false # reset when the next class starts
+       end
+       if ! (line =~ regex_pbdefs).nil?
+         type = $1
+         field_name = $2
+         if type =~ /::/
+           clean_type = type.gsub(/^:/,"")
+           e = @metainfo_pb2_enumlist.include? clean_type.downcase
+
+           if e
+             if not @metainfo_enumclasses.key? class_name
+               @metainfo_enumclasses[class_name] = {}
+             end
+             @metainfo_enumclasses[class_name][field_name] = clean_type
+           else
+             if not @metainfo_messageclasses.key? class_name
+               @metainfo_messageclasses[class_name] = {}
+             end
+             @metainfo_messageclasses[class_name][field_name] = clean_type
+           end
+         end
+       end
+     end
+     if class_name.nil?
+       @logger.warn("Error 4: class name not found in file " + filename)
+       raise ArgumentError, "Invalid protobuf file: " + filename
+     end
+   rescue LoadError => e
+     raise ArgumentError.new("Could not load file: " + filename + ". Please try to use absolute paths. Current working dir: " + Dir.pwd + ", loadpath: " + $LOAD_PATH.join(" "))
+   rescue => e
+     @logger.warn("Error 3: unable to read pb definition from file " + filename + ". Reason: #{e.inspect}. Last settings were: class #{class_name} field #{field_name} type #{type}. Backtrace: " + e.backtrace.inspect)
+     raise e
+   end
+
+
+   def load_protobuf_definition(filename)
+     if filename.end_with?('.rb')
+       # Add the file to the loading path of the protobuf definitions
+       if (Pathname.new filename).absolute?
+         begin
+           require filename
+         rescue Exception => e
+           @logger.error("Unable to load file: #{filename}. Reason: #{e.inspect}")
+           raise e
+         end
+       end
+
+       if @protobuf_version == 3
+         pb3_metadata_analysis(filename)
+       else
+         pb2_metadata_analysis(filename)
+       end
+
+     else
+       @logger.warn("Not a ruby file: " + filename)
+     end
+   end
+
+ end # class LogStash::Codecs::Protobuf
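To tie the pieces together, a decode/encode round trip in the style of the gem's specs (run under JRuby with logstash-core available; the class name, path, and field are illustrative):

    require 'logstash/codecs/protobuf'
    require 'logstash/event'

    codec = LogStash::Codecs::Protobuf.new(
      "class_name" => "Animal.Unicorn",
      "include_path" => ['/path/to/protobuf/definitions/UnicornProtobuf_pb.rb'],
      "protobuf_version" => 3
    )
    codec.register

    # encode hands the serialized bytes to the on_event callback;
    # decode turns them back into a LogStash::Event
    codec.on_event do |event, bytes|
      codec.decode(bytes) { |decoded| puts decoded.get("name") }
    end
    codec.encode(LogStash::Event.new("name" => "Glitter"))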