logstash-codec-protobuf 1.2.8-jruby

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +45 -0
  3. data/CONTRIBUTORS +12 -0
  4. data/DEVELOPER.md +2 -0
  5. data/Gemfile +11 -0
  6. data/LICENSE +202 -0
  7. data/NOTICE.TXT +4 -0
  8. data/README.md +184 -0
  9. data/docs/index.asciidoc +241 -0
  10. data/google-protobuf-lib-update.md +57 -0
  11. data/lib/logstash/codecs/protobuf.rb +735 -0
  12. data/logstash-codec-protobuf.gemspec +28 -0
  13. data/spec/codecs/pb2_spec.rb +236 -0
  14. data/spec/codecs/pb3_decode_spec.rb +445 -0
  15. data/spec/codecs/pb3_encode_spec.rb +243 -0
  16. data/spec/helpers/pb2/ColourTestcase.pb.rb +35 -0
  17. data/spec/helpers/pb2/ColourTestcase.proto +24 -0
  18. data/spec/helpers/pb2/event.pb.rb +19 -0
  19. data/spec/helpers/pb2/event.proto +12 -0
  20. data/spec/helpers/pb2/header/header.pb.rb +16 -0
  21. data/spec/helpers/pb2/header/header.proto +8 -0
  22. data/spec/helpers/pb2/human.pb.rb +26 -0
  23. data/spec/helpers/pb2/unicorn.pb.rb +19 -0
  24. data/spec/helpers/pb2/unicorn_event.pb.rb +24 -0
  25. data/spec/helpers/pb3/FantasyHorse_pb.rb +44 -0
  26. data/spec/helpers/pb3/ProbeResult_pb.rb +26 -0
  27. data/spec/helpers/pb3/dnsmessage_pb.rb +82 -0
  28. data/spec/helpers/pb3/events.proto3 +10 -0
  29. data/spec/helpers/pb3/events_pb.rb +17 -0
  30. data/spec/helpers/pb3/header/header.proto3 +7 -0
  31. data/spec/helpers/pb3/header/header_pb.rb +12 -0
  32. data/spec/helpers/pb3/integertest_pb.rb +20 -0
  33. data/spec/helpers/pb3/messageA.proto3 +12 -0
  34. data/spec/helpers/pb3/messageA_pb.rb +16 -0
  35. data/spec/helpers/pb3/messageB.proto3 +12 -0
  36. data/spec/helpers/pb3/messageB_pb.rb +16 -0
  37. data/spec/helpers/pb3/rum2_pb.rb +87 -0
  38. data/spec/helpers/pb3/rum3_pb.rb +87 -0
  39. data/spec/helpers/pb3/rum_pb.rb +87 -0
  40. data/spec/helpers/pb3/unicorn.proto3 +31 -0
  41. data/spec/helpers/pb3/unicorn_pb.rb +31 -0
  42. metadata +177 -0
@@ -0,0 +1,735 @@
1
+ # encoding: utf-8
2
+ require 'logstash/codecs/base'
3
+ require 'logstash/util/charset'
4
+ require 'google/protobuf' # for protobuf3
5
+ require 'protocol_buffers' # https://github.com/codekitchen/ruby-protocol-buffers, for protobuf2
6
+
7
+ # Monkey-patch the `Google::Protobuf::DescriptorPool` with a mutex for exclusive
8
+ # access.
9
+ #
10
+ # The DescriptorPool instance is not thread-safe when loading protobuf
11
+ # definitions. This can cause unrecoverable errors when registering multiple
12
+ # concurrent pipelines that try to register the same dependency. The
13
+ # DescriptorPool instance is global to the JVM and shared among all pipelines.
14
# Create the shared mutex eagerly, while this file is being loaded. The previous
# lazy `if !@mutex` check inside `with_lock` was itself a check-then-set race:
# two pipelines registering at the same time could each create their own Mutex
# and then both enter the "exclusive" section.
unless Google::Protobuf::DescriptorPool.instance_variable_defined?(:@mutex)
  Google::Protobuf::DescriptorPool.instance_variable_set(:@mutex, Mutex.new)
end

class << Google::Protobuf::DescriptorPool
  # Returns the single Mutex that guards loading of protobuf definitions.
  # Callers are expected to use `with_lock.synchronize { ... }`.
  def with_lock
    @mutex
  end
end
23
+
24
+ # This codec converts protobuf encoded messages into logstash events and vice versa.
25
+ #
26
+ # Requires the protobuf definitions as ruby files. You can create those using the [ruby-protoc compiler](https://github.com/codekitchen/ruby-protocol-buffers).
27
+ #
28
+ # The following shows a usage example for decoding protobuf 2 encoded events from a kafka stream:
29
+ # [source,ruby]
30
+ # kafka
31
+ # {
32
+ # zk_connect => "127.0.0.1"
33
+ # topic_id => "your_topic_goes_here"
34
+ # key_deserializer_class => "org.apache.kafka.common.serialization.ByteArrayDeserializer"
35
+ # value_deserializer_class => "org.apache.kafka.common.serialization.ByteArrayDeserializer"
36
+ # codec => protobuf
37
+ # {
38
+ # class_name => "Animal::Unicorn"
39
+ # include_path => ['/path/to/protobuf/definitions/UnicornProtobuf.pb.rb']
40
+ # }
41
+ # }
42
+ #
43
+ # Same example for protobuf 3:
44
+ # [source,ruby]
45
+ # kafka
46
+ # {
47
+ # zk_connect => "127.0.0.1"
48
+ # topic_id => "your_topic_goes_here"
49
+ # key_deserializer_class => "org.apache.kafka.common.serialization.ByteArrayDeserializer"
50
+ # value_deserializer_class => "org.apache.kafka.common.serialization.ByteArrayDeserializer"
51
+ # codec => protobuf
52
+ # {
53
+ # class_name => "Animal.Unicorn"
54
+ # include_path => ['/path/to/protobuf/definitions/UnicornProtobuf_pb.rb']
55
+ # protobuf_version => 3
56
+ # }
57
+ # }
58
+ #
59
+ # Specifically for the kafka input: please set the deserializer classes as shown above.
60
+
61
+ class LogStash::Codecs::Protobuf < LogStash::Codecs::Base
62
+ config_name 'protobuf'
63
+
64
+ # Name of the class to decode.
65
+ # If your protobuf 2 definition contains modules, prepend them to the class name with double colons like so:
66
+ # [source,ruby]
67
+ # class_name => "Animal::Horse::Unicorn"
68
+ #
69
+ # This corresponds to a protobuf definition starting as follows:
70
+ # [source,ruby]
71
+ # module Animal
72
+ # module Horse
73
+ # class Unicorn
74
+ # # here are your field definitions.
75
+ #
76
+ # For protobuf 3 separate the modules with single dots.
77
+ # [source,ruby]
78
+ # class_name => "Animal.Horse.Unicorn"
79
+ # Check the bottom of the generated protobuf ruby file. It contains lines like this:
80
+ # [source,ruby]
81
+ # Animals.Unicorn = Google::Protobuf::DescriptorPool.generated_pool.lookup("Animals.Unicorn").msgclass
82
+ # Use the parameter for the lookup call as the class_name for the codec config.
83
+ #
84
+ # If your class references other definitions: you only have to add the main class here.
85
+ config :class_name, :validate => :string, :required => true
86
+
87
+ # Relative path to the ruby file that contains class_name
88
+ #
89
+ # Relative path (from `protobuf_root_directory`) that holds the definition of the class specified in
90
+ # `class_name`.
91
+ #
92
+ # `class_file` and `include_path` cannot be used at the same time.
93
+ config :class_file, :validate => :string, :default => '', :required => false
94
+
95
+ # Absolute path to the root directory that contains all referenced/used dependencies
96
+ # of the main class (`class_name`) or any of its dependencies.
97
+ #
98
+ # For instance:
99
+ #
100
+ # pb3
101
+ # ├── header
102
+ # │ └── header_pb.rb
103
+ # ├── messageA_pb.rb
104
+ #
105
+ # In this case `messageA_pb.rb` has an embedded message from `header/header_pb.rb`.
106
+ # If `class_file` is set to `messageA_pb.rb`, and `class_name` to
107
+ # `MessageA`, `protobuf_root_directory` must be set to `/path/to/pb3`, which includes
108
+ # both definitions.
109
+ config :protobuf_root_directory, :validate => :string, :required => false
110
+
111
+ # List of absolute paths to files with protobuf definitions.
112
+ # When using more than one file, make sure to arrange the files in reverse order of dependency so that each class is loaded before it is
113
+ # referred to by another.
114
+ #
115
+ # Example: a class _Unicorn_ referencing another protobuf class _Wings_
116
+ # [source,ruby]
117
+ # module Animal
118
+ # module Horse
119
+ # class Unicorn
120
+ # set_fully_qualified_name "Animal.Horse.Unicorn"
121
+ # optional ::Animal::Bodypart::Wings, :wings, 1
122
+ # optional :string, :name, 2
123
+ # # here be more field definitions
124
+ #
125
+ # would be configured as
126
+ # [source,ruby]
127
+ # include_path => ['/path/to/protobuf/definitions/Wings.pb.rb','/path/to/protobuf/definitions/Unicorn.pb.rb']
128
+ #
129
+ # `class_file` and `include_path` cannot be used at the same time.
130
+ config :include_path, :validate => :array, :default => [], :required => false
131
+
132
+ # Protocol buffer version switch. Defaults to version 2. Please note that the behaviour for enums varies between the versions.
133
+ # For protobuf 2 you will get integer representations for enums, for protobuf 3 you'll get string representations due to a different converter library.
134
+ # Recommendation: use the translate plugin to restore previous behaviour when upgrading.
135
+ config :protobuf_version, :validate => [2,3], :default => 2, :required => true
136
+
137
+ # To tolerate faulty messages that cannot be en/decoded, set this to false. Otherwise the pipeline will stop upon encountering a non decipherable message.
138
+ config :stop_on_error, :validate => :boolean, :default => false, :required => false
139
+
140
+ # Instruct the encoder to attempt converting data types to match the protobuf definitions. Available only for protobuf version 3.
141
+ config :pb3_encoder_autoconvert_types, :validate => :boolean, :default => true, :required => false
142
+
143
+ # Add meta information to `[@metadata][pb_oneof]` about which classes were chosen for [oneof](https://developers.google.com/protocol-buffers/docs/proto3#oneof) fields.
144
+ # Example values: for the protobuf definition
145
+ # ``` oneof :horse_type do
146
+ # optional :unicorn, :message, 2, "FantasyUnicorn"
147
+ # optional :pegasus, :message, 3, "FantasyPegasus"
148
+ # end
149
+ # ```
150
+ # the field `[@metadata][pb_oneof][horse_type]` will be set to either `pegasus` or `unicorn`.
151
+ # Available only for protobuf version 3.
152
+ config :pb3_set_oneof_metainfo, :validate => :boolean, :default => false, :required => false
153
+
154
+
155
+ attr_reader :execution_context
156
+
157
+ # id of the pipeline whose events you want to read from.
158
# Returns the id of the pipeline this codec runs in. Falls back to "main" when
# no execution context is available.
def pipeline_id
  return "main" unless respond_to?(:execution_context)

  context = execution_context
  context.nil? ? "main" : context.pipeline_id
end
161
+
162
# Validates the configuration, loads the protobuf definitions (once per class)
# and prepares the builder used by `decode` and `encode`.
#
# Raises LogStash::ConfigurationError when `include_path` and `class_file` are
# both set, when neither is set, or when the protobuf 3 class cannot be found
# in the descriptor pool after loading.
def register
  @metainfo_messageclasses = {}
  @metainfo_enumclasses = {}
  @metainfo_pb2_enumlist = []
  @pb3_typeconversion_tag = "_protobuf_type_converted"

  if @include_path.length > 0 && !class_file.strip.empty?
    raise LogStash::ConfigurationError, "Cannot use `include_path` and `class_file` at the same time"
  end

  if @include_path.length == 0 && class_file.strip.empty?
    raise LogStash::ConfigurationError, "Need to specify `include_path` or `class_file`"
  end

  # A relative class_file is resolved against protobuf_root_directory.
  @class_file = "#{@protobuf_root_directory}/#{@class_file}" unless Pathname.new(@class_file).absolute? || @class_file.empty?

  # Exclusive access while inspecting and loading protobuf definitions. The
  # "is it registered already?" lookup happens inside the lock so that it cannot
  # run while another pipeline is in the middle of loading definitions.
  Google::Protobuf::DescriptorPool.with_lock.synchronize do
    pool = Google::Protobuf::DescriptorPool.generated_pool
    should_register = pool.lookup(class_name).nil?

    if should_register
      root = @protobuf_root_directory
      unless root.nil? || root.strip.empty? || $LOAD_PATH.include?(root)
        $LOAD_PATH.unshift(root)
      end

      # load from `class_file`
      load_protobuf_definition(@class_file) unless @class_file.empty?
      # load from `include_path`
      include_path.each { |path| load_protobuf_definition(path) }
    end

    if @protobuf_version == 3
      descriptor = pool.lookup(class_name)
      if descriptor.nil?
        # Previously this surfaced as NoMethodError (`msgclass` on nil).
        raise LogStash::ConfigurationError, "Protobuf class `#{class_name}` was not found in the descriptor pool. Check `class_name` and the configured definition files."
      end
      @pb_builder = descriptor.msgclass
    else
      @pb_builder = pb2_create_instance(class_name)
    end
  end
end
200
+
201
+ # Pipelines using this plugin cannot be reloaded.
202
+ # https://github.com/elastic/logstash/pull/6499
203
+ #
204
+ # The DescriptorPool instance registers the protobuf classes (and
205
+ # dependencies) as global objects. This makes it very difficult to reload a
206
+ # pipeline, because `class_name` and all of its dependencies are already
207
+ # registered.
208
# Always false: pipelines using this codec cannot be reloaded.
def reloadable?
  false
end
211
+
212
# Decodes one protobuf-encoded message and yields it as a LogStash::Event.
#
# data - the raw message; it is converted with `to_s` before parsing.
#
# When decoding fails a warning is logged. With `stop_on_error` the exception
# is re-raised; otherwise an event carrying the original message and the tag
# `_protobufdecodefailure` is yielded so that the user can debug it.
#
# NOTE(review): the rescue also covers exceptions raised by the caller's block,
# which are then reported as decode failures as well.
def decode(data)
  if @protobuf_version == 3
    decoded = @pb_builder.decode(data.to_s)
    meta = pb3_get_oneof_metainfo(decoded, @class_name) if @pb3_set_oneof_metainfo
    h = pb3_deep_to_hash(decoded)
  else
    decoded = @pb_builder.parse(data.to_s)
    h = decoded.to_hash
  end

  e = LogStash::Event.new(h)
  e.set("[@metadata][pb_oneof]", meta) if @protobuf_version == 3 && @pb3_set_oneof_metainfo
  yield e if block_given?
rescue => ex
  @logger.warn("Couldn't decode protobuf: #{ex.inspect}.")
  raise ex if stop_on_error

  # Keep the original message so that the user can debug it. The block guard
  # matters: an unguarded `yield` here raised LocalJumpError when decode was
  # called without a block.
  yield LogStash::Event.new("message" => data, "tags" => ["_protobufdecodefailure"]) if block_given?
end # def decode
236
+
237
+
238
# Encodes an event with the configured protobuf version and hands the bytes to
# the `@on_event` callback. Nothing is emitted when encoding produced nil or an
# empty result.
def encode(event)
  protobytes = @protobuf_version == 3 ? pb3_encode(event) : pb2_encode(event)
  return if protobytes.nil? || protobytes.empty?

  @on_event.call(event, protobytes)
end # def encode
248
+
249
+
250
+ private
251
# Recursively converts a decoded protobuf 3 object into plain Ruby data:
# messages and hashes become hashes, arrays stay arrays, symbols (enum values)
# become strings, everything else is passed through unchanged.
def pb3_deep_to_hash(input)
  case input
  when Google::Protobuf::MessageExts # it's a protobuf class
    input.to_h.each_with_object({}) do |(key, value), converted|
      converted[key] = pb3_deep_to_hash(value)
    end
  when ::Array
    input.map { |element| pb3_deep_to_hash(element) }
  when ::Hash
    input.each_with_object({}) do |(key, value), converted|
      converted[key] = pb3_deep_to_hash(value)
    end
  when Symbol # is an Enum
    input.to_s.sub(':', '')
  else
    input
  end
end
275
+
276
# Encodes an event as a protobuf 3 message.
#
# Returns the encoded bytes, or nil when the event could not be encoded (a
# warning is logged in that case). On a TypeError the work is delegated to
# `pb3_handle_type_errors`, which may convert the data types and retry; the
# bytes produced by that retry are returned.
def pb3_encode(event)
  datahash = event.to_hash

  # `&&` is required here: with `and` the assignment bound tighter than the
  # conjunction, so every event that carried any tags was treated as a retry.
  tags = event.get('tags')
  is_recursive_call = !tags.nil? && tags.include?(@pb3_typeconversion_tag)
  datahash = pb3_remove_typeconversion_tag(datahash) if is_recursive_call

  datahash = pb3_prepare_for_encoding(datahash)
  if datahash.nil?
    @logger.warn("Protobuf encoding error 4: empty data for event #{event.to_hash}")
  end
  if @pb_builder.nil?
    @logger.warn("Protobuf encoding error 5: empty protobuf builder for class #{@class_name}")
  end

  pb_obj = @pb_builder.new(datahash)
  @pb_builder.encode(pb_obj)
rescue ArgumentError => e
  k = event.to_hash.keys.join(", ")
  @logger.warn("Protobuf encoding error 1: Argument error (#{e.inspect}). Reason: probably mismatching protobuf definition. Required fields in the protobuf definition are: #{k} and fields must not begin with @ sign. The event has been discarded.")
  nil
rescue TypeError => e
  # Pass on the bytes of a successful auto-conversion retry instead of dropping
  # them; anything that is not an encoded string counts as "discarded".
  retried = pb3_handle_type_errors(event, e, is_recursive_call, datahash)
  retried.is_a?(::String) ? retried : nil
rescue => e
  @logger.warn("Protobuf encoding error 3: #{e.inspect}. Event discarded. Input data: #{datahash}. The event has been discarded. Backtrace: #{e.backtrace}")
  nil
end
306
+
307
+
308
+
309
+
310
# Handles a TypeError raised while encoding a protobuf 3 event.
#
# event             - the event that failed to encode
# e                 - the TypeError raised by the encoder
# is_recursive_call - true when this event was already type-converted once
# datahash          - the prepared data that failed to encode
#
# On the first failure, and with `pb3_encoder_autoconvert_types` enabled, the
# mismatching fields are converted, the event is tagged to stop the recursion
# and encoding is retried; the result of that retry is returned. In every other
# case a warning is logged and nil is returned. Errors raised in here are logged
# and re-raised only when `stop_on_error` is set.
def pb3_handle_type_errors(event, e, is_recursive_call, datahash)
  if is_recursive_call
    # The mismatches have to be computed for the log line; the message used to
    # reference an undefined local, which raised NameError and hid this warning.
    mismatches = pb3_get_type_mismatches(datahash, "", @class_name)
    @logger.warn("Protobuf encoding error 2.1: Type error (#{e.inspect}). Some types could not be converted. The event has been discarded. Type mismatches: #{mismatches}.")
    nil
  elsif @pb3_encoder_autoconvert_types
    msg = "Protobuf encoding error 2.2: Type error (#{e.inspect}). Will try to convert the data types. Original data: #{datahash}"
    @logger.warn(msg)
    mismatches = pb3_get_type_mismatches(datahash, "", @class_name)

    event = pb3_convert_mismatched_types(event, mismatches)
    # Add a (temporary) tag to handle the recursion stop
    pb3_add_tag(event, @pb3_typeconversion_tag)
    pb3_encode(event)
  else
    @logger.warn("Protobuf encoding error 2.3: Type error (#{e.inspect}). The event has been discarded. Try setting pb3_encoder_autoconvert_types => true for automatic type conversion.")
    nil
  end
rescue TypeError => type_error
  if @pb3_encoder_autoconvert_types
    @logger.warn("Protobuf encoding error 2.4.1: (#{type_error.inspect}). Failed to convert data types. The event has been discarded. original data: #{datahash}")
  else
    @logger.warn("Protobuf encoding error 2.4.2: (#{type_error.inspect}). The event has been discarded.")
  end
  raise type_error if @stop_on_error

  nil
rescue => ex
  # Log the exception that was actually rescued, not the original TypeError.
  @logger.warn("Protobuf encoding error 2.5: (#{ex.inspect}). The event has been discarded. Auto-typecasting was on: #{@pb3_encoder_autoconvert_types}")
  raise ex if @stop_on_error

  nil
end # pb3_handle_type_errors
347
+
348
+
349
# Collects the type mismatches of every field in `data` against the protobuf
# class `pb_class`. `key_prefix` is prepended to the reported field names.
# Returns a flat array of mismatch descriptions.
def pb3_get_type_mismatches(data, key_prefix, pb_class)
  data.to_h.flat_map do |key, value|
    expected_type = pb3_get_expected_type(key, pb_class)
    pb3_compare_datatypes(value, key, key_prefix, pb_class, expected_type)
  end
end
358
+
359
+
360
# Determines the Ruby class a field is expected to have by reading the field
# from a freshly built, empty message of `pb_class`. Returns nil when the class
# is unknown to the descriptor pool or when the empty message yields nil for
# the field.
def pb3_get_expected_type(key, pb_class)
  pb_descriptor = Google::Protobuf::DescriptorPool.generated_pool.lookup(pb_class)
  return nil if pb_descriptor.nil?

  default_value = pb_descriptor.msgclass.new({}).send(key)
  default_value.nil? ? nil : default_value.class
end
375
+
376
# Compares one value against the type expected by the protobuf definition.
#
# value         - the value found in the event data
# key           - field name (or array index) of the value
# key_prefix    - dotted path of the parent, e.g. "parent." ("" at the root)
# pb_class      - descriptor pool name of the class that owns the field
# expected_type - type the value is expected to have
#
# Returns an array of mismatch hashes with the keys "key" (full dotted path),
# "actual_type", "expected_type" and "value". Nested messages and array
# elements are inspected recursively.
def pb3_compare_datatypes(value, key, key_prefix, pb_class, expected_type)
  mismatches = []
  # Full path of this field from the root. Nested lookups must extend this path
  # rather than start over at `key`, otherwise fields more than one level deep
  # are reported under a path that cannot be resolved from the event.
  qualified_key = "#{key_prefix}#{key}"

  is_mismatch =
    case value
    when nil
      false
    when ::Hash, Google::Protobuf::MessageExts
      descriptor = Google::Protobuf::DescriptorPool.generated_pool.lookup(pb_class).lookup(key)
      unless descriptor.subtype.nil?
        class_of_nested_object = pb3_get_descriptorpool_name(descriptor.subtype.msgclass)
        mismatches.concat(pb3_get_type_mismatches(value, "#{qualified_key}.", class_of_nested_object))
      end
      false
    when ::Array
      expected_type = pb3_get_expected_type(key, pb_class)
      array_mismatch = (expected_type != Google::Protobuf::RepeatedField)
      child_type = Google::Protobuf::DescriptorPool.generated_pool.lookup(pb_class).lookup(key).type
      value.each_with_index do |element, index|
        element_mismatches = pb3_compare_datatypes(element, index.to_s, "#{qualified_key}.", pb_class, child_type)
        mismatches.concat(element_mismatches)
        # the array itself counts as mismatching as soon as one element does
        array_mismatch |= element_mismatches.any?
      end
      array_mismatch
    else # is scalar data type
      !pb3_is_scalar_datatype_match(expected_type, value.class)
    end

  if is_mismatch
    mismatches << {"key" => qualified_key, "actual_type" => value.class, "expected_type" => expected_type, "value" => value}
  end
  mismatches
end
413
+
414
# Removes the type conversion tag that this codec added to the event, because
# the protobuf definition might not have a field for tags. The 'tags' entry is
# dropped completely when no other tag is left. Returns the (mutated) data.
def pb3_remove_typeconversion_tag(data)
  tags = data['tags']
  tags.delete(@pb3_typeconversion_tag)
  data.delete('tags') if tags.length == 0
  data
end
423
+
424
# Returns the descriptor pool lookup name of a protobuf message class. An
# instance of the class is created and its class descriptor's name is read.
def pb3_get_descriptorpool_name(child_class)
  child_class.new.class.descriptor.name
end
430
+
431
# Tells whether a scalar value of `actual_type` is acceptable for a field of
# `expected_type`. Identical types always match. Otherwise the lower-cased type
# names are compared: string and integer need the same name, float also accepts
# integer. Returns nil for any other expected type.
def pb3_is_scalar_datatype_match(expected_type, actual_type)
  return true if expected_type == actual_type

  expected_name = expected_type.to_s.downcase.to_sym
  actual_name = actual_type.to_s.downcase.to_sym

  if expected_name == :string || expected_name == :integer
    actual_name == expected_name
  elsif expected_name == :float
    actual_name == :float || actual_name == :integer
  end
end
448
+
449
+
450
# Reads `key` from either a plain hash (via `[]`) or any other object (via `get`).
def pb3_convert_mismatched_types_getter(struct, key)
  return struct[key] if struct.is_a?(::Hash)

  struct.get(key)
end
457
+
458
# Writes `value` under `key` into either a plain hash (via `[]=`) or any other
# object (via `set`) and returns the updated container.
def pb3_convert_mismatched_types_setter(struct, key, value)
  case struct
  when ::Hash then struct[key] = value
  else struct.set(key, value)
  end
  struct
end
466
+
467
# Appends `tag` to the event's 'tags' field, creating the field when the event
# has no tags yet.
def pb3_add_tag(event, tag)
  current_tags = event.get('tags')
  if current_tags.nil?
    event.set('tags', [tag])
  else
    event.set("tags", current_tags << tag)
  end
end
475
+
476
+ # Due to recursion on nested fields in the event object this method might be given an event (1st call) or a hash (2nd .. nth call)
477
+ # First call will be the event object, child objects will be hashes.
478
# Converts the values named in `mismatches` to the types the protobuf
# definition expects.
#
# struct     - an event on the first call, a hash for nested objects
# mismatches - array of hashes with "key", "expected_type" and "actual_type"
#
# Supported conversions: String -> Integer, whole-number Float -> Integer,
# anything -> String, anything -> Float, anything -> boolean (true only for the
# text "true", case-insensitive). Values that cannot be converted are left
# untouched. Returns the updated struct.
def pb3_convert_mismatched_types(struct, mismatches)
  mismatches.each do |m|
    key = m['key']
    expected_type = m['expected_type']
    actual_type = m['actual_type']

    if key.include?(".") # the mismatch is in a child object
      # key is something like http_user_agent.minor_version and needs to be split.
      levels = key.split(/\./)
      key = levels[0]
      sub_levels = levels.drop(1).join(".")
      new_mismatches = [{"key" => sub_levels, "actual_type" => m["actual_type"], "expected_type" => m["expected_type"]}]
      value = pb3_convert_mismatched_types_getter(struct, key)
      new_value = pb3_convert_mismatched_types(value, new_mismatches)
      struct = pb3_convert_mismatched_types_setter(struct, key, new_value)
    else
      value = pb3_convert_mismatched_types_getter(struct, key)
      begin
        new_value =
          case expected_type.to_s
          when "Integer"
            case actual_type.to_s
            when "String"
              value.to_i
            when "Float"
              # convert values like 2.0 to 2, but not 2.1
              value.to_i if value.floor == value
            end
          when "String"
            value.to_s
          when "Float"
            value.to_f
          when "Boolean", "TrueClass", "FalseClass"
            value.to_s.downcase == "true"
          end

        # `false` is a valid conversion result, so only nil means "no conversion".
        struct = pb3_convert_mismatched_types_setter(struct, key, new_value) unless new_value.nil?
      rescue => ex
        # Only StandardError is tolerated here; rescuing Exception would also
        # swallow Interrupt and SystemExit.
        @logger.debug("Protobuf encoding error 5: Could not convert types for protobuf encoding: #{ex}")
      end
    end # if key contains .
  end # mismatches.each
  struct
end
521
+
522
# Prepares event data for protobuf 3 encoding. For every level of nested hashes:
# 0) entries with a nil value are removed,
# 1) @ signs are removed from the keys, which are then turned into symbols,
# 2) timestamps and other objects are converted to strings.
# Returns a new hash; hashes inside arrays are not processed.
def pb3_prepare_for_encoding(datahash)
  datahash.each_with_object({}) do |(key, value), prepared|
    next if value.nil?

    entry = should_convert_to_string?(value) ? value.to_s : value
    entry = pb3_prepare_for_encoding(entry) if entry.is_a?(Hash)
    prepared[key.gsub(/@/, '').to_sym] = entry
  end
end
537
+
538
# Builds the oneof meta information for a decoded protobuf 3 object: for every
# oneof group the name of the option that is set, and for every nested message
# field the meta information of that sub-object (recursively).
def pb3_get_oneof_metainfo(pb_object, pb_class_name)
  pb_class = Google::Protobuf::DescriptorPool.generated_pool.lookup(pb_class_name).msgclass
  meta = {}

  pb_class.descriptor.each_oneof do |oneof_group|
    oneof_group.each do |group_option|
      next if pb_object.send(group_option.name).nil?

      meta[oneof_group.name] = group_option.name
    end
  end

  message_fields = pb_class.descriptor.select { |field| field.type == :message }
  message_fields.each do |field|
    # recurse over nested protobuf classes
    pb_sub_object = pb_object.send(field.name)
    next if pb_sub_object.nil? || field.subtype.nil?

    pb_sub_class = pb3_get_descriptorpool_name(field.subtype.msgclass)
    meta[field.name] = pb3_get_oneof_metainfo(pb_sub_object, pb_sub_class)
  end

  meta
end
561
+
562
+
563
# Encodes an event as a protobuf 2 message and returns the serialized string.
# Any failure is logged and then re-raised to the caller.
def pb2_encode(event)
  prepared = pb2_prepare_for_encoding(event.to_hash, @class_name)
  @pb_builder.new(prepared).serialize_to_string
rescue NoMethodError => e
  @logger.warn("Encoding error 2. Probably mismatching protobuf definition. Required fields in the protobuf definition are: " + event.to_hash.keys.join(", ") + " and the timestamp field name must not include a @. ")
  raise e
rescue => e
  @logger.warn("Encoding error 1: #{e.inspect}")
  raise e
end
574
+
575
+
576
# Prepares event data for protobuf 2 encoding. Anything that is not a hash is
# returned unchanged. For a hash:
# 1) @ signs are removed from the keys (which are turned into strings),
# 2) timestamps and other objects are converted to strings,
# 3) fields that `@metainfo_messageclasses` lists as protobuf classes for
#    `class_name` are replaced by instances of those classes.
def pb2_prepare_for_encoding(datahash, class_name)
  return datahash unless datahash.is_a?(::Hash)

  cleaned = datahash.each_with_object({}) do |(key, value), acc|
    acc[key.to_s.gsub(/@/, '')] = should_convert_to_string?(value) ? value.to_s : value
  end

  # Check if any of the fields in this hash are protobuf classes and if so, create a builder for them.
  nested_classes = @metainfo_messageclasses[class_name]
  return cleaned unless nested_classes

  nested_classes.each do |field, field_class|
    next unless cleaned.include?(field)

    original_value = cleaned[field]
    if original_value.is_a?(::Array)
      # Each element is run through the preparation, but the prepared elements
      # are not stored: the list is put back exactly as it was.
      # NOTE(review): this looks unintended - confirm whether the prepared
      # elements should replace the originals.
      original_value.each { |element| pb2_prepare_for_encoding(element, field_class) }
      cleaned[field] = original_value
    else
      # this line is reached in the colourtest for an enum. Enums should not be instantiated. Should enums even be in the messageclasses? I dont think so! TODO bug
      cleaned[field] = pb2_create_instance(field_class).new(pb2_prepare_for_encoding(original_value, field_class))
    end
  end

  cleaned
end
606
+
607
+
608
# True for every value that is not an Integer, Float, Hash, Array or boolean.
def should_convert_to_string?(v)
  case v
  when Integer, Float, ::Hash, ::Array, true, false
    false
  else
    true
  end
end
611
+
612
+
613
# Resolves a constant from its name, e.g. "Animal::Horse::Unicorn", by walking
# the `::`-separated parts starting at Object. Despite the method name, the
# constant itself is returned, not an instance of it.
def pb2_create_instance(name)
  @logger.debug("Creating instance of " + name)
  scope = Object
  name.split('::').each { |part| scope = scope.const_get(part) }
  scope
end
617
+
618
+
619
# Scans a generated protobuf 3 ruby file line by line and records, per message
# class, which fields are messages (`@metainfo_messageclasses`) and which are
# enums (`@metainfo_enumclasses`), together with the class name of the field.
# Any error is logged and re-raised.
def pb3_metadata_analyis(filename)
  regex_class_name = /\s*add_message "(?<name>.+?)" do\s+/ # TODO optimize both regexes for speed (negative lookahead)
  regex_pbdefs = /\s*(optional|repeated)(\s*):(?<name>.+),(\s*):(?<type>\w+),(\s*)(?<position>\d+)(, \"(?<enum_class>.*?)\")?/
  class_name = ""
  type = ""
  field_name = ""

  File.readlines(filename).each do |line|
    class_match = regex_class_name.match(line)
    if class_match
      # a new message definition starts here
      class_name = class_match[:name]
      @metainfo_messageclasses[class_name] = {}
      @metainfo_enumclasses[class_name] = {}
    end

    field_match = regex_pbdefs.match(line)
    next unless field_match

    field_name = field_match[:name]
    type = field_match[:type]
    field_class_name = field_match[:enum_class]
    case type
    when "message"
      @metainfo_messageclasses[class_name][field_name] = field_class_name
    when "enum"
      @metainfo_enumclasses[class_name][field_name] = field_class_name
    end
  end # readlines

  # class_name starts out as "" and is only ever replaced by a matched string,
  # so there is no "class name not found" case to report here.
  nil
rescue Exception => e
  @logger.warn("Error 3: unable to read pb definition from file " + filename+ ". Reason: #{e.inspect}. Last settings were: class #{class_name} field #{field_name} type #{type}. Backtrace: " + e.backtrace.inspect.to_s)
  raise e
end
651
+
652
+
653
+
654
# Scans a generated protobuf 2 ruby file line by line. We need to find out which
# classes it contains and the protobuf definitions in them, so that nested
# objects can be created later:
# - enum classes are collected (lower-cased) in `@metainfo_pb2_enumlist`,
# - fields whose type contains `::` are recorded per class, either in
#   `@metainfo_enumclasses` (type is a known enum) or in
#   `@metainfo_messageclasses` (everything else).
def pb2_metadata_analyis(filename)
  regex_class_start = /\s*set_fully_qualified_name \"(?<name>.+)\".*?/
  regex_enum_name = /\s*include ..ProtocolBuffers..Enum\s*/
  regex_pbdefs = /\s*(optional|repeated)(\s*):(?<type>.+),(\s*):(?<name>\w+),(\s*)(?<position>\d+)/

  class_name = ""
  type = ""
  field_name = ""
  is_enum_class = false

  File.readlines(filename).each do |line|
    is_enum_class = true if regex_enum_name.match(line)

    class_match = regex_class_start.match(line)
    if class_match
      class_name = class_match[:name].gsub('.', "::").split('::').map { |word| word.capitalize }.join('::')
      @metainfo_pb2_enumlist << class_name.downcase if is_enum_class
      is_enum_class = false # reset when next class starts
    end

    field_match = regex_pbdefs.match(line)
    next unless field_match

    type = field_match[:type]
    field_name = field_match[:name]
    next unless type =~ /::/

    clean_type = type.gsub(/^:/, "")
    registry = if @metainfo_pb2_enumlist.include?(clean_type.downcase)
                 @metainfo_enumclasses
               else
                 @metainfo_messageclasses
               end
    registry[class_name] = {} unless registry.key?(class_name)
    registry[class_name][field_name] = clean_type
  end

  # class_name starts out as "" and is only ever replaced by a matched string,
  # so there is no "class name not found" case to report here.
  nil
rescue LoadError => e
  raise ArgumentError.new("Could not load file: " + filename + ". Please try to use absolute pathes. Current working dir: " + Dir.pwd + ", loadpath: " + $LOAD_PATH.join(" "))
rescue => e
  @logger.warn("Error 3: unable to read pb definition from file " + filename+ ". Reason: #{e.inspect}. Last settings were: class #{class_name} field #{field_name} type #{type}. Backtrace: " + e.backtrace.inspect.to_s)
  raise e
end
710
+
711
+
712
# Loads one protobuf definition file. Files that do not end in `.rb` are only
# reported with a warning. Ruby files are required when their path is absolute
# and are then scanned for class and field meta information according to the
# configured protobuf version. Load errors are logged and re-raised.
def load_protobuf_definition(filename)
  return @logger.warn("Not a ruby file: " + filename) unless filename.end_with?('.rb')

  # Add to the loading path of the protobuf definitions
  if Pathname.new(filename).absolute?
    begin
      require filename
    rescue Exception => e
      @logger.error("Unable to load file: #{filename}. Reason: #{e.inspect}")
      raise e
    end
  end

  case @protobuf_version
  when 3 then pb3_metadata_analyis(filename)
  else pb2_metadata_analyis(filename)
  end
end
734
+
735
+ end # class LogStash::Codecs::Protobuf