logstash-codec-protobuf 1.2.8-jruby

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +45 -0
  3. data/CONTRIBUTORS +12 -0
  4. data/DEVELOPER.md +2 -0
  5. data/Gemfile +11 -0
  6. data/LICENSE +202 -0
  7. data/NOTICE.TXT +4 -0
  8. data/README.md +184 -0
  9. data/docs/index.asciidoc +241 -0
  10. data/google-protobuf-lib-update.md +57 -0
  11. data/lib/logstash/codecs/protobuf.rb +735 -0
  12. data/logstash-codec-protobuf.gemspec +28 -0
  13. data/spec/codecs/pb2_spec.rb +236 -0
  14. data/spec/codecs/pb3_decode_spec.rb +445 -0
  15. data/spec/codecs/pb3_encode_spec.rb +243 -0
  16. data/spec/helpers/pb2/ColourTestcase.pb.rb +35 -0
  17. data/spec/helpers/pb2/ColourTestcase.proto +24 -0
  18. data/spec/helpers/pb2/event.pb.rb +19 -0
  19. data/spec/helpers/pb2/event.proto +12 -0
  20. data/spec/helpers/pb2/header/header.pb.rb +16 -0
  21. data/spec/helpers/pb2/header/header.proto +8 -0
  22. data/spec/helpers/pb2/human.pb.rb +26 -0
  23. data/spec/helpers/pb2/unicorn.pb.rb +19 -0
  24. data/spec/helpers/pb2/unicorn_event.pb.rb +24 -0
  25. data/spec/helpers/pb3/FantasyHorse_pb.rb +44 -0
  26. data/spec/helpers/pb3/ProbeResult_pb.rb +26 -0
  27. data/spec/helpers/pb3/dnsmessage_pb.rb +82 -0
  28. data/spec/helpers/pb3/events.proto3 +10 -0
  29. data/spec/helpers/pb3/events_pb.rb +17 -0
  30. data/spec/helpers/pb3/header/header.proto3 +7 -0
  31. data/spec/helpers/pb3/header/header_pb.rb +12 -0
  32. data/spec/helpers/pb3/integertest_pb.rb +20 -0
  33. data/spec/helpers/pb3/messageA.proto3 +12 -0
  34. data/spec/helpers/pb3/messageA_pb.rb +16 -0
  35. data/spec/helpers/pb3/messageB.proto3 +12 -0
  36. data/spec/helpers/pb3/messageB_pb.rb +16 -0
  37. data/spec/helpers/pb3/rum2_pb.rb +87 -0
  38. data/spec/helpers/pb3/rum3_pb.rb +87 -0
  39. data/spec/helpers/pb3/rum_pb.rb +87 -0
  40. data/spec/helpers/pb3/unicorn.proto3 +31 -0
  41. data/spec/helpers/pb3/unicorn_pb.rb +31 -0
  42. metadata +177 -0
data/lib/logstash/codecs/protobuf.rb
@@ -0,0 +1,735 @@
+ # encoding: utf-8
+ require 'logstash/codecs/base'
+ require 'logstash/util/charset'
+ require 'google/protobuf' # for protobuf3
+ require 'protocol_buffers' # https://github.com/codekitchen/ruby-protocol-buffers, for protobuf2
+
+ # Monkey-patch the `Google::Protobuf::DescriptorPool` with a mutex for exclusive
+ # access.
+ #
+ # The DescriptorPool instance is not thread-safe when loading protobuf
+ # definitions. This can cause unrecoverable errors when registering multiple
+ # concurrent pipelines that try to register the same dependency. The
+ # DescriptorPool instance is global to the JVM and shared among all pipelines.
+ class << Google::Protobuf::DescriptorPool
+   def with_lock
+     if !@mutex
+       @mutex = Mutex.new
+     end
+
+     return @mutex
+   end
+ end
+
+ # This codec converts protobuf encoded messages into logstash events and vice versa.
+ #
+ # Requires the protobuf definitions as ruby files. You can create those using the [ruby-protoc compiler](https://github.com/codekitchen/ruby-protocol-buffers).
+ #
+ # The following shows a usage example for decoding protobuf 2 encoded events from a kafka stream:
+ # [source,ruby]
+ # kafka
+ # {
+ #   zk_connect => "127.0.0.1"
+ #   topic_id => "your_topic_goes_here"
+ #   key_deserializer_class => "org.apache.kafka.common.serialization.ByteArrayDeserializer"
+ #   value_deserializer_class => "org.apache.kafka.common.serialization.ByteArrayDeserializer"
+ #   codec => protobuf
+ #   {
+ #     class_name => "Animal::Unicorn"
+ #     include_path => ['/path/to/protobuf/definitions/UnicornProtobuf.pb.rb']
+ #   }
+ # }
+ #
+ # Same example for protobuf 3:
+ # [source,ruby]
+ # kafka
+ # {
+ #   zk_connect => "127.0.0.1"
+ #   topic_id => "your_topic_goes_here"
+ #   key_deserializer_class => "org.apache.kafka.common.serialization.ByteArrayDeserializer"
+ #   value_deserializer_class => "org.apache.kafka.common.serialization.ByteArrayDeserializer"
+ #   codec => protobuf
+ #   {
+ #     class_name => "Animal.Unicorn"
+ #     include_path => ['/path/to/protobuf/definitions/UnicornProtobuf_pb.rb']
+ #     protobuf_version => 3
+ #   }
+ # }
+ #
+ # Specifically for the kafka input: please set the deserializer classes as shown above.
+
+ class LogStash::Codecs::Protobuf < LogStash::Codecs::Base
+   config_name 'protobuf'
+
+   # Name of the class to decode.
+   # If your protobuf 2 definition contains modules, prepend them to the class name with double colons like so:
+   # [source,ruby]
+   #     class_name => "Animal::Horse::Unicorn"
+   #
+   # This corresponds to a protobuf definition starting as follows:
+   # [source,ruby]
+   #     module Animal
+   #       module Horse
+   #         class Unicorn
+   #           # here are your field definitions.
+   #
+   # For protobuf 3, separate the modules with single dots.
+   # [source,ruby]
+   #     class_name => "Animal.Horse.Unicorn"
+   # Check the bottom of the generated protobuf ruby file. It contains lines like this:
+   # [source,ruby]
+   #     Animals.Unicorn = Google::Protobuf::DescriptorPool.generated_pool.lookup("Animals.Unicorn").msgclass
+   # Use the parameter for the lookup call as the class_name for the codec config.
+   #
+   # If your class references other definitions, you only need to add the main class here.
+   config :class_name, :validate => :string, :required => true
+
+   # Relative path to the ruby file that contains class_name.
+   #
+   # Relative path (from `protobuf_root_directory`) that holds the definition of the class specified in
+   # `class_name`.
+   #
+   # `class_file` and `include_path` cannot be used at the same time.
+   config :class_file, :validate => :string, :default => '', :required => false
+
+   # Absolute path to the root directory that contains all referenced/used dependencies
+   # of the main class (`class_name`) or any of its dependencies.
+   #
+   # For instance:
+   #
+   #     pb3
+   #     ├── header
+   #     │   └── header_pb.rb
+   #     ├── messageA_pb.rb
+   #
+   # In this case `messageA_pb.rb` has an embedded message from `header/header_pb.rb`.
+   # If `class_file` is set to `messageA_pb.rb`, and `class_name` to
+   # `MessageA`, `protobuf_root_directory` must be set to `/path/to/pb3`, which includes
+   # both definitions.
+   config :protobuf_root_directory, :validate => :string, :required => false
+
+   # List of absolute paths to files with protobuf definitions.
+   # When using more than one file, make sure to arrange the files in reverse order of dependency so that each class is loaded before it is
+   # referred to by another.
+   #
+   # Example: a class _Unicorn_ referencing another protobuf class _Wings_
+   # [source,ruby]
+   #     module Animal
+   #       module Horse
+   #         class Unicorn
+   #           set_fully_qualified_name "Animal.Horse.Unicorn"
+   #           optional ::Animal::Bodypart::Wings, :wings, 1
+   #           optional :string, :name, 2
+   #           # here be more field definitions
+   #
+   # would be configured as
+   # [source,ruby]
+   #     include_path => ['/path/to/protobuf/definitions/Wings.pb.rb','/path/to/protobuf/definitions/Unicorn.pb.rb']
+   #
+   # `class_file` and `include_path` cannot be used at the same time.
+   config :include_path, :validate => :array, :default => [], :required => false
+
+   # Protocol buffer version switch. Defaults to version 2. Please note that the behaviour for enums varies between the versions:
+   # for protobuf 2 you will get integer representations for enums, for protobuf 3 you will get string representations due to a different converter library.
+   # Recommendation: use the translate plugin to restore the previous behaviour when upgrading.
+   config :protobuf_version, :validate => [2,3], :default => 2, :required => true
+
+   # To tolerate faulty messages that cannot be encoded or decoded, leave this set to false. Otherwise the pipeline stops upon encountering an undecipherable message.
+   config :stop_on_error, :validate => :boolean, :default => false, :required => false
+
+   # Instruct the encoder to attempt converting data types to match the protobuf definitions. Available only for protobuf version 3.
+   config :pb3_encoder_autoconvert_types, :validate => :boolean, :default => true, :required => false
+
+   # Add meta information to `[@metadata][pb_oneof]` about which classes were chosen for [oneof](https://developers.google.com/protocol-buffers/docs/proto3#oneof) fields.
+   # Example: for the protobuf definition
+   # [source,ruby]
+   #     oneof :horse_type do
+   #       optional :unicorn, :message, 2, "FantasyUnicorn"
+   #       optional :pegasus, :message, 3, "FantasyPegasus"
+   #     end
+   # the field `[@metadata][pb_oneof][horse_type]` will be set to either `pegasus` or `unicorn`.
+   # Available only for protobuf version 3.
+   config :pb3_set_oneof_metainfo, :validate => :boolean, :default => false, :required => false
+
+   attr_reader :execution_context
+
+   # id of the pipeline that this codec instance runs in.
+   def pipeline_id
+     respond_to?(:execution_context) && !execution_context.nil? ? execution_context.pipeline_id : "main"
+   end
+
+   def register
+     @metainfo_messageclasses = {}
+     @metainfo_enumclasses = {}
+     @metainfo_pb2_enumlist = []
+     @pb3_typeconversion_tag = "_protobuf_type_converted"
+
+     if @include_path.length > 0 and not class_file.strip.empty?
+       raise LogStash::ConfigurationError, "Cannot use `include_path` and `class_file` at the same time"
+     end
+
+     if @include_path.length == 0 and class_file.strip.empty?
+       raise LogStash::ConfigurationError, "Need to specify `include_path` or `class_file`"
+     end
+
+     should_register = Google::Protobuf::DescriptorPool.generated_pool.lookup(class_name).nil?
+
+     unless @protobuf_root_directory.nil? or @protobuf_root_directory.strip.empty?
+       if !$LOAD_PATH.include? @protobuf_root_directory and should_register
+         $LOAD_PATH.unshift(@protobuf_root_directory)
+       end
+     end
+
+     @class_file = "#{@protobuf_root_directory}/#{@class_file}" unless (Pathname.new @class_file).absolute? or @class_file.empty?
+     # exclusive access while loading protobuf definitions
+     Google::Protobuf::DescriptorPool.with_lock.synchronize do
+       # load from `class_file`
+       load_protobuf_definition(@class_file) if should_register and !@class_file.empty?
+       # load from `include_path`
+       include_path.each { |path| load_protobuf_definition(path) } if include_path.length > 0 and should_register
+
+       if @protobuf_version == 3
+         @pb_builder = Google::Protobuf::DescriptorPool.generated_pool.lookup(class_name).msgclass
+       else
+         @pb_builder = pb2_create_instance(class_name)
+       end
+     end
+   end
+
+   # Pipelines using this plugin cannot be reloaded.
+   # https://github.com/elastic/logstash/pull/6499
+   #
+   # The DescriptorPool instance registers the protobuf classes (and
+   # dependencies) as global objects. This makes it very difficult to reload a
+   # pipeline, because `class_name` and all of its dependencies are already
+   # registered.
+   def reloadable?
+     return false
+   end
+
+   def decode(data)
+     if @protobuf_version == 3
+       decoded = @pb_builder.decode(data.to_s)
+       if @pb3_set_oneof_metainfo
+         meta = pb3_get_oneof_metainfo(decoded, @class_name)
+       end
+       h = pb3_deep_to_hash(decoded)
+     else
+       decoded = @pb_builder.parse(data.to_s)
+       h = decoded.to_hash
+     end
+     e = LogStash::Event.new(h)
+     if @protobuf_version == 3 and @pb3_set_oneof_metainfo
+       e.set("[@metadata][pb_oneof]", meta)
+     end
+     yield e if block_given?
+   rescue => ex
+     @logger.warn("Couldn't decode protobuf: #{ex.inspect}.")
+     if stop_on_error
+       raise ex
+     else # keep the original message so that the user can debug it.
+       yield LogStash::Event.new("message" => data, "tags" => ["_protobufdecodefailure"])
+     end
+   end # def decode
+
+   def encode(event)
+     if @protobuf_version == 3
+       protobytes = pb3_encode(event)
+     else
+       protobytes = pb2_encode(event)
+     end
+     unless protobytes.nil? or protobytes.empty?
+       @on_event.call(event, protobytes)
+     end
+   end # def encode
+
+   private
+   def pb3_deep_to_hash(input)
+     case input
+     when Google::Protobuf::MessageExts # it's a protobuf class
+       result = Hash.new
+       input.to_h.each {|key, value|
+         result[key] = pb3_deep_to_hash(value) # the key is required for the class lookup of enums.
+       }
+     when ::Array
+       result = []
+       input.each {|value|
+         result << pb3_deep_to_hash(value)
+       }
+     when ::Hash
+       result = {}
+       input.each {|key, value|
+         result[key] = pb3_deep_to_hash(value)
+       }
+     when Symbol # is an Enum
+       result = input.to_s.sub(':','')
+     else
+       result = input
+     end
+     result
+   end
+
+   def pb3_encode(event)
+     datahash = event.to_hash
+
+     is_recursive_call = !event.get('tags').nil? and event.get('tags').include? @pb3_typeconversion_tag
+     if is_recursive_call
+       datahash = pb3_remove_typeconversion_tag(datahash)
+     end
+     datahash = pb3_prepare_for_encoding(datahash)
+     if datahash.nil?
+       @logger.warn("Protobuf encoding error 4: empty data for event #{event.to_hash}")
+     end
+     if @pb_builder.nil?
+       @logger.warn("Protobuf encoding error 5: empty protobuf builder for class #{@class_name}")
+     end
+     pb_obj = @pb_builder.new(datahash)
+     @pb_builder.encode(pb_obj)
+
+   rescue ArgumentError => e
+     k = event.to_hash.keys.join(", ")
+     @logger.warn("Protobuf encoding error 1: Argument error (#{e.inspect}). Reason: probably mismatching protobuf definition. \
+       Required fields in the protobuf definition are: #{k} and fields must not begin with @ sign. The event has been discarded.")
+     nil
+   rescue TypeError => e
+     pb3_handle_type_errors(event, e, is_recursive_call, datahash)
+     nil
+   rescue => e
+     @logger.warn("Protobuf encoding error 3: #{e.inspect}. Event discarded. Input data: #{datahash}. The event has been discarded. Backtrace: #{e.backtrace}")
+     nil
+   end
+
+   def pb3_handle_type_errors(event, e, is_recursive_call, datahash)
+     begin
+       if is_recursive_call
+         @logger.warn("Protobuf encoding error 2.1: Type error (#{e.inspect}). Some types could not be converted. The event has been discarded. Type mismatches: #{mismatches}.")
+       else
+         if @pb3_encoder_autoconvert_types
+           msg = "Protobuf encoding error 2.2: Type error (#{e.inspect}). Will try to convert the data types. Original data: #{datahash}"
+           @logger.warn(msg)
+           mismatches = pb3_get_type_mismatches(datahash, "", @class_name)
+
+           event = pb3_convert_mismatched_types(event, mismatches)
+           # Add a (temporary) tag to handle the recursion stop
+           pb3_add_tag(event, @pb3_typeconversion_tag)
+           pb3_encode(event)
+         else
+           @logger.warn("Protobuf encoding error 2.3: Type error (#{e.inspect}). The event has been discarded. Try setting pb3_encoder_autoconvert_types => true for automatic type conversion.")
+         end
+       end
+     rescue TypeError => e
+       if @pb3_encoder_autoconvert_types
+         @logger.warn("Protobuf encoding error 2.4.1: (#{e.inspect}). Failed to convert data types. The event has been discarded. original data: #{datahash}")
+       else
+         @logger.warn("Protobuf encoding error 2.4.2: (#{e.inspect}). The event has been discarded.")
+       end
+       if @stop_on_error
+         raise e
+       end
+       nil
+     rescue => ex
+       @logger.warn("Protobuf encoding error 2.5: (#{e.inspect}). The event has been discarded. Auto-typecasting was on: #{@pb3_encoder_autoconvert_types}")
+       if @stop_on_error
+         raise ex
+       end
+       nil
+     end
+   end # pb3_handle_type_errors
+
+   def pb3_get_type_mismatches(data, key_prefix, pb_class)
+     mismatches = []
+     data.to_h.each do |key, value|
+       expected_type = pb3_get_expected_type(key, pb_class)
+       r = pb3_compare_datatypes(value, key, key_prefix, pb_class, expected_type)
+       mismatches.concat(r)
+     end # data.each
+     mismatches
+   end
+
+   def pb3_get_expected_type(key, pb_class)
+     pb_descriptor = Google::Protobuf::DescriptorPool.generated_pool.lookup(pb_class)
+
+     if !pb_descriptor.nil?
+       pb_builder = pb_descriptor.msgclass
+       pb_obj = pb_builder.new({})
+       v = pb_obj.send(key)
+
+       if !v.nil?
+         v.class
+       else
+         nil
+       end
+     end
+   end
+
+   def pb3_compare_datatypes(value, key, key_prefix, pb_class, expected_type)
+     mismatches = []
+
+     if value.nil?
+       is_mismatch = false
+     else
+       case value
+       when ::Hash, Google::Protobuf::MessageExts
+         is_mismatch = false
+         descriptor = Google::Protobuf::DescriptorPool.generated_pool.lookup(pb_class).lookup(key)
+         if !descriptor.subtype.nil?
+           class_of_nested_object = pb3_get_descriptorpool_name(descriptor.subtype.msgclass)
+           new_prefix = "#{key}."
+           recursive_mismatches = pb3_get_type_mismatches(value, new_prefix, class_of_nested_object)
+           mismatches.concat(recursive_mismatches)
+         end
+       when ::Array
+         expected_type = pb3_get_expected_type(key, pb_class)
+         is_mismatch = (expected_type != Google::Protobuf::RepeatedField)
+         child_type = Google::Protobuf::DescriptorPool.generated_pool.lookup(pb_class).lookup(key).type
+         value.each_with_index do | v, i |
+           new_prefix = "#{key}."
+           recursive_mismatches = pb3_compare_datatypes(v, i.to_s, new_prefix, pb_class, child_type)
+           mismatches.concat(recursive_mismatches)
+           is_mismatch |= recursive_mismatches.any?
+         end # do
+       else # is a scalar data type
+         is_mismatch = ! pb3_is_scalar_datatype_match(expected_type, value.class)
+       end # case
+     end # if value.nil?
+
+     if is_mismatch
+       mismatches << {"key" => "#{key_prefix}#{key}", "actual_type" => value.class, "expected_type" => expected_type, "value" => value}
+     end
+     mismatches
+   end
+
+   def pb3_remove_typeconversion_tag(data)
+     # remove the tag that we added to the event because
+     # the protobuf definition might not have a field for tags
+     data['tags'].delete(@pb3_typeconversion_tag)
+     if data['tags'].length == 0
+       data.delete('tags')
+     end
+     data
+   end
+
+   def pb3_get_descriptorpool_name(child_class)
+     # make an instance
+     inst = child_class.new
+     # get the lookup name for the DescriptorPool
+     inst.class.descriptor.name
+   end
+
+   def pb3_is_scalar_datatype_match(expected_type, actual_type)
+     if expected_type == actual_type
+       true
+     else
+       e = expected_type.to_s.downcase.to_sym
+       a = actual_type.to_s.downcase.to_sym
+       case e
+       when :string
+         a == e
+       when :integer
+         a == e
+       when :float
+         a == :float || a == :integer
+       end
+     end
+   end
+
+   def pb3_convert_mismatched_types_getter(struct, key)
+     if struct.is_a? ::Hash
+       struct[key]
+     else
+       struct.get(key)
+     end
+   end
+
+   def pb3_convert_mismatched_types_setter(struct, key, value)
+     if struct.is_a? ::Hash
+       struct[key] = value
+     else
+       struct.set(key, value)
+     end
+     struct
+   end
+
+   def pb3_add_tag(event, tag)
+     if event.get('tags').nil?
+       event.set('tags', [tag])
+     else
+       existing_tags = event.get('tags')
+       event.set("tags", existing_tags << tag)
+     end
+   end
+
+   # Due to recursion on nested fields in the event object this method might be given an event (1st call) or a hash (2nd .. nth call).
+   # The first call will be the event object; child objects will be hashes.
+   def pb3_convert_mismatched_types(struct, mismatches)
+     mismatches.each do | m |
+       key = m['key']
+       expected_type = m['expected_type']
+       actual_type = m['actual_type']
+       if key.include? "." # the mismatch is in a child object
+         levels = key.split(/\./) # key is something like http_user_agent.minor_version and needs to be split.
+         key = levels[0]
+         sub_levels = levels.drop(1).join(".")
+         new_mismatches = [{"key"=>sub_levels, "actual_type"=>m["actual_type"], "expected_type"=>m["expected_type"]}]
+         value = pb3_convert_mismatched_types_getter(struct, key)
+         new_value = pb3_convert_mismatched_types(value, new_mismatches)
+         struct = pb3_convert_mismatched_types_setter(struct, key, new_value)
+       else
+         value = pb3_convert_mismatched_types_getter(struct, key)
+         begin
+           case expected_type.to_s
+           when "Integer"
+             case actual_type.to_s
+             when "String"
+               new_value = value.to_i
+             when "Float"
+               if value.floor == value # convert values like 2.0 to 2, but not 2.1
+                 new_value = value.to_i
+               end
+             end
+           when "String"
+             new_value = value.to_s
+           when "Float"
+             new_value = value.to_f
+           when "Boolean","TrueClass", "FalseClass"
+             new_value = value.to_s.downcase == "true"
+           end
+           if !new_value.nil?
+             struct = pb3_convert_mismatched_types_setter(struct, key, new_value)
+           end
+         rescue Exception => ex
+           @logger.debug("Protobuf encoding error 5: Could not convert types for protobuf encoding: #{ex}")
+         end
+       end # if key contains .
+     end # mismatches.each
+     struct
+   end
+
+   def pb3_prepare_for_encoding(datahash)
+     # 0) Remove empty fields.
+     datahash = datahash.select { |key, value| !value.nil? }
+
+     # Preparation: the data cannot be encoded until certain criteria are met:
+     # 1) remove @ signs from keys.
+     # 2) convert timestamps and other objects to strings.
+     datahash = datahash.inject({}){|x,(k,v)| x[k.gsub(/@/,'').to_sym] = (should_convert_to_string?(v) ? v.to_s : v); x}
+
+     datahash.each do |key, value|
+       datahash[key] = pb3_prepare_for_encoding(value) if value.is_a?(Hash)
+     end
+
+     datahash
+   end
+
+   def pb3_get_oneof_metainfo(pb_object, pb_class_name)
+     meta = {}
+     pb_class = Google::Protobuf::DescriptorPool.generated_pool.lookup(pb_class_name).msgclass
+
+     pb_class.descriptor.each_oneof { |field|
+       field.each { | group_option |
+         if !pb_object.send(group_option.name).nil?
+           meta[field.name] = group_option.name
+         end
+       }
+     }
+
+     pb_class.descriptor.select{ |field| field.type == :message }.each { | field |
+       # recurse over nested protobuf classes
+       pb_sub_object = pb_object.send(field.name)
+       if !pb_sub_object.nil? and !field.subtype.nil?
+         pb_sub_class = pb3_get_descriptorpool_name(field.subtype.msgclass)
+         meta[field.name] = pb3_get_oneof_metainfo(pb_sub_object, pb_sub_class)
+       end
+     }
+
+     meta
+   end
+
+   def pb2_encode(event)
+     data = pb2_prepare_for_encoding(event.to_hash, @class_name)
+     msg = @pb_builder.new(data)
+     msg.serialize_to_string
+   rescue NoMethodError => e
+     @logger.warn("Encoding error 2. Probably mismatching protobuf definition. Required fields in the protobuf definition are: " + event.to_hash.keys.join(", ") + " and the timestamp field name must not include a @. ")
+     raise e
+   rescue => e
+     @logger.warn("Encoding error 1: #{e.inspect}")
+     raise e
+   end
+
+   def pb2_prepare_for_encoding(datahash, class_name)
+     if datahash.is_a?(::Hash)
+       # Preparation: the data cannot be encoded until certain criteria are met:
+       # 1) remove @ signs from keys.
+       # 2) convert timestamps and other objects to strings.
+       datahash = ::Hash[datahash.map{|(k,v)| [k.to_s.dup.gsub(/@/,''), (should_convert_to_string?(v) ? v.to_s : v)] }]
+
+       # Check if any of the fields in this hash are protobuf classes and if so, create a builder for them.
+       meta = @metainfo_messageclasses[class_name]
+       if meta
+         meta.map do | (k,c) |
+           if datahash.include?(k)
+             original_value = datahash[k]
+             datahash[k] =
+               if original_value.is_a?(::Array)
+                 # make this field an array/list of protobuf objects
+                 # value is a list of hashed complex objects, each of which needs to be protobuffed and
+                 # put back into the list.
+                 original_value.map { |x| pb2_prepare_for_encoding(x, c) }
+                 original_value
+               else
+                 proto_obj = pb2_create_instance(c)
+                 proto_obj.new(pb2_prepare_for_encoding(original_value, c)) # this line is reached in the colourtest for an enum. Enums should not be instantiated. Should enums even be in the messageclasses? I don't think so! TODO bug
+               end # if is array
+           end # if datahash_include
+         end # do
+       end # if meta
+     end
+     datahash
+   end
+
+   def should_convert_to_string?(v)
+     !(v.is_a?(Integer) || v.is_a?(Float) || v.is_a?(::Hash) || v.is_a?(::Array) || [true, false].include?(v))
+   end
+
+   def pb2_create_instance(name)
+     @logger.debug("Creating instance of " + name)
+     name.split('::').inject(Object) { |n,c| n.const_get c }
+   end
+
+   def pb3_metadata_analyis(filename)
+     regex_class_name = /\s*add_message "(?<name>.+?)" do\s+/ # TODO optimize both regexes for speed (negative lookahead)
+     regex_pbdefs = /\s*(optional|repeated)(\s*):(?<name>.+),(\s*):(?<type>\w+),(\s*)(?<position>\d+)(, \"(?<enum_class>.*?)\")?/
+     class_name = ""
+     type = ""
+     field_name = ""
+     File.readlines(filename).each do |line|
+       if ! (line =~ regex_class_name).nil?
+         class_name = $1
+         @metainfo_messageclasses[class_name] = {}
+         @metainfo_enumclasses[class_name] = {}
+       end # if
+       if ! (line =~ regex_pbdefs).nil?
+         field_name = $1
+         type = $2
+         field_class_name = $4
+         if type == "message"
+           @metainfo_messageclasses[class_name][field_name] = field_class_name
+         elsif type == "enum"
+           @metainfo_enumclasses[class_name][field_name] = field_class_name
+         end
+       end # if
+     end # readlines
+     if class_name.nil?
+       @logger.warn("Error 4: class name not found in file " + filename)
+       raise ArgumentError, "Invalid protobuf file: " + filename
+     end
+   rescue Exception => e
+     @logger.warn("Error 3: unable to read pb definition from file " + filename + ". Reason: #{e.inspect}. Last settings were: class #{class_name} field #{field_name} type #{type}. Backtrace: " + e.backtrace.inspect.to_s)
+     raise e
+   end
+
+   def pb2_metadata_analyis(filename)
+     regex_class_start = /\s*set_fully_qualified_name \"(?<name>.+)\".*?/
+     regex_enum_name = /\s*include ..ProtocolBuffers..Enum\s*/
+     regex_pbdefs = /\s*(optional|repeated)(\s*):(?<type>.+),(\s*):(?<name>\w+),(\s*)(?<position>\d+)/
+     # We also need to find out which class the file contains and the protobuf definitions in it.
+     # We will unfortunately need that later so that we can create nested objects.
+
+     class_name = ""
+     type = ""
+     field_name = ""
+     is_enum_class = false
+
+     File.readlines(filename).each do |line|
+       if ! (line =~ regex_enum_name).nil?
+         is_enum_class = true
+       end
+
+       if ! (line =~ regex_class_start).nil?
+         class_name = $1.gsub('.',"::").split('::').map {|word| word.capitalize}.join('::')
+         if is_enum_class
+           @metainfo_pb2_enumlist << class_name.downcase
+         end
+         is_enum_class = false # reset when the next class starts
+       end
+       if ! (line =~ regex_pbdefs).nil?
+         type = $1
+         field_name = $2
+         if type =~ /::/
+           clean_type = type.gsub(/^:/,"")
+           e = @metainfo_pb2_enumlist.include? clean_type.downcase
+
+           if e
+             if not @metainfo_enumclasses.key? class_name
+               @metainfo_enumclasses[class_name] = {}
+             end
+             @metainfo_enumclasses[class_name][field_name] = clean_type
+           else
+             if not @metainfo_messageclasses.key? class_name
+               @metainfo_messageclasses[class_name] = {}
+             end
+             @metainfo_messageclasses[class_name][field_name] = clean_type
+           end
+         end
+       end
+     end
+     if class_name.nil?
+       @logger.warn("Error 4: class name not found in file " + filename)
+       raise ArgumentError, "Invalid protobuf file: " + filename
+     end
+   rescue LoadError => e
+     raise ArgumentError.new("Could not load file: " + filename + ". Please try to use absolute paths. Current working dir: " + Dir.pwd + ", loadpath: " + $LOAD_PATH.join(" "))
+   rescue => e
+     @logger.warn("Error 3: unable to read pb definition from file " + filename + ". Reason: #{e.inspect}. Last settings were: class #{class_name} field #{field_name} type #{type}. Backtrace: " + e.backtrace.inspect.to_s)
+     raise e
+   end
710
+
711
+
712
+ def load_protobuf_definition(filename)
713
+ if filename.end_with? ('.rb')
714
+ # Add to the loading path of the protobuf definitions
715
+ if (Pathname.new filename).absolute?
716
+ begin
717
+ require filename
718
+ rescue Exception => e
719
+ @logger.error("Unable to load file: #{filename}. Reason: #{e.inspect}")
720
+ raise e
721
+ end
722
+ end
723
+
724
+ if @protobuf_version == 3
725
+ pb3_metadata_analyis(filename)
726
+ else
727
+ pb2_metadata_analyis(filename)
728
+ end
729
+
730
+ else
731
+ @logger.warn("Not a ruby file: " + filename)
732
+ end
733
+ end
734
+
735
+ end # class LogStash::Codecs::Protobuf
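
For illustration, here is a minimal pipeline sketch combining the options defined above. The topic, paths and class name are placeholders borrowed from the codec's own doc comments (a protobuf 3 definition `messageA_pb.rb` under `/path/to/pb3` exposing the message class `MessageA`); `class_file` plus `protobuf_root_directory` is shown instead of `include_path`, since the two are mutually exclusive:

    input {
      kafka {
        topic_id => "your_topic_goes_here"
        key_deserializer_class => "org.apache.kafka.common.serialization.ByteArrayDeserializer"
        value_deserializer_class => "org.apache.kafka.common.serialization.ByteArrayDeserializer"
        codec => protobuf {
          class_name => "MessageA"
          class_file => "messageA_pb.rb"
          protobuf_root_directory => "/path/to/pb3"
          protobuf_version => 3
          pb3_set_oneof_metainfo => true
        }
      }
    }

With `pb3_set_oneof_metainfo => true`, each decoded event additionally carries `[@metadata][pb_oneof]`, mapping each oneof group name to the member that was populated, as implemented in `pb3_get_oneof_metainfo`. Messages that cannot be decoded are emitted with the raw payload in `message` and the tag `_protobufdecodefailure`, unless `stop_on_error => true`, in which case the pipeline stops.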