logstash-codec-protobuf 1.0.5 → 1.2.5

Files changed (46)
  1. checksums.yaml +5 -5
  2. data/CHANGELOG.md +20 -1
  3. data/Gemfile +1 -1
  4. data/LICENSE +2 -3
  5. data/README.md +147 -40
  6. data/docs/index.asciidoc +173 -41
  7. data/lib/logstash/codecs/protobuf.rb +598 -238
  8. data/logstash-codec-protobuf.gemspec +3 -3
  9. data/spec/codecs/{protobuf_spec.rb → pb2_spec.rb} +81 -54
  10. data/spec/codecs/pb3_decode_spec.rb +445 -0
  11. data/spec/codecs/pb3_encode_spec.rb +243 -0
  12. data/spec/helpers/pb2/event.pb.rb +19 -0
  13. data/spec/helpers/pb2/event.proto +12 -0
  14. data/spec/helpers/pb2/header/header.pb.rb +16 -0
  15. data/spec/helpers/pb2/header/header.proto +8 -0
  16. data/spec/helpers/pb3/FantasyHorse_pb.rb +44 -0
  17. data/spec/helpers/pb3/ProbeResult_pb.rb +26 -0
  18. data/spec/helpers/pb3/dnsmessage_pb.rb +82 -0
  19. data/spec/helpers/pb3/events.proto3 +10 -0
  20. data/spec/helpers/pb3/events_pb.rb +17 -0
  21. data/spec/helpers/pb3/header/header.proto3 +7 -0
  22. data/spec/helpers/pb3/header/header_pb.rb +12 -0
  23. data/spec/helpers/pb3/integertest_pb.rb +20 -0
  24. data/spec/helpers/pb3/messageA.proto3 +12 -0
  25. data/spec/helpers/pb3/messageA_pb.rb +16 -0
  26. data/spec/helpers/pb3/messageB.proto3 +12 -0
  27. data/spec/helpers/pb3/messageB_pb.rb +16 -0
  28. data/spec/helpers/pb3/rum2_pb.rb +87 -0
  29. data/spec/helpers/pb3/rum3_pb.rb +87 -0
  30. data/spec/helpers/pb3/rum_pb.rb +87 -0
  31. metadata +62 -34
  32. data/lib/net/jpountz/lz4/lz4/1.3.0/lz4-1.3.0.jar +0 -0
  33. data/lib/org/apache/kafka/kafka-clients/0.11.0.0/kafka-clients-0.11.0.0.jar +0 -0
  34. data/lib/org/apache/logging/log4j/log4j-api/2.8.2/log4j-api-2.8.2.jar +0 -0
  35. data/lib/org/apache/logging/log4j/log4j-slf4j-impl/2.8.2/log4j-slf4j-impl-2.8.2.jar +0 -0
  36. data/lib/org/slf4j/slf4j-api/1.7.24/slf4j-api-1.7.24.jar +0 -0
  37. data/lib/org/slf4j/slf4j-api/1.7.25/slf4j-api-1.7.25.jar +0 -0
  38. data/lib/org/xerial/snappy/snappy-java/1.1.2.6/snappy-java-1.1.2.6.jar +0 -0
  39. data/spec/codecs/protobuf3_spec.rb +0 -147
  40. data/vendor/jar-dependencies/runtime-jars/kafka-clients-0.11.0.0.jar +0 -0
  41. data/vendor/jar-dependencies/runtime-jars/log4j-api-2.8.2.jar +0 -0
  42. data/vendor/jar-dependencies/runtime-jars/log4j-slf4j-impl-2.8.2.jar +0 -0
  43. data/vendor/jar-dependencies/runtime-jars/lz4-1.3.0.jar +0 -0
  44. data/vendor/jar-dependencies/runtime-jars/slf4j-api-1.7.24.jar +0 -0
  45. data/vendor/jar-dependencies/runtime-jars/slf4j-api-1.7.25.jar +0 -0
  46. data/vendor/jar-dependencies/runtime-jars/snappy-java-1.1.2.6.jar +0 -0
@@ -4,131 +4,268 @@ require 'logstash/util/charset'
 require 'google/protobuf' # for protobuf3
 require 'protocol_buffers' # https://github.com/codekitchen/ruby-protocol-buffers, for protobuf2
 
-# This codec converts protobuf encoded messages into logstash events and vice versa.
+# Monkey-patch the `Google::Protobuf::DescriptorPool` with a mutex for exclusive
+# access.
+#
+# The DescriptorPool instance is not thread-safe when loading protobuf
+# definitions. This can cause unrecoverable errors when registering multiple
+# concurrent pipelines that try to register the same dependency. The
+# DescriptorPool instance is global to the JVM and shared among all pipelines.
+class << Google::Protobuf::DescriptorPool
+  def with_lock
+    if !@mutex
+      @mutex = Mutex.new
+    end
+
+    return @mutex
+  end
+end
+
+# This codec converts protobuf encoded messages into logstash events and vice versa.
 #
 # Requires the protobuf definitions as ruby files. You can create those using the [ruby-protoc compiler](https://github.com/codekitchen/ruby-protocol-buffers).
-#
-# The following shows a usage example for decoding events from a kafka stream:
+#
+# The following shows a usage example for decoding protobuf 2 encoded events from a kafka stream:
 # [source,ruby]
-# kafka
+# kafka
 # {
 #   zk_connect => "127.0.0.1"
 #   topic_id => "your_topic_goes_here"
-#   codec => protobuf
+#   key_deserializer_class => "org.apache.kafka.common.serialization.ByteArrayDeserializer"
+#   value_deserializer_class => "org.apache.kafka.common.serialization.ByteArrayDeserializer"
+#   codec => protobuf
 #   {
 #     class_name => "Animal::Unicorn"
 #     include_path => ['/path/to/protobuf/definitions/UnicornProtobuf.pb.rb']
 #   }
 # }
 #
+# Same example for protobuf 3:
+# [source,ruby]
+# kafka
+# {
+#   zk_connect => "127.0.0.1"
+#   topic_id => "your_topic_goes_here"
+#   key_deserializer_class => "org.apache.kafka.common.serialization.ByteArrayDeserializer"
+#   value_deserializer_class => "org.apache.kafka.common.serialization.ByteArrayDeserializer"
+#   codec => protobuf
+#   {
+#     class_name => "Animal.Unicorn"
+#     include_path => ['/path/to/protobuf/definitions/UnicornProtobuf_pb.rb']
+#     protobuf_version => 3
+#   }
+# }
+#
+# Specifically for the kafka input: please set the deserializer classes as shown above.
 
 class LogStash::Codecs::Protobuf < LogStash::Codecs::Base
   config_name 'protobuf'
 
   # Name of the class to decode.
-  # If your protobuf definition contains modules, prepend them to the class name with double colons like so:
+  # If your protobuf 2 definition contains modules, prepend them to the class name with double colons like so:
   # [source,ruby]
-  # class_name => "Foods::Dairy::Cheese"
-  #
+  # class_name => "Animal::Horse::Unicorn"
+  #
   # This corresponds to a protobuf definition starting as follows:
   # [source,ruby]
-  # module Foods
-  #   module Dairy
-  #     class Cheese
-  #       # here are your field definitions.
-  #
+  # module Animal
+  #   module Horse
+  #     class Unicorn
+  #       # here are your field definitions.
+  #
+  # For protobuf 3 separate the modules with single dots.
+  # [source,ruby]
+  # class_name => "Animal.Horse.Unicorn"
+  # Check the bottom of the generated protobuf ruby file. It contains lines like this:
+  # [source,ruby]
+  # Animals.Unicorn = Google::Protobuf::DescriptorPool.generated_pool.lookup("Animals.Unicorn").msgclass
+  # Use the parameter for the lookup call as the class_name for the codec config.
+  #
   # If your class references other definitions: you only have to add the main class here.
   config :class_name, :validate => :string, :required => true
 
-  # List of absolute pathes to files with protobuf definitions.
-  # When using more than one file, make sure to arrange the files in reverse order of dependency so that each class is loaded before it is
+  # Relative path to the ruby file that contains class_name
+  #
+  # Relative path (from `protobuf_root_directory`) that holds the definition of the class specified in
+  # `class_name`.
+  #
+  # `class_file` and `include_path` cannot be used at the same time.
+  config :class_file, :validate => :string, :default => '', :required => false
+
+  # Absolute path to the root directory that contains all referenced/used dependencies
+  # of the main class (`class_name`) or any of its dependencies.
+  #
+  # For instance:
+  #
+  # pb3
+  #   ├── header
+  #   │   └── header_pb.rb
+  #   ├── messageA_pb.rb
+  #
+  # In this case `messageA_pb.rb` has an embedded message from `header/header_pb.rb`.
+  # If `class_file` is set to `messageA_pb.rb`, and `class_name` to
+  # `MessageA`, `protobuf_root_directory` must be set to `/path/to/pb3`, which includes
+  # both definitions.
+  config :protobuf_root_directory, :validate => :string, :required => false
+
+  # List of absolute pathes to files with protobuf definitions.
+  # When using more than one file, make sure to arrange the files in reverse order of dependency so that each class is loaded before it is
   # refered to by another.
-  #
-  # Example: a class _Cheese_ referencing another protobuf class _Milk_
+  #
+  # Example: a class _Unicorn_ referencing another protobuf class _Wings_
   # [source,ruby]
-  # module Foods
-  #   module Dairy
-  #     class Cheese
-  #       set_fully_qualified_name "Foods.Dairy.Cheese"
-  #       optional ::Foods::Cheese::Milk, :milk, 1
-  #       optional :int64, :unique_id, 2
-  #       # here be more field definitions
+  # module Animal
+  #   module Horse
+  #     class Unicorn
+  #       set_fully_qualified_name "Animal.Horse.Unicorn"
+  #       optional ::Animal::Bodypart::Wings, :wings, 1
+  #       optional :string, :name, 2
+  #       # here be more field definitions
   #
   # would be configured as
   # [source,ruby]
-  # include_path => ['/path/to/protobuf/definitions/Milk.pb.rb','/path/to/protobuf/definitions/Cheese.pb.rb']
+  # include_path => ['/path/to/protobuf/definitions/Wings.pb.rb','/path/to/protobuf/definitions/Unicorn.pb.rb']
   #
-  # When using the codec in an output plugin:
-  # * make sure to include all the desired fields in the protobuf definition, including timestamp.
-  #   Remove fields that are not part of the protobuf definition from the event by using the mutate filter.
-  # * the @ symbol is currently not supported in field names when loading the protobuf definitions for encoding. Make sure to call the timestamp field "timestamp"
-  #   instead of "@timestamp" in the protobuf file. Logstash event fields will be stripped of the leading @ before conversion.
-  #
-  config :include_path, :validate => :array, :required => true
-
-  # Protocol buffer version switch. Set to false (default) for version 2. Please note that the behaviour for enums varies between the versions.
+  # `class_file` and `include_path` cannot be used at the same time.
+  config :include_path, :validate => :array, :default => [], :required => false
+
+  # Protocol buffer version switch. Defaults to version 2. Please note that the behaviour for enums varies between the versions.
   # For protobuf 2 you will get integer representations for enums, for protobuf 3 you'll get string representations due to a different converter library.
   # Recommendation: use the translate plugin to restore previous behaviour when upgrading.
-  config :protobuf_version_3, :validate => :boolean, :required => true, :default=>false
+  config :protobuf_version, :validate => [2,3], :default => 2, :required => true
+
+  # To tolerate faulty messages that cannot be en/decoded, set this to false. Otherwise the pipeline will stop upon encountering a non decipherable message.
+  config :stop_on_error, :validate => :boolean, :default => false, :required => false
+
+  # Instruct the encoder to attempt converting data types to match the protobuf definitions. Available only for protobuf version 3.
+  config :pb3_encoder_autoconvert_types, :validate => :boolean, :default => true, :required => false
+
+  # Add meta information to `[@metadata][pb_oneof]` about which classes were chosen for [oneof](https://developers.google.com/protocol-buffers/docs/proto3#oneof) fields.
+  # Example values: for the protobuf definition
+  # ``` oneof :horse_type do
+  #       optional :unicorn, :message, 2, "FantasyUnicorn"
+  #       optional :pegasus, :message, 3, "FantasyPegasus"
+  #     end
+  # ```
+  # the field `[@metadata][pb_oneof][horse_type]` will be set to either `pegasus` or `unicorn`.
+  # Available only for protobuf version 3.
+  config :pb3_set_oneof_metainfo, :validate => :boolean, :default => false, :required => false
+
 
+  attr_reader :execution_context
+
+  # id of the pipeline whose events you want to read from.
+  def pipeline_id
+    respond_to?(:execution_context) && !execution_context.nil? ? execution_context.pipeline_id : "main"
+  end
 
   def register
     @metainfo_messageclasses = {}
     @metainfo_enumclasses = {}
-    include_path.each { |path| load_protobuf_definition(path) }
-    if @protobuf_version_3
-      @pb_builder = Google::Protobuf::DescriptorPool.generated_pool.lookup(class_name).msgclass
-    else
-      @pb_builder = pb2_create_instance(class_name)
+    @metainfo_pb2_enumlist = []
+    @pb3_typeconversion_tag = "_protobuf_type_converted"
+
+    if @include_path.length > 0 and not class_file.strip.empty?
+      raise LogStash::ConfigurationError, "Cannot use `include_path` and `class_file` at the same time"
+    end
+
+    if @include_path.length == 0 and class_file.strip.empty?
+      raise LogStash::ConfigurationError, "Need to specify `include_path` or `class_file`"
+    end
+
+    should_register = Google::Protobuf::DescriptorPool.generated_pool.lookup(class_name).nil?
+
+    unless @protobuf_root_directory.nil? or @protobuf_root_directory.strip.empty?
+      if !$LOAD_PATH.include? @protobuf_root_directory and should_register
+        $LOAD_PATH.unshift(@protobuf_root_directory)
+      end
+    end
+
+    @class_file = "#{@protobuf_root_directory}/#{@class_file}" unless (Pathname.new @class_file).absolute? or @class_file.empty?
+    # exclusive access while loading protobuf definitions
+    Google::Protobuf::DescriptorPool.with_lock.synchronize do
+      # load from `class_file`
+      load_protobuf_definition(@class_file) if should_register and !@class_file.empty?
+      # load from `include_path`
+      include_path.each { |path| load_protobuf_definition(path) } if include_path.length > 0 and should_register
+
+      if @protobuf_version == 3
+        @pb_builder = Google::Protobuf::DescriptorPool.generated_pool.lookup(class_name).msgclass
+
+      else
+        @pb_builder = pb2_create_instance(class_name)
+      end
     end
   end
 
+  # Pipelines using this plugin cannot be reloaded.
+  # https://github.com/elastic/logstash/pull/6499
+  #
+  # The DescriptorPool instance registers the protobuf classes (and
+  # dependencies) as global objects. This makes it very difficult to reload a
+  # pipeline, because `class_name` and all of its dependencies are already
+  # registered.
+  def reloadable?
+    return false
+  end
 
   def decode(data)
-    begin
-      if @protobuf_version_3
-        decoded = @pb_builder.decode(data.to_s)
-        h = pb3_deep_to_hash(decoded)
-      else
-        decoded = @pb_builder.parse(data.to_s)
-        h = decoded.to_hash
+    if @protobuf_version == 3
+      decoded = @pb_builder.decode(data.to_s)
+      if @pb3_set_oneof_metainfo
+        meta = pb3_get_oneof_metainfo(decoded, @class_name)
       end
-      yield LogStash::Event.new(h) if block_given?
-    rescue => e
-      @logger.warn("Couldn't decode protobuf: #{e.inspect}.")
-      raise e
+      h = pb3_deep_to_hash(decoded)
+    else
+      decoded = @pb_builder.parse(data.to_s)
+      h = decoded.to_hash
+    end
+    e = LogStash::Event.new(h)
+    if @protobuf_version == 3 and @pb3_set_oneof_metainfo
+      e.set("[@metadata][pb_oneof]", meta)
+    end
+    yield e if block_given?
+  rescue => ex
+    @logger.warn("Couldn't decode protobuf: #{ex.inspect}.")
+    if stop_on_error
+      raise ex
+    else # keep original message so that the user can debug it.
+      yield LogStash::Event.new("message" => data, "tags" => ["_protobufdecodefailure"])
     end
   end # def decode
 
 
   def encode(event)
-    if @protobuf_version_3
-      protobytes = pb3_encode_wrapper(event)
+    if @protobuf_version == 3
+      protobytes = pb3_encode(event)
     else
-      protobytes = pb2_encode_wrapper(event)
+      protobytes = pb2_encode(event)
+    end
+    unless protobytes.nil? or protobytes.empty?
+      @on_event.call(event, protobytes)
     end
-    @on_event.call(event, protobytes)
   end # def encode
 
 
   private
   def pb3_deep_to_hash(input)
-    if input.class.ancestors.include? Google::Protobuf::MessageExts # it's a protobuf class
+    case input
+    when Google::Protobuf::MessageExts # it's a protobuf class
       result = Hash.new
       input.to_hash.each {|key, value|
         result[key] = pb3_deep_to_hash(value) # the key is required for the class lookup of enums.
-      }
-    elsif input.kind_of?(Array)
+      }
+    when ::Array
       result = []
       input.each {|value|
         result << pb3_deep_to_hash(value)
       }
-    elsif input.kind_of?(::Hash)
+    when ::Hash
       result = {}
       input.each {|key, value|
         result[key] = pb3_deep_to_hash(value)
       }
-    elsif input.instance_of? Symbol # is an Enum
+    when Symbol # is an Enum
       result = input.to_s.sub(':','')
     else
       result = input
@@ -136,240 +273,463 @@ class LogStash::Codecs::Protobuf < LogStash::Codecs::Base
     result
   end
 
-  def pb3_encode_wrapper(event)
+  def pb3_encode(event)
+
+    datahash = event.to_hash
+
+    is_recursive_call = !event.get('tags').nil? and event.get('tags').include? @pb3_typeconversion_tag
+    if is_recursive_call
+      datahash = pb3_remove_typeconversion_tag(datahash)
+    end
+    datahash = pb3_prepare_for_encoding(datahash)
+    if datahash.nil?
+      @logger.warn("Protobuf encoding error 4: empty data for event #{event.to_hash}")
+    end
+    if @pb_builder.nil?
+      @logger.warn("Protobuf encoding error 5: empty protobuf builder for class #{@class_name}")
+    end
+    pb_obj = @pb_builder.new(datahash)
+    @pb_builder.encode(pb_obj)
+
+  rescue ArgumentError => e
+    k = event.to_hash.keys.join(", ")
+    @logger.warn("Protobuf encoding error 1: Argument error (#{e.inspect}). Reason: probably mismatching protobuf definition. \
+      Required fields in the protobuf definition are: #{k} and fields must not begin with @ sign. The event has been discarded.")
+    nil
+  rescue TypeError => e
+    pb3_handle_type_errors(event, e, is_recursive_call, datahash)
+    nil
+  rescue => e
+    @logger.warn("Protobuf encoding error 3: #{e.inspect}. Event discarded. Input data: #{datahash}. The event has been discarded. Backtrace: #{e.backtrace}")
+    nil
+  end
+
+
+
+
+  def pb3_handle_type_errors(event, e, is_recursive_call, datahash)
     begin
-      data = pb3_encode(event.to_hash, @class_name)
-      pb_obj = @pb_builder.new(data)
-      @pb_builder.encode(pb_obj)
-    rescue ArgumentError => e
-      @logger.debug("Encoding error 2. Probably mismatching protobuf definition. Required fields in the protobuf definition are: " + event.to_hash.keys.join(", ") + " and the timestamp field name must not include a @. ")
-      raise e
-    rescue => e
-      @logger.debug("Couldn't generate protobuf: ${e}")
-      raise e
+      if is_recursive_call
+        @logger.warn("Protobuf encoding error 2.1: Type error (#{e.inspect}). Some types could not be converted. The event has been discarded. Type mismatches: #{mismatches}.")
+      else
+        if @pb3_encoder_autoconvert_types
+
+          msg = "Protobuf encoding error 2.2: Type error (#{e.inspect}). Will try to convert the data types. Original data: #{datahash}"
+          @logger.warn(msg)
+          mismatches = pb3_get_type_mismatches(datahash, "", @class_name)
+
+          event = pb3_convert_mismatched_types(event, mismatches)
+          # Add a (temporary) tag to handle the recursion stop
+          pb3_add_tag(event, @pb3_typeconversion_tag )
+          pb3_encode(event)
+        else
+          @logger.warn("Protobuf encoding error 2.3: Type error (#{e.inspect}). The event has been discarded. Try setting pb3_encoder_autoconvert_types => true for automatic type conversion.")
+        end
+      end
+    rescue TypeError => e
+      if @pb3_encoder_autoconvert_types
+        @logger.warn("Protobuf encoding error 2.4.1: (#{e.inspect}). Failed to convert data types. The event has been discarded. original data: #{datahash}")
+      else
+        @logger.warn("Protobuf encoding error 2.4.2: (#{e.inspect}). The event has been discarded.")
+      end
+      if @stop_on_error
+        raise e
+      end
+      nil
+    rescue => ex
+      @logger.warn("Protobuf encoding error 2.5: (#{e.inspect}). The event has been discarded. Auto-typecasting was on: #{@pb3_encoder_autoconvert_types}")
+      if @stop_on_error
+        raise ex
+      end
+      nil
     end
+  end # pb3_handle_type_errors
+
+
+  def pb3_get_type_mismatches(data, key_prefix, pb_class)
+    mismatches = []
+    data.to_hash.each do |key, value|
+      expected_type = pb3_get_expected_type(key, pb_class)
+      r = pb3_compare_datatypes(value, key, key_prefix, pb_class, expected_type)
+      mismatches.concat(r)
+    end # data.each
+    mismatches
   end
 
 
-  def pb3_encode(datahash, class_name)
-    next unless datahash.is_a?(::Hash)
+  def pb3_get_expected_type(key, pb_class)
+    pb_descriptor = Google::Protobuf::DescriptorPool.generated_pool.lookup(pb_class)
+
+    if !pb_descriptor.nil?
+      pb_builder = pb_descriptor.msgclass
+      pb_obj = pb_builder.new({})
+      v = pb_obj.send(key)
+
+      if !v.nil?
+        v.class
+      else
+        nil
+      end
+    end
+  end
+
+  def pb3_compare_datatypes(value, key, key_prefix, pb_class, expected_type)
+    mismatches = []
+
+    if value.nil?
+      is_mismatch = false
+    else
+      case value
+      when ::Hash, Google::Protobuf::MessageExts
+        is_mismatch = false
+        descriptor = Google::Protobuf::DescriptorPool.generated_pool.lookup(pb_class).lookup(key)
+        if !descriptor.subtype.nil?
+          class_of_nested_object = pb3_get_descriptorpool_name(descriptor.subtype.msgclass)
+          new_prefix = "#{key}."
+          recursive_mismatches = pb3_get_type_mismatches(value, new_prefix, class_of_nested_object)
+          mismatches.concat(recursive_mismatches)
+        end
+      when ::Array
+        expected_type = pb3_get_expected_type(key, pb_class)
+        is_mismatch = (expected_type != Google::Protobuf::RepeatedField)
+        child_type = Google::Protobuf::DescriptorPool.generated_pool.lookup(pb_class).lookup(key).type
+        value.each_with_index do | v, i |
+          new_prefix = "#{key}."
+          recursive_mismatches = pb3_compare_datatypes(v, i.to_s, new_prefix, pb_class, child_type)
+          mismatches.concat(recursive_mismatches)
+          is_mismatch |= recursive_mismatches.any?
+        end # do
+      else # is scalar data type
+
+        is_mismatch = ! pb3_is_scalar_datatype_match(expected_type, value.class)
+      end # if
+    end # if value.nil?
+
+    if is_mismatch
+      mismatches << {"key" => "#{key_prefix}#{key}", "actual_type" => value.class, "expected_type" => expected_type, "value" => value}
+    end
+    mismatches
+  end
+
+  def pb3_remove_typeconversion_tag(data)
+    # remove the tag that we added to the event because
+    # the protobuf definition might not have a field for tags
+    data['tags'].delete(@pb3_typeconversion_tag)
+    if data['tags'].length == 0
+      data.delete('tags')
+    end
+    data
+  end
+
+  def pb3_get_descriptorpool_name(child_class)
+    # make instance
+    inst = child_class.new
+    # get the lookup name for the Descriptorpool
+    inst.class.descriptor.name
+  end
+
+  def pb3_is_scalar_datatype_match(expected_type, actual_type)
+    if expected_type == actual_type
+      true
+    else
+      e = expected_type.to_s.downcase.to_sym
+      a = actual_type.to_s.downcase.to_sym
+      case e
+      # when :string, :integer
+      when :string
+        a == e
+      when :integer
+        a == e
+      when :float
+        a == :float || a == :integer
+      end
+    end
+  end
+
+
+  def pb3_convert_mismatched_types_getter(struct, key)
+    if struct.is_a? ::Hash
+      struct[key]
+    else
+      struct.get(key)
+    end
+  end
+
+  def pb3_convert_mismatched_types_setter(struct, key, value)
+    if struct.is_a? ::Hash
+      struct[key] = value
+    else
+      struct.set(key, value)
+    end
+    struct
+  end
+
+  def pb3_add_tag(event, tag )
+    if event.get('tags').nil?
+      event.set('tags', [tag])
+    else
+      existing_tags = event.get('tags')
+      event.set("tags", existing_tags << tag)
+    end
+  end
+
+  # Due to recursion on nested fields in the event object this method might be given an event (1st call) or a hash (2nd .. nth call)
+  # First call will be the event object, child objects will be hashes.
+  def pb3_convert_mismatched_types(struct, mismatches)
+    mismatches.each do | m |
+      key = m['key']
+      expected_type = m['expected_type']
+      actual_type = m['actual_type']
+      if key.include? "." # the mismatch is in a child object
+        levels = key.split(/\./) # key is something like http_user_agent.minor_version and needs to be splitted.
+        key = levels[0]
+        sub_levels = levels.drop(1).join(".")
+        new_mismatches = [{"key"=>sub_levels, "actual_type"=>m["actual_type"], "expected_type"=>m["expected_type"]}]
+        value = pb3_convert_mismatched_types_getter(struct, key)
+        new_value = pb3_convert_mismatched_types(value, new_mismatches)
+        struct = pb3_convert_mismatched_types_setter(struct, key, new_value )
+      else
+        value = pb3_convert_mismatched_types_getter(struct, key)
+        begin
+          case expected_type.to_s
+          when "Integer"
+            case actual_type.to_s
+            when "String"
+              new_value = value.to_i
+            when "Float"
+              if value.floor == value # convert values like 2.0 to 2, but not 2.1
+                new_value = value.to_i
+              end
+            end
+          when "String"
+            new_value = value.to_s
+          when "Float"
+            new_value = value.to_f
+          when "Boolean","TrueClass", "FalseClass"
+            new_value = value.to_s.downcase == "true"
+          end
+          if !new_value.nil?
+            struct = pb3_convert_mismatched_types_setter(struct, key, new_value )
+          end
+        rescue Exception => ex
+          @logger.debug("Protobuf encoding error 5: Could not convert types for protobuf encoding: #{ex}")
+        end
+      end # if key contains .
+    end # mismatches.each
+    struct
+  end
+
+  def pb3_prepare_for_encoding(datahash)
+    # 0) Remove empty fields.
+    datahash = datahash.select { |key, value| !value.nil? }
 
     # Preparation: the data cannot be encoded until certain criteria are met:
     # 1) remove @ signs from keys.
     # 2) convert timestamps and other objects to strings
     datahash = datahash.inject({}){|x,(k,v)| x[k.gsub(/@/,'').to_sym] = (should_convert_to_string?(v) ? v.to_s : v); x}
-
-    # Check if any of the fields in this hash are protobuf classes and if so, create a builder for them.
-    meta = @metainfo_messageclasses[class_name]
-    if meta
-      meta.map do | (field_name,class_name) |
-        key = field_name.to_sym
-        if datahash.include?(key)
-          original_value = datahash[key]
-          datahash[key] =
-            if original_value.is_a?(::Array)
-              # make this field an array/list of protobuf objects
-              # value is a list of hashed complex objects, each of which needs to be protobuffed and
-              # put back into the list.
-              original_value.map { |x| pb3_encode(x, class_name) }
-              original_value
-            else
-              r = pb3_encode(original_value, class_name)
-              builder = Google::Protobuf::DescriptorPool.generated_pool.lookup(class_name).msgclass
-              builder.new(r)
-            end # if is array
-        end # if datahash_include
-      end # do
-    end # if meta
-    # Check if any of the fields in this hash are enum classes and if so, create a builder for them.
-    meta = @metainfo_enumclasses[class_name]
-    if meta
-      meta.map do | (field_name,class_name) |
-        key = field_name.to_sym
-        if datahash.include?(key)
-          original_value = datahash[key]
-          datahash[key] =
-            if original_value.is_a?(::Array)
-              original_value.map { |x| pb3_encode(x, class_name) }
-              original_value
-            else
-              if original_value.is_a?(Fixnum)
-                original_value # integers will be automatically converted into enum
-              else
-                # feature request: support for providing integers as strings or symbols.
-                # not fully tested yet:
-                # begin
-                #   enum_lookup_name = "#{class_name}::#{original_value}"
-                #   enum_lookup_name.split('::').inject(Object) do |mod, class_name|
-                #     mod.const_get(class_name)
-                #   end # do
-                # rescue => e
-                #   @logger.debug("Encoding error 3: could not translate #{original_value} into enum. ${e}")
-                #   raise e
-                # end
-              end # if is a fixnum
-            end # if is array
-        end # if datahash_include
-      end # do
-    end # if meta
+
+    datahash.each do |key, value|
+      datahash[key] = pb3_prepare_for_encoding(value) if value.is_a?(Hash)
+    end
+
     datahash
   end
 
-  def pb2_encode_wrapper(event)
-    begin
-      data = pb2_encode(event.to_hash, @class_name)
-      msg = @pb_builder.new(data)
-      msg.serialize_to_string
-    rescue NoMethodError => e
-      @logger.debug("Encoding error 2. Probably mismatching protobuf definition. Required fields in the protobuf definition are: " + event.to_hash.keys.join(", ") + " and the timestamp field name must not include a @. ")
-      raise e
-    rescue => e
-      @logger.debug("Encoding error 1: ${e}")
-      raise e
-    end
+  def pb3_get_oneof_metainfo(pb_object, pb_class_name)
+    meta = {}
+    pb_class = Google::Protobuf::DescriptorPool.generated_pool.lookup(pb_class_name).msgclass
+
+    pb_class.descriptor.each_oneof { |field|
+      field.each { | group_option |
+        if !pb_object.send(group_option.name).nil?
+          meta[field.name] = group_option.name
+        end
+      }
+    }
+
+    pb_class.descriptor.select{ |field| field.type == :message }.each { | field |
+      # recurse over nested protobuf classes
+      pb_sub_object = pb_object.send(field.name)
+      if !pb_sub_object.nil? and !field.subtype.nil?
+        pb_sub_class = pb3_get_descriptorpool_name(field.subtype.msgclass)
+        meta[field.name] = pb3_get_oneof_metainfo(pb_sub_object, pb_sub_class)
+      end
+    }
+
+    meta
   end
 
 
-  def pb2_encode(datahash, class_name)
-    next unless datahash.is_a?(::Hash)
+  def pb2_encode(event)
+    data = pb2_prepare_for_encoding(event.to_hash, @class_name)
+    msg = @pb_builder.new(data)
+    msg.serialize_to_string
+  rescue NoMethodError => e
+    @logger.warn("Encoding error 2. Probably mismatching protobuf definition. Required fields in the protobuf definition are: " + event.to_hash.keys.join(", ") + " and the timestamp field name must not include a @. ")
+    raise e
+  rescue => e
+    @logger.warn("Encoding error 1: #{e.inspect}")
+    raise e
+  end
 
-    # Preparation: the data cannot be encoded until certain criteria are met:
-    # 1) remove @ signs from keys.
-    # 2) convert timestamps and other objects to strings
-    datahash = ::Hash[datahash.map{|(k,v)| [k.to_s.dup.gsub(/@/,''), (should_convert_to_string?(v) ? v.to_s : v)] }]
-
-    # Check if any of the fields in this hash are protobuf classes and if so, create a builder for them.
-    meta = @metainfo_messageclasses[class_name]
-    if meta
-      meta.map do | (k,class_name) |
-        if datahash.include?(k)
-          original_value = datahash[k]
-          p
-          datahash[k] =
-            if original_value.is_a?(::Array)
-              # make this field an array/list of protobuf objects
-              # value is a list of hashed complex objects, each of which needs to be protobuffed and
-              # put back into the list.
-              original_value.map { |x| pb2_encode(x, class_name) }
-              original_value
-            else
-              proto_obj = pb2_create_instance(class_name)
-              proto_obj.new(pb2_encode(original_value, class_name))
-            end # if is array
-        end # if datahash_include
-      end # do
-    end # if meta
 
+  def pb2_prepare_for_encoding(datahash, class_name)
+    if datahash.is_a?(::Hash)
+      # Preparation: the data cannot be encoded until certain criteria are met:
+      # 1) remove @ signs from keys.
+      # 2) convert timestamps and other objects to strings
+      datahash = ::Hash[datahash.map{|(k,v)| [k.to_s.dup.gsub(/@/,''), (should_convert_to_string?(v) ? v.to_s : v)] }]
+
+      # Check if any of the fields in this hash are protobuf classes and if so, create a builder for them.
+      meta = @metainfo_messageclasses[class_name]
+      if meta
+        meta.map do | (k,c) |
+          if datahash.include?(k)
+            original_value = datahash[k]
+            datahash[k] =
+              if original_value.is_a?(::Array)
+                # make this field an array/list of protobuf objects
+                # value is a list of hashed complex objects, each of which needs to be protobuffed and
+                # put back into the list.
+                original_value.map { |x| pb2_prepare_for_encoding(x, c) }
+                original_value
+              else
+                proto_obj = pb2_create_instance(c)
+                proto_obj.new(pb2_prepare_for_encoding(original_value, c)) # this line is reached in the colourtest for an enum. Enums should not be instantiated. Should enums even be in the messageclasses? I dont think so! TODO bug
+              end # if is array
+          end # if datahash_include
+        end # do
+      end # if meta
+    end
     datahash
   end
 
 
   def should_convert_to_string?(v)
-    !(v.is_a?(Fixnum) || v.is_a?(::Hash) || v.is_a?(::Array) || [true, false].include?(v))
+    !(v.is_a?(Integer) || v.is_a?(Float) || v.is_a?(::Hash) || v.is_a?(::Array) || [true, false].include?(v))
   end
 
-
+
   def pb2_create_instance(name)
-    begin
-      @logger.debug("Creating instance of " + name)
-      name.split('::').inject(Object) { |n,c| n.const_get c }
-    end
+    @logger.debug("Creating instance of " + name)
+    name.split('::').inject(Object) { |n,c| n.const_get c }
   end
 
 
   def pb3_metadata_analyis(filename)
+
     regex_class_name = /\s*add_message "(?<name>.+?)" do\s+/ # TODO optimize both regexes for speed (negative lookahead)
     regex_pbdefs = /\s*(optional|repeated)(\s*):(?<name>.+),(\s*):(?<type>\w+),(\s*)(?<position>\d+)(, \"(?<enum_class>.*?)\")?/
-    # Example
-    #   optional :father, :message, 10, "Unicorn"
-    #   repeated :favourite_numbers, :int32, 5
-    begin
-      class_name = ""
-      type = ""
-      field_name = ""
-      File.readlines(filename).each do |line|
-        if ! (line =~ regex_class_name).nil?
-          class_name = $1
-          @metainfo_messageclasses[class_name] = {}
-          @metainfo_enumclasses[class_name] = {}
+    class_name = ""
+    type = ""
+    field_name = ""
+    File.readlines(filename).each do |line|
+      if ! (line =~ regex_class_name).nil?
+        class_name = $1
+        @metainfo_messageclasses[class_name] = {}
+        @metainfo_enumclasses[class_name] = {}
+      end # if
+      if ! (line =~ regex_pbdefs).nil?
+        field_name = $1
+        type = $2
+        field_class_name = $4
+        if type == "message"
+          @metainfo_messageclasses[class_name][field_name] = field_class_name
+        elsif type == "enum"
+          @metainfo_enumclasses[class_name][field_name] = field_class_name
         end
-        if ! (line =~ regex_pbdefs).nil?
-          field_name = $1
-          type = $2
-          field_class_name = $4
-          if type == "message"
-            @metainfo_messageclasses[class_name][field_name] = field_class_name
-          elsif type == "enum"
-            @metainfo_enumclasses[class_name][field_name] = field_class_name
-          end
-        end
-      end
-    rescue Exception => e
-      @logger.warn("Error 3: unable to read pb definition from file " + filename+ ". Reason: #{e.inspect}. Last settings were: class #{class_name} field #{field_name} type #{type}. Backtrace: " + e.backtrace.inspect.to_s)
-      raise e
-    end
+      end # if
+    end # readlines
     if class_name.nil?
       @logger.warn("Error 4: class name not found in file " + filename)
       raise ArgumentError, "Invalid protobuf file: " + filename
-    end
+    end
+  rescue Exception => e
+    @logger.warn("Error 3: unable to read pb definition from file " + filename+ ". Reason: #{e.inspect}. Last settings were: class #{class_name} field #{field_name} type #{type}. Backtrace: " + e.backtrace.inspect.to_s)
+    raise e
   end
 
+
+
   def pb2_metadata_analyis(filename)
-    regex_class_name = /\s*class\s*(?<name>.+?)\s+/
-    regex_module_name = /\s*module\s*(?<name>.+?)\s+/
+    regex_class_start = /\s*set_fully_qualified_name \"(?<name>.+)\".*?/
+    regex_enum_name = /\s*include ..ProtocolBuffers..Enum\s*/
     regex_pbdefs = /\s*(optional|repeated)(\s*):(?<type>.+),(\s*):(?<name>\w+),(\s*)(?<position>\d+)/
     # now we also need to find out which class it contains and the protobuf definitions in it.
     # We'll unfortunately need that later so that we can create nested objects.
-    begin
-      class_name = ""
-      type = ""
-      field_name = ""
-      classname_found = false
-      File.readlines(filename).each do |line|
-        if ! (line =~ regex_module_name).nil? && !classname_found # because it might be declared twice in the file
-          class_name << $1
-          class_name << "::"
-
-        end
-        if ! (line =~ regex_class_name).nil? && !classname_found # because it might be declared twice in the file
-          class_name << $1
-          @metainfo_messageclasses[class_name] = {}
-          classname_found = true
+
+    class_name = ""
+    type = ""
+    field_name = ""
+    is_enum_class = false
+
+    File.readlines(filename).each do |line|
+      if ! (line =~ regex_enum_name).nil?
+        is_enum_class= true
+      end
+
+      if ! (line =~ regex_class_start).nil?
+        class_name = $1.gsub('.',"::").split('::').map {|word| word.capitalize}.join('::')
+        if is_enum_class
+          @metainfo_pb2_enumlist << class_name.downcase
         end
-        if ! (line =~ regex_pbdefs).nil?
-          type = $1
-          field_name = $2
-          if type =~ /::/
-            @metainfo_messageclasses[class_name][field_name] = type.gsub!(/^:/,"")
-
+        is_enum_class= false # reset when next class starts
+      end
+      if ! (line =~ regex_pbdefs).nil?
+        type = $1
+        field_name = $2
+        if type =~ /::/
+          clean_type = type.gsub(/^:/,"")
+          e = @metainfo_pb2_enumlist.include? clean_type.downcase
+
+          if e
+            if not @metainfo_enumclasses.key? class_name
+              @metainfo_enumclasses[class_name] = {}
+            end
+            @metainfo_enumclasses[class_name][field_name] = clean_type
+          else
+            if not @metainfo_messageclasses.key? class_name
+              @metainfo_messageclasses[class_name] = {}
+            end
+            @metainfo_messageclasses[class_name][field_name] = clean_type
           end
        end
      end
-    rescue Exception => e
-      @logger.warn("Error 3: unable to read pb definition from file " + filename+ ". Reason: #{e.inspect}. Last settings were: class #{class_name} field #{field_name} type #{type}. Backtrace: " + e.backtrace.inspect.to_s)
-      raise e
     end
     if class_name.nil?
      @logger.warn("Error 4: class name not found in file " + filename)
      raise ArgumentError, "Invalid protobuf file: " + filename
-    end
+    end
+  rescue LoadError => e
+    raise ArgumentError.new("Could not load file: " + filename + ". Please try to use absolute pathes. Current working dir: " + Dir.pwd + ", loadpath: " + $LOAD_PATH.join(" "))
+  rescue => e
+
+    @logger.warn("Error 3: unable to read pb definition from file " + filename+ ". Reason: #{e.inspect}. Last settings were: class #{class_name} field #{field_name} type #{type}. Backtrace: " + e.backtrace.inspect.to_s)
+    raise e
  end
 
+
   def load_protobuf_definition(filename)
-    begin
-      if filename.end_with? ('.rb')
-        @logger.debug("Including protobuf file: " + filename)
-        require filename
-        if @protobuf_version_3
-          pb3_metadata_analyis(filename)
-        else
-          pb2_metadata_analyis(filename)
+    if filename.end_with? ('.rb')
+      # Add to the loading path of the protobuf definitions
+      if (Pathname.new filename).absolute?
+        begin
+          require filename
+        rescue Exception => e
+          @logger.error("Unable to load file: #{filename}. Reason: #{e.inspect}")
+          raise e
         end
-      else
-        @logger.warn("Not a ruby file: " + filename)
       end
+
+      if @protobuf_version == 3
+        pb3_metadata_analyis(filename)
+      else
+        pb2_metadata_analyis(filename)
+      end
+
+    else
+      @logger.warn("Not a ruby file: " + filename)
     end
   end
 
-
 end # class LogStash::Codecs::Protobuf
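
For reference, here is a minimal decoder pipeline sketch that exercises the options introduced in this release. It is illustrative only: the topic name, the /path/to/pb3 directory, and the Animal.Unicorn class with its animal_pb.rb file are placeholders, not part of this gem.

  input {
    kafka {
      topic_id => "your_topic_goes_here"
      # byte array deserializers as recommended in the codec docs above
      key_deserializer_class => "org.apache.kafka.common.serialization.ByteArrayDeserializer"
      value_deserializer_class => "org.apache.kafka.common.serialization.ByteArrayDeserializer"
      codec => protobuf {
        class_name => "Animal.Unicorn"          # protobuf 3 names use dots, not double colons
        class_file => 'animal_pb.rb'            # relative to protobuf_root_directory
        protobuf_root_directory => '/path/to/pb3'
        protobuf_version => 3
        stop_on_error => false                  # default: tolerate undecodable messages
        pb3_set_oneof_metainfo => true
      }
    }
  }

With stop_on_error => false (the default), messages that cannot be decoded no longer halt the pipeline; the raw payload is passed through in the message field of an event tagged _protobufdecodefailure. With pb3_set_oneof_metainfo enabled, the chosen oneof branches are available under [@metadata][pb_oneof].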