logstash-codec-protobuf 1.0.5 → 1.2.5

This diff compares the content of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (46)
  1. checksums.yaml +5 -5
  2. data/CHANGELOG.md +20 -1
  3. data/Gemfile +1 -1
  4. data/LICENSE +2 -3
  5. data/README.md +147 -40
  6. data/docs/index.asciidoc +173 -41
  7. data/lib/logstash/codecs/protobuf.rb +598 -238
  8. data/logstash-codec-protobuf.gemspec +3 -3
  9. data/spec/codecs/{protobuf_spec.rb → pb2_spec.rb} +81 -54
  10. data/spec/codecs/pb3_decode_spec.rb +445 -0
  11. data/spec/codecs/pb3_encode_spec.rb +243 -0
  12. data/spec/helpers/pb2/event.pb.rb +19 -0
  13. data/spec/helpers/pb2/event.proto +12 -0
  14. data/spec/helpers/pb2/header/header.pb.rb +16 -0
  15. data/spec/helpers/pb2/header/header.proto +8 -0
  16. data/spec/helpers/pb3/FantasyHorse_pb.rb +44 -0
  17. data/spec/helpers/pb3/ProbeResult_pb.rb +26 -0
  18. data/spec/helpers/pb3/dnsmessage_pb.rb +82 -0
  19. data/spec/helpers/pb3/events.proto3 +10 -0
  20. data/spec/helpers/pb3/events_pb.rb +17 -0
  21. data/spec/helpers/pb3/header/header.proto3 +7 -0
  22. data/spec/helpers/pb3/header/header_pb.rb +12 -0
  23. data/spec/helpers/pb3/integertest_pb.rb +20 -0
  24. data/spec/helpers/pb3/messageA.proto3 +12 -0
  25. data/spec/helpers/pb3/messageA_pb.rb +16 -0
  26. data/spec/helpers/pb3/messageB.proto3 +12 -0
  27. data/spec/helpers/pb3/messageB_pb.rb +16 -0
  28. data/spec/helpers/pb3/rum2_pb.rb +87 -0
  29. data/spec/helpers/pb3/rum3_pb.rb +87 -0
  30. data/spec/helpers/pb3/rum_pb.rb +87 -0
  31. metadata +62 -34
  32. data/lib/net/jpountz/lz4/lz4/1.3.0/lz4-1.3.0.jar +0 -0
  33. data/lib/org/apache/kafka/kafka-clients/0.11.0.0/kafka-clients-0.11.0.0.jar +0 -0
  34. data/lib/org/apache/logging/log4j/log4j-api/2.8.2/log4j-api-2.8.2.jar +0 -0
  35. data/lib/org/apache/logging/log4j/log4j-slf4j-impl/2.8.2/log4j-slf4j-impl-2.8.2.jar +0 -0
  36. data/lib/org/slf4j/slf4j-api/1.7.24/slf4j-api-1.7.24.jar +0 -0
  37. data/lib/org/slf4j/slf4j-api/1.7.25/slf4j-api-1.7.25.jar +0 -0
  38. data/lib/org/xerial/snappy/snappy-java/1.1.2.6/snappy-java-1.1.2.6.jar +0 -0
  39. data/spec/codecs/protobuf3_spec.rb +0 -147
  40. data/vendor/jar-dependencies/runtime-jars/kafka-clients-0.11.0.0.jar +0 -0
  41. data/vendor/jar-dependencies/runtime-jars/log4j-api-2.8.2.jar +0 -0
  42. data/vendor/jar-dependencies/runtime-jars/log4j-slf4j-impl-2.8.2.jar +0 -0
  43. data/vendor/jar-dependencies/runtime-jars/lz4-1.3.0.jar +0 -0
  44. data/vendor/jar-dependencies/runtime-jars/slf4j-api-1.7.24.jar +0 -0
  45. data/vendor/jar-dependencies/runtime-jars/slf4j-api-1.7.25.jar +0 -0
  46. data/vendor/jar-dependencies/runtime-jars/snappy-java-1.1.2.6.jar +0 -0
@@ -4,131 +4,268 @@ require 'logstash/util/charset'
  require 'google/protobuf' # for protobuf3
  require 'protocol_buffers' # https://github.com/codekitchen/ruby-protocol-buffers, for protobuf2

- # This codec converts protobuf encoded messages into logstash events and vice versa.
+ # Monkey-patch the `Google::Protobuf::DescriptorPool` with a mutex for exclusive
+ # access.
+ #
+ # The DescriptorPool instance is not thread-safe when loading protobuf
+ # definitions. This can cause unrecoverable errors when registering multiple
+ # concurrent pipelines that try to register the same dependency. The
+ # DescriptorPool instance is global to the JVM and shared among all pipelines.
+ class << Google::Protobuf::DescriptorPool
+   def with_lock
+     if !@mutex
+       @mutex = Mutex.new
+     end
+
+     return @mutex
+   end
+ end
+
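The lock is meant to be held around any definition loading; `register` further down does exactly this. A minimal sketch of the pattern (the path is illustrative):

    # serialize all registration work on the shared, process-global pool
    Google::Protobuf::DescriptorPool.with_lock.synchronize do
      load_protobuf_definition('/path/to/protobuf/definitions/UnicornProtobuf_pb.rb')
    end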
+ # This codec converts protobuf encoded messages into logstash events and vice versa.
  #
  # Requires the protobuf definitions as ruby files. You can create those using the [ruby-protoc compiler](https://github.com/codekitchen/ruby-protocol-buffers).
- #
- # The following shows a usage example for decoding events from a kafka stream:
+ #
+ # The following shows a usage example for decoding protobuf 2 encoded events from a kafka stream:
  # [source,ruby]
- # kafka
+ # kafka
  # {
  #   zk_connect => "127.0.0.1"
  #   topic_id => "your_topic_goes_here"
- #   codec => protobuf
+ #   key_deserializer_class => "org.apache.kafka.common.serialization.ByteArrayDeserializer"
+ #   value_deserializer_class => "org.apache.kafka.common.serialization.ByteArrayDeserializer"
+ #   codec => protobuf
  #   {
  #     class_name => "Animal::Unicorn"
  #     include_path => ['/path/to/protobuf/definitions/UnicornProtobuf.pb.rb']
  #   }
  # }
  #
+ # Same example for protobuf 3:
+ # [source,ruby]
+ # kafka
+ # {
+ #   zk_connect => "127.0.0.1"
+ #   topic_id => "your_topic_goes_here"
+ #   key_deserializer_class => "org.apache.kafka.common.serialization.ByteArrayDeserializer"
+ #   value_deserializer_class => "org.apache.kafka.common.serialization.ByteArrayDeserializer"
+ #   codec => protobuf
+ #   {
+ #     class_name => "Animal.Unicorn"
+ #     include_path => ['/path/to/protobuf/definitions/UnicornProtobuf_pb.rb']
+ #     protobuf_version => 3
+ #   }
+ # }
+ #
+ # Specifically for the kafka input: please set the deserializer classes as shown above.

  class LogStash::Codecs::Protobuf < LogStash::Codecs::Base
    config_name 'protobuf'

    # Name of the class to decode.
-   # If your protobuf definition contains modules, prepend them to the class name with double colons like so:
+   # If your protobuf 2 definition contains modules, prepend them to the class name with double colons like so:
    # [source,ruby]
-   # class_name => "Foods::Dairy::Cheese"
-   #
+   # class_name => "Animal::Horse::Unicorn"
+   #
    # This corresponds to a protobuf definition starting as follows:
    # [source,ruby]
-   # module Foods
-   #   module Dairy
-   #     class Cheese
-   #       # here are your field definitions.
-   #
+   # module Animal
+   #   module Horse
+   #     class Unicorn
+   #       # here are your field definitions.
+   #
+   # For protobuf 3 separate the modules with single dots.
+   # [source,ruby]
+   # class_name => "Animal.Horse.Unicorn"
+   # Check the bottom of the generated protobuf ruby file. It contains lines like this:
+   # [source,ruby]
+   # Animals.Unicorn = Google::Protobuf::DescriptorPool.generated_pool.lookup("Animals.Unicorn").msgclass
+   # Use the parameter for the lookup call as the class_name for the codec config.
+   #
    # If your class references other definitions: you only have to add the main class here.
    config :class_name, :validate => :string, :required => true

-   # List of absolute pathes to files with protobuf definitions.
-   # When using more than one file, make sure to arrange the files in reverse order of dependency so that each class is loaded before it is
+   # Relative path to the ruby file that contains class_name
+   #
+   # Relative path (from `protobuf_root_directory`) that holds the definition of the class specified in
+   # `class_name`.
+   #
+   # `class_file` and `include_path` cannot be used at the same time.
+   config :class_file, :validate => :string, :default => '', :required => false
+
+   # Absolute path to the root directory that contains all referenced/used dependencies
+   # of the main class (`class_name`) or any of its dependencies.
+   #
+   # For instance:
+   #
+   # pb3
+   # ├── header
+   # │   └── header_pb.rb
+   # ├── messageA_pb.rb
+   #
+   # In this case `messageA_pb.rb` has an embedded message from `header/header_pb.rb`.
+   # If `class_file` is set to `messageA_pb.rb`, and `class_name` to
+   # `MessageA`, `protobuf_root_directory` must be set to `/path/to/pb3`, which includes
+   # both definitions.
+   config :protobuf_root_directory, :validate => :string, :required => false
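For the layout above, a decoder could then use `class_file` instead of `include_path`; a sketch (paths and the `MessageA` name are illustrative):

    codec => protobuf
    {
      class_name => "MessageA"
      class_file => 'messageA_pb.rb'
      protobuf_root_directory => '/path/to/pb3'
      protobuf_version => 3
    }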
+
+   # List of absolute paths to files with protobuf definitions.
+   # When using more than one file, make sure to arrange the files in reverse order of dependency so that each class is loaded before it is
    # referred to by another.
-   #
-   # Example: a class _Cheese_ referencing another protobuf class _Milk_
+   #
+   # Example: a class _Unicorn_ referencing another protobuf class _Wings_
    # [source,ruby]
-   # module Foods
-   #   module Dairy
-   #     class Cheese
-   #       set_fully_qualified_name "Foods.Dairy.Cheese"
-   #       optional ::Foods::Cheese::Milk, :milk, 1
-   #       optional :int64, :unique_id, 2
-   #       # here be more field definitions
+   # module Animal
+   #   module Horse
+   #     class Unicorn
+   #       set_fully_qualified_name "Animal.Horse.Unicorn"
+   #       optional ::Animal::Bodypart::Wings, :wings, 1
+   #       optional :string, :name, 2
+   #       # here be more field definitions
    #
    # would be configured as
    # [source,ruby]
-   # include_path => ['/path/to/protobuf/definitions/Milk.pb.rb','/path/to/protobuf/definitions/Cheese.pb.rb']
+   # include_path => ['/path/to/protobuf/definitions/Wings.pb.rb','/path/to/protobuf/definitions/Unicorn.pb.rb']
    #
-   # When using the codec in an output plugin:
-   # * make sure to include all the desired fields in the protobuf definition, including timestamp.
-   #   Remove fields that are not part of the protobuf definition from the event by using the mutate filter.
-   # * the @ symbol is currently not supported in field names when loading the protobuf definitions for encoding. Make sure to call the timestamp field "timestamp"
-   #   instead of "@timestamp" in the protobuf file. Logstash event fields will be stripped of the leading @ before conversion.
-   #
-   config :include_path, :validate => :array, :required => true
-
-   # Protocol buffer version switch. Set to false (default) for version 2. Please note that the behaviour for enums varies between the versions.
+   # `class_file` and `include_path` cannot be used at the same time.
+   config :include_path, :validate => :array, :default => [], :required => false
+
+   # Protocol buffer version switch. Defaults to version 2. Please note that the behaviour for enums varies between the versions.
    # For protobuf 2 you will get integer representations for enums, for protobuf 3 you'll get string representations due to a different converter library.
    # Recommendation: use the translate plugin to restore previous behaviour when upgrading.
-   config :protobuf_version_3, :validate => :boolean, :required => true, :default=>false
+   config :protobuf_version, :validate => [2,3], :default => 2, :required => true
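The translate recommendation might look like the following sketch; the `colour` field and its dictionary are made up, and option names vary between translate plugin versions:

    filter {
      translate {
        field => "colour"                               # protobuf 3 decodes enums to strings
        destination => "colour"
        override => true
        dictionary => { "BLUE" => "0" "GREEN" => "1" }  # restore protobuf-2-style integers
      }
    }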
+
+   # To tolerate faulty messages that cannot be en/decoded, set this to false. Otherwise the pipeline will stop upon encountering a non-decipherable message.
+   config :stop_on_error, :validate => :boolean, :default => false, :required => false
+
+   # Instruct the encoder to attempt converting data types to match the protobuf definitions. Available only for protobuf version 3.
+   config :pb3_encoder_autoconvert_types, :validate => :boolean, :default => true, :required => false
+
+   # Add meta information to `[@metadata][pb_oneof]` about which classes were chosen for [oneof](https://developers.google.com/protocol-buffers/docs/proto3#oneof) fields.
+   # Example values: for the protobuf definition
+   # ```
+   # oneof :horse_type do
+   #   optional :unicorn, :message, 2, "FantasyUnicorn"
+   #   optional :pegasus, :message, 3, "FantasyPegasus"
+   # end
+   # ```
+   # the field `[@metadata][pb_oneof][horse_type]` will be set to either `pegasus` or `unicorn`.
+   # Available only for protobuf version 3.
+   config :pb3_set_oneof_metainfo, :validate => :boolean, :default => false, :required => false
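A protobuf 3 decoder that exercises these options might be configured as follows (class name and path are illustrative):

    codec => protobuf
    {
      class_name => "Animal.Unicorn"
      include_path => ['/path/to/protobuf/definitions/UnicornProtobuf_pb.rb']
      protobuf_version => 3
      stop_on_error => false
      pb3_encoder_autoconvert_types => true
      pb3_set_oneof_metainfo => true
    }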
+

+   attr_reader :execution_context
+
+   # id of the pipeline whose events you want to read from.
+   def pipeline_id
+     respond_to?(:execution_context) && !execution_context.nil? ? execution_context.pipeline_id : "main"
+   end

    def register
      @metainfo_messageclasses = {}
      @metainfo_enumclasses = {}
-     include_path.each { |path| load_protobuf_definition(path) }
-     if @protobuf_version_3
-       @pb_builder = Google::Protobuf::DescriptorPool.generated_pool.lookup(class_name).msgclass
-     else
-       @pb_builder = pb2_create_instance(class_name)
+     @metainfo_pb2_enumlist = []
+     @pb3_typeconversion_tag = "_protobuf_type_converted"
+
+     if @include_path.length > 0 and not class_file.strip.empty?
+       raise LogStash::ConfigurationError, "Cannot use `include_path` and `class_file` at the same time"
+     end
+
+     if @include_path.length == 0 and class_file.strip.empty?
+       raise LogStash::ConfigurationError, "Need to specify `include_path` or `class_file`"
+     end
+
+     should_register = Google::Protobuf::DescriptorPool.generated_pool.lookup(class_name).nil?
+
+     unless @protobuf_root_directory.nil? or @protobuf_root_directory.strip.empty?
+       if !$LOAD_PATH.include? @protobuf_root_directory and should_register
+         $LOAD_PATH.unshift(@protobuf_root_directory)
+       end
+     end
+
+     @class_file = "#{@protobuf_root_directory}/#{@class_file}" unless (Pathname.new @class_file).absolute? or @class_file.empty?
+     # exclusive access while loading protobuf definitions
+     Google::Protobuf::DescriptorPool.with_lock.synchronize do
+       # load from `class_file`
+       load_protobuf_definition(@class_file) if should_register and !@class_file.empty?
+       # load from `include_path`
+       include_path.each { |path| load_protobuf_definition(path) } if include_path.length > 0 and should_register
+
+       if @protobuf_version == 3
+         @pb_builder = Google::Protobuf::DescriptorPool.generated_pool.lookup(class_name).msgclass
+
+       else
+         @pb_builder = pb2_create_instance(class_name)
+       end
      end
    end

+   # Pipelines using this plugin cannot be reloaded.
+   # https://github.com/elastic/logstash/pull/6499
+   #
+   # The DescriptorPool instance registers the protobuf classes (and
+   # dependencies) as global objects. This makes it very difficult to reload a
+   # pipeline, because `class_name` and all of its dependencies are already
+   # registered.
+   def reloadable?
+     return false
+   end

    def decode(data)
-     begin
-       if @protobuf_version_3
-         decoded = @pb_builder.decode(data.to_s)
-         h = pb3_deep_to_hash(decoded)
-       else
-         decoded = @pb_builder.parse(data.to_s)
-         h = decoded.to_hash
+     if @protobuf_version == 3
+       decoded = @pb_builder.decode(data.to_s)
+       if @pb3_set_oneof_metainfo
+         meta = pb3_get_oneof_metainfo(decoded, @class_name)
        end
-       yield LogStash::Event.new(h) if block_given?
-     rescue => e
-       @logger.warn("Couldn't decode protobuf: #{e.inspect}.")
-       raise e
+       h = pb3_deep_to_hash(decoded)
+     else
+       decoded = @pb_builder.parse(data.to_s)
+       h = decoded.to_hash
+     end
+     e = LogStash::Event.new(h)
+     if @protobuf_version == 3 and @pb3_set_oneof_metainfo
+       e.set("[@metadata][pb_oneof]", meta)
+     end
+     yield e if block_given?
+   rescue => ex
+     @logger.warn("Couldn't decode protobuf: #{ex.inspect}.")
+     if stop_on_error
+       raise ex
+     else # keep original message so that the user can debug it.
+       yield LogStash::Event.new("message" => data, "tags" => ["_protobufdecodefailure"])
      end
    end # def decode
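With `stop_on_error => false`, undecodable payloads are passed on tagged rather than raised, so a pipeline can route them aside for debugging; a sketch (the output path is made up):

    output {
      if "_protobufdecodefailure" in [tags] {
        file { path => "/tmp/protobuf-decode-failures.log" }
      }
    }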


    def encode(event)
-     if @protobuf_version_3
-       protobytes = pb3_encode_wrapper(event)
+     if @protobuf_version == 3
+       protobytes = pb3_encode(event)
      else
-       protobytes = pb2_encode_wrapper(event)
+       protobytes = pb2_encode(event)
+     end
+     unless protobytes.nil? or protobytes.empty?
+       @on_event.call(event, protobytes)
      end
-     @on_event.call(event, protobytes)
    end # def encode


    private
    def pb3_deep_to_hash(input)
-     if input.class.ancestors.include? Google::Protobuf::MessageExts # it's a protobuf class
+     case input
+     when Google::Protobuf::MessageExts # it's a protobuf class
        result = Hash.new
        input.to_hash.each {|key, value|
          result[key] = pb3_deep_to_hash(value) # the key is required for the class lookup of enums.
-       }
-     elsif input.kind_of?(Array)
+       }
+     when ::Array
        result = []
        input.each {|value|
          result << pb3_deep_to_hash(value)
        }
-     elsif input.kind_of?(::Hash)
+     when ::Hash
        result = {}
        input.each {|key, value|
          result[key] = pb3_deep_to_hash(value)
        }
-     elsif input.instance_of? Symbol # is an Enum
+     when Symbol # is an Enum
        result = input.to_s.sub(':','')
      else
        result = input
@@ -136,240 +273,463 @@ class LogStash::Codecs::Protobuf < LogStash::Codecs::Base
      result
    end

-   def pb3_encode_wrapper(event)
+   def pb3_encode(event)
+
+     datahash = event.to_hash
+
+     is_recursive_call = !event.get('tags').nil? and event.get('tags').include? @pb3_typeconversion_tag
+     if is_recursive_call
+       datahash = pb3_remove_typeconversion_tag(datahash)
+     end
+     datahash = pb3_prepare_for_encoding(datahash)
+     if datahash.nil?
+       @logger.warn("Protobuf encoding error 4: empty data for event #{event.to_hash}")
+     end
+     if @pb_builder.nil?
+       @logger.warn("Protobuf encoding error 5: empty protobuf builder for class #{@class_name}")
+     end
+     pb_obj = @pb_builder.new(datahash)
+     @pb_builder.encode(pb_obj)
+
+   rescue ArgumentError => e
+     k = event.to_hash.keys.join(", ")
+     @logger.warn("Protobuf encoding error 1: Argument error (#{e.inspect}). Reason: probably mismatching protobuf definition. \
+       Required fields in the protobuf definition are: #{k} and fields must not begin with @ sign. The event has been discarded.")
+     nil
+   rescue TypeError => e
+     pb3_handle_type_errors(event, e, is_recursive_call, datahash)
+     nil
+   rescue => e
+     @logger.warn("Protobuf encoding error 3: #{e.inspect}. Event discarded. Input data: #{datahash}. The event has been discarded. Backtrace: #{e.backtrace}")
+     nil
+   end
+
+
+
+
+   def pb3_handle_type_errors(event, e, is_recursive_call, datahash)
      begin
-       data = pb3_encode(event.to_hash, @class_name)
-       pb_obj = @pb_builder.new(data)
-       @pb_builder.encode(pb_obj)
-     rescue ArgumentError => e
-       @logger.debug("Encoding error 2. Probably mismatching protobuf definition. Required fields in the protobuf definition are: " + event.to_hash.keys.join(", ") + " and the timestamp field name must not include a @. ")
-       raise e
-     rescue => e
-       @logger.debug("Couldn't generate protobuf: ${e}")
-       raise e
+       if is_recursive_call
+         @logger.warn("Protobuf encoding error 2.1: Type error (#{e.inspect}). Some types could not be converted. The event has been discarded. Type mismatches: #{mismatches}.")
+       else
+         if @pb3_encoder_autoconvert_types
+
+           msg = "Protobuf encoding error 2.2: Type error (#{e.inspect}). Will try to convert the data types. Original data: #{datahash}"
+           @logger.warn(msg)
+           mismatches = pb3_get_type_mismatches(datahash, "", @class_name)
+
+           event = pb3_convert_mismatched_types(event, mismatches)
+           # Add a (temporary) tag to handle the recursion stop
+           pb3_add_tag(event, @pb3_typeconversion_tag )
+           pb3_encode(event)
+         else
+           @logger.warn("Protobuf encoding error 2.3: Type error (#{e.inspect}). The event has been discarded. Try setting pb3_encoder_autoconvert_types => true for automatic type conversion.")
+         end
+       end
+     rescue TypeError => e
+       if @pb3_encoder_autoconvert_types
+         @logger.warn("Protobuf encoding error 2.4.1: (#{e.inspect}). Failed to convert data types. The event has been discarded. original data: #{datahash}")
+       else
+         @logger.warn("Protobuf encoding error 2.4.2: (#{e.inspect}). The event has been discarded.")
+       end
+       if @stop_on_error
+         raise e
+       end
+       nil
+     rescue => ex
+       @logger.warn("Protobuf encoding error 2.5: (#{e.inspect}). The event has been discarded. Auto-typecasting was on: #{@pb3_encoder_autoconvert_types}")
+       if @stop_on_error
+         raise ex
+       end
+       nil
      end
+   end # pb3_handle_type_errors
+
+
+   def pb3_get_type_mismatches(data, key_prefix, pb_class)
+     mismatches = []
+     data.to_hash.each do |key, value|
+       expected_type = pb3_get_expected_type(key, pb_class)
+       r = pb3_compare_datatypes(value, key, key_prefix, pb_class, expected_type)
+       mismatches.concat(r)
+     end # data.each
+     mismatches
    end


-   def pb3_encode(datahash, class_name)
-     next unless datahash.is_a?(::Hash)
+   def pb3_get_expected_type(key, pb_class)
+     pb_descriptor = Google::Protobuf::DescriptorPool.generated_pool.lookup(pb_class)
+
+     if !pb_descriptor.nil?
+       pb_builder = pb_descriptor.msgclass
+       pb_obj = pb_builder.new({})
+       v = pb_obj.send(key)
+
+       if !v.nil?
+         v.class
+       else
+         nil
+       end
+     end
+   end
+
+   def pb3_compare_datatypes(value, key, key_prefix, pb_class, expected_type)
+     mismatches = []
+
+     if value.nil?
+       is_mismatch = false
+     else
+       case value
+       when ::Hash, Google::Protobuf::MessageExts
+         is_mismatch = false
+         descriptor = Google::Protobuf::DescriptorPool.generated_pool.lookup(pb_class).lookup(key)
+         if !descriptor.subtype.nil?
+           class_of_nested_object = pb3_get_descriptorpool_name(descriptor.subtype.msgclass)
+           new_prefix = "#{key}."
+           recursive_mismatches = pb3_get_type_mismatches(value, new_prefix, class_of_nested_object)
+           mismatches.concat(recursive_mismatches)
+         end
+       when ::Array
+         expected_type = pb3_get_expected_type(key, pb_class)
+         is_mismatch = (expected_type != Google::Protobuf::RepeatedField)
+         child_type = Google::Protobuf::DescriptorPool.generated_pool.lookup(pb_class).lookup(key).type
+         value.each_with_index do | v, i |
+           new_prefix = "#{key}."
+           recursive_mismatches = pb3_compare_datatypes(v, i.to_s, new_prefix, pb_class, child_type)
+           mismatches.concat(recursive_mismatches)
+           is_mismatch |= recursive_mismatches.any?
+         end # do
+       else # is scalar data type
+
+         is_mismatch = ! pb3_is_scalar_datatype_match(expected_type, value.class)
+       end # if
+     end # if value.nil?
+
+     if is_mismatch
+       mismatches << {"key" => "#{key_prefix}#{key}", "actual_type" => value.class, "expected_type" => expected_type, "value" => value}
+     end
+     mismatches
+   end
+
+   def pb3_remove_typeconversion_tag(data)
+     # remove the tag that we added to the event because
+     # the protobuf definition might not have a field for tags
+     data['tags'].delete(@pb3_typeconversion_tag)
+     if data['tags'].length == 0
+       data.delete('tags')
+     end
+     data
+   end
+
+   def pb3_get_descriptorpool_name(child_class)
+     # make instance
+     inst = child_class.new
+     # get the lookup name for the Descriptorpool
+     inst.class.descriptor.name
+   end
+
+   def pb3_is_scalar_datatype_match(expected_type, actual_type)
+     if expected_type == actual_type
+       true
+     else
+       e = expected_type.to_s.downcase.to_sym
+       a = actual_type.to_s.downcase.to_sym
+       case e
+       # when :string, :integer
+       when :string
+         a == e
+       when :integer
+         a == e
+       when :float
+         a == :float || a == :integer
+       end
+     end
+   end
+
+
+   def pb3_convert_mismatched_types_getter(struct, key)
+     if struct.is_a? ::Hash
+       struct[key]
+     else
+       struct.get(key)
+     end
+   end
+
+   def pb3_convert_mismatched_types_setter(struct, key, value)
+     if struct.is_a? ::Hash
+       struct[key] = value
+     else
+       struct.set(key, value)
+     end
+     struct
+   end
+
+   def pb3_add_tag(event, tag )
+     if event.get('tags').nil?
+       event.set('tags', [tag])
+     else
+       existing_tags = event.get('tags')
+       event.set("tags", existing_tags << tag)
+     end
+   end
+
+   # Due to recursion on nested fields in the event object this method might be given an event (1st call) or a hash (2nd .. nth call)
+   # First call will be the event object, child objects will be hashes.
+   def pb3_convert_mismatched_types(struct, mismatches)
+     mismatches.each do | m |
+       key = m['key']
+       expected_type = m['expected_type']
+       actual_type = m['actual_type']
+       if key.include? "." # the mismatch is in a child object
+         levels = key.split(/\./) # key is something like http_user_agent.minor_version and needs to be split.
+         key = levels[0]
+         sub_levels = levels.drop(1).join(".")
+         new_mismatches = [{"key"=>sub_levels, "actual_type"=>m["actual_type"], "expected_type"=>m["expected_type"]}]
+         value = pb3_convert_mismatched_types_getter(struct, key)
+         new_value = pb3_convert_mismatched_types(value, new_mismatches)
+         struct = pb3_convert_mismatched_types_setter(struct, key, new_value )
+       else
+         value = pb3_convert_mismatched_types_getter(struct, key)
+         begin
+           case expected_type.to_s
+           when "Integer"
+             case actual_type.to_s
+             when "String"
+               new_value = value.to_i
+             when "Float"
+               if value.floor == value # convert values like 2.0 to 2, but not 2.1
+                 new_value = value.to_i
+               end
+             end
+           when "String"
+             new_value = value.to_s
+           when "Float"
+             new_value = value.to_f
+           when "Boolean","TrueClass", "FalseClass"
+             new_value = value.to_s.downcase == "true"
+           end
+           if !new_value.nil?
+             struct = pb3_convert_mismatched_types_setter(struct, key, new_value )
+           end
+         rescue Exception => ex
+           @logger.debug("Protobuf encoding error 5: Could not convert types for protobuf encoding: #{ex}")
+         end
+       end # if key contains .
+     end # mismatches.each
+     struct
+   end
+
+   def pb3_prepare_for_encoding(datahash)
+     # 0) Remove empty fields.
+     datahash = datahash.select { |key, value| !value.nil? }

      # Preparation: the data cannot be encoded until certain criteria are met:
      # 1) remove @ signs from keys.
      # 2) convert timestamps and other objects to strings
      datahash = datahash.inject({}){|x,(k,v)| x[k.gsub(/@/,'').to_sym] = (should_convert_to_string?(v) ? v.to_s : v); x}
-
-     # Check if any of the fields in this hash are protobuf classes and if so, create a builder for them.
-     meta = @metainfo_messageclasses[class_name]
-     if meta
-       meta.map do | (field_name,class_name) |
-         key = field_name.to_sym
-         if datahash.include?(key)
-           original_value = datahash[key]
-           datahash[key] =
-             if original_value.is_a?(::Array)
-               # make this field an array/list of protobuf objects
-               # value is a list of hashed complex objects, each of which needs to be protobuffed and
-               # put back into the list.
-               original_value.map { |x| pb3_encode(x, class_name) }
-               original_value
-             else
-               r = pb3_encode(original_value, class_name)
-               builder = Google::Protobuf::DescriptorPool.generated_pool.lookup(class_name).msgclass
-               builder.new(r)
-             end # if is array
-         end # if datahash_include
-       end # do
-     end # if meta
-     # Check if any of the fields in this hash are enum classes and if so, create a builder for them.
-     meta = @metainfo_enumclasses[class_name]
-     if meta
-       meta.map do | (field_name,class_name) |
-         key = field_name.to_sym
-         if datahash.include?(key)
-           original_value = datahash[key]
-           datahash[key] =
-             if original_value.is_a?(::Array)
-               original_value.map { |x| pb3_encode(x, class_name) }
-               original_value
-             else
-               if original_value.is_a?(Fixnum)
-                 original_value # integers will be automatically converted into enum
-               else
-                 # feature request: support for providing integers as strings or symbols.
-                 # not fully tested yet:
-                 # begin
-                 #   enum_lookup_name = "#{class_name}::#{original_value}"
-                 #   enum_lookup_name.split('::').inject(Object) do |mod, class_name|
-                 #     mod.const_get(class_name)
-                 #   end # do
-                 # rescue => e
-                 #   @logger.debug("Encoding error 3: could not translate #{original_value} into enum. ${e}")
-                 #   raise e
-                 # end
-               end # if is a fixnum
-             end # if is array
-         end # if datahash_include
-       end # do
-     end # if meta
+
+     datahash.each do |key, value|
+       datahash[key] = pb3_prepare_for_encoding(value) if value.is_a?(Hash)
+     end
+
      datahash
    end

-   def pb2_encode_wrapper(event)
-     begin
-       data = pb2_encode(event.to_hash, @class_name)
-       msg = @pb_builder.new(data)
-       msg.serialize_to_string
-     rescue NoMethodError => e
-       @logger.debug("Encoding error 2. Probably mismatching protobuf definition. Required fields in the protobuf definition are: " + event.to_hash.keys.join(", ") + " and the timestamp field name must not include a @. ")
-       raise e
-     rescue => e
-       @logger.debug("Encoding error 1: ${e}")
-       raise e
-     end
+   def pb3_get_oneof_metainfo(pb_object, pb_class_name)
+     meta = {}
+     pb_class = Google::Protobuf::DescriptorPool.generated_pool.lookup(pb_class_name).msgclass
+
+     pb_class.descriptor.each_oneof { |field|
+       field.each { | group_option |
+         if !pb_object.send(group_option.name).nil?
+           meta[field.name] = group_option.name
+         end
+       }
+     }
+
+     pb_class.descriptor.select{ |field| field.type == :message }.each { | field |
+       # recurse over nested protobuf classes
+       pb_sub_object = pb_object.send(field.name)
+       if !pb_sub_object.nil? and !field.subtype.nil?
+         pb_sub_class = pb3_get_descriptorpool_name(field.subtype.msgclass)
+         meta[field.name] = pb3_get_oneof_metainfo(pb_sub_object, pb_sub_class)
+       end
+     }
+
+     meta
    end


-   def pb2_encode(datahash, class_name)
-     next unless datahash.is_a?(::Hash)
+   def pb2_encode(event)
+     data = pb2_prepare_for_encoding(event.to_hash, @class_name)
+     msg = @pb_builder.new(data)
+     msg.serialize_to_string
+   rescue NoMethodError => e
+     @logger.warn("Encoding error 2. Probably mismatching protobuf definition. Required fields in the protobuf definition are: " + event.to_hash.keys.join(", ") + " and the timestamp field name must not include a @. ")
+     raise e
+   rescue => e
+     @logger.warn("Encoding error 1: #{e.inspect}")
+     raise e
+   end

-     # Preparation: the data cannot be encoded until certain criteria are met:
-     # 1) remove @ signs from keys.
-     # 2) convert timestamps and other objects to strings
-     datahash = ::Hash[datahash.map{|(k,v)| [k.to_s.dup.gsub(/@/,''), (should_convert_to_string?(v) ? v.to_s : v)] }]
-
-     # Check if any of the fields in this hash are protobuf classes and if so, create a builder for them.
-     meta = @metainfo_messageclasses[class_name]
-     if meta
-       meta.map do | (k,class_name) |
-         if datahash.include?(k)
-           original_value = datahash[k]
-           p
-           datahash[k] =
-             if original_value.is_a?(::Array)
-               # make this field an array/list of protobuf objects
-               # value is a list of hashed complex objects, each of which needs to be protobuffed and
-               # put back into the list.
-               original_value.map { |x| pb2_encode(x, class_name) }
-               original_value
-             else
-               proto_obj = pb2_create_instance(class_name)
-               proto_obj.new(pb2_encode(original_value, class_name))
-             end # if is array
-         end # if datahash_include
-       end # do
-     end # if meta

+   def pb2_prepare_for_encoding(datahash, class_name)
+     if datahash.is_a?(::Hash)
+       # Preparation: the data cannot be encoded until certain criteria are met:
+       # 1) remove @ signs from keys.
+       # 2) convert timestamps and other objects to strings
+       datahash = ::Hash[datahash.map{|(k,v)| [k.to_s.dup.gsub(/@/,''), (should_convert_to_string?(v) ? v.to_s : v)] }]
+
+       # Check if any of the fields in this hash are protobuf classes and if so, create a builder for them.
+       meta = @metainfo_messageclasses[class_name]
+       if meta
+         meta.map do | (k,c) |
+           if datahash.include?(k)
+             original_value = datahash[k]
+             datahash[k] =
+               if original_value.is_a?(::Array)
+                 # make this field an array/list of protobuf objects
+                 # value is a list of hashed complex objects, each of which needs to be protobuffed and
+                 # put back into the list.
+                 original_value.map { |x| pb2_prepare_for_encoding(x, c) }
+                 original_value
+               else
+                 proto_obj = pb2_create_instance(c)
+                 proto_obj.new(pb2_prepare_for_encoding(original_value, c)) # this line is reached in the colourtest for an enum. Enums should not be instantiated. Should enums even be in the messageclasses? I don't think so! TODO bug
+               end # if is array
+           end # if datahash_include
+         end # do
+       end # if meta
+     end
      datahash
    end


    def should_convert_to_string?(v)
-     !(v.is_a?(Fixnum) || v.is_a?(::Hash) || v.is_a?(::Array) || [true, false].include?(v))
+     !(v.is_a?(Integer) || v.is_a?(Float) || v.is_a?(::Hash) || v.is_a?(::Array) || [true, false].include?(v))
    end

-
+
    def pb2_create_instance(name)
-     begin
-       @logger.debug("Creating instance of " + name)
-       name.split('::').inject(Object) { |n,c| n.const_get c }
-     end
+     @logger.debug("Creating instance of " + name)
+     name.split('::').inject(Object) { |n,c| n.const_get c }
    end


    def pb3_metadata_analyis(filename)
+
      regex_class_name = /\s*add_message "(?<name>.+?)" do\s+/ # TODO optimize both regexes for speed (negative lookahead)
      regex_pbdefs = /\s*(optional|repeated)(\s*):(?<name>.+),(\s*):(?<type>\w+),(\s*)(?<position>\d+)(, \"(?<enum_class>.*?)\")?/
-     # Example
-     #   optional :father, :message, 10, "Unicorn"
-     #   repeated :favourite_numbers, :int32, 5
-     begin
-       class_name = ""
-       type = ""
-       field_name = ""
-       File.readlines(filename).each do |line|
-         if ! (line =~ regex_class_name).nil?
-           class_name = $1
-           @metainfo_messageclasses[class_name] = {}
-           @metainfo_enumclasses[class_name] = {}
+     class_name = ""
+     type = ""
+     field_name = ""
+     File.readlines(filename).each do |line|
+       if ! (line =~ regex_class_name).nil?
+         class_name = $1
+         @metainfo_messageclasses[class_name] = {}
+         @metainfo_enumclasses[class_name] = {}
+       end # if
+       if ! (line =~ regex_pbdefs).nil?
+         field_name = $1
+         type = $2
+         field_class_name = $4
+         if type == "message"
+           @metainfo_messageclasses[class_name][field_name] = field_class_name
+         elsif type == "enum"
+           @metainfo_enumclasses[class_name][field_name] = field_class_name
          end
-         if ! (line =~ regex_pbdefs).nil?
-           field_name = $1
-           type = $2
-           field_class_name = $4
-           if type == "message"
-             @metainfo_messageclasses[class_name][field_name] = field_class_name
-           elsif type == "enum"
-             @metainfo_enumclasses[class_name][field_name] = field_class_name
-           end
-         end
-       end
-     rescue Exception => e
-       @logger.warn("Error 3: unable to read pb definition from file " + filename+ ". Reason: #{e.inspect}. Last settings were: class #{class_name} field #{field_name} type #{type}. Backtrace: " + e.backtrace.inspect.to_s)
-       raise e
-     end
+       end # if
+     end # readlines
      if class_name.nil?
        @logger.warn("Error 4: class name not found in file " + filename)
        raise ArgumentError, "Invalid protobuf file: " + filename
-     end
+     end
+   rescue Exception => e
+     @logger.warn("Error 3: unable to read pb definition from file " + filename+ ". Reason: #{e.inspect}. Last settings were: class #{class_name} field #{field_name} type #{type}. Backtrace: " + e.backtrace.inspect.to_s)
+     raise e
    end

+
+
    def pb2_metadata_analyis(filename)
-     regex_class_name = /\s*class\s*(?<name>.+?)\s+/
-     regex_module_name = /\s*module\s*(?<name>.+?)\s+/
+     regex_class_start = /\s*set_fully_qualified_name \"(?<name>.+)\".*?/
+     regex_enum_name = /\s*include ..ProtocolBuffers..Enum\s*/
      regex_pbdefs = /\s*(optional|repeated)(\s*):(?<type>.+),(\s*):(?<name>\w+),(\s*)(?<position>\d+)/
      # now we also need to find out which class it contains and the protobuf definitions in it.
      # We'll unfortunately need that later so that we can create nested objects.
-     begin
-       class_name = ""
-       type = ""
-       field_name = ""
-       classname_found = false
-       File.readlines(filename).each do |line|
-         if ! (line =~ regex_module_name).nil? && !classname_found # because it might be declared twice in the file
-           class_name << $1
-           class_name << "::"
-
-         end
-         if ! (line =~ regex_class_name).nil? && !classname_found # because it might be declared twice in the file
-           class_name << $1
-           @metainfo_messageclasses[class_name] = {}
-           classname_found = true
+
+     class_name = ""
+     type = ""
+     field_name = ""
+     is_enum_class = false
+
+     File.readlines(filename).each do |line|
+       if ! (line =~ regex_enum_name).nil?
+         is_enum_class= true
+       end
+
+       if ! (line =~ regex_class_start).nil?
+         class_name = $1.gsub('.',"::").split('::').map {|word| word.capitalize}.join('::')
+         if is_enum_class
+           @metainfo_pb2_enumlist << class_name.downcase
          end
-         if ! (line =~ regex_pbdefs).nil?
-           type = $1
-           field_name = $2
-           if type =~ /::/
-             @metainfo_messageclasses[class_name][field_name] = type.gsub!(/^:/,"")
-
+         is_enum_class= false # reset when next class starts
+       end
+       if ! (line =~ regex_pbdefs).nil?
+         type = $1
+         field_name = $2
+         if type =~ /::/
+           clean_type = type.gsub(/^:/,"")
+           e = @metainfo_pb2_enumlist.include? clean_type.downcase
+
+           if e
+             if not @metainfo_enumclasses.key? class_name
+               @metainfo_enumclasses[class_name] = {}
+             end
+             @metainfo_enumclasses[class_name][field_name] = clean_type
+           else
+             if not @metainfo_messageclasses.key? class_name
+               @metainfo_messageclasses[class_name] = {}
+             end
+             @metainfo_messageclasses[class_name][field_name] = clean_type
           end
        end
      end
-     rescue Exception => e
-       @logger.warn("Error 3: unable to read pb definition from file " + filename+ ". Reason: #{e.inspect}. Last settings were: class #{class_name} field #{field_name} type #{type}. Backtrace: " + e.backtrace.inspect.to_s)
-       raise e
     end
     if class_name.nil?
       @logger.warn("Error 4: class name not found in file " + filename)
       raise ArgumentError, "Invalid protobuf file: " + filename
-    end
+    end
+   rescue LoadError => e
+     raise ArgumentError.new("Could not load file: " + filename + ". Please try to use absolute paths. Current working dir: " + Dir.pwd + ", loadpath: " + $LOAD_PATH.join(" "))
+   rescue => e
+
+     @logger.warn("Error 3: unable to read pb definition from file " + filename+ ". Reason: #{e.inspect}. Last settings were: class #{class_name} field #{field_name} type #{type}. Backtrace: " + e.backtrace.inspect.to_s)
+     raise e
    end

+
    def load_protobuf_definition(filename)
-     begin
-       if filename.end_with? ('.rb')
-         @logger.debug("Including protobuf file: " + filename)
-         require filename
-         if @protobuf_version_3
-           pb3_metadata_analyis(filename)
-         else
-           pb2_metadata_analyis(filename)
+     if filename.end_with? ('.rb')
+       # Add to the loading path of the protobuf definitions
+       if (Pathname.new filename).absolute?
+         begin
+           require filename
+         rescue Exception => e
+           @logger.error("Unable to load file: #{filename}. Reason: #{e.inspect}")
+           raise e
          end
-       else
-         @logger.warn("Not a ruby file: " + filename)
        end
+
+       if @protobuf_version == 3
+         pb3_metadata_analyis(filename)
+       else
+         pb2_metadata_analyis(filename)
+       end
+
+     else
+       @logger.warn("Not a ruby file: " + filename)
      end
    end

-
  end # class LogStash::Codecs::Protobuf