logstash-codec-protobuf 1.3.0-java
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +56 -0
- data/CONTRIBUTORS +12 -0
- data/DEVELOPER.md +2 -0
- data/Gemfile +11 -0
- data/LICENSE +202 -0
- data/NOTICE.TXT +4 -0
- data/README.md +184 -0
- data/docs/index.asciidoc +241 -0
- data/google-protobuf-lib-update.md +57 -0
- data/lib/logstash/codecs/protobuf.rb +804 -0
- data/logstash-codec-protobuf.gemspec +33 -0
- data/spec/codecs/pb2_spec.rb +236 -0
- data/spec/codecs/pb3_decode_spec.rb +665 -0
- data/spec/codecs/pb3_encode_spec.rb +243 -0
- data/spec/helpers/pb2/ColourTestcase.pb.rb +35 -0
- data/spec/helpers/pb2/ColourTestcase.proto +24 -0
- data/spec/helpers/pb2/event.pb.rb +19 -0
- data/spec/helpers/pb2/event.proto +12 -0
- data/spec/helpers/pb2/header/header.pb.rb +16 -0
- data/spec/helpers/pb2/header/header.proto +8 -0
- data/spec/helpers/pb2/human.pb.rb +26 -0
- data/spec/helpers/pb2/unicorn.pb.rb +19 -0
- data/spec/helpers/pb2/unicorn_event.pb.rb +24 -0
- data/spec/helpers/pb3/FantasyHorse_pb.rb +48 -0
- data/spec/helpers/pb3/PhoneDirectory_pb.rb +37 -0
- data/spec/helpers/pb3/ProbeResult_pb.rb +26 -0
- data/spec/helpers/pb3/ResultListComposerRequest_pb.rb +25 -0
- data/spec/helpers/pb3/dnsmessage_pb.rb +82 -0
- data/spec/helpers/pb3/events_pb.rb +17 -0
- data/spec/helpers/pb3/header/header.proto3 +7 -0
- data/spec/helpers/pb3/header/header_pb.rb +12 -0
- data/spec/helpers/pb3/integertest_pb.rb +18 -0
- data/spec/helpers/pb3/messageA_pb.rb +16 -0
- data/spec/helpers/pb3/messageB_pb.rb +15 -0
- data/spec/helpers/pb3/rum2_pb.rb +87 -0
- data/spec/helpers/pb3/rum3_pb.rb +87 -0
- data/spec/helpers/pb3/rum_pb.rb +87 -0
- data/spec/helpers/pb3/struct_test_pb.rb +21 -0
- data/spec/helpers/pb3/unicorn_pb.rb +31 -0
- metadata +175 -0
@@ -0,0 +1,804 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'logstash/codecs/base'
|
3
|
+
require 'logstash/util/charset'
|
4
|
+
require 'google/protobuf' # for protobuf3
|
5
|
+
require 'google/protobuf/struct_pb'
|
6
|
+
require 'protocol_buffers' # https://github.com/codekitchen/ruby-protocol-buffers, for protobuf2
|
7
|
+
|
8
|
+
|
9
|
+
# Monkey-patch the `Google::Protobuf::DescriptorPool` with a mutex for exclusive
|
10
|
+
# access.
|
11
|
+
#
|
12
|
+
# The DescriptorPool instance is not thread-safe when loading protobuf
|
13
|
+
# definitions. This can cause unrecoverable errors when registering multiple
|
14
|
+
# concurrent pipelines that try to register the same dependency. The
|
15
|
+
# DescriptorPool instance is global to the JVM and shared among all pipelines.
|
16
|
+
class << Google::Protobuf::DescriptorPool
|
17
|
+
def with_lock
|
18
|
+
if !@mutex
|
19
|
+
@mutex = Mutex.new
|
20
|
+
end
|
21
|
+
|
22
|
+
return @mutex
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
# This codec converts protobuf encoded messages into logstash events and vice versa.
|
27
|
+
#
|
28
|
+
# Requires the protobuf definitions as ruby files. You can create those using the [ruby-protoc compiler](https://github.com/codekitchen/ruby-protocol-buffers).
|
29
|
+
#
|
30
|
+
# The following shows a usage example for decoding protobuf 2 encoded events from a kafka stream:
|
31
|
+
# [source,ruby]
|
32
|
+
# kafka
|
33
|
+
# {
|
34
|
+
# zk_connect => "127.0.0.1"
|
35
|
+
# topic_id => "your_topic_goes_here"
|
36
|
+
# key_deserializer_class => "org.apache.kafka.common.serialization.ByteArrayDeserializer"
|
37
|
+
# value_deserializer_class => "org.apache.kafka.common.serialization.ByteArrayDeserializer"
|
38
|
+
# codec => protobuf
|
39
|
+
# {
|
40
|
+
# class_name => "Animal::Unicorn"
|
41
|
+
# include_path => ['/path/to/protobuf/definitions/UnicornProtobuf.pb.rb']
|
42
|
+
# }
|
43
|
+
# }
|
44
|
+
#
|
45
|
+
# Same example for protobuf 3:
|
46
|
+
# [source,ruby]
|
47
|
+
# kafka
|
48
|
+
# {
|
49
|
+
# zk_connect => "127.0.0.1"
|
50
|
+
# topic_id => "your_topic_goes_here"
|
51
|
+
# key_deserializer_class => "org.apache.kafka.common.serialization.ByteArrayDeserializer"
|
52
|
+
# value_deserializer_class => "org.apache.kafka.common.serialization.ByteArrayDeserializer"
|
53
|
+
# codec => protobuf
|
54
|
+
# {
|
55
|
+
# class_name => "Animal.Unicorn"
|
56
|
+
# include_path => ['/path/to/protobuf/definitions/UnicornProtobuf_pb.rb']
|
57
|
+
# protobuf_version => 3
|
58
|
+
# }
|
59
|
+
# }
|
60
|
+
#
|
61
|
+
# Specifically for the kafka input: please set the deserializer classes as shown above.
|
62
|
+
|
63
|
+
class LogStash::Codecs::Protobuf < LogStash::Codecs::Base
|
64
|
+
config_name 'protobuf'
|
65
|
+
|
66
|
+
# Name of the class to decode.
|
67
|
+
# If your protobuf 2 definition contains modules, prepend them to the class name with double colons like so:
|
68
|
+
# [source,ruby]
|
69
|
+
# class_name => "Animal::Horse::Unicorn"
|
70
|
+
#
|
71
|
+
# This corresponds to a protobuf definition starting as follows:
|
72
|
+
# [source,ruby]
|
73
|
+
# module Animal
|
74
|
+
# module Horse
|
75
|
+
# class Unicorn
|
76
|
+
# # here are your field definitions.
|
77
|
+
#
|
78
|
+
# For protobuf 3 separate the modules with single dots.
|
79
|
+
# [source,ruby]
|
80
|
+
# class_name => "Animal.Horse.Unicorn"
|
81
|
+
# Check the bottom of the generated protobuf ruby file. It contains lines like this:
|
82
|
+
# [source,ruby]
|
83
|
+
# Animals.Unicorn = Google::Protobuf::DescriptorPool.generated_pool.lookup("Animals.Unicorn").msgclass
|
84
|
+
# Use the parameter for the lookup call as the class_name for the codec config.
|
85
|
+
#
|
86
|
+
# If your class references other definitions: you only have to add the main class here.
|
87
|
+
config :class_name, :validate => :string, :required => true
|
88
|
+
|
89
|
+
# Relative path to the ruby file that contains class_name
|
90
|
+
#
|
91
|
+
# Relative path (from `protobuf_root_directory`) that holds the definition of the class specified in
|
92
|
+
# `class_name`.
|
93
|
+
#
|
94
|
+
# `class_file` and `include_path` cannot be used at the same time.
|
95
|
+
config :class_file, :validate => :string, :default => '', :required => false
|
96
|
+
|
97
|
+
# Absolute path to the root directory that contains all referenced/used dependencies
|
98
|
+
# of the main class (`class_name`) or any of its dependencies.
|
99
|
+
#
|
100
|
+
# For instance:
|
101
|
+
#
|
102
|
+
# pb3
|
103
|
+
# ├── header
|
104
|
+
# │ └── header_pb.rb
|
105
|
+
# ├── messageA_pb.rb
|
106
|
+
#
|
107
|
+
# In this case `messageA_pb.rb` has an embedded message from `header/header_pb.rb`.
|
108
|
+
# If `class_file` is set to `messageA_pb.rb`, and `class_name` to
|
109
|
+
# `MessageA`, `protobuf_root_directory` must be set to `/path/to/pb3`, which includes
|
110
|
+
# both definitions.
|
111
|
+
config :protobuf_root_directory, :validate => :string, :required => false
|
112
|
+
|
113
|
+
# List of absolute pathes to files with protobuf definitions.
|
114
|
+
# When using more than one file, make sure to arrange the files in reverse order of dependency so that each class is loaded before it is
|
115
|
+
# refered to by another.
|
116
|
+
#
|
117
|
+
# Example: a class _Unicorn_ referencing another protobuf class _Wings_
|
118
|
+
# [source,ruby]
|
119
|
+
# module Animal
|
120
|
+
# module Horse
|
121
|
+
# class Unicorn
|
122
|
+
# set_fully_qualified_name "Animal.Horse.Unicorn"
|
123
|
+
# optional ::Animal::Bodypart::Wings, :wings, 1
|
124
|
+
# optional :string, :name, 2
|
125
|
+
# # here be more field definitions
|
126
|
+
#
|
127
|
+
# would be configured as
|
128
|
+
# [source,ruby]
|
129
|
+
# include_path => ['/path/to/protobuf/definitions/Wings.pb.rb','/path/to/protobuf/definitions/Unicorn.pb.rb']
|
130
|
+
#
|
131
|
+
# `class_file` and `include_path` cannot be used at the same time.
|
132
|
+
config :include_path, :validate => :array, :default => [], :required => false
|
133
|
+
|
134
|
+
# Protocol buffer version switch. Defaults to version 2. Please note that the behaviour for enums varies between the versions.
|
135
|
+
# For protobuf 2 you will get integer representations for enums, for protobuf 3 you'll get string representations due to a different converter library.
|
136
|
+
# Recommendation: use the translate plugin to restore previous behaviour when upgrading.
|
137
|
+
config :protobuf_version, :validate => [2,3], :default => 2, :required => true
|
138
|
+
|
139
|
+
# To tolerate faulty messages that cannot be en/decoded, set this to false. Otherwise the pipeline will stop upon encountering a non decipherable message.
|
140
|
+
config :stop_on_error, :validate => :boolean, :default => false, :required => false
|
141
|
+
|
142
|
+
# Instruct the encoder to attempt converting data types to match the protobuf definitions. Available only for protobuf version 3.
|
143
|
+
config :pb3_encoder_autoconvert_types, :validate => :boolean, :default => true, :required => false
|
144
|
+
|
145
|
+
# Add meta information to `[@metadata][pb_oneof]` about which classes were chosen for [oneof](https://developers.google.com/protocol-buffers/docs/proto3#oneof) fields.
|
146
|
+
# Example values: for the protobuf definition
|
147
|
+
# ``` oneof :horse_type do
|
148
|
+
# optional :unicorn, :message, 2, "FantasyUnicorn"
|
149
|
+
# optional :pegasus, :message, 3, "FantasyPegasus"
|
150
|
+
# end
|
151
|
+
# ```
|
152
|
+
# the field `[@metadata][pb_oneof][horse_type]` will be set to either `pegasus` or `unicorn`.
|
153
|
+
# Available only for protobuf version 3.
|
154
|
+
config :pb3_set_oneof_metainfo, :validate => :boolean, :default => false, :required => false
|
155
|
+
|
156
|
+
|
157
|
+
attr_reader :execution_context
|
158
|
+
|
159
|
+
# id of the pipeline whose events you want to read from.
|
160
|
+
# id of the pipeline whose events you want to read from.
# Falls back to "main" when no execution context is available (e.g. tests).
def pipeline_id
  if respond_to?(:execution_context) && !execution_context.nil?
    execution_context.pipeline_id
  else
    "main"
  end
end
|
163
|
+
|
164
|
+
# Validates the configuration and loads the protobuf definitions.
# Exactly one of `include_path` / `class_file` must be configured. Loading
# happens under a global lock because the DescriptorPool is shared JVM-wide
# and is not thread-safe while registering definitions.
def register
  @metainfo_messageclasses = {}
  @metainfo_enumclasses = {}
  @metainfo_pb2_enumlist = []
  @pb3_typeconversion_tag = "_protobuf_type_converted"

  have_include_path = !@include_path.empty?
  have_class_file = !class_file.strip.empty?

  if have_include_path && have_class_file
    raise LogStash::ConfigurationError, "Cannot use `include_path` and `class_file` at the same time"
  end
  unless have_include_path || have_class_file
    raise LogStash::ConfigurationError, "Need to specify `include_path` or `class_file`"
  end

  # Skip registration if another pipeline in this JVM already loaded the class.
  should_register = Google::Protobuf::DescriptorPool.generated_pool.lookup(class_name).nil?

  unless @protobuf_root_directory.nil? || @protobuf_root_directory.strip.empty?
    if should_register && !$LOAD_PATH.include?(@protobuf_root_directory)
      $LOAD_PATH.unshift(@protobuf_root_directory)
    end
  end

  # Resolve a relative class_file against the configured root directory.
  unless @class_file.empty? || (Pathname.new @class_file).absolute?
    @class_file = "#{@protobuf_root_directory}/#{@class_file}"
  end

  # exclusive access while loading protobuf definitions
  Google::Protobuf::DescriptorPool.with_lock.synchronize do
    if should_register
      # load from `class_file` or from `include_path`, whichever is set
      load_protobuf_definition(@class_file) unless @class_file.empty?
      include_path.each { |path| load_protobuf_definition(path) }
    end
    @pb_builder =
      if @protobuf_version == 3
        Google::Protobuf::DescriptorPool.generated_pool.lookup(class_name).msgclass
      else
        pb2_create_instance(class_name)
      end
  end
end
|
200
|
+
|
201
|
+
# Pipelines using this plugin cannot be reloaded.
|
202
|
+
# https://github.com/elastic/logstash/pull/6499
|
203
|
+
#
|
204
|
+
# The DescriptorPool instance registers the protobuf classes (and
|
205
|
+
# dependencies) as global objects. This makes it very difficult to reload a
|
206
|
+
# pipeline, because `class_name` and all of its dependencies are already
|
207
|
+
# registered.
|
208
|
+
# Pipelines using this codec cannot be reloaded: the DescriptorPool registers
# the protobuf classes (and dependencies) globally, so `class_name` and all of
# its dependencies are already registered on a reload attempt.
# https://github.com/elastic/logstash/pull/6499
def reloadable?
  false
end
|
211
|
+
|
212
|
+
# Decodes a protobuf payload into a LogStash::Event and yields it.
# On failure: re-raises when `stop_on_error` is set; otherwise yields an
# event tagged `_protobufdecodefailure` that keeps the raw message for debugging.
def decode(data)
  meta = nil
  if @protobuf_version == 3
    pb_obj = @pb_builder.decode(data.to_s)
    hashed, meta = pb3_to_hash(pb_obj)
  else # protobuf version 2
    hashed = @pb_builder.parse(data.to_s).to_hash
  end
  event = LogStash::Event.new(hashed)
  # Expose the chosen one-of branches to downstream filters, if requested.
  if @protobuf_version == 3 && @pb3_set_oneof_metainfo
    event.set("[@metadata][pb_oneof]", meta)
  end
  yield event if block_given?
rescue => ex
  @logger.warn("Couldn't decode protobuf: #{ex.inspect}")
  raise ex if @stop_on_error
  # keep original message so that the user can debug it.
  yield LogStash::Event.new(
    "message" => data, "tags" => ["_protobufdecodefailure"],
    "decoder_exception" => "#{ex.inspect}")
end # def decode
|
235
|
+
|
236
|
+
|
237
|
+
# Encodes a LogStash::Event to protobuf bytes and hands them to @on_event.
# Events that could not be serialized (nil/empty payload) are dropped here;
# the version-specific encoder has already logged the reason.
def encode(event)
  payload = @protobuf_version == 3 ? pb3_encode(event) : pb2_encode(event)
  @on_event.call(event, payload) unless payload.nil? || payload.empty?
end # def encode
|
247
|
+
|
248
|
+
|
249
|
+
# Get the builder class for any given protobuf object from the descriptor pool
|
250
|
+
# Exposed for testing
|
251
|
+
# @param [Object] pb_obj The pb object instance to do the lookup for
|
252
|
+
# @return [Object] The pb builder class
|
253
|
+
# Get the builder class for any given protobuf object from the descriptor pool
# Exposed for testing
# @param [Object] pb_obj The pb object instance to do the lookup for
# @return [Object] The pb builder class, or nil when it is not registered
def pb3_class_for_name(pb_obj)
  lookup_key = pb_obj.class.descriptor.name
  Google::Protobuf::DescriptorPool.generated_pool.lookup(lookup_key)
end
|
256
|
+
|
257
|
+
private
|
258
|
+
|
259
|
+
# Helper function for debugging: print data types for fields of a hash
|
260
|
+
# Helper function for debugging: print data types for fields of a hash,
# recursing into nested hashes and arrays with growing indentation.
def print_types(hashy, i = 0)
  hashy.each do |field, val|
    puts ws(i) + "#{field} " + val.class.name
    case val
    when ::Hash
      print_types(val, i + 1)
    when ::Array
      val.each do |element|
        puts ws(i + 1) + "" + element.class.name
        print_types(element, i + 2) if element.is_a? ::Hash
      end
    end
  end
end
|
276
|
+
|
277
|
+
# Helper function for debugging: indent print statements based on recursion level
|
278
|
+
# Helper function for debugging: indentation string for recursion level `i`
# (one space per level).
def ws(i)
  Array.new(i, " ").join
end
|
281
|
+
|
282
|
+
|
283
|
+
# Converts the pb class to a hash, including its nested objects.
|
284
|
+
# @param [Object] input The pb class or any of its nested data structures
|
285
|
+
# @param [Numeric] i Level of recursion, needed only for whitespace indentation in debug output
|
286
|
+
# @return [Hash, Hash] The converted data as a hash + meta information about the one-of choices.
|
287
|
+
# Converts the pb class to a hash, including its nested objects.
# @param [Object] input The pb class or any of its nested data structures
# @param [Numeric] i Level of recursion, needed only for whitespace indentation in debug output
# @return [Hash, Hash] The converted data as a hash + meta information about the one-of choices.
def pb3_to_hash(input, i = 0)
  meta = {}
  case input
  when Google::Protobuf::Struct
    result = JSON.parse input.to_json({
      :preserve_proto_fieldnames => true,
      :emit_defaults => true
    })
  when Google::Protobuf::MessageExts # it's a protobuf class
    result = Hash.new
    input.clone().to_h.keys.each {|key|
      # 'class' is a reserved word so we cannot send() it to the pb object.
      # It would give the pb definition class instead of the value of a field of such name.
      if key.to_s == "class"
        value = input[key]
      else
        value = input.send(key)
      end
      unless value.nil?
        r, m = pb3_to_hash(value, 1 + i)
        result[key.to_s] = r unless r.nil?
        meta[key] = m unless m.empty?
      end
    }
    result, m = oneof_clean(result, input, i)
    meta = meta.merge(m) unless m.empty?
  when ::Array, Google::Protobuf::RepeatedField
    # BUGFIX: `when ::Array` previously had an empty body (Ruby case/when does
    # not fall through), so plain Ruby arrays were silently converted to nil.
    # Both selectors now share the list-conversion branch.
    result = []
    meta = []
    input.each {|value|
      r, m = pb3_to_hash(value, 1 + i)
      result << r unless r.nil?
      meta << m unless r.nil?
    }
  when ::Hash, Google::Protobuf::Map
    # BUGFIX: same empty-branch problem as above, for plain hashes.
    result = {}
    input.each {|key, value|
      r, m = pb3_to_hash(value, 1 + i)
      result[key.to_s] = r unless r.nil?
      meta[key] = m unless m.empty?
    }
  when Symbol # is an Enum
    result = input.to_s.sub(':','')
  else # any other scalar
    result = input
  end
  return result, meta
end
|
337
|
+
|
338
|
+
|
339
|
+
# For one-of options, remove the non-chosen options.
|
340
|
+
# @param [Hash] datahash The data hash including all options for each one-of field
|
341
|
+
# @param [Object] pb_obj The protobuf class from which datahash was created
|
342
|
+
# @param [Numeric] i Level of recursion, needed only for whitespace indentation in debug output
|
343
|
+
# @return [Hash, Hash] The reduced data as a hash + meta information about the one-of choices.
|
344
|
+
# For one-of options, remove the non-chosen options.
# @param [Hash] datahash The data hash including all options for each one-of field
# @param [Object] pb_obj The protobuf class from which datahash was created
# @param [Numeric] i Level of recursion, needed only for whitespace indentation in debug output
# @return [Hash, Hash] The reduced data as a hash + meta information about the one-of choices.
def oneof_clean(datahash, pb_obj, i = 0)
  # Codec versions <= 1.2.x emitted default values for every one-of option
  # instead of honouring the XOR relation between them. We therefore ask the
  # descriptor for each one-of group which option the producer actually chose
  # (by sending the group name to the pb object) and drop the other options.
  meta = {}
  descriptor_entry = pb3_class_for_name(pb_obj)
  unless descriptor_entry.nil?
    descriptor_entry.msgclass.descriptor.each_oneof do |oneof_field|
      # Name of the option that has been set for this group.
      chosen = pb_obj.send(oneof_field.name).to_s
      # Remove the names of all non-chosen options from the hash.
      # Whacky solution, better ideas are welcome.
      oneof_field.each do |group_option|
        datahash.delete(group_option.name) unless group_option.name == chosen
      end
      meta[oneof_field.name.to_s] = chosen
    end
  end
  return datahash, meta
end
|
374
|
+
|
375
|
+
|
376
|
+
# Encodes a single event to protobuf 3 bytes.
# On type mismatches the event may be re-encoded once after automatic type
# conversion (see pb3_handle_type_errors); the temporary tag marks that
# recursive attempt so we do not loop forever.
# @return [String, nil] serialized bytes, or nil when the event was discarded
def pb3_encode(event)
  datahash = event.to_hash
  # BUGFIX: this was `is_recursive_call = !tags.nil? and tags.include?(...)`.
  # `and` binds looser than `=`, so only the nil-check was assigned and ANY
  # tagged event was treated as a recursive call. Use && instead.
  tags = event.get('tags')
  is_recursive_call = !tags.nil? && tags.include?(@pb3_typeconversion_tag)
  if is_recursive_call
    datahash = pb3_remove_typeconversion_tag(datahash)
  end
  datahash = pb3_prepare_for_encoding(datahash)
  if datahash.nil?
    @logger.warn("Protobuf encoding error 4: empty data for event #{event.to_hash}")
  end
  if @pb_builder.nil?
    @logger.warn("Protobuf encoding error 5: empty protobuf builder for class #{@class_name}")
  end
  pb_obj = @pb_builder.new(datahash)
  @pb_builder.encode(pb_obj)
rescue ArgumentError => e
  k = event.to_hash.keys.join(", ")
  @logger.warn("Protobuf encoding error 1: Argument error (#{e.inspect}). Reason: probably mismatching protobuf definition. \
Required fields in the protobuf definition are: #{k} and fields must not begin with @ sign. The event has been discarded.")
  nil
rescue TypeError => e
  pb3_handle_type_errors(event, e, is_recursive_call, datahash)
  nil
rescue => e
  @logger.warn("Protobuf encoding error 3: #{e.inspect}. Event discarded. Input data: #{datahash}. The event has been discarded. Backtrace: #{e.backtrace}")
  nil
end
|
403
|
+
|
404
|
+
|
405
|
+
# Handles a TypeError raised during pb3 encoding.
# First failure: optionally auto-convert mismatched types and retry the
# encoding exactly once (marked via the recursion tag). Second failure
# (recursive call): log and give up; honour `stop_on_error`.
def pb3_handle_type_errors(event, e, is_recursive_call, datahash)
  begin
    if is_recursive_call
      # BUGFIX: `mismatches` was referenced here without ever being assigned,
      # which raised NameError instead of producing the log line. Compute it
      # so the warning can report what failed to convert.
      mismatches = pb3_get_type_mismatches(datahash, "", @class_name)
      @logger.warn("Protobuf encoding error 2.1: Type error (#{e.inspect}). Some types could not be converted. The event has been discarded. Type mismatches: #{mismatches}.")
    else
      if @pb3_encoder_autoconvert_types
        msg = "Protobuf encoding error 2.2: Type error (#{e.inspect}). Will try to convert the data types. Original data: #{datahash}"
        @logger.warn(msg)
        mismatches = pb3_get_type_mismatches(datahash, "", @class_name)
        event = pb3_convert_mismatched_types(event, mismatches)
        # Add a (temporary) tag to handle the recursion stop
        pb3_add_tag(event, @pb3_typeconversion_tag )
        pb3_encode(event)
      else
        @logger.warn("Protobuf encoding error 2.3: Type error (#{e.inspect}). The event has been discarded. Try setting pb3_encoder_autoconvert_types => true for automatic type conversion.")
      end
    end
  rescue TypeError => e
    if @pb3_encoder_autoconvert_types
      @logger.warn("Protobuf encoding error 2.4.1: (#{e.inspect}). Failed to convert data types. The event has been discarded. original data: #{datahash}")
    else
      @logger.warn("Protobuf encoding error 2.4.2: (#{e.inspect}). The event has been discarded.")
    end
    raise e if @stop_on_error
    nil
  rescue => ex
    @logger.warn("Protobuf encoding error 2.5: (#{e.inspect}). The event has been discarded. Auto-typecasting was on: #{@pb3_encoder_autoconvert_types}")
    raise ex if @stop_on_error
    nil
  end
end # pb3_handle_type_errors
|
441
|
+
|
442
|
+
|
443
|
+
# Collects all mismatches between the event data and the protobuf definition.
# @return [Array<Hash>] one record per mismatching field (possibly nested)
def pb3_get_type_mismatches(data, key_prefix, pb_class)
  data.to_h.flat_map do |key, value|
    expected_type = pb3_get_expected_type(key, pb_class)
    pb3_compare_datatypes(value, key, key_prefix, pb_class, expected_type)
  end
end
|
452
|
+
|
453
|
+
|
454
|
+
# Determines the expected Ruby class for field `key` by instantiating an
# empty message of `pb_class` and inspecting the default value's class.
# @return [Class, nil] nil when the class is unknown or the default is nil
def pb3_get_expected_type(key, pb_class)
  pb_descriptor = Google::Protobuf::DescriptorPool.generated_pool.lookup(pb_class)
  return if pb_descriptor.nil?
  default_value = pb_descriptor.msgclass.new({}).send(key)
  default_value.nil? ? nil : default_value.class
end
|
467
|
+
|
468
|
+
|
469
|
+
# Compares one value against the expected protobuf type, recursing into
# nested messages and lists.
# @return [Array<Hash>] mismatch records with "key", "actual_type",
#   "expected_type" and "value" entries
def pb3_compare_datatypes(value, key, key_prefix, pb_class, expected_type)
  mismatches = []
  if value.nil?
    is_mismatch = false
  else
    case value
    when ::Hash, Google::Protobuf::MessageExts
      # Nested message: never a mismatch itself, but recurse into its fields.
      is_mismatch = false
      descriptor = Google::Protobuf::DescriptorPool.generated_pool.lookup(pb_class).lookup(key)
      unless descriptor.subtype.nil?
        class_of_nested_object = pb3_get_descriptorpool_name(descriptor.subtype.msgclass)
        recursive_mismatches = pb3_get_type_mismatches(value, "#{key}.", class_of_nested_object)
        mismatches.concat(recursive_mismatches)
      end
    when ::Array
      expected_type = pb3_get_expected_type(key, pb_class)
      is_mismatch = (expected_type != Google::Protobuf::RepeatedField)
      child_type = Google::Protobuf::DescriptorPool.generated_pool.lookup(pb_class).lookup(key).type
      value.each_with_index do |element, idx|
        recursive_mismatches = pb3_compare_datatypes(element, idx.to_s, "#{key}.", pb_class, child_type)
        mismatches.concat(recursive_mismatches)
        is_mismatch |= recursive_mismatches.any?
      end
    else # scalar data type
      is_mismatch = !pb3_is_scalar_datatype_match(expected_type, value.class)
    end
  end
  if is_mismatch
    mismatches << {"key" => "#{key_prefix}#{key}", "actual_type" => value.class, "expected_type" => expected_type, "value" => value}
  end
  mismatches
end
|
505
|
+
|
506
|
+
# Removes the internal type-conversion tag from the event data because
# the protobuf definition might not have a field for tags; drops the
# whole 'tags' field when it becomes empty.
def pb3_remove_typeconversion_tag(data)
  tags = data['tags']
  tags.delete(@pb3_typeconversion_tag)
  data.delete('tags') if tags.empty?
  data
end
|
515
|
+
|
516
|
+
# Returns the DescriptorPool lookup name (fully qualified proto name)
# for a generated message class.
def pb3_get_descriptorpool_name(child_class)
  # Instantiate once so we reach the descriptor of the concrete class.
  child_class.new.class.descriptor.name
end
|
522
|
+
|
523
|
+
# Checks whether a scalar value's Ruby class is acceptable for the expected
# protobuf field type. Integers are additionally accepted for float fields.
def pb3_is_scalar_datatype_match(expected_type, actual_type)
  return true if expected_type == actual_type
  expected = expected_type.to_s.downcase.to_sym
  actual = actual_type.to_s.downcase.to_sym
  case expected
  when :string, :integer
    actual == expected
  when :float
    # ints are valid for float fields; they will be upcast
    [:float, :integer].include?(actual)
  end
end
|
540
|
+
|
541
|
+
|
542
|
+
# Reads `key` from either a plain Hash or a LogStash::Event-like object.
def pb3_convert_mismatched_types_getter(struct, key)
  struct.is_a?(::Hash) ? struct[key] : struct.get(key)
end
|
549
|
+
|
550
|
+
# Writes `value` under `key` on either a plain Hash or a LogStash::Event-like
# object, returning the (mutated) struct.
def pb3_convert_mismatched_types_setter(struct, key, value)
  struct.is_a?(::Hash) ? struct[key] = value : struct.set(key, value)
  struct
end
|
558
|
+
|
559
|
+
# Appends `tag` to the event's tags, creating the tags field when absent.
def pb3_add_tag(event, tag )
  existing_tags = event.get('tags')
  if existing_tags.nil?
    event.set('tags', [tag])
  else
    event.set("tags", existing_tags << tag)
  end
end
|
567
|
+
|
568
|
+
|
569
|
+
# Due to recursion on nested fields in the event object this method might be given an event (1st call) or a hash (2nd .. nth call)
|
570
|
+
# First call will be the event object, child objects will be hashes.
|
571
|
+
# Due to recursion on nested fields in the event object this method might be
# given an event (1st call) or a hash (2nd .. nth call); the first call gets
# the event object, child objects are plain hashes. Casts each mismatching
# field to its expected type where a sensible conversion exists.
def pb3_convert_mismatched_types(struct, mismatches)
  mismatches.each do |mismatch|
    key = mismatch['key']
    expected_type = mismatch['expected_type']
    actual_type = mismatch['actual_type']
    if key.include? "." # mismatch is inside a nested object, e.g. http_user_agent.minor_version
      outer_key, sub_key = key.split(".", 2)
      nested_mismatch = [{"key" => sub_key, "actual_type" => actual_type, "expected_type" => expected_type}]
      child = pb3_convert_mismatched_types_getter(struct, outer_key)
      converted_child = pb3_convert_mismatched_types(child, nested_mismatch)
      struct = pb3_convert_mismatched_types_setter(struct, outer_key, converted_child)
    else
      value = pb3_convert_mismatched_types_getter(struct, key)
      begin
        new_value =
          case expected_type.to_s
          when "Integer"
            case actual_type.to_s
            when "String"
              value.to_i
            when "Float"
              # convert values like 2.0 to 2, but not 2.1
              value.to_i if value.floor == value
            end
          when "String"
            value.to_s
          when "Float"
            value.to_f
          when "Boolean", "TrueClass", "FalseClass"
            value.to_s.downcase == "true"
          end
        struct = pb3_convert_mismatched_types_setter(struct, key, new_value) unless new_value.nil?
      rescue Exception => ex # NOTE(review): broad rescue kept from original; consider StandardError
        @logger.debug("Protobuf encoding error 5: Could not convert types for protobuf encoding: #{ex}")
      end
    end
  end # mismatches.each
  struct
end
|
614
|
+
|
615
|
+
|
616
|
+
# Prepares event data for pb3 encoding:
# 0) drops nil-valued fields,
# 1) strips @ signs from keys and symbolizes them,
# 2) stringifies timestamps and other non-primitive objects,
# then recurses into nested hashes.
def pb3_prepare_for_encoding(datahash)
  datahash = datahash.reject { |_key, value| value.nil? }
  prepared = {}
  datahash.each do |key, value|
    clean_key = key.gsub(/@/, '').to_sym
    prepared[clean_key] = should_convert_to_string?(value) ? value.to_s : value
  end
  prepared.each do |key, value|
    prepared[key] = pb3_prepare_for_encoding(value) if value.is_a?(Hash)
  end
  prepared
end
|
631
|
+
|
632
|
+
|
633
|
+
# Encodes an event to protobuf 2 bytes via the ruby-protocol-buffers gem.
# Always re-raises on failure; the caller decides how to react.
def pb2_encode(event)
  prepared = pb2_prepare_for_encoding(event.to_hash, @class_name)
  @pb_builder.new(prepared).serialize_to_string
rescue NoMethodError => e
  @logger.warn("Encoding error 2. Probably mismatching protobuf definition. Required fields in the protobuf definition are: " + event.to_hash.keys.join(", ") + " and the timestamp field name must not include a @. ")
  raise e
rescue => e
  @logger.warn("Encoding error 1: #{e.inspect}")
  raise e
end
|
644
|
+
|
645
|
+
|
646
|
+
def pb2_prepare_for_encoding(datahash, class_name)
|
647
|
+
if datahash.is_a?(::Hash)
|
648
|
+
# Preparation: the data cannot be encoded until certain criteria are met:
|
649
|
+
# 1) remove @ signs from keys.
|
650
|
+
# 2) convert timestamps and other objects to strings
|
651
|
+
datahash = ::Hash[datahash.map{|(k,v)| [k.to_s.dup.gsub(/@/,''), (should_convert_to_string?(v) ? v.to_s : v)] }]
|
652
|
+
|
653
|
+
# Check if any of the fields in this hash are protobuf classes and if so, create a builder for them.
|
654
|
+
meta = @metainfo_messageclasses[class_name]
|
655
|
+
if meta
|
656
|
+
meta.map do | (k,c) |
|
657
|
+
if datahash.include?(k)
|
658
|
+
original_value = datahash[k]
|
659
|
+
datahash[k] =
|
660
|
+
if original_value.is_a?(::Array)
|
661
|
+
# make this field an array/list of protobuf objects
|
662
|
+
# value is a list of hashed complex objects, each of which needs to be protobuffed and
|
663
|
+
# put back into the list.
|
664
|
+
original_value.map { |x| pb2_prepare_for_encoding(x, c) }
|
665
|
+
original_value
|
666
|
+
else
|
667
|
+
proto_obj = pb2_create_instance(c)
|
668
|
+
proto_obj.new(pb2_prepare_for_encoding(original_value, c)) # this line is reached in the colourtest for an enum.
|
669
|
+
# Enums should not be instantiated. Should enums even be in the messageclasses? I dont think so!
|
670
|
+
end # if is array
|
671
|
+
end # if datahash_include
|
672
|
+
end # do
|
673
|
+
end # if meta
|
674
|
+
end
|
675
|
+
datahash
|
676
|
+
end
|
677
|
+
|
678
|
+
|
679
|
+
# True when a value is neither a protobuf-friendly primitive (numeric,
# boolean) nor a container, and therefore must be stringified before encoding.
def should_convert_to_string?(v)
  return false if v.is_a?(Integer) || v.is_a?(Float)
  return false if v.is_a?(::Hash) || v.is_a?(::Array)
  return false if [true, false].include?(v)
  true
end
|
682
|
+
|
683
|
+
|
684
|
+
# Resolves a fully qualified class name like "Animal::Horse::Unicorn"
# to the corresponding class object.
def pb2_create_instance(name)
  @logger.debug("Creating instance of " + name)
  name.split('::').inject(Object) { |namespace, const| namespace.const_get(const) }
end
|
688
|
+
|
689
|
+
|
690
|
+
# Parses a generated pb3 ruby definition file with regexes to learn which
# fields are nested message types and which are enums. Results are stored in
# @metainfo_messageclasses / @metainfo_enumclasses, keyed by proto class name.
def pb3_metadata_analyis(filename)
  regex_class_name = /\s*add_message "(?<name>.+?)" do\s+/
  regex_pbdefs = /\s*(optional|repeated)(\s*):(?<name>.+),(\s*):(?<type>\w+),(\s*)(?<position>\d+)(, \"(?<enum_class>.*?)\")?/
  class_name = ""
  type = ""
  field_name = ""
  File.readlines(filename).each do |line|
    if line =~ regex_class_name
      class_name = $1
      @metainfo_messageclasses[class_name] = {}
      @metainfo_enumclasses[class_name] = {}
    end
    if line =~ regex_pbdefs
      # Named captures: $1 = field name, $2 = field type, $4 = referenced class.
      field_name = $1
      type = $2
      field_class_name = $4
      case type
      when "message"
        @metainfo_messageclasses[class_name][field_name] = field_class_name
      when "enum"
        @metainfo_enumclasses[class_name][field_name] = field_class_name
      end
    end
  end
  # NOTE(review): class_name starts as "" and is never set to nil, so this
  # guard appears dead; kept for behavioral parity with the original.
  if class_name.nil?
    @logger.error("Error 4: class name not found in file " + filename)
    raise ArgumentError, "Invalid protobuf file: " + filename
  end
rescue Exception => e
  @logger.error("Error 3: unable to read pb definition from file " + filename+ ". Reason: #{e.inspect}. Last settings were: class #{class_name} field #{field_name} type #{type}. Backtrace: " + e.backtrace.inspect.to_s)
  raise e
end
|
721
|
+
|
722
|
+
|
723
|
+
# Scans a protobuf-2 generated Ruby file (ruby-protocol-buffers style) and
# records, for every class found, which of its fields are nested message
# types (@metainfo_messageclasses) and which are enum types
# (@metainfo_enumclasses). Enum classes themselves are collected in
# @metainfo_pb2_enumlist so that later field references can be classified.
# (Method name keeps the historic "analyis" spelling.)
#
# @param filename [String] path to the generated .pb.rb file
# @raise [ArgumentError] when the file cannot be loaded
# @raise re-raises any other StandardError encountered while parsing
def pb2_metadata_analyis(filename)
  # Matches:  set_fully_qualified_name "my.namespace.Msg"
  regex_class_start = /\s*set_fully_qualified_name \"(?<name>.+)\".*?/
  # Marks a class as an enum:  include ::ProtocolBuffers::Enum
  regex_enum_name = /\s*include ..ProtocolBuffers..Enum\s*/
  # Matches field lines like:  optional :SomeType, :field_name, 1
  regex_pbdefs = /\s*(optional|repeated)(\s*):(?<type>.+),(\s*):(?<name>\w+),(\s*)(?<position>\d+)/
  # now we also need to find out which class it contains and the protobuf definitions in it.
  # We'll unfortunately need that later so that we can create nested objects.

  class_name = ""
  type = ""
  field_name = ""
  is_enum_class = false

  File.readlines(filename).each do |line|
    # The Enum include appears before set_fully_qualified_name, so remember
    # it until the class name line is reached.
    if ! (line =~ regex_enum_name).nil?
      is_enum_class= true
    end

    if ! (line =~ regex_class_start).nil?
      # Convert "my.namespace.Msg" to "My::Namespace::Msg".
      class_name = $1.gsub('.',"::").split('::').map {|word| word.capitalize}.join('::')
      if is_enum_class
        @metainfo_pb2_enumlist << class_name.downcase
      end
      is_enum_class= false # reset when next class starts
    end
    if ! (line =~ regex_pbdefs).nil?
      # NOTE: regex_pbdefs contains named capture groups, so Ruby's $1/$2
      # refer to the named captures in declaration order (type, name).
      type = $1
      field_name = $2
      # Only namespaced types (containing "::") can be nested messages/enums;
      # plain scalar types like :string are ignored here.
      if type =~ /::/
        clean_type = type.gsub(/^:/,"")
        e = @metainfo_pb2_enumlist.include? clean_type.downcase

        if e
          if not @metainfo_enumclasses.key? class_name
            @metainfo_enumclasses[class_name] = {}
          end
          @metainfo_enumclasses[class_name][field_name] = clean_type
        else
          if not @metainfo_messageclasses.key? class_name
            @metainfo_messageclasses[class_name] = {}
          end
          @metainfo_messageclasses[class_name][field_name] = clean_type
        end
      end
    end
  end
  # NOTE(review): class_name is initialised to "" and never becomes nil, so
  # this guard can never fire; presumably class_name.empty? was intended —
  # confirm before changing.
  if class_name.nil?
    @logger.warn("Error 4: class name not found in file " + filename)
    raise ArgumentError, "Invalid protobuf file: " + filename
  end
rescue LoadError => e
  # NOTE(review): `e` is unused in this branch; the original error detail is
  # dropped in favour of a path-related hint.
  raise ArgumentError.new("Could not load file: " + filename + ". Please try to use absolute pathes. Current working dir: " + Dir.pwd + ", loadpath: " + $LOAD_PATH.join(" "))
rescue => e

  @logger.warn("Error 3: unable to read pb definition from file " + filename+ ". Reason: #{e.inspect}. Last settings were: class #{class_name} field #{field_name} type #{type}. Backtrace: " + e.backtrace.inspect.to_s)
  raise e
end
|
779
|
+
|
780
|
+
|
781
|
+
# Loads a compiled protobuf definition file (a generated .rb file) and then
# analyses it to collect nested message/enum metadata for the configured
# protobuf version. Non-Ruby files are skipped with a warning.
#
# @param filename [String] path to the generated protobuf .rb file
# @raise re-raises any error encountered while requiring the file
def load_protobuf_definition(filename)
  if filename.end_with?('.rb')
    # Only absolute paths are required directly; relative paths are expected
    # to be resolvable through the load path configured elsewhere.
    if (Pathname.new filename).absolute?
      begin
        require filename
      rescue Exception => e
        # Rescue Exception (not just StandardError) so that load-time errors
        # such as SyntaxError are also logged; the error is re-raised.
        # Bug fix: the message used "#(unknown)" instead of interpolating
        # the filename, so the log never named the offending file.
        @logger.error("Unable to load file: #{filename}. Reason: #{e.inspect}")
        raise e
      end
    end

    if @protobuf_version == 3
      pb3_metadata_analyis(filename)
    else
      pb2_metadata_analyis(filename)
    end

  else
    @logger.warn("Not a ruby file: " + filename)
  end
end
|
803
|
+
|
804
|
+
end # class LogStash::Codecs::Protobuf
|