logstash-codec-protobuf 1.2.8-jruby
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +45 -0
- data/CONTRIBUTORS +12 -0
- data/DEVELOPER.md +2 -0
- data/Gemfile +11 -0
- data/LICENSE +202 -0
- data/NOTICE.TXT +4 -0
- data/README.md +184 -0
- data/docs/index.asciidoc +241 -0
- data/google-protobuf-lib-update.md +57 -0
- data/lib/logstash/codecs/protobuf.rb +735 -0
- data/logstash-codec-protobuf.gemspec +28 -0
- data/spec/codecs/pb2_spec.rb +236 -0
- data/spec/codecs/pb3_decode_spec.rb +445 -0
- data/spec/codecs/pb3_encode_spec.rb +243 -0
- data/spec/helpers/pb2/ColourTestcase.pb.rb +35 -0
- data/spec/helpers/pb2/ColourTestcase.proto +24 -0
- data/spec/helpers/pb2/event.pb.rb +19 -0
- data/spec/helpers/pb2/event.proto +12 -0
- data/spec/helpers/pb2/header/header.pb.rb +16 -0
- data/spec/helpers/pb2/header/header.proto +8 -0
- data/spec/helpers/pb2/human.pb.rb +26 -0
- data/spec/helpers/pb2/unicorn.pb.rb +19 -0
- data/spec/helpers/pb2/unicorn_event.pb.rb +24 -0
- data/spec/helpers/pb3/FantasyHorse_pb.rb +44 -0
- data/spec/helpers/pb3/ProbeResult_pb.rb +26 -0
- data/spec/helpers/pb3/dnsmessage_pb.rb +82 -0
- data/spec/helpers/pb3/events.proto3 +10 -0
- data/spec/helpers/pb3/events_pb.rb +17 -0
- data/spec/helpers/pb3/header/header.proto3 +7 -0
- data/spec/helpers/pb3/header/header_pb.rb +12 -0
- data/spec/helpers/pb3/integertest_pb.rb +20 -0
- data/spec/helpers/pb3/messageA.proto3 +12 -0
- data/spec/helpers/pb3/messageA_pb.rb +16 -0
- data/spec/helpers/pb3/messageB.proto3 +12 -0
- data/spec/helpers/pb3/messageB_pb.rb +16 -0
- data/spec/helpers/pb3/rum2_pb.rb +87 -0
- data/spec/helpers/pb3/rum3_pb.rb +87 -0
- data/spec/helpers/pb3/rum_pb.rb +87 -0
- data/spec/helpers/pb3/unicorn.proto3 +31 -0
- data/spec/helpers/pb3/unicorn_pb.rb +31 -0
- metadata +177 -0
@@ -0,0 +1,735 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'logstash/codecs/base'
|
3
|
+
require 'logstash/util/charset'
|
4
|
+
require 'google/protobuf' # for protobuf3
|
5
|
+
require 'protocol_buffers' # https://github.com/codekitchen/ruby-protocol-buffers, for protobuf2
|
6
|
+
|
7
|
+
# Monkey-patch the `Google::Protobuf::DescriptorPool` with a mutex for exclusive
|
8
|
+
# access.
|
9
|
+
#
|
10
|
+
# The DescriptorPool instance is not thread-safe when loading protobuf
|
11
|
+
# definitions. This can cause unrecoverable errors when registering multiple
|
12
|
+
# concurrent pipelines that try to register the same dependency. The
|
13
|
+
# DescriptorPool instance is global to the JVM and shared among all pipelines.
|
14
|
+
# Create the shared mutex once, at load time, instead of lazily on first use:
# a lazy "check, then assign" lets two threads that call `with_lock`
# simultaneously each build their own Mutex and thereby bypass the exclusion.
unless Google::Protobuf::DescriptorPool.instance_variable_defined?(:@mutex)
  Google::Protobuf::DescriptorPool.instance_variable_set(:@mutex, Mutex.new)
end

class << Google::Protobuf::DescriptorPool
  # Returns the Mutex stored on the DescriptorPool class object.
  # Note that this method does not take a block itself; callers are expected to
  # use `with_lock.synchronize { ... }`.
  def with_lock
    @mutex
  end
end
|
23
|
+
|
24
|
+
# This codec converts protobuf encoded messages into logstash events and vice versa.
|
25
|
+
#
|
26
|
+
# Requires the protobuf definitions as ruby files. You can create those using the [ruby-protoc compiler](https://github.com/codekitchen/ruby-protocol-buffers).
|
27
|
+
#
|
28
|
+
# The following shows a usage example for decoding protobuf 2 encoded events from a kafka stream:
|
29
|
+
# [source,ruby]
|
30
|
+
# kafka
|
31
|
+
# {
|
32
|
+
# zk_connect => "127.0.0.1"
|
33
|
+
# topic_id => "your_topic_goes_here"
|
34
|
+
# key_deserializer_class => "org.apache.kafka.common.serialization.ByteArrayDeserializer"
|
35
|
+
# value_deserializer_class => "org.apache.kafka.common.serialization.ByteArrayDeserializer"
|
36
|
+
# codec => protobuf
|
37
|
+
# {
|
38
|
+
# class_name => "Animal::Unicorn"
|
39
|
+
# include_path => ['/path/to/protobuf/definitions/UnicornProtobuf.pb.rb']
|
40
|
+
# }
|
41
|
+
# }
|
42
|
+
#
|
43
|
+
# Same example for protobuf 3:
|
44
|
+
# [source,ruby]
|
45
|
+
# kafka
|
46
|
+
# {
|
47
|
+
# zk_connect => "127.0.0.1"
|
48
|
+
# topic_id => "your_topic_goes_here"
|
49
|
+
# key_deserializer_class => "org.apache.kafka.common.serialization.ByteArrayDeserializer"
|
50
|
+
# value_deserializer_class => "org.apache.kafka.common.serialization.ByteArrayDeserializer"
|
51
|
+
# codec => protobuf
|
52
|
+
# {
|
53
|
+
# class_name => "Animal.Unicorn"
|
54
|
+
# include_path => ['/path/to/protobuf/definitions/UnicornProtobuf_pb.rb']
|
55
|
+
# protobuf_version => 3
|
56
|
+
# }
|
57
|
+
# }
|
58
|
+
#
|
59
|
+
# Specifically for the kafka input: please set the deserializer classes as shown above.
|
60
|
+
|
61
|
+
class LogStash::Codecs::Protobuf < LogStash::Codecs::Base
|
62
|
+
config_name 'protobuf'
|
63
|
+
|
64
|
+
# Name of the class to decode.
|
65
|
+
# If your protobuf 2 definition contains modules, prepend them to the class name with double colons like so:
|
66
|
+
# [source,ruby]
|
67
|
+
# class_name => "Animal::Horse::Unicorn"
|
68
|
+
#
|
69
|
+
# This corresponds to a protobuf definition starting as follows:
|
70
|
+
# [source,ruby]
|
71
|
+
# module Animal
|
72
|
+
# module Horse
|
73
|
+
# class Unicorn
|
74
|
+
# # here are your field definitions.
|
75
|
+
#
|
76
|
+
# For protobuf 3 separate the modules with single dots.
|
77
|
+
# [source,ruby]
|
78
|
+
# class_name => "Animal.Horse.Unicorn"
|
79
|
+
# Check the bottom of the generated protobuf ruby file. It contains lines like this:
|
80
|
+
# [source,ruby]
|
81
|
+
# Animals.Unicorn = Google::Protobuf::DescriptorPool.generated_pool.lookup("Animals.Unicorn").msgclass
|
82
|
+
# Use the parameter for the lookup call as the class_name for the codec config.
|
83
|
+
#
|
84
|
+
# If your class references other definitions: you only have to add the main class here.
|
85
|
+
config :class_name, :validate => :string, :required => true
|
86
|
+
|
87
|
+
# Relative path to the ruby file that contains class_name
|
88
|
+
#
|
89
|
+
# Relative path (from `protobuf_root_directory`) that holds the definition of the class specified in
|
90
|
+
# `class_name`.
|
91
|
+
#
|
92
|
+
# `class_file` and `include_path` cannot be used at the same time.
|
93
|
+
config :class_file, :validate => :string, :default => '', :required => false
|
94
|
+
|
95
|
+
# Absolute path to the root directory that contains all referenced/used dependencies
|
96
|
+
# of the main class (`class_name`) or any of its dependencies.
|
97
|
+
#
|
98
|
+
# For instance:
|
99
|
+
#
|
100
|
+
# pb3
|
101
|
+
# ├── header
|
102
|
+
# │ └── header_pb.rb
|
103
|
+
# ├── messageA_pb.rb
|
104
|
+
#
|
105
|
+
# In this case `messageA_pb.rb` has an embedded message from `header/header_pb.rb`.
|
106
|
+
# If `class_file` is set to `messageA_pb.rb`, and `class_name` to
|
107
|
+
# `MessageA`, `protobuf_root_directory` must be set to `/path/to/pb3`, which includes
|
108
|
+
# both definitions.
|
109
|
+
config :protobuf_root_directory, :validate => :string, :required => false
|
110
|
+
|
111
|
+
# List of absolute paths to files with protobuf definitions.
|
112
|
+
# When using more than one file, make sure to arrange the files in reverse order of dependency so that each class is loaded before it is
|
113
|
+
# referred to by another.
|
114
|
+
#
|
115
|
+
# Example: a class _Unicorn_ referencing another protobuf class _Wings_
|
116
|
+
# [source,ruby]
|
117
|
+
# module Animal
|
118
|
+
# module Horse
|
119
|
+
# class Unicorn
|
120
|
+
# set_fully_qualified_name "Animal.Horse.Unicorn"
|
121
|
+
# optional ::Animal::Bodypart::Wings, :wings, 1
|
122
|
+
# optional :string, :name, 2
|
123
|
+
# # here be more field definitions
|
124
|
+
#
|
125
|
+
# would be configured as
|
126
|
+
# [source,ruby]
|
127
|
+
# include_path => ['/path/to/protobuf/definitions/Wings.pb.rb','/path/to/protobuf/definitions/Unicorn.pb.rb']
|
128
|
+
#
|
129
|
+
# `class_file` and `include_path` cannot be used at the same time.
|
130
|
+
config :include_path, :validate => :array, :default => [], :required => false
|
131
|
+
|
132
|
+
# Protocol buffer version switch. Defaults to version 2. Please note that the behaviour for enums varies between the versions.
|
133
|
+
# For protobuf 2 you will get integer representations for enums, for protobuf 3 you'll get string representations due to a different converter library.
|
134
|
+
# Recommendation: use the translate plugin to restore previous behaviour when upgrading.
|
135
|
+
config :protobuf_version, :validate => [2,3], :default => 2, :required => true
|
136
|
+
|
137
|
+
# To tolerate faulty messages that cannot be en/decoded, set this to false. Otherwise the pipeline will stop upon encountering a non decipherable message.
|
138
|
+
config :stop_on_error, :validate => :boolean, :default => false, :required => false
|
139
|
+
|
140
|
+
# Instruct the encoder to attempt converting data types to match the protobuf definitions. Available only for protobuf version 3.
|
141
|
+
config :pb3_encoder_autoconvert_types, :validate => :boolean, :default => true, :required => false
|
142
|
+
|
143
|
+
# Add meta information to `[@metadata][pb_oneof]` about which classes were chosen for [oneof](https://developers.google.com/protocol-buffers/docs/proto3#oneof) fields.
|
144
|
+
# Example values: for the protobuf definition
|
145
|
+
# ``` oneof :horse_type do
|
146
|
+
# optional :unicorn, :message, 2, "FantasyUnicorn"
|
147
|
+
# optional :pegasus, :message, 3, "FantasyPegasus"
|
148
|
+
# end
|
149
|
+
# ```
|
150
|
+
# the field `[@metadata][pb_oneof][horse_type]` will be set to either `pegasus` or `unicorn`.
|
151
|
+
# Available only for protobuf version 3.
|
152
|
+
config :pb3_set_oneof_metainfo, :validate => :boolean, :default => false, :required => false
|
153
|
+
|
154
|
+
|
155
|
+
attr_reader :execution_context
|
156
|
+
|
157
|
+
# Returns the pipeline id reported by the execution context, or "main" when
# no execution context is available.
def pipeline_id
  return "main" unless respond_to?(:execution_context)

  context = execution_context
  context.nil? ? "main" : context.pipeline_id
end
|
161
|
+
|
162
|
+
# Validates the codec configuration, loads the protobuf definitions and
# resolves the class used for en-/decoding into @pb_builder.
#
# Raises LogStash::ConfigurationError when `include_path` and `class_file` are
# both set, when neither is set, or when a protobuf 3 `class_name` cannot be
# found in the descriptor pool after the definitions were loaded.
def register
  @metainfo_messageclasses = {}
  @metainfo_enumclasses = {}
  @metainfo_pb2_enumlist = []
  @pb3_typeconversion_tag = "_protobuf_type_converted"

  has_include_path = @include_path.length > 0
  has_class_file = !class_file.strip.empty?

  if has_include_path && has_class_file
    raise LogStash::ConfigurationError, "Cannot use `include_path` and `class_file` at the same time"
  end

  unless has_include_path || has_class_file
    raise LogStash::ConfigurationError, "Need to specify `include_path` or `class_file`"
  end

  # Definitions are only loaded when the class is not yet known to the pool.
  should_register = Google::Protobuf::DescriptorPool.generated_pool.lookup(class_name).nil?

  root_given = !(@protobuf_root_directory.nil? || @protobuf_root_directory.strip.empty?)
  if root_given && should_register && !$LOAD_PATH.include?(@protobuf_root_directory)
    $LOAD_PATH.unshift(@protobuf_root_directory)
  end

  # A relative `class_file` is resolved against `protobuf_root_directory`.
  unless @class_file.empty? || Pathname.new(@class_file).absolute?
    @class_file = "#{@protobuf_root_directory}/#{@class_file}"
  end

  # exclusive access while loading protobuf definitions
  Google::Protobuf::DescriptorPool.with_lock.synchronize do
    if should_register
      # load from `class_file`
      load_protobuf_definition(@class_file) unless @class_file.empty?
      # load from `include_path`
      include_path.each { |path| load_protobuf_definition(path) }
    end

    if @protobuf_version == 3
      descriptor = Google::Protobuf::DescriptorPool.generated_pool.lookup(class_name)
      if descriptor.nil?
        # Fail with a configuration error instead of a NoMethodError on nil.
        raise LogStash::ConfigurationError, "Protobuf class `#{class_name}` was not found in the descriptor pool after loading the definitions. Check `class_name`, `class_file` and `include_path`."
      end
      @pb_builder = descriptor.msgclass
    else
      @pb_builder = pb2_create_instance(class_name)
    end
  end
end
|
200
|
+
|
201
|
+
# Pipelines using this plugin cannot be reloaded.
|
202
|
+
# https://github.com/elastic/logstash/pull/6499
|
203
|
+
#
|
204
|
+
# The DescriptorPool instance registers the protobuf classes (and
|
205
|
+
# dependencies) as global objects. This makes it very difficult to reload a
|
206
|
+
# pipeline, because `class_name` and all of its dependencies are already
|
207
|
+
# registered.
|
208
|
+
# Always answers false.
def reloadable?
  false
end
|
211
|
+
|
212
|
+
# Decodes one protobuf payload into a LogStash::Event and yields it to the
# given block (nothing is yielded when no block is passed).
#
# data - the raw payload; it is converted with `to_s` before parsing.
#
# On failure a warning is logged. With `stop_on_error` the exception is
# re-raised; otherwise an event holding the original payload and the tag
# `_protobufdecodefailure` is yielded so that the message can be debugged.
def decode(data)
  if @protobuf_version == 3
    decoded = @pb_builder.decode(data.to_s)
    meta = pb3_get_oneof_metainfo(decoded, @class_name) if @pb3_set_oneof_metainfo
    fields = pb3_deep_to_hash(decoded)
  else
    fields = @pb_builder.parse(data.to_s).to_hash
  end

  event = LogStash::Event.new(fields)
  if @protobuf_version == 3 && @pb3_set_oneof_metainfo
    event.set("[@metadata][pb_oneof]", meta)
  end
  yield event if block_given?
rescue => ex
  @logger.warn("Couldn't decode protobuf: #{ex.inspect}.")
  raise ex if stop_on_error

  # keep original message so that the user can debug it.
  failure_event = LogStash::Event.new("message" => data, "tags" => ["_protobufdecodefailure"])
  # Guarded like the success path: without a block a bare `yield` would raise
  # LocalJumpError from inside the error handler.
  yield failure_event if block_given?
end # def decode
|
236
|
+
|
237
|
+
|
238
|
+
# Encodes the event with the protobuf 3 or protobuf 2 encoder (depending on
# @protobuf_version) and passes the bytes to @on_event. Nothing is emitted
# when the encoder returned nil or an empty result.
def encode(event)
  protobytes = @protobuf_version == 3 ? pb3_encode(event) : pb2_encode(event)
  return if protobytes.nil? || protobytes.empty?

  @on_event.call(event, protobytes)
end # def encode
|
248
|
+
|
249
|
+
|
250
|
+
private
|
251
|
+
# Recursively converts a decoded protobuf 3 value into plain Ruby data:
# messages and hashes become hashes, arrays are converted element-wise,
# symbols (enum values) become strings, anything else is returned unchanged.
def pb3_deep_to_hash(input)
  case input
  when Google::Protobuf::MessageExts # it's a protobuf class
    input.to_h.each_with_object({}) do |(field, content), converted|
      converted[field] = pb3_deep_to_hash(content) # the key is required for the class lookup of enums.
    end
  when ::Array
    input.map { |element| pb3_deep_to_hash(element) }
  when ::Hash
    input.each_with_object({}) do |(field, content), converted|
      converted[field] = pb3_deep_to_hash(content)
    end
  when Symbol # is an Enum
    input.to_s.sub(':','')
  else
    input
  end
end
|
275
|
+
|
276
|
+
# Encodes an event with the protobuf 3 builder.
#
# Returns the encoded bytes, or nil when the event had to be discarded.
# On a TypeError the work is delegated to pb3_handle_type_errors, which may
# convert the data types and call this method again; that retry is recognised
# by the temporary tag stored in @pb3_typeconversion_tag.
def pb3_encode(event)
  datahash = event.to_hash

  # `&&` is required here: with `and` the assignment binds first, which marked
  # every event that carried any tags as a recursive call.
  tags = event.get('tags')
  is_recursive_call = !tags.nil? && tags.include?(@pb3_typeconversion_tag)
  datahash = pb3_remove_typeconversion_tag(datahash) if is_recursive_call

  datahash = pb3_prepare_for_encoding(datahash)
  if datahash.nil?
    @logger.warn("Protobuf encoding error 4: empty data for event #{event.to_hash}")
  end
  if @pb_builder.nil?
    @logger.warn("Protobuf encoding error 5: empty protobuf builder for class #{@class_name}")
  end
  pb_obj = @pb_builder.new(datahash)
  @pb_builder.encode(pb_obj)
rescue ArgumentError => e
  k = event.to_hash.keys.join(", ")
  @logger.warn("Protobuf encoding error 1: Argument error (#{e.inspect}). Reason: probably mismatching protobuf definition. Required fields in the protobuf definition are: #{k} and fields must not begin with @ sign. The event has been discarded.")
  nil
rescue TypeError => e
  # Hand back the bytes produced by the type-conversion retry; anything that
  # is not a String (e.g. a logger return value) means "event discarded".
  converted = pb3_handle_type_errors(event, e, is_recursive_call, datahash)
  converted.is_a?(::String) ? converted : nil
rescue => e
  @logger.warn("Protobuf encoding error 3: #{e.inspect}. Event discarded. Input data: #{datahash}. The event has been discarded. Backtrace: #{e.backtrace}")
  nil
end
|
306
|
+
|
307
|
+
|
308
|
+
|
309
|
+
|
310
|
+
# Reacts to a TypeError raised while encoding a protobuf 3 event.
#
# event             - the event that failed to encode
# e                 - the TypeError that was raised
# is_recursive_call - true when the event was already type-converted once
# datahash          - the prepared data that was passed to the builder
#
# Returns the result of the second pb3_encode attempt when automatic type
# conversion is enabled and this is the first failure; returns nil whenever
# the event is discarded. Re-raises errors that occur during the conversion
# when @stop_on_error is set.
def pb3_handle_type_errors(event, e, is_recursive_call, datahash)
  if is_recursive_call
    # The mismatches have to be computed here; referencing the local before it
    # was ever assigned raised a NameError instead of logging this warning.
    mismatches = pb3_get_type_mismatches(datahash, "", @class_name)
    @logger.warn("Protobuf encoding error 2.1: Type error (#{e.inspect}). Some types could not be converted. The event has been discarded. Type mismatches: #{mismatches}.")
    nil
  elsif @pb3_encoder_autoconvert_types
    msg = "Protobuf encoding error 2.2: Type error (#{e.inspect}). Will try to convert the data types. Original data: #{datahash}"
    @logger.warn(msg)
    mismatches = pb3_get_type_mismatches(datahash, "", @class_name)

    event = pb3_convert_mismatched_types(event, mismatches)
    # Add a (temporary) tag to handle the recursion stop
    pb3_add_tag(event, @pb3_typeconversion_tag)
    pb3_encode(event)
  else
    @logger.warn("Protobuf encoding error 2.3: Type error (#{e.inspect}). The event has been discarded. Try setting pb3_encoder_autoconvert_types => true for automatic type conversion.")
    nil
  end
rescue TypeError => e
  if @pb3_encoder_autoconvert_types
    @logger.warn("Protobuf encoding error 2.4.1: (#{e.inspect}). Failed to convert data types. The event has been discarded. original data: #{datahash}")
  else
    @logger.warn("Protobuf encoding error 2.4.2: (#{e.inspect}). The event has been discarded.")
  end
  raise e if @stop_on_error

  nil
rescue => ex
  # Log the exception that was actually rescued here (ex), not the original
  # TypeError (e).
  @logger.warn("Protobuf encoding error 2.5: (#{ex.inspect}). The event has been discarded. Auto-typecasting was on: #{@pb3_encoder_autoconvert_types}")
  raise ex if @stop_on_error

  nil
end # pb3_handle_type_errors
|
347
|
+
|
348
|
+
|
349
|
+
# Collects the type mismatches of every field in `data` (converted with
# `to_h`) against the protobuf class `pb_class`.
# Returns the concatenated mismatch lists produced by pb3_compare_datatypes.
def pb3_get_type_mismatches(data, key_prefix, pb_class)
  data.to_h.flat_map do |key, value|
    expected_type = pb3_get_expected_type(key, pb_class)
    pb3_compare_datatypes(value, key, key_prefix, pb_class, expected_type)
  end
end
|
358
|
+
|
359
|
+
|
360
|
+
# Determines the Ruby class a field is expected to hold by reading the field
# from a freshly built, empty message of `pb_class`.
#
# Returns the class of that default value, or nil when `pb_class` is not in
# the descriptor pool or the default value is nil.
def pb3_get_expected_type(key, pb_class)
  pb_descriptor = Google::Protobuf::DescriptorPool.generated_pool.lookup(pb_class)
  return nil if pb_descriptor.nil?

  # `key` originates from event data, so only public methods may be invoked:
  # plain `send` would also reach private methods such as `exit`.
  default_value = pb_descriptor.msgclass.new({}).public_send(key)
  default_value.nil? ? nil : default_value.class
end
|
375
|
+
|
376
|
+
# Compares one value with the type expected by the protobuf definition.
#
# value         - the value found in the event data
# key           - the field name (or the index, as a string, for array items)
# key_prefix    - dotted path of the parent fields, "" on the top level
# pb_class      - descriptor pool name of the class that owns the field
# expected_type - the type the field should have
#
# Returns an array of mismatch hashes with the keys "key", "actual_type",
# "expected_type" and "value". Nested objects and arrays are checked
# recursively; a nil value never counts as a mismatch.
def pb3_compare_datatypes(value, key, key_prefix, pb_class, expected_type)
  mismatches = []

  case value
  when nil
    is_mismatch = false
  when ::Hash, Google::Protobuf::MessageExts
    is_mismatch = false
    descriptor = Google::Protobuf::DescriptorPool.generated_pool.lookup(pb_class).lookup(key)
    unless descriptor.subtype.nil?
      class_of_nested_object = pb3_get_descriptorpool_name(descriptor.subtype.msgclass)
      # Keep the parent path: dropping key_prefix here produced wrong keys for
      # everything nested more than one level deep.
      new_prefix = "#{key_prefix}#{key}."
      mismatches.concat(pb3_get_type_mismatches(value, new_prefix, class_of_nested_object))
    end
  when ::Array
    expected_type = pb3_get_expected_type(key, pb_class)
    is_mismatch = (expected_type != Google::Protobuf::RepeatedField)
    child_type = Google::Protobuf::DescriptorPool.generated_pool.lookup(pb_class).lookup(key).type
    element_prefix = "#{key_prefix}#{key}."
    value.each_with_index do |element, index|
      recursive_mismatches = pb3_compare_datatypes(element, index.to_s, element_prefix, pb_class, child_type)
      mismatches.concat(recursive_mismatches)
      is_mismatch |= recursive_mismatches.any?
    end
  else # is scalar data type
    is_mismatch = !pb3_is_scalar_datatype_match(expected_type, value.class)
  end

  if is_mismatch
    mismatches << {"key" => "#{key_prefix}#{key}", "actual_type" => value.class, "expected_type" => expected_type, "value" => value}
  end
  mismatches
end
|
413
|
+
|
414
|
+
# Removes the temporary type-conversion tag (@pb3_typeconversion_tag) from
# data['tags'], because the protobuf definition might not have a field for
# tags. The 'tags' entry is dropped altogether once it is empty.
# Returns `data`; a hash without 'tags' is returned untouched.
def pb3_remove_typeconversion_tag(data)
  tags = data['tags']
  return data if tags.nil?

  tags.delete(@pb3_typeconversion_tag)
  data.delete('tags') if tags.length == 0
  data
end
|
423
|
+
|
424
|
+
# Returns the descriptor pool lookup name of a protobuf message class.
# The descriptor is read from the class directly; no message instance needs
# to be created for that.
def pb3_get_descriptorpool_name(child_class)
  child_class.descriptor.name
end
|
430
|
+
|
431
|
+
# Tells whether a scalar value of `actual_type` is acceptable for a field of
# `expected_type`. Identical types always match. Otherwise the lower-cased
# type names are compared: string and integer must be equal, float also
# accepts integer. Any other expected type yields nil.
def pb3_is_scalar_datatype_match(expected_type, actual_type)
  return true if expected_type == actual_type

  expected_name = expected_type.to_s.downcase.to_sym
  actual_name = actual_type.to_s.downcase.to_sym
  case expected_name
  when :string, :integer
    actual_name == expected_name
  when :float
    [:float, :integer].include?(actual_name)
  end
end
|
448
|
+
|
449
|
+
|
450
|
+
# Reads `key` from `struct`: via `[]` for hashes, via `get` for anything else.
def pb3_convert_mismatched_types_getter(struct, key)
  struct.is_a?(::Hash) ? struct[key] : struct.get(key)
end
|
457
|
+
|
458
|
+
# Stores `value` under `key` in `struct` (via `[]=` for hashes, via `set` for
# anything else) and returns `struct`.
def pb3_convert_mismatched_types_setter(struct, key, value)
  case struct
  when ::Hash
    struct[key] = value
  else
    struct.set(key, value)
  end
  struct
end
|
466
|
+
|
467
|
+
# Appends `tag` to the event's 'tags' field, creating the field if needed.
# An existing value that is not an array (e.g. a single string) is wrapped
# into an array first, so the tag is never concatenated onto a string.
def pb3_add_tag(event, tag)
  existing_tags = event.get('tags')
  event.set('tags', Array(existing_tags) + [tag])
end
|
475
|
+
|
476
|
+
# Converts the values listed in `mismatches` to the types expected by the
# protobuf definition and returns the updated `struct`.
#
# Due to recursion on nested fields this method might be given an event
# (1st call) or a hash (2nd .. nth call): the first call will be the event
# object, child objects will be hashes.
#
# Each mismatch is a hash with the keys "key", "expected_type" and
# "actual_type". A dotted key (e.g. http_user_agent.minor_version) addresses a
# field in a child object. Values that cannot be converted are left unchanged.
def pb3_convert_mismatched_types(struct, mismatches)
  mismatches.each do |m|
    key = m['key']
    expected_type = m['expected_type']
    actual_type = m['actual_type']
    if key.include? "." # the mismatch is in a child object
      levels = key.split(/\./) # key is something like http_user_agent.minor_version and needs to be split.
      key = levels[0]
      sub_levels = levels.drop(1).join(".")
      new_mismatches = [{"key"=>sub_levels, "actual_type"=>actual_type, "expected_type"=>expected_type}]
      value = pb3_convert_mismatched_types_getter(struct, key)
      new_value = pb3_convert_mismatched_types(value, new_mismatches)
      struct = pb3_convert_mismatched_types_setter(struct, key, new_value)
    else
      value = pb3_convert_mismatched_types_getter(struct, key)
      begin
        new_value =
          case expected_type.to_s
          when "Integer"
            case actual_type.to_s
            when "String"
              value.to_i
            when "Float"
              value.to_i if value.floor == value # convert values like 2.0 to 2, but not 2.1
            end
          when "String"
            value.to_s
          when "Float"
            value.to_f
          when "Boolean", "TrueClass", "FalseClass"
            value.to_s.downcase == "true"
          end
        unless new_value.nil?
          struct = pb3_convert_mismatched_types_setter(struct, key, new_value)
        end
      rescue StandardError => ex
        # Only ordinary errors are tolerated here; signals, SystemExit and the
        # like must not be swallowed by a failed conversion.
        @logger.debug("Protobuf encoding error 5: Could not convert types for protobuf encoding: #{ex}")
      end
    end # if key contains .
  end # mismatches.each
  struct
end
|
521
|
+
|
522
|
+
# Builds the hash that is handed to the protobuf 3 builder:
# 0) fields with nil values are removed,
# 1) @ signs are removed from the keys, which are then turned into symbols,
# 2) timestamps and other objects are converted to strings,
# 3) nested hashes are prepared recursively.
def pb3_prepare_for_encoding(datahash)
  datahash.each_with_object({}) do |(name, content), prepared|
    next if content.nil?

    content = content.to_s if should_convert_to_string?(content)
    content = pb3_prepare_for_encoding(content) if content.is_a?(Hash)
    prepared[name.gsub(/@/,'').to_sym] = content
  end
end
|
537
|
+
|
538
|
+
# Builds the oneof meta information for a decoded protobuf 3 object.
#
# For every oneof group of `pb_class_name` the name of the option that is set
# on `pb_object` is stored under the group name. Fields of type :message are
# visited recursively and their meta information is stored under the field name.
# Returns the resulting hash.
def pb3_get_oneof_metainfo(pb_object, pb_class_name)
  meta = {}
  pb_class = Google::Protobuf::DescriptorPool.generated_pool.lookup(pb_class_name).msgclass

  pb_class.descriptor.each_oneof do |oneof|
    oneof.each do |option|
      meta[oneof.name] = option.name unless pb_object.send(option.name).nil?
    end
  end

  message_fields = pb_class.descriptor.select { |field| field.type == :message }
  message_fields.each do |field|
    # recurse over nested protobuf classes
    nested_object = pb_object.send(field.name)
    next if nested_object.nil? || field.subtype.nil?

    nested_class = pb3_get_descriptorpool_name(field.subtype.msgclass)
    meta[field.name] = pb3_get_oneof_metainfo(nested_object, nested_class)
  end

  meta
end
|
561
|
+
|
562
|
+
|
563
|
+
# Encodes an event with the protobuf 2 builder and returns the serialized
# string. Every error is logged as a warning and then re-raised.
def pb2_encode(event)
  prepared = pb2_prepare_for_encoding(event.to_hash, @class_name)
  @pb_builder.new(prepared).serialize_to_string
rescue NoMethodError => e
  @logger.warn("Encoding error 2. Probably mismatching protobuf definition. Required fields in the protobuf definition are: #{event.to_hash.keys.join(", ")} and the timestamp field name must not include a @. ")
  raise e
rescue => e
  @logger.warn("Encoding error 1: #{e.inspect}")
  raise e
end
|
574
|
+
|
575
|
+
|
576
|
+
# Prepares event data for the protobuf 2 builder. Anything that is not a
# hash is returned unchanged. For hashes:
# 1) @ signs are removed from the keys,
# 2) timestamps and other objects are converted to strings,
# 3) fields registered in @metainfo_messageclasses for `class_name` are
#    replaced by instances of their protobuf class.
def pb2_prepare_for_encoding(datahash, class_name)
  return datahash unless datahash.is_a?(::Hash)

  datahash = datahash.each_with_object({}) do |(k, v), cleaned|
    cleaned[k.to_s.dup.gsub(/@/,'')] = should_convert_to_string?(v) ? v.to_s : v
  end

  # Check if any of the fields in this hash are protobuf classes and if so, create a builder for them.
  nested_classes = @metainfo_messageclasses[class_name]
  return datahash unless nested_classes

  nested_classes.each do |field, nested_class|
    next unless datahash.include?(field)

    original_value = datahash[field]
    datahash[field] =
      if original_value.is_a?(::Array)
        # NOTE(review): the prepared elements are computed but not used; the
        # list is stored as it was. This looks unintended (the elements were
        # presumably meant to be protobuffed and put back) - confirm before
        # changing it.
        original_value.each { |x| pb2_prepare_for_encoding(x, nested_class) }
        original_value
      else
        # this line is reached in the colourtest for an enum. Enums should not be instantiated. Should enums even be in the messageclasses? I dont think so! TODO bug
        pb2_create_instance(nested_class).new(pb2_prepare_for_encoding(original_value, nested_class))
      end
  end

  datahash
end
|
606
|
+
|
607
|
+
|
608
|
+
# True for every value that is not an Integer, Float, Hash, Array or boolean;
# such values are turned into strings before encoding.
def should_convert_to_string?(v)
  case v
  when Integer, Float, ::Hash, ::Array, true, false
    false
  else
    true
  end
end
|
611
|
+
|
612
|
+
|
613
|
+
# Resolves a "::"-separated constant name (e.g. "Animal::Unicorn") to the
# constant itself, starting the lookup at Object.
def pb2_create_instance(name)
  @logger.debug("Creating instance of " + name)
  scope = Object
  name.split('::').each { |part| scope = scope.const_get(part) }
  scope
end
|
617
|
+
|
618
|
+
|
619
|
+
# Scans a generated protobuf 3 ruby file line by line and records, per message
# class, which fields are messages (@metainfo_messageclasses) and which are
# enums (@metainfo_enumclasses), each mapped to the name of their class.
#
# Files without any `add_message` line are accepted and simply add nothing.
# Errors are logged as a warning and re-raised. Returns nil.
def pb3_metadata_analyis(filename)
  regex_class_name = /\s*add_message "(?<name>.+?)" do\s+/ # TODO optimize both regexes for speed (negative lookahead)
  regex_pbdefs = /\s*(optional|repeated)(\s*):(?<name>.+),(\s*):(?<type>\w+),(\s*)(?<position>\d+)(, \"(?<enum_class>.*?)\")?/
  class_name = ""
  type = ""
  field_name = ""
  File.readlines(filename).each do |line|
    class_match = regex_class_name.match(line)
    if class_match
      class_name = class_match[:name]
      @metainfo_messageclasses[class_name] = {}
      @metainfo_enumclasses[class_name] = {}
    end

    field_match = regex_pbdefs.match(line)
    next unless field_match

    field_name = field_match[:name]
    type = field_match[:type]
    field_class_name = field_match[:enum_class]
    # A field that appears before any `add_message` line has no class to be
    # attached to; skip it instead of failing on a missing hash entry.
    next if class_name.empty?

    if type == "message"
      @metainfo_messageclasses[class_name][field_name] = field_class_name
    elsif type == "enum"
      @metainfo_enumclasses[class_name][field_name] = field_class_name
    end
  end # readlines
  nil
rescue => e
  @logger.warn("Error 3: unable to read pb definition from file " + filename+ ". Reason: #{e.inspect}. Last settings were: class #{class_name} field #{field_name} type #{type}. Backtrace: " + e.backtrace.inspect.to_s)
  raise e
end
|
651
|
+
|
652
|
+
|
653
|
+
|
654
|
+
# Scans a generated protobuf 2 ruby file line by line. We need to find out
# which classes it contains and the protobuf definitions in them, so that
# nested objects can be created later.
#
# For every class (taken from `set_fully_qualified_name`) the fields whose
# type is another class are recorded in @metainfo_enumclasses (when the type
# is a known enum) or in @metainfo_messageclasses; enum classes themselves are
# remembered, lower-cased, in @metainfo_pb2_enumlist.
#
# Errors are logged as a warning and re-raised. Returns nil.
def pb2_metadata_analyis(filename)
  regex_class_start = /\s*set_fully_qualified_name \"(?<name>.+)\".*?/
  regex_enum_name = /\s*include ..ProtocolBuffers..Enum\s*/
  regex_pbdefs = /\s*(optional|repeated)(\s*):(?<type>.+),(\s*):(?<name>\w+),(\s*)(?<position>\d+)/

  class_name = ""
  type = ""
  field_name = ""
  is_enum_class = false

  File.readlines(filename).each do |line|
    is_enum_class = true if regex_enum_name.match(line)

    class_match = regex_class_start.match(line)
    if class_match
      # Upper-case only the first letter of each segment. `capitalize` would
      # also lower-case the rest ("UnicornEvent" -> "Unicornevent"), so the
      # recorded name would no longer match the real class name.
      segments = class_match[:name].gsub('.', "::").split('::')
      class_name = segments.map { |word| word.sub(/\A./) { |first| first.upcase } }.join('::')
      @metainfo_pb2_enumlist << class_name.downcase if is_enum_class
      is_enum_class = false # reset when next class starts
    end

    field_match = regex_pbdefs.match(line)
    next unless field_match

    type = field_match[:type]
    field_name = field_match[:name]
    next unless type =~ /::/ # only fields whose type is another class matter

    clean_type = type.gsub(/^:/, "")
    if @metainfo_pb2_enumlist.include?(clean_type.downcase)
      @metainfo_enumclasses[class_name] = {} unless @metainfo_enumclasses.key?(class_name)
      @metainfo_enumclasses[class_name][field_name] = clean_type
    else
      @metainfo_messageclasses[class_name] = {} unless @metainfo_messageclasses.key?(class_name)
      @metainfo_messageclasses[class_name][field_name] = clean_type
    end
  end
  nil
rescue LoadError => e
  raise ArgumentError.new("Could not load file: " + filename + ". Please try to use absolute pathes. Current working dir: " + Dir.pwd + ", loadpath: " + $LOAD_PATH.join(" "))
rescue => e
  @logger.warn("Error 3: unable to read pb definition from file " + filename+ ". Reason: #{e.inspect}. Last settings were: class #{class_name} field #{field_name} type #{type}. Backtrace: " + e.backtrace.inspect.to_s)
  raise e
end
|
710
|
+
|
711
|
+
|
712
|
+
# Loads one protobuf definition file. Files not ending in '.rb' are only
# reported with a warning. Absolute paths are `require`d (a failure is logged
# as an error and re-raised); afterwards the file is scanned by the metadata
# analysis that matches @protobuf_version.
def load_protobuf_definition(filename)
  return @logger.warn("Not a ruby file: " + filename) unless filename.end_with?('.rb')

  # Add to the loading path of the protobuf definitions
  if Pathname.new(filename).absolute?
    begin
      require filename
    rescue Exception => e
      @logger.error("Unable to load file: #{filename}. Reason: #{e.inspect}")
      raise e
    end
  end

  @protobuf_version == 3 ? pb3_metadata_analyis(filename) : pb2_metadata_analyis(filename)
end
|
734
|
+
|
735
|
+
end # class LogStash::Codecs::Protobuf
|