logstash-output-kusto 1.0.0-java
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +31 -0
- data/CONTRIBUTORS +10 -0
- data/Gemfile +20 -0
- data/LICENSE +201 -0
- data/README.md +79 -0
- data/lib/com/fasterxml/jackson/core/jackson-annotations/2.9.10/jackson-annotations-2.9.10.jar +0 -0
- data/lib/com/fasterxml/jackson/core/jackson-core/2.9.4/jackson-core-2.9.4.jar +0 -0
- data/lib/com/fasterxml/jackson/core/jackson-databind/2.9.10.7/jackson-databind-2.9.10.7.jar +0 -0
- data/lib/com/github/stephenc/jcip/jcip-annotations/1.0-1/jcip-annotations-1.0-1.jar +0 -0
- data/lib/com/google/code/gson/gson/2.8.0/gson-2.8.0.jar +0 -0
- data/lib/com/google/guava/guava/20.0/guava-20.0.jar +0 -0
- data/lib/com/microsoft/azure/adal4j/1.6.5/adal4j-1.6.5.jar +0 -0
- data/lib/com/microsoft/azure/azure-keyvault-core/1.0.0/azure-keyvault-core-1.0.0.jar +0 -0
- data/lib/com/microsoft/azure/azure-storage/8.3.0/azure-storage-8.3.0.jar +0 -0
- data/lib/com/microsoft/azure/kusto/kusto-data/2.1.2/kusto-data-2.1.2.jar +0 -0
- data/lib/com/microsoft/azure/kusto/kusto-ingest/2.1.2/kusto-ingest-2.1.2.jar +0 -0
- data/lib/com/nimbusds/lang-tag/1.5/lang-tag-1.5.jar +0 -0
- data/lib/com/nimbusds/nimbus-jose-jwt/9.3/nimbus-jose-jwt-9.3.jar +0 -0
- data/lib/com/nimbusds/oauth2-oidc-sdk/6.5/oauth2-oidc-sdk-6.5.jar +0 -0
- data/lib/com/sun/mail/javax.mail/1.6.1/javax.mail-1.6.1.jar +0 -0
- data/lib/com/univocity/univocity-parsers/2.1.1/univocity-parsers-2.1.1.jar +0 -0
- data/lib/commons-codec/commons-codec/1.14/commons-codec-1.14.jar +0 -0
- data/lib/commons-logging/commons-logging/1.2/commons-logging-1.2.jar +0 -0
- data/lib/javax/activation/activation/1.1/activation-1.1.jar +0 -0
- data/lib/logstash-output-kusto_jars.rb +64 -0
- data/lib/logstash/outputs/kusto.rb +413 -0
- data/lib/logstash/outputs/kusto/ingestor.rb +123 -0
- data/lib/logstash/outputs/kusto/interval.rb +81 -0
- data/lib/net/minidev/accessors-smart/1.2/accessors-smart-1.2.jar +0 -0
- data/lib/net/minidev/json-smart/2.3/json-smart-2.3.jar +0 -0
- data/lib/org/apache/commons/commons-lang3/3.9/commons-lang3-3.9.jar +0 -0
- data/lib/org/apache/httpcomponents/httpclient/4.5.8/httpclient-4.5.8.jar +0 -0
- data/lib/org/apache/httpcomponents/httpcore/4.4.11/httpcore-4.4.11.jar +0 -0
- data/lib/org/jetbrains/annotations/17.0.0/annotations-17.0.0.jar +0 -0
- data/lib/org/json/json/20190722/json-20190722.jar +0 -0
- data/lib/org/ow2/asm/asm/5.0.4/asm-5.0.4.jar +0 -0
- data/lib/org/slf4j/slf4j-api/1.8.0-beta4/slf4j-api-1.8.0-beta4.jar +0 -0
- data/logstash-output-kusto.gemspec +35 -0
- data/spec/outputs/kusto/ingestor_spec.rb +109 -0
- data/spec/outputs/kusto_spec.rb +54 -0
- data/spec/spec_helpers.rb +21 -0
- metadata +203 -0
@@ -0,0 +1,413 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'logstash/outputs/base'
|
4
|
+
require 'logstash/namespace'
|
5
|
+
require 'logstash/errors'
|
6
|
+
|
7
|
+
require 'logstash/outputs/kusto/ingestor'
|
8
|
+
require 'logstash/outputs/kusto/interval'
|
9
|
+
|
10
|
+
##
|
11
|
+
# This plugin sends messages to Azure Kusto in batches.
|
12
|
+
#
|
13
|
+
class LogStash::Outputs::Kusto < LogStash::Outputs::Base
|
14
|
+
config_name 'kusto'
|
15
|
+
concurrency :shared
|
16
|
+
|
17
|
+
FIELD_REF = /%\{[^}]+\}/
|
18
|
+
|
19
|
+
attr_reader :failure_path
|
20
|
+
|
21
|
+
# The path to the file to write. Event fields can be used here,
|
22
|
+
# like `/var/log/logstash/%{host}/%{application}`
|
23
|
+
# One may also utilize the path option for date-based log
|
24
|
+
# rotation via the joda time format. This will use the event
|
25
|
+
# timestamp.
|
26
|
+
# E.g.: `path => "./test-%{+YYYY-MM-dd}.txt"` to create
|
27
|
+
# `./test-2013-05-29.txt`
|
28
|
+
#
|
29
|
+
# If you use an absolute path you cannot start with a dynamic string.
|
30
|
+
# E.g: `/%{myfield}/`, `/test-%{myfield}/` are not valid paths
|
31
|
+
config :path, validate: :string, required: true
|
32
|
+
|
33
|
+
# Flush interval (in seconds) for flushing writes to files.
|
34
|
+
# 0 will flush on every message. Increase this value to reduce IO calls but keep
|
35
|
+
# in mind that events buffered before flush can be lost in case of abrupt failure.
|
36
|
+
config :flush_interval, validate: :number, default: 2
|
37
|
+
|
38
|
+
# If the generated path is invalid, the events will be saved
|
39
|
+
# into this file and inside the defined path.
|
40
|
+
config :filename_failure, validate: :string, default: '_filepath_failures'
|
41
|
+
|
42
|
+
# If the configured file is deleted, but an event is handled by the plugin,
|
43
|
+
# the plugin will recreate the file. Default => true
|
44
|
+
config :create_if_deleted, validate: :boolean, default: true
|
45
|
+
|
46
|
+
# Dir access mode to use. Note that due to the bug in jruby system umask
|
47
|
+
# is ignored on linux: https://github.com/jruby/jruby/issues/3426
|
48
|
+
# Setting it to -1 uses default OS value.
|
49
|
+
# Example: `"dir_mode" => 0750`
|
50
|
+
config :dir_mode, validate: :number, default: -1
|
51
|
+
|
52
|
+
# File access mode to use. Note that due to the bug in jruby system umask
|
53
|
+
# is ignored on linux: https://github.com/jruby/jruby/issues/3426
|
54
|
+
# Setting it to -1 uses default OS value.
|
55
|
+
# Example: `"file_mode" => 0640`
|
56
|
+
config :file_mode, validate: :number, default: -1
|
57
|
+
|
58
|
+
# TODO: fix the interval type...
|
59
|
+
config :stale_cleanup_interval, validate: :number, default: 10
|
60
|
+
config :stale_cleanup_type, validate: %w[events interval], default: 'events'
|
61
|
+
|
62
|
+
# Should the plugin recover from failure?
|
63
|
+
#
|
64
|
+
# If `true`, the plugin will look for temp files from past runs within the
|
65
|
+
# path (before any dynamic pattern is added) and try to process them
|
66
|
+
#
|
67
|
+
# If `false`, the plugin will disregard temp files found
|
68
|
+
config :recovery, validate: :boolean, default: true
|
69
|
+
|
70
|
+
|
71
|
+
# The Kusto endpoint for ingestion related communication. You can see it on the Azure Portal.
|
72
|
+
config :ingest_url, validate: :string, required: true
|
73
|
+
|
74
|
+
# The following are the credentials used to connect to the Kusto service
|
75
|
+
# application id
|
76
|
+
config :app_id, validate: :string, required: true
|
77
|
+
# application key (secret)
|
78
|
+
config :app_key, validate: :password, required: true
|
79
|
+
# aad tenant id
|
80
|
+
config :app_tenant, validate: :string, default: nil
|
81
|
+
|
82
|
+
# The following are the data settings that impact where events are written to
|
83
|
+
# Database name
|
84
|
+
config :database, validate: :string, required: true
|
85
|
+
# Target table name
|
86
|
+
config :table, validate: :string, required: true
|
87
|
+
# Mapping name - Used by Kusto to map each attribute from incoming event JSON strings to the appropriate column in the table.
|
88
|
+
# Note that this must be in JSON format, as this is the interface between Logstash and Kusto
|
89
|
+
config :json_mapping, validate: :string, required: true
|
90
|
+
|
91
|
+
# Mapping name - deprecated, use json_mapping
|
92
|
+
config :mapping, validate: :string, deprecated: true
|
93
|
+
|
94
|
+
|
95
|
+
# Determines if local files used for temporary storage will be deleted
|
96
|
+
# after upload is successful
|
97
|
+
config :delete_temp_files, validate: :boolean, default: true
|
98
|
+
|
99
|
+
# TODO: will be used to route events to many tables according to event properties
|
100
|
+
config :dynamic_event_routing, validate: :boolean, default: false
|
101
|
+
|
102
|
+
# Specify how many files can be uploaded concurrently
|
103
|
+
config :upload_concurrent_count, validate: :number, default: 3
|
104
|
+
|
105
|
+
# Specify how many files can be kept in the upload queue before the main process
|
106
|
+
# starts processing them in the main thread (not healthy)
|
107
|
+
config :upload_queue_size, validate: :number, default: 30
|
108
|
+
|
109
|
+
default :codec, 'json_lines'
|
110
|
+
|
111
|
+
# Plugin start-up hook: resets the local file cache, derives and validates the
# temp-file path, builds the Kusto ingestor and starts the periodic flush and
# stale-file cleanup timers.
#
# Raises LogStash::ConfigurationError (through validate_path) when the
# configured path is not usable.
def register
  require 'fileutils' # For mkdir_p

  @files = {}
  @io_mutex = Mutex.new

  # `mapping` is the deprecated option; it is only consulted when json_mapping
  # carries no value.
  final_mapping = json_mapping
  final_mapping = mapping if final_mapping.nil? || final_mapping.empty?

  # TODO: add id to the tmp path to support multiple outputs of the same type
  # add fields from the meta that will note the destination of the events in the file
  destination_suffix = if dynamic_event_routing
                         '%{[@metadata][database]}.%{[@metadata][table]}.%{[@metadata][final_mapping]}'
                       else
                         "#{database}.#{table}"
                       end
  @path = File.expand_path("#{path}.#{destination_suffix}")

  validate_path

  @file_root = path_with_field_ref? ? extract_file_root : File.dirname(path)
  @failure_path = File.join(@file_root, @filename_failure)

  executor = Concurrent::ThreadPoolExecutor.new(min_threads: 1,
                                                max_threads: upload_concurrent_count,
                                                max_queue: upload_queue_size,
                                                fallback_policy: :caller_runs)

  @ingestor = Ingestor.new(ingest_url, app_id, app_key, app_tenant, database, table, final_mapping, delete_temp_files, @logger, executor)

  # send existing files
  recover_past_files if recovery

  @last_stale_cleanup_cycle = Time.now

  @flush_interval = @flush_interval.to_i
  @flusher = Interval.start(@flush_interval, -> { flush_pending_files }) if @flush_interval > 0

  if (@stale_cleanup_type == 'interval') && (@stale_cleanup_interval > 0)
    # close_stale_files closes handles and removes entries from @files, so the
    # timer callback takes the same lock the event path holds when it calls it.
    @cleaner = Interval.start(stale_cleanup_interval, -> { @io_mutex.synchronize { close_stale_files } })
  end
end
|
160
|
+
|
161
|
+
private
|
162
|
+
# Checks the expanded @path before any file is opened.
#
# The first path segment must be static, and the path must contain at least
# one %{...} reference so that files can rotate.
#
# Returns nil when the path is acceptable.
# Raises LogStash::ConfigurationError (after logging the same message) otherwise.
def validate_path
  problem = if root_directory =~ FIELD_REF
              'The starting part of the path should not be dynamic.'
            elsif !path_with_field_ref?
              'Path should include some time related fields to allow for file rotation.'
            end
  return unless problem

  @logger.error(problem, path: @path)
  raise LogStash::ConfigurationError, problem
end
|
173
|
+
|
174
|
+
private
|
175
|
+
# Returns the first directory segment of @path, or nil when the path has no
# such segment. On Windows the first segment is the drive letter, so the
# segment after it is returned instead.
def root_directory
  segments = @path.split(File::SEPARATOR).reject(&:empty?)
  position = Gem.win_platform? ? 1 : 0
  segments[position]
end
|
184
|
+
|
185
|
+
public
|
186
|
+
# Receives a batch of [event, encoded_payload] pairs, groups the payloads by
# the temp file each event resolves to, and appends them to those files while
# holding @io_mutex.
#
# Each file is flushed right away unless a live @flusher takes care of it.
# When stale_cleanup_type is 'events', stale files are closed (and handed to
# Kusto) at the end of the batch, still under the lock.
def multi_receive_encoded(events_and_encoded)
  grouped = Hash.new { |hash, key| hash[key] = [] }
  chunks_by_file = events_and_encoded.each_with_object(grouped) do |(event, encoded), acc|
    acc[event_path(event)] << encoded
  end

  @io_mutex.synchronize do
    chunks_by_file.each do |target, chunks|
      writer = open(target)
      # append to the file
      chunks.each { |chunk| writer.write(chunk) }
      writer.flush unless @flusher && @flusher.alive?
    end

    close_stale_files if @stale_cleanup_type == 'events'
  end
end
|
205
|
+
|
206
|
+
# Shutdown hook: stops the flush/cleanup timers, closes every cached temp file
# and hands it to Kusto, then stops the ingestor.
#
# A failure on one file is logged and does not prevent the remaining files
# from being closed and sent. Only StandardError is swallowed, so interrupts
# and exit requests raised during shutdown still propagate.
def close
  @flusher&.stop
  @cleaner&.stop
  @io_mutex.synchronize do
    @logger.debug('Close: closing files')

    @files.each do |path, fd|
      begin
        fd.close
        @logger.debug("Closed file #{path}", fd: fd)

        kusto_send_file(path)
      rescue StandardError => e
        @logger.error('Exception while flushing and closing files.', exception: e)
      end
    end
  end

  @ingestor&.stop
end
|
226
|
+
|
227
|
+
private
|
228
|
+
# True when the expanded form of log_path lies below @file_root
# (i.e. it starts with "<@file_root>/").
def inside_file_root?(log_path)
  root_prefix = "#{@file_root}/"
  File.expand_path(log_path).start_with?(root_prefix)
end
|
232
|
+
|
233
|
+
private
|
234
|
+
# Resolves the temp file an event should be written to.
#
# The event-specific path is replaced by @failure_path when it would land
# outside @file_root (a warning is logged), or when the file no longer exists
# and create_if_deleted is off.
def event_path(event)
  target = generate_filepath(event)
  escapes_root = path_with_field_ref? && !inside_file_root?(target)

  if escapes_root
    @logger.warn('The event tried to write outside the files root, writing the event to the failure file', event: event, filename: @failure_path)
    target = @failure_path
  elsif !@create_if_deleted && deleted?(target)
    target = @failure_path
  end

  @logger.debug('Writing event to tmp file.', filename: target)
  target
end
|
246
|
+
|
247
|
+
private
|
248
|
+
# Expands the @path template for one event by delegating to the event's
# own sprintf.
def generate_filepath(event)
  template = @path
  event.sprintf(template)
end
|
251
|
+
|
252
|
+
private
|
253
|
+
# True when the configured path contains at least one %{...} field reference.
# Always returns true or false (never a match index or nil).
def path_with_field_ref?
  !(path =~ FIELD_REF).nil?
end
|
256
|
+
|
257
|
+
private
|
258
|
+
# Returns the leading part of the expanded path made of the segments that
# come before the first segment containing a %{...} reference.
def extract_file_root
  static_segments = []
  File.expand_path(path).split(File::SEPARATOR).each do |segment|
    break if segment =~ FIELD_REF

    static_segments << segment
  end
  static_segments.join(File::SEPARATOR)
end
|
262
|
+
|
263
|
+
# the back-bone of @flusher, our periodic-flushing interval.
|
264
|
+
private
|
265
|
+
# Flushes every cached file handle while holding @io_mutex. This is the
# callback the periodic @flusher runs.
#
# StandardError raised while flushing is logged and swallowed so that one bad
# cycle does not end the timer; other exceptions (interrupts, exit requests)
# are allowed to propagate.
def flush_pending_files
  @io_mutex.synchronize do
    @logger.debug('Starting flush cycle')

    @files.each do |path, fd|
      @logger.debug('Flushing file', path: path, fd: fd)
      fd.flush
    end
  end
rescue StandardError => e
  # squash exceptions caught while flushing after logging them
  @logger.error('Exception flushing files', exception: e.message, backtrace: e.backtrace)
end
|
278
|
+
|
279
|
+
# every 10 seconds or so (triggered by events, but if there are no events there's no point closing files anyway)
|
280
|
+
private
|
281
|
+
# Closes and uploads the files that were not written to since the previous
# cleanup cycle.
#
# Does nothing until at least @stale_cleanup_interval seconds have passed since
# the last cycle. Files whose `active` flag is unset are closed, removed from
# @files and handed to Kusto; every remaining file is then marked inactive, so
# only a later write keeps it open through the next cycle.
def close_stale_files
  now = Time.now
  elapsed = now - @last_stale_cleanup_cycle
  return unless elapsed >= @stale_cleanup_interval

  @logger.debug('Starting stale files cleanup cycle', files: @files)
  stale = @files.reject { |_path, handle| handle.active }
  @logger.debug("#{stale.count} stale files found", inactive_files: stale)

  stale.each do |stale_path, handle|
    @logger.info("Closing file #{stale_path}")
    handle.close
    @files.delete(stale_path)

    kusto_send_file(stale_path)
  end

  # mark all files as inactive, a call to write will mark them as active again
  @files.each_value { |handle| handle.active = false }
  @last_stale_cleanup_cycle = now
end
|
299
|
+
|
300
|
+
private
|
301
|
+
# True when @files holds a non-nil entry for path.
def cached?(path)
  return false unless @files.include?(path)

  !@files[path].nil?
end
|
304
|
+
|
305
|
+
private
|
306
|
+
# True when nothing exists on disk at path.
def deleted?(path)
  present = File.exist?(path)
  !present
end
|
309
|
+
|
310
|
+
private
|
311
|
+
# Returns the IOWriter for path, opening the file in append mode (and creating
# its directory) when no usable cached writer exists.
#
# A cached writer is reused unless the file has disappeared from disk and
# create_if_deleted is on, in which case the cache entry is dropped and the
# file is opened again. dir_mode / file_mode are applied only when they
# differ from -1.
def open(path)
  if deleted?(path) && @create_if_deleted
    @logger.debug('Required file does not exist, creating it.', path: path)
    @files.delete(path)
  elsif cached?(path)
    return @files[path]
  end

  @logger.info('Opening file', path: path)

  parent = File.dirname(path)
  unless Dir.exist?(parent)
    @logger.info('Creating directory', directory: parent)
    if @dir_mode == -1
      FileUtils.mkdir_p(parent)
    else
      FileUtils.mkdir_p(parent, mode: @dir_mode)
    end
  end

  # work around a bug opening fifos (bug JRUBY-6280)
  stat = begin
    File.stat(path)
  rescue StandardError
    nil
  end
  fifo_on_jruby = stat && stat.ftype == 'fifo' && LogStash::Environment.jruby?

  handle = if fifo_on_jruby
             java.io.FileWriter.new(java.io.File.new(path))
           elsif @file_mode == -1
             File.new(path, 'a+')
           else
             File.new(path, 'a+', @file_mode)
           end

  @files[path] = IOWriter.new(handle)
end
|
356
|
+
|
357
|
+
private
|
358
|
+
# Queues file_path for upload through the ingestor, passing along the
# delete_temp_files setting.
def kusto_send_file(file_path)
  remove_after_upload = delete_temp_files
  @ingestor.upload_async(file_path, remove_after_upload)
end
|
361
|
+
|
362
|
+
private
|
363
|
+
# Looks for temp files left behind by a previous run and queues them for
# upload.
#
# The search starts at the last directory of @path that precedes the first
# '%' (i.e. before any dynamic part). Regular files whose name ends in
# ".<database>.<table>" are sent; directories and other names are ignored.
# A missing search directory is not an error.
def recover_past_files
  require 'find'

  # we need to find the last "regular" part in the path before any dynamic vars
  path_last_char = @path.length - 1

  pattern_start = @path.index('%') || path_last_char
  last_folder_before_pattern = @path.rindex('/', pattern_start) || path_last_char
  new_path = path[0..last_folder_before_pattern]

  begin
    return unless Dir.exist?(new_path)

    @logger.info("Going to recover old files in path #{new_path}")

    # database/table are literal names: escape them so characters such as '.'
    # do not act as regex operators, and anchor at the very end of the name.
    suffix = /\.#{Regexp.escape(database.to_s)}\.#{Regexp.escape(table.to_s)}\z/
    old_files = Find.find(new_path).select { |candidate| suffix =~ candidate && File.file?(candidate) }
    @logger.info("Found #{old_files.length} old file(s), sending them now...")

    old_files.each do |file|
      kusto_send_file(file)
    end
  rescue Errno::ENOENT => e
    @logger.warn('No such file or directory', exception: e.class, message: e.message, path: new_path, backtrace: e.backtrace)
  end
end
|
387
|
+
end
|
388
|
+
|
389
|
+
# wrapper class
|
390
|
+
# Wraps an IO-like object and records, through the `active` flag, whether it
# has been written to. Any message the wrapper does not define itself is
# forwarded to the wrapped object when that object responds to it.
class IOWriter
  # true once write has been called; callers may reset it.
  attr_accessor :active

  def initialize(io)
    @io = io
  end

  # Writes to the wrapped object and marks the writer as active.
  # Returns true (the value of the flag assignment), not the byte count.
  def write(*args)
    @io.write(*args)
    @active = true
  end

  def flush
    @io.flush
  end

  def method_missing(method_name, *args, &block)
    if @io.respond_to?(method_name)
      @io.send(method_name, *args, &block)
    else
      super
    end
  end

  # Keeps respond_to? / method in agreement with what method_missing forwards.
  def respond_to_missing?(method_name, include_private = false)
    @io.respond_to?(method_name) || super
  end
end
|
@@ -0,0 +1,123 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'logstash/outputs/base'
|
4
|
+
require 'logstash/namespace'
|
5
|
+
require 'logstash/errors'
|
6
|
+
|
7
|
+
class LogStash::Outputs::Kusto < LogStash::Outputs::Base
|
8
|
+
##
|
9
|
+
# This handles the overall logic and communication with Kusto
|
10
|
+
#
|
11
|
+
class Ingestor
|
12
|
+
require 'logstash-output-kusto_jars'
|
13
|
+
RETRY_DELAY_SECONDS = 3
|
14
|
+
DEFAULT_THREADPOOL = Concurrent::ThreadPoolExecutor.new(
|
15
|
+
min_threads: 1,
|
16
|
+
max_threads: 8,
|
17
|
+
max_queue: 1,
|
18
|
+
fallback_policy: :caller_runs
|
19
|
+
)
|
20
|
+
LOW_QUEUE_LENGTH = 3
|
21
|
+
FIELD_REF = /%\{[^}]+\}/
|
22
|
+
|
23
|
+
# Builds the Kusto ingest client and the ingestion properties shared by all
# uploads.
#
# ingest_url, app_id, app_key, app_tenant - AAD application credentials for the
#   ingestion endpoint (app_key is unwrapped with #value).
# database, table, json_mapping - ingestion target; each is rejected by
#   validate_config when it contains a %{...} reference.
# delete_local - stored in @delete_local.
# logger - logger used by this object.
# threadpool - executor that runs the uploads (DEFAULT_THREADPOOL when omitted).
#
# Raises LogStash::ConfigurationError through validate_config.
def initialize(ingest_url, app_id, app_key, app_tenant, database, table, json_mapping, delete_local, logger, threadpool = DEFAULT_THREADPOOL)
  @workers_pool = threadpool
  @logger = logger

  validate_config(database, table, json_mapping)

  @logger.debug('Preparing Kusto resources.')

  sdk = Java::com.microsoft.azure.kusto
  connection = sdk.data.ConnectionStringBuilder.createWithAadApplicationCredentials(ingest_url, app_id, app_key.value, app_tenant)
  @logger.debug(Gem.loaded_specs.to_s)

  # Unfortunately there's no way to avoid using the gem/plugin name directly...
  plugin_version = Gem.loaded_specs['logstash-output-kusto']&.version || "unknown"
  name_for_tracing = "logstash-output-kusto:#{plugin_version}"
  @logger.debug("Client name for tracing: #{name_for_tracing}")
  connection.setClientVersionForTracing(name_for_tracing)

  @kusto_client = sdk.ingest.IngestClientFactory.createClient(connection)

  @ingestion_properties = sdk.ingest.IngestionProperties.new(database, table)
  @ingestion_properties.setIngestionMapping(json_mapping, sdk.ingest.IngestionMapping::IngestionMappingKind::Json)
  @ingestion_properties.setDataFormat(sdk.ingest.IngestionProperties::DATA_FORMAT::json)
  @delete_local = delete_local

  @logger.debug('Kusto resources are ready.')
end
|
48
|
+
|
49
|
+
# Rejects ingestion settings that contain a %{...} field reference; these
# values are fixed for the lifetime of the ingestor.
#
# The settings are checked in the order database, table, json_mapping. The
# first offender is logged together with its value and reported through
# LogStash::ConfigurationError. Returns nil when all three are static.
def validate_config(database, table, json_mapping)
  settings = { 'database' => database, 'table' => table, 'json_mapping' => json_mapping }
  settings.each do |setting, value|
    next unless value =~ FIELD_REF

    message = "#{setting} config value should not be dynamic."
    # The logger takes structured data as a hash, so pass the value under its setting name.
    @logger.error(message, setting.to_sym => value)
    raise LogStash::ConfigurationError, message
  end
  nil
end
|
65
|
+
|
66
|
+
# Queues an upload of path on the worker pool.
#
# A warning is logged when the pool reports LOW_QUEUE_LENGTH or fewer free
# queue slots. Errors raised while posting are logged and re-raised unchanged.
# Returns whatever the pool's #post returns.
def upload_async(path, delete_on_success)
  # Read the capacity once so the check and the logged number agree.
  free_slots = @workers_pool.remaining_capacity
  # NOTE(review): concurrent-ruby documents -1 as "queue may grow without
  # bound"; a negative value is therefore not treated as low capacity.
  if free_slots >= 0 && free_slots <= LOW_QUEUE_LENGTH
    @logger.warn("Ingestor queue capacity is running low with #{free_slots} free slots.")
  end

  @workers_pool.post do
    LogStash::Util.set_thread_name("Kusto to ingest file: #{path}")
    upload(path, delete_on_success)
  end
rescue StandardError => e
  @logger.error('StandardError.', exception: e.class, message: e.message, path: path, backtrace: e.backtrace)
  raise
end
|
79
|
+
|
80
|
+
# Ingests one local file into Kusto and, when delete_on_success is set,
# removes it afterwards.
#
# A missing file (Ruby ENOENT or Java NoSuchFileException) is logged and
# given up on. Any other StandardError is logged and the whole upload is
# retried after RETRY_DELAY_SECONDS, with no upper bound on attempts: the
# author preferred a stuck worker over losing data.
def upload(path, delete_on_success)
  file_size = File.size(path)
  @logger.debug("Sending file to kusto: #{path}. size: #{file_size}")

  # TODO: dynamic routing - derive database, table and json_mapping from the
  # part of the file name after '.kusto.' (three dot-separated parts) and build
  # per-file ingestion properties from them.

  # 0 - let the sdk figure out the size of the file
  source = Java::com.microsoft.azure.kusto.ingest.source.FileSourceInfo.new(path, 0)
  @kusto_client.ingestFromFile(source, @ingestion_properties)

  File.delete(path) if delete_on_success

  @logger.debug("File #{path} sent to kusto.")
rescue Errno::ENOENT, Java::JavaNioFile::NoSuchFileException => e
  @logger.error("File doesn't exist! Unrecoverable error.", exception: e.class, message: e.message, path: path, backtrace: e.backtrace)
rescue StandardError => e
  # When the retry limit is reached or another error happens we will wait and retry.
  #
  # The thread might be stuck here, but that is considered better than losing
  # anything: it is either a transient error or something really bad happened.
  @logger.error('Uploading failed, retrying.', exception: e.class, message: e.message, path: path, backtrace: e.backtrace)
  sleep RETRY_DELAY_SECONDS
  retry
end
|
117
|
+
|
118
|
+
# Shuts the worker pool down and waits, without a timeout, for it to terminate.
def stop
  pool = @workers_pool
  pool.shutdown
  pool.wait_for_termination(nil) # block until its done
end
|
122
|
+
end
|
123
|
+
end
|