logstash-output-kusto 1.0.5-java → 1.0.6-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +36 -36
- data/CONTRIBUTORS +10 -10
- data/Gemfile +22 -22
- data/LICENSE +201 -201
- data/README.md +94 -94
- data/SECURITY.md +41 -41
- data/lib/com/fasterxml/jackson/core/jackson-annotations/2.12.5/jackson-annotations-2.12.5.jar +0 -0
- data/lib/com/fasterxml/jackson/core/jackson-core/2.12.5/jackson-core-2.12.5.jar +0 -0
- data/lib/com/fasterxml/jackson/core/jackson-databind/2.12.5/jackson-databind-2.12.5.jar +0 -0
- data/lib/com/microsoft/azure/kusto/kusto-data/3.1.1/kusto-data-3.1.1.jar +0 -0
- data/lib/com/microsoft/azure/kusto/kusto-data/3.2.1/kusto-data-3.2.1.jar +0 -0
- data/lib/com/microsoft/azure/kusto/kusto-ingest/3.1.1/kusto-ingest-3.1.1.jar +0 -0
- data/lib/com/microsoft/azure/kusto/kusto-ingest/3.2.1/kusto-ingest-3.2.1.jar +0 -0
- data/lib/logstash/outputs/kusto/ingestor.rb +138 -138
- data/lib/logstash/outputs/kusto/interval.rb +81 -81
- data/lib/logstash/outputs/kusto.rb +422 -422
- data/lib/logstash-output-kusto_jars.rb +12 -12
- data/lib/org/apache/commons/commons-text/1.10.0/commons-text-1.10.0.jar +0 -0
- data/logstash-output-kusto.gemspec +36 -36
- data/spec/outputs/kusto/ingestor_spec.rb +121 -121
- data/spec/outputs/kusto_spec.rb +56 -56
- data/spec/spec_helpers.rb +21 -21
- metadata +21 -13
@@ -1,422 +1,422 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
require 'logstash/outputs/base'
|
4
|
-
require 'logstash/namespace'
|
5
|
-
require 'logstash/errors'
|
6
|
-
|
7
|
-
require 'logstash/outputs/kusto/ingestor'
|
8
|
-
require 'logstash/outputs/kusto/interval'
|
9
|
-
|
10
|
-
##
|
11
|
-
# This plugin sends messages to Azure Kusto in batches.
|
12
|
-
#
|
13
|
-
class LogStash::Outputs::Kusto < LogStash::Outputs::Base
|
14
|
-
config_name 'kusto'
|
15
|
-
concurrency :shared
|
16
|
-
|
17
|
-
FIELD_REF = /%\{[^}]+\}/
|
18
|
-
|
19
|
-
attr_reader :failure_path
|
20
|
-
|
21
|
-
# The path to the file to write. Event fields can be used here,
|
22
|
-
# like `/var/log/logstash/%{host}/%{application}`
|
23
|
-
# One may also utilize the path option for date-based log
|
24
|
-
# rotation via the joda time format. This will use the event
|
25
|
-
# timestamp.
|
26
|
-
# E.g.: `path => "./test-%{+YYYY-MM-dd}.txt"` to create
|
27
|
-
# `./test-2013-05-29.txt`
|
28
|
-
#
|
29
|
-
# If you use an absolute path you cannot start with a dynamic string.
|
30
|
-
# E.g: `/%{myfield}/`, `/test-%{myfield}/` are not valid paths
|
31
|
-
config :path, validate: :string, required: true
|
32
|
-
|
33
|
-
# Flush interval (in seconds) for flushing writes to files.
|
34
|
-
# 0 will flush on every message. Increase this value to reduce IO calls but keep
|
35
|
-
# in mind that events buffered before flush can be lost in case of abrupt failure.
|
36
|
-
config :flush_interval, validate: :number, default: 2
|
37
|
-
|
38
|
-
# If the generated path is invalid, the events will be saved
|
39
|
-
# into this file and inside the defined path.
|
40
|
-
config :filename_failure, validate: :string, default: '_filepath_failures'
|
41
|
-
|
42
|
-
# If the configured file is deleted, but an event is handled by the plugin,
|
43
|
-
# the plugin will recreate the file. Default => true
|
44
|
-
config :create_if_deleted, validate: :boolean, default: true
|
45
|
-
|
46
|
-
# Dir access mode to use. Note that due to the bug in jruby system umask
|
47
|
-
# is ignored on linux: https://github.com/jruby/jruby/issues/3426
|
48
|
-
# Setting it to -1 uses default OS value.
|
49
|
-
# Example: `"dir_mode" => 0750`
|
50
|
-
config :dir_mode, validate: :number, default: -1
|
51
|
-
|
52
|
-
# File access mode to use. Note that due to the bug in jruby system umask
|
53
|
-
# is ignored on linux: https://github.com/jruby/jruby/issues/3426
|
54
|
-
# Setting it to -1 uses default OS value.
|
55
|
-
# Example: `"file_mode" => 0640`
|
56
|
-
config :file_mode, validate: :number, default: -1
|
57
|
-
|
58
|
-
# TODO: fix the interval type...
|
59
|
-
config :stale_cleanup_interval, validate: :number, default: 10
|
60
|
-
config :stale_cleanup_type, validate: %w[events interval], default: 'events'
|
61
|
-
|
62
|
-
# Should the plugin recover from failure?
|
63
|
-
#
|
64
|
-
# If `true`, the plugin will look for temp files from past runs within the
|
65
|
-
# path (before any dynamic pattern is added) and try to process them
|
66
|
-
#
|
67
|
-
# If `false`, the plugin will disregard temp files found
|
68
|
-
config :recovery, validate: :boolean, default: true
|
69
|
-
|
70
|
-
|
71
|
-
# The Kusto endpoint for ingestion related communication. You can see it on the Azure Portal.
|
72
|
-
config :ingest_url, validate: :string, required: true
|
73
|
-
|
74
|
-
# The following are the credentials used to connect to the Kusto service
|
75
|
-
# application id
|
76
|
-
config :app_id, validate: :string, required: true
|
77
|
-
# application key (secret)
|
78
|
-
config :app_key, validate: :password, required: true
|
79
|
-
# aad tenant id
|
80
|
-
config :app_tenant, validate: :string, default: nil
|
81
|
-
|
82
|
-
# The following are the data settings that impact where events are written to
|
83
|
-
# Database name
|
84
|
-
config :database, validate: :string, required: true
|
85
|
-
# Target table name
|
86
|
-
config :table, validate: :string, required: true
|
87
|
-
# Mapping name - Used by Kusto to map each attribute from incoming event JSON strings to the appropriate column in the table.
|
88
|
-
# Note that this must be in JSON format, as this is the interface between Logstash and Kusto
|
89
|
-
config :json_mapping, validate: :string, required: true
|
90
|
-
|
91
|
-
# Mapping name - deprecated, use json_mapping
|
92
|
-
config :mapping, validate: :string, deprecated: true
|
93
|
-
|
94
|
-
|
95
|
-
# Determines if local files used for temporary storage will be deleted
|
96
|
-
# after upload is successful
|
97
|
-
config :delete_temp_files, validate: :boolean, default: true
|
98
|
-
|
99
|
-
# TODO: will be used to route events to many tables according to event properties
|
100
|
-
config :dynamic_event_routing, validate: :boolean, default: false
|
101
|
-
|
102
|
-
# Specify how many files can be uploaded concurrently
|
103
|
-
config :upload_concurrent_count, validate: :number, default: 3
|
104
|
-
|
105
|
-
# Specify how many files can be kept in the upload queue before the main process
|
106
|
-
# starts processing them in the main thread (not healthy)
|
107
|
-
config :upload_queue_size, validate: :number, default: 30
|
108
|
-
|
109
|
-
# Host of the proxy , is an optional field. Can connect directly
|
110
|
-
config :proxy_host, validate: :string, required: false
|
111
|
-
|
112
|
-
# Port where the proxy runs , defaults to 80. Usually a value like 3128
|
113
|
-
config :proxy_port, validate: :number, required: false , default: 80
|
114
|
-
|
115
|
-
# Protocol of the proxy URL; can be http or https. TODO: do we need this option, or should it be ignored and removed?
|
116
|
-
config :proxy_protocol, validate: :string, required: false , default: 'http'
|
117
|
-
|
118
|
-
default :codec, 'json_lines'
|
119
|
-
|
120
|
-
# Plugin start-up hook.
#
# Resolves the effective mapping name and the temp-file path template,
# validates the path, creates the upload thread pool and the ingestor,
# optionally re-sends files left over from earlier runs, and starts the
# periodic flush / stale-file cleanup timers when they are enabled.
def register
  require 'fileutils' # For mkdir_p

  @files = {}
  @io_mutex = Mutex.new

  # An empty json_mapping falls back to the deprecated `mapping` option.
  final_mapping = json_mapping.empty? ? mapping : json_mapping

  # TODO: add id to the tmp path to support multiple outputs of the same type
  # The suffix notes the destination of the events stored in each temp file.
  destination_suffix =
    if dynamic_event_routing
      '%{[@metadata][database]}.%{[@metadata][table]}.%{[@metadata][final_mapping]}'
    else
      "#{database}.#{table}"
    end
  @path = File.expand_path("#{path}.#{destination_suffix}")

  validate_path

  @file_root = path_with_field_ref? ? extract_file_root : File.dirname(path)
  @failure_path = File.join(@file_root, @filename_failure)

  # When the queue is full, :caller_runs makes the submitting thread do the upload itself.
  executor = Concurrent::ThreadPoolExecutor.new(
    min_threads: 1,
    max_threads: upload_concurrent_count,
    max_queue: upload_queue_size,
    fallback_policy: :caller_runs
  )

  @ingestor = Ingestor.new(ingest_url, app_id, app_key, app_tenant, database, table, final_mapping,
                           delete_temp_files, proxy_host, proxy_port, proxy_protocol, @logger, executor)

  # send existing files
  recover_past_files if recovery

  @last_stale_cleanup_cycle = Time.now

  @flush_interval = @flush_interval.to_i
  @flusher = Interval.start(@flush_interval, -> { flush_pending_files }) if @flush_interval > 0

  interval_cleanup = (@stale_cleanup_type == 'interval') && (@stale_cleanup_interval > 0)
  @cleaner = Interval.start(stale_cleanup_interval, -> { close_stale_files }) if interval_cleanup
end
|
169
|
-
|
170
|
-
private
|
171
|
-
# Rejects path configurations the plugin cannot work with.
#
# Raises LogStash::ConfigurationError when the first directory of @path
# contains a %{...} field reference, or when the path contains no field
# reference at all. Each failure is logged before it is raised.
def validate_path
  unless (root_directory =~ FIELD_REF).nil?
    dynamic_root_msg = 'The starting part of the path should not be dynamic.'
    @logger.error(dynamic_root_msg, path: @path)
    raise LogStash::ConfigurationError, dynamic_root_msg
  end

  return if path_with_field_ref?

  static_path_msg = 'Path should include some time related fields to allow for file rotation.'
  @logger.error(static_path_msg, path: @path)
  raise LogStash::ConfigurationError, static_path_msg
end
|
182
|
-
|
183
|
-
private
|
184
|
-
# Returns the first directory component of @path, or nil when there is none.
# On Windows the first component is the drive letter, so the second one is used.
def root_directory
  segments = @path.split(File::SEPARATOR).reject(&:empty?)
  Gem.win_platform? ? segments[1] : segments.first
end
|
193
|
-
|
194
|
-
public
|
195
|
-
# Receives a batch of [event, encoded_payload] pairs, groups the payloads by
# the temp file each event resolves to, and appends them while holding
# @io_mutex. When stale cleanup is event-driven it is triggered here too.
def multi_receive_encoded(events_and_encoded)
  payloads_by_file = events_and_encoded.each_with_object(Hash.new { |h, k| h[k] = [] }) do |(event, encoded), grouped|
    grouped[event_path(event)] << encoded
  end

  @io_mutex.synchronize do
    payloads_by_file.each_pair do |target, payloads|
      writer = open(target)
      # append to the file
      payloads.each { |payload| writer.write(payload) }
      # Flush right away only when no periodic flusher is alive to do it.
      writer.flush unless @flusher && @flusher.alive?
    end

    close_stale_files if @stale_cleanup_type == 'events'
  end
end
|
214
|
-
|
215
|
-
# Shuts the output down: stops the periodic timers, then (under @io_mutex)
# closes every open temp file and hands it to the ingestor, and finally
# stops the ingestor.
def close
  [@flusher, @cleaner].each { |timer| timer.stop unless timer.nil? }

  @io_mutex.synchronize do
    @logger.debug('Close: closing files')

    @files.each_pair do |file_path, writer|
      begin
        writer.close
        @logger.debug("Closed file #{file_path}", fd: writer)

        kusto_send_file(file_path)
      rescue Exception => e
        # A failure on one file is logged; the remaining files are still processed.
        @logger.error('Exception while flushing and closing files.', exception: e)
      end
    end
  end

  @ingestor.stop unless @ingestor.nil?
end
|
235
|
-
|
236
|
-
private
|
237
|
-
# True when log_path, once expanded to an absolute path, lies underneath
# @file_root (i.e. starts with "<@file_root>/").
def inside_file_root?(log_path)
  root_prefix = "#{@file_root}/"
  File.expand_path(log_path).start_with?(root_prefix)
end
|
241
|
-
|
242
|
-
private
|
243
|
-
# Resolves the temp file an event should be written to.
#
# Falls back to @failure_path when the rendered path escapes the file root,
# or when the target file does not exist and create_if_deleted is off.
def event_path(event)
  candidate = generate_filepath(event)

  destination =
    if path_with_field_ref? && !inside_file_root?(candidate)
      @logger.warn('The event tried to write outside the files root, writing the event to the failure file', event: event, filename: @failure_path)
      @failure_path
    elsif @create_if_deleted || !deleted?(candidate)
      candidate
    else
      @failure_path
    end

  @logger.debug('Writing event to tmp file.', filename: destination)

  destination
end
|
255
|
-
|
256
|
-
private
|
257
|
-
# Renders the @path template against the event via event.sprintf.
def generate_filepath(event)
  path_template = @path
  event.sprintf(path_template)
end
|
260
|
-
|
261
|
-
private
|
262
|
-
# Truthy (the match offset) when the configured path contains a %{...}
# field reference, nil otherwise.
def path_with_field_ref?
  FIELD_REF =~ path
end
|
265
|
-
|
266
|
-
private
|
267
|
-
# Returns the leading part of the expanded path made of the segments that
# come before the first segment containing a %{...} field reference.
def extract_file_root
  separator = File::SEPARATOR
  segments = File.expand_path(path).split(separator)
  static_count = segments.index { |segment| segment =~ FIELD_REF } || segments.length
  segments.first(static_count).join(separator)
end
|
271
|
-
|
272
|
-
# the back-bone of @flusher, our periodic-flushing interval.
|
273
|
-
private
|
274
|
-
# Flushes every open temp file while holding @io_mutex. Any exception raised
# during the cycle is logged and swallowed.
def flush_pending_files
  @io_mutex.synchronize do
    @logger.debug('Starting flush cycle')

    @files.each_pair do |file_path, writer|
      @logger.debug('Flushing file', path: file_path, fd: writer)
      writer.flush
    end
  end
rescue Exception => e
  # squash exceptions caught while flushing after logging them
  @logger.error('Exception flushing files', exception: e.message, backtrace: e.backtrace)
end
|
287
|
-
|
288
|
-
# every 10 seconds or so (triggered by events, but if there are no events there's no point closing files anyway)
|
289
|
-
private
|
290
|
-
# Closes and uploads files that were not written to since the previous cycle.
#
# Does nothing until @stale_cleanup_interval seconds have passed since the
# last cycle. Files that stay open are marked inactive; a write marks them
# active again.
def close_stale_files
  now = Time.now
  return if now - @last_stale_cleanup_cycle < @stale_cleanup_interval

  @logger.debug('Starting stale files cleanup cycle', files: @files)
  stale = @files.reject { |_file_path, writer| writer.active }
  @logger.debug("#{stale.count} stale files found", inactive_files: stale)

  stale.each_pair do |file_path, writer|
    @logger.info("Closing file #{file_path}")
    writer.close
    @files.delete(file_path)

    kusto_send_file(file_path)
  end

  # mark all files as inactive, a call to write will mark them as active again
  @files.each_value { |writer| writer.active = false }
  @last_stale_cleanup_cycle = now
end
|
308
|
-
|
309
|
-
private
|
310
|
-
# True when @files holds a non-nil writer for the given path.
def cached?(path)
  !@files.fetch(path, nil).nil?
end
|
313
|
-
|
314
|
-
private
|
315
|
-
# True when nothing exists on disk at the given path.
def deleted?(path)
  present = File.exist?(path)
  !present
end
|
318
|
-
|
319
|
-
private
|
320
|
-
# Returns the cached writer for path, or opens the file in append mode
# (creating its directory first when needed), wraps it in an IOWriter and
# caches it in @files.
#
# When the file is missing on disk: with create_if_deleted the cached entry
# is dropped and the file is reopened; without it a cached writer, if any,
# is returned as-is.
def open(path)
  return @files[path] unless deleted?(path) || !cached?(path)

  if deleted?(path)
    return @files[path] if !@create_if_deleted && cached?(path)

    if @create_if_deleted
      @logger.debug('Required file does not exist, creating it.', path: path)
      @files.delete(path)
    end
  end

  @logger.info('Opening file', path: path)

  parent_dir = File.dirname(path)
  unless Dir.exist?(parent_dir)
    @logger.info('Creating directory', directory: parent_dir)
    # -1 means "use the OS default mode".
    @dir_mode == -1 ? FileUtils.mkdir_p(parent_dir) : FileUtils.mkdir_p(parent_dir, mode: @dir_mode)
  end

  # work around a bug opening fifos (bug JRUBY-6280)
  stat = begin
    File.stat(path)
  rescue
    nil
  end
  use_java_writer = stat && stat.ftype == 'fifo' && LogStash::Environment.jruby?

  handle =
    if use_java_writer
      java.io.FileWriter.new(java.io.File.new(path))
    elsif @file_mode == -1
      File.new(path, 'a+')
    else
      File.new(path, 'a+', @file_mode)
    end

  @files[path] = IOWriter.new(handle)
end
|
365
|
-
|
366
|
-
private
|
367
|
-
# Queues the file at file_path for asynchronous upload through the ingestor,
# passing along the delete_temp_files setting.
def kusto_send_file(file_path)
  remove_after_upload = delete_temp_files
  @ingestor.upload_async(file_path, remove_after_upload)
end
|
370
|
-
|
371
|
-
private
|
372
|
-
# Finds temp files left behind by earlier runs and queues them for upload.
#
# The search starts at the static directory prefix of @path: everything up to
# and including the last '/' that precedes the first '%'. Regular files whose
# name ends with ".<database>.<table>" are sent. A missing directory is
# silently skipped; Errno::ENOENT raised while scanning is logged as a warning.
def recover_past_files
  require 'find'

  # we need to find the last "regular" part in the path before any dynamic vars
  path_last_char = @path.length - 1
  pattern_start = @path.index('%') || path_last_char
  last_folder_before_pattern = @path.rindex('/', pattern_start) || path_last_char
  search_root = @path[0..last_folder_before_pattern]

  return unless Dir.exist?(search_root)

  @logger.info("Going to recover old files in path #{search_root}")

  # Literal suffix comparison: database/table names are not treated as regex
  # patterns, and directories that happen to share the suffix are ignored.
  suffix = ".#{database}.#{table}"
  old_files = Find.find(search_root).select do |candidate|
    candidate.end_with?(suffix) && File.file?(candidate)
  end
  @logger.info("Found #{old_files.length} old file(s), sending them now...")

  old_files.each { |file| kusto_send_file(file) }
rescue Errno::ENOENT => e
  @logger.warn('No such file or directory', exception: e.class, message: e.message, path: search_root, backtrace: e.backtrace)
end
|
396
|
-
end
|
397
|
-
|
398
|
-
# wrapper class
|
399
|
-
# Wrapper around an IO-like object that tracks whether it has been written
# to. `active` is set to true by every write and can be reset by the owner;
# any other message the wrapped object responds to is forwarded to it.
class IOWriter
  attr_accessor :active

  # io: the underlying writable object (anything responding to write/flush).
  def initialize(io)
    @io = io
    @active = false
  end

  # Writes to the wrapped object and marks the writer as active.
  # Returns true (the new value of the active flag).
  def write(*args)
    @io.write(*args)
    @active = true
  end

  def flush
    @io.flush
  end

  # Forwards messages the wrapped object publicly responds to.
  def method_missing(method_name, *args, &block)
    if @io.respond_to?(method_name)
      @io.send(method_name, *args, &block)
    else
      super
    end
  end

  # Keeps respond_to? in line with what method_missing actually forwards.
  def respond_to_missing?(method_name, include_private = false)
    @io.respond_to?(method_name) || super
  end
end
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'logstash/outputs/base'
|
4
|
+
require 'logstash/namespace'
|
5
|
+
require 'logstash/errors'
|
6
|
+
|
7
|
+
require 'logstash/outputs/kusto/ingestor'
|
8
|
+
require 'logstash/outputs/kusto/interval'
|
9
|
+
|
10
|
+
##
|
11
|
+
# This plugin sends messages to Azure Kusto in batches.
|
12
|
+
#
|
13
|
+
class LogStash::Outputs::Kusto < LogStash::Outputs::Base
|
14
|
+
config_name 'kusto'
|
15
|
+
concurrency :shared
|
16
|
+
|
17
|
+
FIELD_REF = /%\{[^}]+\}/
|
18
|
+
|
19
|
+
attr_reader :failure_path
|
20
|
+
|
21
|
+
# The path to the file to write. Event fields can be used here,
|
22
|
+
# like `/var/log/logstash/%{host}/%{application}`
|
23
|
+
# One may also utilize the path option for date-based log
|
24
|
+
# rotation via the joda time format. This will use the event
|
25
|
+
# timestamp.
|
26
|
+
# E.g.: `path => "./test-%{+YYYY-MM-dd}.txt"` to create
|
27
|
+
# `./test-2013-05-29.txt`
|
28
|
+
#
|
29
|
+
# If you use an absolute path you cannot start with a dynamic string.
|
30
|
+
# E.g: `/%{myfield}/`, `/test-%{myfield}/` are not valid paths
|
31
|
+
config :path, validate: :string, required: true
|
32
|
+
|
33
|
+
# Flush interval (in seconds) for flushing writes to files.
|
34
|
+
# 0 will flush on every message. Increase this value to reduce IO calls but keep
|
35
|
+
# in mind that events buffered before flush can be lost in case of abrupt failure.
|
36
|
+
config :flush_interval, validate: :number, default: 2
|
37
|
+
|
38
|
+
# If the generated path is invalid, the events will be saved
|
39
|
+
# into this file and inside the defined path.
|
40
|
+
config :filename_failure, validate: :string, default: '_filepath_failures'
|
41
|
+
|
42
|
+
# If the configured file is deleted, but an event is handled by the plugin,
|
43
|
+
# the plugin will recreate the file. Default => true
|
44
|
+
config :create_if_deleted, validate: :boolean, default: true
|
45
|
+
|
46
|
+
# Dir access mode to use. Note that due to the bug in jruby system umask
|
47
|
+
# is ignored on linux: https://github.com/jruby/jruby/issues/3426
|
48
|
+
# Setting it to -1 uses default OS value.
|
49
|
+
# Example: `"dir_mode" => 0750`
|
50
|
+
config :dir_mode, validate: :number, default: -1
|
51
|
+
|
52
|
+
# File access mode to use. Note that due to the bug in jruby system umask
|
53
|
+
# is ignored on linux: https://github.com/jruby/jruby/issues/3426
|
54
|
+
# Setting it to -1 uses default OS value.
|
55
|
+
# Example: `"file_mode" => 0640`
|
56
|
+
config :file_mode, validate: :number, default: -1
|
57
|
+
|
58
|
+
# TODO: fix the interval type...
|
59
|
+
config :stale_cleanup_interval, validate: :number, default: 10
|
60
|
+
config :stale_cleanup_type, validate: %w[events interval], default: 'events'
|
61
|
+
|
62
|
+
# Should the plugin recover from failure?
|
63
|
+
#
|
64
|
+
# If `true`, the plugin will look for temp files from past runs within the
|
65
|
+
# path (before any dynamic pattern is added) and try to process them
|
66
|
+
#
|
67
|
+
# If `false`, the plugin will disregard temp files found
|
68
|
+
config :recovery, validate: :boolean, default: true
|
69
|
+
|
70
|
+
|
71
|
+
# The Kusto endpoint for ingestion related communication. You can see it on the Azure Portal.
|
72
|
+
config :ingest_url, validate: :string, required: true
|
73
|
+
|
74
|
+
# The following are the credentials used to connect to the Kusto service
|
75
|
+
# application id
|
76
|
+
config :app_id, validate: :string, required: true
|
77
|
+
# application key (secret)
|
78
|
+
config :app_key, validate: :password, required: true
|
79
|
+
# aad tenant id
|
80
|
+
config :app_tenant, validate: :string, default: nil
|
81
|
+
|
82
|
+
# The following are the data settings that impact where events are written to
|
83
|
+
# Database name
|
84
|
+
config :database, validate: :string, required: true
|
85
|
+
# Target table name
|
86
|
+
config :table, validate: :string, required: true
|
87
|
+
# Mapping name - Used by Kusto to map each attribute from incoming event JSON strings to the appropriate column in the table.
|
88
|
+
# Note that this must be in JSON format, as this is the interface between Logstash and Kusto
|
89
|
+
config :json_mapping, validate: :string, required: true
|
90
|
+
|
91
|
+
# Mapping name - deprecated, use json_mapping
|
92
|
+
config :mapping, validate: :string, deprecated: true
|
93
|
+
|
94
|
+
|
95
|
+
# Determines if local files used for temporary storage will be deleted
|
96
|
+
# after upload is successful
|
97
|
+
config :delete_temp_files, validate: :boolean, default: true
|
98
|
+
|
99
|
+
# TODO: will be used to route events to many tables according to event properties
|
100
|
+
config :dynamic_event_routing, validate: :boolean, default: false
|
101
|
+
|
102
|
+
# Specify how many files can be uploaded concurrently
|
103
|
+
config :upload_concurrent_count, validate: :number, default: 3
|
104
|
+
|
105
|
+
# Specify how many files can be kept in the upload queue before the main process
|
106
|
+
# starts processing them in the main thread (not healthy)
|
107
|
+
config :upload_queue_size, validate: :number, default: 30
|
108
|
+
|
109
|
+
# Host of the proxy , is an optional field. Can connect directly
|
110
|
+
config :proxy_host, validate: :string, required: false
|
111
|
+
|
112
|
+
# Port where the proxy runs , defaults to 80. Usually a value like 3128
|
113
|
+
config :proxy_port, validate: :number, required: false , default: 80
|
114
|
+
|
115
|
+
# Protocol of the proxy URL; can be http or https. TODO: do we need this option, or should it be ignored and removed?
|
116
|
+
config :proxy_protocol, validate: :string, required: false , default: 'http'
|
117
|
+
|
118
|
+
default :codec, 'json_lines'
|
119
|
+
|
120
|
+
# Plugin start-up hook.
#
# Resolves the effective mapping name and the temp-file path template,
# validates the path, creates the upload thread pool and the ingestor,
# optionally re-sends files left over from earlier runs, and starts the
# periodic flush / stale-file cleanup timers when they are enabled.
def register
  require 'fileutils' # For mkdir_p

  @files = {}
  @io_mutex = Mutex.new

  # An empty json_mapping falls back to the deprecated `mapping` option.
  final_mapping = json_mapping.empty? ? mapping : json_mapping

  # TODO: add id to the tmp path to support multiple outputs of the same type
  # The suffix notes the destination of the events stored in each temp file.
  destination_suffix =
    if dynamic_event_routing
      '%{[@metadata][database]}.%{[@metadata][table]}.%{[@metadata][final_mapping]}'
    else
      "#{database}.#{table}"
    end
  @path = File.expand_path("#{path}.#{destination_suffix}")

  validate_path

  @file_root = path_with_field_ref? ? extract_file_root : File.dirname(path)
  @failure_path = File.join(@file_root, @filename_failure)

  # When the queue is full, :caller_runs makes the submitting thread do the upload itself.
  executor = Concurrent::ThreadPoolExecutor.new(
    min_threads: 1,
    max_threads: upload_concurrent_count,
    max_queue: upload_queue_size,
    fallback_policy: :caller_runs
  )

  @ingestor = Ingestor.new(ingest_url, app_id, app_key, app_tenant, database, table, final_mapping,
                           delete_temp_files, proxy_host, proxy_port, proxy_protocol, @logger, executor)

  # send existing files
  recover_past_files if recovery

  @last_stale_cleanup_cycle = Time.now

  @flush_interval = @flush_interval.to_i
  @flusher = Interval.start(@flush_interval, -> { flush_pending_files }) if @flush_interval > 0

  interval_cleanup = (@stale_cleanup_type == 'interval') && (@stale_cleanup_interval > 0)
  @cleaner = Interval.start(stale_cleanup_interval, -> { close_stale_files }) if interval_cleanup
end
|
169
|
+
|
170
|
+
private
|
171
|
+
# Rejects path configurations the plugin cannot work with.
#
# Raises LogStash::ConfigurationError when the first directory of @path
# contains a %{...} field reference, or when the path contains no field
# reference at all. Each failure is logged before it is raised.
def validate_path
  unless (root_directory =~ FIELD_REF).nil?
    dynamic_root_msg = 'The starting part of the path should not be dynamic.'
    @logger.error(dynamic_root_msg, path: @path)
    raise LogStash::ConfigurationError, dynamic_root_msg
  end

  return if path_with_field_ref?

  static_path_msg = 'Path should include some time related fields to allow for file rotation.'
  @logger.error(static_path_msg, path: @path)
  raise LogStash::ConfigurationError, static_path_msg
end
|
182
|
+
|
183
|
+
private
|
184
|
+
# Returns the first directory component of @path, or nil when there is none.
# On Windows the first component is the drive letter, so the second one is used.
def root_directory
  segments = @path.split(File::SEPARATOR).reject(&:empty?)
  Gem.win_platform? ? segments[1] : segments.first
end
|
193
|
+
|
194
|
+
public
|
195
|
+
# Receives a batch of [event, encoded_payload] pairs, groups the payloads by
# the temp file each event resolves to, and appends them while holding
# @io_mutex. When stale cleanup is event-driven it is triggered here too.
def multi_receive_encoded(events_and_encoded)
  payloads_by_file = events_and_encoded.each_with_object(Hash.new { |h, k| h[k] = [] }) do |(event, encoded), grouped|
    grouped[event_path(event)] << encoded
  end

  @io_mutex.synchronize do
    payloads_by_file.each_pair do |target, payloads|
      writer = open(target)
      # append to the file
      payloads.each { |payload| writer.write(payload) }
      # Flush right away only when no periodic flusher is alive to do it.
      writer.flush unless @flusher && @flusher.alive?
    end

    close_stale_files if @stale_cleanup_type == 'events'
  end
end
|
214
|
+
|
215
|
+
# Shuts the output down: stops the periodic timers, then (under @io_mutex)
# closes every open temp file and hands it to the ingestor, and finally
# stops the ingestor.
def close
  [@flusher, @cleaner].each { |timer| timer.stop unless timer.nil? }

  @io_mutex.synchronize do
    @logger.debug('Close: closing files')

    @files.each_pair do |file_path, writer|
      begin
        writer.close
        @logger.debug("Closed file #{file_path}", fd: writer)

        kusto_send_file(file_path)
      rescue Exception => e
        # A failure on one file is logged; the remaining files are still processed.
        @logger.error('Exception while flushing and closing files.', exception: e)
      end
    end
  end

  @ingestor.stop unless @ingestor.nil?
end
|
235
|
+
|
236
|
+
private
|
237
|
+
# True when log_path, once expanded to an absolute path, lies underneath
# @file_root (i.e. starts with "<@file_root>/").
def inside_file_root?(log_path)
  root_prefix = "#{@file_root}/"
  File.expand_path(log_path).start_with?(root_prefix)
end
|
241
|
+
|
242
|
+
private
|
243
|
+
# Resolves the temp file an event should be written to.
#
# Falls back to @failure_path when the rendered path escapes the file root,
# or when the target file does not exist and create_if_deleted is off.
def event_path(event)
  candidate = generate_filepath(event)

  destination =
    if path_with_field_ref? && !inside_file_root?(candidate)
      @logger.warn('The event tried to write outside the files root, writing the event to the failure file', event: event, filename: @failure_path)
      @failure_path
    elsif @create_if_deleted || !deleted?(candidate)
      candidate
    else
      @failure_path
    end

  @logger.debug('Writing event to tmp file.', filename: destination)

  destination
end
|
255
|
+
|
256
|
+
private
|
257
|
+
# Renders the @path template against the event via event.sprintf.
def generate_filepath(event)
  path_template = @path
  event.sprintf(path_template)
end
|
260
|
+
|
261
|
+
private
|
262
|
+
# Truthy (the match offset) when the configured path contains a %{...}
# field reference, nil otherwise.
def path_with_field_ref?
  FIELD_REF =~ path
end
|
265
|
+
|
266
|
+
private
|
267
|
+
# Returns the leading part of the expanded path made of the segments that
# come before the first segment containing a %{...} field reference.
def extract_file_root
  separator = File::SEPARATOR
  segments = File.expand_path(path).split(separator)
  static_count = segments.index { |segment| segment =~ FIELD_REF } || segments.length
  segments.first(static_count).join(separator)
end
|
271
|
+
|
272
|
+
# the back-bone of @flusher, our periodic-flushing interval.
|
273
|
+
private
|
274
|
+
# Flushes every open temp file while holding @io_mutex. Any exception raised
# during the cycle is logged and swallowed.
def flush_pending_files
  @io_mutex.synchronize do
    @logger.debug('Starting flush cycle')

    @files.each_pair do |file_path, writer|
      @logger.debug('Flushing file', path: file_path, fd: writer)
      writer.flush
    end
  end
rescue Exception => e
  # squash exceptions caught while flushing after logging them
  @logger.error('Exception flushing files', exception: e.message, backtrace: e.backtrace)
end
|
287
|
+
|
288
|
+
# every 10 seconds or so (triggered by events, but if there are no events there's no point closing files anyway)
|
289
|
+
private
|
290
|
+
# Closes and uploads files that were not written to since the previous cycle.
#
# Does nothing until @stale_cleanup_interval seconds have passed since the
# last cycle. Files that stay open are marked inactive; a write marks them
# active again.
def close_stale_files
  now = Time.now
  return if now - @last_stale_cleanup_cycle < @stale_cleanup_interval

  @logger.debug('Starting stale files cleanup cycle', files: @files)
  stale = @files.reject { |_file_path, writer| writer.active }
  @logger.debug("#{stale.count} stale files found", inactive_files: stale)

  stale.each_pair do |file_path, writer|
    @logger.info("Closing file #{file_path}")
    writer.close
    @files.delete(file_path)

    kusto_send_file(file_path)
  end

  # mark all files as inactive, a call to write will mark them as active again
  @files.each_value { |writer| writer.active = false }
  @last_stale_cleanup_cycle = now
end
|
308
|
+
|
309
|
+
private
|
310
|
+
# True when @files holds a non-nil writer for the given path.
def cached?(path)
  !@files.fetch(path, nil).nil?
end
|
313
|
+
|
314
|
+
private
|
315
|
+
# True when nothing exists on disk at the given path.
def deleted?(path)
  present = File.exist?(path)
  !present
end
|
318
|
+
|
319
|
+
private
|
320
|
+
# Returns the cached writer for path, or opens the file in append mode
# (creating its directory first when needed), wraps it in an IOWriter and
# caches it in @files.
#
# When the file is missing on disk: with create_if_deleted the cached entry
# is dropped and the file is reopened; without it a cached writer, if any,
# is returned as-is.
def open(path)
  return @files[path] unless deleted?(path) || !cached?(path)

  if deleted?(path)
    return @files[path] if !@create_if_deleted && cached?(path)

    if @create_if_deleted
      @logger.debug('Required file does not exist, creating it.', path: path)
      @files.delete(path)
    end
  end

  @logger.info('Opening file', path: path)

  parent_dir = File.dirname(path)
  unless Dir.exist?(parent_dir)
    @logger.info('Creating directory', directory: parent_dir)
    # -1 means "use the OS default mode".
    @dir_mode == -1 ? FileUtils.mkdir_p(parent_dir) : FileUtils.mkdir_p(parent_dir, mode: @dir_mode)
  end

  # work around a bug opening fifos (bug JRUBY-6280)
  stat = begin
    File.stat(path)
  rescue
    nil
  end
  use_java_writer = stat && stat.ftype == 'fifo' && LogStash::Environment.jruby?

  handle =
    if use_java_writer
      java.io.FileWriter.new(java.io.File.new(path))
    elsif @file_mode == -1
      File.new(path, 'a+')
    else
      File.new(path, 'a+', @file_mode)
    end

  @files[path] = IOWriter.new(handle)
end
|
365
|
+
|
366
|
+
private
|
367
|
+
# Queues the file at file_path for asynchronous upload through the ingestor,
# passing along the delete_temp_files setting.
def kusto_send_file(file_path)
  remove_after_upload = delete_temp_files
  @ingestor.upload_async(file_path, remove_after_upload)
end
|
370
|
+
|
371
|
+
private
|
372
|
+
# Finds temp files left behind by earlier runs and queues them for upload.
#
# The search starts at the static directory prefix of @path: everything up to
# and including the last '/' that precedes the first '%'. Regular files whose
# name ends with ".<database>.<table>" are sent. A missing directory is
# silently skipped; Errno::ENOENT raised while scanning is logged as a warning.
def recover_past_files
  require 'find'

  # we need to find the last "regular" part in the path before any dynamic vars
  path_last_char = @path.length - 1
  pattern_start = @path.index('%') || path_last_char
  last_folder_before_pattern = @path.rindex('/', pattern_start) || path_last_char
  search_root = @path[0..last_folder_before_pattern]

  return unless Dir.exist?(search_root)

  @logger.info("Going to recover old files in path #{search_root}")

  # Literal suffix comparison: database/table names are not treated as regex
  # patterns, and directories that happen to share the suffix are ignored.
  suffix = ".#{database}.#{table}"
  old_files = Find.find(search_root).select do |candidate|
    candidate.end_with?(suffix) && File.file?(candidate)
  end
  @logger.info("Found #{old_files.length} old file(s), sending them now...")

  old_files.each { |file| kusto_send_file(file) }
rescue Errno::ENOENT => e
  @logger.warn('No such file or directory', exception: e.class, message: e.message, path: search_root, backtrace: e.backtrace)
end
|
396
|
+
end
|
397
|
+
|
398
|
+
# wrapper class
|
399
|
+
# Wrapper around an IO-like object that tracks whether it has been written
# to. `active` is set to true by every write and can be reset by the owner;
# any other message the wrapped object responds to is forwarded to it.
class IOWriter
  attr_accessor :active

  # io: the underlying writable object (anything responding to write/flush).
  def initialize(io)
    @io = io
    @active = false
  end

  # Writes to the wrapped object and marks the writer as active.
  # Returns true (the new value of the active flag).
  def write(*args)
    @io.write(*args)
    @active = true
  end

  def flush
    @io.flush
  end

  # Forwards messages the wrapped object publicly responds to.
  def method_missing(method_name, *args, &block)
    if @io.respond_to?(method_name)
      @io.send(method_name, *args, &block)
    else
      super
    end
  end

  # Keeps respond_to? in line with what method_missing actually forwards.
  def respond_to_missing?(method_name, include_private = false)
    @io.respond_to?(method_name) || super
  end
end
|