logstash-output-kusto 1.0.5-java → 1.0.6-java

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,422 +1,422 @@
1
- # encoding: utf-8
2
-
3
- require 'logstash/outputs/base'
4
- require 'logstash/namespace'
5
- require 'logstash/errors'
6
-
7
- require 'logstash/outputs/kusto/ingestor'
8
- require 'logstash/outputs/kusto/interval'
9
-
10
- ##
11
- # This plugin sends messages to Azure Kusto in batches.
12
- #
13
- class LogStash::Outputs::Kusto < LogStash::Outputs::Base
14
- config_name 'kusto'
15
- concurrency :shared
16
-
17
- FIELD_REF = /%\{[^}]+\}/
18
-
19
- attr_reader :failure_path
20
-
21
- # The path to the file to write. Event fields can be used here,
22
- # like `/var/log/logstash/%{host}/%{application}`
23
- # One may also utilize the path option for date-based log
24
- # rotation via the joda time format. This will use the event
25
- # timestamp.
26
- # E.g.: `path => "./test-%{+YYYY-MM-dd}.txt"` to create
27
- # `./test-2013-05-29.txt`
28
- #
29
- # If you use an absolute path you cannot start with a dynamic string.
30
- # E.g: `/%{myfield}/`, `/test-%{myfield}/` are not valid paths
31
- config :path, validate: :string, required: true
32
-
33
- # Flush interval (in seconds) for flushing writes to files.
34
- # 0 will flush on every message. Increase this value to reduce IO calls but keep
35
- # in mind that events buffered before flush can be lost in case of abrupt failure.
36
- config :flush_interval, validate: :number, default: 2
37
-
38
- # If the generated path is invalid, the events will be saved
39
- # into this file and inside the defined path.
40
- config :filename_failure, validate: :string, default: '_filepath_failures'
41
-
42
- # If the configured file is deleted, but an event is handled by the plugin,
43
- # the plugin will recreate the file. Default => true
44
- config :create_if_deleted, validate: :boolean, default: true
45
-
46
- # Dir access mode to use. Note that due to the bug in jruby system umask
47
- # is ignored on linux: https://github.com/jruby/jruby/issues/3426
48
- # Setting it to -1 uses default OS value.
49
- # Example: `"dir_mode" => 0750`
50
- config :dir_mode, validate: :number, default: -1
51
-
52
- # File access mode to use. Note that due to the bug in jruby system umask
53
- # is ignored on linux: https://github.com/jruby/jruby/issues/3426
54
- # Setting it to -1 uses default OS value.
55
- # Example: `"file_mode" => 0640`
56
- config :file_mode, validate: :number, default: -1
57
-
58
- # TODO: fix the interval type...
59
- config :stale_cleanup_interval, validate: :number, default: 10
60
- config :stale_cleanup_type, validate: %w[events interval], default: 'events'
61
-
62
- # Should the plugin recover from failure?
63
- #
64
- # If `true`, the plugin will look for temp files from past runs within the
65
- # path (before any dynamic pattern is added) and try to process them
66
- #
67
- # If `false`, the plugin will disregard temp files found
68
- config :recovery, validate: :boolean, default: true
69
-
70
-
71
- # The Kusto endpoint for ingestion related communication. You can see it on the Azure Portal.
72
- config :ingest_url, validate: :string, required: true
73
-
74
- # The following are the credentials used to connect to the Kusto service
75
- # application id
76
- config :app_id, validate: :string, required: true
77
- # application key (secret)
78
- config :app_key, validate: :password, required: true
79
- # aad tenant id
80
- config :app_tenant, validate: :string, default: nil
81
-
82
- # The following are the data settings that impact where events are written to
83
- # Database name
84
- config :database, validate: :string, required: true
85
- # Target table name
86
- config :table, validate: :string, required: true
87
- # Mapping name - Used by Kusto to map each attribute from incoming event JSON strings to the appropriate column in the table.
88
- # Note that this must be in JSON format, as this is the interface between Logstash and Kusto
89
- config :json_mapping, validate: :string, required: true
90
-
91
- # Mapping name - deprecated, use json_mapping
92
- config :mapping, validate: :string, deprecated: true
93
-
94
-
95
- # Determines if local files used for temporary storage will be deleted
96
- # after upload is successful
97
- config :delete_temp_files, validate: :boolean, default: true
98
-
99
- # TODO: will be used to route events to many tables according to event properties
100
- config :dynamic_event_routing, validate: :boolean, default: false
101
-
102
- # Specify how many files can be uploaded concurrently
103
- config :upload_concurrent_count, validate: :number, default: 3
104
-
105
- # Specify how many files can be kept in the upload queue before the main process
106
- # starts processing them in the main thread (not healthy)
107
- config :upload_queue_size, validate: :number, default: 30
108
-
109
- # Host of the proxy. This is an optional field; the plugin can also connect directly.
110
- config :proxy_host, validate: :string, required: false
111
-
112
- # Port where the proxy runs; defaults to 80. Usually a value like 3128.
113
- config :proxy_port, validate: :number, required: false , default: 80
114
-
115
- # Proxy URL protocol; can be http or https. Do we need it this way, or should we ignore this and remove it?
116
- config :proxy_protocol, validate: :string, required: false , default: 'http'
117
-
118
- default :codec, 'json_lines'
119
-
120
- def register
121
- require 'fileutils' # For mkdir_p
122
-
123
- @files = {}
124
- @io_mutex = Mutex.new
125
-
126
- final_mapping = json_mapping
127
- if final_mapping.empty?
128
- final_mapping = mapping
129
- end
130
-
131
- # TODO: add id to the tmp path to support multiple outputs of the same type
132
- # add fields from the meta that will note the destination of the events in the file
133
- @path = if dynamic_event_routing
134
- File.expand_path("#{path}.%{[@metadata][database]}.%{[@metadata][table]}.%{[@metadata][final_mapping]}")
135
- else
136
- File.expand_path("#{path}.#{database}.#{table}")
137
- end
138
-
139
- validate_path
140
-
141
- @file_root = if path_with_field_ref?
142
- extract_file_root
143
- else
144
- File.dirname(path)
145
- end
146
- @failure_path = File.join(@file_root, @filename_failure)
147
-
148
- executor = Concurrent::ThreadPoolExecutor.new(min_threads: 1,
149
- max_threads: upload_concurrent_count,
150
- max_queue: upload_queue_size,
151
- fallback_policy: :caller_runs)
152
-
153
- @ingestor = Ingestor.new(ingest_url, app_id, app_key, app_tenant, database, table, final_mapping, delete_temp_files, proxy_host, proxy_port,proxy_protocol, @logger, executor)
154
-
155
- # send existing files
156
- recover_past_files if recovery
157
-
158
- @last_stale_cleanup_cycle = Time.now
159
-
160
- @flush_interval = @flush_interval.to_i
161
- if @flush_interval > 0
162
- @flusher = Interval.start(@flush_interval, -> { flush_pending_files })
163
- end
164
-
165
- if (@stale_cleanup_type == 'interval') && (@stale_cleanup_interval > 0)
166
- @cleaner = Interval.start(stale_cleanup_interval, -> { close_stale_files })
167
- end
168
- end
169
-
170
- private
171
- def validate_path
172
- if (root_directory =~ FIELD_REF) != nil
173
- @logger.error('The starting part of the path should not be dynamic.', path: @path)
174
- raise LogStash::ConfigurationError.new('The starting part of the path should not be dynamic.')
175
- end
176
-
177
- if !path_with_field_ref?
178
- @logger.error('Path should include some time related fields to allow for file rotation.', path: @path)
179
- raise LogStash::ConfigurationError.new('Path should include some time related fields to allow for file rotation.')
180
- end
181
- end
182
-
183
- private
184
- def root_directory
185
- parts = @path.split(File::SEPARATOR).reject(&:empty?)
186
- if Gem.win_platform?
187
- # First part is the drive letter
188
- parts[1]
189
- else
190
- parts.first
191
- end
192
- end
193
-
194
- public
195
- def multi_receive_encoded(events_and_encoded)
196
- encoded_by_path = Hash.new { |h, k| h[k] = [] }
197
-
198
- events_and_encoded.each do |event, encoded|
199
- file_output_path = event_path(event)
200
- encoded_by_path[file_output_path] << encoded
201
- end
202
-
203
- @io_mutex.synchronize do
204
- encoded_by_path.each do |path, chunks|
205
- fd = open(path)
206
- # append to the file
207
- chunks.each { |chunk| fd.write(chunk) }
208
- fd.flush unless @flusher && @flusher.alive?
209
- end
210
-
211
- close_stale_files if @stale_cleanup_type == 'events'
212
- end
213
- end
214
-
215
- def close
216
- @flusher.stop unless @flusher.nil?
217
- @cleaner.stop unless @cleaner.nil?
218
- @io_mutex.synchronize do
219
- @logger.debug('Close: closing files')
220
-
221
- @files.each do |path, fd|
222
- begin
223
- fd.close
224
- @logger.debug("Closed file #{path}", fd: fd)
225
-
226
- kusto_send_file(path)
227
- rescue Exception => e
228
- @logger.error('Exception while flushing and closing files.', exception: e)
229
- end
230
- end
231
- end
232
-
233
- @ingestor.stop unless @ingestor.nil?
234
- end
235
-
236
- private
237
- def inside_file_root?(log_path)
238
- target_file = File.expand_path(log_path)
239
- return target_file.start_with?("#{@file_root}/")
240
- end
241
-
242
- private
243
- def event_path(event)
244
- file_output_path = generate_filepath(event)
245
- if path_with_field_ref? && !inside_file_root?(file_output_path)
246
- @logger.warn('The event tried to write outside the files root, writing the event to the failure file', event: event, filename: @failure_path)
247
- file_output_path = @failure_path
248
- elsif !@create_if_deleted && deleted?(file_output_path)
249
- file_output_path = @failure_path
250
- end
251
- @logger.debug('Writing event to tmp file.', filename: file_output_path)
252
-
253
- file_output_path
254
- end
255
-
256
- private
257
- def generate_filepath(event)
258
- event.sprintf(@path)
259
- end
260
-
261
- private
262
- def path_with_field_ref?
263
- path =~ FIELD_REF
264
- end
265
-
266
- private
267
- def extract_file_root
268
- parts = File.expand_path(path).split(File::SEPARATOR)
269
- parts.take_while { |part| part !~ FIELD_REF }.join(File::SEPARATOR)
270
- end
271
-
272
- # the back-bone of @flusher, our periodic-flushing interval.
273
- private
274
- def flush_pending_files
275
- @io_mutex.synchronize do
276
- @logger.debug('Starting flush cycle')
277
-
278
- @files.each do |path, fd|
279
- @logger.debug('Flushing file', path: path, fd: fd)
280
- fd.flush
281
- end
282
- end
283
- rescue Exception => e
284
- # squash exceptions caught while flushing after logging them
285
- @logger.error('Exception flushing files', exception: e.message, backtrace: e.backtrace)
286
- end
287
-
288
- # every 10 seconds or so (triggered by events, but if there are no events there's no point closing files anyway)
289
- private
290
- def close_stale_files
291
- now = Time.now
292
- return unless now - @last_stale_cleanup_cycle >= @stale_cleanup_interval
293
-
294
- @logger.debug('Starting stale files cleanup cycle', files: @files)
295
- inactive_files = @files.select { |path, fd| not fd.active }
296
- @logger.debug("#{inactive_files.count} stale files found", inactive_files: inactive_files)
297
- inactive_files.each do |path, fd|
298
- @logger.info("Closing file #{path}")
299
- fd.close
300
- @files.delete(path)
301
-
302
- kusto_send_file(path)
303
- end
304
- # mark all files as inactive, a call to write will mark them as active again
305
- @files.each { |path, fd| fd.active = false }
306
- @last_stale_cleanup_cycle = now
307
- end
308
-
309
- private
310
- def cached?(path)
311
- @files.include?(path) && !@files[path].nil?
312
- end
313
-
314
- private
315
- def deleted?(path)
316
- !File.exist?(path)
317
- end
318
-
319
- private
320
- def open(path)
321
- return @files[path] if !deleted?(path) && cached?(path)
322
-
323
- if deleted?(path)
324
- if @create_if_deleted
325
- @logger.debug('Required file does not exist, creating it.', path: path)
326
- @files.delete(path)
327
- else
328
- return @files[path] if cached?(path)
329
- end
330
- end
331
-
332
- @logger.info('Opening file', path: path)
333
-
334
- dir = File.dirname(path)
335
- if !Dir.exist?(dir)
336
- @logger.info('Creating directory', directory: dir)
337
- if @dir_mode != -1
338
- FileUtils.mkdir_p(dir, mode: @dir_mode)
339
- else
340
- FileUtils.mkdir_p(dir)
341
- end
342
- end
343
-
344
- # work around a bug opening fifos (bug JRUBY-6280)
345
- stat = begin
346
- File.stat(path)
347
- rescue
348
- nil
349
- end
350
- fd = if stat && stat.ftype == 'fifo' && LogStash::Environment.jruby?
351
- java.io.FileWriter.new(java.io.File.new(path))
352
- elsif @file_mode != -1
353
- File.new(path, 'a+', @file_mode)
354
- else
355
- File.new(path, 'a+')
356
- end
357
- # fd = if @file_mode != -1
358
- # File.new(path, 'a+', @file_mode)
359
- # else
360
- # File.new(path, 'a+')
361
- # end
362
- # end
363
- @files[path] = IOWriter.new(fd)
364
- end
365
-
366
- private
367
- def kusto_send_file(file_path)
368
- @ingestor.upload_async(file_path, delete_temp_files)
369
- end
370
-
371
- private
372
- def recover_past_files
373
- require 'find'
374
-
375
- # we need to find the last "regular" part in the path before any dynamic vars
376
- path_last_char = @path.length - 1
377
-
378
- pattern_start = @path.index('%') || path_last_char
379
- last_folder_before_pattern = @path.rindex('/', pattern_start) || path_last_char
380
- new_path = path[0..last_folder_before_pattern]
381
-
382
- begin
383
- return unless Dir.exist?(new_path)
384
- @logger.info("Going to recover old files in path #{@new_path}")
385
-
386
- old_files = Find.find(new_path).select { |p| /.*\.#{database}\.#{table}$/ =~ p }
387
- @logger.info("Found #{old_files.length} old file(s), sending them now...")
388
-
389
- old_files.each do |file|
390
- kusto_send_file(file)
391
- end
392
- rescue Errno::ENOENT => e
393
- @logger.warn('No such file or directory', exception: e.class, message: e.message, path: new_path, backtrace: e.backtrace)
394
- end
395
- end
396
- end
397
-
398
- # wrapper class
399
- class IOWriter
400
- def initialize(io)
401
- @io = io
402
- end
403
-
404
- def write(*args)
405
- @io.write(*args)
406
- @active = true
407
- end
408
-
409
- def flush
410
- @io.flush
411
- end
412
-
413
- def method_missing(method_name, *args, &block)
414
- if @io.respond_to?(method_name)
415
-
416
- @io.send(method_name, *args, &block)
417
- else
418
- super
419
- end
420
- end
421
- attr_accessor :active
422
- end
1
+ # encoding: utf-8
2
+
3
+ require 'logstash/outputs/base'
4
+ require 'logstash/namespace'
5
+ require 'logstash/errors'
6
+
7
+ require 'logstash/outputs/kusto/ingestor'
8
+ require 'logstash/outputs/kusto/interval'
9
+
10
+ ##
11
+ # This plugin sends messages to Azure Kusto in batches.
12
+ #
13
+ class LogStash::Outputs::Kusto < LogStash::Outputs::Base
14
+ config_name 'kusto'
15
+ concurrency :shared
16
+
17
+ FIELD_REF = /%\{[^}]+\}/
18
+
19
+ attr_reader :failure_path
20
+
21
+ # The path to the file to write. Event fields can be used here,
22
+ # like `/var/log/logstash/%{host}/%{application}`
23
+ # One may also utilize the path option for date-based log
24
+ # rotation via the joda time format. This will use the event
25
+ # timestamp.
26
+ # E.g.: `path => "./test-%{+YYYY-MM-dd}.txt"` to create
27
+ # `./test-2013-05-29.txt`
28
+ #
29
+ # If you use an absolute path you cannot start with a dynamic string.
30
+ # E.g: `/%{myfield}/`, `/test-%{myfield}/` are not valid paths
31
+ config :path, validate: :string, required: true
32
+
33
+ # Flush interval (in seconds) for flushing writes to files.
34
+ # 0 will flush on every message. Increase this value to reduce IO calls but keep
35
+ # in mind that events buffered before flush can be lost in case of abrupt failure.
36
+ config :flush_interval, validate: :number, default: 2
37
+
38
+ # If the generated path is invalid, the events will be saved
39
+ # into this file and inside the defined path.
40
+ config :filename_failure, validate: :string, default: '_filepath_failures'
41
+
42
+ # If the configured file is deleted, but an event is handled by the plugin,
43
+ # the plugin will recreate the file. Default => true
44
+ config :create_if_deleted, validate: :boolean, default: true
45
+
46
+ # Dir access mode to use. Note that due to the bug in jruby system umask
47
+ # is ignored on linux: https://github.com/jruby/jruby/issues/3426
48
+ # Setting it to -1 uses default OS value.
49
+ # Example: `"dir_mode" => 0750`
50
+ config :dir_mode, validate: :number, default: -1
51
+
52
+ # File access mode to use. Note that due to the bug in jruby system umask
53
+ # is ignored on linux: https://github.com/jruby/jruby/issues/3426
54
+ # Setting it to -1 uses default OS value.
55
+ # Example: `"file_mode" => 0640`
56
+ config :file_mode, validate: :number, default: -1
57
+
58
+ # TODO: fix the interval type...
59
+ config :stale_cleanup_interval, validate: :number, default: 10
60
+ config :stale_cleanup_type, validate: %w[events interval], default: 'events'
61
+
62
+ # Should the plugin recover from failure?
63
+ #
64
+ # If `true`, the plugin will look for temp files from past runs within the
65
+ # path (before any dynamic pattern is added) and try to process them
66
+ #
67
+ # If `false`, the plugin will disregard temp files found
68
+ config :recovery, validate: :boolean, default: true
69
+
70
+
71
+ # The Kusto endpoint for ingestion related communication. You can see it on the Azure Portal.
72
+ config :ingest_url, validate: :string, required: true
73
+
74
+ # The following are the credentials used to connect to the Kusto service
75
+ # application id
76
+ config :app_id, validate: :string, required: true
77
+ # application key (secret)
78
+ config :app_key, validate: :password, required: true
79
+ # aad tenant id
80
+ config :app_tenant, validate: :string, default: nil
81
+
82
+ # The following are the data settings that impact where events are written to
83
+ # Database name
84
+ config :database, validate: :string, required: true
85
+ # Target table name
86
+ config :table, validate: :string, required: true
87
+ # Mapping name - Used by Kusto to map each attribute from incoming event JSON strings to the appropriate column in the table.
88
+ # Note that this must be in JSON format, as this is the interface between Logstash and Kusto
89
+ config :json_mapping, validate: :string, required: true
90
+
91
+ # Mapping name - deprecated, use json_mapping
92
+ config :mapping, validate: :string, deprecated: true
93
+
94
+
95
+ # Determines if local files used for temporary storage will be deleted
96
+ # after upload is successful
97
+ config :delete_temp_files, validate: :boolean, default: true
98
+
99
+ # TODO: will be used to route events to many tables according to event properties
100
+ config :dynamic_event_routing, validate: :boolean, default: false
101
+
102
+ # Specify how many files can be uploaded concurrently
103
+ config :upload_concurrent_count, validate: :number, default: 3
104
+
105
+ # Specify how many files can be kept in the upload queue before the main process
106
+ # starts processing them in the main thread (not healthy)
107
+ config :upload_queue_size, validate: :number, default: 30
108
+
109
+ # Host of the proxy. This is an optional field; the plugin can also connect directly.
110
+ config :proxy_host, validate: :string, required: false
111
+
112
+ # Port where the proxy runs; defaults to 80. Usually a value like 3128.
113
+ config :proxy_port, validate: :number, required: false , default: 80
114
+
115
+ # Proxy URL protocol; can be http or https. Do we need it this way, or should we ignore this and remove it?
116
+ config :proxy_protocol, validate: :string, required: false , default: 'http'
117
+
118
+ default :codec, 'json_lines'
119
+
120
+ def register
121
+ require 'fileutils' # For mkdir_p
122
+
123
+ @files = {}
124
+ @io_mutex = Mutex.new
125
+
126
+ final_mapping = json_mapping
127
+ if final_mapping.empty?
128
+ final_mapping = mapping
129
+ end
130
+
131
+ # TODO: add id to the tmp path to support multiple outputs of the same type
132
+ # add fields from the meta that will note the destination of the events in the file
133
+ @path = if dynamic_event_routing
134
+ File.expand_path("#{path}.%{[@metadata][database]}.%{[@metadata][table]}.%{[@metadata][final_mapping]}")
135
+ else
136
+ File.expand_path("#{path}.#{database}.#{table}")
137
+ end
138
+
139
+ validate_path
140
+
141
+ @file_root = if path_with_field_ref?
142
+ extract_file_root
143
+ else
144
+ File.dirname(path)
145
+ end
146
+ @failure_path = File.join(@file_root, @filename_failure)
147
+
148
+ executor = Concurrent::ThreadPoolExecutor.new(min_threads: 1,
149
+ max_threads: upload_concurrent_count,
150
+ max_queue: upload_queue_size,
151
+ fallback_policy: :caller_runs)
152
+
153
+ @ingestor = Ingestor.new(ingest_url, app_id, app_key, app_tenant, database, table, final_mapping, delete_temp_files, proxy_host, proxy_port,proxy_protocol, @logger, executor)
154
+
155
+ # send existing files
156
+ recover_past_files if recovery
157
+
158
+ @last_stale_cleanup_cycle = Time.now
159
+
160
+ @flush_interval = @flush_interval.to_i
161
+ if @flush_interval > 0
162
+ @flusher = Interval.start(@flush_interval, -> { flush_pending_files })
163
+ end
164
+
165
+ if (@stale_cleanup_type == 'interval') && (@stale_cleanup_interval > 0)
166
+ @cleaner = Interval.start(stale_cleanup_interval, -> { close_stale_files })
167
+ end
168
+ end
169
+
170
+ private
171
+ def validate_path
172
+ if (root_directory =~ FIELD_REF) != nil
173
+ @logger.error('The starting part of the path should not be dynamic.', path: @path)
174
+ raise LogStash::ConfigurationError.new('The starting part of the path should not be dynamic.')
175
+ end
176
+
177
+ if !path_with_field_ref?
178
+ @logger.error('Path should include some time related fields to allow for file rotation.', path: @path)
179
+ raise LogStash::ConfigurationError.new('Path should include some time related fields to allow for file rotation.')
180
+ end
181
+ end
182
+
183
+ private
184
+ def root_directory
185
+ parts = @path.split(File::SEPARATOR).reject(&:empty?)
186
+ if Gem.win_platform?
187
+ # First part is the drive letter
188
+ parts[1]
189
+ else
190
+ parts.first
191
+ end
192
+ end
193
+
194
+ public
195
+ def multi_receive_encoded(events_and_encoded)
196
+ encoded_by_path = Hash.new { |h, k| h[k] = [] }
197
+
198
+ events_and_encoded.each do |event, encoded|
199
+ file_output_path = event_path(event)
200
+ encoded_by_path[file_output_path] << encoded
201
+ end
202
+
203
+ @io_mutex.synchronize do
204
+ encoded_by_path.each do |path, chunks|
205
+ fd = open(path)
206
+ # append to the file
207
+ chunks.each { |chunk| fd.write(chunk) }
208
+ fd.flush unless @flusher && @flusher.alive?
209
+ end
210
+
211
+ close_stale_files if @stale_cleanup_type == 'events'
212
+ end
213
+ end
214
+
215
+ def close
216
+ @flusher.stop unless @flusher.nil?
217
+ @cleaner.stop unless @cleaner.nil?
218
+ @io_mutex.synchronize do
219
+ @logger.debug('Close: closing files')
220
+
221
+ @files.each do |path, fd|
222
+ begin
223
+ fd.close
224
+ @logger.debug("Closed file #{path}", fd: fd)
225
+
226
+ kusto_send_file(path)
227
+ rescue Exception => e
228
+ @logger.error('Exception while flushing and closing files.', exception: e)
229
+ end
230
+ end
231
+ end
232
+
233
+ @ingestor.stop unless @ingestor.nil?
234
+ end
235
+
236
+ private
237
+ def inside_file_root?(log_path)
238
+ target_file = File.expand_path(log_path)
239
+ return target_file.start_with?("#{@file_root}/")
240
+ end
241
+
242
+ private
243
+ def event_path(event)
244
+ file_output_path = generate_filepath(event)
245
+ if path_with_field_ref? && !inside_file_root?(file_output_path)
246
+ @logger.warn('The event tried to write outside the files root, writing the event to the failure file', event: event, filename: @failure_path)
247
+ file_output_path = @failure_path
248
+ elsif !@create_if_deleted && deleted?(file_output_path)
249
+ file_output_path = @failure_path
250
+ end
251
+ @logger.debug('Writing event to tmp file.', filename: file_output_path)
252
+
253
+ file_output_path
254
+ end
255
+
256
+ private
257
+ def generate_filepath(event)
258
+ event.sprintf(@path)
259
+ end
260
+
261
+ private
262
+ def path_with_field_ref?
263
+ path =~ FIELD_REF
264
+ end
265
+
266
+ private
267
+ def extract_file_root
268
+ parts = File.expand_path(path).split(File::SEPARATOR)
269
+ parts.take_while { |part| part !~ FIELD_REF }.join(File::SEPARATOR)
270
+ end
271
+
272
+ # the back-bone of @flusher, our periodic-flushing interval.
273
+ private
274
+ def flush_pending_files
275
+ @io_mutex.synchronize do
276
+ @logger.debug('Starting flush cycle')
277
+
278
+ @files.each do |path, fd|
279
+ @logger.debug('Flushing file', path: path, fd: fd)
280
+ fd.flush
281
+ end
282
+ end
283
+ rescue Exception => e
284
+ # squash exceptions caught while flushing after logging them
285
+ @logger.error('Exception flushing files', exception: e.message, backtrace: e.backtrace)
286
+ end
287
+
288
+ # every 10 seconds or so (triggered by events, but if there are no events there's no point closing files anyway)
289
+ private
290
+ def close_stale_files
291
+ now = Time.now
292
+ return unless now - @last_stale_cleanup_cycle >= @stale_cleanup_interval
293
+
294
+ @logger.debug('Starting stale files cleanup cycle', files: @files)
295
+ inactive_files = @files.select { |path, fd| not fd.active }
296
+ @logger.debug("#{inactive_files.count} stale files found", inactive_files: inactive_files)
297
+ inactive_files.each do |path, fd|
298
+ @logger.info("Closing file #{path}")
299
+ fd.close
300
+ @files.delete(path)
301
+
302
+ kusto_send_file(path)
303
+ end
304
+ # mark all files as inactive, a call to write will mark them as active again
305
+ @files.each { |path, fd| fd.active = false }
306
+ @last_stale_cleanup_cycle = now
307
+ end
308
+
309
+ private
310
+ def cached?(path)
311
+ @files.include?(path) && !@files[path].nil?
312
+ end
313
+
314
+ private
315
+ def deleted?(path)
316
+ !File.exist?(path)
317
+ end
318
+
319
+ private
320
+ def open(path)
321
+ return @files[path] if !deleted?(path) && cached?(path)
322
+
323
+ if deleted?(path)
324
+ if @create_if_deleted
325
+ @logger.debug('Required file does not exist, creating it.', path: path)
326
+ @files.delete(path)
327
+ else
328
+ return @files[path] if cached?(path)
329
+ end
330
+ end
331
+
332
+ @logger.info('Opening file', path: path)
333
+
334
+ dir = File.dirname(path)
335
+ if !Dir.exist?(dir)
336
+ @logger.info('Creating directory', directory: dir)
337
+ if @dir_mode != -1
338
+ FileUtils.mkdir_p(dir, mode: @dir_mode)
339
+ else
340
+ FileUtils.mkdir_p(dir)
341
+ end
342
+ end
343
+
344
+ # work around a bug opening fifos (bug JRUBY-6280)
345
+ stat = begin
346
+ File.stat(path)
347
+ rescue
348
+ nil
349
+ end
350
+ fd = if stat && stat.ftype == 'fifo' && LogStash::Environment.jruby?
351
+ java.io.FileWriter.new(java.io.File.new(path))
352
+ elsif @file_mode != -1
353
+ File.new(path, 'a+', @file_mode)
354
+ else
355
+ File.new(path, 'a+')
356
+ end
357
+ # fd = if @file_mode != -1
358
+ # File.new(path, 'a+', @file_mode)
359
+ # else
360
+ # File.new(path, 'a+')
361
+ # end
362
+ # end
363
+ @files[path] = IOWriter.new(fd)
364
+ end
365
+
366
+ private
367
+ def kusto_send_file(file_path)
368
+ @ingestor.upload_async(file_path, delete_temp_files)
369
+ end
370
+
371
+ private
372
+ def recover_past_files
373
+ require 'find'
374
+
375
+ # we need to find the last "regular" part in the path before any dynamic vars
376
+ path_last_char = @path.length - 1
377
+
378
+ pattern_start = @path.index('%') || path_last_char
379
+ last_folder_before_pattern = @path.rindex('/', pattern_start) || path_last_char
380
+ new_path = path[0..last_folder_before_pattern]
381
+
382
+ begin
383
+ return unless Dir.exist?(new_path)
384
+ @logger.info("Going to recover old files in path #{@new_path}")
385
+
386
+ old_files = Find.find(new_path).select { |p| /.*\.#{database}\.#{table}$/ =~ p }
387
+ @logger.info("Found #{old_files.length} old file(s), sending them now...")
388
+
389
+ old_files.each do |file|
390
+ kusto_send_file(file)
391
+ end
392
+ rescue Errno::ENOENT => e
393
+ @logger.warn('No such file or directory', exception: e.class, message: e.message, path: new_path, backtrace: e.backtrace)
394
+ end
395
+ end
396
+ end
397
+
398
+ # wrapper class
399
+ class IOWriter
400
+ def initialize(io)
401
+ @io = io
402
+ end
403
+
404
+ def write(*args)
405
+ @io.write(*args)
406
+ @active = true
407
+ end
408
+
409
+ def flush
410
+ @io.flush
411
+ end
412
+
413
+ def method_missing(method_name, *args, &block)
414
+ if @io.respond_to?(method_name)
415
+
416
+ @io.send(method_name, *args, &block)
417
+ else
418
+ super
419
+ end
420
+ end
421
+ attr_accessor :active
422
+ end