logstash-output-kusto 1.0.5-java → 1.0.6-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,422 +1,422 @@
1
- # encoding: utf-8
2
-
3
- require 'logstash/outputs/base'
4
- require 'logstash/namespace'
5
- require 'logstash/errors'
6
-
7
- require 'logstash/outputs/kusto/ingestor'
8
- require 'logstash/outputs/kusto/interval'
9
-
10
- ##
11
- # This plugin sends messages to Azure Kusto in batches.
12
- #
13
- class LogStash::Outputs::Kusto < LogStash::Outputs::Base
14
- config_name 'kusto'
15
- concurrency :shared
16
-
17
- FIELD_REF = /%\{[^}]+\}/
18
-
19
- attr_reader :failure_path
20
-
21
- # The path to the file to write. Event fields can be used here,
22
- # like `/var/log/logstash/%{host}/%{application}`
23
- # One may also utilize the path option for date-based log
24
- # rotation via the joda time format. This will use the event
25
- # timestamp.
26
- # E.g.: `path => "./test-%{+YYYY-MM-dd}.txt"` to create
27
- # `./test-2013-05-29.txt`
28
- #
29
- # If you use an absolute path you cannot start with a dynamic string.
30
- # E.g: `/%{myfield}/`, `/test-%{myfield}/` are not valid paths
31
- config :path, validate: :string, required: true
32
-
33
- # Flush interval (in seconds) for flushing writes to files.
34
- # 0 will flush on every message. Increase this value to recude IO calls but keep
35
- # in mind that events buffered before flush can be lost in case of abrupt failure.
36
- config :flush_interval, validate: :number, default: 2
37
-
38
- # If the generated path is invalid, the events will be saved
39
- # into this file and inside the defined path.
40
- config :filename_failure, validate: :string, default: '_filepath_failures'
41
-
42
- # If the configured file is deleted, but an event is handled by the plugin,
43
- # the plugin will recreate the file. Default => true
44
- config :create_if_deleted, validate: :boolean, default: true
45
-
46
- # Dir access mode to use. Note that due to the bug in jruby system umask
47
- # is ignored on linux: https://github.com/jruby/jruby/issues/3426
48
- # Setting it to -1 uses default OS value.
49
- # Example: `"dir_mode" => 0750`
50
- config :dir_mode, validate: :number, default: -1
51
-
52
- # File access mode to use. Note that due to the bug in jruby system umask
53
- # is ignored on linux: https://github.com/jruby/jruby/issues/3426
54
- # Setting it to -1 uses default OS value.
55
- # Example: `"file_mode" => 0640`
56
- config :file_mode, validate: :number, default: -1
57
-
58
- # TODO: fix the interval type...
59
- config :stale_cleanup_interval, validate: :number, default: 10
60
- config :stale_cleanup_type, validate: %w[events interval], default: 'events'
61
-
62
- # Should the plugin recover from failure?
63
- #
64
- # If `true`, the plugin will look for temp files from past runs within the
65
- # path (before any dynamic pattern is added) and try to process them
66
- #
67
- # If `false`, the plugin will disregard temp files found
68
- config :recovery, validate: :boolean, default: true
69
-
70
-
71
- # The Kusto endpoint for ingestion related communication. You can see it on the Azure Portal.
72
- config :ingest_url, validate: :string, required: true
73
-
74
- # The following are the credentails used to connect to the Kusto service
75
- # application id
76
- config :app_id, validate: :string, required: true
77
- # application key (secret)
78
- config :app_key, validate: :password, required: true
79
- # aad tenant id
80
- config :app_tenant, validate: :string, default: nil
81
-
82
- # The following are the data settings that impact where events are written to
83
- # Database name
84
- config :database, validate: :string, required: true
85
- # Target table name
86
- config :table, validate: :string, required: true
87
- # Mapping name - Used by Kusto to map each attribute from incoming event JSON strings to the appropriate column in the table.
88
- # Note that this must be in JSON format, as this is the interface between Logstash and Kusto
89
- config :json_mapping, validate: :string, required: true
90
-
91
- # Mapping name - deprecated, use json_mapping
92
- config :mapping, validate: :string, deprecated: true
93
-
94
-
95
- # Determines if local files used for temporary storage will be deleted
96
- # after upload is successful
97
- config :delete_temp_files, validate: :boolean, default: true
98
-
99
- # TODO: will be used to route events to many tables according to event properties
100
- config :dynamic_event_routing, validate: :boolean, default: false
101
-
102
- # Specify how many files can be uploaded concurrently
103
- config :upload_concurrent_count, validate: :number, default: 3
104
-
105
- # Specify how many files can be kept in the upload queue before the main process
106
- # starts processing them in the main thread (not healthy)
107
- config :upload_queue_size, validate: :number, default: 30
108
-
109
- # Host of the proxy , is an optional field. Can connect directly
110
- config :proxy_host, validate: :string, required: false
111
-
112
- # Port where the proxy runs , defaults to 80. Usually a value like 3128
113
- config :proxy_port, validate: :number, required: false , default: 80
114
-
115
- # Check Proxy URL can be over http or https. Dowe need it this way or ignore this & remove this
116
- config :proxy_protocol, validate: :string, required: false , default: 'http'
117
-
118
- default :codec, 'json_lines'
119
-
120
- def register
121
- require 'fileutils' # For mkdir_p
122
-
123
- @files = {}
124
- @io_mutex = Mutex.new
125
-
126
- final_mapping = json_mapping
127
- if final_mapping.empty?
128
- final_mapping = mapping
129
- end
130
-
131
- # TODO: add id to the tmp path to support multiple outputs of the same type
132
- # add fields from the meta that will note the destination of the events in the file
133
- @path = if dynamic_event_routing
134
- File.expand_path("#{path}.%{[@metadata][database]}.%{[@metadata][table]}.%{[@metadata][final_mapping]}")
135
- else
136
- File.expand_path("#{path}.#{database}.#{table}")
137
- end
138
-
139
- validate_path
140
-
141
- @file_root = if path_with_field_ref?
142
- extract_file_root
143
- else
144
- File.dirname(path)
145
- end
146
- @failure_path = File.join(@file_root, @filename_failure)
147
-
148
- executor = Concurrent::ThreadPoolExecutor.new(min_threads: 1,
149
- max_threads: upload_concurrent_count,
150
- max_queue: upload_queue_size,
151
- fallback_policy: :caller_runs)
152
-
153
- @ingestor = Ingestor.new(ingest_url, app_id, app_key, app_tenant, database, table, final_mapping, delete_temp_files, proxy_host, proxy_port,proxy_protocol, @logger, executor)
154
-
155
- # send existing files
156
- recover_past_files if recovery
157
-
158
- @last_stale_cleanup_cycle = Time.now
159
-
160
- @flush_interval = @flush_interval.to_i
161
- if @flush_interval > 0
162
- @flusher = Interval.start(@flush_interval, -> { flush_pending_files })
163
- end
164
-
165
- if (@stale_cleanup_type == 'interval') && (@stale_cleanup_interval > 0)
166
- @cleaner = Interval.start(stale_cleanup_interval, -> { close_stale_files })
167
- end
168
- end
169
-
170
- private
171
- def validate_path
172
- if (root_directory =~ FIELD_REF) != nil
173
- @logger.error('The starting part of the path should not be dynamic.', path: @path)
174
- raise LogStash::ConfigurationError.new('The starting part of the path should not be dynamic.')
175
- end
176
-
177
- if !path_with_field_ref?
178
- @logger.error('Path should include some time related fields to allow for file rotation.', path: @path)
179
- raise LogStash::ConfigurationError.new('Path should include some time related fields to allow for file rotation.')
180
- end
181
- end
182
-
183
- private
184
- def root_directory
185
- parts = @path.split(File::SEPARATOR).reject(&:empty?)
186
- if Gem.win_platform?
187
- # First part is the drive letter
188
- parts[1]
189
- else
190
- parts.first
191
- end
192
- end
193
-
194
- public
195
- def multi_receive_encoded(events_and_encoded)
196
- encoded_by_path = Hash.new { |h, k| h[k] = [] }
197
-
198
- events_and_encoded.each do |event, encoded|
199
- file_output_path = event_path(event)
200
- encoded_by_path[file_output_path] << encoded
201
- end
202
-
203
- @io_mutex.synchronize do
204
- encoded_by_path.each do |path, chunks|
205
- fd = open(path)
206
- # append to the file
207
- chunks.each { |chunk| fd.write(chunk) }
208
- fd.flush unless @flusher && @flusher.alive?
209
- end
210
-
211
- close_stale_files if @stale_cleanup_type == 'events'
212
- end
213
- end
214
-
215
- def close
216
- @flusher.stop unless @flusher.nil?
217
- @cleaner.stop unless @cleaner.nil?
218
- @io_mutex.synchronize do
219
- @logger.debug('Close: closing files')
220
-
221
- @files.each do |path, fd|
222
- begin
223
- fd.close
224
- @logger.debug("Closed file #{path}", fd: fd)
225
-
226
- kusto_send_file(path)
227
- rescue Exception => e
228
- @logger.error('Exception while flushing and closing files.', exception: e)
229
- end
230
- end
231
- end
232
-
233
- @ingestor.stop unless @ingestor.nil?
234
- end
235
-
236
- private
237
- def inside_file_root?(log_path)
238
- target_file = File.expand_path(log_path)
239
- return target_file.start_with?("#{@file_root}/")
240
- end
241
-
242
- private
243
- def event_path(event)
244
- file_output_path = generate_filepath(event)
245
- if path_with_field_ref? && !inside_file_root?(file_output_path)
246
- @logger.warn('The event tried to write outside the files root, writing the event to the failure file', event: event, filename: @failure_path)
247
- file_output_path = @failure_path
248
- elsif !@create_if_deleted && deleted?(file_output_path)
249
- file_output_path = @failure_path
250
- end
251
- @logger.debug('Writing event to tmp file.', filename: file_output_path)
252
-
253
- file_output_path
254
- end
255
-
256
- private
257
- def generate_filepath(event)
258
- event.sprintf(@path)
259
- end
260
-
261
- private
262
- def path_with_field_ref?
263
- path =~ FIELD_REF
264
- end
265
-
266
- private
267
- def extract_file_root
268
- parts = File.expand_path(path).split(File::SEPARATOR)
269
- parts.take_while { |part| part !~ FIELD_REF }.join(File::SEPARATOR)
270
- end
271
-
272
- # the back-bone of @flusher, our periodic-flushing interval.
273
- private
274
- def flush_pending_files
275
- @io_mutex.synchronize do
276
- @logger.debug('Starting flush cycle')
277
-
278
- @files.each do |path, fd|
279
- @logger.debug('Flushing file', path: path, fd: fd)
280
- fd.flush
281
- end
282
- end
283
- rescue Exception => e
284
- # squash exceptions caught while flushing after logging them
285
- @logger.error('Exception flushing files', exception: e.message, backtrace: e.backtrace)
286
- end
287
-
288
- # every 10 seconds or so (triggered by events, but if there are no events there's no point closing files anyway)
289
- private
290
- def close_stale_files
291
- now = Time.now
292
- return unless now - @last_stale_cleanup_cycle >= @stale_cleanup_interval
293
-
294
- @logger.debug('Starting stale files cleanup cycle', files: @files)
295
- inactive_files = @files.select { |path, fd| not fd.active }
296
- @logger.debug("#{inactive_files.count} stale files found", inactive_files: inactive_files)
297
- inactive_files.each do |path, fd|
298
- @logger.info("Closing file #{path}")
299
- fd.close
300
- @files.delete(path)
301
-
302
- kusto_send_file(path)
303
- end
304
- # mark all files as inactive, a call to write will mark them as active again
305
- @files.each { |path, fd| fd.active = false }
306
- @last_stale_cleanup_cycle = now
307
- end
308
-
309
- private
310
- def cached?(path)
311
- @files.include?(path) && !@files[path].nil?
312
- end
313
-
314
- private
315
- def deleted?(path)
316
- !File.exist?(path)
317
- end
318
-
319
- private
320
- def open(path)
321
- return @files[path] if !deleted?(path) && cached?(path)
322
-
323
- if deleted?(path)
324
- if @create_if_deleted
325
- @logger.debug('Required file does not exist, creating it.', path: path)
326
- @files.delete(path)
327
- else
328
- return @files[path] if cached?(path)
329
- end
330
- end
331
-
332
- @logger.info('Opening file', path: path)
333
-
334
- dir = File.dirname(path)
335
- if !Dir.exist?(dir)
336
- @logger.info('Creating directory', directory: dir)
337
- if @dir_mode != -1
338
- FileUtils.mkdir_p(dir, mode: @dir_mode)
339
- else
340
- FileUtils.mkdir_p(dir)
341
- end
342
- end
343
-
344
- # work around a bug opening fifos (bug JRUBY-6280)
345
- stat = begin
346
- File.stat(path)
347
- rescue
348
- nil
349
- end
350
- fd = if stat && stat.ftype == 'fifo' && LogStash::Environment.jruby?
351
- java.io.FileWriter.new(java.io.File.new(path))
352
- elsif @file_mode != -1
353
- File.new(path, 'a+', @file_mode)
354
- else
355
- File.new(path, 'a+')
356
- end
357
- # fd = if @file_mode != -1
358
- # File.new(path, 'a+', @file_mode)
359
- # else
360
- # File.new(path, 'a+')
361
- # end
362
- # end
363
- @files[path] = IOWriter.new(fd)
364
- end
365
-
366
- private
367
- def kusto_send_file(file_path)
368
- @ingestor.upload_async(file_path, delete_temp_files)
369
- end
370
-
371
- private
372
- def recover_past_files
373
- require 'find'
374
-
375
- # we need to find the last "regular" part in the path before any dynamic vars
376
- path_last_char = @path.length - 1
377
-
378
- pattern_start = @path.index('%') || path_last_char
379
- last_folder_before_pattern = @path.rindex('/', pattern_start) || path_last_char
380
- new_path = path[0..last_folder_before_pattern]
381
-
382
- begin
383
- return unless Dir.exist?(new_path)
384
- @logger.info("Going to recover old files in path #{@new_path}")
385
-
386
- old_files = Find.find(new_path).select { |p| /.*\.#{database}\.#{table}$/ =~ p }
387
- @logger.info("Found #{old_files.length} old file(s), sending them now...")
388
-
389
- old_files.each do |file|
390
- kusto_send_file(file)
391
- end
392
- rescue Errno::ENOENT => e
393
- @logger.warn('No such file or directory', exception: e.class, message: e.message, path: new_path, backtrace: e.backtrace)
394
- end
395
- end
396
- end
397
-
398
- # wrapper class
399
- class IOWriter
400
- def initialize(io)
401
- @io = io
402
- end
403
-
404
- def write(*args)
405
- @io.write(*args)
406
- @active = true
407
- end
408
-
409
- def flush
410
- @io.flush
411
- end
412
-
413
- def method_missing(method_name, *args, &block)
414
- if @io.respond_to?(method_name)
415
-
416
- @io.send(method_name, *args, &block)
417
- else
418
- super
419
- end
420
- end
421
- attr_accessor :active
422
- end
1
+ # encoding: utf-8
2
+
3
+ require 'logstash/outputs/base'
4
+ require 'logstash/namespace'
5
+ require 'logstash/errors'
6
+
7
+ require 'logstash/outputs/kusto/ingestor'
8
+ require 'logstash/outputs/kusto/interval'
9
+
10
+ ##
11
+ # This plugin sends messages to Azure Kusto in batches.
12
+ #
13
+ class LogStash::Outputs::Kusto < LogStash::Outputs::Base
14
+ config_name 'kusto'
15
+ concurrency :shared
16
+
17
+ FIELD_REF = /%\{[^}]+\}/
18
+
19
+ attr_reader :failure_path
20
+
21
+ # The path to the file to write. Event fields can be used here,
22
+ # like `/var/log/logstash/%{host}/%{application}`
23
+ # One may also utilize the path option for date-based log
24
+ # rotation via the joda time format. This will use the event
25
+ # timestamp.
26
+ # E.g.: `path => "./test-%{+YYYY-MM-dd}.txt"` to create
27
+ # `./test-2013-05-29.txt`
28
+ #
29
+ # If you use an absolute path you cannot start with a dynamic string.
30
+ # E.g: `/%{myfield}/`, `/test-%{myfield}/` are not valid paths
31
+ config :path, validate: :string, required: true
32
+
33
+ # Flush interval (in seconds) for flushing writes to files.
34
+ # 0 will flush on every message. Increase this value to recude IO calls but keep
35
+ # in mind that events buffered before flush can be lost in case of abrupt failure.
36
+ config :flush_interval, validate: :number, default: 2
37
+
38
+ # If the generated path is invalid, the events will be saved
39
+ # into this file and inside the defined path.
40
+ config :filename_failure, validate: :string, default: '_filepath_failures'
41
+
42
+ # If the configured file is deleted, but an event is handled by the plugin,
43
+ # the plugin will recreate the file. Default => true
44
+ config :create_if_deleted, validate: :boolean, default: true
45
+
46
+ # Dir access mode to use. Note that due to the bug in jruby system umask
47
+ # is ignored on linux: https://github.com/jruby/jruby/issues/3426
48
+ # Setting it to -1 uses default OS value.
49
+ # Example: `"dir_mode" => 0750`
50
+ config :dir_mode, validate: :number, default: -1
51
+
52
+ # File access mode to use. Note that due to the bug in jruby system umask
53
+ # is ignored on linux: https://github.com/jruby/jruby/issues/3426
54
+ # Setting it to -1 uses default OS value.
55
+ # Example: `"file_mode" => 0640`
56
+ config :file_mode, validate: :number, default: -1
57
+
58
+ # TODO: fix the interval type...
59
+ config :stale_cleanup_interval, validate: :number, default: 10
60
+ config :stale_cleanup_type, validate: %w[events interval], default: 'events'
61
+
62
+ # Should the plugin recover from failure?
63
+ #
64
+ # If `true`, the plugin will look for temp files from past runs within the
65
+ # path (before any dynamic pattern is added) and try to process them
66
+ #
67
+ # If `false`, the plugin will disregard temp files found
68
+ config :recovery, validate: :boolean, default: true
69
+
70
+
71
+ # The Kusto endpoint for ingestion related communication. You can see it on the Azure Portal.
72
+ config :ingest_url, validate: :string, required: true
73
+
74
+ # The following are the credentails used to connect to the Kusto service
75
+ # application id
76
+ config :app_id, validate: :string, required: true
77
+ # application key (secret)
78
+ config :app_key, validate: :password, required: true
79
+ # aad tenant id
80
+ config :app_tenant, validate: :string, default: nil
81
+
82
+ # The following are the data settings that impact where events are written to
83
+ # Database name
84
+ config :database, validate: :string, required: true
85
+ # Target table name
86
+ config :table, validate: :string, required: true
87
+ # Mapping name - Used by Kusto to map each attribute from incoming event JSON strings to the appropriate column in the table.
88
+ # Note that this must be in JSON format, as this is the interface between Logstash and Kusto
89
+ config :json_mapping, validate: :string, required: true
90
+
91
+ # Mapping name - deprecated, use json_mapping
92
+ config :mapping, validate: :string, deprecated: true
93
+
94
+
95
+ # Determines if local files used for temporary storage will be deleted
96
+ # after upload is successful
97
+ config :delete_temp_files, validate: :boolean, default: true
98
+
99
+ # TODO: will be used to route events to many tables according to event properties
100
+ config :dynamic_event_routing, validate: :boolean, default: false
101
+
102
+ # Specify how many files can be uploaded concurrently
103
+ config :upload_concurrent_count, validate: :number, default: 3
104
+
105
+ # Specify how many files can be kept in the upload queue before the main process
106
+ # starts processing them in the main thread (not healthy)
107
+ config :upload_queue_size, validate: :number, default: 30
108
+
109
+ # Host of the proxy , is an optional field. Can connect directly
110
+ config :proxy_host, validate: :string, required: false
111
+
112
+ # Port where the proxy runs , defaults to 80. Usually a value like 3128
113
+ config :proxy_port, validate: :number, required: false , default: 80
114
+
115
+ # Check Proxy URL can be over http or https. Dowe need it this way or ignore this & remove this
116
+ config :proxy_protocol, validate: :string, required: false , default: 'http'
117
+
118
+ default :codec, 'json_lines'
119
+
120
+ def register
121
+ require 'fileutils' # For mkdir_p
122
+
123
+ @files = {}
124
+ @io_mutex = Mutex.new
125
+
126
+ final_mapping = json_mapping
127
+ if final_mapping.empty?
128
+ final_mapping = mapping
129
+ end
130
+
131
+ # TODO: add id to the tmp path to support multiple outputs of the same type
132
+ # add fields from the meta that will note the destination of the events in the file
133
+ @path = if dynamic_event_routing
134
+ File.expand_path("#{path}.%{[@metadata][database]}.%{[@metadata][table]}.%{[@metadata][final_mapping]}")
135
+ else
136
+ File.expand_path("#{path}.#{database}.#{table}")
137
+ end
138
+
139
+ validate_path
140
+
141
+ @file_root = if path_with_field_ref?
142
+ extract_file_root
143
+ else
144
+ File.dirname(path)
145
+ end
146
+ @failure_path = File.join(@file_root, @filename_failure)
147
+
148
+ executor = Concurrent::ThreadPoolExecutor.new(min_threads: 1,
149
+ max_threads: upload_concurrent_count,
150
+ max_queue: upload_queue_size,
151
+ fallback_policy: :caller_runs)
152
+
153
+ @ingestor = Ingestor.new(ingest_url, app_id, app_key, app_tenant, database, table, final_mapping, delete_temp_files, proxy_host, proxy_port,proxy_protocol, @logger, executor)
154
+
155
+ # send existing files
156
+ recover_past_files if recovery
157
+
158
+ @last_stale_cleanup_cycle = Time.now
159
+
160
+ @flush_interval = @flush_interval.to_i
161
+ if @flush_interval > 0
162
+ @flusher = Interval.start(@flush_interval, -> { flush_pending_files })
163
+ end
164
+
165
+ if (@stale_cleanup_type == 'interval') && (@stale_cleanup_interval > 0)
166
+ @cleaner = Interval.start(stale_cleanup_interval, -> { close_stale_files })
167
+ end
168
+ end
169
+
170
+ private
171
+ def validate_path
172
+ if (root_directory =~ FIELD_REF) != nil
173
+ @logger.error('The starting part of the path should not be dynamic.', path: @path)
174
+ raise LogStash::ConfigurationError.new('The starting part of the path should not be dynamic.')
175
+ end
176
+
177
+ if !path_with_field_ref?
178
+ @logger.error('Path should include some time related fields to allow for file rotation.', path: @path)
179
+ raise LogStash::ConfigurationError.new('Path should include some time related fields to allow for file rotation.')
180
+ end
181
+ end
182
+
183
+ private
184
+ def root_directory
185
+ parts = @path.split(File::SEPARATOR).reject(&:empty?)
186
+ if Gem.win_platform?
187
+ # First part is the drive letter
188
+ parts[1]
189
+ else
190
+ parts.first
191
+ end
192
+ end
193
+
194
+ public
195
+ def multi_receive_encoded(events_and_encoded)
196
+ encoded_by_path = Hash.new { |h, k| h[k] = [] }
197
+
198
+ events_and_encoded.each do |event, encoded|
199
+ file_output_path = event_path(event)
200
+ encoded_by_path[file_output_path] << encoded
201
+ end
202
+
203
+ @io_mutex.synchronize do
204
+ encoded_by_path.each do |path, chunks|
205
+ fd = open(path)
206
+ # append to the file
207
+ chunks.each { |chunk| fd.write(chunk) }
208
+ fd.flush unless @flusher && @flusher.alive?
209
+ end
210
+
211
+ close_stale_files if @stale_cleanup_type == 'events'
212
+ end
213
+ end
214
+
215
+ def close
216
+ @flusher.stop unless @flusher.nil?
217
+ @cleaner.stop unless @cleaner.nil?
218
+ @io_mutex.synchronize do
219
+ @logger.debug('Close: closing files')
220
+
221
+ @files.each do |path, fd|
222
+ begin
223
+ fd.close
224
+ @logger.debug("Closed file #{path}", fd: fd)
225
+
226
+ kusto_send_file(path)
227
+ rescue Exception => e
228
+ @logger.error('Exception while flushing and closing files.', exception: e)
229
+ end
230
+ end
231
+ end
232
+
233
+ @ingestor.stop unless @ingestor.nil?
234
+ end
235
+
236
+ private
237
+ def inside_file_root?(log_path)
238
+ target_file = File.expand_path(log_path)
239
+ return target_file.start_with?("#{@file_root}/")
240
+ end
241
+
242
+ private
243
+ def event_path(event)
244
+ file_output_path = generate_filepath(event)
245
+ if path_with_field_ref? && !inside_file_root?(file_output_path)
246
+ @logger.warn('The event tried to write outside the files root, writing the event to the failure file', event: event, filename: @failure_path)
247
+ file_output_path = @failure_path
248
+ elsif !@create_if_deleted && deleted?(file_output_path)
249
+ file_output_path = @failure_path
250
+ end
251
+ @logger.debug('Writing event to tmp file.', filename: file_output_path)
252
+
253
+ file_output_path
254
+ end
255
+
256
+ private
257
+ def generate_filepath(event)
258
+ event.sprintf(@path)
259
+ end
260
+
261
+ private
262
+ def path_with_field_ref?
263
+ path =~ FIELD_REF
264
+ end
265
+
266
+ private
267
+ def extract_file_root
268
+ parts = File.expand_path(path).split(File::SEPARATOR)
269
+ parts.take_while { |part| part !~ FIELD_REF }.join(File::SEPARATOR)
270
+ end
271
+
272
+ # the back-bone of @flusher, our periodic-flushing interval.
273
+ private
274
+ def flush_pending_files
275
+ @io_mutex.synchronize do
276
+ @logger.debug('Starting flush cycle')
277
+
278
+ @files.each do |path, fd|
279
+ @logger.debug('Flushing file', path: path, fd: fd)
280
+ fd.flush
281
+ end
282
+ end
283
+ rescue Exception => e
284
+ # squash exceptions caught while flushing after logging them
285
+ @logger.error('Exception flushing files', exception: e.message, backtrace: e.backtrace)
286
+ end
287
+
288
+ # every 10 seconds or so (triggered by events, but if there are no events there's no point closing files anyway)
289
+ private
290
+ def close_stale_files
291
+ now = Time.now
292
+ return unless now - @last_stale_cleanup_cycle >= @stale_cleanup_interval
293
+
294
+ @logger.debug('Starting stale files cleanup cycle', files: @files)
295
+ inactive_files = @files.select { |path, fd| not fd.active }
296
+ @logger.debug("#{inactive_files.count} stale files found", inactive_files: inactive_files)
297
+ inactive_files.each do |path, fd|
298
+ @logger.info("Closing file #{path}")
299
+ fd.close
300
+ @files.delete(path)
301
+
302
+ kusto_send_file(path)
303
+ end
304
+ # mark all files as inactive, a call to write will mark them as active again
305
+ @files.each { |path, fd| fd.active = false }
306
+ @last_stale_cleanup_cycle = now
307
+ end
308
+
309
+ private
310
+ def cached?(path)
311
+ @files.include?(path) && !@files[path].nil?
312
+ end
313
+
314
+ private
315
+ def deleted?(path)
316
+ !File.exist?(path)
317
+ end
318
+
319
+ private
320
+ def open(path)
321
+ return @files[path] if !deleted?(path) && cached?(path)
322
+
323
+ if deleted?(path)
324
+ if @create_if_deleted
325
+ @logger.debug('Required file does not exist, creating it.', path: path)
326
+ @files.delete(path)
327
+ else
328
+ return @files[path] if cached?(path)
329
+ end
330
+ end
331
+
332
+ @logger.info('Opening file', path: path)
333
+
334
+ dir = File.dirname(path)
335
+ if !Dir.exist?(dir)
336
+ @logger.info('Creating directory', directory: dir)
337
+ if @dir_mode != -1
338
+ FileUtils.mkdir_p(dir, mode: @dir_mode)
339
+ else
340
+ FileUtils.mkdir_p(dir)
341
+ end
342
+ end
343
+
344
+ # work around a bug opening fifos (bug JRUBY-6280)
345
+ stat = begin
346
+ File.stat(path)
347
+ rescue
348
+ nil
349
+ end
350
+ fd = if stat && stat.ftype == 'fifo' && LogStash::Environment.jruby?
351
+ java.io.FileWriter.new(java.io.File.new(path))
352
+ elsif @file_mode != -1
353
+ File.new(path, 'a+', @file_mode)
354
+ else
355
+ File.new(path, 'a+')
356
+ end
357
+ # fd = if @file_mode != -1
358
+ # File.new(path, 'a+', @file_mode)
359
+ # else
360
+ # File.new(path, 'a+')
361
+ # end
362
+ # end
363
+ @files[path] = IOWriter.new(fd)
364
+ end
365
+
366
+ private
367
+ def kusto_send_file(file_path)
368
+ @ingestor.upload_async(file_path, delete_temp_files)
369
+ end
370
+
371
+ private
372
+ def recover_past_files
373
+ require 'find'
374
+
375
+ # we need to find the last "regular" part in the path before any dynamic vars
376
+ path_last_char = @path.length - 1
377
+
378
+ pattern_start = @path.index('%') || path_last_char
379
+ last_folder_before_pattern = @path.rindex('/', pattern_start) || path_last_char
380
+ new_path = path[0..last_folder_before_pattern]
381
+
382
+ begin
383
+ return unless Dir.exist?(new_path)
384
+ @logger.info("Going to recover old files in path #{@new_path}")
385
+
386
+ old_files = Find.find(new_path).select { |p| /.*\.#{database}\.#{table}$/ =~ p }
387
+ @logger.info("Found #{old_files.length} old file(s), sending them now...")
388
+
389
+ old_files.each do |file|
390
+ kusto_send_file(file)
391
+ end
392
+ rescue Errno::ENOENT => e
393
+ @logger.warn('No such file or directory', exception: e.class, message: e.message, path: new_path, backtrace: e.backtrace)
394
+ end
395
+ end
396
+ end
397
+
398
+ # wrapper class
399
+ class IOWriter
400
+ def initialize(io)
401
+ @io = io
402
+ end
403
+
404
+ def write(*args)
405
+ @io.write(*args)
406
+ @active = true
407
+ end
408
+
409
+ def flush
410
+ @io.flush
411
+ end
412
+
413
+ def method_missing(method_name, *args, &block)
414
+ if @io.respond_to?(method_name)
415
+
416
+ @io.send(method_name, *args, &block)
417
+ else
418
+ super
419
+ end
420
+ end
421
+ attr_accessor :active
422
+ end