logstash-output-application_insights 0.1.6 → 0.2.0

This diff shows the content of publicly available package versions as released to their public registry. It is provided for informational purposes only and reflects the changes between the two published versions.
Files changed (26)
  1. checksums.yaml +4 -4
  2. data/README.md +10 -2
  3. data/lib/logstash/outputs/application_insights.rb +13 -5
  4. data/lib/logstash/outputs/application_insights/blob.rb +27 -381
  5. data/lib/logstash/outputs/application_insights/block.rb +28 -21
  6. data/lib/logstash/outputs/application_insights/channel.rb +143 -48
  7. data/lib/logstash/outputs/application_insights/channels.rb +4 -3
  8. data/lib/logstash/outputs/application_insights/clients.rb +1 -1
  9. data/lib/logstash/outputs/application_insights/config.rb +3 -2
  10. data/lib/logstash/outputs/application_insights/constants.rb +9 -5
  11. data/lib/logstash/outputs/application_insights/context.rb +97 -0
  12. data/lib/logstash/outputs/application_insights/local_file.rb +113 -0
  13. data/lib/logstash/outputs/application_insights/notification.rb +116 -0
  14. data/lib/logstash/outputs/application_insights/notification_recovery.rb +5 -6
  15. data/lib/logstash/outputs/application_insights/shutdown_recovery.rb +3 -2
  16. data/lib/logstash/outputs/application_insights/state_table.rb +108 -0
  17. data/lib/logstash/outputs/application_insights/storage_cleanup.rb +4 -3
  18. data/lib/logstash/outputs/application_insights/storage_recovery.rb +10 -3
  19. data/lib/logstash/outputs/application_insights/test_notification.rb +3 -6
  20. data/lib/logstash/outputs/application_insights/test_storage.rb +1 -1
  21. data/lib/logstash/outputs/application_insights/upload_pipe.rb +285 -0
  22. data/lib/logstash/outputs/application_insights/validate_notification.rb +1 -1
  23. data/lib/logstash/outputs/application_insights/validate_storage.rb +1 -1
  24. data/lib/logstash/outputs/application_insights/version.rb +1 -1
  25. data/logstash-output-application-insights.gemspec +1 -1
  26. metadata +9 -4
data/lib/logstash/outputs/application_insights/block.rb
@@ -22,13 +22,13 @@
 class LogStash::Outputs::Application_insights
   class Block
 
-    attr_reader :bytes
-    attr_reader :buffer
-    attr_reader :bytesize
-    attr_reader :events_count
-    attr_reader :block_numbers
-    attr_reader :done_time
-    attr_reader :oldest_event_time
+    attr_accessor :bytes
+    attr_accessor :buffer
+    attr_accessor :bytesize
+    attr_accessor :events_count
+    attr_accessor :block_numbers
+    attr_accessor :done_time
+    attr_accessor :oldest_event_time
 
 
     public
@@ -42,20 +42,17 @@ class LogStash::Outputs::Application_insights
 
 
 
-    def initialize ( event_separator )
-      @buffer = [ ]
-      @bytesize = 0
-      @events_count = 0
+    def initialize ( event_separator = "" )
+      dispose
       @event_separator = event_separator
       @event_separator_bytesize = @event_separator.bytesize
-      @block_numbers = nil
     end
 
     # concatenate two blocks into one
     def concat ( other )
       if @bytesize + other.bytesize <= BLOB_BLOCK_MAX_BYTESIZE
         if @block_numbers
-          @block_numbers.concat( other.block_numbers ) if @block_numbers
+          @block_numbers.concat( other.block_numbers )
           @bytes += other.bytes
           @done_time = other.done_time if other.done_time > @done_time
         else
@@ -84,20 +81,30 @@ class LogStash::Outputs::Application_insights
 
     def dispose
       @bytes = nil
-      @buffer = nil
-      @bytesize = nil
-      @events_count = nil
+      @buffer = [ ]
+      @bytesize = 0
+      @events_count = 0
       @done_time = nil
       @oldest_event_time = nil
       @block_numbers = nil
     end
 
+
+    def partial_seal
+      if @done_time.nil?
+        @done_time = Time.now.utc
+        @buffer << "" # required to add eol after last event
+        @bytes = @buffer.join( @event_separator )
+        @buffer = nil # release the memory of the array
+      end
+    end
+
+
     def seal
-      @block_numbers = [ Block.generate_block_number ]
-      @done_time = Time.now.utc
-      @buffer << "" # required to add eol after last event
-      @bytes = @buffer.join( @event_separator )
-      @buffer = nil # release the memory of the array
+      if @done_time.nil?
+        @block_numbers = [ Block.generate_block_number ]
+        partial_seal
+      end
     end
 
     def is_full?
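The new partial_seal/seal split makes sealing idempotent: the @done_time.nil? guard lets flush and the shutdown/recovery paths both call seal without corrupting an already-sealed block. A minimal standalone sketch of the pattern (simplified names, not the plugin's actual class):

require "time"

class MiniBlock
  attr_reader :bytes, :done_time

  def initialize(event_separator = "\n")
    @event_separator = event_separator
    @buffer = []
    @done_time = nil
  end

  def <<(event)
    @buffer << event
  end

  # Idempotent: the @done_time guard makes repeated calls harmless,
  # so a block can be sealed by flush and again during recovery.
  def seal
    return unless @done_time.nil?
    @done_time = Time.now.utc
    @buffer << ""                        # trailing separator after the last event
    @bytes = @buffer.join(@event_separator)
    @buffer = nil                        # release the array
  end
end

block = MiniBlock.new
block << "event 1"
block << "event 2"
block.seal
block.seal                               # no-op on the second call
puts block.bytes                         # => "event 1\nevent 2\n"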
data/lib/logstash/outputs/application_insights/channel.rb
@@ -24,16 +24,20 @@ class LogStash::Outputs::Application_insights
 
     attr_reader :instrumentation_key
     attr_reader :table_id
-    attr_reader :failed_on_upload_retry_Q
-    attr_reader :failed_on_notify_retry_Q
-    attr_reader :event_format_ext
     attr_reader :blob_max_delay
+    attr_reader :blob_extension
+    attr_reader :event_format
 
     public
 
     def initialize ( instrumentation_key, table_id )
       @closing = false
       configuration = Config.current
+
+      @file_pipe = !configuration[:disable_compression]
+      @gzip_file = !configuration[:disable_compression]
+      @blob_max_bytesize = configuration[:blob_max_bytesize]
+      @blob_max_events = configuration[:blob_max_events]
 
       @logger = configuration[:logger]
 
@@ -42,19 +46,41 @@ class LogStash::Outputs::Application_insights
       @table_id = table_id
       set_table_properties( configuration )
       @semaphore = Mutex.new
-      @failed_on_upload_retry_Q = Queue.new
-      @failed_on_notify_retry_Q = Queue.new
       @workers_channel = { }
-      @active_blobs = [ Blob.new( self, 1 ) ]
 
-      launch_upload_recovery_thread
+      @failed_on_notify_retry_Q = Queue.new
       launch_notify_recovery_thread
+
+      @blob_extension = ".#{@event_format}"
+      if file_pipe?
+        @blob_extension = "_#{@event_format}.gz" if gzip_file?
+        @add_pipe_threshold = 0
+        @file_prefix = configuration[:local_file_prefix]
+        @file = nil
+        @failed_on_file_upload_retry_Q = Queue.new
+        launch_file_upload_recovery_thread
+      else
+        @add_pipe_threshold = CHANNEL_THRESHOLD_TO_ADD_UPLOAD_PIPE
+        @failed_on_block_upload_retry_Q = Queue.new
+        launch_block_upload_recovery_thread
+      end
+
+      @active_upload_pipes = [ Upload_pipe.new( self, 1 ) ]
+    end
+
+
+    def gzip_file?
+      @gzip_file
+    end
+
+    def file_pipe?
+      @file_pipe
     end
 
     def close
       @closing = true
-      @active_blobs.each do |blob|
-        blob.close
+      @active_upload_pipes.each do |upload_pipe|
+        upload_pipe.close
       end
     end
 
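In 0.2.0 a single configuration flag drives both decisions: unless disable_compression is set, the channel compresses events into a local gzip file and uploads whole files; otherwise it keeps the 0.1.x behavior of uploading blocks directly, each path with its own retry queue. A reduced sketch of that wiring (the threshold constant's value is an assumption):

# Reduced sketch of the 0.2.0 channel wiring; not the plugin's code.
CHANNEL_THRESHOLD_TO_ADD_UPLOAD_PIPE = 3   # assumed value of the constant

def wire_channel(disable_compression:, event_format: "json")
  if !disable_compression                  # file pipe: compress locally first
    { blob_extension: "_#{event_format}.gz",
      add_pipe_threshold: 0,               # grow pipes aggressively
      retry_queue: :failed_on_file_upload_retry_Q }
  else                                     # block pipe: upload blocks directly
    { blob_extension: ".#{event_format}",
      add_pipe_threshold: CHANNEL_THRESHOLD_TO_ADD_UPLOAD_PIPE,
      retry_queue: :failed_on_block_upload_retry_Q }
  end
end

p wire_channel(disable_compression: false)
p wire_channel(disable_compression: true)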
@@ -62,29 +88,102 @@ class LogStash::Outputs::Application_insights
       @closing
     end
 
-    def << ( event )
-      if @serialized_event_field && event[@serialized_event_field]
-        serialized_event = serialize_serialized_event_field( event[@serialized_event_field] )
+    # received data is an hash of the event (does not include metadata)
+    def << ( data )
+      if @serialized_event_field && data[@serialized_event_field]
+        serialized_event = serialize_serialized_event_field( data[@serialized_event_field] )
       else
-        serialized_event = ( EXT_EVENT_FORMAT_CSV == @serialization ? serialize_to_csv( event ) : serialize_to_json( event ) )
+        serialized_event = ( EXT_EVENT_FORMAT_CSV == @event_format ? serialize_to_csv( data ) : serialize_to_json( data ) )
       end
 
       if serialized_event
         sub_channel = @workers_channel[Thread.current] || @semaphore.synchronize { @workers_channel[Thread.current] = Sub_channel.new( @event_separator ) }
         sub_channel << serialized_event
       else
-        @logger.warn { "event not uploaded, no relevant data in event. table_id: #{table_id}, event: #{event}" }
+        @logger.warn { "event not uploaded, no relevant data in event. table_id: #{table_id}, event: #{data}" }
       end
     end
 
+
     def flush
-      block_list = collect_blocks
-      enqueue_blocks( block_list )
+      if file_pipe?
+        gz_collect_and_compress_blocks_to_file
+        if file_expired_or_full?
+          enqueue_to_pipe( [ @file ] )
+          @file = nil
+        end
+      else
+        list = collect_blocks
+        enqueue_to_pipe( list )
+      end
+    end
+
+
+    def recover_later_notification( tuple )
+      @failed_on_notify_retry_Q << tuple
+    end
+
+
+    def recover_later_block_upload( block_to_upload )
+      @failed_on_block_upload_retry_Q << block_to_upload
     end
 
+    def recover_later_file_upload( file_to_upload )
+      # start the file from the begining
+      file_to_upload.close_read
+      @failed_on_file_upload_retry_Q << file_to_upload
+    end
 
     private
 
+    def local_file_name
+      time_utc = Time.now.utc
+      strtime = Time.now.utc.strftime( "%F-%H-%M-%S-%L" )
+      "#{@file_prefix}_ikey-#{@instrumentation_key}_table-#{@table_id}_#{strtime}#{@blob_extension}"
+    end
+
+
+    def local_file
+      @file ||= Local_file.new( local_file_name, gzip_file? )
+    end
+
+
+    def file_expired_or_full?
+      @file && ( @file.oldest_event_time + @blob_max_delay <= Time.now.utc || @file.bytesize >= @blob_max_bytesize || @file.events_count >= @blob_max_events )
+    end
+
+
+    def gz_collect_and_compress_blocks_to_file
+      workers_channel = @semaphore.synchronize { @workers_channel.dup }
+      full_block_list = [ ]
+
+      workers_channel.each_value do |worker_channel|
+        full_block_list.concat( worker_channel.get_block_list! )
+      end
+
+      full_block_list.each do |block|
+        block.partial_seal
+        local_file << block
+      end
+    end
+
+
+    def launch_file_upload_recovery_thread
+      #recovery thread
+      Thread.new do
+        loop do
+          file_to_upload = @failed_on_file_upload_retry_Q.pop
+          until Clients.instance.storage_account_state_on? do
+            Stud.stoppable_sleep( 60 ) { stopped? }
+          end
+          if file_to_upload
+            enqueue_to_pipe( [ file_to_upload ] )
+          end
+        end
+      end
+    end
+
+
     def collect_blocks
       workers_channel = @semaphore.synchronize { @workers_channel.dup }
       full_block_list = [ ]
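The local file name encodes the prefix, instrumentation key, table id, a millisecond UTC timestamp, and the blob extension. An illustrative run of the same format string (all input values are hypothetical):

require "time"

file_prefix         = "l_logstash"       # hypothetical local_file_prefix
instrumentation_key = "0000-1111-2222"   # hypothetical
table_id            = "event_table"      # hypothetical
blob_extension      = "_json.gz"         # gzip file pipe extension

strtime = Time.now.utc.strftime( "%F-%H-%M-%S-%L" )
puts "#{file_prefix}_ikey-#{instrumentation_key}_table-#{table_id}_#{strtime}#{blob_extension}"
# e.g. l_logstash_ikey-0000-1111-2222_table-event_table_2016-08-30-10-15-30-123_json.gz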
@@ -110,26 +209,24 @@ class LogStash::Outputs::Application_insights
     end
 
 
-    def enqueue_blocks ( block_list )
-      block_list.each do |block|
-        block.seal
-        find_blob << block
+    def enqueue_to_pipe ( list )
+      list.each do |block_or_file|
+        block_or_file.seal
+        find_upload_pipe << block_or_file
       end
     end
 
 
-    def launch_upload_recovery_thread
+    def launch_block_upload_recovery_thread
      #recovery thread
      Thread.new do
-        next_block = nil
        loop do
-          block_to_upload = next_block || @failed_on_upload_retry_Q.pop
-          next_block = nil
+          block_to_upload = @failed_on_block_upload_retry_Q.pop
          until Clients.instance.storage_account_state_on? do
            Stud.stoppable_sleep( 60 ) { stopped? }
          end
          if block_to_upload
-            find_blob << block_to_upload
+            enqueue_to_pipe( [ block_to_upload ] )
          end
        end
      end
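Both recovery threads follow the same pattern: block on a retry queue, wait until the storage account is reachable again, then push the failed item back through the normal pipe. A standalone sketch with the plugin's Clients/Stud calls stubbed out:

require "thread"

failed_retry_q = Queue.new
storage_on = true                      # stands in for storage_account_state_on?

recovery = Thread.new do
  loop do
    item = failed_retry_q.pop          # blocks until an upload fails
    sleep 0.1 until storage_on         # plugin uses Stud.stoppable_sleep( 60 )
    puts "retrying upload of #{item}"  # stands in for enqueue_to_pipe( [ item ] )
  end
end

failed_retry_q << "block 42"
sleep 0.5                              # let the recovery thread run once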
@@ -152,10 +249,10 @@ class LogStash::Outputs::Application_insights
             @shutdown ||= Shutdown.instance
             @shutdown.display_msg("!!! notification won't recover in this session due to shutdown")
           else
-            success = Blob.new.notify( tuple )
+            success = Notification.new( tuple ).notify
             while success && @failed_on_notify_retry_Q.length > 0
               tuple = @failed_on_notify_retry_Q.pop
-              success = Blob.new.notify( tuple )
+              success = Notification.new( tuple ).notify
             end
           end
           tuple = nil # release for GC
@@ -168,13 +265,13 @@ class LogStash::Outputs::Application_insights
       serialized_data = nil
       if data.is_a?( String )
         serialized_data = data
-      elsif EXT_EVENT_FORMAT_CSV == @serialization
+      elsif EXT_EVENT_FORMAT_CSV == @event_format
         if data.is_a?( Array )
           serialized_data = data.to_csv( :col_sep => @csv_separator )
         elsif data.is_a?( Hash )
           serialized_data = serialize_to_csv( data )
         end
-      elsif EXT_EVENT_FORMAT_JSON == @serialization
+      elsif EXT_EVENT_FORMAT_JSON == @event_format
         if data.is_a?( Hash )
           serialized_data = serialize_to_json( data )
         elsif data.is_a?( Array ) && !@table_columns.nil?
@@ -185,14 +282,14 @@ class LogStash::Outputs::Application_insights
       end
     end
 
-    def serialize_to_json ( event )
-      return event.to_json unless !@table_columns.nil?
+    def serialize_to_json ( data )
+      return data.to_json unless !@table_columns.nil?
 
-      fields = ( @case_insensitive_columns ? Utils.downcase_hash_keys( event.to_hash ) : event )
+      data = Utils.downcase_hash_keys( data ) if @case_insensitive_columns
 
       json_hash = { }
       @table_columns.each do |column|
-        value = fields[column[:field_name]] || column[:default]
+        value = data[column[:field_name]] || column[:default]
         json_hash[column[:name]] = value if value
       end
       return nil if json_hash.empty?
@@ -200,14 +297,14 @@ class LogStash::Outputs::Application_insights
     end
 
 
-    def serialize_to_csv ( event )
+    def serialize_to_csv ( data )
       return nil unless !@table_columns.nil?
 
-      fields = ( @case_insensitive_columns ? Utils.downcase_hash_keys( event.to_hash ) : event )
+      data = Utils.downcase_hash_keys( data ) if @case_insensitive_columns
 
       csv_array = [ ]
       @table_columns.each do |column|
-        value = fields[column[:field_name]] || column[:default] || @csv_default_value
+        value = data[column[:field_name]] || column[:default] || @csv_default_value
         type = (column[:type] || value.class.name).downcase.to_sym
         csv_array << ( [:hash, :array, :json, :dynamic, :object].include?( type ) ? value.to_json : value )
       end
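Apart from the @serialization → @event_format and event → data renames, the column-driven serialization is unchanged. A self-contained sketch of the CSV path (columns and values are illustrative, not the plugin's defaults):

require "csv"
require "json"

table_columns = [
  { name: "Timestamp", field_name: "timestamp", type: "string" },
  { name: "Message",   field_name: "message",   type: "string" },
  { name: "Fields",    field_name: "fields",    type: "json" },
]
csv_default_value = "null"

data = { "timestamp" => "2016-08-01T12:00:00Z",
         "message"   => "hello",
         "fields"    => { "level" => "info" } }

csv_array = table_columns.map do |column|
  value = data[column[:field_name]] || column[:default] || csv_default_value
  type  = (column[:type] || value.class.name).downcase.to_sym
  # structured values are embedded as JSON strings inside the CSV cell
  [:hash, :array, :json, :dynamic, :object].include?(type) ? value.to_json : value
end
puts csv_array.to_csv  # => 2016-08-01T12:00:00Z,hello,"{""level"":""info""}"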
@@ -216,14 +313,14 @@ class LogStash::Outputs::Application_insights
     end
 
 
-    def find_blob
-      min_blob = @active_blobs[0]
-      @active_blobs.each do |blob|
-        return blob if 0 == blob.queue_size
-        min_blob = blob if blob.queue_size < min_blob.queue_size
+    def find_upload_pipe
+      min_upload_pipe = @active_upload_pipes[0]
+      @active_upload_pipes.each do |upload_pipe|
+        return upload_pipe unless min_upload_pipe.busy?
+        min_upload_pipe = upload_pipe if upload_pipe.queue_size < min_upload_pipe.queue_size
       end
-      @active_blobs << ( min_blob = Blob.new( self, @active_blobs.length + 1 ) ) if min_blob.queue_size > 2 && @active_blobs.length < 40
-      min_blob
+      @active_upload_pipes << ( min_upload_pipe = Upload_pipe.new( self, @active_upload_pipes.length + 1 ) ) if min_upload_pipe.busy? && min_upload_pipe.queue_size >= @add_pipe_threshold && @active_upload_pipes.length < MAX_CHANNEL_UPLOAD_PIPES
      min_upload_pipe
     end
 
 
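find_upload_pipe keeps the old load-balancing shape but replaces the hard-coded queue_size > 2 and 40-pipe cap with @add_pipe_threshold and MAX_CHANNEL_UPLOAD_PIPES; with the file pipe's threshold of 0, a busy least-loaded pipe immediately spawns a new one. A reduced model, with threshold and cap passed as parameters (values hypothetical):

# Reduced model of the pipe-selection policy, reconstructed from the diff.
Pipe = Struct.new(:queue_size) do
  def busy?
    queue_size > 0
  end
end

def find_pipe(pipes, threshold, max_pipes)
  min = pipes[0]
  pipes.each do |pipe|
    return pipe unless min.busy?   # an idle minimum ends the search early
    min = pipe if pipe.queue_size < min.queue_size
  end
  # all pipes busy: add one if the least-loaded pipe is over threshold
  if min.busy? && min.queue_size >= threshold && pipes.length < max_pipes
    pipes << (min = Pipe.new(0))
  end
  min
end

pipes = [Pipe.new(5), Pipe.new(2)]
p find_pipe(pipes, 2, 40)  # all pipes busy and >= threshold: a fresh pipe is returned
p pipes.length             # => 3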
@@ -235,16 +332,17 @@ class LogStash::Outputs::Application_insights
         @event_separator = table_properties[:event_separator]
         @serialized_event_field = table_properties[:serialized_event_field]
         @table_columns = table_properties[:table_columns]
-        @serialization = table_properties[:blob_serialization]
+        @event_format = table_properties[:blob_serialization]
         @case_insensitive_columns = table_properties[:case_insensitive_columns]
         @csv_default_value = table_properties[:csv_default_value]
         @csv_separator = table_properties[:csv_separator]
       end
+
       @blob_max_delay ||= configuration[:blob_max_delay]
       @event_separator ||= configuration[:event_separator]
       @serialized_event_field ||= configuration[:serialized_event_field]
       @table_columns ||= configuration[:table_columns]
-      @serialization ||= configuration[:blob_serialization]
+      @event_format ||= configuration[:blob_serialization]
       @case_insensitive_columns ||= configuration[:case_insensitive_columns]
       @csv_default_value ||= configuration[:csv_default_value]
       @csv_separator ||= configuration[:csv_separator]
@@ -258,9 +356,6 @@ class LogStash::Outputs::Application_insights
         end
       end
 
-      # in the future, when compression is introduced, the serialization may be different from the extension
-      @event_format_ext = @serialization
-
     end
 
   end
data/lib/logstash/outputs/application_insights/channels.rb
@@ -52,11 +52,12 @@ class LogStash::Outputs::Application_insights
       elsif LogStash::FLUSH == event
         @logger.info { "received a LogStash::FLUSH event" }
       else
-        table_id = event[METADATA_FIELD_TABLE_ID] || event[FIELD_TABLE_ID] || @default_table_id
-        instrumentation_key = event[METADATA_FIELD_INSTRUMENTATION_KEY] || event[FIELD_INSTRUMENTATION_KEY] || ( @tables[table_id][:instrumentation_key] if @tables[table_id] ) || @default_instrumentation_key
+        data = event.to_hash
+        table_id = ( event.include?( METADATA_FIELD_TABLE_ID ) ? event.sprintf( "%{#{METADATA_FIELD_TABLE_ID}}" ) : data[FIELD_TABLE_ID] ) || @default_table_id
+        instrumentation_key = ( event.include?( METADATA_FIELD_INSTRUMENTATION_KEY ) ? event.sprintf( "%{#{METADATA_FIELD_INSTRUMENTATION_KEY}}" ) : data[FIELD_INSTRUMENTATION_KEY] ) || @default_instrumentation_key
 
         @flow_control.pass_or_wait
-        channel( instrumentation_key, table_id ) << event
+        channel( instrumentation_key, table_id ) << data
       end
     end
 
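The receive path now resolves the routing fields through the event API and hands the channel a plain hash, since Event#to_hash excludes @metadata. A reduced stand-in for the event class showing the effect (not Logstash's real implementation; the metadata field name is illustrative):

class StubEvent
  def initialize(data, metadata)
    @data, @metadata = data, metadata
  end

  def to_hash
    @data.dup                        # note: @metadata is not included
  end

  def include?(field)
    @metadata.key?(field) || @data.key?(field)
  end

  def sprintf(template)
    @metadata[template[2..-2]].to_s  # "%{field}" -> lookup of "field"
  end
end

event = StubEvent.new({ "message" => "hi" }, { "[@metadata][table_id]" => "t1" })
table_id = event.include?("[@metadata][table_id]") ?
           event.sprintf("%{[@metadata][table_id]}") : "default"
puts table_id        # => "t1"
p event.to_hash      # => {"message"=>"hi"}  (routing metadata never reaches the blob)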
data/lib/logstash/outputs/application_insights/clients.rb
@@ -81,7 +81,7 @@ class LogStash::Outputs::Application_insights
       test_storage = Test_storage.new( account_name )
       loop do
         sleep( @resurrect_delay )
-        if test_storage.submit
+        if test_storage.test
           @state_semaphore.synchronize {
             storage_account = @storage_accounts[account_name]
             storage_account[:off_reason] = [ ]
data/lib/logstash/outputs/application_insights/config.rb
@@ -213,9 +213,10 @@ class LogStash::Outputs::Application_insights
       }
       validate_and_adjust_table_properties!( configuration, configuration )
 
-      configuration[:state_table_name] = "#{AZURE_STORAGE_TABLE_LOGSTASH_PREFIX}#{configuration[:azure_storage_table_prefix]}#{STATE_TABLE_NAME}"
-      configuration[:test_storage_container] = "#{AZURE_STORAGE_CONTAINER_LOGSTASH_PREFIX}#{configuration[:azure_storage_container_prefix]}-#{STORAGE_TEST_CONTAINER_NAME}"
+      configuration[:state_table_name] = AZURE_STORAGE_TABLE_LOGSTASH_PREFIX + configuration[:azure_storage_table_prefix] + STATE_TABLE_NAME
+      configuration[:test_storage_container] = AZURE_STORAGE_CONTAINER_LOGSTASH_PREFIX + configuration[:azure_storage_container_prefix] + "-" + STORAGE_TEST_CONTAINER_NAME
       configuration[:partition_key_prefix] = configuration[:azure_storage_blob_prefix].gsub( "/", "" )
+      configuration[:local_file_prefix] = LOCAL_FS_FILE_PREFIX + configuration[:azure_storage_blob_prefix].gsub( "/", "_" )
 
       @@masked_configuration = mask_configuration( configuration )
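The derived :local_file_prefix flattens the blob prefix into a filesystem-safe name. An illustrative run (the constant's value and the blob prefix are assumptions):

LOCAL_FS_FILE_PREFIX = "l"                       # assumed constant value
azure_storage_blob_prefix = "dir1/dir2/events"   # hypothetical setting

puts LOCAL_FS_FILE_PREFIX + azure_storage_blob_prefix.gsub( "/", "_" )
# => "ldir1_dir2_events"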