logstash-output-application_insights 0.1.6 → 0.2.0

Files changed (26)
  1. checksums.yaml +4 -4
  2. data/README.md +10 -2
  3. data/lib/logstash/outputs/application_insights.rb +13 -5
  4. data/lib/logstash/outputs/application_insights/blob.rb +27 -381
  5. data/lib/logstash/outputs/application_insights/block.rb +28 -21
  6. data/lib/logstash/outputs/application_insights/channel.rb +143 -48
  7. data/lib/logstash/outputs/application_insights/channels.rb +4 -3
  8. data/lib/logstash/outputs/application_insights/clients.rb +1 -1
  9. data/lib/logstash/outputs/application_insights/config.rb +3 -2
  10. data/lib/logstash/outputs/application_insights/constants.rb +9 -5
  11. data/lib/logstash/outputs/application_insights/context.rb +97 -0
  12. data/lib/logstash/outputs/application_insights/local_file.rb +113 -0
  13. data/lib/logstash/outputs/application_insights/notification.rb +116 -0
  14. data/lib/logstash/outputs/application_insights/notification_recovery.rb +5 -6
  15. data/lib/logstash/outputs/application_insights/shutdown_recovery.rb +3 -2
  16. data/lib/logstash/outputs/application_insights/state_table.rb +108 -0
  17. data/lib/logstash/outputs/application_insights/storage_cleanup.rb +4 -3
  18. data/lib/logstash/outputs/application_insights/storage_recovery.rb +10 -3
  19. data/lib/logstash/outputs/application_insights/test_notification.rb +3 -6
  20. data/lib/logstash/outputs/application_insights/test_storage.rb +1 -1
  21. data/lib/logstash/outputs/application_insights/upload_pipe.rb +285 -0
  22. data/lib/logstash/outputs/application_insights/validate_notification.rb +1 -1
  23. data/lib/logstash/outputs/application_insights/validate_storage.rb +1 -1
  24. data/lib/logstash/outputs/application_insights/version.rb +1 -1
  25. data/logstash-output-application-insights.gemspec +1 -1
  26. metadata +9 -4
data/lib/logstash/outputs/application_insights/block.rb
@@ -22,13 +22,13 @@
  class LogStash::Outputs::Application_insights
  class Block
 
- attr_reader :bytes
- attr_reader :buffer
- attr_reader :bytesize
- attr_reader :events_count
- attr_reader :block_numbers
- attr_reader :done_time
- attr_reader :oldest_event_time
+ attr_accessor :bytes
+ attr_accessor :buffer
+ attr_accessor :bytesize
+ attr_accessor :events_count
+ attr_accessor :block_numbers
+ attr_accessor :done_time
+ attr_accessor :oldest_event_time
 
 
  public
@@ -42,20 +42,17 @@ class LogStash::Outputs::Application_insights
 
 
 
- def initialize ( event_separator )
- @buffer = [ ]
- @bytesize = 0
- @events_count = 0
+ def initialize ( event_separator = "" )
+ dispose
  @event_separator = event_separator
  @event_separator_bytesize = @event_separator.bytesize
- @block_numbers = nil
  end
 
  # concatenate two blocks into one
  def concat ( other )
  if @bytesize + other.bytesize <= BLOB_BLOCK_MAX_BYTESIZE
  if @block_numbers
- @block_numbers.concat( other.block_numbers ) if @block_numbers
+ @block_numbers.concat( other.block_numbers )
  @bytes += other.bytes
  @done_time = other.done_time if other.done_time > @done_time
  else
@@ -84,20 +81,30 @@ class LogStash::Outputs::Application_insights
 
  def dispose
  @bytes = nil
- @buffer = nil
- @bytesize = nil
- @events_count = nil
+ @buffer = [ ]
+ @bytesize = 0
+ @events_count = 0
  @done_time = nil
  @oldest_event_time = nil
  @block_numbers = nil
  end
 
+
+ def partial_seal
+ if @done_time.nil?
+ @done_time = Time.now.utc
+ @buffer << "" # required to add eol after last event
+ @bytes = @buffer.join( @event_separator )
+ @buffer = nil # release the memory of the array
+ end
+ end
+
+
  def seal
- @block_numbers = [ Block.generate_block_number ]
- @done_time = Time.now.utc
- @buffer << "" # required to add eol after last event
- @bytes = @buffer.join( @event_separator )
- @buffer = nil # release the memory of the array
+ if @done_time.nil?
+ @block_numbers = [ Block.generate_block_number ]
+ partial_seal
+ end
  end
 
  def is_full?
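
Note: seal is now guarded by @done_time, so a block whose content was already frozen by partial_seal (the file-pipe path, which needs no Azure block number) is not rebuilt when seal runs again later. A minimal standalone sketch of the new lifecycle; SketchBlock is a hypothetical, stripped-down stand-in (no block numbers, no size accounting):

    # sketch of the seal / partial_seal split above (hypothetical names)
    class SketchBlock
      attr_accessor :bytes, :buffer, :done_time

      def initialize( event_separator = "" )
        @event_separator = event_separator
        dispose
      end

      def dispose
        @bytes = nil
        @buffer = [ ]
        @done_time = nil
      end

      def partial_seal
        if @done_time.nil?
          @done_time = Time.now.utc
          @buffer << ""                        # adds eol after the last event
          @bytes = @buffer.join( @event_separator )
          @buffer = nil                        # release the array
        end
      end

      def seal
        partial_seal                           # no-op if already sealed
      end
    end

    b = SketchBlock.new( "\n" )
    b.buffer << "event-1" << "event-2"
    b.partial_seal        # e.g. when the block is compressed into a local file
    b.seal                # safe second call: content is not rebuilt
    puts b.bytes.inspect  # => "event-1\nevent-2\n"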

data/lib/logstash/outputs/application_insights/channel.rb
@@ -24,16 +24,20 @@ class LogStash::Outputs::Application_insights
 
  attr_reader :instrumentation_key
  attr_reader :table_id
- attr_reader :failed_on_upload_retry_Q
- attr_reader :failed_on_notify_retry_Q
- attr_reader :event_format_ext
  attr_reader :blob_max_delay
+ attr_reader :blob_extension
+ attr_reader :event_format
 
  public
 
  def initialize ( instrumentation_key, table_id )
  @closing = false
  configuration = Config.current
+
+ @file_pipe = !configuration[:disable_compression]
+ @gzip_file = !configuration[:disable_compression]
+ @blob_max_bytesize = configuration[:blob_max_bytesize]
+ @blob_max_events = configuration[:blob_max_events]
 
  @logger = configuration[:logger]
 
@@ -42,19 +46,41 @@ class LogStash::Outputs::Application_insights
  @table_id = table_id
  set_table_properties( configuration )
  @semaphore = Mutex.new
- @failed_on_upload_retry_Q = Queue.new
- @failed_on_notify_retry_Q = Queue.new
  @workers_channel = { }
- @active_blobs = [ Blob.new( self, 1 ) ]
 
- launch_upload_recovery_thread
+ @failed_on_notify_retry_Q = Queue.new
  launch_notify_recovery_thread
+
+ @blob_extension = ".#{@event_format}"
+ if file_pipe?
+ @blob_extension = "_#{@event_format}.gz" if gzip_file?
+ @add_pipe_threshold = 0
+ @file_prefix = configuration[:local_file_prefix]
+ @file = nil
+ @failed_on_file_upload_retry_Q = Queue.new
+ launch_file_upload_recovery_thread
+ else
+ @add_pipe_threshold = CHANNEL_THRESHOLD_TO_ADD_UPLOAD_PIPE
+ @failed_on_block_upload_retry_Q = Queue.new
+ launch_block_upload_recovery_thread
+ end
+
+ @active_upload_pipes = [ Upload_pipe.new( self, 1 ) ]
+ end
+
+
+ def gzip_file?
+ @gzip_file
+ end
+
+ def file_pipe?
+ @file_pipe
  end
 
  def close
  @closing = true
- @active_blobs.each do |blob|
- blob.close
+ @active_upload_pipes.each do |upload_pipe|
+ upload_pipe.close
  end
  end
 
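Note: a single disable_compression flag selects the channel mode. When compression is enabled the channel stages events in a gzipped local file (the file pipe) and the blob extension carries the format plus .gz; otherwise it keeps the 0.1.x per-block upload path. A hedged sketch of just the extension derivation shown above, as a free-standing helper made up for illustration:

    # hypothetical helper mirroring the @blob_extension logic in initialize
    def blob_extension( event_format, file_pipe, gzip_file )
      ext = ".#{event_format}"
      ext = "_#{event_format}.gz" if file_pipe && gzip_file
      ext
    end

    puts blob_extension( "json", true, true )    # => "_json.gz"
    puts blob_extension( "csv", false, false )   # => ".csv"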
@@ -62,29 +88,102 @@ class LogStash::Outputs::Application_insights
  @closing
  end
 
- def << ( event )
- if @serialized_event_field && event[@serialized_event_field]
- serialized_event = serialize_serialized_event_field( event[@serialized_event_field] )
+ # received data is a hash of the event (does not include metadata)
+ def << ( data )
+ if @serialized_event_field && data[@serialized_event_field]
+ serialized_event = serialize_serialized_event_field( data[@serialized_event_field] )
  else
- serialized_event = ( EXT_EVENT_FORMAT_CSV == @serialization ? serialize_to_csv( event ) : serialize_to_json( event ) )
+ serialized_event = ( EXT_EVENT_FORMAT_CSV == @event_format ? serialize_to_csv( data ) : serialize_to_json( data ) )
  end
 
  if serialized_event
  sub_channel = @workers_channel[Thread.current] || @semaphore.synchronize { @workers_channel[Thread.current] = Sub_channel.new( @event_separator ) }
  sub_channel << serialized_event
  else
- @logger.warn { "event not uploaded, no relevant data in event. table_id: #{table_id}, event: #{event}" }
+ @logger.warn { "event not uploaded, no relevant data in event. table_id: #{table_id}, event: #{data}" }
  end
  end
 
+
  def flush
- block_list = collect_blocks
- enqueue_blocks( block_list )
+ if file_pipe?
+ gz_collect_and_compress_blocks_to_file
+ if file_expired_or_full?
+ enqueue_to_pipe( [ @file ] )
+ @file = nil
+ end
+ else
+ list = collect_blocks
+ enqueue_to_pipe( list )
+ end
+ end
+
+
+ def recover_later_notification( tuple )
+ @failed_on_notify_retry_Q << tuple
+ end
+
+
+ def recover_later_block_upload( block_to_upload )
+ @failed_on_block_upload_retry_Q << block_to_upload
  end
 
+ def recover_later_file_upload( file_to_upload )
+ # start the file from the beginning
+ file_to_upload.close_read
+ @failed_on_file_upload_retry_Q << file_to_upload
+ end
 
  private
 
+ def local_file_name
+ time_utc = Time.now.utc
+ strtime = Time.now.utc.strftime( "%F-%H-%M-%S-%L" )
+ "#{@file_prefix}_ikey-#{@instrumentation_key}_table-#{@table_id}_#{strtime}#{@blob_extension}"
+ end
+
+
+ def local_file
+ @file ||= Local_file.new( local_file_name, gzip_file? )
+ end
+
+
+ def file_expired_or_full?
+ @file && ( @file.oldest_event_time + @blob_max_delay <= Time.now.utc || @file.bytesize >= @blob_max_bytesize || @file.events_count >= @blob_max_events )
+ end
+
+
+ def gz_collect_and_compress_blocks_to_file
+ workers_channel = @semaphore.synchronize { @workers_channel.dup }
+ full_block_list = [ ]
+
+ workers_channel.each_value do |worker_channel|
+ full_block_list.concat( worker_channel.get_block_list! )
+ end
+
+ full_block_list.each do |block|
+ block.partial_seal
+ local_file << block
+ end
+ end
+
+
+ def launch_file_upload_recovery_thread
+ #recovery thread
+ Thread.new do
+ loop do
+ file_to_upload = @failed_on_file_upload_retry_Q.pop
+ until Clients.instance.storage_account_state_on? do
+ Stud.stoppable_sleep( 60 ) { stopped? }
+ end
+ if file_to_upload
+ enqueue_to_pipe( [ file_to_upload ] )
+ end
+ end
+ end
+ end
+
+
  def collect_blocks
  workers_channel = @semaphore.synchronize { @workers_channel.dup }
  full_block_list = [ ]
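
Note: with the file pipe, flush no longer uploads on every call. It drains each worker's blocks, partial-seals them into the local gzip file, and only hands the file to an upload pipe once it is older than blob_max_delay or crosses the size/count limits. A self-contained model of that rollover predicate (FileState and the threshold values are illustrative, not the plugin's types):

    # hypothetical stand-in for the Local_file accessors used above
    FileState = Struct.new( :oldest_event_time, :bytesize, :events_count )

    def file_expired_or_full?( file, max_delay, max_bytesize, max_events )
      return false unless file
      file.oldest_event_time + max_delay <= Time.now.utc ||
        file.bytesize >= max_bytesize ||
        file.events_count >= max_events
    end

    f = FileState.new( Time.now.utc - 120, 10_000, 50 )
    puts file_expired_or_full?( f, 60, 4_000_000, 200_000 )   # => true (older than 60s)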
@@ -110,26 +209,24 @@ class LogStash::Outputs::Application_insights
  end
 
 
- def enqueue_blocks ( block_list )
- block_list.each do |block|
- block.seal
- find_blob << block
+ def enqueue_to_pipe ( list )
+ list.each do |block_or_file|
+ block_or_file.seal
+ find_upload_pipe << block_or_file
  end
  end
 
 
- def launch_upload_recovery_thread
+ def launch_block_upload_recovery_thread
  #recovery thread
  Thread.new do
- next_block = nil
  loop do
- block_to_upload = next_block || @failed_on_upload_retry_Q.pop
- next_block = nil
+ block_to_upload = @failed_on_block_upload_retry_Q.pop
  until Clients.instance.storage_account_state_on? do
  Stud.stoppable_sleep( 60 ) { stopped? }
  end
  if block_to_upload
- find_blob << block_to_upload
+ enqueue_to_pipe( [ block_to_upload ] )
  end
  end
  end
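
Note: the recovery threads now share one shape: block on the retry queue, wait out a storage outage with an interruptible sleep, then re-enqueue through the normal pipe path. Stud.stoppable_sleep (from the stud gem that Logstash plugins depend on) sleeps up to the given duration but wakes early once its block returns true. A self-contained sketch of the loop; the queue item and the two flags are made-up stand-ins for the plugin's state checks:

    require "stud/interval"   # gem "stud"; provides Stud.stoppable_sleep

    retry_q    = Queue.new
    storage_on = true         # stand-in for Clients.instance.storage_account_state_on?
    stopping   = false        # stand-in for stopped?

    recovery = Thread.new do
      loop do
        item = retry_q.pop                            # blocks until a failure is queued
        until storage_on                              # wait out a storage outage...
          Stud.stoppable_sleep( 60 ) { stopping }     # ...but wake early on shutdown
        end
        puts "re-enqueueing #{item}" if item
      end
    end

    retry_q << "block-17"
    sleep 1                   # let the thread drain the queue
    recovery.kill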
@@ -152,10 +249,10 @@ class LogStash::Outputs::Application_insights
  @shutdown ||= Shutdown.instance
  @shutdown.display_msg("!!! notification won't recover in this session due to shutdown")
  else
- success = Blob.new.notify( tuple )
+ success = Notification.new( tuple ).notify
  while success && @failed_on_notify_retry_Q.length > 0
  tuple = @failed_on_notify_retry_Q.pop
- success = Blob.new.notify( tuple )
+ success = Notification.new( tuple ).notify
  end
  end
  tuple = nil # release for GC
@@ -168,13 +265,13 @@ class LogStash::Outputs::Application_insights
  serialized_data = nil
  if data.is_a?( String )
  serialized_data = data
- elsif EXT_EVENT_FORMAT_CSV == @serialization
+ elsif EXT_EVENT_FORMAT_CSV == @event_format
  if data.is_a?( Array )
  serialized_data = data.to_csv( :col_sep => @csv_separator )
  elsif data.is_a?( Hash )
  serialized_data = serialize_to_csv( data )
  end
- elsif EXT_EVENT_FORMAT_JSON == @serialization
+ elsif EXT_EVENT_FORMAT_JSON == @event_format
  if data.is_a?( Hash )
  serialized_data = serialize_to_json( data )
  elsif data.is_a?( Array ) && !@table_columns.nil?
@@ -185,14 +282,14 @@ class LogStash::Outputs::Application_insights
  end
 
 
- def serialize_to_json ( event )
- return event.to_json unless !@table_columns.nil?
+ def serialize_to_json ( data )
+ return data.to_json unless !@table_columns.nil?
 
- fields = ( @case_insensitive_columns ? Utils.downcase_hash_keys( event.to_hash ) : event )
+ data = Utils.downcase_hash_keys( data ) if @case_insensitive_columns
 
  json_hash = { }
  @table_columns.each do |column|
- value = fields[column[:field_name]] || column[:default]
+ value = data[column[:field_name]] || column[:default]
  json_hash[column[:name]] = value if value
  end
  return nil if json_hash.empty?
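
Note: case-insensitive matching now downcases the incoming hash once instead of keeping a separate fields copy, and the renamed data parameter makes explicit that a plain hash (not a LogStash::Event) arrives here. A self-contained sketch of the column-driven JSON mapping; the column list and event hash are sample data, and Utils.downcase_hash_keys is approximated inline:

    require "json"

    table_columns = [
      { :name => "Message",  :field_name => "message" },
      { :name => "Severity", :field_name => "level", :default => "info" }
    ]

    data = { "Message" => "disk full", "host" => "web-1" }
    data = data.map { |k, v| [ k.downcase, v ] }.to_h   # case_insensitive_columns

    json_hash = { }
    table_columns.each do |column|
      value = data[column[:field_name]] || column[:default]
      json_hash[column[:name]] = value if value
    end

    puts json_hash.to_json   # => {"Message":"disk full","Severity":"info"}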
@@ -200,14 +297,14 @@ class LogStash::Outputs::Application_insights
  end
 
 
- def serialize_to_csv ( event )
+ def serialize_to_csv ( data )
  return nil unless !@table_columns.nil?
 
- fields = ( @case_insensitive_columns ? Utils.downcase_hash_keys( event.to_hash ) : event )
+ data = Utils.downcase_hash_keys( data ) if @case_insensitive_columns
 
  csv_array = [ ]
  @table_columns.each do |column|
- value = fields[column[:field_name]] || column[:default] || @csv_default_value
+ value = data[column[:field_name]] || column[:default] || @csv_default_value
  type = (column[:type] || value.class.name).downcase.to_sym
  csv_array << ( [:hash, :array, :json, :dynamic, :object].include?( type ) ? value.to_json : value )
  end
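
Note: the CSV path applies the same column mapping but JSON-encodes structured values (hash, array, dynamic, ...) so each lands in a single CSV field. A sketch with sample columns and a sample event; the column list and csv_default_value are assumptions for illustration:

    require "csv"
    require "json"

    table_columns = [
      { :name => "host",  :field_name => "host",  :type => "string" },
      { :name => "props", :field_name => "props", :type => "dynamic" }
    ]
    csv_default_value = "null"

    data = { "host" => "web-1", "props" => { "region" => "eu", "retries" => 2 } }

    csv_array = table_columns.map do |column|
      value = data[column[:field_name]] || column[:default] || csv_default_value
      type  = ( column[:type] || value.class.name ).downcase.to_sym
      [ :hash, :array, :json, :dynamic, :object ].include?( type ) ? value.to_json : value
    end

    puts csv_array.to_csv   # => web-1,"{""region"":""eu"",""retries"":2}"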
@@ -216,14 +313,14 @@ class LogStash::Outputs::Application_insights
  end
 
 
- def find_blob
- min_blob = @active_blobs[0]
- @active_blobs.each do |blob|
- return blob if 0 == blob.queue_size
- min_blob = blob if blob.queue_size < min_blob.queue_size
+ def find_upload_pipe
+ min_upload_pipe = @active_upload_pipes[0]
+ @active_upload_pipes.each do |upload_pipe|
+ return upload_pipe unless min_upload_pipe.busy?
+ min_upload_pipe = upload_pipe if upload_pipe.queue_size < min_upload_pipe.queue_size
  end
- @active_blobs << ( min_blob = Blob.new( self, @active_blobs.length + 1 ) ) if min_blob.queue_size > 2 && @active_blobs.length < 40
- min_blob
+ @active_upload_pipes << ( min_upload_pipe = Upload_pipe.new( self, @active_upload_pipes.length + 1 ) ) if min_upload_pipe.busy? && min_upload_pipe.queue_size >= @add_pipe_threshold && @active_upload_pipes.length < MAX_CHANNEL_UPLOAD_PIPES
+ min_upload_pipe
  end
 
 
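Note: the old hard-coded growth rule (queue longer than 2, at most 40 blobs) is replaced by a busy? check against the mode-dependent @add_pipe_threshold, so in file-pipe mode (threshold 0) any busy pipe immediately spawns another, up to MAX_CHANNEL_UPLOAD_PIPES. A simplified, hypothetical model of the selection policy; Pipe stands in for Upload_pipe and only tracks queue length:

    Pipe = Struct.new( :queue_size ) do
      def busy?
        queue_size > 0
      end
    end

    def find_pipe( pipes, add_threshold, max_pipes )
      min = pipes.first
      pipes.each do |pipe|
        return pipe unless min.busy?       # as in the diff: idle minimum ends the search
        min = pipe if pipe.queue_size < min.queue_size
      end
      if min.busy? && min.queue_size >= add_threshold && pipes.length < max_pipes
        pipes << ( min = Pipe.new( 0 ) )   # grow the pool with a fresh pipe
      end
      min
    end

    pipes = [ Pipe.new( 3 ), Pipe.new( 1 ) ]
    puts find_pipe( pipes, 0, 4 ).queue_size   # => 0 (a new pipe was added)
    puts pipes.length                          # => 3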
@@ -235,16 +332,17 @@ class LogStash::Outputs::Application_insights
  @event_separator = table_properties[:event_separator]
  @serialized_event_field = table_properties[:serialized_event_field]
  @table_columns = table_properties[:table_columns]
- @serialization = table_properties[:blob_serialization]
+ @event_format = table_properties[:blob_serialization]
  @case_insensitive_columns = table_properties[:case_insensitive_columns]
  @csv_default_value = table_properties[:csv_default_value]
  @csv_separator = table_properties[:csv_separator]
  end
+
  @blob_max_delay ||= configuration[:blob_max_delay]
  @event_separator ||= configuration[:event_separator]
  @serialized_event_field ||= configuration[:serialized_event_field]
  @table_columns ||= configuration[:table_columns]
- @serialization ||= configuration[:blob_serialization]
+ @event_format ||= configuration[:blob_serialization]
  @case_insensitive_columns ||= configuration[:case_insensitive_columns]
  @csv_default_value ||= configuration[:csv_default_value]
  @csv_separator ||= configuration[:csv_separator]
@@ -258,9 +356,6 @@ class LogStash::Outputs::Application_insights
  end
  end
 
- # in the future, when compression is introduced, the serialization may be different from the extension
- @event_format_ext = @serialization
-
  end
 
  end

data/lib/logstash/outputs/application_insights/channels.rb
@@ -52,11 +52,12 @@ class LogStash::Outputs::Application_insights
  elsif LogStash::FLUSH == event
  @logger.info { "received a LogStash::FLUSH event" }
  else
- table_id = event[METADATA_FIELD_TABLE_ID] || event[FIELD_TABLE_ID] || @default_table_id
- instrumentation_key = event[METADATA_FIELD_INSTRUMENTATION_KEY] || event[FIELD_INSTRUMENTATION_KEY] || ( @tables[table_id][:instrumentation_key] if @tables[table_id] ) || @default_instrumentation_key
+ data = event.to_hash
+ table_id = ( event.include?( METADATA_FIELD_TABLE_ID ) ? event.sprintf( "%{#{METADATA_FIELD_TABLE_ID}}" ) : data[FIELD_TABLE_ID] ) || @default_table_id
+ instrumentation_key = ( event.include?( METADATA_FIELD_INSTRUMENTATION_KEY ) ? event.sprintf( "%{#{METADATA_FIELD_INSTRUMENTATION_KEY}}" ) : data[FIELD_INSTRUMENTATION_KEY] ) || @default_instrumentation_key
 
  @flow_control.pass_or_wait
- channel( instrumentation_key, table_id ) << event
+ channel( instrumentation_key, table_id ) << data
  end
  end
 
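Note: routing now resolves the table id and instrumentation key through event.sprintf, because event.to_hash does not carry @metadata; sprintf is what lets a "%{[@metadata][...]}" reference reach the routing fields. A simplified model of the fallback order, with plain hashes standing in for the event API and all values made up:

    # hypothetical stand-ins: metadata for event.include?/event.sprintf,
    # data for event.to_hash, default for the plugin-level setting
    metadata         = { "[@metadata][table_id]" => "tickets" }
    data             = { "message" => "hi" }
    default_table_id = "default_table"

    table_id =
      ( metadata.key?( "[@metadata][table_id]" ) ?
          metadata["[@metadata][table_id]"] :
          data["table_id"] ) || default_table_id

    puts table_id   # => "tickets"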

data/lib/logstash/outputs/application_insights/clients.rb
@@ -81,7 +81,7 @@ class LogStash::Outputs::Application_insights
  test_storage = Test_storage.new( account_name )
  loop do
  sleep( @resurrect_delay )
- if test_storage.submit
+ if test_storage.test
  @state_semaphore.synchronize {
  storage_account = @storage_accounts[account_name]
  storage_account[:off_reason] = [ ]

data/lib/logstash/outputs/application_insights/config.rb
@@ -213,9 +213,10 @@ class LogStash::Outputs::Application_insights
  }
  validate_and_adjust_table_properties!( configuration, configuration )
 
- configuration[:state_table_name] = "#{AZURE_STORAGE_TABLE_LOGSTASH_PREFIX}#{configuration[:azure_storage_table_prefix]}#{STATE_TABLE_NAME}"
- configuration[:test_storage_container] = "#{AZURE_STORAGE_CONTAINER_LOGSTASH_PREFIX}#{configuration[:azure_storage_container_prefix]}-#{STORAGE_TEST_CONTAINER_NAME}"
+ configuration[:state_table_name] = AZURE_STORAGE_TABLE_LOGSTASH_PREFIX + configuration[:azure_storage_table_prefix] + STATE_TABLE_NAME
+ configuration[:test_storage_container] = AZURE_STORAGE_CONTAINER_LOGSTASH_PREFIX + configuration[:azure_storage_container_prefix] + "-" + STORAGE_TEST_CONTAINER_NAME
  configuration[:partition_key_prefix] = configuration[:azure_storage_blob_prefix].gsub( "/", "" )
+ configuration[:local_file_prefix] = LOCAL_FS_FILE_PREFIX + configuration[:azure_storage_blob_prefix].gsub( "/", "_" )
 
  @@masked_configuration = mask_configuration( configuration )
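
Note: the derived names switch from string interpolation to plain concatenation (behavior-neutral), and a new local_file_prefix is derived from the blob prefix with "/" mapped to "_" so the staging files get filesystem-safe names. A quick illustration; "logstash_local_" is a made-up stand-in for LOCAL_FS_FILE_PREFIX:

    azure_storage_blob_prefix = "myapp/prod"

    partition_key_prefix = azure_storage_blob_prefix.gsub( "/", "" )
    local_file_prefix    = "logstash_local_" + azure_storage_blob_prefix.gsub( "/", "_" )

    puts partition_key_prefix   # => "myappprod"
    puts local_file_prefix      # => "logstash_local_myapp_prod"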