logstash-input-azureblob-offline 0.9.13.1-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +7 -0
- data/Gemfile +2 -0
- data/LICENSE +17 -0
- data/README.md +266 -0
- data/lib/com/microsoft/json-parser.rb +202 -0
- data/lib/logstash/inputs/azureblob.rb +525 -0
- data/logstash-input-azureblob.gemspec +32 -0
- data/spec/com/microsoft/json-parser_spec.rb +280 -0
- data/spec/inputs/azureblob_spec.rb +324 -0
- metadata +168 -0
data/lib/logstash/inputs/azureblob.rb
@@ -0,0 +1,525 @@

```ruby
# encoding: utf-8
require "logstash/inputs/base"
require "logstash/namespace"

# Azure Storage SDK for Ruby
require "azure/storage"
require 'json' # for registry content
require "securerandom" # for generating uuid.

require "com/microsoft/json-parser"

#require Dir[ File.dirname(__FILE__) + "/../../*_jars.rb" ].first

# Registry item to coordinate between multiple clients
class LogStash::Inputs::RegistryItem
  attr_accessor :file_path, :etag, :offset, :reader, :gen

  # Allow JSON serialization.
  def as_json(options = {})
    {
      file_path: @file_path,
      etag: @etag,
      reader: @reader,
      offset: @offset,
      gen: @gen
    }
  end # as_json

  def to_json(*options)
    as_json(*options).to_json(*options)
  end # to_json

  def initialize(file_path, etag, reader, offset = 0, gen = 0)
    @file_path = file_path
    @etag = etag
    @reader = reader
    @offset = offset
    @gen = gen
  end # initialize
end # class RegistryItem
```
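Because `RegistryItem` defines `to_json` and `deserialize_registry_hash` (further down) rebuilds items from the parsed hash, the registry round-trips cleanly through JSON. A minimal sketch of that round-trip, using hypothetical path, etag, and offset values:

```ruby
require 'json'

item = LogStash::Inputs::RegistryItem.new('logs/app.log', '0x8D0DEADBEEF', nil, 1024, 0)
registry_hash = { item.file_path => item }

# save_registry serializes the whole hash; the JSON generator invokes
# RegistryItem#to_json for each value.
json = JSON.generate(registry_hash)

# deserialize_registry_hash rebuilds the items from the parsed values.
restored = {}
JSON.parse(json).values.each do |kvp|
  restored[kvp['file_path']] = LogStash::Inputs::RegistryItem.new(
    kvp['file_path'], kvp['etag'], kvp['reader'], kvp['offset'], kvp['gen'])
end

restored['logs/app.log'].offset # => 1024
```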
```ruby
# Logstash input plugin for Azure Blobs
#
# This logstash plugin gathers data from Microsoft Azure Blobs
class LogStash::Inputs::LogstashInputAzureblob < LogStash::Inputs::Base
  config_name 'azureblob-offline'

  # If undefined, Logstash will complain, even if codec is unused.
  default :codec, 'json_lines'

  # Set the account name for the azure storage account.
  config :storage_account_name, :validate => :string

  # Set the key to access the storage account.
  config :storage_access_key, :validate => :string

  # Set the container of the blobs.
  config :container, :validate => :string

  # The path(s) to the file(s) to use as an input. By default it will
  # watch every file in the storage container.
  # You can use filename patterns here, such as `logs/*.log`.
  # If you use a pattern like `logs/**/*.log`, a recursive search
  # of `logs` will be done for all `*.log` files.
  # Do not include a leading `/`, as Azure paths look like this:
  # `path/to/blob/file.txt`
  #
  # You may also configure multiple paths. See an example
  # on the <<array,Logstash configuration page>>.
  config :path_filters, :validate => :array, :default => [], :required => false

  # Set the endpoint for the blobs.
  #
  # The default, `core.windows.net`, targets the public Azure cloud.
  config :endpoint, :validate => :string, :default => 'core.windows.net'

  # Set whether to use backup mode.
  config :backupmode, :validate => :boolean, :default => false, :deprecated => true, :obsolete => 'This option is obsoleted and the settings will be ignored.'

  # Set the path of the registry file.
  #
  # The default, `data/registry`, is used to coordinate readings between multiple client instances.
  config :registry_path, :validate => :string, :default => 'data/registry'

  # Sets the registry-file lock duration in seconds. It must be -1, or between 15 and 60 inclusive.
  #
  # The default, `15`, means the registry file will be locked for at most 15 seconds. This is usually
  # sufficient to read the registry content. Using a lease lets the lock expire on its own in case a
  # client crashes before it gets a chance to release the lease on the registry.
  config :registry_lease_duration, :validate => :number, :default => 15

  # Set how many seconds to stay idle before checking for new logs.
  #
  # The default, `30`, means a read of the logs is triggered every 30 seconds after entering idle.
  config :interval, :validate => :number, :default => 30

  # Set the registry create mode.
  #
  # The default, `resume`, means that when the registry is initially created, it assumes all logs have
  # already been handled.
  # When set to `start_over`, all log files are read from the beginning.
  config :registry_create_policy, :validate => :string, :default => 'resume'

  # Sets the size, in bytes, of the file header that does not repeat across records. Usually these are JSON opening tags.
  config :file_head_bytes, :validate => :number, :default => 0

  # Sets the size, in bytes, of the file tail that does not repeat across records. Usually these are JSON closing tags.
  config :file_tail_bytes, :validate => :number, :default => 0

  # Sets how to break up JSON.
  #
  # Only works when the codec is set to `json`. Sets the policy for breaking the JSON objects in an
  # array into smaller events. Breaking the JSON into small sections is less efficient than keeping
  # it whole, but reduces memory usage.
  # Possible options: `do_not_break`, `with_head_tail`, `without_head_tail`
  config :break_json_down_policy, :validate => :string, :default => 'do_not_break', :obsolete => 'This option is obsoleted and the settings will be ignored.'

  # Sets how many JSON objects are put into one batch when the JSON is broken down.
  config :break_json_batch_count, :validate => :number, :default => 10, :obsolete => 'This option is obsoleted and the settings will be ignored.'

  # Sets the page size for returned blob items. Too large a number can cause a heap overflow; too
  # small a number leads to too many requests.
  #
  # The default, `100`, is good for the default heap size of 1 GB.
  config :blob_list_page_size, :validate => :number, :default => 100

  # Sets the chunk size, in bytes, used when reading blob content. The default is 4 MB.
  config :file_chunk_size_bytes, :validate => :number, :default => 4 * 1024 * 1024

  # When true, add a field carrying the source blob's path to each event.
  config :azure_blob_file_path_field, :validate => :boolean, :default => false

  # Name of the field that carries the source blob's path.
  config :azure_blob_file_path_field_name, :validate => :string, :default => "azureblobfilepath"
```
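The `path_filters` globs are matched with `File.fnmatch?` using the `FNM_PATHNAME | FNM_EXTGLOB` flags (see `list_all_blobs` below). A quick sketch of what those flags do, with hypothetical blob names:

```ruby
flags = File::FNM_PATHNAME | File::FNM_EXTGLOB

# With FNM_PATHNAME set, `*` does not cross `/`, but `**/` recurses.
File.fnmatch?('logs/**/*.log', 'logs/2017/06/app.log', flags) # => true
File.fnmatch?('logs/*.log',    'logs/2017/06/app.log', flags) # => false

# FNM_EXTGLOB enables shell-style brace alternation.
File.fnmatch?('audit/{a,b}.txt', 'audit/a.txt', flags)        # => true
```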
```ruby
  # Constant of max integer
  MAX = 2**([42].pack('i').size * 16 - 2) - 1

  # Update the registry offset each time after this number of entries have been processed
  UPDATE_REGISTRY_COUNT = 100
```
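For reference, `[42].pack('i').size` is the native int width in bytes, so on the common 4-byte-int platforms the `MAX` expression evaluates to 2**62 - 1, which is also the largest Fixnum on 64-bit MRI. A sketch you can paste into irb:

```ruby
[42].pack('i').size                    # => 4 on common platforms
2**([42].pack('i').size * 16 - 2) - 1  # => 4611686018427387903 (2**62 - 1)
```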
```ruby
  public
  def register
    user_agent = 'logstash-input-azureblob-offline'

    # This is the reader id for this specific instance.
    @reader = SecureRandom.uuid

    # Set up a specific instance of an Azure::Storage::Client.
    client = Azure::Storage::Client.create(
      :storage_account_name => @storage_account_name,
      :storage_access_key => @storage_access_key,
      :storage_blob_host => "https://#{@storage_account_name}.blob.#{@endpoint}",
      :user_agent_prefix => user_agent)
    # Get an Azure storage blob service object from the client.
    @azure_blob = client.blob_client
    # Add a retry filter to the service object.
    @azure_blob.with_filter(Azure::Storage::Core::Filter::ExponentialRetryPolicyFilter.new)
  end # def register

  def run(queue)
    # We can abort the loop if stop? becomes true.
    while !stop?
      process(queue)
      @logger.debug("Hitting interval of #{@interval}s . . .")
      Stud.stoppable_sleep(@interval) { stop? }
    end # loop
  end # def run

  def stop
    cleanup_registry
  end # def stop

  # Start processing the next item.
  def process(queue)
    begin
      @processed_entries = 0
      blob, start_index, gen = register_for_read

      unless blob.nil?
        begin
          blob_name = blob.name
          @logger.debug("Processing blob #{blob.name}")
          blob_size = blob.properties[:content_length]
          # Work-around: after being returned by get_blob, the etag contains quotes.
          new_etag = blob.properties[:etag]
          # ~ Work-around

          blob, header = @azure_blob.get_blob(@container, blob_name, {:end_range => (@file_head_bytes - 1)}) if header.nil? unless @file_head_bytes.nil? or @file_head_bytes <= 0

          blob, tail = @azure_blob.get_blob(@container, blob_name, {:start_range => blob_size - @file_tail_bytes}) if tail.nil? unless @file_tail_bytes.nil? or @file_tail_bytes <= 0

          if start_index == 0
            # Skip the header since it is already read.
            start_index = @file_head_bytes
          end

          @logger.debug("start index: #{start_index} blob size: #{blob_size}")

          content_length = 0
          blob_reader = BlobReader.new(@logger, @azure_blob, @container, blob_name, file_chunk_size_bytes, start_index, blob_size - 1 - @file_tail_bytes)

          is_json_codec = (defined?(LogStash::Codecs::JSON) == 'constant') && (@codec.is_a? LogStash::Codecs::JSON)
          if is_json_codec
            parser = JsonParser.new(@logger, blob_reader)

            parser.parse(->(json_content) {
              content_length += json_content.length

              enqueue_content(queue, json_content, header, tail, blob_name)

              on_entry_processed(start_index, content_length, blob_name, new_etag, gen)
            }, ->(malformed_json) {
              @logger.debug("Skipping #{malformed_json.length} malformed bytes")
              content_length = content_length + malformed_json.length

              on_entry_processed(start_index, content_length, blob_name, new_etag, gen)
            })
          else
            begin
              content, are_more_bytes_available = blob_reader.read

              content_length += content.length
              enqueue_content(queue, content, header, tail, blob_name)

              on_entry_processed(start_index, content_length, blob_name, new_etag, gen)
            end until !are_more_bytes_available || content.nil?
          end # if
        ensure
          # Make sure the reader is removed from the registry even when there is an exception.
          request_registry_update(start_index, content_length, blob_name, new_etag, gen)
        end # begin
      end # unless
    rescue => e
      @logger.error("Oh My, An error occurred. Error:#{e}: Trace: #{e.backtrace}", :exception => e)
    end # begin
  end # process

  def enqueue_content(queue, content, header, tail, blob_name)
    if (header.nil? || header.length == 0) && (tail.nil? || tail.length == 0)
      # Skip some unnecessary copying.
      full_content = content
    else
      full_content = ''
      full_content << header unless header.nil? || header.length == 0
      full_content << content
      full_content << tail unless tail.nil? || tail.length == 0
    end

    @codec.decode(full_content) do |event|
      if @azure_blob_file_path_field
        event.set(@azure_blob_file_path_field_name, blob_name)
      end
      decorate(event)
      queue << event
    end
  end
```
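To see why `file_head_bytes` and `file_tail_bytes` exist, consider a blob that is one large JSON document: the opening and closing tags appear only once, so `enqueue_content` has to re-wrap each extracted piece before the codec can decode it. A sketch with a hypothetical blob layout:

```ruby
blob = '{"records":[{"id":1},{"id":2}]}' # hypothetical blob content

head_bytes = '{"records":['.bytesize # file_head_bytes => 12
tail_bytes = ']}'.bytesize           # file_tail_bytes  => 2

head = blob[0, head_bytes]           # => '{"records":['
tail = blob[-tail_bytes, tail_bytes] # => ']}'

# Re-wrapping one extracted piece yields a self-contained JSON document:
head + '{"id":1}' + tail # => '{"records":[{"id":1}]}'
```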
```ruby
  def on_entry_processed(start_index, content_length, blob_name, new_etag, gen)
    @processed_entries += 1
    request_registry_update(start_index, content_length, blob_name, new_etag, gen) if @processed_entries % UPDATE_REGISTRY_COUNT == 0
  end

  def request_registry_update(start_index, content_length, blob_name, new_etag, gen)
    new_offset = start_index
    new_offset += content_length unless content_length.nil?
    @logger.debug("New registry offset: #{new_offset}")
    new_registry_item = LogStash::Inputs::RegistryItem.new(blob_name, new_etag, nil, new_offset, gen)
    update_registry(new_registry_item)
  end

  # Deserialize the registry hash from a JSON string.
  def deserialize_registry_hash(json_string)
    result = Hash.new
    temp_hash = JSON.parse(json_string)
    temp_hash.values.each { |kvp|
      result[kvp['file_path']] = LogStash::Inputs::RegistryItem.new(kvp['file_path'], kvp['etag'], kvp['reader'], kvp['offset'], kvp['gen'])
    }
    return result
  end # deserialize_registry_hash

  # List all the blobs in the given container.
  def list_all_blobs
    blobs = Set.new []
    continuation_token = nil
    @blob_list_page_size = 100 if @blob_list_page_size <= 0
    loop do
      # Limit the number of returned entries to avoid an out-of-memory exception.
      entries = @azure_blob.list_blobs(@container, { :timeout => 60, :marker => continuation_token, :max_results => @blob_list_page_size })
      if @path_filters.empty?
        entries.each do |entry|
          blobs << entry
        end # each
      else
        # Add the registry_path to the list of matched blobs.
        @path_filters << @registry_path
        entries.each do |entry|
          # FNM_PATHNAME is required so that "**/test" can match "test" at the root folder.
          # FNM_EXTGLOB allows you to use "test{a,b,c}" to match "testa", "testb" or "testc" (closer to shell behavior).
          matched = @path_filters.any? { |path| File.fnmatch?(path, entry.name, File::FNM_PATHNAME | File::FNM_EXTGLOB) }
          blobs << entry if matched
        end # each
      end
      continuation_token = entries.continuation_token
      break if continuation_token.empty?
    end # loop
    return blobs
  end # def list_all_blobs

  # Raise the generation for a blob in the registry.
  def raise_gen(registry_hash, file_path)
    begin
      target_item = registry_hash[file_path]
      begin
        target_item.gen += 1
        # Protect gen from overflow.
        target_item.gen = target_item.gen / 2 if target_item.gen == MAX
      rescue StandardError => e
        @logger.error("Fail to get the next generation for target item #{target_item}.", :exception => e)
        target_item.gen = 0
      end

      min_gen_item = registry_hash.values.min_by { |x| x.gen }
      while min_gen_item.gen > 0
        registry_hash.values.each { |value|
          value.gen -= 1
        }
        min_gen_item = registry_hash.values.min_by { |x| x.gen }
      end
    end
  end # raise_gen
```
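The generation counter is what keeps blob picking fair: the blob just read gets its `gen` bumped, then the `while` loop shifts every entry down until the minimum is back at 0 (subtracting 1 repeatedly, which is equivalent to subtracting the minimum), and `register_for_read` always picks a blob with the lowest `gen`. A worked example of the net effect, with hypothetical file names:

```ruby
gens = { 'a.log' => 3, 'b.log' => 1, 'c.log' => 1 }

gens['b.log'] += 1                    # b.log was just read
min = gens.values.min                 # => 1
gens.each_key { |k| gens[k] -= min }
gens                                  # => {"a.log"=>2, "b.log"=>1, "c.log"=>0}
# c.log, the least recently served blob, is now first in line.
```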
```ruby
  # Acquire a lease on a blob item with retries.
  #
  # By default, it will retry 60 times with a 1-second interval.
  def acquire_lease(blob_name, retry_times = 60, interval_sec = 1)
    lease = nil
    retried = 0
    while lease.nil?
      begin
        lease = @azure_blob.acquire_blob_lease(@container, blob_name, { :timeout => 60, :duration => @registry_lease_duration })
      rescue StandardError => e
        if e.class.name.include? 'LeaseAlreadyPresent'
          if retried > retry_times
            raise
          end
          retried += 1
          sleep interval_sec
        else
          # If anything other than 'LeaseAlreadyPresent' happened, break the lease. This is a
          # work-around for the behavior that, when a timeout exception is hit, an infinite
          # lease somehow ends up on the lock file.
          @azure_blob.break_blob_lease(@container, blob_name, { :break_period => 30 })
        end
      end
    end # while
    return lease
  end # acquire_lease
```
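Every registry mutation below follows the same lease-guarded read-modify-write cycle, with an `ensure` clause as the safety net so the lease is released even on error. Distilled to a sketch (grounded in `update_registry` below):

```ruby
lease = nil
begin
  lease = acquire_lease(@registry_path)  # blocks until granted
  registry_hash = load_registry          # read
  # ... mutate registry_hash ...
  save_registry(registry_hash, lease)    # write under the lease
  @azure_blob.release_blob_lease(@container, @registry_path, lease)
  lease = nil
ensure
  # If anything above raised, the lease is still released here; otherwise
  # registry_lease_duration caps how long a stale lock could linger.
  @azure_blob.release_blob_lease(@container, @registry_path, lease) unless lease.nil?
end
```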
```ruby
  # Return the next blob for reading as well as the start index.
  def register_for_read
    begin
      all_blobs = list_all_blobs
      registry = all_blobs.find { |item| item.name.downcase == @registry_path }

      candidate_blobs = all_blobs.select { |item| (item.name.downcase != @registry_path) }

      start_index = 0
      gen = 0
      lease = nil

      if registry.nil?
        registry_hash = create_registry(candidate_blobs)
        lease = acquire_lease(@registry_path)
      else
        lease = acquire_lease(@registry_path)
        registry_hash = load_registry
      end # if

      picked_blobs = Set.new []
      # Pick up the next candidate.
      picked_blob = nil
      candidate_blobs.each { |candidate_blob|
        @logger.debug("candidate_blob: #{candidate_blob.name} content length: #{candidate_blob.properties[:content_length]}")
        registry_item = registry_hash[candidate_blob.name]

        # Append items that don't exist in the hash table yet.
        if registry_item.nil?
          registry_item = LogStash::Inputs::RegistryItem.new(candidate_blob.name, candidate_blob.properties[:etag], nil, 0, 0)
          registry_hash[candidate_blob.name] = registry_item
        end # if
        @logger.debug("registry_item offset: #{registry_item.offset}")
        if ((registry_item.offset < candidate_blob.properties[:content_length]) && (registry_item.reader.nil? || registry_item.reader == @reader))
          @logger.debug("candidate_blob picked: #{candidate_blob.name} content length: #{candidate_blob.properties[:content_length]}")
          picked_blobs << candidate_blob
        end
      }

      picked_blob = picked_blobs.min_by { |b| registry_hash[b.name].gen }
      unless picked_blob.nil?
        registry_item = registry_hash[picked_blob.name]
        registry_item.reader = @reader
        registry_hash[picked_blob.name] = registry_item
        start_index = registry_item.offset
        raise_gen(registry_hash, picked_blob.name)
        gen = registry_item.gen
      end # unless

      # Save the changes to the registry.
      save_registry(registry_hash, lease)

      @azure_blob.release_blob_lease(@container, @registry_path, lease)
      lease = nil

      return picked_blob, start_index, gen
    rescue StandardError => e
      @logger.error("Oh My, An error occurred. #{e}: #{e.backtrace}", :exception => e)
      return nil, nil, nil
    ensure
      @azure_blob.release_blob_lease(@container, @registry_path, lease) unless lease.nil?
      lease = nil
    end # rescue
  end # register_for_read

  # Update the registry.
  def update_registry(registry_item)
    begin
      lease = nil
      lease = acquire_lease(@registry_path)
      registry_hash = load_registry
      registry_hash[registry_item.file_path] = registry_item
      save_registry(registry_hash, lease)
      @azure_blob.release_blob_lease(@container, @registry_path, lease)
      lease = nil
    rescue StandardError => e
      @logger.error("Oh My, An error occurred. #{e}:\n#{e.backtrace}", :exception => e)
    ensure
      @azure_blob.release_blob_lease(@container, @registry_path, lease) unless lease.nil?
      lease = nil
    end # rescue
  end # def update_registry

  # Clean up the registry.
  def cleanup_registry
    begin
      @logger.debug("azureblob : start cleanup_registry")
      lease = nil
      lease = acquire_lease(@registry_path)
      registry_hash = load_registry
      registry_hash.each { |key, registry_item|
        registry_item.reader = nil if registry_item.reader == @reader
      }
      save_registry(registry_hash, lease)
      @azure_blob.release_blob_lease(@container, @registry_path, lease)
      lease = nil
    rescue StandardError => e
      @logger.error("Oh My, An error occurred. #{e}:\n#{e.backtrace}", :exception => e)
    ensure
      @azure_blob.release_blob_lease(@container, @registry_path, lease) unless lease.nil?
      lease = nil
    end # rescue
    @logger.debug("azureblob : End of cleanup_registry")
  end # def cleanup_registry

  # Create a registry file to coordinate between multiple azure blob inputs.
  def create_registry(blob_items)
    @azure_blob.create_block_blob(@container, @registry_path, '')
    lease = acquire_lease(@registry_path)
    registry_hash = Hash.new
    blob_items.each do |blob_item|
      initial_offset = 0
      initial_offset = blob_item.properties[:content_length] if @registry_create_policy == 'resume'
      registry_item = LogStash::Inputs::RegistryItem.new(blob_item.name, blob_item.properties[:etag], nil, initial_offset, 0)
      registry_hash[blob_item.name] = registry_item
    end # each
    save_registry(registry_hash, lease)
    @azure_blob.release_blob_lease(@container, @registry_path, lease)
    registry_hash
  end # create_registry

  # Load the content of the registry into the registry hash and return it.
  def load_registry
    # Get the content.
    _registry_blob, registry_blob_body = @azure_blob.get_blob(@container, @registry_path)
    registry_hash = deserialize_registry_hash(registry_blob_body)
    registry_hash
  end # def load_registry

  # Serialize the registry hash and save it.
  def save_registry(registry_hash, lease_id)
    # Serialize the hash to JSON.
    registry_hash_json = JSON.generate(registry_hash)

    # Upload the registry to the blob.
    @azure_blob.create_block_blob(@container, @registry_path, registry_hash_json, lease_id: lease_id)
  end # def save_registry
end # class LogStash::Inputs::LogstashInputAzureblob

class BlobReader < LinearReader
  def initialize(logger, azure_blob, container, blob_name, chunk_size, blob_start_index, blob_end_index)
    @logger = logger
    @azure_blob = azure_blob
    @container = container
    @blob_name = blob_name
    @blob_start_index = blob_start_index
    @blob_end_index = blob_end_index
    @chunk_size = chunk_size
  end

  def read
    if @blob_end_index < @blob_start_index
      return nil, false
    end

    are_more_bytes_available = false

    if @blob_end_index >= @blob_start_index + @chunk_size
      end_index = @blob_start_index + @chunk_size - 1
      are_more_bytes_available = true
    else
      end_index = @blob_end_index
    end
    content = read_from_blob(@blob_start_index, end_index)

    @blob_start_index = end_index + 1
    return content, are_more_bytes_available
  end

  private

  def read_from_blob(start_index, end_index)
    _blob, content = @azure_blob.get_blob(@container, @blob_name, { :start_range => start_index, :end_range => end_index })
    return content
  end
end # class BlobReader
```