logstash-input-azureblob 0.9.12-java
- checksums.yaml +7 -0
- data/CHANGELOG.md +7 -0
- data/Gemfile +2 -0
- data/LICENSE +17 -0
- data/README.md +253 -0
- data/lib/com/microsoft/json-parser.rb +202 -0
- data/lib/logstash-input-azureblob_jars.rb +10 -0
- data/lib/logstash/inputs/azureblob.rb +500 -0
- data/lib/org/glassfish/javax.json/1.1/javax.json-1.1.jar +0 -0
- data/logstash-input-azureblob.gemspec +32 -0
- data/spec/com/microsoft/json-parser_spec.rb +280 -0
- data/spec/inputs/azureblob_spec.rb +324 -0
- metadata +165 -0
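
Before the file bodies below, a minimal pipeline sketch showing how the `azureblob` input defined in `data/lib/logstash/inputs/azureblob.rb` is configured. The option names come from the `config` declarations in that file; the account values are placeholders:

```
input {
  azureblob {
    storage_account_name => "myaccount"    # placeholder
    storage_access_key   => "<access-key>" # placeholder
    container            => "logs"         # placeholder
    # Optional settings, shown with their defaults:
    # endpoint                => "core.windows.net"
    # registry_path           => "data/registry"
    # registry_lease_duration => 15
    # interval                => 30
    # registry_create_policy  => "resume"
    # blob_list_page_size     => 100
  }
}
```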
data/lib/logstash-input-azureblob_jars.rb (@@ -0,0 +1,10 @@):

```ruby
# this is a generated file, to avoid over-writing it just delete this comment
begin
  require 'jar_dependencies'
rescue LoadError
  require 'org/glassfish/javax.json/1.1/javax.json-1.1.jar'
end

if defined? Jars
  require_jar( 'org.glassfish', 'javax.json', '1.1' )
end
```
data/lib/logstash/inputs/azureblob.rb (@@ -0,0 +1,500 @@):

```ruby
# encoding: utf-8
require "logstash/inputs/base"
require "logstash/namespace"
require "stud/interval" # for Stud.stoppable_sleep
require "set"

# Azure Storage SDK for Ruby
require "azure/storage"
require 'json' # for registry content
require "securerandom" # for generating uuid.

require "com/microsoft/json-parser"

#require Dir[ File.dirname(__FILE__) + "/../../*_jars.rb" ].first

# Registry item to coordinate between multiple clients.
class LogStash::Inputs::RegistryItem
  attr_accessor :file_path, :etag, :offset, :reader, :gen

  # Allow json serialization.
  def as_json(options = {})
    {
      file_path: @file_path,
      etag: @etag,
      reader: @reader,
      offset: @offset,
      gen: @gen
    }
  end # as_json

  def to_json(*options)
    as_json(*options).to_json(*options)
  end # to_json

  def initialize(file_path, etag, reader, offset = 0, gen = 0)
    @file_path = file_path
    @etag = etag
    @reader = reader
    @offset = offset
    @gen = gen
  end # initialize
end # class RegistryItem


# Logstash input plugin for Azure Blobs
#
# This logstash plugin gathers data from Microsoft Azure Blobs
class LogStash::Inputs::LogstashInputAzureblob < LogStash::Inputs::Base
  config_name "azureblob"

  # If undefined, Logstash will complain, even if codec is unused.
  default :codec, "json_lines"

  # Set the account name for the azure storage account.
  config :storage_account_name, :validate => :string

  # Set the key to access the storage account.
  config :storage_access_key, :validate => :string

  # Set the container of the blobs.
  config :container, :validate => :string

  # Set the endpoint for the blobs.
  #
  # The default, `core.windows.net`, targets the public Azure.
  config :endpoint, :validate => :string, :default => 'core.windows.net'

  # Set whether to use backup mode.
  config :backupmode, :validate => :boolean, :default => false, :deprecated => true, :obsolete => 'This option is obsolete and its setting will be ignored.'

  # Set the value for the registry file.
  #
  # The default, `data/registry`, is used to coordinate readings across multiple client instances.
  config :registry_path, :validate => :string, :default => 'data/registry'

  # Sets the registry file lock duration in seconds. It must be -1, or between 15 and 60 inclusive.
  #
  # The default, `15`, means the registry file will be locked for at most 15 seconds. This is usually
  # sufficient to read the content of the registry. Bounding the lease duration also lets the lease
  # expire when a client crashes before it gets a chance to release the lease on the registry.
  config :registry_lease_duration, :validate => :number, :default => 15

  # Set how many seconds to stay idle before checking for new logs.
  #
  # The default, `30`, means a read of the logs is triggered every 30 seconds after entering idle.
  config :interval, :validate => :number, :default => 30

  # Set the registry create mode.
  #
  # The default, `resume`, means that when the registry is initially created, it assumes all logs have been handled.
  # When set to `start_over`, it will read all log files from the beginning.
  config :registry_create_policy, :validate => :string, :default => 'resume'

  # Sets the header of the file that does not repeat over records. Usually, these are json opening tags.
  config :file_head_bytes, :validate => :number, :default => 0

  # Sets the tail of the file that does not repeat over records. Usually, these are json closing tags.
  config :file_tail_bytes, :validate => :number, :default => 0

  # Sets how to break down json.
  #
  # Only works when the codec is set to `json`. Sets the policy for breaking the json objects in an array
  # into small events. Breaking the json into small sections is not as efficient as keeping it whole,
  # but reduces memory usage.
  # Possible options: `do_not_break`, `with_head_tail`, `without_head_tail`
  config :break_json_down_policy, :validate => :string, :default => 'do_not_break', :obsolete => 'This option is obsolete and its setting will be ignored.'

  # Sets how many json objects will be put in one batch when breaking down json.
  config :break_json_batch_count, :validate => :number, :default => 10, :obsolete => 'This option is obsolete and its setting will be ignored.'

  # Sets the page size for returned blob items. Too large a number can exhaust the heap;
  # too small a number leads to too many requests.
  #
  # The default, `100`, is good for the default heap size of 1 GB.
  config :blob_list_page_size, :validate => :number, :default => 100

  # The size of each read chunk. The default is 4 MB.
  config :file_chunk_size_bytes, :validate => :number, :default => 4 * 1024 * 1024

  # Constant of max integer.
  MAX = 2**([42].pack('i').size * 16 - 2) - 1

  # Update the registry offset each time after this number of entries have been processed.
  UPDATE_REGISTRY_COUNT = 100

  public
  def register
    user_agent = "logstash-input-azureblob"
    user_agent << "/" << Gem.latest_spec_for("logstash-input-azureblob").version.to_s

    # This is the reader id for this specific instance.
    @reader = SecureRandom.uuid
    @registry_locker = "#{@registry_path}.lock"

    # Set up a specific instance of an Azure::Storage::Client.
    client = Azure::Storage::Client.create(
      :storage_account_name => @storage_account_name,
      :storage_access_key => @storage_access_key,
      :storage_blob_host => "https://#{@storage_account_name}.blob.#{@endpoint}",
      :user_agent_prefix => user_agent)
    # Get an azure storage blob service object from the client.
    @azure_blob = client.blob_client
    # Add a retry filter to the service object.
    @azure_blob.with_filter(Azure::Storage::Core::Filter::ExponentialRetryPolicyFilter.new)
  end # def register

  def run(queue)
    # We can abort the loop if stop? becomes true.
    while !stop?
      process(queue)
      @logger.debug("Hitting interval of #{@interval} seconds . . .")
      Stud.stoppable_sleep(@interval) { stop? }
    end # loop
  end # def run

  def stop
    cleanup_registry
  end # def stop

  # Start processing the next item.
  def process(queue)
    begin
      @processed_entries = 0
      blob, start_index, gen = register_for_read

      unless blob.nil?
        begin
          blob_name = blob.name
          @logger.debug("Processing blob #{blob.name}")
          blob_size = blob.properties[:content_length]
          # Work-around: after being returned by get_blob, the etag contains quotes,
          # so capture it from the listed properties instead.
          new_etag = blob.properties[:etag]
          # ~ Work-around

          header = nil
          blob, header = @azure_blob.get_blob(@container, blob_name, { :end_range => @file_head_bytes - 1 }) unless @file_head_bytes.nil? || @file_head_bytes <= 0

          tail = nil
          blob, tail = @azure_blob.get_blob(@container, blob_name, { :start_range => blob_size - @file_tail_bytes }) unless @file_tail_bytes.nil? || @file_tail_bytes <= 0

          if start_index == 0
            # Skip the header since it is already read.
            start_index = @file_head_bytes
          end

          @logger.debug("start index: #{start_index} blob size: #{blob_size}")

          content_length = 0
          blob_reader = BlobReader.new(@logger, @azure_blob, @container, blob_name, @file_chunk_size_bytes, start_index, blob_size - 1 - @file_tail_bytes)

          is_json_codec = (defined?(LogStash::Codecs::JSON) == 'constant') && (@codec.is_a? LogStash::Codecs::JSON)
          if is_json_codec
            parser = JsonParser.new(@logger, blob_reader)

            parser.parse(->(json_content) {
              content_length += json_content.length

              enqueue_content(queue, json_content, header, tail)

              on_entry_processed(start_index, content_length, blob_name, new_etag, gen)
            }, ->(malformed_json) {
              @logger.debug("Skipping #{malformed_json.length} malformed bytes")
              content_length += malformed_json.length

              on_entry_processed(start_index, content_length, blob_name, new_etag, gen)
            })
          else
            begin
              content, are_more_bytes_available = blob_reader.read
              break if content.nil?

              content_length += content.length
              enqueue_content(queue, content, header, tail)

              on_entry_processed(start_index, content_length, blob_name, new_etag, gen)
            end until !are_more_bytes_available
          end # if
        ensure
          # Make sure the reader is removed from the registry even when there is an exception.
          request_registry_update(start_index, content_length, blob_name, new_etag, gen)
        end # begin
      end # unless
    rescue => e
      @logger.error("Oh My, An error occurred. Error: #{e}: Trace: #{e.backtrace}", :exception => e)
    end # begin
  end # process

  def enqueue_content(queue, content, header, tail)
    if (header.nil? || header.length == 0) && (tail.nil? || tail.length == 0)
      # Skip some unnecessary copying.
      full_content = content
    else
      full_content = ""
      full_content << header unless header.nil? || header.length == 0
      full_content << content
      full_content << tail unless tail.nil? || tail.length == 0
    end

    @codec.decode(full_content) do |event|
      decorate(event)
      queue << event
    end
  end

  def on_entry_processed(start_index, content_length, blob_name, new_etag, gen)
    @processed_entries += 1
    if @processed_entries % UPDATE_REGISTRY_COUNT == 0
      request_registry_update(start_index, content_length, blob_name, new_etag, gen)
    end
  end

  def request_registry_update(start_index, content_length, blob_name, new_etag, gen)
    new_offset = start_index
    new_offset += content_length unless content_length.nil?
    @logger.debug("New registry offset: #{new_offset}")
    new_registry_item = LogStash::Inputs::RegistryItem.new(blob_name, new_etag, nil, new_offset, gen)
    update_registry(new_registry_item)
  end

  # Deserialize the registry hash from a json string.
  def deserialize_registry_hash(json_string)
    result = Hash.new
    temp_hash = JSON.parse(json_string)
    temp_hash.values.each { |kvp|
      result[kvp['file_path']] = LogStash::Inputs::RegistryItem.new(kvp['file_path'], kvp['etag'], kvp['reader'], kvp['offset'], kvp['gen'])
    }
    return result
  end # deserialize_registry_hash

  # List all the blobs in the given container.
  def list_all_blobs
    blobs = Set.new []
    continuation_token = nil
    @blob_list_page_size = 100 if @blob_list_page_size <= 0
    loop do
      # Limit the number of returned entries to avoid an out-of-memory exception.
      entries = @azure_blob.list_blobs(@container, { :timeout => 60, :marker => continuation_token, :max_results => @blob_list_page_size })
      entries.each do |entry|
        blobs << entry
      end # each
      continuation_token = entries.continuation_token
      break if continuation_token.empty?
    end # loop
    return blobs
  end # def list_all_blobs

  # Raise the generation for a blob in the registry.
  def raise_gen(registry_hash, file_path)
    begin
      target_item = registry_hash[file_path]
      begin
        target_item.gen += 1
        # Protect gen from overflow.
        target_item.gen = target_item.gen / 2 if target_item.gen == MAX
      rescue StandardError => e
        @logger.error("Fail to get the next generation for target item #{target_item}.", :exception => e)
        target_item.gen = 0
      end

      min_gen_item = registry_hash.values.min_by { |x| x.gen }
      while min_gen_item.gen > 0
        registry_hash.values.each { |value|
          value.gen -= 1
        }
        min_gen_item = registry_hash.values.min_by { |x| x.gen }
      end
    end
  end # raise_gen

  # Acquire a lease on a blob item with retries.
  #
  # By default, it will retry 60 times with a 1 second interval.
  def acquire_lease(blob_name, retry_times = 60, interval_sec = 1)
    lease = nil
    retried = 0
    while lease.nil? do
      begin
        lease = @azure_blob.acquire_blob_lease(@container, blob_name, { :timeout => 60, :duration => @registry_lease_duration })
      rescue StandardError => e
        if e.respond_to?(:type) && e.type == 'LeaseAlreadyPresent'
          if retried > retry_times
            raise
          end
          retried += 1
          sleep interval_sec
        else
          # For anything other than 'LeaseAlreadyPresent', break the lease. This is a work-around for the
          # behavior that when a timeout exception is hit, somehow an infinite lease is put on the lock file.
          @azure_blob.break_blob_lease(@container, blob_name, { :break_period => 30 })
        end
      end
    end # while
    return lease
  end # acquire_lease

  # Return the next blob for reading as well as the start index.
  def register_for_read
    begin
      all_blobs = list_all_blobs
      registry = all_blobs.find { |item| item.name.downcase == @registry_path }
      registry_locker = all_blobs.find { |item| item.name.downcase == @registry_locker }

      candidate_blobs = all_blobs.select { |item| (item.name.downcase != @registry_path) && (item.name.downcase != @registry_locker) }

      start_index = 0
      gen = 0
      lease = nil

      # Lease the locker file rather than the registry file so that the registry itself can still be updated,
      # as a work-around for Azure Storage Ruby SDK issue #16.
      # Work-around: https://github.com/Azure/azure-storage-ruby/issues/16
      registry_locker = @azure_blob.create_block_blob(@container, @registry_locker, @reader) if registry_locker.nil?
      lease = acquire_lease(@registry_locker)
      # ~ Work-around

      if registry.nil?
        registry_hash = create_registry(candidate_blobs)
      else
        registry_hash = load_registry
      end # if

      picked_blobs = Set.new []
      # Pick up the next candidate.
      picked_blob = nil
      candidate_blobs.each { |candidate_blob|
        @logger.debug("candidate_blob: #{candidate_blob.name} content length: #{candidate_blob.properties[:content_length]}")
        registry_item = registry_hash[candidate_blob.name]

        # Append items that do not exist in the hash table yet.
        if registry_item.nil?
          registry_item = LogStash::Inputs::RegistryItem.new(candidate_blob.name, candidate_blob.properties[:etag], nil, 0, 0)
          registry_hash[candidate_blob.name] = registry_item
        end # if
        @logger.debug("registry_item offset: #{registry_item.offset}")
        if (registry_item.offset < candidate_blob.properties[:content_length]) && (registry_item.reader.nil? || registry_item.reader == @reader)
          @logger.debug("candidate_blob picked: #{candidate_blob.name} content length: #{candidate_blob.properties[:content_length]}")
          picked_blobs << candidate_blob
        end
      }

      picked_blob = picked_blobs.min_by { |b| registry_hash[b.name].gen }
      if !picked_blob.nil?
        registry_item = registry_hash[picked_blob.name]
        registry_item.reader = @reader
        registry_hash[picked_blob.name] = registry_item
        start_index = registry_item.offset
        raise_gen(registry_hash, picked_blob.name)
        gen = registry_item.gen
      end # if

      # Save the change to the registry.
      save_registry(registry_hash)

      @azure_blob.release_blob_lease(@container, @registry_locker, lease)
      lease = nil

      return picked_blob, start_index, gen
    rescue StandardError => e
      @logger.error("Oh My, An error occurred. #{e}: #{e.backtrace}", :exception => e)
      return nil, nil, nil
    ensure
      @azure_blob.release_blob_lease(@container, @registry_locker, lease) unless lease.nil?
      lease = nil
    end # rescue
  end # register_for_read

  # Update the registry.
  def update_registry(registry_item)
    begin
      lease = nil
      lease = acquire_lease(@registry_locker)
      registry_hash = load_registry
      registry_hash[registry_item.file_path] = registry_item
      save_registry(registry_hash)
      @azure_blob.release_blob_lease(@container, @registry_locker, lease)
      lease = nil
    rescue StandardError => e
      @logger.error("Oh My, An error occurred. #{e}:\n#{e.backtrace}", :exception => e)
    ensure
      @azure_blob.release_blob_lease(@container, @registry_locker, lease) unless lease.nil?
      lease = nil
    end # rescue
  end # def update_registry

  # Clean up the registry.
  def cleanup_registry
    begin
      lease = nil
      lease = acquire_lease(@registry_locker)
      registry_hash = load_registry
      registry_hash.each { |key, registry_item|
        registry_item.reader = nil if registry_item.reader == @reader
      }
      save_registry(registry_hash)
      @azure_blob.release_blob_lease(@container, @registry_locker, lease)
      lease = nil
    rescue StandardError => e
      @logger.error("Oh My, An error occurred. #{e}:\n#{e.backtrace}", :exception => e)
    ensure
      @azure_blob.release_blob_lease(@container, @registry_locker, lease) unless lease.nil?
      lease = nil
    end # rescue
  end # def cleanup_registry

  # Create a registry file to coordinate between multiple azure blob inputs.
  def create_registry(blob_items)
    registry_hash = Hash.new

    blob_items.each do |blob_item|
      initial_offset = 0
      initial_offset = blob_item.properties[:content_length] if @registry_create_policy == 'resume'
      registry_item = LogStash::Inputs::RegistryItem.new(blob_item.name, blob_item.properties[:etag], nil, initial_offset, 0)
      registry_hash[blob_item.name] = registry_item
    end # each
    save_registry(registry_hash)
    return registry_hash
  end # create_registry

  # Load the content of the registry into the registry hash and return it.
  def load_registry
    # Get content.
    registry_blob, registry_blob_body = @azure_blob.get_blob(@container, @registry_path)
    registry_hash = deserialize_registry_hash(registry_blob_body)
    return registry_hash
  end # def load_registry

  # Serialize the registry hash and save it.
  def save_registry(registry_hash)
    # Serialize the hash to json.
    registry_hash_json = JSON.generate(registry_hash)

    # Upload the registry to the blob.
    @azure_blob.create_block_blob(@container, @registry_path, registry_hash_json)
  end # def save_registry
end # class LogStash::Inputs::LogstashInputAzureblob
```
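
To make the header/tail stitching in `enqueue_content` concrete: with `file_head_bytes` and `file_tail_bytes` set, every chunk read from the middle of a blob is wrapped back into the non-repeating envelope so the codec sees a complete document. A standalone sketch with illustrative values (none of these literals come from the plugin):

```ruby
# A blob whose records live inside a non-repeating JSON envelope.
blob = '{"records":[{"id":1},{"id":2}]}'

head_bytes = '{"records":['.bytesize # what file_head_bytes would cover (12)
tail_bytes = ']}'.bytesize           # what file_tail_bytes would cover (2)

header = blob[0, head_bytes]           # => '{"records":['
tail   = blob[-tail_bytes, tail_bytes] # => ']}'

# enqueue_content-style stitching for one record pulled from the middle:
record = '{"id":1}'
full_content = ""
full_content << header << record << tail
puts full_content # => {"records":[{"id":1}]}
```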
data/lib/logstash/inputs/azureblob.rb (continued): the BlobReader helper defined at the end of the same file.

```ruby
# Reads a blob in fixed-size chunks between a start and an end index.
# LinearReader comes from the com/microsoft/json-parser required above.
class BlobReader < LinearReader
  def initialize(logger, azure_blob, container, blob_name, chunk_size, blob_start_index, blob_end_index)
    @logger = logger
    @azure_blob = azure_blob
    @container = container
    @blob_name = blob_name
    @blob_start_index = blob_start_index
    @blob_end_index = blob_end_index
    @chunk_size = chunk_size
  end

  def read
    if @blob_end_index < @blob_start_index
      return nil, false
    end

    are_more_bytes_available = false

    if @blob_end_index >= @blob_start_index + @chunk_size
      end_index = @blob_start_index + @chunk_size - 1
      are_more_bytes_available = true
    else
      end_index = @blob_end_index
    end
    content = read_from_blob(@blob_start_index, end_index)

    @blob_start_index = end_index + 1
    return content, are_more_bytes_available
  end

  private

  def read_from_blob(start_index, end_index)
    blob, content = @azure_blob.get_blob(@container, @blob_name, { :start_range => start_index, :end_range => end_index })
    return content
  end
end # class BlobReader
```
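
To make the registry format concrete, here is a standalone round-trip sketch (no Logstash or Azure calls; the class below is a cut-down stand-in for `LogStash::Inputs::RegistryItem` with the same JSON shape). `save_registry` serializes the whole hash with `JSON.generate`, and `deserialize_registry_hash` rebuilds it by re-keying each value on its own `file_path` field:

```ruby
require 'json'

# Cut-down stand-in for LogStash::Inputs::RegistryItem.
class RegistryItem
  attr_accessor :file_path, :etag, :offset, :reader, :gen

  def initialize(file_path, etag, reader, offset = 0, gen = 0)
    @file_path = file_path
    @etag = etag
    @reader = reader
    @offset = offset
    @gen = gen
  end

  def to_json(*options)
    { file_path: @file_path, etag: @etag, reader: @reader,
      offset: @offset, gen: @gen }.to_json(*options)
  end
end

# Serialize the way save_registry does.
registry = { 'logs/a.log' => RegistryItem.new('logs/a.log', '0x8D4', nil, 1024, 2) }
json = JSON.generate(registry)
puts json
# => {"logs/a.log":{"file_path":"logs/a.log","etag":"0x8D4","reader":null,"offset":1024,"gen":2}}

# Rebuild the way deserialize_registry_hash does: the outer keys are ignored and
# each entry is re-keyed by its own file_path field.
rebuilt = {}
JSON.parse(json).values.each do |kvp|
  rebuilt[kvp['file_path']] =
    RegistryItem.new(kvp['file_path'], kvp['etag'], kvp['reader'], kvp['offset'], kvp['gen'])
end
puts rebuilt['logs/a.log'].offset # => 1024
```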