logstash-input-azureblob 0.9.12-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +7 -0
- data/Gemfile +2 -0
- data/LICENSE +17 -0
- data/README.md +253 -0
- data/lib/com/microsoft/json-parser.rb +202 -0
- data/lib/logstash-input-azureblob_jars.rb +10 -0
- data/lib/logstash/inputs/azureblob.rb +500 -0
- data/lib/org/glassfish/javax.json/1.1/javax.json-1.1.jar +0 -0
- data/logstash-input-azureblob.gemspec +32 -0
- data/spec/com/microsoft/json-parser_spec.rb +280 -0
- data/spec/inputs/azureblob_spec.rb +324 -0
- metadata +165 -0
data/lib/logstash-input-azureblob_jars.rb
@@ -0,0 +1,10 @@
+# this is a generated file, to avoid over-writing it just delete this comment
+begin
+  require 'jar_dependencies'
+rescue LoadError
+  require 'org/glassfish/javax.json/1.1/javax.json-1.1.jar'
+end
+
+if defined? Jars
+  require_jar( 'org.glassfish', 'javax.json', '1.1' )
+end
data/lib/logstash/inputs/azureblob.rb
@@ -0,0 +1,500 @@
+# encoding: utf-8
+require "logstash/inputs/base"
+require "logstash/namespace"
+
+# Azure Storage SDK for Ruby
+require "azure/storage"
+require 'json' # for registry content
+require "securerandom" # for generating uuid.
+require "set" # for the blob sets used below
+require "stud/interval" # for Stud.stoppable_sleep in the run loop
+
+require "com/microsoft/json-parser"
+
+#require Dir[ File.dirname(__FILE__) + "/../../*_jars.rb" ].first
+# Registry item to coordinate between multiple clients.
+class LogStash::Inputs::RegistryItem
+  attr_accessor :file_path, :etag, :offset, :reader, :gen
+  # Allow json serialization.
+  def as_json(options = {})
+    {
+      file_path: @file_path,
+      etag: @etag,
+      reader: @reader,
+      offset: @offset,
+      gen: @gen
+    }
+  end # as_json
+
+  def to_json(*options)
+    as_json(*options).to_json(*options)
+  end # to_json
+
+  def initialize(file_path, etag, reader, offset = 0, gen = 0)
+    @file_path = file_path
+    @etag = etag
+    @reader = reader
+    @offset = offset
+    @gen = gen
+  end # initialize
+end # class RegistryItem
+
+
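For reference, `as_json`/`to_json` above determine the on-disk registry format. A minimal sketch (all values hypothetical, assuming the class above is loaded) of what one serialized item looks like:

```ruby
require 'json'

item = LogStash::Inputs::RegistryItem.new('logs/app-2017.log', '0x8D4EXAMPLE', nil, 1024, 3)
puts item.to_json
# => {"file_path":"logs/app-2017.log","etag":"0x8D4EXAMPLE","reader":null,"offset":1024,"gen":3}
```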
+# Logstash input plugin for Azure Blobs
+#
+# This Logstash plugin gathers data from Microsoft Azure Blob storage.
+class LogStash::Inputs::LogstashInputAzureblob < LogStash::Inputs::Base
+  config_name "azureblob"
+
+  # If undefined, Logstash will complain, even if codec is unused.
+  default :codec, "json_lines"
+
+  # Set the account name for the Azure storage account.
+  config :storage_account_name, :validate => :string
+
+  # Set the key to access the storage account.
+  config :storage_access_key, :validate => :string
+
+  # Set the container of the blobs.
+  config :container, :validate => :string
+
+  # Set the endpoint for the blobs.
+  #
+  # The default, `core.windows.net`, targets the public Azure cloud.
+  config :endpoint, :validate => :string, :default => 'core.windows.net'
+
+  # Set the value for using backup mode.
+  config :backupmode, :validate => :boolean, :default => false, :deprecated => true, :obsolete => 'This option is obsolete and its setting will be ignored.'
+
+  # Set the value for the registry file.
+  #
+  # The default, `data/registry`, is used to coordinate readings between multiple instances of the clients.
+  config :registry_path, :validate => :string, :default => 'data/registry'
+
+  # Set the registry-file lock duration in seconds. It must be set to -1, or between 15 and 60 inclusive.
+  #
+  # The default, `15`, means the registry file will be locked for at most 15 seconds. This should usually be sufficient
+  # to read the content of the registry. This configuration exists so the lease can expire in case a client crashes
+  # before it gets a chance to release the lease on the registry.
+  config :registry_lease_duration, :validate => :number, :default => 15
+
+  # Set how many seconds to stay idle before checking for new logs.
+  #
+  # The default, `30`, means a read of the logs is triggered every 30 seconds after entering idle.
+  config :interval, :validate => :number, :default => 30
+
+  # Set the registry create mode.
+  #
+  # The default, `resume`, means that when the registry is initially created, it assumes all logs have already been handled.
+  # When set to `start_over`, it will read all log files from the beginning.
+  config :registry_create_policy, :validate => :string, :default => 'resume'
+
+  # Set the size of the file header that does not repeat over records. Usually, these are JSON opening tags.
+  config :file_head_bytes, :validate => :number, :default => 0
+
+  # Set the size of the file tail that does not repeat over records. Usually, these are JSON closing tags.
+  config :file_tail_bytes, :validate => :number, :default => 0
+
+  # Set how to break up JSON.
+  #
+  # Only works when the codec is set to `json`. Sets the policy for breaking the JSON objects in an array into small events.
+  # Breaking the JSON into small sections is not as efficient as keeping it whole, but it reduces memory usage.
+  # Possible options: `do_not_break`, `with_head_tail`, `without_head_tail`.
+  config :break_json_down_policy, :validate => :string, :default => 'do_not_break', :obsolete => 'This option is obsolete and its setting will be ignored.'
+
+  # Set how many JSON objects are put in one batch when breaking up JSON.
+  config :break_json_batch_count, :validate => :number, :default => 10, :obsolete => 'This option is obsolete and its setting will be ignored.'
+
+  # Set the page size for returned blob items. Too large a number can cause a heap overflow; too small a number leads to too many requests.
+  #
+  # The default, `100`, is good for the default heap size of 1 GB.
+  config :blob_list_page_size, :validate => :number, :default => 100
+
+  # Set the chunk size, in bytes, for reading a blob. The default is 4 MB.
+  config :file_chunk_size_bytes, :validate => :number, :default => 4 * 1024 * 1024
+
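Taken together, these options translate directly into a pipeline definition. Below is a minimal, hypothetical Logstash configuration for this input; the account name, key, and container values are placeholders, not defaults:

```
input {
  azureblob {
    storage_account_name => "mystorageaccount"
    storage_access_key => "PLACEHOLDER_BASE64_KEY=="
    container => "insights-logs-example"
    registry_path => "data/registry"
    interval => 30
    registry_create_policy => "resume"
  }
}
```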
+  # Constant of max integer (evaluates to 2**62 - 1 where a native int is 4 bytes).
+  MAX = 2**([42].pack('i').size * 16 - 2) - 1
+
+  # Update the registry offset each time after this number of entries have been processed.
+  UPDATE_REGISTRY_COUNT = 100
+
+  public
+  def register
+    user_agent = "logstash-input-azureblob"
+    user_agent << "/" << Gem.latest_spec_for("logstash-input-azureblob").version.to_s
+
+    # This is the reader id (uuid) for this specific instance.
+    @reader = SecureRandom.uuid
+    @registry_locker = "#{@registry_path}.lock"
+
+    # Set up a specific instance of an Azure::Storage::Client.
+    client = Azure::Storage::Client.create(
+      :storage_account_name => @storage_account_name,
+      :storage_access_key => @storage_access_key,
+      :storage_blob_host => "https://#{@storage_account_name}.blob.#{@endpoint}",
+      :user_agent_prefix => user_agent)
+    # Get an Azure storage blob service object from the client.
+    @azure_blob = client.blob_client
+    # Add a retry filter to the service object.
+    @azure_blob.with_filter(Azure::Storage::Core::Filter::ExponentialRetryPolicyFilter.new)
+  end # def register
+
+  def run(queue)
+    # We can abort the loop if stop? becomes true.
+    while !stop?
+      process(queue)
+      @logger.debug("Hitting interval of #{@interval} seconds . . .")
+      Stud.stoppable_sleep(@interval) { stop? }
+    end # loop
+  end # def run
+
+  def stop
+    cleanup_registry
+  end # def stop
+
+  # Start processing the next item.
+  def process(queue)
+    begin
+      @processed_entries = 0
+      blob, start_index, gen = register_for_read
+
+      if !blob.nil?
+        begin
+          blob_name = blob.name
+          @logger.debug("Processing blob #{blob.name}")
+          blob_size = blob.properties[:content_length]
+          # Work-around: after get_blob returns, the etag will contain quotes.
+          new_etag = blob.properties[:etag]
+          # ~ Work-around
+
+          unless @file_head_bytes.nil? || @file_head_bytes <= 0
+            blob, header = @azure_blob.get_blob(@container, blob_name, { :end_range => @file_head_bytes - 1 }) if header.nil?
+          end
+
+          unless @file_tail_bytes.nil? || @file_tail_bytes <= 0
+            blob, tail = @azure_blob.get_blob(@container, blob_name, { :start_range => blob_size - @file_tail_bytes }) if tail.nil?
+          end
+
+          if start_index == 0
+            # Skip the header since it is already read.
+            start_index = @file_head_bytes
+          end
+
+          @logger.debug("start index: #{start_index} blob size: #{blob_size}")
+
+          content_length = 0
+          blob_reader = BlobReader.new(@logger, @azure_blob, @container, blob_name, @file_chunk_size_bytes, start_index, blob_size - 1 - @file_tail_bytes)
+
+          is_json_codec = (defined?(LogStash::Codecs::JSON) == 'constant') && (@codec.is_a? LogStash::Codecs::JSON)
+          if is_json_codec
+            parser = JsonParser.new(@logger, blob_reader)
+
+            parser.parse(->(json_content) {
+              content_length += json_content.length
+
+              enqueue_content(queue, json_content, header, tail)
+
+              on_entry_processed(start_index, content_length, blob_name, new_etag, gen)
+            }, ->(malformed_json) {
+              @logger.debug("Skipping #{malformed_json.length} malformed bytes")
+              content_length += malformed_json.length
+
+              on_entry_processed(start_index, content_length, blob_name, new_etag, gen)
+            })
+          else
+            begin
+              content, are_more_bytes_available = blob_reader.read
+              break if content.nil? # guard: read returns nil when the byte range is empty
+
+              content_length += content.length
+              enqueue_content(queue, content, header, tail)
+
+              on_entry_processed(start_index, content_length, blob_name, new_etag, gen)
+            end until !are_more_bytes_available || content.nil?
+          end # if
+        ensure
+          # Make sure the reader is removed from the registry even when there's an exception.
+          request_registry_update(start_index, content_length, blob_name, new_etag, gen)
+        end # begin
+      end # if
+    rescue => e
+      @logger.error("An error occurred. Error: #{e}. Trace: #{e.backtrace}", :exception => e)
+    end # begin
+  end # process
+
+  def enqueue_content(queue, content, header, tail)
+    if (header.nil? || header.length == 0) && (tail.nil? || tail.length == 0)
+      # Skip some unnecessary copying.
+      full_content = content
+    else
+      full_content = ""
+      full_content << header unless header.nil? || header.length == 0
+      full_content << content
+      full_content << tail unless tail.nil? || tail.length == 0
+    end
+
+    @codec.decode(full_content) do |event|
+      decorate(event)
+      queue << event
+    end
+  end
+
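To illustrate why `enqueue_content` re-attaches the header and tail: for an Azure diagnostics-style blob shaped like `{"records":[ ... ]}`, re-wrapping each extracted record yields a self-contained JSON document the codec can decode on its own. A hypothetical sketch:

```ruby
header = '{"records":['  # the span covered by file_head_bytes
tail = ']}'              # the span covered by file_tail_bytes
record = '{"level":"info","message":"hello"}'

puts header + record + tail
# => {"records":[{"level":"info","message":"hello"}]}
```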
+  def on_entry_processed(start_index, content_length, blob_name, new_etag, gen)
+    @processed_entries += 1
+    if @processed_entries % UPDATE_REGISTRY_COUNT == 0
+      request_registry_update(start_index, content_length, blob_name, new_etag, gen)
+    end
+  end
+
+  def request_registry_update(start_index, content_length, blob_name, new_etag, gen)
+    new_offset = start_index
+    new_offset += content_length unless content_length.nil?
+    @logger.debug("New registry offset: #{new_offset}")
+    new_registry_item = LogStash::Inputs::RegistryItem.new(blob_name, new_etag, nil, new_offset, gen)
+    update_registry(new_registry_item)
+  end
+
+  # Deserialize the registry hash from a json string.
+  def deserialize_registry_hash(json_string)
+    result = Hash.new
+    temp_hash = JSON.parse(json_string)
+    temp_hash.values.each { |kvp|
+      result[kvp['file_path']] = LogStash::Inputs::RegistryItem.new(kvp['file_path'], kvp['etag'], kvp['reader'], kvp['offset'], kvp['gen'])
+    }
+    return result
+  end # deserialize_registry_hash
+
+  # List all the blobs in the given container.
+  def list_all_blobs
+    blobs = Set.new
+    continuation_token = nil
+    @blob_list_page_size = 100 if @blob_list_page_size <= 0
+    loop do
+      # Limit the number of returned entries to avoid an out-of-memory exception.
+      entries = @azure_blob.list_blobs(@container, { :timeout => 60, :marker => continuation_token, :max_results => @blob_list_page_size })
+      entries.each do |entry|
+        blobs << entry
+      end # each
+      continuation_token = entries.continuation_token
+      break if continuation_token.empty?
+    end # loop
+    return blobs
+  end # def list_all_blobs
+
+  # Raise the generation for a blob in the registry.
+  def raise_gen(registry_hash, file_path)
+    begin
+      target_item = registry_hash[file_path]
+      begin
+        target_item.gen += 1
+        # Protect gen from overflow.
+        target_item.gen = target_item.gen / 2 if target_item.gen == MAX
+      rescue StandardError => e
+        @logger.error("Failed to get the next generation for target item #{target_item}.", :exception => e)
+        target_item.gen = 0
+      end
+
+      min_gen_item = registry_hash.values.min_by { |x| x.gen }
+      while min_gen_item.gen > 0
+        registry_hash.values.each { |value|
+          value.gen -= 1
+        }
+        min_gen_item = registry_hash.values.min_by { |x| x.gen }
+      end
+    end
+  end # raise_gen
+
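A worked example of the generation bookkeeping in `raise_gen`: raising the picked blob's `gen` and then shifting every `gen` down until the minimum is 0 keeps the numbers small while preserving their order, so the least-recently-picked blob wins the `min_by` in `register_for_read`. A self-contained sketch (a `Struct` stands in for `RegistryItem`):

```ruby
Item = Struct.new(:gen)
registry_hash = { 'a.log' => Item.new(2), 'b.log' => Item.new(0), 'c.log' => Item.new(1) }

registry_hash['b.log'].gen += 1 # b.log was just picked: 0 -> 1
until registry_hash.values.min_by { |x| x.gen }.gen == 0
  registry_hash.values.each { |x| x.gen -= 1 }
end
registry_hash.each { |name, item| puts "#{name}: #{item.gen}" }
# prints:
#   a.log: 1
#   b.log: 0
#   c.log: 0
```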
+  # Acquire a lease on a blob item with retries.
+  #
+  # By default, it will retry 60 times with a 1-second interval.
+  def acquire_lease(blob_name, retry_times = 60, interval_sec = 1)
+    lease = nil
+    retried = 0
+    while lease.nil?
+      begin
+        lease = @azure_blob.acquire_blob_lease(@container, blob_name, { :timeout => 60, :duration => @registry_lease_duration })
+      rescue StandardError => e
+        if e.respond_to?(:type) && e.type == 'LeaseAlreadyPresent'
+          if retried > retry_times
+            raise
+          end
+          retried += 1
+          sleep interval_sec
+        else
+          # If anything other than 'LeaseAlreadyPresent' happens, break the lease. This is a work-around for the
+          # behavior that, when a timeout exception is hit, somehow an infinite lease gets put on the lock file.
+          @azure_blob.break_blob_lease(@container, blob_name, { :break_period => 30 })
+        end
+      end
+    end # while
+    return lease
+  end # acquire_lease
+
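Every registry mutation below follows the same locking discipline around `acquire_lease`. A condensed sketch of the pattern (mirroring `update_registry` and `cleanup_registry`):

```ruby
lease = nil
begin
  lease = acquire_lease(@registry_locker)
  # ... read, modify, and save the registry while holding the lease ...
  @azure_blob.release_blob_lease(@container, @registry_locker, lease)
  lease = nil
ensure
  # Release on the error path too, so a crash cannot hold the lock for long.
  @azure_blob.release_blob_lease(@container, @registry_locker, lease) unless lease.nil?
end
```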
+  # Return the next blob for reading as well as the start index.
+  def register_for_read
+    begin
+      all_blobs = list_all_blobs
+      registry = all_blobs.find { |item| item.name.downcase == @registry_path }
+      registry_locker = all_blobs.find { |item| item.name.downcase == @registry_locker }
+
+      candidate_blobs = all_blobs.select { |item| (item.name.downcase != @registry_path) && (item.name.downcase != @registry_locker) }
+
+      start_index = 0
+      gen = 0
+      lease = nil
+
+      # Put the lease on the locker file rather than on the registry file so the registry can still be updated,
+      # as a workaround for Azure Storage Ruby SDK issue #16.
+      # Workaround: https://github.com/Azure/azure-storage-ruby/issues/16
+      registry_locker = @azure_blob.create_block_blob(@container, @registry_locker, @reader) if registry_locker.nil?
+      lease = acquire_lease(@registry_locker)
+      # ~ Workaround
+
+      if registry.nil?
+        registry_hash = create_registry(candidate_blobs)
+      else
+        registry_hash = load_registry
+      end # if
+
+      picked_blobs = Set.new
+      # Pick up the next candidate.
+      picked_blob = nil
+      candidate_blobs.each { |candidate_blob|
+        @logger.debug("candidate_blob: #{candidate_blob.name} content length: #{candidate_blob.properties[:content_length]}")
+        registry_item = registry_hash[candidate_blob.name]
+
+        # Append items that don't exist in the hash table yet.
+        if registry_item.nil?
+          registry_item = LogStash::Inputs::RegistryItem.new(candidate_blob.name, candidate_blob.properties[:etag], nil, 0, 0)
+          registry_hash[candidate_blob.name] = registry_item
+        end # if
+        @logger.debug("registry_item offset: #{registry_item.offset}")
+        if (registry_item.offset < candidate_blob.properties[:content_length]) && (registry_item.reader.nil? || registry_item.reader == @reader)
+          @logger.debug("candidate_blob picked: #{candidate_blob.name} content length: #{candidate_blob.properties[:content_length]}")
+          picked_blobs << candidate_blob
+        end
+      }
+
+      picked_blob = picked_blobs.min_by { |b| registry_hash[b.name].gen }
+      if !picked_blob.nil?
+        registry_item = registry_hash[picked_blob.name]
+        registry_item.reader = @reader
+        registry_hash[picked_blob.name] = registry_item
+        start_index = registry_item.offset
+        raise_gen(registry_hash, picked_blob.name)
+        gen = registry_item.gen
+      end # if
+
+      # Save the change to the registry.
+      save_registry(registry_hash)
+
+      @azure_blob.release_blob_lease(@container, @registry_locker, lease)
+      lease = nil
+
+      return picked_blob, start_index, gen
+    rescue StandardError => e
+      @logger.error("An error occurred. #{e}: #{e.backtrace}", :exception => e)
+      return nil, nil, nil
+    ensure
+      @azure_blob.release_blob_lease(@container, @registry_locker, lease) unless lease.nil?
+      lease = nil
+    end # rescue
+  end # register_for_read
+
+  # Update the registry.
+  def update_registry(registry_item)
+    begin
+      lease = nil
+      lease = acquire_lease(@registry_locker)
+      registry_hash = load_registry
+      registry_hash[registry_item.file_path] = registry_item
+      save_registry(registry_hash)
+      @azure_blob.release_blob_lease(@container, @registry_locker, lease)
+      lease = nil
+    rescue StandardError => e
+      @logger.error("An error occurred. #{e}:\n#{e.backtrace}", :exception => e)
+    ensure
+      @azure_blob.release_blob_lease(@container, @registry_locker, lease) unless lease.nil?
+      lease = nil
+    end # rescue
+  end # def update_registry
+
+  # Clean up the registry.
+  def cleanup_registry
+    begin
+      lease = nil
+      lease = acquire_lease(@registry_locker)
+      registry_hash = load_registry
+      registry_hash.each { |key, registry_item|
+        registry_item.reader = nil if registry_item.reader == @reader
+      }
+      save_registry(registry_hash)
+      @azure_blob.release_blob_lease(@container, @registry_locker, lease)
+      lease = nil
+    rescue StandardError => e
+      @logger.error("An error occurred. #{e}:\n#{e.backtrace}", :exception => e)
+    ensure
+      @azure_blob.release_blob_lease(@container, @registry_locker, lease) unless lease.nil?
+      lease = nil
+    end # rescue
+  end # def cleanup_registry
+
+  # Create a registry file to coordinate between multiple azure blob inputs.
+  def create_registry(blob_items)
+    registry_hash = Hash.new
+
+    blob_items.each do |blob_item|
+      initial_offset = 0
+      initial_offset = blob_item.properties[:content_length] if @registry_create_policy == 'resume'
+      registry_item = LogStash::Inputs::RegistryItem.new(blob_item.name, blob_item.properties[:etag], nil, initial_offset, 0)
+      registry_hash[blob_item.name] = registry_item
+    end # each
+    save_registry(registry_hash)
+    return registry_hash
+  end # create_registry
+
+  # Load the content of the registry into the registry hash and return it.
+  def load_registry
+    # Get the content.
+    registry_blob, registry_blob_body = @azure_blob.get_blob(@container, @registry_path)
+    registry_hash = deserialize_registry_hash(registry_blob_body)
+    return registry_hash
+  end # def load_registry
+
+  # Serialize the registry hash and save it.
+  def save_registry(registry_hash)
+    # Serialize the hash to json.
+    registry_hash_json = JSON.generate(registry_hash)
+
+    # Upload the registry to the blob.
+    @azure_blob.create_block_blob(@container, @registry_path, registry_hash_json)
+  end # def save_registry
+end # class LogStash::Inputs::LogstashInputAzureblob
+
+class BlobReader < LinearReader
+  def initialize(logger, azure_blob, container, blob_name, chunk_size, blob_start_index, blob_end_index)
+    @logger = logger
+    @azure_blob = azure_blob
+    @container = container
+    @blob_name = blob_name
+    @blob_start_index = blob_start_index
+    @blob_end_index = blob_end_index
+    @chunk_size = chunk_size
+  end
+
+  def read
+    if @blob_end_index < @blob_start_index
+      return nil, false
+    end
+
+    are_more_bytes_available = false
+
+    if @blob_end_index >= @blob_start_index + @chunk_size
+      end_index = @blob_start_index + @chunk_size - 1
+      are_more_bytes_available = true
+    else
+      end_index = @blob_end_index
+    end
+    content = read_from_blob(@blob_start_index, end_index)
+
+    @blob_start_index = end_index + 1
+    return content, are_more_bytes_available
+  end
+
+  private
+
+  def read_from_blob(start_index, end_index)
+    blob, content = @azure_blob.get_blob(@container, @blob_name, { :start_range => start_index, :end_range => end_index })
+    return content
+  end
+end # class BlobReader
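A minimal usage sketch of `BlobReader` (the `logger`, `azure_blob`, `blob_size`, and `handle_chunk` names are hypothetical stand-ins): read a blob in 4 MB chunks until the reader reports that no more bytes are available.

```ruby
reader = BlobReader.new(logger, azure_blob, 'my-container', 'app.log',
                        4 * 1024 * 1024, 0, blob_size - 1)
loop do
  content, more = reader.read
  break if content.nil?  # empty byte range: nothing left to read
  handle_chunk(content)  # hypothetical consumer
  break unless more
end
```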