logstash-input-azure_blob_storage 0.12.2 → 0.12.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -1
- data/lib/logstash/inputs/azure_blob_storage.rb +47 -38
- data/logstash-input-azure_blob_storage.gemspec +3 -3
- data/spec/inputs/azure_blob_storage_spec.rb +4 -0
- data/spec/inputs/test.rb +11 -0
- metadata +20 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: da5b56c9fb4733d62604b7753d2e08e3d24100915f58d4f374ab6b6573d27408
|
4
|
+
data.tar.gz: 26f5a19de67615eeb27d302e0b0768fc0b3199f33808182b32a3aeed422b2599
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 37f134a2ef4cb0cae2a24e38a3299687f56bc2bf7d98f56976d2bba605b356caec39022c33d18fb58d51933eaa0a3d747c0c4cfacfbe4c0ce6c029b0ff6ddd78
|
7
|
+
data.tar.gz: abd3a6803577db03c42eebf69795eded5e948e48a38363fa81b566c2ecc494d9313d07e280569ab424c561a91c6a08a00d1cfa6bc7674fd84b137e08b4af75ef
|
data/CHANGELOG.md
CHANGED
@@ -1,4 +1,9 @@
|
|
1
|
-
|
1
|
+
## 0.12.3
|
2
|
+
- Fixed repetitive processing
|
3
|
+
- Replaced newreg with registry cleanup, using newreg to replace old registry worked in 0.11, but not in 0.12
|
4
|
+
- Implemented Mutex for the save_registry thread. Also marshal @registry before thread start for better thread safety
|
5
|
+
|
6
|
+
## 0.12.2
|
2
7
|
- Fixed the exception handling, not trying to print how many JSON fields there are while catching the exception
|
3
8
|
|
4
9
|
## 0.12.1
|
@@ -96,10 +96,6 @@ config :prefix, :validate => :string, :required => false
|
|
96
96
|
|
97
97
|
config :path_filters, :validate => :array, :default => ['**/*'], :required => false
|
98
98
|
|
99
|
-
# TODO: Other feature requests
|
100
|
-
# show file path in logger
|
101
|
-
# add filepath as part of log message
|
102
|
-
# option to keep registry on local disk
|
103
99
|
|
104
100
|
|
105
101
|
public
|
@@ -107,6 +103,7 @@ def register
|
|
107
103
|
@pipe_id = Thread.current[:name].split("[").last.split("]").first
|
108
104
|
@logger.info("=== #{config_name} #{Gem.loaded_specs["logstash-input-"+config_name].version.to_s} / #{@pipe_id} / #{@id[0,6]} / ruby #{ RUBY_VERSION }p#{ RUBY_PATCHLEVEL } ===")
|
109
105
|
@logger.info("If this plugin doesn't work, please raise an issue in https://github.com/janmg/logstash-input-azure_blob_storage")
|
106
|
+
@busy_writing_registry = Mutex.new
|
110
107
|
# TODO: consider multiple readers, so add pipeline @id or use logstash-to-logstash communication?
|
111
108
|
# TODO: Implement retry ... Error: Connection refused - Failed to open TCP connection to
|
112
109
|
end
|
@@ -152,7 +149,7 @@ def run(queue)
|
|
152
149
|
# read filelist and set offsets to file length to mark all the old files as done
|
153
150
|
if registry_create_policy == "start_fresh"
|
154
151
|
@registry = list_blobs(true)
|
155
|
-
save_registry(
|
152
|
+
save_registry()
|
156
153
|
@logger.info("starting fresh, writing a clean registry to contain #{@registry.size} blobs/files")
|
157
154
|
end
|
158
155
|
|
@@ -178,7 +175,6 @@ def run(queue)
|
|
178
175
|
@logger.info("head will be: #{@head} and tail is set to #{@tail}")
|
179
176
|
end
|
180
177
|
|
181
|
-
newreg = Hash.new
|
182
178
|
filelist = Hash.new
|
183
179
|
worklist = Hash.new
|
184
180
|
@last = start = Time.now.to_i
|
@@ -197,7 +193,6 @@ def run(queue)
|
|
197
193
|
#filelist.sort_by(|k,v|resource(k)[:date])
|
198
194
|
worklist.clear
|
199
195
|
filelist.clear
|
200
|
-
newreg.clear
|
201
196
|
|
202
197
|
# Listing all the files
|
203
198
|
filelist = list_blobs(false)
|
@@ -208,16 +203,24 @@ def run(queue)
|
|
208
203
|
rescue
|
209
204
|
off = 0
|
210
205
|
end
|
211
|
-
|
206
|
+
@registry.store(name, { :offset => off, :length => file[:length] })
|
212
207
|
if (@debug_until > @processed) then @logger.info("2: adding offsets: #{name} #{off} #{file[:length]}") end
|
213
208
|
end
|
214
209
|
# size nilClass when the list doesn't grow?!
|
210
|
+
|
211
|
+
# clean registry of files that are not in the filelist
|
212
|
+
@registry.each do |name,file|
|
213
|
+
unless filelist.include?(name)
|
214
|
+
@registry.delete(name)
|
215
|
+
if (@debug_until > @processed) then @logger.info("purging #{name}") end
|
216
|
+
end
|
217
|
+
end
|
218
|
+
|
215
219
|
# Worklist is the subset of files where the already read offset is smaller than the file size
|
216
|
-
@registry = newreg
|
217
220
|
worklist.clear
|
218
221
|
chunk = nil
|
219
222
|
|
220
|
-
worklist =
|
223
|
+
worklist = @registry.select {|name,file| file[:offset] < file[:length]}
|
221
224
|
if (worklist.size > 4) then @logger.info("worklist contains #{worklist.size} blobs") end
|
222
225
|
|
223
226
|
# Start of processing
|
@@ -236,7 +239,8 @@ def run(queue)
|
|
236
239
|
chunk = full_read(name)
|
237
240
|
size=chunk.size
|
238
241
|
rescue Exception => e
|
239
|
-
|
242
|
+
# Azure::Core::Http::HTTPError / undefined method `message='
|
243
|
+
@logger.error("Failed to read #{name} ... will continue, set file as read and pretend this never happened")
|
240
244
|
@logger.error("#{size} size and #{file[:length]} file length")
|
241
245
|
size = file[:length]
|
242
246
|
end
|
@@ -275,12 +279,12 @@ def run(queue)
|
|
275
279
|
decorate(event)
|
276
280
|
queue << event
|
277
281
|
end
|
282
|
+
@processed += counter
|
278
283
|
rescue Exception => e
|
279
284
|
@logger.error("codec exception: #{e.message} .. will continue and pretend this never happened")
|
280
285
|
@registry.store(name, { :offset => file[:length], :length => file[:length] })
|
281
286
|
@logger.debug("#{chunk}")
|
282
287
|
end
|
283
|
-
@processed += counter
|
284
288
|
end
|
285
289
|
@registry.store(name, { :offset => size, :length => file[:length] })
|
286
290
|
# TODO add input plugin option to prevent connection cache
|
@@ -291,14 +295,14 @@ def run(queue)
|
|
291
295
|
return
|
292
296
|
end
|
293
297
|
if ((Time.now.to_i - @last) > @interval)
|
294
|
-
save_registry(
|
298
|
+
save_registry()
|
295
299
|
end
|
296
300
|
end
|
297
301
|
end
|
298
302
|
# The files that got processed after the last registry save need to be saved too, in case the worklist is empty for some intervals.
|
299
303
|
now = Time.now.to_i
|
300
304
|
if ((now - @last) > @interval)
|
301
|
-
save_registry(
|
305
|
+
save_registry()
|
302
306
|
end
|
303
307
|
sleeptime = interval - ((now - start) % interval)
|
304
308
|
if @debug_timer
|
@@ -309,10 +313,10 @@ def run(queue)
|
|
309
313
|
end
|
310
314
|
|
311
315
|
def stop
|
312
|
-
save_registry(
|
316
|
+
save_registry()
|
313
317
|
end
|
314
318
|
def close
|
315
|
-
save_registry(
|
319
|
+
save_registry()
|
316
320
|
end
|
317
321
|
|
318
322
|
|
@@ -490,30 +494,35 @@ def try_list_blobs(fill)
|
|
490
494
|
end
|
491
495
|
|
492
496
|
# When events were processed after the last registry save, start a thread to update the registry file.
|
493
|
-
def save_registry(
|
494
|
-
# Because of threading, processed values and regsaved are not thread safe, they can change as instance variable @! Most of the time this is fine because the registry is the last resort, but be careful about corner cases!
|
497
|
+
def save_registry()
|
495
498
|
unless @processed == @regsaved
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
|
499
|
+
unless (@busy_writing_registry.locked?)
|
500
|
+
# deep_copy hash, to save the registry independent from the variable for thread safety
|
501
|
+
# if deep_clone uses Marshal to do a copy,
|
502
|
+
regdump = Marshal.dump(@registry)
|
503
|
+
regsize = @registry.size
|
504
|
+
Thread.new {
|
505
|
+
begin
|
506
|
+
@busy_writing_registry.lock
|
507
|
+
unless (@registry_local_path)
|
508
|
+
@blob_client.create_block_blob(container, registry_path, regdump)
|
509
|
+
@logger.info("processed #{@processed} events, saving #{regsize} blobs and offsets to remote registry #{registry_path}")
|
510
|
+
else
|
511
|
+
File.open(@registry_local_path+"/"+@pipe_id, 'w') { |file| file.write(regdump) }
|
512
|
+
@logger.info("processed #{@processed} events, saving #{regsize} blobs and offsets to local registry #{registry_local_path+"/"+@pipe_id}")
|
513
|
+
end
|
514
|
+
@last = Time.now.to_i
|
515
|
+
@regsaved = @processed
|
516
|
+
rescue Exception => e
|
517
|
+
@logger.error("Oh my, registry write failed")
|
518
|
+
@logger.error("#{e.message}")
|
519
|
+
ensure
|
520
|
+
@busy_writing_registry.unlock
|
521
|
+
end
|
513
522
|
}
|
514
|
-
|
515
|
-
|
516
|
-
|
523
|
+
else
|
524
|
+
@logger.info("Skipped writing the registry because previous write still in progress, it just takes long or may be hanging!")
|
525
|
+
end
|
517
526
|
end
|
518
527
|
end
|
519
528
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = 'logstash-input-azure_blob_storage'
|
3
|
-
s.version = '0.12.
|
3
|
+
s.version = '0.12.3'
|
4
4
|
s.licenses = ['Apache-2.0']
|
5
5
|
s.summary = 'This logstash plugin reads and parses data from Azure Storage Blobs.'
|
6
6
|
s.description = <<-EOF
|
@@ -20,8 +20,8 @@ EOF
|
|
20
20
|
s.metadata = { "logstash_plugin" => "true", "logstash_group" => "input" }
|
21
21
|
|
22
22
|
# Gem dependencies
|
23
|
-
s.add_runtime_dependency 'logstash-core-plugin-api', '~> 2.
|
23
|
+
s.add_runtime_dependency 'logstash-core-plugin-api', '~> 2.0'
|
24
24
|
s.add_runtime_dependency 'stud', '~> 0.0.23'
|
25
25
|
s.add_runtime_dependency 'azure-storage-blob', '~> 2', '>= 2.0.3'
|
26
|
-
|
26
|
+
s.add_development_dependency 'logstash-devutils'
|
27
27
|
end
|
@@ -8,6 +8,10 @@ describe LogStash::Inputs::AzureBlobStorage do
|
|
8
8
|
let(:config) { { "interval" => 100 } }
|
9
9
|
end
|
10
10
|
|
11
|
+
def test_val
|
12
|
+
assert_qual('abc', val('letters=abc'))
|
13
|
+
end
|
14
|
+
|
11
15
|
def test_helper_methodes
|
12
16
|
assert_equal('b', AzureBlobStorage.val('a=b'))
|
13
17
|
assert_equal('whatever', AzureBlobStorage.strip_comma(',whatever'))
|
data/spec/inputs/test.rb
ADDED
metadata
CHANGED
@@ -1,21 +1,21 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: logstash-input-azure_blob_storage
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.12.
|
4
|
+
version: 0.12.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jan Geertsma
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-01-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
15
15
|
requirements:
|
16
16
|
- - "~>"
|
17
17
|
- !ruby/object:Gem::Version
|
18
|
-
version: '2.
|
18
|
+
version: '2.0'
|
19
19
|
name: logstash-core-plugin-api
|
20
20
|
prerelease: false
|
21
21
|
type: :runtime
|
@@ -23,7 +23,7 @@ dependencies:
|
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '2.
|
26
|
+
version: '2.0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
requirement: !ruby/object:Gem::Requirement
|
29
29
|
requirements:
|
@@ -58,6 +58,20 @@ dependencies:
|
|
58
58
|
- - ">="
|
59
59
|
- !ruby/object:Gem::Version
|
60
60
|
version: 2.0.3
|
61
|
+
- !ruby/object:Gem::Dependency
|
62
|
+
requirement: !ruby/object:Gem::Requirement
|
63
|
+
requirements:
|
64
|
+
- - ">="
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: '0'
|
67
|
+
name: logstash-devutils
|
68
|
+
prerelease: false
|
69
|
+
type: :development
|
70
|
+
version_requirements: !ruby/object:Gem::Requirement
|
71
|
+
requirements:
|
72
|
+
- - ">="
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: '0'
|
61
75
|
description: " This gem is a Logstash plugin. It reads and parses data from Azure\
|
62
76
|
\ Storage Blobs. The azure_blob_storage is a reimplementation to replace azureblob\
|
63
77
|
\ from azure-diagnostics-tools/Logstash. It can deal with larger volumes and partial\
|
@@ -77,6 +91,7 @@ files:
|
|
77
91
|
- lib/logstash/inputs/azure_blob_storage.rb
|
78
92
|
- logstash-input-azure_blob_storage.gemspec
|
79
93
|
- spec/inputs/azure_blob_storage_spec.rb
|
94
|
+
- spec/inputs/test.rb
|
80
95
|
homepage: https://github.com/janmg/logstash-input-azure_blob_storage
|
81
96
|
licenses:
|
82
97
|
- Apache-2.0
|
@@ -104,3 +119,4 @@ specification_version: 4
|
|
104
119
|
summary: This logstash plugin reads and parses data from Azure Storage Blobs.
|
105
120
|
test_files:
|
106
121
|
- spec/inputs/azure_blob_storage_spec.rb
|
122
|
+
- spec/inputs/test.rb
|