logstash-input-azure_blob_storage 0.11.7 → 0.12.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +19 -0
- data/lib/logstash/inputs/azure_blob_storage.rb +52 -38
- data/logstash-input-azure_blob_storage.gemspec +4 -4
- data/spec/inputs/azure_blob_storage_spec.rb +4 -0
- data/spec/inputs/test.rb +11 -0
- metadata +32 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: da5b56c9fb4733d62604b7753d2e08e3d24100915f58d4f374ab6b6573d27408
|
4
|
+
data.tar.gz: 26f5a19de67615eeb27d302e0b0768fc0b3199f33808182b32a3aeed422b2599
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 37f134a2ef4cb0cae2a24e38a3299687f56bc2bf7d98f56976d2bba605b356caec39022c33d18fb58d51933eaa0a3d747c0c4cfacfbe4c0ce6c029b0ff6ddd78
|
7
|
+
data.tar.gz: abd3a6803577db03c42eebf69795eded5e948e48a38363fa81b566c2ecc494d9313d07e280569ab424c561a91c6a08a00d1cfa6bc7674fd84b137e08b4af75ef
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,22 @@
|
|
1
|
+
## 0.12.3
|
2
|
+
- Fixed repetitive processing
|
3
|
+
- Replaced newreg with registry cleanup, using newreg to replace old registry worked in 0.11, but not in 0.12
|
4
|
+
- Implemented Mutex for the save_registry thread. Also marshal @registry before thread start for better thread safety
|
5
|
+
|
6
|
+
## 0.12.2
|
7
|
+
- Fixed the exception handling, not trying to print how many JSON fields there are while catching the exception
|
8
|
+
|
9
|
+
## 0.12.1
|
10
|
+
- Catch NSGFLOW logs when the JSON parsing somehow failed
|
11
|
+
|
12
|
+
## 0.12.0
|
13
|
+
- version 2 of azure-storage
|
14
|
+
- saving current files registry, not keeping historical files
|
15
|
+
|
16
|
+
## 0.11.7
|
17
|
+
- implemented skip_learning
|
18
|
+
- start ignoring failed files and not retry
|
19
|
+
|
1
20
|
## 0.11.6
|
2
21
|
- fix in json head and tail learning the max_results
|
3
22
|
- broke out connection setup in order to call it again if connection exceptions come
|
@@ -96,10 +96,6 @@ config :prefix, :validate => :string, :required => false
|
|
96
96
|
|
97
97
|
config :path_filters, :validate => :array, :default => ['**/*'], :required => false
|
98
98
|
|
99
|
-
# TODO: Other feature requests
|
100
|
-
# show file path in logger
|
101
|
-
# add filepath as part of log message
|
102
|
-
# option to keep registry on local disk
|
103
99
|
|
104
100
|
|
105
101
|
public
|
@@ -107,6 +103,7 @@ def register
|
|
107
103
|
@pipe_id = Thread.current[:name].split("[").last.split("]").first
|
108
104
|
@logger.info("=== #{config_name} #{Gem.loaded_specs["logstash-input-"+config_name].version.to_s} / #{@pipe_id} / #{@id[0,6]} / ruby #{ RUBY_VERSION }p#{ RUBY_PATCHLEVEL } ===")
|
109
105
|
@logger.info("If this plugin doesn't work, please raise an issue in https://github.com/janmg/logstash-input-azure_blob_storage")
|
106
|
+
@busy_writing_registry = Mutex.new
|
110
107
|
# TODO: consider multiple readers, so add pipeline @id or use logstash-to-logstash communication?
|
111
108
|
# TODO: Implement retry ... Error: Connection refused - Failed to open TCP connection to
|
112
109
|
end
|
@@ -152,7 +149,7 @@ def run(queue)
|
|
152
149
|
# read filelist and set offsets to file length to mark all the old files as done
|
153
150
|
if registry_create_policy == "start_fresh"
|
154
151
|
@registry = list_blobs(true)
|
155
|
-
save_registry(
|
152
|
+
save_registry()
|
156
153
|
@logger.info("starting fresh, writing a clean registry to contain #{@registry.size} blobs/files")
|
157
154
|
end
|
158
155
|
|
@@ -178,7 +175,6 @@ def run(queue)
|
|
178
175
|
@logger.info("head will be: #{@head} and tail is set to #{@tail}")
|
179
176
|
end
|
180
177
|
|
181
|
-
newreg = Hash.new
|
182
178
|
filelist = Hash.new
|
183
179
|
worklist = Hash.new
|
184
180
|
@last = start = Time.now.to_i
|
@@ -197,7 +193,6 @@ def run(queue)
|
|
197
193
|
#filelist.sort_by(|k,v|resource(k)[:date])
|
198
194
|
worklist.clear
|
199
195
|
filelist.clear
|
200
|
-
newreg.clear
|
201
196
|
|
202
197
|
# Listing all the files
|
203
198
|
filelist = list_blobs(false)
|
@@ -208,15 +203,24 @@ def run(queue)
|
|
208
203
|
rescue
|
209
204
|
off = 0
|
210
205
|
end
|
211
|
-
|
206
|
+
@registry.store(name, { :offset => off, :length => file[:length] })
|
212
207
|
if (@debug_until > @processed) then @logger.info("2: adding offsets: #{name} #{off} #{file[:length]}") end
|
213
208
|
end
|
214
209
|
# size nilClass when the list doesn't grow?!
|
210
|
+
|
211
|
+
# clean registry of files that are not in the filelist
|
212
|
+
@registry.each do |name,file|
|
213
|
+
unless filelist.include?(name)
|
214
|
+
@registry.delete(name)
|
215
|
+
if (@debug_until > @processed) then @logger.info("purging #{name}") end
|
216
|
+
end
|
217
|
+
end
|
218
|
+
|
215
219
|
# Worklist is the subset of files where the already read offset is smaller than the file size
|
216
|
-
|
220
|
+
worklist.clear
|
217
221
|
chunk = nil
|
218
222
|
|
219
|
-
worklist =
|
223
|
+
worklist = @registry.select {|name,file| file[:offset] < file[:length]}
|
220
224
|
if (worklist.size > 4) then @logger.info("worklist contains #{worklist.size} blobs") end
|
221
225
|
|
222
226
|
# Start of processing
|
@@ -235,7 +239,8 @@ def run(queue)
|
|
235
239
|
chunk = full_read(name)
|
236
240
|
size=chunk.size
|
237
241
|
rescue Exception => e
|
238
|
-
|
242
|
+
# Azure::Core::Http::HTTPError / undefined method `message='
|
243
|
+
@logger.error("Failed to read #{name} ... will continue, set file as read and pretend this never happened")
|
239
244
|
@logger.error("#{size} size and #{file[:length]} file length")
|
240
245
|
size = file[:length]
|
241
246
|
end
|
@@ -274,12 +279,12 @@ def run(queue)
|
|
274
279
|
decorate(event)
|
275
280
|
queue << event
|
276
281
|
end
|
282
|
+
@processed += counter
|
277
283
|
rescue Exception => e
|
278
284
|
@logger.error("codec exception: #{e.message} .. will continue and pretend this never happened")
|
279
285
|
@registry.store(name, { :offset => file[:length], :length => file[:length] })
|
280
286
|
@logger.debug("#{chunk}")
|
281
287
|
end
|
282
|
-
@processed += counter
|
283
288
|
end
|
284
289
|
@registry.store(name, { :offset => size, :length => file[:length] })
|
285
290
|
# TODO add input plugin option to prevent connection cache
|
@@ -290,14 +295,14 @@ def run(queue)
|
|
290
295
|
return
|
291
296
|
end
|
292
297
|
if ((Time.now.to_i - @last) > @interval)
|
293
|
-
save_registry(
|
298
|
+
save_registry()
|
294
299
|
end
|
295
300
|
end
|
296
301
|
end
|
297
302
|
# The files that got processed after the last registry save need to be saved too, in case the worklist is empty for some intervals.
|
298
303
|
now = Time.now.to_i
|
299
304
|
if ((now - @last) > @interval)
|
300
|
-
save_registry(
|
305
|
+
save_registry()
|
301
306
|
end
|
302
307
|
sleeptime = interval - ((now - start) % interval)
|
303
308
|
if @debug_timer
|
@@ -308,10 +313,10 @@ def run(queue)
|
|
308
313
|
end
|
309
314
|
|
310
315
|
def stop
|
311
|
-
save_registry(
|
316
|
+
save_registry()
|
312
317
|
end
|
313
318
|
def close
|
314
|
-
save_registry(
|
319
|
+
save_registry()
|
315
320
|
end
|
316
321
|
|
317
322
|
|
@@ -388,6 +393,7 @@ end
|
|
388
393
|
|
389
394
|
def nsgflowlog(queue, json, name)
|
390
395
|
count=0
|
396
|
+
begin
|
391
397
|
json["records"].each do |record|
|
392
398
|
res = resource(record["resourceId"])
|
393
399
|
resource = { :subscription => res[:subscription], :resourcegroup => res[:resourcegroup], :nsg => res[:nsg] }
|
@@ -417,6 +423,9 @@ def nsgflowlog(queue, json, name)
|
|
417
423
|
end
|
418
424
|
end
|
419
425
|
end
|
426
|
+
rescue Exception => e
|
427
|
+
@logger.error("NSG Flowlog problem for #{name} and error message #{e.message}")
|
428
|
+
end
|
420
429
|
return count
|
421
430
|
end
|
422
431
|
|
@@ -485,30 +494,35 @@ def try_list_blobs(fill)
|
|
485
494
|
end
|
486
495
|
|
487
496
|
# When events were processed after the last registry save, start a thread to update the registry file.
|
488
|
-
def save_registry(
|
489
|
-
# Because of threading, processed values and regsaved are not thread safe, they can change as instance variable @! Most of the time this is fine because the registry is the last resort, but be careful about corner cases!
|
497
|
+
def save_registry()
|
490
498
|
unless @processed == @regsaved
|
491
|
-
|
492
|
-
|
493
|
-
|
494
|
-
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
|
499
|
+
unless (@busy_writing_registry.locked?)
|
500
|
+
# deep_copy hash, to save the registry independent from the variable for thread safety
|
501
|
+
# if deep_clone uses Marshal to do a copy,
|
502
|
+
regdump = Marshal.dump(@registry)
|
503
|
+
regsize = @registry.size
|
504
|
+
Thread.new {
|
505
|
+
begin
|
506
|
+
@busy_writing_registry.lock
|
507
|
+
unless (@registry_local_path)
|
508
|
+
@blob_client.create_block_blob(container, registry_path, regdump)
|
509
|
+
@logger.info("processed #{@processed} events, saving #{regsize} blobs and offsets to remote registry #{registry_path}")
|
510
|
+
else
|
511
|
+
File.open(@registry_local_path+"/"+@pipe_id, 'w') { |file| file.write(regdump) }
|
512
|
+
@logger.info("processed #{@processed} events, saving #{regsize} blobs and offsets to local registry #{registry_local_path+"/"+@pipe_id}")
|
513
|
+
end
|
514
|
+
@last = Time.now.to_i
|
515
|
+
@regsaved = @processed
|
516
|
+
rescue Exception => e
|
517
|
+
@logger.error("Oh my, registry write failed")
|
518
|
+
@logger.error("#{e.message}")
|
519
|
+
ensure
|
520
|
+
@busy_writing_registry.unlock
|
521
|
+
end
|
508
522
|
}
|
509
|
-
|
510
|
-
|
511
|
-
|
523
|
+
else
|
524
|
+
@logger.info("Skipped writing the registry because previous write still in progress, it just takes long or may be hanging!")
|
525
|
+
end
|
512
526
|
end
|
513
527
|
end
|
514
528
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = 'logstash-input-azure_blob_storage'
|
3
|
-
s.version = '0.
|
3
|
+
s.version = '0.12.3'
|
4
4
|
s.licenses = ['Apache-2.0']
|
5
5
|
s.summary = 'This logstash plugin reads and parses data from Azure Storage Blobs.'
|
6
6
|
s.description = <<-EOF
|
@@ -20,8 +20,8 @@ EOF
|
|
20
20
|
s.metadata = { "logstash_plugin" => "true", "logstash_group" => "input" }
|
21
21
|
|
22
22
|
# Gem dependencies
|
23
|
-
s.add_runtime_dependency 'logstash-core-plugin-api', '~> 2.
|
23
|
+
s.add_runtime_dependency 'logstash-core-plugin-api', '~> 2.0'
|
24
24
|
s.add_runtime_dependency 'stud', '~> 0.0.23'
|
25
|
-
s.add_runtime_dependency 'azure-storage-blob', '~>
|
26
|
-
|
25
|
+
s.add_runtime_dependency 'azure-storage-blob', '~> 2', '>= 2.0.3'
|
26
|
+
s.add_development_dependency 'logstash-devutils'
|
27
27
|
end
|
@@ -8,6 +8,10 @@ describe LogStash::Inputs::AzureBlobStorage do
|
|
8
8
|
let(:config) { { "interval" => 100 } }
|
9
9
|
end
|
10
10
|
|
11
|
+
def test_val
|
12
|
+
assert_qual('abc', val('letters=abc'))
|
13
|
+
end
|
14
|
+
|
11
15
|
def test_helper_methodes
|
12
16
|
assert_equal('b', AzureBlobStorage.val('a=b'))
|
13
17
|
assert_equal('whatever', AzureBlobStorage.strip_comma(',whatever'))
|
data/spec/inputs/test.rb
ADDED
metadata
CHANGED
@@ -1,29 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: logstash-input-azure_blob_storage
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.12.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jan Geertsma
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-01-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
15
15
|
requirements:
|
16
16
|
- - "~>"
|
17
17
|
- !ruby/object:Gem::Version
|
18
|
-
version: '2.
|
18
|
+
version: '2.0'
|
19
19
|
name: logstash-core-plugin-api
|
20
|
-
type: :runtime
|
21
20
|
prerelease: false
|
21
|
+
type: :runtime
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '2.
|
26
|
+
version: '2.0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
requirement: !ruby/object:Gem::Requirement
|
29
29
|
requirements:
|
@@ -31,8 +31,8 @@ dependencies:
|
|
31
31
|
- !ruby/object:Gem::Version
|
32
32
|
version: 0.0.23
|
33
33
|
name: stud
|
34
|
-
type: :runtime
|
35
34
|
prerelease: false
|
35
|
+
type: :runtime
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
@@ -43,15 +43,35 @@ dependencies:
|
|
43
43
|
requirements:
|
44
44
|
- - "~>"
|
45
45
|
- !ruby/object:Gem::Version
|
46
|
-
version: '
|
46
|
+
version: '2'
|
47
|
+
- - ">="
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: 2.0.3
|
47
50
|
name: azure-storage-blob
|
48
|
-
type: :runtime
|
49
51
|
prerelease: false
|
52
|
+
type: :runtime
|
50
53
|
version_requirements: !ruby/object:Gem::Requirement
|
51
54
|
requirements:
|
52
55
|
- - "~>"
|
53
56
|
- !ruby/object:Gem::Version
|
54
|
-
version: '
|
57
|
+
version: '2'
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: 2.0.3
|
61
|
+
- !ruby/object:Gem::Dependency
|
62
|
+
requirement: !ruby/object:Gem::Requirement
|
63
|
+
requirements:
|
64
|
+
- - ">="
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: '0'
|
67
|
+
name: logstash-devutils
|
68
|
+
prerelease: false
|
69
|
+
type: :development
|
70
|
+
version_requirements: !ruby/object:Gem::Requirement
|
71
|
+
requirements:
|
72
|
+
- - ">="
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: '0'
|
55
75
|
description: " This gem is a Logstash plugin. It reads and parses data from Azure\
|
56
76
|
\ Storage Blobs. The azure_blob_storage is a reimplementation to replace azureblob\
|
57
77
|
\ from azure-diagnostics-tools/Logstash. It can deal with larger volumes and partial\
|
@@ -71,6 +91,7 @@ files:
|
|
71
91
|
- lib/logstash/inputs/azure_blob_storage.rb
|
72
92
|
- logstash-input-azure_blob_storage.gemspec
|
73
93
|
- spec/inputs/azure_blob_storage_spec.rb
|
94
|
+
- spec/inputs/test.rb
|
74
95
|
homepage: https://github.com/janmg/logstash-input-azure_blob_storage
|
75
96
|
licenses:
|
76
97
|
- Apache-2.0
|
@@ -92,9 +113,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
92
113
|
- !ruby/object:Gem::Version
|
93
114
|
version: '0'
|
94
115
|
requirements: []
|
95
|
-
rubygems_version: 3.
|
116
|
+
rubygems_version: 3.1.6
|
96
117
|
signing_key:
|
97
118
|
specification_version: 4
|
98
119
|
summary: This logstash plugin reads and parses data from Azure Storage Blobs.
|
99
120
|
test_files:
|
100
121
|
- spec/inputs/azure_blob_storage_spec.rb
|
122
|
+
- spec/inputs/test.rb
|