logstash-input-azure_blob_storage 0.12.2 → 0.12.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -1
- data/lib/logstash/inputs/azure_blob_storage.rb +47 -38
- data/logstash-input-azure_blob_storage.gemspec +3 -3
- data/spec/inputs/azure_blob_storage_spec.rb +4 -0
- data/spec/inputs/test.rb +11 -0
- metadata +20 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: da5b56c9fb4733d62604b7753d2e08e3d24100915f58d4f374ab6b6573d27408
|
4
|
+
data.tar.gz: 26f5a19de67615eeb27d302e0b0768fc0b3199f33808182b32a3aeed422b2599
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 37f134a2ef4cb0cae2a24e38a3299687f56bc2bf7d98f56976d2bba605b356caec39022c33d18fb58d51933eaa0a3d747c0c4cfacfbe4c0ce6c029b0ff6ddd78
|
7
|
+
data.tar.gz: abd3a6803577db03c42eebf69795eded5e948e48a38363fa81b566c2ecc494d9313d07e280569ab424c561a91c6a08a00d1cfa6bc7674fd84b137e08b4af75ef
|
data/CHANGELOG.md
CHANGED
@@ -1,4 +1,9 @@
|
|
1
|
-
|
1
|
+
## 0.12.3
|
2
|
+
- Fixed repetitive processing
|
3
|
+
- Replaced newreg with registry cleanup, using newreg to replace old registry worked in 0.11, but not in 0.12
|
4
|
+
- Implemented Mutex for the save_registry thread. Also marshal @registry before thread start for better thread safety
|
5
|
+
|
6
|
+
## 0.12.2
|
2
7
|
- Fixed the exception handling, not trying to print how many JSON fields there are while catching the exception
|
3
8
|
|
4
9
|
## 0.12.1
|
@@ -96,10 +96,6 @@ config :prefix, :validate => :string, :required => false
|
|
96
96
|
|
97
97
|
config :path_filters, :validate => :array, :default => ['**/*'], :required => false
|
98
98
|
|
99
|
-
# TODO: Other feature requests
|
100
|
-
# show file path in logger
|
101
|
-
# add filepath as part of log message
|
102
|
-
# option to keep registry on local disk
|
103
99
|
|
104
100
|
|
105
101
|
public
|
@@ -107,6 +103,7 @@ def register
|
|
107
103
|
@pipe_id = Thread.current[:name].split("[").last.split("]").first
|
108
104
|
@logger.info("=== #{config_name} #{Gem.loaded_specs["logstash-input-"+config_name].version.to_s} / #{@pipe_id} / #{@id[0,6]} / ruby #{ RUBY_VERSION }p#{ RUBY_PATCHLEVEL } ===")
|
109
105
|
@logger.info("If this plugin doesn't work, please raise an issue in https://github.com/janmg/logstash-input-azure_blob_storage")
|
106
|
+
@busy_writing_registry = Mutex.new
|
110
107
|
# TODO: consider multiple readers, so add pipeline @id or use logstash-to-logstash communication?
|
111
108
|
# TODO: Implement retry ... Error: Connection refused - Failed to open TCP connection to
|
112
109
|
end
|
@@ -152,7 +149,7 @@ def run(queue)
|
|
152
149
|
# read filelist and set offsets to file length to mark all the old files as done
|
153
150
|
if registry_create_policy == "start_fresh"
|
154
151
|
@registry = list_blobs(true)
|
155
|
-
save_registry(
|
152
|
+
save_registry()
|
156
153
|
@logger.info("starting fresh, writing a clean registry to contain #{@registry.size} blobs/files")
|
157
154
|
end
|
158
155
|
|
@@ -178,7 +175,6 @@ def run(queue)
|
|
178
175
|
@logger.info("head will be: #{@head} and tail is set to #{@tail}")
|
179
176
|
end
|
180
177
|
|
181
|
-
newreg = Hash.new
|
182
178
|
filelist = Hash.new
|
183
179
|
worklist = Hash.new
|
184
180
|
@last = start = Time.now.to_i
|
@@ -197,7 +193,6 @@ def run(queue)
|
|
197
193
|
#filelist.sort_by(|k,v|resource(k)[:date])
|
198
194
|
worklist.clear
|
199
195
|
filelist.clear
|
200
|
-
newreg.clear
|
201
196
|
|
202
197
|
# Listing all the files
|
203
198
|
filelist = list_blobs(false)
|
@@ -208,16 +203,24 @@ def run(queue)
|
|
208
203
|
rescue
|
209
204
|
off = 0
|
210
205
|
end
|
211
|
-
|
206
|
+
@registry.store(name, { :offset => off, :length => file[:length] })
|
212
207
|
if (@debug_until > @processed) then @logger.info("2: adding offsets: #{name} #{off} #{file[:length]}") end
|
213
208
|
end
|
214
209
|
# size nilClass when the list doesn't grow?!
|
210
|
+
|
211
|
+
# clean registry of files that are not in the filelist
|
212
|
+
@registry.each do |name,file|
|
213
|
+
unless filelist.include?(name)
|
214
|
+
@registry.delete(name)
|
215
|
+
if (@debug_until > @processed) then @logger.info("purging #{name}") end
|
216
|
+
end
|
217
|
+
end
|
218
|
+
|
215
219
|
# Worklist is the subset of files where the already read offset is smaller than the file size
|
216
|
-
@registry = newreg
|
217
220
|
worklist.clear
|
218
221
|
chunk = nil
|
219
222
|
|
220
|
-
worklist =
|
223
|
+
worklist = @registry.select {|name,file| file[:offset] < file[:length]}
|
221
224
|
if (worklist.size > 4) then @logger.info("worklist contains #{worklist.size} blobs") end
|
222
225
|
|
223
226
|
# Start of processing
|
@@ -236,7 +239,8 @@ def run(queue)
|
|
236
239
|
chunk = full_read(name)
|
237
240
|
size=chunk.size
|
238
241
|
rescue Exception => e
|
239
|
-
|
242
|
+
# Azure::Core::Http::HTTPError / undefined method `message='
|
243
|
+
@logger.error("Failed to read #{name} ... will continue, set file as read and pretend this never happened")
|
240
244
|
@logger.error("#{size} size and #{file[:length]} file length")
|
241
245
|
size = file[:length]
|
242
246
|
end
|
@@ -275,12 +279,12 @@ def run(queue)
|
|
275
279
|
decorate(event)
|
276
280
|
queue << event
|
277
281
|
end
|
282
|
+
@processed += counter
|
278
283
|
rescue Exception => e
|
279
284
|
@logger.error("codec exception: #{e.message} .. will continue and pretend this never happened")
|
280
285
|
@registry.store(name, { :offset => file[:length], :length => file[:length] })
|
281
286
|
@logger.debug("#{chunk}")
|
282
287
|
end
|
283
|
-
@processed += counter
|
284
288
|
end
|
285
289
|
@registry.store(name, { :offset => size, :length => file[:length] })
|
286
290
|
# TODO add input plugin option to prevent connection cache
|
@@ -291,14 +295,14 @@ def run(queue)
|
|
291
295
|
return
|
292
296
|
end
|
293
297
|
if ((Time.now.to_i - @last) > @interval)
|
294
|
-
save_registry(
|
298
|
+
save_registry()
|
295
299
|
end
|
296
300
|
end
|
297
301
|
end
|
298
302
|
# The files that got processed after the last registry save need to be saved too, in case the worklist is empty for some intervals.
|
299
303
|
now = Time.now.to_i
|
300
304
|
if ((now - @last) > @interval)
|
301
|
-
save_registry(
|
305
|
+
save_registry()
|
302
306
|
end
|
303
307
|
sleeptime = interval - ((now - start) % interval)
|
304
308
|
if @debug_timer
|
@@ -309,10 +313,10 @@ def run(queue)
|
|
309
313
|
end
|
310
314
|
|
311
315
|
def stop
|
312
|
-
save_registry(
|
316
|
+
save_registry()
|
313
317
|
end
|
314
318
|
def close
|
315
|
-
save_registry(
|
319
|
+
save_registry()
|
316
320
|
end
|
317
321
|
|
318
322
|
|
@@ -490,30 +494,35 @@ def try_list_blobs(fill)
|
|
490
494
|
end
|
491
495
|
|
492
496
|
# When events were processed after the last registry save, start a thread to update the registry file.
|
493
|
-
def save_registry(
|
494
|
-
# Because of threading, processed values and regsaved are not thread safe, they can change as instance variable @! Most of the time this is fine because the registry is the last resort, but be careful about corner cases!
|
497
|
+
def save_registry()
|
495
498
|
unless @processed == @regsaved
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
|
499
|
+
unless (@busy_writing_registry.locked?)
|
500
|
+
# deep_copy hash, to save the registry independent from the variable for thread safety
|
501
|
+
# deep_clone would use Marshal to do a copy
|
502
|
+
regdump = Marshal.dump(@registry)
|
503
|
+
regsize = @registry.size
|
504
|
+
Thread.new {
|
505
|
+
begin
|
506
|
+
@busy_writing_registry.lock
|
507
|
+
unless (@registry_local_path)
|
508
|
+
@blob_client.create_block_blob(container, registry_path, regdump)
|
509
|
+
@logger.info("processed #{@processed} events, saving #{regsize} blobs and offsets to remote registry #{registry_path}")
|
510
|
+
else
|
511
|
+
File.open(@registry_local_path+"/"+@pipe_id, 'w') { |file| file.write(regdump) }
|
512
|
+
@logger.info("processed #{@processed} events, saving #{regsize} blobs and offsets to local registry #{registry_local_path+"/"+@pipe_id}")
|
513
|
+
end
|
514
|
+
@last = Time.now.to_i
|
515
|
+
@regsaved = @processed
|
516
|
+
rescue Exception => e
|
517
|
+
@logger.error("Oh my, registry write failed")
|
518
|
+
@logger.error("#{e.message}")
|
519
|
+
ensure
|
520
|
+
@busy_writing_registry.unlock
|
521
|
+
end
|
513
522
|
}
|
514
|
-
|
515
|
-
|
516
|
-
|
523
|
+
else
|
524
|
+
@logger.info("Skipped writing the registry because previous write still in progress, it just takes long or may be hanging!")
|
525
|
+
end
|
517
526
|
end
|
518
527
|
end
|
519
528
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = 'logstash-input-azure_blob_storage'
|
3
|
-
s.version = '0.12.
|
3
|
+
s.version = '0.12.3'
|
4
4
|
s.licenses = ['Apache-2.0']
|
5
5
|
s.summary = 'This logstash plugin reads and parses data from Azure Storage Blobs.'
|
6
6
|
s.description = <<-EOF
|
@@ -20,8 +20,8 @@ EOF
|
|
20
20
|
s.metadata = { "logstash_plugin" => "true", "logstash_group" => "input" }
|
21
21
|
|
22
22
|
# Gem dependencies
|
23
|
-
s.add_runtime_dependency 'logstash-core-plugin-api', '~> 2.
|
23
|
+
s.add_runtime_dependency 'logstash-core-plugin-api', '~> 2.0'
|
24
24
|
s.add_runtime_dependency 'stud', '~> 0.0.23'
|
25
25
|
s.add_runtime_dependency 'azure-storage-blob', '~> 2', '>= 2.0.3'
|
26
|
-
|
26
|
+
s.add_development_dependency 'logstash-devutils'
|
27
27
|
end
|
@@ -8,6 +8,10 @@ describe LogStash::Inputs::AzureBlobStorage do
|
|
8
8
|
let(:config) { { "interval" => 100 } }
|
9
9
|
end
|
10
10
|
|
11
|
+
def test_val
|
12
|
+
assert_qual('abc', val('letters=abc'))
|
13
|
+
end
|
14
|
+
|
11
15
|
def test_helper_methodes
|
12
16
|
assert_equal('b', AzureBlobStorage.val('a=b'))
|
13
17
|
assert_equal('whatever', AzureBlobStorage.strip_comma(',whatever'))
|
data/spec/inputs/test.rb
ADDED
metadata
CHANGED
@@ -1,21 +1,21 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: logstash-input-azure_blob_storage
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.12.
|
4
|
+
version: 0.12.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jan Geertsma
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-01-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
15
15
|
requirements:
|
16
16
|
- - "~>"
|
17
17
|
- !ruby/object:Gem::Version
|
18
|
-
version: '2.
|
18
|
+
version: '2.0'
|
19
19
|
name: logstash-core-plugin-api
|
20
20
|
prerelease: false
|
21
21
|
type: :runtime
|
@@ -23,7 +23,7 @@ dependencies:
|
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '2.
|
26
|
+
version: '2.0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
requirement: !ruby/object:Gem::Requirement
|
29
29
|
requirements:
|
@@ -58,6 +58,20 @@ dependencies:
|
|
58
58
|
- - ">="
|
59
59
|
- !ruby/object:Gem::Version
|
60
60
|
version: 2.0.3
|
61
|
+
- !ruby/object:Gem::Dependency
|
62
|
+
requirement: !ruby/object:Gem::Requirement
|
63
|
+
requirements:
|
64
|
+
- - ">="
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: '0'
|
67
|
+
name: logstash-devutils
|
68
|
+
prerelease: false
|
69
|
+
type: :development
|
70
|
+
version_requirements: !ruby/object:Gem::Requirement
|
71
|
+
requirements:
|
72
|
+
- - ">="
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: '0'
|
61
75
|
description: " This gem is a Logstash plugin. It reads and parses data from Azure\
|
62
76
|
\ Storage Blobs. The azure_blob_storage is a reimplementation to replace azureblob\
|
63
77
|
\ from azure-diagnostics-tools/Logstash. It can deal with larger volumes and partial\
|
@@ -77,6 +91,7 @@ files:
|
|
77
91
|
- lib/logstash/inputs/azure_blob_storage.rb
|
78
92
|
- logstash-input-azure_blob_storage.gemspec
|
79
93
|
- spec/inputs/azure_blob_storage_spec.rb
|
94
|
+
- spec/inputs/test.rb
|
80
95
|
homepage: https://github.com/janmg/logstash-input-azure_blob_storage
|
81
96
|
licenses:
|
82
97
|
- Apache-2.0
|
@@ -104,3 +119,4 @@ specification_version: 4
|
|
104
119
|
summary: This logstash plugin reads and parses data from Azure Storage Blobs.
|
105
120
|
test_files:
|
106
121
|
- spec/inputs/azure_blob_storage_spec.rb
|
122
|
+
- spec/inputs/test.rb
|