logstash-input-azure_blob_storage 0.11.7 → 0.12.3

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 0dd48413c8fc381dc144c1a4a58c82906533e4f94b36d631c597ecc766aa8edf
- data.tar.gz: 3fb23ac270d539092ca52d73418c710e6d12816635b066da004728bdaef3cc9b
+ metadata.gz: da5b56c9fb4733d62604b7753d2e08e3d24100915f58d4f374ab6b6573d27408
+ data.tar.gz: 26f5a19de67615eeb27d302e0b0768fc0b3199f33808182b32a3aeed422b2599
  SHA512:
- metadata.gz: 84c46edd2afbfe316c2fd5a3b8601f8841308270c3050425a62def1305065aae7868f8be14a98fcc1a7b98f3ccfeee3c9f8a9f9652bf7e799f8d8c5ba1016334
- data.tar.gz: f2a2a27e068f8d384be829abde10935d0cc323da3eb943264ce05e11d9ee1f383fcfac47cbad51a909623638477b0bef5323d2a5b2def1d8666444d08511aa24
+ metadata.gz: 37f134a2ef4cb0cae2a24e38a3299687f56bc2bf7d98f56976d2bba605b356caec39022c33d18fb58d51933eaa0a3d747c0c4cfacfbe4c0ce6c029b0ff6ddd78
+ data.tar.gz: abd3a6803577db03c42eebf69795eded5e948e48a38363fa81b566c2ecc494d9313d07e280569ab424c561a91c6a08a00d1cfa6bc7674fd84b137e08b4af75ef
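
The checksum swap above records the SHA256 and SHA512 digests RubyGems keeps for the metadata.gz and data.tar.gz inside the .gem archive. To verify a downloaded gem against these values yourself, a minimal sketch using Ruby's stdlib Digest (the file names assume you have unpacked the .gem archive into the current directory):

    require 'digest'

    # a .gem file is a plain tar archive; unpack it first, e.g.
    #   tar -xf logstash-input-azure_blob_storage-0.12.3.gem
    puts Digest::SHA256.file('metadata.gz').hexdigest   # compare with the metadata.gz SHA256 above
    puts Digest::SHA512.file('data.tar.gz').hexdigest   # compare with the data.tar.gz SHA512 above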
data/CHANGELOG.md CHANGED
@@ -1,3 +1,22 @@
+ ## 0.12.3
+ - Fixed repetitive processing
+ - Replaced newreg with a registry cleanup; using newreg to replace the old registry worked in 0.11, but not in 0.12
+ - Implemented a Mutex for the save_registry thread; also marshal @registry before the thread starts for better thread safety
+
+ ## 0.12.2
+ - Fixed the exception handling, no longer trying to print how many JSON fields there are while catching the exception
+
+ ## 0.12.1
+ - Catch NSGFLOW logs when the JSON parsing somehow failed
+
+ ## 0.12.0
+ - version 2 of azure-storage
+ - save the current files registry, don't keep historical files
+
+ ## 0.11.7
+ - implemented skip_learning
+ - start ignoring failed files instead of retrying them
+
  ## 0.11.6
  - fix in json head and tail learning the max_results
  - broke out connection setup in order to call it again if connection exceptions come
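
The two 0.12.3 thread-safety fixes work together, as the code diffs below show: a Mutex guards the background registry writer, and Marshal.dump takes a snapshot of @registry before the thread starts, so the writer serializes a frozen copy instead of the live hash. A minimal standalone sketch of that pattern (the variable names here are illustrative, not the plugin's API):

    registry = { 'blob1' => { offset: 0, length: 10 } }
    write_lock = Mutex.new

    unless write_lock.locked?
      snapshot = Marshal.dump(registry)  # deep copy, frozen at this moment
      Thread.new do
        write_lock.synchronize do
          # later mutations of `registry` cannot race with this write
          File.binwrite('registry.dat', snapshot)
        end
      end
    end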
lib/logstash/inputs/azure_blob_storage.rb CHANGED
@@ -96,10 +96,6 @@ config :prefix, :validate => :string, :required => false
 
  config :path_filters, :validate => :array, :default => ['**/*'], :required => false
 
- # TODO: Other feature requests
- # show file path in logger
- # add filepath as part of log message
- # option to keep registry on local disk
 
 
  public
@@ -107,6 +103,7 @@ def register
  @pipe_id = Thread.current[:name].split("[").last.split("]").first
  @logger.info("=== #{config_name} #{Gem.loaded_specs["logstash-input-"+config_name].version.to_s} / #{@pipe_id} / #{@id[0,6]} / ruby #{ RUBY_VERSION }p#{ RUBY_PATCHLEVEL } ===")
  @logger.info("If this plugin doesn't work, please raise an issue in https://github.com/janmg/logstash-input-azure_blob_storage")
+ @busy_writing_registry = Mutex.new
  # TODO: consider multiple readers, so add pipeline @id or use logstash-to-logstash communication?
  # TODO: Implement retry ... Error: Connection refused - Failed to open TCP connection to
  end
@@ -152,7 +149,7 @@ def run(queue)
  # read filelist and set offsets to file length to mark all the old files as done
  if registry_create_policy == "start_fresh"
  @registry = list_blobs(true)
- save_registry(@registry)
+ save_registry()
  @logger.info("starting fresh, writing a clean registry to contain #{@registry.size} blobs/files")
  end
 
@@ -178,7 +175,6 @@ def run(queue)
  @logger.info("head will be: #{@head} and tail is set to #{@tail}")
  end
 
- newreg = Hash.new
  filelist = Hash.new
  worklist = Hash.new
  @last = start = Time.now.to_i
@@ -197,7 +193,6 @@ def run(queue)
  #filelist.sort_by(|k,v|resource(k)[:date])
  worklist.clear
  filelist.clear
- newreg.clear
 
  # Listing all the files
  filelist = list_blobs(false)
@@ -208,15 +203,24 @@ def run(queue)
  rescue
  off = 0
  end
- newreg.store(name, { :offset => off, :length => file[:length] })
+ @registry.store(name, { :offset => off, :length => file[:length] })
  if (@debug_until > @processed) then @logger.info("2: adding offsets: #{name} #{off} #{file[:length]}") end
  end
  # size nilClass when the list doesn't grow?!
+
+ # clean registry of files that are not in the filelist
+ @registry.each do |name,file|
+ unless filelist.include?(name)
+ @registry.delete(name)
+ if (@debug_until > @processed) then @logger.info("purging #{name}") end
+ end
+ end
+
  # Worklist is the subset of files where the already read offset is smaller than the file size
- worklist.clear
+ worklist.clear
  chunk = nil
 
- worklist = newreg.select {|name,file| file[:offset] < file[:length]}
+ worklist = @registry.select {|name,file| file[:offset] < file[:length]}
  if (worklist.size > 4) then @logger.info("worklist contains #{worklist.size} blobs") end
 
  # Start of processing
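
The purge loop added here deletes keys from @registry while iterating over it with each. Ruby tolerates deleting the current key during Hash#each, but Hash#delete_if expresses the same cleanup without leaning on that behavior; a sketch of the equivalent, under the same assumptions about filelist and the debug flag:

    @registry.delete_if do |name, _file|
      next false if filelist.include?(name)
      @logger.info("purging #{name}") if @debug_until > @processed
      true  # returning true removes the entry
    end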
@@ -235,7 +239,8 @@ def run(queue)
  chunk = full_read(name)
  size=chunk.size
  rescue Exception => e
- @logger.error("Failed to read #{name} because of: #{e.message} .. will continue, set file as read and pretend this never happened")
+ # Azure::Core::Http::HTTPError / undefined method `message='
+ @logger.error("Failed to read #{name} ... will continue, set file as read and pretend this never happened")
  @logger.error("#{size} size and #{file[:length]} file length")
  size = file[:length]
  end
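
The new comment hints at why the message was dropped from the log line: calling e.message on an Azure::Core::Http::HTTPError apparently raised `undefined method `message='`. Rescuing the Azure class separately from generic errors would keep a useful message for the common case; a sketch, assuming azure-storage-blob v2's error class exposes status_code and description attributes:

    begin
      chunk = full_read(name)
      size = chunk.size
    rescue Azure::Core::Http::HTTPError => e
      @logger.error("Failed to read #{name}: HTTP #{e.status_code} #{e.description}")
      size = file[:length]
    rescue StandardError => e
      @logger.error("Failed to read #{name}: #{e.message}")
      size = file[:length]
    end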
@@ -274,12 +279,12 @@ def run(queue)
  decorate(event)
  queue << event
  end
+ @processed += counter
  rescue Exception => e
  @logger.error("codec exception: #{e.message} .. will continue and pretend this never happened")
  @registry.store(name, { :offset => file[:length], :length => file[:length] })
  @logger.debug("#{chunk}")
  end
- @processed += counter
  end
  @registry.store(name, { :offset => size, :length => file[:length] })
  # TODO add input plugin option to prevent connection cache
@@ -290,14 +295,14 @@ def run(queue)
  return
  end
  if ((Time.now.to_i - @last) > @interval)
- save_registry(@registry)
+ save_registry()
  end
  end
  end
  # The files that got processed after the last registry save need to be saved too, in case the worklist is empty for some intervals.
  now = Time.now.to_i
  if ((now - @last) > @interval)
- save_registry(@registry)
+ save_registry()
  end
  sleeptime = interval - ((now - start) % interval)
  if @debug_timer
@@ -308,10 +313,10 @@ def run(queue)
  end
 
  def stop
- save_registry(@registry)
+ save_registry()
  end
  def close
- save_registry(@registry)
+ save_registry()
  end
 
 
@@ -388,6 +393,7 @@ end
 
  def nsgflowlog(queue, json, name)
  count=0
+ begin
  json["records"].each do |record|
  res = resource(record["resourceId"])
  resource = { :subscription => res[:subscription], :resourcegroup => res[:resourcegroup], :nsg => res[:nsg] }
@@ -417,6 +423,9 @@ def nsgflowlog(queue, json, name)
  end
  end
  end
+ rescue Exception => e
+ @logger.error("NSG Flowlog problem for #{name} and error message #{e.message}")
+ end
  return count
  end
 
@@ -485,30 +494,35 @@ def try_list_blobs(fill)
  end
 
  # When events were processed after the last registry save, start a thread to update the registry file.
- def save_registry(filelist)
- # Because of threading, processed values and regsaved are not thread safe, they can change as instance variable @! Most of the time this is fine because the registry is the last resort, but be careful about corner cases!
+ def save_registry()
  unless @processed == @regsaved
- @regsaved = @processed
- unless (@busy_writing_registry)
- Thread.new {
- begin
- @busy_writing_registry = true
- unless (@registry_local_path)
- @blob_client.create_block_blob(container, registry_path, Marshal.dump(filelist))
- @logger.info("processed #{@processed} events, saving #{filelist.size} blobs and offsets to remote registry #{registry_path}")
- else
- File.open(@registry_local_path+"/"+@pipe_id, 'w') { |file| file.write(Marshal.dump(filelist)) }
- @logger.info("processed #{@processed} events, saving #{filelist.size} blobs and offsets to local registry #{registry_local_path+"/"+@pipe_id}")
- end
- @busy_writing_registry = false
- @last = Time.now.to_i
- rescue
- @logger.error("Oh my, registry write failed, do you have write access?")
- end
+ unless (@busy_writing_registry.locked?)
+ # deep copy the hash, to save the registry independent from the live variable for thread safety
+ # Marshal.dump is used here as a simple deep clone
+ regdump = Marshal.dump(@registry)
+ regsize = @registry.size
+ Thread.new {
+ begin
+ @busy_writing_registry.lock
+ unless (@registry_local_path)
+ @blob_client.create_block_blob(container, registry_path, regdump)
+ @logger.info("processed #{@processed} events, saving #{regsize} blobs and offsets to remote registry #{registry_path}")
+ else
+ File.open(@registry_local_path+"/"+@pipe_id, 'w') { |file| file.write(regdump) }
+ @logger.info("processed #{@processed} events, saving #{regsize} blobs and offsets to local registry #{registry_local_path+"/"+@pipe_id}")
+ end
+ @last = Time.now.to_i
+ @regsaved = @processed
+ rescue Exception => e
+ @logger.error("Oh my, registry write failed")
+ @logger.error("#{e.message}")
+ ensure
+ @busy_writing_registry.unlock
+ end
  }
- else
- @logger.info("Skipped writing the registry because previous write still in progress, it just takes long or may be hanging!")
- end
+ else
+ @logger.info("Skipped writing the registry because previous write still in progress, it just takes long or may be hanging!")
+ end
  end
  end
 
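One caveat about the new scheme: locked? is tested on the pipeline thread, but lock is only taken inside the spawned thread, so two save intervals firing close together could both pass the check before either writer has locked. Mutex#try_lock tests and acquires in one atomic step (and must be paired with unlock on the same thread); a hedged sketch of that variant, simplified to the remote-registry path:

    def save_registry
      return if @processed == @regsaved
      regdump = Marshal.dump(@registry)  # snapshot before handing off to the thread
      regsize = @registry.size
      Thread.new do
        if @busy_writing_registry.try_lock  # atomic test-and-acquire, no locked?/lock window
          begin
            @blob_client.create_block_blob(container, registry_path, regdump)
            @logger.info("saved #{regsize} blobs and offsets to #{registry_path}")
            @regsaved = @processed
            @last = Time.now.to_i
          rescue StandardError => e
            @logger.error("registry write failed: #{e.message}")
          ensure
            @busy_writing_registry.unlock
          end
        else
          @logger.info("Skipped writing the registry, previous write still in progress")
        end
      end
    end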
logstash-input-azure_blob_storage.gemspec CHANGED
@@ -1,6 +1,6 @@
  Gem::Specification.new do |s|
  s.name = 'logstash-input-azure_blob_storage'
- s.version = '0.11.7'
+ s.version = '0.12.3'
  s.licenses = ['Apache-2.0']
  s.summary = 'This logstash plugin reads and parses data from Azure Storage Blobs.'
  s.description = <<-EOF
@@ -20,8 +20,8 @@ EOF
  s.metadata = { "logstash_plugin" => "true", "logstash_group" => "input" }
 
  # Gem dependencies
- s.add_runtime_dependency 'logstash-core-plugin-api', '~> 2.1'
+ s.add_runtime_dependency 'logstash-core-plugin-api', '~> 2.0'
  s.add_runtime_dependency 'stud', '~> 0.0.23'
- s.add_runtime_dependency 'azure-storage-blob', '~> 1.1'
- #s.add_development_dependency 'logstash-devutils', '~> 2'
+ s.add_runtime_dependency 'azure-storage-blob', '~> 2', '>= 2.0.3'
+ s.add_development_dependency 'logstash-devutils'
  end
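
The azure-storage-blob constraint now combines two requirements: '~> 2' admits any 2.x release, and '>= 2.0.3' raises the floor inside that range. Gem::Requirement from RubyGems evaluates compound constraints directly, which makes the semantics easy to check; a small sketch:

    require 'rubygems'  # Gem::Requirement and Gem::Version ship with RubyGems

    req = Gem::Requirement.new('~> 2', '>= 2.0.3')
    %w[2.0.2 2.0.3 2.11.0 3.0.0].each do |v|
      puts "#{v}: #{req.satisfied_by?(Gem::Version.new(v))}"
    end
    # 2.0.2: false, 2.0.3: true, 2.11.0: true, 3.0.0: false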
spec/inputs/azure_blob_storage_spec.rb CHANGED
@@ -8,6 +8,10 @@ describe LogStash::Inputs::AzureBlobStorage do
  let(:config) { { "interval" => 100 } }
  end
 
+ def test_val
+ assert_qual('abc', val('letters=abc'))
+ end
+
  def test_helper_methodes
  assert_equal('b', AzureBlobStorage.val('a=b'))
  assert_equal('whatever', AzureBlobStorage.strip_comma(',whatever'))
spec/inputs/test.rb ADDED
@@ -0,0 +1,11 @@
+ # encoding: utf-8
+ require "logstash/devutils/rspec/spec_helper"
+ require "logstash/inputs/test"
+
+ describe LogStash::Inputs::Test do
+
+ it_behaves_like "an interruptible input plugin" do
+ let(:config) { { "interval" => 100 } }
+ end
+
+ end
metadata CHANGED
@@ -1,29 +1,29 @@
  --- !ruby/object:Gem::Specification
  name: logstash-input-azure_blob_storage
  version: !ruby/object:Gem::Version
- version: 0.11.7
+ version: 0.12.3
  platform: ruby
  authors:
  - Jan Geertsma
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2021-05-17 00:00:00.000000000 Z
+ date: 2022-01-09 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  requirement: !ruby/object:Gem::Requirement
  requirements:
  - - "~>"
  - !ruby/object:Gem::Version
- version: '2.1'
+ version: '2.0'
  name: logstash-core-plugin-api
- type: :runtime
  prerelease: false
+ type: :runtime
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
  - - "~>"
  - !ruby/object:Gem::Version
- version: '2.1'
+ version: '2.0'
  - !ruby/object:Gem::Dependency
  requirement: !ruby/object:Gem::Requirement
  requirements:
@@ -31,8 +31,8 @@ dependencies:
  - !ruby/object:Gem::Version
  version: 0.0.23
  name: stud
- type: :runtime
  prerelease: false
+ type: :runtime
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
  - - "~>"
@@ -43,15 +43,35 @@ dependencies:
  requirements:
  - - "~>"
  - !ruby/object:Gem::Version
- version: '1.1'
+ version: '2'
+ - - ">="
+ - !ruby/object:Gem::Version
+ version: 2.0.3
  name: azure-storage-blob
- type: :runtime
  prerelease: false
+ type: :runtime
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
  - - "~>"
  - !ruby/object:Gem::Version
- version: '1.1'
+ version: '2'
+ - - ">="
+ - !ruby/object:Gem::Version
+ version: 2.0.3
+ - !ruby/object:Gem::Dependency
+ requirement: !ruby/object:Gem::Requirement
+ requirements:
+ - - ">="
+ - !ruby/object:Gem::Version
+ version: '0'
+ name: logstash-devutils
+ prerelease: false
+ type: :development
+ version_requirements: !ruby/object:Gem::Requirement
+ requirements:
+ - - ">="
+ - !ruby/object:Gem::Version
+ version: '0'
  description: " This gem is a Logstash plugin. It reads and parses data from Azure\
  \ Storage Blobs. The azure_blob_storage is a reimplementation to replace azureblob\
  \ from azure-diagnostics-tools/Logstash. It can deal with larger volumes and partial\
@@ -71,6 +91,7 @@ files:
  - lib/logstash/inputs/azure_blob_storage.rb
  - logstash-input-azure_blob_storage.gemspec
  - spec/inputs/azure_blob_storage_spec.rb
+ - spec/inputs/test.rb
  homepage: https://github.com/janmg/logstash-input-azure_blob_storage
  licenses:
  - Apache-2.0
@@ -92,9 +113,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
  - !ruby/object:Gem::Version
  version: '0'
  requirements: []
- rubygems_version: 3.0.6
+ rubygems_version: 3.1.6
  signing_key:
  specification_version: 4
  summary: This logstash plugin reads and parses data from Azure Storage Blobs.
  test_files:
  - spec/inputs/azure_blob_storage_spec.rb
+ - spec/inputs/test.rb