logstash-input-azure_blob_storage 0.12.2 → 0.12.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7d9a45fa4fa24b7e14ffd518665c9310bbf4ff6ba82fd063fde124a45d20f475
4
- data.tar.gz: 56b119316882fefc3d208418aac0b1b712d94649099297f52bef1f04eb44f7fb
3
+ metadata.gz: da5b56c9fb4733d62604b7753d2e08e3d24100915f58d4f374ab6b6573d27408
4
+ data.tar.gz: 26f5a19de67615eeb27d302e0b0768fc0b3199f33808182b32a3aeed422b2599
5
5
  SHA512:
6
- metadata.gz: 17f9c1fc032e18a764a6985a4e6d5a96eacfb4732861a9f6c1a6f41be01ff9c9036e0ce5d15348da12a3d236855111744e6bd867e64162a7417e4f8bdd862392
7
- data.tar.gz: 6e39fc13efa7f7aea54e6aad9c4d5e1c3c00e1c9a5c56f39b28622496b598aecada794c337f9c348c911527b1e17229d1923829a707acf4bfa282da94441dd46
6
+ metadata.gz: 37f134a2ef4cb0cae2a24e38a3299687f56bc2bf7d98f56976d2bba605b356caec39022c33d18fb58d51933eaa0a3d747c0c4cfacfbe4c0ce6c029b0ff6ddd78
7
+ data.tar.gz: abd3a6803577db03c42eebf69795eded5e948e48a38363fa81b566c2ecc494d9313d07e280569ab424c561a91c6a08a00d1cfa6bc7674fd84b137e08b4af75ef
data/CHANGELOG.md CHANGED
@@ -1,4 +1,9 @@
1
- #$ 0.12.2
1
+ ## 0.12.3
2
+ - Fixed repetitive processing
3
+ - Replaced newreg with registry cleanup, using newreg to replace old registry worked in 0.11, but not in .12
4
+ - Implemented Mutex for the save_registry thread. Also marshal @registry before thread start for better thread safety
5
+
6
+ ## 0.12.2
2
7
  - Fixed the exception handling, not trying to print how many JSON fields there are while catching the exception
3
8
 
4
9
  ## 0.12.1
@@ -96,10 +96,6 @@ config :prefix, :validate => :string, :required => false
96
96
 
97
97
  config :path_filters, :validate => :array, :default => ['**/*'], :required => false
98
98
 
99
- # TODO: Other feature requests
100
- # show file path in logger
101
- # add filepath as part of log message
102
- # option to keep registry on local disk
103
99
 
104
100
 
105
101
  public
@@ -107,6 +103,7 @@ def register
107
103
  @pipe_id = Thread.current[:name].split("[").last.split("]").first
108
104
  @logger.info("=== #{config_name} #{Gem.loaded_specs["logstash-input-"+config_name].version.to_s} / #{@pipe_id} / #{@id[0,6]} / ruby #{ RUBY_VERSION }p#{ RUBY_PATCHLEVEL } ===")
109
105
  @logger.info("If this plugin doesn't work, please raise an issue in https://github.com/janmg/logstash-input-azure_blob_storage")
106
+ @busy_writing_registry = Mutex.new
110
107
  # TODO: consider multiple readers, so add pipeline @id or use logstash-to-logstash communication?
111
108
  # TODO: Implement retry ... Error: Connection refused - Failed to open TCP connection to
112
109
  end
@@ -152,7 +149,7 @@ def run(queue)
152
149
  # read filelist and set offsets to file length to mark all the old files as done
153
150
  if registry_create_policy == "start_fresh"
154
151
  @registry = list_blobs(true)
155
- save_registry(@registry)
152
+ save_registry()
156
153
  @logger.info("starting fresh, writing a clean registry to contain #{@registry.size} blobs/files")
157
154
  end
158
155
 
@@ -178,7 +175,6 @@ def run(queue)
178
175
  @logger.info("head will be: #{@head} and tail is set to #{@tail}")
179
176
  end
180
177
 
181
- newreg = Hash.new
182
178
  filelist = Hash.new
183
179
  worklist = Hash.new
184
180
  @last = start = Time.now.to_i
@@ -197,7 +193,6 @@ def run(queue)
197
193
  #filelist.sort_by(|k,v|resource(k)[:date])
198
194
  worklist.clear
199
195
  filelist.clear
200
- newreg.clear
201
196
 
202
197
  # Listing all the files
203
198
  filelist = list_blobs(false)
@@ -208,16 +203,24 @@ def run(queue)
208
203
  rescue
209
204
  off = 0
210
205
  end
211
- newreg.store(name, { :offset => off, :length => file[:length] })
206
+ @registry.store(name, { :offset => off, :length => file[:length] })
212
207
  if (@debug_until > @processed) then @logger.info("2: adding offsets: #{name} #{off} #{file[:length]}") end
213
208
  end
214
209
  # size nilClass when the list doesn't grow?!
210
+
211
+ # clean registry of files that are not in the filelist
212
+ @registry.each do |name,file|
213
+ unless filelist.include?(name)
214
+ @registry.delete(name)
215
+ if (@debug_until > @processed) then @logger.info("purging #{name}") end
216
+ end
217
+ end
218
+
215
219
  # Worklist is the subset of files where the already read offset is smaller than the file size
216
- @registry = newreg
217
220
  worklist.clear
218
221
  chunk = nil
219
222
 
220
- worklist = newreg.select {|name,file| file[:offset] < file[:length]}
223
+ worklist = @registry.select {|name,file| file[:offset] < file[:length]}
221
224
  if (worklist.size > 4) then @logger.info("worklist contains #{worklist.size} blobs") end
222
225
 
223
226
  # Start of processing
@@ -236,7 +239,8 @@ def run(queue)
236
239
  chunk = full_read(name)
237
240
  size=chunk.size
238
241
  rescue Exception => e
239
- @logger.error("Failed to read #{name} because of: #{e.message} .. will continue, set file as read and pretend this never happened")
242
+ # Azure::Core::Http::HTTPError / undefined method `message='
243
+ @logger.error("Failed to read #{name} ... will continue, set file as read and pretend this never happened")
240
244
  @logger.error("#{size} size and #{file[:length]} file length")
241
245
  size = file[:length]
242
246
  end
@@ -275,12 +279,12 @@ def run(queue)
275
279
  decorate(event)
276
280
  queue << event
277
281
  end
282
+ @processed += counter
278
283
  rescue Exception => e
279
284
  @logger.error("codec exception: #{e.message} .. will continue and pretend this never happened")
280
285
  @registry.store(name, { :offset => file[:length], :length => file[:length] })
281
286
  @logger.debug("#{chunk}")
282
287
  end
283
- @processed += counter
284
288
  end
285
289
  @registry.store(name, { :offset => size, :length => file[:length] })
286
290
  # TODO add input plugin option to prevent connection cache
@@ -291,14 +295,14 @@ def run(queue)
291
295
  return
292
296
  end
293
297
  if ((Time.now.to_i - @last) > @interval)
294
- save_registry(@registry)
298
+ save_registry()
295
299
  end
296
300
  end
297
301
  end
298
302
  # The files that got processed after the last registry save need to be saved too, in case the worklist is empty for some intervals.
299
303
  now = Time.now.to_i
300
304
  if ((now - @last) > @interval)
301
- save_registry(@registry)
305
+ save_registry()
302
306
  end
303
307
  sleeptime = interval - ((now - start) % interval)
304
308
  if @debug_timer
@@ -309,10 +313,10 @@ def run(queue)
309
313
  end
310
314
 
311
315
  def stop
312
- save_registry(@registry)
316
+ save_registry()
313
317
  end
314
318
  def close
315
- save_registry(@registry)
319
+ save_registry()
316
320
  end
317
321
 
318
322
 
@@ -490,30 +494,35 @@ def try_list_blobs(fill)
490
494
  end
491
495
 
492
496
  # When events were processed after the last registry save, start a thread to update the registry file.
493
- def save_registry(filelist)
494
- # Because of threading, processed values and regsaved are not thread safe, they can change as instance variable @! Most of the time this is fine because the registry is the last resort, but be careful about corner cases!
497
+ def save_registry()
495
498
  unless @processed == @regsaved
496
- @regsaved = @processed
497
- unless (@busy_writing_registry)
498
- Thread.new {
499
- begin
500
- @busy_writing_registry = true
501
- unless (@registry_local_path)
502
- @blob_client.create_block_blob(container, registry_path, Marshal.dump(filelist))
503
- @logger.info("processed #{@processed} events, saving #{filelist.size} blobs and offsets to remote registry #{registry_path}")
504
- else
505
- File.open(@registry_local_path+"/"+@pipe_id, 'w') { |file| file.write(Marshal.dump(filelist)) }
506
- @logger.info("processed #{@processed} events, saving #{filelist.size} blobs and offsets to local registry #{registry_local_path+"/"+@pipe_id}")
507
- end
508
- @busy_writing_registry = false
509
- @last = Time.now.to_i
510
- rescue
511
- @logger.error("Oh my, registry write failed, do you have write access?")
512
- end
499
+ unless (@busy_writing_registry.locked?)
500
+ # deep_copy hash, to save the registry independent from the variable for thread safety
501
+ # if deep_clone uses Marshal to do a copy,
502
+ regdump = Marshal.dump(@registry)
503
+ regsize = @registry.size
504
+ Thread.new {
505
+ begin
506
+ @busy_writing_registry.lock
507
+ unless (@registry_local_path)
508
+ @blob_client.create_block_blob(container, registry_path, regdump)
509
+ @logger.info("processed #{@processed} events, saving #{regsize} blobs and offsets to remote registry #{registry_path}")
510
+ else
511
+ File.open(@registry_local_path+"/"+@pipe_id, 'w') { |file| file.write(regdump) }
512
+ @logger.info("processed #{@processed} events, saving #{regsize} blobs and offsets to local registry #{registry_local_path+"/"+@pipe_id}")
513
+ end
514
+ @last = Time.now.to_i
515
+ @regsaved = @processed
516
+ rescue Exception => e
517
+ @logger.error("Oh my, registry write failed")
518
+ @logger.error("#{e.message}")
519
+ ensure
520
+ @busy_writing_registry.unlock
521
+ end
513
522
  }
514
- else
515
- @logger.info("Skipped writing the registry because previous write still in progress, it just takes long or may be hanging!")
516
- end
523
+ else
524
+ @logger.info("Skipped writing the registry because previous write still in progress, it just takes long or may be hanging!")
525
+ end
517
526
  end
518
527
  end
519
528
 
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'logstash-input-azure_blob_storage'
3
- s.version = '0.12.2'
3
+ s.version = '0.12.3'
4
4
  s.licenses = ['Apache-2.0']
5
5
  s.summary = 'This logstash plugin reads and parses data from Azure Storage Blobs.'
6
6
  s.description = <<-EOF
@@ -20,8 +20,8 @@ EOF
20
20
  s.metadata = { "logstash_plugin" => "true", "logstash_group" => "input" }
21
21
 
22
22
  # Gem dependencies
23
- s.add_runtime_dependency 'logstash-core-plugin-api', '~> 2.1'
23
+ s.add_runtime_dependency 'logstash-core-plugin-api', '~> 2.0'
24
24
  s.add_runtime_dependency 'stud', '~> 0.0.23'
25
25
  s.add_runtime_dependency 'azure-storage-blob', '~> 2', '>= 2.0.3'
26
- #s.add_development_dependency 'logstash-devutils', '~> 2'
26
+ s.add_development_dependency 'logstash-devutils'
27
27
  end
@@ -8,6 +8,10 @@ describe LogStash::Inputs::AzureBlobStorage do
8
8
  let(:config) { { "interval" => 100 } }
9
9
  end
10
10
 
11
+ def test_val
12
+ assert_qual('abc', val('letters=abc'))
13
+ end
14
+
11
15
  def test_helper_methodes
12
16
  assert_equal('b', AzureBlobStorage.val('a=b'))
13
17
  assert_equal('whatever', AzureBlobStorage.strip_comma(',whatever'))
@@ -0,0 +1,11 @@
1
+ # encoding: utf-8
2
+ require "logstash/devutils/rspec/spec_helper"
3
+ require "logstash/inputs/test"
4
+
5
+ describe LogStash::Inputs::Test do
6
+
7
+ it_behaves_like "an interruptible input plugin" do
8
+ let(:config) { { "interval" => 100 } }
9
+ end
10
+
11
+ end
metadata CHANGED
@@ -1,21 +1,21 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-input-azure_blob_storage
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.12.2
4
+ version: 0.12.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jan Geertsma
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-12-21 00:00:00.000000000 Z
11
+ date: 2022-01-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
15
15
  requirements:
16
16
  - - "~>"
17
17
  - !ruby/object:Gem::Version
18
- version: '2.1'
18
+ version: '2.0'
19
19
  name: logstash-core-plugin-api
20
20
  prerelease: false
21
21
  type: :runtime
@@ -23,7 +23,7 @@ dependencies:
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '2.1'
26
+ version: '2.0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  requirement: !ruby/object:Gem::Requirement
29
29
  requirements:
@@ -58,6 +58,20 @@ dependencies:
58
58
  - - ">="
59
59
  - !ruby/object:Gem::Version
60
60
  version: 2.0.3
61
+ - !ruby/object:Gem::Dependency
62
+ requirement: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ version: '0'
67
+ name: logstash-devutils
68
+ prerelease: false
69
+ type: :development
70
+ version_requirements: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ version: '0'
61
75
  description: " This gem is a Logstash plugin. It reads and parses data from Azure\
62
76
  \ Storage Blobs. The azure_blob_storage is a reimplementation to replace azureblob\
63
77
  \ from azure-diagnostics-tools/Logstash. It can deal with larger volumes and partial\
@@ -77,6 +91,7 @@ files:
77
91
  - lib/logstash/inputs/azure_blob_storage.rb
78
92
  - logstash-input-azure_blob_storage.gemspec
79
93
  - spec/inputs/azure_blob_storage_spec.rb
94
+ - spec/inputs/test.rb
80
95
  homepage: https://github.com/janmg/logstash-input-azure_blob_storage
81
96
  licenses:
82
97
  - Apache-2.0
@@ -104,3 +119,4 @@ specification_version: 4
104
119
  summary: This logstash plugin reads and parses data from Azure Storage Blobs.
105
120
  test_files:
106
121
  - spec/inputs/azure_blob_storage_spec.rb
122
+ - spec/inputs/test.rb