logstash-input-azure_blob_storage 0.12.2 → 0.12.3

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
-   metadata.gz: 7d9a45fa4fa24b7e14ffd518665c9310bbf4ff6ba82fd063fde124a45d20f475
-   data.tar.gz: 56b119316882fefc3d208418aac0b1b712d94649099297f52bef1f04eb44f7fb
+   metadata.gz: da5b56c9fb4733d62604b7753d2e08e3d24100915f58d4f374ab6b6573d27408
+   data.tar.gz: 26f5a19de67615eeb27d302e0b0768fc0b3199f33808182b32a3aeed422b2599
  SHA512:
-   metadata.gz: 17f9c1fc032e18a764a6985a4e6d5a96eacfb4732861a9f6c1a6f41be01ff9c9036e0ce5d15348da12a3d236855111744e6bd867e64162a7417e4f8bdd862392
-   data.tar.gz: 6e39fc13efa7f7aea54e6aad9c4d5e1c3c00e1c9a5c56f39b28622496b598aecada794c337f9c348c911527b1e17229d1923829a707acf4bfa282da94441dd46
+   metadata.gz: 37f134a2ef4cb0cae2a24e38a3299687f56bc2bf7d98f56976d2bba605b356caec39022c33d18fb58d51933eaa0a3d747c0c4cfacfbe4c0ce6c029b0ff6ddd78
+   data.tar.gz: abd3a6803577db03c42eebf69795eded5e948e48a38363fa81b566c2ecc494d9313d07e280569ab424c561a91c6a08a00d1cfa6bc7674fd84b137e08b4af75ef
data/CHANGELOG.md CHANGED
@@ -1,4 +1,9 @@
- ## 0.12.2
+ ## 0.12.3
+ - Fixed repetitive processing
+ - Replaced newreg with a registry cleanup; using newreg to replace the old registry worked in 0.11, but not in 0.12
+ - Implemented a Mutex for the save_registry thread; also marshal @registry before the thread starts, for better thread safety
+
+ ## 0.12.2
  - Fixed the exception handling, not trying to print how many JSON fields there are while catching the exception

  ## 0.12.1
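
The last two entries are the heart of this release: the registry hash is now serialized with Marshal.dump before the writer thread starts, so the thread writes a stable snapshot instead of reading a hash the polling loop keeps mutating, and a Mutex stops two writers from running at once. A minimal standalone sketch of the snapshot idea (illustrative names, not the plugin's code):

    require 'thread'

    registry = { "blob-a" => { :offset => 0, :length => 10 } }

    # Serialize first: the dump captures the hash as it is right now, so
    # later mutations by the main loop cannot leak into the written file.
    snapshot = Marshal.dump(registry)

    writer = Thread.new do
      File.binwrite("registry.dat", snapshot)  # writes the frozen snapshot
    end

    registry["blob-b"] = { :offset => 0, :length => 20 }  # safe: snapshot is unaffected
    writer.join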
data/lib/logstash/inputs/azure_blob_storage.rb CHANGED
@@ -96,10 +96,6 @@ config :prefix, :validate => :string, :required => false

  config :path_filters, :validate => :array, :default => ['**/*'], :required => false

- # TODO: Other feature requests
- # show file path in logger
- # add filepath as part of log message
- # option to keep registry on local disk


  public
@@ -107,6 +103,7 @@ def register
      @pipe_id = Thread.current[:name].split("[").last.split("]").first
      @logger.info("=== #{config_name} #{Gem.loaded_specs["logstash-input-"+config_name].version.to_s} / #{@pipe_id} / #{@id[0,6]} / ruby #{ RUBY_VERSION }p#{ RUBY_PATCHLEVEL } ===")
      @logger.info("If this plugin doesn't work, please raise an issue in https://github.com/janmg/logstash-input-azure_blob_storage")
+     @busy_writing_registry = Mutex.new
      # TODO: consider multiple readers, so add pipeline @id or use logstash-to-logstash communication?
      # TODO: Implement retry ... Error: Connection refused - Failed to open TCP connection to
  end
@@ -152,7 +149,7 @@ def run(queue)
      # read filelist and set offsets to file length to mark all the old files as done
      if registry_create_policy == "start_fresh"
          @registry = list_blobs(true)
-         save_registry(@registry)
+         save_registry()
          @logger.info("starting fresh, writing a clean registry to contain #{@registry.size} blobs/files")
      end

@@ -178,7 +175,6 @@ def run(queue)
          @logger.info("head will be: #{@head} and tail is set to #{@tail}")
      end

-     newreg = Hash.new
      filelist = Hash.new
      worklist = Hash.new
      @last = start = Time.now.to_i
@@ -197,7 +193,6 @@ def run(queue)
      #filelist.sort_by(|k,v|resource(k)[:date])
      worklist.clear
      filelist.clear
-     newreg.clear

      # Listing all the files
      filelist = list_blobs(false)
@@ -208,16 +203,24 @@ def run(queue)
          rescue
              off = 0
          end
-         newreg.store(name, { :offset => off, :length => file[:length] })
+         @registry.store(name, { :offset => off, :length => file[:length] })
          if (@debug_until > @processed) then @logger.info("2: adding offsets: #{name} #{off} #{file[:length]}") end
      end
      # size nilClass when the list doesn't grow?!
+
+     # clean registry of files that are not in the filelist
+     @registry.each do |name,file|
+         unless filelist.include?(name)
+             @registry.delete(name)
+             if (@debug_until > @processed) then @logger.info("purging #{name}") end
+         end
+     end
+
      # Worklist is the subset of files where the already read offset is smaller than the file size
-     @registry = newreg
      worklist.clear
      chunk = nil

-     worklist = newreg.select {|name,file| file[:offset] < file[:length]}
+     worklist = @registry.select {|name,file| file[:offset] < file[:length]}
      if (worklist.size > 4) then @logger.info("worklist contains #{worklist.size} blobs") end

      # Start of processing
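
The added cleanup loop replaces the old newreg swap: instead of rebuilding a fresh hash each pass, entries whose blobs have disappeared from the listing are deleted in place (Ruby's Hash explicitly permits delete during each). Hash#delete_if expresses the same purge more compactly; a one-line equivalent sketch using the hunk's own variable names:

    # Drop every registry entry whose blob is absent from the fresh listing.
    @registry.delete_if { |name, _file| !filelist.include?(name) }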
@@ -236,7 +239,8 @@ def run(queue)
              chunk = full_read(name)
              size=chunk.size
          rescue Exception => e
-             @logger.error("Failed to read #{name} because of: #{e.message} .. will continue, set file as read and pretend this never happened")
+             # Azure::Core::Http::HTTPError / undefined method `message='
+             @logger.error("Failed to read #{name} ... will continue, set file as read and pretend this never happened")
              @logger.error("#{size} size and #{file[:length]} file length")
              size = file[:length]
          end
@@ -275,12 +279,12 @@ def run(queue)
                      decorate(event)
                      queue << event
                  end
+                 @processed += counter
              rescue Exception => e
                  @logger.error("codec exception: #{e.message} .. will continue and pretend this never happened")
                  @registry.store(name, { :offset => file[:length], :length => file[:length] })
                  @logger.debug("#{chunk}")
              end
-             @processed += counter
          end
          @registry.store(name, { :offset => size, :length => file[:length] })
          # TODO add input plugin option to prevent connection cache
@@ -291,14 +295,14 @@ def run(queue)
              return
          end
          if ((Time.now.to_i - @last) > @interval)
-             save_registry(@registry)
+             save_registry()
          end
      end
  end
  # The files that got processed after the last registry save need to be saved too, in case the worklist is empty for some intervals.
  now = Time.now.to_i
  if ((now - @last) > @interval)
-     save_registry(@registry)
+     save_registry()
  end
  sleeptime = interval - ((now - start) % interval)
  if @debug_timer
@@ -309,10 +313,10 @@ def run(queue)
  end

  def stop
-     save_registry(@registry)
+     save_registry()
  end
  def close
-     save_registry(@registry)
+     save_registry()
  end


@@ -490,30 +494,35 @@ def try_list_blobs(fill)
  end

  # When events were processed after the last registry save, start a thread to update the registry file.
- def save_registry(filelist)
-     # Because of threading, processed values and regsaved are not thread safe, they can change as instance variable @! Most of the time this is fine because the registry is the last resort, but be careful about corner cases!
+ def save_registry()
      unless @processed == @regsaved
-         @regsaved = @processed
-         unless (@busy_writing_registry)
-             Thread.new {
-                 begin
-                     @busy_writing_registry = true
-                     unless (@registry_local_path)
-                         @blob_client.create_block_blob(container, registry_path, Marshal.dump(filelist))
-                         @logger.info("processed #{@processed} events, saving #{filelist.size} blobs and offsets to remote registry #{registry_path}")
-                     else
-                         File.open(@registry_local_path+"/"+@pipe_id, 'w') { |file| file.write(Marshal.dump(filelist)) }
-                         @logger.info("processed #{@processed} events, saving #{filelist.size} blobs and offsets to local registry #{registry_local_path+"/"+@pipe_id}")
-                     end
-                     @busy_writing_registry = false
-                     @last = Time.now.to_i
-                 rescue
-                     @logger.error("Oh my, registry write failed, do you have write access?")
-                 end
+         unless (@busy_writing_registry.locked?)
+             # deep copy the hash, to save the registry independent from the variable, for thread safety
+             # (deep_clone would likewise use Marshal to make the copy)
+             regdump = Marshal.dump(@registry)
+             regsize = @registry.size
+             Thread.new {
+                 begin
+                     @busy_writing_registry.lock
+                     unless (@registry_local_path)
+                         @blob_client.create_block_blob(container, registry_path, regdump)
+                         @logger.info("processed #{@processed} events, saving #{regsize} blobs and offsets to remote registry #{registry_path}")
+                     else
+                         File.open(@registry_local_path+"/"+@pipe_id, 'w') { |file| file.write(regdump) }
+                         @logger.info("processed #{@processed} events, saving #{regsize} blobs and offsets to local registry #{registry_local_path+"/"+@pipe_id}")
+                     end
+                     @last = Time.now.to_i
+                     @regsaved = @processed
+                 rescue Exception => e
+                     @logger.error("Oh my, registry write failed")
+                     @logger.error("#{e.message}")
+                 ensure
+                     @busy_writing_registry.unlock
+                 end
              }
-         else
-             @logger.info("Skipped writing the registry because previous write still in progress, it just takes long or may be hanging!")
-         end
+         else
+             @logger.info("Skipped writing the registry because the previous write is still in progress, it just takes long or may be hanging!")
+         end
      end
  end
 
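The rewritten save_registry guards the writer with Mutex#locked? and releases the lock in an ensure block, so a failed write can no longer leave the old boolean flag stuck at true. Note that locked? followed by lock is still a check-then-act pair; with a single saver per pipeline that window is harmless, but Mutex#try_lock would close it by making the test and the acquisition one atomic step. A hedged standalone sketch of that variant (illustrative names, not the plugin's code):

    def save_snapshot(mutex, snapshot, path)
      Thread.new do
        if mutex.try_lock                # acquire only if free, atomically
          begin
            File.binwrite(path, snapshot)
          ensure
            mutex.unlock                 # always release, even if the write raised
          end
        else
          puts "previous registry write still in progress, skipping"
        end
      end
    end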
data/logstash-input-azure_blob_storage.gemspec CHANGED
@@ -1,6 +1,6 @@
  Gem::Specification.new do |s|
    s.name = 'logstash-input-azure_blob_storage'
-   s.version = '0.12.2'
+   s.version = '0.12.3'
    s.licenses = ['Apache-2.0']
    s.summary = 'This logstash plugin reads and parses data from Azure Storage Blobs.'
    s.description = <<-EOF
@@ -20,8 +20,8 @@ EOF
    s.metadata = { "logstash_plugin" => "true", "logstash_group" => "input" }

    # Gem dependencies
-   s.add_runtime_dependency 'logstash-core-plugin-api', '~> 2.1'
+   s.add_runtime_dependency 'logstash-core-plugin-api', '~> 2.0'
    s.add_runtime_dependency 'stud', '~> 0.0.23'
    s.add_runtime_dependency 'azure-storage-blob', '~> 2', '>= 2.0.3'
-   #s.add_development_dependency 'logstash-devutils', '~> 2'
+   s.add_development_dependency 'logstash-devutils'
  end
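
Relaxing the pin from '~> 2.1' to '~> 2.0' widens the accepted logstash-core-plugin-api range: the pessimistic operator fixes every stated digit except the last, so '~> 2.0' admits any 2.x release while '~> 2.1' required at least 2.1. This can be checked directly with RubyGems:

    require 'rubygems'

    Gem::Requirement.new('~> 2.0').satisfied_by?(Gem::Version.new('2.0.0'))  # => true
    Gem::Requirement.new('~> 2.0').satisfied_by?(Gem::Version.new('2.9.1'))  # => true
    Gem::Requirement.new('~> 2.0').satisfied_by?(Gem::Version.new('3.0.0'))  # => false
    Gem::Requirement.new('~> 2.1').satisfied_by?(Gem::Version.new('2.0.0'))  # => false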
data/spec/inputs/azure_blob_storage_spec.rb CHANGED
@@ -8,6 +8,10 @@ describe LogStash::Inputs::AzureBlobStorage do
      let(:config) { { "interval" => 100 } }
  end

+ def test_val
+     assert_qual('abc', val('letters=abc'))
+ end
+
  def test_helper_methodes
      assert_equal('b', AzureBlobStorage.val('a=b'))
      assert_equal('whatever', AzureBlobStorage.strip_comma(',whatever'))
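
As published, the new test_val calls assert_qual and a bare val, neither of which is defined; they read as typos for assert_equal and AzureBlobStorage.val, which the neighbouring test_helper_methodes uses. A corrected sketch under that assumption:

    # Hypothetical corrected form, mirroring test_helper_methodes:
    def test_val
      assert_equal('abc', AzureBlobStorage.val('letters=abc'))
    end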
data/spec/inputs/test.rb ADDED
@@ -0,0 +1,11 @@
+ # encoding: utf-8
+ require "logstash/devutils/rspec/spec_helper"
+ require "logstash/inputs/test"
+
+ describe LogStash::Inputs::Test do
+
+     it_behaves_like "an interruptible input plugin" do
+         let(:config) { { "interval" => 100 } }
+     end
+
+ end
metadata CHANGED
@@ -1,21 +1,21 @@
  --- !ruby/object:Gem::Specification
  name: logstash-input-azure_blob_storage
  version: !ruby/object:Gem::Version
-   version: 0.12.2
+   version: 0.12.3
  platform: ruby
  authors:
  - Jan Geertsma
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2021-12-21 00:00:00.000000000 Z
+ date: 2022-01-09 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    requirement: !ruby/object:Gem::Requirement
      requirements:
      - - "~>"
        - !ruby/object:Gem::Version
-         version: '2.1'
+         version: '2.0'
    name: logstash-core-plugin-api
    prerelease: false
    type: :runtime
@@ -23,7 +23,7 @@ dependencies:
      requirements:
      - - "~>"
        - !ruby/object:Gem::Version
-         version: '2.1'
+         version: '2.0'
  - !ruby/object:Gem::Dependency
    requirement: !ruby/object:Gem::Requirement
      requirements:
@@ -58,6 +58,20 @@ dependencies:
      - - ">="
        - !ruby/object:Gem::Version
          version: 2.0.3
+ - !ruby/object:Gem::Dependency
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
+   name: logstash-devutils
+   prerelease: false
+   type: :development
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
  description: "  This gem is a Logstash plugin. It reads and parses data from Azure\
    \ Storage Blobs. The azure_blob_storage is a reimplementation to replace azureblob\
    \ from azure-diagnostics-tools/Logstash. It can deal with larger volumes and partial\
@@ -77,6 +91,7 @@ files:
  - lib/logstash/inputs/azure_blob_storage.rb
  - logstash-input-azure_blob_storage.gemspec
  - spec/inputs/azure_blob_storage_spec.rb
+ - spec/inputs/test.rb
  homepage: https://github.com/janmg/logstash-input-azure_blob_storage
  licenses:
  - Apache-2.0
@@ -104,3 +119,4 @@ specification_version: 4
  summary: This logstash plugin reads and parses data from Azure Storage Blobs.
  test_files:
  - spec/inputs/azure_blob_storage_spec.rb
+ - spec/inputs/test.rb