logstash-input-azure_blob_storage 0.11.4 → 0.11.5

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 158d9ef3b7997fb3ec67f4e2278861ae367c3e4a73f362dc56f145482d802e34
- data.tar.gz: 89f5b1bc848a97cbf31b1323aa64d021d86a05292d3d7d006994ad170666a37d
+ metadata.gz: 3d446aed971a95e6e17a27ed1e9ec8b141f939b53697fb9c332cfb130404745a
+ data.tar.gz: 4a1321f6c6a30f6787d2133642ca23840371d6f4e18102cb775d345b09eb176a
  SHA512:
- metadata.gz: 80f12e364ba3fd81375d2b88d24567d92ec83decac371552e3a814194f6dcae2f1c6991ac87f50e0012a8cb177f67da92790d40a71af953b211e5043a1691170
- data.tar.gz: 0e54b9c0b9f63737ef8046d362c47f1c20f2d9f702db0311993def976f1a40c14534c7fae9a7a90e098ce4b3bdd18d00517f420e9cc6c4b7810f3709aee797e1
+ metadata.gz: b4f48a0bebcd6e3594584a4473b223838359d44e9ef591f958aa4c80c4c22953f6b0f708b19faeaf0517c66f47185bda4de75ab4e3618b23e2e7f23f71cb4bee
+ data.tar.gz: 508cd39ea159a4655e590f46ad0108c3b6e6de95ed575c4456da0230bae73fb384ecb7697ed710e7afb1542fe01cbd8a62130acedcbf0ba9c3040ace1f9d76d0
CHANGELOG.md CHANGED
@@ -1,6 +1,13 @@
+ ## 0.11.5
+ - Added optional filename into the message
+ - plumbing for emulator, start_over not learning from registry
+
  ## 0.11.4
  - fixed listing 3 times, rather than retrying to list max 3 times
- - added log entries for better tracing in which phase the application is now and how long it takes
+ - added option to migrate/save to using local registry
+ - rewrote interval timing
+ - reduced saving of registry to maximum once per interval, protect against duplicate simultaneous writes
+ - added debug_timer for better tracing of how long operations take
  - removing pipeline name from logfiles, logstash 7.6 and up have this in the log4j2 by default now
  - moved initialization from register to run. should make logs more readable
 
data/README.md CHANGED
@@ -40,7 +40,11 @@ The registry_create_policy is used when the pipeline is started to either resume
 
  interval defines the minimum time the registry should be saved to the registry file (by default 'data/registry.dat'), this is only needed in case the pipeline dies unexpectedly. During a normal shutdown the registry is also saved.
 
- During the pipeline start the plugin uses one file to learn how the JSON header and tail look like, they can also be configured manually.
+ When registry_local_path is set to a directory, the registry is saved on the Logstash server in that directory. The filename is the pipe.id.
+
+ With registry_create_policy set to resume and registry_local_path set to a directory where the registry isn't yet created, the plugin should load the registry from the storage account and then save it to the local server.
+
+ During pipeline start, for the JSON codec, the plugin uses one file to learn what the JSON header and tail look like; they can also be configured manually.
 
  ## Running the pipeline
  The pipeline can be started in several ways.
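A minimal input sketch for the local registry behaviour described in the README hunk above; the account, key, container and path values here are placeholders and not taken from this diff:

```
input {
    azure_blob_storage {
        storageaccount => "examplestorageaccount"            # placeholder
        access_key => "${STORAGE_ACCESS_KEY}"                 # placeholder
        container => "insights-logs-networksecuritygroupflowevent"
        registry_create_policy => "resume"
        registry_local_path => "/usr/share/logstash/plugin"   # registry written here as <pipe.id>
    }
}
```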
@@ -91,6 +95,7 @@ The log level of the plugin can be put into DEBUG through
  curl -XPUT 'localhost:9600/_node/logging?pretty' -H 'Content-Type: application/json' -d'{"logger.logstash.inputs.azureblobstorage" : "DEBUG"}'
  ```
 
+ Because debug also makes Logstash chatty, there are also debug_timer and debug_until, which can be used to print additional information on what the pipeline is doing and how long it takes. debug_until sets the number of events after which the extra logging is disabled.
 
  ## Other Configuration Examples
  For nsgflowlogs, a simple configuration looks like this
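A small sketch of those debug options in the input block; debug_until is declared in the config section of this release, while debug_timer is only named in the changelog and is assumed here to be a boolean toggle:

```
input {
    azure_blob_storage {
        # other required options (storageaccount, access_key, container, ...) omitted
        debug_until => 100   # print the numbered trace lines for the first 100 events
        debug_timer => true  # assumed boolean; reports how long listing/processing takes
    }
}
```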
@@ -176,7 +181,7 @@ filter {
  remove_field => ["subresponse"]
  remove_field => ["username"]
  remove_field => ["clientPort"]
- remove_field => ["port"]
+ remove_field => ["port"]
  remove_field => ["timestamp"]
  }
  }
lib/logstash/inputs/azure_blob_storage.rb CHANGED
@@ -25,6 +25,9 @@ config :storageaccount, :validate => :string, :required => false
  # DNS Suffix other then blob.core.windows.net
  config :dns_suffix, :validate => :string, :required => false, :default => 'core.windows.net'
 
+ # For development this can be used to emulate an accountstorage when not available from azure
+ #config :use_development_storage, :validate => :boolean, :required => false
+
  # The (primary or secondary) Access Key for the the storage account. The key can be found in the portal.azure.com or through the azure api StorageAccounts/ListKeys. For example the PowerShell command Get-AzStorageAccountKey.
  config :access_key, :validate => :password, :required => false
 
@@ -58,6 +61,7 @@ config :registry_create_policy, :validate => ['resume','start_over','start_fresh
  # Z00000000000000000000000000000000 2 ]}
  config :interval, :validate => :number, :default => 60
 
+ config :addfilename, :validate => :boolean, :default => false, :required => false
  # debug_until will for a maximum amount of processed messages shows 3 types of log printouts including processed filenames. This is a lightweight alternative to switching the loglevel from info to debug or even trace
  config :debug_until, :validate => :number, :default => 0, :required => false
 
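The addfilename option declared above drives the event.set('filename', name) and ev.merge!({:filename => name}) calls later in this diff; a minimal sketch of enabling it, with all other required options omitted:

```
input {
    azure_blob_storage {
        # when true, every event carries a 'filename' field naming the blob it was read from
        addfilename => true
    }
}
```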
@@ -127,11 +131,15 @@ def run(queue)
  unless conn.nil?
  @blob_client = Azure::Storage::Blob::BlobService.create_from_connection_string(conn)
  else
+ # unless use_development_storage?
  @blob_client = Azure::Storage::Blob::BlobService.create(
  storage_account_name: storageaccount,
  storage_dns_suffix: dns_suffix,
  storage_access_key: access_key.value,
  )
+ # else
+ # @logger.info("not yet implemented")
+ # end
  end
 
  @registry = Hash.new
@@ -167,7 +175,7 @@ def run(queue)
  if registry_create_policy == "start_fresh"
  @registry = list_blobs(true)
  save_registry(@registry)
- @logger.info("starting fresh, overwriting the registry to contain #{@registry.size} blobs/files")
+ @logger.info("starting fresh, writing a clean registry to contain #{@registry.size} blobs/files")
  end
 
  @is_json = false
@@ -223,6 +231,7 @@ def run(queue)
  newreg.store(name, { :offset => off, :length => file[:length] })
  if (@debug_until > @processed) then @logger.info("2: adding offsets: #{name} #{off} #{file[:length]}") end
  end
+ # size nilClass when the list doesn't grow?!
  # Worklist is the subset of files where the already read offset is smaller than the file size
  worklist.clear
  worklist = newreg.select {|name,file| file[:offset] < file[:length]}
@@ -230,13 +239,19 @@ def run(queue)
 
  # Start of processing
  # This would be ideal for threading since it's IO intensive, would be nice with a ruby native ThreadPool
- worklist.each do |name, file|
+ if (worklist.size > 0) then
+ worklist.each do |name, file|
  start = Time.now.to_i
  if (@debug_until > @processed) then @logger.info("3: processing #{name} from #{file[:offset]} to #{file[:length]}") end
  size = 0
  if file[:offset] == 0
- chunk = full_read(name)
- size=chunk.size
+ # This is where Sera4000 issue starts
+ begin
+ chunk = full_read(name)
+ size=chunk.size
+ rescue Exception => e
+ @logger.error("Failed to read #{name} because of: #{e.message} .. will continue and pretend this never happened")
+ end
  else
  chunk = partial_read_json(name, file[:offset], file[:length])
  @logger.debug("partial file #{name} from #{file[:offset]} to #{file[:length]}")
@@ -245,7 +260,7 @@ def run(queue)
  res = resource(name)
  begin
  fingjson = JSON.parse(chunk)
- @processed += nsgflowlog(queue, fingjson)
+ @processed += nsgflowlog(queue, fingjson, name)
  @logger.debug("Processed #{res[:nsg]} [#{res[:date]}] #{@processed} events")
  rescue JSON::ParserError
  @logger.error("parse error on #{res[:nsg]} [#{res[:date]}] offset: #{file[:offset]} length: #{file[:length]}")
@@ -259,6 +274,9 @@ def run(queue)
  begin
  @codec.decode(chunk) do |event|
  counter += 1
+ if @addfilename
+ event.set('filename', name)
+ end
  decorate(event)
  queue << event
  end
@@ -279,6 +297,7 @@ def run(queue)
  if ((Time.now.to_i - @last) > @interval)
  save_registry(@registry)
  end
+ end
  end
  # The files that got processed after the last registry save need to be saved too, in case the worklist is empty for some intervals.
  now = Time.now.to_i
@@ -326,8 +345,7 @@ def strip_comma(str)
  end
 
 
-
- def nsgflowlog(queue, json)
+ def nsgflowlog(queue, json, name)
  count=0
  json["records"].each do |record|
  res = resource(record["resourceId"])
@@ -340,9 +358,16 @@ def nsgflowlog(queue, json)
  tups = tup.split(',')
  ev = rule.merge({:unixtimestamp => tups[0], :src_ip => tups[1], :dst_ip => tups[2], :src_port => tups[3], :dst_port => tups[4], :protocol => tups[5], :direction => tups[6], :decision => tups[7]})
  if (record["properties"]["Version"]==2)
+ tups[9] = 0 if tups[9].nil?
+ tups[10] = 0 if tups[10].nil?
+ tups[11] = 0 if tups[11].nil?
+ tups[12] = 0 if tups[12].nil?
  ev.merge!( {:flowstate => tups[8], :src_pack => tups[9], :src_bytes => tups[10], :dst_pack => tups[11], :dst_bytes => tups[12]} )
  end
  @logger.trace(ev.to_s)
+ if @addfilename
+ ev.merge!( {:filename => name } )
+ end
  event = LogStash::Event.new('message' => ev.to_json)
  decorate(event)
  queue << event
@@ -429,10 +454,10 @@ def save_registry(filelist)
  @busy_writing_registry = true
  unless (@registry_local_path)
  @blob_client.create_block_blob(container, registry_path, Marshal.dump(filelist))
- @logger.info("processed #{@processed} events, saving #{filelist.size} blobs and offsets to registry #{registry_path}")
+ @logger.info("processed #{@processed} events, saving #{filelist.size} blobs and offsets to remote registry #{registry_path}")
  else
  File.open(@registry_local_path+"/"+@pipe_id, 'w') { |file| file.write(Marshal.dump(filelist)) }
- @logger.info("processed #{@processed} events, saving #{filelist.size} blobs and offsets to registry #{registry_local_path+"/"+@pipe_id}")
+ @logger.info("processed #{@processed} events, saving #{filelist.size} blobs and offsets to local registry #{registry_local_path+"/"+@pipe_id}")
  end
  @busy_writing_registry = false
  @last = Time.now.to_i
@@ -446,21 +471,34 @@ def save_registry(filelist)
446
471
  end
447
472
  end
448
473
 
474
+
449
475
  def learn_encapsulation
450
476
  # From one file, read first block and last block to learn head and tail
451
- # If the blobstorage can't be found, an error from farraday middleware will come with the text
452
- # org.jruby.ext.set.RubySet cannot be cast to class org.jruby.RubyFixnum
453
- blob = @blob_client.list_blobs(container, { maxresults: 1, prefix: @prefix }).first
454
- return if blob.nil?
455
- blocks = @blob_client.list_blob_blocks(container, blob.name)[:committed]
456
- # TODO add check for empty blocks and log error that the header and footer can't be learned and must be set in the config
457
- @logger.debug("using #{blob.name} to learn the json header and tail")
458
- @head = @blob_client.get_blob(container, blob.name, start_range: 0, end_range: blocks.first.size-1)[1]
459
- @logger.debug("learned header: #{@head}")
460
- length = blob.properties[:content_length].to_i
461
- offset = length - blocks.last.size
462
- @tail = @blob_client.get_blob(container, blob.name, start_range: offset, end_range: length-1)[1]
463
- @logger.debug("learned tail: #{@tail}")
477
+ begin
478
+ blobs = @blob_client.list_blobs(container, { maxresults: 3, prefix: @prefix})
479
+ blobs.each do |blob|
480
+ unless blob.name == registry_path
481
+ begin
482
+ blocks = @blob_client.list_blob_blocks(container, blob.name)[:committed]
483
+ if blocks.first.name.start_with?('A00')
484
+ @logger.debug("using #{blob.name}/#{blocks.first.name} to learn the json header")
485
+ @head = @blob_client.get_blob(container, blob.name, start_range: 0, end_range: blocks.first.size-1)[1]
486
+ end
487
+ if blocks.last.name.start_with?('Z00')
488
+ @logger.debug("using #{blob.name}/#{blocks.last.name} to learn the json footer")
489
+ length = blob.properties[:content_length].to_i
490
+ offset = length - blocks.last.size
491
+ @tail = @blob_client.get_blob(container, blob.name, start_range: offset, end_range: length-1)[1]
492
+ @logger.debug("learned tail: #{@tail}")
493
+ end
494
+ rescue Exception => e
495
+ @logger.info("learn json one of the attempts failed #{e.message}")
496
+ end
497
+ end
498
+ end
499
+ rescue Exception => e
500
+ @logger.info("learn json header and footer failed because #{e.message}")
501
+ end
464
502
  end
465
503
 
466
504
  def resource(str)
logstash-input-azure_blob_storage.gemspec CHANGED
@@ -1,6 +1,6 @@
  Gem::Specification.new do |s|
  s.name = 'logstash-input-azure_blob_storage'
- s.version = '0.11.4'
+ s.version = '0.11.5'
  s.licenses = ['Apache-2.0']
  s.summary = 'This logstash plugin reads and parses data from Azure Storage Blobs.'
  s.description = <<-EOF
@@ -23,5 +23,5 @@ EOF
  s.add_runtime_dependency 'logstash-core-plugin-api', '~> 2.1'
  s.add_runtime_dependency 'stud', '~> 0.0.23'
  s.add_runtime_dependency 'azure-storage-blob', '~> 1.1'
- s.add_development_dependency 'logstash-devutils', '~> 1.0', '>= 1.0.0'
+ #s.add_development_dependency 'logstash-devutils', '~> 2'
  end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: logstash-input-azure_blob_storage
  version: !ruby/object:Gem::Version
- version: 0.11.4
+ version: 0.11.5
  platform: ruby
  authors:
  - Jan Geertsma
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2020-05-23 00:00:00.000000000 Z
+ date: 2020-12-19 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  requirement: !ruby/object:Gem::Requirement
@@ -52,26 +52,6 @@ dependencies:
  - - "~>"
  - !ruby/object:Gem::Version
  version: '1.1'
- - !ruby/object:Gem::Dependency
- requirement: !ruby/object:Gem::Requirement
- requirements:
- - - ">="
- - !ruby/object:Gem::Version
- version: 1.0.0
- - - "~>"
- - !ruby/object:Gem::Version
- version: '1.0'
- name: logstash-devutils
- type: :development
- prerelease: false
- version_requirements: !ruby/object:Gem::Requirement
- requirements:
- - - ">="
- - !ruby/object:Gem::Version
- version: 1.0.0
- - - "~>"
- - !ruby/object:Gem::Version
- version: '1.0'
  description: " This gem is a Logstash plugin. It reads and parses data from Azure\
  \ Storage Blobs. The azure_blob_storage is a reimplementation to replace azureblob\
  \ from azure-diagnostics-tools/Logstash. It can deal with larger volumes and partial