logstash-input-azure_blob_storage 0.11.4 → 0.11.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -1
- data/README.md +7 -2
- data/lib/logstash/inputs/azure_blob_storage.rb +60 -22
- data/logstash-input-azure_blob_storage.gemspec +2 -2
- metadata +2 -22
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 3d446aed971a95e6e17a27ed1e9ec8b141f939b53697fb9c332cfb130404745a
+  data.tar.gz: 4a1321f6c6a30f6787d2133642ca23840371d6f4e18102cb775d345b09eb176a
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: b4f48a0bebcd6e3594584a4473b223838359d44e9ef591f958aa4c80c4c22953f6b0f708b19faeaf0517c66f47185bda4de75ab4e3618b23e2e7f23f71cb4bee
+  data.tar.gz: 508cd39ea159a4655e590f46ad0108c3b6e6de95ed575c4456da0230bae73fb384ecb7697ed710e7afb1542fe01cbd8a62130acedcbf0ba9c3040ace1f9d76d0
data/CHANGELOG.md
CHANGED
@@ -1,6 +1,13 @@
+## 0.11.5
+- Added optional filename into the message
+- plumbing for emulator, start_over not learning from registry
+
 ## 0.11.4
 - fixed listing 3 times, rather than retrying to list max 3 times
-- added
+- added option to migrate/save to using local registry
+- rewrote interval timing
+- reduced saving of registry to maximum once per interval, protect duplicate simultanious writes
+- added debug_timer for better tracing how long operations take
 - removing pipeline name from logfiles, logstash 7.6 and up have this in the log4j2 by default now
 - moved initialization from register to run. should make logs more readable
 
data/README.md
CHANGED
@@ -40,7 +40,11 @@ The registry_create_policy is used when the pipeline is started to either resume
 
 interval defines the minimum time the registry should be saved to the registry file (by default 'data/registry.dat'), this is only needed in case the pipeline dies unexpectedly. During a normal shutdown the registry is also saved.
 
-
+When registry_local_path is set to a directory, the registry is save on the logstash server in that directory. The filename is the pipe.id
+
+with registry_create_policy set to resume and the registry_local_path set to a directory where the registry isn't yet created, should load from the storage account and save the registry on the local server
+
+During the pipeline start for JSON codec, the plugin uses one file to learn how the JSON header and tail look like, they can also be configured manually.
 
 ## Running the pipeline
 The pipeline can be started in several ways.
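To illustrate the option introduced above, a minimal input block that combines a local registry with the resume policy might look like the sketch below; the account name, key, container and path are placeholders, not values taken from this diff.

  input {
      azure_blob_storage {
          storageaccount => "examplestorageaccount"
          access_key => "<access key from the portal>"
          container => "insights-logs-networksecuritygroupflowevent"
          registry_create_policy => "resume"
          registry_local_path => "/usr/share/logstash/plugin"
      }
  }

On the first run with these settings the plugin would fall back to the registry stored in the storage account (if any) and then keep the local copy up to date, which is the migration path the changelog describes.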
@@ -91,6 +95,7 @@ The log level of the plugin can be put into DEBUG through
 curl -XPUT 'localhost:9600/_node/logging?pretty' -H 'Content-Type: application/json' -d'{"logger.logstash.inputs.azureblobstorage" : "DEBUG"}'
 ```
 
+because debug also makes logstash chatty, there are also debug_timer and debug_until that can be used to print additional informantion on what the pipeline is doing and how long it takes. debug_until is for the number of events until debug is disabled.
 
 ## Other Configuration Examples
 For nsgflowlogs, a simple configuration looks like this
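As a small hedged illustration of those two options (the values are arbitrary, and treating debug_timer as a boolean is an assumption based on the changelog entry above, since its config line is not part of this diff), both go into the same input block:

  azure_blob_storage {
      # ... other required options (storageaccount, access_key, container) ...
      debug_timer => true
      debug_until => 100
  }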
@@ -176,7 +181,7 @@ filter {
 remove_field => ["subresponse"]
 remove_field => ["username"]
 remove_field => ["clientPort"]
-remove_field => ["port"]
+remove_field => ["port"]:0
 remove_field => ["timestamp"]
 }
 }
data/lib/logstash/inputs/azure_blob_storage.rb
CHANGED
@@ -25,6 +25,9 @@ config :storageaccount, :validate => :string, :required => false
 # DNS Suffix other then blob.core.windows.net
 config :dns_suffix, :validate => :string, :required => false, :default => 'core.windows.net'
 
+# For development this can be used to emulate an accountstorage when not available from azure
+#config :use_development_storage, :validate => :boolean, :required => false
+
 # The (primary or secondary) Access Key for the the storage account. The key can be found in the portal.azure.com or through the azure api StorageAccounts/ListKeys. For example the PowerShell command Get-AzStorageAccountKey.
 config :access_key, :validate => :password, :required => false
 
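Since use_development_storage is still commented out here (only plumbing, per the changelog), one way to test against a local emulator such as Azurite today is to go through the plugin's existing connection-string path, which the run method below hands to create_from_connection_string. A sketch under that assumption; the endpoint and the well-known devstoreaccount1 key are Azurite's published defaults, not values from this diff:

  input {
      azure_blob_storage {
          # Assumes the plugin's connection_string option; endpoint and key are Azurite's public defaults.
          connection_string => "DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;"
          container => "test-container"
      }
  }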
@@ -58,6 +61,7 @@ config :registry_create_policy, :validate => ['resume','start_over','start_fresh
 # Z00000000000000000000000000000000 2 ]}
 config :interval, :validate => :number, :default => 60
 
+config :addfilename, :validate => :boolean, :default => false, :required => false
 # debug_until will for a maximum amount of processed messages shows 3 types of log printouts including processed filenames. This is a lightweight alternative to switching the loglevel from info to debug or even trace
 config :debug_until, :validate => :number, :default => 0, :required => false
 
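The new addfilename flag defaults to false; enabling it makes every event carry the blob it came from, set via event.set('filename', name) on the codec path and merged as :filename for nsgflowlog events, as the later hunks in this file show. Usage is a single extra line in the input block:

  azure_blob_storage {
      # ... other options ...
      addfilename => true
  }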
@@ -127,11 +131,15 @@ def run(queue)
 unless conn.nil?
 @blob_client = Azure::Storage::Blob::BlobService.create_from_connection_string(conn)
 else
+# unless use_development_storage?
 @blob_client = Azure::Storage::Blob::BlobService.create(
 storage_account_name: storageaccount,
 storage_dns_suffix: dns_suffix,
 storage_access_key: access_key.value,
 )
+# else
+# @logger.info("not yet implemented")
+# end
 end
 
 @registry = Hash.new
@@ -167,7 +175,7 @@ def run(queue)
 if registry_create_policy == "start_fresh"
 @registry = list_blobs(true)
 save_registry(@registry)
-@logger.info("starting fresh,
+@logger.info("starting fresh, writing a clean the registry to contain #{@registry.size} blobs/files")
 end
 
 @is_json = false
@@ -223,6 +231,7 @@ def run(queue)
 newreg.store(name, { :offset => off, :length => file[:length] })
 if (@debug_until > @processed) then @logger.info("2: adding offsets: #{name} #{off} #{file[:length]}") end
 end
+# size nilClass when the list doesn't grow?!
 # Worklist is the subset of files where the already read offset is smaller than the file size
 worklist.clear
 worklist = newreg.select {|name,file| file[:offset] < file[:length]}
@@ -230,13 +239,19 @@
 
 # Start of processing
 # This would be ideal for threading since it's IO intensive, would be nice with a ruby native ThreadPool
-worklist.
+if (worklist.size > 0) then
+worklist.each do |name, file|
 start = Time.now.to_i
 if (@debug_until > @processed) then @logger.info("3: processing #{name} from #{file[:offset]} to #{file[:length]}") end
 size = 0
 if file[:offset] == 0
-
-
+# This is where Sera4000 issue starts
+begin
+chunk = full_read(name)
+size=chunk.size
+rescue Exception => e
+@logger.error("Failed to read #{name} because of: #{e.message} .. will continue and pretend this never happened")
+end
 else
 chunk = partial_read_json(name, file[:offset], file[:length])
 @logger.debug("partial file #{name} from #{file[:offset]} to #{file[:length]}")
@@ -245,7 +260,7 @@ def run(queue)
 res = resource(name)
 begin
 fingjson = JSON.parse(chunk)
-@processed += nsgflowlog(queue, fingjson)
+@processed += nsgflowlog(queue, fingjson, name)
 @logger.debug("Processed #{res[:nsg]} [#{res[:date]}] #{@processed} events")
 rescue JSON::ParserError
 @logger.error("parse error on #{res[:nsg]} [#{res[:date]}] offset: #{file[:offset]} length: #{file[:length]}")
@@ -259,6 +274,9 @@ def run(queue)
 begin
 @codec.decode(chunk) do |event|
 counter += 1
+if @addfilename
+event.set('filename', name)
+end
 decorate(event)
 queue << event
 end
@@ -279,6 +297,7 @@ def run(queue)
 if ((Time.now.to_i - @last) > @interval)
 save_registry(@registry)
 end
+end
 end
 # The files that got processed after the last registry save need to be saved too, in case the worklist is empty for some intervals.
 now = Time.now.to_i
@@ -326,8 +345,7 @@ def strip_comma(str)
 end
 
 
-
-def nsgflowlog(queue, json)
+def nsgflowlog(queue, json, name)
 count=0
 json["records"].each do |record|
 res = resource(record["resourceId"])
@@ -340,9 +358,16 @@ def nsgflowlog(queue, json)
 tups = tup.split(',')
 ev = rule.merge({:unixtimestamp => tups[0], :src_ip => tups[1], :dst_ip => tups[2], :src_port => tups[3], :dst_port => tups[4], :protocol => tups[5], :direction => tups[6], :decision => tups[7]})
 if (record["properties"]["Version"]==2)
+tups[9] = 0 if tups[9].nil?
+tups[10] = 0 if tups[10].nil?
+tups[11] = 0 if tups[11].nil?
+tups[12] = 0 if tups[12].nil?
 ev.merge!( {:flowstate => tups[8], :src_pack => tups[9], :src_bytes => tups[10], :dst_pack => tups[11], :dst_bytes => tups[12]} )
 end
 @logger.trace(ev.to_s)
+if @addfilename
+ev.merge!( {:filename => name } )
+end
 event = LogStash::Event.new('message' => ev.to_json)
 decorate(event)
 queue << event
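To make the tuple indices in the hunk above concrete, here is a self-contained Ruby sketch with an invented version 2 flow tuple; the field order (timestamp, source/destination IP and port, protocol, direction, decision, then flow state and packet/byte counters) follows the publicly documented NSG flow log v2 format, and the counters in positions 9 to 12 are exactly the ones the nil-guards default to 0, since tuples in flow state "B" carry no counters yet.

  require 'json'

  # Invented NSG flow log v2 tuple, comma separated (not taken from this diff).
  tup  = "1542110377,10.0.0.4,13.67.143.118,44931,443,T,O,A,E,25,4242,30,3546"
  tups = tup.split(',')
  ev = { :unixtimestamp => tups[0], :src_ip => tups[1], :dst_ip => tups[2],
         :src_port => tups[3], :dst_port => tups[4], :protocol => tups[5],
         :direction => tups[6], :decision => tups[7] }
  # Version 2 appends flow state plus packet/byte counters; state "B" tuples have none,
  # which is why the plugin defaults tups[9]..tups[12] to 0 before the merge.
  ev.merge!( { :flowstate => tups[8], :src_pack => tups[9], :src_bytes => tups[10],
               :dst_pack => tups[11], :dst_bytes => tups[12] } )
  puts ev.to_json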
@@ -429,10 +454,10 @@ def save_registry(filelist)
 @busy_writing_registry = true
 unless (@registry_local_path)
 @blob_client.create_block_blob(container, registry_path, Marshal.dump(filelist))
-@logger.info("processed #{@processed} events, saving #{filelist.size} blobs and offsets to registry #{registry_path}")
+@logger.info("processed #{@processed} events, saving #{filelist.size} blobs and offsets to remote registry #{registry_path}")
 else
 File.open(@registry_local_path+"/"+@pipe_id, 'w') { |file| file.write(Marshal.dump(filelist)) }
-@logger.info("processed #{@processed} events, saving #{filelist.size} blobs and offsets to registry #{registry_local_path+"/"+@pipe_id}")
+@logger.info("processed #{@processed} events, saving #{filelist.size} blobs and offsets to local registry #{registry_local_path+"/"+@pipe_id}")
 end
 @busy_writing_registry = false
 @last = Time.now.to_i
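Because both branches above write the registry with Marshal.dump, and the local file is named after the pipeline id (per the README change earlier in this diff), the local registry can be inspected from an ordinary Ruby console. A small sketch; the path is a placeholder built from registry_local_path plus the pipeline id:

  # Path is a placeholder: <registry_local_path>/<pipeline id>.
  registry = Marshal.load(File.binread("/usr/share/logstash/plugin/main"))
  registry.each do |name, file|
    puts "#{name}: read #{file[:offset]} of #{file[:length]} bytes"
  end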
@@ -446,21 +471,34 @@ def save_registry(filelist)
 end
 end
 
+
 def learn_encapsulation
 # From one file, read first block and last block to learn head and tail
-
-
-
-
-
-
-
-
-
-
-
-
-
+begin
+blobs = @blob_client.list_blobs(container, { maxresults: 3, prefix: @prefix})
+blobs.each do |blob|
+unless blob.name == registry_path
+begin
+blocks = @blob_client.list_blob_blocks(container, blob.name)[:committed]
+if blocks.first.name.start_with?('A00')
+@logger.debug("using #{blob.name}/#{blocks.first.name} to learn the json header")
+@head = @blob_client.get_blob(container, blob.name, start_range: 0, end_range: blocks.first.size-1)[1]
+end
+if blocks.last.name.start_with?('Z00')
+@logger.debug("using #{blob.name}/#{blocks.last.name} to learn the json footer")
+length = blob.properties[:content_length].to_i
+offset = length - blocks.last.size
+@tail = @blob_client.get_blob(container, blob.name, start_range: offset, end_range: length-1)[1]
+@logger.debug("learned tail: #{@tail}")
+end
+rescue Exception => e
+@logger.info("learn json one of the attempts failed #{e.message}")
+end
+end
+end
+rescue Exception => e
+@logger.info("learn json header and footer failed because #{e.message}")
+end
 end
 
 def resource(str)
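If this block-based learning finds no A00/Z00 block names, the JSON wrapper can be configured by hand instead, as the README notes. A hedged sketch in which the file_head and file_tail option names (and their nsgflowlog-style defaults) are assumptions taken from the plugin's documentation rather than from this diff:

  azure_blob_storage {
      # ... other options ...
      codec => "json"
      file_head => '{"records":['
      file_tail => ']}'
  }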
data/logstash-input-azure_blob_storage.gemspec
CHANGED
@@ -1,6 +1,6 @@
 Gem::Specification.new do |s|
 s.name = 'logstash-input-azure_blob_storage'
-s.version = '0.11.
+s.version = '0.11.5'
 s.licenses = ['Apache-2.0']
 s.summary = 'This logstash plugin reads and parses data from Azure Storage Blobs.'
 s.description = <<-EOF
@@ -23,5 +23,5 @@ EOF
 s.add_runtime_dependency 'logstash-core-plugin-api', '~> 2.1'
 s.add_runtime_dependency 'stud', '~> 0.0.23'
 s.add_runtime_dependency 'azure-storage-blob', '~> 1.1'
-s.add_development_dependency 'logstash-devutils', '~>
+#s.add_development_dependency 'logstash-devutils', '~> 2'
 end
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: logstash-input-azure_blob_storage
 version: !ruby/object:Gem::Version
-version: 0.11.
+version: 0.11.5
 platform: ruby
 authors:
 - Jan Geertsma
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2020-
+date: 2020-12-19 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
 requirement: !ruby/object:Gem::Requirement
@@ -52,26 +52,6 @@ dependencies:
 - - "~>"
 - !ruby/object:Gem::Version
 version: '1.1'
-- !ruby/object:Gem::Dependency
-requirement: !ruby/object:Gem::Requirement
-requirements:
-- - ">="
-- !ruby/object:Gem::Version
-version: 1.0.0
-- - "~>"
-- !ruby/object:Gem::Version
-version: '1.0'
-name: logstash-devutils
-type: :development
-prerelease: false
-version_requirements: !ruby/object:Gem::Requirement
-requirements:
-- - ">="
-- !ruby/object:Gem::Version
-version: 1.0.0
-- - "~>"
-- !ruby/object:Gem::Version
-version: '1.0'
 description: " This gem is a Logstash plugin. It reads and parses data from Azure\
 \ Storage Blobs. The azure_blob_storage is a reimplementation to replace azureblob\
 \ from azure-diagnostics-tools/Logstash. It can deal with larger volumes and partial\