logstash-input-azure_blob_storage 0.11.4 → 0.11.5
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -1
- data/README.md +7 -2
- data/lib/logstash/inputs/azure_blob_storage.rb +60 -22
- data/logstash-input-azure_blob_storage.gemspec +2 -2
- metadata +2 -22
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 3d446aed971a95e6e17a27ed1e9ec8b141f939b53697fb9c332cfb130404745a
+  data.tar.gz: 4a1321f6c6a30f6787d2133642ca23840371d6f4e18102cb775d345b09eb176a
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: b4f48a0bebcd6e3594584a4473b223838359d44e9ef591f958aa4c80c4c22953f6b0f708b19faeaf0517c66f47185bda4de75ab4e3618b23e2e7f23f71cb4bee
+  data.tar.gz: 508cd39ea159a4655e590f46ad0108c3b6e6de95ed575c4456da0230bae73fb384ecb7697ed710e7afb1542fe01cbd8a62130acedcbf0ba9c3040ace1f9d76d0
data/CHANGELOG.md
CHANGED
@@ -1,6 +1,13 @@
+## 0.11.5
+- Added optional filename into the message
+- plumbing for emulator, start_over not learning from registry
+
 ## 0.11.4
 - fixed listing 3 times, rather than retrying to list max 3 times
-- added
+- added option to migrate/save to using local registry
+- rewrote interval timing
+- reduced saving of registry to maximum once per interval, protect duplicate simultanious writes
+- added debug_timer for better tracing how long operations take
 - removing pipeline name from logfiles, logstash 7.6 and up have this in the log4j2 by default now
 - moved initialization from register to run. should make logs more readable
 
data/README.md
CHANGED
@@ -40,7 +40,11 @@ The registry_create_policy is used when the pipeline is started to either resume
 
 interval defines the minimum time the registry should be saved to the registry file (by default 'data/registry.dat'), this is only needed in case the pipeline dies unexpectedly. During a normal shutdown the registry is also saved.
 
-
+When registry_local_path is set to a directory, the registry is save on the logstash server in that directory. The filename is the pipe.id
+
+with registry_create_policy set to resume and the registry_local_path set to a directory where the registry isn't yet created, should load from the storage account and save the registry on the local server
+
+During the pipeline start for JSON codec, the plugin uses one file to learn how the JSON header and tail look like, they can also be configured manually.
 
 ## Running the pipeline
 The pipeline can be started in several ways.
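Reviewer note: the resume-plus-registry_local_path behaviour described in the README lines above amounts to "use the local registry file if it exists, otherwise pull the registry from the storage account and write it locally". A minimal standalone sketch of that idea; the helper names and file layout here are assumptions for illustration, not the plugin's code:

```ruby
require 'tmpdir'

# Stand-in for downloading 'data/registry.dat' from the storage account.
def fetch_remote_registry
  Marshal.dump({ 'nsg-a/PT1H.json' => { :offset => 12, :length => 2576 } })
end

# Resume: prefer the local registry file (named after the pipeline id);
# if it does not exist yet, take the remote copy and seed the local file with it.
def load_registry(registry_local_path, pipe_id)
  local_file = File.join(registry_local_path, pipe_id)
  if File.exist?(local_file)
    Marshal.load(File.binread(local_file))
  else
    registry = Marshal.load(fetch_remote_registry)
    File.binwrite(local_file, Marshal.dump(registry))
    registry
  end
end

Dir.mktmpdir do |dir|
  puts load_registry(dir, 'main').inspect     # first start: seeded from the remote copy
  puts File.exist?(File.join(dir, 'main'))    # => true, the next start resumes locally
end
```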
@@ -91,6 +95,7 @@ The log level of the plugin can be put into DEBUG through
 curl -XPUT 'localhost:9600/_node/logging?pretty' -H 'Content-Type: application/json' -d'{"logger.logstash.inputs.azureblobstorage" : "DEBUG"}'
 ```
 
+because debug also makes logstash chatty, there are also debug_timer and debug_until that can be used to print additional informantion on what the pipeline is doing and how long it takes. debug_until is for the number of events until debug is disabled.
 
 ## Other Configuration Examples
 For nsgflowlogs, a simple configuration looks like this
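Reviewer note: debug_until is just a counter compared against the number of processed events; once the processed count catches up, the extra logging stops. Paraphrased (not the plugin's code) it behaves like:

```ruby
require 'logger'

logger      = Logger.new($stdout)
debug_until = 3    # config :debug_until — log details for the first 3 events only
processed   = 0

5.times do |n|
  # mirrors the plugin's guard: if (@debug_until > @processed) then @logger.info(...) end
  logger.info("processing event #{n}") if debug_until > processed
  processed += 1
end
# only the first three iterations print; after that the pipeline stays quiet
```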
@@ -176,7 +181,7 @@ filter {
       remove_field => ["subresponse"]
       remove_field => ["username"]
       remove_field => ["clientPort"]
-      remove_field => ["port"]
+      remove_field => ["port"]
       remove_field => ["timestamp"]
   }
 }
data/lib/logstash/inputs/azure_blob_storage.rb
CHANGED
@@ -25,6 +25,9 @@ config :storageaccount, :validate => :string, :required => false
 # DNS Suffix other then blob.core.windows.net
 config :dns_suffix, :validate => :string, :required => false, :default => 'core.windows.net'
 
+# For development this can be used to emulate an accountstorage when not available from azure
+#config :use_development_storage, :validate => :boolean, :required => false
+
 # The (primary or secondary) Access Key for the the storage account. The key can be found in the portal.azure.com or through the azure api StorageAccounts/ListKeys. For example the PowerShell command Get-AzStorageAccountKey.
 config :access_key, :validate => :password, :required => false
 
@@ -58,6 +61,7 @@ config :registry_create_policy, :validate => ['resume','start_over','start_fresh
 # Z00000000000000000000000000000000 2 ]}
 config :interval, :validate => :number, :default => 60
 
+config :addfilename, :validate => :boolean, :default => false, :required => false
 # debug_until will for a maximum amount of processed messages shows 3 types of log printouts including processed filenames. This is a lightweight alternative to switching the loglevel from info to debug or even trace
 config :debug_until, :validate => :number, :default => 0, :required => false
 
@@ -127,11 +131,15 @@ def run(queue)
   unless conn.nil?
     @blob_client = Azure::Storage::Blob::BlobService.create_from_connection_string(conn)
   else
+    # unless use_development_storage?
     @blob_client = Azure::Storage::Blob::BlobService.create(
       storage_account_name: storageaccount,
       storage_dns_suffix: dns_suffix,
       storage_access_key: access_key.value,
     )
+    # else
+    #   @logger.info("not yet implemented")
+    # end
   end
 
   @registry = Hash.new
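Reviewer note: the commented-out emulator branch above suggests development storage would reuse the existing connection-string path. A hedged sketch of what pointing the client at Azurite/the local storage emulator could look like; the connection string below is the emulator's published default (devstoreaccount1 and its well-known key), not something this diff configures, and whether the plugin will take this route is still open:

```ruby
require 'azure/storage/blob'

# Published connection string for the local emulator account; only standard
# fields are used so the azure-storage connection-string parser handles it.
conn = 'DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;' \
       'AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;' \
       'BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;'

blob_client = Azure::Storage::Blob::BlobService.create_from_connection_string(conn)
blob_client.list_containers.each { |container| puts container.name }
```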
@@ -167,7 +175,7 @@ def run(queue)
   if registry_create_policy == "start_fresh"
     @registry = list_blobs(true)
     save_registry(@registry)
-    @logger.info("starting fresh,
+    @logger.info("starting fresh, writing a clean the registry to contain #{@registry.size} blobs/files")
   end
 
   @is_json = false
@@ -223,6 +231,7 @@ def run(queue)
       newreg.store(name, { :offset => off, :length => file[:length] })
       if (@debug_until > @processed) then @logger.info("2: adding offsets: #{name} #{off} #{file[:length]}") end
     end
+    # size nilClass when the list doesn't grow?!
     # Worklist is the subset of files where the already read offset is smaller than the file size
     worklist.clear
     worklist = newreg.select {|name,file| file[:offset] < file[:length]}
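Reviewer note: the select on the last line above is the scheduler in one line — any blob whose saved offset is still short of its current length gets picked up again. A standalone illustration with a made-up registry:

```ruby
# Registry entries as the plugin stores them: blob name => offset already read and total length.
newreg = {
  'nsg-a/PT1H.json' => { :offset => 12,   :length => 2048 },  # grew since the last pass
  'nsg-b/PT1H.json' => { :offset => 4096, :length => 4096 },  # fully read, skipped
  'nsg-c/PT1H.json' => { :offset => 0,    :length => 730  },  # brand new blob
}

worklist = newreg.select { |name, file| file[:offset] < file[:length] }
worklist.each { |name, file| puts "#{name}: read #{file[:offset]}..#{file[:length]}" }
# => nsg-a and nsg-c are processed, nsg-b is left alone
```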
@@ -230,13 +239,19 @@ def run(queue)
 
     # Start of processing
     # This would be ideal for threading since it's IO intensive, would be nice with a ruby native ThreadPool
-    worklist.
+    if (worklist.size > 0) then
+      worklist.each do |name, file|
        start = Time.now.to_i
        if (@debug_until > @processed) then @logger.info("3: processing #{name} from #{file[:offset]} to #{file[:length]}") end
        size = 0
        if file[:offset] == 0
-
-
+          # This is where Sera4000 issue starts
+          begin
+            chunk = full_read(name)
+            size=chunk.size
+          rescue Exception => e
+            @logger.error("Failed to read #{name} because of: #{e.message} .. will continue and pretend this never happened")
+          end
        else
          chunk = partial_read_json(name, file[:offset], file[:length])
          @logger.debug("partial file #{name} from #{file[:offset]} to #{file[:length]}")
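Reviewer note: the offset check above picks between full_read and partial_read_json. In byte terms the distinction is simply "everything" versus "only what was appended since the registry offset" (the real partial read for JSON also re-wraps the chunk with the learned head and tail). A toy version with a string standing in for the blob:

```ruby
# A string stands in for a growing append blob; the registry remembered offset 24.
content = '{"records":[{"a":1},{"a":2},{"a":3}]}'
offset  = 24
length  = content.bytesize

full_chunk    = content.byteslice(0, length)                 # file[:offset] == 0: read it all
partial_chunk = content.byteslice(offset, length - offset)   # otherwise: just the new tail

puts full_chunk
puts partial_chunk   # in the plugin this slice is done server-side via get_blob start_range/end_range
```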
@@ -245,7 +260,7 @@ def run(queue)
          res = resource(name)
          begin
            fingjson = JSON.parse(chunk)
-            @processed += nsgflowlog(queue, fingjson)
+            @processed += nsgflowlog(queue, fingjson, name)
            @logger.debug("Processed #{res[:nsg]} [#{res[:date]}] #{@processed} events")
          rescue JSON::ParserError
            @logger.error("parse error on #{res[:nsg]} [#{res[:date]}] offset: #{file[:offset]} length: #{file[:length]}")
@@ -259,6 +274,9 @@ def run(queue)
          begin
            @codec.decode(chunk) do |event|
              counter += 1
+              if @addfilename
+                event.set('filename', name)
+              end
              decorate(event)
              queue << event
            end
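Reviewer note: the new addfilename branch stamps the source blob name on every decoded event. With a plain hash standing in for LogStash::Event (the plugin calls event.set), the effect is:

```ruby
addfilename = true                                       # config :addfilename
name        = 'nsg-a/PT1H.json'                          # illustrative name of the blob being processed

decoded_events = [{ 'message' => 'line 1' }, { 'message' => 'line 2' }]

decoded_events.each do |event|
  event['filename'] = name if addfilename                # event.set('filename', name) in the plugin
  puts event.inspect
end
```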
@@ -279,6 +297,7 @@ def run(queue)
        if ((Time.now.to_i - @last) > @interval)
          save_registry(@registry)
        end
+      end
     end
     # The files that got processed after the last registry save need to be saved too, in case the worklist is empty for some intervals.
     now = Time.now.to_i
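Reviewer note: the added end closes the new worklist.size guard; together with the @last/@interval check above it, registry writes are throttled to at most one per interval while work is ongoing. Reduced to its core:

```ruby
interval = 60                     # config :interval, in seconds
last     = Time.now.to_i - 61     # pretend the previous save happened just over a minute ago

if (Time.now.to_i - last) > interval
  puts 'saving registry'          # save_registry(@registry) in the plugin
  last = Time.now.to_i            # the plugin records the save time in @last
end
```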
@@ -326,8 +345,7 @@ def strip_comma(str)
 end
 
 
-
-def nsgflowlog(queue, json)
+def nsgflowlog(queue, json, name)
   count=0
   json["records"].each do |record|
     res = resource(record["resourceId"])
@@ -340,9 +358,16 @@ def nsgflowlog(queue, json)
       tups = tup.split(',')
       ev = rule.merge({:unixtimestamp => tups[0], :src_ip => tups[1], :dst_ip => tups[2], :src_port => tups[3], :dst_port => tups[4], :protocol => tups[5], :direction => tups[6], :decision => tups[7]})
       if (record["properties"]["Version"]==2)
+        tups[9] = 0 if tups[9].nil?
+        tups[10] = 0 if tups[10].nil?
+        tups[11] = 0 if tups[11].nil?
+        tups[12] = 0 if tups[12].nil?
         ev.merge!( {:flowstate => tups[8], :src_pack => tups[9], :src_bytes => tups[10], :dst_pack => tups[11], :dst_bytes => tups[12]} )
       end
       @logger.trace(ev.to_s)
+      if @addfilename
+        ev.merge!( {:filename => name } )
+      end
       event = LogStash::Event.new('message' => ev.to_json)
       decorate(event)
       queue << event
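Reviewer note: the four nil-guards handle version-2 flow tuples that end early (for example flows still in state B carry no packet/byte counters yet), so the merge! never writes nil. A standalone illustration of the tuple layout the code above assumes:

```ruby
# One version-2 NSG flow tuple: timestamp,src_ip,dst_ip,src_port,dst_port,
# protocol,direction,decision,flowstate,src_pack,src_bytes,dst_pack,dst_bytes
tup  = '1588940123,10.0.0.4,10.0.0.5,44665,443,T,O,A,B'   # begin-state: counters missing
tups = tup.split(',')

(9..12).each { |i| tups[i] = 0 if tups[i].nil? }          # same effect as the four guards

ev = { :unixtimestamp => tups[0], :src_ip => tups[1], :dst_ip => tups[2],
       :src_port => tups[3], :dst_port => tups[4], :protocol => tups[5],
       :direction => tups[6], :decision => tups[7],
       :flowstate => tups[8], :src_pack => tups[9], :src_bytes => tups[10],
       :dst_pack => tups[11], :dst_bytes => tups[12] }
puts ev.inspect
```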
@@ -429,10 +454,10 @@ def save_registry(filelist)
     @busy_writing_registry = true
     unless (@registry_local_path)
       @blob_client.create_block_blob(container, registry_path, Marshal.dump(filelist))
-      @logger.info("processed #{@processed} events, saving #{filelist.size} blobs and offsets to registry #{registry_path}")
+      @logger.info("processed #{@processed} events, saving #{filelist.size} blobs and offsets to remote registry #{registry_path}")
     else
       File.open(@registry_local_path+"/"+@pipe_id, 'w') { |file| file.write(Marshal.dump(filelist)) }
-      @logger.info("processed #{@processed} events, saving #{filelist.size} blobs and offsets to registry #{registry_local_path+"/"+@pipe_id}")
+      @logger.info("processed #{@processed} events, saving #{filelist.size} blobs and offsets to local registry #{registry_local_path+"/"+@pipe_id}")
     end
     @busy_writing_registry = false
     @last = Time.now.to_i
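Reviewer note: both branches serialize the same Hash with Marshal; only the destination differs (a block blob at registry_path versus a local file named after the pipeline id). A quick round-trip of the local variant with throwaway paths:

```ruby
require 'tmpdir'

registry = { 'nsg-a/PT1H.json' => { :offset => 2048, :length => 2048 } }

Dir.mktmpdir do |registry_local_path|
  pipe_id    = 'main'
  local_file = File.join(registry_local_path, pipe_id)

  File.binwrite(local_file, Marshal.dump(registry))    # what save_registry does locally
  restored = Marshal.load(File.binread(local_file))    # what a later start would read back

  puts restored.inspect                                # identical hash, so offsets survive restarts
end
```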
@@ -446,21 +471,34 @@ def save_registry(filelist)
   end
 end
 
+
 def learn_encapsulation
   # From one file, read first block and last block to learn head and tail
-
-
-
-
-
-
-
-
-
-
-
-
-
+  begin
+    blobs = @blob_client.list_blobs(container, { maxresults: 3, prefix: @prefix})
+    blobs.each do |blob|
+      unless blob.name == registry_path
+        begin
+          blocks = @blob_client.list_blob_blocks(container, blob.name)[:committed]
+          if blocks.first.name.start_with?('A00')
+            @logger.debug("using #{blob.name}/#{blocks.first.name} to learn the json header")
+            @head = @blob_client.get_blob(container, blob.name, start_range: 0, end_range: blocks.first.size-1)[1]
+          end
+          if blocks.last.name.start_with?('Z00')
+            @logger.debug("using #{blob.name}/#{blocks.last.name} to learn the json footer")
+            length = blob.properties[:content_length].to_i
+            offset = length - blocks.last.size
+            @tail = @blob_client.get_blob(container, blob.name, start_range: offset, end_range: length-1)[1]
+            @logger.debug("learned tail: #{@tail}")
+          end
+        rescue Exception => e
+          @logger.info("learn json one of the attempts failed #{e.message}")
+        end
+      end
+    end
+  rescue Exception => e
+    @logger.info("learn json header and footer failed because #{e.message}")
+  end
 end
 
 def resource(str)
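Reviewer note: learn_encapsulation leans on the block naming of nsgflowlog blobs — the first committed block (A00…) holds the JSON head, the last (Z00…) the tail, and the blocks in between hold records. Once head and tail are known (or configured manually, as the README notes), a chunk read from the middle can be wrapped back into parseable JSON. Roughly, with simplified stand-ins for the learned head and tail:

```ruby
require 'json'

head = '{"records":['      # simplified stand-in for the learned A00... block
tail = ']}'                # simplified stand-in for the learned Z00... block

# A chunk read from somewhere in the middle of the blob; a leading comma would be
# stripped first (see strip_comma in this file).
chunk = '{"time":"2020-12-19T12:00:00Z"},{"time":"2020-12-19T12:05:00Z"}'

json = JSON.parse(head + chunk + tail)
puts json['records'].length    # => 2
```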
data/logstash-input-azure_blob_storage.gemspec
CHANGED
@@ -1,6 +1,6 @@
 Gem::Specification.new do |s|
   s.name = 'logstash-input-azure_blob_storage'
-  s.version = '0.11.
+  s.version = '0.11.5'
   s.licenses = ['Apache-2.0']
   s.summary = 'This logstash plugin reads and parses data from Azure Storage Blobs.'
   s.description = <<-EOF
@@ -23,5 +23,5 @@ EOF
   s.add_runtime_dependency 'logstash-core-plugin-api', '~> 2.1'
   s.add_runtime_dependency 'stud', '~> 0.0.23'
   s.add_runtime_dependency 'azure-storage-blob', '~> 1.1'
-  s.add_development_dependency 'logstash-devutils', '~>
+  #s.add_development_dependency 'logstash-devutils', '~> 2'
 end
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: logstash-input-azure_blob_storage
 version: !ruby/object:Gem::Version
-  version: 0.11.
+  version: 0.11.5
 platform: ruby
 authors:
 - Jan Geertsma
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2020-
+date: 2020-12-19 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   requirement: !ruby/object:Gem::Requirement
@@ -52,26 +52,6 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: '1.1'
-- !ruby/object:Gem::Dependency
-  requirement: !ruby/object:Gem::Requirement
-    requirements:
-    - - ">="
-      - !ruby/object:Gem::Version
-        version: 1.0.0
-    - - "~>"
-      - !ruby/object:Gem::Version
-        version: '1.0'
-  name: logstash-devutils
-  type: :development
-  prerelease: false
-  version_requirements: !ruby/object:Gem::Requirement
-    requirements:
-    - - ">="
-      - !ruby/object:Gem::Version
-        version: 1.0.0
-    - - "~>"
-      - !ruby/object:Gem::Version
-        version: '1.0'
 description: " This gem is a Logstash plugin. It reads and parses data from Azure\
   \ Storage Blobs. The azure_blob_storage is a reimplementation to replace azureblob\
   \ from azure-diagnostics-tools/Logstash. It can deal with larger volumes and partial\
|