logstash-input-azure_blob_storage 0.12.2 → 0.12.4
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -1
- data/Gemfile +7 -0
- data/README.md +1 -1
- data/lib/logstash/inputs/azure_blob_storage.rb +518 -485
- data/logstash-input-azure_blob_storage.gemspec +22 -20
- data/spec/inputs/azure_blob_storage_spec.rb +10 -10
- metadata +33 -5
@@ -1,564 +1,597 @@
 # encoding: utf-8
 require 'logstash/inputs/base'
+#require 'logstash/namespace'
 require 'stud/interval'
 require 'azure/storage/blob'
 require 'json'
 
-# This is a logstash input plugin for files in Azure
-
+# This is a logstash input plugin for files in Azure Storage Accounts. There is a storage explorer in the portal and an application with the same name https://storageexplorer.com.
+
+# https://docs.microsoft.com/en-us/azure/storage/blobs/storage-blobs-introduction
+# The hierarchy of Azure block storage is
+# Tenant > Subscription > Account > ResourceGroup > StorageAccount > Container > FileBlobs > Blocks
+# A storage account can store blobs, file shares, queues and tables. This plugin uses the Azure Ruby SDK to fetch blobs, process the data in the blocks, deal with blobs that grow over time and ignore archived blobs.
+#
+# block-id                          bytes content
+# A00000000000000000000000000000000 12    {"records":[
+# D672f4bbd95a04209b00dc05d899e3cce 2576  json objects for 1st minute
+# D7fe0d4f275a84c32982795b0e5c7d3a1 2312  json objects for 2nd minute
+# Z00000000000000000000000000000000 2     ]}
+
+# A storage account has by default a globally unique name, {storageaccount}.blob.core.windows.net, which is a CNAME to Azure's blob servers blob.*.store.core.windows.net. A storage account holds containers, which hold directories and blobs (like files). Blobs consist of one or more blocks. After writing, the blocks can be committed. Some Azure diagnostics can send events to an Event Hub that can be parsed with the plugin logstash-input-azure_event_hubs, but for events that are only stored in a storage account, use this plugin. The original logstash-input-azureblob from azure-diagnostics-tools is great for low volumes, but it suffers from an outdated client, slow reads, lease locking issues and JSON parse errors.
+
+
 class LogStash::Inputs::AzureBlobStorage < LogStash::Inputs::Base
-    config_name "azure_blob_storage"
+    config_name "azure_blob_storage"
 
-    # If undefined, Logstash will complain, even if codec is unused. The codec for nsgflowlog is "json"
-    default :codec, "json"
+    # If undefined, Logstash will complain, even if codec is unused. The codec for nsgflowlog is "json"; for wadiis and appservice it is "line".
+    default :codec, "json"
 
-    # logtype can be nsgflowlog, wadiis, appservice or raw. The default is raw, where files are read and added as one event. If the file grows, the next interval the file is read from the offset, so that the delta is sent as another event. In raw mode, further processing has to be done in the filter block. If the logtype is specified, this plugin will split and mutate and add individual events to the queue.
-    config :logtype, :validate => ['nsgflowlog','wadiis','appservice','raw'], :default => 'raw'
+    # logtype can be nsgflowlog, wadiis, appservice or raw. The default is raw, where files are read and added as one event. If the file grows, the next interval the file is read from the offset, so that the delta is sent as another event. In raw mode, further processing has to be done in the filter block. If the logtype is specified, this plugin will split and mutate and add individual events to the queue.
+    config :logtype, :validate => ['nsgflowlog','wadiis','appservice','raw'], :default => 'raw'
 
-    # The storage account is accessed through Azure::Storage::Blob::BlobService, it needs either a sas_token, connection string or a storageaccount/access_key pair.
-    # https://github.com/Azure/azure-storage-ruby/blob/master/blob/lib/azure/storage/blob/blob_service.rb#L42
-    config :connection_string, :validate => :password, :required => false
+    # The storage account is accessed through Azure::Storage::Blob::BlobService; it needs either a sas_token, a connection string or a storageaccount/access_key pair.
+    # https://github.com/Azure/azure-storage-ruby/blob/master/blob/lib/azure/storage/blob/blob_service.rb#L42
+    config :connection_string, :validate => :password, :required => false
 
-    # The storage account name for the azure storage account.
-    config :storageaccount, :validate => :string, :required => false
+    # The storage account name for the azure storage account.
+    config :storageaccount, :validate => :string, :required => false
 
-    #
-    config :
+    # The (primary or secondary) Access Key for the storage account. The key can be found in portal.azure.com or through the azure api StorageAccounts/ListKeys, for example with the PowerShell command Get-AzStorageAccountKey.
+    config :access_key, :validate => :password, :required => false
 
-    #
-
+    # SAS is the Shared Access Signature, which provides restricted access rights. If the sas_token is absent, the access_key is used instead.
+    config :sas_token, :validate => :password, :required => false
 
-    # The
-    config :
+    # The container of the blobs.
+    config :container, :validate => :string, :default => 'insights-logs-networksecuritygroupflowevent'
 
-    #
-    config :
+    # DNS suffix other than blob.core.windows.net, needed for government clouds.
+    config :dns_suffix, :validate => :string, :required => false, :default => 'core.windows.net'
 
-    #
-    config :
+    # For development this can be used to emulate an accountstorage when not available from azure
+    #config :use_development_storage, :validate => :boolean, :required => false
 
-    # The registry
-    #
-    #
-
+    # The registry keeps track of the files that were already processed.
+    # The registry file keeps track of the files that have been processed and until which offset in bytes. It's similar in function
+    #
+    # The default, `data/registry`, contains a Ruby Marshal serialized Hash of the filename, the offset read so far and the file length the last time a file listing was done.
+    config :registry_path, :validate => :string, :required => false, :default => 'data/registry.dat'
 
-    # If registry_local_path is set to a directory on the local server, the registry is save there instead of the remote blob_storage
-    config :registry_local_path, :validate => :string, :required => false
+    # If registry_local_path is set to a directory on the local server, the registry is saved there instead of in the remote blob storage.
+    config :registry_local_path, :validate => :string, :required => false
 
-    # The default, `resume`, will load the registry offsets and will start processing files from the offsets.
-    # When set to `start_over`, all log files are processed from begining.
-    # when set to `start_fresh`, it will read log files that are created or appended since this start of the pipeline.
-    config :registry_create_policy, :validate => ['resume','start_over','start_fresh'], :required => false, :default => 'resume'
+    # The default, `resume`, will load the registry offsets and will start processing files from the offsets.
+    # When set to `start_over`, all log files are processed from the beginning.
+    # When set to `start_fresh`, it will read log files that are created or appended since the start of the pipeline.
+    config :registry_create_policy, :validate => ['resume','start_over','start_fresh'], :required => false, :default => 'resume'
 
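The registry described above is nothing more than a Marshal dump of a Hash keyed by blob name. A minimal sketch for inspecting one offline, assuming a locally saved registry file (the path here is hypothetical):

```ruby
# Load a registry written by this plugin and print the per-blob progress.
# Works for a registry saved via registry_local_path; for the remote
# registry, download the blob at registry_path first.
registry = Marshal.load(File.read('data/registry.dat'))
registry.each do |name, file|
  puts "#{name}: #{file[:offset]} of #{file[:length]} bytes read"
end
```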
-    # The
-    #
-    #
-
-    #
-    # A00000000000000000000000000000000 12 {"records":[
-    # D672f4bbd95a04209b00dc05d899e3cce 2576 json objects for 1st minute
-    # D7fe0d4f275a84c32982795b0e5c7d3a1 2312 json objects for 2nd minute
-    # Z00000000000000000000000000000000 2 ]}
-    config :interval, :validate => :number, :default => 60
+    # The interval is used to save the registry regularly when new events have been processed. It is also used to wait before listing the files again and subtracting the registry of already processed files to determine the worklist.
+    # Waiting time in seconds until processing the next batch. NSGFLOWLOGS append a block per minute, so use multiples of 60 seconds, 300 for 5 minutes, 600 for 10 minutes. The registry is also saved after every interval.
+    # Partial reading starts from the offset and reads until the end, so the starting tag is prepended.
+    config :interval, :validate => :number, :default => 60
 
-    # add the filename into the events
-    config :addfilename, :validate => :boolean, :default => false, :required => false
+    # add the filename as a field into the events
+    config :addfilename, :validate => :boolean, :default => false, :required => false
 
-    # debug_until will for a maximum amount of processed messages shows 3 types of log printouts including processed filenames. This is a lightweight alternative to switching the loglevel from info to debug or even trace
-    config :debug_until, :validate => :number, :default => 0, :required => false
+    # debug_until will, at the creation of the pipeline, show 3 types of log printouts including processed filenames, for a maximum number of processed messages. After that the plugin stops logging events and continues silently. This is a lightweight alternative to switching the loglevel from info to debug or even trace, to see what the plugin is doing and how fast it is at the start of the pipeline. A good value would be approximately 3x the number of events per file, for instance 6000 events.
+    config :debug_until, :validate => :number, :default => 0, :required => false
 
-    # debug_timer show time spent on activities
-    config :debug_timer, :validate => :boolean, :default => false, :required => false
+    # debug_timer shows in the logs the time spent on activities
+    config :debug_timer, :validate => :boolean, :default => false, :required => false
 
-    # WAD IIS Grok Pattern
-    #config :grokpattern, :validate => :string, :required => false, :default => '%{TIMESTAMP_ISO8601:log_timestamp} %{NOTSPACE:instanceId} %{NOTSPACE:instanceId2} %{IPORHOST:ServerIP} %{WORD:httpMethod} %{URIPATH:requestUri} %{NOTSPACE:requestQuery} %{NUMBER:port} %{NOTSPACE:username} %{IPORHOST:clientIP} %{NOTSPACE:httpVersion} %{NOTSPACE:userAgent} %{NOTSPACE:cookie} %{NOTSPACE:referer} %{NOTSPACE:host} %{NUMBER:httpStatus} %{NUMBER:subresponse} %{NUMBER:win32response} %{NUMBER:sentBytes:int} %{NUMBER:receivedBytes:int} %{NUMBER:timeTaken:int}'
+    # WAD IIS Grok Pattern
+    #config :grokpattern, :validate => :string, :required => false, :default => '%{TIMESTAMP_ISO8601:log_timestamp} %{NOTSPACE:instanceId} %{NOTSPACE:instanceId2} %{IPORHOST:ServerIP} %{WORD:httpMethod} %{URIPATH:requestUri} %{NOTSPACE:requestQuery} %{NUMBER:port} %{NOTSPACE:username} %{IPORHOST:clientIP} %{NOTSPACE:httpVersion} %{NOTSPACE:userAgent} %{NOTSPACE:cookie} %{NOTSPACE:referer} %{NOTSPACE:host} %{NUMBER:httpStatus} %{NUMBER:subresponse} %{NUMBER:win32response} %{NUMBER:sentBytes:int} %{NUMBER:receivedBytes:int} %{NUMBER:timeTaken:int}'
 
-    # skip learning if you use json and don't want to learn the head and tail, but use either the defaults or configure them.
-    config :skip_learning, :validate => :boolean, :default => false, :required => false
+    # skip learning if you use json and don't want to learn the head and tail, but use either the defaults or configure them.
+    config :skip_learning, :validate => :boolean, :default => false, :required => false
 
-    # The string that starts the JSON. Only needed when the codec is JSON. When partial file are read, the result will not be valid JSON unless the start and end are put back. the file_head and file_tail are learned at startup, by reading the first file in the blob_list and taking the first and last block, this would work for blobs that are appended like nsgflowlogs. The configuration can be set to override the learning. In case learning fails and the option is not set, the default is to use the 'records' as set by nsgflowlogs.
-    config :file_head, :validate => :string, :required => false, :default => '{"records":['
-    # The string that ends the JSON
-    config :file_tail, :validate => :string, :required => false, :default => ']}'
+    # The string that starts the JSON. Only needed when the codec is JSON. When partial files are read, the result will not be valid JSON unless the start and end are put back. The file_head and file_tail are learned at startup, by reading the first file in the blob_list and taking the first and last block; this works for blobs that are appended like nsgflowlogs. The configuration can be set to override the learning. In case learning fails and the option is not set, the default is to use 'records' as set by nsgflowlogs.
+    config :file_head, :validate => :string, :required => false, :default => '{"records":['
+    # The string that ends the JSON
+    config :file_tail, :validate => :string, :required => false, :default => ']}'
 
-    #
-    #
-    # You
-    #
-
-    # Do not include a leading `/`, as Azure path look like this:
-    # `path/to/blob/file.txt`
-    #
-    # You may also configure multiple paths. See an example
-    # on the <<array,Logstash configuration page>>.
-    # For NSGFLOWLOGS a path starts with "resourceId=/", but this would only be needed to exclude other files that may be written in the same container.
-    config :prefix, :validate => :string, :required => false
+    # By default it will watch every file in the storage container. The prefix option is a simple filter that only processes files with a path that starts with that value.
+    # For NSGFLOWLOGS a path starts with "resourceId=/". This would only be needed to exclude other paths that may be written in the same container. The registry file will be excluded.
+    # You may also configure multiple paths. See an example on the <<array,Logstash configuration page>>.
+    # Do not include a leading `/`, as Azure paths look like this: `path/to/blob/file.txt`
+    config :prefix, :validate => :string, :required => false
 
-
+    # For filtering on filenames, you can use filename patterns, such as `logs/*.log`. If you use a pattern like `logs/**/*.log`, a recursive search of `logs` will be done for all `*.log` files in the logs directory.
+    # See https://www.rubydoc.info/stdlib/core/File.fnmatch
+    config :path_filters, :validate => :array, :default => ['**/*'], :required => false
 
-    # TODO: Other feature requests
-    # show file path in logger
-    # add filepath as part of log message
-    # option to keep registry on local disk
 
 
 public
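path_filters is matched with Ruby's File.fnmatch? using the FNM_PATHNAME and FNM_EXTGLOB flags (see try_list_blobs further down). A few illustrative matches, runnable in irb:

```ruby
flags = File::FNM_PATHNAME | File::FNM_EXTGLOB

File.fnmatch?('**/*', 'subfolder/blob.json', flags)      # => true  (the default pattern)
File.fnmatch?('logs/**/*.log', 'logs/a/b/c.log', flags)  # => true  (recursive match)
File.fnmatch?('logs/*.log', 'logs/a/b/c.log', flags)     # => false ('*' does not cross '/')
File.fnmatch?('test{a,b,c}', 'testb', flags)             # => true  (braces via FNM_EXTGLOB)
```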
-    def register
-
-
-
-
-
-    end
-
-
-
-    def run(queue)
-        # counter for all processed events since the start of this pipeline
-        @processed = 0
-        @regsaved = @processed
-
-        connect
-
-        @registry = Hash.new
-        if registry_create_policy == "resume"
-            for counter in 1..3
-                begin
-                    if (!@registry_local_path.nil?)
-                        unless File.file?(@registry_local_path+"/"+@pipe_id)
-                            @registry = Marshal.load(@blob_client.get_blob(container, registry_path)[1])
-                            #[0] headers [1] responsebody
-                            @logger.info("migrating from remote registry #{registry_path}")
-                        else
-                            if !Dir.exist?(@registry_local_path)
-                                FileUtils.mkdir_p(@registry_local_path)
-                            end
-                            @registry = Marshal.load(File.read(@registry_local_path+"/"+@pipe_id))
-                            @logger.info("resuming from local registry #{registry_local_path+"/"+@pipe_id}")
-                        end
-                    else
-                        @registry = Marshal.load(@blob_client.get_blob(container, registry_path)[1])
-                        #[0] headers [1] responsebody
-                        @logger.info("resuming from remote registry #{registry_path}")
-                    end
-                    break
-                rescue Exception => e
-                    @logger.error("caught: #{e.message}")
-                    @registry.clear
-                    @logger.error("loading registry failed for attempt #{counter} of 3")
-                end
-            end
-        end
-        # read filelist and set offsets to file length to mark all the old files as done
-        if registry_create_policy == "start_fresh"
-            @registry = list_blobs(true)
-            save_registry(@registry)
-            @logger.info("starting fresh, writing a clean registry to contain #{@registry.size} blobs/files")
+    def register
+        @pipe_id = Thread.current[:name].split("[").last.split("]").first
+        @logger.info("=== #{config_name} #{Gem.loaded_specs["logstash-input-"+config_name].version.to_s} / #{@pipe_id} / #{@id[0,6]} / ruby #{ RUBY_VERSION }p#{ RUBY_PATCHLEVEL } ===")
+        @logger.info("If this plugin doesn't work, please raise an issue in https://github.com/janmg/logstash-input-azure_blob_storage")
+        @busy_writing_registry = Mutex.new
+        # TODO: consider multiple readers, so add pipeline @id or use logstash-to-logstash communication?
     end
 
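register derives a pipeline id from the worker thread's name; Logstash puts the pipeline name between square brackets in that string, which the chained splits extract. A quick check of the expression (the thread name below is invented):

```ruby
# Logstash names input threads like "[pipelinename]<azure_blob_storage"
name    = "[main]<azure_blob_storage"          # hypothetical Thread.current[:name]
pipe_id = name.split("[").last.split("]").first
# => "main"
```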
-
-
-
-
-
-
-
-
-
-
-    if
-
+
+
+    def run(queue)
+        # counter for all processed events since the start of this pipeline
+        @processed = 0
+        @regsaved = @processed
+
+        connect
+
+        @registry = Hash.new
+        if registry_create_policy == "resume"
+            for counter in 1..3
+                begin
+                    if (!@registry_local_path.nil?)
+                        unless File.file?(@registry_local_path+"/"+@pipe_id)
+                            @registry = Marshal.load(@blob_client.get_blob(container, registry_path)[1])
+                            #[0] headers [1] responsebody
+                            @logger.info("migrating from remote registry #{registry_path}")
+                        else
+                            if !Dir.exist?(@registry_local_path)
+                                FileUtils.mkdir_p(@registry_local_path)
+                            end
+                            @registry = Marshal.load(File.read(@registry_local_path+"/"+@pipe_id))
+                            @logger.info("resuming from local registry #{registry_local_path+"/"+@pipe_id}")
+                        end
+                    else
+                        @registry = Marshal.load(@blob_client.get_blob(container, registry_path)[1])
+                        #[0] headers [1] responsebody
+                        @logger.info("resuming from remote registry #{registry_path}")
+                    end
+                    break
+                rescue Exception => e
+                    @logger.error("caught: #{e.message}")
+                    @registry.clear
+                    @logger.error("loading registry failed for attempt #{counter} of 3")
+                end
+            end
+        end
         end
-
-
+        # read filelist and set offsets to file length to mark all the old files as done
+        if registry_create_policy == "start_fresh"
+            @registry = list_blobs(true)
+            save_registry()
+            @logger.info("starting fresh, writing a clean registry to contain #{@registry.size} blobs/files")
         end
-
-
+
+        @is_json = false
+        @is_json_line = false
+        begin
+            if @codec.class.name.eql?("LogStash::Codecs::JSON")
+                @is_json = true
+            elsif @codec.class.name.eql?("LogStash::Codecs::JSONLines")
+                @is_json_line = true
+            end
+        end
+        @head = ''
+        @tail = ''
+        # if codec=json, sniff one file's blocks A and Z to learn file_head and file_tail
+        if @is_json
+            if file_head
+                @head = file_head
+            end
+            if file_tail
+                @tail = file_tail
+            end
+            if file_head and file_tail and !skip_learning
+                learn_encapsulation
+            end
+            @logger.info("head will be: #{@head} and tail is set to #{@tail}")
         end
-        @logger.info("head will be: #{@head} and tail is set to #{@tail}")
-    end
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        filelist = list_blobs(false)
-        filelist.each do |name, file|
-            off = 0
-            begin
-                off = @registry[name][:offset]
-            rescue
+        filelist = Hash.new
+        worklist = Hash.new
+        @last = start = Time.now.to_i
+
+        # This is the main loop, it
+        # 1. Lists all the files in the remote storage account that match the path prefix
+        # 2. Filters on path_filters to only include files that match the directory and file glob (**/*.json)
+        # 3. Saves the listed files in a registry of known files and filesizes.
+        # 4. Lists all the files again, compares the registry with the new filelist and puts the delta in a worklist
+        # 5. Processes the worklist and puts all events in the logstash queue.
+        # 6. If there is time left, sleeps to complete the interval. If processing takes more than an interval, saves the registry and continues.
+        # 7. If the stop signal comes, finishes the current file, saves the registry and quits
+        while !stop?
+            # load the registry, compare its offsets to the file list, set offset to 0 for new files, process the whole list and if finished within the interval wait for the next loop
+            # TODO: sort by timestamp ?
+            #filelist.sort_by(|k,v|resource(k)[:date])
+            worklist.clear
+            filelist.clear
+
+            # Listing all the files
+            filelist = list_blobs(false)
+            filelist.each do |name, file|
                 off = 0
-
-
-
-
-        # size nilClass when the list doesn't grow?!
-        # Worklist is the subset of files where the already read offset is smaller than the file size
-        @registry = newreg
-        worklist.clear
-        chunk = nil
-
-        worklist = newreg.select {|name,file| file[:offset] < file[:length]}
-        if (worklist.size > 4) then @logger.info("worklist contains #{worklist.size} blobs") end
-
-        # Start of processing
-        # This would be ideal for threading since it's IO intensive, would be nice with a ruby native ThreadPool
-        if (worklist.size > 0) then
-            worklist.each do |name, file|
-                start = Time.now.to_i
-                if (@debug_until > @processed) then @logger.info("3: processing #{name} from #{file[:offset]} to #{file[:length]}") end
-                size = 0
-                if file[:offset] == 0
-                    # This is where Sera4000 issue starts
-                    # For an append blob, reading full and crashing, retry, last_modified? ... lenght? ... committed? ...
-                    # length and skip reg value
-                    if (file[:length] > 0)
-                        begin
-                            chunk = full_read(name)
-                            size=chunk.size
-                        rescue Exception => e
-                            @logger.error("Failed to read #{name} because of: #{e.message} .. will continue, set file as read and pretend this never happened")
-                            @logger.error("#{size} size and #{file[:length]} file length")
-                            size = file[:length]
-                        end
-                    else
-                        @logger.info("found a zero size file #{name}")
-                        chunk = nil
+                begin
+                    off = @registry[name][:offset]
+                rescue Exception => e
+                    @logger.error("caught: #{e.message} while reading #{name}")
                 end
-
-
-                @logger.debug("partial file #{name} from #{file[:offset]} to #{file[:length]}")
+                @registry.store(name, { :offset => off, :length => file[:length] })
+                if (@debug_until > @processed) then @logger.info("2: adding offsets: #{name} #{off} #{file[:length]}") end
             end
-
-
-
-
-
-
-            @processed
-            @logger.debug("Processed #{res[:nsg]} [#{res[:date]}] #{@processed} events")
-            rescue JSON::ParserError
-            @logger.error("parse error on #{res[:nsg]} [#{res[:date]}] offset: #{file[:offset]} length: #{file[:length]}")
+            # size nilClass when the list doesn't grow?!
+
+            # clean registry of files that are not in the filelist
+            @registry.each do |name,file|
+                unless filelist.include?(name)
+                    @registry.delete(name)
+                    if (@debug_until > @processed) then @logger.info("purging #{name}") end
                 end
-
-
-            #
-
-
-
-
-
-
-
-
-
+            end
+
+            # Worklist is the subset of files where the already read offset is smaller than the file size
+            worklist.clear
+            chunk = nil
+
+            worklist = @registry.select {|name,file| file[:offset] < file[:length]}
+            if (worklist.size > 4) then @logger.info("worklist contains #{worklist.size} blobs") end
+
+            # Start of processing
+            # This would be ideal for threading since it's IO intensive, would be nice with a ruby native ThreadPool
+            if (worklist.size > 0) then
+                worklist.each do |name, file|
+                    start = Time.now.to_i
+                    if (@debug_until > @processed) then @logger.info("3: processing #{name} from #{file[:offset]} to #{file[:length]}") end
+                    size = 0
+                    if file[:offset] == 0
+                        # This is where Sera4000 issue starts
+                        # For an append blob, reading full and crashing, retry, last_modified? ... length? ... committed? ...
+                        # length and skip reg value
+                        if (file[:length] > 0)
+                            begin
+                                chunk = full_read(name)
+                                delta_size = chunk.size
+                            rescue Exception => e
+                                # Azure::Core::Http::HTTPError / undefined method `message='
+                                @logger.error("Failed to read #{name} ... will continue, set file as read and pretend this never happened")
+                                @logger.error("#{size} size and #{file[:length]} file length")
+                                chunk = nil
+                                delta_size = file[:length]
+                            end
+                        else
+                            @logger.info("found a zero size file #{name}")
+                            chunk = nil
+                            delta_size = 0
+                        end
+                    else
+                        chunk = partial_read_json(name, file[:offset], file[:length])
+                        delta_size = chunk.size
+                        @logger.debug("partial file #{name} from #{file[:offset]} to #{file[:length]}")
+                    end
+
+                    if logtype == "nsgflowlog" && @is_json
+                        # skip empty chunks
+                        unless chunk.nil?
+                            res = resource(name)
+                            begin
+                                fingjson = JSON.parse(chunk)
+                                @processed += nsgflowlog(queue, fingjson, name)
+                                @logger.debug("Processed #{res[:nsg]} [#{res[:date]}] #{@processed} events")
+                            rescue JSON::ParserError
+                                @logger.error("parse error on #{res[:nsg]} [#{res[:date]}] offset: #{file[:offset]} length: #{file[:length]}")
+                            end
+                        end
+                    # TODO: Convert this to line based grokking.
+                    # TODO: ECS Compliance?
+                    elsif logtype == "wadiis" && !@is_json
+                        @processed += wadiislog(queue, name)
+                    else
+                        # Handle JSONLines format
+                        if !@chunk.nil? && @is_json_line
+                            newline_rindex = chunk.rindex("\n")
+                            if newline_rindex.nil?
+                                # No full line in chunk, skip it without updating the registry.
+                                # Expecting that the JSON line would be filled in at a subsequent iteration.
+                                next
+                            end
+                            chunk = chunk[0..newline_rindex]
+                            delta_size = chunk.size
+                        end
+
+                        counter = 0
+                        begin
+                            @codec.decode(chunk) do |event|
+                                counter += 1
+                                if @addfilename
+                                    event.set('filename', name)
+                                end
+                                decorate(event)
+                                queue << event
+                            end
+                            @processed += counter
+                        rescue Exception => e
+                            @logger.error("codec exception: #{e.message} .. will continue and pretend this never happened")
+                            @logger.debug("#{chunk}")
+                        end
+                    end
+
+                    # Update the size
+                    size = file[:offset] + delta_size
+                    @registry.store(name, { :offset => size, :length => file[:length] })
+
+                    #@logger.info("name #{name} size #{size} len #{file[:length]}")
+                    # if stop? good moment to stop what we're doing
+                    if stop?
+                        return
+                    end
+                    if ((Time.now.to_i - @last) > @interval)
+                        save_registry()
                    end
-                decorate(event)
-                queue << event
-            end
-            rescue Exception => e
-            @logger.error("codec exception: #{e.message} .. will continue and pretend this never happened")
-            @registry.store(name, { :offset => file[:length], :length => file[:length] })
-            @logger.debug("#{chunk}")
                end
-            @processed += counter
            end
-
-
-            @
-
-            # if stop? good moment to stop what we're doing
-            if stop?
-                return
+            # The files that got processed after the last registry save need to be saved too, in case the worklist is empty for some intervals.
+            now = Time.now.to_i
+            if ((now - @last) > @interval)
+                save_registry()
            end
-
-
+            sleeptime = interval - ((now - start) % interval)
+            if @debug_timer
+                @logger.info("going to sleep for #{sleeptime} seconds")
            end
-
-        end
-        # The files that got processed after the last registry save need to be saved too, in case the worklist is empty for some intervals.
-        now = Time.now.to_i
-        if ((now - @last) > @interval)
-            save_registry(@registry)
+            Stud.stoppable_sleep(sleeptime) { stop? }
        end
-        sleeptime = interval - ((now - start) % interval)
-        if @debug_timer
-            @logger.info("going to sleep for #{sleeptime} seconds")
-        end
-        Stud.stoppable_sleep(sleeptime) { stop? }
    end
-    end
 
-    def stop
-
-    end
-    def close
-
-    end
+    def stop
+        save_registry()
+    end
+    def close
+        save_registry()
+    end
 
 
 private
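The delta computation in the loop above is a plain Hash#select over the registry. A toy run with made-up blob names and sizes:

```ruby
registry = {
  'resourceId=/.../PT1H.json' => { :offset => 2588, :length => 4900 }, # grew since last pass
  'resourceId=/.../PT2H.json' => { :offset => 4900, :length => 4900 }  # fully read
}

worklist = registry.select { |name, file| file[:offset] < file[:length] }
# => only PT1H.json remains; it will be read from byte 2588 up to 4900
```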
-    def connect
-
-
-
-
-
-
-
-
-
-
-
+    def connect
+        # Try in this order to access the storageaccount
+        # 1. storageaccount / sas_token
+        # 2. connection_string
+        # 3. storageaccount / access_key
+
+        unless connection_string.nil?
+            conn = connection_string.value
+        end
+        unless sas_token.nil?
+            unless sas_token.value.start_with?('?')
+                conn = "BlobEndpoint=https://#{storageaccount}.#{dns_suffix};SharedAccessSignature=#{sas_token.value}"
+            else
+                conn = sas_token.value
+            end
+        end
+        unless conn.nil?
+            @blob_client = Azure::Storage::Blob::BlobService.create_from_connection_string(conn)
         else
-
+            # unless use_development_storage?
+            @blob_client = Azure::Storage::Blob::BlobService.create(
+                storage_account_name: storageaccount,
+                storage_dns_suffix: dns_suffix,
+                storage_access_key: access_key.value,
+            )
+            # else
+            #     @logger.info("development storage emulator not yet implemented")
+            # end
         end
     end
-
-
-
-
-
-
-
-
-
-
-
-
-
-    end
-
-    def full_read(filename)
-        tries ||= 2
-        begin
-            return @blob_client.get_blob(container, filename)[1]
-        rescue Exception => e
-            @logger.error("caught: #{e.message} for full_read")
-            if (tries -= 1) > 0
-                if e.message = "Connection reset by peer"
-                    connect
-                end
-                retry
+
+    def full_read(filename)
+        tries ||= 2
+        begin
+            return @blob_client.get_blob(container, filename)[1]
+        rescue Exception => e
+            @logger.error("caught: #{e.message} for full_read")
+            if (tries -= 1) > 0
+                if e.message = "Connection reset by peer"
+                    connect
+                end
+                retry
+            end
         end
+        begin
+            chuck = @blob_client.get_blob(container, filename)[1]
+        end
+        return chuck
     end
-
-
-
-
-
-
-
-
-
-
-            return @head + strip_comma(content)
-        else
-            # when the file has grown between list_blobs and the time of partial reading, the tail will be wrong
-            return @head + strip_comma(content[0...-@tail.length]) + @tail
+
+    def partial_read_json(filename, offset, length)
+        content = @blob_client.get_blob(container, filename, start_range: offset-@tail.length, end_range: length-1)[1]
+        if content.end_with?(@tail)
+            # the tail is part of the last block, so included in the total length of the get_blob
+            return @head + strip_comma(content)
+        else
+            # when the file has grown between list_blobs and the time of partial reading, the tail will be wrong
+            return @head + strip_comma(content[0...-@tail.length]) + @tail
+        end
     end
-    end
 
-    def strip_comma(str)
-
-
-
+    def strip_comma(str)
+        # when skipping over the first blocks the json will start with a comma that needs to be stripped. there should not be a trailing comma, but it gets stripped too
+        if str.start_with?(',')
+            str[0] = ''
+        end
+        str.nil? ? nil : str.chomp(",")
     end
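partial_read_json and strip_comma together turn a mid-file byte range back into parseable JSON: the range starts at a comma left over from the previous block, so the comma is stripped and the learned head is prepended. A sketch of the idea with made-up content, using the default head and tail:

```ruby
require 'json'

head = '{"records":['   # default file_head
tail = ']}'             # default file_tail

# bytes read from the registry offset to the end of the blob; the committed tail is included
content = ',{"time":"2023-01-01T00:01:00Z"},{"time":"2023-01-01T00:02:00Z"}]}'

# strip the leading comma, as strip_comma does, then put the head back
chunk = content.start_with?(',') ? content[1..-1] : content
JSON.parse(head + chunk)   # parses cleanly into {"records"=>[...]}
```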
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+    def nsgflowlog(queue, json, name)
+        count=0
+        begin
+            json["records"].each do |record|
+                res = resource(record["resourceId"])
+                resource = { :subscription => res[:subscription], :resourcegroup => res[:resourcegroup], :nsg => res[:nsg] }
+                @logger.trace(resource.to_s)
+                record["properties"]["flows"].each do |flows|
+                    rule = resource.merge ({ :rule => flows["rule"]})
+                    flows["flows"].each do |flowx|
+                        flowx["flowTuples"].each do |tup|
+                            tups = tup.split(',')
+                            ev = rule.merge({:unixtimestamp => tups[0], :src_ip => tups[1], :dst_ip => tups[2], :src_port => tups[3], :dst_port => tups[4], :protocol => tups[5], :direction => tups[6], :decision => tups[7]})
+                            if (record["properties"]["Version"]==2)
+                                tups[9] = 0 if tups[9].nil?
+                                tups[10] = 0 if tups[10].nil?
+                                tups[11] = 0 if tups[11].nil?
+                                tups[12] = 0 if tups[12].nil?
+                                ev.merge!( {:flowstate => tups[8], :src_pack => tups[9], :src_bytes => tups[10], :dst_pack => tups[11], :dst_bytes => tups[12]} )
+                            end
+                            @logger.trace(ev.to_s)
+                            if @addfilename
+                                ev.merge!( {:filename => name } )
+                            end
+                            event = LogStash::Event.new('message' => ev.to_json)
+                            decorate(event)
+                            queue << event
+                            count+=1
+                        end
+                    end
+                end
+            end
+        rescue Exception => e
+            @logger.error("NSG Flowlog problem for #{name} and error message #{e.message}")
+        end
+        return count
     end
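Each flowTuple is a comma-separated string; version 2 adds flow state and byte/packet counters. Splitting a sample tuple the way the code above does (the values here are invented, the field order follows the merge calls):

```ruby
tup  = '1542110377,10.0.0.4,52.239.184.170,35370,443,T,O,A,E,25,4242,24,8001' # hypothetical v2 tuple
tups = tup.split(',')
{
  :unixtimestamp => tups[0], :src_ip => tups[1],   :dst_ip => tups[2],
  :src_port => tups[3],      :dst_port => tups[4], :protocol => tups[5],
  :direction => tups[6],     :decision => tups[7], :flowstate => tups[8],
  :src_pack => tups[9],      :src_bytes => tups[10],
  :dst_pack => tups[11],     :dst_bytes => tups[12]
}
```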
-
-
+
+    def wadiislog(lines)
+        count=0
+        lines.each do |line|
+            unless line.start_with?('#')
+                queue << LogStash::Event.new('message' => ev.to_json)
+                count+=1
+            end
+        end
+        return count
+        # date {
+        #     match => [ "log_timestamp", "YYYY-MM-dd HH:mm:ss" ]
+        #     target => "@timestamp"
+        #     remove_field => ["log_timestamp"]
+        # }
     end
-
-
-
-    def
-
-
-
-
-
-
-
-
-    # date {
-    #     match => [ "log_timestamp", "YYYY-MM-dd HH:mm:ss" ]
-    #     target => "@timestamp"
-    #     remove_field => ["log_timestamp"]
-    # }
-    end
-
-    # list all blobs in the blobstore, set the offsets from the registry and return the filelist
-    # inspired by: https://github.com/Azure-Samples/storage-blobs-ruby-quickstart/blob/master/example.rb
-    def list_blobs(fill)
-        tries ||= 3
-        begin
-            return try_list_blobs(fill)
-        rescue Exception => e
-            @logger.error("caught: #{e.message} for list_blobs retries left #{tries}")
-            if (tries -= 1) > 0
-                retry
+
+    # list all blobs in the blobstore, set the offsets from the registry and return the filelist
+    # inspired by: https://github.com/Azure-Samples/storage-blobs-ruby-quickstart/blob/master/example.rb
+    def list_blobs(fill)
+        tries ||= 3
+        begin
+            return try_list_blobs(fill)
+        rescue Exception => e
+            @logger.error("caught: #{e.message} for list_blobs retries left #{tries}")
+            if (tries -= 1) > 0
+                retry
+            end
         end
     end
-
-
-
-
-
-
-
-
-
-
-
-    #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            counter+=1
+
+    def try_list_blobs(fill)
+        # inspired by: http://blog.mirthlab.com/2012/05/25/cleanly-retrying-blocks-of-code-after-an-exception-in-ruby/
+        chrono = Time.now.to_i
+        files = Hash.new
+        nextMarker = nil
+        counter = 1
+        loop do
+            blobs = @blob_client.list_blobs(container, { marker: nextMarker, prefix: @prefix})
+            blobs.each do |blob|
+                # FNM_PATHNAME is required so that "**/test" can match "test" at the root folder
+                # FNM_EXTGLOB allows you to use "test{a,b,c}" to match either "testa", "testb" or "testc" (closer to shell behavior)
+                unless blob.name == registry_path
+                    if @path_filters.any? {|path| File.fnmatch?(path, blob.name, File::FNM_PATHNAME | File::FNM_EXTGLOB)}
+                        length = blob.properties[:content_length].to_i
+                        offset = 0
+                        if fill
+                            offset = length
+                        end
+                        files.store(blob.name, { :offset => offset, :length => length })
+                        if (@debug_until > @processed) then @logger.info("1: list_blobs #{blob.name} #{offset} #{length}") end
+                    end
+                end
+            end
+            nextMarker = blobs.continuation_token
+            break unless nextMarker && !nextMarker.empty?
+            if (counter % 10 == 0) then @logger.info(" listing #{counter * 50000} files") end
+            counter+=1
         end
         if @debug_timer
             @logger.info("list_blobs took #{Time.now.to_i - chrono} sec")
         end
-
-    end
+        return files
+    end
 
-    # When events were processed after the last registry save, start a thread to update the registry file.
-    def save_registry(
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    # When events were processed after the last registry save, start a thread to update the registry file.
+    def save_registry()
+        unless @processed == @regsaved
+            unless (@busy_writing_registry.locked?)
+                # deep_copy the hash, to save the registry independent from the variable for thread safety
+                # if deep_clone uses Marshal to do a copy,
+                regdump = Marshal.dump(@registry)
+                regsize = @registry.size
+                Thread.new {
+                    begin
+                        @busy_writing_registry.lock
+                        unless (@registry_local_path)
+                            @blob_client.create_block_blob(container, registry_path, regdump)
+                            @logger.info("processed #{@processed} events, saving #{regsize} blobs and offsets to remote registry #{registry_path}")
+                        else
+                            File.open(@registry_local_path+"/"+@pipe_id, 'w') { |file| file.write(regdump) }
+                            @logger.info("processed #{@processed} events, saving #{regsize} blobs and offsets to local registry #{registry_local_path+"/"+@pipe_id}")
+                        end
+                        @last = Time.now.to_i
+                        @regsaved = @processed
+                    rescue Exception => e
+                        @logger.error("Oh my, registry write failed")
+                        @logger.error("#{e.message}")
+                    ensure
+                        @busy_writing_registry.unlock
+                    end
+                }
+            else
+                @logger.info("Skipped writing the registry because previous write still in progress, it just takes long or may be hanging!")
             end
-    }
-    else
-        @logger.info("Skipped writing the registry because previous write still in progress, it just takes long or may be hanging!")
         end
     end
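Marshal.dump hands the writer thread an immutable snapshot, so the main loop can keep mutating @registry while the write is in flight. The pattern in isolation (a sketch, not plugin code):

```ruby
registry = { 'blob.json' => { :offset => 100, :length => 200 } }
busy_writing = Mutex.new

snapshot = Marshal.dump(registry)      # deep copy, detached from later changes
Thread.new do
  busy_writing.synchronize do
    File.open('registry.dat', 'w') { |f| f.write(snapshot) }
  end
end

registry['blob.json'][:offset] = 150   # safe: the snapshot is unaffected
```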
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+    def learn_encapsulation
+        @logger.info("learn_encapsulation, this can be skipped by setting skip_learning => true. Or set both head_file and tail_file")
+        # From one file, read first block and last block to learn head and tail
+        begin
+            blobs = @blob_client.list_blobs(container, { max_results: 3, prefix: @prefix})
+            blobs.each do |blob|
+                unless blob.name == registry_path
+                    begin
+                        blocks = @blob_client.list_blob_blocks(container, blob.name)[:committed]
+                        if blocks.first.name.start_with?('A00')
+                            @logger.debug("using #{blob.name}/#{blocks.first.name} to learn the json header")
+                            @head = @blob_client.get_blob(container, blob.name, start_range: 0, end_range: blocks.first.size-1)[1]
+                        end
+                        if blocks.last.name.start_with?('Z00')
+                            @logger.debug("using #{blob.name}/#{blocks.last.name} to learn the json footer")
+                            length = blob.properties[:content_length].to_i
+                            offset = length - blocks.last.size
+                            @tail = @blob_client.get_blob(container, blob.name, start_range: offset, end_range: length-1)[1]
+                            @logger.debug("learned tail: #{@tail}")
+                        end
+                    rescue Exception => e
+                        @logger.info("learn json one of the attempts failed #{e.message}")
+                    end
                 end
-            rescue Exception => e
-                @logger.info("learn json one of the attempts failed #{e.message}")
-            end
             end
+        rescue Exception => e
+            @logger.info("learn json header and footer failed because #{e.message}")
         end
-        rescue Exception => e
-            @logger.info("learn json header and footer failed because #{e.message}")
     end
-
-
-
-
-
-
-
-
-
-
-
-
-
-    end
+
+    def resource(str)
+        temp = str.split('/')
+        date = '---'
+        unless temp[9].nil?
+            date = val(temp[9])+'/'+val(temp[10])+'/'+val(temp[11])+'-'+val(temp[12])+':00'
+        end
+        return {:subscription=> temp[2], :resourcegroup=>temp[4], :nsg=>temp[8], :date=>date}
+    end
+
+    def val(str)
+        return str.split('=')[1]
+    end
 
 end # class LogStash::Inputs::AzureBlobStorage
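resource() depends on fixed positions in the blob path. For a typical nsgflowlog blob name (the IDs below are invented), the split yields:

```ruby
# hypothetical nsgflowlog blob name; indices 2, 4, 8 and 9..12 carry the data
name = 'resourceId=/SUBSCRIPTIONS/0000-1111/RESOURCEGROUPS/MYRG/PROVIDERS/' \
       'MICROSOFT.NETWORK/NETWORKSECURITYGROUPS/MYNSG/y=2023/m=01/d=02/h=03/m=00/PT1H.json'
temp = name.split('/')
temp[2]                 # => "0000-1111" (subscription)
temp[4]                 # => "MYRG"      (resourcegroup)
temp[8]                 # => "MYNSG"     (nsg)
temp[9].split('=')[1]   # => "2023"      (via val(), combined into the date "2023/01/02-03:00")
```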