logstash-input-azure_blob_storage 0.12.7 → 0.12.9

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 6bc1a46c4c6ae533e05c83f0e7cb90715cad7390a5cedb9b6e023c46f2e620d1
- data.tar.gz: 520d7b5131a6b00b6de066a12cd93a99082c7af0bb7184df9f2bc9c8ca64babd
+ metadata.gz: 4714d163b8085f62c285af7e18cae4b0075e89ee11aa6e6f2a9e18a2fd0dde1a
+ data.tar.gz: 0ebb527c554c1b48d7c1d3cb4b17b4ecb8aaa7745dab55b6d0eaa22660722fa2
  SHA512:
- metadata.gz: 3c069008cfef9b08c4b9793b24538c9c8bdc217b64285626d3c9564a57584b237bfef90f4382e4b68366c2555b1b9a6e91d897951bbcc336b355eaefb310ce00
- data.tar.gz: ccb7ba1d556cec586872ebe1c94237b3223f484902218d3bff899993467b741519c521b9c08075f98328536cf31274cd2aa386f64458097b025bbef2841c486d
+ metadata.gz: c0696b1431363cd1e828340a54fe044eacceb6c494f8f054f0082777f9bf78512fd3a3c893cea063eedfe006f0fad973fc72ebe7af8f7f03bde290a89fb77b89
+ data.tar.gz: 41a63e6decb12a501528b349b3c88b04c083f09f645360381af0cfaa520853989a9425fe8bf5c30a69666c3c87887efe649cc622291afdea6118abf462b7af3e
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
+ ## 0.12.8
+ - support append blobs (use codec json_lines and logtype raw)
+ - change the default head and tail to an empty string, unless the logtype is nsgflowlog
+ - cleanjson configuration parameter to clean the JSON stream of faulty characters and prevent parse errors
+ - catch ContainerNotFound, print an error message in the log and sleep for the interval time
 
  ## 0.12.7
  - rewrote partial_read; the occasional JSON parse errors should now be fixed by reading only committed blocks.
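The cleaning behaviour introduced in 0.12.8 can be sketched in plain Ruby. `clean_json_stream` below is a hypothetical helper, not the plugin's actual method; the idea is simply to drop characters that are invalid in the string's encoding before JSON parsing:

```ruby
require 'json'

# Drop characters that are invalid in the string's encoding, so stray
# bytes in an append blob do not break JSON parsing (a sketch of the idea).
def clean_json_stream(raw)
  raw.chars.select(&:valid_encoding?).join
end

# "\xFF" is not valid UTF-8, so JSON.parse would fail on the raw stream
dirty  = "{\"a\":1}\n\xFF{\"b\":2}\n"
clean  = clean_json_stream(dirty)
events = clean.each_line.map { |line| JSON.parse(line) }
```

Since dropping bytes is lossy, it makes sense that the plugin keeps this behind an opt-in flag.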
data/README.md CHANGED
@@ -8,6 +8,14 @@ For problems or feature requests with this specific plugin, raise a github issue
  This plugin can read from Azure Storage Blobs, for instance JSON diagnostics logs for NSG flow logs or LINE based accesslogs from App Services.
  [Azure Blob Storage](https://azure.microsoft.com/en-us/services/storage/blobs/)
 
+ ## Alternatives
+ This plugin was inspired by the Azure diagnostics tools, but should work better for larger numbers of files. The configurations are not compatible: azureblob refers to the diagnostics tools plugin, while this plugin uses azure_blob_storage.
+ https://github.com/Azure/azure-diagnostics-tools/tree/master/Logstash/logstash-input-azureblob
+
+ There is also a Filebeat plugin that may work in the future:
+ https://www.elastic.co/guide/en/beats/filebeat/current/filebeat-input-azure-blob-storage.html
+
+ ## Inner workings
  The plugin depends on the [Ruby library azure-storage-blob](https://rubygems.org/gems/azure-storage-blob/versions/1.1.0) from Microsoft, that depends on Faraday for the HTTPS connection to Azure.
 
  The plugin executes the following steps
@@ -184,6 +192,20 @@ output {
  }
  }
  ```
+
+ Another example, for json_lines on append blobs:
+ ```
+ input {
+   azure_blob_storage {
+     codec => json_lines {
+       delimiter => "\n"
+       charset => "UTF-8"
+     }
+     # the options below are optional
+     logtype => "raw"
+     append => true
+     cleanjson => true
+   }
+ }
+ ```
  The configuration documentation is in the first 100 lines of the code
  [GITHUB/janmg/logstash-input-azure_blob_storage/blob/master/lib/logstash/inputs/azure_blob_storage.rb](https://github.com/janmg/logstash-input-azure_blob_storage/blob/master/lib/logstash/inputs/azure_blob_storage.rb)
 
@@ -228,5 +250,9 @@ filter {
  remove_field => ["timestamp"]
  }
  }
+
+ output {
+   stdout { codec => rubydebug }
+ }
  ```
 
@@ -26,7 +26,7 @@ require 'json'
  class LogStash::Inputs::AzureBlobStorage < LogStash::Inputs::Base
  config_name "azure_blob_storage"
 
- # If undefined, Logstash will complain, even if codec is unused. The codec for nsgflowlog is "json" and the for WADIIS and APPSERVICE is "line".
+ # If undefined, Logstash will complain, even if codec is unused. The codec for nsgflowlog is "json" ("json_lines" also works); for WADIIS and APPSERVICE it is "line".
  default :codec, "json"
 
  # logtype can be nsgflowlog, wadiis, appservice or raw. The default is raw, where files are read and added as one event. If the file grows, the next interval the file is read from the offset, so that the delta is sent as another event. In raw mode, further processing has to be done in the filter block. If the logtype is specified, this plugin will split and mutate and add individual events to the queue.
@@ -68,7 +68,7 @@ class LogStash::Inputs::AzureBlobStorage < LogStash::Inputs::Base
  # when set to `start_fresh`, it will read log files that are created or appended since this start of the pipeline.
  config :registry_create_policy, :validate => ['resume','start_over','start_fresh'], :required => false, :default => 'resume'
 
- # The interval is used to save the registry regularly, when new events have have been processed. It is also used to wait before listing the files again and substracting the registry of already processed files to determine the worklist.
+ # The interval is used to save the registry regularly when new events have been processed. It is also used to wait before listing the files again and subtracting the registry of already processed files to determine the worklist.
  # waiting time in seconds until processing the next batch. NSGFLOWLOGS append a block per minute, so use multiples of 60 seconds, 300 for 5 minutes, 600 for 10 minutes. The registry is also saved after every interval.
  # Partial reading starts from the offset and reads until the end, so the starting tag is prepended
  config :interval, :validate => :number, :default => 60
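The worklist mechanics described in the comment above can be sketched as follows. `build_worklist` is a hypothetical helper (the plugin builds its worklist inside the run loop), but it shows the core idea: a blob only needs work when its registered offset is behind its current length:

```ruby
# registry remembers how far each blob was read; the worklist is the
# current file list minus blobs whose offset already equals their length.
def build_worklist(filelist, registry)
  filelist.reject do |name, file|
    reg = registry[name]
    reg && reg[:offset] >= file[:length]
  end
end

filelist = {
  'log1.json' => { :length => 100 },
  'log2.json' => { :length => 250 }
}
registry = {
  'log1.json' => { :offset => 100, :length => 100 },  # fully processed
  'log2.json' => { :offset => 100, :length => 100 }   # grew since last pass
}
worklist = build_worklist(filelist, registry)
```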
@@ -95,10 +95,14 @@ class LogStash::Inputs::AzureBlobStorage < LogStash::Inputs::Base
  config :skip_learning, :validate => :boolean, :default => false, :required => false
 
  # The string that starts the JSON. Only needed when the codec is JSON. When partial file are read, the result will not be valid JSON unless the start and end are put back. the file_head and file_tail are learned at startup, by reading the first file in the blob_list and taking the first and last block, this would work for blobs that are appended like nsgflowlogs. The configuration can be set to override the learning. In case learning fails and the option is not set, the default is to use the 'records' as set by nsgflowlogs.
- config :file_head, :validate => :string, :required => false, :default => '{"records":['
+ config :file_head, :validate => :string, :required => false, :default => ''
  # The string that ends the JSON
- config :file_tail, :validate => :string, :required => false, :default => ']}'
+ config :file_tail, :validate => :string, :required => false, :default => ''
 
+ # inspect the bytes and remove faulty characters
+ config :cleanjson, :validate => :boolean, :default => false, :required => false
+
+ config :append, :validate => :boolean, :default => false, :required => false
  # By default it will watch every file in the storage container. The prefix option is a simple filter that only processes files with a path that starts with that value.
  # For NSGFLOWLOGS a path starts with "resourceId=/". This would only be needed to exclude other paths that may be written in the same container. The registry file will be excluded.
  # You may also configure multiple paths. See an example on the <<array,Logstash configuration page>>.
@@ -118,6 +122,7 @@ public
  @logger.info("If this plugin doesn't work, please raise an issue in https://github.com/janmg/logstash-input-azure_blob_storage")
  @busy_writing_registry = Mutex.new
  # TODO: consider multiple readers, so add pipeline @id or use logstash-to-logstash communication?
+ # For now it's difficult because the plugin would then have to synchronize the worklist
  end
 
 
@@ -128,41 +133,10 @@ public
  @regsaved = @processed
 
  connect
-
  @registry = Hash.new
- if registry_create_policy == "resume"
- for counter in 1..3
- begin
- if (!@registry_local_path.nil?)
- unless File.file?(@registry_local_path+"/"+@pipe_id)
- @registry = Marshal.load(@blob_client.get_blob(container, registry_path)[1])
- #[0] headers [1] responsebody
- @logger.info("migrating from remote registry #{registry_path}")
- else
- if !Dir.exist?(@registry_local_path)
- FileUtils.mkdir_p(@registry_local_path)
- end
- @registry = Marshal.load(File.read(@registry_local_path+"/"+@pipe_id))
- @logger.info("resuming from local registry #{registry_local_path+"/"+@pipe_id}")
- end
- else
- @registry = Marshal.load(@blob_client.get_blob(container, registry_path)[1])
- #[0] headers [1] responsebody
- @logger.info("resuming from remote registry #{registry_path}")
- end
- break
- rescue Exception => e
- @logger.error("caught: #{e.message}")
- @registry.clear
- @logger.error("loading registry failed for attempt #{counter} of 3")
- end
- end
- end
- # read filelist and set offsets to file length to mark all the old files as done
- if registry_create_policy == "start_fresh"
- @registry = list_blobs(true)
- save_registry()
- @logger.info("starting fresh, writing a clean registry to contain #{@registry.size} blobs/files")
+ load_registry()
+ @registry.each do |name, file|
+ @logger.info("offset: #{file[:offset]} length: #{file[:length]}")
  end
 
  @is_json = false
@@ -174,22 +148,29 @@ public
  @is_json_line = true
  end
  end
+
+
  @head = ''
  @tail = ''
- # if codec=json sniff one files blocks A and Z to learn file_head and file_tail
  if @is_json
+ # if codec=json, sniff one file's blocks A and Z to learn file_head and file_tail
+ if @logtype == 'nsgflowlog'
+ @head = '{"records":['
+ @tail = ']}'
+ end
  if file_head
  @head = file_head
  end
  if file_tail
  @tail = file_tail
  end
- if file_head and file_tail and !skip_learning
+ if !skip_learning
  learn_encapsulation
  end
- @logger.info("head will be: #{@head} and tail is set to #{@tail}")
+ @logger.info("head will be: '#{@head}' and tail is set to: '#{@tail}'")
  end
 
+
  filelist = Hash.new
  worklist = Hash.new
  @last = start = Time.now.to_i
@@ -206,24 +187,27 @@ public
  # load the registry, compare it's offsets to file list, set offset to 0 for new files, process the whole list and if finished within the interval wait for next loop,
  # TODO: sort by timestamp ?
  #filelist.sort_by(|k,v|resource(k)[:date])
- worklist.clear
  filelist.clear
 
  # Listing all the files
  filelist = list_blobs(false)
+ if (@debug_until > @processed) then
+ @registry.each do |name, file|
+ @logger.info("#{name} offset: #{file[:offset]} length: #{file[:length]}")
+ end
+ end
  filelist.each do |name, file|
  off = 0
  if @registry.key?(name) then
- begin
- off = @registry[name][:offset]
- rescue Exception => e
- @logger.error("caught: #{e.message} while reading #{name}")
- end
+ begin
+ off = @registry[name][:offset]
+ rescue Exception => e
+ @logger.error("caught: #{e.message} while reading #{name}")
+ end
  end
  @registry.store(name, { :offset => off, :length => file[:length] })
  if (@debug_until > @processed) then @logger.info("2: adding offsets: #{name} #{off} #{file[:length]}") end
  end
- # size nilClass when the list doesn't grow?!
 
  # clean registry of files that are not in the filelist
  @registry.each do |name,file|
@@ -242,14 +226,16 @@ public
 
  # Start of processing
  # This would be ideal for threading since it's IO intensive, would be nice with a ruby native ThreadPool
+ # pool = Concurrent::FixedThreadPool.new(5) # 5 threads
+ #pool.post do
+ # some parallel work
+ #end
  if (worklist.size > 0) then
  worklist.each do |name, file|
  start = Time.now.to_i
  if (@debug_until > @processed) then @logger.info("3: processing #{name} from #{file[:offset]} to #{file[:length]}") end
  size = 0
  if file[:offset] == 0
- # This is where Sera4000 issue starts
- # For an append blob, reading full and crashing, retry, last_modified? ... lenght? ... committed? ...
  # length and skip reg value
  if (file[:length] > 0)
  begin
@@ -272,49 +258,86 @@ public
  delta_size = chunk.size - @head.length - 1
  end
 
- if logtype == "nsgflowlog" && @is_json
- # skip empty chunks
- unless chunk.nil?
- res = resource(name)
- begin
- fingjson = JSON.parse(chunk)
- @processed += nsgflowlog(queue, fingjson, name)
- @logger.debug("Processed #{res[:nsg]} #{@processed} events")
- rescue JSON::ParserError => e
- @logger.error("parse error #{e.message} on #{res[:nsg]} offset: #{file[:offset]} length: #{file[:length]}")
- if (@debug_until > @processed) then @logger.info("#{chunk}") end
- end
+ #
+ # TODO! ... split out the logtypes and use individual methods
+ # how does a byte array chunk from json_lines get translated to strings/json/events?
+ # should the byte array be converted to a multiline and then split? drawback: need to know the character set and linefeed characters
+ # how does the json_lines decoder work on byte arrays?
+ #
+ # so many questions
+
+ unless chunk.nil?
+ counter = 0
+ if @is_json
+ if logtype == "nsgflowlog"
+ res = resource(name)
+ begin
+ fingjson = JSON.parse(chunk)
+ @processed += nsgflowlog(queue, fingjson, name)
+ @logger.debug("Processed #{res[:nsg]} #{@processed} events")
+ rescue JSON::ParserError => e
+ @logger.error("parse error #{e.message} on #{res[:nsg]} offset: #{file[:offset]} length: #{file[:length]}")
+ if (@debug_until > @processed) then @logger.info("#{chunk}") end
+ end
+ else
+ begin
+ @codec.decode(chunk) do |event|
+ counter += 1
+ if @addfilename
+ event.set('filename', name)
+ end
+ decorate(event)
+ queue << event
+ end
+ @processed += counter
+ rescue Exception => e
+ @logger.error("codec exception: #{e.message} .. continue and pretend this never happened")
+ end
+ end
+ end
+
+ if logtype == "wadiis" && !@is_json
+ # TODO: Convert this to line based grokking.
+ @processed += wadiislog(queue, name)
  end
- # TODO: Convert this to line based grokking.
- elsif logtype == "wadiis" && !@is_json
- @processed += wadiislog(queue, name)
- else
- # Handle JSONLines format
- if !@chunk.nil? && @is_json_line
- newline_rindex = chunk.rindex("\n")
- if newline_rindex.nil?
- # No full line in chunk, skip it without updating the registry.
- # Expecting that the JSON line would be filled in at a subsequent iteration.
- next
+
+ if @is_json_line
+ # parse one line at a time and dump it in the chunk?
+ lines = chunk.to_s
+ if cleanjson
+ @logger.info("cleaning in progress")
+ lines = lines.chars.select(&:valid_encoding?).join
+ #lines.delete "\\"
+ #lines.scrub{|bytes| '<'+bytes.unpack('H*')[0]+'>' }
+ end
+ begin
+ @codec.decode(lines) do |event|
+ counter += 1
+ queue << event
+ end
+ @processed += counter
+ rescue Exception => e
+ # todo: fix codec_lines exception: no implicit conversion of Array into String
+ @logger.error("json_lines codec exception: #{e.message} .. continue and pretend this never happened")
  end
- chunk = chunk[0..newline_rindex]
- delta_size = chunk.size
  end
 
- counter = 0
- begin
- @codec.decode(chunk) do |event|
- counter += 1
- if @addfilename
- event.set('filename', name)
+ if !@is_json_line && !@is_json
+ if logtype == "wadiis"
+ # TODO: Convert this to line based grokking.
+ @processed += wadiislog(queue, name)
+ else
+ # Any other codec and logstyle
+ begin
+ @codec.decode(chunk) do |event|
+ counter += 1
+ queue << event
+ end
+ @processed += counter
+ rescue Exception => e
+ @logger.error("other codec exception: #{e.message} .. continue and pretend this never happened")
  end
- decorate(event)
- queue << event
  end
- @processed += counter
- rescue Exception => e
- @logger.error("codec exception: #{e.message} .. will continue and pretend this never happened")
- @logger.debug("#{chunk}")
  end
  end
 
@@ -354,6 +377,24 @@ public
 
 
  private
+ def list_files
+ filelist = list_blobs(false)
+ filelist.each do |name, file|
+ off = 0
+ if @registry.key?(name) then
+ begin
+ off = @registry[name][:offset]
+ rescue Exception => e
+ @logger.error("caught: #{e.message} while reading #{name}")
+ end
+ end
+ @registry.store(name, { :offset => off, :length => file[:length] })
+ if (@debug_until > @processed) then @logger.info("2: adding offsets: #{name} #{off} #{file[:length]}") end
+ end
+ return filelist
+ end
+ # size nilClass when the list doesn't grow?!
+
  def connect
  # Try in this order to access the storageaccount
  # 1. storageaccount / sas_token
@@ -384,11 +425,48 @@ private
  # end
  end
  end
+ # @registry_create_policy,@registry_local_path,@container,@registry_path
+ def load_registry()
+ if @registry_create_policy == "resume"
+ for counter in 1..3
+ begin
+ if (!@registry_local_path.nil?)
+ unless File.file?(@registry_local_path+"/"+@pipe_id)
+ @registry = Marshal.load(@blob_client.get_blob(@container, path)[1])
+ #[0] headers [1] responsebody
+ @logger.info("migrating from remote registry #{path}")
+ else
+ if !Dir.exist?(@registry_local_path)
+ FileUtils.mkdir_p(@registry_local_path)
+ end
+ @registry = Marshal.load(File.read(@registry_local_path+"/"+@pipe_id))
+ @logger.info("resuming from local registry #{@registry_local_path+"/"+@pipe_id}")
+ end
+ else
+ @registry = Marshal.load(@blob_client.get_blob(container, path)[1])
+ #[0] headers [1] responsebody
+ @logger.info("resuming from remote registry #{path}")
+ end
+ break
+ rescue Exception => e
+ @logger.error("caught: #{e.message}")
+ @registry.clear
+ @logger.error("loading registry failed for attempt #{counter} of 3")
+ end
+ end
+ end
+ # read filelist and set offsets to file length to mark all the old files as done
+ if @registry_create_policy == "start_fresh"
+ @registry = list_blobs(true)
+ #save_registry()
+ @logger.info("starting fresh, with a clean registry containing #{@registry.size} blobs/files")
+ end
+ end
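The registry that load_registry restores is a plain Hash of blob name to offset/length, serialized with Marshal either to a local file or to a blob. A minimal local round-trip sketch (file and directory names are examples, not the plugin's paths):

```ruby
require 'tmpdir'
require 'fileutils'

# blob name => { :offset, :length }, the same shape the plugin stores
registry = { 'resourceId=/x/y.json' => { :offset => 120, :length => 4096 } }

dir  = Dir.mktmpdir
path = File.join(dir, 'registry.dat')

# Marshal is binary, so read and write in binary mode
File.open(path, 'wb') { |f| f.write(Marshal.dump(registry)) }
restored = Marshal.load(File.read(path, mode: 'rb'))

FileUtils.rm_rf(dir)
```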
 
  def full_read(filename)
  tries ||= 2
  begin
- return @blob_client.get_blob(container, filename)[1]
+ return @blob_client.get_blob(@container, filename)[1]
  rescue Exception => e
  @logger.error("caught: #{e.message} for full_read")
  if (tries -= 1) > 0
@@ -399,7 +477,7 @@ private
  end
  end
  begin
- chuck = @blob_client.get_blob(container, filename)[1]
+ chuck = @blob_client.get_blob(@container, filename)[1]
  end
  return chuck
  end
@@ -410,29 +488,45 @@ private
  # 3. strip comma
  # if json strip comma and fix head and tail
  size = 0
- blocks = @blob_client.list_blob_blocks(container, blobname)
- blocks[:committed].each do |block|
- size += block.size
- end
- # read the new blob blocks from the offset to the last committed size.
- # if it is json, fix the head and tail
- # crap committed block at the end is the tail, so must be substracted from the read and then comma stripped and tail added.
- # but why did I need a -1 for the length?? probably the offset starts at 0 and ends at size-1
 
- # should first check commit, read and the check committed again? no, only read the commited size
- # should read the full content and then substract json tail
+ begin
+ if @append
+ return @blob_client.get_blob(@container, blobname, start_range: offset-1)[1]
+ end
+ blocks = @blob_client.list_blob_blocks(@container, blobname)
+ blocks[:committed].each do |block|
+ size += block.size
+ end
+ # read the new blob blocks from the offset to the last committed size.
+ # if it is json, fix the head and tail
+ # the committed block at the end is the tail, so it must be subtracted from the read, then the comma stripped and the tail added.
+ # but why did I need a -1 for the length?? probably the offset starts at 0 and ends at size-1
+
+ # should we first check the commit, read, and then check committed again? no, only read the committed size
+ # should read the full content and then subtract the json tail
 
- if @is_json
- content = @blob_client.get_blob(container, blobname, start_range: offset-1, end_range: size-1)[1]
- if content.end_with?(@tail)
- return @head + strip_comma(content)
+ unless @is_json
+ return @blob_client.get_blob(@container, blobname, start_range: offset, end_range: size-1)[1]
  else
- @logger.info("Fixed a tail! probably new committed blocks started appearing!")
- # substract the length of the tail and add the tail, because the file grew.size was calculated as the block boundary, so replacing the last bytes with the tail should fix the problem
- return @head + strip_comma(content[0...-@tail.length]) + @tail
+ content = @blob_client.get_blob(@container, blobname, start_range: offset-1, end_range: size-1)[1]
+ if content.end_with?(@tail)
+ return @head + strip_comma(content)
+ else
+ @logger.info("Fixed a tail! probably new committed blocks started appearing!")
+ # subtract the length of the tail and add it back, because the file grew. size was calculated at the block boundary, so replacing the last bytes with the tail should fix the problem
+ return @head + strip_comma(content[0...-@tail.length]) + @tail
+ end
  end
- else
- content = @blob_client.get_blob(container, blobname, start_range: offset, end_range: size-1)[1]
+ rescue InvalidBlobType => ibt
+ @logger.error("caught #{ibt.message}. Setting BlobType to append")
+ @append = true
+ retry
+ rescue NoMethodError => nme
+ @logger.error("caught #{nme.message}. Setting append to true")
+ @append = true
+ retry
+ rescue Exception => e
+ @logger.error("caught #{e.message}")
  end
  end
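The head/tail repair that partial_read performs for JSON block blobs can be illustrated standalone. `reassemble` is a hypothetical helper: a partial read of an nsgflowlog blob yields `,{...},{...}` without the surrounding structure, and putting the learned head and tail back (plus stripping the leading comma, as the real strip_comma does) makes it parseable again:

```ruby
require 'json'

# Re-wrap a partially read JSON fragment with the learned head and tail,
# dropping the leading comma that separates appended records.
def reassemble(head, chunk, tail)
  head + chunk.sub(/\A,/, '') + tail
end

chunk  = ',{"time":"t1"},{"time":"t2"}'
json   = reassemble('{"records":[', chunk, ']}')
parsed = JSON.parse(json)
```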
 
@@ -532,26 +626,31 @@ private
  nextMarker = nil
  counter = 1
  loop do
- blobs = @blob_client.list_blobs(container, { marker: nextMarker, prefix: @prefix})
- blobs.each do |blob|
- # FNM_PATHNAME is required so that "**/test" can match "test" at the root folder
- # FNM_EXTGLOB allows you to use "test{a,b,c}" to match either "testa", "testb" or "testc" (closer to shell behavior)
- unless blob.name == registry_path
- if @path_filters.any? {|path| File.fnmatch?(path, blob.name, File::FNM_PATHNAME | File::FNM_EXTGLOB)}
- length = blob.properties[:content_length].to_i
- offset = 0
- if fill
- offset = length
+ begin
+ blobs = @blob_client.list_blobs(@container, { marker: nextMarker, prefix: @prefix})
+ blobs.each do |blob|
+ # FNM_PATHNAME is required so that "**/test" can match "test" at the root folder
+ # FNM_EXTGLOB allows you to use "test{a,b,c}" to match either "testa", "testb" or "testc" (closer to shell behavior)
+ unless blob.name == registry_path
+ if @path_filters.any? {|path| File.fnmatch?(path, blob.name, File::FNM_PATHNAME | File::FNM_EXTGLOB)}
+ length = blob.properties[:content_length].to_i
+ offset = 0
+ if fill
+ offset = length
+ end
+ files.store(blob.name, { :offset => offset, :length => length })
+ if (@debug_until > @processed) then @logger.info("1: list_blobs #{blob.name} #{offset} #{length}") end
  end
- files.store(blob.name, { :offset => offset, :length => length })
- if (@debug_until > @processed) then @logger.info("1: list_blobs #{blob.name} #{offset} #{length}") end
  end
  end
+ nextMarker = blobs.continuation_token
+ break unless nextMarker && !nextMarker.empty?
+ if (counter % 10 == 0) then @logger.info(" listing #{counter * 50000} files") end
+ counter+=1
+ rescue Exception => e
+ @logger.error("caught: #{e.message} while trying to list blobs")
+ return files
  end
- nextMarker = blobs.continuation_token
- break unless nextMarker && !nextMarker.empty?
- if (counter % 10 == 0) then @logger.info(" listing #{counter * 50000} files") end
- counter+=1
  end
  if @debug_timer
  @logger.info("list_blobs took #{Time.now.to_i - chrono} sec")
571
670
  begin
572
671
  @busy_writing_registry.lock
573
672
  unless (@registry_local_path)
574
- @blob_client.create_block_blob(container, registry_path, regdump)
673
+ @blob_client.create_block_blob(@container, registry_path, regdump)
575
674
  @logger.info("processed #{@processed} events, saving #{regsize} blobs and offsets to remote registry #{registry_path}")
576
675
  else
577
676
  File.open(@registry_local_path+"/"+@pipe_id, 'w') { |file| file.write(regdump) }
@@ -597,20 +696,20 @@ private
  @logger.info("learn_encapsulation, this can be skipped by setting skip_learning => true. Or set both head_file and tail_file")
  # From one file, read first block and last block to learn head and tail
  begin
- blobs = @blob_client.list_blobs(container, { max_results: 3, prefix: @prefix})
+ blobs = @blob_client.list_blobs(@container, { max_results: 3, prefix: @prefix})
  blobs.each do |blob|
  unless blob.name == registry_path
  begin
- blocks = @blob_client.list_blob_blocks(container, blob.name)[:committed]
+ blocks = @blob_client.list_blob_blocks(@container, blob.name)[:committed]
  if ['A00000000000000000000000000000000','QTAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAw'].include?(blocks.first.name)
  @logger.debug("using #{blob.name}/#{blocks.first.name} to learn the json header")
- @head = @blob_client.get_blob(container, blob.name, start_range: 0, end_range: blocks.first.size-1)[1]
+ @head = @blob_client.get_blob(@container, blob.name, start_range: 0, end_range: blocks.first.size-1)[1]
  end
  if ['Z00000000000000000000000000000000','WjAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAw'].include?(blocks.last.name)
  @logger.debug("using #{blob.name}/#{blocks.last.name} to learn the json footer")
  length = blob.properties[:content_length].to_i
  offset = length - blocks.last.size
- @tail = @blob_client.get_blob(container, blob.name, start_range: offset, end_range: length-1)[1]
+ @tail = @blob_client.get_blob(@container, blob.name, start_range: offset, end_range: length-1)[1]
  @logger.debug("learned tail: #{@tail}")
  end
  rescue Exception => e
@@ -635,7 +734,9 @@ private
  def val(str)
  return str.split('=')[1]
  end
+ end # class LogStash::Inputs::AzureBlobStorage
 
+ # This is a start towards mapping NSG events to ECS fields ... it's complicated
  =begin
  def ecs(old)
  # https://www.elastic.co/guide/en/ecs/current/ecs-field-reference.html
@@ -681,4 +782,3 @@ private
  return ecs
  end
  =end
- end # class LogStash::Inputs::AzureBlobStorage
@@ -1,6 +1,6 @@
  Gem::Specification.new do |s|
  s.name = 'logstash-input-azure_blob_storage'
- s.version = '0.12.7'
+ s.version = '0.12.9'
  s.licenses = ['Apache-2.0']
  s.summary = 'This logstash plugin reads and parses data from Azure Storage Blobs.'
  s.description = <<-EOF
@@ -24,5 +24,5 @@ EOF
  s.add_runtime_dependency 'stud', '~> 0.0.23'
  s.add_runtime_dependency 'azure-storage-blob', '~> 2', '>= 2.0.3'
  s.add_development_dependency 'logstash-devutils', '~> 2.4'
- s.add_development_dependency 'rubocop', '~> 1.48'
+ s.add_development_dependency 'rubocop', '~> 1.50'
  end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: logstash-input-azure_blob_storage
  version: !ruby/object:Gem::Version
- version: 0.12.7
+ version: 0.12.9
  platform: ruby
  authors:
  - Jan Geertsma
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2023-04-02 00:00:00.000000000 Z
+ date: 2023-07-15 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  requirement: !ruby/object:Gem::Requirement
@@ -77,7 +77,7 @@ dependencies:
  requirements:
  - - "~>"
  - !ruby/object:Gem::Version
- version: '1.48'
+ version: '1.50'
  name: rubocop
  prerelease: false
  type: :development
@@ -85,7 +85,7 @@ dependencies:
  requirements:
  - - "~>"
  - !ruby/object:Gem::Version
- version: '1.48'
+ version: '1.50'
  description: " This gem is a Logstash plugin. It reads and parses data from Azure\
  \ Storage Blobs. The azure_blob_storage is a reimplementation to replace azureblob\
  \ from azure-diagnostics-tools/Logstash. It can deal with larger volumes and partial\