logstash-filter-grok 0.1.0 → 0.1.2
- checksums.yaml +5 -13
- data/Gemfile +3 -3
- data/Rakefile +2 -6
- data/lib/logstash/filters/grok.rb +327 -331
- data/logstash-filter-grok.gemspec +7 -5
- data/spec/filters/grok_spec.rb +1 -1
- metadata +50 -25
- data/rakelib/publish.rake +0 -9
- data/rakelib/vendor.rake +0 -169
checksums.yaml
CHANGED

@@ -1,15 +1,7 @@
 ---
-!binary "U0hBMQ==":
-  metadata.gz: !binary |-
-  data.tar.gz: !binary |-
-    YTJlZDBhZDg2ODViMzNkZjNhMjZmZDc2OTQ2MTFlYTM1MTgyOGNiNA==
+SHA1:
+  metadata.gz: 27006dbd92d0134cef4e01124e2ed8300134aeac
+  data.tar.gz: d571e84744111378dcae55c24293e6fdcd4590d0
 SHA512:
-  metadata.gz: !binary |-
-    NjFlNjU5MDgwYjI4ZTY3MTkzMWM5ODFmMjAyZGFlMWYzZTI3YjhjMWU0OGNh
-    NTFjYzg2NjRiODNmOWM1NTY0ZGJhMzRlZTdkY2QzN2ZlYjU0OTM=
-  data.tar.gz: !binary |-
-    ZDk4MGU3MzIzNGJkYTk3OGRhYjhiNjIyNTYzYzg1NGU2YzU3ZDQzNGNiZWMw
-    NThhNTBhMDczNmQ0OTM1NTIyYTRmZjkzZTFmNTcxYzliMWVmM2JiNTc2MTVl
-    YzhiMDNjM2RlNTI2MjU0OTdmZmE5NzljYmM0NTRhMjg1YmFiYjY=
+  metadata.gz: 6c8f3172eddad9922c166ee17849475408e4f298f394d0a4672353b56ead744b5c80f4f8080f1c26bb2ed9242b9f7ef4309c66ca66f402d4c22c27f933daf655
+  data.tar.gz: d2cbe6637bb17b6707578c506cdc51ac64aa7720914fb01289b06cc95f456a8d7ed9cb204c88f03b544155a153b8063b85430cc18e0dadf7c70e30778acf250e
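The 0.1.2 release switches checksums.yaml from base64-encoded !binary digests to plain hex. A minimal Ruby sketch of how the new values can be re-verified; it assumes the .gem archive has already been unpacked (e.g. with `tar xf logstash-filter-grok-0.1.2.gem`) so that metadata.gz, data.tar.gz, and checksums.yaml sit in the current directory:

require "digest"
require "yaml"

checksums = YAML.load_file("checksums.yaml")

%w[metadata.gz data.tar.gz].each do |name|
  # The new-format SHA512 entries are plain hex, so a direct string
  # comparison against Digest's hexdigest works.
  actual = Digest::SHA512.file(name).hexdigest
  abort "#{name}: checksum mismatch" unless actual == checksums["SHA512"][name]
  puts "#{name}: OK"
end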
data/Gemfile
CHANGED

@@ -1,3 +1,3 @@
-source '
-gem
+source 'https://rubygems.org'
+gemspec
+gem "logstash", :github => "elasticsearch/logstash", :branch => "1.5"
data/Rakefile
CHANGED

data/lib/logstash/filters/grok.rb
CHANGED

@@ -1,363 +1,359 @@
-# encoding: utf-8
-require "logstash/filters/base"
-require "logstash/namespace"
-require "logstash/environment"
-require "logstash/patterns/core"
-require "set"
-
-# Parse arbitrary text and structure it.
-#
-# Grok is currently the best way in logstash to parse crappy unstructured log
-# data into something structured and queryable.
-#
-# This tool is perfect for syslog logs, apache and other webserver logs, mysql
-# logs, and in general, any log format that is generally written for humans
-# and not computer consumption.
-#
-# Logstash ships with about 120 patterns by default. You can find them here:
-# <https://github.com/logstash/logstash/tree/v%VERSION%/patterns>. You can add
-# your own trivially. (See the patterns_dir setting)
-#
-# If you need help building patterns to match your logs, you will find the
-# <http://grokdebug.herokuapp.com> too quite useful!
-#
-# #### Grok Basics
-#
-# Grok works by combining text patterns into something that matches your
-# logs.
-#
-# The syntax for a grok pattern is `%{SYNTAX:SEMANTIC}`
-#
-# The `SYNTAX` is the name of the pattern that will match your text. For
-# example, "3.44" will be matched by the NUMBER pattern and "55.3.244.1" will
-# be matched by the IP pattern. The syntax is how you match.
-#
-# The `SEMANTIC` is the identifier you give to the piece of text being matched.
-# For example, "3.44" could be the duration of an event, so you could call it
-# simply 'duration'. Further, a string "55.3.244.1" might identify the 'client'
-# making a request.
-#
-# For the above example, your grok filter would look something like this:
-#
-#     %{NUMBER:duration} %{IP:client}
-#
-# Optionally you can add a data type conversion to your grok pattern. By default
-# all semantics are saved as strings. If you wish to convert a semantic's data type,
-# for example change a string to an integer then suffix it with the target data type.
-# For example `%{NUMBER:num:int}` which converts the 'num' semantic from a string to an
-# integer. Currently the only supported conversions are `int` and `float`.
-#
-# #### Example
-#
-# With that idea of a syntax and semantic, we can pull out useful fields from a
-# sample log like this fictional http request log:
-#
-#     55.3.244.1 GET /index.html 15824 0.043
-#
-# The pattern for this could be:
-#
-#     %{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes} %{NUMBER:duration}
-#
-# A more realistic example, let's read these logs from a file:
-#
-#     input {
-#       file {
-#         path => "/var/log/http.log"
-#       }
-#     }
-#     filter {
-#       grok {
-#         match => { "message" => "%{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes} %{NUMBER:duration}" }
-#       }
-#     }
-#
-# After the grok filter, the event will have a few extra fields in it:
-#
-# * client: 55.3.244.1
-# * method: GET
-# * request: /index.html
-# * bytes: 15824
-# * duration: 0.043
-#
-# #### Regular Expressions
-#
-# Grok sits on top of regular expressions, so any regular expressions are valid
-# in grok as well. The regular expression library is Oniguruma, and you can see
-# the full supported regexp syntax [on the Onigiruma
-# site](http://www.geocities.jp/kosako3/oniguruma/doc/RE.txt).
-#
-# #### Custom Patterns
-#
-# Sometimes logstash doesn't have a pattern you need. For this, you have
-# a few options.
-#
-# First, you can use the Oniguruma syntax for 'named capture' which will
-# let you match a piece of text and save it as a field:
-#
-#     (?<field_name>the pattern here)
-#
-# For example, postfix logs have a 'queue id' that is an 10 or 11-character
-# hexadecimal value. I can capture that easily like this:
-#
-#     (?<queue_id>[0-9A-F]{10,11})
-#
-# Alternately, you can create a custom patterns file.
-#
-# * Create a directory called `patterns` with a file in it called `extra`
-#   (the file name doesn't matter, but name it meaningfully for yourself)
-# * In that file, write the pattern you need as the pattern name, a space, then
-#   the regexp for that pattern.
-#
-# For example, doing the postfix queue id example as above:
-#
-#     # contents of ./patterns/postfix:
-#     POSTFIX_QUEUEID [0-9A-F]{10,11}
-#
-# Then use the `patterns_dir` setting in this plugin to tell logstash where
-# your custom patterns directory is. Here's a full example with a sample log:
-#
-#     Jan 1 06:25:43 mailserver14 postfix/cleanup[21403]: BEF25A72965: message-id=<20130101142543.5828399CCAF@mailserver14.example.com>
-#
-#     filter {
-#       grok {
-#         patterns_dir => "./patterns"
-#         match => { "message" => "%{SYSLOGBASE} %{POSTFIX_QUEUEID:queue_id}: %{GREEDYDATA:syslog_message}" }
-#       }
-#     }
-#
-# The above will match and result in the following fields:
-#
-# * timestamp: Jan 1 06:25:43
-# * logsource: mailserver14
-# * program: postfix/cleanup
-# * pid: 21403
-# * queue_id: BEF25A72965
-# * syslog_message: message-id=<20130101142543.5828399CCAF@mailserver14.example.com>
-#
-# The `timestamp`, `logsource`, `program`, and `pid` fields come from the
-# SYSLOGBASE pattern which itself is defined by other patterns.
-class LogStash::Filters::Grok < LogStash::Filters::Base
-  config_name "grok"
-  milestone 3
-
-  # Specify a pattern to parse with. This will match the 'message' field.
+# encoding: utf-8
+require "logstash/filters/base"
+require "logstash/namespace"
+require "logstash/environment"
+require "logstash/patterns/core"
+require "set"
+
+# Parse arbitrary text and structure it.
 #
-  #
-  #
-  config :pattern, :validate => :array, :deprecated => "You should use this instead: match => { \"message\" => \"your pattern here\" }"
-
-  # A hash of matches of field => value
+# Grok is currently the best way in logstash to parse crappy unstructured log
+# data into something structured and queryable.
 #
-  #
+# This tool is perfect for syslog logs, apache and other webserver logs, mysql
+# logs, and in general, any log format that is generally written for humans
+# and not computer consumption.
 #
-  #
-  #
-  #
+# Logstash ships with about 120 patterns by default. You can find them here:
+# <https://github.com/logstash/logstash/tree/v%VERSION%/patterns>. You can add
+# your own trivially. (See the `patterns_dir` setting)
+#
+# If you need help building patterns to match your logs, you will find the
+# <http://grokdebug.herokuapp.com> too quite useful!
+#
+# ==== Grok Basics
+#
+# Grok works by combining text patterns into something that matches your
+# logs.
+#
+# The syntax for a grok pattern is `%{SYNTAX:SEMANTIC}`
+#
+# The `SYNTAX` is the name of the pattern that will match your text. For
+# example, `3.44` will be matched by the `NUMBER` pattern and `55.3.244.1` will
+# be matched by the `IP` pattern. The syntax is how you match.
 #
-  #
+# The `SEMANTIC` is the identifier you give to the piece of text being matched.
+# For example, `3.44` could be the duration of an event, so you could call it
+# simply `duration`. Further, a string `55.3.244.1` might identify the `client`
+# making a request.
 #
+# For the above example, your grok filter would look something like this:
+# [source,ruby]
+#     %{NUMBER:duration} %{IP:client}
+#
+# Optionally you can add a data type conversion to your grok pattern. By default
+# all semantics are saved as strings. If you wish to convert a semantic's data type,
+# for example change a string to an integer then suffix it with the target data type.
+# For example `%{NUMBER:num:int}` which converts the `num` semantic from a string to an
+# integer. Currently the only supported conversions are `int` and `float`.
+#
+# .Examples:
+#
+# With that idea of a syntax and semantic, we can pull out useful fields from a
+# sample log like this fictional http request log:
+# [source,ruby]
+#     55.3.244.1 GET /index.html 15824 0.043
+#
+# The pattern for this could be:
+# [source,ruby]
+#     %{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes} %{NUMBER:duration}
+#
+# A more realistic example, let's read these logs from a file:
+# [source,ruby]
+#     input {
+#       file {
+#         path => "/var/log/http.log"
+#       }
+#     }
 #     filter {
-#       grok {
+#       grok {
+#         match => { "message" => "%{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes} %{NUMBER:duration}" }
+#       }
 #     }
 #
-
-
+# After the grok filter, the event will have a few extra fields in it:
+#
+# * `client: 55.3.244.1`
+# * `method: GET`
+# * `request: /index.html`
+# * `bytes: 15824`
+# * `duration: 0.043`
 #
-  #
-  # necessarily need to define this yourself unless you are adding additional
-  # patterns.
+# ==== Regular Expressions
 #
-  #
+# Grok sits on top of regular expressions, so any regular expressions are valid
+# in grok as well. The regular expression library is Oniguruma, and you can see
+# the full supported regexp syntax [on the Onigiruma
+# site](http://www.geocities.jp/kosako3/oniguruma/doc/RE.txt).
 #
-  #
+# ==== Custom Patterns
 #
-  # For
+# Sometimes logstash doesn't have a pattern you need. For this, you have
+# a few options.
 #
-  #
-
-
-  #
-  # grok + grep filters to do parsing + dropping.
-  config :drop_if_match, :validate => :boolean, :default => false
-
-  # Break on first match. The first successful match by grok will result in the
-  # filter being finished. If you want grok to try all patterns (maybe you are
-  # parsing different things), then set this to false.
-  config :break_on_match, :validate => :boolean, :default => true
-
-  # If true, only store named captures from grok.
-  config :named_captures_only, :validate => :boolean, :default => true
-
-  # If true, keep empty captures as event fields.
-  config :keep_empty_captures, :validate => :boolean, :default => false
-
-  # If true, make single-value fields simply that value, not an array
-  # containing that one value.
-  config :singles, :validate => :boolean, :default => true, :deprecated => "This behavior is the default now, you don't need to set it."
-
-  # Append values to the 'tags' field when there has been no
-  # successful match
-  config :tag_on_failure, :validate => :array, :default => ["_grokparsefailure"]
-
-  # The fields to overwrite.
+# First, you can use the Oniguruma syntax for named capture which will
+# let you match a piece of text and save it as a field:
+# [source,ruby]
+#     (?<field_name>the pattern here)
 #
-  #
+# For example, postfix logs have a `queue id` that is an 10 or 11-character
+# hexadecimal value. I can capture that easily like this:
+# [source,ruby]
+#     (?<queue_id>[0-9A-F]{10,11})
 #
-  #
-  # overwrite the 'message' field with part of the match like so:
+# Alternately, you can create a custom patterns file.
 #
+# * Create a directory called `patterns` with a file in it called `extra`
+#   (the file name doesn't matter, but name it meaningfully for yourself)
+# * In that file, write the pattern you need as the pattern name, a space, then
+#   the regexp for that pattern.
+#
+# For example, doing the postfix queue id example as above:
+# [source,ruby]
+#     # contents of ./patterns/postfix:
+#     POSTFIX_QUEUEID [0-9A-F]{10,11}
+#
+# Then use the `patterns_dir` setting in this plugin to tell logstash where
+# your custom patterns directory is. Here's a full example with a sample log:
+# [source,ruby]
+#     Jan 1 06:25:43 mailserver14 postfix/cleanup[21403]: BEF25A72965: message-id=<20130101142543.5828399CCAF@mailserver14.example.com>
+# [source,ruby]
 #     filter {
 #       grok {
-  #
-  #
+#         patterns_dir => "./patterns"
+#         match => { "message" => "%{SYSLOGBASE} %{POSTFIX_QUEUEID:queue_id}: %{GREEDYDATA:syslog_message}" }
 #       }
 #     }
 #
-  #
-  #
-
-
-  #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-  #
-  #
-
-
-
-
-
-
-
-
-
-
-
+# The above will match and result in the following fields:
+#
+# * `timestamp: Jan 1 06:25:43`
+# * `logsource: mailserver14`
+# * `program: postfix/cleanup`
+# * `pid: 21403`
+# * `queue_id: BEF25A72965`
+# * `syslog_message: message-id=<20130101142543.5828399CCAF@mailserver14.example.com>`
+#
+# The `timestamp`, `logsource`, `program`, and `pid` fields come from the
+# `SYSLOGBASE` pattern which itself is defined by other patterns.
+class LogStash::Filters::Grok < LogStash::Filters::Base
+  config_name "grok"
+  milestone 3
+
+  # Specify a pattern to parse with. This will match the `message` field.
+  #
+  # If you want to match other fields than message, use the `match` setting.
+  # Multiple patterns is fine.
+  config :pattern, :validate => :array, :deprecated => "You should use this instead: match => { \"message\" => \"your pattern here\" }"
+
+  # A hash of matches of field => value
+  #
+  # For example:
+  # [source,ruby]
+  #     filter {
+  #       grok { match => { "message" => "Duration: %{NUMBER:duration}" } }
+  #     }
+  #
+  # Alternatively, using the old array syntax:
+  # [source,ruby]
+  #     filter {
+  #       grok { match => [ "message", "Duration: %{NUMBER:duration}" ] }
+  #     }
+  #
+  config :match, :validate => :hash, :default => {}
+
+  #
+  # logstash ships by default with a bunch of patterns, so you don't
+  # necessarily need to define this yourself unless you are adding additional
+  # patterns.
+  #
+  # Pattern files are plain text with format:
+  # [source,ruby]
+  #     NAME PATTERN
+  #
+  # For example:
+  # [source,ruby]
+  #     NUMBER \d+
+  config :patterns_dir, :validate => :array, :default => []
+
+  # Break on first match. The first successful match by grok will result in the
+  # filter being finished. If you want grok to try all patterns (maybe you are
+  # parsing different things), then set this to false.
+  config :break_on_match, :validate => :boolean, :default => true
+
+  # If `true`, only store named captures from grok.
+  config :named_captures_only, :validate => :boolean, :default => true
+
+  # If `true`, keep empty captures as event fields.
+  config :keep_empty_captures, :validate => :boolean, :default => false
+
+  # If `true`, make single-value fields simply that value, not an array
+  # containing that one value.
+  config :singles, :validate => :boolean, :default => true, :deprecated => "This behavior is the default now, you don't need to set it."
+
+  # Append values to the `tags` field when there has been no
+  # successful match
+  config :tag_on_failure, :validate => :array, :default => ["_grokparsefailure"]
+
+  # The fields to overwrite.
+  #
+  # This allows you to overwrite a value in a field that already exists.
+  #
+  # For example, if you have a syslog line in the `message` field, you can
+  # overwrite the `message` field with part of the match like so:
+  # [source,ruby]
+  #     filter {
+  #       grok {
+  #         match => { "message" => "%{SYSLOGBASE} %{DATA:message}" }
+  #         overwrite => [ "message" ]
+  #       }
+  #     }
+  #
+  # In this case, a line like `May 29 16:37:11 sadness logger: hello world`
+  # will be parsed and `hello world` will overwrite the original message.
+  config :overwrite, :validate => :array, :default => []
+
+  # Detect if we are running from a jarfile, pick the right path.
+  @@patterns_path ||= Set.new
+  #@@patterns_path += [LogStash::Environment.pattern_path("*")]
+  @@patterns_path += [LogStash::Patterns::Core.path]
+
+  public
+  def initialize(params)
+    super(params)
+    @match["message"] ||= []
+    @match["message"] += @pattern if @pattern # the config 'pattern' value (array)
+    # a cache of capture name handler methods.
+    @handlers = {}
   end
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+  public
+  def register
+    require "grok-pure" # rubygem 'jls-grok'
+
+    @patternfiles = []
+
+    # Have @@patterns_path show first. Last-in pattern definitions win; this
+    # will let folks redefine built-in patterns at runtime.
+    @patterns_dir = @@patterns_path.to_a + @patterns_dir
+    @logger.info? and @logger.info("Grok patterns path", :patterns_dir => @patterns_dir)
+    @patterns_dir.each do |path|
+      if File.directory?(path)
+        path = File.join(path, "*")
+      end
+
+      Dir.glob(path).each do |file|
+        @logger.info? and @logger.info("Grok loading patterns from file", :path => file)
+        @patternfiles << file
+      end
     end
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+    @patterns = Hash.new { |h,k| h[k] = [] }
+
+    @logger.info? and @logger.info("Match data", :match => @match)
+
+    @match.each do |field, patterns|
+      patterns = [patterns] if patterns.is_a?(String)
+
+      @logger.info? and @logger.info("Grok compile", :field => field, :patterns => patterns)
+      patterns.each do |pattern|
+        @logger.debug? and @logger.debug("regexp: #{@type}/#{field}", :pattern => pattern)
+        grok = Grok.new
+        grok.logger = @logger unless @logger.nil?
+        add_patterns_from_files(@patternfiles, grok)
+        grok.compile(pattern, @named_captures_only)
+        @patterns[field] << grok
+      end
+    end # @match.each
+  end # def register
+
+  public
+  def filter(event)
+    return unless filter?(event)
+
+    matched = false
+    done = false
+
+    @logger.debug? and @logger.debug("Running grok filter", :event => event);
+    @patterns.each do |field, groks|
+      if match(groks, field, event)
+        matched = true
+        break if @break_on_match
+      end
+      #break if done
+    end # @patterns.each
+
+    if matched
+      filter_matched(event)
+    else
+      # Tag this event if we can't parse it. We can use this later to
+      # reparse+reindex logs if we improve the patterns given.
+      @tag_on_failure.each do |tag|
+        event["tags"] ||= []
+        event["tags"] << tag unless event["tags"].include?(tag)
+      end
     end
-
-
-
-
-
-
-
-
-
-
-
+
+    @logger.debug? and @logger.debug("Event now: ", :event => event)
+  end # def filter
+
+  private
+  def match(groks, field, event)
+    input = event[field]
+    if input.is_a?(Array)
+      success = false
+      input.each do |input|
+        success |= match_against_groks(groks, input, event)
+      end
+      return success
+    else
+      return match_against_groks(groks, input, event)
     end
+  rescue StandardError => e
+    @logger.warn("Grok regexp threw exception", :exception => e.message)
   end
-
-
-
-
-
-
-
-
-
-
-
+
+  private
+  def match_against_groks(groks, input, event)
+    matched = false
+    groks.each do |grok|
+      # Convert anything else to string (number, hash, etc)
+      matched = grok.match_and_capture(input.to_s) do |field, value|
+        matched = true
+        handle(field, value, event)
+      end
+      break if matched and @break_on_match
     end
-      return
-    else
-      return match_against_groks(groks, input, event)
+    return matched
   end
-
-
-
-
-
-
-    matched = false
-    groks.each do |grok|
-      # Convert anything else to string (number, hash, etc)
-      matched = grok.match_and_capture(input.to_s) do |field, value|
-        matched = true
-        handle(field, value, event)
-      end
-      break if matched and @break_on_match
-    end
-    return matched
-  end
-
-  private
-  def handle(field, value, event)
-    return if (value.nil? || (value.is_a?(String) && value.empty?)) unless @keep_empty_captures
-
-    if @overwrite.include?(field)
-      event[field] = value
-    else
-      v = event[field]
-      if v.nil?
+
+  private
+  def handle(field, value, event)
+    return if (value.nil? || (value.is_a?(String) && value.empty?)) unless @keep_empty_captures
+
+    if @overwrite.include?(field)
       event[field] = value
-
-      event[field]
-
-
-
+    else
+      v = event[field]
+      if v.nil?
+        event[field] = value
+      elsif v.is_a?(Array)
+        event[field] << value
+      elsif v.is_a?(String)
+        # Promote to array since we aren't overwriting.
+        event[field] = [v, value]
+      end
     end
   end
-
-
-
-
-
-
-
+
+  private
+  def add_patterns_from_files(paths, grok)
+    paths.each do |path|
+      if !File.exists?(path)
+        raise "Grok pattern file does not exist: #{path}"
+      end
+      grok.add_patterns_from_file(path)
     end
-
-
-end #
-
-end # class LogStash::Filters::Grok
+  end # def add_patterns_from_files
+
+end # class LogStash::Filters::Grok
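The rewritten register/filter code above is a thin wrapper around the Grok class from the jls-grok gem: register compiles one Grok object per configured pattern, and match_against_groks calls match_and_capture on each. A standalone sketch of that same compile-and-capture cycle, using only calls that appear in the diff plus add_pattern; the two inline pattern definitions are simplified stand-ins for the files shipped by logstash-patterns-core:

require "grok-pure" # rubygem 'jls-grok'

grok = Grok.new
# register() loads patterns with add_patterns_from_file; defining two
# simplified patterns inline keeps the sketch self-contained.
grok.add_pattern("IP", "(?:[0-9]{1,3}\\.){3}[0-9]{1,3}")
grok.add_pattern("NUMBER", "[0-9]+(?:\\.[0-9]+)?")

# Same call register() makes: compile with named_captures_only = true.
grok.compile("%{IP:client} %{NUMBER:duration}", true)

# match_and_capture yields each captured field, exactly as in
# match_against_groks above.
grok.match_and_capture("55.3.244.1 0.043") do |field, value|
  puts "#{field} => #{value}"
end
# Prints:
#   client => 55.3.244.1
#   duration => 0.043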
data/logstash-filter-grok.gemspec
CHANGED

@@ -1,13 +1,13 @@
 Gem::Specification.new do |s|

   s.name = 'logstash-filter-grok'
-  s.version = '0.1.
+  s.version = '0.1.2'
   s.licenses = ['Apache License (2.0)']
   s.summary = "Parse arbitrary text and structure it."
-  s.description = "
+  s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
   s.authors = ["Elasticsearch"]
-  s.email = '
-  s.homepage = "http://logstash.
+  s.email = 'info@elasticsearch.com'
+  s.homepage = "http://www.elasticsearch.org/guide/en/logstash/current/index.html"
   s.require_paths = ["lib"]

   # Files

@@ -17,13 +17,15 @@ Gem::Specification.new do |s|
   s.test_files = s.files.grep(%r{^(test|spec|features)/})

   # Special flag to let us know this is actually a logstash plugin
-  s.metadata = { "logstash_plugin" => "true", "
+  s.metadata = { "logstash_plugin" => "true", "logstash_group" => "filter" }

   # Gem dependencies
   s.add_runtime_dependency 'logstash', '>= 1.4.0', '< 2.0.0'

   s.add_runtime_dependency 'jls-grok', ['0.11.0']
   s.add_runtime_dependency 'logstash-patterns-core'
+  s.add_development_dependency 'logstash-devutils'

+  s.add_development_dependency 'logstash-devutils'
 end

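The `logstash_plugin` metadata flag added here is what marks the gem as a logstash plugin, and `logstash_group` records the plugin type. (The duplicated `s.add_development_dependency 'logstash-devutils'` line is faithful to the released gemspec; the metadata section below accordingly lists logstash-devutils twice.) A sketch of locating such gems through the standard RubyGems API — an illustrative lookup, not logstash's actual discovery code:

require "rubygems"

# Gem::Specification is enumerable over every installed gem spec, and
# spec.metadata exposes the key/value pairs set in the gemspec.
plugins = Gem::Specification.select do |spec|
  spec.metadata["logstash_plugin"] == "true"
end

plugins.each do |spec|
  puts "#{spec.name} #{spec.version} (#{spec.metadata['logstash_group']})"
end
# e.g. logstash-filter-grok 0.1.2 (filter)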
data/spec/filters/grok_spec.rb
CHANGED
metadata
CHANGED

@@ -1,66 +1,93 @@
 --- !ruby/object:Gem::Specification
 name: logstash-filter-grok
 version: !ruby/object:Gem::Version
-  version: 0.1.
+  version: 0.1.2
 platform: ruby
 authors:
 - Elasticsearch
-autorequire:
+autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-11-
+date: 2014-11-21 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
-  name: logstash
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - -
+    - - '>='
       - !ruby/object:Gem::Version
         version: 1.4.0
     - - <
       - !ruby/object:Gem::Version
         version: 2.0.0
-
+  name: logstash
   prerelease: false
+  type: :runtime
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - -
+    - - '>='
       - !ruby/object:Gem::Version
         version: 1.4.0
     - - <
       - !ruby/object:Gem::Version
         version: 2.0.0
 - !ruby/object:Gem::Dependency
-  name: jls-grok
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - '='
       - !ruby/object:Gem::Version
         version: 0.11.0
-
+  name: jls-grok
   prerelease: false
+  type: :runtime
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - '='
       - !ruby/object:Gem::Version
         version: 0.11.0
 - !ruby/object:Gem::Dependency
-  name: logstash-patterns-core
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - -
+    - - '>='
       - !ruby/object:Gem::Version
         version: '0'
+  name: logstash-patterns-core
+  prerelease: false
   type: :runtime
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+- !ruby/object:Gem::Dependency
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  name: logstash-devutils
+  prerelease: false
+  type: :development
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+- !ruby/object:Gem::Dependency
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  name: logstash-devutils
   prerelease: false
+  type: :development
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - -
+    - - '>='
       - !ruby/object:Gem::Version
         version: '0'
-description:
-
-email: richard.pijnenburg@elasticsearch.com
+description: This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program
+email: info@elasticsearch.com
 executables: []
 extensions: []
 extra_rdoc_files: []

@@ -71,33 +98,31 @@ files:
 - Rakefile
 - lib/logstash/filters/grok.rb
 - logstash-filter-grok.gemspec
-- rakelib/publish.rake
-- rakelib/vendor.rake
 - spec/filters/grok_spec.rb
-homepage: http://logstash.
+homepage: http://www.elasticsearch.org/guide/en/logstash/current/index.html
 licenses:
 - Apache License (2.0)
 metadata:
   logstash_plugin: 'true'
-
-post_install_message:
+  logstash_group: filter
+post_install_message:
 rdoc_options: []
 require_paths:
 - lib
 required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
-  - -
+  - - '>='
     - !ruby/object:Gem::Version
       version: '0'
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
-  - -
+  - - '>='
    - !ruby/object:Gem::Version
      version: '0'
 requirements: []
-rubyforge_project:
-rubygems_version: 2.
-signing_key:
+rubyforge_project:
+rubygems_version: 2.1.9
+signing_key:
 specification_version: 4
 summary: Parse arbitrary text and structure it.
 test_files:
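The dependency stanzas above pin logstash to the range `>= 1.4.0, < 2.0.0` and jls-grok to exactly 0.11.0. A small sketch of how RubyGems evaluates such requirement pairs:

require "rubygems"

logstash_req = Gem::Requirement.new([">= 1.4.0", "< 2.0.0"])
logstash_req.satisfied_by?(Gem::Version.new("1.5.0"))  # => true
logstash_req.satisfied_by?(Gem::Version.new("2.0.0"))  # => false (upper bound is exclusive)

grok_req = Gem::Requirement.new("= 0.11.0")
grok_req.satisfied_by?(Gem::Version.new("0.11.0"))     # => true
grok_req.satisfied_by?(Gem::Version.new("0.11.1"))     # => false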
data/rakelib/publish.rake
DELETED

@@ -1,9 +0,0 @@
-require "gem_publisher"
-
-desc "Publish gem to RubyGems.org"
-task :publish_gem do |t|
-  gem_file = Dir.glob(File.expand_path('../*.gemspec',File.dirname(__FILE__))).first
-  gem = GemPublisher.publish_if_updated(gem_file, :rubygems)
-  puts "Published #{gem}" if gem
-end
-
data/rakelib/vendor.rake
DELETED

@@ -1,169 +0,0 @@
-require "net/http"
-require "uri"
-require "digest/sha1"
-
-def vendor(*args)
-  return File.join("vendor", *args)
-end
-
-directory "vendor/" => ["vendor"] do |task, args|
-  mkdir task.name
-end
-
-def fetch(url, sha1, output)
-
-  puts "Downloading #{url}"
-  actual_sha1 = download(url, output)
-
-  if actual_sha1 != sha1
-    fail "SHA1 does not match (expected '#{sha1}' but got '#{actual_sha1}')"
-  end
-end # def fetch
-
-def file_fetch(url, sha1)
-  filename = File.basename( URI(url).path )
-  output = "vendor/#{filename}"
-  task output => [ "vendor/" ] do
-    begin
-      actual_sha1 = file_sha1(output)
-      if actual_sha1 != sha1
-        fetch(url, sha1, output)
-      end
-    rescue Errno::ENOENT
-      fetch(url, sha1, output)
-    end
-  end.invoke
-
-  return output
-end
-
-def file_sha1(path)
-  digest = Digest::SHA1.new
-  fd = File.new(path, "r")
-  while true
-    begin
-      digest << fd.sysread(16384)
-    rescue EOFError
-      break
-    end
-  end
-  return digest.hexdigest
-ensure
-  fd.close if fd
-end
-
-def download(url, output)
-  uri = URI(url)
-  digest = Digest::SHA1.new
-  tmp = "#{output}.tmp"
-  Net::HTTP.start(uri.host, uri.port, :use_ssl => (uri.scheme == "https")) do |http|
-    request = Net::HTTP::Get.new(uri.path)
-    http.request(request) do |response|
-      fail "HTTP fetch failed for #{url}. #{response}" if [200, 301].include?(response.code)
-      size = (response["content-length"].to_i || -1).to_f
-      count = 0
-      File.open(tmp, "w") do |fd|
-        response.read_body do |chunk|
-          fd.write(chunk)
-          digest << chunk
-          if size > 0 && $stdout.tty?
-            count += chunk.bytesize
-            $stdout.write(sprintf("\r%0.2f%%", count/size * 100))
-          end
-        end
-      end
-      $stdout.write("\r \r") if $stdout.tty?
-    end
-  end
-
-  File.rename(tmp, output)
-
-  return digest.hexdigest
-rescue SocketError => e
-  puts "Failure while downloading #{url}: #{e}"
-  raise
-ensure
-  File.unlink(tmp) if File.exist?(tmp)
-end # def download
-
-def untar(tarball, &block)
-  require "archive/tar/minitar"
-  tgz = Zlib::GzipReader.new(File.open(tarball))
-  # Pull out typesdb
-  tar = Archive::Tar::Minitar::Input.open(tgz)
-  tar.each do |entry|
-    path = block.call(entry)
-    next if path.nil?
-    parent = File.dirname(path)
-
-    mkdir_p parent unless File.directory?(parent)
-
-    # Skip this file if the output file is the same size
-    if entry.directory?
-      mkdir path unless File.directory?(path)
-    else
-      entry_mode = entry.instance_eval { @mode } & 0777
-      if File.exists?(path)
-        stat = File.stat(path)
-        # TODO(sissel): Submit a patch to archive-tar-minitar upstream to
-        # expose headers in the entry.
-        entry_size = entry.instance_eval { @size }
-        # If file sizes are same, skip writing.
-        next if stat.size == entry_size && (stat.mode & 0777) == entry_mode
-      end
-      puts "Extracting #{entry.full_name} from #{tarball} #{entry_mode.to_s(8)}"
-      File.open(path, "w") do |fd|
-        # eof? check lets us skip empty files. Necessary because the API provided by
-        # Archive::Tar::Minitar::Reader::EntryStream only mostly acts like an
-        # IO object. Something about empty files in this EntryStream causes
-        # IO.copy_stream to throw "can't convert nil into String" on JRuby
-        # TODO(sissel): File a bug about this.
-        while !entry.eof?
-          chunk = entry.read(16384)
-          fd.write(chunk)
-        end
-        #IO.copy_stream(entry, fd)
-      end
-      File.chmod(entry_mode, path)
-    end
-  end
-  tar.close
-  File.unlink(tarball) if File.file?(tarball)
-end # def untar
-
-def ungz(file)
-
-  outpath = file.gsub('.gz', '')
-  tgz = Zlib::GzipReader.new(File.open(file))
-  begin
-    File.open(outpath, "w") do |out|
-      IO::copy_stream(tgz, out)
-    end
-    File.unlink(file)
-  rescue
-    File.unlink(outpath) if File.file?(outpath)
-    raise
-  end
-  tgz.close
-end
-
-desc "Process any vendor files required for this plugin"
-task "vendor" do |task, args|
-
-  @files.each do |file|
-    download = file_fetch(file['url'], file['sha1'])
-    if download =~ /.tar.gz/
-      prefix = download.gsub('.tar.gz', '').gsub('vendor/', '')
-      untar(download) do |entry|
-        if !file['files'].nil?
-          next unless file['files'].include?(entry.full_name.gsub(prefix, ''))
-          out = entry.full_name.split("/").last
-        end
-        File.join('vendor', out)
-      end
-    elsif download =~ /.gz/
-      ungz(download)
-    end
-  end
-
-end