logstash-filter-csv 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
+ ---
+ !binary "U0hBMQ==":
+   metadata.gz: !binary |-
+     OTY1ZjIyMmUwNDhkYjk2NmMyYTM0NDA2Mzc5ZDJhMDczM2ZiNTBkZQ==
+   data.tar.gz: !binary |-
+     OTgyOGNiMGU4ZjcxNjUzZGQxNGNiYmQwZDQ5YWRjODIxNzM2N2NkZA==
+ SHA512:
+   metadata.gz: !binary |-
+     YjhhNzg3YmViMjFiOWQ0OTE5NmI5MWViOTUxMDE0MWE3MTBlNWEwMmY1NjBi
+     NzU1MWNjOGQ4NWJjNDNiMDUzZTkyOWUyYTZmZmUwNjY5MzFiYzliZjc1OGNl
+     MmU2MDUwMDhlYmM4NWQ4MGRmZTdlZmEzNzMzZWRhYzZlMTA4MjE=
+   data.tar.gz: !binary |-
+     OGFlNWM4NWNkZWE1MzUxY2QxMjQ0YmM0NWQzNDVmMjhlMTBmOTgwOWVmNjkx
+     OGRkMmViN2ViZmYzODM5NThkZDBjNWFmYTU1MmY1ZjJkMzVmOTI3MmYwYmQ2
+     N2ExNGQ1ZTE2ODM5MTAzYWU3YWE5NDViZWM5NmQ2MDQ0MTExMDA=
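
Each digest above is a Base64-encoded hex string (hence the `!binary` tag), keyed by algorithm (`U0hBMQ==` decodes to `SHA1`). A minimal verification sketch, assuming the .gem archive has been unpacked so that metadata.gz sits in the current directory:

    require "digest/sha1"
    require "base64"

    # Compare the SHA1 of the unpacked metadata.gz against the
    # Base64-encoded hex digest recorded in checksums.yaml.
    expected = Base64.decode64("OTY1ZjIyMmUwNDhkYjk2NmMyYTM0NDA2Mzc5ZDJhMDczM2ZiNTBkZQ==")
    actual   = Digest::SHA1.file("metadata.gz").hexdigest
    puts(actual == expected ? "metadata.gz checksum OK" : "checksum mismatch!")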
data/.gitignore ADDED
@@ -0,0 +1,4 @@
+ *.gem
+ Gemfile.lock
+ .bundle
+ vendor
data/Gemfile ADDED
@@ -0,0 +1,3 @@
+ source 'http://rubygems.org'
+ gem 'rake'
+ gem 'gem_publisher'
data/Rakefile ADDED
@@ -0,0 +1,6 @@
+ @files = []
+
+ task :default do
+   system("rake -T")
+ end
+
data/lib/logstash/filters/csv.rb ADDED
@@ -0,0 +1,97 @@
+ # encoding: utf-8
+ require "logstash/filters/base"
+ require "logstash/namespace"
+
+ require "csv"
+
+ # The CSV filter takes an event field containing CSV data, parses it,
+ # and stores it as individual fields (the field names can optionally be
+ # specified). This filter can also parse data with any separator, not
+ # just commas.
+ class LogStash::Filters::CSV < LogStash::Filters::Base
+   config_name "csv"
+   milestone 2
+
+   # The CSV data in the value of the `source` field will be expanded into a
+   # data structure.
+   config :source, :validate => :string, :default => "message"
+
+   # Define a list of column names (in the order they appear in the CSV,
+   # as if it were a header line). If `columns` is not configured, or there
+   # are not enough columns specified, the default column names are
+   # "column1", "column2", etc. If there are more columns in the data than
+   # in this list, the extras are auto-numbered: e.g. with
+   # `columns => ["user_defined_1", "user_defined_2"]` and four columns of
+   # data, the fields become "user_defined_1", "user_defined_2", "column3",
+   # "column4".
+   config :columns, :validate => :array, :default => []
+
+   # Define the column separator value. If this is not specified, the default
+   # is a comma ','.
+   # Optional.
+   config :separator, :validate => :string, :default => ","
+
+   # Define the character used to quote CSV fields. If this is not specified,
+   # the default is a double quote '"'.
+   # Optional.
+   config :quote_char, :validate => :string, :default => '"'
+
+   # Define the target field for placing the parsed data.
+   # Defaults to writing to the root of the event.
+   config :target, :validate => :string
+
+   public
+   def register
+     # Nothing to do here
+   end # def register
+
+   public
+   def filter(event)
+     return unless filter?(event)
+
+     @logger.debug("Running csv filter", :event => event)
+
+     if event[@source]
+       if event[@source].is_a?(String)
+         event[@source] = [event[@source]]
+       end
+
+       if event[@source].length > 1
+         @logger.warn("csv filter only works on fields of length 1",
+                      :source => @source, :value => event[@source],
+                      :event => event)
+         return
+       end
+
+       raw = event[@source].first
+       begin
+         values = CSV.parse_line(raw, :col_sep => @separator, :quote_char => @quote_char)
+
+         if @target.nil?
+           # Default is to write to the root of the event.
+           dest = event
+         else
+           dest = event[@target] ||= {}
+         end
+
+         values.each_index do |i|
+           field_name = @columns[i] || "column#{i + 1}"
+           dest[field_name] = values[i]
+         end
+
+         filter_matched(event)
+       rescue => e
+         event.tag "_csvparsefailure"
+         @logger.warn("Trouble parsing csv", :source => @source, :raw => raw,
+                      :exception => e)
+         return
+       end # begin
+     end # if event
+
+     @logger.debug("Event after csv filter", :event => event)
+   end # def filter
+ end # class LogStash::Filters::CSV
+
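
Putting these options together, a representative (hypothetical) pipeline snippet exercising the filter might look like the following; the field and column names are illustrative only:

    filter {
      csv {
        source    => "payload"            # field holding the raw CSV line ("message" is the default)
        separator => ";"                  # parse semicolon-delimited data
        columns   => ["user", "action"]   # name the first two columns; a third would become "column3"
        target    => "csv_data"           # nest results under this field instead of the event root
      }
    }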
data/logstash-filter-csv.gemspec ADDED
@@ -0,0 +1,26 @@
+ Gem::Specification.new do |s|
+
+   s.name          = 'logstash-filter-csv'
+   s.version       = '0.1.0'
+   s.licenses      = ['Apache License (2.0)']
+   s.summary       = "The CSV filter takes an event field containing CSV data, parses it, and stores it as individual fields (can optionally specify the names)."
+   s.description   = "The CSV filter takes an event field containing CSV data, parses it, and stores it as individual fields (can optionally specify the names)."
+   s.authors       = ["Elasticsearch"]
+   s.email         = 'richard.pijnenburg@elasticsearch.com'
+   s.homepage      = "http://logstash.net/"
+   s.require_paths = ["lib"]
+
+   # Files
+   s.files = `git ls-files`.split($\) + ::Dir.glob('vendor/*')
+
+   # Tests
+   s.test_files = s.files.grep(%r{^(test|spec|features)/})
+
+   # Special flag to let us know this is actually a logstash plugin
+   s.metadata = { "logstash_plugin" => "true", "group" => "filter" }
+
+   # Gem dependencies
+   s.add_runtime_dependency 'logstash', '>= 1.4.0', '< 2.0.0'
+
+ end
+
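
With this spec in place, the gem can be built with standard RubyGems tooling; a minimal sketch using the RubyGems API (equivalent to running `gem build logstash-filter-csv.gemspec`):

    require "rubygems/package"

    # Load the spec and build the .gem archive; the output filename
    # (logstash-filter-csv-0.1.0.gem) follows from s.name and s.version.
    spec = Gem::Specification.load("logstash-filter-csv.gemspec")
    Gem::Package.build(spec)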
data/rakelib/publish.rake ADDED
@@ -0,0 +1,9 @@
+ require "gem_publisher"
+
+ desc "Publish gem to RubyGems.org"
+ task :publish_gem do |t|
+   gem_file = Dir.glob(File.expand_path('../*.gemspec', File.dirname(__FILE__))).first
+   gem = GemPublisher.publish_if_updated(gem_file, :rubygems)
+   puts "Published #{gem}" if gem
+ end
+
data/rakelib/vendor.rake ADDED
@@ -0,0 +1,169 @@
+ require "net/http"
+ require "uri"
+ require "zlib"
+ require "digest/sha1"
+
+ def vendor(*args)
+   return File.join("vendor", *args)
+ end
+
+ directory "vendor/" => ["vendor"] do |task, args|
+   mkdir task.name
+ end
+
+ def fetch(url, sha1, output)
+   puts "Downloading #{url}"
+   actual_sha1 = download(url, output)
+
+   if actual_sha1 != sha1
+     fail "SHA1 does not match (expected '#{sha1}' but got '#{actual_sha1}')"
+   end
+ end # def fetch
+
+ def file_fetch(url, sha1)
+   filename = File.basename(URI(url).path)
+   output = "vendor/#{filename}"
+   task output => [ "vendor/" ] do
+     begin
+       actual_sha1 = file_sha1(output)
+       if actual_sha1 != sha1
+         fetch(url, sha1, output)
+       end
+     rescue Errno::ENOENT
+       fetch(url, sha1, output)
+     end
+   end.invoke
+
+   return output
+ end
+
+ def file_sha1(path)
+   digest = Digest::SHA1.new
+   fd = File.new(path, "r")
+   while true
+     begin
+       digest << fd.sysread(16384)
+     rescue EOFError
+       break
+     end
+   end
+   return digest.hexdigest
+ ensure
+   fd.close if fd
+ end
+
+ def download(url, output)
+   uri = URI(url)
+   digest = Digest::SHA1.new
+   tmp = "#{output}.tmp"
+   Net::HTTP.start(uri.host, uri.port, :use_ssl => (uri.scheme == "https")) do |http|
+     request = Net::HTTP::Get.new(uri.path)
+     http.request(request) do |response|
+       # response.code is a String; fail on anything other than success.
+       # (The original check was inverted and compared against Integers,
+       # so it could never fire.)
+       fail "HTTP fetch failed for #{url}. #{response}" unless ["200", "301"].include?(response.code)
+       size = (response["content-length"] || -1).to_f
+       count = 0
+       File.open(tmp, "w") do |fd|
+         response.read_body do |chunk|
+           fd.write(chunk)
+           digest << chunk
+           if size > 0 && $stdout.tty?
+             count += chunk.bytesize
+             $stdout.write(sprintf("\r%0.2f%%", count / size * 100))
+           end
+         end
+       end
+       $stdout.write("\r      \r") if $stdout.tty?
+     end
+   end
+
+   File.rename(tmp, output)
+
+   return digest.hexdigest
+ rescue SocketError => e
+   puts "Failure while downloading #{url}: #{e}"
+   raise
+ ensure
+   File.unlink(tmp) if File.exist?(tmp)
+ end # def download
+
+ def untar(tarball, &block)
+   require "archive/tar/minitar"
+   tgz = Zlib::GzipReader.new(File.open(tarball))
+   # Pull out typesdb
+   tar = Archive::Tar::Minitar::Input.open(tgz)
+   tar.each do |entry|
+     path = block.call(entry)
+     next if path.nil?
+     parent = File.dirname(path)
+
+     mkdir_p parent unless File.directory?(parent)
+
+     # Skip this file if the output file is the same size
+     if entry.directory?
+       mkdir path unless File.directory?(path)
+     else
+       entry_mode = entry.instance_eval { @mode } & 0777
+       if File.exist?(path)
+         stat = File.stat(path)
+         # TODO(sissel): Submit a patch to archive-tar-minitar upstream to
+         # expose headers in the entry.
+         entry_size = entry.instance_eval { @size }
+         # If file sizes are same, skip writing.
+         next if stat.size == entry_size && (stat.mode & 0777) == entry_mode
+       end
+       puts "Extracting #{entry.full_name} from #{tarball} #{entry_mode.to_s(8)}"
+       File.open(path, "w") do |fd|
+         # eof? check lets us skip empty files. Necessary because the API provided by
+         # Archive::Tar::Minitar::Reader::EntryStream only mostly acts like an
+         # IO object. Something about empty files in this EntryStream causes
+         # IO.copy_stream to throw "can't convert nil into String" on JRuby.
+         # TODO(sissel): File a bug about this.
+         while !entry.eof?
+           chunk = entry.read(16384)
+           fd.write(chunk)
+         end
+         #IO.copy_stream(entry, fd)
+       end
+       File.chmod(entry_mode, path)
+     end
+   end
+   tar.close
+   File.unlink(tarball) if File.file?(tarball)
+ end # def untar
+
+ def ungz(file)
+   outpath = file.gsub('.gz', '')
+   tgz = Zlib::GzipReader.new(File.open(file))
+   begin
+     File.open(outpath, "w") do |out|
+       IO::copy_stream(tgz, out)
+     end
+     File.unlink(file)
+   rescue
+     File.unlink(outpath) if File.file?(outpath)
+     raise
+   ensure
+     # Close the reader even when extraction fails.
+     tgz.close
+   end
+ end
+
+ desc "Process any vendor files required for this plugin"
+ task "vendor" do |task, args|
+   @files.each do |file|
+     download = file_fetch(file['url'], file['sha1'])
+     if download =~ /\.tar\.gz/
+       prefix = download.gsub('.tar.gz', '').gsub('vendor/', '')
+       untar(download) do |entry|
+         if !file['files'].nil?
+           next unless file['files'].include?(entry.full_name.gsub(prefix, ''))
+         end
+         # Assign the flattened name for every extracted entry, not only
+         # when an explicit file list is given (the original left 'out'
+         # unset in that case).
+         out = entry.full_name.split("/").last
+         File.join('vendor', out)
+       end
+     elsif download =~ /\.gz/
+       ungz(download)
+     end
+   end
+ end
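
The "vendor" task is driven by the `@files` array declared (empty) in the Rakefile; each entry is expected to be a hash with 'url' and 'sha1' keys plus an optional 'files' allow-list of paths inside the tarball. This plugin vendors nothing, so the following entry is purely hypothetical, with placeholder URL and digest:

    # Hypothetical @files entry; this plugin actually ships with @files = [].
    @files = [
      {
        'url'   => 'https://example.com/archives/types-1.0.tar.gz',   # placeholder
        'sha1'  => '0000000000000000000000000000000000000000',        # placeholder
        'files' => ['/types.db']  # optional: extract only these paths
      }
    ]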
data/spec/filters/csv_spec.rb ADDED
@@ -0,0 +1,175 @@
+ # encoding: utf-8
+
+ require "spec_helper"
+ require "logstash/filters/csv"
+
+ describe LogStash::Filters::CSV do
+
+   describe "all defaults" do
+     # The logstash config goes here.
+     # At this time, only filters are supported.
+     config <<-CONFIG
+       filter {
+         csv { }
+       }
+     CONFIG
+
+     sample "big,bird,sesame street" do
+       insist { subject["column1"] } == "big"
+       insist { subject["column2"] } == "bird"
+       insist { subject["column3"] } == "sesame street"
+     end
+   end
+
+   describe "custom separator" do
+     config <<-CONFIG
+       filter {
+         csv {
+           separator => ";"
+         }
+       }
+     CONFIG
+
+     sample "big,bird;sesame street" do
+       insist { subject["column1"] } == "big,bird"
+       insist { subject["column2"] } == "sesame street"
+     end
+   end
+
+   describe "custom quote char" do
+     config <<-CONFIG
+       filter {
+         csv {
+           quote_char => "'"
+         }
+       }
+     CONFIG
+
+     sample "big,bird,'sesame street'" do
+       insist { subject["column1"] } == "big"
+       insist { subject["column2"] } == "bird"
+       insist { subject["column3"] } == "sesame street"
+     end
+   end
+
+   describe "default quote char" do
+     config <<-CONFIG
+       filter {
+         csv {
+         }
+       }
+     CONFIG
+
+     sample 'big,bird,"sesame, street"' do
+       insist { subject["column1"] } == "big"
+       insist { subject["column2"] } == "bird"
+       insist { subject["column3"] } == "sesame, street"
+     end
+   end
+
+   describe "null quote char" do
+     config <<-CONFIG
+       filter {
+         csv {
+           quote_char => "\x00"
+         }
+       }
+     CONFIG
+
+     sample 'big,bird,"sesame" street' do
+       insist { subject["column1"] } == 'big'
+       insist { subject["column2"] } == 'bird'
+       insist { subject["column3"] } == '"sesame" street'
+     end
+   end
+
+   describe "given columns" do
+     # The logstash config goes here.
+     # At this time, only filters are supported.
+     config <<-CONFIG
+       filter {
+         csv {
+           columns => ["first", "last", "address"]
+         }
+       }
+     CONFIG
+
+     sample "big,bird,sesame street" do
+       insist { subject["first"] } == "big"
+       insist { subject["last"] } == "bird"
+       insist { subject["address"] } == "sesame street"
+     end
+   end
+
+   describe "parse csv with more data than defined column names" do
+     config <<-CONFIG
+       filter {
+         csv {
+           columns => ["custom1", "custom2"]
+         }
+       }
+     CONFIG
+
+     sample "val1,val2,val3" do
+       insist { subject["custom1"] } == "val1"
+       insist { subject["custom2"] } == "val2"
+       insist { subject["column3"] } == "val3"
+     end
+   end
+
+   describe "parse csv from a given source with column names" do
+     config <<-CONFIG
+       filter {
+         csv {
+           source  => "datafield"
+           columns => ["custom1", "custom2", "custom3"]
+         }
+       }
+     CONFIG
+
+     sample("datafield" => "val1,val2,val3") do
+       insist { subject["custom1"] } == "val1"
+       insist { subject["custom2"] } == "val2"
+       insist { subject["custom3"] } == "val3"
+     end
+   end
+
+   describe "given target" do
+     # The logstash config goes here.
+     # At this time, only filters are supported.
+     config <<-CONFIG
+       filter {
+         csv {
+           target => "data"
+         }
+       }
+     CONFIG
+
+     sample "big,bird,sesame street" do
+       insist { subject["data"]["column1"] } == "big"
+       insist { subject["data"]["column2"] } == "bird"
+       insist { subject["data"]["column3"] } == "sesame street"
+     end
+   end
+
+   describe "given target and source" do
+     # The logstash config goes here.
+     # At this time, only filters are supported.
+     config <<-CONFIG
+       filter {
+         csv {
+           source => "datain"
+           target => "data"
+         }
+       }
+     CONFIG
+
+     sample("datain" => "big,bird,sesame street") do
+       insist { subject["data"]["column1"] } == "big"
+       insist { subject["data"]["column2"] } == "bird"
+       insist { subject["data"]["column3"] } == "sesame street"
+     end
+   end
+
+ end
metadata ADDED
@@ -0,0 +1,76 @@
+ --- !ruby/object:Gem::Specification
+ name: logstash-filter-csv
+ version: !ruby/object:Gem::Version
+   version: 0.1.0
+ platform: ruby
+ authors:
+ - Elasticsearch
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2014-11-02 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: logstash
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: 1.4.0
+     - - <
+       - !ruby/object:Gem::Version
+         version: 2.0.0
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: 1.4.0
+     - - <
+       - !ruby/object:Gem::Version
+         version: 2.0.0
+ description: The CSV filter takes an event field containing CSV data, parses it, and
+   stores it as individual fields (can optionally specify the names).
+ email: richard.pijnenburg@elasticsearch.com
+ executables: []
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - .gitignore
+ - Gemfile
+ - Rakefile
+ - lib/logstash/filters/csv.rb
+ - logstash-filter-csv.gemspec
+ - rakelib/publish.rake
+ - rakelib/vendor.rake
+ - spec/filters/csv_spec.rb
+ homepage: http://logstash.net/
+ licenses:
+ - Apache License (2.0)
+ metadata:
+   logstash_plugin: 'true'
+   group: filter
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements: []
+ rubyforge_project:
+ rubygems_version: 2.4.1
+ signing_key:
+ specification_version: 4
+ summary: The CSV filter takes an event field containing CSV data, parses it, and stores
+   it as individual fields (can optionally specify the names).
+ test_files:
+ - spec/filters/csv_spec.rb