RubyGems - logstash-output-csv - Versions diffs - 0.1.0 - Mend

logstash-output-csv 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

checksums.yaml ADDED Viewed

@@ -0,0 +1,15 @@
+---
+!binary "U0hBMQ==":
+  metadata.gz: !binary |-
+    MjYxNjRjYWVjNGZkMDg2YzdhMGZhNzQ4YzExMjA3OTEwNjVjZTlhNA==
+  data.tar.gz: !binary |-
+    ZjNmNjY0YjAyYjdmNTkwZjEyNjRiMWJjZTdiYzY1NTNkODE5NzUxMw==
+SHA512:
+  metadata.gz: !binary |-
+    YjM1MTgzMTQwZDRhNTk5YTU2YWM3NjQ5ZGI1YWVmMjU4Y2I0NjhkMmQwZjM5
+    Mjc1YWQxMTRkMTdhNWEzNjNlNzc1YmJlZjc4YWI4NWU2ZjM2OTRmMWEyMDhi
+    NWNiNzg2MjA0MDBkYzVhM2FlMWQ5OWJhMzU1YTQ0ZTczNjQ2NDU=
+  data.tar.gz: !binary |-
+    N2E1MjFhZTljOGRlOWZjYmQzYjQwZTU1OGFlMGE5NWYwZjgyZDdkNTUxYWVi
+    ZjM2MjA4NGIwMDVkMGFkYTcwZWY3MjdhMWY0Nzg4OTFkZWQ3NmU3NTIxOWE4
+    OTQ4OTgzYWM1YjM5NDZkZDJjZWYxZWU5NjIxN2I1Mjg3YjY5OWU=

data/.gitignore ADDED Viewed

@@ -0,0 +1,4 @@
+*.gem
+Gemfile.lock
+.bundle
+vendor

data/Gemfile ADDED Viewed

@@ -0,0 +1,4 @@
+source 'http://rubygems.org'
+gem 'rake'
+gem 'gem_publisher'
+gem 'archive-tar-minitar'

data/LICENSE ADDED Viewed

@@ -0,0 +1,13 @@
+Copyright (c) 2012-2014 Elasticsearch <http://www.elasticsearch.org>
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.

data/Rakefile ADDED Viewed

@@ -0,0 +1,6 @@
+# Vendor download descriptors. rakelib/vendor.rake iterates this list and
+# reads the 'url', 'sha1' and optional 'files' keys of each entry.
+@files = []
+# With no task named on the command line, shell out to `rake -T`.
+task(:default) { system("rake -T") }

data/lib/logstash/outputs/csv.rb ADDED Viewed

@@ -0,0 +1,51 @@
+require "csv"
+require "logstash/namespace"
+require "logstash/outputs/file"
+require "logstash/json"
+# CSV output.
+#
+# Write events to disk in CSV or other delimited format
+# Based on the file output, many config values are shared
+# Uses the Ruby csv library internally
+class LogStash::Outputs::CSV < LogStash::Outputs::File
+  config_name "csv"
+  milestone 1
+  # The field names from the event that should be written to the CSV file.
+  # Fields are written to the CSV in the same order as the array.
+  # If a field does not exist on the event, an empty string will be written.
+  # Supports field reference syntax eg: `fields => ["field1", "[nested][field]"]`.
+  config :fields, :validate => :array, :required => true
+  # Options for CSV output. This is passed directly to the Ruby stdlib to_csv function.
+  # Full documentation is available on the http://ruby-doc.org/stdlib-2.0.0/libdoc/csv/rdoc/index.html[Ruby CSV documentation page].
+  # A typical use case would be to use alternative column or row seperators eg: `csv_options => {"col_sep" => "\t" "row_sep" => "\r\n"}` gives tab seperated data with windows line endings
+  config :csv_options, :validate => :hash, :required => false, :default => Hash.new
+  public
+  def register
+    super
+    @csv_options = Hash[@csv_options.map{|(k, v)|[k.to_sym, v]}]
+  end
+  public
+  def receive(event)
+    return unless output?(event)
+    path = event.sprintf(@path)
+    fd = open(path)
+    csv_values = @fields.map {|name| get_value(name, event)}
+    fd.write(csv_values.to_csv(@csv_options))
+    flush(fd)
+    close_stale_files
+  end #def receive
+  private
+  def get_value(name, event)
+    val = event[name]
+    val.is_a?(Hash) ? LogStash::Json.dump(val) : val
+  end
+end # class LogStash::Outputs::CSV

data/logstash-output-csv.gemspec ADDED Viewed

@@ -0,0 +1,28 @@
+Gem::Specification.new do |s|
+  s.name            = 'logstash-output-csv'
+  s.version         = '0.1.0'
+  s.licenses        = ['Apache License (2.0)']
+  s.summary         = "Write events to disk in CSV or other delimited format"
+  s.description     = "Write events to disk in CSV or other delimited format"
+  s.authors         = ["Elasticsearch"]
+  s.email           = 'richard.pijnenburg@elasticsearch.com'
+  s.homepage        = "http://logstash.net/"
+  s.require_paths = ["lib"]
+  # Files
+  s.files = `git ls-files`.split($\)+::Dir.glob('vendor/*')
+  # Tests
+  s.test_files = s.files.grep(%r{^(test|spec|features)/})
+  # Special flag to let us know this is actually a logstash plugin
+  s.metadata = { "logstash_plugin" => "true", "group" => "output" }
+  # Gem dependencies
+  s.add_runtime_dependency 'logstash', '>= 1.4.0', '< 2.0.0'
+  s.add_runtime_dependency 'logstash-output-file'
+  s.add_runtime_dependency 'logstash-filter-json'
+end

data/rakelib/publish.rake ADDED Viewed

@@ -0,0 +1,9 @@
+require "gem_publisher"
+desc "Publish gem to RubyGems.org"
+task :publish_gem do |t|
+  gem_file = Dir.glob(File.expand_path('../*.gemspec',File.dirname(__FILE__))).first
+  gem = GemPublisher.publish_if_updated(gem_file, :rubygems)
+  puts "Published #{gem}" if gem
+end

data/rakelib/vendor.rake ADDED Viewed

@@ -0,0 +1,169 @@
+require "net/http"
+require "uri"
+require "digest/sha1"
+def vendor(*args)
+  return File.join("vendor", *args)
+end
+directory "vendor/" => ["vendor"] do |task, args|
+  mkdir task.name
+end
+def fetch(url, sha1, output)
+  puts "Downloading #{url}"
+  actual_sha1 = download(url, output)
+  if actual_sha1 != sha1
+    fail "SHA1 does not match (expected '#{sha1}' but got '#{actual_sha1}')"
+  end
+end # def fetch
+def file_fetch(url, sha1)
+  filename = File.basename( URI(url).path )
+  output = "vendor/#{filename}"
+  task output => [ "vendor/" ] do
+    begin
+      actual_sha1 = file_sha1(output)
+      if actual_sha1 != sha1
+        fetch(url, sha1, output)
+      end
+    rescue Errno::ENOENT
+      fetch(url, sha1, output)
+    end
+  end.invoke
+  return output
+end
+def file_sha1(path)
+  digest = Digest::SHA1.new
+  fd = File.new(path, "r")
+  while true
+    begin
+      digest << fd.sysread(16384)
+    rescue EOFError
+      break
+    end
+  end
+  return digest.hexdigest
+ensure
+  fd.close if fd
+end
+def download(url, output)
+  uri = URI(url)
+  digest = Digest::SHA1.new
+  tmp = "#{output}.tmp"
+  Net::HTTP.start(uri.host, uri.port, :use_ssl => (uri.scheme == "https")) do |http|
+    request = Net::HTTP::Get.new(uri.path)
+    http.request(request) do |response|
+      fail "HTTP fetch failed for #{url}. #{response}" if [200, 301].include?(response.code)
+      size = (response["content-length"].to_i || -1).to_f
+      count = 0
+      File.open(tmp, "w") do |fd|
+        response.read_body do |chunk|
+          fd.write(chunk)
+          digest << chunk
+          if size > 0 && $stdout.tty?
+            count += chunk.bytesize
+            $stdout.write(sprintf("\r%0.2f%%", count/size * 100))
+          end
+        end
+      end
+      $stdout.write("\r      \r") if $stdout.tty?
+    end
+  end
+  File.rename(tmp, output)
+  return digest.hexdigest
+rescue SocketError => e
+  puts "Failure while downloading #{url}: #{e}"
+  raise
+ensure
+  File.unlink(tmp) if File.exist?(tmp)
+end # def download
+def untar(tarball, &block)
+  require "archive/tar/minitar"
+  tgz = Zlib::GzipReader.new(File.open(tarball))
+  # Pull out typesdb
+  tar = Archive::Tar::Minitar::Input.open(tgz)
+  tar.each do |entry|
+    path = block.call(entry)
+    next if path.nil?
+    parent = File.dirname(path)
+    mkdir_p parent unless File.directory?(parent)
+    # Skip this file if the output file is the same size
+    if entry.directory?
+      mkdir path unless File.directory?(path)
+    else
+      entry_mode = entry.instance_eval { @mode } & 0777
+      if File.exists?(path)
+        stat = File.stat(path)
+        # TODO(sissel): Submit a patch to archive-tar-minitar upstream to
+        # expose headers in the entry.
+        entry_size = entry.instance_eval { @size }
+        # If file sizes are same, skip writing.
+        next if stat.size == entry_size && (stat.mode & 0777) == entry_mode
+      end
+      puts "Extracting #{entry.full_name} from #{tarball} #{entry_mode.to_s(8)}"
+      File.open(path, "w") do |fd|
+        # eof? check lets us skip empty files. Necessary because the API provided by
+        # Archive::Tar::Minitar::Reader::EntryStream only mostly acts like an
+        # IO object. Something about empty files in this EntryStream causes
+        # IO.copy_stream to throw "can't convert nil into String" on JRuby
+        # TODO(sissel): File a bug about this.
+        while !entry.eof?
+          chunk = entry.read(16384)
+          fd.write(chunk)
+        end
+          #IO.copy_stream(entry, fd)
+      end
+      File.chmod(entry_mode, path)
+    end
+  end
+  tar.close
+  File.unlink(tarball) if File.file?(tarball)
+end # def untar
+def ungz(file)
+  outpath = file.gsub('.gz', '')
+  tgz = Zlib::GzipReader.new(File.open(file))
+  begin
+    File.open(outpath, "w") do |out|
+      IO::copy_stream(tgz, out)
+    end
+    File.unlink(file)
+  rescue
+    File.unlink(outpath) if File.file?(outpath)
+   raise
+  end
+  tgz.close
+end
+desc "Process any vendor files required for this plugin"
+task "vendor" do |task, args|
+  @files.each do |file|
+    download = file_fetch(file['url'], file['sha1'])
+    if download =~ /.tar.gz/
+      prefix = download.gsub('.tar.gz', '').gsub('vendor/', '')
+      untar(download) do |entry|
+        if !file['files'].nil?
+          next unless file['files'].include?(entry.full_name.gsub(prefix, ''))
+          out = entry.full_name.split("/").last
+        end
+        File.join('vendor', out)
+      end
+    elsif download =~ /.gz/
+      ungz(download)
+    end
+  end
+end

data/spec/outputs/csv_spec.rb ADDED Viewed

@@ -0,0 +1,266 @@
+require "csv"
+require "tempfile"
+require "spec_helper"
+require "logstash/outputs/csv"
+describe LogStash::Outputs::CSV do
+  describe "Write a single field to a csv file" do
+    tmpfile = Tempfile.new('logstash-spec-output-csv')
+    config <<-CONFIG
+      input {
+        generator {
+          add_field => ["foo","bar"]
+          count => 1
+        }
+      }
+      output {
+        csv {
+          path => "#{tmpfile.path}"
+          fields => "foo"
+        }
+      }
+    CONFIG
+    agent do
+      lines = File.readlines(tmpfile.path)
+      insist {lines.count} == 1
+      insist {lines[0]} == "bar\n"
+    end
+  end
+  describe "write multiple fields and lines to a csv file" do
+    tmpfile = Tempfile.new('logstash-spec-output-csv')
+    config <<-CONFIG
+      input {
+        generator {
+          add_field => ["foo", "bar", "baz", "quux"]
+          count => 2
+        }
+      }
+      output {
+        csv {
+          path => "#{tmpfile.path}"
+          fields => ["foo", "baz"]
+        }
+      }
+    CONFIG
+    agent do
+      lines = File.readlines(tmpfile.path)
+      insist {lines.count} == 2
+      insist {lines[0]} == "bar,quux\n"
+      insist {lines[1]} == "bar,quux\n"
+    end
+  end
+  describe "missing event fields are empty in csv" do
+    tmpfile = Tempfile.new('logstash-spec-output-csv')
+    config <<-CONFIG
+      input {
+        generator {
+          add_field => ["foo","bar", "baz", "quux"]
+          count => 1
+        }
+      }
+      output {
+        csv {
+          path => "#{tmpfile.path}"
+          fields => ["foo", "not_there", "baz"]
+        }
+      }
+    CONFIG
+    agent do
+      lines = File.readlines(tmpfile.path)
+      insist {lines.count} == 1
+      insist {lines[0]} == "bar,,quux\n"
+    end
+  end
+  describe "commas are quoted properly" do
+    tmpfile = Tempfile.new('logstash-spec-output-csv')
+    config <<-CONFIG
+      input {
+        generator {
+          add_field => ["foo","one,two", "baz", "quux"]
+          count => 1
+        }
+      }
+      output {
+        csv {
+          path => "#{tmpfile.path}"
+          fields => ["foo", "baz"]
+        }
+      }
+    CONFIG
+    agent do
+      lines = File.readlines(tmpfile.path)
+      insist {lines.count} == 1
+      insist {lines[0]} == "\"one,two\",quux\n"
+    end
+  end
+  describe "new lines are quoted properly" do
+    tmpfile = Tempfile.new('logstash-spec-output-csv')
+    config <<-CONFIG
+      input {
+        generator {
+          add_field => ["foo","one\ntwo", "baz", "quux"]
+          count => 1
+        }
+      }
+      output {
+        csv {
+          path => "#{tmpfile.path}"
+          fields => ["foo", "baz"]
+        }
+      }
+    CONFIG
+    agent do
+      lines = CSV.read(tmpfile.path)
+      insist {lines.count} == 1
+      insist {lines[0][0]} == "one\ntwo"
+    end
+  end
+  describe "fields that are are objects are written as JSON" do
+    tmpfile = Tempfile.new('logstash-spec-output-csv')
+    config <<-CONFIG
+      input {
+        generator {
+          message => '{"foo":{"one":"two"},"baz": "quux"}'
+          count => 1
+        }
+      }
+      filter {
+        json { source => "message"}
+      }
+      output {
+        csv {
+          path => "#{tmpfile.path}"
+          fields => ["foo", "baz"]
+        }
+      }
+    CONFIG
+    agent do
+      lines = CSV.read(tmpfile.path)
+      insist {lines.count} == 1
+      insist {lines[0][0]} == '{"one":"two"}'
+    end
+  end
+  describe "can address nested field using field reference syntax" do
+    tmpfile = Tempfile.new('logstash-spec-output-csv')
+    config <<-CONFIG
+      input {
+        generator {
+          message => '{"foo":{"one":"two"},"baz": "quux"}'
+          count => 1
+        }
+      }
+      filter {
+        json { source => "message"}
+      }
+      output {
+        csv {
+          path => "#{tmpfile.path}"
+          fields => ["[foo][one]", "baz"]
+        }
+      }
+    CONFIG
+    agent do
+      lines = CSV.read(tmpfile.path)
+      insist {lines.count} == 1
+      insist {lines[0][0]} == "two"
+      insist {lines[0][1]} == "quux"
+    end
+  end
+  describe "missing nested field is blank" do
+    tmpfile = Tempfile.new('logstash-spec-output-csv')
+    config <<-CONFIG
+      input {
+        generator {
+          message => '{"foo":{"one":"two"},"baz": "quux"}'
+          count => 1
+        }
+      }
+      filter {
+        json { source => "message"}
+      }
+      output {
+        csv {
+          path => "#{tmpfile.path}"
+          fields => ["[foo][missing]", "baz"]
+        }
+      }
+    CONFIG
+    agent do
+      lines = File.readlines(tmpfile.path)
+      insist {lines.count} == 1
+      insist {lines[0]} == ",quux\n"
+    end
+  end
+  describe "can choose field seperator" do
+    tmpfile = Tempfile.new('logstash-spec-output-csv')
+    config <<-CONFIG
+      input {
+        generator {
+          message => '{"foo":"one","bar": "two"}'
+          count => 1
+        }
+      }
+      filter {
+        json { source => "message"}
+      }
+      output {
+        csv {
+          path => "#{tmpfile.path}"
+          fields => ["foo", "bar"]
+          csv_options => {"col_sep" => "|"}
+        }
+      }
+    CONFIG
+    agent do
+      lines = File.readlines(tmpfile.path)
+      insist {lines.count} == 1
+      insist {lines[0]} == "one|two\n"
+    end
+  end
+  describe "can choose line seperator" do
+    tmpfile = Tempfile.new('logstash-spec-output-csv')
+    config <<-CONFIG
+      input {
+        generator {
+          message => '{"foo":"one","bar": "two"}'
+          count => 2
+        }
+      }
+      filter {
+        json { source => "message"}
+      }
+      output {
+        csv {
+          path => "#{tmpfile.path}"
+          fields => ["foo", "bar"]
+          csv_options => {"col_sep" => "|" "row_sep" => "\t"}
+        }
+      }
+    CONFIG
+    agent do
+      lines = File.readlines(tmpfile.path)
+      insist {lines.count} == 1
+      insist {lines[0]} == "one|two\tone|two\t"
+    end
+  end
+end

metadata ADDED Viewed

@@ -0,0 +1,103 @@
+--- !ruby/object:Gem::Specification
+name: logstash-output-csv
+version: !ruby/object:Gem::Version
+  version: 0.1.0
+platform: ruby
+authors:
+- Elasticsearch
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2014-11-10 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: logstash
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: 1.4.0
+    - - <
+      - !ruby/object:Gem::Version
+        version: 2.0.0
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: 1.4.0
+    - - <
+      - !ruby/object:Gem::Version
+        version: 2.0.0
+- !ruby/object:Gem::Dependency
+  name: logstash-output-file
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+- !ruby/object:Gem::Dependency
+  name: logstash-filter-json
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+description: Write events to disk in CSV or other delimited format
+email: richard.pijnenburg@elasticsearch.com
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- .gitignore
+- Gemfile
+- LICENSE
+- Rakefile
+- lib/logstash/outputs/csv.rb
+- logstash-output-csv.gemspec
+- rakelib/publish.rake
+- rakelib/vendor.rake
+- spec/outputs/csv_spec.rb
+homepage: http://logstash.net/
+licenses:
+- Apache License (2.0)
+metadata:
+  logstash_plugin: 'true'
+  group: output
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 2.4.1
+signing_key:
+specification_version: 4
+summary: Write events to disk in CSV or other delimited format
+test_files:
+- spec/outputs/csv_spec.rb