logstash-input-elasticsearch 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
+ ---
+ !binary "U0hBMQ==":
+   metadata.gz: !binary |-
+     ZGM3OGYyOWE4NTc1ODgxNGFmOTBkNzI1MWExYmMyOWExZTZlZjQyNg==
+   data.tar.gz: !binary |-
+     YjczYjE1ZWRhMjg4NmMzNjg0M2EwZTExMjEyNDI4YTk4MzU1MmZhYw==
+ SHA512:
+   metadata.gz: !binary |-
+     NDNiNmFhMDRjMjEwMjAyMDg3NmE3ZmM2M2I4MzdiMThhMDI0Mjk0YmJiYWY1
+     NzFjNjM1YTcyOWYyY2NkMzkwMTVhOGQ5N2NmNTZjZGVmODQ3YTYwN2VmMDAy
+     MGM5NTViNThhNzJmNWU4NWYyOTg0MTk2NjQwOWFkYzY5Zjk2YTM=
+   data.tar.gz: !binary |-
+     NGYxYjNlY2I4NjJiNWU1YTkyZjYwYmNjYzFhNGE3ZDI1MWY3N2VlMTFjNjBm
+     OGEzNWUyZjQ2Yjc0ZmJmNGU0NTVmYTE2Y2JhMTlhMDgwY2UzZjBhZWFkZTI4
+     ZmYwNTk4MmNiMjRhYWVlMzEyNzEyMDI0MjYyMTM3OTQwZDY3MWE=
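Each !binary scalar above is Base64-encoded text: the key decodes to the algorithm name ("SHA1", "SHA512") and each value to the hex digest of the named archive member. A minimal verification sketch in Ruby, assuming the .gem archive has been unpacked so that metadata.gz and data.tar.gz sit next to checksums.yaml:

    require "digest"
    require "yaml"

    # Psych decodes the !binary tags on load, so keys arrive as "SHA1"/"SHA512"
    # and values as plain hex digest strings.
    checksums = YAML.load_file("checksums.yaml")
    checksums.each do |algorithm, files|
      digest_class = Digest.const_get(algorithm)   # Digest::SHA1, Digest::SHA512
      files.each do |name, expected|
        actual = digest_class.file(name).hexdigest
        puts "#{name} (#{algorithm}): #{actual == expected ? 'OK' : 'MISMATCH'}"
      end
    end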
data/.gitignore ADDED
@@ -0,0 +1,4 @@
+ *.gem
+ Gemfile.lock
+ .bundle
+ vendor
data/Gemfile ADDED
@@ -0,0 +1,4 @@
+ source 'http://rubygems.org'
+ gem 'rake'
+ gem 'gem_publisher'
+ gem 'archive-tar-minitar'
data/Rakefile ADDED
@@ -0,0 +1,6 @@
+ @files = []
+
+ task :default do
+   system("rake -T")
+ end
+
data/lib/logstash/inputs/elasticsearch.rb ADDED
@@ -0,0 +1,134 @@
+ # encoding: utf-8
+ require "logstash/inputs/base"
+ require "logstash/namespace"
+ require "logstash/util/socket_peer"
+ require "logstash/json"
+
+ # Read from an Elasticsearch cluster, based on search query results.
+ # This is useful for replaying test logs, reindexing, etc.
+ #
+ # Example:
+ #
+ #     input {
+ #       # Read all documents from Elasticsearch matching the given query
+ #       elasticsearch {
+ #         host => "localhost"
+ #         query => "ERROR"
+ #       }
+ #     }
+ #
+ # This would create an Elasticsearch query with the following format:
+ #
+ #     http://localhost:9200/logstash-*/_search?q=ERROR&scroll=1m&size=1000
+ #
+ # * TODO(sissel): Option to keep the index, type, and doc id so we can do reindexing?
+ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base
+   config_name "elasticsearch"
+   milestone 1
+
+   default :codec, "json"
+
+   # The IP address or hostname of your Elasticsearch server.
+   config :host, :validate => :string, :required => true
+
+   # The HTTP port of your Elasticsearch server's REST interface.
+   config :port, :validate => :number, :default => 9200
+
+   # The index or alias to search.
+   config :index, :validate => :string, :default => "logstash-*"
+
+   # The query to be executed.
+   config :query, :validate => :string, :default => "*"
+
+   # Enable the Elasticsearch "scan" search type. This will disable
+   # sorting but increase speed and performance.
+   config :scan, :validate => :boolean, :default => true
+
+   # This allows you to set the maximum number of hits returned per scroll.
+   config :size, :validate => :number, :default => 1000
+
+   # This parameter controls the keepalive time of the scrolling request,
+   # given as an Elasticsearch time value (e.g. "1m"), and initiates the
+   # scrolling process. The timeout applies per round trip, i.e. between
+   # one scroll request and the next.
+   config :scroll, :validate => :string, :default => "1m"
+
+   public
+   def register
+     require "ftw"
+     @agent = FTW::Agent.new
+
+     params = {
+       "q" => @query,
+       "scroll" => @scroll,
+       "size" => "#{@size}",
+     }
+     params['search_type'] = "scan" if @scan
+
+     @search_url = "http://#{@host}:#{@port}/#{@index}/_search?#{encode(params)}"
+     @scroll_url = "http://#{@host}:#{@port}/_search/scroll?#{encode({"scroll" => @scroll})}"
+   end # def register
+
+   private
+   def encode(hash)
+     return hash.collect do |key, value|
+       CGI.escape(key) + "=" + CGI.escape(value)
+     end.join("&")
+   end # def encode
+
+   private
+   def execute_search_request
+     response = @agent.get!(@search_url)
+     json = ""
+     response.read_body { |c| json << c }
+     json
+   end
+
+   private
+   def execute_scroll_request(scroll_id)
+     response = @agent.post!(@scroll_url, :body => scroll_id)
+     json = ""
+     response.read_body { |c| json << c }
+     json
+   end
+
+   public
+   def run(output_queue)
+     result = LogStash::Json.load(execute_search_request)
+     scroll_id = result["_scroll_id"]
+
+     # When using search_type=scan the initial request returns no hits,
+     # only a scroll id, so issue the first scroll request here.
+     if @scan
+       result = LogStash::Json.load(execute_scroll_request(scroll_id))
+     end
+
+     loop do
+       break if result.nil?
+       hits = result["hits"]["hits"]
+       break if hits.empty?
+
+       hits.each do |hit|
+         # Hack to make codecs work
+         @codec.decode(LogStash::Json.dump(hit["_source"])) do |event|
+           decorate(event)
+           output_queue << event
+         end
+       end
+
+       # Get the scroll id from the previous result set and use it for getting the next data set
+       scroll_id = result["_scroll_id"]
+
+       # Fetch the next result set
+       result = LogStash::Json.load(execute_scroll_request(scroll_id))
+
+       if result["error"]
+         @logger.warn(result["error"], :request => @scroll_url)
+         # TODO(sissel): raise an error instead of breaking
+         break
+       end
+
+     end
+   rescue LogStash::ShutdownSignal
+     # Do nothing, let us quit.
+   end # def run
+ end # class LogStash::Inputs::Elasticsearch
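For readers unfamiliar with the scan/scroll flow that register and run implement through FTW, here is a standalone sketch of the same request sequence using only Ruby's standard library. The host, port, index, and query values are illustrative assumptions, not taken from any real deployment:

    require "net/http"
    require "cgi"
    require "json"

    host, port, index = "localhost", 9200, "logstash-*"
    params = { "q" => "*", "scroll" => "1m", "size" => "1000", "search_type" => "scan" }
    query  = params.map { |k, v| "#{CGI.escape(k)}=#{CGI.escape(v)}" }.join("&")

    http = Net::HTTP.new(host, port)

    # With search_type=scan the first response carries only a scroll id, no hits.
    result    = JSON.parse(http.get("/#{index}/_search?#{query}").body)
    scroll_id = result["_scroll_id"]

    loop do
      # POST the scroll id; each response hands back the id to use next round.
      result = JSON.parse(http.post("/_search/scroll?scroll=1m", scroll_id).body)
      hits   = result["hits"]["hits"]
      break if hits.empty?
      hits.each { |hit| puts hit["_source"].to_json }
      scroll_id = result["_scroll_id"]
    end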
data/logstash-input-elasticsearch.gemspec ADDED
@@ -0,0 +1,29 @@
+ Gem::Specification.new do |s|
+
+   s.name          = 'logstash-input-elasticsearch'
+   s.version       = '0.1.0'
+   s.licenses      = ['Apache License (2.0)']
+   s.summary       = "Read from an Elasticsearch cluster, based on search query results"
+   s.description   = "Read from an Elasticsearch cluster, based on search query results"
+   s.authors       = ["Elasticsearch"]
+   s.email         = 'richard.pijnenburg@elasticsearch.com'
+   s.homepage      = "http://logstash.net/"
+   s.require_paths = ["lib"]
+
+   # Files
+   s.files = `git ls-files`.split($\) + ::Dir.glob('vendor/*')
+
+   # Tests
+   s.test_files = s.files.grep(%r{^(test|spec|features)/})
+
+   # Special flag to let us know this is actually a logstash plugin
+   s.metadata = { "logstash_plugin" => "true", "group" => "input" }
+
+   # Gem dependencies
+   s.add_runtime_dependency 'logstash', '>= 1.4.0', '< 2.0.0'
+
+   s.add_runtime_dependency 'ftw', ['~> 0.0.39']
+   s.add_runtime_dependency 'logstash-codec-json'
+
+ end
+
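The "logstash_plugin" metadata flag set above is what lets tooling discover installed plugins. As a hedged illustration, such flags can be read back through the standard RubyGems API once the gem is installed:

    require "rubygems"

    # List installed gems that declare themselves logstash plugins via metadata.
    plugins = Gem::Specification.select do |spec|
      spec.metadata && spec.metadata["logstash_plugin"] == "true"
    end
    plugins.each { |spec| puts "#{spec.name} (#{spec.metadata['group']})" }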
data/rakelib/publish.rake ADDED
@@ -0,0 +1,9 @@
+ require "gem_publisher"
+
+ desc "Publish gem to RubyGems.org"
+ task :publish_gem do |t|
+   gem_file = Dir.glob(File.expand_path('../*.gemspec', File.dirname(__FILE__))).first
+   gem = GemPublisher.publish_if_updated(gem_file, :rubygems)
+   puts "Published #{gem}" if gem
+ end
+
data/rakelib/vendor.rake ADDED
@@ -0,0 +1,169 @@
+ require "net/http"
+ require "uri"
+ require "digest/sha1"
+
+ def vendor(*args)
+   return File.join("vendor", *args)
+ end
+
+ directory "vendor/" => ["vendor"] do |task, args|
+   mkdir task.name
+ end
+
+ def fetch(url, sha1, output)
+
+   puts "Downloading #{url}"
+   actual_sha1 = download(url, output)
+
+   if actual_sha1 != sha1
+     fail "SHA1 does not match (expected '#{sha1}' but got '#{actual_sha1}')"
+   end
+ end # def fetch
+
+ def file_fetch(url, sha1)
+   filename = File.basename( URI(url).path )
+   output = "vendor/#{filename}"
+   task output => [ "vendor/" ] do
+     begin
+       actual_sha1 = file_sha1(output)
+       if actual_sha1 != sha1
+         fetch(url, sha1, output)
+       end
+     rescue Errno::ENOENT
+       fetch(url, sha1, output)
+     end
+   end.invoke
+
+   return output
+ end
+
+ def file_sha1(path)
+   digest = Digest::SHA1.new
+   fd = File.new(path, "r")
+   while true
+     begin
+       digest << fd.sysread(16384)
+     rescue EOFError
+       break
+     end
+   end
+   return digest.hexdigest
+ ensure
+   fd.close if fd
+ end
+
+ def download(url, output)
+   uri = URI(url)
+   digest = Digest::SHA1.new
+   tmp = "#{output}.tmp"
+   Net::HTTP.start(uri.host, uri.port, :use_ssl => (uri.scheme == "https")) do |http|
+     request = Net::HTTP::Get.new(uri.path)
+     http.request(request) do |response|
+       # response.code is a String in Net::HTTP
+       fail "HTTP fetch failed for #{url}. #{response}" unless ["200", "301"].include?(response.code)
+       size = (response["content-length"] || -1).to_i.to_f
+       count = 0
+       File.open(tmp, "w") do |fd|
+         response.read_body do |chunk|
+           fd.write(chunk)
+           digest << chunk
+           if size > 0 && $stdout.tty?
+             count += chunk.bytesize
+             $stdout.write(sprintf("\r%0.2f%%", count/size * 100))
+           end
+         end
+       end
+       $stdout.write("\r          \r") if $stdout.tty?
+     end
+   end
+
+   File.rename(tmp, output)
+
+   return digest.hexdigest
+ rescue SocketError => e
+   puts "Failure while downloading #{url}: #{e}"
+   raise
+ ensure
+   File.unlink(tmp) if File.exist?(tmp)
+ end # def download
+
+ def untar(tarball, &block)
+   require "archive/tar/minitar"
+   tgz = Zlib::GzipReader.new(File.open(tarball))
+   # Pull out typesdb
+   tar = Archive::Tar::Minitar::Input.open(tgz)
+   tar.each do |entry|
+     path = block.call(entry)
+     next if path.nil?
+     parent = File.dirname(path)
+
+     mkdir_p parent unless File.directory?(parent)
+
+     # Skip this file if the output file is the same size
+     if entry.directory?
+       mkdir path unless File.directory?(path)
+     else
+       entry_mode = entry.instance_eval { @mode } & 0777
+       if File.exist?(path)
+         stat = File.stat(path)
+         # TODO(sissel): Submit a patch to archive-tar-minitar upstream to
+         # expose headers in the entry.
+         entry_size = entry.instance_eval { @size }
+         # If file sizes are same, skip writing.
+         next if stat.size == entry_size && (stat.mode & 0777) == entry_mode
+       end
+       puts "Extracting #{entry.full_name} from #{tarball} #{entry_mode.to_s(8)}"
+       File.open(path, "w") do |fd|
+         # eof? check lets us skip empty files. Necessary because the API provided by
+         # Archive::Tar::Minitar::Reader::EntryStream only mostly acts like an
+         # IO object. Something about empty files in this EntryStream causes
+         # IO.copy_stream to throw "can't convert nil into String" on JRuby
+         # TODO(sissel): File a bug about this.
+         while !entry.eof?
+           chunk = entry.read(16384)
+           fd.write(chunk)
+         end
+         #IO.copy_stream(entry, fd)
+       end
+       File.chmod(entry_mode, path)
+     end
+   end
+   tar.close
+   File.unlink(tarball) if File.file?(tarball)
+ end # def untar
+
+ def ungz(file)
+
+   outpath = file.gsub('.gz', '')
+   tgz = Zlib::GzipReader.new(File.open(file))
+   begin
+     File.open(outpath, "w") do |out|
+       IO.copy_stream(tgz, out)
+     end
+     File.unlink(file)
+   rescue
+     File.unlink(outpath) if File.file?(outpath)
+     raise
+   end
+   tgz.close
+ end
+
+ desc "Process any vendor files required for this plugin"
+ task "vendor" do |task, args|
+
+   @files.each do |file|
+     download = file_fetch(file['url'], file['sha1'])
+     if download =~ /\.tar\.gz$/
+       prefix = download.gsub('.tar.gz', '').gsub('vendor/', '')
+       untar(download) do |entry|
+         if !file['files'].nil?
+           next unless file['files'].include?(entry.full_name.gsub(prefix, ''))
+         end
+         # Return the output path for this entry, flattened into vendor/
+         File.join('vendor', entry.full_name.split("/").last)
+       end
+     elsif download =~ /\.gz$/
+       ungz(download)
+     end
+   end
+
+ end
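The "vendor" task iterates the @files list that the Rakefile initializes to an empty array. A hedged sketch of the entry shape the task expects, using placeholder URL and checksum values (this plugin ships no vendor files, so its own list stays empty):

    # Hypothetical entry shape consumed by the "vendor" task above; the url,
    # sha1, and file list are illustrative placeholders, not real artifacts.
    @files = [
      {
        'url'   => 'https://example.com/some-dataset.tar.gz',
        'sha1'  => 'da39a3ee5e6b4b0d3255bfef95601890afd80709',
        'files' => ['/data/some-dataset.dat'],   # optional whitelist of entries to extract
      },
    ]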
data/spec/inputs/elasticsearch_spec.rb ADDED
@@ -0,0 +1,80 @@
+ require "spec_helper"
+ require "logstash/inputs/elasticsearch"
+
+ describe "inputs/elasticsearch" do
+
+   search_response = <<-RESPONSE
+     {
+       "_scroll_id":"xxx",
+       "took":5,
+       "timed_out":false,
+       "_shards":{"total":15,"successful":15,"failed":0},
+       "hits":{
+         "total":1000050,
+         "max_score":1.0,
+         "hits":[
+           {
+             "_index":"logstash2",
+             "_type":"logs",
+             "_id":"AmaqL7VuSWKF-F6N_Gz72g",
+             "_score":1.0,
+             "_source" : {
+               "message":"foobar",
+               "@version":"1",
+               "@timestamp":"2014-05-19T21:08:39.000Z",
+               "host":"colin-mbp13r"
+             }
+           }
+         ]
+       }
+     }
+   RESPONSE
+
+   scroll_response = <<-RESPONSE
+     {
+       "hits":{
+         "hits":[]
+       }
+     }
+   RESPONSE
+
+   config <<-CONFIG
+     input {
+       elasticsearch {
+         host => "localhost"
+         scan => false
+       }
+     }
+   CONFIG
+
+   it "should retrieve json event from elasticsearch" do
+     # I somewhat duplicated our "input" rspec extension because I needed to add
+     # mocks for the actual ES calls, and rspec expectations need to be in the "it"
+     # statement but the "input" extension defines the "it".
+     # TODO(colin) see how we can improve our rspec extension to better integrate in these scenarios
+
+     expect_any_instance_of(LogStash::Inputs::Elasticsearch).to receive(:execute_search_request).and_return(search_response)
+     expect_any_instance_of(LogStash::Inputs::Elasticsearch).to receive(:execute_scroll_request).with(any_args).and_return(scroll_response)
+
+     pipeline = LogStash::Pipeline.new(config)
+     queue = Queue.new
+     pipeline.instance_eval do
+       @output_func = lambda { |event| queue << event }
+     end
+     pipeline_thread = Thread.new { pipeline.run }
+     event = queue.pop
+
+     insist { event["message"] } == "foobar"
+
+     # do not call pipeline.shutdown here, as it will stop the plugin execution randomly
+     # and maybe kill the input before it calls execute_scroll_request.
+     # TODO(colin) we should rework the pipeline shutdown to allow a soft/clean shutdown mechanism,
+     # using a shutdown event which can be fed into each plugin queue and, when the plugin sees it,
+     # exits after completing its processing.
+     #
+     # pipeline.shutdown
+     #
+     # instead, since our scroll_response will terminate the plugin, we can just join the pipeline thread
+     pipeline_thread.join
+   end
+ end
metadata ADDED
@@ -0,0 +1,102 @@
+ --- !ruby/object:Gem::Specification
+ name: logstash-input-elasticsearch
+ version: !ruby/object:Gem::Version
+   version: 0.1.0
+ platform: ruby
+ authors:
+ - Elasticsearch
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2014-11-03 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: logstash
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: 1.4.0
+     - - <
+       - !ruby/object:Gem::Version
+         version: 2.0.0
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: 1.4.0
+     - - <
+       - !ruby/object:Gem::Version
+         version: 2.0.0
+ - !ruby/object:Gem::Dependency
+   name: ftw
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         version: 0.0.39
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         version: 0.0.39
+ - !ruby/object:Gem::Dependency
+   name: logstash-codec-json
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+ description: Read from an Elasticsearch cluster, based on search query results
+ email: richard.pijnenburg@elasticsearch.com
+ executables: []
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - .gitignore
+ - Gemfile
+ - Rakefile
+ - lib/logstash/inputs/elasticsearch.rb
+ - logstash-input-elasticsearch.gemspec
+ - rakelib/publish.rake
+ - rakelib/vendor.rake
+ - spec/inputs/elasticsearch_spec.rb
+ homepage: http://logstash.net/
+ licenses:
+ - Apache License (2.0)
+ metadata:
+   logstash_plugin: 'true'
+   group: input
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements: []
+ rubyforge_project:
+ rubygems_version: 2.4.1
+ signing_key:
+ specification_version: 4
+ summary: Read from an Elasticsearch cluster, based on search query results
+ test_files:
+ - spec/inputs/elasticsearch_spec.rb