logstash-input-elasticsearch 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
+ ---
+ !binary "U0hBMQ==":
+   metadata.gz: !binary |-
+     ZGM3OGYyOWE4NTc1ODgxNGFmOTBkNzI1MWExYmMyOWExZTZlZjQyNg==
+   data.tar.gz: !binary |-
+     YjczYjE1ZWRhMjg4NmMzNjg0M2EwZTExMjEyNDI4YTk4MzU1MmZhYw==
+ SHA512:
+   metadata.gz: !binary |-
+     NDNiNmFhMDRjMjEwMjAyMDg3NmE3ZmM2M2I4MzdiMThhMDI0Mjk0YmJiYWY1
+     NzFjNjM1YTcyOWYyY2NkMzkwMTVhOGQ5N2NmNTZjZGVmODQ3YTYwN2VmMDAy
+     MGM5NTViNThhNzJmNWU4NWYyOTg0MTk2NjQwOWFkYzY5Zjk2YTM=
+   data.tar.gz: !binary |-
+     NGYxYjNlY2I4NjJiNWU1YTkyZjYwYmNjYzFhNGE3ZDI1MWY3N2VlMTFjNjBm
+     OGEzNWUyZjQ2Yjc0ZmJmNGU0NTVmYTE2Y2JhMTlhMDgwY2UzZjBhZWFkZTI4
+     ZmYwNTk4MmNiMjRhYWVlMzEyNzEyMDI0MjYyMTM3OTQwZDY3MWE=
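Each !binary scalar above is Base64-encoded text: the key decodes to the algorithm name ("SHA1", "SHA512") and each value to the hex digest of the named archive member. A minimal verification sketch in Ruby, assuming the .gem archive has been unpacked so that metadata.gz and data.tar.gz sit next to checksums.yaml:

    require "digest"
    require "yaml"

    # Psych decodes the !binary tags on load, so keys arrive as "SHA1"/"SHA512"
    # and values as plain hex digest strings.
    checksums = YAML.load_file("checksums.yaml")
    checksums.each do |algorithm, files|
      digest_class = Digest.const_get(algorithm)   # Digest::SHA1, Digest::SHA512
      files.each do |name, expected|
        actual = digest_class.file(name).hexdigest
        puts "#{name} (#{algorithm}): #{actual == expected ? 'OK' : 'MISMATCH'}"
      end
    end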
data/.gitignore ADDED
@@ -0,0 +1,4 @@
+ *.gem
+ Gemfile.lock
+ .bundle
+ vendor
data/Gemfile ADDED
@@ -0,0 +1,4 @@
+ source 'http://rubygems.org'
+ gem 'rake'
+ gem 'gem_publisher'
+ gem 'archive-tar-minitar'
data/Rakefile ADDED
@@ -0,0 +1,6 @@
+ @files = []
+
+ task :default do
+   system("rake -T")
+ end
+
data/lib/logstash/inputs/elasticsearch.rb ADDED
@@ -0,0 +1,134 @@
+ # encoding: utf-8
+ require "logstash/inputs/base"
+ require "logstash/namespace"
+ require "logstash/util/socket_peer"
+ require "logstash/json"
+
+ # Read from an Elasticsearch cluster, based on search query results.
+ # This is useful for replaying test logs, reindexing, etc.
+ #
+ # Example:
+ #
+ #     input {
+ #       # Read all documents from Elasticsearch matching the given query
+ #       elasticsearch {
+ #         host => "localhost"
+ #         query => "ERROR"
+ #       }
+ #     }
+ #
+ # This would create an Elasticsearch query with the following format:
+ #
+ #     http://localhost:9200/logstash-*/_search?q=ERROR&scroll=1m&size=1000
+ #
+ # * TODO(sissel): Option to keep the index, type, and doc id so we can do reindexing?
+ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base
+   config_name "elasticsearch"
+   milestone 1
+
+   default :codec, "json"
+
+   # The IP address or hostname of your Elasticsearch server.
+   config :host, :validate => :string, :required => true
+
+   # The HTTP port of your Elasticsearch server's REST interface.
+   config :port, :validate => :number, :default => 9200
+
+   # The index or alias to search.
+   config :index, :validate => :string, :default => "logstash-*"
+
+   # The query to be executed.
+   config :query, :validate => :string, :default => "*"
+
+   # Enable the Elasticsearch "scan" search type. This will disable
+   # sorting but increase speed and performance.
+   config :scan, :validate => :boolean, :default => true
+
+   # This allows you to set the maximum number of hits returned per scroll.
+   config :size, :validate => :number, :default => 1000
+
+   # This parameter controls the keepalive time of the scrolling request,
+   # given as an Elasticsearch time value (e.g. "1m"), and initiates the
+   # scrolling process. The timeout applies per round trip, i.e. between
+   # one scroll request and the next.
+   config :scroll, :validate => :string, :default => "1m"
+
+   public
+   def register
+     require "ftw"
+     @agent = FTW::Agent.new
+
+     params = {
+       "q" => @query,
+       "scroll" => @scroll,
+       "size" => "#{@size}",
+     }
+     params['search_type'] = "scan" if @scan
+
+     @search_url = "http://#{@host}:#{@port}/#{@index}/_search?#{encode(params)}"
+     @scroll_url = "http://#{@host}:#{@port}/_search/scroll?#{encode({"scroll" => @scroll})}"
+   end # def register
+
+   private
+   def encode(hash)
+     return hash.collect do |key, value|
+       CGI.escape(key) + "=" + CGI.escape(value)
+     end.join("&")
+   end # def encode
+
+   private
+   def execute_search_request
+     response = @agent.get!(@search_url)
+     json = ""
+     response.read_body { |c| json << c }
+     json
+   end
+
+   private
+   def execute_scroll_request(scroll_id)
+     response = @agent.post!(@scroll_url, :body => scroll_id)
+     json = ""
+     response.read_body { |c| json << c }
+     json
+   end
+
+   public
+   def run(output_queue)
+     result = LogStash::Json.load(execute_search_request)
+     scroll_id = result["_scroll_id"]
+
+     # When using search_type=scan the initial request returns no hits,
+     # only a scroll id, so issue the first scroll request here.
+     if @scan
+       result = LogStash::Json.load(execute_scroll_request(scroll_id))
+     end
+
+     loop do
+       break if result.nil?
+       hits = result["hits"]["hits"]
+       break if hits.empty?
+
+       hits.each do |hit|
+         # Hack to make codecs work
+         @codec.decode(LogStash::Json.dump(hit["_source"])) do |event|
+           decorate(event)
+           output_queue << event
+         end
+       end
+
+       # Get the scroll id from the previous result set and use it for getting the next data set
+       scroll_id = result["_scroll_id"]
+
+       # Fetch the next result set
+       result = LogStash::Json.load(execute_scroll_request(scroll_id))
+
+       if result["error"]
+         @logger.warn(result["error"], :request => @scroll_url)
+         # TODO(sissel): raise an error instead of breaking
+         break
+       end
+
+     end
+   rescue LogStash::ShutdownSignal
+     # Do nothing, let us quit.
+   end # def run
+ end # class LogStash::Inputs::Elasticsearch
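For readers unfamiliar with the scan/scroll flow that register and run implement through FTW, here is a standalone sketch of the same request sequence using only Ruby's standard library. The host, port, index, and query values are illustrative assumptions, not taken from any real deployment:

    require "net/http"
    require "cgi"
    require "json"

    host, port, index = "localhost", 9200, "logstash-*"
    params = { "q" => "*", "scroll" => "1m", "size" => "1000", "search_type" => "scan" }
    query  = params.map { |k, v| "#{CGI.escape(k)}=#{CGI.escape(v)}" }.join("&")

    http = Net::HTTP.new(host, port)

    # With search_type=scan the first response carries only a scroll id, no hits.
    result    = JSON.parse(http.get("/#{index}/_search?#{query}").body)
    scroll_id = result["_scroll_id"]

    loop do
      # POST the scroll id; each response hands back the id to use next round.
      result = JSON.parse(http.post("/_search/scroll?scroll=1m", scroll_id).body)
      hits   = result["hits"]["hits"]
      break if hits.empty?
      hits.each { |hit| puts hit["_source"].to_json }
      scroll_id = result["_scroll_id"]
    end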
data/logstash-input-elasticsearch.gemspec ADDED
@@ -0,0 +1,29 @@
+ Gem::Specification.new do |s|
+
+   s.name          = 'logstash-input-elasticsearch'
+   s.version       = '0.1.0'
+   s.licenses      = ['Apache License (2.0)']
+   s.summary       = "Read from an Elasticsearch cluster, based on search query results"
+   s.description   = "Read from an Elasticsearch cluster, based on search query results"
+   s.authors       = ["Elasticsearch"]
+   s.email         = 'richard.pijnenburg@elasticsearch.com'
+   s.homepage      = "http://logstash.net/"
+   s.require_paths = ["lib"]
+
+   # Files
+   s.files = `git ls-files`.split($\) + ::Dir.glob('vendor/*')
+
+   # Tests
+   s.test_files = s.files.grep(%r{^(test|spec|features)/})
+
+   # Special flag to let us know this is actually a logstash plugin
+   s.metadata = { "logstash_plugin" => "true", "group" => "input" }
+
+   # Gem dependencies
+   s.add_runtime_dependency 'logstash', '>= 1.4.0', '< 2.0.0'
+
+   s.add_runtime_dependency 'ftw', ['~> 0.0.39']
+   s.add_runtime_dependency 'logstash-codec-json'
+
+ end
+
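The "logstash_plugin" metadata flag set above is what lets tooling discover installed plugins. As a hedged illustration, such flags can be read back through the standard RubyGems API once the gem is installed:

    require "rubygems"

    # List installed gems that declare themselves logstash plugins via metadata.
    plugins = Gem::Specification.select do |spec|
      spec.metadata && spec.metadata["logstash_plugin"] == "true"
    end
    plugins.each { |spec| puts "#{spec.name} (#{spec.metadata['group']})" }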
data/rakelib/publish.rake ADDED
@@ -0,0 +1,9 @@
+ require "gem_publisher"
+
+ desc "Publish gem to RubyGems.org"
+ task :publish_gem do |t|
+   gem_file = Dir.glob(File.expand_path('../*.gemspec', File.dirname(__FILE__))).first
+   gem = GemPublisher.publish_if_updated(gem_file, :rubygems)
+   puts "Published #{gem}" if gem
+ end
+
data/rakelib/vendor.rake ADDED
@@ -0,0 +1,169 @@
+ require "net/http"
+ require "uri"
+ require "digest/sha1"
+
+ def vendor(*args)
+   return File.join("vendor", *args)
+ end
+
+ directory "vendor/" => ["vendor"] do |task, args|
+   mkdir task.name
+ end
+
+ def fetch(url, sha1, output)
+
+   puts "Downloading #{url}"
+   actual_sha1 = download(url, output)
+
+   if actual_sha1 != sha1
+     fail "SHA1 does not match (expected '#{sha1}' but got '#{actual_sha1}')"
+   end
+ end # def fetch
+
+ def file_fetch(url, sha1)
+   filename = File.basename( URI(url).path )
+   output = "vendor/#{filename}"
+   task output => [ "vendor/" ] do
+     begin
+       actual_sha1 = file_sha1(output)
+       if actual_sha1 != sha1
+         fetch(url, sha1, output)
+       end
+     rescue Errno::ENOENT
+       fetch(url, sha1, output)
+     end
+   end.invoke
+
+   return output
+ end
+
+ def file_sha1(path)
+   digest = Digest::SHA1.new
+   fd = File.new(path, "r")
+   while true
+     begin
+       digest << fd.sysread(16384)
+     rescue EOFError
+       break
+     end
+   end
+   return digest.hexdigest
+ ensure
+   fd.close if fd
+ end
+
+ def download(url, output)
+   uri = URI(url)
+   digest = Digest::SHA1.new
+   tmp = "#{output}.tmp"
+   Net::HTTP.start(uri.host, uri.port, :use_ssl => (uri.scheme == "https")) do |http|
+     request = Net::HTTP::Get.new(uri.path)
+     http.request(request) do |response|
+       # response.code is a String in Net::HTTP
+       fail "HTTP fetch failed for #{url}. #{response}" unless ["200", "301"].include?(response.code)
+       size = (response["content-length"] || -1).to_i.to_f
+       count = 0
+       File.open(tmp, "w") do |fd|
+         response.read_body do |chunk|
+           fd.write(chunk)
+           digest << chunk
+           if size > 0 && $stdout.tty?
+             count += chunk.bytesize
+             $stdout.write(sprintf("\r%0.2f%%", count/size * 100))
+           end
+         end
+       end
+       $stdout.write("\r          \r") if $stdout.tty?
+     end
+   end
+
+   File.rename(tmp, output)
+
+   return digest.hexdigest
+ rescue SocketError => e
+   puts "Failure while downloading #{url}: #{e}"
+   raise
+ ensure
+   File.unlink(tmp) if File.exist?(tmp)
+ end # def download
+
+ def untar(tarball, &block)
+   require "archive/tar/minitar"
+   tgz = Zlib::GzipReader.new(File.open(tarball))
+   # Pull out typesdb
+   tar = Archive::Tar::Minitar::Input.open(tgz)
+   tar.each do |entry|
+     path = block.call(entry)
+     next if path.nil?
+     parent = File.dirname(path)
+
+     mkdir_p parent unless File.directory?(parent)
+
+     # Skip this file if the output file is the same size
+     if entry.directory?
+       mkdir path unless File.directory?(path)
+     else
+       entry_mode = entry.instance_eval { @mode } & 0777
+       if File.exist?(path)
+         stat = File.stat(path)
+         # TODO(sissel): Submit a patch to archive-tar-minitar upstream to
+         # expose headers in the entry.
+         entry_size = entry.instance_eval { @size }
+         # If file sizes are same, skip writing.
+         next if stat.size == entry_size && (stat.mode & 0777) == entry_mode
+       end
+       puts "Extracting #{entry.full_name} from #{tarball} #{entry_mode.to_s(8)}"
+       File.open(path, "w") do |fd|
+         # eof? check lets us skip empty files. Necessary because the API provided by
+         # Archive::Tar::Minitar::Reader::EntryStream only mostly acts like an
+         # IO object. Something about empty files in this EntryStream causes
+         # IO.copy_stream to throw "can't convert nil into String" on JRuby
+         # TODO(sissel): File a bug about this.
+         while !entry.eof?
+           chunk = entry.read(16384)
+           fd.write(chunk)
+         end
+         #IO.copy_stream(entry, fd)
+       end
+       File.chmod(entry_mode, path)
+     end
+   end
+   tar.close
+   File.unlink(tarball) if File.file?(tarball)
+ end # def untar
+
+ def ungz(file)
+
+   outpath = file.gsub('.gz', '')
+   tgz = Zlib::GzipReader.new(File.open(file))
+   begin
+     File.open(outpath, "w") do |out|
+       IO.copy_stream(tgz, out)
+     end
+     File.unlink(file)
+   rescue
+     File.unlink(outpath) if File.file?(outpath)
+     raise
+   end
+   tgz.close
+ end
+
+ desc "Process any vendor files required for this plugin"
+ task "vendor" do |task, args|
+
+   @files.each do |file|
+     download = file_fetch(file['url'], file['sha1'])
+     if download =~ /\.tar\.gz$/
+       prefix = download.gsub('.tar.gz', '').gsub('vendor/', '')
+       untar(download) do |entry|
+         if !file['files'].nil?
+           next unless file['files'].include?(entry.full_name.gsub(prefix, ''))
+         end
+         # Return the output path for this entry, flattened into vendor/
+         File.join('vendor', entry.full_name.split("/").last)
+       end
+     elsif download =~ /\.gz$/
+       ungz(download)
+     end
+   end
+
+ end
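The "vendor" task iterates the @files list that the Rakefile initializes to an empty array. A hedged sketch of the entry shape the task expects, using placeholder URL and checksum values (this plugin ships no vendor files, so its own list stays empty):

    # Hypothetical entry shape consumed by the "vendor" task above; the url,
    # sha1, and file list are illustrative placeholders, not real artifacts.
    @files = [
      {
        'url'   => 'https://example.com/some-dataset.tar.gz',
        'sha1'  => 'da39a3ee5e6b4b0d3255bfef95601890afd80709',
        'files' => ['/data/some-dataset.dat'],   # optional whitelist of entries to extract
      },
    ]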
data/spec/inputs/elasticsearch_spec.rb ADDED
@@ -0,0 +1,80 @@
+ require "spec_helper"
+ require "logstash/inputs/elasticsearch"
+
+ describe "inputs/elasticsearch" do
+
+   search_response = <<-RESPONSE
+     {
+       "_scroll_id":"xxx",
+       "took":5,
+       "timed_out":false,
+       "_shards":{"total":15,"successful":15,"failed":0},
+       "hits":{
+         "total":1000050,
+         "max_score":1.0,
+         "hits":[
+           {
+             "_index":"logstash2",
+             "_type":"logs",
+             "_id":"AmaqL7VuSWKF-F6N_Gz72g",
+             "_score":1.0,
+             "_source" : {
+               "message":"foobar",
+               "@version":"1",
+               "@timestamp":"2014-05-19T21:08:39.000Z",
+               "host":"colin-mbp13r"
+             }
+           }
+         ]
+       }
+     }
+   RESPONSE
+
+   scroll_response = <<-RESPONSE
+     {
+       "hits":{
+         "hits":[]
+       }
+     }
+   RESPONSE
+
+   config <<-CONFIG
+     input {
+       elasticsearch {
+         host => "localhost"
+         scan => false
+       }
+     }
+   CONFIG
+
+   it "should retrieve json event from elasticsearch" do
+     # I somewhat duplicated our "input" rspec extension because I needed to add
+     # mocks for the actual ES calls, and rspec expectations need to be in the "it"
+     # statement but the "input" extension defines the "it".
+     # TODO(colin) see how we can improve our rspec extension to better integrate in these scenarios
+
+     expect_any_instance_of(LogStash::Inputs::Elasticsearch).to receive(:execute_search_request).and_return(search_response)
+     expect_any_instance_of(LogStash::Inputs::Elasticsearch).to receive(:execute_scroll_request).with(any_args).and_return(scroll_response)
+
+     pipeline = LogStash::Pipeline.new(config)
+     queue = Queue.new
+     pipeline.instance_eval do
+       @output_func = lambda { |event| queue << event }
+     end
+     pipeline_thread = Thread.new { pipeline.run }
+     event = queue.pop
+
+     insist { event["message"] } == "foobar"
+
+     # do not call pipeline.shutdown here, as it will stop the plugin execution randomly
+     # and maybe kill the input before it calls execute_scroll_request.
+     # TODO(colin) we should rework the pipeline shutdown to allow a soft/clean shutdown mechanism,
+     # using a shutdown event which can be fed into each plugin queue and, when the plugin sees it,
+     # exits after completing its processing.
+     #
+     # pipeline.shutdown
+     #
+     # instead, since our scroll_response will terminate the plugin, we can just join the pipeline thread
+     pipeline_thread.join
+   end
+ end
metadata ADDED
@@ -0,0 +1,102 @@
+ --- !ruby/object:Gem::Specification
+ name: logstash-input-elasticsearch
+ version: !ruby/object:Gem::Version
+   version: 0.1.0
+ platform: ruby
+ authors:
+ - Elasticsearch
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2014-11-03 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: logstash
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: 1.4.0
+     - - <
+       - !ruby/object:Gem::Version
+         version: 2.0.0
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: 1.4.0
+     - - <
+       - !ruby/object:Gem::Version
+         version: 2.0.0
+ - !ruby/object:Gem::Dependency
+   name: ftw
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         version: 0.0.39
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         version: 0.0.39
+ - !ruby/object:Gem::Dependency
+   name: logstash-codec-json
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+ description: Read from an Elasticsearch cluster, based on search query results
+ email: richard.pijnenburg@elasticsearch.com
+ executables: []
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - .gitignore
+ - Gemfile
+ - Rakefile
+ - lib/logstash/inputs/elasticsearch.rb
+ - logstash-input-elasticsearch.gemspec
+ - rakelib/publish.rake
+ - rakelib/vendor.rake
+ - spec/inputs/elasticsearch_spec.rb
+ homepage: http://logstash.net/
+ licenses:
+ - Apache License (2.0)
+ metadata:
+   logstash_plugin: 'true'
+   group: input
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements: []
+ rubyforge_project:
+ rubygems_version: 2.4.1
+ signing_key:
+ specification_version: 4
+ summary: Read from an Elasticsearch cluster, based on search query results
+ test_files:
+ - spec/inputs/elasticsearch_spec.rb