logstash-input-elasticsearch 0.1.0

checksums.yaml ADDED
@@ -0,0 +1,15 @@
+ ---
+ !binary "U0hBMQ==":
+   metadata.gz: !binary |-
+     ZGM3OGYyOWE4NTc1ODgxNGFmOTBkNzI1MWExYmMyOWExZTZlZjQyNg==
+   data.tar.gz: !binary |-
+     YjczYjE1ZWRhMjg4NmMzNjg0M2EwZTExMjEyNDI4YTk4MzU1MmZhYw==
+ SHA512:
+   metadata.gz: !binary |-
+     NDNiNmFhMDRjMjEwMjAyMDg3NmE3ZmM2M2I4MzdiMThhMDI0Mjk0YmJiYWY1
+     NzFjNjM1YTcyOWYyY2NkMzkwMTVhOGQ5N2NmNTZjZGVmODQ3YTYwN2VmMDAy
+     MGM5NTViNThhNzJmNWU4NWYyOTg0MTk2NjQwOWFkYzY5Zjk2YTM=
+   data.tar.gz: !binary |-
+     NGYxYjNlY2I4NjJiNWU1YTkyZjYwYmNjYzFhNGE3ZDI1MWY3N2VlMTFjNjBm
+     OGEzNWUyZjQ2Yjc0ZmJmNGU0NTVmYTE2Y2JhMTlhMDgwY2UzZjBhZWFkZTI4
+     ZmYwNTk4MmNiMjRhYWVlMzEyNzEyMDI0MjYyMTM3OTQwZDY3MWE=
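Each !binary value in checksums.yaml is the base64 encoding of a hex digest string. A minimal verification sketch (not part of the gem), assuming metadata.gz has been extracted from the .gem archive:

    require "base64"
    require "digest"

    # Decode the base64 value from checksums.yaml back into the hex digest
    # it encodes, then compare against the digest of the extracted file.
    encoded  = "ZGM3OGYyOWE4NTc1ODgxNGFmOTBkNzI1MWExYmMyOWExZTZlZjQyNg=="
    expected = Base64.decode64(encoded) # => "dc78f29a85758814af90d7251a1bc29a1e6ef426"
    actual   = Digest::SHA1.file("metadata.gz").hexdigest
    abort "checksum mismatch for metadata.gz" unless actual == expected
    puts "metadata.gz checksum OK"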
data/.gitignore ADDED
@@ -0,0 +1,4 @@
+ *.gem
+ Gemfile.lock
+ .bundle
+ vendor
data/Gemfile ADDED
@@ -0,0 +1,4 @@
+ source 'http://rubygems.org'
+ gem 'rake'
+ gem 'gem_publisher'
+ gem 'archive-tar-minitar'
data/Rakefile ADDED
@@ -0,0 +1,6 @@
+ @files = []
+
+ task :default do
+   system("rake -T")
+ end
+
data/lib/logstash/inputs/elasticsearch.rb ADDED
@@ -0,0 +1,134 @@
+ # encoding: utf-8
+ require "logstash/inputs/base"
+ require "logstash/namespace"
+ require "logstash/util/socket_peer"
+ require "logstash/json"
+ require "cgi" # for CGI.escape in #encode
+
+ # Read from an Elasticsearch cluster, based on search query results.
+ # This is useful for replaying test logs, reindexing, etc.
+ #
+ # Example:
+ #
+ #     input {
+ #       # Read all documents from Elasticsearch matching the given query
+ #       elasticsearch {
+ #         host => "localhost"
+ #         query => "ERROR"
+ #       }
+ #     }
+ #
+ # This would create an Elasticsearch query with the following format:
+ #
+ #     http://localhost:9200/logstash-*/_search?q=ERROR&scroll=1m&size=1000
+ #
+ # * TODO(sissel): Option to keep the index, type, and doc id so we can do reindexing?
+ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base
+   config_name "elasticsearch"
+   milestone 1
+
+   default :codec, "json"
+
+   # The IP address or hostname of your Elasticsearch server.
+   config :host, :validate => :string, :required => true
+
+   # The HTTP port of your Elasticsearch server's REST interface.
+   config :port, :validate => :number, :default => 9200
+
+   # The index or alias to search.
+   config :index, :validate => :string, :default => "logstash-*"
+
+   # The query to be executed.
+   config :query, :validate => :string, :default => "*"
+
+   # Enable the Elasticsearch "scan" search type. This disables sorting
+   # but improves throughput on large result sets.
+   config :scan, :validate => :boolean, :default => true
+
+   # The maximum number of hits returned per scroll request.
+   config :size, :validate => :number, :default => 1000
+
+   # This parameter controls the keepalive time of the scrolling request,
+   # expressed as a time unit (e.g. "1m"), and initiates the scrolling
+   # process. The timeout applies per round trip (i.e. between one scroll
+   # request and the next).
+   config :scroll, :validate => :string, :default => "1m"
+
+   public
+   def register
+     require "ftw"
+     @agent = FTW::Agent.new
+
+     params = {
+       "q" => @query,
+       "scroll" => @scroll,
+       "size" => "#{@size}",
+     }
+     params['search_type'] = "scan" if @scan
+
+     @search_url = "http://#{@host}:#{@port}/#{@index}/_search?#{encode(params)}"
+     @scroll_url = "http://#{@host}:#{@port}/_search/scroll?#{encode({"scroll" => @scroll})}"
+   end # def register
+
+   private
+   def encode(hash)
+     return hash.collect do |key, value|
+       CGI.escape(key) + "=" + CGI.escape(value)
+     end.join("&")
+   end # def encode
+
+   private
+   def execute_search_request
+     response = @agent.get!(@search_url)
+     json = ""
+     response.read_body { |c| json << c }
+     json
+   end
+
+   private
+   def execute_scroll_request(scroll_id)
+     response = @agent.post!(@scroll_url, :body => scroll_id)
+     json = ""
+     response.read_body { |c| json << c }
+     json
+   end
+
+   public
+   def run(output_queue)
+     result = LogStash::Json.load(execute_search_request)
+     scroll_id = result["_scroll_id"]
+
+     # When using search_type=scan, the initial request returns no hits,
+     # only a scroll id, so issue the first scroll request here.
+     if @scan
+       result = LogStash::Json.load(execute_scroll_request(scroll_id))
+     end
+
+     loop do
+       break if result.nil?
+       hits = result["hits"]["hits"]
+       break if hits.empty?
+
+       hits.each do |hit|
+         # Hack to make codecs work
+         @codec.decode(LogStash::Json.dump(hit["_source"])) do |event|
+           decorate(event)
+           output_queue << event
+         end
+       end
+
+       # Get the scroll id from the previous result set and use it to fetch the next one
+       scroll_id = result["_scroll_id"]
+
+       # Fetch the next result set
+       result = LogStash::Json.load(execute_scroll_request(scroll_id))
+
+       if result["error"]
+         @logger.warn(result["error"], :request => @scroll_url)
+         # TODO(sissel): raise an error instead of breaking
+         break
+       end
+     end
+   rescue LogStash::ShutdownSignal
+     # Do nothing, let us quit.
+   end # def run
+ end # class LogStash::Inputs::Elasticsearch
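For reference, the HTTP exchange that register and run drive against Elasticsearch 1.x can be reproduced without FTW. A hedged sketch using only the standard library; the host, port, index, and query values are placeholders, not taken from the gem:

    require "net/http"
    require "json"
    require "cgi"

    host, port, index = "localhost", 9200, "logstash-*"
    params = { "q" => "ERROR", "scroll" => "1m", "size" => "1000", "search_type" => "scan" }
    query  = params.map { |k, v| "#{CGI.escape(k)}=#{CGI.escape(v)}" }.join("&")

    http = Net::HTTP.new(host, port)
    # Initial request: with search_type=scan this returns only a _scroll_id, no hits.
    result    = JSON.parse(http.get("/#{index}/_search?#{query}").body)
    scroll_id = result["_scroll_id"]

    loop do
      # Each scroll request takes the previous scroll id as a plain-text body
      # and returns the next page of hits plus a (possibly new) scroll id.
      result = JSON.parse(http.post("/_search/scroll?scroll=1m", scroll_id).body)
      break if result["hits"]["hits"].empty?
      result["hits"]["hits"].each { |hit| puts hit["_source"] }
      scroll_id = result["_scroll_id"]
    end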
data/logstash-input-elasticsearch.gemspec ADDED
@@ -0,0 +1,29 @@
+ Gem::Specification.new do |s|
+
+   s.name          = 'logstash-input-elasticsearch'
+   s.version       = '0.1.0'
+   s.licenses      = ['Apache License (2.0)']
+   s.summary       = "Read from an Elasticsearch cluster, based on search query results"
+   s.description   = "Read from an Elasticsearch cluster, based on search query results"
+   s.authors       = ["Elasticsearch"]
+   s.email         = 'richard.pijnenburg@elasticsearch.com'
+   s.homepage      = "http://logstash.net/"
+   s.require_paths = ["lib"]
+
+   # Files
+   s.files = `git ls-files`.split($\) + ::Dir.glob('vendor/*')
+
+   # Tests
+   s.test_files = s.files.grep(%r{^(test|spec|features)/})
+
+   # Special flag to let us know this is actually a logstash plugin
+   s.metadata = { "logstash_plugin" => "true", "group" => "input" }
+
+   # Gem dependencies
+   s.add_runtime_dependency 'logstash', '>= 1.4.0', '< 2.0.0'
+   s.add_runtime_dependency 'ftw', ['~> 0.0.39']
+   s.add_runtime_dependency 'logstash-codec-json'
+
+ end
+
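The "logstash_plugin" metadata flag above is what marks the gem as a Logstash plugin. Once the gem is installed, the flag can be inspected through RubyGems; a sketch, not part of the gem:

    require "rubygems"

    spec = Gem::Specification.find_by_name("logstash-input-elasticsearch")
    puts spec.metadata["logstash_plugin"] # => "true"
    puts spec.metadata["group"]           # => "input"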
data/rakelib/publish.rake ADDED
@@ -0,0 +1,9 @@
+ require "gem_publisher"
+
+ desc "Publish gem to RubyGems.org"
+ task :publish_gem do |t|
+   gem_file = Dir.glob(File.expand_path('../*.gemspec', File.dirname(__FILE__))).first
+   gem = GemPublisher.publish_if_updated(gem_file, :rubygems)
+   puts "Published #{gem}" if gem
+ end
+
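With this task in place, gem_publisher pushes the gem only when the version in the gemspec is not already published, so cutting a release reduces to running "rake publish_gem" from the plugin root (assuming RubyGems push credentials are configured).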
data/rakelib/vendor.rake ADDED
@@ -0,0 +1,169 @@
+ require "net/http"
+ require "uri"
+ require "digest/sha1"
+ require "zlib"
+
+ def vendor(*args)
+   return File.join("vendor", *args)
+ end
+
+ directory "vendor/" => ["vendor"] do |task, args|
+   mkdir task.name
+ end
+
+ def fetch(url, sha1, output)
+   puts "Downloading #{url}"
+   actual_sha1 = download(url, output)
+
+   if actual_sha1 != sha1
+     fail "SHA1 does not match (expected '#{sha1}' but got '#{actual_sha1}')"
+   end
+ end # def fetch
+
+ def file_fetch(url, sha1)
+   filename = File.basename(URI(url).path)
+   output = "vendor/#{filename}"
+   task output => [ "vendor/" ] do
+     begin
+       actual_sha1 = file_sha1(output)
+       if actual_sha1 != sha1
+         fetch(url, sha1, output)
+       end
+     rescue Errno::ENOENT
+       fetch(url, sha1, output)
+     end
+   end.invoke
+
+   return output
+ end
+
+ def file_sha1(path)
+   digest = Digest::SHA1.new
+   fd = File.new(path, "r")
+   while true
+     begin
+       digest << fd.sysread(16384)
+     rescue EOFError
+       break
+     end
+   end
+   return digest.hexdigest
+ ensure
+   fd.close if fd
+ end
+
+ def download(url, output)
+   uri = URI(url)
+   digest = Digest::SHA1.new
+   tmp = "#{output}.tmp"
+   Net::HTTP.start(uri.host, uri.port, :use_ssl => (uri.scheme == "https")) do |http|
+     request = Net::HTTP::Get.new(uri.path)
+     http.request(request) do |response|
+       fail "HTTP fetch failed for #{url}. #{response}" unless ["200", "301"].include?(response.code)
+       size = (response["content-length"] || -1).to_f
+       count = 0
+       File.open(tmp, "w") do |fd|
+         response.read_body do |chunk|
+           fd.write(chunk)
+           digest << chunk
+           if size > 0 && $stdout.tty?
+             count += chunk.bytesize
+             $stdout.write(sprintf("\r%0.2f%%", count / size * 100))
+           end
+         end
+       end
+       $stdout.write("\r        \r") if $stdout.tty?
+     end
+   end
+
+   File.rename(tmp, output)
+
+   return digest.hexdigest
+ rescue SocketError => e
+   puts "Failure while downloading #{url}: #{e}"
+   raise
+ ensure
+   File.unlink(tmp) if File.exist?(tmp)
+ end # def download
+
+ def untar(tarball, &block)
+   require "archive/tar/minitar"
+   tgz = Zlib::GzipReader.new(File.open(tarball))
+   tar = Archive::Tar::Minitar::Input.open(tgz)
+   tar.each do |entry|
+     path = block.call(entry)
+     next if path.nil?
+     parent = File.dirname(path)
+
+     mkdir_p parent unless File.directory?(parent)
+
+     if entry.directory?
+       mkdir path unless File.directory?(path)
+     else
+       entry_mode = entry.instance_eval { @mode } & 0777
+       if File.exist?(path)
+         stat = File.stat(path)
+         # TODO(sissel): Submit a patch to archive-tar-minitar upstream to
+         # expose headers in the entry.
+         entry_size = entry.instance_eval { @size }
+         # If the file size and mode are unchanged, skip writing.
+         next if stat.size == entry_size && (stat.mode & 0777) == entry_mode
+       end
+       puts "Extracting #{entry.full_name} from #{tarball} #{entry_mode.to_s(8)}"
+       File.open(path, "w") do |fd|
+         # eof? check lets us skip empty files. Necessary because the API provided by
+         # Archive::Tar::Minitar::Reader::EntryStream only mostly acts like an
+         # IO object. Something about empty files in this EntryStream causes
+         # IO.copy_stream to throw "can't convert nil into String" on JRuby.
+         # TODO(sissel): File a bug about this.
+         while !entry.eof?
+           chunk = entry.read(16384)
+           fd.write(chunk)
+         end
+         #IO.copy_stream(entry, fd)
+       end
+       File.chmod(entry_mode, path)
+     end
+   end
+   tar.close
+   File.unlink(tarball) if File.file?(tarball)
+ end # def untar
+
+ def ungz(file)
+   outpath = file.sub(/\.gz$/, '')
+   tgz = Zlib::GzipReader.new(File.open(file))
+   begin
+     File.open(outpath, "w") do |out|
+       IO::copy_stream(tgz, out)
+     end
+     File.unlink(file)
+   rescue
+     File.unlink(outpath) if File.file?(outpath)
+     raise
+   end
+   tgz.close
+ end
+
+ desc "Process any vendor files required for this plugin"
+ task "vendor" do |task, args|
+   @files.each do |file|
+     download = file_fetch(file['url'], file['sha1'])
+     if download =~ /\.tar\.gz$/
+       prefix = download.gsub('.tar.gz', '').gsub('vendor/', '')
+       untar(download) do |entry|
+         if !file['files'].nil?
+           next unless file['files'].include?(entry.full_name.gsub(prefix, ''))
+         end
+         out = entry.full_name.split("/").last
+         File.join('vendor', out)
+       end
+     elsif download =~ /\.gz$/
+       ungz(download)
+     end
+   end
+ end
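The "vendor" task iterates over @files, which the Rakefile initializes to an empty array for this plugin. For a plugin that ships vendored data, each entry would look something like the sketch below; the URL, SHA1, and file list are illustrative placeholders, not real artifacts:

    # Hypothetical @files entry consumed by the "vendor" task.
    @files = [
      {
        'url'   => 'http://example.com/dist/some-dataset.tar.gz',
        'sha1'  => '0123456789abcdef0123456789abcdef01234567',
        # Optional: extract only these paths (relative to the tarball's top-level prefix).
        'files' => ['/data/types.db'],
      },
    ]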
data/spec/inputs/elasticsearch_spec.rb ADDED
@@ -0,0 +1,80 @@
+ require "spec_helper"
+ require "logstash/inputs/elasticsearch"
+
+ describe "inputs/elasticsearch" do
+
+   search_response = <<-RESPONSE
+     {
+       "_scroll_id":"xxx",
+       "took":5,
+       "timed_out":false,
+       "_shards":{"total":15,"successful":15,"failed":0},
+       "hits":{
+         "total":1000050,
+         "max_score":1.0,
+         "hits":[
+           {
+             "_index":"logstash2",
+             "_type":"logs",
+             "_id":"AmaqL7VuSWKF-F6N_Gz72g",
+             "_score":1.0,
+             "_source" : {
+               "message":"foobar",
+               "@version":"1",
+               "@timestamp":"2014-05-19T21:08:39.000Z",
+               "host":"colin-mbp13r"
+             }
+           }
+         ]
+       }
+     }
+   RESPONSE
+
+   scroll_response = <<-RESPONSE
+     {
+       "hits":{
+         "hits":[]
+       }
+     }
+   RESPONSE
+
+   config <<-CONFIG
+     input {
+       elasticsearch {
+         host => "localhost"
+         scan => false
+       }
+     }
+   CONFIG
+
+   it "should retrieve json event from elasticsearch" do
+     # I somewhat duplicated our "input" rspec extension because I needed to add mocks
+     # for the actual ES calls, and rspec expectations need to be inside the "it" block,
+     # while the "input" extension defines the "it" itself.
+     # TODO(colin) see how we can improve our rspec extension to better integrate in these scenarios
+
+     expect_any_instance_of(LogStash::Inputs::Elasticsearch).to receive(:execute_search_request).and_return(search_response)
+     expect_any_instance_of(LogStash::Inputs::Elasticsearch).to receive(:execute_scroll_request).with(any_args).and_return(scroll_response)
+
+     pipeline = LogStash::Pipeline.new(config)
+     queue = Queue.new
+     pipeline.instance_eval do
+       @output_func = lambda { |event| queue << event }
+     end
+     pipeline_thread = Thread.new { pipeline.run }
+     event = queue.pop
+
+     insist { event["message"] } == "foobar"
+
+     # Do not call pipeline.shutdown here, as it will stop the plugin execution randomly
+     # and may kill the input before execute_scroll_request is called.
+     # TODO(colin) we should rework the pipeline shutdown to allow a soft/clean shutdown
+     # mechanism, using a shutdown event which can be fed into each plugin queue; when
+     # the plugin sees it, it exits after completing its processing.
+     #
+     # pipeline.shutdown
+     #
+     # Instead, since our scroll_response will terminate the plugin, we can just join the pipeline thread.
+     pipeline_thread.join
+   end
+ end
metadata ADDED
@@ -0,0 +1,102 @@
+ --- !ruby/object:Gem::Specification
+ name: logstash-input-elasticsearch
+ version: !ruby/object:Gem::Version
+   version: 0.1.0
+ platform: ruby
+ authors:
+ - Elasticsearch
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2014-11-03 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: logstash
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: 1.4.0
+     - - <
+       - !ruby/object:Gem::Version
+         version: 2.0.0
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: 1.4.0
+     - - <
+       - !ruby/object:Gem::Version
+         version: 2.0.0
+ - !ruby/object:Gem::Dependency
+   name: ftw
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         version: 0.0.39
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         version: 0.0.39
+ - !ruby/object:Gem::Dependency
+   name: logstash-codec-json
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+ description: Read from an Elasticsearch cluster, based on search query results
+ email: richard.pijnenburg@elasticsearch.com
+ executables: []
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - .gitignore
+ - Gemfile
+ - Rakefile
+ - lib/logstash/inputs/elasticsearch.rb
+ - logstash-input-elasticsearch.gemspec
+ - rakelib/publish.rake
+ - rakelib/vendor.rake
+ - spec/inputs/elasticsearch_spec.rb
+ homepage: http://logstash.net/
+ licenses:
+ - Apache License (2.0)
+ metadata:
+   logstash_plugin: 'true'
+   group: input
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements: []
+ rubyforge_project:
+ rubygems_version: 2.4.1
+ signing_key:
+ specification_version: 4
+ summary: Read from an Elasticsearch cluster, based on search query results
+ test_files:
+ - spec/inputs/elasticsearch_spec.rb