logstash-output-elasticsearch 0.1.0 → 0.1.1

checksums.yaml CHANGED
@@ -1,15 +1,15 @@
  ---
  !binary "U0hBMQ==":
  metadata.gz: !binary |-
- M2UwZjRiN2E5NDZkZDU0NmUyYjMwNDk4ZWFjMGUxYmY1Y2IwZjMzYQ==
+ YTQ0NzIzNzc2MmVkMTYxM2Y5OTBhNTExZjQ4Nzc5ZGJlNDU5MDQwYg==
  data.tar.gz: !binary |-
- MWMxZmIyNjg2NTY1MzkwNDc0ZWE2OTg5MmQ1MWQ3M2QwZDdjM2RkMQ==
+ NGQxYTgwYzRlMjhkMDBkYWFjODM4ZDAyNjhmNWE2NDZjOWY1ZDliZA==
  SHA512:
  metadata.gz: !binary |-
- YWEzNmU0ZWMxOTQxNTNjZThkOTkyZDU2ZTBmZjc5M2RhZDc1MjQ1ZDRkYjgx
- MmY4NGZlN2I3MTY0ZDhkZjNhMzM0YjQ0MzM4OWNhMTU0ZDZkNGU2MzdhNWYz
- MmNhZGVhZTViOWExNGU3NWVjMGViYjJkMzRhOGRiNDliZjcxNzk=
+ NTJlMWQzNDU2OTMwZGMzZDMyNjAwYjE5M2M0NjQ2ZWQ5YmQ4OGRlMDIxODEy
+ MDY0NTI4NTU2ZWE0NDdlMmU4MTY1NjAwM2IwODAxZTUwZGMxN2IyNDdhZWI3
+ MWU5MGYyYmNjYWMyYzA5OTQzM2JjMzZhNTk3Njg4NThkZTJjYjE=
  data.tar.gz: !binary |-
- OGE1ODUxZDU3OTgyODM4ZTI2Njk3YTMyMGVmY2JkNzZmNDNjYmU1NWI5ZDg0
- ODZjYWYxZjhjZDU4ODRhNDY4YWU5NzZjNDE0NTkyNTk1ODkyZjVjOTExMWRl
- NjM4MmFiYzlhMjM3MmY2ZTA1ZDBlN2EyYThkNjgwMjQwZjQ5ZTg=
+ YTQ1YjU1MDNmOTliMzgwYTU4YmNjYTdlMmFjNjQyNTBiODcxMTk5NWU5ZGJh
+ MjNkMDZlYTBlM2RkOTdjNTBjNDI5MDI4MDIzMTNjNzBjZjQxMzE1ZTk0YTc3
+ Y2Q3MjgxNmE2NjFmOTQxMjdkYWE3ZWU3NjMwNzgwNzU3MjY0ZGU=
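
The `!binary` values above are Base64-encoded hex digests of the two archives inside the gem. A minimal verification sketch (file names assume an unpacked .gem; the digest shown is the new metadata.gz SHA1 from this diff):

    require "base64"
    require "digest"

    expected = Base64.decode64("YTQ0NzIzNzc2MmVkMTYxM2Y5OTBhNTExZjQ4Nzc5ZGJlNDU5MDQwYg==")
    actual   = Digest::SHA1.file("metadata.gz").hexdigest

    abort "checksum mismatch" unless expected == actual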

lib/logstash/outputs/elasticsearch.rb ADDED
@@ -0,0 +1,328 @@
# encoding: utf-8
require "logstash/namespace"
require "logstash/environment"
require "logstash/outputs/base"
require "logstash/json"
require "stud/buffer"
require "socket" # for Socket.gethostname

# This output lets you store logs in Elasticsearch and is the most recommended
# output for Logstash. If you plan on using the Kibana web interface, you'll
# need to use this output.
#
# *VERSION NOTE*: Your Elasticsearch cluster must be running Elasticsearch
# %ELASTICSEARCH_VERSION%. If you use any other version of Elasticsearch,
# you should set `protocol => http` in this plugin.
#
# If you want to set other Elasticsearch options that are not exposed directly
# as configuration options, there are two methods:
#
# * Create an `elasticsearch.yml` file in the $PWD of the Logstash process
# * Pass in es.* java properties (java -Des.node.foo= or ruby -J-Des.node.foo=)
#
# With the default `protocol` setting ("node"), this plugin will join your
# Elasticsearch cluster as a client node, so it will show up in Elasticsearch's
# cluster status.
#
# You can learn more about Elasticsearch at <http://www.elasticsearch.org>
#
# ## Operational Notes
#
# Template management requires Elasticsearch version 0.90.7 or later. If you
# are using a version older than this, please upgrade. You will receive
# more benefits than just template management!
#
# If using the default `protocol` setting ("node"), your firewalls might need
# to permit port 9300 in *both* directions (from Logstash to Elasticsearch,
# and from Elasticsearch to Logstash).
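#
# A minimal configuration (host value is illustrative, not a default) looks like:
#
#     output {
#       elasticsearch {
#         protocol => "http"
#         host => "es.example.com"
#       }
#     }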
class LogStash::Outputs::ElasticSearch < LogStash::Outputs::Base
  include Stud::Buffer

  config_name "elasticsearch"
  milestone 3

  # The index to write events to. This can be dynamic using the %{foo} syntax.
  # The default value will partition your indices by day so you can more easily
  # delete old data or only search specific date ranges.
  # Indexes may not contain uppercase characters.
  config :index, :validate => :string, :default => "logstash-%{+YYYY.MM.dd}"

  # The index type to write events to. Generally you should try to write only
  # similar events to the same 'type'. String expansion '%{foo}' works here.
  config :index_type, :validate => :string

  # Starting in Logstash 1.3 (unless you set option "manage_template" to false),
  # a default mapping template for Elasticsearch will be applied if you do not
  # already have one set to match the index pattern defined (default of
  # "logstash-%{+YYYY.MM.dd}"), minus any variables. In this case, for example,
  # the template will be applied to all indices starting with logstash-*.
  #
  # If you have dynamic templating (e.g. creating indices based on field names),
  # then you should set "manage_template" to false and use the REST API to upload
  # your templates manually.
  config :manage_template, :validate => :boolean, :default => true

  # This configuration option defines how the template is named inside Elasticsearch.
  # Note that if you have used the template management features and subsequently
  # change this, you will need to prune the old template manually, e.g.
  #
  #     curl -XDELETE http://localhost:9200/_template/OldTemplateName?pretty
  #
  # where OldTemplateName is whatever the former setting was.
  config :template_name, :validate => :string, :default => "logstash"

  # You can set the path to your own template here, if you so desire.
  # If not set, the included template will be used.
  config :template, :validate => :path

  # Overwrite the current template with whatever is configured
  # in the template and template_name directives.
  config :template_overwrite, :validate => :boolean, :default => false

  # The document ID for the index. Useful for overwriting existing entries in
  # Elasticsearch with the same ID.
  config :document_id, :validate => :string, :default => nil
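
  # For illustration only (the field name is a placeholder), an idempotent-write
  # configuration might derive the id from the event:
  #
  #     elasticsearch {
  #       document_id => "%{fingerprint}"
  #     }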
83
+
84
+ # The name of your cluster if you set it on the Elasticsearch side. Useful
85
+ # for discovery.
86
+ config :cluster, :validate => :string
87
+
88
+ # The hostname or IP address of the host to use for Elasticsearch unicast discovery
89
+ # This is only required if the normal multicast/cluster discovery stuff won't
90
+ # work in your environment.
91
+ config :host, :validate => :string
92
+
93
+ # The port for Elasticsearch transport to use.
94
+ #
95
+ # If you do not set this, the following defaults are used:
96
+ # * `protocol => http` - port 9200
97
+ # * `protocol => transport` - port 9300-9305
98
+ # * `protocol => node` - port 9300-9305
99
+ config :port, :validate => :string
100
+
101
+ # The name/address of the host to bind to for Elasticsearch clustering
102
+ config :bind_host, :validate => :string
103
+
104
+ # This is only valid for the 'node' protocol.
105
+ #
106
+ # The port for the node to listen on.
107
+ config :bind_port, :validate => :number
108
+
109
+ # Run the Elasticsearch server embedded in this process.
110
+ # This option is useful if you want to run a single Logstash process that
111
+ # handles log processing and indexing; it saves you from needing to run
112
+ # a separate Elasticsearch process.
113
+ config :embedded, :validate => :boolean, :default => false
114
+
115
+ # If you are running the embedded Elasticsearch server, you can set the http
116
+ # port it listens on here; it is not common to need this setting changed from
117
+ # default.
118
+ config :embedded_http_port, :validate => :string, :default => "9200-9300"
119
+
120
+ # This setting no longer does anything. It exists to keep config validation
121
+ # from failing. It will be removed in future versions.
122
+ config :max_inflight_requests, :validate => :number, :default => 50, :deprecated => true
123
+
124
+ # The node name Elasticsearch will use when joining a cluster.
125
+ #
126
+ # By default, this is generated internally by the ES client.
127
+ config :node_name, :validate => :string
128
+
129
+ # This plugin uses the bulk index api for improved indexing performance.
130
+ # To make efficient bulk api calls, we will buffer a certain number of
131
+ # events before flushing that out to Elasticsearch. This setting
132
+ # controls how many events will be buffered before sending a batch
133
+ # of events.
134
+ config :flush_size, :validate => :number, :default => 5000
135
+
136
+ # The amount of time since last flush before a flush is forced.
137
+ #
138
+ # This setting helps ensure slow event rates don't get stuck in Logstash.
139
+ # For example, if your `flush_size` is 100, and you have received 10 events,
140
+ # and it has been more than `idle_flush_time` seconds since the last flush,
141
+ # Logstash will flush those 10 events automatically.
142
+ #
143
+ # This helps keep both fast and slow log streams moving along in
144
+ # near-real-time.
145
+ config :idle_flush_time, :validate => :number, :default => 1
146
+
147
+ # Choose the protocol used to talk to Elasticsearch.
148
+ #
149
+ # The 'node' protocol will connect to the cluster as a normal Elasticsearch
150
+ # node (but will not store data). This allows you to use things like
151
+ # multicast discovery. If you use the `node` protocol, you must permit
152
+ # bidirectional communication on the port 9300 (or whichever port you have
153
+ # configured).
154
+ #
155
+ # The 'transport' protocol will connect to the host you specify and will
156
+ # not show up as a 'node' in the Elasticsearch cluster. This is useful
157
+ # in situations where you cannot permit connections outbound from the
158
+ # Elasticsearch cluster to this Logstash server.
159
+ #
160
+ # The 'http' protocol will use the Elasticsearch REST/HTTP interface to talk
161
+ # to elasticsearch.
162
+ #
163
+ # All protocols will use bulk requests when talking to Elasticsearch.
164
+ #
165
+ # The default `protocol` setting under java/jruby is "node". The default
166
+ # `protocol` on non-java rubies is "http"
167
+ config :protocol, :validate => [ "node", "transport", "http" ]
168
+
169
+ # The Elasticsearch action to perform. Valid actions are: `index`, `delete`.
170
+ #
171
+ # Use of this setting *REQUIRES* you also configure the `document_id` setting
172
+ # because `delete` actions all require a document id.
173
+ #
174
+ # What does each action do?
175
+ #
176
+ # - index: indexes a document (an event from logstash).
177
+ # - delete: deletes a document by id
178
+ #
179
+ # For more details on actions, check out the [Elasticsearch bulk API documentation](http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/docs-bulk.html)
180
+ config :action, :validate => :string, :default => "index"
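
  # For illustration only (values are placeholders), deleting previously
  # indexed events by id might look like:
  #
  #     elasticsearch {
  #       action => "delete"
  #       document_id => "%{fingerprint}"
  #     }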

  public
  def register
    client_settings = {}
    client_settings["cluster.name"] = @cluster if @cluster
    client_settings["network.host"] = @bind_host if @bind_host
    client_settings["transport.tcp.port"] = @bind_port if @bind_port

    if @node_name
      client_settings["node.name"] = @node_name
    else
      client_settings["node.name"] = "logstash-#{Socket.gethostname}-#{$$}-#{object_id}"
    end

    if @protocol.nil?
      @protocol = LogStash::Environment.jruby? ? "node" : "http"
    end

    if ["node", "transport"].include?(@protocol)
      # Node or TransportClient; requires JRuby
      raise(LogStash::PluginLoadingError, "This configuration requires JRuby. If you are not using JRuby, you must set 'protocol' to 'http'. For example: output { elasticsearch { protocol => \"http\" } }") unless LogStash::Environment.jruby?
      LogStash::Environment.load_elasticsearch_jars!

      # setup log4j properties for Elasticsearch
      LogStash::Logger.setup_log4j(@logger)
    end

    require "logstash/outputs/elasticsearch/protocol"

    if @port.nil?
      @port = case @protocol
        when "http"; "9200"
        when "transport", "node"; "9300-9305"
      end
    end

    if @host.nil? && @protocol == "http"
      @logger.info("No 'host' set in elasticsearch output. Defaulting to localhost")
      @host = "localhost"
    end

    options = {
      :host => @host,
      :port => @port,
      :client_settings => client_settings
    }

    client_class = case @protocol
      when "transport"
        LogStash::Outputs::Elasticsearch::Protocols::TransportClient
      when "node"
        LogStash::Outputs::Elasticsearch::Protocols::NodeClient
      when "http"
        LogStash::Outputs::Elasticsearch::Protocols::HTTPClient
    end

    if @embedded
      raise(LogStash::ConfigurationError, "The 'embedded => true' setting is only valid for the elasticsearch output under JRuby. You are running #{RUBY_DESCRIPTION}") unless LogStash::Environment.jruby?
      LogStash::Environment.load_elasticsearch_jars!

      # Default @host with embedded to localhost. This should help avoid
      # newbies tripping on ubuntu and other distros that have a default
      # firewall that blocks multicast.
      @host ||= "localhost"

      # Start Elasticsearch locally.
      start_local_elasticsearch
    end

    @client = client_class.new(options)

    @logger.info("New Elasticsearch output", :cluster => @cluster,
                 :host => @host, :port => @port, :embedded => @embedded,
                 :protocol => @protocol)

    if @manage_template
      @logger.info("Automatic template management enabled", :manage_template => @manage_template.to_s)
      @client.template_install(@template_name, get_template, @template_overwrite)
    end # if @manage_template

    buffer_initialize(
      :max_items => @flush_size,
      :max_interval => @idle_flush_time,
      :logger => @logger
    )
  end # def register

  public
  def get_template
    if @template.nil?
      @template = LogStash::Environment.plugin_path("outputs/elasticsearch/elasticsearch-template.json")
      if !File.exists?(@template)
        raise "You must specify 'template => ...' in your elasticsearch output (I looked for '#{@template}')"
      end
    end
    template_json = IO.read(@template).gsub(/\n/, '')
    @logger.info("Using mapping template", :template => template_json)
    return LogStash::Json.load(template_json)
  end # def get_template

  protected
  def start_local_elasticsearch
    @logger.info("Starting embedded Elasticsearch local node.")
    builder = org.elasticsearch.node.NodeBuilder.nodeBuilder
    # Disable 'local only' - LOGSTASH-277
    #builder.local(true)
    builder.settings.put("cluster.name", @cluster) if @cluster
    builder.settings.put("node.name", @node_name) if @node_name
    builder.settings.put("network.host", @bind_host) if @bind_host
    builder.settings.put("http.port", @embedded_http_port)

    @embedded_elasticsearch = builder.node
    @embedded_elasticsearch.start
  end # def start_local_elasticsearch

  public
  def receive(event)
    return unless output?(event)

    # Set the 'type' value for the index.
    if @index_type
      type = event.sprintf(@index_type)
    else
      type = event["type"] || "logs"
    end

    index = event.sprintf(@index)

    document_id = @document_id ? event.sprintf(@document_id) : nil
    buffer_receive([event.sprintf(@action), { :_id => document_id, :_index => index, :_type => type }, event.to_hash])
  end # def receive

  def flush(actions, teardown=false)
    @client.bulk(actions)
    # TODO(sissel): Handle errors. Since bulk requests could mostly succeed
    # (aka partially fail), we need to figure out what documents need to be
    # retried.
    #
    # In the worst case, a failing flush (exception) will incur a retry from Stud::Buffer.
  end # def flush

  def teardown
    buffer_flush(:final => true)
  end

end # class LogStash::Outputs::ElasticSearch
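
To make the buffering contract concrete: `receive` turns each event into a three-element tuple and hands it to `buffer_receive`; `flush` later passes an array of those tuples to the protocol client's `bulk` method. A minimal sketch of one such tuple (field values are illustrative):

    # [ action, action_metadata, source ]
    action = [
      "index",                                          # from the `action` setting
      { :_id => nil,                                    # nil unless `document_id` is set
        :_index => "logstash-2014.08.04",               # `index` after %{+YYYY.MM.dd} expansion
        :_type => "logs" },                             # event type, or "logs" when unset
      { "message" => "hello world", "@version" => "1" } # event.to_hash
    ]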

lib/logstash/outputs/elasticsearch/elasticsearch-template.json ADDED
@@ -0,0 +1,34 @@
{
  "template" : "logstash-*",
  "settings" : {
    "index.refresh_interval" : "5s"
  },
  "mappings" : {
    "_default_" : {
      "_all" : {"enabled" : true},
      "dynamic_templates" : [ {
        "string_fields" : {
          "match" : "*",
          "match_mapping_type" : "string",
          "mapping" : {
            "type" : "string", "index" : "analyzed", "omit_norms" : true,
            "fields" : {
              "raw" : {"type": "string", "index" : "not_analyzed", "ignore_above" : 256}
            }
          }
        }
      } ],
      "properties" : {
        "@version": { "type": "string", "index": "not_analyzed" },
        "geoip" : {
          "type" : "object",
          "dynamic": true,
          "path": "full",
          "properties" : {
            "location" : { "type" : "geo_point" }
          }
        }
      }
    }
  }
}
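
The practical effect of the `raw` multi-field above: every string field is indexed twice, once analyzed for full-text search and once verbatim. A hedged sketch using the elasticsearch-ruby client (index pattern and query values are illustrative):

    require "elasticsearch"

    es = Elasticsearch::Client.new(:host => "localhost:9200")

    # Analyzed field: matches individual terms and phrases.
    es.search(:index => "logstash-*", :q => 'message:"sample message"')

    # not_analyzed .raw copy: matches only the exact, whole value.
    es.search(:index => "logstash-*", :q => 'message.raw:"sample message here"')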

lib/logstash/outputs/elasticsearch/protocol.rb ADDED
@@ -0,0 +1,271 @@
require "logstash/outputs/elasticsearch"
require "cabin"

module LogStash::Outputs::Elasticsearch
  module Protocols
    class Base
      private
      def initialize(options={})
        # host(s), port, cluster
        @logger = Cabin::Channel.get
      end

      def client
        return @client if @client
        @client = build_client(@options)
        return @client
      end # def client

      def template_install(name, template, force=false)
        if template_exists?(name) && !force
          @logger.debug("Found existing Elasticsearch template. Skipping template management", :name => name)
          return
        end
        template_put(name, template)
      end

      # Do a bulk request with the given actions.
      #
      # 'actions' is expected to be an array of bulk requests as string json
      # values.
      #
      # Each 'action' becomes a single line in the bulk api call. For more
      # details on the format of each action, see the Elasticsearch bulk API
      # documentation.
      def bulk(actions)
        raise NotImplementedError, "You must implement this yourself"
        # bulk([
        #   '{ "index" : { "_index" : "test", "_type" : "type1", "_id" : "1" } }',
        #   '{ "field1" : "value1" }'
        # ])
      end

      public(:initialize, :template_install)
    end

    class HTTPClient < Base
      private

      DEFAULT_OPTIONS = {
        :port => 9200
      }

      def initialize(options={})
        require "ftw"
        super
        require "elasticsearch" # gem 'elasticsearch-ruby'
        @options = DEFAULT_OPTIONS.merge(options)
        @client = client
      end

      def build_client(options)
        client = Elasticsearch::Client.new(
          :host => [options[:host], options[:port]].join(":")
        )

        # Use FTW to do indexing requests, for now, until we
        # can identify and resolve the performance problems with elasticsearch-ruby
        @bulk_url = "http://#{options[:host]}:#{options[:port]}/_bulk"
        @agent = FTW::Agent.new

        return client
      end

      if ENV["BULK"] == "esruby"
        def bulk(actions)
          bulk_esruby(actions)
        end
      else
        def bulk(actions)
          bulk_ftw(actions)
        end
      end

      def bulk_esruby(actions)
        @client.bulk(:body => actions.collect do |action, args, source|
          if source
            next [ { action => args }, source ]
          else
            next { action => args }
          end
        end.flatten)
      end # def bulk_esruby

      # Avoid creating a new string for newline every time
      NEWLINE = "\n".freeze
      def bulk_ftw(actions)
        body = actions.collect do |action, args, source|
          header = { action => args }
          if source
            next [ LogStash::Json.dump(header), NEWLINE, LogStash::Json.dump(source), NEWLINE ]
          else
            next [ LogStash::Json.dump(header), NEWLINE ]
          end
        end.flatten.join("")
        begin
          response = @agent.post!(@bulk_url, :body => body)
        rescue EOFError
          @logger.warn("EOF while writing request or reading response header from elasticsearch", :host => @host, :port => @port)
          raise
        end

        # Consume the body for error checking.
        # This will also free up the connection for reuse.
        response_body = ""
        begin
          response.read_body { |chunk| response_body += chunk }
        rescue EOFError
          @logger.warn("EOF while reading response body from elasticsearch",
                       :url => @bulk_url)
          raise
        end

        if response.status != 200
          @logger.error("Error writing (bulk) to elasticsearch",
                        :response => response, :response_body => response_body,
                        :request_body => body)
          raise "Non-OK response code from Elasticsearch: #{response.status}"
        end
      end # def bulk_ftw

      def template_exists?(name)
        @client.indices.get_template(:name => name)
        return true
      rescue Elasticsearch::Transport::Transport::Errors::NotFound
        return false
      end # def template_exists?

      def template_put(name, template)
        @client.indices.put_template(:name => name, :body => template)
      end # def template_put

      public(:bulk)
    end # class HTTPClient

    class NodeClient < Base
      private

      DEFAULT_OPTIONS = {
        :port => 9300,
      }

      def initialize(options={})
        super
        require "java"
        @options = DEFAULT_OPTIONS.merge(options)
        setup(@options)
        @client = client
      end # def initialize

      def settings
        return @settings
      end

      def setup(options={})
        @settings = org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder
        if options[:host]
          @settings.put("discovery.zen.ping.multicast.enabled", false)
          @settings.put("discovery.zen.ping.unicast.hosts", hosts(options))
        end

        @settings.put("node.client", true)
        @settings.put("http.enabled", false)

        if options[:client_settings]
          options[:client_settings].each do |key, value|
            @settings.put(key, value)
          end
        end

        return @settings
      end

      def hosts(options)
        if options[:port].to_s =~ /^\d+-\d+$/
          # Port ranges are 'host[port1-port2]' according to
          # http://www.elasticsearch.org/guide/reference/modules/discovery/zen/
          # However, it seems to only query the first port.
          # So generate our own list of unicast hosts to scan.
          range = Range.new(*options[:port].split("-"))
          return range.collect { |p| "#{options[:host]}:#{p}" }.join(",")
        else
          return "#{options[:host]}:#{options[:port]}"
        end
      end # def hosts
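
      # For example (illustrative values), hosts(:host => "127.0.0.1", :port => "9300-9302")
      # returns "127.0.0.1:9300,127.0.0.1:9301,127.0.0.1:9302".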

      def build_client(options)
        nodebuilder = org.elasticsearch.node.NodeBuilder.nodeBuilder
        return nodebuilder.settings(@settings).node.client
      end # def build_client

      def bulk(actions)
        # 'actions' is an array of [ action, action_metadata, source ] tuples.
        prep = @client.prepareBulk
        actions.each do |action, args, source|
          prep.add(build_request(action, args, source))
        end
        response = prep.execute.actionGet()

        # TODO(sissel): What format should the response be in?
      end # def bulk

      def build_request(action, args, source)
        case action
        when "index"
          request = org.elasticsearch.action.index.IndexRequest.new(args[:_index])
          request.id(args[:_id]) if args[:_id]
          request.source(source)
        when "delete"
          request = org.elasticsearch.action.delete.DeleteRequest.new(args[:_index])
          request.id(args[:_id])
        #when "update"
        #when "create"
        end # case action

        request.type(args[:_type]) if args[:_type]
        return request
      end # def build_request

      def template_exists?(name)
        request = org.elasticsearch.action.admin.indices.template.get.GetIndexTemplatesRequestBuilder.new(@client.admin.indices, name)
        response = request.get
        return !response.getIndexTemplates.isEmpty
      end # def template_exists?

      def template_put(name, template)
        request = org.elasticsearch.action.admin.indices.template.put.PutIndexTemplateRequestBuilder.new(@client.admin.indices, name)
        request.setSource(LogStash::Json.dump(template))

        # Execute the request and get the response; if it fails, we'll get an exception.
        request.get
      end # def template_put

      public(:initialize, :bulk)
    end # class NodeClient

    class TransportClient < NodeClient
      private
      def build_client(options)
        client = org.elasticsearch.client.transport.TransportClient.new(settings.build)

        if options[:host]
          client.addTransportAddress(
            org.elasticsearch.common.transport.InetSocketTransportAddress.new(
              options[:host], options[:port].to_i
            )
          )
        end

        return client
      end # def build_client
    end # class TransportClient
  end # module Protocols

  module Requests
    class GetIndexTemplates; end
    class Bulk; end
    class Index; end
    class Delete; end
  end
end
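
For the HTTP path, the wire format `bulk_ftw` builds is newline-delimited JSON: one header line per action, followed by the source document when there is one. A standalone sketch (using the json stdlib in place of LogStash::Json, with illustrative values):

    require "json"

    action, args, source = ["index", { :_index => "logstash-2014.08.04", :_type => "logs" }, { "message" => "hello world" }]

    body = JSON.dump(action => args) + "\n" + JSON.dump(source) + "\n"
    # => {"index":{"_index":"logstash-2014.08.04","_type":"logs"}}
    #    {"message":"hello world"}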

logstash-output-elasticsearch.gemspec ADDED
@@ -0,0 +1,33 @@
Gem::Specification.new do |s|

  s.name          = 'logstash-output-elasticsearch'
  s.version       = '0.1.1'
  s.licenses      = ['Apache License (2.0)']
  s.summary       = "Logstash Output to Elasticsearch"
  s.description   = "Output events to elasticsearch"
  s.authors       = ["Elasticsearch"]
  s.email         = 'rubycoder@example.com'
  s.homepage      = "http://logstash.net/"
  s.require_paths = ["lib"]

  # Files
  s.files = `git ls-files`.split($\)

  # Tests
  s.test_files = s.files.grep(%r{^(test|spec|features)/})

  # Special flag to let us know this is actually a logstash plugin
  s.metadata = { "logstash_plugin" => "true" }

  # Jar dependencies
  s.requirements << "jar 'org.elasticsearch:elasticsearch', '1.2.2'"

  # Gem dependencies
  s.add_runtime_dependency 'elasticsearch'
  s.add_runtime_dependency 'stud'
  s.add_runtime_dependency 'cabin', ['>=0.6.0']
  s.add_runtime_dependency 'ftw', ['~> 0.0.39']
  s.add_runtime_dependency 'logstash', '>= 1.4.0', '< 2.0.0'
  s.add_runtime_dependency 'jar-dependencies', ['~> 0.0.6']

end
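
Logstash discovers plugins through the `logstash_plugin` metadata flag set above. A quick sketch for inspecting it from an unpacked checkout (the path is illustrative):

    spec = Gem::Specification.load("logstash-output-elasticsearch.gemspec")
    spec.metadata["logstash_plugin"] # => "true"
    spec.version.to_s                # => "0.1.1"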

spec/outputs/elasticsearch.rb ADDED
@@ -0,0 +1,349 @@
require "test_utils"
require "ftw"
require "logstash/plugin"
require "logstash/json"

describe "outputs/elasticsearch" do
  extend LogStash::RSpec

  it "should register" do
    output = LogStash::Plugin.lookup("output", "elasticsearch").new("embedded" => "false", "protocol" => "transport", "manage_template" => "false")

    # register will try to load jars and raise if it cannot find jars
    expect { output.register }.to_not raise_error
  end

  describe "ship lots of events w/ default index_type", :elasticsearch => true do
    # Generate a random index name
    index = 10.times.collect { rand(10).to_s }.join("")
    type = 10.times.collect { rand(10).to_s }.join("")

    # Write about 10000 events. Add jitter to increase the likelihood of
    # finding boundary-related bugs.
    event_count = 10000 + rand(500)
    flush_size = rand(200) + 1

    config <<-CONFIG
      input {
        generator {
          message => "hello world"
          count => #{event_count}
          type => "#{type}"
        }
      }
      output {
        elasticsearch {
          host => "127.0.0.1"
          index => "#{index}"
          flush_size => #{flush_size}
        }
      }
    CONFIG

    agent do
      # Try a few times to check if we have the correct number of events stored
      # in ES.
      #
      # We try multiple times to allow final agent flushes as well as allowing
      # elasticsearch to finish processing everything.
      ftw = FTW::Agent.new
      ftw.post!("http://localhost:9200/#{index}/_refresh")

      # Wait until all events are available.
      Stud::try(10.times) do
        data = ""
        response = ftw.get!("http://127.0.0.1:9200/#{index}/_count?q=*")
        response.read_body { |chunk| data << chunk }
        result = LogStash::Json.load(data)
        count = result["count"]
        insist { count } == event_count
      end

      response = ftw.get!("http://127.0.0.1:9200/#{index}/_search?q=*&size=1000")
      data = ""
      response.read_body { |chunk| data << chunk }
      result = LogStash::Json.load(data)
      result["hits"]["hits"].each do |doc|
        # With no 'index_type' set, the document type should be the type
        # set on the input
        insist { doc["_type"] } == type
        insist { doc["_index"] } == index
        insist { doc["_source"]["message"] } == "hello world"
      end
    end
  end

  describe "testing index_type", :elasticsearch => true do
    describe "no type value" do
      # Generate a random index name
      index = 10.times.collect { rand(10).to_s }.join("")
      event_count = 100 + rand(100)
      flush_size = rand(200) + 1

      config <<-CONFIG
        input {
          generator {
            message => "hello world"
            count => #{event_count}
          }
        }
        output {
          elasticsearch {
            host => "127.0.0.1"
            index => "#{index}"
            flush_size => #{flush_size}
          }
        }
      CONFIG

      agent do
        ftw = FTW::Agent.new
        ftw.post!("http://localhost:9200/#{index}/_refresh")

        # Wait until all events are available.
        Stud::try(10.times) do
          data = ""
          response = ftw.get!("http://127.0.0.1:9200/#{index}/_count?q=*")
          response.read_body { |chunk| data << chunk }
          result = LogStash::Json.load(data)
          count = result["count"]
          insist { count } == event_count
        end

        response = ftw.get!("http://127.0.0.1:9200/#{index}/_search?q=*&size=1000")
        data = ""
        response.read_body { |chunk| data << chunk }
        result = LogStash::Json.load(data)
        result["hits"]["hits"].each do |doc|
          insist { doc["_type"] } == "logs"
        end
      end
    end

    describe "default event type value" do
      # Generate a random index name
      index = 10.times.collect { rand(10).to_s }.join("")
      event_count = 100 + rand(100)
      flush_size = rand(200) + 1

      config <<-CONFIG
        input {
          generator {
            message => "hello world"
            count => #{event_count}
            type => "generated"
          }
        }
        output {
          elasticsearch {
            host => "127.0.0.1"
            index => "#{index}"
            flush_size => #{flush_size}
          }
        }
      CONFIG

      agent do
        ftw = FTW::Agent.new
        ftw.post!("http://localhost:9200/#{index}/_refresh")

        # Wait until all events are available.
        Stud::try(10.times) do
          data = ""
          response = ftw.get!("http://127.0.0.1:9200/#{index}/_count?q=*")
          response.read_body { |chunk| data << chunk }
          result = LogStash::Json.load(data)
          count = result["count"]
          insist { count } == event_count
        end

        response = ftw.get!("http://127.0.0.1:9200/#{index}/_search?q=*&size=1000")
        data = ""
        response.read_body { |chunk| data << chunk }
        result = LogStash::Json.load(data)
        result["hits"]["hits"].each do |doc|
          insist { doc["_type"] } == "generated"
        end
      end
    end
  end

  describe "action => ...", :elasticsearch => true do
    index_name = 10.times.collect { rand(10).to_s }.join("")

    config <<-CONFIG
      input {
        generator {
          message => "hello world"
          count => 100
        }
      }
      output {
        elasticsearch {
          host => "127.0.0.1"
          index => "#{index_name}"
        }
      }
    CONFIG

    agent do
      ftw = FTW::Agent.new
      ftw.post!("http://localhost:9200/#{index_name}/_refresh")

      # Wait until all events are available.
      Stud::try(10.times) do
        data = ""
        response = ftw.get!("http://127.0.0.1:9200/#{index_name}/_count?q=*")
        response.read_body { |chunk| data << chunk }
        result = LogStash::Json.load(data)
        count = result["count"]
        insist { count } == 100
      end

      response = ftw.get!("http://127.0.0.1:9200/#{index_name}/_search?q=*&size=1000")
      data = ""
      response.read_body { |chunk| data << chunk }
      result = LogStash::Json.load(data)
      result["hits"]["hits"].each do |doc|
        insist { doc["_type"] } == "logs"
      end
    end

    describe "default event type value", :elasticsearch => true do
      # Generate a random index name
      index = 10.times.collect { rand(10).to_s }.join("")
      event_count = 100 + rand(100)
      flush_size = rand(200) + 1

      config <<-CONFIG
        input {
          generator {
            message => "hello world"
            count => #{event_count}
            type => "generated"
          }
        }
        output {
          elasticsearch {
            host => "127.0.0.1"
            index => "#{index}"
            flush_size => #{flush_size}
          }
        }
      CONFIG

      agent do
        ftw = FTW::Agent.new
        ftw.post!("http://localhost:9200/#{index}/_refresh")

        # Wait until all events are available.
        Stud::try(10.times) do
          data = ""
          response = ftw.get!("http://127.0.0.1:9200/#{index}/_count?q=*")
          response.read_body { |chunk| data << chunk }
          result = LogStash::Json.load(data)
          count = result["count"]
          insist { count } == event_count
        end

        response = ftw.get!("http://127.0.0.1:9200/#{index}/_search?q=*&size=1000")
        data = ""
        response.read_body { |chunk| data << chunk }
        result = LogStash::Json.load(data)
        result["hits"]["hits"].each do |doc|
          insist { doc["_type"] } == "generated"
        end
      end
    end
  end

  describe "index template expected behavior", :elasticsearch => true do
    ["node", "transport", "http"].each do |protocol|
      context "with protocol => #{protocol}" do
        subject do
          require "logstash/outputs/elasticsearch"
          settings = {
            "manage_template" => true,
            "template_overwrite" => true,
            "protocol" => protocol,
            "host" => "localhost"
          }
          next LogStash::Outputs::ElasticSearch.new(settings)
        end

        before :each do
          # Delete all templates first.
          require "elasticsearch"

          # Clean ES of data before we start.
          @es = Elasticsearch::Client.new
          @es.indices.delete_template(:name => "*")

          # This can fail if there are no indexes; ignore failure.
          @es.indices.delete(:index => "*") rescue nil

          subject.register

          subject.receive(LogStash::Event.new("message" => "sample message here"))
          subject.receive(LogStash::Event.new("somevalue" => 100))
          subject.receive(LogStash::Event.new("somevalue" => 10))
          subject.receive(LogStash::Event.new("somevalue" => 1))
          subject.receive(LogStash::Event.new("country" => "us"))
          subject.receive(LogStash::Event.new("country" => "at"))
          subject.receive(LogStash::Event.new("geoip" => { "location" => [ 0.0, 0.0 ] }))
          subject.buffer_flush(:final => true)
          @es.indices.refresh

          # Wait or fail until everything's indexed.
          Stud::try(20.times) do
            r = @es.search
            insist { r["hits"]["total"] } == 7
          end
        end

        it "permits phrase searching on string fields" do
          results = @es.search(:q => "message:\"sample message\"")
          insist { results["hits"]["total"] } == 1
          insist { results["hits"]["hits"][0]["_source"]["message"] } == "sample message here"
        end

        it "maps numbers dynamically to a numeric type and permits range queries" do
          results = @es.search(:q => "somevalue:[5 TO 105]")
          insist { results["hits"]["total"] } == 2

          values = results["hits"]["hits"].collect { |r| r["_source"]["somevalue"] }
          insist { values }.include?(10)
          insist { values }.include?(100)
          reject { values }.include?(1)
        end

        it "creates a not_analyzed .raw field for any string field" do
          results = @es.search(:q => "message.raw:\"sample message here\"")
          insist { results["hits"]["total"] } == 1
          insist { results["hits"]["hits"][0]["_source"]["message"] } == "sample message here"

          # Partial matches or term queries should not match the not_analyzed field.
          results = @es.search(:q => "message.raw:\"sample\"")
          insist { results["hits"]["total"] } == 0
        end

        it "makes [geoip][location] a geo_point" do
          results = @es.search(:body => { "filter" => { "geo_distance" => { "distance" => "1000km", "geoip.location" => { "lat" => 0.5, "lon" => 0.5 } } } })
          insist { results["hits"]["total"] } == 1
          insist { results["hits"]["hits"][0]["_source"]["geoip"]["location"] } == [ 0.0, 0.0 ]
        end

        it "should index stopwords like 'at'" do
          results = @es.search(:body => { "facets" => { "t" => { "terms" => { "field" => "country" } } } })["facets"]["t"]
          terms = results["terms"].collect { |t| t["term"] }

          insist { terms }.include?("us")

          # 'at' is a stopword; make sure stopwords are not ignored.
          insist { terms }.include?("at")
        end
      end
    end
  end
end
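
The `:elasticsearch => true` examples above are integration tests that expect a live Elasticsearch node on localhost (ports 9200/9300). One hedged way to keep them out of the default run (the ES_INTEGRATION variable is an assumption, not part of this gem):

    RSpec.configure do |config|
      # Skip :elasticsearch-tagged examples unless explicitly enabled.
      config.filter_run_excluding :elasticsearch => true unless ENV["ES_INTEGRATION"]
    end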
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: logstash-output-elasticsearch
  version: !ruby/object:Gem::Version
- version: 0.1.0
+ version: 0.1.1
  platform: ruby
  authors:
  - Elasticsearch
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2014-07-16 00:00:00.000000000 Z
+ date: 2014-08-04 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: elasticsearch
@@ -86,12 +86,31 @@ dependencies:
      - - <
        - !ruby/object:Gem::Version
          version: 2.0.0
+ - !ruby/object:Gem::Dependency
+   name: jar-dependencies
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         version: 0.0.6
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         version: 0.0.6
  description: Output events to elasticsearch
  email: rubycoder@example.com
  executables: []
  extensions: []
  extra_rdoc_files: []
- files: []
+ files:
+ - lib/logstash/outputs/elasticsearch.rb
+ - lib/logstash/outputs/elasticsearch/elasticsearch-template.json
+ - lib/logstash/outputs/elasticsearch/protocol.rb
+ - logstash-output-elasticsearch.gemspec
+ - spec/outputs/elasticsearch.rb
  homepage: http://logstash.net/
  licenses:
  - Apache License (2.0)
@@ -118,5 +137,6 @@ rubygems_version: 2.3.0
  signing_key:
  specification_version: 4
  summary: Logstash Output to Elasticsearch
- test_files: []
+ test_files:
+ - spec/outputs/elasticsearch.rb
  has_rdoc: