logstash-lite 0.2.20110206003603 → 0.2.20110329105411

data/bin/logstash-test CHANGED
@@ -36,6 +36,8 @@ def check_libraries
                        "needed for websocket output")
   results << check_lib("rack", "rack", true,
                        "needed for logstash-web")
+  results << check_lib("thin", "thin", true,
+                       "needed for logstash-web")
   results << check_lib("amqp", "amqp", true,
                        "needed for AMQP input and output")
   results << check_lib("sinatra/async", "async_sinatra", true,
@@ -46,6 +48,8 @@ def check_libraries
                        "improve logstash debug logging output")
   results << check_lib("eventmachine", "eventmachine", false,
                        "required for logstash to function")
+  results << check_lib("json", "json", false,
+                       "required for logstash to function")
 
   missing_required = results.count { |r| !r[:optional] and !r[:found] }
   if missing_required == 0
@@ -66,6 +70,8 @@ end
 
 def main(args)
   report_ruby_version
+  # TODO(sissel): Add a way to call out specific things to test, like
+  # logstash-web, elasticsearch, mongodb, syslog, etc.
   if !check_libraries
     puts "Library check failed."
     return 1
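
For context, the calls above assume a check_lib(libname, gemname, optional, message) helper that tries to require each library and returns a hash with :optional and :found keys (that is how the results are counted afterwards). A minimal illustrative sketch of such a helper, not the script's actual implementation:

# Hypothetical stand-in for the check_lib helper used above.
def check_lib(libname, gemname, optional, message)
  found = begin
            require libname
            true
          rescue LoadError
            false
          end
  status = found ? "OK" : (optional ? "MISSING (optional)" : "MISSING (required)")
  puts "#{status}: #{gemname} (#{message})"
  { :optional => optional, :found => found }
end
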
data/lib/logstash/filters/grok.rb CHANGED
@@ -50,9 +50,21 @@ class LogStash::Filters::Grok < LogStash::Filters::Base
 
       if match
         match.each_capture do |key, value|
+          match_type = nil
           if key.include?(":")
-            key = key.split(":")[1]
+            name, key, match_type = key.split(":")
           end
+
+          # http://code.google.com/p/logstash/issues/detail?id=45
+          # Permit typing of captures by giving an additional colon and a type,
+          # like: %{FOO:name:int} for int coercion.
+          case match_type
+          when "int"
+            value = value.to_i
+          when "float"
+            value = value.to_f
+          end
+
           if event.message == value
             # Skip patterns that match the entire line
             @logger.debug("Skipping capture '#{key}' since it matches the whole line.")
@@ -65,7 +77,9 @@ class LogStash::Filters::Grok < LogStash::Filters::Base
             event.fields[key] = []
           end
 
-          if value && !value.empty?
+          # If value is not nil, or responds to empty and is not empty, add the
+          # value to the event.
+          if !value.nil? && (!value.empty? rescue true)
             event.fields[key] << value
           end
         end
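
The effect of the new typed-capture syntax, sketched in isolation; the pattern and field names here are invented:

# With a grok pattern like %{NUMBER:bytes:int}, each_capture hands the filter
# a key of "NUMBER:bytes:int" and a String value; the new code splits and coerces:
key   = "NUMBER:bytes:int"
value = "1234"

name, key, match_type = key.split(":")  # => ["NUMBER", "bytes", "int"]
case match_type
when "int"
  value = value.to_i
when "float"
  value = value.to_f
end

p key    # => "bytes"
p value  # => 1234 (now an Integer, not a String)
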
data/lib/logstash/inputs/amqp.rb CHANGED
@@ -3,9 +3,10 @@ require "logstash/inputs/base"
 require "logstash/namespace"
 require "mq" # rubygem 'amqp'
 require "uuidtools" # rubygem 'uuidtools'
+require "cgi"
 
 class LogStash::Inputs::Amqp < LogStash::Inputs::Base
-  MQTYPES = [ "fanout", "queue", "topic" ]
+  MQTYPES = [ "fanout", "direct", "topic" ]
 
   public
   def initialize(url, type, config={}, &block)
@@ -13,39 +14,47 @@ class LogStash::Inputs::Amqp < LogStash::Inputs::Base
 
     @mq = nil
 
-    # Handle path /<type>/<name>
-    unused, @mqtype, @name = @url.path.split("/", 3)
-    if @mqtype == nil or @name == nil
-      raise "amqp urls must have a path of /<type>/name where <type> is #{MQTYPES.join(", ")}"
+    # Handle path /<vhost>/<type>/<name> or /<type>/<name>
+    # vhost allowed to contain slashes
+    if @url.path =~ %r{^/((.*)/)?([^/]+)/([^/]+)}
+      unused, @vhost, @mqtype, @name = $~.captures
+    else
+      raise "amqp urls must have a path of /<type>/name or /vhost/<type>/name where <type> is #{MQTYPES.join(", ")}"
     end
 
     if !MQTYPES.include?(@mqtype)
-      raise "Invalid type '#{@mqtype}' must be one of #{MQTYPES.JOIN(", ")}"
+      raise "Invalid type '#{@mqtype}' must be one of #{MQTYPES.join(", ")}"
     end
   end # def initialize
 
   public
   def register
     @logger.info("Registering input #{@url}")
+    query_args = @url.query ? CGI.parse(@url.query) : {}
     amqpsettings = {
+      :vhost => (@vhost or "/"),
      :host => @url.host,
      :port => (@url.port or 5672),
     }
     amqpsettings[:user] = @url.user if @url.user
     amqpsettings[:pass] = @url.password if @url.password
+    amqpsettings[:logging] = query_args.include? "debug"
+    queue_name = ((@urlopts["queue"].nil? or @urlopts["queue"].empty?) ? "logstash-#{@name}" : @urlopts["queue"])
+    @logger.debug("Connecting with AMQP settings #{amqpsettings.inspect} to set up #{@mqtype.inspect} queue #{queue_name} on exchange #{@name.inspect}")
     @amqp = AMQP.connect(amqpsettings)
     @mq = MQ.new(@amqp)
     @target = nil
 
-    @target = @mq.queue(UUIDTools::UUID.timestamp_create)
+    @durable_exchange = @urlopts["durable_exchange"] ? true : false
+    @durable_queue = @urlopts["durable_queue"] ? true : false
+    @target = @mq.queue(queue_name, :durable => @durable_queue)
     case @mqtype
     when "fanout"
-      #@target.bind(MQ.fanout(@url.path, :durable => true))
-      @target.bind(@mq.fanout(@name))
+      @target.bind(@mq.fanout(@name, :durable => @durable_exchange))
     when "direct"
-      @target.bind(@mq.direct(@name))
+      @target.bind(@mq.direct(@name, :durable => @durable_exchange))
     when "topic"
-      @target.bind(@mq.topic(@name))
+      @target.bind(@mq.topic(@name, :durable => @durable_exchange))
     end # case @mqtype
 
     @target.subscribe(:ack => true) do |header, message|
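
What the new URL path parsing accepts, sketched in isolation (the URLs and exchange names are illustrative):

# The same regex as above, exercised against the two supported path forms.
PATH_RE = %r{^/((.*)/)?([^/]+)/([^/]+)}

"/fanout/logs" =~ PATH_RE
p $~.captures   # => [nil, nil, "fanout", "logs"]               (no vhost)

"/my/vhost/topic/logs" =~ PATH_RE
p $~.captures   # => ["my/vhost/", "my/vhost", "topic", "logs"] (vhost "my/vhost")

Queue naming and durability are read from the URL options (the @urlopts keys queue, durable_queue, and durable_exchange), and a debug query argument turns on AMQP client logging.
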
data/lib/logstash/namespace.rb CHANGED
@@ -2,4 +2,5 @@ module LogStash
   module Inputs; end
   module Outputs; end
   module Filters; end
+  module Search; end
 end # module LogStash
data/lib/logstash/outputs/amqp.rb CHANGED
@@ -2,52 +2,72 @@ require "amqp" # rubygem 'amqp'
 require "logstash/outputs/base"
 require "logstash/namespace"
 require "mq" # rubygem 'amqp'
+require "cgi"
 
 class LogStash::Outputs::Amqp < LogStash::Outputs::Base
-  MQTYPES = [ "fanout", "queue", "topic" ]
+  MQTYPES = [ "fanout", "direct", "topic" ]
 
   public
   def initialize(url, config={}, &block)
     super
 
-    # Handle path /<type>/<name>
-    unused, @mqtype, @name = @url.path.split("/", 3)
-    if @mqtype == nil or @name == nil
-      raise "amqp urls must have a path of /<type>/name where <type> is #{MQTYPES.join(", ")}"
+    @mq = nil
+    @bulk_prefix = nil
+
+    # Handle path /<vhost>/<type>/<name> or /<type>/<name>
+    # vhost allowed to contain slashes
+    if @url.path =~ %r{^/((.*)/)?([^/]+)/([^/]+)}
+      unused, @vhost, @mqtype, @name = $~.captures
+    else
+      raise "amqp urls must have a path of /<type>/name or /vhost/<type>/name where <type> is #{MQTYPES.join(", ")}"
     end
 
     if !MQTYPES.include?(@mqtype)
-      raise "Invalid type '#{@mqtype}' must be one #{MQTYPES.join(", ")}"
+      raise "Invalid type '#{@mqtype}' must be one of #{MQTYPES.join(", ")}"
     end
   end # def initialize
 
   public
   def register
     @logger.info("Registering output #{@url}")
+    query_args = @url.query ? CGI.parse(@url.query) : {}
     amqpsettings = {
+      :vhost => (@vhost or "/"),
      :host => @url.host,
      :port => (@url.port or 5672),
     }
     amqpsettings[:user] = @url.user if @url.user
     amqpsettings[:pass] = @url.password if @url.password
+    amqpsettings[:logging] = query_args.include? "debug"
+    @logger.debug("Connecting with AMQP settings #{amqpsettings.inspect} to set up #{@mqtype.inspect} exchange #{@name.inspect}")
     @amqp = AMQP.connect(amqpsettings)
     @mq = MQ.new(@amqp)
     @target = nil
 
+    if @urlopts.include? "es_index" and @urlopts.include? "es_type"
+      @bulk_prefix = { "index" => { "_index" => @urlopts["es_index"], "_type" => @urlopts["es_type"] } }.to_json + "\n"
+      @logger.debug "Preparing ElasticSearch bulk API header for injection: #{@bulk_prefix.inspect}"
+    end
+
+    @durable = @urlopts["durable"] ? true : false
     case @mqtype
     when "fanout"
-      @target = @mq.fanout(@name)
-    when "queue"
-      @target = @mq.queue(@name, :durable => @urlopts["durable"] ? true : false)
+      @target = @mq.fanout(@name, :durable => @durable)
+    when "direct"
+      @target = @mq.direct(@name, :durable => @durable)
     when "topic"
-      @target = @mq.topic(@name)
+      @target = @mq.topic(@name, :durable => @durable)
     end # case @mqtype
   end # def register
 
   public
   def receive(event)
     @logger.debug(["Sending event", { :url => @url, :event => event }])
-    @target.publish(event.to_json)
+    if @bulk_prefix
+      @target.publish(@bulk_prefix + event.to_json + "\n")
+    else
+      @target.publish(event.to_json)
+    end
   end # def receive
 
   # This is used by the ElasticSearch AMQP/River output.
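
With the new es_index/es_type URL options set, each published message becomes a two-line ElasticSearch bulk-API payload. A small illustrative sketch of the framing (index, type, and event fields are made up):

require "json"

# Illustrative: what receive() publishes when es_index=logstash and es_type=logs.
bulk_prefix = { "index" => { "_index" => "logstash", "_type" => "logs" } }.to_json + "\n"
event_json  = { "@message" => "hello world" }.to_json   # stand-in for event.to_json
puts bulk_prefix + event_json + "\n"
# {"index":{"_index":"logstash","_type":"logs"}}
# {"@message":"hello world"}
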
data/lib/logstash/outputs/elasticsearch.rb CHANGED
@@ -2,6 +2,7 @@ require "em-http-request"
 require "logstash/namespace"
 require "logstash/outputs/amqp"
 require "logstash/outputs/base"
+require "cgi"
 
 class LogStash::Outputs::Elasticsearch < LogStash::Outputs::Base
   public
@@ -41,6 +42,9 @@ class LogStash::Outputs::Elasticsearch < LogStash::Outputs::Base
       }, # "settings"
     } # ES Index
 
+    #puts :waiting
+    puts @esurl.to_s
+    #sleep 10
     indexurl = @esurl.to_s
     indexmap_http = EventMachine::HttpRequest.new(indexurl)
     indexmap_req = indexmap_http.put :body => indexmap.to_json
@@ -49,41 +53,61 @@ class LogStash::Outputs::Elasticsearch < LogStash::Outputs::Base
       ready(params)
     end
     indexmap_req.errback do
-      @logger.warn(["Failure configuring index", @esurl.to_s, indexmap])
+      @logger.warn(["Failure configuring index (http failed to connect?)",
+                    @esurl.to_s, indexmap])
+      @logger.warn([indexmap_req])
+      #sleep 30
       raise "Failure configuring index: #{@esurl.to_s}"
+
     end
   end # def register
 
   public
   def ready(params)
-    case params["method"]
+    method = params.delete("method")
+    case method
     when "http"
       @logger.debug "ElasticSearch using http with URL #{@url.to_s}"
       @http = EventMachine::HttpRequest.new(@url.to_s)
       @callback = self.method(:receive_http)
     when "river"
-      params["port"] ||= 5672
-      auth = "#{params["user"] or "guest"}:#{params["pass"] or "guest"}"
-      mq_url = URI::parse("amqp://#{auth}@#{params["host"]}:#{params["port"]}/queue/#{params["queue"]}?durable=1")
+      river_type = params.delete("type") || "rabbitmq"
+      amqp_host = params.delete("host") || 'localhost'
+      amqp_port = params.delete("port") || 5672
+      amqp_exchange_type = params.delete("exchange_type") || "direct"
+      amqp_queue_name = params.delete("queue") || "es"
+      amqp_exchange_name = params.delete("exchange") || amqp_queue_name
+      amqp_exchange_durable = (params["durable"] || "false") =~ /^[ty1]/
+      amqp_user = params.delete("user") or "guest"
+      amqp_pass = params.delete("pass") or "guest"
+      amqp_vhost = params.delete("vhost") || "/"
+      vhost_str = (amqp_vhost == "/") ? "" : "/#{amqp_vhost}"
+      qs = params.map {|k,v| "#{CGI.escape(k)}=#{CGI.escape(v)}"}.join("&")
+      mq_url = URI::parse("amqp://#{amqp_user}:#{amqp_pass}@#{amqp_host}:#{amqp_port}#{vhost_str}/#{amqp_exchange_type}/#{amqp_exchange_name}?#{qs}")
       @mq = LogStash::Outputs::Amqp.new(mq_url.to_s)
       @mq.register
       @callback = self.method(:receive_river)
       em_url = URI.parse("http://#{@url.host}:#{@url.port}/_river/logstash#{@url.path.tr("/", "_")}/_meta")
       unused, @es_index, @es_type = @url.path.split("/", 3)
 
-      river_config = {"type" => params["type"],
-                      params["type"] => {"host" => params["host"],
-                                         "user" => params["user"],
-                                         "port" => params["port"],
-                                         "pass" => params["pass"],
-                                         "vhost" => params["vhost"],
-                                         "queue" => params["queue"],
-                                         "exchange" => params["queue"],
-                                        },
-                      "index" => {"bulk_size" => 100,
-                                  "bulk_timeout" => "10ms",
-                                 },
-                     }
+      river_config = {
+        "type" => river_type,
+        river_type => {
+          "host" => amqp_host,
+          "user" => amqp_user,
+          "port" => amqp_port,
+          "pass" => amqp_pass,
+          "vhost" => amqp_vhost,
+          "queue" => amqp_queue_name,
+          "exchange" => amqp_exchange_name,
+          "exchange_durable" => amqp_exchange_durable ? "true" : "false",
+          "exchange_type" => amqp_exchange_type,
+        },
+        "index" => {
+          "bulk_size" => 100,
+          "bulk_timeout" => "10ms",
+        },
+      }
       @logger.debug(["ElasticSearch using river", river_config])
       http_setup = EventMachine::HttpRequest.new(em_url.to_s)
       req = http_setup.put :body => river_config.to_json
@@ -91,7 +115,7 @@ class LogStash::Outputs::Elasticsearch < LogStash::Outputs::Base
        @logger.warn "Error setting up river: #{req.response}"
      end
      @callback = self.method(:receive_river)
-    else raise "unknown elasticsearch method #{params["method"].inspect}"
+    else raise "unknown elasticsearch method #{method.inspect}"
    end
 
    #receive(LogStash::Event.new({
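
A sketch of what the river path now produces. Where the options come from and the concrete values below are illustrative; the defaults and key names follow the code above:

# Assuming ready() receives options like these (values are made up):
params = { "method" => "river", "host" => "amqp-broker", "user" => "guest",
           "pass" => "guest", "durable" => "true" }

# After the defaults above are applied, the AMQP output is registered with
# roughly this URL (durable stays in the query string because it is only read,
# not deleted):
#   amqp://guest:guest@amqp-broker:5672/direct/es?durable=true
#
# and the _meta document PUT to /_river/logstash_<index>_<type>/_meta is:
river_config = {
  "type" => "rabbitmq",
  "rabbitmq" => {
    "host" => "amqp-broker", "user" => "guest", "port" => 5672,
    "pass" => "guest", "vhost" => "/",
    "queue" => "es", "exchange" => "es",
    "exchange_durable" => "true", "exchange_type" => "direct",
  },
  "index" => { "bulk_size" => 100, "bulk_timeout" => "10ms" },
}
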
data/lib/logstash/search/base.rb ADDED
@@ -0,0 +1,39 @@
+
+require "logstash/namespace"
+require "logstash/logging"
+require "logstash/event"
+
+class LogStash::Search::Base
+  # Do a search.
+  #
+  # This method is async. You can expect a block and therefore
+  # should yield a result, not return one.
+  #
+  # Implementations should yield a LogStash::Search::Result
+  # LogStash::Search::Result#events must be an array of LogStash::Event
+  def search(query)
+    raise "The class #{self.class.name} must implement the 'search' method."
+  end # def search
+
+  # Yields a histogram by field of a query.
+  #
+  # This method is async. You should expect a block to be passed and therefore
+  # should yield a result, not return one.
+  #
+  # Implementations should yield a LogStash::Search::FacetResult::Histogram
+  def histogram(query, field, interval=nil)
+    raise "The class #{self.class.name} must implement the 'histogram' method."
+  end
+
+  # Returns a list of popular terms from a query
+  # TODO(sissel): Implement
+  def popular_terms(query, fields, count=10)
+    raise "The class #{self.class.name} must implement the 'popular_terms' method."
+  end
+
+  # Count the results given by a query.
+  def count(query)
+    raise "The class #{self.class.name} must implement the 'count' method."
+  end
+
+end # class LogStash::Search::Base
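
The comments describe an async, block-based contract; a hypothetical caller of any LogStash::Search::Base backend would look roughly like this (the accessor names are taken from how the ElasticSearch implementation below populates its results):

# Hypothetical usage; `backend` is some LogStash::Search::Base subclass.
backend.search("progname:apache status:500") do |result|
  if result.error_message
    puts "search failed: #{result.error_message}"
  else
    result.events.each { |event| puts event.to_json }
  end
end
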
data/lib/logstash/search/elasticsearch.rb ADDED
@@ -0,0 +1,196 @@
+
+require "em-http-request"
+require "logstash/namespace"
+require "logstash/logging"
+require "logstash/event"
+require "logstash/search/base"
+require "logstash/search/query"
+require "logstash/search/result"
+require "logstash/search/facetresult"
+require "logstash/search/facetresult/histogram"
+
+class LogStash::Search::ElasticSearch < LogStash::Search::Base
+  public
+  def initialize(settings={})
+    @host = (settings[:host] || "localhost")
+    @port = (settings[:port] || 9200).to_i
+    @logger = LogStash::Logger.new(STDOUT)
+  end
+
+  # See LogStash::Search::Base#search
+  public
+  def search(query)
+    raise "No block given for search call." if !block_given?
+    if query.is_a?(String)
+      query = LogStash::Search::Query.parse(query)
+    end
+
+    # TODO(sissel): only search a specific index?
+    http = EventMachine::HttpRequest.new("http://#{@host}:#{@port}/_search")
+
+    @logger.info(["Query", query])
+    esreq = {
+      "sort" => [
+        { "@timestamp" => "desc" }
+      ],
+      "query" => {
+        "query_string" => {
+          "query" => query.query_string,
+          "default_operator" => "AND"
+        } # query_string
+      }, # query
+      "from" => query.offset,
+      "size" => query.count
+    } # elasticsearch request
+
+    @logger.info("ElasticSearch Query: #{esreq.to_json}")
+    start_time = Time.now
+    req = http.get :body => esreq.to_json
+    result = LogStash::Search::Result.new
+    req.callback do
+      data = JSON.parse(req.response)
+      result.duration = Time.now - start_time
+
+      hits = data["hits"]["hits"] rescue nil
+
+      if hits.nil? or !data["error"].nil?
+        # Use the error message if any, otherwise, return the whole
+        # data object as json as the error message for debugging later.
+        result.error_message = (data["error"] rescue false) || data.to_json
+        yield result
+        next
+      end
+
+      @logger.info(["Got search results",
+                    { :query => query.query_string, :duration => data["duration"],
+                      :result_count => hits.size }])
+      if req.response_header.status != 200
+        result.error_message = data["error"] || req.inspect
+        @error = data["error"] || req.inspect
+      end
+
+      # We want to yield a list of LogStash::Event objects.
+      hits.each do |hit|
+        result.events << LogStash::Event.new(hit["_source"])
+      end
+
+      # Total hits this search could find if not limited
+      result.total = data["hits"]["total"]
+      result.offset = query.offset
+
+      yield result
+    end
+
+    req.errback do
+      @logger.warn(["Query failed", query, req, req.response])
+      result.duration = Time.now - start_time
+      result.error_message = req.response
+      #yield result
+
+      yield({ "error" => req.response })
+    end
+  end # def search
+
+  # See LogStash::Search::Base#histogram
+  public
+  def histogram(query, field, interval=nil)
+    if query.is_a?(String)
+      query = LogStash::Search::Query.parse(query)
+    end
+
+    # TODO(sissel): only search a specific index?
+    http = EventMachine::HttpRequest.new("http://#{@host}:#{@port}/_search")
+
+    @logger.info(["Query", query])
+    histogram_settings = {
+      "field" => field
+    }
+
+    if !interval.nil? && interval.is_a?(Numeric)
+      histogram_settings["interval"] = interval
+    end
+
+    esreq = {
+      "query" => {
+        "query_string" => {
+          "query" => query.query_string,
+          "default_operator" => "AND"
+        } # query_string
+      }, # query
+      "from" => 0,
+      "size" => 0,
+      "facets" => {
+        "amazingpants" => { # just a name for this histogram...
+          "histogram" => histogram_settings,
+        },
+      },
+    } # elasticsearch request
+
+    @logger.info("ElasticSearch Facet Query: #{esreq.to_json}")
+    start_time = Time.now
+    req = http.get :body => esreq.to_json
+    result = LogStash::Search::FacetResult.new
+    req.callback do
+      data = JSON.parse(req.response)
+      result.duration = Time.now - start_time
+
+      @logger.info(["Got search results",
+                    { :query => query.query_string, :duration => data["duration"] }])
+      if req.response_header.status != 200
+        result.error_message = data["error"] || req.inspect
+        @error = data["error"] || req.inspect
+      end
+
+      entries = data["facets"]["amazingpants"]["entries"] rescue nil
+
+      if entries.nil? or !data["error"].nil?
+        # Use the error message if any, otherwise, return the whole
+        # data object as json as the error message for debugging later.
+        result.error_message = (data["error"] rescue false) || data.to_json
+        yield result
+        next
+      end
+      entries.each do |entry|
+        # entry is a hash of keys 'total', 'mean', 'count', and 'key'
+        hist_entry = LogStash::Search::FacetResult::Histogram.new
+        hist_entry.key = entry["key"]
+        hist_entry.count = entry["count"]
+        result.results << hist_entry
+      end # for each histogram result
+      yield result
+    end # request callback
+
+    req.errback do
+      @logger.warn(["Query failed", query, req, req.response])
+      result.duration = Time.now - start_time
+      result.error_message = req.response
+      yield result
+      #yield({ "error" => req.response })
+    end
+  end
+
+  # Not used. Needs refactoring elsewhere.
+  private
+  def __anonymize
+    # TODO(sissel): Plugin-ify this (Search filters!)
+    # TODO(sissel): Implement
+    # Search anonymization
+    #require "digest/md5"
+    #data["hits"]["hits"].each do |hit|
+    [].each do |hit|
+      event = LogStash::Event.new(hit["_source"])
+      event.to_hash.each do |key, value|
+        next unless value.is_a?(String)
+        value.gsub!(/[^ ]+\.loggly\.net/) { |match| "loggly-" + Digest::MD5.hexdigest(match)[0..6] + ".example.com"}
+      end
+
+      event.fields.each do |key, value|
+        value = [value] if value.is_a?(String)
+        next unless value.is_a?(Array)
+        value.each do |v|
+          v.gsub!(/[^ ]+\.loggly\.net/) { |match| "loggly-" + Digest::MD5.hexdigest(match)[0..6] + ".example.com"}
+        end # value.each
+      end # hit._source.@fields.each
+    end # data.hits.hits.each
+  end # def __anonymize
+end # class LogStash::Search::ElasticSearch
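
Because this backend issues its HTTP requests through em-http-request, callers drive it from inside an EventMachine reactor. A rough, illustrative example (the query string and host are made up):

require "eventmachine"
require "logstash/search/elasticsearch"

EventMachine.run do
  es = LogStash::Search::ElasticSearch.new(:host => "localhost", :port => 9200)
  es.search("progname:nginx response:500") do |result|
    if result.is_a?(Hash) && result["error"]
      # the errback path above yields a plain Hash with an "error" key
      puts "connection failed: #{result["error"]}"
    elsif result.error_message
      puts "query failed: #{result.error_message}"
    else
      puts "#{result.total} total hits, showing #{result.events.size}"
      result.events.each { |event| puts event.to_json }
    end
    EventMachine.stop
  end
end
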