syslogstash 1.3.0 → 2.1.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
-   metadata.gz: 4fae50a372aca78fab4d20cb3a21ff4248988abf
-   data.tar.gz: 391ae0a267341f5571e9c90427e83b2862628c2f
+   metadata.gz: 8dd70d42345ffba77d56d3511d5220e2ab399ce9
+   data.tar.gz: 98f6175cd0cc98d9ca6a51593657b7e3c09f178c
  SHA512:
-   metadata.gz: 6b0ab3566b3ce68964cfcb34be6efbdc3799394c76e72d672c53bb9a66e723e888df989c8f39155662d8151f6dbfd3b1915a08d212c92d8ca33cc4e0ad20bacc
-   data.tar.gz: 8798ef4d1a150cbf105028e5b6d7dcd9d72ed8309f0e998da6f3d0e8989b0e47016b9a9ba02f018d031d627687f28ccdf8a9450f622efcdf1763cef07dead38c
+   metadata.gz: 9fe1db70bce8d062dbc84b3c24e5650ed489b9ff89f7502225b2673f848e3c4abde63d6369cd14b3235f1adb9bfb15ceb8b217eaf4392a52badbfdd906ecbf03
+   data.tar.gz: 1e4163fa6ccd9c6f6402be87d571f1d67e80584935e7a7772084f3c3dc968f321acbdd0f2ecf7065f9b5f295825d2dd9f997189f7866cd0b9847dc03152a3224
data/Dockerfile ADDED
@@ -0,0 +1,13 @@
+ FROM ruby:2.3-alpine
+
+ ARG GEM_VERSION="> 0"
+
+ COPY pkg/syslogstash-$GEM_VERSION.gem /tmp/syslogstash.gem
+
+ RUN apk update \
+     && apk add build-base \
+     && gem install /tmp/syslogstash.gem \
+     && apk del build-base \
+     && rm -f /var/cache/apk/* /tmp/syslogstash.gem
+
+ ENTRYPOINT ["/usr/local/bundle/bin/syslogstash"]
data/Makefile ADDED
@@ -0,0 +1,14 @@
+ IMAGE := discourse/syslogstash
+ TAG := $(shell date -u +%Y%m%d.%H%M%S)
+
+ .PHONY: default
+ default: push
+ 	@printf "${IMAGE}:${TAG} ready\n"
+
+ .PHONY: push
+ push: build
+ 	docker push ${IMAGE}:${TAG}
+
+ .PHONY: build
+ build:
+ 	docker build --build-arg=http_proxy=${http_proxy} -t ${IMAGE}:${TAG} .
data/README.md CHANGED
@@ -1,4 +1,21 @@
- Feed everything from one or more syslog pipes to a logstash server.
+ Syslogstash is intended to provide a syslog-compatible socket for one or
+ more applications to send their syslog messages to. The messages are then
+ parsed and sent to a logstash server for posterity. No more needing to run
+ a syslog server that writes to a file, just to have a second program that
+ reads those files again. With syslogstash, everything is in one neat little
+ package.
+
+ If you're running a containerised environment, there's a reasonable chance
+ you've got multiple things that want to log to syslog, but you want to keep
+ them organised and separate. That's easy: just run multiple syslogstash
+ instances, one per "virtual syslog socket" you want to provide. Multiple
+ containers can share the same socket; they'll just share a logstash
+ connection and have the same metadata / extra tags.
+
+ For maximum flexibility, you can optionally feed the syslog messages to one
+ or more other "downstream" sockets, and/or print all the log messages to
+ stdout for ad-hoc "local" debugging.
+
 
  # Installation
 
@@ -17,74 +34,172 @@ If you're the sturdy type that likes to run from git:
  Or, if you've eschewed the convenience of Rubygems entirely, then you
  presumably know what to do already.
 
+ ## Docker
+
+ Published image at https://hub.docker.com/r/discourse/syslogstash/
+
+ To build a new Docker image, run `rake docker:build`. A `rake docker:push`
+ will push out a new release.
+
 
  # Usage
 
- Write a configuration file, then start `syslogstash` giving the name of the
- config file as an argument:
+ Syslogstash is configured by means of environment variables. At the very
+ least, `syslogstash` needs to know where logstash is (`LOGSTASH_SERVER`),
+ and the socket to listen on for syslog messages (`SYSLOG_SOCKET`). You
+ specify those on the command line, like so:
+
+     LOGSTASH_SERVER=logstash-json \
+     SYSLOG_SOCKET=/dev/log \
+     syslogstash
+
+ The full set of environment variables, and their meaning, is described in
+ the "Syslogstash Configuration" section, below.
+
+
+ ## Logstash server setup
 
-     syslogstash /etc/syslogstash.conf
+ The logstash server(s) you send the collected messages to must be configured
+ to listen on a TCP port with the `json_lines` codec. This can be done quite
+ easily as follows:
 
- ## Config File Format
+     tcp {
+       port => 5151
+       codec => "json_lines"
+     }
 
- The file which describes how `syslogstash` will operate is a fairly simple
- YAML file. It consists of two sections, `sockets` and `servers`, which list
- the UNIX sockets to listen for syslog messages on, and the URLs of logstash
- servers to send the resulting log entries to. Optionally, you can specify
- additional fields to insert into every message received from each syslog
- socket.
+ Adjust the port number to taste.
 
- It looks like this:
 
-     sockets:
-       # These sockets have no additional fields
-       /tmp/sock1:
-       /tmp/sock2:
+ ## Signals
 
-       # This socket will have some fields added to its messages, and will
-       # send all messages to a couple of other sockets, too
-       /tmp/supersock:
-         add_fields:
-           foo: bar
-           baz: wombat
-         relay_to:
-           - /tmp/relaysock1
-           - /tmp/relaysock2
+ There are a few signals that syslogstash recognises, to control various
+ aspects of runtime operation. They are:
 
-     # Every log entry received will be sent to *exactly* one of these
-     # servers. This provides high availability for your log messages.
-     # NOTE: Only tcp:// URLs are supported.
-     servers:
-       - tcp://10.0.0.1:5151
-       - tcp://10.0.0.2:5151
+ * **`SIGUSR1`** / **`SIGUSR2`** -- tell syslogstash to increase (`USR1`) or
+   decrease (`USR2`) the verbosity of its own internal logging. This doesn't
+   change in *any* way the nature or volume of syslog messages that are
+   processed and sent to logstash; it is *only* for syslogstash's own internal
+   operational logging.
 
+ * **`SIGURG`** -- toggle whether or not relaying to stdout is enabled or
+   disabled.
 
- ### Socket configuration
 
- Each socket has a configuration associated with it. Using this
- configuration, you can add logstash fields to each entry, and configure
- socket relaying.
+ ## Use with Docker
 
- The following keys are available under each socket's path:
+ For convenience, `syslogstash` is available in a Docker container,
+ `discourse/syslogstash:v2`. It requires a bit of gymnastics to get the
+ syslog socket from the `syslogstash` container to whatever container you
+ want to capture syslog messages from. Typically, you'll want to share a
+ volume between the two containers, tell `syslogstash` to create its socket
+ there, and then symlink `/dev/log` from the other container to there.
 
- * `add_fields` -- A hash of additional fields to add to every log entry that
-   is received on this socket, before it is passed on to logstash.
+ For example, you might start the syslogstash container like this:
 
- * `relay_to` -- A list of sockets to send all received messages to. This is
-   useful in a very limited range of circumstances, when (for instance) you
-   have another syslog socket consumer that wants to get in on the act, like
-   a legacy syslogd.
+     docker run -v /srv/docker/syslogstash:/syslogstash \
+       -e LOGSTASH_SERVER=logstash-json \
+       -e SYSLOG_SOCKET=/syslogstash/log.sock \
+       discourse/syslogstash:v2
 
+ Then use the same volume in your other container:
 
- ## Logstash server configuration
+     docker run -v /srv/docker/syslogstash:/syslogstash something/funny
 
- You'll need to setup a TCP input, with the `json_lines` codec, for
- `syslogstash` to send log entries to. It can look as simple as this:
+ In the other container's startup script, include the following command:
 
-     tcp {
-       port => 5151
-       codec => "json_lines"
-     }
+     ln -sf /syslogstash/log.sock /dev/log
+
+ ... and everything will work nicely.
+
+ If you feel like playing on nightmare mode, you can also mount the log
+ socket directly into the other container, like this:
+
+     docker run -v /srv/docker/syslogstash/log.sock:/dev/log something/funny
+
+ This allows you to deal with poorly-implemented containers which run
+ software that logs to syslog but doesn't provide a way to override where
+ `/dev/log` points. *However*, due to the way bind mounts and Unix sockets
+ interact, if the syslogstash container restarts *for any reason*, you also
+ need to restart any containers that have the socket itself as a volume. If
+ you can coax your container management system into satisfying that
+ condition, then you're golden.
+
+
+ # Syslogstash Configuration
+
+ All configuration of syslogstash is done by placing values in environment
+ variables. The environment variables that syslogstash recognises are listed
+ below.
+
+ * **`LOGSTASH_SERVER`** (required) -- the domain name or address of the
+   logstash server(s) you wish to send entries to. This can be any of:
+
+   * An IPv4 address and port, separated by a colon. For example,
+     `192.0.2.42:5151`. The port *must* be specified.
+
+   * An IPv6 address (enclosed in square brackets) and port, separated by a
+     colon. For example, `[2001:db8::42]:5151`. The port *must* be
+     specified.
+
+   * A fully-qualified or relative domain name and port, separated by a
+     colon. The name given will be resolved, and all IPv4 and IPv6
+     addresses returned will be tried in random order until a successful
+     connection is made to one of them. The port *must* be specified.
+
+   * A fully-qualified or relative domain name *without a port*. In this
+     case, the name given will be resolved as an SRV record, and the names
+     and ports returned will be used.
+
+   In all cases, syslogstash respects DNS record TTLs and SRV record
+   weight/priority selection rules. We're not monsters.
+
+ * **`SYSLOG_SOCKET`** (required) -- the absolute path to the socket which
+   syslogstash should create and listen on for syslog format messages.
+
+ * **`BACKLOG_SIZE`** (optional; default `"1000000"`) -- the maximum number
+   of messages to queue if the logstash servers are unavailable. Under normal
+   operation, syslog messages are immediately relayed to the logstash server
+   as they are received. However, if no logstash servers are available,
+   syslogstash will maintain a backlog of up to this many syslog messages,
+   and will send the entire backlog once a logstash server becomes available
+   again.
+
+   In the event that the queue size limit is reached, the oldest messages
+   will be dropped to make way for the new ones.
+
+ * **`RELAY_TO_STDOUT`** (optional; default `"no"`) -- if set to a
+   true-ish string (any of `true`, `yes`, `on`, or `1`, compared
+   case-insensitively), then all the syslog messages which are received will
+   be printed to stdout (with the priority/facility prefix removed). This
+   isn't a replacement for a fully-featured syslog server, merely a quick way
+   to dump messages if absolutely required.
+
+ * **`STATS_SERVER`** (optional; default `"no"`) -- if set to a true-ish
+   string (any of `true`, `yes`, `on`, or `1`, compared case-insensitively),
+   then a Prometheus-compatible statistics exporter will be started,
+   listening on all interfaces on port 9159.
+
+ * **`ADD_FIELD_<name>`** (optional) -- if you want to add extra fields to
+   the entries which are forwarded to logstash, you can specify them here,
+   for example:
+
+       ADD_FIELD_foo=bar ADD_FIELD_baz=wombat [...] syslogstash
+
+   This will cause all entries sent to logstash to contain `"foo": "bar"`
+   and `"baz": "wombat"`, in addition to the rest of the fields usually
+   created by syslogstash. Note that nested fields, and value types other
+   than strings, are not supported. Also, if you specify a field name also
+   used by syslogstash, the results are explicitly undefined.
+
+ * **`RELAY_SOCKETS`** (optional; default `""`) -- on the off-chance you
+   want to feed the syslog messages that syslogstash receives to another
+   syslog-compatible consumer (say, an old-school syslogd), you can specify
+   additional filenames to use here. Multiple socket filenames can be
+   specified by separating each file name with a colon. Syslogstash will open
+   each of the specified sockets, if they exist, and write each received
+   message to the socket. If the socket does not exist, or the open or write
+   operations fail, syslogstash **will not** retry.
 
 
  # Contributing
@@ -100,7 +215,7 @@ request](https://github.com/discourse/syslogstash/pulls].
 
  Unless otherwise stated, everything in this repo is covered by the following
  copyright notice:
 
-     Copyright (C) 2015 Civilized Discourse Construction Kit Inc.
+     Copyright (C) 2015, 2018 Civilized Discourse Construction Kit Inc.
 
      This program is free software: you can redistribute it and/or modify it
      under the terms of the GNU General Public License version 3, as
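The "syslog-compatible socket" described above is just a `SOCK_DGRAM` Unix socket. As a minimal sketch (socket path and message text are made up for illustration), the following self-contained Ruby shows both ends of that conversation: the bind that syslogstash performs on `SYSLOG_SOCKET`, and the datagram a client emits when it writes to `/dev/log`:

```ruby
require 'socket'
require 'tmpdir'

received = nil

Dir.mktmpdir do |dir|
  path = File.join(dir, "log.sock")

  # What syslogstash does at startup: bind a datagram socket at SYSLOG_SOCKET.
  server = Socket.new(Socket::AF_UNIX, Socket::SOCK_DGRAM, 0)
  server.bind(Socket.pack_sockaddr_un(path))

  # What a syslog-speaking client does: write one datagram per message.
  # "<14>" is the priority prefix: facility 1 ("user") * 8 + severity 6 ("info").
  client = Socket.new(Socket::AF_UNIX, Socket::SOCK_DGRAM, 0)
  client.connect(Socket.pack_sockaddr_un(path))
  client.send("<14>Jan  2 03:04:05 myapp[123]: hello from the app", 0)

  # This is the raw message syslogstash would parse and relay.
  received, _ = server.recvfrom(1024)

  client.close
  server.close
end

puts received
```

Note that datagram sockets preserve message boundaries, which is why one `recvfrom` yields exactly one syslog message.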
data/bin/syslogstash CHANGED
@@ -1,51 +1,56 @@
  #!/usr/bin/env ruby
 
  require 'syslogstash'
- require 'yaml'
+ require 'logger'
 
- if ARGV.length != 1
-   $stderr.puts <<-EOF.gsub(/^\t\t/, '')
-     Invalid usage
+ logger = Logger.new($stderr)
+ logger.formatter = ->(s, t, p, m) { "#{s[0]} [#{p}] #{m}\n" }
+ logger.level = Logger.const_get(ENV['SYSLOGSTASH_LOG_LEVEL'] || "INFO")
 
-     Usage:
-       #{$0} <configfile>
-   EOF
-
-   exit 1
- end
-
- unless File.exist?(ARGV[0])
-   $stderr.puts "Config file #{ARGV[0]} does not exist"
-   exit 1
+ begin
+   cfg = Syslogstash::Config.new(ENV, logger: logger)
+ rescue Syslogstash::Config::ConfigurationError => ex
+   $stderr.puts "Error in configuration: #{ex.message}"
+   exit 1
  end
 
- unless File.readable?(ARGV[0])
-   $stderr.puts "Config file #{ARGV[0]} not readable"
-   exit 1
- end
+ syslogstash = Syslogstash.new(cfg)
 
- cfg = YAML.load_file(ARGV[0])
+ sig_r, sig_w = IO.pipe
 
- unless cfg.is_a? Hash
-   $stderr.puts "Config file #{ARGV[0]} does not contain a YAML hash"
-   exit 1
+ Signal.trap("USR1") do
+   sig_w.print '1'
+ end
+ Signal.trap("USR2") do
+   sig_w.print '2'
+ end
+ Signal.trap("URG") do
+   sig_w.print 'U'
  end
 
- %w{sockets servers}.each do |section|
-   unless cfg.has_key?(section)
-     $stderr.puts "Config file #{ARGV[0]} does not have a '#{section}' section"
-     exit 1
-   end
-
-   unless cfg[section].respond_to?(:empty?)
-     $stderr.puts "Config file #{ARGV[0]} has a malformed '#{section}' section"
-     exit 1
-   end
-
-   if cfg[section].empty?
-     $stderr.puts "Config file #{ARGV[0]} has an empty '#{section}' section"
-     exit 1
-   end
+ Thread.new do
+   loop do
+     begin
+       c = sig_r.getc
+       if c == '1'
+         logger.level -= 1 unless logger.level == Logger::DEBUG
+         logger.info("SignalHandler") { "Received SIGUSR1; log level is now #{Logger::SEV_LABEL[logger.level]}." }
+       elsif c == '2'
+         logger.level += 1 unless logger.level == Logger::ERROR
+         logger.info("SignalHandler") { "Received SIGUSR2; log level is now #{Logger::SEV_LABEL[logger.level]}." }
+       elsif c == 'U'
+         cfg.relay_to_stdout = !cfg.relay_to_stdout
+         logger.info("SignalHandler") { "Received SIGURG; relaying to stdout is now #{cfg.relay_to_stdout ? "enabled" : "disabled"}" }
+       else
+         logger.error("SignalHandler") { "Got an unrecognised character from signal pipe: #{c.inspect}" }
+       end
+     rescue StandardError => ex
+       logger.error("SignalHandler") { (["Exception raised: #{ex.message} (#{ex.class})"] + ex.backtrace).join("\n  ") }
+     rescue Exception => ex
+       $stderr.puts (["Fatal exception in syslogstash signal handler: #{ex.message} (#{ex.class})"] + ex.backtrace).join("\n  ")
+       exit 42
+     end
+   end
  end
 
- Syslogstash.new(cfg['sockets'], cfg['servers'], cfg.fetch('backlog', 1_000_000)).run
+ syslogstash.run
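The new `bin/syslogstash` above uses the self-pipe trick: Ruby forbids mutex operations inside `Signal.trap` handlers (so no `Logger` calls there), so the handlers only push a one-character token down a pipe, and an ordinary thread reads the pipe and does the real work. A standalone sketch of the same pattern, reduced to the `SIGUSR1` "more verbose" case:

```ruby
require 'logger'

sig_r, sig_w = IO.pipe

logger = Logger.new($stderr)
logger.level = Logger::INFO

# Trap handlers must do almost nothing; a pipe write is safe.
Signal.trap("USR1") { sig_w.print '1' }

handler = Thread.new do
  loop do
    case sig_r.getc
    when '1'
      # Lower numeric level == more verbose (Logger::DEBUG is 0).
      logger.level -= 1 unless logger.level == Logger::DEBUG
    end
  end
end

Process.kill("USR1", Process.pid)  # deliver the signal to ourselves

# Give the handler thread a moment to drain the pipe.
50.times do
  break if logger.level == Logger::DEBUG
  sleep 0.05
end
```

Because the pipe serialises the tokens, signals arriving in bursts are handled one at a time instead of racing each other.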
data/lib/syslogstash.rb CHANGED
@@ -7,44 +7,45 @@ require 'thwait'
  # server.
  #
  class Syslogstash
-   def initialize(sockets, servers, backlog)
-     @metrics = PrometheusExporter.new
-
-     @writer = LogstashWriter.new(servers, backlog, @metrics)
-
-     @readers = sockets.map { |f, cfg| SyslogReader.new(f, cfg, @writer, @metrics) }
+   def initialize(cfg)
+     @cfg = cfg
+     @stats = PrometheusExporter.new(cfg)
+     @writer = LogstashWriter.new(cfg, @stats)
+     @reader = SyslogReader.new(cfg, @writer, @stats)
+     @logger = cfg.logger
    end
 
    def run
-     @metrics.run
-     @writer.run
-     @readers.each { |w| w.run }
+     if @cfg.stats_server
+       @logger.debug("main") { "Running stats server" }
+       @stats.run
+     end
 
-     tw = ThreadsWait.new(@metrics.thread, @writer.thread, *(@readers.map { |r| r.thread }))
+     @writer.run
+     @reader.run
 
-     dead_thread = tw.next_wait
+     dead_thread = ThreadsWait.new(@reader.thread, @writer.thread).next_wait
 
      if dead_thread == @writer.thread
-       $stderr.puts "[Syslogstash] Writer thread crashed."
-     elsif dead_thread == @metrics.thread
-       $stderr.puts "[Syslogstash] Metrics exporter thread crashed."
+       @logger.error("main") { "Writer thread crashed." }
+     elsif dead_thread == @reader.thread
+       @logger.error("main") { "Reader thread crashed." }
      else
-       reader = @readers.find { |r| r.thread == dead_thread }
-
-       $stderr.puts "[Syslogstash] Reader thread for #{reader.file} crashed."
+       @logger.fatal("main") { "ThreadsWait#next_wait returned unexpected value #{dead_thread.inspect}" }
+       exit 1
      end
 
      begin
        dead_thread.join
      rescue Exception => ex
-       $stderr.puts "[Syslogstash] Exception in thread was: #{ex.message} (#{ex.class})"
-       $stderr.puts ex.backtrace.map { |l| "  #{l}" }.join("\n")
+       @logger.error("main") { (["Exception in crashed thread was: #{ex.message} (#{ex.class})"] + ex.backtrace).join("\n  ") }
      end
 
      exit 1
    end
  end
 
+ require_relative 'syslogstash/config'
  require_relative 'syslogstash/syslog_reader'
  require_relative 'syslogstash/logstash_writer'
  require_relative 'syslogstash/prometheus_exporter'
data/lib/syslogstash/config.rb ADDED
@@ -0,0 +1,118 @@
+ require 'logger'
+
+ class Syslogstash::Config
+   class ConfigurationError < StandardError; end
+
+   # Raised if any problems were found with the config
+   class InvalidEnvironmentError < StandardError; end
+
+   attr_reader :logstash_server,
+               :syslog_socket,
+               :backlog_size,
+               :stats_server,
+               :add_fields,
+               :relay_sockets
+
+   attr_reader :logger
+
+   attr_accessor :relay_to_stdout
+
+   # Create a new syslogstash config based on environment variables.
+   #
+   # Examines the environment passed in, and then creates a new config
+   # object if all is well.
+   #
+   # @param env [Hash] the set of environment variables to use.
+   #
+   # @param logger [Logger] the logger to which all diagnostic and error
+   #   data will be sent.
+   #
+   # @raise [ConfigurationError] if any problems are detected with the
+   #   environment variables found.
+   #
+   def initialize(env, logger:)
+     @logger = logger
+
+     parse_env(env)
+   end
+
+   private
+
+   def parse_env(env)
+     @logger.info("config") { "Parsing environment:\n" + env.map { |k, v| "#{k}=#{v.inspect}" }.join("\n") }
+
+     @logstash_server = pluck_string(env, "LOGSTASH_SERVER")
+     @syslog_socket   = pluck_string(env, "SYSLOG_SOCKET")
+     @relay_to_stdout = pluck_boolean(env, "RELAY_TO_STDOUT", default: false)
+     @stats_server    = pluck_boolean(env, "STATS_SERVER", default: false)
+     @backlog_size    = pluck_integer(env, "BACKLOG_SIZE", valid_range: 0..(2**31 - 1), default: 1_000_000)
+     @add_fields      = pluck_prefix_list(env, "ADD_FIELD_")
+     @relay_sockets   = pluck_path_list(env, "RELAY_SOCKETS", default: [])
+   end
+
+   def pluck_string(env, key, default: nil)
+     maybe_default(env, key, default) { env[key] }
+   end
+
+   def pluck_boolean(env, key, default: nil)
+     maybe_default(env, key, default) do
+       case env[key]
+       when /\A(no|off|0|false)\z/i
+         false
+       when /\A(yes|on|1|true)\z/i
+         true
+       else
+         raise ConfigurationError,
+               "Value for #{key} (#{env[key].inspect}) is not a valid boolean"
+       end
+     end
+   end
+
+   def pluck_integer(env, key, valid_range: nil, default: nil)
+     maybe_default(env, key, default) do
+       if env[key] !~ /\A\d+\z/
+         raise InvalidEnvironmentError,
+               "Value for #{key} (#{env[key].inspect}) is not an integer"
+       end
+
+       env[key].to_i.tap do |v|
+         unless valid_range.nil? || valid_range.include?(v)
+           raise InvalidEnvironmentError,
+                 "Value for #{key} (#{env[key]}) out of range (must be between #{valid_range.first} and #{valid_range.last} inclusive)"
+         end
+       end
+     end
+   end
+
+   def pluck_prefix_list(env, prefix)
+     {}.tap do |list|
+       env.each do |k, v|
+         next unless k.start_with? prefix
+         key = k.sub(prefix, '')
+         list[key] = v
+       end
+
+       @logger.debug("config") { "Prefix list for #{prefix.inspect} is #{list.inspect}" }
+     end
+   end
+
+   def pluck_path_list(env, key, default: nil)
+     maybe_default(env, key, default) do
+       env[key].split(":")
+     end
+   end
+
+   def maybe_default(env, key, default)
+     if env[key].nil? || env[key].empty?
+       if default.nil?
+         raise ConfigurationError,
+               "Required environment variable #{key} not specified"
+       else
+         @logger.debug("config") { "Using default value #{default.inspect} for config parameter #{key}" }
+         default
+       end
+     else
+       yield.tap { |v| @logger.debug("config") { "Using plucked value #{v.inspect} for config parameter #{key}" } }
+     end
+   end
+ end
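For illustration, the `ADD_FIELD_` behaviour documented in the README can be reproduced standalone; this sketch is a simplified stand-in for `pluck_prefix_list` above (logging omitted), applied to a plain Hash:

```ruby
# Collect every env var starting with the given prefix into a fields hash,
# stripping the prefix from each key.
def pluck_prefix_list(env, prefix)
  env.each_with_object({}) do |(k, v), list|
    next unless k.start_with?(prefix)
    list[k.sub(prefix, '')] = v
  end
end

env = {
  "LOGSTASH_SERVER" => "logstash-json",
  "ADD_FIELD_foo"   => "bar",
  "ADD_FIELD_baz"   => "wombat",
}

p pluck_prefix_list(env, "ADD_FIELD_")  # => {"foo"=>"bar", "baz"=>"wombat"}
```

Every matching variable becomes one string-valued field, which is why the README notes that nested fields and non-string values are unsupported.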
data/lib/syslogstash/logstash_writer.rb CHANGED
@@ -1,22 +1,21 @@
- require_relative 'worker'
+ require 'resolv'
+ require 'ipaddr'
 
- # Write messages to one of a collection of logstash servers.
+ # Write messages to a logstash server.
  #
  class Syslogstash::LogstashWriter
-   include Syslogstash::Worker
+   Target = Struct.new(:hostname, :port)
+
+   attr_reader :thread
 
    # Create a new logstash writer.
    #
-   # Give it a list of servers, and your writer will be ready to go.
-   # No messages will actually be *delivered*, though, until you call #run.
+   # Once the object is created, you're ready to give it messages by
+   # calling #send_entry. No messages will actually be *delivered* to
+   # logstash, though, until you call #run.
    #
-   def initialize(servers, backlog, metrics)
-     @servers, @backlog, @metrics = servers.map { |s| URI(s) }, backlog, metrics
-
-     unless @servers.all? { |url| url.scheme == 'tcp' }
-       raise ArgumentError,
-             "Unsupported URL scheme: #{@servers.select { |url| url.scheme != 'tcp' }.join(', ')}"
-     end
+   def initialize(cfg, stats)
+     @server_name, @logger, @backlog, @stats = cfg.logstash_server, cfg.logger, cfg.backlog_size, stats
 
      @entries = []
      @entries_mutex = Mutex.new
@@ -31,18 +30,19 @@ class Syslogstash::LogstashWriter
        @entries << { content: e, arrival_timestamp: Time.now }
        while @entries.length > @backlog
          @entries.shift
-         @metrics.dropped
+         @stats.dropped
        end
      end
-     @worker.run if @worker
+
+     @thread.run if @thread
    end
 
    # Start sending messages to logstash servers. This method will return
    # almost immediately, and actual message sending will occur in a
-   # separate worker thread.
+   # separate thread.
    #
    def run
-     @worker = Thread.new { send_messages }
+     @thread = Thread.new { send_messages }
    end
 
    private
@@ -57,16 +57,14 @@ class Syslogstash::LogstashWriter
 
        current_server do |s|
          s.puts entry[:content]
+         @stats.sent(server_id(s), entry[:arrival_timestamp])
        end
 
-       @metrics.sent(@servers.last, entry[:arrival_timestamp])
-
        # If we got here, we sent successfully, so we don't want
        # to put the entry back on the queue in the ensure block
        entry = nil
      rescue StandardError => ex
-       log { "Unhandled exception: #{ex.message} (#{ex.class})" }
-       $stderr.puts ex.backtrace.map { |l| "  #{l}" }.join("\n")
+       @logger.error("writer") { (["Unhandled exception while writing entry: #{ex.message} (#{ex.class})"] + ex.backtrace).join("\n  ") }
      ensure
        @entries_mutex.synchronize { @entries.unshift(entry) if entry }
      end
@@ -91,30 +89,100 @@ class Syslogstash::LogstashWriter
      until done
        if @current_server
          begin
-           debug { "Using current server" }
+           @logger.debug("writer") { "Using current server #{server_id(@current_server)}" }
            yield @current_server
            done = true
          rescue SystemCallError => ex
            # Something went wrong during the send; disconnect from this
            # server and recycle
-           debug { "Error while writing to current server: #{ex.message} (#{ex.class})" }
+           @logger.debug("writer") { "Error while writing to current server: #{ex.message} (#{ex.class})" }
            @current_server.close
            @current_server = nil
            sleep 0.1
          end
        else
+         candidates = resolve_server_name
+
          begin
-           # Pick another server to connect to at random
-           next_server = @servers.sort { rand }.first
-           debug { "Trying to connect to #{next_server.to_s}" }
-           @current_server = TCPSocket.new(next_server.hostname, next_server.port)
+           next_server = candidates.shift
+
+           if next_server
+             @logger.debug("writer") { "Trying to connect to #{next_server.to_s}" }
+             @current_server = TCPSocket.new(next_server.hostname, next_server.port)
+           else
+             @logger.debug("writer") { "Could not connect to any server; pausing before trying again" }
+             @current_server = nil
+             sleep 5
+           end
          rescue SystemCallError => ex
-           # Connection failed for any number of reasons; try again
-           debug { "Failed to connect to #{next_server.to_s}: #{ex.message} (#{ex.class})" }
+           # Connection failed for any number of reasons; try the next one in the list
+           @logger.warn("writer") { "Failed to connect to #{next_server.to_s}: #{ex.message} (#{ex.class})" }
            sleep 0.1
            retry
          end
        end
      end
    end
+
+   def server_id(s)
+     pa = s.peeraddr
+     if pa[0] == "AF_INET6"
+       "[#{pa[3]}]:#{pa[1]}"
+     else
+       "#{pa[3]}:#{pa[1]}"
+     end
+   end
+
+   def resolve_server_name
+     return [static_target] if static_target
+
+     # The IPv6 literal case should have been taken care of by
+     # static_target, so the only two cases we have to deal with
+     # here are specified-port (assume A/AAAA) or no port (assume SRV).
+     if @server_name =~ /:/
+       host, port = @server_name.split(":", 2)
+       addrs = Resolv::DNS.new.getaddresses(host)
+       if addrs.empty?
+         @logger.warn("writer") { "No addresses resolved for server_name #{host.inspect}" }
+       end
+       addrs.map { |a| Target.new(a.to_s, port.to_i) }
+     else
+       # SRV records ftw
+       [].tap do |list|
+         left = Resolv::DNS.new.getresources(@server_name, Resolv::DNS::Resource::IN::SRV)
+         if left.empty?
+           @logger.warn("writer") { "No SRV records found for server_name #{@server_name.inspect}" }
+         end
+         until left.empty?
+           prio = left.map { |rr| rr.priority }.uniq.min
+           candidates = left.select { |rr| rr.priority == prio }
+           left -= candidates
+           candidates.sort_by! { |rr| [rr.weight, rr.target.to_s] }
+           until candidates.empty?
+             selector = rand(candidates.inject(1) { |n, rr| n + rr.weight })
+             chosen = candidates.inject(0) do |n, rr|
+               break rr if n + rr.weight >= selector
+               n + rr.weight
+             end
+             candidates.delete(chosen)
+             list << Target.new(chosen.target.to_s, chosen.port)
+           end
+         end
+       end
+     end
+   end
+
+   def static_target
+     @static_target ||= begin
+       if @server_name =~ /\A(.*):(\d+)\z/
+         begin
+           Target.new(IPAddr.new($1).to_s, $2.to_i)
+         rescue ArgumentError
+           # Whatever is on the LHS isn't a recognisable address;
+           # assume hostname and continue
+           nil
+         end
+       end
+     end
+   end
  end
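The SRV branch of `resolve_server_name` above implements RFC 2782 ordering: strictly ascending priority, weighted-random selection within each priority group. The same selection can be sketched standalone; here a hypothetical `SrvRec` struct stands in for `Resolv::DNS::Resource::IN::SRV`, and the draw is written slightly differently from the writer's `inject`-based version:

```ruby
# RFC 2782-style ordering: lowest priority first; within a priority
# group, pick records one at a time, weighted-randomly by weight.
SrvRec = Struct.new(:priority, :weight, :target)

def order_srv(records, rng: Random.new)
  ordered = []
  left = records.dup
  until left.empty?
    prio = left.map(&:priority).min
    group = left.select { |r| r.priority == prio }
    left -= group
    until group.empty?
      # Draw a point in the total weight; walk the group until we pass it.
      pick = rng.rand(group.sum(&:weight) + 1)
      chosen = group.find { |r| (pick -= r.weight) <= 0 } || group.first
      group.delete(chosen)
      ordered << chosen
    end
  end
  ordered
end

recs = [SrvRec.new(10, 60, "a"), SrvRec.new(10, 40, "b"), SrvRec.new(20, 0, "c")]
p order_srv(recs).map(&:target)  # e.g. ["a", "b", "c"] or ["b", "a", "c"]
```

With these records, `"c"` always sorts last (higher priority number), while `"a"` tends to precede `"b"` roughly 60% of the time, which is exactly the load-spreading behaviour the README promises for SRV lookups.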
data/lib/syslogstash/prometheus_exporter.rb CHANGED
@@ -1,40 +1,33 @@
1
- require 'rack'
2
- require 'prometheus/middleware/exporter'
3
- require 'rack/handler/webrick'
1
+ require 'frankenstein/server'
4
2
  require 'logger'
5
3
 
6
4
  class Syslogstash::PrometheusExporter
7
5
  attr_reader :thread
8
6
 
9
- def initialize
10
- @msg_in = prom.counter(:syslogstash_messages_received, "The number of syslog messages received from each log socket")
11
- @msg_out = prom.counter(:syslogstash_messages_sent, "The number of logstash messages sent to each logstash server")
12
- @lag = prom.gauge(:syslogstash_lag_ms, "How far behind we are in relaying messages")
13
- @queue = prom.gauge(:syslogstash_queue_size, "How many messages are queued to be sent")
7
+ def initialize(cfg)
8
+ @stats_server = Frankenstein::Server.new(port: 9159, logger: cfg.logger, metrics_prefix: "syslogstash_server")
9
+
10
+ @msg_in = prom.counter(:syslogstash_messages_received_total, "The number of syslog messages received from the log socket")
11
+    @msg_out = prom.counter(:syslogstash_messages_sent_total, "The number of logstash messages sent to each logstash server")
+    @lag = prom.gauge(:syslogstash_last_relayed_message_timestamp, "When the last message that was successfully relayed to logstash was originally received")
+    @queue = prom.gauge(:syslogstash_queue_size, "How many messages are currently in the queue to be sent")
+    @dropped = prom.counter(:syslogstash_messages_dropped, "How many messages have been dropped from the backlog queue")
+
     @q_mutex = Mutex.new
-    @dropped = prom.counter(:syslogstash_messages_dropped, "How many syslog messages have been dropped from the backlog queue")
+
+    @lag.set({}, 0)
+    @queue.set({}, 0)
   end
 
-  def received(socket, stamp)
+  def received(socket)
     @msg_in.increment(socket_path: socket)
-    @q_mutex.synchronize { @queue.set({}, (@queue.get({}) || 0) + 1) }
-
-    if @most_recent_received.nil? || @most_recent_received < stamp
-      @most_recent_received = stamp
-
-      refresh_lag
-    end
+    @q_mutex.synchronize { @queue.set({}, @queue.get({}) + 1) }
   end
 
   def sent(server, stamp)
     @msg_out.increment(logstash_server: server)
     @q_mutex.synchronize { @queue.set({}, @queue.get({}) - 1) }
-
-    if @most_recent_sent.nil? || @most_recent_sent < stamp
-      @most_recent_sent = stamp
-
-      refresh_lag
-    end
+    @lag.set({}, stamp.to_f)
   end
 
   def dropped
@@ -43,28 +36,12 @@ class Syslogstash::PrometheusExporter
   end
 
   def run
-    @thread = Thread.new do
-      app = Rack::Builder.new
-      app.use Prometheus::Middleware::Exporter
-      app.run ->(env) { [404, {'Content-Type' => 'text/plain'}, ['Nope']] }
-
-      logger = Logger.new($stderr)
-      logger.level = Logger::INFO
-      logger.formatter = proc { |s, t, p, m| "[Syslogstash::PrometheusExporter::WEBrick] #{m}\n" }
-
-      Rack::Handler::WEBrick.run app, Host: '::', Port: 9159, Logger: logger, AccessLog: []
-    end
+    @stats_server.run
   end
 
   private
 
   def prom
-    Prometheus::Client.registry
-  end
-
-  def refresh_lag
-    if @most_recent_received && @most_recent_sent
-      @lag.set({}, ((@most_recent_received.to_f - @most_recent_sent.to_f) * 1000).to_i)
-    end
+    @stats_server.registry
   end
 end
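The rewritten `received`/`sent` above use the prometheus-client-style gauge API (`gauge.set(labels, value)` / `gauge.get(labels)`), with a mutex guarding each read-modify-write of the queue-depth gauge. A minimal stand-in sketch of that bookkeeping — `TinyGauge` here is a hypothetical illustration, not the real gem's class:

```ruby
require 'thread'

# Hypothetical stand-in for the gauge API used above:
# set(labels, value) stores a value, get(labels) reads it back.
class TinyGauge
  def initialize
    @values = Hash.new(0)
  end

  def set(labels, value)
    @values[labels] = value
  end

  def get(labels)
    @values[labels]
  end
end

# The queue-depth bookkeeping from received/sent: every update to the
# shared gauge happens inside one mutex, so a concurrent get/set pair
# can never interleave and lose an update.
queue = TinyGauge.new
q_mutex = Mutex.new

enqueue = -> { q_mutex.synchronize { queue.set({}, queue.get({}) + 1) } }
dequeue = -> { q_mutex.synchronize { queue.set({}, queue.get({}) - 1) } }

3.times { enqueue.call }
dequeue.call
puts queue.get({})   # => 2
```

Note that pre-seeding the gauge with `@queue.set({}, 0)` in the constructor is what lets `received` drop the old `(@queue.get({}) || 0)` nil guard.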
data/lib/syslogstash/syslog_reader.rb CHANGED
@@ -1,30 +1,15 @@
-require_relative 'worker'
-
 # A single socket reader.
 #
 class Syslogstash::SyslogReader
-  include Syslogstash::Worker
-
-  attr_reader :file
-
-  def initialize(file, config, logstash, metrics)
-    @file, @logstash, @metrics = file, logstash, metrics
-    config ||= {}
+  attr_reader :thread
 
-    @add_fields = config['add_fields'] || {}
-    @relay_to = config['relay_to'] || []
-
-    unless @add_fields.is_a? Hash
-      raise ArgumentError,
-            "add_fields parameter to socket #{file} must be a hash"
-    end
+  def initialize(cfg, logstash, stats)
+    @file, @logstash, @stats = cfg.syslog_socket, logstash, stats
 
-    unless @relay_to.is_a? Array
-      raise ArgumentError,
-            "relay_to parameter to socket #{file} must be an array"
-    end
-
-    log { "initialized syslog socket #{file} with config #{config.inspect}" }
+    @add_fields = cfg.add_fields
+    @relay_to = cfg.relay_sockets
+    @cfg = cfg
+    @logger = cfg.logger
   end
 
   # Start reading from the socket file, parsing entries, and flinging
@@ -32,33 +17,32 @@ class Syslogstash::SyslogReader
   # continuing in a separate thread.
   #
   def run
-    debug { "#run called" }
+    @logger.debug("reader") { "#run called" }
 
     begin
       socket = Socket.new(Socket::AF_UNIX, Socket::SOCK_DGRAM, 0)
       socket.bind(Socket.pack_sockaddr_un(@file))
       File.chmod(0666, @file)
     rescue Errno::EEXIST, Errno::EADDRINUSE
-      log { "socket file #{@file} already exists; deleting" }
+      @logger.info("reader") { "socket file #{@file} already exists; deleting" }
       File.unlink(@file) rescue nil
       retry
-    rescue SystemCallError
-      log { "Error while trying to bind to #{@file}" }
-      raise
+    rescue StandardError => ex
+      raise ex.class, "Error while trying to bind to #{@file}: #{ex.message}", ex.backtrace
     end
 
-    @worker = Thread.new do
+    @thread = Thread.new do
      begin
        loop do
          msg = socket.recvmsg
-          debug { "Message received: #{msg.inspect}" }
-          @metrics.received(@file, Time.now)
-          process_message msg.first.chomp
+          @logger.debug("reader") { "Message received: #{msg.inspect}" }
+          @stats.received(@file)
          relay_message msg.first
+          process_message msg.first.chomp
        end
      ensure
        socket.close
-        log { "removing socket file #{@file}" }
+        @logger.debug("reader") { "removing socket file #{@file}" }
        File.unlink(@file) rescue nil
      end
    end
@@ -103,7 +87,7 @@ class Syslogstash::SyslogReader
 
       @logstash.send_entry(log_entry)
     else
-      log { "Unparseable message: #{msg}" }
+      @logger.warn("reader") { "Unparseable message: #{msg.inspect}" }
     end
   end
 
@@ -118,13 +102,19 @@ class Syslogstash::SyslogReader
       e.merge!(h.delete_if { |k,v| v.nil? })
       e.merge!(@add_fields)
 
-      debug { "Log entry is: #{e.inspect}" }
+      @logger.debug("reader") { "Complete log entry is: #{e.inspect}" }
     end
   end
 
   def relay_message(msg)
     @currently_failed ||= {}
 
+    if @cfg.relay_to_stdout
+      # This one's easy
+      puts msg.sub(/\A<\d+>/, '')
+      $stdout.flush
+    end
+
     @relay_to.each do |f|
       s = Socket.new(Socket::AF_UNIX, Socket::SOCK_DGRAM, 0)
       begin
@@ -133,25 +123,34 @@ class Syslogstash::SyslogReader
       # Socket doesn't exist; we don't care enough about this to bother
       # reporting it.  People will figure it out themselves soon enough.
       rescue StandardError => ex
-        log { "Error while connecting to relay socket #{f}: #{ex.message} (#{ex.class})" }
+        unless @currently_failed[f]
+          @logger.warn("reader") { "Error while connecting to relay socket #{f}: #{ex.message} (#{ex.class})" }
+          @currently_failed[f] = true
+        end
         next
       end
 
       begin
+        # We really, *really* don't want to block the world just because
+        # whoever's on the other end of the relay socket can't process
+        # messages quick enough.
         s.sendmsg_nonblock(msg)
         if @currently_failed[f]
-          log { "Backlog on socket #{f} has cleared; messages are being delivered again" }
+          @logger.info("reader") { "Error on socket #{f} has cleared; messages are being delivered again" }
          @currently_failed[f] = false
        end
      rescue Errno::ENOTCONN
-        # Socket isn't being listened to.  Not our problem.
+        unless @currently_failed[f]
+          @logger.debug("reader") { "Nothing is listening on socket #{f}" }
+          @currently_failed[f] = true
+        end
      rescue IO::EAGAINWaitWritable
        unless @currently_failed[f]
-          log { "Socket #{f} is backlogged; messages to this socket from socket #{@file} are being discarded undelivered" }
+          @logger.warn("reader") { "Socket #{f} is currently backlogged; messages to this socket are now being discarded undelivered" }
          @currently_failed[f] = true
        end
      rescue StandardError => ex
-        log { "Failed to relay message to socket #{f} from #{@file}: #{ex.message} (#{ex.class})" }
+        @logger.warn("reader") { (["Failed to relay message to socket #{f} from #{@file}: #{ex.message} (#{ex.class})"] + ex.backtrace).join("\n  ") }
      end
    end
  end
data/syslogstash.gemspec CHANGED
@@ -23,7 +23,7 @@ Gem::Specification.new do |s|
 
   s.required_ruby_version = ">= 2.1.0"
 
-  s.add_runtime_dependency 'prometheus-client', '>= 0.7'
+  s.add_runtime_dependency 'frankenstein'
   s.add_runtime_dependency 'rack'
 
   s.add_development_dependency 'bundler'
metadata CHANGED
@@ -1,29 +1,29 @@
 --- !ruby/object:Gem::Specification
 name: syslogstash
 version: !ruby/object:Gem::Version
-  version: 1.3.0
+  version: 2.1.0
 platform: ruby
 authors:
 - Matt Palmer
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2018-01-05 00:00:00.000000000 Z
+date: 2018-04-21 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
-  name: prometheus-client
+  name: frankenstein
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: '0.7'
+        version: '0'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: '0.7'
+        version: '0'
 - !ruby/object:Gem::Dependency
   name: rack
   requirement: !ruby/object:Gem::Requirement
@@ -193,14 +193,16 @@ extensions: []
 extra_rdoc_files: []
 files:
 - ".gitignore"
+- Dockerfile
 - LICENCE
+- Makefile
 - README.md
 - bin/syslogstash
 - lib/syslogstash.rb
+- lib/syslogstash/config.rb
 - lib/syslogstash/logstash_writer.rb
 - lib/syslogstash/prometheus_exporter.rb
 - lib/syslogstash/syslog_reader.rb
-- lib/syslogstash/worker.rb
 - syslogstash.gemspec
 homepage: https://github.com/discourse/syslogstash
 licenses: []
data/lib/syslogstash/worker.rb DELETED
@@ -1,34 +0,0 @@
-# Common code shared between both readers and writers.
-#
-module Syslogstash::Worker
-  # If you ever want to stop a reader, here's how.
-  def stop
-    if @worker
-      @worker.kill
-      @worker.join
-      @worker = nil
-    end
-  end
-
-  def thread
-    @worker
-  end
-
-  # If you want to wait for a reader to die, here's how.
-  #
-  def wait
-    @worker.join
-  end
-
-  private
-
-  def log
-    $stderr.puts "[#{self.class}] #{yield.to_s}"
-  end
-
-  def debug
-    if ENV['DEBUG_SYSLOGSTASH']
-      $stderr.puts "[#{self.class}] #{yield.to_s}"
-    end
-  end
-end
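The deleted `Worker` module's `stop`/`wait` helpers reduce to the standard Ruby thread lifecycle sketched below (the 2.x reader now just exposes its thread via `attr_reader :thread` and lets callers do this directly). Names here are illustrative:

```ruby
# Stand-in for the reader loop: a thread that runs until told to stop.
worker = Thread.new do
  loop { sleep 0.01 }
end

# What Worker#stop did: ask the thread to die, then wait until it has.
def stop(thread)
  thread.kill
  thread.join
end

stop(worker)
puts worker.alive?   # => false
```

Dropping the module in favour of a plain `thread` accessor removes a layer of indirection at the cost of callers needing to know about `Thread#kill`/`Thread#join` themselves.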