syslogstash 1.3.0 → 2.1.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: 4fae50a372aca78fab4d20cb3a21ff4248988abf
- data.tar.gz: 391ae0a267341f5571e9c90427e83b2862628c2f
+ metadata.gz: 8dd70d42345ffba77d56d3511d5220e2ab399ce9
+ data.tar.gz: 98f6175cd0cc98d9ca6a51593657b7e3c09f178c
  SHA512:
- metadata.gz: 6b0ab3566b3ce68964cfcb34be6efbdc3799394c76e72d672c53bb9a66e723e888df989c8f39155662d8151f6dbfd3b1915a08d212c92d8ca33cc4e0ad20bacc
- data.tar.gz: 8798ef4d1a150cbf105028e5b6d7dcd9d72ed8309f0e998da6f3d0e8989b0e47016b9a9ba02f018d031d627687f28ccdf8a9450f622efcdf1763cef07dead38c
+ metadata.gz: 9fe1db70bce8d062dbc84b3c24e5650ed489b9ff89f7502225b2673f848e3c4abde63d6369cd14b3235f1adb9bfb15ceb8b217eaf4392a52badbfdd906ecbf03
+ data.tar.gz: 1e4163fa6ccd9c6f6402be87d571f1d67e80584935e7a7772084f3c3dc968f321acbdd0f2ecf7065f9b5f295825d2dd9f997189f7866cd0b9847dc03152a3224
data/Dockerfile ADDED
@@ -0,0 +1,13 @@
+ FROM ruby:2.3-alpine
+
+ ARG GEM_VERSION="> 0"
+
+ COPY pkg/syslogstash-$GEM_VERSION.gem /tmp/syslogstash.gem
+
+ RUN apk update \
+ && apk add build-base \
+ && gem install /tmp/syslogstash.gem \
+ && apk del build-base \
+ && rm -f /var/cache/apk/* /tmp/syslogstash.gem
+
+ ENTRYPOINT ["/usr/local/bundle/bin/syslogstash"]
data/Makefile ADDED
@@ -0,0 +1,14 @@
+ IMAGE := discourse/syslogstash
+ TAG := $(shell date -u +%Y%m%d.%H%M%S)
+
+ .PHONY: default
+ default: push
+ @printf "${IMAGE}:${TAG} ready\n"
+
+ .PHONY: push
+ push: build
+ docker push ${IMAGE}:${TAG}
+
+ .PHONY: build
+ build:
+ docker build --build-arg=http_proxy=${http_proxy} -t ${IMAGE}:${TAG} .
data/README.md CHANGED
@@ -1,4 +1,21 @@
- Feed everything from one or more syslog pipes to a logstash server.
+ Syslogstash is intended to provide a syslog-compatible socket for one or
+ more applications to send their syslog messages to. The messages are then
+ parsed and sent to a logstash server for posterity. No more needing to run
+ a syslog server that writes to a file, just to have a second program that
+ reads those files again. With syslogstash, everything is in one neat little
+ package.
+
+ If you're running a containerised environment, there's a reasonable chance
+ you've got multiple things that want to log to syslog, but you want to keep
+ them organised and separate. That's easy: just run multiple syslogstash
+ instances, one per "virtual syslog socket" you want to provide. Multiple
+ containers can share the same socket, they'll just share a logstash
+ connection and have the same metadata / extra tags.
+
+ For maximum flexibility, you can optionally feed the syslog messages to one
+ or more other "downstream" sockets, and/or print all the log messages to
+ stdout for ad-hoc "local" debugging.
+

  # Installation

@@ -17,74 +34,172 @@ If you're the sturdy type that likes to run from git:
  Or, if you've eschewed the convenience of Rubygems entirely, then you
  presumably know what to do already.

+ ## Docker
+
+ Published image at https://hub.docker.com/r/discourse/syslogstash/
+
+ To build a new Docker image, run `rake docker:build`. A `rake docker:push`
+ will push out a new release.
+

  # Usage

- Write a configuration file, then start `syslogstash` giving the name of the
- config file as an argument:
+ Syslogstash is configured by means of environment variables. At the very
+ least, `syslogstash` needs to know where logstash is (`LOGSTASH_SERVER`),
+ and the socket to listen on for syslog messages (`SYSLOG_SOCKET`). You
+ specify those on the command line, like so:
+
+ LOGSTASH_SERVER=logstash-json \
+ SYSLOG_SOCKET=/dev/log \
+ syslogstash
+
+ The full set of environment variables, and their meaning, is described in
+ the "Syslogstash Configuration" section, below.
+
+
+ ## Logstash server setup

- syslogstash /etc/syslogstash.conf
+ The logstash server(s) you send the collected messages to must be configured
+ to listen on a TCP port with the `json_lines` codec. This can be done quite
+ easily as follows:

- ## Config File Format
+ tcp {
+ port => 5151
+ codec => "json_lines"
+ }

- The file which describes how `syslogstash` will operate is a fairly simple
- YAML file. It consists of two sections, `sockets` and `servers`, which list
- the UNIX sockets to listen for syslog messages on, and the URLs of logstash
- servers to send the resulting log entries to. Optionally, you can specify
- additional fields to insert into every message received from each syslog
- socket.
+ Adjust the port number to taste.

- It looks like this:

- sockets:
- # These sockets have no additional fields
- /tmp/sock1:
- /tmp/sock2:
+ ## Signals

- # This socket will have some fields added to its messages, and will
- # send all messages to a couple of other sockets, too
- /tmp/supersock:
- add_fields:
- foo: bar
- baz: wombat
- relay_to:
- - /tmp/relaysock1
- - /tmp/relaysock2
+ There are a few signals that syslogstash recognises, to control various
+ aspects of runtime operation. They are:

- # Every log entry received will be sent to *exactly* one of these
- # servers. This provides high availability for your log messages.
- # NOTE: Only tcp:// URLs are supported.
- servers:
- - tcp://10.0.0.1:5151
- - tcp://10.0.0.2:5151
+ * **`SIGUSR1`** / **`SIGUSR2`** -- tell syslogstash to increase (`USR1`) or
+ decrease (`USR2`) the verbosity of its own internal logging. This doesn't
+ change in *any* way the nature or volume of syslog messages that are
+ processed and sent to logstash, it is *only* for syslogstash's own internal
+ operational logging.

+ * **`SIGURG`** -- toggle whether or not relaying to stdout is enabled or
+ disabled.

- ### Socket configuration

- Each socket has a configuration associated with it. Using this
- configuration, you can add logstash fields to each entry, and configure
- socket relaying.
+ ## Use with Docker

- The following keys are available under each socket's path:
+ For convenience, `syslogstash` is available in a Docker container,
+ `discourse/syslogstash:v2`. It requires a bit of gymnastics to get the
+ syslog socket from the `syslogstash` container to whatever container you
+ want to capture syslog messages from. Typically, you'll want to share a
+ volume between the two containers, tell `syslogstash` to create its socket
+ there, and then symlink `/dev/log` from the other container to there.

- * `add_fields` -- A hash of additional fields to add to every log entry that
- is received on this socket, before it is passed on to logstash.
+ For example, you might start the syslogstash container like this:

- * `relay_to` -- A list of sockets to send all received messages to. This is
- useful in a very limited range of circumstances, when (for instance) you
- have another syslog socket consumer that wants to get in on the act, like
- a legacy syslogd.
+ docker run -v /srv/docker/syslogstash:/syslogstash \
+ -e LOGSTASH_SERVER=logstash-json \
+ -e SYSLOG_SOCKET=/syslogstash/log.sock \
+ discourse/syslogstash:v2

+ Then use the same volume in your other container:

- ## Logstash server configuration
+ docker run -v /srv/docker/syslogstash:/syslogstash something/funny

- You'll need to setup a TCP input, with the `json_lines` codec, for
- `syslogstash` to send log entries to. It can look as simple as this:
+ In the other container's startup script, include the following command:

- tcp {
- port => 5151
- codec => "json_lines"
- }
+ ln -sf /syslogstash/log.sock /dev/log
+
+ ... and everything will work nicely.
+
+ If you feel like playing on nightmare mode, you can also mount the log
+ socket directly into the other container, like this:
+
+ docker run -v /srv/docker/syslogstash/log.sock:/dev/log something/funny
+
+ This allows you to deal with poorly-implemented containers which run
+ software that logs to syslog but doesn't provide a way to override where
+ `/dev/log` points. *However*, due to the way bind mounts and Unix sockets
+ interact, if the syslogstash container restarts *for any reason*, you also
+ need to restart any containers that have the socket itself as a volume. If
+ you can coax your container management system into satisfying that
+ condition, then you're golden.
+
+
+ # Syslogstash Configuration
+
+ All configuration of syslogstash is done by placing values in environment
+ variables. The environment variables that syslogstash recognises are listed
+ below.
+
+ * **`LOGSTASH_SERVER`** (required) -- the domain name or address of the
+ logstash server(s) you wish to send entries to. This can be any of:
+
+ * An IPv4 address and port, separated by a colon. For example,
+ `192.0.2.42:5151`. The port *must* be specified.
+
+ * An IPv6 address (enclosed in square brackets) and port, separated by a
+ colon. For example, `[2001:db8::42]:5151`. The port *must* be
+ specified.
+
+ * A fully-qualified or relative domain name and port, separated by a
+ colon. The name given will be resolved and all IPv4 and IPv6
+ addresses returned will be tried in random order until a successful
+ connection is made to one of them. The port *must* be specified.
+
+ * A fully-qualified or relative domain name *without a port*. In this
+ case, the name given will be resolved as a SRV record, and the names and
+ ports returned will be used.
+
+ In all cases, syslogstash respects DNS record TTLs and SRV record
+ weight/priority selection rules. We're not monsters.
+
+ * **`SYSLOG_SOCKET`** (required) -- the absolute path to the socket which
+ syslogstash should create and listen on for syslog format messages.
+
+ * **`BACKLOG_SIZE`** (optional; default `"1000000"`) -- the maximum number of
+ messages to queue if the logstash servers are unavailable. Under normal
+ operation, syslog messages are immediately relayed to the logstash server
+ as they are received. However, if no logstash servers are available,
+ syslogstash will maintain a backlog of up to this many syslog messages,
+ and will send the entire backlog once a logstash server becomes available
+ again.
+
+ In the event that the queue size limit is reached, the oldest messages
+ will be dropped to make way for the new ones.
+
+ * **`RELAY_TO_STDOUT`** (optional; default `"no"`) -- if set to a
+ true-ish string (any of `true`, `yes`, `on`, or `1`, compared
+ case-insensitively), then all the syslog messages which are received will
+ be printed to stdout (with the priority/facility prefix removed). This
+ isn't a replacement for a fully-featured syslog server, merely a quick way
+ to dump messages if absolutely required.
+
+ * **`STATS_SERVER`** (optional; default `"no"`) -- if set to a true-ish
+ string (any of `true`, `yes`, `on`, or `1`, compared case-insensitively),
+ then a Prometheus-compatible statistics exporter will be started,
+ listening on all interfaces on port 9159.
+
+ * **`ADD_FIELD_<name>`** (optional) -- if you want to add extra fields to
+ the entries which are forwarded to logstash, you can specify them here,
+ for example:
+
+ ADD_FIELD_foo=bar ADD_FIELD_baz=wombat [...] syslogstash
+
+ This will cause all entries sent to logstash to contain `"foo": "bar"`
+ and `"baz": "wombat"`, in addition to the rest of the fields usually
+ created by syslogstash. Note that nested fields, and value types other
+ than strings, are not supported. Also, if you specify a field name also
+ used by syslogstash, the results are explicitly undefined.
+
+ * **`RELAY_SOCKETS`** (optional; default `""`) -- on the off-chance you want
+ to feed the syslog messages that syslogstash receives to another
+ syslog-compatible consumer (say, an old-school syslogd) you can specify
+ additional filenames to use here. Multiple socket filenames can be
+ specified by separating each file name with a colon. Syslogstash will open
+ each of the specified sockets, if they exist, and write each received
+ message to the socket. If the socket does not exist, or the open or write
+ operations fail, syslogstash **will not** retry.


  # Contributing
@@ -100,7 +215,7 @@ request](https://github.com/discourse/syslogstash/pulls].
  Unless otherwise stated, everything in this repo is covered by the following
  copyright notice:

- Copyright (C) 2015 Civilized Discourse Construction Kit Inc.
+ Copyright (C) 2015, 2018 Civilized Discourse Construction Kit Inc.

  This program is free software: you can redistribute it and/or modify it
  under the terms of the GNU General Public License version 3, as
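The `BACKLOG_SIZE` behaviour described in the README above (queue up to N messages while logstash is unreachable, dropping the oldest first) can be sketched as a simple bounded queue. This is an illustrative sketch only; the class name `Backlog` is invented here, and the gem's real writer additionally timestamps entries and reports drops to the stats exporter.

```ruby
# Minimal sketch of drop-oldest backlog queueing, as described for
# BACKLOG_SIZE. Not the gem's actual implementation.
class Backlog
  attr_reader :dropped

  def initialize(max_size)
    @max_size = max_size
    @entries = []
    @dropped = 0
  end

  # Queue a message, discarding the oldest entries once the limit is hit.
  def push(msg)
    @entries << msg
    while @entries.length > @max_size
      @entries.shift
      @dropped += 1
    end
  end

  def to_a
    @entries.dup
  end
end

backlog = Backlog.new(3)
%w{a b c d e}.each { |m| backlog.push(m) }
backlog.to_a  # the oldest messages, "a" and "b", have been dropped
```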
data/bin/syslogstash CHANGED
@@ -1,51 +1,56 @@
  #!/usr/bin/env ruby

  require 'syslogstash'
- require 'yaml'
+ require 'logger'

- if ARGV.length != 1
- $stderr.puts <<-EOF.gsub(/^\t\t/, '')
- Invalid usage
+ logger = Logger.new($stderr)
+ logger.formatter = ->(s, t, p, m) { "#{s[0]} [#{p}] #{m}\n" }
+ logger.level = Logger.const_get(ENV['SYSLOGSTASH_LOG_LEVEL'] || "INFO")

- Usage:
- #{$0} <configfile>
- EOF
-
- exit 1
- end
-
- unless File.exist?(ARGV[0])
- $stderr.puts "Config file #{ARGV[0]} does not exist"
- exit 1
+ begin
+ cfg = Syslogstash::Config.new(ENV, logger: logger)
+ rescue Syslogstash::Config::ConfigurationError => ex
+ $stderr.puts "Error in configuration: #{ex.message}"
+ exit 1
  end

- unless File.readable?(ARGV[0])
- $stderr.puts "Config file #{ARGV[0]} not readable"
- exit 1
- end
+ syslogstash = Syslogstash.new(cfg)

- cfg = YAML.load_file(ARGV[0])
+ sig_r, sig_w = IO.pipe

- unless cfg.is_a? Hash
- $stderr.puts "Config file #{ARGV[0]} does not contain a YAML hash"
- exit 1
+ Signal.trap("USR1") do
+ sig_w.print '1'
+ end
+ Signal.trap("USR2") do
+ sig_w.print '2'
+ end
+ Signal.trap("URG") do
+ sig_w.print 'U'
  end

- %w{sockets servers}.each do |section|
- unless cfg.has_key?(section)
- $stderr.puts "Config file #{ARGV[0]} does not have a '#{section}' section"
- exit 1
- end
-
- unless cfg[section].respond_to?(:empty?)
- $stderr.puts "Config file #{ARGV[0]} has a malformed '#{section}' section"
- exit 1
- end
-
- if cfg[section].empty?
- $stderr.puts "Config file #{ARGV[0]} has an empty '#{section}' section"
- exit 1
- end
+ Thread.new do
+ loop do
+ begin
+ c = sig_r.getc
+ if c == '1'
+ logger.level -= 1 unless logger.level == Logger::DEBUG
+ logger.info("SignalHandler") { "Received SIGUSR1; log level is now #{Logger::SEV_LABEL[logger.level]}." }
+ elsif c == '2'
+ logger.level += 1 unless logger.level == Logger::ERROR
+ logger.info("SignalHandler") { "Received SIGUSR2; log level is now #{Logger::SEV_LABEL[logger.level]}." }
+ elsif c == 'U'
+ cfg.relay_to_stdout = !cfg.relay_to_stdout
+ logger.info("SignalHandler") { "Received SIGURG; Relaying to stdout is now #{cfg.relay_to_stdout ? "enabled" : "disabled"}" }
+ else
+ logger.error("SignalHandler") { "Got an unrecognised character from signal pipe: #{c.inspect}" }
+ end
+ rescue StandardError => ex
+ logger.error("SignalHandler") { (["Exception raised: #{ex.message} (#{ex.class})"] + ex.backtrace).join("\n ") }
+ rescue Exception => ex
+ $stderr.puts (["Fatal exception in syslogstash signal handler: #{ex.message} (#{ex.class})"] + ex.backtrace).join("\n ")
+ exit 42
+ end
+ end
  end

- Syslogstash.new(cfg['sockets'], cfg['servers'], cfg.fetch('backlog', 1_000_000)).run
+ syslogstash.run
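The new `bin/syslogstash` above uses the classic self-pipe trick: the `Signal.trap` handlers do nothing except write a single byte to a pipe, and an ordinary thread reads from that pipe and does the real work (logging, mutating state) outside signal-handler context, where locks and allocation are safe. A minimal standalone sketch of the same pattern:

```ruby
# Minimal sketch of the self-pipe signal pattern used in bin/syslogstash:
# the trap handler only writes a byte; a reader thread acts on it.
sig_r, sig_w = IO.pipe
sig_w.sync = true  # make sure the byte is not held in a write buffer
counter = 0

Signal.trap("USR1") { sig_w.print '1' }

handler = Thread.new do
  loop do
    case sig_r.getc
    when '1' then counter += 1  # the "real work" happens here, not in the trap
    end
  end
end

Process.kill("USR1", Process.pid)
sleep 0.1  # give the handler thread a moment to process the byte
```

Doing the work in a thread rather than in the trap block matters because Ruby forbids acquiring mutexes (and thus most logging) inside trap handlers.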
data/lib/syslogstash.rb CHANGED
@@ -7,44 +7,45 @@ require 'thwait'
  # server.
  #
  class Syslogstash
- def initialize(sockets, servers, backlog)
- @metrics = PrometheusExporter.new
-
- @writer = LogstashWriter.new(servers, backlog, @metrics)
-
- @readers = sockets.map { |f, cfg| SyslogReader.new(f, cfg, @writer, @metrics) }
+ def initialize(cfg)
+ @cfg = cfg
+ @stats = PrometheusExporter.new(cfg)
+ @writer = LogstashWriter.new(cfg, @stats)
+ @reader = SyslogReader.new(cfg, @writer, @stats)
+ @logger = cfg.logger
  end

  def run
- @metrics.run
- @writer.run
- @readers.each { |w| w.run }
+ if @cfg.stats_server
+ @logger.debug("main") { "Running stats server" }
+ @stats.run
+ end

- tw = ThreadsWait.new(@metrics.thread, @writer.thread, *(@readers.map { |r| r.thread }))
+ @writer.run
+ @reader.run

- dead_thread = tw.next_wait
+ dead_thread = ThreadsWait.new(@reader.thread, @writer.thread).next_wait

  if dead_thread == @writer.thread
- $stderr.puts "[Syslogstash] Writer thread crashed."
- elsif dead_thread == @metrics.thread
- $stderr.puts "[Syslogstash] Metrics exporter thread crashed."
+ @logger.error("main") { "Writer thread crashed." }
+ elsif dead_thread == @reader.thread
+ @logger.error("main") { "Reader thread crashed." }
  else
- reader = @readers.find { |r| r.thread == dead_thread }
-
- $stderr.puts "[Syslogstash] Reader thread for #{reader.file} crashed."
+ @logger.fatal("main") { "ThreadsWait#next_wait returned unexpected value #{dead_thread.inspect}" }
+ exit 1
  end

  begin
  dead_thread.join
  rescue Exception => ex
- $stderr.puts "[Syslogstash] Exception in thread was: #{ex.message} (#{ex.class})"
- $stderr.puts ex.backtrace.map { |l| " #{l}" }.join("\n")
+ @logger.error("main") { (["Exception in crashed thread was: #{ex.message} (#{ex.class})"] + ex.backtrace).join("\n ") }
  end

  exit 1
  end
  end

+ require_relative 'syslogstash/config'
  require_relative 'syslogstash/syslog_reader'
  require_relative 'syslogstash/logstash_writer'
  require_relative 'syslogstash/prometheus_exporter'
data/lib/syslogstash/config.rb ADDED
@@ -0,0 +1,118 @@
+ require 'logger'
+
+ class Syslogstash::Config
+ class ConfigurationError < StandardError; end
+
+ # Raised if any problems were found with the config
+ class InvalidEnvironmentError < StandardError; end
+
+ attr_reader :logstash_server,
+ :syslog_socket,
+ :backlog_size,
+ :stats_server,
+ :add_fields,
+ :relay_sockets
+
+ attr_reader :logger
+
+ attr_accessor :relay_to_stdout
+
+ # Create a new syslogstash config based on environment variables.
+ #
+ # Examines the environment passed in, and then creates a new config
+ # object if all is well.
+ #
+ # @param env [Hash] the set of environment variables to use.
+ #
+ # @param logger [Logger] the logger to which all diagnostic and error
+ # data will be sent.
+ #
+ # @raise [ConfigurationError] if any problems are detected with the
+ # environment variables found.
+ #
+ def initialize(env, logger:)
+ @logger = logger
+
+ parse_env(env)
+ end
+
+ private
+
+ def parse_env(env)
+ @logger.info("config") { "Parsing environment:\n" + env.map { |k, v| "#{k}=#{v.inspect}" }.join("\n") }
+
+ @logstash_server = pluck_string(env, "LOGSTASH_SERVER")
+ @syslog_socket = pluck_string(env, "SYSLOG_SOCKET")
+ @relay_to_stdout = pluck_boolean(env, "RELAY_TO_STDOUT", default: false)
+ @stats_server = pluck_boolean(env, "STATS_SERVER", default: false)
+ @backlog_size = pluck_integer(env, "BACKLOG_SIZE", valid_range: 0..(2**31 - 1), default: 1_000_000)
+ @add_fields = pluck_prefix_list(env, "ADD_FIELD_")
+ @relay_sockets = pluck_path_list(env, "RELAY_SOCKETS", default: [])
+ end
+
+ def pluck_string(env, key, default: nil)
+ maybe_default(env, key, default) { env[key] }
+ end
+
+ def pluck_boolean(env, key, default: nil)
+ maybe_default(env, key, default) do
+ case env[key]
+ when /\A(no|off|0|false)\z/
+ false
+ when /\A(yes|on|1|true)\z/
+ true
+ else
+ raise ConfigurationError,
+ "Value for #{key} (#{env[key].inspect}) is not a valid boolean"
+ end
+ end
+ end
+
+ def pluck_integer(env, key, valid_range: nil, default: nil)
+ maybe_default(env, key, default) do
+ if env[key] !~ /\A\d+\z/
+ raise InvalidEnvironmentError,
+ "Value for #{key} (#{env[key].inspect}) is not an integer"
+ end
+
+ env[key].to_i.tap do |v|
+ unless valid_range.nil? || valid_range.include?(v)
+ raise InvalidEnvironmentError,
+ "Value for #{key} (#{env[key]}) out of range (must be between #{valid_range.first} and #{valid_range.last} inclusive)"
+ end
+ end
+ end
+ end
+
+ def pluck_prefix_list(env, prefix)
+ {}.tap do |list|
+ env.each do |k, v|
+ next unless k.start_with? prefix
+ key = k.sub(prefix, '')
+ list[key] = v
+ end
+
+ @logger.debug("config") { "Prefix list for #{prefix.inspect} is #{list.inspect}" }
+ end
+ end
+
+ def pluck_path_list(env, key, default: nil)
+ maybe_default(env, key, default) do
+ env[key].split(":")
+ end
+ end
+
+ def maybe_default(env, key, default)
+ if env[key].nil? || env[key].empty?
+ if default.nil?
+ raise ConfigurationError,
+ "Required environment variable #{key} not specified"
+ else
+ @logger.debug("config") { "Using default value #{default.inspect} for config parameter #{key}" }
+ default
+ end
+ else
+ yield.tap { |v| @logger.debug("config") { "Using plucked value #{v.inspect} for config parameter #{key}" } }
+ end
+ end
+ end
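The true-ish/false-ish parsing in `pluck_boolean` above can be exercised standalone. The sketch below mirrors the regexes from the diff (minus the logging and defaulting machinery); the method name `parse_boolean` is invented for illustration:

```ruby
# Standalone sketch of the boolean env-var parsing used by
# Syslogstash::Config#pluck_boolean: same accept/reject regexes,
# with nil/empty falling back to a default.
def parse_boolean(value, default: nil)
  return default if value.nil? || value.empty?

  case value
  when /\A(no|off|0|false)\z/
    false
  when /\A(yes|on|1|true)\z/
    true
  else
    raise ArgumentError, "#{value.inspect} is not a valid boolean"
  end
end

parse_boolean("yes")                # => true
parse_boolean("0")                  # => false
parse_boolean(nil, default: false)  # => false
```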
data/lib/syslogstash/logstash_writer.rb CHANGED
@@ -1,22 +1,21 @@
- require_relative 'worker'
+ require 'resolv'
+ require 'ipaddr'

- # Write messages to one of a collection of logstash servers.
+ # Write messages to a logstash server.
  #
  class Syslogstash::LogstashWriter
- include Syslogstash::Worker
+ Target = Struct.new(:hostname, :port)
+
+ attr_reader :thread

  # Create a new logstash writer.
  #
- # Give it a list of servers, and your writer will be ready to go.
- # No messages will actually be *delivered*, though, until you call #run.
+ # Once the object is created, you're ready to give it messages by
+ # calling #send_entry. No messages will actually be *delivered* to
+ # logstash, though, until you call #run.
  #
- def initialize(servers, backlog, metrics)
- @servers, @backlog, @metrics = servers.map { |s| URI(s) }, backlog, metrics
-
- unless @servers.all? { |url| url.scheme == 'tcp' }
- raise ArgumentError,
- "Unsupported URL scheme: #{@servers.select { |url| url.scheme != 'tcp' }.join(', ')}"
- end
+ def initialize(cfg, stats)
+ @server_name, @logger, @backlog, @stats = cfg.logstash_server, cfg.logger, cfg.backlog_size, stats

  @entries = []
  @entries_mutex = Mutex.new
@@ -31,18 +30,19 @@ class Syslogstash::LogstashWriter
  @entries << { content: e, arrival_timestamp: Time.now }
  while @entries.length > @backlog
  @entries.shift
- @metrics.dropped
+ @stats.dropped
  end
  end
- @worker.run if @worker
+
+ @thread.run if @thread
  end

  # Start sending messages to logstash servers. This method will return
  # almost immediately, and actual message sending will occur in a
- # separate worker thread.
+ # separate thread.
  #
  def run
- @worker = Thread.new { send_messages }
+ @thread = Thread.new { send_messages }
  end

  private
@@ -57,16 +57,14 @@ class Syslogstash::LogstashWriter

  current_server do |s|
  s.puts entry[:content]
+ @stats.sent(server_id(s), entry[:arrival_timestamp])
  end

- @metrics.sent(@servers.last, entry[:arrival_timestamp])
-
  # If we got here, we sent successfully, so we don't want
  # to put the entry back on the queue in the ensure block
  entry = nil
  rescue StandardError => ex
- log { "Unhandled exception: #{ex.message} (#{ex.class})" }
- $stderr.puts ex.backtrace.map { |l| " #{l}" }.join("\n")
+ @logger.error("writer") { (["Unhandled exception while writing entry: #{ex.message} (#{ex.class})"] + ex.backtrace).join("\n ") }
  ensure
  @entries_mutex.synchronize { @entries.unshift(entry) if entry }
  end
@@ -91,30 +89,100 @@ class Syslogstash::LogstashWriter
  until done
  if @current_server
  begin
- debug { "Using current server" }
+ @logger.debug("writer") { "Using current server #{server_id(@current_server)}" }
  yield @current_server
  done = true
  rescue SystemCallError => ex
  # Something went wrong during the send; disconnect from this
  # server and recycle
- debug { "Error while writing to current server: #{ex.message} (#{ex.class})" }
+ @logger.debug("writer") { "Error while writing to current server: #{ex.message} (#{ex.class})" }
  @current_server.close
  @current_server = nil
  sleep 0.1
  end
  else
+ candidates = resolve_server_name
+
  begin
- # Pick another server to connect to at random
- next_server = @servers.sort { rand }.first
- debug { "Trying to connect to #{next_server.to_s}" }
- @current_server = TCPSocket.new(next_server.hostname, next_server.port)
+ next_server = candidates.shift
+
+ if next_server
+ @logger.debug("writer") { "Trying to connect to #{next_server.to_s}" }
+ @current_server = TCPSocket.new(next_server.hostname, next_server.port)
+ else
+ @logger.debug("writer") { "Could not connect to any server; pausing before trying again" }
+ @current_server = nil
+ sleep 5
+ end
  rescue SystemCallError => ex
- # Connection failed for any number of reasons; try again
- debug { "Failed to connect to #{next_server.to_s}: #{ex.message} (#{ex.class})" }
+ # Connection failed for any number of reasons; try the next one in the list
+ @logger.warn("writer") { "Failed to connect to #{next_server.to_s}: #{ex.message} (#{ex.class})" }
  sleep 0.1
  retry
  end
  end
  end
  end
+
+ def server_id(s)
+ pa = s.peeraddr
+ if pa[0] == "AF_INET6"
+ "[#{pa[3]}]:#{pa[1]}"
+ else
+ "#{pa[3]}:#{pa[1]}"
+ end
+ end
+
+ def resolve_server_name
+ return [static_target] if static_target
+
+ # The IPv6 literal case should have been taken care of by
+ # static_target, so the only two cases we have to deal with
+ # here are specified-port (assume A/AAAA) or no port (assume SRV).
+ if @server_name =~ /:/
+ host, port = @server_name.split(":", 2)
+ addrs = Resolv::DNS.new.getaddresses(host)
+ if addrs.empty?
+ @logger.warn("writer") { "No addresses resolved for server_name #{host.inspect}" }
+ end
+ addrs.map { |a| Target.new(a.to_s, port.to_i) }
+ else
+ # SRV records ftw
+ [].tap do |list|
+ left = Resolv::DNS.new.getresources(@server_name, Resolv::DNS::Resource::IN::SRV)
+ if left.empty?
+ @logger.warn("writer") { "No SRV records found for server_name #{@server_name.inspect}" }
+ end
+ until left.empty?
+ prio = left.map { |rr| rr.priority }.uniq.min
+ candidates = left.select { |rr| rr.priority == prio }
+ left -= candidates
+ candidates.sort_by! { |rr| [rr.weight, rr.target.to_s] }
+ until candidates.empty?
+ selector = rand(candidates.inject(1) { |n, rr| n + rr.weight })
+ chosen = candidates.inject(0) do |n, rr|
+ break rr if n + rr.weight >= selector
+ n + rr.weight
+ end
+ candidates.delete(chosen)
+ list << Target.new(chosen.target.to_s, chosen.port)
+ end
+ end
+ end
+ end
+ end
+
+ def static_target
+ @static_target ||= begin
+ if @server_name =~ /\A(.*):(\d+)\z/
+ begin
+ Target.new(IPAddr.new($1).to_s, $2.to_i)
+ rescue ArgumentError
+ # Whatever is on the LHS isn't a recognisable address;
+ # assume hostname and continue
+ nil
+ end
+ end
+ end
+ end
  end
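The SRV handling added in `resolve_server_name` above orders records the way the README promises: lowest priority group first, and within each priority group a weighted-random draw. That ordering loop can be sketched standalone; `SrvRecord` and `order_srv_records` are names invented here (the real code works directly on `Resolv::DNS` SRV resources):

```ruby
# Standalone sketch of the SRV priority/weight ordering used by
# LogstashWriter#resolve_server_name: group by priority (lowest first),
# then draw from each group in weighted-random order.
SrvRecord = Struct.new(:priority, :weight, :target, :port)

def order_srv_records(records)
  ordered = []
  left = records.dup
  until left.empty?
    prio = left.map { |rr| rr.priority }.min
    candidates = left.select { |rr| rr.priority == prio }
    left -= candidates
    candidates.sort_by! { |rr| [rr.weight, rr.target] }
    until candidates.empty?
      # Weighted-random pick: walk the cumulative weights until we pass
      # a randomly chosen threshold.
      selector = rand(candidates.inject(1) { |n, rr| n + rr.weight })
      chosen = candidates.inject(0) do |n, rr|
        break rr if n + rr.weight >= selector
        n + rr.weight
      end
      candidates.delete(chosen)
      ordered << chosen
    end
  end
  ordered
end
```

Connection attempts then walk this ordered list front to back, so lower-priority (more preferred) targets are always tried first, with weights biasing the order inside each tier.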
@@ -1,40 +1,33 @@
1
- require 'rack'
2
- require 'prometheus/middleware/exporter'
3
- require 'rack/handler/webrick'
1
+ require 'frankenstein/server'
4
2
  require 'logger'
5
3
 
6
4
  class Syslogstash::PrometheusExporter
7
5
  attr_reader :thread
8
6
 
9
- def initialize
10
- @msg_in = prom.counter(:syslogstash_messages_received, "The number of syslog messages received from each log socket")
11
- @msg_out = prom.counter(:syslogstash_messages_sent, "The number of logstash messages sent to each logstash server")
12
- @lag = prom.gauge(:syslogstash_lag_ms, "How far behind we are in relaying messages")
13
- @queue = prom.gauge(:syslogstash_queue_size, "How many messages are queued to be sent")
7
+ def initialize(cfg)
8
+ @stats_server = Frankenstein::Server.new(port: 9159, logger: cfg.logger, metrics_prefix: "syslogstash_server")
+
+ @msg_in = prom.counter(:syslogstash_messages_received_total, "The number of syslog messages received from the log socket")
+ @msg_out = prom.counter(:syslogstash_messages_sent_total, "The number of logstash messages sent to each logstash server")
+ @lag = prom.gauge(:syslogstash_last_relayed_message_timestamp, "When the last message that was successfully relayed to logstash was originally received")
+ @queue = prom.gauge(:syslogstash_queue_size, "How many messages are currently in the queue to be sent")
+ @dropped = prom.counter(:syslogstash_messages_dropped, "How many messages have been dropped from the backlog queue")
+
  @q_mutex = Mutex.new
- @dropped = prom.counter(:syslogstash_messages_dropped, "How many syslog messages have been dropped from the backlog queue")
+
+ @lag.set({}, 0)
+ @queue.set({}, 0)
  end

- def received(socket, stamp)
+ def received(socket)
  @msg_in.increment(socket_path: socket)
- @q_mutex.synchronize { @queue.set({}, (@queue.get({}) || 0) + 1) }
-
- if @most_recent_received.nil? || @most_recent_received < stamp
- @most_recent_received = stamp
-
- refresh_lag
- end
+ @q_mutex.synchronize { @queue.set({}, @queue.get({}) + 1) }
  end

  def sent(server, stamp)
  @msg_out.increment(logstash_server: server)
  @q_mutex.synchronize { @queue.set({}, @queue.get({}) - 1) }
-
- if @most_recent_sent.nil? || @most_recent_sent < stamp
- @most_recent_sent = stamp
-
- refresh_lag
- end
+ @lag.set({}, stamp.to_f)
  end

  def dropped
@@ -43,28 +36,12 @@ class Syslogstash::PrometheusExporter
  end

  def run
- @thread = Thread.new do
- app = Rack::Builder.new
- app.use Prometheus::Middleware::Exporter
- app.run ->(env) { [404, {'Content-Type' => 'text/plain'}, ['Nope']] }
-
- logger = Logger.new($stderr)
- logger.level = Logger::INFO
- logger.formatter = proc { |s, t, p, m| "[Syslogstash::PrometheusExporter::WEBrick] #{m}\n" }
-
- Rack::Handler::WEBrick.run app, Host: '::', Port: 9159, Logger: logger, AccessLog: []
- end
+ @stats_server.run
  end

  private

  def prom
- Prometheus::Client.registry
- end
-
- def refresh_lag
- if @most_recent_received && @most_recent_sent
- @lag.set({}, ((@most_recent_received.to_f - @most_recent_sent.to_f) * 1000).to_i)
- end
+ @stats_server.registry
  end
  end
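Note on the hunk above: the refactor changes the lag metric's semantics. Instead of publishing a precomputed received-minus-sent difference in milliseconds, the new code publishes the raw timestamp of the last relayed message and leaves the subtraction to query time (e.g. `time() - syslogstash_last_relayed_message_timestamp` in PromQL). A minimal sketch of both approaches in plain Ruby, with no Prometheus client; the `FakeGauge` class is illustrative only:

```ruby
# Illustrative stand-in for a Prometheus gauge: one value per label set.
class FakeGauge
  def initialize; @values = {}; end
  def set(labels, value); @values[labels] = value; end
  def get(labels); @values[labels]; end
end

most_recent_received = 110.0  # when the last message hit the syslog socket
most_recent_sent     = 103.5  # when the last message was relayed to logstash

# Old approach (pre-2.x): publish the lag itself, in milliseconds,
# recomputed inside the process on every send/receive.
old_lag = FakeGauge.new
old_lag.set({}, ((most_recent_received - most_recent_sent) * 1000).to_i)

# New approach (2.x): publish the raw timestamp of the last relay;
# the monitoring system does the subtraction at query time.
new_lag = FakeGauge.new
new_lag.set({}, most_recent_sent)

now = 110.0
puts now - new_lag.get({})  # => 6.5
```

The new form is more robust: the gauge stays meaningful even when no messages are flowing, since the query-side difference keeps growing while the in-process difference would simply go stale.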
@@ -1,30 +1,15 @@
- require_relative 'worker'
-
  # A single socket reader.
  #
  class Syslogstash::SyslogReader
- include Syslogstash::Worker
-
- attr_reader :file
-
- def initialize(file, config, logstash, metrics)
- @file, @logstash, @metrics = file, logstash, metrics
- config ||= {}
+ attr_reader :thread

- @add_fields = config['add_fields'] || {}
- @relay_to = config['relay_to'] || []
-
- unless @add_fields.is_a? Hash
- raise ArgumentError,
- "add_fields parameter to socket #{file} must be a hash"
- end
+ def initialize(cfg, logstash, stats)
+ @file, @logstash, @stats = cfg.syslog_socket, logstash, stats

- unless @relay_to.is_a? Array
- raise ArgumentError,
- "relay_to parameter to socket #{file} must be an array"
- end
-
- log { "initialized syslog socket #{file} with config #{config.inspect}" }
+ @add_fields = cfg.add_fields
+ @relay_to = cfg.relay_sockets
+ @cfg = cfg
+ @logger = cfg.logger
  end

  # Start reading from the socket file, parsing entries, and flinging
@@ -32,33 +17,32 @@ class Syslogstash::SyslogReader
  # continuing in a separate thread.
  #
  def run
- debug { "#run called" }
+ @logger.debug("reader") { "#run called" }

  begin
  socket = Socket.new(Socket::AF_UNIX, Socket::SOCK_DGRAM, 0)
  socket.bind(Socket.pack_sockaddr_un(@file))
  File.chmod(0666, @file)
  rescue Errno::EEXIST, Errno::EADDRINUSE
- log { "socket file #{@file} already exists; deleting" }
+ @logger.info("reader") { "socket file #{@file} already exists; deleting" }
  File.unlink(@file) rescue nil
  retry
- rescue SystemCallError
- log { "Error while trying to bind to #{@file}" }
- raise
+ rescue StandardError => ex
+ raise ex.class, "Error while trying to bind to #{@file}: #{ex.message}", ex.backtrace
  end

- @worker = Thread.new do
+ @thread = Thread.new do
  begin
  loop do
  msg = socket.recvmsg
- debug { "Message received: #{msg.inspect}" }
- @metrics.received(@file, Time.now)
- process_message msg.first.chomp
+ @logger.debug("reader") { "Message received: #{msg.inspect}" }
+ @stats.received(@file)
  relay_message msg.first
+ process_message msg.first.chomp
  end
  ensure
  socket.close
- log { "removing socket file #{@file}" }
+ @logger.debug("reader") { "removing socket file #{@file}" }
  File.unlink(@file) rescue nil
  end
  end
@@ -103,7 +87,7 @@ class Syslogstash::SyslogReader

  @logstash.send_entry(log_entry)
  else
- log { "Unparseable message: #{msg}" }
+ @logger.warn("reader") { "Unparseable message: #{msg.inspect}" }
  end
  end

@@ -118,13 +102,19 @@ class Syslogstash::SyslogReader
  e.merge!(h.delete_if { |k,v| v.nil? })
  e.merge!(@add_fields)

- debug { "Log entry is: #{e.inspect}" }
+ @logger.debug("reader") { "Complete log entry is: #{e.inspect}" }
  end
  end

  def relay_message(msg)
  @currently_failed ||= {}

+ if @cfg.relay_to_stdout
+ # This one's easy
+ puts msg.sub(/\A<\d+>/, '')
+ $stdout.flush
+ end
+
  @relay_to.each do |f|
  s = Socket.new(Socket::AF_UNIX, Socket::SOCK_DGRAM, 0)
  begin
@@ -133,25 +123,34 @@ class Syslogstash::SyslogReader
  # Socket doesn't exist; we don't care enough about this to bother
  # reporting it. People will figure it out themselves soon enough.
  rescue StandardError => ex
- log { "Error while connecting to relay socket #{f}: #{ex.message} (#{ex.class})" }
+ unless @currently_failed[f]
+ @logger.warn("reader") { "Error while connecting to relay socket #{f}: #{ex.message} (#{ex.class})" }
+ @currently_failed[f] = true
+ end
  next
  end

  begin
+ # We really, *really* don't want to block the world just because
+ # whoever's on the other end of the relay socket can't process
+ # messages quick enough.
  s.sendmsg_nonblock(msg)
  if @currently_failed[f]
- log { "Backlog on socket #{f} has cleared; messages are being delivered again" }
+ @logger.info("reader") { "Error on socket #{f} has cleared; messages are being delivered again" }
  @currently_failed[f] = false
  end
  rescue Errno::ENOTCONN
- # Socket isn't being listened to. Not our problem.
+ unless @currently_failed[f]
+ @logger.debug("reader") { "Nothing is listening on socket #{f}" }
+ @currently_failed[f] = true
+ end
  rescue IO::EAGAINWaitWritable
  unless @currently_failed[f]
- log { "Socket #{f} is backlogged; messages to this socket from socket #{@file} are being discarded undelivered" }
+ @logger.warn("reader") { "Socket #{f} is currently backlogged; messages to this socket are now being discarded undelivered" }
  @currently_failed[f] = true
  end
  rescue StandardError => ex
- log { "Failed to relay message to socket #{f} from #{@file}: #{ex.message} (#{ex.class})" }
+ @logger.warn("reader") { (["Failed to relay message to socket #{f} from #{@file}: #{ex.message} (#{ex.class})"] + ex.backtrace).join("\n ") }
  end
  end
  end
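Note on the relay path above: it leans on `sendmsg_nonblock` so that a slow or absent consumer can never stall the reader loop; a backlogged socket raises `IO::EAGAINWaitWritable` and the message is dropped instead. The same pattern, sketched standalone with a UNIX datagram socket pair (the socket names and message content are illustrative, not from the gem):

```ruby
require 'socket'

# A connected pair of UNIX datagram sockets stands in for the reader
# and whatever is listening on a relay socket.
reader_side, consumer_side = Socket.pair(Socket::AF_UNIX, Socket::SOCK_DGRAM, 0)

begin
  # Non-blocking send: raises IO::EAGAINWaitWritable instead of blocking
  # when the receiving side's buffer is full.
  reader_side.sendmsg_nonblock("<13>hello from syslogstash")
rescue IO::EAGAINWaitWritable
  # Drop the message rather than block the whole reader loop.
end

msg, _addr = consumer_side.recvmsg
puts msg  # => "<13>hello from syslogstash"
```

Using `SOCK_DGRAM` also preserves message boundaries, so each syslog entry arrives as one unit with no framing needed.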
data/syslogstash.gemspec CHANGED
@@ -23,7 +23,7 @@ Gem::Specification.new do |s|

  s.required_ruby_version = ">= 2.1.0"

- s.add_runtime_dependency 'prometheus-client', '>= 0.7'
+ s.add_runtime_dependency 'frankenstein'
  s.add_runtime_dependency 'rack'

  s.add_development_dependency 'bundler'
metadata CHANGED
@@ -1,29 +1,29 @@
  --- !ruby/object:Gem::Specification
  name: syslogstash
  version: !ruby/object:Gem::Version
- version: 1.3.0
+ version: 2.1.0
  platform: ruby
  authors:
  - Matt Palmer
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2018-01-05 00:00:00.000000000 Z
+ date: 2018-04-21 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
- name: prometheus-client
+ name: frankenstein
  requirement: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
  - !ruby/object:Gem::Version
- version: '0.7'
+ version: '0'
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
  - !ruby/object:Gem::Version
- version: '0.7'
+ version: '0'
  - !ruby/object:Gem::Dependency
  name: rack
  requirement: !ruby/object:Gem::Requirement
@@ -193,14 +193,16 @@ extensions: []
  extra_rdoc_files: []
  files:
  - ".gitignore"
+ - Dockerfile
  - LICENCE
+ - Makefile
  - README.md
  - bin/syslogstash
  - lib/syslogstash.rb
+ - lib/syslogstash/config.rb
  - lib/syslogstash/logstash_writer.rb
  - lib/syslogstash/prometheus_exporter.rb
  - lib/syslogstash/syslog_reader.rb
- - lib/syslogstash/worker.rb
  - syslogstash.gemspec
  homepage: https://github.com/discourse/syslogstash
  licenses: []
@@ -1,34 +0,0 @@
- # Common code shared between both readers and writers.
- #
- module Syslogstash::Worker
- # If you ever want to stop a reader, here's how.
- def stop
- if @worker
- @worker.kill
- @worker.join
- @worker = nil
- end
- end
-
- def thread
- @worker
- end
-
- # If you want to wait for a reader to die, here's how.
- #
- def wait
- @worker.join
- end
-
- private
-
- def log
- $stderr.puts "[#{self.class}] #{yield.to_s}"
- end
-
- def debug
- if ENV['DEBUG_SYSLOGSTASH']
- $stderr.puts "[#{self.class}] #{yield.to_s}"
- end
- end
- end
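Note on the deleted `Syslogstash::Worker` mixin above: it wrapped a common thread-lifecycle pattern (kill, join, clear) that 2.x replaces by exposing each reader's thread directly via `attr_reader :thread`. The pattern itself, sketched standalone (the `TinyWorker` class is illustrative, not part of the gem):

```ruby
# Minimal worker with the same stop/wait lifecycle the mixin provided.
class TinyWorker
  attr_reader :thread

  def run
    @thread = Thread.new { loop { sleep 0.01 } }
  end

  # Kill the thread, then join so we know it has actually finished
  # before clearing the reference.
  def stop
    return unless @thread
    @thread.kill
    @thread.join
    @thread = nil
  end
end

w = TinyWorker.new
w.run
w.stop
puts w.thread.inspect  # => nil
```

Joining after `kill` matters: `Thread#kill` only requests termination, and the `join` is what guarantees the thread is gone before the caller proceeds.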