log2json 0.1.15 → 0.1.16

Files changed (3)
  1. data/bin/redis2es +99 -23
  2. data/log2json.gemspec +2 -2
  3. metadata +4 -4
data/bin/redis2es CHANGED
@@ -8,6 +8,7 @@ require 'json'
 require 'redis'
 require 'persistent_http' # 1.0.5
 # depends on gene_pool 1.3.0
+require 'fileutils'
 
 def show_usage_and_exit(status=1)
   puts "Usage: #{$0} <elasticsearch_host> [port]"
@@ -51,6 +52,14 @@ if FLUSH_TIMEOUT < 2 or FLUSH_TIMEOUT % 2 != 0
   exit 1
 end
 
+# Path to the redo log file that stores the log records that this process
+# failed to send to ES.
+const(:REDO_LOG, File.join(Dir.getwd, "redo.log"))
+
+# Note: The redo log will initially be written to a temporary file in a work folder.
+# When this process terminates, it will then be moved to the redo log.
+
+
 LOG = Logger.new(STDOUT)
 HTTP_LOG = Logger.new(STDOUT)
 HTTP_LOG.level = Logger::WARN
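
The note above describes a write-to-temp-then-move handoff. A rough sketch of that pattern (paths and record contents here are illustrative, not taken from redis2es):

    require 'fileutils'

    temp  = '/var/tmp/redo.log.part'          # appended to while the process runs
    final = File.join(Dir.getwd, 'redo.log')  # picked up on the next startup

    File.open(temp, 'a') { |f| f.puts('{"failed":"record"}') }
    # Handing off via a rename at exit means the main redo log never
    # appears in a half-written state.
    FileUtils.mv(temp, final) unless File.exist?(final)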
@@ -61,8 +70,10 @@ HTTP_LOG.level = Logger::WARN
 
   # this script is the only consumer of the pool and it uses only one connection at a time.
   :pool_size => 1,
-  # Note: if the ES server can handle the load, we might be able to run multiple instances
-  # of this script to process the queue and send logs to ES with multiple connections.
+  # Note: Currently, we are not using multiple worker threads with a connection pool.
+  # Instead, we run multiple worker processes of this script, each with a pool size of 1.
+  # This may change in the future, but for now it seems good enough, though we
+  # waste some memory.
 
   # only renew a connection that's been idle for 5 minutes.
   :idle_timeout => 60 * 5,
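
These options configure the shared persistent_http client; the surrounding constructor call is elided from the hunk. A minimal sketch of such a setup, with the name and Elasticsearch URL assumed for illustration:

    require 'persistent_http'

    @@http = PersistentHTTP.new(
      :name         => 'redis2es',               # illustrative
      :logger       => HTTP_LOG,
      :pool_size    => 1,                        # one connection per worker process
      :idle_timeout => 60 * 5,                   # renew connections idle for 5 minutes
      :url          => 'http://localhost:9200'   # illustrative ES endpoint
    )

    # Requests are then issued through the pool, e.g.:
    # @@http.request(Net::HTTP::Post.new('/_bulk'))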
@@ -76,22 +87,73 @@ HTTP_LOG.level = Logger::WARN
 @redis = Redis.new(host: REDIS_HOST, port: REDIS_PORT)
 
 def flush_queue
-  if not @queue.empty?
-    req = Net::HTTP::Post.new('/_bulk')
-    req.body = @queue.join("\n")
-    response = nil
-    begin
-      response = @@http.request(req)
-    ensure
-      if response.nil? or response.code != '200'
-        LOG.error(response.body) if not response.nil?
-        LOG.warn("Failed sending bulk request(#{@queue.size} records) to ES! Logging the request body instead.")
-        LOG.info("Failed request body:\n"+req.body)
-        #FIXME: write the queue to another file. This would allow us to resend these records on startup.
+  return if @queue.empty?
+
+  req = Net::HTTP::Post.new('/_bulk')
+  req.body = @queue.join('')
+  response = nil
+  begin
+    response = @@http.request(req)
+  ensure
+    unless response != nil and response.code == '200'
+      LOG.error(response.body) if not response.nil?
+      LOG.warn("Failed sending bulk request(#{@queue.size} records) to ES!")
+      LOG.error("Stack trace:\n" + $!.backtrace[1..-1].join("\n")) if $!
+      begin
+        LOG.info("Dumping #{@queue.size} log records in the failed request to the temp redo log...")
+        dump_queue()
+      rescue
+        LOG.error("Error stashing failed request! Dumping to stderr:\n"+req.body)
       end
     end
-    @queue.clear()
   end
+  @queue.clear()
+end
+
+def redo_tempfile_path
+  tmpdir = ENV['WORK_DIR'] || ENV['TMPDIR'] || '/var/tmp'
+  File.join(tmpdir, File.basename(REDO_LOG)+'.part')
+end
+
+# dump the queue to a temporary file
+def dump_queue
+  open(redo_tempfile_path, "a:#{LOG_ENCODING}") do |file|
+    file.write("# #{Time.now} #{@queue.size} records ---\n")
+    file.write(@queue.join(''))
+  end
+end
+
+def handle_redo_log
+  return unless File.exist?(REDO_LOG)
+  if File.exist?(redo_tempfile_path)
+    raise "Temp redo log(#{redo_tempfile_path}) exists!"
+    # In this case, the user needs to decide which of the redo log files
+    # (the main one, the temporary one, or both) is good, and produce only
+    # the main redo log file to be retried.
+  end
+  i = 0
+  record = ''
+  File.open(REDO_LOG, "r:#{LOG_ENCODING}").each_line do |line|
+    next if line == "\n" or line =~ /^#/
+    if i % 2 == 0
+      if not record.empty?
+        @queue << record
+        record = ''
+      end
+      if @queue.size == FLUSH_SIZE
+        LOG.info("Flushing #{@queue.size} records from the redo log(#{REDO_LOG})...")
+        flush_queue() rescue @queue.clear()
+      end
+    else
+      record << line
+      i += 1
+    end
+  end
+  if not @queue.empty?
+    LOG.info("Flushing #{@queue.size} records from the redo log(#{REDO_LOG})...")
+    flush_queue() rescue @queue.clear()
+  end
+  FileUtils.rm(REDO_LOG)
 end
 
 # Determines the name of the index in ElasticSearch from the given log record's timestamp.
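
Given how enqueue (below) and dump_queue compose the data, the redo log is a sequence of "#" marker lines plus header/record line pairs, roughly like this (timestamp, index name, and record fields are illustrative):

    # 2013-10-29 12:00:00 -0400 1 records ---
    {"index":{"_index":"logs-2013.10.29","_type":"syslog"}}
    {"@type":"syslog","@timestamp":"2013-10-29T12:00:00Z","@message":"hello"}

On startup, handle_redo_log skips the marker and blank lines, re-queues the pairs, and flushes them back to ES in FLUSH_SIZE batches before deleting the file.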
@@ -106,15 +168,21 @@ def es_index(tstamp)
 end
 
 def enqueue(logstr)
+  logstr.force_encoding(LOG_ENCODING)
+
+  type = tstamp = nil
+  type = $1 if logstr =~ /"@type":\s*"([^"]+)"/
+  tstamp = $1 if logstr =~ /"@timestamp":\s*"([^"]+)"/
+  if not (type and tstamp)
+    LOG.warn("Failed to extract @type or @timestamp field using regex, falling back to parsing json...")
+    log = JSON.load(logstr)
+    type, tstamp = log["@type"], log["@timestamp"]
+  end
+
   # add header for each entry according to http://www.elasticsearch.org/guide/reference/api/bulk/
-  log = JSON.load(logstr.force_encoding(LOG_ENCODING))
-  #FIXME: we can parse it ourselves. we only need to extract the @timestamp value.
-
   @queue << (
-    {"index" => {"_index" => es_index(log["@timestamp"]), "_type" => log["@type"]}}.to_json <<
-    "\n" <<
-    logstr <<
-    "\n"
+    {"index" => {"_index" => es_index(tstamp), "_type" => type}}.to_json << "\n" <<
+    logstr << "\n"
   )
 end
 
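The regex fast path above avoids a full JSON parse per record; JSON.load remains as a fallback. A quick check of the extraction on an assumed sample record:

    logstr = '{"@type":"syslog","@timestamp":"2013-10-29T12:00:00Z","@message":"hi"}'
    type   = $1 if logstr =~ /"@type":\s*"([^"]+)"/
    tstamp = $1 if logstr =~ /"@timestamp":\s*"([^"]+)"/
    # type   => "syslog"
    # tstamp => "2013-10-29T12:00:00Z"

The fallback kicks in only when a pattern fails to match at all, e.g. for a record missing either field.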
@@ -172,11 +240,19 @@ def main
   end # loop
 end
 
+
+handle_redo_log()
 begin
   main()
 ensure
   LOG.warn("Terminating! Flushing the queue(size=#{@queue.size})...")
   flush_queue()
+  if File.exist?(REDO_LOG) and File.exist?(redo_tempfile_path)
+    LOG.warn("Redo log file(#{REDO_LOG}) already exists!? " +
+             "Please remove it so new redo logs can be written!")
+  else
+    FileUtils.mv(redo_tempfile_path, REDO_LOG) rescue :pass
+  end
 end
 
data/log2json.gemspec CHANGED
@@ -1,6 +1,6 @@
 Gem::Specification.new do |s|
   s.name = 'log2json'
-  s.version = '0.1.15'
+  s.version = '0.1.16'
   s.summary = "Read, filter and ship logs. ie, poor man's roll-your-own, light-weight logstash replacement."
   s.description = IO.read(File.join(File.dirname(__FILE__), 'README'))
   s.authors = ['Jack Kuan']
@@ -13,6 +13,6 @@ Gem::Specification.new do |s|
 
   s.add_runtime_dependency 'jls-grok', '~> 0.10.10'
   s.add_runtime_dependency 'redis', '~> 3.0.2'
-  s.add_runtime_dependency 'persistent_http', '~> 1.0.5'
+  s.add_runtime_dependency 'persistent_http', '~> 1.0.6'
 
 end
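
The pessimistic constraint here pins the dependency to the 1.0.x series:

    s.add_runtime_dependency 'persistent_http', '~> 1.0.6'  # allows >= 1.0.6 and < 1.1

so the bump picks up 1.0.6 (and later 1.0.x patch releases) without risking an automatic 1.1 upgrade.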
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: log2json
 version: !ruby/object:Gem::Version
-  version: 0.1.15
+  version: 0.1.16
 prerelease:
 platform: ruby
 authors:
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-10-28 00:00:00.000000000 Z
+date: 2013-10-29 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: jls-grok
@@ -50,7 +50,7 @@ dependencies:
   requirements:
   - - ~>
     - !ruby/object:Gem::Version
-      version: 1.0.5
+      version: 1.0.6
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
@@ -58,7 +58,7 @@ dependencies:
   requirements:
   - - ~>
     - !ruby/object:Gem::Version
-      version: 1.0.5
+      version: 1.0.6
 description: ! "Log2json lets you read, filter and send logs as JSON objects via Unix
   pipes.\nIt is inspired by Logstash, and is meant to be compatible with it at the
   JSON\nevent/record level so that it can easily work with Kibana. \n\nReading logs