log2json 0.1.15 → 0.1.16

Files changed (3)
  1. data/bin/redis2es +99 -23
  2. data/log2json.gemspec +2 -2
  3. metadata +4 -4
data/bin/redis2es CHANGED
@@ -8,6 +8,7 @@ require 'json'
 require 'redis'
 require 'persistent_http' # 1.0.5
 # depends on gene_pool 1.3.0
+require 'fileutils'
 
 def show_usage_and_exit(status=1)
   puts "Usage: #{$0} <elasticsearch_host> [port]"
@@ -51,6 +52,14 @@ if FLUSH_TIMEOUT < 2 or FLUSH_TIMEOUT % 2 != 0
   exit 1
 end
 
+# Path to the redo log file that stores the log records that this process
+# failed to send to ES.
+const(:REDO_LOG, File.join(Dir.getwd, "redo.log"))
+
+# Note: The redo log will initially be written to a temporary file in a work folder.
+# When this process terminates, it will then be moved to the redo log.
+
+
 LOG = Logger.new(STDOUT)
 HTTP_LOG = Logger.new(STDOUT)
 HTTP_LOG.level = Logger::WARN
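
The note above describes a write-to-temp-then-move handoff. A rough sketch of that pattern (paths and record contents here are illustrative, not taken from redis2es):

    require 'fileutils'

    temp  = '/var/tmp/redo.log.part'          # appended to while the process runs
    final = File.join(Dir.getwd, 'redo.log')  # picked up on the next startup

    File.open(temp, 'a') { |f| f.puts('{"failed":"record"}') }
    # Handing off via a rename at exit means the main redo log never
    # appears in a half-written state.
    FileUtils.mv(temp, final) unless File.exist?(final)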
@@ -61,8 +70,10 @@ HTTP_LOG.level = Logger::WARN
 
   # this script is the only consumer of the pool and it uses only one connection at a time.
   :pool_size => 1,
-  # Note: if the ES server can handle the load, we might be able to run multiple instances
-  # of this script to process the queue and send logs to ES with multiple connections.
+  # Note: Currently, we are not using multiple worker threads with a connection pool.
+  # Instead, we run multiple worker processes of this script, each with a pool size of 1.
+  # This may change in the future, but for now it seems good enough, though we
+  # waste some memory.
 
   # only renew a connection that's been idle for 5 minutes.
   :idle_timeout => 60 * 5,
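
These options configure the shared persistent_http client; the surrounding constructor call is elided from the hunk. A minimal sketch of such a setup, with the name and Elasticsearch URL assumed for illustration:

    require 'persistent_http'

    @@http = PersistentHTTP.new(
      :name         => 'redis2es',               # illustrative
      :logger       => HTTP_LOG,
      :pool_size    => 1,                        # one connection per worker process
      :idle_timeout => 60 * 5,                   # renew connections idle for 5 minutes
      :url          => 'http://localhost:9200'   # illustrative ES endpoint
    )

    # Requests are then issued through the pool, e.g.:
    # @@http.request(Net::HTTP::Post.new('/_bulk'))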
@@ -76,22 +87,73 @@ HTTP_LOG.level = Logger::WARN
 @redis = Redis.new(host: REDIS_HOST, port: REDIS_PORT)
 
 def flush_queue
-  if not @queue.empty?
-    req = Net::HTTP::Post.new('/_bulk')
-    req.body = @queue.join("\n")
-    response = nil
-    begin
-      response = @@http.request(req)
-    ensure
-      if response.nil? or response.code != '200'
-        LOG.error(response.body) if not response.nil?
-        LOG.warn("Failed sending bulk request(#{@queue.size} records) to ES! Logging the request body instead.")
-        LOG.info("Failed request body:\n"+req.body)
-        #FIXME: write the queue to another file. This would allow us to resend these records on startup.
+  return if @queue.empty?
+
+  req = Net::HTTP::Post.new('/_bulk')
+  req.body = @queue.join('')
+  response = nil
+  begin
+    response = @@http.request(req)
+  ensure
+    unless response != nil and response.code == '200'
+      LOG.error(response.body) if not response.nil?
+      LOG.warn("Failed sending bulk request(#{@queue.size} records) to ES!")
+      LOG.error("Stack trace:\n" + $!.backtrace[1..-1].join("\n")) if $!
+      begin
+        LOG.info("Dumping #{@queue.size} log records in the failed request to the temp redo log...")
+        dump_queue()
+      rescue
+        LOG.error("Error stashing failed request! Dumping to stderr:\n"+req.body)
       end
     end
-    @queue.clear()
   end
+  @queue.clear()
+end
+
+def redo_tempfile_path
+  tmpdir = ENV['WORK_DIR'] || ENV['TMPDIR'] || '/var/tmp'
+  File.join(tmpdir, File.basename(REDO_LOG)+'.part')
+end
+
+# dump the queue to a temporary file
+def dump_queue
+  open(redo_tempfile_path, "a:#{LOG_ENCODING}") do |file|
+    file.write("# #{Time.now} #{@queue.size} records ---\n")
+    file.write(@queue.join(''))
+  end
+end
+
+def handle_redo_log
+  return unless File.exist?(REDO_LOG)
+  if File.exist?(redo_tempfile_path)
+    raise "Temp redo log(#{redo_tempfile_path}) exists!"
+    # In this case, the user needs to decide which of the redo log files
+    # (the main one, the temporary one, or both) is good, and produce only
+    # the main redo log file to be retried.
+  end
+  i = 0
+  record = ''
+  File.open(REDO_LOG, "r:#{LOG_ENCODING}").each_line do |line|
+    next if line == "\n" or line =~ /^#/
+    if i % 2 == 0
+      if not record.empty?
+        @queue << record
+        record = ''
+      end
+      if @queue.size == FLUSH_SIZE
+        LOG.info("Flushing #{@queue.size} records from the redo log(#{REDO_LOG})...")
+        flush_queue() rescue @queue.clear()
+      end
+    else
+      record << line
+      i += 1
+    end
+  end
+  if not @queue.empty?
+    LOG.info("Flushing #{@queue.size} records from the redo log(#{REDO_LOG})...")
+    flush_queue() rescue @queue.clear()
+  end
+  FileUtils.rm(REDO_LOG)
 end
 
 # Determines the name of the index in ElasticSearch from the given log record's timestamp.
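
Given how enqueue (below) and dump_queue compose the data, the redo log is a sequence of "#" marker lines plus header/record line pairs, roughly like this (timestamp, index name, and record fields are illustrative):

    # 2013-10-29 12:00:00 -0400 1 records ---
    {"index":{"_index":"logs-2013.10.29","_type":"syslog"}}
    {"@type":"syslog","@timestamp":"2013-10-29T12:00:00Z","@message":"hello"}

On startup, handle_redo_log skips the marker and blank lines, re-queues the pairs, and flushes them back to ES in FLUSH_SIZE batches before deleting the file.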
@@ -106,15 +168,21 @@ def es_index(tstamp)
 end
 
 def enqueue(logstr)
+  logstr.force_encoding(LOG_ENCODING)
+
+  type = tstamp = nil
+  type = $1 if logstr =~ /"@type":\s*"([^"]+)"/
+  tstamp = $1 if logstr =~ /"@timestamp":\s*"([^"]+)"/
+  if not (type and tstamp)
+    LOG.warn("Failed to extract @type or @timestamp field using regex, falling back to parsing json...")
+    log = JSON.load(logstr)
+    type, tstamp = log["@type"], log["@timestamp"]
+  end
+
   # add header for each entry according to http://www.elasticsearch.org/guide/reference/api/bulk/
-  log = JSON.load(logstr.force_encoding(LOG_ENCODING))
-  #FIXME: we can parse it ourselves. we only need to extract the @timestamp value.
-
   @queue << (
-    {"index" => {"_index" => es_index(log["@timestamp"]), "_type" => log["@type"]}}.to_json <<
-    "\n" <<
-    logstr <<
-    "\n"
+    {"index" => {"_index" => es_index(tstamp), "_type" => type}}.to_json << "\n" <<
+    logstr << "\n"
   )
 end
 
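The regex fast path above avoids a full JSON parse per record; JSON.load remains as a fallback. A quick check of the extraction on an assumed sample record:

    logstr = '{"@type":"syslog","@timestamp":"2013-10-29T12:00:00Z","@message":"hi"}'
    type   = $1 if logstr =~ /"@type":\s*"([^"]+)"/
    tstamp = $1 if logstr =~ /"@timestamp":\s*"([^"]+)"/
    # type   => "syslog"
    # tstamp => "2013-10-29T12:00:00Z"

The fallback kicks in only when a pattern fails to match at all, e.g. for a record missing either field.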
@@ -172,11 +240,19 @@ def main
   end # loop
 end
 
+
+handle_redo_log()
 begin
   main()
 ensure
   LOG.warn("Terminating! Flushing the queue(size=#{@queue.size})...")
   flush_queue()
+  if File.exist?(REDO_LOG) and File.exist?(redo_tempfile_path)
+    LOG.warn("Redo log file(#{REDO_LOG}) already exists!? " +
+             "Please remove it so new redo logs can be written!")
+  else
+    FileUtils.mv(redo_tempfile_path, REDO_LOG) rescue :pass
+  end
 end
 
data/log2json.gemspec CHANGED
@@ -1,6 +1,6 @@
 Gem::Specification.new do |s|
   s.name = 'log2json'
-  s.version = '0.1.15'
+  s.version = '0.1.16'
   s.summary = "Read, filter and ship logs. ie, poor man's roll-your-own, light-weight logstash replacement."
   s.description = IO.read(File.join(File.dirname(__FILE__), 'README'))
   s.authors = ['Jack Kuan']
@@ -13,6 +13,6 @@ Gem::Specification.new do |s|
 
   s.add_runtime_dependency 'jls-grok', '~> 0.10.10'
   s.add_runtime_dependency 'redis', '~> 3.0.2'
-  s.add_runtime_dependency 'persistent_http', '~> 1.0.5'
+  s.add_runtime_dependency 'persistent_http', '~> 1.0.6'
 
 end
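
The pessimistic constraint here pins the dependency to the 1.0.x series:

    s.add_runtime_dependency 'persistent_http', '~> 1.0.6'  # allows >= 1.0.6 and < 1.1

so the bump picks up 1.0.6 (and later 1.0.x patch releases) without risking an automatic 1.1 upgrade.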
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: log2json
 version: !ruby/object:Gem::Version
-  version: 0.1.15
+  version: 0.1.16
 prerelease:
 platform: ruby
 authors:
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-10-28 00:00:00.000000000 Z
+date: 2013-10-29 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: jls-grok
@@ -50,7 +50,7 @@ dependencies:
   requirements:
   - - ~>
     - !ruby/object:Gem::Version
-      version: 1.0.5
+      version: 1.0.6
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
@@ -58,7 +58,7 @@ dependencies:
   requirements:
   - - ~>
     - !ruby/object:Gem::Version
-      version: 1.0.5
+      version: 1.0.6
 description: ! "Log2json lets you read, filter and send logs as JSON objects via Unix
   pipes.\nIt is inspired by Logstash, and is meant to be compatible with it at the
   JSON\nevent/record level so that it can easily work with Kibana. \n\nReading logs