log2json 0.1.15 → 0.1.16
- data/bin/redis2es +99 -23
- data/log2json.gemspec +2 -2
- metadata +4 -4
data/bin/redis2es
CHANGED
@@ -8,6 +8,7 @@ require 'json'
 require 'redis'
 require 'persistent_http' # 1.0.5
 # depends on gene_pool 1.3.0
+require 'fileutils'
 
 def show_usage_and_exit(status=1)
   puts "Usage: #{$0} <elasticsearch_host> [port]"
@@ -51,6 +52,14 @@ if FLUSH_TIMEOUT < 2 or FLUSH_TIMEOUT % 2 != 0
   exit 1
 end
 
+# path to the redo log file that stores the log records that this process
+# failed to send to ES.
+const(:REDO_LOG, File.join(Dir.getwd, "redo.log"))
+
+# Note: Redo log will initially be written to a temporary file in a work folder.
+# At the termination of this process, it will then be moved to the redo log.
+
+
 LOG = Logger.new(STDOUT)
 HTTP_LOG = Logger.new(STDOUT)
 HTTP_LOG.level = Logger::WARN
@@ -61,8 +70,10 @@ HTTP_LOG.level = Logger::WARN
 
   # this script is the only consumer of the pool and it uses only one connection at a time.
   :pool_size => 1,
-  # Note:
-  #
+  # Note: Currently, we are not using multiple worker threads with a connection pool.
+  # Instead, we run multiple worker processes of this script, each with a pool size of 1.
+  # In the future this may change, but for now this seems to be good enough though we
+  # waste some memory.
 
   # only renew a connection that's been idle for 5 mintues.
   :idle_timeout => 60 * 5,
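The :pool_size and :idle_timeout options above feed the persistent_http connection pool. For orientation only, a hypothetical construction using these settings might look like the following sketch; the pool name, logger, and URL are placeholders, not the script's actual values:

    require 'logger'
    require 'persistent_http'

    # Sketch only: the option names are the ones shown in the hunk above; the
    # URL stands in for the real <elasticsearch_host> [port] arguments.
    http = PersistentHTTP.new(
      :name         => 'redis2es',
      :logger       => Logger.new(STDOUT),
      :pool_size    => 1,          # one connection; concurrency comes from running more processes
      :idle_timeout => 60 * 5,     # renew a connection that's been idle for 5 minutes
      :url          => 'http://localhost:9200'
    )
    # Requests are then issued as in flush_queue below, e.g. http.request(req)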
@@ -76,22 +87,73 @@ HTTP_LOG.level = Logger::WARN
 @redis = Redis.new(host: REDIS_HOST, port: REDIS_PORT)
 
 def flush_queue
-  if
-
-
-
-
-
-
-
-
-
-
-
+  return if @queue.empty?
+
+  req = Net::HTTP::Post.new('/_bulk')
+  req.body = @queue.join('')
+  response = nil
+  begin
+    response = @@http.request(req)
+  ensure
+    unless response != nil and response.code == '200'
+      LOG.error(response.body) if not response.nil?
+      LOG.warn("Failed sending bulk request(#{@queue.size} records) to ES!")
+      LOG.error("Stack trace:\n" + $!.backtrace[1..-1].join("\n")) if $!
+      begin
+        LOG.info("Dumping #{@queue.size} log records in the failed request to the temp redo log...")
+        dump_queue()
+      rescue
+        LOG.error("Error stashing failed request! Dumping to stderr:\n"+req.body)
       end
     end
-  @queue.clear()
   end
+  @queue.clear()
+end
+
+def redo_tempfile_path
+  tmpdir = ENV['WORK_DIR'] || ENV['TMPDIR'] || '/var/tmp'
+  File.join(tmpdir, File.basename(REDO_LOG)+'.part')
+end
+
+# dump the queue to a temporary file
+def dump_queue
+  open(redo_tempfile_path, "a:#{LOG_ENCODING}") do |file|
+    file.write("# #{Time.now} #{@queue.size} records ---\n")
+    file.write(@queue.join(''))
+  end
+end
+
+def handle_redo_log
+  return unless File.exist?(REDO_LOG)
+  if File.exist?(redo_tempfile_path)
+    raise "Temp redo log(#{redo_tempfile_path}) exists!"
+    # In this case, the user needs to decide which redo log files(
+    # the main one or the temporary one, or both) is good, and produces only the main
+    # redo log file to be retried.
+  end
+  i = 0
+  record = ''
+  File.open(REDO_LOG, "r:#{LOG_ENCODING}").each_line do |line|
+    next if line == "\n" or line =~ /^#/
+    if i % 2 == 0
+      if not record.empty?
+        @queue << record
+        record = ''
+      end
+      if @queue.size == FLUSH_SIZE
+        LOG.info("Flushing #{@queue.size} records from the redo log(#{REDO_LOG})...")
+        flush_queue() rescue @queue.clear()
+      end
+    else
+      record << line
+      i += 1
+    end
+  end
+  if not @queue.empty?
+    LOG.info("Flushing #{@queue.size} records from the redo log(#{REDO_LOG})...")
+    flush_queue() rescue @queue.clear()
+  end
+  FileUtils.rm(REDO_LOG)
 end
 
 # Determines the name of the index in ElasticSearch from the given log record's timestamp.
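To make the new redo-log code path easier to follow: dump_queue appends the in-memory queue to a temp file, and each queued entry is the two-line Elasticsearch bulk payload built by enqueue (next hunk), which is why handle_redo_log skips "#" comment lines and treats what remains as alternating action/record lines. A standalone sketch of that round trip, with made-up values and UTF-8 standing in for LOG_ENCODING:

    require 'json'

    queue = []  # each entry is the 2-line bulk payload that enqueue builds
    log = {"@type" => "syslog", "@timestamp" => "2013-10-29T12:00:00Z"}
    queue << ({"index" => {"_index" => "logstash-2013.10.29",
                           "_type"  => log["@type"]}}.to_json << "\n" <<
              log.to_json << "\n")

    # dump_queue: append a "# ..." header (skipped on reload) plus the raw entries.
    part = "redo.log.part"  # stands in for redo_tempfile_path
    File.open(part, "a:UTF-8") do |file|
      file.write("# #{Time.now} #{queue.size} records ---\n")
      file.write(queue.join(''))
    end

    # handle_redo_log then walks the file: blank and "#" lines are skipped, and
    # the rest alternate between the bulk action line and the log record line.
    File.open(part, "r:UTF-8").each_line do |line|
      next if line == "\n" or line =~ /^#/
      print line
    end
    File.delete(part)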
@@ -106,15 +168,21 @@ def es_index(tstamp)
 end
 
 def enqueue(logstr)
+  logstr.force_encoding(LOG_ENCODING)
+
+  type = tstamp = nil
+  type = $1 if logstr =~ /"@type":\s*"([^"]+)"/
+  tstamp = $1 if logstr =~ /"@timestamp":\s*"([^"]+)"/
+  if not (type and tstamp)
+    LOG.warn("Failed to extract @type or @timestamp field using regex, falling back to parsing json...")
+    log = JSON.load(logstr)
+    type, tstamp = log["@type"], log["@timestamp"]
+  end
+
   # add header for each entry according to http://www.elasticsearch.org/guide/reference/api/bulk/
-  log = JSON.load(logstr.force_encoding(LOG_ENCODING))
-  #FIXME: we can parse it ourselves. we only need to extract the @timestamp value.
-
   @queue << (
-    {"index" => {"_index" => es_index(
-    "\n"
-    logstr <<
-    "\n"
+    {"index" => {"_index" => es_index(tstamp), "_type" => type}}.to_json << "\n" <<
+    logstr << "\n"
   )
 end
 
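The enqueue rewrite above replaces an unconditional JSON.load with two cheap regex probes, parsing the full record only when a field can't be found. A minimal, self-contained sketch of that fast-path/fallback logic, using made-up log lines:

    require 'json'

    # Mirrors the extraction logic in enqueue above: regex first, JSON fallback.
    def extract_type_and_tstamp(logstr)
      type = tstamp = nil
      type = $1 if logstr =~ /"@type":\s*"([^"]+)"/
      tstamp = $1 if logstr =~ /"@timestamp":\s*"([^"]+)"/
      if not (type and tstamp)
        log = JSON.load(logstr)
        type, tstamp = log["@type"], log["@timestamp"]
      end
      [type, tstamp]
    end

    # Fast path: both fields are found by regex, no JSON parsing needed.
    p extract_type_and_tstamp('{"@type": "syslog", "@timestamp": "2013-10-29T12:00:00Z"}')
    # Fallback: "@type" is missing, so the whole record is parsed as JSON.
    p extract_type_and_tstamp('{"@timestamp": "2013-10-29T12:00:00Z"}')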
@@ -172,11 +240,19 @@ def main
   end # loop
 end
 
+
+handle_redo_log()
 begin
   main()
 ensure
-  LOG.warn("Terminating!
+  LOG.warn("Terminating! Flusing the queue(size=#{@queue.size})...")
   flush_queue()
+  if File.exist?(REDO_LOG) and File.exist?(redo_tempfile_path)
+    LOG.warn("Redo log file(#{REDO_LOG}) already exists!? " +
+             "Please remove it so new redo logs can be written!")
+  else
+    FileUtils.mv(redo_tempfile_path, REDO_LOG) rescue :pass
+  end
 end
 
 
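Taken together, the redis2es changes give the shipper a crash-safe retry loop: handle_redo_log() replays any redo log left behind by a previous run before main() starts, and the ensure block flushes the queue and promotes the temp redo log on the way out. A condensed sketch of that shutdown handoff, with illustrative paths in place of the script's REDO_LOG and redo_tempfile_path:

    require 'fileutils'

    redo_log  = File.join(Dir.getwd, "redo.log")  # final redo log
    temp_redo = redo_log + ".part"                # written during the run

    # Keep an existing redo_log intact (it still awaits a retry); otherwise
    # promote the temp file so the next run's handle_redo_log picks it up.
    if File.exist?(redo_log) and File.exist?(temp_redo)
      warn "Redo log #{redo_log} already exists; leaving #{temp_redo} in place."
    else
      FileUtils.mv(temp_redo, redo_log) rescue :pass  # no-op if nothing was dumped
    end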
data/log2json.gemspec
CHANGED
@@ -1,6 +1,6 @@
 Gem::Specification.new do |s|
   s.name = 'log2json'
-  s.version = '0.1.15'
+  s.version = '0.1.16'
   s.summary = "Read, filter and ship logs. ie, poor man's roll-your-own, light-weight logstash replacement."
   s.description = IO.read(File.join(File.dirname(__FILE__), 'README'))
   s.authors = ['Jack Kuan']
@@ -13,6 +13,6 @@ Gem::Specification.new do |s|
 
   s.add_runtime_dependency 'jls-grok', '~> 0.10.10'
   s.add_runtime_dependency 'redis', '~> 3.0.2'
-  s.add_runtime_dependency 'persistent_http', '~> 1.0.5'
+  s.add_runtime_dependency 'persistent_http', '~> 1.0.6'
 
 end
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: log2json
 version: !ruby/object:Gem::Version
-  version: 0.1.15
+  version: 0.1.16
 prerelease:
 platform: ruby
 authors:
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-10-
+date: 2013-10-29 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: jls-grok
@@ -50,7 +50,7 @@ dependencies:
     requirements:
     - - ~>
       - !ruby/object:Gem::Version
-        version: 1.0.5
+        version: 1.0.6
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
@@ -58,7 +58,7 @@ dependencies:
     requirements:
    - - ~>
      - !ruby/object:Gem::Version
-        version: 1.0.5
+        version: 1.0.6
 description: ! "Log2json lets you read, filter and send logs as JSON objects via Unix
   pipes.\nIt is inspired by Logstash, and is meant to be compatible with it at the
   JSON\nevent/record level so that it can easily work with Kibana. \n\nReading logs