log2json 0.1.15 → 0.1.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/redis2es +99 -23
- data/log2json.gemspec +2 -2
- metadata +4 -4
data/bin/redis2es
CHANGED
@@ -8,6 +8,7 @@ require 'json'
|
|
8
8
|
require 'redis'
|
9
9
|
require 'persistent_http' # 1.0.5
|
10
10
|
# depends on gene_pool 1.3.0
|
11
|
+
require 'fileutils'
|
11
12
|
|
12
13
|
def show_usage_and_exit(status=1)
|
13
14
|
puts "Usage: #{$0} <elasticsearch_host> [port]"
|
@@ -51,6 +52,14 @@ if FLUSH_TIMEOUT < 2 or FLUSH_TIMEOUT % 2 != 0
|
|
51
52
|
exit 1
|
52
53
|
end
|
53
54
|
|
55
|
+
# path to the redo log file that stores the log records that this process
|
56
|
+
# failed to send to ES.
|
57
|
+
const(:REDO_LOG, File.join(Dir.getwd, "redo.log"))
|
58
|
+
|
59
|
+
# Note: Redo log will initially be written to a temporary file in a work folder.
|
60
|
+
# At the termination of this process, it will then be moved to the redo log.
|
61
|
+
|
62
|
+
|
54
63
|
LOG = Logger.new(STDOUT)
|
55
64
|
HTTP_LOG = Logger.new(STDOUT)
|
56
65
|
HTTP_LOG.level = Logger::WARN
|
@@ -61,8 +70,10 @@ HTTP_LOG.level = Logger::WARN
|
|
61
70
|
|
62
71
|
# this script is the only consumer of the pool and it uses only one connection at a time.
|
63
72
|
:pool_size => 1,
|
64
|
-
# Note:
|
65
|
-
#
|
73
|
+
# Note: Currently, we are not using multiple worker threads with a connection pool.
|
74
|
+
# Instead, we run multiple worker processes of this script, each with a pool size of 1.
|
75
|
+
# In the future this may change, but for now this seems to be good enough though we
|
76
|
+
# waste some memory.
|
66
77
|
|
67
78
|
# only renew a connection that's been idle for 5 mintues.
|
68
79
|
:idle_timeout => 60 * 5,
|
@@ -76,22 +87,73 @@ HTTP_LOG.level = Logger::WARN
|
|
76
87
|
@redis = Redis.new(host: REDIS_HOST, port: REDIS_PORT)
|
77
88
|
|
78
89
|
def flush_queue
|
79
|
-
if
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
90
|
+
return if @queue.empty?
|
91
|
+
|
92
|
+
req = Net::HTTP::Post.new('/_bulk')
|
93
|
+
req.body = @queue.join('')
|
94
|
+
response = nil
|
95
|
+
begin
|
96
|
+
response = @@http.request(req)
|
97
|
+
ensure
|
98
|
+
unless response != nil and response.code == '200'
|
99
|
+
LOG.error(response.body) if not response.nil?
|
100
|
+
LOG.warn("Failed sending bulk request(#{@queue.size} records) to ES!")
|
101
|
+
LOG.error("Stack trace:\n" + $!.backtrace[1..-1].join("\n")) if $!
|
102
|
+
begin
|
103
|
+
LOG.info("Dumping #{@queue.size} log records in the failed request to the temp redo log...")
|
104
|
+
dump_queue()
|
105
|
+
rescue
|
106
|
+
LOG.error("Error stashing failed request! Dumping to stderr:\n"+req.body)
|
91
107
|
end
|
92
108
|
end
|
93
|
-
@queue.clear()
|
94
109
|
end
|
110
|
+
@queue.clear()
|
111
|
+
end
|
112
|
+
|
113
|
+
def redo_tempfile_path
|
114
|
+
tmpdir = ENV['WORK_DIR'] || ENV['TMPDIR'] || '/var/tmp'
|
115
|
+
File.join(tmpdir, File.basename(REDO_LOG)+'.part')
|
116
|
+
end
|
117
|
+
|
118
|
+
# dump the queue to a temporary file
|
119
|
+
def dump_queue
|
120
|
+
open(redo_tempfile_path, "a:#{LOG_ENCODING}") do |file|
|
121
|
+
file.write("# #{Time.now} #{@queue.size} records ---\n")
|
122
|
+
file.write(@queue.join(''))
|
123
|
+
end
|
124
|
+
end
|
125
|
+
|
126
|
+
def handle_redo_log
|
127
|
+
return unless File.exist?(REDO_LOG)
|
128
|
+
if File.exist?(redo_tempfile_path)
|
129
|
+
raise "Temp redo log(#{redo_tempfile_path}) exists!"
|
130
|
+
# In this case, the user needs to decide which redo log files(
|
131
|
+
# the main one or the temporary one, or both) is good, and produces only the main
|
132
|
+
# redo log file to be retried.
|
133
|
+
end
|
134
|
+
i = 0
|
135
|
+
record = ''
|
136
|
+
File.open(REDO_LOG, "r:#{LOG_ENCODING}").each_line do |line|
|
137
|
+
next if line == "\n" or line =~ /^#/
|
138
|
+
if i % 2 == 0
|
139
|
+
if not record.empty?
|
140
|
+
@queue << record
|
141
|
+
record = ''
|
142
|
+
end
|
143
|
+
if @queue.size == FLUSH_SIZE
|
144
|
+
LOG.info("Flushing #{@queue.size} records from the redo log(#{REDO_LOG})...")
|
145
|
+
flush_queue() rescue @queue.clear()
|
146
|
+
end
|
147
|
+
else
|
148
|
+
record << line
|
149
|
+
i += 1
|
150
|
+
end
|
151
|
+
end
|
152
|
+
if not @queue.empty?
|
153
|
+
LOG.info("Flushing #{@queue.size} records from the redo log(#{REDO_LOG})...")
|
154
|
+
flush_queue() rescue @queue.clear()
|
155
|
+
end
|
156
|
+
FileUtils.rm(REDO_LOG)
|
95
157
|
end
|
96
158
|
|
97
159
|
# Determines the name of the index in ElasticSearch from the given log record's timestamp.
|
@@ -106,15 +168,21 @@ def es_index(tstamp)
|
|
106
168
|
end
|
107
169
|
|
108
170
|
def enqueue(logstr)
|
171
|
+
logstr.force_encoding(LOG_ENCODING)
|
172
|
+
|
173
|
+
type = tstamp = nil
|
174
|
+
type = $1 if logstr =~ /"@type":\s*"([^"]+)"/
|
175
|
+
tstamp = $1 if logstr =~ /"@timestamp":\s*"([^"]+)"/
|
176
|
+
if not (type and tstamp)
|
177
|
+
LOG.warn("Failed to extract @type or @timestamp field using regex, falling back to parsing json...")
|
178
|
+
log = JSON.load(logstr)
|
179
|
+
type, tstamp = log["@type"], log["@timestamp"]
|
180
|
+
end
|
181
|
+
|
109
182
|
# add header for each entry according to http://www.elasticsearch.org/guide/reference/api/bulk/
|
110
|
-
log = JSON.load(logstr.force_encoding(LOG_ENCODING))
|
111
|
-
#FIXME: we can parse it ourselves. we only need to extract the @timestamp value.
|
112
|
-
|
113
183
|
@queue << (
|
114
|
-
{"index" => {"_index" => es_index(
|
115
|
-
"\n"
|
116
|
-
logstr <<
|
117
|
-
"\n"
|
184
|
+
{"index" => {"_index" => es_index(tstamp), "_type" => type}}.to_json << "\n" <<
|
185
|
+
logstr << "\n"
|
118
186
|
)
|
119
187
|
end
|
120
188
|
|
@@ -172,11 +240,19 @@ def main
|
|
172
240
|
end # loop
|
173
241
|
end
|
174
242
|
|
243
|
+
|
244
|
+
handle_redo_log()
|
175
245
|
begin
|
176
246
|
main()
|
177
247
|
ensure
|
178
|
-
LOG.warn("Terminating!
|
248
|
+
LOG.warn("Terminating! Flusing the queue(size=#{@queue.size})...")
|
179
249
|
flush_queue()
|
250
|
+
if File.exist?(REDO_LOG) and File.exist?(redo_tempfile_path)
|
251
|
+
LOG.warn("Redo log file(#{REDO_LOG}) already exists!? " +
|
252
|
+
"Please remove it so new redo logs can be written!")
|
253
|
+
else
|
254
|
+
FileUtils.mv(redo_tempfile_path, REDO_LOG) rescue :pass
|
255
|
+
end
|
180
256
|
end
|
181
257
|
|
182
258
|
|
data/log2json.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = 'log2json'
|
3
|
-
s.version = '0.1.15'
|
3
|
+
s.version = '0.1.16'
|
4
4
|
s.summary = "Read, filter and ship logs. ie, poor man's roll-your-own, light-weight logstash replacement."
|
5
5
|
s.description = IO.read(File.join(File.dirname(__FILE__), 'README'))
|
6
6
|
s.authors = ['Jack Kuan']
|
@@ -13,6 +13,6 @@ Gem::Specification.new do |s|
|
|
13
13
|
|
14
14
|
s.add_runtime_dependency 'jls-grok', '~> 0.10.10'
|
15
15
|
s.add_runtime_dependency 'redis', '~> 3.0.2'
|
16
|
-
s.add_runtime_dependency 'persistent_http', '~> 1.0.5'
|
16
|
+
s.add_runtime_dependency 'persistent_http', '~> 1.0.6'
|
17
17
|
|
18
18
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: log2json
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.15
|
4
|
+
version: 0.1.16
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-10-
|
12
|
+
date: 2013-10-29 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: jls-grok
|
@@ -50,7 +50,7 @@ dependencies:
|
|
50
50
|
requirements:
|
51
51
|
- - ~>
|
52
52
|
- !ruby/object:Gem::Version
|
53
|
-
version: 1.0.5
|
53
|
+
version: 1.0.6
|
54
54
|
type: :runtime
|
55
55
|
prerelease: false
|
56
56
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -58,7 +58,7 @@ dependencies:
|
|
58
58
|
requirements:
|
59
59
|
- - ~>
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: 1.0.5
|
61
|
+
version: 1.0.6
|
62
62
|
description: ! "Log2json lets you read, filter and send logs as JSON objects via Unix
|
63
63
|
pipes.\nIt is inspired by Logstash, and is meant to be compatible with it at the
|
64
64
|
JSON\nevent/record level so that it can easily work with Kibana. \n\nReading logs
|