tailf2kafka 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/tailf2kafka +33 -27
- data/bin/tailf2kafka.save +302 -0
- data/lib/tailf2kafka/version.rb +1 -1
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 36ed03a088f2e37c9e35773e233159bc1c32cf2d
+  data.tar.gz: 203b2fd8a437e4d8baa555f46a246411eed0cb9e
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 5554c921a0ce29a3bf4f3548e8b180a46545ace278163b4cebaff11255e709565730f5de25263fc4fa44636a345eb6eec2a187e7dd012dc16bdd109bbb02c484
+  data.tar.gz: 11bc611eac30966753a8327acc0a7f97304d6b1c8757893ef7316b6cf8f1acfc83fdc7a9981b46ccb720a63bb3e5940657ddc8d4ed3795b838d590c8f3c1d485
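The checksums.yaml entries are SHA1 and SHA512 digests of the two archives packed inside the published .gem (metadata.gz and data.tar.gz), so a version bump always rewrites all four values. A minimal sketch of recomputing them locally; the gem path is an assumed download location, not something this diff provides:

    require 'digest'
    require 'rubygems/package'

    gem_path = 'tailf2kafka-0.1.8.gem' # hypothetical local copy

    # A .gem is a plain tar archive; checksums.yaml digests two of its entries.
    File.open(gem_path, 'rb') do |io|
      Gem::Package::TarReader.new(io).each do |entry|
        next unless %w[metadata.gz data.tar.gz].include?(entry.full_name)
        body = entry.read
        puts "#{entry.full_name} SHA1:   #{Digest::SHA1.hexdigest(body)}"
        puts "#{entry.full_name} SHA512: #{Digest::SHA512.hexdigest(body)}"
      end
    end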
data/bin/tailf2kafka
CHANGED
@@ -205,7 +205,7 @@ def time_pattern_to_regexp(pattern)
   end
 end
 
-#Scan existing files that match watched prefixes and start failing them
+#Scan existing files that match watched prefixes and start tailing them
 @settings[:tailf][:files].each do |tailf_file|
   tailf_file[:prefix] = File.expand_path(tailf_file[:prefix])
   dir = File.dirname(tailf_file[:prefix])
@@ -257,46 +257,52 @@ end
 
 @timers = Timers::Group.new
 @uploads_timer = @timers.every(@flush_interval) { write_position_file }
-@delete_old_tailed_files_timer = @timers.every(60) { delete_old_tailed_files } if @delete_old_tailed_files
+@timers.every(60) { delete_old_tailed_files } if @delete_old_tailed_files
 Thread.new { loop { @timers.wait } }
 
-@dirs.each_key do |dir|
-
-  @create_notifier.watch(dir, :create, :moved_to) do |event|
-    @mutex.synchronize do
-      path = "#{dir}/#{event.name}"
-      match = @dirs[dir].detect{|h| event.name.match(Regexp.new(h[:prefix] + time_pattern_to_regexp(h[:pattern]) + h[:suffix]))}
-      if match
-        unless File.directory?(path)
-          unless @threads.has_key?(path)
-            @logger.info("File #{event.name} was created in / moved into watched dir #{dir}")
-            @files[path] = { :pattern => match[:pattern], :topic => match[:topic], :inode => File.stat(path).ino, :offset => 0 }
-            @threads[path] = Thread.new { tailf(path) }
+def setup_watchers
+  @dirs.each_key do |dir|
+
+    @create_notifier.watch(dir, :create, :moved_to) do |event|
+      @mutex.synchronize do
+        path = "#{dir}/#{event.name}"
+        match = @dirs[dir].detect{|h| event.name.match(Regexp.new(h[:prefix] + time_pattern_to_regexp(h[:pattern]) + h[:suffix]))}
+        if match
+          unless File.directory?(path)
+            unless @threads.has_key?(path)
+              @logger.info("File #{event.name} was created in / moved into watched dir #{dir}")
+              @files[path] = { :pattern => match[:pattern], :topic => match[:topic], :inode => File.stat(path).ino, :offset => 0 }
+              @threads[path] = Thread.new { tailf(path) }
+            end
           end
         end
       end
     end
-  end
 
-  @delete_notifier.watch(dir, :delete, :moved_from) do |event|
-    @mutex.synchronize do
-      path = "#{dir}/#{event.name}"
-      if @threads.has_key?(path)
-        @logger.info("File #{event.name} was deleted / moved from watched dir #{dir}")
-        if @threads[path].alive?
-          @threads[path].terminate
-          @threads[path].join
+    @delete_notifier.watch(dir, :delete, :moved_from) do |event|
+      @mutex.synchronize do
+        path = "#{dir}/#{event.name}"
+        if @threads.has_key?(path)
+          @logger.info("File #{event.name} was deleted / moved from watched dir #{dir}")
+          if @threads[path].alive?
+            @threads[path].terminate
+            @threads[path].join
+          end
+          @threads.delete(path)
+          @files[path][:fd].close unless @files[path][:fd].closed?
+          @files.delete(path)
         end
-        @threads.delete(path)
-        @files[path][:fd].close unless @files[path][:fd].closed?
-        @files.delete(path)
       end
     end
-  end
 
+  end
 end
 
+setup_watchers
+
 Thread.new { @create_notifier.run }
 Thread.new { @delete_notifier.run }
 
+@timers.every(60) { setup_watchers }
+
 @tailf_notifier.run
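The change above does two things: the periodic delete_old_tailed_files timer no longer keeps its handle in an unused variable, and inotify watch registration moves from straight-line startup code into a setup_watchers method that runs once at boot and again every 60 seconds, so watches can be (re)established after startup; the delete handler now also cleans up @threads and @files only for paths it actually tracked. A reduced sketch of the same re-registration pattern with rb-inotify and timers; the directory path and the dedup guard are illustrative additions, not code from the gem:

    require 'rb-inotify'
    require 'timers'

    notifier = INotify::Notifier.new
    watched  = {}

    # Re-runnable registration; the guard avoids stacking duplicate callbacks
    # when the same directory is registered again from the timer.
    register = lambda do |dir|
      next if watched[dir] || !File.directory?(dir)
      watched[dir] = notifier.watch(dir, :create, :moved_to) do |event|
        puts "new file: #{File.join(dir, event.name)}"
      end
    end

    register.call('/var/log/myapp')

    timers = Timers::Group.new
    timers.every(60) { register.call('/var/log/myapp') }
    Thread.new { loop { timers.wait } }

    notifier.run # blocks, dispatching inotify events

Note that the gem's setup_watchers has no such guard, so each 60-second rerun registers the same directories with inotify again.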
data/bin/tailf2kafka.save
ADDED
@@ -0,0 +1,302 @@
+#!/usr/bin/env ruby
+
+require 'optparse'
+require 'poseidon'
+require 'yaml'
+require 'hash_symbolizer'
+require 'schash'
+require 'rb-inotify'
+require 'timers'
+require 'socket'
+require 'fileutils'
+require 'logger'
+require 'mixlib/shellout'
+
+$stdout.sync = true
+
+Thread.abort_on_exception = true
+
+@config = nil
+
+loglevels = {
+  :debug => Logger::DEBUG,
+  :info => Logger::INFO,
+  :warn => Logger::WARN,
+  :error => Logger::Error,
+  :fatal => Logger::FATAL,
+  :unknown => Logger::UNKNOWN
+}
+
+@loglevel = Logger::INFO
+
+opts = OptionParser.new
+opts.banner = "Usage: #{$0} [options]"
+opts.on( '--config PATH', String, 'Path to settings config' ) { |c| @config = c }
+opts.on( '--log-level [LEVEL]', [:debug, :info, :warn, :error, :fatal, :unknown] ) { |l| @loglevel = loglevels[l] }
+opts.on( '-h', '--help', 'Display this screen' ) { puts opts; exit 0 }
+opts.parse!
+
+unless @config
+  puts opts
+  exit 1
+end
+
+@logger = Logger.new(STDOUT)
+
+@settings = YAML.load_file(@config).symbolize_keys(true)
+
+validator = Schash::Validator.new do
+  {
+    tailf: {
+      files: array_of({
+        topic: string,
+        prefix: string,
+        suffix: optional(string),
+        time_pattern: string,
+      }),
+      position_file: string,
+      flush_interval: integer,
+      max_batch_lines: integer,
+      from_begining: boolean,
+      delete_old_tailed_files: optional(boolean),
+      post_delete_command: optional(string),
+    },
+    kafka: {
+      brokers: array_of(string),
+      producer_type: match(/^(sync|async)$/),
+      produce: optional(boolean),
+    },
+  }
+end
+
+unless validator.validate(@settings).empty?
+  @logger.error("ERROR: bad settings")
+  @logger.error(validator.validate(@settings))
+  exit 1
+end
+
+@settings[:tailf][:files] = @settings[:tailf][:files].map{|h| h.symbolize_keys(true)}
+
+@mutex = Mutex.new
+
+@create_notifier = INotify::Notifier.new
+@delete_notifier = INotify::Notifier.new
+@tailf_notifier = INotify::Notifier.new
+
+@dirs = {}
+@files = {}
+@threads = {}
+@position_file = @settings[:tailf][:position_file]
+@flush_interval = @settings[:tailf][:flush_interval]
+@max_batch_lines = @settings[:tailf][:max_batch_lines]
+@from_begining = @settings[:tailf][:from_begining]
+@delete_old_tailed_files = @settings[:tailf].has_key?(:delete_old_tailed_files) ? @settings[:tailf][:delete_old_tailed_files] : false
+@brokers = @settings[:kafka][:brokers]
+@producer_type = @settings[:kafka][:producer_type].to_sym
+@produce = @settings[:kafka].has_key?(:produce) ? @settings[:kafka][:produce] : true
+
+def write_position_file
+  @mutex.synchronize do
+    File.open(@position_file, 'w') do |file|
+      @files.each do |path, attrs|
+        file.puts "#{path} #{attrs[:pattern]} #{attrs[:topic]} #{attrs[:inode]} #{attrs[:offset]}"
+      end
+    end
+  end
+end
+
+def load_position_file
+  if File.exist?(@position_file)
+    IO.readlines(@position_file).each do |line|
+      path, pattern, topic, inode, offset = line.split(' ')
+      #Load state only for files that exist with same inode and were not truncated/rewinded.
+      if File.exists?(path) and File.stat(path).ino == inode.to_i and File.stat(path).size >= offset.to_i
+        @files[path] = { :pattern => pattern, :topic => topic, :inode => inode.to_i, :offset => offset.to_i }
+      end
+    end
+  end
+  write_position_file
+end
+
+load_position_file
+
+@topics = @settings[:tailf][:files].map{|tailf_file| tailf_file[:topic]}
+@producer = Poseidon::Producer.new(@brokers, "#{Socket.gethostname}", :type => @producer_type, :compression_codec => :snappy, :compressed_topics => @topics) if @produce
+
+@producer_queue = SizedQueue.new(10)
+
+@producer_thread = Thread.new do
+  loop do
+    batch = @producer_queue.pop
+    begin
+      @producer.send_messages(batch[:messages]) if @produce
+    rescue Poseidon::Errors::UnableToFetchMetadata
+      @logger.warn("Got Poseidon::Errors::UnableToFetchMetadata while trying to produce kafka messages, retrying in 1 second ...")
+      sleep 1
+      retry
+    end
+    @files[batch[:path]][:offset] = batch[:offset]
+  end
+end
+
+def kafka_produce(path, buffer, offset)
+  truncated = nil
+
+  messages = []
+  while msg = buffer.shift
+    unless msg[-1] == "\n"
+      if buffer.empty?
+        truncated = msg
+      else
+        msg = msg + buffer.shift
+        messages << Poseidon::MessageToSend.new(@files[path][:topic], msg.strip)
+      end
+    else
+      messages << Poseidon::MessageToSend.new(@files[path][:topic], msg.strip)
+    end
+  end
+  @producer_queue.push({ :path => path, :messages => messages, :offset => offset})
+
+  truncated
+end
+
+def tailf(path)
+  file = File.open(path, 'r')
+  @files[path][:fd] = file
+  file.seek(@files[path][:offset], IO::SEEK_SET)
+
+  truncated = nil
+  loop do #Fast read file in batches until we reach EOF upon which we start the tailf modify watcher
+    batch = file.each_line.take(@max_batch_lines)
+    break if batch.empty?
+    batch.unshift(truncated + batch.shift) if truncated
+    truncated = kafka_produce(path, batch, file.pos)
+  end
+
+  mutex = Mutex.new
+  @tailf_notifier.watch(path, :modify) do |event|
+    mutex.synchronize do
+      unless file.closed?
+        loop do
+          batch = file.each_line.take(@max_batch_lines)
+          break if batch.empty?
+          batch.unshift(truncated + batch.shift) if truncated
+          truncated = kafka_produce(path, batch, file.pos)
+        end
+      else
+        @logger.warn("watcher got modify event on closed file #{event.name}")
+      end
+    end
+  end
+end
+
+@time_regexp_hash = {
+  'Y' => '[0-9]{4}',
+  'm' => '[0-9]{2}',
+  'd' => '[0-9]{2}',
+  'H' => '[0-9]{2}',
+  'M' => '[0-9]{2}'
+}
+
+def time_pattern_to_regexp(pattern)
+  pattern.gsub(/%([^%])/) do
+    match = $1
+    @time_regexp_hash.has_key?(match) ? @time_regexp_hash[match] : match
+  end
+end
+
+#Scan existing files that match watched prefixes and start failing them
+@settings[:tailf][:files].each do |tailf_file|
+  dir = File.dirname(tailf_file[:prefix])
+  if File.exists?(dir) and File.directory?(dir)
+    @dirs[dir] ||= []
+    @dirs[dir] << { :prefix => File.basename(tailf_file[:prefix]), :pattern => tailf_file[:time_pattern], :suffix => "#{tailf_file[:suffix]}", :topic => tailf_file[:topic]}
+    Dir.glob("#{tailf_file[:prefix]}*#{tailf_file[:suffix]}").each do |path|
+      if path.match(Regexp.new(time_pattern_to_regexp(tailf_file[:time_pattern])))
+        unless File.directory?(path)
+          #Populate state only if it was not loaded from position file
+          unless @files.has_key?(path)
+            @files[path] = { :pattern => tailf_file[:time_pattern], :topic => tailf_file[:topic], :inode => File.stat(path).ino, :offset => 0 }
+            @files[path][:offset] = File.stat(path).size unless @from_begining
+          end
+          @threads[path] = Thread.new { tailf(path) } unless @threads.has_key?(path)
+        end
+      end
+    end
+  end
+end
+
+def delete_old_tailed_files
+  @mutex.synchronize do
+    @files.each_key do |path|
+      unless path.match(Regexp.new(Time.now.strftime(@files[path][:pattern])))
+        if File.exists?(path) and File.stat(path).ino == @files[path][:inode] and File.stat(path).size == @files[path][:offset] and (Time.now - File.stat(path).mtime) > 30
+          @logger.info("Deleteing old time pattern fully kafka produced file #{path}")
+          FileUtils.rm_r(path)
+          if @settings[:tailf].has_key?(:post_delete_command)
+            @logger.info("Running post delete command => #{@settings[:tailf][:post_delete_command]}")
+            command = Mixlib::
+              ShellOut.new(@settings[:tailf][:post_delete_command])
+            begin
+              command.run_command
+              if command.error?
+                @logger.error("Failed post delete command => #{@settings[:tailf][:post_delete_command]}")
+                @logger.info("STDOUT: #{command.stdout}")
+                @logger.info("STDERR: #{command.stderr}")
+              end
+            rescue => e
+              @logger.error("Failed post delete command => #{@settings[:tailf][:post_delete_command]}")
+              @logger.info(e.message)
+            end
+          end
+        end
+      end
+    end
+  end
+end
+
+@timers = Timers::Group.new
+@uploads_timer = @timers.every(@flush_interval) { write_position_file }
+@delete_old_tailed_files_timer = @timers.every(60) { delete_old_tailed_files } if @delete_old_tailed_files
+Thread.new { loop { @timers.wait } }
+
+@dirs.each_key do |dir|
+
+  @create_notifier.watch(dir, :create, :moved_to) do |event|
+    @mutex.synchronize do
+      path = "#{dir}/#{event.name}"
+      match = @dirs[dir].detect{|h| event.name.match(Regexp.new(h[:prefix] + time_pattern_to_regexp(h[:pattern]) + h[:suffix]))}
+      if match
+        unless File.directory?(path)
+          unless @threads.has_key?(path)
+            @logger.info("File #{event.name} was created in / moved into watched dir #{dir}")
+            @files[path] = { :pattern => match[:pattern], :topic => match[:topic], :inode => File.stat(path).ino, :offset => 0 }
+            @threads[path] = Thread.new { tailf(path) }
+          end
+        end
+      end
+    end
+  end
+
+  @delete_notifier.watch(dir, :delete, :moved_from) do |event|
+    @mutex.synchronize do
+      path = "#{dir}/#{event.name}"
+      if @threads.has_key?(path)
+        @logger.info("File #{event.name} was deleted / moved from watched dir #{dir}")
+        if @threads[path].alive?
+          @threads[path].terminate
+          @threads[path].join
+        end
+        @threads.delete(path)
+        @files[path][:fd].close unless @files[path][:fd].closed?
+        @files.delete(path)
+      end
+    end
+  end
+end
+
+Thread.new { @create_notifier.run }
+Thread.new { @delete_notifier.run }
+
+@tailf_notifier.run
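For orientation, data/bin/tailf2kafka.save appears to be a stale editor backup of bin/tailf2kafka that was packaged by accident, which is why an entire second copy of the script arrives as one +302 hunk (note the mid-edit Mixlib:: / ShellOut.new(...) line split, and Logger::Error where Logger::ERROR is meant). One helper worth a worked example is time_pattern_to_regexp, which rewrites strftime tokens into digit classes so rotated file names can be matched. A self-contained sketch; the sample pattern and file name are illustrative:

    @time_regexp_hash = {
      'Y' => '[0-9]{4}', 'm' => '[0-9]{2}', 'd' => '[0-9]{2}',
      'H' => '[0-9]{2}', 'M' => '[0-9]{2}'
    }

    def time_pattern_to_regexp(pattern)
      pattern.gsub(/%([^%])/) do
        match = $1
        @time_regexp_hash.has_key?(match) ? @time_regexp_hash[match] : match
      end
    end

    # '%Y.%m.%d.%H' => '[0-9]{4}.[0-9]{2}.[0-9]{2}.[0-9]{2}'
    regexp = Regexp.new(time_pattern_to_regexp('%Y.%m.%d.%H'))
    # The dots are left unescaped, so they match any character; the match is loose.
    puts 'app.2016.07.03.12.log' =~ regexp ? 'match' : 'no match'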
data/lib/tailf2kafka/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: tailf2kafka
 version: !ruby/object:Gem::Version
-  version: 0.1.7
+  version: 0.1.8
 platform: ruby
 authors:
 - Alexander Piavlo
 autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2016-07-03 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: poseidon
@@ -128,12 +128,14 @@ email:
 - devops@supersonic.com
 executables:
 - tailf2kafka
+- tailf2kafka.save
 extensions: []
 extra_rdoc_files: []
 files:
 - LICENSE
 - README.md
 - bin/tailf2kafka
+- bin/tailf2kafka.save
 - lib/tailf2kafka.rb
 - lib/tailf2kafka/version.rb
 - tailf2kafka.gemspec
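The metadata changes confirm the accident: bin/tailf2kafka.save is now listed under files: and even under executables:. That is the typical outcome of a gemspec that globs the bin/ directory instead of enumerating scripts. A hypothetical gemspec fragment showing how a stray backup file gets swept in this way; this is illustrative, not tailf2kafka's actual gemspec:

    Gem::Specification.new do |spec|
      spec.name    = 'tailf2kafka'
      spec.version = '0.1.8'
      spec.summary = 'Tail files and push them to kafka'
      spec.authors = ['Alexander Piavlo']

      # Globbing picks up every file under bin/, so a leftover editor backup
      # like bin/tailf2kafka.save ships with the gem and is registered as an
      # executable alongside the real script.
      spec.files       = Dir.glob('{bin,lib}/**/*') + %w[LICENSE README.md tailf2kafka.gemspec]
      spec.executables = Dir.glob('bin/*').map { |f| File.basename(f) }
    end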