tailf2kafka 0.1.8 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/tailf2kafka +27 -33
- data/lib/tailf2kafka/version.rb +1 -1
- data/tailf2kafka.gemspec +1 -0
- metadata +17 -4
- data/bin/tailf2kafka.save +0 -302
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8eef4e08a12fcd549df3c091eb06a5784669dfa0
|
4
|
+
data.tar.gz: 953bd871eba9fdfa942764464238fa5560c16d2e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a79f4f802ea67c650c673513dcdfb8f033f448264a02bc1ce75a5eab335b86db35313ac15ad6402a152eea0427ce000586453fb8aac1eeaf5ead625cca5349be
|
7
|
+
data.tar.gz: 0f353df4ab74ad941e6857ef8611209c4b976394ce811b538bd8e7eb58dfda0187bd892bb78728edfe61e79f76d9de5f86336caa156f3eba330681f3c607fb7e
|
data/bin/tailf2kafka
CHANGED
@@ -205,7 +205,7 @@ def time_pattern_to_regexp(pattern)
|
|
205
205
|
end
|
206
206
|
end
|
207
207
|
|
208
|
-
#Scan existing files that match watched prefixes and start
|
208
|
+
#Scan existing files that match watched prefixes and start failing them
|
209
209
|
@settings[:tailf][:files].each do |tailf_file|
|
210
210
|
tailf_file[:prefix] = File.expand_path(tailf_file[:prefix])
|
211
211
|
dir = File.dirname(tailf_file[:prefix])
|
@@ -257,52 +257,46 @@ end
|
|
257
257
|
|
258
258
|
@timers = Timers::Group.new
|
259
259
|
@uploads_timer = @timers.every(@flush_interval) { write_position_file }
|
260
|
-
@timers.every(60) { delete_old_tailed_files } if @delete_old_tailed_files
|
260
|
+
@delete_old_tailed_files_timer = @timers.every(60) { delete_old_tailed_files } if @delete_old_tailed_files
|
261
261
|
Thread.new { loop { @timers.wait } }
|
262
262
|
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
@
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
unless
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
@threads[path] = Thread.new { tailf(path) }
|
276
|
-
end
|
263
|
+
@dirs.each_key do |dir|
|
264
|
+
|
265
|
+
@create_notifier.watch(dir, :create, :moved_to) do |event|
|
266
|
+
@mutex.synchronize do
|
267
|
+
path = "#{dir}/#{event.name}"
|
268
|
+
match = @dirs[dir].detect{|h| event.name.match(Regexp.new(h[:prefix] + time_pattern_to_regexp(h[:pattern]) + h[:suffix]))}
|
269
|
+
if match
|
270
|
+
unless File.directory?(path)
|
271
|
+
unless @threads.has_key?(path)
|
272
|
+
@logger.info("File #{event.name} was created in / moved into watched dir #{dir}")
|
273
|
+
@files[path] = { :pattern => match[:pattern], :topic => match[:topic], :inode => File.stat(path).ino, :offset => 0 }
|
274
|
+
@threads[path] = Thread.new { tailf(path) }
|
277
275
|
end
|
278
276
|
end
|
279
277
|
end
|
280
278
|
end
|
279
|
+
end
|
281
280
|
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
end
|
291
|
-
@threads.delete(path)
|
292
|
-
@files[path][:fd].close unless @files[path][:fd].closed?
|
293
|
-
@files.delete(path)
|
281
|
+
@delete_notifier.watch(dir, :delete, :moved_from) do |event|
|
282
|
+
@mutex.synchronize do
|
283
|
+
path = "#{dir}/#{event.name}"
|
284
|
+
if @threads.has_key?(path)
|
285
|
+
@logger.info("File #{event.name} was deleted / moved from watched dir #{dir}")
|
286
|
+
if @threads[path].alive?
|
287
|
+
@threads[path].terminate
|
288
|
+
@threads[path].join
|
294
289
|
end
|
290
|
+
@threads.delete(path)
|
291
|
+
@files[path][:fd].close unless @files[path][:fd].closed?
|
292
|
+
@files.delete(path)
|
295
293
|
end
|
296
294
|
end
|
297
|
-
|
298
295
|
end
|
299
|
-
end
|
300
296
|
|
301
|
-
|
297
|
+
end
|
302
298
|
|
303
299
|
Thread.new { @create_notifier.run }
|
304
300
|
Thread.new { @delete_notifier.run }
|
305
301
|
|
306
|
-
@timers.every(60) { setup_watchers }
|
307
|
-
|
308
302
|
@tailf_notifier.run
|
data/lib/tailf2kafka/version.rb
CHANGED
data/tailf2kafka.gemspec
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tailf2kafka
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.8
|
4
|
+
version: 0.1.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alexander Piavlo
|
@@ -108,6 +108,20 @@ dependencies:
|
|
108
108
|
- - ">="
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '0'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: activesupport
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - "~>"
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: 4.2.6
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - "~>"
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: 4.2.6
|
111
125
|
- !ruby/object:Gem::Dependency
|
112
126
|
name: rake
|
113
127
|
requirement: !ruby/object:Gem::Requirement
|
@@ -128,14 +142,12 @@ email:
|
|
128
142
|
- devops@supersonic.com
|
129
143
|
executables:
|
130
144
|
- tailf2kafka
|
131
|
-
- tailf2kafka.save
|
132
145
|
extensions: []
|
133
146
|
extra_rdoc_files: []
|
134
147
|
files:
|
135
148
|
- LICENSE
|
136
149
|
- README.md
|
137
150
|
- bin/tailf2kafka
|
138
|
-
- bin/tailf2kafka.save
|
139
151
|
- lib/tailf2kafka.rb
|
140
152
|
- lib/tailf2kafka/version.rb
|
141
153
|
- tailf2kafka.gemspec
|
@@ -159,9 +171,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
159
171
|
version: '0'
|
160
172
|
requirements: []
|
161
173
|
rubyforge_project:
|
162
|
-
rubygems_version: 2.
|
174
|
+
rubygems_version: 2.6.4
|
163
175
|
signing_key:
|
164
176
|
specification_version: 4
|
165
177
|
summary: Watch and tail files with specified time based patterns and push them to
|
166
178
|
kafka
|
167
179
|
test_files: []
|
180
|
+
has_rdoc: false
|
data/bin/tailf2kafka.save
DELETED
@@ -1,302 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require 'optparse'
|
4
|
-
require 'poseidon'
|
5
|
-
require 'yaml'
|
6
|
-
require 'hash_symbolizer'
|
7
|
-
require 'schash'
|
8
|
-
require 'rb-inotify'
|
9
|
-
require 'timers'
|
10
|
-
require 'socket'
|
11
|
-
require 'fileutils'
|
12
|
-
require 'logger'
|
13
|
-
require 'mixlib/shellout'
|
14
|
-
|
15
|
-
$stdout.sync = true
|
16
|
-
|
17
|
-
Thread.abort_on_exception = true
|
18
|
-
|
19
|
-
@config = nil
|
20
|
-
|
21
|
-
loglevels = {
|
22
|
-
:debug => Logger::DEBUG,
|
23
|
-
:info => Logger::INFO,
|
24
|
-
:warn => Logger::WARN,
|
25
|
-
:error => Logger::Error,
|
26
|
-
:fatal => Logger::FATAL,
|
27
|
-
:unknown => Logger::UNKNOWN
|
28
|
-
}
|
29
|
-
|
30
|
-
@loglevel = Logger::INFO
|
31
|
-
|
32
|
-
opts = OptionParser.new
|
33
|
-
opts.banner = "Usage: #{$0} [options]"
|
34
|
-
opts.on( '--config PATH', String, 'Path to settings config' ) { |c| @config = c }
|
35
|
-
opts.on( '--log-level [LEVEL]', [:debug, :info, :warn, :error, :fatal, :unknown] ) { |l| @loglevel = loglevels[l] }
|
36
|
-
opts.on( '-h', '--help', 'Display this screen' ) { puts opts; exit 0 }
|
37
|
-
opts.parse!
|
38
|
-
|
39
|
-
unless @config
|
40
|
-
puts opts
|
41
|
-
exit 1
|
42
|
-
end
|
43
|
-
|
44
|
-
@logger = Logger.new(STDOUT)
|
45
|
-
|
46
|
-
@settings = YAML.load_file(@config).symbolize_keys(true)
|
47
|
-
|
48
|
-
validator = Schash::Validator.new do
|
49
|
-
{
|
50
|
-
tailf: {
|
51
|
-
files: array_of({
|
52
|
-
topic: string,
|
53
|
-
prefix: string,
|
54
|
-
suffix: optional(string),
|
55
|
-
time_pattern: string,
|
56
|
-
}),
|
57
|
-
position_file: string,
|
58
|
-
flush_interval: integer,
|
59
|
-
max_batch_lines: integer,
|
60
|
-
from_begining: boolean,
|
61
|
-
delete_old_tailed_files: optional(boolean),
|
62
|
-
post_delete_command: optional(string),
|
63
|
-
},
|
64
|
-
kafka: {
|
65
|
-
brokers: array_of(string),
|
66
|
-
producer_type: match(/^(sync|async)$/),
|
67
|
-
produce: optional(boolean),
|
68
|
-
},
|
69
|
-
}
|
70
|
-
end
|
71
|
-
|
72
|
-
unless validator.validate(@settings).empty?
|
73
|
-
@logger.error("ERROR: bad settings")
|
74
|
-
@logger.error(validator.validate(@settings))
|
75
|
-
exit 1
|
76
|
-
end
|
77
|
-
|
78
|
-
@settings[:tailf][:files] = @settings[:tailf][:files].map{|h| h.symbolize_keys(true)}
|
79
|
-
|
80
|
-
@mutex = Mutex.new
|
81
|
-
|
82
|
-
@create_notifier = INotify::Notifier.new
|
83
|
-
@delete_notifier = INotify::Notifier.new
|
84
|
-
@tailf_notifier = INotify::Notifier.new
|
85
|
-
|
86
|
-
@dirs = {}
|
87
|
-
@files = {}
|
88
|
-
@threads = {}
|
89
|
-
@position_file = @settings[:tailf][:position_file]
|
90
|
-
@flush_interval = @settings[:tailf][:flush_interval]
|
91
|
-
@max_batch_lines = @settings[:tailf][:max_batch_lines]
|
92
|
-
@from_begining = @settings[:tailf][:from_begining]
|
93
|
-
@delete_old_tailed_files = @settings[:tailf].has_key?(:delete_old_tailed_files) ? @settings[:tailf][:delete_old_tailed_files] : false
|
94
|
-
@brokers = @settings[:kafka][:brokers]
|
95
|
-
@producer_type = @settings[:kafka][:producer_type].to_sym
|
96
|
-
@produce = @settings[:kafka].has_key?(:produce) ? @settings[:kafka][:produce] : true
|
97
|
-
|
98
|
-
def write_position_file
|
99
|
-
@mutex.synchronize do
|
100
|
-
File.open(@position_file, 'w') do |file|
|
101
|
-
@files.each do |path, attrs|
|
102
|
-
file.puts "#{path} #{attrs[:pattern]} #{attrs[:topic]} #{attrs[:inode]} #{attrs[:offset]}"
|
103
|
-
end
|
104
|
-
end
|
105
|
-
end
|
106
|
-
end
|
107
|
-
|
108
|
-
def load_position_file
|
109
|
-
if File.exist?(@position_file)
|
110
|
-
IO.readlines(@position_file).each do |line|
|
111
|
-
path, pattern, topic, inode, offset = line.split(' ')
|
112
|
-
#Load state only for that exist with same inode and were not truncated/rewinded.
|
113
|
-
if File.exists?(path) and File.stat(path).ino == inode.to_i and File.stat(path).size >= offset.to_i
|
114
|
-
@files[path] = { :pattern => pattern, :topic => topic, :inode => inode.to_i, :offset => offset.to_i }
|
115
|
-
end
|
116
|
-
end
|
117
|
-
end
|
118
|
-
write_position_file
|
119
|
-
end
|
120
|
-
|
121
|
-
load_position_file
|
122
|
-
|
123
|
-
@topics = @settings[:tailf][:files].map{|tailf_file| tailf_file[:topic]}
|
124
|
-
@producer = Poseidon::Producer.new(@brokers, "#{Socket.gethostname}", :type => @producer_type, :compression_codec => :snappy, :compressed_topics => @topics) if @produce
|
125
|
-
|
126
|
-
@producer_queue = SizedQueue.new(10)
|
127
|
-
|
128
|
-
@producer_thread = Thread.new do
|
129
|
-
loop do
|
130
|
-
batch = @producer_queue.pop
|
131
|
-
begin
|
132
|
-
@producer.send_messages(batch[:messages]) if @produce
|
133
|
-
rescue Poseidon::Errors::UnableToFetchMetadata
|
134
|
-
@logger.warn("Got Poseidon::Errors::UnableToFetchMetadata while trying to produce kafka messages, retrying in 1 second ...")
|
135
|
-
sleep 1
|
136
|
-
retry
|
137
|
-
end
|
138
|
-
@files[batch[:path]][:offset] = batch[:offset]
|
139
|
-
end
|
140
|
-
end
|
141
|
-
|
142
|
-
def kafka_produce(path, buffer, offset)
|
143
|
-
truncated = nil
|
144
|
-
|
145
|
-
messages = []
|
146
|
-
while msg = buffer.shift
|
147
|
-
unless msg[-1] == "\n"
|
148
|
-
if buffer.empty?
|
149
|
-
truncated = msg
|
150
|
-
else
|
151
|
-
msg = msg + buffer.shift
|
152
|
-
messages << Poseidon::MessageToSend.new(@files[path][:topic], msg.strip)
|
153
|
-
end
|
154
|
-
else
|
155
|
-
messages << Poseidon::MessageToSend.new(@files[path][:topic], msg.strip)
|
156
|
-
end
|
157
|
-
end
|
158
|
-
@producer_queue.push({ :path => path, :messages => messages, :offset => offset})
|
159
|
-
|
160
|
-
truncated
|
161
|
-
end
|
162
|
-
|
163
|
-
def tailf(path)
|
164
|
-
file = File.open(path, 'r')
|
165
|
-
@files[path][:fd] = file
|
166
|
-
file.seek(@files[path][:offset], IO::SEEK_SET)
|
167
|
-
|
168
|
-
truncated = nil
|
169
|
-
loop do #Fast read file in batches until we reach EOF upon which we start the tailf modify watcher
|
170
|
-
batch = file.each_line.take(@max_batch_lines)
|
171
|
-
break if batch.empty?
|
172
|
-
batch.unshift(truncated + batch.shift) if truncated
|
173
|
-
truncated = kafka_produce(path, batch, file.pos)
|
174
|
-
end
|
175
|
-
|
176
|
-
mutex = Mutex.new
|
177
|
-
@tailf_notifier.watch(path, :modify) do |event|
|
178
|
-
mutex.synchronize do
|
179
|
-
unless file.closed?
|
180
|
-
loop do
|
181
|
-
batch = file.each_line.take(@max_batch_lines)
|
182
|
-
break if batch.empty?
|
183
|
-
batch.unshift(truncated + batch.shift) if truncated
|
184
|
-
truncated = kafka_produce(path, batch, file.pos)
|
185
|
-
end
|
186
|
-
else
|
187
|
-
@logger.warn("watcher got modify event on closed file #{event.name}")
|
188
|
-
end
|
189
|
-
end
|
190
|
-
end
|
191
|
-
end
|
192
|
-
|
193
|
-
@time_regexp_hash = {
|
194
|
-
'Y' => '[0-9]{4}',
|
195
|
-
'm' => '[0-9]{2}',
|
196
|
-
'd' => '[0-9]{2}',
|
197
|
-
'H' => '[0-9]{2}',
|
198
|
-
'M' => '[0-9]{2}'
|
199
|
-
}
|
200
|
-
|
201
|
-
def time_pattern_to_regexp(pattern)
|
202
|
-
pattern.gsub(/%([^%])/) do
|
203
|
-
match = $1
|
204
|
-
@time_regexp_hash.has_key?(match) ? @time_regexp_hash[match] : match
|
205
|
-
end
|
206
|
-
end
|
207
|
-
|
208
|
-
#Scan existing files that match watched prefixes and start failing them
|
209
|
-
@settings[:tailf][:files].each do |tailf_file|
|
210
|
-
dir = File.dirname(tailf_file[:prefix])
|
211
|
-
if File.exists?(dir) and File.directory?(dir)
|
212
|
-
@dirs[dir] ||= []
|
213
|
-
@dirs[dir] << { :prefix => File.basename(tailf_file[:prefix]), :pattern => tailf_file[:time_pattern], :suffix => "#{tailf_file[:suffix]}", :topic => tailf_file[:topic]}
|
214
|
-
Dir.glob("#{tailf_file[:prefix]}*#{tailf_file[:suffix]}").each do |path|
|
215
|
-
if path.match(Regexp.new(time_pattern_to_regexp(tailf_file[:time_pattern])))
|
216
|
-
unless File.directory?(path)
|
217
|
-
#Populate state only if it was not loaded from position file
|
218
|
-
unless @files.has_key?(path)
|
219
|
-
@files[path] = { :pattern => tailf_file[:time_pattern], :topic => tailf_file[:topic], :inode => File.stat(path).ino, :offset => 0 }
|
220
|
-
@files[path][:offset] = File.stat(path).size unless @from_begining
|
221
|
-
end
|
222
|
-
@threads[path] = Thread.new { tailf(path) } unless @threads.has_key?(path)
|
223
|
-
end
|
224
|
-
end
|
225
|
-
end
|
226
|
-
end
|
227
|
-
end
|
228
|
-
|
229
|
-
def delete_old_tailed_files
|
230
|
-
@mutex.synchronize do
|
231
|
-
@files.each_key do |path|
|
232
|
-
unless path.match(Regexp.new(Time.now.strftime(@files[path][:pattern])))
|
233
|
-
if File.exists?(path) and File.stat(path).ino == @files[path][:inode] and File.stat(path).size == @files[path][:offset] and (Time.now - File.stat(path).mtime) > 30
|
234
|
-
@logger.info("Deleteing old time pattern fully kafka produced file #{path}")
|
235
|
-
FileUtils.rm_r(path)
|
236
|
-
if @settings[:tailf].has_key?(:post_delete_command)
|
237
|
-
@logger.info("Running post delete command => #{@settings[:tailf][:post_delete_command]}")
|
238
|
-
command = Mixlib::created
|
239
|
-
ShellOut.new(@settings[:tailf][:post_delete_command])
|
240
|
-
begin
|
241
|
-
command.run_command
|
242
|
-
if command.error?
|
243
|
-
@logger.error("Failed post delete command => #{@settings[:tailf][:post_delete_command]}")
|
244
|
-
@logger.info("STDOUT: #{command.stdout}")
|
245
|
-
@logger.info("STDERR: #{command.stderr}")
|
246
|
-
end
|
247
|
-
rescue => e
|
248
|
-
@logger.error("Failed post delete command => #{@settings[:tailf][:post_delete_command]}")
|
249
|
-
@logger.info(e.message)
|
250
|
-
end
|
251
|
-
end
|
252
|
-
end
|
253
|
-
end
|
254
|
-
end
|
255
|
-
end
|
256
|
-
end
|
257
|
-
|
258
|
-
@timers = Timers::Group.new
|
259
|
-
@uploads_timer = @timers.every(@flush_interval) { write_position_file }
|
260
|
-
@delete_old_tailed_files_timer = @timers.every(60) { delete_old_tailed_files } if @delete_old_tailed_files
|
261
|
-
Thread.new { loop { @timers.wait } }
|
262
|
-
|
263
|
-
@dirs.each_key do |dir|
|
264
|
-
|
265
|
-
@create_notifier.watch(dir, :create, :moved_to) do |event|
|
266
|
-
@mutex.synchronize do
|
267
|
-
path = "#{dir}/#{event.name}"
|
268
|
-
match = @dirs[dir].detect{|h| event.name.match(Regexp.new(h[:prefix] + time_pattern_to_regexp(h[:pattern]) + h[:suffix]))}
|
269
|
-
if match
|
270
|
-
unless File.directory?(path)
|
271
|
-
unless @threads.has_key?(path)
|
272
|
-
@logger.info("File #{event.name} was created in / moved into watched dir #{dir}")
|
273
|
-
@files[path] = { :pattern => match[:pattern], :topic => match[:topic], :inode => File.stat(path).ino, :offset => 0 }
|
274
|
-
@threads[path] = Thread.new { tailf(path) }
|
275
|
-
end
|
276
|
-
end
|
277
|
-
end
|
278
|
-
end
|
279
|
-
end
|
280
|
-
|
281
|
-
@delete_notifier.watch(dir, :delete, :moved_from) do |event|
|
282
|
-
@mutex.synchronize do
|
283
|
-
path = "#{dir}/#{event.name}"
|
284
|
-
if @threads.has_key?(path)
|
285
|
-
@logger.info("File #{event.name} was deleted / moved from watched dir #{dir}")
|
286
|
-
if @threads[path].alive?
|
287
|
-
@threads[path].terminate
|
288
|
-
@threads[path].join
|
289
|
-
end
|
290
|
-
@threads.delete(path)
|
291
|
-
@files[path][:fd].close unless @files[path][:fd].closed?
|
292
|
-
@files.delete(path)
|
293
|
-
end
|
294
|
-
end
|
295
|
-
end
|
296
|
-
|
297
|
-
end
|
298
|
-
|
299
|
-
Thread.new { @create_notifier.run }
|
300
|
-
Thread.new { @delete_notifier.run }
|
301
|
-
|
302
|
-
@tailf_notifier.run
|