tailf2kafka 0.1.7 → 0.1.8

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 0a52bb6e6124032b75f8bd0ad29f6e3ea31e4d43
-  data.tar.gz: d0cf9ebc419b52d2a67c4de51e6ace18637c4212
+  metadata.gz: 36ed03a088f2e37c9e35773e233159bc1c32cf2d
+  data.tar.gz: 203b2fd8a437e4d8baa555f46a246411eed0cb9e
 SHA512:
-  metadata.gz: 4fc9d64f58bdb1bd22da9dc45c1f7a1c7c62cd846fc596542613b92e7baa1e1f1d95c718e4c79cce66cfd91d39fa27f8ee7ee8c3b28fe4ab5a0497951a6b1b9b
-  data.tar.gz: d4cfb41c0a41f0a905f4d68167a42aea8293d5f7a3757cf60a307d3e81a73ec85a49a8b6628a6ac9aecc76a410c3f7ae4fa9c0b2b5ca090be6938e19d2548315
+  metadata.gz: 5554c921a0ce29a3bf4f3548e8b180a46545ace278163b4cebaff11255e709565730f5de25263fc4fa44636a345eb6eec2a187e7dd012dc16bdd109bbb02c484
+  data.tar.gz: 11bc611eac30966753a8327acc0a7f97304d6b1c8757893ef7316b6cf8f1acfc83fdc7a9981b46ccb720a63bb3e5940657ddc8d4ed3795b838d590c8f3c1d485
data/bin/tailf2kafka CHANGED
@@ -205,7 +205,7 @@ def time_pattern_to_regexp(pattern)
   end
 end
 
-#Scan existing files that match watched prefixes and start failing them
+#Scan existing files that match watched prefixes and start tailing them
 @settings[:tailf][:files].each do |tailf_file|
   tailf_file[:prefix] = File.expand_path(tailf_file[:prefix])
   dir = File.dirname(tailf_file[:prefix])
@@ -257,46 +257,52 @@ end
 
 @timers = Timers::Group.new
 @uploads_timer = @timers.every(@flush_interval) { write_position_file }
-@delete_old_tailed_files_timer = @timers.every(60) { delete_old_tailed_files } if @delete_old_tailed_files
+@timers.every(60) { delete_old_tailed_files } if @delete_old_tailed_files
 Thread.new { loop { @timers.wait } }
 
-@dirs.each_key do |dir|
-
-  @create_notifier.watch(dir, :create, :moved_to) do |event|
-    @mutex.synchronize do
-      path = "#{dir}/#{event.name}"
-      match = @dirs[dir].detect{|h| event.name.match(Regexp.new(h[:prefix] + time_pattern_to_regexp(h[:pattern]) + h[:suffix]))}
-      if match
-        unless File.directory?(path)
-          unless @threads.has_key?(path)
-            @logger.info("File #{event.name} was created in / moved into watched dir #{dir}")
-            @files[path] = { :pattern => match[:pattern], :topic => match[:topic], :inode => File.stat(path).ino, :offset => 0 }
-            @threads[path] = Thread.new { tailf(path) }
+def setup_watchers
+  @dirs.each_key do |dir|
+
+    @create_notifier.watch(dir, :create, :moved_to) do |event|
+      @mutex.synchronize do
+        path = "#{dir}/#{event.name}"
+        match = @dirs[dir].detect{|h| event.name.match(Regexp.new(h[:prefix] + time_pattern_to_regexp(h[:pattern]) + h[:suffix]))}
+        if match
+          unless File.directory?(path)
+            unless @threads.has_key?(path)
+              @logger.info("File #{event.name} was created in / moved into watched dir #{dir}")
+              @files[path] = { :pattern => match[:pattern], :topic => match[:topic], :inode => File.stat(path).ino, :offset => 0 }
+              @threads[path] = Thread.new { tailf(path) }
+            end
           end
         end
       end
     end
-  end
 
-  @delete_notifier.watch(dir, :delete, :moved_from) do |event|
-    @mutex.synchronize do
-      path = "#{dir}/#{event.name}"
-      if @threads.has_key?(path)
-        @logger.info("File #{event.name} was deleted / moved from watched dir #{dir}")
-        if @threads[path].alive?
-          @threads[path].terminate
-          @threads[path].join
+    @delete_notifier.watch(dir, :delete, :moved_from) do |event|
+      @mutex.synchronize do
+        path = "#{dir}/#{event.name}"
+        if @threads.has_key?(path)
+          @logger.info("File #{event.name} was deleted / moved from watched dir #{dir}")
+          if @threads[path].alive?
+            @threads[path].terminate
+            @threads[path].join
+          end
+          @threads.delete(path)
+          @files[path][:fd].close unless @files[path][:fd].closed?
+          @files.delete(path)
         end
-        @threads.delete(path)
-        @files[path][:fd].close unless @files[path][:fd].closed?
-        @files.delete(path)
       end
     end
-  end
 
+  end
 end
 
+setup_watchers
+
 Thread.new { @create_notifier.run }
 Thread.new { @delete_notifier.run }
 
+@timers.every(60) { setup_watchers }
+
 @tailf_notifier.run
data/bin/tailf2kafka.save ADDED
@@ -0,0 +1,302 @@
+#!/usr/bin/env ruby
+
+require 'optparse'
+require 'poseidon'
+require 'yaml'
+require 'hash_symbolizer'
+require 'schash'
+require 'rb-inotify'
+require 'timers'
+require 'socket'
+require 'fileutils'
+require 'logger'
+require 'mixlib/shellout'
+
+$stdout.sync = true
+
+Thread.abort_on_exception = true
+
+@config = nil
+
+loglevels = {
+  :debug => Logger::DEBUG,
+  :info => Logger::INFO,
+  :warn => Logger::WARN,
+  :error => Logger::ERROR,
+  :fatal => Logger::FATAL,
+  :unknown => Logger::UNKNOWN
+}
+
+@loglevel = Logger::INFO
+
+opts = OptionParser.new
+opts.banner = "Usage: #{$0} [options]"
+opts.on( '--config PATH', String, 'Path to settings config' ) { |c| @config = c }
+opts.on( '--log-level [LEVEL]', [:debug, :info, :warn, :error, :fatal, :unknown] ) { |l| @loglevel = loglevels[l] }
+opts.on( '-h', '--help', 'Display this screen' ) { puts opts; exit 0 }
+opts.parse!
+
+unless @config
+  puts opts
+  exit 1
+end
+
+@logger = Logger.new(STDOUT)
+
+@settings = YAML.load_file(@config).symbolize_keys(true)
+
+validator = Schash::Validator.new do
+  {
+    tailf: {
+      files: array_of({
+        topic: string,
+        prefix: string,
+        suffix: optional(string),
+        time_pattern: string,
+      }),
+      position_file: string,
+      flush_interval: integer,
+      max_batch_lines: integer,
+      from_begining: boolean,
+      delete_old_tailed_files: optional(boolean),
+      post_delete_command: optional(string),
+    },
+    kafka: {
+      brokers: array_of(string),
+      producer_type: match(/^(sync|async)$/),
+      produce: optional(boolean),
+    },
+  }
+end
+
+unless validator.validate(@settings).empty?
+  @logger.error("ERROR: bad settings")
+  @logger.error(validator.validate(@settings))
+  exit 1
+end
+
+@settings[:tailf][:files] = @settings[:tailf][:files].map{|h| h.symbolize_keys(true)}
+
+@mutex = Mutex.new
+
+@create_notifier = INotify::Notifier.new
+@delete_notifier = INotify::Notifier.new
+@tailf_notifier = INotify::Notifier.new
+
+@dirs = {}
+@files = {}
+@threads = {}
+@position_file = @settings[:tailf][:position_file]
+@flush_interval = @settings[:tailf][:flush_interval]
+@max_batch_lines = @settings[:tailf][:max_batch_lines]
+@from_begining = @settings[:tailf][:from_begining]
+@delete_old_tailed_files = @settings[:tailf].has_key?(:delete_old_tailed_files) ? @settings[:tailf][:delete_old_tailed_files] : false
+@brokers = @settings[:kafka][:brokers]
+@producer_type = @settings[:kafka][:producer_type].to_sym
+@produce = @settings[:kafka].has_key?(:produce) ? @settings[:kafka][:produce] : true
+
+def write_position_file
+  @mutex.synchronize do
+    File.open(@position_file, 'w') do |file|
+      @files.each do |path, attrs|
+        file.puts "#{path} #{attrs[:pattern]} #{attrs[:topic]} #{attrs[:inode]} #{attrs[:offset]}"
+      end
+    end
+  end
+end
+
+def load_position_file
+  if File.exist?(@position_file)
+    IO.readlines(@position_file).each do |line|
+      path, pattern, topic, inode, offset = line.split(' ')
+      #Load state only for files that exist with the same inode and were not truncated/rewound.
+      if File.exists?(path) and File.stat(path).ino == inode.to_i and File.stat(path).size >= offset.to_i
+        @files[path] = { :pattern => pattern, :topic => topic, :inode => inode.to_i, :offset => offset.to_i }
+      end
+    end
+  end
+  write_position_file
+end
+
+load_position_file
+
+@topics = @settings[:tailf][:files].map{|tailf_file| tailf_file[:topic]}
+@producer = Poseidon::Producer.new(@brokers, "#{Socket.gethostname}", :type => @producer_type, :compression_codec => :snappy, :compressed_topics => @topics) if @produce
+
+@producer_queue = SizedQueue.new(10)
+
+@producer_thread = Thread.new do
+  loop do
+    batch = @producer_queue.pop
+    begin
+      @producer.send_messages(batch[:messages]) if @produce
+    rescue Poseidon::Errors::UnableToFetchMetadata
+      @logger.warn("Got Poseidon::Errors::UnableToFetchMetadata while trying to produce kafka messages, retrying in 1 second ...")
+      sleep 1
+      retry
+    end
+    @files[batch[:path]][:offset] = batch[:offset]
+  end
+end
+
+def kafka_produce(path, buffer, offset)
+  truncated = nil
+
+  messages = []
+  while msg = buffer.shift
+    unless msg[-1] == "\n"
+      if buffer.empty?
+        truncated = msg
+      else
+        msg = msg + buffer.shift
+        messages << Poseidon::MessageToSend.new(@files[path][:topic], msg.strip)
+      end
+    else
+      messages << Poseidon::MessageToSend.new(@files[path][:topic], msg.strip)
+    end
+  end
+  @producer_queue.push({ :path => path, :messages => messages, :offset => offset})
+
+  truncated
+end
+
+def tailf(path)
+  file = File.open(path, 'r')
+  @files[path][:fd] = file
+  file.seek(@files[path][:offset], IO::SEEK_SET)
+
+  truncated = nil
+  loop do #Fast read file in batches until we reach EOF upon which we start the tailf modify watcher
+    batch = file.each_line.take(@max_batch_lines)
+    break if batch.empty?
+    batch.unshift(truncated + batch.shift) if truncated
+    truncated = kafka_produce(path, batch, file.pos)
+  end
+
+  mutex = Mutex.new
+  @tailf_notifier.watch(path, :modify) do |event|
+    mutex.synchronize do
+      unless file.closed?
+        loop do
+          batch = file.each_line.take(@max_batch_lines)
+          break if batch.empty?
+          batch.unshift(truncated + batch.shift) if truncated
+          truncated = kafka_produce(path, batch, file.pos)
+        end
+      else
+        @logger.warn("watcher got modify event on closed file #{event.name}")
+      end
+    end
+  end
+end
+
+@time_regexp_hash = {
+  'Y' => '[0-9]{4}',
+  'm' => '[0-9]{2}',
+  'd' => '[0-9]{2}',
+  'H' => '[0-9]{2}',
+  'M' => '[0-9]{2}'
+}
+
+def time_pattern_to_regexp(pattern)
+  pattern.gsub(/%([^%])/) do
+    match = $1
+    @time_regexp_hash.has_key?(match) ? @time_regexp_hash[match] : match
+  end
+end
+
+#Scan existing files that match watched prefixes and start tailing them
+@settings[:tailf][:files].each do |tailf_file|
+  dir = File.dirname(tailf_file[:prefix])
+  if File.exists?(dir) and File.directory?(dir)
+    @dirs[dir] ||= []
+    @dirs[dir] << { :prefix => File.basename(tailf_file[:prefix]), :pattern => tailf_file[:time_pattern], :suffix => "#{tailf_file[:suffix]}", :topic => tailf_file[:topic]}
+    Dir.glob("#{tailf_file[:prefix]}*#{tailf_file[:suffix]}").each do |path|
+      if path.match(Regexp.new(time_pattern_to_regexp(tailf_file[:time_pattern])))
+        unless File.directory?(path)
+          #Populate state only if it was not loaded from position file
+          unless @files.has_key?(path)
+            @files[path] = { :pattern => tailf_file[:time_pattern], :topic => tailf_file[:topic], :inode => File.stat(path).ino, :offset => 0 }
+            @files[path][:offset] = File.stat(path).size unless @from_begining
+          end
+          @threads[path] = Thread.new { tailf(path) } unless @threads.has_key?(path)
+        end
+      end
+    end
+  end
+end
+
+def delete_old_tailed_files
+  @mutex.synchronize do
+    @files.each_key do |path|
+      unless path.match(Regexp.new(Time.now.strftime(@files[path][:pattern])))
+        if File.exists?(path) and File.stat(path).ino == @files[path][:inode] and File.stat(path).size == @files[path][:offset] and (Time.now - File.stat(path).mtime) > 30
+ @logger.info("Deleteing old time pattern fully kafka produced file #{path}")
+          FileUtils.rm_r(path)
+          if @settings[:tailf].has_key?(:post_delete_command)
+            @logger.info("Running post delete command => #{@settings[:tailf][:post_delete_command]}")
+            command = Mixlib::ShellOut.new(@settings[:tailf][:post_delete_command])
+            begin
+              command.run_command
+              if command.error?
+                @logger.error("Failed post delete command => #{@settings[:tailf][:post_delete_command]}")
+                @logger.info("STDOUT: #{command.stdout}")
+                @logger.info("STDERR: #{command.stderr}")
+              end
+            rescue => e
+              @logger.error("Failed post delete command => #{@settings[:tailf][:post_delete_command]}")
+              @logger.info(e.message)
+            end
+          end
+        end
+      end
+    end
+  end
+end
+
+@timers = Timers::Group.new
+@uploads_timer = @timers.every(@flush_interval) { write_position_file }
+@delete_old_tailed_files_timer = @timers.every(60) { delete_old_tailed_files } if @delete_old_tailed_files
+Thread.new { loop { @timers.wait } }
+
+@dirs.each_key do |dir|
+
+  @create_notifier.watch(dir, :create, :moved_to) do |event|
+    @mutex.synchronize do
+      path = "#{dir}/#{event.name}"
+      match = @dirs[dir].detect{|h| event.name.match(Regexp.new(h[:prefix] + time_pattern_to_regexp(h[:pattern]) + h[:suffix]))}
+      if match
+        unless File.directory?(path)
+          unless @threads.has_key?(path)
+            @logger.info("File #{event.name} was created in / moved into watched dir #{dir}")
+            @files[path] = { :pattern => match[:pattern], :topic => match[:topic], :inode => File.stat(path).ino, :offset => 0 }
+            @threads[path] = Thread.new { tailf(path) }
+          end
+        end
+      end
+    end
+  end
+
+  @delete_notifier.watch(dir, :delete, :moved_from) do |event|
+    @mutex.synchronize do
+      path = "#{dir}/#{event.name}"
+      if @threads.has_key?(path)
+        @logger.info("File #{event.name} was deleted / moved from watched dir #{dir}")
+        if @threads[path].alive?
+          @threads[path].terminate
+          @threads[path].join
+        end
+        @threads.delete(path)
+        @files[path][:fd].close unless @files[path][:fd].closed?
+        @files.delete(path)
+      end
+    end
+  end
+
+end
+
+Thread.new { @create_notifier.run }
+Thread.new { @delete_notifier.run }
+
+@tailf_notifier.run
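For orientation, here is a minimal sketch of a settings file of the shape the Schash validator in this script accepts. All values (paths, topic, broker) are hypothetical; the key names, including the script's own spelling from_begining, are copied from the schema. With these values, time_pattern %Y-%m-%d-%H expands to [0-9]{4}-[0-9]{2}-[0-9]{2}-[0-9]{2} via time_pattern_to_regexp, so a file such as /var/log/httpd/access.2016-07-03-14.log would be tailed into the httpd_access topic:

tailf:
  files:
    - topic: httpd_access
      prefix: /var/log/httpd/access.
      time_pattern: "%Y-%m-%d-%H"
      suffix: ".log"
  position_file: /var/lib/tailf2kafka/position.yml
  flush_interval: 5
  max_batch_lines: 1024
  from_begining: true
  delete_old_tailed_files: true
kafka:
  brokers:
    - kafka01:9092
  producer_type: sync
  produce: true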
data/lib/tailf2kafka/version.rb CHANGED
@@ -1,3 +1,3 @@
 module Tailf2Kafka
-  VERSION ||= '0.1.7'
+  VERSION ||= '0.1.8'
 end
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: tailf2kafka
 version: !ruby/object:Gem::Version
-  version: 0.1.7
+  version: 0.1.8
 platform: ruby
 authors:
 - Alexander Piavlo
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-12-29 00:00:00.000000000 Z
+date: 2016-07-03 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: poseidon
@@ -128,12 +128,14 @@ email:
 - devops@supersonic.com
 executables:
 - tailf2kafka
+- tailf2kafka.save
 extensions: []
 extra_rdoc_files: []
 files:
 - LICENSE
 - README.md
 - bin/tailf2kafka
+- bin/tailf2kafka.save
 - lib/tailf2kafka.rb
 - lib/tailf2kafka/version.rb
 - tailf2kafka.gemspec