tailf2kafka 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 0a52bb6e6124032b75f8bd0ad29f6e3ea31e4d43
-  data.tar.gz: d0cf9ebc419b52d2a67c4de51e6ace18637c4212
+  metadata.gz: 36ed03a088f2e37c9e35773e233159bc1c32cf2d
+  data.tar.gz: 203b2fd8a437e4d8baa555f46a246411eed0cb9e
 SHA512:
-  metadata.gz: 4fc9d64f58bdb1bd22da9dc45c1f7a1c7c62cd846fc596542613b92e7baa1e1f1d95c718e4c79cce66cfd91d39fa27f8ee7ee8c3b28fe4ab5a0497951a6b1b9b
-  data.tar.gz: d4cfb41c0a41f0a905f4d68167a42aea8293d5f7a3757cf60a307d3e81a73ec85a49a8b6628a6ac9aecc76a410c3f7ae4fa9c0b2b5ca090be6938e19d2548315
+  metadata.gz: 5554c921a0ce29a3bf4f3548e8b180a46545ace278163b4cebaff11255e709565730f5de25263fc4fa44636a345eb6eec2a187e7dd012dc16bdd109bbb02c484
+  data.tar.gz: 11bc611eac30966753a8327acc0a7f97304d6b1c8757893ef7316b6cf8f1acfc83fdc7a9981b46ccb720a63bb3e5940657ddc8d4ed3795b838d590c8f3c1d485
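
Both archives inside the .gem changed, so both digest sets were regenerated. As a quick sanity check, the digests above can be recomputed locally; a minimal sketch, assuming metadata.gz and data.tar.gz have already been extracted from tailf2kafka-0.1.8.gem into the current directory:

    require 'digest'

    # Recompute the digests recorded in checksums.yaml for the unpacked
    # gem artifacts (the paths here are assumptions, not part of the gem).
    %w[metadata.gz data.tar.gz].each do |artifact|
      puts "#{artifact} SHA1:   #{Digest::SHA1.file(artifact).hexdigest}"
      puts "#{artifact} SHA512: #{Digest::SHA512.file(artifact).hexdigest}"
    end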
data/bin/tailf2kafka CHANGED
@@ -205,7 +205,7 @@ def time_pattern_to_regexp(pattern)
   end
 end
 
-#Scan existing files that match watched prefixes and start failing them
+#Scan existing files that match watched prefixes and start tailing them
 @settings[:tailf][:files].each do |tailf_file|
   tailf_file[:prefix] = File.expand_path(tailf_file[:prefix])
   dir = File.dirname(tailf_file[:prefix])
@@ -257,46 +257,52 @@ end
 
 @timers = Timers::Group.new
 @uploads_timer = @timers.every(@flush_interval) { write_position_file }
-@delete_old_tailed_files_timer = @timers.every(60) { delete_old_tailed_files } if @delete_old_tailed_files
+@timers.every(60) { delete_old_tailed_files } if @delete_old_tailed_files
 Thread.new { loop { @timers.wait } }
 
-@dirs.each_key do |dir|
-
-  @create_notifier.watch(dir, :create, :moved_to) do |event|
-    @mutex.synchronize do
-      path = "#{dir}/#{event.name}"
-      match = @dirs[dir].detect{|h| event.name.match(Regexp.new(h[:prefix] + time_pattern_to_regexp(h[:pattern]) + h[:suffix]))}
-      if match
-        unless File.directory?(path)
-          unless @threads.has_key?(path)
-            @logger.info("File #{event.name} was created in / moved into watched dir #{dir}")
-            @files[path] = { :pattern => match[:pattern], :topic => match[:topic], :inode => File.stat(path).ino, :offset => 0 }
-            @threads[path] = Thread.new { tailf(path) }
+def setup_watchers
+  @dirs.each_key do |dir|
+
+    @create_notifier.watch(dir, :create, :moved_to) do |event|
+      @mutex.synchronize do
+        path = "#{dir}/#{event.name}"
+        match = @dirs[dir].detect{|h| event.name.match(Regexp.new(h[:prefix] + time_pattern_to_regexp(h[:pattern]) + h[:suffix]))}
+        if match
+          unless File.directory?(path)
+            unless @threads.has_key?(path)
+              @logger.info("File #{event.name} was created in / moved into watched dir #{dir}")
+              @files[path] = { :pattern => match[:pattern], :topic => match[:topic], :inode => File.stat(path).ino, :offset => 0 }
+              @threads[path] = Thread.new { tailf(path) }
+            end
           end
         end
       end
     end
-  end
 
-  @delete_notifier.watch(dir, :delete, :moved_from) do |event|
-    @mutex.synchronize do
-      path = "#{dir}/#{event.name}"
-      if @threads.has_key?(path)
-        @logger.info("File #{event.name} was deleted / moved from watched dir #{dir}")
-        if @threads[path].alive?
-          @threads[path].terminate
-          @threads[path].join
+    @delete_notifier.watch(dir, :delete, :moved_from) do |event|
+      @mutex.synchronize do
+        path = "#{dir}/#{event.name}"
+        if @threads.has_key?(path)
+          @logger.info("File #{event.name} was deleted / moved from watched dir #{dir}")
+          if @threads[path].alive?
+            @threads[path].terminate
+            @threads[path].join
+          end
+          @threads.delete(path)
+          @files[path][:fd].close unless @files[path][:fd].closed?
+          @files.delete(path)
         end
-        @threads.delete(path)
-        @files[path][:fd].close unless @files[path][:fd].closed?
-        @files.delete(path)
       end
     end
-  end
 
+  end
 end
 
+setup_watchers
+
 Thread.new { @create_notifier.run }
 Thread.new { @delete_notifier.run }
 
+@timers.every(60) { setup_watchers }
+
 @tailf_notifier.run
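
The functional change in this release: the inotify watch registration, which previously ran once at startup, now lives in a setup_watchers method that is called at startup and then re-invoked every 60 seconds from the timers group (the delete timer also stops being assigned to an unused variable). Re-registering presumably guards against watches going dead when a watched directory is removed and recreated, since an inotify watch does not survive the deletion of its target. A minimal sketch of the same pattern, with a hypothetical watch_dir standing in for the configured prefixes:

    require 'rb-inotify'
    require 'timers'

    watch_dir = '/var/log/myapp' # hypothetical path
    notifier  = INotify::Notifier.new

    register = lambda do
      # Skip silently if the directory is currently absent.
      next unless File.directory?(watch_dir)
      notifier.watch(watch_dir, :create, :moved_to) { |event| puts "new file: #{event.name}" }
    end

    register.call                      # once at startup, as before
    timers = Timers::Group.new
    timers.every(60) { register.call } # periodic re-registration, new in 0.1.8
    Thread.new { loop { timers.wait } }

    notifier.run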
data/bin/tailf2kafka.save ADDED
@@ -0,0 +1,302 @@
+#!/usr/bin/env ruby
+
+require 'optparse'
+require 'poseidon'
+require 'yaml'
+require 'hash_symbolizer'
+require 'schash'
+require 'rb-inotify'
+require 'timers'
+require 'socket'
+require 'fileutils'
+require 'logger'
+require 'mixlib/shellout'
+
+$stdout.sync = true
+
+Thread.abort_on_exception = true
+
+@config = nil
+
+loglevels = {
+  :debug => Logger::DEBUG,
+  :info => Logger::INFO,
+  :warn => Logger::WARN,
+  :error => Logger::ERROR,
+  :fatal => Logger::FATAL,
+  :unknown => Logger::UNKNOWN
+}
+
+@loglevel = Logger::INFO
+
+opts = OptionParser.new
+opts.banner = "Usage: #{$0} [options]"
+opts.on( '--config PATH', String, 'Path to settings config' ) { |c| @config = c }
+opts.on( '--log-level [LEVEL]', [:debug, :info, :warn, :error, :fatal, :unknown] ) { |l| @loglevel = loglevels[l] }
+opts.on( '-h', '--help', 'Display this screen' ) { puts opts; exit 0 }
+opts.parse!
+
+unless @config
+  puts opts
+  exit 1
+end
+
+@logger = Logger.new(STDOUT)
+
+@settings = YAML.load_file(@config).symbolize_keys(true)
+
+validator = Schash::Validator.new do
+  {
+    tailf: {
+      files: array_of({
+        topic: string,
+        prefix: string,
+        suffix: optional(string),
+        time_pattern: string,
+      }),
+      position_file: string,
+      flush_interval: integer,
+      max_batch_lines: integer,
+      from_begining: boolean,
+      delete_old_tailed_files: optional(boolean),
+      post_delete_command: optional(string),
+    },
+    kafka: {
+      brokers: array_of(string),
+      producer_type: match(/^(sync|async)$/),
+      produce: optional(boolean),
+    },
+  }
+end
+
+unless validator.validate(@settings).empty?
+  @logger.error("ERROR: bad settings")
+  @logger.error(validator.validate(@settings))
+  exit 1
+end
+
+@settings[:tailf][:files] = @settings[:tailf][:files].map{|h| h.symbolize_keys(true)}
+
+@mutex = Mutex.new
+
+@create_notifier = INotify::Notifier.new
+@delete_notifier = INotify::Notifier.new
+@tailf_notifier = INotify::Notifier.new
+
+@dirs = {}
+@files = {}
+@threads = {}
+@position_file = @settings[:tailf][:position_file]
+@flush_interval = @settings[:tailf][:flush_interval]
+@max_batch_lines = @settings[:tailf][:max_batch_lines]
+@from_begining = @settings[:tailf][:from_begining]
+@delete_old_tailed_files = @settings[:tailf].has_key?(:delete_old_tailed_files) ? @settings[:tailf][:delete_old_tailed_files] : false
+@brokers = @settings[:kafka][:brokers]
+@producer_type = @settings[:kafka][:producer_type].to_sym
+@produce = @settings[:kafka].has_key?(:produce) ? @settings[:kafka][:produce] : true
+
+def write_position_file
+  @mutex.synchronize do
+    File.open(@position_file, 'w') do |file|
+      @files.each do |path, attrs|
+        file.puts "#{path} #{attrs[:pattern]} #{attrs[:topic]} #{attrs[:inode]} #{attrs[:offset]}"
+      end
+    end
+  end
+end
+
+def load_position_file
+  if File.exist?(@position_file)
+    IO.readlines(@position_file).each do |line|
+      path, pattern, topic, inode, offset = line.split(' ')
+      #Load state only for that exist with same inode and were not truncated/rewinded.
+      if File.exists?(path) and File.stat(path).ino == inode.to_i and File.stat(path).size >= offset.to_i
+        @files[path] = { :pattern => pattern, :topic => topic, :inode => inode.to_i, :offset => offset.to_i }
+      end
+    end
+  end
+  write_position_file
+end
+
+load_position_file
+
+@topics = @settings[:tailf][:files].map{|tailf_file| tailf_file[:topic]}
+@producer = Poseidon::Producer.new(@brokers, "#{Socket.gethostname}", :type => @producer_type, :compression_codec => :snappy, :compressed_topics => @topics) if @produce
+
+@producer_queue = SizedQueue.new(10)
+
+@producer_thread = Thread.new do
+  loop do
+    batch = @producer_queue.pop
+    begin
+      @producer.send_messages(batch[:messages]) if @produce
+    rescue Poseidon::Errors::UnableToFetchMetadata
+      @logger.warn("Got Poseidon::Errors::UnableToFetchMetadata while trying to produce kafka messages, retrying in 1 second ...")
+      sleep 1
+      retry
+    end
+    @files[batch[:path]][:offset] = batch[:offset]
+  end
+end
+
+def kafka_produce(path, buffer, offset)
+  truncated = nil
+
+  messages = []
+  while msg = buffer.shift
+    unless msg[-1] == "\n"
+      if buffer.empty?
+        truncated = msg
+      else
+        msg = msg + buffer.shift
+        messages << Poseidon::MessageToSend.new(@files[path][:topic], msg.strip)
+      end
+    else
+      messages << Poseidon::MessageToSend.new(@files[path][:topic], msg.strip)
+    end
+  end
+  @producer_queue.push({ :path => path, :messages => messages, :offset => offset})
+
+  truncated
+end
+
+def tailf(path)
+  file = File.open(path, 'r')
+  @files[path][:fd] = file
+  file.seek(@files[path][:offset], IO::SEEK_SET)
+
+  truncated = nil
+  loop do #Fast read file in batches until we reach EOF upon which we start the tailf modify watcher
+    batch = file.each_line.take(@max_batch_lines)
+    break if batch.empty?
+    batch.unshift(truncated + batch.shift) if truncated
+    truncated = kafka_produce(path, batch, file.pos)
+  end
+
+  mutex = Mutex.new
+  @tailf_notifier.watch(path, :modify) do |event|
+    mutex.synchronize do
+      unless file.closed?
+        loop do
+          batch = file.each_line.take(@max_batch_lines)
+          break if batch.empty?
+          batch.unshift(truncated + batch.shift) if truncated
+          truncated = kafka_produce(path, batch, file.pos)
+        end
+      else
+        @logger.warn("watcher got modify event on closed file #{event.name}")
+      end
+    end
+  end
+end
+
+@time_regexp_hash = {
+  'Y' => '[0-9]{4}',
+  'm' => '[0-9]{2}',
+  'd' => '[0-9]{2}',
+  'H' => '[0-9]{2}',
+  'M' => '[0-9]{2}'
+}
+
+def time_pattern_to_regexp(pattern)
+  pattern.gsub(/%([^%])/) do
+    match = $1
+    @time_regexp_hash.has_key?(match) ? @time_regexp_hash[match] : match
+  end
+end
+
+#Scan existing files that match watched prefixes and start failing them
+@settings[:tailf][:files].each do |tailf_file|
+  dir = File.dirname(tailf_file[:prefix])
+  if File.exists?(dir) and File.directory?(dir)
+    @dirs[dir] ||= []
+    @dirs[dir] << { :prefix => File.basename(tailf_file[:prefix]), :pattern => tailf_file[:time_pattern], :suffix => "#{tailf_file[:suffix]}", :topic => tailf_file[:topic]}
+    Dir.glob("#{tailf_file[:prefix]}*#{tailf_file[:suffix]}").each do |path|
+      if path.match(Regexp.new(time_pattern_to_regexp(tailf_file[:time_pattern])))
+        unless File.directory?(path)
+          #Populate state only if it was not loaded from position file
+          unless @files.has_key?(path)
+            @files[path] = { :pattern => tailf_file[:time_pattern], :topic => tailf_file[:topic], :inode => File.stat(path).ino, :offset => 0 }
+            @files[path][:offset] = File.stat(path).size unless @from_begining
+          end
+          @threads[path] = Thread.new { tailf(path) } unless @threads.has_key?(path)
+        end
+      end
+    end
+  end
+end
+
+def delete_old_tailed_files
+  @mutex.synchronize do
+    @files.each_key do |path|
+      unless path.match(Regexp.new(Time.now.strftime(@files[path][:pattern])))
+        if File.exists?(path) and File.stat(path).ino == @files[path][:inode] and File.stat(path).size == @files[path][:offset] and (Time.now - File.stat(path).mtime) > 30
+          @logger.info("Deleteing old time pattern fully kafka produced file #{path}")
+          FileUtils.rm_r(path)
+          if @settings[:tailf].has_key?(:post_delete_command)
+            @logger.info("Running post delete command => #{@settings[:tailf][:post_delete_command]}")
+            command = Mixlib::
+              ShellOut.new(@settings[:tailf][:post_delete_command])
+            begin
+              command.run_command
+              if command.error?
+                @logger.error("Failed post delete command => #{@settings[:tailf][:post_delete_command]}")
+                @logger.info("STDOUT: #{command.stdout}")
+                @logger.info("STDERR: #{command.stderr}")
+              end
+            rescue => e
+              @logger.error("Failed post delete command => #{@settings[:tailf][:post_delete_command]}")
+              @logger.info(e.message)
+            end
+          end
+        end
+      end
+    end
+  end
+end
+
+@timers = Timers::Group.new
+@uploads_timer = @timers.every(@flush_interval) { write_position_file }
+@delete_old_tailed_files_timer = @timers.every(60) { delete_old_tailed_files } if @delete_old_tailed_files
+Thread.new { loop { @timers.wait } }
+
+@dirs.each_key do |dir|
+
+  @create_notifier.watch(dir, :create, :moved_to) do |event|
+    @mutex.synchronize do
+      path = "#{dir}/#{event.name}"
+      match = @dirs[dir].detect{|h| event.name.match(Regexp.new(h[:prefix] + time_pattern_to_regexp(h[:pattern]) + h[:suffix]))}
+      if match
+        unless File.directory?(path)
+          unless @threads.has_key?(path)
+            @logger.info("File #{event.name} was created in / moved into watched dir #{dir}")
+            @files[path] = { :pattern => match[:pattern], :topic => match[:topic], :inode => File.stat(path).ino, :offset => 0 }
+            @threads[path] = Thread.new { tailf(path) }
+          end
+        end
+      end
+    end
+  end
+
+  @delete_notifier.watch(dir, :delete, :moved_from) do |event|
+    @mutex.synchronize do
+      path = "#{dir}/#{event.name}"
+      if @threads.has_key?(path)
+        @logger.info("File #{event.name} was deleted / moved from watched dir #{dir}")
+        if @threads[path].alive?
+          @threads[path].terminate
+          @threads[path].join
+        end
+        @threads.delete(path)
+        @files[path][:fd].close unless @files[path][:fd].closed?
+        @files.delete(path)
+      end
+    end
+  end
+
+end
+
+Thread.new { @create_notifier.run }
+Thread.new { @delete_notifier.run }
+
+@tailf_notifier.run
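
The added bin/tailf2kafka.save is, by all appearances, a stray working copy of the main script that was packaged along with it; its Schash schema matches the shipped script, so it doubles as a reference for the expected settings file. A hypothetical config, shown as the Ruby hash the script ends up with after YAML.load_file(@config).symbolize_keys(true); only the key names and types come from the validator, all values are illustrative:

    settings = {
      tailf: {
        files: [
          {
            topic:        'nginx-access',               # Kafka topic to produce to
            prefix:       '/var/log/nginx/access.log.', # watched path prefix
            suffix:       '.log',                       # optional
            time_pattern: '%Y-%m-%d-%H',                # strftime rotation pattern
          },
        ],
        position_file:           '/var/lib/tailf2kafka/position',
        flush_interval:          5,    # seconds between position-file flushes
        max_batch_lines:         1024, # lines read per batch
        from_begining:           true, # sic: the schema expects this spelling
        delete_old_tailed_files: true, # optional, defaults to false
      },
      kafka: {
        brokers:       ['kafka01:9092', 'kafka02:9092'],
        producer_type: 'async', # must match /^(sync|async)$/
      },
    }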
data/lib/tailf2kafka/version.rb CHANGED
@@ -1,3 +1,3 @@
 module Tailf2Kafka
-  VERSION ||= '0.1.7'
+  VERSION ||= '0.1.8'
 end
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: tailf2kafka
 version: !ruby/object:Gem::Version
-  version: 0.1.7
+  version: 0.1.8
 platform: ruby
 authors:
 - Alexander Piavlo
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-12-29 00:00:00.000000000 Z
+date: 2016-07-03 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: poseidon
@@ -128,12 +128,14 @@ email:
 - devops@supersonic.com
 executables:
 - tailf2kafka
+- tailf2kafka.save
 extensions: []
 extra_rdoc_files: []
 files:
 - LICENSE
 - README.md
 - bin/tailf2kafka
+- bin/tailf2kafka.save
 - lib/tailf2kafka.rb
 - lib/tailf2kafka/version.rb
 - tailf2kafka.gemspec