tailf2kafka 0.1.8 → 0.1.9

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 36ed03a088f2e37c9e35773e233159bc1c32cf2d
-  data.tar.gz: 203b2fd8a437e4d8baa555f46a246411eed0cb9e
+  metadata.gz: 8eef4e08a12fcd549df3c091eb06a5784669dfa0
+  data.tar.gz: 953bd871eba9fdfa942764464238fa5560c16d2e
 SHA512:
-  metadata.gz: 5554c921a0ce29a3bf4f3548e8b180a46545ace278163b4cebaff11255e709565730f5de25263fc4fa44636a345eb6eec2a187e7dd012dc16bdd109bbb02c484
-  data.tar.gz: 11bc611eac30966753a8327acc0a7f97304d6b1c8757893ef7316b6cf8f1acfc83fdc7a9981b46ccb720a63bb3e5940657ddc8d4ed3795b838d590c8f3c1d485
+  metadata.gz: a79f4f802ea67c650c673513dcdfb8f033f448264a02bc1ce75a5eab335b86db35313ac15ad6402a152eea0427ce000586453fb8aac1eeaf5ead625cca5349be
+  data.tar.gz: 0f353df4ab74ad941e6857ef8611209c4b976394ce811b538bd8e7eb58dfda0187bd892bb78728edfe61e79f76d9de5f86336caa156f3eba330681f3c607fb7e
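These digests cover the two archives packed inside the published .gem file (metadata.gz and data.tar.gz), so both pairs change on every release. A minimal sketch of recomputing the SHA512 values from a downloaded gem, assuming a file named tailf2kafka-0.1.9.gem in the current directory:

require 'digest'
require 'rubygems/package'

# A .gem is a tar archive; metadata.gz and data.tar.gz are entries in it.
File.open('tailf2kafka-0.1.9.gem', 'rb') do |io|
  Gem::Package::TarReader.new(io).each do |entry|
    next unless ['metadata.gz', 'data.tar.gz'].include?(entry.full_name)
    puts "#{entry.full_name}: #{Digest::SHA512.hexdigest(entry.read)}"
  end
end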
data/bin/tailf2kafka CHANGED
@@ -205,7 +205,7 @@ def time_pattern_to_regexp(pattern)
   end
 end
 
-#Scan existing files that match watched prefixes and start tailing them
+#Scan existing files that match watched prefixes and start failing them
 @settings[:tailf][:files].each do |tailf_file|
   tailf_file[:prefix] = File.expand_path(tailf_file[:prefix])
   dir = File.dirname(tailf_file[:prefix])
@@ -257,52 +257,46 @@ end
 
 @timers = Timers::Group.new
 @uploads_timer = @timers.every(@flush_interval) { write_position_file }
-@timers.every(60) { delete_old_tailed_files } if @delete_old_tailed_files
+@delete_old_tailed_files_timer = @timers.every(60) { delete_old_tailed_files } if @delete_old_tailed_files
 Thread.new { loop { @timers.wait } }
 
-def setup_watchers
-  @dirs.each_key do |dir|
-
-    @create_notifier.watch(dir, :create, :moved_to) do |event|
-      @mutex.synchronize do
-        path = "#{dir}/#{event.name}"
-        match = @dirs[dir].detect{|h| event.name.match(Regexp.new(h[:prefix] + time_pattern_to_regexp(h[:pattern]) + h[:suffix]))}
-        if match
-          unless File.directory?(path)
-            unless @threads.has_key?(path)
-              @logger.info("File #{event.name} was created in / moved into watched dir #{dir}")
-              @files[path] = { :pattern => match[:pattern], :topic => match[:topic], :inode => File.stat(path).ino, :offset => 0 }
-              @threads[path] = Thread.new { tailf(path) }
-            end
+@dirs.each_key do |dir|
+
+  @create_notifier.watch(dir, :create, :moved_to) do |event|
+    @mutex.synchronize do
+      path = "#{dir}/#{event.name}"
+      match = @dirs[dir].detect{|h| event.name.match(Regexp.new(h[:prefix] + time_pattern_to_regexp(h[:pattern]) + h[:suffix]))}
+      if match
+        unless File.directory?(path)
+          unless @threads.has_key?(path)
+            @logger.info("File #{event.name} was created in / moved into watched dir #{dir}")
+            @files[path] = { :pattern => match[:pattern], :topic => match[:topic], :inode => File.stat(path).ino, :offset => 0 }
+            @threads[path] = Thread.new { tailf(path) }
           end
         end
       end
     end
+  end
 
-    @delete_notifier.watch(dir, :delete, :moved_from) do |event|
-      @mutex.synchronize do
-        path = "#{dir}/#{event.name}"
-        if @threads.has_key?(path)
-          @logger.info("File #{event.name} was deleted / moved from watched dir #{dir}")
-          if @threads[path].alive?
-            @threads[path].terminate
-            @threads[path].join
-          end
-          @threads.delete(path)
-          @files[path][:fd].close unless @files[path][:fd].closed?
-          @files.delete(path)
+  @delete_notifier.watch(dir, :delete, :moved_from) do |event|
+    @mutex.synchronize do
+      path = "#{dir}/#{event.name}"
+      if @threads.has_key?(path)
+        @logger.info("File #{event.name} was deleted / moved from watched dir #{dir}")
+        if @threads[path].alive?
+          @threads[path].terminate
+          @threads[path].join
         end
+        @threads.delete(path)
+        @files[path][:fd].close unless @files[path][:fd].closed?
+        @files.delete(path)
       end
     end
-
   end
-end
 
-setup_watchers
+end
 
 Thread.new { @create_notifier.run }
 Thread.new { @delete_notifier.run }
 
-@timers.every(60) { setup_watchers }
-
 @tailf_notifier.run
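Net effect of this hunk: the setup_watchers method, its initial call, and the @timers.every(60) { setup_watchers } re-registration timer are all removed, so the inotify watchers are now registered exactly once, inline (the watch bodies themselves only lose an indent level), and the cleanup timer's handle is kept in @delete_old_tailed_files_timer. With the timers gem, holding the Timers::Timer returned by Timers::Group#every is what allows pausing or cancelling it later; a minimal sketch (the cancel call is illustrative, nothing in this release invokes it):

require 'timers'

timers = Timers::Group.new
# every returns a Timers::Timer; keeping the handle lets you control it later.
cleanup = timers.every(60) { puts 'cleanup tick' }
Thread.new { loop { timers.wait } }

# ...later, e.g. on shutdown or a config reload (illustrative):
cleanup.cancel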
data/lib/tailf2kafka/version.rb CHANGED
@@ -1,3 +1,3 @@
 module Tailf2Kafka
-  VERSION ||= '0.1.8'
+  VERSION ||= '0.1.9'
 end
data/tailf2kafka.gemspec CHANGED
@@ -22,6 +22,7 @@ Gem::Specification.new do |s|
   s.add_dependency('rb-inotify')
   s.add_dependency('timers')
   s.add_dependency('mixlib-shellout')
+  s.add_dependency('activesupport', '~> 4.2.6')
 
   s.add_development_dependency('rake')
 
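The new runtime dependency uses a pessimistic constraint: '~> 4.2.6' admits any activesupport from 4.2.6 up to, but not including, 4.3.0. A quick way to check what a constraint allows:

require 'rubygems'

req = Gem::Requirement.new('~> 4.2.6')
puts req.satisfied_by?(Gem::Version.new('4.2.11')) # => true
puts req.satisfied_by?(Gem::Version.new('4.3.0'))  # => false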
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: tailf2kafka
 version: !ruby/object:Gem::Version
-  version: 0.1.8
+  version: 0.1.9
 platform: ruby
 authors:
 - Alexander Piavlo
@@ -108,6 +108,20 @@ dependencies:
   - - ">="
     - !ruby/object:Gem::Version
       version: '0'
+- !ruby/object:Gem::Dependency
+  name: activesupport
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 4.2.6
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 4.2.6
 - !ruby/object:Gem::Dependency
   name: rake
   requirement: !ruby/object:Gem::Requirement
@@ -128,14 +142,12 @@ email:
 - devops@supersonic.com
 executables:
 - tailf2kafka
-- tailf2kafka.save
 extensions: []
 extra_rdoc_files: []
 files:
 - LICENSE
 - README.md
 - bin/tailf2kafka
-- bin/tailf2kafka.save
 - lib/tailf2kafka.rb
 - lib/tailf2kafka/version.rb
 - tailf2kafka.gemspec
@@ -159,9 +171,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.2.2
+rubygems_version: 2.6.4
 signing_key:
 specification_version: 4
 summary: Watch and tail files with specified time based patterns and push them to
   kafka
 test_files: []
+has_rdoc: false
data/bin/tailf2kafka.save DELETED
@@ -1,302 +0,0 @@
-#!/usr/bin/env ruby
-
-require 'optparse'
-require 'poseidon'
-require 'yaml'
-require 'hash_symbolizer'
-require 'schash'
-require 'rb-inotify'
-require 'timers'
-require 'socket'
-require 'fileutils'
-require 'logger'
-require 'mixlib/shellout'
-
-$stdout.sync = true
-
-Thread.abort_on_exception = true
-
-@config = nil
-
-loglevels = {
-  :debug => Logger::DEBUG,
-  :info => Logger::INFO,
-  :warn => Logger::WARN,
-  :error => Logger::Error,
-  :fatal => Logger::FATAL,
-  :unknown => Logger::UNKNOWN
-}
-
-@loglevel = Logger::INFO
-
-opts = OptionParser.new
-opts.banner = "Usage: #{$0} [options]"
-opts.on( '--config PATH', String, 'Path to settings config' ) { |c| @config = c }
-opts.on( '--log-level [LEVEL]', [:debug, :info, :warn, :error, :fatal, :unknown] ) { |l| @loglevel = loglevels[l] }
-opts.on( '-h', '--help', 'Display this screen' ) { puts opts; exit 0 }
-opts.parse!
-
-unless @config
-  puts opts
-  exit 1
-end
-
-@logger = Logger.new(STDOUT)
-
-@settings = YAML.load_file(@config).symbolize_keys(true)
-
-validator = Schash::Validator.new do
-  {
-    tailf: {
-      files: array_of({
-        topic: string,
-        prefix: string,
-        suffix: optional(string),
-        time_pattern: string,
-      }),
-      position_file: string,
-      flush_interval: integer,
-      max_batch_lines: integer,
-      from_begining: boolean,
-      delete_old_tailed_files: optional(boolean),
-      post_delete_command: optional(string),
-    },
-    kafka: {
-      brokers: array_of(string),
-      producer_type: match(/^(sync|async)$/),
-      produce: optional(boolean),
-    },
-  }
-end
-
-unless validator.validate(@settings).empty?
-  @logger.error("ERROR: bad settings")
-  @logger.error(validator.validate(@settings))
-  exit 1
-end
-
-@settings[:tailf][:files] = @settings[:tailf][:files].map{|h| h.symbolize_keys(true)}
-
-@mutex = Mutex.new
-
-@create_notifier = INotify::Notifier.new
-@delete_notifier = INotify::Notifier.new
-@tailf_notifier = INotify::Notifier.new
-
-@dirs = {}
-@files = {}
-@threads = {}
-@position_file = @settings[:tailf][:position_file]
-@flush_interval = @settings[:tailf][:flush_interval]
-@max_batch_lines = @settings[:tailf][:max_batch_lines]
-@from_begining = @settings[:tailf][:from_begining]
-@delete_old_tailed_files = @settings[:tailf].has_key?(:delete_old_tailed_files) ? @settings[:tailf][:delete_old_tailed_files] : false
-@brokers = @settings[:kafka][:brokers]
-@producer_type = @settings[:kafka][:producer_type].to_sym
-@produce = @settings[:kafka].has_key?(:produce) ? @settings[:kafka][:produce] : true
-
-def write_position_file
-  @mutex.synchronize do
-    File.open(@position_file, 'w') do |file|
-      @files.each do |path, attrs|
-        file.puts "#{path} #{attrs[:pattern]} #{attrs[:topic]} #{attrs[:inode]} #{attrs[:offset]}"
-      end
-    end
-  end
-end
-
-def load_position_file
-  if File.exist?(@position_file)
-    IO.readlines(@position_file).each do |line|
-      path, pattern, topic, inode, offset = line.split(' ')
-      #Load state only for that exist with same inode and were not truncated/rewinded.
-      if File.exists?(path) and File.stat(path).ino == inode.to_i and File.stat(path).size >= offset.to_i
-        @files[path] = { :pattern => pattern, :topic => topic, :inode => inode.to_i, :offset => offset.to_i }
-      end
-    end
-  end
-  write_position_file
-end
-
-load_position_file
-
-@topics = @settings[:tailf][:files].map{|tailf_file| tailf_file[:topic]}
-@producer = Poseidon::Producer.new(@brokers, "#{Socket.gethostname}", :type => @producer_type, :compression_codec => :snappy, :compressed_topics => @topics) if @produce
-
-@producer_queue = SizedQueue.new(10)
-
-@producer_thread = Thread.new do
-  loop do
-    batch = @producer_queue.pop
-    begin
-      @producer.send_messages(batch[:messages]) if @produce
-    rescue Poseidon::Errors::UnableToFetchMetadata
-      @logger.warn("Got Poseidon::Errors::UnableToFetchMetadata while trying to produce kafka messages, retrying in 1 second ...")
-      sleep 1
-      retry
-    end
-    @files[batch[:path]][:offset] = batch[:offset]
-  end
-end
-
-def kafka_produce(path, buffer, offset)
-  truncated = nil
-
-  messages = []
-  while msg = buffer.shift
-    unless msg[-1] == "\n"
-      if buffer.empty?
-        truncated = msg
-      else
-        msg = msg + buffer.shift
-        messages << Poseidon::MessageToSend.new(@files[path][:topic], msg.strip)
-      end
-    else
-      messages << Poseidon::MessageToSend.new(@files[path][:topic], msg.strip)
-    end
-  end
-  @producer_queue.push({ :path => path, :messages => messages, :offset => offset})
-
-  truncated
-end
-
-def tailf(path)
-  file = File.open(path, 'r')
-  @files[path][:fd] = file
-  file.seek(@files[path][:offset], IO::SEEK_SET)
-
-  truncated = nil
-  loop do #Fast read file in batches until we reach EOF upon which we start the tailf modify watcher
-    batch = file.each_line.take(@max_batch_lines)
-    break if batch.empty?
-    batch.unshift(truncated + batch.shift) if truncated
-    truncated = kafka_produce(path, batch, file.pos)
-  end
-
-  mutex = Mutex.new
-  @tailf_notifier.watch(path, :modify) do |event|
-    mutex.synchronize do
-      unless file.closed?
-        loop do
-          batch = file.each_line.take(@max_batch_lines)
-          break if batch.empty?
-          batch.unshift(truncated + batch.shift) if truncated
-          truncated = kafka_produce(path, batch, file.pos)
-        end
-      else
-        @logger.warn("watcher got modify event on closed file #{event.name}")
-      end
-    end
-  end
-end
-
-@time_regexp_hash = {
-  'Y' => '[0-9]{4}',
-  'm' => '[0-9]{2}',
-  'd' => '[0-9]{2}',
-  'H' => '[0-9]{2}',
-  'M' => '[0-9]{2}'
-}
-
-def time_pattern_to_regexp(pattern)
-  pattern.gsub(/%([^%])/) do
-    match = $1
-    @time_regexp_hash.has_key?(match) ? @time_regexp_hash[match] : match
-  end
-end
-
-#Scan existing files that match watched prefixes and start failing them
-@settings[:tailf][:files].each do |tailf_file|
-  dir = File.dirname(tailf_file[:prefix])
-  if File.exists?(dir) and File.directory?(dir)
-    @dirs[dir] ||= []
-    @dirs[dir] << { :prefix => File.basename(tailf_file[:prefix]), :pattern => tailf_file[:time_pattern], :suffix => "#{tailf_file[:suffix]}", :topic => tailf_file[:topic]}
-    Dir.glob("#{tailf_file[:prefix]}*#{tailf_file[:suffix]}").each do |path|
-      if path.match(Regexp.new(time_pattern_to_regexp(tailf_file[:time_pattern])))
-        unless File.directory?(path)
-          #Populate state only if it was not loaded from position file
-          unless @files.has_key?(path)
-            @files[path] = { :pattern => tailf_file[:time_pattern], :topic => tailf_file[:topic], :inode => File.stat(path).ino, :offset => 0 }
-            @files[path][:offset] = File.stat(path).size unless @from_begining
-          end
-          @threads[path] = Thread.new { tailf(path) } unless @threads.has_key?(path)
-        end
-      end
-    end
-  end
-end
-
-def delete_old_tailed_files
-  @mutex.synchronize do
-    @files.each_key do |path|
-      unless path.match(Regexp.new(Time.now.strftime(@files[path][:pattern])))
-        if File.exists?(path) and File.stat(path).ino == @files[path][:inode] and File.stat(path).size == @files[path][:offset] and (Time.now - File.stat(path).mtime) > 30
-          @logger.info("Deleteing old time pattern fully kafka produced file #{path}")
-          FileUtils.rm_r(path)
-          if @settings[:tailf].has_key?(:post_delete_command)
-            @logger.info("Running post delete command => #{@settings[:tailf][:post_delete_command]}")
-            command = Mixlib::
-            ShellOut.new(@settings[:tailf][:post_delete_command])
-            begin
-              command.run_command
-              if command.error?
-                @logger.error("Failed post delete command => #{@settings[:tailf][:post_delete_command]}")
-                @logger.info("STDOUT: #{command.stdout}")
-                @logger.info("STDERR: #{command.stderr}")
-              end
-            rescue => e
-              @logger.error("Failed post delete command => #{@settings[:tailf][:post_delete_command]}")
-              @logger.info(e.message)
-            end
-          end
-        end
-      end
-    end
-  end
-end
-
-@timers = Timers::Group.new
-@uploads_timer = @timers.every(@flush_interval) { write_position_file }
-@delete_old_tailed_files_timer = @timers.every(60) { delete_old_tailed_files } if @delete_old_tailed_files
-Thread.new { loop { @timers.wait } }
-
-@dirs.each_key do |dir|
-
-  @create_notifier.watch(dir, :create, :moved_to) do |event|
-    @mutex.synchronize do
-      path = "#{dir}/#{event.name}"
-      match = @dirs[dir].detect{|h| event.name.match(Regexp.new(h[:prefix] + time_pattern_to_regexp(h[:pattern]) + h[:suffix]))}
-      if match
-        unless File.directory?(path)
-          unless @threads.has_key?(path)
-            @logger.info("File #{event.name} was created in / moved into watched dir #{dir}")
-            @files[path] = { :pattern => match[:pattern], :topic => match[:topic], :inode => File.stat(path).ino, :offset => 0 }
-            @threads[path] = Thread.new { tailf(path) }
-          end
-        end
-      end
-    end
-  end
-
-  @delete_notifier.watch(dir, :delete, :moved_from) do |event|
-    @mutex.synchronize do
-      path = "#{dir}/#{event.name}"
-      if @threads.has_key?(path)
-        @logger.info("File #{event.name} was deleted / moved from watched dir #{dir}")
-        if @threads[path].alive?
-          @threads[path].terminate
-          @threads[path].join
-        end
-        @threads.delete(path)
-        @files[path][:fd].close unless @files[path][:fd].closed?
-        @files.delete(path)
-      end
-    end
-  end
-
-end
-
-Thread.new { @create_notifier.run }
-Thread.new { @delete_notifier.run }
-
-@tailf_notifier.run
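The deleted file is evidently a leftover backup copy that 0.1.8 shipped and even listed as an executable; 0.1.9 drops it (see the metadata diff above), and its watcher-setup tail matches the new inline layout in bin/tailf2kafka. Its time_pattern_to_regexp helper, still present in the live script, converts an strftime-style time_pattern into a regexp for matching rotated file names; a self-contained rendering of the same conversion, to show what it produces:

# Same substitution table and gsub as the script above, reproduced standalone.
TIME_REGEXP = {
  'Y' => '[0-9]{4}', 'm' => '[0-9]{2}', 'd' => '[0-9]{2}',
  'H' => '[0-9]{2}', 'M' => '[0-9]{2}'
}.freeze

def time_pattern_to_regexp(pattern)
  pattern.gsub(/%([^%])/) { TIME_REGEXP.fetch($1, $1) }
end

puts time_pattern_to_regexp('%Y-%m-%d.%H')
# => [0-9]{4}-[0-9]{2}-[0-9]{2}.[0-9]{2}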