tailf2kafka 0.1.8 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 36ed03a088f2e37c9e35773e233159bc1c32cf2d
-  data.tar.gz: 203b2fd8a437e4d8baa555f46a246411eed0cb9e
+  metadata.gz: 8eef4e08a12fcd549df3c091eb06a5784669dfa0
+  data.tar.gz: 953bd871eba9fdfa942764464238fa5560c16d2e
 SHA512:
-  metadata.gz: 5554c921a0ce29a3bf4f3548e8b180a46545ace278163b4cebaff11255e709565730f5de25263fc4fa44636a345eb6eec2a187e7dd012dc16bdd109bbb02c484
-  data.tar.gz: 11bc611eac30966753a8327acc0a7f97304d6b1c8757893ef7316b6cf8f1acfc83fdc7a9981b46ccb720a63bb3e5940657ddc8d4ed3795b838d590c8f3c1d485
+  metadata.gz: a79f4f802ea67c650c673513dcdfb8f033f448264a02bc1ce75a5eab335b86db35313ac15ad6402a152eea0427ce000586453fb8aac1eeaf5ead625cca5349be
+  data.tar.gz: 0f353df4ab74ad941e6857ef8611209c4b976394ce811b538bd8e7eb58dfda0187bd892bb78728edfe61e79f76d9de5f86336caa156f3eba330681f3c607fb7e
data/bin/tailf2kafka CHANGED
@@ -205,7 +205,7 @@ def time_pattern_to_regexp(pattern)
   end
 end
 
-#Scan existing files that match watched prefixes and start tailing them
+#Scan existing files that match watched prefixes and start failing them
 @settings[:tailf][:files].each do |tailf_file|
   tailf_file[:prefix] = File.expand_path(tailf_file[:prefix])
   dir = File.dirname(tailf_file[:prefix])
@@ -257,52 +257,46 @@ end
 
 @timers = Timers::Group.new
 @uploads_timer = @timers.every(@flush_interval) { write_position_file }
-@timers.every(60) { delete_old_tailed_files } if @delete_old_tailed_files
+@delete_old_tailed_files_timer = @timers.every(60) { delete_old_tailed_files } if @delete_old_tailed_files
 Thread.new { loop { @timers.wait } }
 
-def setup_watchers
-  @dirs.each_key do |dir|
-
-    @create_notifier.watch(dir, :create, :moved_to) do |event|
-      @mutex.synchronize do
-        path = "#{dir}/#{event.name}"
-        match = @dirs[dir].detect{|h| event.name.match(Regexp.new(h[:prefix] + time_pattern_to_regexp(h[:pattern]) + h[:suffix]))}
-        if match
-          unless File.directory?(path)
-            unless @threads.has_key?(path)
-              @logger.info("File #{event.name} was created in / moved into watched dir #{dir}")
-              @files[path] = { :pattern => match[:pattern], :topic => match[:topic], :inode => File.stat(path).ino, :offset => 0 }
-              @threads[path] = Thread.new { tailf(path) }
-            end
+@dirs.each_key do |dir|
+
+  @create_notifier.watch(dir, :create, :moved_to) do |event|
+    @mutex.synchronize do
+      path = "#{dir}/#{event.name}"
+      match = @dirs[dir].detect{|h| event.name.match(Regexp.new(h[:prefix] + time_pattern_to_regexp(h[:pattern]) + h[:suffix]))}
+      if match
+        unless File.directory?(path)
+          unless @threads.has_key?(path)
+            @logger.info("File #{event.name} was created in / moved into watched dir #{dir}")
+            @files[path] = { :pattern => match[:pattern], :topic => match[:topic], :inode => File.stat(path).ino, :offset => 0 }
+            @threads[path] = Thread.new { tailf(path) }
           end
         end
       end
     end
+  end
 
-    @delete_notifier.watch(dir, :delete, :moved_from) do |event|
-      @mutex.synchronize do
-        path = "#{dir}/#{event.name}"
-        if @threads.has_key?(path)
-          @logger.info("File #{event.name} was deleted / moved from watched dir #{dir}")
-          if @threads[path].alive?
-            @threads[path].terminate
-            @threads[path].join
-          end
-          @threads.delete(path)
-          @files[path][:fd].close unless @files[path][:fd].closed?
-          @files.delete(path)
+  @delete_notifier.watch(dir, :delete, :moved_from) do |event|
+    @mutex.synchronize do
+      path = "#{dir}/#{event.name}"
+      if @threads.has_key?(path)
+        @logger.info("File #{event.name} was deleted / moved from watched dir #{dir}")
+        if @threads[path].alive?
+          @threads[path].terminate
+          @threads[path].join
         end
+        @threads.delete(path)
+        @files[path][:fd].close unless @files[path][:fd].closed?
+        @files.delete(path)
       end
     end
-
   end
-end
 
-setup_watchers
+end
 
 Thread.new { @create_notifier.run }
 Thread.new { @delete_notifier.run }
 
-@timers.every(60) { setup_watchers }
-
 @tailf_notifier.run
data/lib/tailf2kafka/version.rb CHANGED
@@ -1,3 +1,3 @@
 module Tailf2Kafka
-  VERSION ||= '0.1.8'
+  VERSION ||= '0.1.9'
 end
data/tailf2kafka.gemspec CHANGED
@@ -22,6 +22,7 @@ Gem::Specification.new do |s|
   s.add_dependency('rb-inotify')
   s.add_dependency('timers')
   s.add_dependency('mixlib-shellout')
+  s.add_dependency('activesupport', '~> 4.2.6')
 
   s.add_development_dependency('rake')
 
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: tailf2kafka
 version: !ruby/object:Gem::Version
-  version: 0.1.8
+  version: 0.1.9
 platform: ruby
 authors:
 - Alexander Piavlo
@@ -108,6 +108,20 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: '0'
+- !ruby/object:Gem::Dependency
+  name: activesupport
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 4.2.6
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 4.2.6
 - !ruby/object:Gem::Dependency
   name: rake
   requirement: !ruby/object:Gem::Requirement
@@ -128,14 +142,12 @@ email:
 - devops@supersonic.com
 executables:
 - tailf2kafka
-- tailf2kafka.save
 extensions: []
 extra_rdoc_files: []
 files:
 - LICENSE
 - README.md
 - bin/tailf2kafka
-- bin/tailf2kafka.save
 - lib/tailf2kafka.rb
 - lib/tailf2kafka/version.rb
 - tailf2kafka.gemspec
@@ -159,9 +171,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.2.2
+rubygems_version: 2.6.4
 signing_key:
 specification_version: 4
 summary: Watch and tail files with specified time based patterns and push them to
   kafka
 test_files: []
+has_rdoc: false
data/bin/tailf2kafka.save DELETED
@@ -1,302 +0,0 @@
-#!/usr/bin/env ruby
-
-require 'optparse'
-require 'poseidon'
-require 'yaml'
-require 'hash_symbolizer'
-require 'schash'
-require 'rb-inotify'
-require 'timers'
-require 'socket'
-require 'fileutils'
-require 'logger'
-require 'mixlib/shellout'
-
-$stdout.sync = true
-
-Thread.abort_on_exception = true
-
-@config = nil
-
-loglevels = {
-  :debug => Logger::DEBUG,
-  :info => Logger::INFO,
-  :warn => Logger::WARN,
-  :error => Logger::Error,
-  :fatal => Logger::FATAL,
-  :unknown => Logger::UNKNOWN
-}
-
-@loglevel = Logger::INFO
-
-opts = OptionParser.new
-opts.banner = "Usage: #{$0} [options]"
-opts.on( '--config PATH', String, 'Path to settings config' ) { |c| @config = c }
-opts.on( '--log-level [LEVEL]', [:debug, :info, :warn, :error, :fatal, :unknown] ) { |l| @loglevel = loglevels[l] }
-opts.on( '-h', '--help', 'Display this screen' ) { puts opts; exit 0 }
-opts.parse!
-
-unless @config
-  puts opts
-  exit 1
-end
-
-@logger = Logger.new(STDOUT)
-
-@settings = YAML.load_file(@config).symbolize_keys(true)
-
-validator = Schash::Validator.new do
-  {
-    tailf: {
-      files: array_of({
-        topic: string,
-        prefix: string,
-        suffix: optional(string),
-        time_pattern: string,
-      }),
-      position_file: string,
-      flush_interval: integer,
-      max_batch_lines: integer,
-      from_begining: boolean,
-      delete_old_tailed_files: optional(boolean),
-      post_delete_command: optional(string),
-    },
-    kafka: {
-      brokers: array_of(string),
-      producer_type: match(/^(sync|async)$/),
-      produce: optional(boolean),
-    },
-  }
-end
-
-unless validator.validate(@settings).empty?
-  @logger.error("ERROR: bad settings")
-  @logger.error(validator.validate(@settings))
-  exit 1
-end
-
-@settings[:tailf][:files] = @settings[:tailf][:files].map{|h| h.symbolize_keys(true)}
-
-@mutex = Mutex.new
-
-@create_notifier = INotify::Notifier.new
-@delete_notifier = INotify::Notifier.new
-@tailf_notifier = INotify::Notifier.new
-
-@dirs = {}
-@files = {}
-@threads = {}
-@position_file = @settings[:tailf][:position_file]
-@flush_interval = @settings[:tailf][:flush_interval]
-@max_batch_lines = @settings[:tailf][:max_batch_lines]
-@from_begining = @settings[:tailf][:from_begining]
-@delete_old_tailed_files = @settings[:tailf].has_key?(:delete_old_tailed_files) ? @settings[:tailf][:delete_old_tailed_files] : false
-@brokers = @settings[:kafka][:brokers]
-@producer_type = @settings[:kafka][:producer_type].to_sym
-@produce = @settings[:kafka].has_key?(:produce) ? @settings[:kafka][:produce] : true
-
-def write_position_file
-  @mutex.synchronize do
-    File.open(@position_file, 'w') do |file|
-      @files.each do |path, attrs|
-        file.puts "#{path} #{attrs[:pattern]} #{attrs[:topic]} #{attrs[:inode]} #{attrs[:offset]}"
-      end
-    end
-  end
-end
-
-def load_position_file
-  if File.exist?(@position_file)
-    IO.readlines(@position_file).each do |line|
-      path, pattern, topic, inode, offset = line.split(' ')
-      #Load state only for that exist with same inode and were not truncated/rewinded.
-      if File.exists?(path) and File.stat(path).ino == inode.to_i and File.stat(path).size >= offset.to_i
-        @files[path] = { :pattern => pattern, :topic => topic, :inode => inode.to_i, :offset => offset.to_i }
-      end
-    end
-  end
-  write_position_file
-end
-
-load_position_file
-
-@topics = @settings[:tailf][:files].map{|tailf_file| tailf_file[:topic]}
-@producer = Poseidon::Producer.new(@brokers, "#{Socket.gethostname}", :type => @producer_type, :compression_codec => :snappy, :compressed_topics => @topics) if @produce
-
-@producer_queue = SizedQueue.new(10)
-
-@producer_thread = Thread.new do
-  loop do
-    batch = @producer_queue.pop
-    begin
-      @producer.send_messages(batch[:messages]) if @produce
-    rescue Poseidon::Errors::UnableToFetchMetadata
-      @logger.warn("Got Poseidon::Errors::UnableToFetchMetadata while trying to produce kafka messages, retrying in 1 second ...")
-      sleep 1
-      retry
-    end
-    @files[batch[:path]][:offset] = batch[:offset]
-  end
-end
-
-def kafka_produce(path, buffer, offset)
-  truncated = nil
-
-  messages = []
-  while msg = buffer.shift
-    unless msg[-1] == "\n"
-      if buffer.empty?
-        truncated = msg
-      else
-        msg = msg + buffer.shift
-        messages << Poseidon::MessageToSend.new(@files[path][:topic], msg.strip)
-      end
-    else
-      messages << Poseidon::MessageToSend.new(@files[path][:topic], msg.strip)
-    end
-  end
-  @producer_queue.push({ :path => path, :messages => messages, :offset => offset})
-
-  truncated
-end
-
-def tailf(path)
-  file = File.open(path, 'r')
-  @files[path][:fd] = file
-  file.seek(@files[path][:offset], IO::SEEK_SET)
-
-  truncated = nil
-  loop do #Fast read file in batches until we reach EOF upon which we start the tailf modify watcher
-    batch = file.each_line.take(@max_batch_lines)
-    break if batch.empty?
-    batch.unshift(truncated + batch.shift) if truncated
-    truncated = kafka_produce(path, batch, file.pos)
-  end
-
-  mutex = Mutex.new
-  @tailf_notifier.watch(path, :modify) do |event|
-    mutex.synchronize do
-      unless file.closed?
-        loop do
-          batch = file.each_line.take(@max_batch_lines)
-          break if batch.empty?
-          batch.unshift(truncated + batch.shift) if truncated
-          truncated = kafka_produce(path, batch, file.pos)
-        end
-      else
-        @logger.warn("watcher got modify event on closed file #{event.name}")
-      end
-    end
-  end
-end
-
-@time_regexp_hash = {
-  'Y' => '[0-9]{4}',
-  'm' => '[0-9]{2}',
-  'd' => '[0-9]{2}',
-  'H' => '[0-9]{2}',
-  'M' => '[0-9]{2}'
-}
-
-def time_pattern_to_regexp(pattern)
-  pattern.gsub(/%([^%])/) do
-    match = $1
-    @time_regexp_hash.has_key?(match) ? @time_regexp_hash[match] : match
-  end
-end
-
-#Scan existing files that match watched prefixes and start failing them
-@settings[:tailf][:files].each do |tailf_file|
-  dir = File.dirname(tailf_file[:prefix])
-  if File.exists?(dir) and File.directory?(dir)
-    @dirs[dir] ||= []
-    @dirs[dir] << { :prefix => File.basename(tailf_file[:prefix]), :pattern => tailf_file[:time_pattern], :suffix => "#{tailf_file[:suffix]}", :topic => tailf_file[:topic]}
-    Dir.glob("#{tailf_file[:prefix]}*#{tailf_file[:suffix]}").each do |path|
-      if path.match(Regexp.new(time_pattern_to_regexp(tailf_file[:time_pattern])))
-        unless File.directory?(path)
-          #Populate state only if it was not loaded from position file
-          unless @files.has_key?(path)
-            @files[path] = { :pattern => tailf_file[:time_pattern], :topic => tailf_file[:topic], :inode => File.stat(path).ino, :offset => 0 }
-            @files[path][:offset] = File.stat(path).size unless @from_begining
-          end
-          @threads[path] = Thread.new { tailf(path) } unless @threads.has_key?(path)
-        end
-      end
-    end
-  end
-end
-
-def delete_old_tailed_files
-  @mutex.synchronize do
-    @files.each_key do |path|
-      unless path.match(Regexp.new(Time.now.strftime(@files[path][:pattern])))
-        if File.exists?(path) and File.stat(path).ino == @files[path][:inode] and File.stat(path).size == @files[path][:offset] and (Time.now - File.stat(path).mtime) > 30
-          @logger.info("Deleteing old time pattern fully kafka produced file #{path}")
-          FileUtils.rm_r(path)
-          if @settings[:tailf].has_key?(:post_delete_command)
-            @logger.info("Running post delete command => #{@settings[:tailf][:post_delete_command]}")
-            command = Mixlib::created
-            ShellOut.new(@settings[:tailf][:post_delete_command])
-            begin
-              command.run_command
-              if command.error?
-                @logger.error("Failed post delete command => #{@settings[:tailf][:post_delete_command]}")
-                @logger.info("STDOUT: #{command.stdout}")
-                @logger.info("STDERR: #{command.stderr}")
-              end
-            rescue => e
-              @logger.error("Failed post delete command => #{@settings[:tailf][:post_delete_command]}")
-              @logger.info(e.message)
-            end
-          end
-        end
-      end
-    end
-  end
-end
-
-@timers = Timers::Group.new
-@uploads_timer = @timers.every(@flush_interval) { write_position_file }
-@delete_old_tailed_files_timer = @timers.every(60) { delete_old_tailed_files } if @delete_old_tailed_files
-Thread.new { loop { @timers.wait } }
-
-@dirs.each_key do |dir|
-
-  @create_notifier.watch(dir, :create, :moved_to) do |event|
-    @mutex.synchronize do
-      path = "#{dir}/#{event.name}"
-      match = @dirs[dir].detect{|h| event.name.match(Regexp.new(h[:prefix] + time_pattern_to_regexp(h[:pattern]) + h[:suffix]))}
-      if match
-        unless File.directory?(path)
-          unless @threads.has_key?(path)
-            @logger.info("File #{event.name} was created in / moved into watched dir #{dir}")
-            @files[path] = { :pattern => match[:pattern], :topic => match[:topic], :inode => File.stat(path).ino, :offset => 0 }
-            @threads[path] = Thread.new { tailf(path) }
-          end
-        end
-      end
-    end
-  end
-
-  @delete_notifier.watch(dir, :delete, :moved_from) do |event|
-    @mutex.synchronize do
-      path = "#{dir}/#{event.name}"
-      if @threads.has_key?(path)
-        @logger.info("File #{event.name} was deleted / moved from watched dir #{dir}")
-        if @threads[path].alive?
-          @threads[path].terminate
-          @threads[path].join
-        end
-        @threads.delete(path)
-        @files[path][:fd].close unless @files[path][:fd].closed?
-        @files.delete(path)
-      end
-    end
-  end
-
-end
-
-Thread.new { @create_notifier.run }
-Thread.new { @delete_notifier.run }
-
-@tailf_notifier.run