tailf2kafka 0.1.4 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/tailf2kafka +58 -16
- data/lib/tailf2kafka/version.rb +1 -1
- data/tailf2kafka.gemspec +2 -1
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 517eb3cc0c07e25383e56e63d10a09a12c2b5835
|
4
|
+
data.tar.gz: 69136fb7321b932d4aee23368f1b8b363ebc9fa2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e82b708355566b9902c765fbac0c811e4fbc3695af51afe86cc5a617ba3cbdbc58e50c73769fbabf687b51e99d8a686d1ab9f768bb35444c11d1a82e7775ca98
|
7
|
+
data.tar.gz: a44b90c9a612b18b1da300622d61e923c72afe566d881103b07a587238a03a6e695ec1b3af51850be39f8895baf67f836e7e83f38662228afe93723d70a714ee
|
data/bin/tailf2kafka
CHANGED
@@ -9,6 +9,8 @@ require 'rb-inotify'
|
|
9
9
|
require 'timers'
|
10
10
|
require 'socket'
|
11
11
|
require 'fileutils'
|
12
|
+
require 'logger'
|
13
|
+
require 'mixlib/shellout'
|
12
14
|
|
13
15
|
$stdout.sync = true
|
14
16
|
|
@@ -16,9 +18,21 @@ Thread.abort_on_exception = true
|
|
16
18
|
|
17
19
|
@config = nil
|
18
20
|
|
21
|
+
loglevels = {
|
22
|
+
:debug => Logger::DEBUG,
|
23
|
+
:info => Logger::INFO,
|
24
|
+
:warn => Logger::WARN,
|
25
|
+
:error => Logger::Error,
|
26
|
+
:fatal => Logger::FATAL,
|
27
|
+
:unknown => Logger::UNKNOWN
|
28
|
+
}
|
29
|
+
|
30
|
+
@loglevel = Logger::INFO
|
31
|
+
|
19
32
|
opts = OptionParser.new
|
20
33
|
opts.banner = "Usage: #{$0} [options]"
|
21
34
|
opts.on( '--config PATH', String, 'Path to settings config' ) { |c| @config = c }
|
35
|
+
opts.on( '--log-level [LEVEL]', [:debug, :info, :warn, :error, :fatal, :unknown] ) { |l| @loglevel = loglevels[l] }
|
22
36
|
opts.on( '-h', '--help', 'Display this screen' ) { puts opts; exit 0 }
|
23
37
|
opts.parse!
|
24
38
|
|
@@ -27,6 +41,8 @@ unless @config
|
|
27
41
|
exit 1
|
28
42
|
end
|
29
43
|
|
44
|
+
@logger = Logger.new(STDOUT)
|
45
|
+
|
30
46
|
@settings = YAML.load_file(@config).symbolize_keys(true)
|
31
47
|
|
32
48
|
validator = Schash::Validator.new do
|
@@ -43,6 +59,7 @@ validator = Schash::Validator.new do
|
|
43
59
|
max_batch_lines: integer,
|
44
60
|
from_begining: boolean,
|
45
61
|
delete_old_tailed_files: optional(boolean),
|
62
|
+
post_delete_command: optional(string),
|
46
63
|
},
|
47
64
|
kafka: {
|
48
65
|
brokers: array_of(string),
|
@@ -53,8 +70,8 @@ validator = Schash::Validator.new do
|
|
53
70
|
end
|
54
71
|
|
55
72
|
unless validator.validate(@settings).empty?
|
56
|
-
|
57
|
-
|
73
|
+
@logger.error("ERROR: bad settings")
|
74
|
+
@logger.error(validator.validate(@settings))
|
58
75
|
exit 1
|
59
76
|
end
|
60
77
|
|
@@ -114,7 +131,7 @@ load_position_file
|
|
114
131
|
begin
|
115
132
|
@producer.send_messages(batch[:messages]) if @produce
|
116
133
|
rescue Poseidon::Errors::UnableToFetchMetadata
|
117
|
-
|
134
|
+
@logger.warn("Got Poseidon::Errors::UnableToFetchMetadata while trying to produce kafka messages, retrying in 1 second ...")
|
118
135
|
sleep 1
|
119
136
|
retry
|
120
137
|
end
|
@@ -123,31 +140,52 @@ load_position_file
|
|
123
140
|
end
|
124
141
|
|
125
142
|
def kafka_produce(path, buffer, offset)
|
143
|
+
truncated = nil
|
144
|
+
|
126
145
|
messages = []
|
127
|
-
buffer.
|
128
|
-
|
146
|
+
while msg = buffer.shift
|
147
|
+
unless msg[-1] == "\n"
|
148
|
+
if buffer.empty?
|
149
|
+
truncated = msg
|
150
|
+
else
|
151
|
+
msg = msg + buffer.shift
|
152
|
+
messages << Poseidon::MessageToSend.new(@files[path][:topic], msg.strip)
|
153
|
+
end
|
154
|
+
else
|
155
|
+
messages << Poseidon::MessageToSend.new(@files[path][:topic], msg.strip)
|
156
|
+
end
|
129
157
|
end
|
130
158
|
@producer_queue.push({ :path => path, :messages => messages, :offset => offset})
|
159
|
+
|
160
|
+
truncated
|
131
161
|
end
|
132
162
|
|
133
163
|
def tailf(path)
|
134
164
|
file = File.open(path, 'r')
|
135
165
|
@files[path][:fd] = file
|
136
166
|
file.seek(@files[path][:offset], IO::SEEK_SET)
|
167
|
+
|
168
|
+
truncated = nil
|
137
169
|
loop do #Fast read file in batches until we reach EOF upon which we start the tailf modify watcher
|
138
170
|
batch = file.each_line.take(@max_batch_lines)
|
139
171
|
break if batch.empty?
|
140
|
-
|
172
|
+
batch.unshift(truncated + batch.shift) if truncated
|
173
|
+
truncated = kafka_produce(path, batch, file.pos)
|
141
174
|
end
|
175
|
+
|
176
|
+
mutex = Mutex.new
|
142
177
|
@tailf_notifier.watch(path, :modify) do |event|
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
178
|
+
mutex.synchronize do
|
179
|
+
unless file.closed?
|
180
|
+
loop do
|
181
|
+
batch = file.each_line.take(@max_batch_lines)
|
182
|
+
break if batch.empty?
|
183
|
+
batch.unshift(truncated + batch.shift) if truncated
|
184
|
+
truncated = kafka_produce(path, batch, file.pos)
|
185
|
+
end
|
186
|
+
else
|
187
|
+
@logger.warn("watcher got modify event on closed file #{event.name}")
|
148
188
|
end
|
149
|
-
else
|
150
|
-
puts "watcher got modify event on closed file #{event.name}"
|
151
189
|
end
|
152
190
|
end
|
153
191
|
end
|
@@ -192,8 +230,12 @@ def delete_old_tailed_files
|
|
192
230
|
@files.each_key do |path|
|
193
231
|
unless path.match(Regexp.new(Time.now.strftime(@files[path][:pattern])))
|
194
232
|
if File.exists?(path) and File.stat(path).ino == @files[path][:inode] and File.stat(path).size == @files[path][:offset] and (Time.now - File.stat(path).mtime) > 30
|
195
|
-
|
233
|
+
@logger.info("Deleteing old time pattern fully kafka produced file #{path}")
|
196
234
|
FileUtils.rm_r(path)
|
235
|
+
if @settings[:tailf].has_key?(:post_delete_command)
|
236
|
+
command = Mixlib::ShellOut.new(@settings[:tailf][:post_delete_command])
|
237
|
+
command.run_command
|
238
|
+
end
|
197
239
|
end
|
198
240
|
end
|
199
241
|
end
|
@@ -214,7 +256,7 @@ Thread.new { loop { @timers.wait } }
|
|
214
256
|
if match
|
215
257
|
unless File.directory?(path)
|
216
258
|
unless @threads.has_key?(path)
|
217
|
-
|
259
|
+
@logger.info("File #{event.name} was created in / moved into watched dir #{dir}")
|
218
260
|
@files[path] = { :pattern => match[:pattern], :topic => match[:topic], :inode => File.stat(path).ino, :offset => 0 }
|
219
261
|
@threads[path] = Thread.new { tailf(path) }
|
220
262
|
end
|
@@ -227,7 +269,7 @@ Thread.new { loop { @timers.wait } }
|
|
227
269
|
@mutex.synchronize do
|
228
270
|
path = "#{dir}/#{event.name}"
|
229
271
|
if @threads.has_key?(path)
|
230
|
-
|
272
|
+
@logger.info("File #{event.name} was deleted / moved from watched dir #{dir}")
|
231
273
|
if @threads[path].alive?
|
232
274
|
@threads[path].terminate
|
233
275
|
@threads[path].join
|
data/lib/tailf2kafka/version.rb
CHANGED
data/tailf2kafka.gemspec
CHANGED
@@ -13,7 +13,7 @@ Gem::Specification.new do |s|
|
|
13
13
|
s.summary = "Watch and tail files with specified time based patterns and push them to kafka"
|
14
14
|
s.description = "Watch and tail files with specified time based patterns and push them to kafka"
|
15
15
|
s.license = 'MIT'
|
16
|
-
s.has_rdoc = false
|
16
|
+
s.has_rdoc = false
|
17
17
|
|
18
18
|
s.add_dependency('poseidon')
|
19
19
|
s.add_dependency('snappy')
|
@@ -21,6 +21,7 @@ Gem::Specification.new do |s|
|
|
21
21
|
s.add_dependency('schash')
|
22
22
|
s.add_dependency('rb-inotify')
|
23
23
|
s.add_dependency('timers')
|
24
|
+
s.add_dependency('mixlib-shellout')
|
24
25
|
|
25
26
|
s.add_development_dependency('rake')
|
26
27
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tailf2kafka
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alexander Piavlo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-10-
|
11
|
+
date: 2015-10-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: poseidon
|
@@ -94,6 +94,20 @@ dependencies:
|
|
94
94
|
- - ">="
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: mixlib-shellout
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :runtime
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
97
111
|
- !ruby/object:Gem::Dependency
|
98
112
|
name: rake
|
99
113
|
requirement: !ruby/object:Gem::Requirement
|