tailf2kafka 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/tailf2kafka +58 -16
- data/lib/tailf2kafka/version.rb +1 -1
- data/tailf2kafka.gemspec +2 -1
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 517eb3cc0c07e25383e56e63d10a09a12c2b5835
|
4
|
+
data.tar.gz: 69136fb7321b932d4aee23368f1b8b363ebc9fa2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e82b708355566b9902c765fbac0c811e4fbc3695af51afe86cc5a617ba3cbdbc58e50c73769fbabf687b51e99d8a686d1ab9f768bb35444c11d1a82e7775ca98
|
7
|
+
data.tar.gz: a44b90c9a612b18b1da300622d61e923c72afe566d881103b07a587238a03a6e695ec1b3af51850be39f8895baf67f836e7e83f38662228afe93723d70a714ee
|
data/bin/tailf2kafka
CHANGED
@@ -9,6 +9,8 @@ require 'rb-inotify'
|
|
9
9
|
require 'timers'
|
10
10
|
require 'socket'
|
11
11
|
require 'fileutils'
|
12
|
+
require 'logger'
|
13
|
+
require 'mixlib/shellout'
|
12
14
|
|
13
15
|
$stdout.sync = true
|
14
16
|
|
@@ -16,9 +18,21 @@ Thread.abort_on_exception = true
|
|
16
18
|
|
17
19
|
@config = nil
|
18
20
|
|
21
|
+
loglevels = {
|
22
|
+
:debug => Logger::DEBUG,
|
23
|
+
:info => Logger::INFO,
|
24
|
+
:warn => Logger::WARN,
|
25
|
+
:error => Logger::Error,
|
26
|
+
:fatal => Logger::FATAL,
|
27
|
+
:unknown => Logger::UNKNOWN
|
28
|
+
}
|
29
|
+
|
30
|
+
@loglevel = Logger::INFO
|
31
|
+
|
19
32
|
opts = OptionParser.new
|
20
33
|
opts.banner = "Usage: #{$0} [options]"
|
21
34
|
opts.on( '--config PATH', String, 'Path to settings config' ) { |c| @config = c }
|
35
|
+
opts.on( '--log-level [LEVEL]', [:debug, :info, :warn, :error, :fatal, :unknown] ) { |l| @loglevel = loglevels[l] }
|
22
36
|
opts.on( '-h', '--help', 'Display this screen' ) { puts opts; exit 0 }
|
23
37
|
opts.parse!
|
24
38
|
|
@@ -27,6 +41,8 @@ unless @config
|
|
27
41
|
exit 1
|
28
42
|
end
|
29
43
|
|
44
|
+
@logger = Logger.new(STDOUT)
|
45
|
+
|
30
46
|
@settings = YAML.load_file(@config).symbolize_keys(true)
|
31
47
|
|
32
48
|
validator = Schash::Validator.new do
|
@@ -43,6 +59,7 @@ validator = Schash::Validator.new do
|
|
43
59
|
max_batch_lines: integer,
|
44
60
|
from_begining: boolean,
|
45
61
|
delete_old_tailed_files: optional(boolean),
|
62
|
+
post_delete_command: optional(string),
|
46
63
|
},
|
47
64
|
kafka: {
|
48
65
|
brokers: array_of(string),
|
@@ -53,8 +70,8 @@ validator = Schash::Validator.new do
|
|
53
70
|
end
|
54
71
|
|
55
72
|
unless validator.validate(@settings).empty?
|
56
|
-
|
57
|
-
|
73
|
+
@logger.error("ERROR: bad settings")
|
74
|
+
@logger.error(validator.validate(@settings))
|
58
75
|
exit 1
|
59
76
|
end
|
60
77
|
|
@@ -114,7 +131,7 @@ load_position_file
|
|
114
131
|
begin
|
115
132
|
@producer.send_messages(batch[:messages]) if @produce
|
116
133
|
rescue Poseidon::Errors::UnableToFetchMetadata
|
117
|
-
|
134
|
+
@logger.warn("Got Poseidon::Errors::UnableToFetchMetadata while trying to produce kafka messages, retrying in 1 second ...")
|
118
135
|
sleep 1
|
119
136
|
retry
|
120
137
|
end
|
@@ -123,31 +140,52 @@ load_position_file
|
|
123
140
|
end
|
124
141
|
|
125
142
|
def kafka_produce(path, buffer, offset)
|
143
|
+
truncated = nil
|
144
|
+
|
126
145
|
messages = []
|
127
|
-
buffer.
|
128
|
-
|
146
|
+
while msg = buffer.shift
|
147
|
+
unless msg[-1] == "\n"
|
148
|
+
if buffer.empty?
|
149
|
+
truncated = msg
|
150
|
+
else
|
151
|
+
msg = msg + buffer.shift
|
152
|
+
messages << Poseidon::MessageToSend.new(@files[path][:topic], msg.strip)
|
153
|
+
end
|
154
|
+
else
|
155
|
+
messages << Poseidon::MessageToSend.new(@files[path][:topic], msg.strip)
|
156
|
+
end
|
129
157
|
end
|
130
158
|
@producer_queue.push({ :path => path, :messages => messages, :offset => offset})
|
159
|
+
|
160
|
+
truncated
|
131
161
|
end
|
132
162
|
|
133
163
|
def tailf(path)
|
134
164
|
file = File.open(path, 'r')
|
135
165
|
@files[path][:fd] = file
|
136
166
|
file.seek(@files[path][:offset], IO::SEEK_SET)
|
167
|
+
|
168
|
+
truncated = nil
|
137
169
|
loop do #Fast read file in batches until we reach EOF upon which we start the tailf modify watcher
|
138
170
|
batch = file.each_line.take(@max_batch_lines)
|
139
171
|
break if batch.empty?
|
140
|
-
|
172
|
+
batch.unshift(truncated + batch.shift) if truncated
|
173
|
+
truncated = kafka_produce(path, batch, file.pos)
|
141
174
|
end
|
175
|
+
|
176
|
+
mutex = Mutex.new
|
142
177
|
@tailf_notifier.watch(path, :modify) do |event|
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
178
|
+
mutex.synchronize do
|
179
|
+
unless file.closed?
|
180
|
+
loop do
|
181
|
+
batch = file.each_line.take(@max_batch_lines)
|
182
|
+
break if batch.empty?
|
183
|
+
batch.unshift(truncated + batch.shift) if truncated
|
184
|
+
truncated = kafka_produce(path, batch, file.pos)
|
185
|
+
end
|
186
|
+
else
|
187
|
+
@logger.warn("watcher got modify event on closed file #{event.name}")
|
148
188
|
end
|
149
|
-
else
|
150
|
-
puts "watcher got modify event on closed file #{event.name}"
|
151
189
|
end
|
152
190
|
end
|
153
191
|
end
|
@@ -192,8 +230,12 @@ def delete_old_tailed_files
|
|
192
230
|
@files.each_key do |path|
|
193
231
|
unless path.match(Regexp.new(Time.now.strftime(@files[path][:pattern])))
|
194
232
|
if File.exists?(path) and File.stat(path).ino == @files[path][:inode] and File.stat(path).size == @files[path][:offset] and (Time.now - File.stat(path).mtime) > 30
|
195
|
-
|
233
|
+
@logger.info("Deleteing old time pattern fully kafka produced file #{path}")
|
196
234
|
FileUtils.rm_r(path)
|
235
|
+
if @settings[:tailf].has_key?(:post_delete_command)
|
236
|
+
command = Mixlib::ShellOut.new(@settings[:tailf][:post_delete_command])
|
237
|
+
command.run_command
|
238
|
+
end
|
197
239
|
end
|
198
240
|
end
|
199
241
|
end
|
@@ -214,7 +256,7 @@ Thread.new { loop { @timers.wait } }
|
|
214
256
|
if match
|
215
257
|
unless File.directory?(path)
|
216
258
|
unless @threads.has_key?(path)
|
217
|
-
|
259
|
+
@logger.info("File #{event.name} was created in / moved into watched dir #{dir}")
|
218
260
|
@files[path] = { :pattern => match[:pattern], :topic => match[:topic], :inode => File.stat(path).ino, :offset => 0 }
|
219
261
|
@threads[path] = Thread.new { tailf(path) }
|
220
262
|
end
|
@@ -227,7 +269,7 @@ Thread.new { loop { @timers.wait } }
|
|
227
269
|
@mutex.synchronize do
|
228
270
|
path = "#{dir}/#{event.name}"
|
229
271
|
if @threads.has_key?(path)
|
230
|
-
|
272
|
+
@logger.info("File #{event.name} was deleted / moved from watched dir #{dir}")
|
231
273
|
if @threads[path].alive?
|
232
274
|
@threads[path].terminate
|
233
275
|
@threads[path].join
|
data/lib/tailf2kafka/version.rb
CHANGED
data/tailf2kafka.gemspec
CHANGED
@@ -13,7 +13,7 @@ Gem::Specification.new do |s|
|
|
13
13
|
s.summary = "Watch and tail files with specified time based patterns and push them to kafka"
|
14
14
|
s.description = "Watch and tail files with specified time based patterns and push them to kafka"
|
15
15
|
s.license = 'MIT'
|
16
|
-
s.has_rdoc = false
|
16
|
+
s.has_rdoc = false
|
17
17
|
|
18
18
|
s.add_dependency('poseidon')
|
19
19
|
s.add_dependency('snappy')
|
@@ -21,6 +21,7 @@ Gem::Specification.new do |s|
|
|
21
21
|
s.add_dependency('schash')
|
22
22
|
s.add_dependency('rb-inotify')
|
23
23
|
s.add_dependency('timers')
|
24
|
+
s.add_dependency('mixlib-shellout')
|
24
25
|
|
25
26
|
s.add_development_dependency('rake')
|
26
27
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tailf2kafka
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alexander Piavlo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-10-
|
11
|
+
date: 2015-10-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: poseidon
|
@@ -94,6 +94,20 @@ dependencies:
|
|
94
94
|
- - ">="
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: mixlib-shellout
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :runtime
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
97
111
|
- !ruby/object:Gem::Dependency
|
98
112
|
name: rake
|
99
113
|
requirement: !ruby/object:Gem::Requirement
|