tailf2kafka 0.1.4 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1a5efc928874d8b1900268152cf26bc6b6382491
4
- data.tar.gz: 8814ee2e23060c01064ae53e468c7514e9ec0fb4
3
+ metadata.gz: 517eb3cc0c07e25383e56e63d10a09a12c2b5835
4
+ data.tar.gz: 69136fb7321b932d4aee23368f1b8b363ebc9fa2
5
5
  SHA512:
6
- metadata.gz: bc6f6ff3b03436364582f1279f62aa9862b61a8ae9c7e0217ed420cec647ee3b284031ef6d19c42c0ecae878e886cfc68ee15c29ce9c8a7dee773b232efd80c2
7
- data.tar.gz: b145745c6f763d760827e85050722f8a706794f64dc86b005b6040f6470bd95a9abd515a45938ab867f883fce0cfb0bf0cc98bac652049787e1ba76a8215a07b
6
+ metadata.gz: e82b708355566b9902c765fbac0c811e4fbc3695af51afe86cc5a617ba3cbdbc58e50c73769fbabf687b51e99d8a686d1ab9f768bb35444c11d1a82e7775ca98
7
+ data.tar.gz: a44b90c9a612b18b1da300622d61e923c72afe566d881103b07a587238a03a6e695ec1b3af51850be39f8895baf67f836e7e83f38662228afe93723d70a714ee
data/bin/tailf2kafka CHANGED
@@ -9,6 +9,8 @@ require 'rb-inotify'
9
9
  require 'timers'
10
10
  require 'socket'
11
11
  require 'fileutils'
12
+ require 'logger'
13
+ require 'mixlib/shellout'
12
14
 
13
15
  $stdout.sync = true
14
16
 
@@ -16,9 +18,21 @@ Thread.abort_on_exception = true
16
18
 
17
19
  @config = nil
18
20
 
21
+ loglevels = {
22
+ :debug => Logger::DEBUG,
23
+ :info => Logger::INFO,
24
+ :warn => Logger::WARN,
25
+ :error => Logger::ERROR,
26
+ :fatal => Logger::FATAL,
27
+ :unknown => Logger::UNKNOWN
28
+ }
29
+
30
+ @loglevel = Logger::INFO
31
+
19
32
  opts = OptionParser.new
20
33
  opts.banner = "Usage: #{$0} [options]"
21
34
  opts.on( '--config PATH', String, 'Path to settings config' ) { |c| @config = c }
35
+ opts.on( '--log-level [LEVEL]', [:debug, :info, :warn, :error, :fatal, :unknown] ) { |l| @loglevel = loglevels[l] }
22
36
  opts.on( '-h', '--help', 'Display this screen' ) { puts opts; exit 0 }
23
37
  opts.parse!
24
38
 
@@ -27,6 +41,8 @@ unless @config
27
41
  exit 1
28
42
  end
29
43
 
44
+ @logger = Logger.new(STDOUT)
45
+
30
46
  @settings = YAML.load_file(@config).symbolize_keys(true)
31
47
 
32
48
  validator = Schash::Validator.new do
@@ -43,6 +59,7 @@ validator = Schash::Validator.new do
43
59
  max_batch_lines: integer,
44
60
  from_begining: boolean,
45
61
  delete_old_tailed_files: optional(boolean),
62
+ post_delete_command: optional(string),
46
63
  },
47
64
  kafka: {
48
65
  brokers: array_of(string),
@@ -53,8 +70,8 @@ validator = Schash::Validator.new do
53
70
  end
54
71
 
55
72
  unless validator.validate(@settings).empty?
56
- puts "ERROR: bad settings "
57
- pp validator.validate(@settings)
73
+ @logger.error("ERROR: bad settings")
74
+ @logger.error(validator.validate(@settings))
58
75
  exit 1
59
76
  end
60
77
 
@@ -114,7 +131,7 @@ load_position_file
114
131
  begin
115
132
  @producer.send_messages(batch[:messages]) if @produce
116
133
  rescue Poseidon::Errors::UnableToFetchMetadata
117
- puts "Got Poseidon::Errors::UnableToFetchMetadata while trying to produce kafka messages, retrying in 1 second ..."
134
+ @logger.warn("Got Poseidon::Errors::UnableToFetchMetadata while trying to produce kafka messages, retrying in 1 second ...")
118
135
  sleep 1
119
136
  retry
120
137
  end
@@ -123,31 +140,52 @@ load_position_file
123
140
  end
124
141
 
125
142
  def kafka_produce(path, buffer, offset)
143
+ truncated = nil
144
+
126
145
  messages = []
127
- buffer.each do |msg|
128
- messages << Poseidon::MessageToSend.new(@files[path][:topic], msg.strip)
146
+ while msg = buffer.shift
147
+ unless msg[-1] == "\n"
148
+ if buffer.empty?
149
+ truncated = msg
150
+ else
151
+ msg = msg + buffer.shift
152
+ messages << Poseidon::MessageToSend.new(@files[path][:topic], msg.strip)
153
+ end
154
+ else
155
+ messages << Poseidon::MessageToSend.new(@files[path][:topic], msg.strip)
156
+ end
129
157
  end
130
158
  @producer_queue.push({ :path => path, :messages => messages, :offset => offset})
159
+
160
+ truncated
131
161
  end
132
162
 
133
163
  def tailf(path)
134
164
  file = File.open(path, 'r')
135
165
  @files[path][:fd] = file
136
166
  file.seek(@files[path][:offset], IO::SEEK_SET)
167
+
168
+ truncated = nil
137
169
  loop do #Fast read file in batches until we reach EOF upon which we start the tailf modify watcher
138
170
  batch = file.each_line.take(@max_batch_lines)
139
171
  break if batch.empty?
140
- kafka_produce(path, batch, file.pos)
172
+ batch.unshift(truncated + batch.shift) if truncated
173
+ truncated = kafka_produce(path, batch, file.pos)
141
174
  end
175
+
176
+ mutex = Mutex.new
142
177
  @tailf_notifier.watch(path, :modify) do |event|
143
- unless file.closed?
144
- loop do
145
- batch = file.each_line.take(@max_batch_lines)
146
- break if batch.empty?
147
- kafka_produce(path, batch, file.pos)
178
+ mutex.synchronize do
179
+ unless file.closed?
180
+ loop do
181
+ batch = file.each_line.take(@max_batch_lines)
182
+ break if batch.empty?
183
+ batch.unshift(truncated + batch.shift) if truncated
184
+ truncated = kafka_produce(path, batch, file.pos)
185
+ end
186
+ else
187
+ @logger.warn("watcher got modify event on closed file #{event.name}")
148
188
  end
149
- else
150
- puts "watcher got modify event on closed file #{event.name}"
151
189
  end
152
190
  end
153
191
  end
@@ -192,8 +230,12 @@ def delete_old_tailed_files
192
230
  @files.each_key do |path|
193
231
  unless path.match(Regexp.new(Time.now.strftime(@files[path][:pattern])))
194
232
  if File.exists?(path) and File.stat(path).ino == @files[path][:inode] and File.stat(path).size == @files[path][:offset] and (Time.now - File.stat(path).mtime) > 30
195
- puts "Deleteing old time pattern fully kafka produced file #{path}"
233
+ @logger.info("Deleteing old time pattern fully kafka produced file #{path}")
196
234
  FileUtils.rm_r(path)
235
+ if @settings[:tailf].has_key?(:post_delete_command)
236
+ command = Mixlib::ShellOut.new(@settings[:tailf][:post_delete_command])
237
+ command.run_command
238
+ end
197
239
  end
198
240
  end
199
241
  end
@@ -214,7 +256,7 @@ Thread.new { loop { @timers.wait } }
214
256
  if match
215
257
  unless File.directory?(path)
216
258
  unless @threads.has_key?(path)
217
- puts "File #{event.name} was created in / moved into watched dir #{dir}"
259
+ @logger.info("File #{event.name} was created in / moved into watched dir #{dir}")
218
260
  @files[path] = { :pattern => match[:pattern], :topic => match[:topic], :inode => File.stat(path).ino, :offset => 0 }
219
261
  @threads[path] = Thread.new { tailf(path) }
220
262
  end
@@ -227,7 +269,7 @@ Thread.new { loop { @timers.wait } }
227
269
  @mutex.synchronize do
228
270
  path = "#{dir}/#{event.name}"
229
271
  if @threads.has_key?(path)
230
- puts "File #{event.name} was deleted / moved from watched dir #{dir}"
272
+ @logger.info("File #{event.name} was deleted / moved from watched dir #{dir}")
231
273
  if @threads[path].alive?
232
274
  @threads[path].terminate
233
275
  @threads[path].join
@@ -1,3 +1,3 @@
1
1
  module Tailf2Kafka
2
- VERSION ||= '0.1.4'
2
+ VERSION ||= '0.1.5'
3
3
  end
data/tailf2kafka.gemspec CHANGED
@@ -13,7 +13,7 @@ Gem::Specification.new do |s|
13
13
  s.summary = "Watch and tail files with specified time based patterns and push them to kafka"
14
14
  s.description = "Watch and tail files with specified time based patterns and push them to kafka"
15
15
  s.license = 'MIT'
16
- s.has_rdoc = false
16
+ s.has_rdoc = false
17
17
 
18
18
  s.add_dependency('poseidon')
19
19
  s.add_dependency('snappy')
@@ -21,6 +21,7 @@ Gem::Specification.new do |s|
21
21
  s.add_dependency('schash')
22
22
  s.add_dependency('rb-inotify')
23
23
  s.add_dependency('timers')
24
+ s.add_dependency('mixlib-shellout')
24
25
 
25
26
  s.add_development_dependency('rake')
26
27
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tailf2kafka
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alexander Piavlo
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-19 00:00:00.000000000 Z
11
+ date: 2015-10-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: poseidon
@@ -94,6 +94,20 @@ dependencies:
94
94
  - - ">="
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: mixlib-shellout
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
97
111
  - !ruby/object:Gem::Dependency
98
112
  name: rake
99
113
  requirement: !ruby/object:Gem::Requirement