tailf2kafka 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1a5efc928874d8b1900268152cf26bc6b6382491
4
- data.tar.gz: 8814ee2e23060c01064ae53e468c7514e9ec0fb4
3
+ metadata.gz: 517eb3cc0c07e25383e56e63d10a09a12c2b5835
4
+ data.tar.gz: 69136fb7321b932d4aee23368f1b8b363ebc9fa2
5
5
  SHA512:
6
- metadata.gz: bc6f6ff3b03436364582f1279f62aa9862b61a8ae9c7e0217ed420cec647ee3b284031ef6d19c42c0ecae878e886cfc68ee15c29ce9c8a7dee773b232efd80c2
7
- data.tar.gz: b145745c6f763d760827e85050722f8a706794f64dc86b005b6040f6470bd95a9abd515a45938ab867f883fce0cfb0bf0cc98bac652049787e1ba76a8215a07b
6
+ metadata.gz: e82b708355566b9902c765fbac0c811e4fbc3695af51afe86cc5a617ba3cbdbc58e50c73769fbabf687b51e99d8a686d1ab9f768bb35444c11d1a82e7775ca98
7
+ data.tar.gz: a44b90c9a612b18b1da300622d61e923c72afe566d881103b07a587238a03a6e695ec1b3af51850be39f8895baf67f836e7e83f38662228afe93723d70a714ee
data/bin/tailf2kafka CHANGED
@@ -9,6 +9,8 @@ require 'rb-inotify'
9
9
  require 'timers'
10
10
  require 'socket'
11
11
  require 'fileutils'
12
+ require 'logger'
13
+ require 'mixlib/shellout'
12
14
 
13
15
  $stdout.sync = true
14
16
 
@@ -16,9 +18,21 @@ Thread.abort_on_exception = true
16
18
 
17
19
  @config = nil
18
20
 
21
+ loglevels = {
22
+ :debug => Logger::DEBUG,
23
+ :info => Logger::INFO,
24
+ :warn => Logger::WARN,
25
+ :error => Logger::ERROR,
26
+ :fatal => Logger::FATAL,
27
+ :unknown => Logger::UNKNOWN
28
+ }
29
+
30
+ @loglevel = Logger::INFO
31
+
19
32
  opts = OptionParser.new
20
33
  opts.banner = "Usage: #{$0} [options]"
21
34
  opts.on( '--config PATH', String, 'Path to settings config' ) { |c| @config = c }
35
+ opts.on( '--log-level [LEVEL]', [:debug, :info, :warn, :error, :fatal, :unknown] ) { |l| @loglevel = loglevels[l] }
22
36
  opts.on( '-h', '--help', 'Display this screen' ) { puts opts; exit 0 }
23
37
  opts.parse!
24
38
 
@@ -27,6 +41,8 @@ unless @config
27
41
  exit 1
28
42
  end
29
43
 
44
+ @logger = Logger.new(STDOUT)
45
+
30
46
  @settings = YAML.load_file(@config).symbolize_keys(true)
31
47
 
32
48
  validator = Schash::Validator.new do
@@ -43,6 +59,7 @@ validator = Schash::Validator.new do
43
59
  max_batch_lines: integer,
44
60
  from_begining: boolean,
45
61
  delete_old_tailed_files: optional(boolean),
62
+ post_delete_command: optional(string),
46
63
  },
47
64
  kafka: {
48
65
  brokers: array_of(string),
@@ -53,8 +70,8 @@ validator = Schash::Validator.new do
53
70
  end
54
71
 
55
72
  unless validator.validate(@settings).empty?
56
- puts "ERROR: bad settings "
57
- pp validator.validate(@settings)
73
+ @logger.error("ERROR: bad settings")
74
+ @logger.error(validator.validate(@settings))
58
75
  exit 1
59
76
  end
60
77
 
@@ -114,7 +131,7 @@ load_position_file
114
131
  begin
115
132
  @producer.send_messages(batch[:messages]) if @produce
116
133
  rescue Poseidon::Errors::UnableToFetchMetadata
117
- puts "Got Poseidon::Errors::UnableToFetchMetadata while trying to produce kafka messages, retrying in 1 second ..."
134
+ @logger.warn("Got Poseidon::Errors::UnableToFetchMetadata while trying to produce kafka messages, retrying in 1 second ...")
118
135
  sleep 1
119
136
  retry
120
137
  end
@@ -123,31 +140,52 @@ load_position_file
123
140
  end
124
141
 
125
142
  def kafka_produce(path, buffer, offset)
143
+ truncated = nil
144
+
126
145
  messages = []
127
- buffer.each do |msg|
128
- messages << Poseidon::MessageToSend.new(@files[path][:topic], msg.strip)
146
+ while msg = buffer.shift
147
+ unless msg[-1] == "\n"
148
+ if buffer.empty?
149
+ truncated = msg
150
+ else
151
+ msg = msg + buffer.shift
152
+ messages << Poseidon::MessageToSend.new(@files[path][:topic], msg.strip)
153
+ end
154
+ else
155
+ messages << Poseidon::MessageToSend.new(@files[path][:topic], msg.strip)
156
+ end
129
157
  end
130
158
  @producer_queue.push({ :path => path, :messages => messages, :offset => offset})
159
+
160
+ truncated
131
161
  end
132
162
 
133
163
  def tailf(path)
134
164
  file = File.open(path, 'r')
135
165
  @files[path][:fd] = file
136
166
  file.seek(@files[path][:offset], IO::SEEK_SET)
167
+
168
+ truncated = nil
137
169
  loop do #Fast read file in batches until we reach EOF upon which we start the tailf modify watcher
138
170
  batch = file.each_line.take(@max_batch_lines)
139
171
  break if batch.empty?
140
- kafka_produce(path, batch, file.pos)
172
+ batch.unshift(truncated + batch.shift) if truncated
173
+ truncated = kafka_produce(path, batch, file.pos)
141
174
  end
175
+
176
+ mutex = Mutex.new
142
177
  @tailf_notifier.watch(path, :modify) do |event|
143
- unless file.closed?
144
- loop do
145
- batch = file.each_line.take(@max_batch_lines)
146
- break if batch.empty?
147
- kafka_produce(path, batch, file.pos)
178
+ mutex.synchronize do
179
+ unless file.closed?
180
+ loop do
181
+ batch = file.each_line.take(@max_batch_lines)
182
+ break if batch.empty?
183
+ batch.unshift(truncated + batch.shift) if truncated
184
+ truncated = kafka_produce(path, batch, file.pos)
185
+ end
186
+ else
187
+ @logger.warn("watcher got modify event on closed file #{event.name}")
148
188
  end
149
- else
150
- puts "watcher got modify event on closed file #{event.name}"
151
189
  end
152
190
  end
153
191
  end
@@ -192,8 +230,12 @@ def delete_old_tailed_files
192
230
  @files.each_key do |path|
193
231
  unless path.match(Regexp.new(Time.now.strftime(@files[path][:pattern])))
194
232
  if File.exists?(path) and File.stat(path).ino == @files[path][:inode] and File.stat(path).size == @files[path][:offset] and (Time.now - File.stat(path).mtime) > 30
195
- puts "Deleteing old time pattern fully kafka produced file #{path}"
233
+ @logger.info("Deleteing old time pattern fully kafka produced file #{path}")
196
234
  FileUtils.rm_r(path)
235
+ if @settings[:tailf].has_key?(:post_delete_command)
236
+ command = Mixlib::ShellOut.new(@settings[:tailf][:post_delete_command])
237
+ command.run_command
238
+ end
197
239
  end
198
240
  end
199
241
  end
@@ -214,7 +256,7 @@ Thread.new { loop { @timers.wait } }
214
256
  if match
215
257
  unless File.directory?(path)
216
258
  unless @threads.has_key?(path)
217
- puts "File #{event.name} was created in / moved into watched dir #{dir}"
259
+ @logger.info("File #{event.name} was created in / moved into watched dir #{dir}")
218
260
  @files[path] = { :pattern => match[:pattern], :topic => match[:topic], :inode => File.stat(path).ino, :offset => 0 }
219
261
  @threads[path] = Thread.new { tailf(path) }
220
262
  end
@@ -227,7 +269,7 @@ Thread.new { loop { @timers.wait } }
227
269
  @mutex.synchronize do
228
270
  path = "#{dir}/#{event.name}"
229
271
  if @threads.has_key?(path)
230
- puts "File #{event.name} was deleted / moved from watched dir #{dir}"
272
+ @logger.info("File #{event.name} was deleted / moved from watched dir #{dir}")
231
273
  if @threads[path].alive?
232
274
  @threads[path].terminate
233
275
  @threads[path].join
@@ -1,3 +1,3 @@
1
1
  module Tailf2Kafka
2
- VERSION ||= '0.1.4'
2
+ VERSION ||= '0.1.5'
3
3
  end
data/tailf2kafka.gemspec CHANGED
@@ -13,7 +13,7 @@ Gem::Specification.new do |s|
13
13
  s.summary = "Watch and tail files with specified time based patterns and push them to kafka"
14
14
  s.description = "Watch and tail files with specified time based patterns and push them to kafka"
15
15
  s.license = 'MIT'
16
- s.has_rdoc = false
16
+ s.has_rdoc = false
17
17
 
18
18
  s.add_dependency('poseidon')
19
19
  s.add_dependency('snappy')
@@ -21,6 +21,7 @@ Gem::Specification.new do |s|
21
21
  s.add_dependency('schash')
22
22
  s.add_dependency('rb-inotify')
23
23
  s.add_dependency('timers')
24
+ s.add_dependency('mixlib-shellout')
24
25
 
25
26
  s.add_development_dependency('rake')
26
27
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tailf2kafka
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alexander Piavlo
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-19 00:00:00.000000000 Z
11
+ date: 2015-10-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: poseidon
@@ -94,6 +94,20 @@ dependencies:
94
94
  - - ">="
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: mixlib-shellout
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
97
111
  - !ruby/object:Gem::Dependency
98
112
  name: rake
99
113
  requirement: !ruby/object:Gem::Requirement