tailf2kafka 0.1.0

checksums.yaml ADDED
@@ -0,0 +1,7 @@
+ ---
+ SHA1:
+   metadata.gz: 0511b332f01cad011509e5312d35a44406996394
+   data.tar.gz: 1ca828eed00c80a7896b3cf384292e577d8832f5
+ SHA512:
+   metadata.gz: bbe8203735d60e6483a5aef2f0a3f6c0473828753fa5d4649313b56a147b96e63fa300bec9f6a5900995c6c56578b69e4db6af88d572c1e946fbb8a687508f56
+   data.tar.gz: 1e9c1d14ad055d267a005208a5a2a59bd9a5786984bda8b323aff08b6703a75c596761ce688c9ec1552d8fd3a15dc6e5b78e1090ab7e9dd740890b937ab85997
data/LICENSE ADDED
@@ -0,0 +1,20 @@
+ Copyright (c) 2015 Supersonic LTD
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,51 @@
+ # Tailf2Kafka
+
+ Watch and tail files in directories whose filenames follow time-based patterns, and push their lines to Kafka.
+
+ ## Installation
+
+ Add this line to your application's Gemfile:
+
+     gem 'tailf2kafka'
+
+ And then execute:
+
+     $ bundle
+
+ Or install it yourself as:
+
+     $ gem install tailf2kafka
+
+ ## Usage
+
+     $ tailf2kafka -h
+     Usage: tailf2kafka [options]
+             --config PATH                Path to settings config
+         -h, --help                       Display this screen
+     $
+
+ ## Config
+
+     tailf:
+       files:
+         - topic: haproxy
+           prefix: /var/log/haproxy/haproxy
+           time_pattern: ".%Y-%m-%d.%H"
+       position_file: "/var/lib/haproxy/tailf2kafka.offsets"
+       flush_interval: 1
+       max_batch_lines: 1024
+       from_begining: true
+       delete_old_tailed_files: true
+     kafka:
+       brokers: ["broker1:9092", "broker2:9092", "broker3:9092"]
+       producer_type: sync
+
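+ For illustration, the `time_pattern` is expanded with Ruby's `Time#strftime`
+ semantics, so the example config above tails hourly files. A minimal sketch
+ (the resolved path below is just an example):
+
+     prefix = "/var/log/haproxy/haproxy"
+     time_pattern = ".%Y-%m-%d.%H"
+     prefix + Time.now.strftime(time_pattern)
+     #=> e.g. "/var/log/haproxy/haproxy.2015-09-23.14"
+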
+ ## Contributing
+
+ 1. Fork it
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
+ 4. Push to the branch (`git push origin my-new-feature`)
+ 5. Create a new Pull Request
+ 6. Go to 1
data/bin/tailf2kafka ADDED
@@ -0,0 +1,244 @@
+ #!/usr/bin/env ruby
+
+ require 'optparse'
+ require 'poseidon'
+ require 'yaml'
+ require 'hash_symbolizer'
+ require 'schash'
+ require 'rb-inotify'
+ require 'timers'
+ require 'socket'
+ require 'fileutils'
+ require 'pp'
+
+ $stdout.sync = true
+
+ Thread.abort_on_exception = true
+
+ @config = nil
+
+ opts = OptionParser.new
+ opts.banner = "Usage: #{$0} [options]"
+ opts.on( '--config PATH', String, 'Path to settings config' ) { |c| @config = c }
+ opts.on( '-h', '--help', 'Display this screen' ) { puts opts; exit 0 }
+ opts.parse!
+
+ unless @config
+   puts opts
+   exit 1
+ end
+
+ @settings = YAML.load_file(@config).symbolize_keys(true)
+
+ validator = Schash::Validator.new do
+   {
+     tailf: {
+       files: array_of({
+         topic: string,
+         prefix: string,
+         suffix: optional(string),
+         time_pattern: string,
+       }),
+       position_file: string,
+       flush_interval: integer,
+       max_batch_lines: integer,
+       from_begining: boolean,
+       delete_old_tailed_files: optional(boolean),
+     },
+     kafka: {
+       brokers: array_of(string),
+       producer_type: match(/^(sync|async)$/),
+       produce: optional(boolean),
+     },
+   }
+ end
+
+ unless validator.validate(@settings).empty?
+   puts "ERROR: bad settings"
+   pp validator.validate(@settings)
+   exit 1
+ end
+
+ @settings[:tailf][:files] = @settings[:tailf][:files].map{|h| h.symbolize_keys(true)}
+
+ @mutex = Mutex.new
+
+ @create_notifier = INotify::Notifier.new
+ @delete_notifier = INotify::Notifier.new
+ @tailf_notifier = INotify::Notifier.new
+
+ @dirs = {}
+ @files = {}
+ @threads = {}
+ @position_file = @settings[:tailf][:position_file]
+ @flush_interval = @settings[:tailf][:flush_interval]
+ @max_batch_lines = @settings[:tailf][:max_batch_lines]
+ @from_begining = @settings[:tailf][:from_begining]
+ @delete_old_tailed_files = @settings[:tailf].has_key?(:delete_old_tailed_files) ? @settings[:tailf][:delete_old_tailed_files] : false
+ @brokers = @settings[:kafka][:brokers]
+ @producer_type = @settings[:kafka][:producer_type].to_sym
+ @produce = @settings[:kafka].has_key?(:produce) ? @settings[:kafka][:produce] : true
+
+ def write_position_file
+   @mutex.synchronize do
+     File.open(@position_file, 'w') do |file|
+       @files.each do |path, attrs|
+         file.puts "#{path} #{attrs[:pattern]} #{attrs[:topic]} #{attrs[:inode]} #{attrs[:offset]}"
+       end
+     end
+   end
+ end
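+
+ # For illustration, each position file line is space separated as:
+ #   <path> <time_pattern> <topic> <inode> <offset>
+ # e.g. (example values):
+ #   /var/log/haproxy/haproxy.2015-09-23.14 .%Y-%m-%d.%H haproxy 270543 1048576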
+
+ def load_position_file
+   if File.exist?(@position_file)
+     IO.readlines(@position_file).each do |line|
+       path, pattern, topic, inode, offset = line.split(' ')
+       # Load state only for files that still exist with the same inode and were not truncated/rewound.
+       if File.exist?(path) and File.stat(path).ino == inode.to_i and File.stat(path).size >= offset.to_i
+         @files[path] = { :pattern => pattern, :topic => topic, :inode => inode.to_i, :offset => offset.to_i }
+       end
+     end
+   end
+   write_position_file
+ end
+
+ load_position_file
+
+ @topics = @settings[:tailf][:files].map{|tailf_file| tailf_file[:topic]}
+ @producer = Poseidon::Producer.new(@brokers, "#{Socket.gethostname}", :type => @producer_type, :compression_codec => :snappy, :compressed_topics => @topics) if @produce
+
+ @producer_queue = SizedQueue.new(10)
+
+ @producer_thread = Thread.new do
+   loop do
+     batch = @producer_queue.pop
+     begin
+       @producer.send_messages(batch[:messages]) if @produce
+     rescue Poseidon::Errors::UnableToFetchMetadata
+       puts "Got Poseidon::Errors::UnableToFetchMetadata while trying to produce kafka messages, retrying in 1 second ..."
+       sleep 1
+       retry
+     end
+     @files[batch[:path]][:offset] = batch[:offset]
+   end
+ end
+
+ def kafka_produce(path, buffer, offset)
+   messages = []
+   buffer.each do |msg|
+     messages << Poseidon::MessageToSend.new(@files[path][:topic], msg)
+   end
+   @producer_queue.push({ :path => path, :messages => messages, :offset => offset })
+ end
+
+ def tailf(path)
+   file = File.open(path, 'r')
+   @files[path][:fd] = file
+   file.seek(@files[path][:offset], IO::SEEK_SET)
+   loop do # Fast read the file in batches until we reach EOF, upon which we start the tailf modify watcher
+     batch = file.each_line.take(@max_batch_lines)
+     break if batch.empty?
+     kafka_produce(path, batch, file.pos)
+   end
+   @tailf_notifier.watch(path, :modify) do |event|
+     unless file.closed?
+       batch = file.each_line.take(@max_batch_lines)
+       kafka_produce(path, batch, file.pos) unless batch.empty?
+     else
+       puts "watcher got modify event on closed file #{event.name}"
+     end
+   end
+ end
+
+ @time_regexp_hash = {
+   'Y' => '[0-9]{4}',
+   'm' => '[0-9]{2}',
+   'd' => '[0-9]{2}',
+   'H' => '[0-9]{2}'
+ }
+
+ def time_pattern_to_regexp(pattern)
+   pattern.gsub(/%([^%])/) do
+     match = $1
+     @time_regexp_hash.has_key?(match) ? @time_regexp_hash[match] : match
+   end
+ end
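+
+ # For illustration:
+ #   time_pattern_to_regexp(".%Y-%m-%d.%H") #=> ".[0-9]{4}-[0-9]{2}-[0-9]{2}.[0-9]{2}"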
+
+ # Scan existing files that match watched prefixes and start tailing them
+ @settings[:tailf][:files].each do |tailf_file|
+   dir = File.dirname(tailf_file[:prefix])
+   if File.exist?(dir) and File.directory?(dir)
+     @dirs[dir] ||= []
+     @dirs[dir] << { :prefix => File.basename(tailf_file[:prefix]), :pattern => tailf_file[:time_pattern], :suffix => "#{tailf_file[:suffix]}", :topic => tailf_file[:topic] }
+     Dir.glob("#{tailf_file[:prefix]}*#{tailf_file[:suffix]}").each do |path|
+       if path.match(Regexp.new(time_pattern_to_regexp(tailf_file[:time_pattern])))
+         unless File.directory?(path)
+           # Populate state only if it was not loaded from the position file
+           unless @files.has_key?(path)
+             @files[path] = { :pattern => tailf_file[:time_pattern], :topic => tailf_file[:topic], :inode => File.stat(path).ino, :offset => 0 }
+             @files[path][:offset] = File.stat(path).size unless @from_begining
+           end
+           @threads[path] = Thread.new { tailf(path) } unless @threads.has_key?(path)
+         end
+       end
+     end
+   end
+ end
+
+ def delete_old_tailed_files
+   @mutex.synchronize do
+     @files.each_key do |path|
+       unless path.match(Regexp.new(Time.now.strftime(@files[path][:pattern])))
+         if File.exist?(path) and File.stat(path).ino == @files[path][:inode] and File.stat(path).size == @files[path][:offset]
+           puts "Deleting old time pattern fully kafka produced file #{path}"
+           FileUtils.rm_r(path)
+         end
+       end
+     end
+   end
+ end
+
+ @timers = Timers::Group.new
+ @uploads_timer = @timers.every(@flush_interval) { write_position_file }
+ @delete_old_tailed_files_timer = @timers.every(60) { delete_old_tailed_files } if @delete_old_tailed_files
+ Thread.new { loop { @timers.wait } }
+
+ @dirs.each_key do |dir|
+
+   @create_notifier.watch(dir, :create, :moved_to) do |event|
+     @mutex.synchronize do
+       path = "#{dir}/#{event.name}"
+       matches = @dirs[dir].select{|h| event.name.match(Regexp.new(h[:prefix] + time_pattern_to_regexp(h[:pattern]) + h[:suffix]))}
+       unless matches.empty?
+         unless File.directory?(path)
+           unless @threads.has_key?(path)
+             puts "File #{event.name} was created in / moved into watched dir #{dir}"
+             @files[path] = { :pattern => matches.first[:pattern], :topic => matches.first[:topic], :inode => File.stat(path).ino, :offset => 0 }
+             @threads[path] = Thread.new { tailf(path) }
+           end
+         end
+       end
+     end
+   end
+
+   @delete_notifier.watch(dir, :delete, :moved_from) do |event|
+     @mutex.synchronize do
+       path = "#{dir}/#{event.name}"
+       if @threads.has_key?(path)
+         puts "File #{event.name} was deleted / moved from watched dir #{dir}"
+         if @threads[path].alive?
+           @threads[path].terminate
+           @threads[path].join
+         end
+         @threads.delete(path)
+         @files[path][:fd].close unless @files[path][:fd].closed?
+         @files.delete(path)
+       end
+     end
+   end
+
+ end
+
+ Thread.new { @create_notifier.run }
+ Thread.new { @delete_notifier.run }
+
+ @tailf_notifier.run
data/lib/tailf2kafka/version.rb ADDED
@@ -0,0 +1,3 @@
+ module Tailf2Kafka
+   VERSION ||= '0.1.0'
+ end
data/lib/tailf2kafka.rb ADDED
@@ -0,0 +1,3 @@
+ module Tailf2Kafka
+   require 'tailf2kafka/version'
+ end
data/tailf2kafka.gemspec ADDED
@@ -0,0 +1,30 @@
+ lib = File.expand_path('../lib/', __FILE__)
+ $:.unshift lib unless $:.include?(lib)
+
+ require "tailf2kafka/version"
+
+ Gem::Specification.new do |s|
+   s.name = "tailf2kafka"
+   s.version = Tailf2Kafka::VERSION
+   s.platform = Gem::Platform::RUBY
+   s.authors = ["Alexander Piavlo"]
+   s.email = ["devops@supersonic.com"]
+   s.homepage = "http://github.com/SupersonicAds/tailf2kafka"
+   s.summary = "Watch and tail files with specified time based patterns and push them to kafka"
+   s.description = "Watch and tail files with specified time based patterns and push them to kafka"
+   s.license = 'MIT'
+   s.has_rdoc = false
+
+   s.add_dependency('poseidon')
+   s.add_dependency('hash_symbolizer')
+   s.add_dependency('schash')
+   s.add_dependency('rb-inotify')
+   s.add_dependency('timers')
+
+   s.add_development_dependency('rake')
+
+   s.files = Dir.glob("{bin,lib}/**/*") + %w(tailf2kafka.gemspec LICENSE README.md)
+   s.executables = Dir.glob('bin/**/*').map { |file| File.basename(file) }
+   s.test_files = nil
+   s.require_paths = ['lib']
+ end
metadata ADDED
@@ -0,0 +1,137 @@
+ --- !ruby/object:Gem::Specification
+ name: tailf2kafka
+ version: !ruby/object:Gem::Version
+   version: 0.1.0
+ platform: ruby
+ authors:
+ - Alexander Piavlo
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2015-09-23 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: poseidon
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
+ - !ruby/object:Gem::Dependency
+   name: hash_symbolizer
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
+ - !ruby/object:Gem::Dependency
+   name: schash
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
+ - !ruby/object:Gem::Dependency
+   name: rb-inotify
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
+ - !ruby/object:Gem::Dependency
+   name: timers
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
+ - !ruby/object:Gem::Dependency
+   name: rake
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
+ description: Watch and tail files with specified time based patterns and push them
+   to kafka
+ email:
+ - devops@supersonic.com
+ executables:
+ - tailf2kafka
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - LICENSE
+ - README.md
+ - bin/tailf2kafka
+ - lib/tailf2kafka.rb
+ - lib/tailf2kafka/version.rb
+ - tailf2kafka.gemspec
+ homepage: http://github.com/SupersonicAds/tailf2kafka
+ licenses:
+ - MIT
+ metadata: {}
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements: []
+ rubyforge_project:
+ rubygems_version: 2.2.2
+ signing_key:
+ specification_version: 4
+ summary: Watch and tail files with specified time based patterns and push them to
+   kafka
+ test_files: []