flydata 0.1.7 → 0.1.8

Sign up to get free protection for your applications and to get access to all the features.
@@ -3,10 +3,32 @@ module Fluent
3
3
  require 'fluent/plugin/in_mysql_binlog'
4
4
  require 'binlog'
5
5
  require 'kodama'
6
- require File.dirname(__FILE__) + '/preference'
6
+ require_relative 'preference'
7
+ require_relative '../../flydata'
8
+ require_relative '../sync_file_manager'
9
+ require_relative 'mysql/binlog_record_dispatcher'
10
+ require_relative 'mysql/context'
11
+
12
+ #Monkey-patch fluentd class (EngineClass) to support shutdown for input plugin.
13
+ #This will be called when USR1 signal is received
14
# Reopens EngineClass to add a shutdown entry point that targets only the
# registered sources (the objects held in @sources).
+ class EngineClass
15
+ #Send shutdown to all the sources
# Every source is shut down on its own thread, and the method returns only
# after all of those threads have been joined.
16
+ def shutdown_source
17
+ @sources.map {|s|
18
+ Thread.new do
19
+ begin
20
+ s.shutdown
21
+ rescue => e
# A source that fails to shut down is only logged as a warning; the
# other sources are handled by their own threads and are not affected.
22
+ $log.warn "unexpected error while shutting down", :error_class=>e.class, :error=>e
23
+ $log.warn_backtrace
24
+ end
25
+ end
26
+ }.each {|t|
27
+ t.join
28
+ }
29
+ end
30
+ end
7
31
 
8
- require File.expand_path(File.join(File.dirname(__FILE__), '../../flydata'))
9
- require 'flydata/sync_file_manager'
10
32
 
11
33
  class MysqlBinlogFlydataInput < MysqlBinlogInput
12
34
  include MysqlBinlogFlydataInputPreference
@@ -14,6 +36,7 @@ class MysqlBinlogFlydataInput < MysqlBinlogInput
14
36
 
15
37
  def initialize
16
38
  super
39
+ install_custom_signal_handler
17
40
  end
18
41
 
19
42
  config_param :database, :string
@@ -27,17 +50,18 @@ class MysqlBinlogFlydataInput < MysqlBinlogInput
27
50
  load_custom_conf
28
51
  $log.info "mysql host:\"#{@host}\" username:\"#{@username}\" database:\"#{@database}\" tables:\"#{@tables}\""
29
52
  @tables = @tables.split(/,\s*/)
30
- @sync_fm = Flydata::FileUtil::SyncFileManager.new(nil) # Passing nil for data_entry as this class does not use methods which require data_entry
31
- @record_handler = FlydataMysqlBinlogRecordHandler.new(
32
- database: @database,
33
- tables: @tables,
34
- tag: @tag,
35
- sync_fm: @sync_fm)
53
+ sync_fm = Flydata::FileUtil::SyncFileManager.new(nil) # Passing nil for data_entry as this class does not use methods which require data_entry
54
+
55
+ @context = Mysql::Context.new(
56
+ database: @database, tables: @tables,
57
+ tag: @tag, sync_fm: sync_fm
58
+ )
59
+ @record_dispatcher = Mysql::FlydataBinlogRecordDispatcher.new(@context)
36
60
  end
37
61
 
38
62
  def start
39
63
  super
40
- positions_path = @sync_fm.table_positions_dir_path
64
+ positions_path = @context.sync_fm.table_positions_dir_path
41
65
  Dir.mkdir positions_path unless File.exists? positions_path
42
66
  rescue Binlog::Error
43
67
  if (/basic_string::_M_replace_aux/ === $!.to_s)
@@ -56,7 +80,16 @@ EOS
56
80
  end
57
81
 
58
82
  def run
59
- super
83
+ start_kodama(mysql_url) do |c|
84
+ c.binlog_position_file = @position_file
85
+ c.connection_retry_limit = @retry_limit
86
+ c.connection_retry_wait = @retry_wait
87
+ c.log_level = @log_level.to_sym
88
+ @listen_events.each do |event_type|
89
+ $log.trace { "registered binlog event listener '#{event_type}'" }
90
+ c.send("on_#{event_type}", &method(:event_listener))
91
+ end
92
+ end
60
93
  rescue
61
94
  $log.error "unexpected error. exception: #{$!.class.to_s}, error: #{$!.to_s}\n#{$!.backtrace.join("\n")}"
62
95
  raise
@@ -68,229 +101,88 @@ EOS
68
101
  raise
69
102
  end
70
103
 
104
# Builds a Kodama client for the MySQL URL derived from `options`, lets the
# caller configure it through `block`, and then starts it.
#
# The client is stored in @kodama_client (rather than a local variable) so
# that other methods of this plugin can reach it after it has been started.
+ def start_kodama(options, &block)
105
+ @kodama_client = Kodama::Client.new(Kodama::Client.mysql_url(options))
# Route Kodama's log output through the global $log logger.
106
+ @kodama_client.logger = $log
# The block receives the client before it is started.
107
+ block.call(@kodama_client)
108
+ @kodama_client.start
109
+ end
110
+
71
111
  def event_listener(event)
72
- @record_handler.dispatch(event)
73
- rescue Exception
112
+ @record_dispatcher.dispatch(event)
113
+ rescue Exception => e
74
114
  position = File.open(@position_file) {|f| f.read }
75
115
  $log.error "error occured while processing #{event.event_type} event at #{position}\n#{e.message}\n#{$!.backtrace.join("\n")}"
76
116
  # Not reraising a StandardError because the underlying code can't handle an error well.
77
117
  raise unless e.kind_of?(StandardError)
78
118
  end
79
119
 
80
- end
81
-
82
- class MysqlBinlogRecordHandler
83
- def dispatch(event)
84
- method_name = "on_#{event.event_type.downcase}"
85
- if self.respond_to?(method_name)
86
- # TODO to_hash method call below can fail if event.event_type is
87
- # "Update_rows". This seems to be a bug of ruby-binlog. The bug must
88
- # be fixed when we support record update.
89
- record = MysqlBinlogInput::BinlogUtil.to_hash(event)
90
- self.send(method_name, record)
91
- else
92
- # $log.trace "Unhandled type: #{record["event_type"]}"
93
- end
94
- end
95
- end
96
-
97
- class FlydataMysqlBinlogRecordHandler < MysqlBinlogRecordHandler
98
- TABLE_NAME = 'table_name' # A Flydata JSON tag to specify a table name
99
- TYPE = 'type'
100
- ROW = 'row'
101
- SEQ = 'seq'
102
- RESPECT_ORDER = 'respect_order'
103
- INTEGER_TYPES = {'TINY' => 1,
104
- 'SHORT' => 2,
105
- 'INT24' => 3,
106
- 'LONG' => 4,
107
- 'LONGLONG' => 8
108
- }
109
- SIGNLESS_INTEGER_PREFIX = '0SL'
110
- SRC_POS = 'src_pos'
111
-
112
- def initialize(opts)
113
- mandatory_opts = [:database, :tables, :tag, :sync_fm]
114
-
115
- missing_opts = mandatory_opts - opts.keys
116
- unless (missing_opts.empty?)
117
- raise "Mandatory option(s) are missing: #{missing_opts.join(', ')}"
118
- end
119
- @database = opts[:database]
120
- @tables = opts[:tables]
121
- @tag = opts[:tag]
122
- @sync_fm = opts[:sync_fm]
123
- @current_binlog_file = ""
124
- @first_empty_binlog = true
125
- @query_handler = FlydataMysqlBinlogQueryHandler.new(record_handler: self)
126
- end
127
-
128
- def on_rotate(record)
129
- @current_binlog_file = record["binlog_file"]
130
- end
131
-
132
- def on_write_rows(record)
133
- emit_insert(record)
134
- end
135
-
136
- def on_update_rows(record)
137
- emit_update(record)
138
- end
139
-
140
- def on_delete_rows(record)
141
- emit_delete(record)
142
- end
143
-
144
- def on_query(record)
145
- @query_handler.dispatch(record)
146
- end
147
-
148
- def on_table_changed(table)
149
- $log.trace "Table #{table} has changed. Reloading the table column"
150
- end
151
-
152
- private
153
-
154
- def acceptable?(record)
155
- (@database == record["db_name"]) and @tables.include?(record["table_name"])
156
- end
157
-
158
- def emit_insert(record)
159
- emit_record(:insert, record)
160
- end
161
-
162
- def emit_delete(record)
163
- emit_record(:delete, record)
164
- end
165
-
166
- def emit_update(record)
167
- emit_record(:update, record) do |row|
168
- row.last # For update, row has two arrays (old and new values) Use new values
169
- end
170
- end
171
-
172
- def emit_record(type, record)
173
- return unless acceptable?(record)
174
-
175
- table = record['table_name']
176
- position = record['next_position'] - record['event_length']
177
- check_empty_binlog
178
-
179
- records = record["rows"].collect do |row|
180
- row = yield(row) if block_given? # Give the caller a chance to generate the correct row
181
- { TYPE => type, TABLE_NAME => table,
182
- RESPECT_ORDER => true, # Continuous sync needs record order to be kept
183
- SRC_POS => "#{@current_binlog_file}\t#{position}",
184
- ROW => row.each.with_index(1).inject({}) do |h, (v, i)|
185
- if v.kind_of?(String)
186
- v = v.encode('utf-16', :undef => :replace, :invalid => :replace).encode('utf-8')
187
- end
188
- h[i.to_s] = v
189
- h
190
- end
191
- }
192
- end
193
-
194
- encode_signless_integer(records, record["columns"])
195
-
196
- # Use binlog's timestamp
197
- timestamp = record["timestamp"].to_i
198
- records.each do |row|
199
- @sync_fm.increment_and_save_table_position(row[TABLE_NAME]) do |seq|
200
- row[SEQ] = seq
201
- Engine.emit(@tag, timestamp, row)
120
# Stops the Kodama client and the thread it runs on.
#
# Nothing happens unless @thread exists and is alive.
# NOTE(review): @thread is presumably the worker thread created by the parent
# input plugin - confirm.
+ def shutdown
121
+ if @thread and @thread.alive?
122
+ $log.info "Requesting stop Kodama"
# Ask the client to stop first; the thread is killed only once
# wait_till_safe_to_stop reports that it is safe to do so.
123
+ @kodama_client.stop_request
124
+ if wait_till_safe_to_stop
125
+ $log.info "Killing Kodama client"
126
+ Thread.kill(@thread)
127
+ else
# The thread is left running when the client never becomes safe to stop.
128
+ $log.error "Unable to stop Kodama"
202
129
  end
203
130
  end
204
131
  end
205
132
 
206
- private
207
- def encode_signless_integer(records, column_types)
208
- records.each do |record|
209
- record[ROW].keys.each do |position|
210
- index = position.to_i - 1
211
- column_type = column_types[index]
212
- if INTEGER_TYPES.keys.include?(column_type)
213
- # It's a signless integer.
214
- intval = record[ROW][position]
215
- next unless (intval.kind_of?(Numeric) || intval =~ /^-?[\d]+$/)
216
- width = INTEGER_TYPES[column_type] * 2 # * 2 because a single byte requires two characters (e.g. ff)
217
- signless_val = SIGNLESS_INTEGER_PREFIX
218
- signless_val += sprintf("%0#{width}x", intval).gsub(/\.\.f/, 'f' * width).slice(-width..-1)
219
- record[ROW][position] = signless_val
220
- end
221
- end
133
# Polls @kodama_client.safe_to_stop? up to retry_count (5) times.
#
# Returns true as soon as the client reports it is safe to stop, and false
# when every attempt fails. Each failed attempt is followed by a 3 second
# sleep, including the last one, so a false result takes about 15 seconds.
+ def wait_till_safe_to_stop
134
+ retry_count = 5
135
+ 1.upto(retry_count) do |i|
136
+ return true if @kodama_client.safe_to_stop?
137
+ sleep 3
222
138
  end
223
- end
224
-
225
- def check_empty_binlog
226
- #Log one warning per consecutive records that have empty binlog filename
227
- if @current_binlog_file.to_s.empty?
228
- if @first_empty_binlog
229
- $log.warn "Binlog file name is empty. Rotate event not received!"
230
- @first_empty_binlog = false
231
- end
232
- else
233
- @first_empty_binlog = true
234
- end
139
+ false
140
+ end
141
+
142
+ #Hack: All that has been added here is `Fluent::Engine.shutdown_source`. This should be in
143
+ #fluentd's supervisor#install_main_process_signal_handlers
144
# Installs a USR1 signal handler that shuts down the sources via
# Fluent::Engine.shutdown_source and then flushes buffered events via
# Fluent::Engine.flush!.
+ def install_custom_signal_handler
145
+ trap :USR1 do
146
+ $log.debug "fluentd main process get SIGUSR1"
147
+ $log.info "force flushing buffered events"
148
+ #@log.reopen!
149
+
150
+ # Creating new thread due to mutex can't lock
151
+ # in main thread during trap context
152
+ Thread.new {
153
+ begin
# Sources are shut down before the flush is requested.
154
+ Fluent::Engine.shutdown_source
155
+ Fluent::Engine.flush!
156
+ $log.debug "flushing thread: flushed"
# Any exception raised on this thread is logged as a warning only.
157
+ rescue Exception => e
158
+ $log.warn "flushing thread error: #{e}"
159
+ end
160
+ }.run
161
+ end
235
162
  end
236
163
  end
237
164
 
238
- class MysqlBinlogQueryHandler
239
- def initialize
240
- @mapping_table = []
241
- end
165
+ class BinLogPosition
166
+ include Comparable
167
+ attr_accessor :file, :pos
242
168
 
243
- def dispatch(record)
244
- @mapping_table.each do |pattern, method_name|
245
- query = normalize_query(record["query"])
246
- if (pattern.match(query))
247
- if (self.respond_to?(method_name))
248
- self.send(method_name, record, query)
249
- else
250
- raise "method '#{method_name}' is not defined in #{self.class.name} although its matching pattern is defined"
251
- end
252
- break
253
- end
254
- end
169
+ def initialize(binlog_content)
170
+ items = binlog_content.split("\t")
171
+ @file = items[0]
172
+ @pos = items[1].to_i
255
173
  end
256
174
 
257
- private
258
- def normalize_query(query)
259
- query = strip_comments(query)
175
+ def <=>(obj)
176
+ (self.file <=> obj.file) == 0 ? (self.pos <=> obj.pos) : (self.file <=> obj.file)
260
177
  end
261
178
 
262
- def strip_comments(query)
263
- query = query.gsub(/--\s.*\n/, ' ') # -- style comments
264
- query = query.gsub(/\/\*[^\*].*\*\//, ' ') # /* */ style comments
265
- query = query.gsub(/\s+/, ' ') # replace multiple spaces with a space
179
+ def to_s
180
+ "#{file}\t#{pos}"
266
181
  end
267
182
  end
268
183
 
269
- class FlydataMysqlBinlogQueryHandler < MysqlBinlogQueryHandler
270
- def initialize(opts)
271
- mandatory_opts = [:record_handler]
272
- missing_opts = mandatory_opts - opts.keys
273
- unless missing_opts.empty?
274
- raise "mandatory options are missing: #{missing_opts.join(", ")}"
275
- end
276
- @opts = opts
277
-
278
- @mapping_table = [
279
- [/^alter table/i, :on_alter_table],
280
- ]
281
- end
282
-
283
- def on_alter_table(record, query)
284
- m = /alter table\s+(?<table>[^\s]+)/i.match(query)
285
- if m.nil?
286
- raise "This alter table query has no table name? '#{query}'"
287
- end
288
-
289
- @opts[:record_handler].on_table_changed(m[:table])
290
- end
291
184
  end
292
185
 
293
- end
294
186
 
295
187
  # HACK
296
188
  # Monkey patch the class to manage string's character encoding.
@@ -335,15 +227,6 @@ module Kodama
335
227
 
336
228
  Client.class_eval do
337
229
  attr_accessor :logger
338
-
339
- # Except for replacing logger, the implementation is a copy of the original
340
- # method.
341
- def self.start(options = {}, &block)
342
- client = self.new(mysql_url(options))
343
- client.logger = $log
344
- block.call(client)
345
- client.start
346
- end
347
230
  end
348
231
 
349
232
  end
@@ -0,0 +1,34 @@
1
+ require_relative 'binlog_query_handler'
2
+
3
module Mysql
  # Handles "ALTER TABLE" statements found in binlog query events.
  #
  # Only "ADD COLUMN" is recognized, and for now it is merely logged; every
  # other ALTER TABLE variant is reported as unsupported at debug level.
  class AlterTableQueryHandler < BinlogQueryHandler
    # Selects the queries this handler is responsible for. It is
    # case-insensitive so that "ALTER TABLE ..." is routed here as well as
    # "alter table ...", in line with the case-insensitive match in #process.
    PATTERN = /^alter table/i

    # @param context the shared context object, passed on to the superclass
    def initialize(context)
      super
    end

    # @return [Regexp] pattern a normalized query has to match to be handled
    def pattern
      PATTERN
    end

    # Routes a normalized ALTER TABLE query to the matching private handler.
    #
    # @param record [Hash] binlog query record; 'query' holds the raw SQL
    # @param normalized_query [String] the query after normalization
    def process(record, normalized_query)
      case normalized_query
      when /^alter table [^\s]+ add column/i
        on_add_column(record, normalized_query)
      else
        $log.debug("not supported alter table query:'#{normalized_query}'")
      end
    end

    private

    # Logs the raw ADD COLUMN statement. Emitting a record is still disabled.
    def on_add_column(record, query)
      $log.debug("on_add_column query:'#{record['query']}'")
      #TODO: Uncomment following lines after supporting alter table on the server side
      #emit_record(:alter_table, record, increment_table_rev: true) do
      #  AlterTableAddColumnParser.new.parse(record['query'])
      #end
    end
  end
end
@@ -0,0 +1,20 @@
1
module Mysql
  # A position in the MySQL binary log: a binlog file name plus an offset
  # within that file. Positions are ordered by file name first, then offset.
  class BinLogPosition
    include Comparable
    attr_accessor :file, :pos

    # @param binlog_content [String] "<file>\t<pos>"; a missing or
    #   non-numeric position becomes 0 (String#to_i / nil.to_i semantics)
    def initialize(binlog_content)
      @file, offset = binlog_content.split("\t")
      @pos = offset.to_i
    end

    # @param obj [#file, #pos] the position to compare against
    # @return [Integer, nil] -1, 0 or 1; nil when obj is not position-like,
    #   so that Comparable#== answers false instead of raising NoMethodError
    def <=>(obj)
      return nil unless obj.respond_to?(:file) && obj.respond_to?(:pos)

      # Array comparison is element-wise: file decides, pos breaks ties.
      [file, pos] <=> [obj.file, obj.pos]
    end

    # @return [String] the position in the "<file>\t<pos>" form it was parsed from
    def to_s
      "#{file}\t#{pos}"
    end
  end
end
@@ -0,0 +1,39 @@
1
+ require_relative 'alter_table_query_handler'
2
+
3
module Mysql
  # Routes a binlog query record to the first registered handler whose
  # pattern matches the normalized query text.
  class BinlogQueryDispatcher
    def initialize
      @handlers = []
    end

    # Normalizes record["query"] once and hands the record to the first
    # handler whose #pattern matches; later handlers are not consulted.
    #
    # @param record [Hash] binlog query record; "query" holds the SQL text
    # @return [nil]
    def dispatch(record)
      query = normalize_query(record["query"])
      handler = @handlers.find { |candidate| candidate.pattern.match(query) }
      handler.process(record, query) if handler
      nil
    end

    private

    def normalize_query(query)
      strip_comments(query)
    end

    # Removes SQL comments and collapses whitespace so that handler patterns
    # anchored with ^ see the statement itself.
    #
    # Comment markers inside string literals are not recognized as such and
    # are stripped as well.
    def strip_comments(query)
      query = query.gsub(/--\s.*\n/, ' ') # -- style comments
      # Non-greedy so that only each individual /* */ comment is removed, not
      # everything between the first and the last one; /m lets a comment span
      # several lines.
      query = query.gsub(/\/\*[^\*].*?\*\//m, ' ') # /* */ style comments
      # Collapse runs of whitespace and drop the padding a removed leading or
      # trailing comment leaves behind.
      query.gsub(/\s+/, ' ').strip
    end
  end

  # Dispatcher preloaded with the query handlers used by Flydata.
  class FlydataBinlogQueryDispatcher < BinlogQueryDispatcher
    # @param context the shared context object handed to every handler
    def initialize(context)
      @handlers = [
        AlterTableQueryHandler.new(context),
      ]
    end
  end
end