flydata 0.1.7 → 0.1.8
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/flydata.gemspec +15 -3
- data/lib/flydata/cli.rb +1 -1
- data/lib/flydata/command/sender.rb +35 -10
- data/lib/flydata/command/sync.rb +188 -24
- data/lib/flydata/fluent-plugins/in_mysql_binlog_flydata.rb +101 -218
- data/lib/flydata/fluent-plugins/mysql/alter_table_query_handler.rb +34 -0
- data/lib/flydata/fluent-plugins/mysql/binlog_position.rb +20 -0
- data/lib/flydata/fluent-plugins/mysql/binlog_query_dispatcher.rb +39 -0
- data/lib/flydata/fluent-plugins/mysql/binlog_query_handler.rb +11 -0
- data/lib/flydata/fluent-plugins/mysql/binlog_record_dispatcher.rb +50 -0
- data/lib/flydata/fluent-plugins/mysql/binlog_record_handler.rb +100 -0
- data/lib/flydata/fluent-plugins/mysql/context.rb +25 -0
- data/lib/flydata/fluent-plugins/mysql/dml_record_handler.rb +82 -0
- data/lib/flydata/fluent-plugins/mysql/query_parser.rb +69 -0
- data/lib/flydata/sync_file_manager.rb +119 -12
- data/lib/flydata/table_def/mysql_table_def.rb +52 -22
- data/lib/flydata/table_def/redshift_table_def.rb +17 -9
- data/spec/flydata/command/sync_spec.rb +5 -5
- data/spec/flydata/fluent-plugins/in_mysql_binlog_flydata_spec.rb +86 -15
- data/spec/flydata/fluent-plugins/mysql/binlog_position_spec.rb +33 -0
- data/spec/flydata/fluent-plugins/mysql/query_parser_spec.rb +54 -0
- data/spec/flydata/table_def/mysql_table_def_spec.rb +2 -2
- data/spec/flydata/table_def/redshift_table_def_spec.rb +40 -0
- metadata +16 -4
@@ -3,10 +3,32 @@ module Fluent
|
|
3
3
|
require 'fluent/plugin/in_mysql_binlog'
|
4
4
|
require 'binlog'
|
5
5
|
require 'kodama'
|
6
|
-
|
6
|
+
require_relative 'preference'
|
7
|
+
require_relative '../../flydata'
|
8
|
+
require_relative '../sync_file_manager'
|
9
|
+
require_relative 'mysql/binlog_record_dispatcher'
|
10
|
+
require_relative 'mysql/context'
|
11
|
+
|
12
|
+
#Monkey-patch fluentd class (EngineClass) to support shutdown for input plugin.
|
13
|
+
#This will be called when USR1 signal is received
|
14
|
+
class EngineClass
|
15
|
+
#Send shutdown to all the sources
|
16
|
+
def shutdown_source
|
17
|
+
@sources.map {|s|
|
18
|
+
Thread.new do
|
19
|
+
begin
|
20
|
+
s.shutdown
|
21
|
+
rescue => e
|
22
|
+
$log.warn "unexpected error while shutting down", :error_class=>e.class, :error=>e
|
23
|
+
$log.warn_backtrace
|
24
|
+
end
|
25
|
+
end
|
26
|
+
}.each {|t|
|
27
|
+
t.join
|
28
|
+
}
|
29
|
+
end
|
30
|
+
end
|
7
31
|
|
8
|
-
require File.expand_path(File.join(File.dirname(__FILE__), '../../flydata'))
|
9
|
-
require 'flydata/sync_file_manager'
|
10
32
|
|
11
33
|
class MysqlBinlogFlydataInput < MysqlBinlogInput
|
12
34
|
include MysqlBinlogFlydataInputPreference
|
@@ -14,6 +36,7 @@ class MysqlBinlogFlydataInput < MysqlBinlogInput
|
|
14
36
|
|
15
37
|
def initialize
|
16
38
|
super
|
39
|
+
install_custom_signal_handler
|
17
40
|
end
|
18
41
|
|
19
42
|
config_param :database, :string
|
@@ -27,17 +50,18 @@ class MysqlBinlogFlydataInput < MysqlBinlogInput
|
|
27
50
|
load_custom_conf
|
28
51
|
$log.info "mysql host:\"#{@host}\" username:\"#{@username}\" database:\"#{@database}\" tables:\"#{@tables}\""
|
29
52
|
@tables = @tables.split(/,\s*/)
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
53
|
+
sync_fm = Flydata::FileUtil::SyncFileManager.new(nil) # Passing nil for data_entry as this class does not use methods which require data_entry
|
54
|
+
|
55
|
+
@context = Mysql::Context.new(
|
56
|
+
database: @database, tables: @tables,
|
57
|
+
tag: @tag, sync_fm: sync_fm
|
58
|
+
)
|
59
|
+
@record_dispatcher = Mysql::FlydataBinlogRecordDispatcher.new(@context)
|
36
60
|
end
|
37
61
|
|
38
62
|
def start
|
39
63
|
super
|
40
|
-
positions_path = @sync_fm.table_positions_dir_path
|
64
|
+
positions_path = @context.sync_fm.table_positions_dir_path
|
41
65
|
Dir.mkdir positions_path unless File.exists? positions_path
|
42
66
|
rescue Binlog::Error
|
43
67
|
if (/basic_string::_M_replace_aux/ === $!.to_s)
|
@@ -56,7 +80,16 @@ EOS
|
|
56
80
|
end
|
57
81
|
|
58
82
|
def run
|
59
|
-
|
83
|
+
start_kodama(mysql_url) do |c|
|
84
|
+
c.binlog_position_file = @position_file
|
85
|
+
c.connection_retry_limit = @retry_limit
|
86
|
+
c.connection_retry_wait = @retry_wait
|
87
|
+
c.log_level = @log_level.to_sym
|
88
|
+
@listen_events.each do |event_type|
|
89
|
+
$log.trace { "registered binlog event listener '#{event_type}'" }
|
90
|
+
c.send("on_#{event_type}", &method(:event_listener))
|
91
|
+
end
|
92
|
+
end
|
60
93
|
rescue
|
61
94
|
$log.error "unexpected error. exception: #{$!.class.to_s}, error: #{$!.to_s}\n#{$!.backtrace.join("\n")}"
|
62
95
|
raise
|
@@ -68,229 +101,88 @@ EOS
|
|
68
101
|
raise
|
69
102
|
end
|
70
103
|
|
104
|
+
def start_kodama(options, &block)
|
105
|
+
@kodama_client = Kodama::Client.new(Kodama::Client.mysql_url(options))
|
106
|
+
@kodama_client.logger = $log
|
107
|
+
block.call(@kodama_client)
|
108
|
+
@kodama_client.start
|
109
|
+
end
|
110
|
+
|
71
111
|
def event_listener(event)
|
72
|
-
@
|
73
|
-
rescue Exception
|
112
|
+
@record_dispatcher.dispatch(event)
|
113
|
+
rescue Exception => e
|
74
114
|
position = File.open(@position_file) {|f| f.read }
|
75
115
|
$log.error "error occured while processing #{event.event_type} event at #{position}\n#{e.message}\n#{$!.backtrace.join("\n")}"
|
76
116
|
# Not reraising a StandardError because the underlying code can't handle an error well.
|
77
117
|
raise unless e.kind_of?(StandardError)
|
78
118
|
end
|
79
119
|
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
record = MysqlBinlogInput::BinlogUtil.to_hash(event)
|
90
|
-
self.send(method_name, record)
|
91
|
-
else
|
92
|
-
# $log.trace "Unhandled type: #{record["event_type"]}"
|
93
|
-
end
|
94
|
-
end
|
95
|
-
end
|
96
|
-
|
97
|
-
class FlydataMysqlBinlogRecordHandler < MysqlBinlogRecordHandler
|
98
|
-
TABLE_NAME = 'table_name' # A Flydata JSON tag to specify a table name
|
99
|
-
TYPE = 'type'
|
100
|
-
ROW = 'row'
|
101
|
-
SEQ = 'seq'
|
102
|
-
RESPECT_ORDER = 'respect_order'
|
103
|
-
INTEGER_TYPES = {'TINY' => 1,
|
104
|
-
'SHORT' => 2,
|
105
|
-
'INT24' => 3,
|
106
|
-
'LONG' => 4,
|
107
|
-
'LONGLONG' => 8
|
108
|
-
}
|
109
|
-
SIGNLESS_INTEGER_PREFIX = '0SL'
|
110
|
-
SRC_POS = 'src_pos'
|
111
|
-
|
112
|
-
def initialize(opts)
|
113
|
-
mandatory_opts = [:database, :tables, :tag, :sync_fm]
|
114
|
-
|
115
|
-
missing_opts = mandatory_opts - opts.keys
|
116
|
-
unless (missing_opts.empty?)
|
117
|
-
raise "Mandatory option(s) are missing: #{missing_opts.join(', ')}"
|
118
|
-
end
|
119
|
-
@database = opts[:database]
|
120
|
-
@tables = opts[:tables]
|
121
|
-
@tag = opts[:tag]
|
122
|
-
@sync_fm = opts[:sync_fm]
|
123
|
-
@current_binlog_file = ""
|
124
|
-
@first_empty_binlog = true
|
125
|
-
@query_handler = FlydataMysqlBinlogQueryHandler.new(record_handler: self)
|
126
|
-
end
|
127
|
-
|
128
|
-
def on_rotate(record)
|
129
|
-
@current_binlog_file = record["binlog_file"]
|
130
|
-
end
|
131
|
-
|
132
|
-
def on_write_rows(record)
|
133
|
-
emit_insert(record)
|
134
|
-
end
|
135
|
-
|
136
|
-
def on_update_rows(record)
|
137
|
-
emit_update(record)
|
138
|
-
end
|
139
|
-
|
140
|
-
def on_delete_rows(record)
|
141
|
-
emit_delete(record)
|
142
|
-
end
|
143
|
-
|
144
|
-
def on_query(record)
|
145
|
-
@query_handler.dispatch(record)
|
146
|
-
end
|
147
|
-
|
148
|
-
def on_table_changed(table)
|
149
|
-
$log.trace "Table #{table} has changed. Reloading the table column"
|
150
|
-
end
|
151
|
-
|
152
|
-
private
|
153
|
-
|
154
|
-
def acceptable?(record)
|
155
|
-
(@database == record["db_name"]) and @tables.include?(record["table_name"])
|
156
|
-
end
|
157
|
-
|
158
|
-
def emit_insert(record)
|
159
|
-
emit_record(:insert, record)
|
160
|
-
end
|
161
|
-
|
162
|
-
def emit_delete(record)
|
163
|
-
emit_record(:delete, record)
|
164
|
-
end
|
165
|
-
|
166
|
-
def emit_update(record)
|
167
|
-
emit_record(:update, record) do |row|
|
168
|
-
row.last # For update, row has two arrays (old and new values) Use new values
|
169
|
-
end
|
170
|
-
end
|
171
|
-
|
172
|
-
def emit_record(type, record)
|
173
|
-
return unless acceptable?(record)
|
174
|
-
|
175
|
-
table = record['table_name']
|
176
|
-
position = record['next_position'] - record['event_length']
|
177
|
-
check_empty_binlog
|
178
|
-
|
179
|
-
records = record["rows"].collect do |row|
|
180
|
-
row = yield(row) if block_given? # Give the caller a chance to generate the correct row
|
181
|
-
{ TYPE => type, TABLE_NAME => table,
|
182
|
-
RESPECT_ORDER => true, # Continuous sync needs record order to be kept
|
183
|
-
SRC_POS => "#{@current_binlog_file}\t#{position}",
|
184
|
-
ROW => row.each.with_index(1).inject({}) do |h, (v, i)|
|
185
|
-
if v.kind_of?(String)
|
186
|
-
v = v.encode('utf-16', :undef => :replace, :invalid => :replace).encode('utf-8')
|
187
|
-
end
|
188
|
-
h[i.to_s] = v
|
189
|
-
h
|
190
|
-
end
|
191
|
-
}
|
192
|
-
end
|
193
|
-
|
194
|
-
encode_signless_integer(records, record["columns"])
|
195
|
-
|
196
|
-
# Use binlog's timestamp
|
197
|
-
timestamp = record["timestamp"].to_i
|
198
|
-
records.each do |row|
|
199
|
-
@sync_fm.increment_and_save_table_position(row[TABLE_NAME]) do |seq|
|
200
|
-
row[SEQ] = seq
|
201
|
-
Engine.emit(@tag, timestamp, row)
|
120
|
+
def shutdown
|
121
|
+
if @thread and @thread.alive?
|
122
|
+
$log.info "Requesting stop Kodama"
|
123
|
+
@kodama_client.stop_request
|
124
|
+
if wait_till_safe_to_stop
|
125
|
+
$log.info "Killing Kodama client"
|
126
|
+
Thread.kill(@thread)
|
127
|
+
else
|
128
|
+
$log.error "Unable to stop Kodama"
|
202
129
|
end
|
203
130
|
end
|
204
131
|
end
|
205
132
|
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
column_type = column_types[index]
|
212
|
-
if INTEGER_TYPES.keys.include?(column_type)
|
213
|
-
# It's a signless integer.
|
214
|
-
intval = record[ROW][position]
|
215
|
-
next unless (intval.kind_of?(Numeric) || intval =~ /^-?[\d]+$/)
|
216
|
-
width = INTEGER_TYPES[column_type] * 2 # * 2 because a single byte requires two characters (e.g. ff)
|
217
|
-
signless_val = SIGNLESS_INTEGER_PREFIX
|
218
|
-
signless_val += sprintf("%0#{width}x", intval).gsub(/\.\.f/, 'f' * width).slice(-width..-1)
|
219
|
-
record[ROW][position] = signless_val
|
220
|
-
end
|
221
|
-
end
|
133
|
+
def wait_till_safe_to_stop
|
134
|
+
retry_count = 5
|
135
|
+
1.upto(retry_count) do |i|
|
136
|
+
return true if @kodama_client.safe_to_stop?
|
137
|
+
sleep 3
|
222
138
|
end
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
139
|
+
false
|
140
|
+
end
|
141
|
+
|
142
|
+
#Hack: All that has been added here is `Fluent::Engine.shutdown_source`. This should be in
|
143
|
+
#fluentd's supervisor#install_main_process_signal_handlers
|
144
|
+
def install_custom_signal_handler
|
145
|
+
trap :USR1 do
|
146
|
+
$log.debug "fluentd main process get SIGUSR1"
|
147
|
+
$log.info "force flushing buffered events"
|
148
|
+
#@log.reopen!
|
149
|
+
|
150
|
+
# Creating new thread due to mutex can't lock
|
151
|
+
# in main thread during trap context
|
152
|
+
Thread.new {
|
153
|
+
begin
|
154
|
+
Fluent::Engine.shutdown_source
|
155
|
+
Fluent::Engine.flush!
|
156
|
+
$log.debug "flushing thread: flushed"
|
157
|
+
rescue Exception => e
|
158
|
+
$log.warn "flushing thread error: #{e}"
|
159
|
+
end
|
160
|
+
}.run
|
161
|
+
end
|
235
162
|
end
|
236
163
|
end
|
237
164
|
|
238
|
-
class
|
239
|
-
|
240
|
-
|
241
|
-
end
|
165
|
+
class BinLogPosition
|
166
|
+
include Comparable
|
167
|
+
attr_accessor :file, :pos
|
242
168
|
|
243
|
-
def
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
if (self.respond_to?(method_name))
|
248
|
-
self.send(method_name, record, query)
|
249
|
-
else
|
250
|
-
raise "method '#{method_name}' is not defined in #{self.class.name} although its matching pattern is defined"
|
251
|
-
end
|
252
|
-
break
|
253
|
-
end
|
254
|
-
end
|
169
|
+
def initialize(binlog_content)
|
170
|
+
items = binlog_content.split("\t")
|
171
|
+
@file = items[0]
|
172
|
+
@pos = items[1].to_i
|
255
173
|
end
|
256
174
|
|
257
|
-
|
258
|
-
|
259
|
-
query = strip_comments(query)
|
175
|
+
def <=>(obj)
|
176
|
+
(self.file <=> obj.file) == 0 ? (self.pos <=> obj.pos) : (self.file <=> obj.file)
|
260
177
|
end
|
261
178
|
|
262
|
-
def
|
263
|
-
|
264
|
-
query = query.gsub(/\/\*[^\*].*\*\//, ' ') # /* */ style comments
|
265
|
-
query = query.gsub(/\s+/, ' ') # replace multiple spaces with a space
|
179
|
+
def to_s
|
180
|
+
"#{file}\t#{pos}"
|
266
181
|
end
|
267
182
|
end
|
268
183
|
|
269
|
-
class FlydataMysqlBinlogQueryHandler < MysqlBinlogQueryHandler
|
270
|
-
def initialize(opts)
|
271
|
-
mandatory_opts = [:record_handler]
|
272
|
-
missing_opts = mandatory_opts - opts.keys
|
273
|
-
unless missing_opts.empty?
|
274
|
-
raise "mandatory options are missing: #{missing_opts.join(", ")}"
|
275
|
-
end
|
276
|
-
@opts = opts
|
277
|
-
|
278
|
-
@mapping_table = [
|
279
|
-
[/^alter table/i, :on_alter_table],
|
280
|
-
]
|
281
|
-
end
|
282
|
-
|
283
|
-
def on_alter_table(record, query)
|
284
|
-
m = /alter table\s+(?<table>[^\s]+)/i.match(query)
|
285
|
-
if m.nil?
|
286
|
-
raise "This alter table query has no table name? '#{query}'"
|
287
|
-
end
|
288
|
-
|
289
|
-
@opts[:record_handler].on_table_changed(m[:table])
|
290
|
-
end
|
291
184
|
end
|
292
185
|
|
293
|
-
end
|
294
186
|
|
295
187
|
# HACK
|
296
188
|
# Monkey patch the class to manage string's character encoding.
|
@@ -335,15 +227,6 @@ module Kodama
|
|
335
227
|
|
336
228
|
Client.class_eval do
|
337
229
|
attr_accessor :logger
|
338
|
-
|
339
|
-
# Except for replacing logger, the implementation is a copy of the original
|
340
|
-
# method.
|
341
|
-
def self.start(options = {}, &block)
|
342
|
-
client = self.new(mysql_url(options))
|
343
|
-
client.logger = $log
|
344
|
-
block.call(client)
|
345
|
-
client.start
|
346
|
-
end
|
347
230
|
end
|
348
231
|
|
349
232
|
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
require_relative 'binlog_query_handler'
|
2
|
+
|
3
|
+
module Mysql
  # Query handler for ALTER TABLE statements found in the MySQL binlog.
  #
  # A query dispatcher selects this handler when the normalized query matches
  # #pattern and then calls #process. Only "ADD COLUMN" is recognized so far;
  # every other ALTER TABLE variant is logged at debug level and ignored.
  #
  # Construction is inherited unchanged from BinlogQueryHandler (the previous
  # explicit initialize only forwarded its argument to super).
  class AlterTableQueryHandler < BinlogQueryHandler
    # SQL keywords are case-insensitive, so the pattern must be too; without
    # the `i` flag an upper-case "ALTER TABLE ..." is never dispatched here.
    PATTERN = /^alter table/i

    # @return [Regexp] pattern a normalized query must match to be handled here
    def pattern
      PATTERN
    end

    # Routes an ALTER TABLE query to the matching sub-handler.
    #
    # @param record [Hash] binlog query record; record['query'] holds the raw query text
    # @param normalized_query [String] the query as normalized by the dispatcher
    def process(record, normalized_query)
      case normalized_query
      when /^alter table [^\s]+ add column/i
        on_add_column(record, normalized_query)
      else
        $log.debug("not supported alter table query:'#{normalized_query}'")
      end
    end

    private

    # Currently only logs the raw query; nothing is emitted yet.
    def on_add_column(record, query)
      $log.debug("on_add_column query:'#{record['query']}'")
      #TODO: Uncomment following lines after supporting alter table on the server side
      #emit_record(:alter_table, record, increment_table_rev: true) do
      #  AlterTableAddColumnParser.new.parse(record['query'])
      #end
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module Mysql
  # A MySQL binlog position: a binlog file name plus an offset within it.
  #
  # Built from, and serialized back to, the tab-separated text
  # "<file>\t<pos>". Positions are ordered by file name first and by offset
  # within the same file second.
  class BinLogPosition
    include Comparable
    attr_accessor :file, :pos

    # @param binlog_content [String] text of the form "<file>\t<pos>"; a
    #   trailing line break is ignored. A missing offset is read as 0.
    def initialize(binlog_content)
      # chomp so a trailing newline never ends up inside the file name when
      # the content carries no offset field.
      items = binlog_content.chomp.split("\t")
      @file = items[0]
      @pos = items[1].to_i
    end

    # @param obj [Object] the object to compare against
    # @return [Integer, nil] -1, 0 or 1; nil when obj does not expose
    #   file/pos or the file names are not comparable
    def <=>(obj)
      # Follow the Comparable convention of answering nil for foreign
      # objects rather than raising NoMethodError (which would also break ==).
      return nil unless obj.respond_to?(:file) && obj.respond_to?(:pos)

      file_order = file <=> obj.file
      return file_order unless file_order == 0

      pos <=> obj.pos
    end

    # @return [String] the position as "<file>\t<pos>"
    def to_s
      "#{file}\t#{pos}"
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require_relative 'alter_table_query_handler'
|
2
|
+
|
3
|
+
module Mysql
  # Dispatches a binlog query record to the first registered handler whose
  # pattern matches the normalized query text.
  #
  # A handler is any object responding to #pattern (something with #match)
  # and #process(record, normalized_query).
  class BinlogQueryDispatcher
    # Matches, in one left-to-right pass, either a /* ... */ block comment
    # (non-greedy, may span lines) or a "--" line comment running to the end
    # of its line. Quoted string literals are NOT recognized, so
    # comment-looking text inside a literal is stripped as well.
    COMMENT_PATTERN = /\/\*.*?\*\/|--(?:[ \t][^\n]*)?$/m

    def initialize
      @handlers = []
    end

    # Normalizes record["query"] and passes it to the first matching handler.
    # Does nothing when no handler matches or the record carries no query.
    #
    # @param record [Hash] binlog query record; the "query" key holds the raw SQL
    # @return [Object, nil] the handler's result, or nil when nothing matched
    def dispatch(record)
      # Normalize once; the query does not change from handler to handler.
      query = normalize_query(record["query"])
      handler = @handlers.find { |candidate| candidate.pattern.match(query) }
      handler.process(record, query) if handler
    end

    private

    # @param query [String, nil] raw query text; nil is treated as empty
    # @return [String] the query without comments, single-spaced and trimmed
    def normalize_query(query)
      strip_comments(query.to_s)
    end

    # Replaces comments with a space, collapses whitespace runs and trims the
    # ends so start-anchored handler patterns still match a query that began
    # with a comment or blank space.
    def strip_comments(query)
      query.gsub(COMMENT_PATTERN, ' ').gsub(/\s+/, ' ').strip
    end
  end

  # Dispatcher preloaded with the query handlers Flydata supports.
  class FlydataBinlogQueryDispatcher < BinlogQueryDispatcher
    # @param context [Object] shared plugin context passed to every handler
    def initialize(context)
      @handlers = [
        AlterTableQueryHandler.new(context),
      ]
    end
  end
end
|