flydata 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/flydata.gemspec +15 -3
- data/lib/flydata/cli.rb +1 -1
- data/lib/flydata/command/sender.rb +35 -10
- data/lib/flydata/command/sync.rb +188 -24
- data/lib/flydata/fluent-plugins/in_mysql_binlog_flydata.rb +101 -218
- data/lib/flydata/fluent-plugins/mysql/alter_table_query_handler.rb +34 -0
- data/lib/flydata/fluent-plugins/mysql/binlog_position.rb +20 -0
- data/lib/flydata/fluent-plugins/mysql/binlog_query_dispatcher.rb +39 -0
- data/lib/flydata/fluent-plugins/mysql/binlog_query_handler.rb +11 -0
- data/lib/flydata/fluent-plugins/mysql/binlog_record_dispatcher.rb +50 -0
- data/lib/flydata/fluent-plugins/mysql/binlog_record_handler.rb +100 -0
- data/lib/flydata/fluent-plugins/mysql/context.rb +25 -0
- data/lib/flydata/fluent-plugins/mysql/dml_record_handler.rb +82 -0
- data/lib/flydata/fluent-plugins/mysql/query_parser.rb +69 -0
- data/lib/flydata/sync_file_manager.rb +119 -12
- data/lib/flydata/table_def/mysql_table_def.rb +52 -22
- data/lib/flydata/table_def/redshift_table_def.rb +17 -9
- data/spec/flydata/command/sync_spec.rb +5 -5
- data/spec/flydata/fluent-plugins/in_mysql_binlog_flydata_spec.rb +86 -15
- data/spec/flydata/fluent-plugins/mysql/binlog_position_spec.rb +33 -0
- data/spec/flydata/fluent-plugins/mysql/query_parser_spec.rb +54 -0
- data/spec/flydata/table_def/mysql_table_def_spec.rb +2 -2
- data/spec/flydata/table_def/redshift_table_def_spec.rb +40 -0
- metadata +16 -4
@@ -3,10 +3,32 @@ module Fluent
|
|
3
3
|
require 'fluent/plugin/in_mysql_binlog'
|
4
4
|
require 'binlog'
|
5
5
|
require 'kodama'
|
6
|
-
|
6
|
+
require_relative 'preference'
|
7
|
+
require_relative '../../flydata'
|
8
|
+
require_relative '../sync_file_manager'
|
9
|
+
require_relative 'mysql/binlog_record_dispatcher'
|
10
|
+
require_relative 'mysql/context'
|
11
|
+
|
12
|
+
#Monkey-patch fluentd class (EngineClass) to support shutdown for input plugin.
|
13
|
+
#This will be called when USR1 signal is received
|
14
|
+
class EngineClass
  # Asks every registered source to shut down, one thread per source, and
  # blocks until all of those threads have finished.
  #
  # A StandardError raised by a source's #shutdown is logged as a warning
  # (message plus backtrace) and does not prevent the other sources from
  # being shut down.
  #
  # Returns the array of (already joined) worker threads.
  def shutdown_source
    workers = @sources.map do |source|
      Thread.new do
        begin
          source.shutdown
        rescue => e
          $log.warn "unexpected error while shutting down", :error_class=>e.class, :error=>e
          $log.warn_backtrace
        end
      end
    end
    workers.each(&:join)
  end
end
|
7
31
|
|
8
|
-
require File.expand_path(File.join(File.dirname(__FILE__), '../../flydata'))
|
9
|
-
require 'flydata/sync_file_manager'
|
10
32
|
|
11
33
|
class MysqlBinlogFlydataInput < MysqlBinlogInput
|
12
34
|
include MysqlBinlogFlydataInputPreference
|
@@ -14,6 +36,7 @@ class MysqlBinlogFlydataInput < MysqlBinlogInput
|
|
14
36
|
|
15
37
|
# Runs the parent plugin's initialization, then installs this plugin's
# custom signal handler (see #install_custom_signal_handler).
def initialize
  super
  install_custom_signal_handler
end
|
18
41
|
|
19
42
|
config_param :database, :string
|
@@ -27,17 +50,18 @@ class MysqlBinlogFlydataInput < MysqlBinlogInput
|
|
27
50
|
load_custom_conf
|
28
51
|
$log.info "mysql host:\"#{@host}\" username:\"#{@username}\" database:\"#{@database}\" tables:\"#{@tables}\""
|
29
52
|
@tables = @tables.split(/,\s*/)
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
53
|
+
sync_fm = Flydata::FileUtil::SyncFileManager.new(nil) # Passing nil for data_entry as this class does not use methods which require data_entry
|
54
|
+
|
55
|
+
@context = Mysql::Context.new(
|
56
|
+
database: @database, tables: @tables,
|
57
|
+
tag: @tag, sync_fm: sync_fm
|
58
|
+
)
|
59
|
+
@record_dispatcher = Mysql::FlydataBinlogRecordDispatcher.new(@context)
|
36
60
|
end
|
37
61
|
|
38
62
|
def start
|
39
63
|
super
|
40
|
-
positions_path = @sync_fm.table_positions_dir_path
|
64
|
+
positions_path = @context.sync_fm.table_positions_dir_path
|
41
65
|
Dir.mkdir positions_path unless File.exists? positions_path
|
42
66
|
rescue Binlog::Error
|
43
67
|
if (/basic_string::_M_replace_aux/ === $!.to_s)
|
@@ -56,7 +80,16 @@ EOS
|
|
56
80
|
end
|
57
81
|
|
58
82
|
def run
|
59
|
-
|
83
|
+
start_kodama(mysql_url) do |c|
|
84
|
+
c.binlog_position_file = @position_file
|
85
|
+
c.connection_retry_limit = @retry_limit
|
86
|
+
c.connection_retry_wait = @retry_wait
|
87
|
+
c.log_level = @log_level.to_sym
|
88
|
+
@listen_events.each do |event_type|
|
89
|
+
$log.trace { "registered binlog event listener '#{event_type}'" }
|
90
|
+
c.send("on_#{event_type}", &method(:event_listener))
|
91
|
+
end
|
92
|
+
end
|
60
93
|
rescue
|
61
94
|
$log.error "unexpected error. exception: #{$!.class.to_s}, error: #{$!.to_s}\n#{$!.backtrace.join("\n")}"
|
62
95
|
raise
|
@@ -68,229 +101,88 @@ EOS
|
|
68
101
|
raise
|
69
102
|
end
|
70
103
|
|
104
|
+
# Creates the Kodama client for the given connection options, keeps it in
# @kodama_client, points its logger at $log, hands it to the block for
# configuration and finally starts it.
#
# options - connection options passed to Kodama::Client.mysql_url.
# block   - called with the client before it is started.
#
# Returns whatever the client's #start returns.
def start_kodama(options, &block)
  url = Kodama::Client.mysql_url(options)
  client = Kodama::Client.new(url)
  @kodama_client = client
  client.logger = $log
  block.call(client)
  client.start
end
|
110
|
+
|
71
111
|
# Binlog event callback: forwards the event to the record dispatcher.
#
# Any exception raised while dispatching is logged together with the
# current contents of the binlog position file. StandardErrors are then
# swallowed; anything else is re-raised.
#
# event - the binlog event; must respond to #event_type for error logging.
def event_listener(event)
  @record_dispatcher.dispatch(event)
rescue Exception => e
  # Reading the position file is best effort. If it is missing or
  # unreadable we must not raise a second error from inside the handler:
  # that would hide the original failure and leak a StandardError to the
  # caller, which is exactly what this method is meant to prevent.
  position = begin
    File.read(@position_file)
  rescue StandardError => read_error
    "(position unavailable: #{read_error.message})"
  end
  $log.error "error occured while processing #{event.event_type} event at #{position}\n#{e.message}\n#{e.backtrace.join("\n")}"
  # Not reraising a StandardError because the underlying code can't handle an error well.
  raise e unless e.kind_of?(StandardError)
end
|
79
119
|
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
record = MysqlBinlogInput::BinlogUtil.to_hash(event)
|
90
|
-
self.send(method_name, record)
|
91
|
-
else
|
92
|
-
# $log.trace "Unhandled type: #{record["event_type"]}"
|
93
|
-
end
|
94
|
-
end
|
95
|
-
end
|
96
|
-
|
97
|
-
class FlydataMysqlBinlogRecordHandler < MysqlBinlogRecordHandler
|
98
|
-
TABLE_NAME = 'table_name' # A Flydata JSON tag to specify a table name
|
99
|
-
TYPE = 'type'
|
100
|
-
ROW = 'row'
|
101
|
-
SEQ = 'seq'
|
102
|
-
RESPECT_ORDER = 'respect_order'
|
103
|
-
INTEGER_TYPES = {'TINY' => 1,
|
104
|
-
'SHORT' => 2,
|
105
|
-
'INT24' => 3,
|
106
|
-
'LONG' => 4,
|
107
|
-
'LONGLONG' => 8
|
108
|
-
}
|
109
|
-
SIGNLESS_INTEGER_PREFIX = '0SL'
|
110
|
-
SRC_POS = 'src_pos'
|
111
|
-
|
112
|
-
def initialize(opts)
|
113
|
-
mandatory_opts = [:database, :tables, :tag, :sync_fm]
|
114
|
-
|
115
|
-
missing_opts = mandatory_opts - opts.keys
|
116
|
-
unless (missing_opts.empty?)
|
117
|
-
raise "Mandatory option(s) are missing: #{missing_opts.join(', ')}"
|
118
|
-
end
|
119
|
-
@database = opts[:database]
|
120
|
-
@tables = opts[:tables]
|
121
|
-
@tag = opts[:tag]
|
122
|
-
@sync_fm = opts[:sync_fm]
|
123
|
-
@current_binlog_file = ""
|
124
|
-
@first_empty_binlog = true
|
125
|
-
@query_handler = FlydataMysqlBinlogQueryHandler.new(record_handler: self)
|
126
|
-
end
|
127
|
-
|
128
|
-
def on_rotate(record)
|
129
|
-
@current_binlog_file = record["binlog_file"]
|
130
|
-
end
|
131
|
-
|
132
|
-
def on_write_rows(record)
|
133
|
-
emit_insert(record)
|
134
|
-
end
|
135
|
-
|
136
|
-
def on_update_rows(record)
|
137
|
-
emit_update(record)
|
138
|
-
end
|
139
|
-
|
140
|
-
def on_delete_rows(record)
|
141
|
-
emit_delete(record)
|
142
|
-
end
|
143
|
-
|
144
|
-
def on_query(record)
|
145
|
-
@query_handler.dispatch(record)
|
146
|
-
end
|
147
|
-
|
148
|
-
def on_table_changed(table)
|
149
|
-
$log.trace "Table #{table} has changed. Reloading the table column"
|
150
|
-
end
|
151
|
-
|
152
|
-
private
|
153
|
-
|
154
|
-
def acceptable?(record)
|
155
|
-
(@database == record["db_name"]) and @tables.include?(record["table_name"])
|
156
|
-
end
|
157
|
-
|
158
|
-
def emit_insert(record)
|
159
|
-
emit_record(:insert, record)
|
160
|
-
end
|
161
|
-
|
162
|
-
def emit_delete(record)
|
163
|
-
emit_record(:delete, record)
|
164
|
-
end
|
165
|
-
|
166
|
-
def emit_update(record)
|
167
|
-
emit_record(:update, record) do |row|
|
168
|
-
row.last # For update, row has two arrays (old and new values) Use new values
|
169
|
-
end
|
170
|
-
end
|
171
|
-
|
172
|
-
def emit_record(type, record)
|
173
|
-
return unless acceptable?(record)
|
174
|
-
|
175
|
-
table = record['table_name']
|
176
|
-
position = record['next_position'] - record['event_length']
|
177
|
-
check_empty_binlog
|
178
|
-
|
179
|
-
records = record["rows"].collect do |row|
|
180
|
-
row = yield(row) if block_given? # Give the caller a chance to generate the correct row
|
181
|
-
{ TYPE => type, TABLE_NAME => table,
|
182
|
-
RESPECT_ORDER => true, # Continuous sync needs record order to be kept
|
183
|
-
SRC_POS => "#{@current_binlog_file}\t#{position}",
|
184
|
-
ROW => row.each.with_index(1).inject({}) do |h, (v, i)|
|
185
|
-
if v.kind_of?(String)
|
186
|
-
v = v.encode('utf-16', :undef => :replace, :invalid => :replace).encode('utf-8')
|
187
|
-
end
|
188
|
-
h[i.to_s] = v
|
189
|
-
h
|
190
|
-
end
|
191
|
-
}
|
192
|
-
end
|
193
|
-
|
194
|
-
encode_signless_integer(records, record["columns"])
|
195
|
-
|
196
|
-
# Use binlog's timestamp
|
197
|
-
timestamp = record["timestamp"].to_i
|
198
|
-
records.each do |row|
|
199
|
-
@sync_fm.increment_and_save_table_position(row[TABLE_NAME]) do |seq|
|
200
|
-
row[SEQ] = seq
|
201
|
-
Engine.emit(@tag, timestamp, row)
|
120
|
+
# Stops the binlog reader thread, if one is alive.
#
# Sends a stop request to the Kodama client, waits (via
# #wait_till_safe_to_stop) for it to report that stopping is safe and then
# kills the thread. If the client never reports that state, the thread is
# left running and an error is logged.
def shutdown
  return unless @thread && @thread.alive?

  $log.info "Requesting stop Kodama"
  @kodama_client.stop_request

  unless wait_till_safe_to_stop
    return $log.error("Unable to stop Kodama")
  end

  $log.info "Killing Kodama client"
  Thread.kill(@thread)
end
|
205
132
|
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
column_type = column_types[index]
|
212
|
-
if INTEGER_TYPES.keys.include?(column_type)
|
213
|
-
# It's a signless integer.
|
214
|
-
intval = record[ROW][position]
|
215
|
-
next unless (intval.kind_of?(Numeric) || intval =~ /^-?[\d]+$/)
|
216
|
-
width = INTEGER_TYPES[column_type] * 2 # * 2 because a single byte requires two characters (e.g. ff)
|
217
|
-
signless_val = SIGNLESS_INTEGER_PREFIX
|
218
|
-
signless_val += sprintf("%0#{width}x", intval).gsub(/\.\.f/, 'f' * width).slice(-width..-1)
|
219
|
-
record[ROW][position] = signless_val
|
220
|
-
end
|
221
|
-
end
|
133
|
+
# Polls the Kodama client until it reports that it is safe to stop.
#
# The client is checked up to 5 times with a 3 second pause between
# consecutive checks. No pause follows the final check: nothing is
# re-checked afterwards, so sleeping there would only delay shutdown.
#
# Returns true as soon as the client is safe to stop, false when every
# check failed.
def wait_till_safe_to_stop
  retry_count = 5
  retry_wait = 3
  retry_count.times do |attempt|
    return true if @kodama_client.safe_to_stop?
    sleep retry_wait unless attempt == retry_count - 1
  end
  false
end
|
141
|
+
|
142
|
+
#Hack: All that has been added here is `Fluent::Engine.shutdown_source`. This should be in
|
143
|
+
#fluentd's supervisor#install_main_process_signal_handlers
|
144
|
+
# Registers a USR1 signal handler that shuts down all fluentd sources and
# then force-flushes buffered events.
def install_custom_signal_handler
  trap :USR1 do
    $log.debug "fluentd main process get SIGUSR1"
    $log.info "force flushing buffered events"
    #@log.reopen!

    # The work runs in a separate thread because a mutex cannot be locked
    # from the main thread while it is in trap context.
    flusher = Thread.new do
      begin
        Fluent::Engine.shutdown_source
        Fluent::Engine.flush!
        $log.debug "flushing thread: flushed"
      rescue Exception => e
        $log.warn "flushing thread error: #{e}"
      end
    end
    flusher.run
  end
end
|
236
163
|
end
|
237
164
|
|
238
|
-
class
|
239
|
-
|
240
|
-
|
241
|
-
end
|
165
|
+
class BinLogPosition
|
166
|
+
include Comparable
|
167
|
+
attr_accessor :file, :pos
|
242
168
|
|
243
|
-
def
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
if (self.respond_to?(method_name))
|
248
|
-
self.send(method_name, record, query)
|
249
|
-
else
|
250
|
-
raise "method '#{method_name}' is not defined in #{self.class.name} although its matching pattern is defined"
|
251
|
-
end
|
252
|
-
break
|
253
|
-
end
|
254
|
-
end
|
169
|
+
def initialize(binlog_content)
|
170
|
+
items = binlog_content.split("\t")
|
171
|
+
@file = items[0]
|
172
|
+
@pos = items[1].to_i
|
255
173
|
end
|
256
174
|
|
257
|
-
|
258
|
-
|
259
|
-
query = strip_comments(query)
|
175
|
+
def <=>(obj)
|
176
|
+
(self.file <=> obj.file) == 0 ? (self.pos <=> obj.pos) : (self.file <=> obj.file)
|
260
177
|
end
|
261
178
|
|
262
|
-
def
|
263
|
-
|
264
|
-
query = query.gsub(/\/\*[^\*].*\*\//, ' ') # /* */ style comments
|
265
|
-
query = query.gsub(/\s+/, ' ') # replace multiple spaces with a space
|
179
|
+
def to_s
|
180
|
+
"#{file}\t#{pos}"
|
266
181
|
end
|
267
182
|
end
|
268
183
|
|
269
|
-
class FlydataMysqlBinlogQueryHandler < MysqlBinlogQueryHandler
|
270
|
-
def initialize(opts)
|
271
|
-
mandatory_opts = [:record_handler]
|
272
|
-
missing_opts = mandatory_opts - opts.keys
|
273
|
-
unless missing_opts.empty?
|
274
|
-
raise "mandatory options are missing: #{missing_opts.join(", ")}"
|
275
|
-
end
|
276
|
-
@opts = opts
|
277
|
-
|
278
|
-
@mapping_table = [
|
279
|
-
[/^alter table/i, :on_alter_table],
|
280
|
-
]
|
281
|
-
end
|
282
|
-
|
283
|
-
def on_alter_table(record, query)
|
284
|
-
m = /alter table\s+(?<table>[^\s]+)/i.match(query)
|
285
|
-
if m.nil?
|
286
|
-
raise "This alter table query has no table name? '#{query}'"
|
287
|
-
end
|
288
|
-
|
289
|
-
@opts[:record_handler].on_table_changed(m[:table])
|
290
|
-
end
|
291
184
|
end
|
292
185
|
|
293
|
-
end
|
294
186
|
|
295
187
|
# HACK
|
296
188
|
# Monkey patch the class to manage string's character encoding.
|
@@ -335,15 +227,6 @@ module Kodama
|
|
335
227
|
|
336
228
|
Client.class_eval do
|
337
229
|
attr_accessor :logger
|
338
|
-
|
339
|
-
# Except for replacing logger, the implementation is a copy of the original
|
340
|
-
# method.
|
341
|
-
def self.start(options = {}, &block)
|
342
|
-
client = self.new(mysql_url(options))
|
343
|
-
client.logger = $log
|
344
|
-
block.call(client)
|
345
|
-
client.start
|
346
|
-
end
|
347
230
|
end
|
348
231
|
|
349
232
|
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
require_relative 'binlog_query_handler'
|
2
|
+
|
3
|
+
module Mysql
  # Query handler for ALTER TABLE statements found in the binlog.
  #
  # Only "ALTER TABLE <table> ADD COLUMN ..." is recognised at the moment,
  # and even that is merely logged until the server side supports it (see
  # the TODO in #on_add_column).
  class AlterTableQueryHandler < BinlogQueryHandler
    # Case-insensitive so that "ALTER TABLE ..." is dispatched here as well,
    # consistent with the /i matching done in #process.
    PATTERN = /^alter table/i

    def initialize(context)
      super
    end

    # Pattern a normalized query must match for this handler to be chosen.
    def pattern
      PATTERN
    end

    # Handles one ALTER TABLE query.
    #
    # record           - the binlog query record (a Hash with a 'query' key).
    # normalized_query - the query text after comment/whitespace normalization.
    def process(record, normalized_query)
      case normalized_query
      when /^alter table [^\s]+ add column/i
        on_add_column(record, normalized_query)
      else
        $log.debug("not supported alter table query:'#{normalized_query}'")
      end
    end

    private

    # Currently only logs the original query text.
    def on_add_column(record, query)
      $log.debug("on_add_column query:'#{record['query']}'")
      #TODO: Uncomment following lines after supporting alter table on the server side
      #emit_record(:alter_table, record, increment_table_rev: true) do
      #  AlterTableAddColumnParser.new.parse(record['query'])
      #end
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module Mysql
  # A position in the MySQL binary log: a binlog file name plus a byte
  # offset. Positions are ordered by file name first, then by offset.
  class BinLogPosition
    include Comparable
    attr_accessor :file, :pos

    # binlog_content - String of the form "<file>\t<pos>". The offset is
    #                  converted with #to_i, so a missing or non-numeric
    #                  offset becomes 0.
    def initialize(binlog_content)
      items = binlog_content.split("\t")
      @file = items[0]
      @pos = items[1].to_i
    end

    # Compares by file name, then by offset.
    #
    # Returns -1, 0 or 1, or nil when obj does not look like a binlog
    # position. Returning nil (rather than raising NoMethodError) lets
    # Comparable#== answer false for unrelated objects such as nil.
    def <=>(obj)
      return nil unless obj.respond_to?(:file) && obj.respond_to?(:pos)

      file_order = file <=> obj.file
      file_order == 0 ? (pos <=> obj.pos) : file_order
    end

    # Serializes back to the "<file>\t<pos>" form accepted by #initialize.
    def to_s
      "#{file}\t#{pos}"
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require_relative 'alter_table_query_handler'
|
2
|
+
|
3
|
+
module Mysql
  # Routes a binlog query record to the first registered handler whose
  # pattern matches the normalized query text.
  class BinlogQueryDispatcher
    # Matches a single SQL comment, scanning left to right in one pass:
    #   * a /* ... */ block, non-greedy and allowed to span lines, so that
    #     SQL between two comments is preserved;
    #   * a "--" comment, i.e. "--" at end of line or followed by
    #     whitespace, running to the end of that line only.
    # The [^\*] after "/*" is kept from the previous pattern (blocks that
    # open with "/**" are left alone) -- NOTE(review): intent unclear.
    # Comment markers inside string literals are not recognised as such.
    COMMENT_PATTERN = /\/\*[^\*].*?\*\/|--(?:[^\S\n][^\n]*)?$/m

    def initialize
      @handlers = []
    end

    # Normalizes record["query"] once and passes the record to the first
    # handler whose #pattern matches. A record without a query matches
    # nothing.
    #
    # record - Hash with the raw SQL text under the "query" key.
    def dispatch(record)
      query = normalize_query(record["query"].to_s)
      @handlers.each do |handler|
        if handler.pattern.match(query)
          handler.process(record, query)
          break
        end
      end
    end

    private

    def normalize_query(query)
      strip_comments(query)
    end

    # Removes comments, collapses every whitespace run into one space and
    # trims both ends so that handlers can anchor their patterns with ^.
    def strip_comments(query)
      query.gsub(COMMENT_PATTERN, ' ').gsub(/\s+/, ' ').strip
    end
  end

  # Dispatcher preloaded with the query handlers used by Flydata.
  class FlydataBinlogQueryDispatcher < BinlogQueryDispatcher
    def initialize(context)
      @handlers = [
        AlterTableQueryHandler.new(context),
      ]
    end
  end
end
|