flydata 0.2.8 → 0.2.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/VERSION +1 -1
- data/flydata.gemspec +9 -5
- data/lib/flydata/command/sync.rb +89 -988
- data/lib/flydata/fluent-plugins/in_mysql_binlog_flydata.rb +6 -1
- data/lib/flydata/helpers.rb +11 -0
- data/lib/flydata/output/forwarder.rb +166 -0
- data/lib/flydata/parser/mysql/dump_parser.rb +729 -0
- data/lib/flydata/parser/mysql/mysql_alter_table.treetop +214 -2
- data/lib/flydata/sync_file_manager.rb +1 -1
- data/lib/flydata/table_def/mysql_table_def.rb +61 -47
- data/lib/flydata/table_def/redshift_table_def.rb +30 -26
- data/spec/flydata/command/sync_spec.rb +0 -1160
- data/spec/flydata/output/forwarder_spec.rb +105 -0
- data/spec/flydata/parser/mysql/alter_table_parser_spec.rb +224 -23
- data/spec/flydata/parser/mysql/dump_parser_spec.rb +900 -0
- data/spec/flydata/sync_file_manager_spec.rb +159 -0
- data/spec/flydata/table_def/mysql_table_def_spec.rb +2 -2
- data/spec/flydata/table_def/redshift_table_def_spec.rb +199 -44
- metadata +8 -3
@@ -56,7 +56,12 @@ class MysqlBinlogFlydataInput < MysqlBinlogInput
|
|
56
56
|
raise "No position file(#{@position_file}). Initial synchronization is required before starting."
|
57
57
|
end
|
58
58
|
load_custom_conf
|
59
|
-
$log.info "mysql host:\"#{@host}\" username:\"#{@username}\" database:\"#{@database}\" tables:\"#{@tables}\" tables_append_only:\"#{tables_append_only}\""
|
59
|
+
$log.info "mysql host:\"#{@host}\" port:\"#{@port}\" username:\"#{@username}\" database:\"#{@database}\" tables:\"#{@tables}\" tables_append_only:\"#{tables_append_only}\""
|
60
|
+
$log.info "mysql client version: #{`mysql -V`}"
|
61
|
+
server_version = `echo 'select version();' | mysql -h #{@host} --port #{@port} -u #{@username} -p#{@password} 2>/dev/null`
|
62
|
+
server_version = server_version[(server_version.index("\n") + 1)..-1]
|
63
|
+
$log.info "mysql server version: #{server_version}"
|
64
|
+
|
60
65
|
@tables = @tables.split(/,\s*/)
|
61
66
|
@omit_events = Hash.new
|
62
67
|
@tables_append_only.split(/,\s*/).each do |table|
|
data/lib/flydata/helpers.rb
CHANGED
@@ -102,5 +102,16 @@ Usage: flydata COMMAND
|
|
102
102
|
end
|
103
103
|
end
|
104
104
|
end
|
105
|
+
|
106
|
+
# Unit labels ordered from largest to smallest. as_size starts at the last
# entry (plain bytes) and moves towards the front each time it scales down.
UNIT_PREFIX = %W(TB GB MB KB B).freeze

# Renders a byte count as a short human-readable string such as "1.5 KB".
#
# The argument is converted with #to_f and divided by 1024 for as long as it
# is above 512 and a larger unit is still available. Values above 9, or whose
# fractional part is below 0.1, are printed with '%d' (the fraction is
# dropped); everything else is printed with one decimal place.
def as_size( s )
  value = s.to_f
  unit = UNIT_PREFIX.length - 1
  while value > 512 && unit > 0
    value /= 1024
    unit -= 1
  end
  pattern = (value > 9 || value.modulo(1) < 0.1) ? '%d' : '%.1f'
  "#{pattern % value} #{UNIT_PREFIX[unit]}"
end
|
105
116
|
end
|
106
117
|
end
|
@@ -0,0 +1,166 @@
|
|
1
|
+
require 'fileutils'
require 'openssl'
require 'socket'
require 'stringio'
|
2
|
+
|
3
|
+
module Flydata
|
4
|
+
module Output
|
5
|
+
# Builds the forwarder that matches a configured forwarder key.
class ForwarderFactory
  class << self
    # forwarder_key - nil or "tcpforwarder" selects TcpForwarder,
    #                 "sslforwarder" selects SslForwarder.
    # tag, servers, options - handed to the forwarder constructor unchanged.
    #
    # Returns the new forwarder. Raises RuntimeError for any other key.
    def create(forwarder_key, tag, servers, options = {})
      case forwarder_key
      when nil, "tcpforwarder"
        puts "Creating TCP connection" if FLYDATA_DEBUG
        return TcpForwarder.new(tag, servers, options)
      when "sslforwarder"
        puts "Creating SSL connection" if FLYDATA_DEBUG
        return SslForwarder.new(tag, servers, options)
      end
      raise "Unsupported Forwarding type #{forwarder_key}"
    end
  end
end
|
20
|
+
|
21
|
+
# Buffers msgpack-encoded events in memory and sends them to one of the
# configured servers over TCP, either when the buffer grows past its size
# limit (see #emit) or when #flush / #close is called.
class TcpForwarder
  FORWARD_HEADER = [0x92].pack('C')
  BUFFER_SIZE = 1024 * 1024 * 32 # 32M
  DEFAULT_SEND_TIMEOUT = 60 # 1 minute
  # Original (misspelled) constant name, kept so existing references still resolve.
  DEFUALT_SEND_TIMEOUT = DEFAULT_SEND_TIMEOUT
  RETRY_INTERVAL = 2
  RETRY_LIMIT = 10

  attr_reader :buffer_record_count, :buffer_size

  # tag     - value written (msgpack-encoded) in front of every batch.
  # servers - non-empty Array of "host:port" strings.
  # options - Hash; :buffer_size_limit overrides BUFFER_SIZE.
  #
  # Raises RuntimeError when servers is not a non-empty Array.
  def initialize(tag, servers, options = {})
    @tag = tag
    unless servers.kind_of?(Array) && !servers.empty?
      raise "Servers must not be empty."
    end
    @servers = servers
    @server_index = 0
    set_options(options)
    reset
  end

  # Applies the supported options; falls back to BUFFER_SIZE when no
  # :buffer_size_limit is given.
  def set_options(options)
    @buffer_size_limit = options[:buffer_size_limit] || BUFFER_SIZE
  end

  # Appends one record (or an Array of records) to the buffer, each encoded
  # as [time, record].to_msgpack.
  #
  # Returns the result of #send when the buffer exceeded its limit,
  # false otherwise.
  def emit(records, time = Time.now.to_i)
    records = [records] unless records.kind_of?(Array)
    records.each do |record|
      event_data = [time, record].to_msgpack
      @buffer_records << event_data
      @buffer_record_count += 1
      @buffer_size += event_data.bytesize
    end
    if @buffer_size > @buffer_size_limit
      send
    else
      false
    end
  end

  # Sends the buffered records to the next server and clears the buffer.
  #
  # NOTE: this overrides Object#send for instances of this class; the name is
  # kept because callers use it.
  #
  # A failed attempt is retried up to RETRY_LIMIT times, sleeping
  # RETRY_INTERVAL ** retry_count seconds between attempts; every attempt
  # picks the next server. When FLYDATA_BENCHMARK is set in the environment
  # nothing is sent and the buffer is simply cleared.
  #
  # Returns false when the buffer is empty, true otherwise.
  # Re-raises the last error once the retry limit is exceeded.
  def send
    return false unless @buffer_size > 0
    if ENV['FLYDATA_BENCHMARK']
      reset
      return true
    end

    sock = nil
    retry_count = 0
    begin
      sock = connect(pickup_server)

      # Write header
      sock.write FORWARD_HEADER
      # Write tag
      sock.write @tag.to_msgpack
      # Write records: one marker byte plus the payload length as a 32-bit
      # big-endian integer, then the payload itself.
      sock.write([0xdb, @buffer_records.bytesize].pack('CN'))
      StringIO.open(@buffer_records) do |i|
        FileUtils.copy_stream(i, sock)
      end
    rescue => e
      # Close the failed connection here: the next attempt overwrites `sock`,
      # so otherwise this socket would stay open until garbage collection.
      close_socket(sock)
      sock = nil

      retry_count += 1
      if retry_count > RETRY_LIMIT
        puts "! Error: Failed to send data. Exceeded the retry limit. retry_count:#{retry_count}"
        raise e
      end
      puts "! Warn: Retring to send data. retry_count:#{retry_count} error=#{e.to_s}"
      wait_time = RETRY_INTERVAL ** retry_count
      puts "  Now waiting for next retry. time=#{wait_time}sec"
      sleep wait_time
      retry
    ensure
      close_socket(sock)
    end
    reset
    true
  end

  #TODO: Check server status
  # Returns the next server in round-robin order.
  def pickup_server
    ret_server = @servers[@server_index]
    @server_index += 1
    @server_index = 0 if @server_index >= @servers.count
    ret_server
  end

  # Opens a TCP connection to a "host:port" server and applies the socket
  # options. The socket is closed again if applying the options fails.
  def connect(server)
    host, port = server.split(':')
    sock = TCPSocket.new(host, port.to_i)

    begin
      # Set options
      # SO_LINGER takes two native ints: on/off flag and linger time.
      opt = [1, DEFAULT_SEND_TIMEOUT].pack('I!I!')
      sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_LINGER, opt)
      # SO_SNDTIMEO takes two native longs; presumably (seconds, microseconds).
      opt = [DEFAULT_SEND_TIMEOUT, 0].pack('L!L!')
      sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_SNDTIMEO, opt)
    rescue
      close_socket(sock)
      raise
    end

    sock
  end

  # Empties the buffer and zeroes its counters.
  def reset
    @buffer_records = ''
    @buffer_record_count = 0
    @buffer_size = 0
  end

  # Sends whatever is buffered; same return value as #send.
  def flush
    send
  end

  # Flushes the buffer. No connection is kept open between sends, so there
  # is nothing else to release.
  def close
    flush
  end

  private

  # Best-effort close: errors raised while closing are ignored on purpose.
  def close_socket(sock)
    sock.close if sock
  rescue StandardError
    nil
  end
end
|
145
|
+
# TcpForwarder variant that wraps the TCP connection in TLS and verifies the
# server certificate against the system's default certificate store.
class SslForwarder < TcpForwarder
  # Opens the TCP connection via the parent class and performs the TLS
  # handshake on top of it.
  #
  # Returns the connected OpenSSL::SSL::SSLSocket. If the handshake (or the
  # setup before it) fails, the underlying TCP socket is closed before the
  # error is re-raised so the connection does not leak.
  #
  # NOTE(review): only the certificate chain is verified (VERIFY_PEER); no
  # host name check such as post_connection_check is performed here. Confirm
  # whether that is intended.
  def connect(server)
    tcp_sock = super
    begin
      ssl_sock = OpenSSL::SSL::SSLSocket.new(tcp_sock, ssl_ctx_with_verification)
      # Closing the SSL socket also closes the wrapped TCP socket.
      ssl_sock.sync_close = true
      ssl_sock.connect
      ssl_sock
    rescue
      begin
        tcp_sock.close
      rescue StandardError
        nil
      end
      raise
    end
  end

  private

  # Builds an SSL context that verifies the peer using the default CA paths.
  def ssl_ctx_with_verification
    cert_store = OpenSSL::X509::Store.new
    cert_store.set_default_paths
    ssl_ctx = OpenSSL::SSL::SSLContext.new
    ssl_ctx.verify_mode = OpenSSL::SSL::VERIFY_PEER
    ssl_ctx.cert_store = cert_store
    ssl_ctx
  end
end
|
165
|
+
end
|
166
|
+
end
|
@@ -0,0 +1,729 @@
|
|
1
|
+
module Flydata
|
2
|
+
module Parser
|
3
|
+
module Mysql
|
4
|
+
|
5
|
+
# Mixin that remembers MySQL connection settings and opens clients with them.
module MysqlAccessible
  # Copies host, port, username, password and database from a string-keyed
  # conf hash into a symbol-keyed hash, stores it in @mysql_conf and
  # returns it.
  def mysql_conf(conf)
    settings = [:host, :port, :username, :password, :database].each_with_object({}) do |key, acc|
      acc[key] = conf[key.to_s]
    end
    @mysql_conf = settings
  end

  # Returns a new Mysql2::Client for the stored settings (refreshed from
  # conf when one is given), or nil when no settings have been stored yet.
  def mysql_cli(conf = nil)
    mysql_conf(conf) if conf
    @mysql_conf ? Mysql2::Client.new(@mysql_conf) : nil
  end
end
|
16
|
+
|
17
|
+
# Mixed into the read end of the dump pipe so that position queries work on
# a stream that cannot be rewound.
module DumpStreamIO
  # Always reports -1. The sync command does not resume when the dump
  # position file contains -1.
  def pos
    -1
  end
end
|
24
|
+
|
25
|
+
# Plain holder for what the dump parser learns about one table: its name,
# its columns (keyed by column name) and its primary key column names.
class MysqlTable
  attr_accessor :table_name, :columns, :primary_keys

  def initialize(table_name, columns = {}, primary_keys = [])
    @table_name, @columns, @primary_keys = table_name, columns, primary_keys
  end

  # Registers a column hash under its :column_name and returns the column.
  def add_column(column)
    @columns.store(column[:column_name], column)
  end
end
|
38
|
+
|
39
|
+
# Base class for mysqldump runners. Builds the mysqldump command line and
# the Mysql2 connection options from a string-keyed conf hash; subclasses
# implement #dump.
class MysqlDumpGenerator
  require 'shellwords'

  # host, port, username, password, extra params, database, tables
  MYSQL_DUMP_CMD_TEMPLATE = "mysqldump --protocol=tcp -h %s -P %s -u%s %s --skip-lock-tables --single-transaction --hex-blob %s %s %s"
  EXTRA_MYSQLDUMP_PARAMS = ""

  # conf - Hash with 'host', 'port', 'username', 'password', 'database' and
  #        an optional comma-separated 'tables' string.
  #
  # Every value placed on the command line is shell-escaped, so passwords or
  # names containing spaces or shell metacharacters no longer break (or
  # inject into) the command. Whitespace around table names is dropped.
  #
  # NOTE(review): the password is still passed as a command-line argument
  # and may therefore be visible in the process list.
  def initialize(conf)
    password = conf['password'].to_s.empty? ? "" : "-p#{Shellwords.escape(conf['password'].to_s)}"
    tables = conf['tables'].to_s.split(',').map(&:strip).reject(&:empty?).map { |t| Shellwords.escape(t) }.join(' ')
    @dump_cmd = MYSQL_DUMP_CMD_TEMPLATE %
      [shell_arg(conf['host']), shell_arg(conf['port']), shell_arg(conf['username']), password,
       self.class::EXTRA_MYSQLDUMP_PARAMS, shell_arg(conf['database']), tables]
    @db_opts = [:host, :port, :username, :password, :database].inject({}) {|h, sym| h[sym] = conf[sym.to_s]; h}
  end

  # Must be overridden by subclasses.
  def dump(file_path)
    raise "subclass must implement the method"
  end

  private

  # Shell-escapes a value; nil and empty values stay an empty string, as
  # they did before escaping was introduced.
  def shell_arg(value)
    text = value.to_s
    text.empty? ? "" : Shellwords.escape(text)
  end
end
|
58
|
+
|
59
|
+
# Runs mysqldump with --flush-logs --master-data=2 and writes the dump to a
# file.
class MysqlDumpGeneratorMasterData < MysqlDumpGenerator
  require 'shellwords'

  EXTRA_MYSQLDUMP_PARAMS = "--flush-logs --master-data=2"

  # Runs the dump command with "-r file_path" and validates the result.
  #
  # Lines mysqldump prints on stderr are echoed, except those starting with
  # "Warning:". On a non-zero exit status the partial dump file (if any) is
  # printed and removed.
  #
  # Returns true on success.
  # Raises RuntimeError when mysqldump fails, or when the dump file is
  # missing or empty afterwards.
  def dump(file_path)
    # The path is shell-escaped so that spaces or metacharacters in it
    # cannot split or alter the command.
    cmd = "#{@dump_cmd} -r #{Shellwords.escape(file_path.to_s)}"
    _out, err, status = Open3.capture3(cmd)
    err.to_s.each_line { |l| puts l unless /^Warning:/ =~ l }

    unless status.exitstatus == 0
      # File.exist? is used because File.exists? was removed in Ruby 3.2.
      if File.exist?(file_path)
        File.open(file_path, 'r') { |f| f.each_line { |l| puts l } }
        FileUtils.rm(file_path)
      end
      raise "Failed to run mysqldump command."
    end
    unless File.exist?(file_path)
      raise "mysqldump file does not exist. Something wrong..."
    end
    if File.size(file_path) == 0
      raise "mysqldump file is empty. Something wrong..."
    end
    true
  end
end
|
81
|
+
|
82
|
+
# Runs mysqldump without --master-data. The binlog position is obtained
# through a separate connection while the tables are read-locked, and is
# inserted into the dump stream as a "CHANGE MASTER TO" comment.
class MysqlDumpGeneratorNoMasterData < MysqlDumpGenerator
  EXTRA_MYSQLDUMP_PARAMS = ""
  CHANGE_MASTER_TEMPLATE = <<EOS
--
-- Position to start replication or point-in-time recovery from
--

-- CHANGE MASTER TO MASTER_LOG_FILE='%s', MASTER_LOG_POS=%d;

EOS

  # Produces the dump either into file_path or, when a block is given, hands
  # the block an IO from which the dump can be read.
  #
  # Raises ArgumentError when neither file_path nor a block is given.
  # On any error the (partial) dump file is removed and the error re-raised.
  #
  # NOTE(review): the exit status of mysqldump is not inspected here.
  def dump(file_path = nil, &block)
    unless file_path || block
      raise ArgumentError.new("file_path or block must be given.")
    end

    # RDS doesn't allow obtaining binlog position using mysqldump. Get it separately and insert it into the dump file.
    table_locker = create_table_locker
    table_locker.resume # Lock tables

    begin
      # create pipe for callback function
      rd_io, wr_io = IO.pipe
      wr_io.sync = true
      wr_io.set_encoding("utf-8")
      rd_io.extend(DumpStreamIO)

      # start mysqldump
      Open3.popen3(@dump_cmd) do |cmd_in, cmd_out, cmd_err|
        cmd_in.close_write
        cmd_out.set_encoding("utf-8") # mysqldump output must be in UTF-8

        first_line = cmd_out.gets # wait until first line comes
        # Second resume: read the binlog position and release the locks.
        binlog_file, binlog_pos = table_locker.resume

        threads = []

        # filter dump stream and write data to pipe
        threads << Thread.new do
          begin
            wr_io.print(first_line) # write a first line
            fileter_dump_stream(cmd_out, wr_io, binlog_file, binlog_pos)
          ensure
            close_quietly(wr_io)
          end
        end

        # show err message
        threads << Thread.new do
          cmd_err.each_line do |line|
            $stderr.print line unless /^Warning:/ === line
          end
        end

        if block
          # call callback function with io if block is given
          block.call(rd_io)
        elsif file_path
          # store data to file
          open_file_stream(file_path) {|f| rd_io.each_line{|l| f.print(l)}}
        end

        threads.each(&:join)
      end
    rescue
      # Cleanup (File.exist? because File.exists? was removed in Ruby 3.2)
      FileUtils.rm(file_path) if file_path && File.exist?(file_path)
      raise
    ensure
      begin
        # Let table_locker finish its task even if an exception happened
        table_locker.resume if table_locker.alive?
      ensure
        # Close both pipe ends even when the locker itself raises.
        close_quietly(rd_io)
        close_quietly(wr_io)
      end
    end
  end

  private

  # Best-effort close; nil and errors raised by close are ignored on purpose.
  def close_quietly(io)
    io.close if io
  rescue StandardError
    nil
  end

  def open_file_stream(file_path, &block)
    File.open(file_path, "w", encoding: "utf-8") {|f| block.call(f)}
  end

  # This query generates a query which flushes user tables with read lock
  FLUSH_TABLES_QUERY_TEMPLATE = "FLUSH TABLES %s WITH READ LOCK;"
  USER_TABLES_QUERY = <<EOS
SELECT CONCAT('`',
REPLACE(TABLE_SCHEMA, '`', '``'), '`.`',
REPLACE(TABLE_NAME, '`', '``'), '` ')
AS tables
FROM INFORMATION_SCHEMA.TABLES
WHERE TABLE_TYPE = 'BASE TABLE'
AND ENGINE NOT IN ('MEMORY', 'CSV', 'PERFORMANCE_SCHEMA');
EOS

  # Returns a Fiber that works in two steps:
  #   1st resume - connects, flushes and read-locks the tables, then yields.
  #   2nd resume - reads SHOW MASTER STATUS, unlocks the tables, closes the
  #                connection and returns [binlog file, binlog position].
  def create_table_locker
    Fiber.new do
      client = Mysql2::Client.new(@db_opts)
      # Lock tables
      client.query "FLUSH LOCAL TABLES;"
      q = flush_tables_with_read_lock_query(client)
      puts "FLUSH TABLES query: #{q}" if FLYDATA_DEBUG
      client.query q
      begin
        Fiber.yield # Lock is done. Let dump to start
        # obtain binlog pos
        result = client.query "SHOW MASTER STATUS;"
        row = result.first
        if row.nil?
          raise "MySQL DB has no replication master status.  Check if the DB is set up as a replication master.  In case of RDS, make sure that Backup Retention Period is set to more than 0."
        end
      ensure
        begin
          # unlock tables
          client.query "UNLOCK TABLES;"
        ensure
          # Close the connection even when UNLOCK TABLES fails.
          client.close
        end
      end

      [row["File"], row['Position']]
    end
  end

  # Builds the FLUSH TABLES ... WITH READ LOCK statement. Servers that
  # support it get an explicit table list; older ones get the form without
  # table names.
  def flush_tables_with_read_lock_query(client)
    tables = ""
    if flush_tables_by_name_supported?(mysql_server_version(client))
      # FLUSH TABLES table_names,... WITH READ LOCK syntax is supported from MySQL 5.5
      result = client.query(USER_TABLES_QUERY)
      tables = result.collect{|r| r['tables']}.join(", ")
    end
    FLUSH_TABLES_QUERY_TEMPLATE % [tables]
  end

  # True when the version string denotes 5.5 or newer. The major and minor
  # numbers are compared numerically; a plain string comparison would rank
  # "10.1" and "5.10" below "5.5".
  def flush_tables_by_name_supported?(version)
    major, minor = version.to_s.scan(/\d+/).first(2).map(&:to_i)
    ([major.to_i, minor.to_i] <=> [5, 5]) >= 0
  end

  VERSION_QUERY = "SHOW VARIABLES LIKE 'version'"
  def mysql_server_version(client)
    result = client.query(VERSION_QUERY)
    result.first['Value']
  end

  # Copies the mysqldump output to w_io line by line. The first line
  # starting with "--" that follows the "-- Server version" line is preceded
  # by the CHANGE MASTER comment carrying the binlog position.
  def fileter_dump_stream(cmd_out, w_io, binlog_file, binlog_pos)
    find_insert_pos = :not_started
    cmd_out.each_line do |line|
      if find_insert_pos == :not_started && /^-- Server version/ === line
        find_insert_pos = :finding
      elsif find_insert_pos == :finding && /^--/ === line
        # wait before writing the first database queries
        # insert binlog pos
        change_master = CHANGE_MASTER_TEMPLATE % [binlog_file, binlog_pos]
        w_io.print change_master

        find_insert_pos = :found
      end
      w_io.print line
      w_io.puts unless line.end_with?("\n")
      w_io.flush
    end
  end

end
|
238
|
+
|
239
|
+
class MysqlDumpParser
|
240
|
+
|
241
|
+
# Parser states. Each constant holds its own name as a String; #parse
# dispatches on these values and also reports them to the check point block.
module State
  # Before the CHANGE MASTER comment has been seen.
  START                    = 'START'
  # Looking for the next CREATE TABLE statement.
  CREATE_TABLE             = 'CREATE_TABLE'
  # Reading column definition lines.
  CREATE_TABLE_COLUMNS     = 'CREATE_TABLE_COLUMNS'
  # Reading the lines after the columns, up to the closing ")".
  CREATE_TABLE_CONSTRAINTS = 'CREATE_TABLE_CONSTRAINTS'
  # Looking for INSERT (or UNLOCK) lines of the current table.
  INSERT_RECORD            = 'INSERT_RECORD'
  # An INSERT line has been read and is about to be parsed.
  PARSING_INSERT_RECORD    = 'PARSING_INSERT_RECORD'
end
|
249
|
+
|
250
|
+
attr_accessor :binlog_pos
|
251
|
+
|
252
|
+
# option - Hash of parser settings. :binlog_pos seeds #binlog_pos; the whole
#          hash is kept in @option for use while parsing.
def initialize(option = {})
  @option = option
  @binlog_pos = @option[:binlog_pos]
end
|
256
|
+
|
257
|
+
# Walks through a mysqldump stream with a small state machine.
#
# dump_io             - IO to read the dump from (must be a kind of IO).
# create_table_block  - called with the MysqlTable once a CREATE TABLE
#                       statement has been read completely.
# insert_record_block - called with (table, values_set) for every INSERT
#                       line; a truthy result triggers a check point.
# check_point_block   - called with (table, dump_io.pos, bytes read so far,
#                       binlog position, next state).
#
# When @option[:last_pos] is set and is not -1, parsing resumes from that
# position using @option[:state] and @option[:mysql_table].
#
# Returns the binlog position hash ({binfile:, pos:}) found in the dump, or
# the one given at construction time.
# Raises ArgumentError when dump_io is not an IO.
def parse(dump_io, create_table_block, insert_record_block, check_point_block)
  unless dump_io.kind_of?(IO)
    raise ArgumentError.new("Invalid argument. The first parameter must be io.")
  end

  # State shared by the procs below through their closure.
  current_state = State::START
  current_table = nil
  buffered_line = nil
  bytesize = 0

  # Returns the next stripped line; a line pushed back into buffered_line
  # is served first and is not counted again in bytesize.
  readline_proc = Proc.new do
    line = nil
    if buffered_line
      line = buffered_line
      buffered_line = nil
    else
      rawline = dump_io.readline
      bytesize += rawline.bytesize
      line = rawline.strip
    end
    line
  end

  state_start = Proc.new do
    line = readline_proc.call

    # -- CHANGE MASTER TO MASTER_LOG_FILE='mysql-bin.000002', MASTER_LOG_POS=120;
    m = /^\-\- CHANGE MASTER TO MASTER_LOG_FILE='(?<binfile>[^']+)', MASTER_LOG_POS=(?<pos>\d+)/.match(line)
    if m
      @binlog_pos = {binfile: m[:binfile], pos: m[:pos].to_i}
      current_state = State::CREATE_TABLE
      check_point_block.call(nil, dump_io.pos, bytesize, @binlog_pos, current_state)
    end
  end

  state_create_table = Proc.new do
    line = readline_proc.call

    # CREATE TABLE `active_admin_comments` (
    m = /^CREATE TABLE `(?<table_name>[^`]+)`/.match(line)
    if m
      current_table = MysqlTable.new(m[:table_name])
      current_state = State::CREATE_TABLE_COLUMNS
    end
  end

  state_create_table_constraints = Proc.new do
    line = readline_proc.call

    # PRIMARY KEY (`id`),
    if line.start_with?(')')
      create_table_block.call(current_table)
      current_state = State::INSERT_RECORD
      check_point_block.call(current_table, dump_io.pos, bytesize, @binlog_pos, current_state)
    elsif m = /^PRIMARY KEY \((?<primary_keys>[^\)]+)\)/.match(line)
      current_table.primary_keys = m[:primary_keys].split(',').collect do |pk_str|
        pk_str[1..-2] # drop the surrounding backticks
      end
    end
  end

  state_create_table_columns = Proc.new do
    line = readline_proc.call

    # `author_type` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
    if line.start_with?("`")
      column = {}

      # parse column line
      line = line[0..-2] if line.end_with?(',')
      items = line.split
      column[:column_name] = items.shift[1..-2]
      column[:format_type_str] = format_type_str = items.shift
      pos = format_type_str.index('(')
      if pos
        ft = column[:format_type] = format_type_str[0..pos-1]
        if ft == 'decimal'
          precision, scale = format_type_str[pos+1..-2].split(',').collect{|v| v.to_i}
          column[:decimal_precision] = precision
          column[:decimal_scale] = scale
        else
          column[:format_size] = format_type_str[pos+1..-2].to_i
        end
      else
        column[:format_type] = format_type_str
      end
      while (item = items.shift) do
        case item
        when 'DEFAULT'
          value = items.shift
          value = value.start_with?('\'') ? value[1..-2] : value
          value = nil if value == 'NULL'
          column[:default] = value
        when 'NOT'
          # 'NOT' has already been shifted off, so the word that follows it
          # is now the first remaining item.
          if items.first == 'NULL'
            items.shift
            column[:not_null] = true
          end
        when 'unsigned'
          column[:unsigned] = true
        else
          #ignore other options
        end
      end

      current_table.add_column(column)
    else
      # Not a column line: hand the same line to the constraints state.
      current_state = State::CREATE_TABLE_CONSTRAINTS
      buffered_line = line
      state_create_table_constraints.call
    end
  end

  state_insert_record = Proc.new do
    line = readline_proc.call

    if line.start_with?('INSERT')
      buffered_line = line
      current_state = State::PARSING_INSERT_RECORD
    elsif line.start_with?('UNLOCK')
      current_state = State::CREATE_TABLE
      check_point_block.call(current_table, dump_io.pos, bytesize, @binlog_pos, current_state)
    end
  end

  state_parsing_insert_record = Proc.new do
    line = readline_proc.call

    values_set = InsertParser.new.parse(line)
    current_state = State::INSERT_RECORD

    if insert_record_block.call(current_table, values_set)
      check_point_block.call(current_table, dump_io.pos, bytesize, @binlog_pos, current_state)
    end
  end

  # resume(only when using dump file)
  if @option[:last_pos] && (@option[:last_pos].to_i != -1)
    dump_io.pos = @option[:last_pos].to_i
    current_state = @option[:state]
    current_table = @option[:mysql_table]
    bytesize = dump_io.pos
  end

  # Start reading file from top (or from the resume position)
  until dump_io.eof? do
    case current_state
    when State::START
      state_start.call
    when State::CREATE_TABLE
      state_create_table.call
    when State::CREATE_TABLE_COLUMNS
      state_create_table_columns.call
    when State::CREATE_TABLE_CONSTRAINTS
      state_create_table_constraints.call
    when State::INSERT_RECORD
      state_insert_record.call
    when State::PARSING_INSERT_RECORD
      state_parsing_insert_record.call
    end
  end
  @binlog_pos
end
|
426
|
+
|
427
|
+
# Parse the insert line containing multiple values. (max line size is 1kb)
|
428
|
+
# ex) INSERT INTO `data_entries` VALUES (2,2,'access_log'), (2,3,'access_log2');
|
429
|
+
# Parses one mysqldump INSERT line that may carry several value tuples into
# an Array of rows, each row being an Array of Strings (nil for NULL).
#
#   INSERT INTO `data_entries` VALUES (2,2,'access_log'), (2,3,'access_log2');
class InsertParser
  module State
    IN_VALUE = 'IN_VALUE'
    NEXT_VALUES = 'NEXT_VALUES'
  end

  def initialize
    @values = []
    @values_set = []
  end

  # Starts ruby-prof when the library is loaded and not already running.
  def start_ruby_prof
    RubyProf.start if defined?(RubyProf) && !RubyProf.running?
  end

  # Stops ruby-prof (when running) and writes an HTML report to
  # ruby-prof-out-<timestamp>.html in the current directory.
  def stop_ruby_prof
    if defined?(RubyProf) && RubyProf.running?
      result = RubyProf.stop
      #printer = RubyProf::GraphPrinter.new(result)
      printer = RubyProf::GraphHtmlPrinter.new(result)
      #printer.print(STDOUT)
      # Block form so the report file is closed once it has been written.
      File.open("ruby-prof-out-#{Time.now.to_i}.html", "w") do |report|
        printer.print(report, :min_percent => 3)
      end
    end
  end

  # Parses one INSERT line and returns the rows found in it.
  # When FLYDATA_BENCHMARK is set, the elapsed time and the line size are
  # printed afterwards.
  def parse(line)
    start_ruby_prof
    bench_start_time = Time.now
    _parse(line)
  ensure
    stop_ruby_prof
    if ENV['FLYDATA_BENCHMARK']
      # Uses `line`: no `target_line` local exists in this method.
      puts " -> time:#{Time.now.to_f - bench_start_time.to_f} size:#{line.size}"
    end
  end

  private

  def _parse(target_line)
    target_line = target_line.strip
    start_index = target_line.index('(')
    # From the first '(' up to, but excluding, the last character.
    target_line = target_line[start_index..-2]

    # Split insert line text with ',' and take care of ',' inside of the values later.
    #
    # We are using the C native method that is like 'split', 'start_with?', 'regexp'
    # instead of 'String#each_char' and string comparision for the performance.
    # 'String#each_char' is twice as slow as the current storategy.
    items = target_line.split(',')
    index = 0
    cur_state = State::NEXT_VALUES

    loop do
      case cur_state
      when State::NEXT_VALUES
        chars = items[index]
        break unless chars
        # Drop the first character of the tuple's first item (its '(').
        items[index] = chars[1..-1]
        cur_state = State::IN_VALUE
      when State::IN_VALUE
        chars = items[index]
        index += 1
        if chars.start_with?("'")
          # single item (not last item)
          # size check added below otherwise end_with? matches the single quote which was also used by start_with?
          if chars.size > 1 && chars.end_with?("'") && !last_char_escaped?(chars)
            @values << replace_escape_char(chars[1..-2])
          # single item (last item)
          # size check added below otherwise end_with? matches the single quote which was also used by start_with?
          elsif chars.size > 2 && chars.end_with?("')") && !last_char_escaped?(chars[0..-2])
            @values << replace_escape_char(chars[1..-3])
            @values_set << @values
            @values = []
            cur_state = State::NEXT_VALUES
          # multi items: the value contained ',' and was split; glue the
          # pieces back together until the closing quote is found.
          else
            cur_value = chars[1..-1]
            loop do
              next_chars = items[index]
              index += 1
              if next_chars.end_with?('\'') && !last_char_escaped?(next_chars)
                cur_value << ','
                cur_value << next_chars[0..-2]
                @values << replace_escape_char(cur_value)
                break
              elsif next_chars.end_with?("')") && !last_char_escaped?(next_chars[0..-2])
                cur_value << ','
                cur_value << next_chars[0..-3]
                @values << replace_escape_char(cur_value)
                @values_set << @values
                @values = []
                cur_state = State::NEXT_VALUES
                break
              else
                cur_value << ','
                cur_value << next_chars
              end
            end
          end
        else
          if chars.end_with?(')')
            chars = chars[0..-2]
            @values << (chars == 'NULL' ? nil : remove_leading_zeros(chars))
            @values_set << @values
            @values = []
            cur_state = State::NEXT_VALUES
          else
            @values << (chars == 'NULL' ? nil : remove_leading_zeros(chars))
          end
        end
      else
        raise "Invalid state: #{cur_state}"
      end
    end
    return @values_set
  end

  ESCAPE_HASH_TABLE = {"\\\\" => "\\", "\\'" => "'", "\\\"" => "\"", "\\n" => "\n", "\\r" => "\r"}

  # Turns the escape sequences listed in ESCAPE_HASH_TABLE back into the
  # characters they stand for.
  def replace_escape_char(original)
    original.gsub(/\\\\|\\'|\\"|\\n|\\r/, ESCAPE_HASH_TABLE)
  end

  # This method assume that the last character is '(single quotation)
  # abcd\' -> true
  # abcd\\' -> false (back slash escape back slash)
  # abcd\\\' -> true
  def last_char_escaped?(text)
    flag = false
    (text.length - 2).downto(0) do |i|
      if text[i] == '\\'
        flag = !flag
      else
        break
      end
    end
    flag
  end

  # Strips redundant leading zeros from a numeric string ("007.5" -> "7.5",
  # "00" -> "0"); strings that do not match the numeric pattern are returned
  # unchanged.
  def remove_leading_zeros(number_string)
    if number_string.start_with?('0')
      number_string.sub(/^0*([1-9][0-9]*(\.\d*)?|0(\.\d*)?)$/,'\1')
    else
      number_string
    end
  end
end
|
577
|
+
end
|
578
|
+
# Runs a set of pre-flight checks (every public method whose name starts
# with "check_") against the MySQL settings and local directories, collects
# the CompatibilityErrors they raise and reports them together.
class CompatibilityCheck
  require 'shellwords'

  class CompatibilityError < StandardError
  end

  SELECT_QUERY_TMPLT = "SELECT %s"

  # de_hash  - string-keyed Hash with host, port, username, password, database.
  # dump_dir - optional directory that must be writable.
  def initialize(de_hash, dump_dir=nil)
    @db_opts = [:host, :port, :username, :password, :database].inject({}) {|h, sym| h[sym] = de_hash[sym.to_s]; h}
    @dump_dir = dump_dir
    @errors=[]
  end

  # Calls every check_* method. CompatibilityErrors are collected; any other
  # error propagates immediately. Raises RuntimeError (via #print_errors)
  # when at least one check failed.
  def check
    self.methods.grep(/^check_/).each do |m|
      begin
        send(m)
      rescue CompatibilityError => e
        @errors << e
      end
    end
    print_errors
  end

  # Prints the collected errors and raises; does nothing when there are none.
  def print_errors
    return if @errors.empty?
    puts "There may be some compatibility issues with your MySQL credentials: "
    @errors.each do |error|
      puts "  * #{error.message}"
    end
    raise "Please correct these errors if you wish to run FlyData Sync"
  end

  # Verifies through SHOW GRANTS that the user holds the privileges needed.
  #
  # Only grant rows that match both "ON *" / "ON <database>" and the user
  # name are considered. Privileges are collected across all such rows, so
  # each missing privilege is reported once and a privilege granted on
  # another row is not reported as missing.
  #
  # Returns true when all privileges are present or a considered row grants
  # ALL PRIVILEGES; raises CompatibilityError otherwise.
  def check_mysql_user_compat
    grants_sql = "SHOW GRANTS"
    correct_db = ["ON (\\*|#{@db_opts[:database]})","TO '#{@db_opts[:username]}"]
    necessary_permission_fields= ["SELECT","RELOAD","LOCK TABLES","REPLICATION SLAVE","REPLICATION CLIENT"]
    all_privileges_field= ["ALL PRIVILEGES"]

    # Do not catch MySQL connection problem because check should stop if no MySQL connection can be made.
    client = Mysql2::Client.new(@db_opts)
    granted = []
    begin
      result = client.query(grants_sql)
      result.each do |res|
        # SHOW GRANTS should only return one column
        res_value = res.values.first
        next unless correct_db.all? {|perm| res_value.match(perm)}
        return true if all_privileges_field.all? {|d| res_value.match(d)}
        granted |= necessary_permission_fields.select {|priv| res_value.match(priv)}
        return true if (necessary_permission_fields - granted).empty?
      end
    ensure
      # Close the connection on every path, including early returns and errors.
      client.close
    end

    missing_priv = necessary_permission_fields - granted
    raise CompatibilityError, "The user '#{@db_opts[:username]}' does not have the correct permissions to run FlyData Sync\n    * These privileges are missing: #{missing_priv.join(", ")}"
  end

  # Verifies that the mysql command line client can connect over TCP.
  # Any stderr output other than the "password on the command line" warning
  # is treated as a failure.
  def check_mysql_protocol_tcp_compat
    # Values are shell-escaped so special characters cannot break the command.
    query = "mysql -u #{shell_arg(@db_opts[:username])} -h #{shell_arg(@db_opts[:host])} -P #{shell_arg(@db_opts[:port])} #{shell_arg(@db_opts[:database])} -e \"SHOW GRANTS;\" --protocol=tcp"
    query << " -p#{Shellwords.escape(@db_opts[:password].to_s)}" unless @db_opts[:password].to_s.empty?

    Open3.popen3(query) do |stdin, stdout, stderr|
      stdin.close
      while !stderr.eof?
        line = stderr.gets
        unless /Warning: Using a password on the command line interface can be insecure./ === line
          raise CompatibilityError, "Cannot connect to MySQL database. Please make sure you can connect with this command:\n      $ mysql -u #{@db_opts[:username]} -h #{@db_opts[:host]} -P #{@db_opts[:port]} #{@db_opts[:database]} --protocol=tcp -p"
        end
      end
    end
  end

  # Verifies the binlog related system variables. An "Unknown system
  # variable" error is tolerated for binlog_checksum and
  # log_bin_use_v1_row_events; for other variables it is recorded as a
  # mismatch.
  def check_mysql_row_mode_compat
    sys_var_to_check = {'@@binlog_format'=>'ROW', '@@binlog_checksum'=>'NONE', '@@log_bin_use_v1_row_events'=>1}
    errors={}

    client = Mysql2::Client.new(@db_opts)

    begin
      sys_var_to_check.each_key do |sys_var|
        sel_query = SELECT_QUERY_TMPLT % sys_var
        begin
          result = client.query(sel_query)
          unless result.first[sys_var] == sys_var_to_check[sys_var]
            errors[sys_var]=result.first[sys_var]
          end
        rescue Mysql2::Error => e
          if e.message =~ /Unknown system variable/
            unless e.message =~ /(binlog_checksum|log_bin_use_v1_row_events)/
              errors[sys_var] = false
            end
          else
            raise e
          end
        end
      end
    ensure
      client.close
    end
    unless errors.empty?
      error_explanation = ""
      errors.each_key do |err_key|
        error_explanation << "\n    * #{err_key} is #{errors[err_key]} but should be #{sys_var_to_check[err_key]}"
      end
      raise CompatibilityError, "These system variable(s) are not the correct value: #{error_explanation}\n   Please change these system variables for FlyData Sync to run correctly"
    end
  end

  # Verifies that ~/.flydata and the dump directory (when given) are
  # writable. For a path that is not an existing directory its parent
  # directory is checked instead.
  def check_writing_permissions
    write_errors = []
    paths_to_check = ["~/.flydata"]
    paths_to_check << @dump_dir unless @dump_dir.to_s.empty?
    paths_to_check.each do |path|
      full_path = File.expand_path(path)
      full_path = File.dirname(full_path) unless File.directory?(full_path)
      write_errors << full_path unless File.writable?(full_path)
    end
    unless write_errors.empty?
      error_dir = write_errors.join(", ")
      raise CompatibilityError, "We cannot access the directories: #{error_dir}"
    end
  end

  private

  # Shell-escapes a value; nil and empty values stay an empty string.
  def shell_arg(value)
    text = value.to_s
    text.empty? ? "" : Shellwords.escape(text)
  end
end
|
700
|
+
|
701
|
+
# Asks information_schema for the summed data_length of the configured
# tables.
class DatabaseSizeCheck
  include MysqlAccessible

  SIZE_CHECK_QUERY = <<EOT
SELECT
SUM(data_length) bytesize
FROM
information_schema.tables
WHERE
table_schema NOT IN ('information_schema','performance_schema','mysql') AND table_name in (%s);
EOT

  # de_conf - string-keyed conf Hash; 'tables' is a comma-separated list.
  #
  # Whitespace around table names is dropped (so "a, b" matches table "b"
  # rather than " b"), and quotes/backslashes in names are escaped before
  # they are placed in the SQL literal list.
  #
  # NOTE(review): the query is not restricted to the configured database, so
  # same-named tables in other schemas are counted too. Confirm intent.
  def initialize(de_conf)
    @de_conf = de_conf
    @tables = de_conf['tables'].split(',').map(&:strip).reject(&:empty?)
    @query = SIZE_CHECK_QUERY % [@tables.collect{|t| "'#{escape_sql_literal(t)}'"}.join(',')]
  end

  # Runs the size query and returns the total as an Integer.
  # The connection is closed afterwards on a best-effort basis.
  def get_db_bytesize
    client = mysql_cli(@de_conf)
    result = client.query(@query)
    return result.first['bytesize'].to_i
  ensure
    begin
      client.close if client
    rescue StandardError
      nil
    end
  end

  private

  # Doubles single quotes and backslashes so the text is safe inside a
  # single-quoted SQL string literal.
  def escape_sql_literal(text)
    text.gsub(/\\|'/) { |ch| ch == "'" ? "''" : "\\\\" }
  end
end
|
727
|
+
end
|
728
|
+
end
|
729
|
+
end
|