flydata 0.0.5.6 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -13,7 +13,7 @@ module Flydata
13
13
  # Start sender(fluentd) process
14
14
  say('Starting sender process.')
15
15
  Dir.chdir(FLYDATA_HOME){
16
- system("fluentd -d #{FLYDATA_HOME}/flydata.pid -l #{FLYDATA_HOME}/flydata.log -c #{FLYDATA_HOME}/flydata.conf")
16
+ system("fluentd -d #{FLYDATA_HOME}/flydata.pid -l #{FLYDATA_HOME}/flydata.log -c #{FLYDATA_HOME}/flydata.conf -p #{File.dirname(__FILE__)}/../fluent-plugins")
17
17
  }
18
18
  sleep 5
19
19
 
@@ -23,7 +23,7 @@ module Flydata
23
23
  data_port = flydata.data_port.get
24
24
  say("Go to your Dashboard! #{flydata.flydata_api_host}/data_ports/#{data_port['id']}")
25
25
  say <<EOF
26
- Please Note: Records and Total Size are updated every 10 minutes. You can download logs one hour after they are generated as they are sent to s3 once per hour.
26
+ Please Note: Records and Total Size are updated every 10-20 minutes.
27
27
  EOF
28
28
  end
29
29
  def stop
@@ -79,8 +79,11 @@ EOF
79
79
  say("Done! Client is ready now.")
80
80
  return true
81
81
  end
82
+ if process_died?
83
+ raise "Client could not been launched. Detail here #{FLYDATA_HOME}/flydata.log"
84
+ end
82
85
  say("Waiting for the client side to become active... (#{i}/#{retry_count})")
83
- sleep 30
86
+ sleep 10
84
87
  end
85
88
  raise "Somthing has gone wrong... Please try setup command again."
86
89
  end
@@ -121,6 +124,10 @@ EOF
121
124
  # Returns true if the process is running
122
125
  `[ -f #{FLYDATA_HOME}/flydata.pid ] && pgrep -P \`cat #{FLYDATA_HOME}/flydata.pid\``.to_i > 0
123
126
  end
127
# Returns true if the last line of the sender log reports that the
# process died (the line contains "process died within").
# Returns false otherwise, including when the log file does not exist yet.
def process_died?
  log_path = "#{FLYDATA_HOME}/flydata.log"
  # Without this guard `tail` prints an error to stderr for a log that has
  # not been created yet; the answer is "not died" either way.
  return false unless File.file?(log_path)
  !!(`tail -n 1 #{log_path}` =~ /process died within/)
end
124
131
  def uploaded_successfully?(data_port_id)
125
132
  res = flydata.get("/data_ports/#{data_port_id}/tail.json")
126
133
  res and res['logs'] and res['logs'].size > 0
@@ -27,7 +27,7 @@ module Flydata
27
27
 
28
28
  choice = nil
29
29
  say('Please select your log path for setting log deletion.')
30
- say(" (About log deletion - http://flydata.com/how-to-uninstall-flydata/)")
30
+ say(" (About log deletion - http://docs.hapyrus.com/faq/how-log-deletion-works/)")
31
31
  newline
32
32
  choose do |menu|
33
33
  menu.index = :letter
@@ -1,3 +1,5 @@
1
+ require_relative 'sync'
2
+
1
3
  module Flydata
2
4
  module Command
3
5
  class Setup < Base
@@ -18,12 +20,25 @@ module Flydata
18
20
  end
19
21
 
20
22
  def initial_run
23
+ last_message = nil
21
24
  run do
25
+ Flydata::Command::Conf.new.copy_templates
22
26
  puts
23
- shown = show_registered_redshift_entries
24
- if shown
27
+ shown_redshift_entries = (show_registered_redshift_entries)
28
+ shown_mysql_data_entries = show_registered_redshift_mysql_data_entries
29
+ if shown_redshift_entries
25
30
  Flydata::Command::Sender.new.stop
26
31
  true
32
+ elsif shown_mysql_data_entries
33
+ de = retrieve_data_entries.first
34
+ if File.exists?(Flydata::FileUtil::SyncFileManager.new(de).binlog_path)
35
+ Flydata::Command::Sender.new.stop
36
+ true
37
+ else
38
+ last_message = "\n! NOTE: Initial synchronization of MySQL database is required.\n" +
39
+ " Please run 'flydata sync' to start synchronization."
40
+ false
41
+ end
27
42
  else
28
43
  _run
29
44
  end
@@ -31,6 +46,7 @@ module Flydata
31
46
  puts
32
47
  print_usage
33
48
  puts "Completed setup of FlyData!"
49
+ puts last_message if last_message
34
50
  end
35
51
 
36
52
  def run(&block)
@@ -59,6 +75,13 @@ module Flydata
59
75
  [:redshift, :s3backup, :restart_flydata, :cancel])
60
76
  end
61
77
 
78
+ #### flydata-sync(RedshiftMysqlDataEntry)
79
# Prints one line per registered RedshiftMysqlDataEntry and returns
# whatever show_registered_entries returns.
def show_registered_redshift_mysql_data_entries
  entry_type = 'RedshiftMysqlDataEntry'
  show_registered_entries(entry_type) { |entry|
    say(" - #{entry['display_name']}: flydata-sync (mysql -> redshift)")
  }
end
84
+
62
85
  #### redshift backup mode
63
86
  def start_redshift_mode
64
87
  newline
@@ -306,7 +329,7 @@ module Flydata
306
329
  say("** Log deletion setting **")
307
330
  say("Flydata has a log deletion feature that flydata will delete old log archives uploaded by flydata automatically.")
308
331
  say("Flydata will delete logs whose last modified timestamp is 7 days ago.")
309
- say("For more details - http://flydata.com/how-to-uninstall-flydata/")
332
+ say("For more details - http://docs.hapyrus.com/faq/how-log-deletion-works/")
310
333
  ask_yes_no("Set auto log deletion mode?")
311
334
  end
312
335
 
@@ -0,0 +1,962 @@
1
require 'fileutils'
require 'msgpack'
require 'open3'
require 'shellwords'
require 'flydata/sync_file_manager'
#require 'ruby-prof'
5
+
6
+ module Flydata
7
+ module Command
8
+ class Sync < Base
9
+ include Helpers
10
+ CREATE_TABLE_OPTION = !!(ENV['FLYDATA_CREATE_TABLE_OPTION']) || false
11
+ INSERT_PROGRESS_INTERVAL = 1000
12
+
13
+ # for dump.pos file
14
+ STATUS_PARSING = 'PARSING'
15
+ STATUS_COMPLETE = 'COMPLETE'
16
+
17
# Entry point of the sync command: takes the first data entry and starts
# the MySQL -> Redshift sync for it.
# Raises when there is no data entry or its type is not
# 'RedshiftMysqlDataEntry'.
def run
  entry = retrieve_data_entries.first
  raise "There are no data entry." unless entry
  unless entry['type'] == 'RedshiftMysqlDataEntry'
    raise "No supported data entry. Only mysql-redshift sync is supported."
  end
  sync_mysql_to_redshift(entry)
end
27
+
28
# Deletes the local sync state files of the first data entry: the dump
# file, the dump position file, the binlog position file, the marshalled
# table dump and every table position file. Missing files are skipped.
def reset
  de = retrieve_data_entries.first
  sync_fm = Flydata::FileUtil::SyncFileManager.new(de)
  state_paths = [
    sync_fm.dump_file_path,
    sync_fm.dump_pos_path,
    sync_fm.binlog_path,
    sync_fm.mysql_table_marshal_dump_path,
    sync_fm.table_position_file_paths
  ].flatten
  state_paths.each do |path|
    # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
    FileUtils.rm(path) if File.exist?(path)
  end
end
35
+
36
# Prints the buffer statistics returned by do_check for the first data
# entry and returns true when they report 'complete', false otherwise.
def check
  entry = retrieve_data_entries.first
  stat = do_check(entry)
  finished = stat['complete'] ? true : false
  headline = finished ? "No buffer data on FlyData." : "Now processing data on FlyData."
  puts "#{headline} #{stat.inspect}"
  finished
end
47
+
48
# Finishes the initial sync: when the saved dump position is marked
# STATUS_COMPLETE, stores its binlog position and starts the sender.
# Raises when the status is anything else, including when no dump
# position has been saved at all.
def complete
  de = retrieve_data_entries.first
  sync_fm = Flydata::FileUtil::SyncFileManager.new(de)
  info = sync_fm.load_dump_pos
  # load_dump_pos may return nil (parse_mysqldump guards it the same way);
  # report that as "not complete" rather than failing with NoMethodError.
  unless info && info[:status] == STATUS_COMPLETE
    raise "Initial sync status is not complete. Try running 'flydata sync'."
  end
  sync_fm.save_binlog(info[:binlog_pos])
  Flydata::Command::Sender.new.start
end
59
+
60
+ # skip initial sync
61
# Skips the initial sync by creating an empty binlog position file for
# the first data entry, then tells the user how to start continuous sync.
def skip
  de = retrieve_data_entries.first
  sync_fm = Flydata::FileUtil::SyncFileManager.new(de)
  binlog_path = sync_fm.binlog_path
  # FileUtils.touch instead of a backtick `touch`: no shell is involved, so
  # paths containing spaces or shell metacharacters are handled correctly.
  FileUtils.touch(binlog_path)
  puts "Created an empty binlog position file."
  puts "-> #{binlog_path}"
  puts "Run 'flydata start' to start continuous sync."
end
70
+
71
+ private
72
+
73
# Asks the FlyData API for the buffer statistics of the given data entry
# in the current environment mode and returns the API result.
def do_check(de)
  entry_id = de['id']
  flydata.data_entry.buffer_stat(entry_id, env_mode)
end
76
+
77
# Runs the initial MySQL -> Redshift sync for the data entry: generates a
# mysqldump (unless one is already present) and parses it.
# Raises when a binlog position file already exists, because that marks
# the entry as already synchronized.
def sync_mysql_to_redshift(de)
  dp = flydata.data_port.get
  sync_fm = Flydata::FileUtil::SyncFileManager.new(de)

  # Check client condition
  # (File.exists? was removed in Ruby 3.2; File.exist? works everywhere.)
  if File.exist?(sync_fm.binlog_path)
    raise "Already synchronized. If you want to do initial sync, delete #{sync_fm.binlog_path}."
  end

  # Copy template if not exists
  unless Flydata::Preference::DataEntryPreference.conf_exists?(de)
    Flydata::Command::Conf.new.copy_templates
  end

  # generate_mysqldump returns nil when the user declines the dump path.
  parse_mysqldump(dp, de, sync_fm) if generate_mysqldump(de, sync_fm)
end
95
+
96
# Creates the mysqldump file for the data entry.
#
# de        - data entry hash; reads de['mysql_data_entry_preference']
# sync_fm   - object providing dump_file_path
# overwrite - when false, an existing non-empty dump file is reused
#
# Returns the dump file path, or nil when the user rejects the path.
# Raises when host, username or database is missing from the preference.
def generate_mysqldump(de, sync_fm, overwrite = false)
  pref = de['mysql_data_entry_preference']

  # validate parameter
  %w(host username database).each do |k|
    if pref[k].to_s.empty?
      raise "'#{k}' is required. Set the value in the conf file " +
            "-> #{Flydata::Preference::DataEntryPreference.conf_path(de)}"
    end
  end

  puts "Running mysqldump... host:#{pref['host']} " +
       "username:#{pref['username']} " +
       "database:#{pref['database']}"
  if pref['data_servers']
    puts "Send to Custom Data Servers: #{pref['data_servers']}"
  end

  if pref['tables']
    puts " target tables: #{pref['tables']}"
  else
    puts " target tables: <all-tables>"
  end

  fp = sync_fm.dump_file_path
  # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
  if File.exist?(fp) && File.size(fp) > 0 && !overwrite
    puts " -> Skip"
    return fp
  end

  puts "[Confirm] mysqldump path: #{fp}"
  unless ask_yes_no('OK?')
    newline
    puts "You can change the mysqldump path with 'mysqldump_path' in the conf file."
    puts "Edit '#{Flydata::Preference::DataEntryPreference.conf_path(de)}'"
    return nil
  end

  Flydata::Mysql::MysqlDumpGenerator.new(pref).dump(fp)
  puts " -> Done"
  fp
end
137
+
138
+ # Checkpoint
139
+ # -- CHANGE MASTER TO MASTER_LOG_FILE='mysql-bin.000215', MASTER_LOG_POS=120;
140
+ # <- checkpoint(after binlog)
141
+ #...
142
+ #CREATE TABLE `accounts` (
143
+ #...
144
+ #) ENGINE=InnoDB AUTO_INCREMENT=71 DEFAULT CHARSET=utf8;
145
+ # <- checkpoint(after create table)
146
+ #INSERT INTO `accounts` values (x,x,x),(y,y,y),....();
147
+ #INSERT INTO `accounts` values (x,x,x),(y,y,y),....();
148
+ #INSERT INTO `accounts` values (x,x,x),(y,y,y),....();
149
+ # <- checkpoint(when buffered data is sent to server)
150
+ #INSERT INTO `accounts` values (x,x,x),(y,y,y),....();
151
+ #INSERT INTO `accounts` values (x,x,x),(y,y,y),....();
152
+ #
153
+ #...
154
+ #UNLOCK TABLES;
155
+ # <- checkpoint
156
+ #...
157
+ #CREATE TABLE ...
158
# Parses the dump file and forwards every INSERT row to the data servers.
#
# dp      - data port hash (server_port, servers<suffix>, ssl_enabled)
# de      - data entry hash (tag_name<suffix>, mysql_data_entry_preference)
# sync_fm - sync file manager used for the dump path and for checkpoints
#
# Parsing resumes from the saved dump position when one exists. After the
# whole file is forwarded the method polls `check` every 10 seconds until
# it returns true, then records STATUS_COMPLETE. When FLYDATA_BENCHMARK is
# set it prints timings and returns true without waiting.
def parse_mysqldump(dp, de, sync_fm)
  puts "Parsing mysqldump file..."

  # Prepare forwarder
  pref = de['mysql_data_entry_preference']
  de_tag_name = de["tag_name#{env_suffix}"]
  server_port = dp['server_port']
  servers =
    if pref['data_servers']
      pref['data_servers'].split(',')
    else
      dp["servers#{env_suffix}"].collect { |s| "#{s}:#{server_port}" }
    end
  forwarder_type = pref['forwarder'] || (dp['ssl_enabled'] ? 'sslforwarder' : 'tcpforwarder')
  forwarder = Flydata::Output::ForwarderFactory.create(forwarder_type, de_tag_name, servers)

  # Load dump.pos file for resume
  option = sync_fm.load_dump_pos || {}
  puts "Resuming... Last processed table: #{option[:table_name]}" if option[:table_name]

  bench_start_time = Time.now

  # Number of INSERT statements forwarded for the current table.
  tmp_num_inserted_record = 0
  dump_fp = sync_fm.dump_file_path
  dump_file_size = File.size(dump_fp)

  # Called once per CREATE TABLE statement.
  on_create_table = Proc.new do |mysql_table|
    redshift_table = Flydata::Mysql::RedshiftTableAdapter.new(mysql_table)
    mysql_table.set_adapter(:redshift, redshift_table)

    tmp_num_inserted_record = 0

    if CREATE_TABLE_OPTION
      print "- Creating table: #{redshift_table.table_name}"
      sql = redshift_table.create_table_sql
      message = flydata.redshift_cluster.run_query(sql)['message']
      if !message.index('ERROR:')
        puts " -> OK"
      elsif message.index('already exists')
        puts " -> Skip"
      else
        raise "Failed to create table. error=#{message}"
      end
    else
      puts "- Parsing table: #{mysql_table.table_name}"
    end

    # dump mysql_table for resume
    sync_fm.save_mysql_table_marshal_dump(mysql_table)
  end

  # Called once per INSERT statement; the return value of emit tells the
  # parser whether the buffer was sent (and so a checkpoint is due).
  on_insert_record = Proc.new do |mysql_table, values_set|
    mysql_table_name = mysql_table.table_name
    records = values_set.collect do |values|
      {table_name: mysql_table_name, log: generate_json(mysql_table, values)}
    end
    sent = forwarder.emit(records)
    tmp_num_inserted_record += 1
    print '.'
    sent
  end

  # Called at every checkpoint. The parser may pass fewer arguments than
  # are declared here, so this has to stay a Proc (not a lambda).
  on_checkpoint = Proc.new do |mysql_table, last_pos, pos_info, state, substate|
    # flush if buffer records exist
    if tmp_num_inserted_record > 0 && forwarder.buffer_record_count > 0
      puts
      forwarder.flush # send buffer data to the server before checkpoint
    end

    # show the current progress
    puts " #{(last_pos.to_f/dump_file_size * 100).round(1)}% (#{last_pos}/#{dump_file_size}) #{Time.now.to_i - bench_start_time.to_i}sec"

    # save check point
    table_name = mysql_table.nil? ? '' : mysql_table.table_name
    sync_fm.save_dump_pos(STATUS_PARSING, table_name, last_pos, pos_info, state, substate)
  end

  # Start parsing dump file
  parser = Flydata::Mysql::MysqlDumpParser.new(dump_fp, option)
  binlog_pos = parser.parse(on_create_table, on_insert_record, on_checkpoint)
  forwarder.close

  if ENV['FLYDATA_BENCHMARK']
    puts "Done!"
    bench_end_time = Time.now
    elapsed_time = bench_end_time.to_i - bench_start_time.to_i
    puts "Elapsed:#{elapsed_time}sec start:#{bench_start_time} end:#{bench_end_time}"
    return true
  end

  # wait until finish
  puts "Start waiting until all data is processed on FlyData..."
  sleep 10
  sleep 10 until check

  sync_fm.save_dump_pos(STATUS_COMPLETE, '', dump_file_size, binlog_pos)
  puts "Congratulations! All data is processed on FlyData. Please check tables and data on your Redshift Cluster."
  puts "After checking, run 'flydata sync:complete' to start continuously synchronization."
end
263
+
264
# Builds the JSON record for one row: pairs each column name of the table
# (in column order) with the value at the same position in `values`.
def generate_json(mysql_table, values)
  record = {}
  mysql_table.columns.keys.each_with_index do |column_name, position|
    record[column_name] = values[position]
  end
  record.to_json
end
271
+ end
272
+ end
273
+
274
+ module Output
275
# Builds the forwarder that matches a forwarder key.
class ForwarderFactory
  # forwarder_key - nil or "tcpforwarder" for TCP, "sslforwarder" for SSL
  # tag, servers, options - passed through to the forwarder constructor
  #
  # Returns the new forwarder. Raises for any other key.
  def self.create(forwarder_key, tag, servers, options = {})
    if forwarder_key.nil? || forwarder_key == "tcpforwarder"
      puts "Creating TCP connection"
      TcpForwarder.new(tag, servers, options)
    elsif forwarder_key == "sslforwarder"
      puts "Creating SSL connection"
      SslForwarder.new(tag, servers, options)
    else
      raise "Unsupported Forwarding type #{forwarder_key}"
    end
  end
end
292
# Buffers msgpack-encoded events in memory and sends them to one of the
# configured servers over a plain TCP socket, opening a new connection
# for every send and rotating through the servers.
class TcpForwarder
  FORWARD_HEADER = [0x92].pack('C')
  BUFFER_SIZE = 1024 * 1024 * 32 # 32M
  DEFUALT_SEND_TIMEOUT = 60 # 1 minute
  RETRY_INTERVAL = 2
  RETRY_LIMIT = 10

  attr_reader :buffer_record_count, :buffer_size

  # tag     - tag written in front of every batch
  # servers - non-empty Array of "host:port" strings
  # options - :buffer_size_limit overrides BUFFER_SIZE
  def initialize(tag, servers, options = {})
    @tag = tag
    unless servers.kind_of?(Array) && !servers.empty?
      raise "Servers must not be empty."
    end
    @servers = servers
    @server_index = 0
    set_options(options)
    reset
  end

  def set_options(options)
    @buffer_size_limit = options[:buffer_size_limit] || BUFFER_SIZE
  end

  # Appends the record(s) to the buffer. Sends the buffer once it is larger
  # than the size limit and returns the result of send; otherwise false.
  def emit(records, time = Time.now.to_i)
    batch = records.kind_of?(Array) ? records : [records]
    batch.each do |record|
      event_data = [time, record].to_msgpack
      @buffer_records << event_data
      @buffer_record_count += 1
      @buffer_size += event_data.bytesize
    end
    @buffer_size > @buffer_size_limit ? send : false
  end

  # Sends the buffered events and clears the buffer.
  # Returns false when the buffer is empty, true after a successful send.
  # A failed attempt is retried with an exponentially growing wait; the
  # last error is re-raised once RETRY_LIMIT is exceeded.
  # NOTE: this overrides Object#send for instances of this class.
  #TODO retry logic
  def send
    return false unless @buffer_size > 0
    puts " -> Sending #{@buffer_record_count}records #{@buffer_size}byte"

    # Benchmark mode measures parsing only: drop the data instead of sending.
    if ENV['FLYDATA_BENCHMARK']
      reset
      return true
    end

    sock = nil
    retry_count = 0
    begin
      sock = connect(pickup_server)

      # Write header
      sock.write FORWARD_HEADER
      # Write tag
      sock.write @tag.to_msgpack
      # Write records
      sock.write [0xdb, @buffer_records.bytesize].pack('CN')
      StringIO.open(@buffer_records) { |io| FileUtils.copy_stream(io, sock) }
    rescue => e
      retry_count += 1
      if retry_count > RETRY_LIMIT
        puts "! Error: Failed to send data. Exceeded the retry limit. retry_count:#{retry_count}"
        raise e
      end
      puts "! Warn: Retring to send data. retry_count:#{retry_count} error=#{e.to_s}"
      wait_time = RETRY_INTERVAL ** retry_count
      puts " Now waiting for next retry. time=#{wait_time}sec"
      sleep wait_time
      retry
    ensure
      if sock
        sock.close rescue nil
      end
    end
    reset
    true
  end

  # Returns the next server in round-robin order.
  #TODO: Check server status
  def pickup_server
    chosen = @servers[@server_index]
    @server_index = (@server_index + 1) % @servers.count
    chosen
  end

  # Opens a TCP socket to "host:port" with linger and send-timeout options
  # both set to DEFUALT_SEND_TIMEOUT seconds.
  def connect(server)
    host, port = server.split(':')
    sock = TCPSocket.new(host, port.to_i)

    # Set options
    linger = [1, DEFUALT_SEND_TIMEOUT].pack('I!I!')
    sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_LINGER, linger)
    send_timeout = [DEFUALT_SEND_TIMEOUT, 0].pack('L!L!')
    sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_SNDTIMEO, send_timeout)

    sock
  end

  # Empties the buffer and its counters.
  def reset
    @buffer_records = ''
    @buffer_record_count = 0
    @buffer_size = 0
  end

  def flush
    send
  end

  def close
    flush
  end
end
417
# Forwarder that wraps the TCP connection of TcpForwarder in SSL/TLS and
# verifies the server certificate against the default certificate store.
class SslForwarder < TcpForwarder
  # Opens the TCP socket via the parent class and performs the SSL
  # handshake on it. Returns the connected SSL socket; closing it also
  # closes the underlying TCP socket (sync_close).
  def connect(server)
    tcp_sock = super
    begin
      ssl_sock = OpenSSL::SSL::SSLSocket.new(tcp_sock, ssl_ctx_with_verification)
      ssl_sock.sync_close = true
      ssl_sock.connect
      ssl_sock
    rescue
      # The caller only closes what connect returned, so a failed handshake
      # would otherwise leave the TCP socket open until garbage collection.
      begin
        tcp_sock.close
      rescue StandardError
        # best effort; the handshake error below is the one worth reporting
      end
      raise
    end
  end

  private

  # NOTE(review): VERIFY_PEER validates the certificate chain only; no
  # hostname check (post_connection_check) is done here - confirm whether
  # that is intended.
  def ssl_ctx_with_verification
    cert_store = OpenSSL::X509::Store.new
    cert_store.set_default_paths
    ssl_ctx = OpenSSL::SSL::SSLContext.new
    ssl_ctx.verify_mode = OpenSSL::SSL::VERIFY_PEER
    ssl_ctx.cert_store = cert_store
    ssl_ctx
  end
end
437
+ end
438
+
439
+ module Redshift
440
# Helpers for turning MySQL identifiers into names Redshift accepts.
module Util
  MAX_TABLENAME_LENGTH = 127
  REDSHIFT_RESERVED_WORDS = %w[
    aes128 aes256 all allowoverwrite analyse analyze and any array
    as asc authorization backup between binary blanksasnull both
    bytedict case cast check collate column constraint create
    credentials cross current_date current_time current_timestamp
    current_user current_user_id default deferrable deflate defrag
    delta delta32k desc disable distinct do else emptyasnull enable
    encode encrypt encryption end except explicit false for foreign
    freeze from full globaldict256 globaldict64k grant group gzip having
    identity ignore ilike in initially inner intersect into is isnull
    join leading left like limit localtime localtimestamp lun luns
    minus mostly13 mostly32 mostly8 natural new not notnull null nulls
    off offline offset old on only open or order outer overlaps parallel
    partition percent placing primary raw readratio recover references
    rejectlog resort restore right select session_user similar some
    sysdate system table tag tdes text255 text32k then to top trailing
    true truncatecolumns union unique user using verbose wallet when
    where with without]
  # Create a symbol-keyed hash for performance
  REDSHIFT_RESERVED_WORDS_HASH = REDSHIFT_RESERVED_WORDS.inject({}) {|h, word| h[word.to_sym] = true; h}

  REDSHIFT_SYSTEM_COLUMNS = %w[oid tableoid xmin cmin xmax cmax ctid]
  REDSHIFT_SYSTEM_COLUMNS_HASH = REDSHIFT_SYSTEM_COLUMNS.inject({}) {|h, word| h[word.to_sym] = true; h}

  # Converts a MySQL table or column name into a valid Redshift name.
  #
  # key  - the original name (String or Symbol)
  # type - :table or :column; column names are also checked against the
  #        Redshift system column names
  #
  # The name is downcased, every character outside [a-z0-9_$] becomes "_",
  # and "_" is prepended when the result is a reserved word or starts with
  # a digit or "$". Returns nil when the result is longer than
  # MAX_TABLENAME_LENGTH. Results are memoized per type.
  def convert_to_valid_name(key, type = :table)
    @memo ||= { table:{}, column:{} }
    cache = (@memo[type] ||= {})
    key_sym = key.to_sym
    return cache[key_sym] if cache[key_sym]

    name = key.to_s.downcase.gsub(/[^a-z0-9_$]/, '_')
    name = "_#{name}" if is_redshift_reserved_word?(name, type) || name =~ /^[0-9$]/
    name = nil if name.length > MAX_TABLENAME_LENGTH

    # Store under the per-type cache. Writing to @memo[key_sym] never
    # produced a cache hit and replaced the per-type hash when the key was
    # "table" or "column", which made every later call fail.
    cache[key_sym] = name
    name
  end

  # Returns true when name is a Redshift reserved word or, for
  # type :column, a Redshift system column name.
  def is_redshift_reserved_word?(name, type = :table)
    return false unless name
    return true if REDSHIFT_RESERVED_WORDS_HASH[name.to_sym] == true

    type == :column && REDSHIFT_SYSTEM_COLUMNS_HASH[name.to_sym] == true
  end
end
494
+ end
495
+
496
+ module Mysql
497
# In-memory description of one MySQL table read from a dump: its name,
# its columns keyed by column name, its primary keys, and any adapters
# attached to it under a key.
class MysqlTable
  attr_accessor :table_name, :columns, :primary_keys

  def initialize(table_name, columns = {}, primary_keys = [])
    @table_name, @columns, @primary_keys = table_name, columns, primary_keys
    @adapters = {}
  end

  # Registers a column hash under its :column_name.
  def add_column(column)
    column_name = column[:column_name]
    @columns[column_name] = column
  end

  # Attaches an adapter object under the given key.
  def set_adapter(key, adapter)
    @adapters.store(key, adapter)
  end

  # Returns the adapter stored under key, or nil.
  def adapter(key)
    @adapters.fetch(key, nil)
  end
end
519
+
520
+ class RedshiftTableAdapter
521
+ include Flydata::Redshift::Util
522
# Builds the Redshift view of a MySQL table: converts the table name and
# the columns, and takes over the primary keys as they are.
def initialize(mysql_table)
  source_name = mysql_table.table_name
  @table_name = convert_to_valid_name(source_name)
  set_columns(mysql_table.columns)
  @primary_keys = mysql_table.primary_keys
end
527
+
528
+ attr_reader :table_name, :columns, :primary_keys
529
+
530
# Returns the CREATE TABLE statement (terminated by ";" and a newline)
# for this table: one definition per column, followed by a primary key
# clause when the table has primary keys.
def create_table_sql
  col_def = @columns.values.map { |column| build_column_def(column) }
  unless @primary_keys.empty?
    # Column definitions use converted names, so the key clause has to go
    # through the same conversion or it can reference a column that does
    # not exist (e.g. `order` is emitted as `_order`).
    pk_names = @primary_keys.map { |pk| convert_to_valid_name(pk, :column) }
    col_def << "primary key (#{pk_names.join(',')})"
  end
  "CREATE TABLE #{@table_name} (#{col_def.join(',')});\n"
end
542
+
543
+ private
544
+
545
# Rebuilds @columns from the MySQL column hash: every column is copied,
# renamed to a valid Redshift column name and converted to a Redshift
# type. The given hash and its column hashes are not modified.
def set_columns(columns)
  @columns = {}
  columns.each_pair do |original_name, column|
    valid_name = convert_to_valid_name(original_name, :column)
    renamed = column.dup
    renamed.store(:column_name, valid_name)
    @columns[valid_name] = convert_column_format_type(renamed)
  end
end
554
+
555
+ # Mysql Field Types
556
+ # http://help.scibit.com/mascon/masconMySQL_Field_Types.html
557
# Returns a copy of the column hash with :format_type mapped from the
# MySQL type to a Redshift type. :format_type_str is dropped and only set
# again for types that carry a size or precision (decimal, char, varchar
# and the blob/text families). Unknown types become 'varchar'.
def convert_column_format_type(column)
  converted = {}.merge(column)
  converted.delete(:format_type_str)

  mysql_type = column[:format_type]
  # [signed, unsigned] targets: unsigned values need the next wider type.
  by_sign = {
    'smallint' => ['smallint', 'integer'],
    'int'      => ['integer', 'bigint'],
    'integer'  => ['integer', 'bigint'],
    # max unsigned bigint is 18446744073709551615
    'bigint'   => ['bigint', 'decimal(20,0)']
  }
  direct = {
    'tinyint'          => 'smallint',
    'mediumint'        => 'integer',
    'float'            => 'real',
    'double'           => 'double precision',
    'double precision' => 'double precision',
    'real'             => 'double precision',
    'date'             => 'date',
    'datetime'         => 'timestamp',
    'time'             => 'timestamp', #TODO: redshift does not support time only column type
    'year'             => 'smallint'
  }

  converted[:format_type] =
    if direct.key?(mysql_type)
      direct[mysql_type]
    elsif by_sign.key?(mysql_type)
      by_sign[mysql_type][column[:unsigned] ? 1 : 0]
    elsif mysql_type == 'decimal' || mysql_type == 'numeric'
      converted[:format_type_str] = "decimal(#{column[:decimal_precision]},#{column[:decimal_scale]})"
      'decimal'
    elsif mysql_type == 'char'
      converted[:format_type_str] = "char(#{column[:format_size]})"
      'char'
    elsif mysql_type == 'varchar'
      converted[:format_type_str] = "varchar(#{column[:format_size]})"
      'varchar'
    elsif %w[tinyblob tinytext].include?(mysql_type)
      converted[:format_size] = 255
      converted[:format_type_str] = "varchar(#{converted[:format_size]})"
      'varchar'
    elsif %w[blob text mediumblob mediumtext longblob longtext].include?(mysql_type)
      converted[:format_size] = 65535 #TODO: review
      converted[:format_type_str] = "varchar(#{converted[:format_size]})"
      'varchar'
    else
      #TODO: discuss
      'varchar'
    end
  converted
end
607
+
608
# Returns the column definition fragment for CREATE TABLE:
# "<name> <type>", followed by " not null" for NOT NULL columns, or by
# " default ..." when the column hash has a :default key (nil is written
# as null, everything else is single-quoted).
def build_column_def(column)
  type_text = column[:format_type_str] || column[:format_type]
  parts = ["#{column[:column_name]} #{type_text}"]
  if column[:not_null]
    parts << "not null"
  elsif column.has_key?(:default)
    default_value = column[:default]
    parts << "default #{default_value.nil? ? 'null' : "'#{default_value}'"}"
  end
  parts.join(' ')
end
621
+
622
+ end
623
+
624
# Builds and runs the mysqldump command for a data entry preference.
class MysqlDumpGenerator
  # host, port, username, password, database, tables
  MYSQL_DUMP_CMD_TEMPLATE = "mysqldump -h %s -P %s -u%s %s --skip-lock-tables --single-transaction --flush-logs --hex-blob --master-data=2 %s %s"
  # Used when the preference has no port: a blank value would render "-P "
  # and make mysqldump take the following "-u<user>" argument as the port.
  DEFAULT_PORT = 3306

  # conf - hash with 'host', 'port', 'username', 'password', 'database'
  #        and 'tables' (comma separated, optional)
  #
  # Every value is shell-escaped because the command runs through a shell.
  def initialize(conf)
    password = conf['password'].to_s.empty? ? "" : "-p#{Shellwords.escape(conf['password'].to_s)}"
    port = conf['port'].to_s.empty? ? DEFAULT_PORT : Shellwords.escape(conf['port'].to_s)
    tables = conf['tables'].to_s.split(',').map { |t| t.strip }.reject { |t| t.empty? }
    tables = tables.map { |t| Shellwords.escape(t) }.join(' ')
    @dump_cmd = MYSQL_DUMP_CMD_TEMPLATE % [
      Shellwords.escape(conf['host'].to_s),
      port,
      Shellwords.escape(conf['username'].to_s),
      password,
      Shellwords.escape(conf['database'].to_s),
      tables
    ]
  end

  # Runs mysqldump and writes its output to file_path.
  # Returns true on success. Raises when the command fails (after printing
  # and removing whatever was written) or when the dump file is missing
  # or empty afterwards.
  def dump(file_path)
    cmd = "#{@dump_cmd} > #{Shellwords.escape(file_path.to_s)}"
    _out, err, status = Open3.capture3(cmd)
    # mysqldump warnings (e.g. about the password on the command line) are
    # noise; everything else on stderr is shown to the user.
    err.to_s.each_line { |l| puts l unless /^Warning:/ =~ l }
    unless status.exitstatus == 0
      if File.exist?(file_path)
        File.open(file_path, 'r') { |f| f.each_line { |l| puts l } }
        FileUtils.rm(file_path)
      end
      raise "Failed to run mysqldump command."
    end
    unless File.exist?(file_path)
      raise "mysqldump file does not exist. Something wrong..."
    end
    if File.size(file_path) == 0
      raise "mysqldump file is empty. Something wrong..."
    end
    true
  end
end
657
+
658
+ class MysqlDumpParser
659
+
660
+ module State
661
+ START = 'START'
662
+ CREATE_TABLE = 'CREATE_TABLE'
663
+ CREATE_TABLE_COLUMNS = 'CREATE_TABLE_COLUMNS'
664
+ CREATE_TABLE_CONSTRAINTS = 'CREATE_TABLE_CONSTRAINTS'
665
+ INSERT_RECORD = 'INSERT_RECORD'
666
+ PARSING_INSERT_RECORD = 'PARSING_INSERT_RECORD'
667
+ end
668
+
669
+ attr_accessor :binlog_pos
670
+
671
# file_path - path of the mysqldump file; must exist
# option    - resume information; :binlog_pos is read here and the whole
#             hash is kept for parse
def initialize(file_path, option = {})
  unless File.exist?(file_path)
    raise "Dump file does not exist. file_path:#{file_path}"
  end
  @file_path = file_path
  @option = option
  @binlog_pos = option[:binlog_pos]
end
677
+
678
# Walks through the dump file as a state machine and reports what it
# finds through three callables:
#
# create_table_block  - called with the MysqlTable after a CREATE TABLE
#                       statement has been read completely
# insert_record_block - called with the table and the parsed value sets of
#                       one INSERT statement; a truthy result triggers a
#                       checkpoint
# check_point_block   - called with (table, file position, binlog
#                       position, state) at every checkpoint
#
# When the option hash given to the constructor has :last_pos, parsing
# resumes from that position with the saved state and table.
# Returns the binlog position read from the CHANGE MASTER line (or the
# one given in the option hash).
def parse(create_table_block, insert_record_block, check_point_block)
  current_state = State::START
  current_table = nil

  state_start = Proc.new do |f|
    line = f.readline.strip
    # -- CHANGE MASTER TO MASTER_LOG_FILE='mysql-bin.000002', MASTER_LOG_POS=120;
    m = /^\-\- CHANGE MASTER TO MASTER_LOG_FILE='(?<binfile>[^']+)', MASTER_LOG_POS=(?<pos>\d+)/.match(line)
    if m
      @binlog_pos = {binfile: m[:binfile], pos: m[:pos].to_i}
      current_state = State::CREATE_TABLE
      check_point_block.call(nil, f.pos, @binlog_pos, current_state)
    end
  end

  state_create_table = Proc.new do |f|
    line = f.readline.strip
    # CREATE TABLE `active_admin_comments` (
    m = /^CREATE TABLE `(?<table_name>[^`]+)`/.match(line)
    if m
      current_table = MysqlTable.new(m[:table_name])
      current_state = State::CREATE_TABLE_COLUMNS
    end
  end

  state_create_table_constraints = Proc.new do |f|
    line = f.readline.strip
    # PRIMARY KEY (`id`),
    if line.start_with?(')')
      create_table_block.call(current_table)
      current_state = State::INSERT_RECORD
      check_point_block.call(current_table, f.pos, @binlog_pos, current_state)
    elsif m = /^PRIMARY KEY \((?<primary_keys>[^\)]+)\)/.match(line)
      current_table.primary_keys = m[:primary_keys].split(',').collect do |pk_str|
        pk_str[1..-2]
      end
    end
  end

  state_create_table_columns = Proc.new do |f|
    start_pos = f.pos
    line = f.readline.strip
    # `author_type` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
    if line.start_with?("\`")
      current_table.add_column(parse_create_table_column(line))
    else
      # First non-column line: rewind and let the constraints state read it.
      current_state = State::CREATE_TABLE_CONSTRAINTS
      f.pos = start_pos
      state_create_table_constraints.call(f)
    end
  end

  state_insert_record = Proc.new do |f|
    original_pos = f.pos
    command = f.read(6)
    if command == 'INSERT'
      # The rest of the line is consumed by the insert parser.
      current_state = State::PARSING_INSERT_RECORD
    else
      f.pos = original_pos
      f.readline
      if command == 'UNLOCK'
        current_state = State::CREATE_TABLE
        check_point_block.call(current_table, f.pos, @binlog_pos, current_state)
      end
    end
  end

  state_parsing_insert_record = Proc.new do |f|
    values_set = InsertParser.new(f).parse
    current_state = State::INSERT_RECORD
    if insert_record_block.call(current_table, values_set)
      check_point_block.call(current_table, f.pos, @binlog_pos, current_state)
    end
  end

  # Start reading file from top
  File.open(@file_path, 'r') do |f|
    # resume
    if @option[:last_pos]
      f.pos = @option[:last_pos].to_i
      current_state = @option[:state]
      current_table = @option[:mysql_table]
    end

    until f.eof? do
      case current_state
      when State::START
        state_start.call(f)
      when State::CREATE_TABLE
        state_create_table.call(f)
      when State::CREATE_TABLE_COLUMNS
        state_create_table_columns.call(f)
      when State::CREATE_TABLE_CONSTRAINTS
        state_create_table_constraints.call(f)
      when State::INSERT_RECORD
        state_insert_record.call(f)
      when State::PARSING_INSERT_RECORD
        state_parsing_insert_record.call(f)
      end
    end
  end
  @binlog_pos
end

# Parses one column line of a CREATE TABLE statement (already stripped),
# e.g. "`id` int(11) NOT NULL AUTO_INCREMENT,", into a column hash with
# :column_name, :format_type_str, :format_type and, where present,
# :format_size or :decimal_precision/:decimal_scale, :default, :not_null
# and :unsigned.
def parse_create_table_column(line)
  column = {}

  line = line[0..-2] if line.end_with?(',')
  items = line.split
  column[:column_name] = items.shift[1..-2]
  column[:format_type_str] = format_type_str = items.shift
  pos = format_type_str.index('(')
  if pos
    ft = column[:format_type] = format_type_str[0..pos-1]
    if ft == 'decimal'
      precision, scale = format_type_str[pos+1..-2].split(',').collect{|v| v.to_i}
      column[:decimal_precision] = precision
      column[:decimal_scale] = scale
    else
      column[:format_size] = format_type_str[pos+1..-2].to_i
    end
  else
    column[:format_type] = format_type_str
  end

  while (item = items.shift) do
    case item
    when 'DEFAULT'
      value = items.shift
      value = value.start_with?('\'') ? value[1..-2] : value
      value = nil if value == 'NULL'
      column[:default] = value
    when 'NOT'
      # 'NOT' has already been shifted off, so the word that follows it is
      # the first remaining item (looking at items[1] never matched).
      if items.first == 'NULL'
        items.shift
        column[:not_null] = true
      end
    when 'unsigned'
      column[:unsigned] = true
    else
      #ignore other options
    end
  end

  column
end
private :parse_create_table_column
825
+
826
# Parses the remainder of one INSERT statement line (everything after the
# word INSERT) into an array of value arrays, one per tuple. Quoted values
# are unescaped, NULL becomes nil, other unquoted values stay strings.
class InsertParser
  #INSERT INTO `data_entries` VALUES (2,2,'access_log'), (2,3,'access_log2');
  module State
    IN_VALUE = 'IN_VALUE'
    NEXT_VALUES = 'NEXT_VALUES'
  end

  # file - IO positioned inside an INSERT line; parse reads up to the end
  #        of that line
  def initialize(file)
    @file = file
    @values = []
    @values_set = []
  end

  def start_ruby_prof
    RubyProf.start if defined?(RubyProf) && !RubyProf.running?
  end

  def stop_ruby_prof
    if defined?(RubyProf) && RubyProf.running?
      result = RubyProf.stop
      #printer = RubyProf::GraphPrinter.new(result)
      printer = RubyProf::GraphHtmlPrinter.new(result)
      #printer.print(STDOUT)
      printer.print(File.new("ruby-prof-out-#{Time.now.to_i}.html", "w"), :min_percent => 3)
    end
  end

  # Reads one line from the file and returns its parsed value sets.
  def parse
    start_ruby_prof
    bench_start_time = Time.now
    target_line = @file.readline
    _parse(target_line)
  ensure
    stop_ruby_prof
    if ENV['FLYDATA_BENCHMARK']
      # to_s: target_line is nil when readline itself raised
      puts " -> time:#{Time.now.to_f - bench_start_time.to_f} size:#{target_line.to_s.size}"
    end
  end

  private

  # The line is split on every comma, so a quoted value that contains
  # commas arrives as several chunks and is stitched back together here.
  def _parse(target_line)
    target_line = target_line.strip
    start_index = target_line.index('(')
    target_line = target_line[start_index..-2]
    items = target_line.split(',')
    index = 0
    cur_state = State::NEXT_VALUES

    loop do
      case cur_state
      when State::NEXT_VALUES
        chars = items[index]
        break unless chars
        # drop the "(" that opens the tuple
        items[index] = chars[1..-1]
        cur_state = State::IN_VALUE
      when State::IN_VALUE
        chars = items[index]
        index += 1
        if chars.start_with?("'")
          # The length checks keep a chunk that is only the opening quote
          # ("'" or "')", from a value that starts with a comma) from being
          # mistaken for a complete, empty value.
          # single item (not last item)
          if chars.length > 1 && closes_value?(chars)
            @values << replace_escape_char(chars[1..-2])
          # single item (last item)
          elsif chars.length > 2 && closes_tuple?(chars)
            @values << replace_escape_char(chars[1..-3])
            finish_tuple
            cur_state = State::NEXT_VALUES
          # multi items
          else
            cur_value = chars[1..-1]
            loop do
              next_chars = items[index]
              index += 1
              if closes_value?(next_chars)
                cur_value << ','
                cur_value << next_chars[0..-2]
                @values << replace_escape_char(cur_value)
                break
              elsif closes_tuple?(next_chars)
                cur_value << ','
                cur_value << next_chars[0..-3]
                @values << replace_escape_char(cur_value)
                finish_tuple
                cur_state = State::NEXT_VALUES
                break
              else
                cur_value << ','
                cur_value << next_chars
              end
            end
          end
        else
          if chars.end_with?(')')
            chars = chars[0..-2]
            @values << (chars == 'NULL' ? nil : chars)
            finish_tuple
            cur_state = State::NEXT_VALUES
          else
            @values << (chars == 'NULL' ? nil : chars)
          end
        end
      else
        raise "Invalid state: #{cur_state}"
      end
    end
    return @values_set
  end

  # True when the chunk ends with the quote that closes a value.
  def closes_value?(chunk)
    chunk.end_with?("'") && !last_char_escaped?(chunk)
  end

  # True when the chunk ends with the closing quote of the last value of a
  # tuple, i.e. with "')".
  def closes_tuple?(chunk)
    chunk.end_with?("')") && !last_char_escaped?(chunk[0..-2])
  end

  # Moves the values collected so far into the result as one tuple.
  def finish_tuple
    @values_set << @values
    @values = []
  end

  ESCAPE_HASH_TABLE = {"\\\\" => "\\", "\\'" => "'", "\\n" => "\n", "\\r" => "\r"}

  def replace_escape_char(original)
    original.gsub(/\\\\|\\'|\\n|\\r/, ESCAPE_HASH_TABLE)
  end

  # This method assume that the last character is '(single quotation)
  # abcd\' -> true
  # abcd\\' -> false (back slash escape back slash)
  # abcd\\\' -> true
  def last_char_escaped?(text)
    flag = false
    (text.length - 2).downto(0) do |i|
      if text[i] == '\\'
        flag = !flag
      else
        break
      end
    end
    flag
  end
end
960
+ end
961
+ end
962
+ end