flydata 0.0.5.6 → 0.1.0

This diff shows the changes between two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
@@ -13,7 +13,7 @@ module Flydata
13
13
  # Start sender(fluentd) process
14
14
  say('Starting sender process.')
15
15
  Dir.chdir(FLYDATA_HOME){
16
- system("fluentd -d #{FLYDATA_HOME}/flydata.pid -l #{FLYDATA_HOME}/flydata.log -c #{FLYDATA_HOME}/flydata.conf")
16
+ system("fluentd -d #{FLYDATA_HOME}/flydata.pid -l #{FLYDATA_HOME}/flydata.log -c #{FLYDATA_HOME}/flydata.conf -p #{File.dirname(__FILE__)}/../fluent-plugins")
17
17
  }
18
18
  sleep 5
19
19
 
@@ -23,7 +23,7 @@ module Flydata
23
23
  data_port = flydata.data_port.get
24
24
  say("Go to your Dashboard! #{flydata.flydata_api_host}/data_ports/#{data_port['id']}")
25
25
  say <<EOF
26
- Please Note: Records and Total Size are updated every 10 minutes. You can download logs one hour after they are generated as they are sent to s3 once per hour.
26
+ Please Note: Records and Total Size are updated every 10-20 minutes.
27
27
  EOF
28
28
  end
29
29
  def stop
@@ -79,8 +79,11 @@ EOF
79
79
  say("Done! Client is ready now.")
80
80
  return true
81
81
  end
82
+ if process_died?
83
+ raise "Client could not been launched. Detail here #{FLYDATA_HOME}/flydata.log"
84
+ end
82
85
  say("Waiting for the client side to become active... (#{i}/#{retry_count})")
83
- sleep 30
86
+ sleep 10
84
87
  end
85
88
  raise "Somthing has gone wrong... Please try setup command again."
86
89
  end
@@ -121,6 +124,10 @@ EOF
121
124
  # Returns true if the process is running
122
125
  `[ -f #{FLYDATA_HOME}/flydata.pid ] && pgrep -P \`cat #{FLYDATA_HOME}/flydata.pid\``.to_i > 0
123
126
  end
127
+ def process_died?
128
+ # Returns true if the process died
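+ # (checks whether the log tail ends with the "process died within ..." message fluentd logs when the daemonized process exits right after launch)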
129
+ !!(`tail -n 1 #{FLYDATA_HOME}/flydata.log` =~ /process died within/)
130
+ end
124
131
  def uploaded_successfully?(data_port_id)
125
132
  res = flydata.get("/data_ports/#{data_port_id}/tail.json")
126
133
  res and res['logs'] and res['logs'].size > 0
@@ -27,7 +27,7 @@ module Flydata
27
27
 
28
28
  choice = nil
29
29
  say('Please select your log path for setting log deletion.')
30
- say(" (About log deletion - http://flydata.com/how-to-uninstall-flydata/)")
30
+ say(" (About log deletion - http://docs.hapyrus.com/faq/how-log-deletion-works/)")
31
31
  newline
32
32
  choose do |menu|
33
33
  menu.index = :letter
@@ -1,3 +1,5 @@
1
+ require_relative 'sync'
2
+
1
3
  module Flydata
2
4
  module Command
3
5
  class Setup < Base
@@ -18,12 +20,25 @@ module Flydata
18
20
  end
19
21
 
20
22
  def initial_run
23
+ last_message = nil
21
24
  run do
25
+ Flydata::Command::Conf.new.copy_templates
22
26
  puts
23
- shown = show_registered_redshift_entries
24
- if shown
27
+ shown_redshift_entries = show_registered_redshift_entries
28
+ shown_mysql_data_entries = show_registered_redshift_mysql_data_entries
29
+ if shown_redshift_entries
25
30
  Flydata::Command::Sender.new.stop
26
31
  true
32
+ elsif shown_mysql_data_entries
33
+ de = retrieve_data_entries.first
34
+ if File.exists?(Flydata::FileUtil::SyncFileManager.new(de).binlog_path)
35
+ Flydata::Command::Sender.new.stop
36
+ true
37
+ else
38
+ last_message = "\n! NOTE: Initial synchronization of MySQL database is required.\n" +
39
+ " Please run 'flydata sync' to start synchronization."
40
+ false
41
+ end
27
42
  else
28
43
  _run
29
44
  end
@@ -31,6 +46,7 @@ module Flydata
31
46
  puts
32
47
  print_usage
33
48
  puts "Completed setup of FlyData!"
49
+ puts last_message if last_message
34
50
  end
35
51
 
36
52
  def run(&block)
@@ -59,6 +75,13 @@ module Flydata
59
75
  [:redshift, :s3backup, :restart_flydata, :cancel])
60
76
  end
61
77
 
78
+ #### flydata-sync(RedshiftMysqlDataEntry)
79
+ def show_registered_redshift_mysql_data_entries
80
+ show_registered_entries('RedshiftMysqlDataEntry') do |de|
81
+ say(" - #{de['display_name']}: flydata-sync (mysql -> redshift)")
82
+ end
83
+ end
84
+
62
85
  #### redshift backup mode
63
86
  def start_redshift_mode
64
87
  newline
@@ -306,7 +329,7 @@ module Flydata
306
329
  say("** Log deletion setting **")
307
330
  say("Flydata has a log deletion feature that flydata will delete old log archives uploaded by flydata automatically.")
308
331
  say("Flydata will delete logs whose last modified timestamp is 7 days ago.")
309
- say("For more details - http://flydata.com/how-to-uninstall-flydata/")
332
+ say("For more details - http://docs.hapyrus.com/faq/how-log-deletion-works/")
310
333
  ask_yes_no("Set auto log deletion mode?")
311
334
  end
312
335
 
@@ -0,0 +1,962 @@
1
+ require 'msgpack'
2
+ require 'open3'
3
+ require 'flydata/sync_file_manager'
4
+ #require 'ruby-prof'
5
+
6
+ module Flydata
7
+ module Command
8
+ class Sync < Base
9
+ include Helpers
10
+ CREATE_TABLE_OPTION = !!(ENV['FLYDATA_CREATE_TABLE_OPTION']) || false
11
+ INSERT_PROGRESS_INTERVAL = 1000
12
+
13
+ # for dump.pos file
14
+ STATUS_PARSING = 'PARSING'
15
+ STATUS_COMPLETE = 'COMPLETE'
16
+
17
+ def run
18
+ de = retrieve_data_entries.first
19
+ raise "There are no data entry." unless de
20
+ case de['type']
21
+ when 'RedshiftMysqlDataEntry'
22
+ sync_mysql_to_redshift(de)
23
+ else
24
+ raise "No supported data entry. Only mysql-redshift sync is supported."
25
+ end
26
+ end
27
+
28
+ def reset
29
+ de = retrieve_data_entries.first
30
+ sync_fm = Flydata::FileUtil::SyncFileManager.new(de)
31
+ [sync_fm.dump_file_path, sync_fm.dump_pos_path, sync_fm.binlog_path, sync_fm.mysql_table_marshal_dump_path, sync_fm.table_position_file_paths].flatten.each do |path|
32
+ FileUtils.rm(path) if File.exists?(path)
33
+ end
34
+ end
35
+
36
+ def check
37
+ de = retrieve_data_entries.first
38
+ ret = do_check(de)
39
+ if ret['complete']
40
+ puts "No buffer data on FlyData. #{ret.inspect}"
41
+ true
42
+ else
43
+ puts "Now processing data on FlyData. #{ret.inspect}"
44
+ false
45
+ end
46
+ end
47
+
48
+ def complete
49
+ de = retrieve_data_entries.first
50
+ sync_fm = Flydata::FileUtil::SyncFileManager.new(de)
51
+ info = sync_fm.load_dump_pos
52
+ if info[:status] == STATUS_COMPLETE
53
+ sync_fm.save_binlog(info[:binlog_pos])
54
+ Flydata::Command::Sender.new.start
55
+ else
56
+ raise "Initial sync status is not complete. Try running 'flydata sync'."
57
+ end
58
+ end
59
+
60
+ # skip initial sync
61
+ def skip
62
+ de = retrieve_data_entries.first
63
+ sync_fm = Flydata::FileUtil::SyncFileManager.new(de)
64
+ binlog_path = sync_fm.binlog_path
65
+ `touch #{binlog_path}`
66
+ puts "Created an empty binlog position file."
67
+ puts "-> #{binlog_path}"
68
+ puts "Run 'flydata start' to start continuous sync."
69
+ end
70
+
71
+ private
72
+
73
+ def do_check(de)
74
+ flydata.data_entry.buffer_stat(de['id'], env_mode)
75
+ end
76
+
77
+ def sync_mysql_to_redshift(de)
78
+ dp = flydata.data_port.get
79
+ sync_fm = Flydata::FileUtil::SyncFileManager.new(de)
80
+
81
+ # Check client condition
82
+ if File.exists?(sync_fm.binlog_path)
83
+ raise "Already synchronized. If you want to do initial sync, delete #{sync_fm.binlog_path}."
84
+ end
85
+
86
+ # Copy template if not exists
87
+ unless Flydata::Preference::DataEntryPreference.conf_exists?(de)
88
+ Flydata::Command::Conf.new.copy_templates
89
+ end
90
+
91
+ if generate_mysqldump(de, sync_fm)
92
+ parse_mysqldump(dp, de, sync_fm)
93
+ end
94
+ end
95
+
96
+ def generate_mysqldump(de, sync_fm, overwrite = false)
97
+
98
+ # validate parameter
99
+ %w(host username database).each do |k|
100
+ if de['mysql_data_entry_preference'][k].to_s.empty?
101
+ raise "'#{k}' is required. Set the value in the conf file " +
102
+ "-> #{Flydata::Preference::DataEntryPreference.conf_path(de)}"
103
+ end
104
+ end
105
+
106
+ puts "Running mysqldump... host:#{de['mysql_data_entry_preference']['host']} " +
107
+ "username:#{de['mysql_data_entry_preference']['username']} " +
108
+ "database:#{de['mysql_data_entry_preference']['database']}"
109
+ if de['mysql_data_entry_preference']['data_servers']
110
+ puts "Send to Custom Data Servers: #{de['mysql_data_entry_preference']['data_servers']}"
111
+ end
112
+
113
+ if de['mysql_data_entry_preference']['tables']
114
+ puts " target tables: #{de['mysql_data_entry_preference']['tables']}"
115
+ else
116
+ puts " target tables: <all-tables>"
117
+ end
118
+
119
+ fp = sync_fm.dump_file_path
120
+ if File.exists?(fp) and File.size(fp) > 0 and not overwrite
121
+ puts " -> Skip"
122
+ return fp
123
+ end
124
+
125
+ puts "[Confirm] mysqldump path: #{fp}"
126
+ if ask_yes_no('OK?')
127
+ Flydata::Mysql::MysqlDumpGenerator.new(de['mysql_data_entry_preference']).dump(fp)
128
+ else
129
+ newline
130
+ puts "You can change the mysqldump path with 'mysqldump_path' in the conf file."
131
+ puts "Edit '#{Flydata::Preference::DataEntryPreference.conf_path(de)}'"
132
+ return nil
133
+ end
134
+ puts " -> Done"
135
+ fp
136
+ end
137
+
138
+ # Checkpoint
139
+ # -- CHANGE MASTER TO MASTER_LOG_FILE='mysql-bin.000215', MASTER_LOG_POS=120;
140
+ # <- checkpoint(after binlog)
141
+ #...
142
+ #CREATE TABLE `accounts` (
143
+ #...
144
+ #) ENGINE=InnoDB AUTO_INCREMENT=71 DEFAULT CHARSET=utf8;
145
+ # <- checkpoint(after create table)
146
+ #INSERT INTO `accounts` values (x,x,x),(y,y,y),....();
147
+ #INSERT INTO `accounts` values (x,x,x),(y,y,y),....();
148
+ #INSERT INTO `accounts` values (x,x,x),(y,y,y),....();
149
+ # <- checkpoint(when buffered data is sent to server)
150
+ #INSERT INTO `accounts` values (x,x,x),(y,y,y),....();
151
+ #INSERT INTO `accounts` values (x,x,x),(y,y,y),....();
152
+ #
153
+ #...
154
+ #UNLOCK TABLES;
155
+ # <- checkpoint
156
+ #...
157
+ #CREATE TABLE ...
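+ # Each checkpoint records (table, dump-file offset, binlog position, parser state) via save_dump_pos so parsing can resume after an interruption.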
158
+ def parse_mysqldump(dp, de, sync_fm)
159
+ puts "Parsing mysqldump file..."
160
+
161
+ # Prepare forwarder
162
+ de_tag_name = de["tag_name#{env_suffix}"]
163
+ server_port = dp['server_port']
164
+ servers = if de['mysql_data_entry_preference']['data_servers']
165
+ de['mysql_data_entry_preference']['data_servers'].split(',')
166
+ else
167
+ dp["servers#{env_suffix}"].collect{|s| "#{s}:#{server_port}"}
168
+ end
169
+ forwarder_type = de['mysql_data_entry_preference']['forwarder'] ||
170
+ (dp['ssl_enabled'] ? 'sslforwarder' : 'tcpforwarder')
171
+ forwarder = Flydata::Output::ForwarderFactory.create(forwarder_type, de_tag_name, servers)
172
+
173
+ # Load dump.pos file for resume
174
+ dump_pos_info = sync_fm.load_dump_pos
175
+ option = dump_pos_info || {}
176
+ if option[:table_name]
177
+ puts "Resuming... Last processed table: #{option[:table_name]}"
178
+ end
179
+
180
+ bench_start_time = Time.now
181
+
182
+ # Start parsing dump file
183
+ tmp_num_inserted_record = 0
184
+ dump_fp = sync_fm.dump_file_path
185
+ dump_file_size = File.size(dump_fp)
186
+ binlog_pos = Flydata::Mysql::MysqlDumpParser.new(dump_fp, option).parse(
187
+ # create table
188
+ Proc.new { |mysql_table|
189
+ redshift_table = Flydata::Mysql::RedshiftTableAdapter.new(mysql_table)
190
+ mysql_table.set_adapter(:redshift, redshift_table)
191
+
192
+ tmp_num_inserted_record = 0
193
+
194
+ if CREATE_TABLE_OPTION
195
+ print "- Creating table: #{redshift_table.table_name}"
196
+ sql = redshift_table.create_table_sql
197
+ ret = flydata.redshift_cluster.run_query(sql)
198
+ if ret['message'].index('ERROR:')
199
+ if ret['message'].index('already exists')
200
+ puts " -> Skip"
201
+ else
202
+ raise "Failed to create table. error=#{ret['message']}"
203
+ end
204
+ else
205
+ puts " -> OK"
206
+ end
207
+ else
208
+ puts "- Parsing table: #{mysql_table.table_name}"
209
+ end
210
+
211
+ # dump mysql_table for resume
212
+ sync_fm.save_mysql_table_marshal_dump(mysql_table)
213
+ },
214
+ # insert record
215
+ Proc.new { |mysql_table, values_set|
216
+ mysql_table_name = mysql_table.table_name
217
+ records = values_set.collect do |values|
218
+ json = generate_json(mysql_table, values)
219
+ {table_name: mysql_table_name, log: json}
220
+ end
221
+ ret = forwarder.emit(records)
222
+ tmp_num_inserted_record += 1
223
+ print '.'
224
+ ret
225
+ },
226
+ # checkpoint
227
+ Proc.new { |mysql_table, last_pos, binlog_pos, state, substate|
228
+ # flush if buffer records exist
229
+ if tmp_num_inserted_record > 0 && forwarder.buffer_record_count > 0
230
+ puts
231
+ forwarder.flush # send buffer data to the server before checkpoint
232
+ end
233
+
234
+ # show the current progress
235
+ puts " #{(last_pos.to_f/dump_file_size * 100).round(1)}% (#{last_pos}/#{dump_file_size}) #{Time.now.to_i - bench_start_time.to_i}sec"
236
+
237
+ # save check point
238
+ table_name = mysql_table.nil? ? '' : mysql_table.table_name
239
+ sync_fm.save_dump_pos(STATUS_PARSING, table_name, last_pos, binlog_pos, state, substate)
240
+ }
241
+ )
242
+ forwarder.close
243
+
244
+ if ENV['FLYDATA_BENCHMARK']
245
+ puts "Done!"
246
+ bench_end_time = Time.now
247
+ elapsed_time = bench_end_time.to_i - bench_start_time.to_i
248
+ puts "Elapsed:#{elapsed_time}sec start:#{bench_start_time} end:#{bench_end_time}"
249
+ return true
250
+ end
251
+
252
+ # wait until finish
253
+ puts "Start waiting until all data is processed on FlyData..."
254
+ sleep 10
255
+ until check
256
+ sleep 10
257
+ end
258
+
259
+ sync_fm.save_dump_pos(STATUS_COMPLETE, '', dump_file_size, binlog_pos)
260
+ puts "Congratulations! All data is processed on FlyData. Please check tables and data on your Redshift Cluster."
261
+ puts "After checking, run 'flydata sync:complete' to start continuously synchronization."
262
+ end
263
+
264
+ def generate_json(mysql_table, values)
265
+ h = {}
266
+ mysql_table.columns.each_key.with_index do |k, i|
267
+ h[k] = values[i]
268
+ end
269
+ h.to_json
270
+ end
271
+ end
272
+ end
273
+
274
+ module Output
275
+ class ForwarderFactory
276
+
277
+ def self.create(forwarder_key, tag, servers, options = {})
278
+ case forwarder_key
279
+ when nil, "tcpforwarder"
280
+ puts "Creating TCP connection"
281
+ forward = TcpForwarder.new(tag, servers, options)
282
+ when "sslforwarder"
283
+ puts "Creating SSL connection"
284
+ forward = SslForwarder.new(tag, servers, options)
285
+ else
286
+ raise "Unsupported Forwarding type #{forwarder_key}"
287
+ end
288
+ forward
289
+ end
290
+
291
+ end
292
+ class TcpForwarder
293
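+ # msgpack header for a 2-element array: a forward message is [tag, entries]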
+ FORWARD_HEADER = [0x92].pack('C')
294
+ BUFFER_SIZE = 1024 * 1024 * 32 # 32M
295
+ DEFAULT_SEND_TIMEOUT = 60 # 1 minute
296
+ RETRY_INTERVAL = 2
297
+ RETRY_LIMIT = 10
298
+
299
+ def initialize(tag, servers, options = {})
300
+ @tag = tag
301
+ unless servers and servers.kind_of?(Array) and not servers.empty?
302
+ raise "Servers must not be empty."
303
+ end
304
+ @servers = servers
305
+ @server_index = 0
306
+ set_options(options)
307
+ reset
308
+ end
309
+
310
+ def set_options(options)
311
+ if options[:buffer_size_limit]
312
+ @buffer_size_limit = options[:buffer_size_limit]
313
+ else
314
+ @buffer_size_limit = BUFFER_SIZE
315
+ end
316
+ end
317
+
318
+ attr_reader :buffer_record_count, :buffer_size
319
+
320
+ def emit(records, time = Time.now.to_i)
321
+ records = [records] unless records.kind_of?(Array)
322
+ records.each do |record|
323
+ event_data = [time,record].to_msgpack
324
+ @buffer_records << event_data
325
+ @buffer_record_count += 1
326
+ @buffer_size += event_data.bytesize
327
+ end
328
+ if @buffer_size > @buffer_size_limit
329
+ send
330
+ else
331
+ false
332
+ end
333
+ end
334
+
335
+ #TODO retry logic
336
+ def send
337
+ if @buffer_size > 0
338
+ puts " -> Sending #{@buffer_record_count}records #{@buffer_size}byte"
339
+ else
340
+ return false
341
+ end
342
+ if ENV['FLYDATA_BENCHMARK']
343
+ reset
344
+ return true
345
+ end
346
+ sock = nil
347
+ retry_count = 0
348
+ begin
349
+ sock = connect(pickup_server)
350
+
351
+ # Write header
352
+ sock.write FORWARD_HEADER
353
+ # Write tag
354
+ sock.write @tag.to_msgpack
355
+ # Write records
356
+ sock.write [0xdb, @buffer_records.bytesize].pack('CN')
357
+ StringIO.open(@buffer_records) do |i|
358
+ FileUtils.copy_stream(i, sock)
359
+ end
360
+ rescue => e
361
+ retry_count += 1
362
+ if retry_count > RETRY_LIMIT
363
+ puts "! Error: Failed to send data. Exceeded the retry limit. retry_count:#{retry_count}"
364
+ raise e
365
+ end
366
+ puts "! Warn: Retring to send data. retry_count:#{retry_count} error=#{e.to_s}"
367
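+ # exponential backoff: 2, 4, 8, ... seconds between retries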
+ wait_time = RETRY_INTERVAL ** retry_count
368
+ puts " Now waiting for next retry. time=#{wait_time}sec"
369
+ sleep wait_time
370
+ retry
371
+ ensure
372
+ if sock
373
+ sock.close rescue nil
374
+ end
375
+ end
376
+ reset
377
+ true
378
+ end
379
+
380
+ #TODO: Check server status
381
+ def pickup_server
382
+ ret_server = @servers[@server_index]
383
+ @server_index += 1
384
+ if @server_index >= (@servers.count)
385
+ @server_index = 0
386
+ end
387
+ ret_server
388
+ end
389
+
390
+ def connect(server)
391
+ host, port = server.split(':')
392
+ sock = TCPSocket.new(host, port.to_i)
393
+
394
+ # Set socket options: SO_LINGER (linger on close) and SO_SNDTIMEO (send timeout), both using the default send timeout
395
+ opt = [1, DEFAULT_SEND_TIMEOUT].pack('I!I!')
396
+ sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_LINGER, opt)
397
+ opt = [DEFAULT_SEND_TIMEOUT, 0].pack('L!L!')
398
+ sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_SNDTIMEO, opt)
399
+
400
+ sock
401
+ end
402
+
403
+ def reset
404
+ @buffer_records = ''
405
+ @buffer_record_count = 0
406
+ @buffer_size = 0
407
+ end
408
+
409
+ def flush
410
+ send
411
+ end
412
+
413
+ def close
414
+ flush
415
+ end
416
+ end
417
+ class SslForwarder < TcpForwarder
418
+ def connect(server)
419
+ tcp_sock = super
420
+ ssl_ctx = ssl_ctx_with_verification
421
+ ssl_sock = OpenSSL::SSL::SSLSocket.new(tcp_sock, ssl_ctx)
422
+ ssl_sock.sync_close = true
423
+ ssl_sock.connect
424
+ ssl_sock
425
+ end
426
+
427
+ private
428
+ def ssl_ctx_with_verification
429
+ cert_store = OpenSSL::X509::Store.new
430
+ cert_store.set_default_paths
431
+ ssl_ctx = OpenSSL::SSL::SSLContext.new
432
+ ssl_ctx.verify_mode = OpenSSL::SSL::VERIFY_PEER
433
+ ssl_ctx.cert_store = cert_store
434
+ ssl_ctx
435
+ end
436
+ end
437
+ end
438
+
439
+ module Redshift
440
+ module Util
441
+ MAX_TABLENAME_LENGTH = 127
442
+ REDSHIFT_RESERVED_WORDS = %w[
443
+ aes128 aes256 all allowoverwrite analyse analyze and any array
444
+ as asc authorization backup between binary blanksasnull both
445
+ bytedict case cast check collate column constraint create
446
+ credentials cross current_date current_time current_timestamp
447
+ current_user current_user_id default deferrable deflate defrag
448
+ delta delta32k desc disable distinct do else emptyasnull enable
449
+ encode encrypt encryption end except explicit false for foreign
450
+ freeze from full globaldict256 globaldict64k grant group gzip having
451
+ identity ignore ilike in initially inner intersect into is isnull
452
+ join leading left like limit localtime localtimestamp lun luns
453
+ minus mostly13 mostly32 mostly8 natural new not notnull null nulls
454
+ off offline offset old on only open or order outer overlaps parallel
455
+ partition percent placing primary raw readratio recover references
456
+ rejectlog resort restore right select session_user similar some
457
+ sysdate system table tag tdes text255 text32k then to top trailing
458
+ true truncatecolumns union unique user using verbose wallet when
459
+ where with without]
460
+ # Create a symbol-keyed hash for performance
461
+ REDSHIFT_RESERVED_WORDS_HASH = REDSHIFT_RESERVED_WORDS.inject({}) {|h, word| h[word.to_sym] = true; h}
462
+
463
+ REDSHIFT_SYSTEM_COLUMNS = %w[oid tableoid xmin cmin xmax cmax ctid]
464
+ REDSHIFT_SYSTEM_COLUMNS_HASH = REDSHIFT_SYSTEM_COLUMNS.inject({}) {|h, word| h[word.to_sym] = true; h}
465
+
466
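+ # Lower-cases the name, replaces characters Redshift does not accept with '_', and prefixes '_' when the result is a reserved word or starts with a digit or '$'; returns nil if the name exceeds MAX_TABLENAME_LENGTH.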
+ def convert_to_valid_name(key, type = :table)
467
+ @memo ||= { table:{}, column:{} }
468
+ key_sym = key.to_sym
469
+ return @memo[type][key_sym] if @memo[type][key_sym]
470
+
471
+ name = key.downcase.gsub(/[^a-z0-9_$]/, '_')
472
+ name = "_#{name}" if is_redshift_reserved_word?(name, type) or name =~ /^[0-9$]/
473
+ if name.length > MAX_TABLENAME_LENGTH
474
+ name = nil
475
+ end
476
+ @memo[type][key_sym] = name
477
+ name
478
+ end
479
+
480
+ def is_redshift_reserved_word?(name, type = :table)
481
+ return false unless name
482
+ return true if REDSHIFT_RESERVED_WORDS_HASH[name.to_sym] == true
483
+
484
+ case type
485
+ when :table
486
+ false
487
+ when :column
488
+ REDSHIFT_SYSTEM_COLUMNS_HASH[name.to_sym] == true
489
+ else
490
+ false
491
+ end
492
+ end
493
+ end
494
+ end
495
+
496
+ module Mysql
497
+ class MysqlTable
498
+ def initialize(table_name, columns = {}, primary_keys = [])
499
+ @table_name = table_name
500
+ @columns = columns
501
+ @primary_keys = primary_keys
502
+ @adapters = {}
503
+ end
504
+
505
+ attr_accessor :table_name, :columns, :primary_keys
506
+
507
+ def add_column(column)
508
+ @columns[column[:column_name]] = column
509
+ end
510
+
511
+ def set_adapter(key, adapter)
512
+ @adapters[key] = adapter
513
+ end
514
+
515
+ def adapter(key)
516
+ @adapters[key]
517
+ end
518
+ end
519
+
520
+ class RedshiftTableAdapter
521
+ include Flydata::Redshift::Util
522
+ def initialize(mysql_table)
523
+ @table_name = convert_to_valid_name(mysql_table.table_name)
524
+ set_columns(mysql_table.columns)
525
+ @primary_keys = mysql_table.primary_keys
526
+ end
527
+
528
+ attr_reader :table_name, :columns, :primary_keys
529
+
530
+ def create_table_sql
531
+ col_def = @columns.inject([]) { |list, (cn, column)|
532
+ list << build_column_def(column)
533
+ list
534
+ }
535
+ if @primary_keys.count > 0
536
+ col_def << "primary key (#{@primary_keys.join(',')})"
537
+ end
538
+ <<EOT
539
+ CREATE TABLE #{@table_name} (#{col_def.join(',')});
540
+ EOT
541
+ end
542
+
543
+ private
544
+
545
+ def set_columns(columns)
546
+ @columns = {}
547
+ columns.each do |k, column|
548
+ new_k = convert_to_valid_name(k, :column)
549
+ new_column = column.dup
550
+ new_column[:column_name] = new_k
551
+ @columns[new_k] = convert_column_format_type(new_column)
552
+ end
553
+ end
554
+
555
+ # Mysql Field Types
556
+ # http://help.scibit.com/mascon/masconMySQL_Field_Types.html
557
+ def convert_column_format_type(column)
558
+ ret_c = {}.merge(column)
559
+ ret_c.delete(:format_type_str)
560
+ ret_c[:format_type] = case column[:format_type]
561
+ when 'tinyint'
562
+ 'smallint'
563
+ when 'smallint'
564
+ column[:unsigned] ? 'integer' : 'smallint'
565
+ when 'mediumint'
566
+ 'integer'
567
+ when 'int', 'integer'
568
+ column[:unsigned] ? 'bigint' : 'integer'
569
+ when 'bigint'
570
+ # max unsigned bigint is 18446744073709551615
571
+ column[:unsigned] ? 'decimal(20,0)' : 'bigint'
572
+ when 'float'
573
+ 'real'
574
+ when 'double', 'double precision', 'real'
575
+ 'double precision'
576
+ when 'decimal', 'numeric'
577
+ ret_c[:format_type_str] = "decimal(#{column[:decimal_precision]},#{column[:decimal_scale]})"
578
+ 'decimal'
579
+ when 'date'
580
+ 'date'
581
+ when 'datetime'
582
+ 'timestamp'
583
+ when 'time'
584
+ 'timestamp' #TODO: redshift does not support time only column type
585
+ when 'year'
586
+ 'smallint'
587
+ when 'char'
588
+ ret_c[:format_type_str] = "char(#{column[:format_size]})"
589
+ 'char'
590
+ when 'varchar'
591
+ ret_c[:format_type_str] = "varchar(#{column[:format_size]})"
592
+ 'varchar'
593
+ when 'tinyblob','tinytext'
594
+ ret_c[:format_size] = 255
595
+ ret_c[:format_type_str] = "varchar(#{ret_c[:format_size]})"
596
+ 'varchar'
597
+ when 'blob','text', 'mediumblob', 'mediumtext', 'longblob', 'longtext'
598
+ ret_c[:format_size] = 65535 #TODO: review
599
+ ret_c[:format_type_str] = "varchar(#{ret_c[:format_size]})"
600
+ 'varchar'
601
+ else
602
+ #TODO: discuss
603
+ 'varchar'
604
+ end
605
+ ret_c
606
+ end
607
+
608
+ def build_column_def(column)
609
+ format_type = column[:format_type]
610
+ format_type = column[:format_type_str] if column[:format_type_str]
611
+ def_str = "#{column[:column_name]} #{format_type}"
612
+ if column[:not_null]
613
+ def_str << " not null"
614
+ elsif column.has_key?(:default)
615
+ val = column[:default]
616
+ val = val.nil? ? 'null' : "'#{val}'"
617
+ def_str << " default #{val}"
618
+ end
619
+ def_str
620
+ end
621
+
622
+ end
623
+
624
+ class MysqlDumpGenerator
625
+ # host, port, username, password, database, tables
626
+ MYSQL_DUMP_CMD_TEMPLATE = "mysqldump -h %s -P %s -u%s %s --skip-lock-tables --single-transaction --flush-logs --hex-blob --master-data=2 %s %s"
627
+ def initialize(conf)
628
+ password = conf['password'].to_s.empty? ? "" : "-p#{conf['password']}"
629
+ tables = if conf['tables']
630
+ conf['tables'].split(',').join(' ')
631
+ else
632
+ ''
633
+ end
634
+ @dump_cmd = MYSQL_DUMP_CMD_TEMPLATE %
635
+ [conf['host'], conf['port'], conf['username'], password, conf['database'], tables]
636
+ end
637
+ def dump(file_path)
638
+ cmd = "#{@dump_cmd} > #{file_path}"
639
+ o, e, s = Open3.capture3(cmd)
640
+ e.to_s.each_line {|l| puts l unless /^Warning:/ =~ l } unless e.to_s.empty?
641
+ unless s.exitstatus == 0
642
+ if File.exists?(file_path)
643
+ File.open(file_path, 'r') {|f| f.each_line{|l| puts l}}
644
+ FileUtils.rm(file_path)
645
+ end
646
+ raise "Failed to run mysqldump command."
647
+ end
648
+ unless File.exists?(file_path)
649
+ raise "mysqldump file does not exist. Something wrong..."
650
+ end
651
+ if File.size(file_path) == 0
652
+ raise "mysqldump file is empty. Something wrong..."
653
+ end
654
+ true
655
+ end
656
+ end
657
+
658
+ class MysqlDumpParser
659
+
660
+ module State
661
+ START = 'START'
662
+ CREATE_TABLE = 'CREATE_TABLE'
663
+ CREATE_TABLE_COLUMNS = 'CREATE_TABLE_COLUMNS'
664
+ CREATE_TABLE_CONSTRAINTS = 'CREATE_TABLE_CONSTRAINTS'
665
+ INSERT_RECORD = 'INSERT_RECORD'
666
+ PARSING_INSERT_RECORD = 'PARSING_INSERT_RECORD'
667
+ end
668
+
669
+ attr_accessor :binlog_pos
670
+
671
+ def initialize(file_path, option = {})
672
+ @file_path = file_path
673
+ raise "Dump file does not exist. file_path:#{file_path}" unless File.exist?(file_path)
674
+ @binlog_pos = option[:binlog_pos]
675
+ @option = option
676
+ end
677
+
678
+ def parse(create_table_block, insert_record_block, check_point_block)
679
+ invalid_file = false
680
+ current_state = State::START
681
+ substate = nil
682
+
683
+ state_start = Proc.new do |f|
684
+ line = f.readline.strip
685
+ # -- CHANGE MASTER TO MASTER_LOG_FILE='mysql-bin.000002', MASTER_LOG_POS=120;
686
+ m = /^\-\- CHANGE MASTER TO MASTER_LOG_FILE='(?<binfile>[^']+)', MASTER_LOG_POS=(?<pos>\d+)/.match(line)
687
+ if m
688
+ @binlog_pos = {binfile: m[:binfile], pos: m[:pos].to_i}
689
+ current_state = State::CREATE_TABLE
690
+ check_point_block.call(nil, f.pos, @binlog_pos, current_state)
691
+ end
692
+ end
693
+
694
+ current_table = nil
695
+ state_create_table = Proc.new do |f|
696
+ line = f.readline.strip
697
+ # CREATE TABLE `active_admin_comments` (
698
+ m = /^CREATE TABLE `(?<table_name>[^`]+)`/.match(line)
699
+ if m
700
+ current_table = MysqlTable.new(m[:table_name])
701
+ current_state = State::CREATE_TABLE_COLUMNS
702
+ end
703
+ end
704
+
705
+ state_create_table_constraints = Proc.new do |f|
706
+ line = f.readline.strip
707
+ # PRIMARY KEY (`id`),
708
+ if line.start_with?(')')
709
+ create_table_block.call(current_table)
710
+ current_state = State::INSERT_RECORD
711
+ check_point_block.call(current_table, f.pos, @binlog_pos, current_state)
712
+ elsif m = /^PRIMARY KEY \((?<primary_keys>[^\)]+)\)/.match(line)
713
+ current_table.primary_keys = m[:primary_keys].split(',').collect do |pk_str|
714
+ pk_str[1..-2]
715
+ end
716
+ end
717
+ end
718
+
719
+ state_create_table_columns = Proc.new do |f|
720
+ start_pos = f.pos
721
+ line = f.readline.strip
722
+ # `author_type` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
723
+ if line.start_with?("\`")
724
+ column = {}
725
+
726
+ # parse column line
727
+ line = line[0..-2] if line.end_with?(',')
728
+ items = line.split
729
+ column[:column_name] = items.shift[1..-2]
730
+ column[:format_type_str] = format_type_str = items.shift
731
+ pos = format_type_str.index('(')
732
+ if pos
733
+ ft = column[:format_type] = format_type_str[0..pos-1]
734
+ if ft == 'decimal'
735
+ precision, scale = format_type_str[pos+1..-2].split(',').collect{|v| v.to_i}
736
+ column[:decimal_precision] = precision
737
+ column[:decimal_scale] = scale
738
+ else
739
+ column[:format_size] = format_type_str[pos+1..-2].to_i
740
+ end
741
+ else
742
+ column[:format_type] = format_type_str
743
+ end
744
+ while (item = items.shift) do
745
+ case item
746
+ when 'DEFAULT'
747
+ value = items.shift
748
+ value = value.start_with?('\'') ? value[1..-2] : value
749
+ value = nil if value == 'NULL'
750
+ column[:default] = value
751
+ when 'NOT'
752
+ if items[0] == 'NULL'
753
+ items.shift
754
+ column[:not_null] = true
755
+ end
756
+ when 'unsigned'
757
+ column[:unsigned] = true
758
+ else
759
+ #ignore other options
760
+ end
761
+ end
762
+
763
+ current_table.add_column(column)
764
+ else
765
+ current_state = State::CREATE_TABLE_CONSTRAINTS
766
+ f.pos = start_pos
767
+ state_create_table_constraints.call(f)
768
+ end
769
+ end
770
+
771
+ state_insert_record = Proc.new do |f|
772
+ original_pos = f.pos
773
+ command = f.read(6)
774
+ if command == 'INSERT'
775
+ current_state = State::PARSING_INSERT_RECORD
776
+ else
777
+ f.pos = original_pos
778
+ f.readline
779
+ if command == 'UNLOCK'
780
+ current_state = State::CREATE_TABLE
781
+ check_point_block.call(current_table, f.pos, @binlog_pos, current_state)
782
+ end
783
+ end
784
+ end
785
+
786
+ state_parsing_insert_record = Proc.new do |f|
787
+ values_set = InsertParser.new(f).parse
788
+ current_state = State::INSERT_RECORD
789
+ if insert_record_block.call(current_table, values_set)
790
+ check_point_block.call(current_table, f.pos, @binlog_pos, current_state)
791
+ end
792
+ end
793
+
794
+ # Start reading file from top
795
+ File.open(@file_path, 'r') do |f|
796
+ last_saved_pos = 0
797
+
798
+ # resume
799
+ if @option[:last_pos]
800
+ f.pos = @option[:last_pos].to_i
801
+ current_state = @option[:state]
802
+ substate = @option[:substate]
803
+ current_table = @option[:mysql_table]
804
+ end
805
+
806
+ until f.eof? do
807
+ case current_state
808
+ when State::START
809
+ state_start.call(f)
810
+ when State::CREATE_TABLE
811
+ state_create_table.call(f)
812
+ when State::CREATE_TABLE_COLUMNS
813
+ state_create_table_columns.call(f)
814
+ when State::CREATE_TABLE_CONSTRAINTS
815
+ state_create_table_constraints.call(f)
816
+ when State::INSERT_RECORD
817
+ state_insert_record.call(f)
818
+ when State::PARSING_INSERT_RECORD
819
+ state_parsing_insert_record.call(f)
820
+ end
821
+ end
822
+ end
823
+ @binlog_pos
824
+ end
825
+
826
+ class InsertParser
827
+ #INSERT INTO `data_entries` VALUES (2,2,'access_log'), (2,3,'access_log2');
828
+ module State
829
+ IN_VALUE = 'IN_VALUE'
830
+ NEXT_VALUES = 'NEXT_VALUES'
831
+ end
832
+
833
+ def initialize(file)
834
+ @file = file
835
+ @values = []
836
+ @values_set = []
837
+ end
838
+
839
+ def start_ruby_prof
840
+ RubyProf.start if defined?(RubyProf) and not RubyProf.running?
841
+ end
842
+
843
+ def stop_ruby_prof
844
+ if defined?(RubyProf) and RubyProf.running?
845
+ result = RubyProf.stop
846
+ #printer = RubyProf::GraphPrinter.new(result)
847
+ printer = RubyProf::GraphHtmlPrinter.new(result)
848
+ #printer.print(STDOUT)
849
+ printer.print(File.new("ruby-prof-out-#{Time.now.to_i}.html", "w"), :min_percent => 3)
850
+ end
851
+ end
852
+
853
+ def parse
854
+ start_ruby_prof
855
+ bench_start_time = Time.now
856
+ target_line = @file.readline
857
+ _parse(target_line)
858
+ ensure
859
+ stop_ruby_prof
860
+ if ENV['FLYDATA_BENCHMARK']
861
+ puts " -> time:#{Time.now.to_f - bench_start_time.to_f} size:#{target_line.size}"
862
+ end
863
+ end
864
+
865
+ private
866
+
867
+ def _parse(target_line)
868
+ target_line = target_line.strip
869
+ start_index = target_line.index('(')
870
+ target_line = target_line[start_index..-2]
871
+ items = target_line.split(',')
872
+ index = 0
873
+ cur_state = State::NEXT_VALUES
874
+
875
+ loop do
876
+ case cur_state
877
+ when State::NEXT_VALUES
878
+ chars = items[index]
879
+ break unless chars
880
+ items[index] = chars[1..-1]
881
+ cur_state = State::IN_VALUE
882
+ when State::IN_VALUE
883
+ chars = items[index]
884
+ index += 1
885
+ if chars.start_with?("'")
886
+ # single item (not last item)
887
+ if chars.end_with?("'") and !last_char_escaped?(chars)
888
+ @values << replace_escape_char(chars[1..-2])
889
+ # single item (last item)
890
+ elsif chars.end_with?("')") and !last_char_escaped?(chars[0..-2])
891
+ @values << replace_escape_char(chars[1..-3])
892
+ @values_set << @values
893
+ @values = []
894
+ cur_state = State::NEXT_VALUES
895
+ # multi items
896
+ else
897
+ cur_value = chars[1..-1]
898
+ loop do
899
+ next_chars = items[index]
900
+ index += 1
901
+ if next_chars.end_with?('\'') and !last_char_escaped?(next_chars)
902
+ cur_value << ','
903
+ cur_value << next_chars[0..-2]
904
+ @values << replace_escape_char(cur_value)
905
+ break
906
+ elsif next_chars.end_with?("')") and !last_char_escaped?(next_chars[0..-2])
907
+ cur_value << ','
908
+ cur_value << next_chars[0..-3]
909
+ @values << replace_escape_char(cur_value)
910
+ @values_set << @values
911
+ @values = []
912
+ cur_state = State::NEXT_VALUES
913
+ break
914
+ else
915
+ cur_value << ','
916
+ cur_value << next_chars
917
+ end
918
+ end
919
+ end
920
+ else
921
+ if chars.end_with?(')')
922
+ chars = chars[0..-2]
923
+ @values << (chars == 'NULL' ? nil : chars)
924
+ @values_set << @values
925
+ @values = []
926
+ cur_state = State::NEXT_VALUES
927
+ else
928
+ @values << (chars == 'NULL' ? nil : chars)
929
+ end
930
+ end
931
+ else
932
+ raise "Invalid state: #{cur_state}"
933
+ end
934
+ end
935
+ return @values_set
936
+ end
937
+
938
+ ESCAPE_HASH_TABLE = {"\\\\" => "\\", "\\'" => "'", "\\n" => "\n", "\\r" => "\r"}
939
+
940
+ def replace_escape_char(original)
941
+ original.gsub(/\\\\|\\'|\\n|\\r/, ESCAPE_HASH_TABLE)
942
+ end
943
+
944
+ # This method assumes that the last character is ' (single quote)
945
+ # abcd\' -> true
946
+ # abcd\\' -> false (back slash escape back slash)
947
+ # abcd\\\' -> true
948
+ def last_char_escaped?(text)
949
+ flag = false
950
+ (text.length - 2).downto(0) do |i|
951
+ if text[i] == '\\'
952
+ flag = !flag
953
+ else
954
+ break
955
+ end
956
+ end
957
+ flag
958
+ end
959
+ end
960
+ end
961
+ end
962
+ end