flydata 0.1.7 → 0.1.8

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.7
1
+ 0.1.8
data/flydata.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "flydata"
8
- s.version = "0.1.7"
8
+ s.version = "0.1.8"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Koichi Fujikawa"]
12
- s.date = "2014-06-01"
12
+ s.date = "2014-06-23"
13
13
  s.description = "FlyData Command Line Interface"
14
14
  s.email = "sysadmin@flydata.co"
15
15
  s.executables = ["fdmysqldump", "flydata"]
@@ -50,6 +50,15 @@ Gem::Specification.new do |s|
50
50
  "lib/flydata/credentials.rb",
51
51
  "lib/flydata/cron.rb",
52
52
  "lib/flydata/fluent-plugins/in_mysql_binlog_flydata.rb",
53
+ "lib/flydata/fluent-plugins/mysql/alter_table_query_handler.rb",
54
+ "lib/flydata/fluent-plugins/mysql/binlog_position.rb",
55
+ "lib/flydata/fluent-plugins/mysql/binlog_query_dispatcher.rb",
56
+ "lib/flydata/fluent-plugins/mysql/binlog_query_handler.rb",
57
+ "lib/flydata/fluent-plugins/mysql/binlog_record_dispatcher.rb",
58
+ "lib/flydata/fluent-plugins/mysql/binlog_record_handler.rb",
59
+ "lib/flydata/fluent-plugins/mysql/context.rb",
60
+ "lib/flydata/fluent-plugins/mysql/dml_record_handler.rb",
61
+ "lib/flydata/fluent-plugins/mysql/query_parser.rb",
53
62
  "lib/flydata/fluent-plugins/out_forward_ssl.rb",
54
63
  "lib/flydata/fluent-plugins/preference.rb",
55
64
  "lib/flydata/flydata_crontab.sh",
@@ -71,6 +80,8 @@ Gem::Specification.new do |s|
71
80
  "spec/flydata/command/sender_spec.rb",
72
81
  "spec/flydata/command/sync_spec.rb",
73
82
  "spec/flydata/fluent-plugins/in_mysql_binlog_flydata_spec.rb",
83
+ "spec/flydata/fluent-plugins/mysql/binlog_position_spec.rb",
84
+ "spec/flydata/fluent-plugins/mysql/query_parser_spec.rb",
74
85
  "spec/flydata/heroku_spec.rb",
75
86
  "spec/flydata/table_def/mysql_table_def_spec.rb",
76
87
  "spec/flydata/table_def/mysqldump_test_foreign_key.dump",
@@ -79,6 +90,7 @@ Gem::Specification.new do |s|
79
90
  "spec/flydata/table_def/mysqldump_test_table_enum.dump",
80
91
  "spec/flydata/table_def/mysqldump_test_table_multi_pk.dump",
81
92
  "spec/flydata/table_def/mysqldump_test_table_no_pk.dump",
93
+ "spec/flydata/table_def/redshift_table_def_spec.rb",
82
94
  "spec/flydata/util/encryptor_spec.rb",
83
95
  "spec/flydata_spec.rb",
84
96
  "spec/spec_helper.rb",
@@ -87,7 +99,7 @@ Gem::Specification.new do |s|
87
99
  s.homepage = "http://flydata.co/"
88
100
  s.licenses = ["All right reserved."]
89
101
  s.require_paths = ["lib"]
90
- s.rubygems_version = "1.8.24"
102
+ s.rubygems_version = "1.8.23"
91
103
  s.summary = "FlyData CLI"
92
104
 
93
105
  if s.respond_to? :specification_version then
data/lib/flydata/cli.rb CHANGED
@@ -16,7 +16,7 @@ module Flydata
16
16
  cmd, sub_cmd = parse_command(first_arg)
17
17
  cmd_cls = "Flydata::Command::#{cmd.capitalize}".constantize
18
18
  cmd_obj = cmd_cls.new
19
- sub_cmd ? cmd_obj.send(sub_cmd) : cmd_obj.run
19
+ sub_cmd ? cmd_obj.send(sub_cmd,*@args) : cmd_obj.run(*@args)
20
20
  else
21
21
  raise 'no command given'
22
22
  end
@@ -1,7 +1,7 @@
1
1
  module Flydata
2
2
  module Command
3
3
  class Sender < Base
4
- def start
4
+ def start(show_final_message = true)
5
5
  # Check if process exist
6
6
  if process_exist?
7
7
  say("Process is still running. Please stop process first.")
@@ -19,26 +19,46 @@ module Flydata
19
19
 
20
20
  wait_until_client_ready
21
21
  #wait_until_logs_uploaded
22
-
23
- data_port = flydata.data_port.get
24
- say("Go to your Dashboard! #{flydata.flydata_api_host}/data_ports/#{data_port['id']}")
25
- say <<EOF
22
+ if show_final_message
23
+ data_port = flydata.data_port.get
24
+ say("Go to your Dashboard! #{flydata.flydata_api_host}/data_ports/#{data_port['id']}")
25
+ say <<EOF
26
26
  Please Note: Records and Total Size are updated every 10-20 minutes.
27
27
  EOF
28
+ end
28
29
  end
29
- def stop
30
+ def stop(options = {})
30
31
  unless process_exist?
31
- say("Process doesn't exist.")
32
+ say("Process doesn't exist.") unless options[:quiet]
32
33
  return true
33
34
  end
34
35
 
35
- say('Stopping sender process.')
36
+ say('Stopping sender process.') unless options[:quiet]
36
37
  if system("kill `cat #{FLYDATA_HOME}/flydata.pid`") and wait_until_client_stop
37
- say('Done.')
38
+ say('Done.') unless options[:quiet]
38
39
  return true
39
40
  end
40
41
  raise 'Something has gone wrong..'
41
42
  end
43
+ def flush_client_buffer
44
+ unless process_exist?
45
+ return true if client_buffer_empty?
46
+ say("Process doesn't exist. But, the client buffer is not empty!!")
47
+ start false
48
+ end
49
+
50
+ say('Stopping input plugins and flushing the client buffer.')
51
+ system("kill -USR1 `cat #{FLYDATA_HOME}/flydata.pid`")
52
+
53
+ retry_count = 12
54
+ 1.upto(retry_count) do |i|
55
+ return true if client_buffer_empty?
56
+ say("Waiting for the buffer to get empty... (#{i}/#{retry_count})")
57
+ sleep 5
58
+ end
59
+
60
+ raise 'Something is wrong! Unable to flush client buffer'
61
+ end
42
62
  def restart
43
63
  if process_exist?
44
64
  say('Restarting sender process.')
@@ -98,7 +118,7 @@ EOF
98
118
  retry_count = 5
99
119
  1.upto(retry_count) do |i|
100
120
  return true unless process_exist?
101
- say("Waiting for the client stopping... (#{i}/#{retry_count})")
121
+ say("Waiting for the client to stop... (#{i}/#{retry_count})")
102
122
  sleep 3
103
123
  end
104
124
  false
@@ -139,6 +159,11 @@ EOF
139
159
  res = flydata.get("/data_ports/#{data_port_id}/tail.json")
140
160
  res and res['logs'] and res['logs'].size > 0
141
161
  end
162
+ def client_buffer_empty?
163
+ client_buffer = File.join(FLYDATA_HOME, 'buffer')
164
+ say("Checking the client buffer #{client_buffer}")
165
+ Dir.glob("#{client_buffer}/*").empty?
166
+ end
142
167
  end
143
168
  end
144
169
  end
@@ -1,5 +1,7 @@
1
+ require 'fiber'
1
2
  require 'msgpack'
2
3
  require 'open3'
4
+ require 'mysql2'
3
5
  require 'flydata/sync_file_manager'
4
6
  require 'flydata/table_def'
5
7
  #require 'ruby-prof'
@@ -15,21 +17,36 @@ module Flydata
15
17
  STATUS_PARSING = 'PARSING'
16
18
  STATUS_COMPLETE = 'COMPLETE'
17
19
 
18
- def run
20
+ def run(*tables)
19
21
  de = retrieve_data_entries.first
20
22
  raise "There are no data entry." unless de
21
23
  case de['type']
22
24
  when 'RedshiftMysqlDataEntry'
25
+ de = load_sync_info(override_tables(de, tables))
26
+ flush_buffer_and_stop unless de['mysql_data_entry_preference']['initial_sync']
23
27
  sync_mysql_to_redshift(de)
24
28
  else
25
29
  raise "No supported data entry. Only mysql-redshift sync is supported."
26
30
  end
27
31
  end
28
32
 
33
+ def flush
34
+ flush_buffer_and_stop
35
+ puts "Buffers have been flushed and the sender process has been stopped."
36
+ end
37
+
29
38
  def reset
30
39
  de = retrieve_data_entries.first
31
40
  sync_fm = Flydata::FileUtil::SyncFileManager.new(de)
32
- [sync_fm.dump_file_path, sync_fm.dump_pos_path, sync_fm.binlog_path, sync_fm.mysql_table_marshal_dump_path, sync_fm.table_position_file_paths].flatten.each do |path|
41
+ [
42
+ sync_fm.dump_file_path,
43
+ sync_fm.dump_pos_path,
44
+ sync_fm.binlog_path,
45
+ sync_fm.mysql_table_marshal_dump_path,
46
+ sync_fm.table_position_file_paths,
47
+ sync_fm.sync_info_file,
48
+ sync_fm.table_rev_file_paths,
49
+ ].flatten.each do |path|
33
50
  FileUtils.rm(path) if File.exists?(path)
34
51
  end
35
52
  end
@@ -48,18 +65,6 @@ module Flydata
48
65
  end
49
66
  end
50
67
 
51
- def complete
52
- de = retrieve_data_entries.first
53
- sync_fm = Flydata::FileUtil::SyncFileManager.new(de)
54
- info = sync_fm.load_dump_pos
55
- if info[:status] == STATUS_COMPLETE
56
- sync_fm.save_binlog(info[:binlog_pos])
57
- Flydata::Command::Sender.new.start
58
- else
59
- raise "Initial sync status is not complete. Try running 'flydata sync'."
60
- end
61
- end
62
-
63
68
  # skip initial sync
64
69
  def skip
65
70
  de = retrieve_data_entries.first
@@ -71,12 +76,12 @@ module Flydata
71
76
  puts "Run 'flydata start' to start continuous sync."
72
77
  end
73
78
 
74
- def generate_table_ddl
79
+ def generate_table_ddl(*tables)
75
80
  de = retrieve_data_entries.first
76
81
  raise "There are no data entry." unless de
77
82
  case de['type']
78
83
  when 'RedshiftMysqlDataEntry'
79
- do_generate_table_ddl(de)
84
+ do_generate_table_ddl(override_tables(de, tables))
80
85
  else
81
86
  raise "No supported data entry. Only mysql-redshift sync is supported."
82
87
  end
@@ -102,10 +107,11 @@ module Flydata
102
107
  params << (mp['password'] ? "-p#{mp['password']}" : "")
103
108
  if mp['database'] then params << mp['database'] else raise "`database` is not defined in the data entry" end
104
109
  if mp['tables'] then params << mp['tables'].gsub(/,/, ' ') else raise "`tables` is not defined in the data entry" end
110
+
105
111
  command = DDL_DUMP_CMD_TEMPLATE % params
106
112
 
107
113
  IO.popen(command, 'r') do |io|
108
- create_flydata_ctl_table = true
114
+ create_flydata_ctl_table = mp['initial_sync']
109
115
  while !io.eof?
110
116
  mysql_tabledef = Flydata::TableDef::MysqlTableDef.create(io)
111
117
  if mysql_tabledef.nil?
@@ -124,7 +130,7 @@ module Flydata
124
130
  sync_fm = Flydata::FileUtil::SyncFileManager.new(de)
125
131
 
126
132
  # Check client condition
127
- if File.exists?(sync_fm.binlog_path)
133
+ if File.exists?(sync_fm.binlog_path) and de['mysql_data_entry_preference']['initial_sync']
128
134
  raise "Already synchronized. If you want to do initial sync, delete #{sync_fm.binlog_path}."
129
135
  end
130
136
 
@@ -134,7 +140,9 @@ module Flydata
134
140
  end
135
141
 
136
142
  if generate_mysqldump(de, sync_fm)
143
+ sync_fm.save_sync_info(de['mysql_data_entry_preference']['initial_sync'], de['mysql_data_entry_preference']['tables'])
137
144
  parse_mysqldump(dp, de, sync_fm)
145
+ complete
138
146
  end
139
147
  end
140
148
 
@@ -169,7 +177,7 @@ module Flydata
169
177
 
170
178
  puts "[Confirm] mysqldump path: #{fp}"
171
179
  if ask_yes_no('OK?')
172
- Flydata::Mysql::MysqlDumpGenerator.new(de['mysql_data_entry_preference']).dump(fp)
180
+ Flydata::Mysql::MysqlDumpGeneratorNoMasterData.new(de['mysql_data_entry_preference']).dump(fp)
173
181
  else
174
182
  newline
175
183
  puts "You can change the mysqldump path with 'mysqldump_path' in the conf file."
@@ -293,7 +301,6 @@ module Flydata
293
301
  puts "Elapsed:#{elapsed_time}sec start:#{bench_start_time} end:#{bench_end_time}"
294
302
  return true
295
303
  end
296
-
297
304
  # wait until finish
298
305
  puts "Start waiting until all data is processed on FlyData..."
299
306
  sleep 10
@@ -302,8 +309,25 @@ module Flydata
302
309
  end
303
310
 
304
311
  sync_fm.save_dump_pos(STATUS_COMPLETE, '', dump_file_size, binlog_pos)
305
- puts "Congratulations! All data is processed on FlyData. Please check tables and data on your Redshift Cluster."
306
- puts "After checking, run 'flydata sync:complete' to start continuously synchronization."
312
+ tables = de['mysql_data_entry_preference']['tables'].split(',').join(' ')
313
+ sync_fm.save_table_binlog_pos(tables, binlog_pos)
314
+ end
315
+
316
+ def complete
317
+ de = load_sync_info(retrieve_data_entries.first)
318
+ sync_fm = Flydata::FileUtil::SyncFileManager.new(de)
319
+ info = sync_fm.load_dump_pos
320
+ if info[:status] == STATUS_COMPLETE
321
+ if de['mysql_data_entry_preference']['initial_sync']
322
+ sync_fm.save_binlog(info[:binlog_pos])
323
+ end
324
+ sync_fm.move_table_binlog_files(de['mysql_data_entry_preference']['tables'].split(','))
325
+ sync_fm.reset_table_position_files(de['mysql_data_entry_preference']['tables'].split(','))
326
+ sync_fm.backup_dump_dir
327
+ Flydata::Command::Sender.new.start
328
+ else
329
+ raise "Initial sync status is not complete. Try running 'flydata sync'."
330
+ end
307
331
  end
308
332
 
309
333
  def generate_json(mysql_table, values)
@@ -313,6 +337,34 @@ module Flydata
313
337
  end
314
338
  h.to_json
315
339
  end
340
+
341
+ def override_tables(de, tables)
342
+ de['mysql_data_entry_preference']['initial_sync'] = tables.empty?
343
+ if ! de['mysql_data_entry_preference']['initial_sync']
344
+ de['mysql_data_entry_preference']['tables'] = tables.join(',')
345
+ end
346
+ de
347
+ end
348
+
349
+ def load_sync_info(de)
350
+ sync_fm = Flydata::FileUtil::SyncFileManager.new(de)
351
+ mp = de['mysql_data_entry_preference']
352
+ unless (rs = sync_fm.load_sync_info).nil?
353
+ mp['initial_sync'] = rs[:initial_sync]
354
+ mp['tables'] = rs[:tables]
355
+ end
356
+ de
357
+ end
358
+
359
+ def flush_buffer_and_stop
360
+ sender = Flydata::Command::Sender.new
361
+ sender.flush_client_buffer
362
+ puts "Checking the server."
363
+ until check
364
+ sleep 10
365
+ end
366
+ sender.stop(quiet: true)
367
+ end
316
368
  end
317
369
  end
318
370
 
@@ -668,7 +720,8 @@ EOT
668
720
 
669
721
  class MysqlDumpGenerator
670
722
  # host, port, username, password, database, tables
671
- MYSQL_DUMP_CMD_TEMPLATE = "mysqldump --protocol=tcp -h %s -P %s -u%s %s --skip-lock-tables --single-transaction --flush-logs --hex-blob --master-data=2 %s %s"
723
+ MYSQL_DUMP_CMD_TEMPLATE = "mysqldump --protocol=tcp -h %s -P %s -u%s %s --skip-lock-tables --single-transaction --hex-blob %s %s %s"
724
+ EXTRA_MYSQLDUMP_PARAMS = ""
672
725
  def initialize(conf)
673
726
  password = conf['password'].to_s.empty? ? "" : "-p#{conf['password']}"
674
727
  tables = if conf['tables']
@@ -677,8 +730,16 @@ EOT
677
730
  ''
678
731
  end
679
732
  @dump_cmd = MYSQL_DUMP_CMD_TEMPLATE %
680
- [conf['host'], conf['port'], conf['username'], password, conf['database'], tables]
733
+ [conf['host'], conf['port'], conf['username'], password, self.class::EXTRA_MYSQLDUMP_PARAMS, conf['database'], tables]
734
+ @db_opts = [:host, :port, :username, :password, :database].inject({}) {|h, sym| h[sym] = conf[sym.to_s]; h}
681
735
  end
736
+ def dump(file_path)
737
+ raise "subclass must implement the method"
738
+ end
739
+ end
740
+
741
+ class MysqlDumpGeneratorMasterData < MysqlDumpGenerator
742
+ EXTRA_MYSQLDUMP_PARAMS = "--flush-logs --master-data=2"
682
743
  def dump(file_path)
683
744
  cmd = "#{@dump_cmd} > #{file_path}"
684
745
  o, e, s = Open3.capture3(cmd)
@@ -700,6 +761,109 @@ EOT
700
761
  end
701
762
  end
702
763
 
764
+ class MysqlDumpGeneratorNoMasterData < MysqlDumpGenerator
765
+ EXTRA_MYSQLDUMP_PARAMS = ""
766
+ CHANGE_MASTER_TEMPLATE = <<EOS
767
+ --
768
+ -- Position to start replication or point-in-time recovery from
769
+ --
770
+
771
+ -- CHANGE MASTER TO MASTER_LOG_FILE='%s', MASTER_LOG_POS=%d;
772
+
773
+ EOS
774
+
775
+ def dump(file_path)
776
+ # RDS doesn't allow obtaining binlog position using mysqldump. Get it separately and insert it into the dump file.
777
+ table_locker = Fiber.new do
778
+ client = Mysql2::Client.new(@db_opts)
779
+ # Lock tables
780
+ client.query "FLUSH LOCAL TABLES;"
781
+ q = flush_tables_with_read_lock_query(client)
782
+ puts "FLUSH TABLES query: #{q}" if FLYDATA_DEBUG
783
+ client.query q
784
+ begin
785
+ Fiber.yield # Lock is done. Let dump to start
786
+ # obtain binlog pos
787
+ result = client.query "SHOW MASTER STATUS;"
788
+ row = result.first
789
+ if row.nil?
790
+ raise "MySQL DB has no replication master status. Check if the DB is set up as a replication master. In case of RDS, make sure that Backup Retention Period is set to more than 0."
791
+ end
792
+ ensure
793
+ # unlock tables
794
+ client.query "UNLOCK TABLES;"
795
+ client.close
796
+ end
797
+
798
+ [row["File"], row['Position']]
799
+ end
800
+
801
+ table_locker.resume # Lock tables
802
+ begin
803
+ # start dump
804
+ Open3.popen3 @dump_cmd do |cmd_in, cmd_out, cmd_err|
805
+ cmd_in.close_write
806
+ File.open(file_path, "w") do |f|
807
+ find_insert_pos = :not_started
808
+ cmd_out.each_line do |line|
809
+ if find_insert_pos == :not_started && /^-- Server version/ === line
810
+ find_insert_pos = :finding
811
+ elsif find_insert_pos == :finding && /^--/ === line
812
+ # wait before writing the first database queries
813
+ file, pos = table_locker.resume # Get binlog pos
814
+ # insert binlog pos
815
+ change_master = CHANGE_MASTER_TEMPLATE % [file, pos]
816
+ f.print change_master
817
+
818
+ find_insert_pos = :found
819
+ # resume dump
820
+ end
821
+ f.print line
822
+ end
823
+ end
824
+ cmd_err.each_line do |line|
825
+ $stderr.print line unless /^Warning:/ === line
826
+ end
827
+ end
828
+ rescue
829
+ # Cleanup
830
+ FileUtils.rm(file_path) if File.exists?(file_path)
831
+ raise
832
+ ensure
833
+ # Let table_locker finish its task even if an exception happened
834
+ table_locker.resume if table_locker.alive?
835
+ end
836
+ end
837
+
838
+ private
839
+ # This query generates a query which flushes user tables with read lock
840
+ FLUSH_TABLES_QUERY_TEMPLATE = "FLUSH TABLES %s WITH READ LOCK;"
841
+ USER_TABLES_QUERY = <<EOS
842
+ SELECT CONCAT('`',
843
+ REPLACE(TABLE_SCHEMA, '`', '``'), '`.`',
844
+ REPLACE(TABLE_NAME, '`', '``'), '` ')
845
+ AS tables
846
+ FROM INFORMATION_SCHEMA.TABLES
847
+ WHERE TABLE_TYPE = 'BASE TABLE'
848
+ AND ENGINE NOT IN ('MEMORY', 'CSV', 'PERFORMANCE_SCHEMA');
849
+ EOS
850
+ def flush_tables_with_read_lock_query(client)
851
+ tables = ""
852
+ if mysql_server_version(client) >= "5.5"
853
+ # FLUSH TABLES table_names,... WITH READ LOCK syntax is supported from MySQL 5.5
854
+ result = client.query(USER_TABLES_QUERY)
855
+ tables = result.collect{|r| r['tables']}.join(", ")
856
+ end
857
+ FLUSH_TABLES_QUERY_TEMPLATE % [tables]
858
+ end
859
+
860
+ VERSION_QUERY = "SHOW VARIABLES LIKE 'version'"
861
+ def mysql_server_version(client)
862
+ result = client.query(VERSION_QUERY)
863
+ result.first['Value']
864
+ end
865
+ end
866
+
703
867
  class MysqlDumpParser
704
868
 
705
869
  module State