flydata 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.7
1
+ 0.1.8
data/flydata.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "flydata"
8
- s.version = "0.1.7"
8
+ s.version = "0.1.8"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Koichi Fujikawa"]
12
- s.date = "2014-06-01"
12
+ s.date = "2014-06-23"
13
13
  s.description = "FlyData Command Line Interface"
14
14
  s.email = "sysadmin@flydata.co"
15
15
  s.executables = ["fdmysqldump", "flydata"]
@@ -50,6 +50,15 @@ Gem::Specification.new do |s|
50
50
  "lib/flydata/credentials.rb",
51
51
  "lib/flydata/cron.rb",
52
52
  "lib/flydata/fluent-plugins/in_mysql_binlog_flydata.rb",
53
+ "lib/flydata/fluent-plugins/mysql/alter_table_query_handler.rb",
54
+ "lib/flydata/fluent-plugins/mysql/binlog_position.rb",
55
+ "lib/flydata/fluent-plugins/mysql/binlog_query_dispatcher.rb",
56
+ "lib/flydata/fluent-plugins/mysql/binlog_query_handler.rb",
57
+ "lib/flydata/fluent-plugins/mysql/binlog_record_dispatcher.rb",
58
+ "lib/flydata/fluent-plugins/mysql/binlog_record_handler.rb",
59
+ "lib/flydata/fluent-plugins/mysql/context.rb",
60
+ "lib/flydata/fluent-plugins/mysql/dml_record_handler.rb",
61
+ "lib/flydata/fluent-plugins/mysql/query_parser.rb",
53
62
  "lib/flydata/fluent-plugins/out_forward_ssl.rb",
54
63
  "lib/flydata/fluent-plugins/preference.rb",
55
64
  "lib/flydata/flydata_crontab.sh",
@@ -71,6 +80,8 @@ Gem::Specification.new do |s|
71
80
  "spec/flydata/command/sender_spec.rb",
72
81
  "spec/flydata/command/sync_spec.rb",
73
82
  "spec/flydata/fluent-plugins/in_mysql_binlog_flydata_spec.rb",
83
+ "spec/flydata/fluent-plugins/mysql/binlog_position_spec.rb",
84
+ "spec/flydata/fluent-plugins/mysql/query_parser_spec.rb",
74
85
  "spec/flydata/heroku_spec.rb",
75
86
  "spec/flydata/table_def/mysql_table_def_spec.rb",
76
87
  "spec/flydata/table_def/mysqldump_test_foreign_key.dump",
@@ -79,6 +90,7 @@ Gem::Specification.new do |s|
79
90
  "spec/flydata/table_def/mysqldump_test_table_enum.dump",
80
91
  "spec/flydata/table_def/mysqldump_test_table_multi_pk.dump",
81
92
  "spec/flydata/table_def/mysqldump_test_table_no_pk.dump",
93
+ "spec/flydata/table_def/redshift_table_def_spec.rb",
82
94
  "spec/flydata/util/encryptor_spec.rb",
83
95
  "spec/flydata_spec.rb",
84
96
  "spec/spec_helper.rb",
@@ -87,7 +99,7 @@ Gem::Specification.new do |s|
87
99
  s.homepage = "http://flydata.co/"
88
100
  s.licenses = ["All right reserved."]
89
101
  s.require_paths = ["lib"]
90
- s.rubygems_version = "1.8.24"
102
+ s.rubygems_version = "1.8.23"
91
103
  s.summary = "FlyData CLI"
92
104
 
93
105
  if s.respond_to? :specification_version then
data/lib/flydata/cli.rb CHANGED
@@ -16,7 +16,7 @@ module Flydata
16
16
  cmd, sub_cmd = parse_command(first_arg)
17
17
  cmd_cls = "Flydata::Command::#{cmd.capitalize}".constantize
18
18
  cmd_obj = cmd_cls.new
19
- sub_cmd ? cmd_obj.send(sub_cmd) : cmd_obj.run
19
+ sub_cmd ? cmd_obj.send(sub_cmd,*@args) : cmd_obj.run(*@args)
20
20
  else
21
21
  raise 'no command given'
22
22
  end
@@ -1,7 +1,7 @@
1
1
  module Flydata
2
2
  module Command
3
3
  class Sender < Base
4
- def start
4
+ def start(show_final_message = true)
5
5
  # Check if process exist
6
6
  if process_exist?
7
7
  say("Process is still running. Please stop process first.")
@@ -19,26 +19,46 @@ module Flydata
19
19
 
20
20
  wait_until_client_ready
21
21
  #wait_until_logs_uploaded
22
-
23
- data_port = flydata.data_port.get
24
- say("Go to your Dashboard! #{flydata.flydata_api_host}/data_ports/#{data_port['id']}")
25
- say <<EOF
22
+ if show_final_message
23
+ data_port = flydata.data_port.get
24
+ say("Go to your Dashboard! #{flydata.flydata_api_host}/data_ports/#{data_port['id']}")
25
+ say <<EOF
26
26
  Please Note: Records and Total Size are updated every 10-20 minutes.
27
27
  EOF
28
+ end
28
29
  end
29
- def stop
30
+ def stop(options = {})
30
31
  unless process_exist?
31
- say("Process doesn't exist.")
32
+ say("Process doesn't exist.") unless options[:quiet]
32
33
  return true
33
34
  end
34
35
 
35
- say('Stopping sender process.')
36
+ say('Stopping sender process.') unless options[:quiet]
36
37
  if system("kill `cat #{FLYDATA_HOME}/flydata.pid`") and wait_until_client_stop
37
- say('Done.')
38
+ say('Done.') unless options[:quiet]
38
39
  return true
39
40
  end
40
41
  raise 'Something has gone wrong..'
41
42
  end
43
+ def flush_client_buffer
44
+ unless process_exist?
45
+ return true if client_buffer_empty?
46
+ say("Process doesn't exist. But, the client buffer is not empty!!")
47
+ start false
48
+ end
49
+
50
+ say('Stopping input plugins and flushing the client buffer.')
51
+ system("kill -USR1 `cat #{FLYDATA_HOME}/flydata.pid`")
52
+
53
+ retry_count = 12
54
+ 1.upto(retry_count) do |i|
55
+ return true if client_buffer_empty?
56
+ say("Waiting for the buffer to get empty... (#{i}/#{retry_count})")
57
+ sleep 5
58
+ end
59
+
60
+ raise 'Something is wrong! Unable to flush client buffer'
61
+ end
42
62
  def restart
43
63
  if process_exist?
44
64
  say('Restarting sender process.')
@@ -98,7 +118,7 @@ EOF
98
118
  retry_count = 5
99
119
  1.upto(retry_count) do |i|
100
120
  return true unless process_exist?
101
- say("Waiting for the client stopping... (#{i}/#{retry_count})")
121
+ say("Waiting for the client to stop... (#{i}/#{retry_count})")
102
122
  sleep 3
103
123
  end
104
124
  false
@@ -139,6 +159,11 @@ EOF
139
159
  res = flydata.get("/data_ports/#{data_port_id}/tail.json")
140
160
  res and res['logs'] and res['logs'].size > 0
141
161
  end
162
+ def client_buffer_empty?
163
+ client_buffer = File.join(FLYDATA_HOME, 'buffer')
164
+ say("Checking the client buffer #{client_buffer}")
165
+ Dir.glob("#{client_buffer}/*").empty?
166
+ end
142
167
  end
143
168
  end
144
169
  end
@@ -1,5 +1,7 @@
1
+ require 'fiber'
1
2
  require 'msgpack'
2
3
  require 'open3'
4
+ require 'mysql2'
3
5
  require 'flydata/sync_file_manager'
4
6
  require 'flydata/table_def'
5
7
  #require 'ruby-prof'
@@ -15,21 +17,36 @@ module Flydata
15
17
  STATUS_PARSING = 'PARSING'
16
18
  STATUS_COMPLETE = 'COMPLETE'
17
19
 
18
- def run
20
+ def run(*tables)
19
21
  de = retrieve_data_entries.first
20
22
  raise "There are no data entry." unless de
21
23
  case de['type']
22
24
  when 'RedshiftMysqlDataEntry'
25
+ de = load_sync_info(override_tables(de, tables))
26
+ flush_buffer_and_stop unless de['mysql_data_entry_preference']['initial_sync']
23
27
  sync_mysql_to_redshift(de)
24
28
  else
25
29
  raise "No supported data entry. Only mysql-redshift sync is supported."
26
30
  end
27
31
  end
28
32
 
33
+ def flush
34
+ flush_buffer_and_stop
35
+ puts "Buffers have been flushed and the sender process has been stopped."
36
+ end
37
+
29
38
  def reset
30
39
  de = retrieve_data_entries.first
31
40
  sync_fm = Flydata::FileUtil::SyncFileManager.new(de)
32
- [sync_fm.dump_file_path, sync_fm.dump_pos_path, sync_fm.binlog_path, sync_fm.mysql_table_marshal_dump_path, sync_fm.table_position_file_paths].flatten.each do |path|
41
+ [
42
+ sync_fm.dump_file_path,
43
+ sync_fm.dump_pos_path,
44
+ sync_fm.binlog_path,
45
+ sync_fm.mysql_table_marshal_dump_path,
46
+ sync_fm.table_position_file_paths,
47
+ sync_fm.sync_info_file,
48
+ sync_fm.table_rev_file_paths,
49
+ ].flatten.each do |path|
33
50
  FileUtils.rm(path) if File.exists?(path)
34
51
  end
35
52
  end
@@ -48,18 +65,6 @@ module Flydata
48
65
  end
49
66
  end
50
67
 
51
- def complete
52
- de = retrieve_data_entries.first
53
- sync_fm = Flydata::FileUtil::SyncFileManager.new(de)
54
- info = sync_fm.load_dump_pos
55
- if info[:status] == STATUS_COMPLETE
56
- sync_fm.save_binlog(info[:binlog_pos])
57
- Flydata::Command::Sender.new.start
58
- else
59
- raise "Initial sync status is not complete. Try running 'flydata sync'."
60
- end
61
- end
62
-
63
68
  # skip initial sync
64
69
  def skip
65
70
  de = retrieve_data_entries.first
@@ -71,12 +76,12 @@ module Flydata
71
76
  puts "Run 'flydata start' to start continuous sync."
72
77
  end
73
78
 
74
- def generate_table_ddl
79
+ def generate_table_ddl(*tables)
75
80
  de = retrieve_data_entries.first
76
81
  raise "There are no data entry." unless de
77
82
  case de['type']
78
83
  when 'RedshiftMysqlDataEntry'
79
- do_generate_table_ddl(de)
84
+ do_generate_table_ddl(override_tables(de, tables))
80
85
  else
81
86
  raise "No supported data entry. Only mysql-redshift sync is supported."
82
87
  end
@@ -102,10 +107,11 @@ module Flydata
102
107
  params << (mp['password'] ? "-p#{mp['password']}" : "")
103
108
  if mp['database'] then params << mp['database'] else raise "`database` is not defined in the data entry" end
104
109
  if mp['tables'] then params << mp['tables'].gsub(/,/, ' ') else raise "`tables` is not defined in the data entry" end
110
+
105
111
  command = DDL_DUMP_CMD_TEMPLATE % params
106
112
 
107
113
  IO.popen(command, 'r') do |io|
108
- create_flydata_ctl_table = true
114
+ create_flydata_ctl_table = mp['initial_sync']
109
115
  while !io.eof?
110
116
  mysql_tabledef = Flydata::TableDef::MysqlTableDef.create(io)
111
117
  if mysql_tabledef.nil?
@@ -124,7 +130,7 @@ module Flydata
124
130
  sync_fm = Flydata::FileUtil::SyncFileManager.new(de)
125
131
 
126
132
  # Check client condition
127
- if File.exists?(sync_fm.binlog_path)
133
+ if File.exists?(sync_fm.binlog_path) and de['mysql_data_entry_preference']['initial_sync']
128
134
  raise "Already synchronized. If you want to do initial sync, delete #{sync_fm.binlog_path}."
129
135
  end
130
136
 
@@ -134,7 +140,9 @@ module Flydata
134
140
  end
135
141
 
136
142
  if generate_mysqldump(de, sync_fm)
143
+ sync_fm.save_sync_info(de['mysql_data_entry_preference']['initial_sync'], de['mysql_data_entry_preference']['tables'])
137
144
  parse_mysqldump(dp, de, sync_fm)
145
+ complete
138
146
  end
139
147
  end
140
148
 
@@ -169,7 +177,7 @@ module Flydata
169
177
 
170
178
  puts "[Confirm] mysqldump path: #{fp}"
171
179
  if ask_yes_no('OK?')
172
- Flydata::Mysql::MysqlDumpGenerator.new(de['mysql_data_entry_preference']).dump(fp)
180
+ Flydata::Mysql::MysqlDumpGeneratorNoMasterData.new(de['mysql_data_entry_preference']).dump(fp)
173
181
  else
174
182
  newline
175
183
  puts "You can change the mysqldump path with 'mysqldump_path' in the conf file."
@@ -293,7 +301,6 @@ module Flydata
293
301
  puts "Elapsed:#{elapsed_time}sec start:#{bench_start_time} end:#{bench_end_time}"
294
302
  return true
295
303
  end
296
-
297
304
  # wait until finish
298
305
  puts "Start waiting until all data is processed on FlyData..."
299
306
  sleep 10
@@ -302,8 +309,25 @@ module Flydata
302
309
  end
303
310
 
304
311
  sync_fm.save_dump_pos(STATUS_COMPLETE, '', dump_file_size, binlog_pos)
305
- puts "Congratulations! All data is processed on FlyData. Please check tables and data on your Redshift Cluster."
306
- puts "After checking, run 'flydata sync:complete' to start continuously synchronization."
312
+ tables = de['mysql_data_entry_preference']['tables'].split(',').join(' ')
313
+ sync_fm.save_table_binlog_pos(tables, binlog_pos)
314
+ end
315
+
316
+ def complete
317
+ de = load_sync_info(retrieve_data_entries.first)
318
+ sync_fm = Flydata::FileUtil::SyncFileManager.new(de)
319
+ info = sync_fm.load_dump_pos
320
+ if info[:status] == STATUS_COMPLETE
321
+ if de['mysql_data_entry_preference']['initial_sync']
322
+ sync_fm.save_binlog(info[:binlog_pos])
323
+ end
324
+ sync_fm.move_table_binlog_files(de['mysql_data_entry_preference']['tables'].split(','))
325
+ sync_fm.reset_table_position_files(de['mysql_data_entry_preference']['tables'].split(','))
326
+ sync_fm.backup_dump_dir
327
+ Flydata::Command::Sender.new.start
328
+ else
329
+ raise "Initial sync status is not complete. Try running 'flydata sync'."
330
+ end
307
331
  end
308
332
 
309
333
  def generate_json(mysql_table, values)
@@ -313,6 +337,34 @@ module Flydata
313
337
  end
314
338
  h.to_json
315
339
  end
340
+
341
+ def override_tables(de, tables)
342
+ de['mysql_data_entry_preference']['initial_sync'] = tables.empty?
343
+ if ! de['mysql_data_entry_preference']['initial_sync']
344
+ de['mysql_data_entry_preference']['tables'] = tables.join(',')
345
+ end
346
+ de
347
+ end
348
+
349
+ def load_sync_info(de)
350
+ sync_fm = Flydata::FileUtil::SyncFileManager.new(de)
351
+ mp = de['mysql_data_entry_preference']
352
+ unless (rs = sync_fm.load_sync_info).nil?
353
+ mp['initial_sync'] = rs[:initial_sync]
354
+ mp['tables'] = rs[:tables]
355
+ end
356
+ de
357
+ end
358
+
359
+ def flush_buffer_and_stop
360
+ sender = Flydata::Command::Sender.new
361
+ sender.flush_client_buffer
362
+ puts "Checking the server."
363
+ until check
364
+ sleep 10
365
+ end
366
+ sender.stop(quiet: true)
367
+ end
316
368
  end
317
369
  end
318
370
 
@@ -668,7 +720,8 @@ EOT
668
720
 
669
721
  class MysqlDumpGenerator
670
722
  # host, port, username, password, database, tables
671
- MYSQL_DUMP_CMD_TEMPLATE = "mysqldump --protocol=tcp -h %s -P %s -u%s %s --skip-lock-tables --single-transaction --flush-logs --hex-blob --master-data=2 %s %s"
723
+ MYSQL_DUMP_CMD_TEMPLATE = "mysqldump --protocol=tcp -h %s -P %s -u%s %s --skip-lock-tables --single-transaction --hex-blob %s %s %s"
724
+ EXTRA_MYSQLDUMP_PARAMS = ""
672
725
  def initialize(conf)
673
726
  password = conf['password'].to_s.empty? ? "" : "-p#{conf['password']}"
674
727
  tables = if conf['tables']
@@ -677,8 +730,16 @@ EOT
677
730
  ''
678
731
  end
679
732
  @dump_cmd = MYSQL_DUMP_CMD_TEMPLATE %
680
- [conf['host'], conf['port'], conf['username'], password, conf['database'], tables]
733
+ [conf['host'], conf['port'], conf['username'], password, self.class::EXTRA_MYSQLDUMP_PARAMS, conf['database'], tables]
734
+ @db_opts = [:host, :port, :username, :password, :database].inject({}) {|h, sym| h[sym] = conf[sym.to_s]; h}
681
735
  end
736
+ def dump(file_path)
737
+ raise "subclass must implement the method"
738
+ end
739
+ end
740
+
741
+ class MysqlDumpGeneratorMasterData < MysqlDumpGenerator
742
+ EXTRA_MYSQLDUMP_PARAMS = "--flush-logs --master-data=2"
682
743
  def dump(file_path)
683
744
  cmd = "#{@dump_cmd} > #{file_path}"
684
745
  o, e, s = Open3.capture3(cmd)
@@ -700,6 +761,109 @@ EOT
700
761
  end
701
762
  end
702
763
 
764
+ class MysqlDumpGeneratorNoMasterData < MysqlDumpGenerator
765
+ EXTRA_MYSQLDUMP_PARAMS = ""
766
+ CHANGE_MASTER_TEMPLATE = <<EOS
767
+ --
768
+ -- Position to start replication or point-in-time recovery from
769
+ --
770
+
771
+ -- CHANGE MASTER TO MASTER_LOG_FILE='%s', MASTER_LOG_POS=%d;
772
+
773
+ EOS
774
+
775
+ def dump(file_path)
776
+ # RDS doesn't allow obtaining binlog position using mysqldump. Get it separately and insert it into the dump file.
777
+ table_locker = Fiber.new do
778
+ client = Mysql2::Client.new(@db_opts)
779
+ # Lock tables
780
+ client.query "FLUSH LOCAL TABLES;"
781
+ q = flush_tables_with_read_lock_query(client)
782
+ puts "FLUSH TABLES query: #{q}" if FLYDATA_DEBUG
783
+ client.query q
784
+ begin
785
+ Fiber.yield # Lock is done. Let dump to start
786
+ # obtain binlog pos
787
+ result = client.query "SHOW MASTER STATUS;"
788
+ row = result.first
789
+ if row.nil?
790
+ raise "MySQL DB has no replication master status. Check if the DB is set up as a replication master. In case of RDS, make sure that Backup Retention Period is set to more than 0."
791
+ end
792
+ ensure
793
+ # unlock tables
794
+ client.query "UNLOCK TABLES;"
795
+ client.close
796
+ end
797
+
798
+ [row["File"], row['Position']]
799
+ end
800
+
801
+ table_locker.resume # Lock tables
802
+ begin
803
+ # start dump
804
+ Open3.popen3 @dump_cmd do |cmd_in, cmd_out, cmd_err|
805
+ cmd_in.close_write
806
+ File.open(file_path, "w") do |f|
807
+ find_insert_pos = :not_started
808
+ cmd_out.each_line do |line|
809
+ if find_insert_pos == :not_started && /^-- Server version/ === line
810
+ find_insert_pos = :finding
811
+ elsif find_insert_pos == :finding && /^--/ === line
812
+ # wait before writing the first database queries
813
+ file, pos = table_locker.resume # Get binlog pos
814
+ # insert binlog pos
815
+ change_master = CHANGE_MASTER_TEMPLATE % [file, pos]
816
+ f.print change_master
817
+
818
+ find_insert_pos = :found
819
+ # resume dump
820
+ end
821
+ f.print line
822
+ end
823
+ end
824
+ cmd_err.each_line do |line|
825
+ $stderr.print line unless /^Warning:/ === line
826
+ end
827
+ end
828
+ rescue
829
+ # Cleanup
830
+ FileUtils.rm(file_path) if File.exists?(file_path)
831
+ raise
832
+ ensure
833
+ # Let table_locker finish its task even if an exception happened
834
+ table_locker.resume if table_locker.alive?
835
+ end
836
+ end
837
+
838
+ private
839
+ # This query generates a query which flushes user tables with read lock
840
+ FLUSH_TABLES_QUERY_TEMPLATE = "FLUSH TABLES %s WITH READ LOCK;"
841
+ USER_TABLES_QUERY = <<EOS
842
+ SELECT CONCAT('`',
843
+ REPLACE(TABLE_SCHEMA, '`', '``'), '`.`',
844
+ REPLACE(TABLE_NAME, '`', '``'), '` ')
845
+ AS tables
846
+ FROM INFORMATION_SCHEMA.TABLES
847
+ WHERE TABLE_TYPE = 'BASE TABLE'
848
+ AND ENGINE NOT IN ('MEMORY', 'CSV', 'PERFORMANCE_SCHEMA');
849
+ EOS
850
+ def flush_tables_with_read_lock_query(client)
851
+ tables = ""
852
+ if mysql_server_version(client) >= "5.5"
853
+ # FLUSH TABLES table_names,... WITH READ LOCK syntax is supported from MySQL 5.5
854
+ result = client.query(USER_TABLES_QUERY)
855
+ tables = result.collect{|r| r['tables']}.join(", ")
856
+ end
857
+ FLUSH_TABLES_QUERY_TEMPLATE % [tables]
858
+ end
859
+
860
+ VERSION_QUERY = "SHOW VARIABLES LIKE 'version'"
861
+ def mysql_server_version(client)
862
+ result = client.query(VERSION_QUERY)
863
+ result.first['Value']
864
+ end
865
+ end
866
+
703
867
  class MysqlDumpParser
704
868
 
705
869
  module State