flydata 0.6.3 → 0.6.4

Files changed (94)
  1. checksums.yaml +4 -4
  2. data/Rakefile +2 -2
  3. data/VERSION +1 -1
  4. data/bin/fdredshift +78 -0
  5. data/circle.yml +1 -1
  6. data/ext/flydata/{parser/mysql → source_mysql/parser}/.gitignore +0 -0
  7. data/ext/flydata/{parser/mysql → source_mysql/parser}/dump_parser_ext.cpp +3 -3
  8. data/ext/flydata/source_mysql/parser/extconf.rb +3 -0
  9. data/ext/flydata/{parser/mysql → source_mysql/parser}/parser.txt +0 -0
  10. data/ext/flydata/{parser/mysql → source_mysql/parser}/sql_parser.cpp +0 -0
  11. data/ext/flydata/{parser/mysql → source_mysql/parser}/sql_parser.h +0 -0
  12. data/flydata-core/lib/flydata-core/mysql/binlog_pos.rb +34 -32
  13. data/flydata-core/lib/flydata-core/mysql/compatibility_checker.rb +20 -0
  14. data/flydata-core/lib/flydata-core/table_def/mysql_table_def.rb +12 -4
  15. data/flydata-core/lib/flydata-core/table_def/redshift_table_def.rb +60 -6
  16. data/flydata-core/spec/mysql/binlog_pos_spec.rb +474 -0
  17. data/flydata-core/spec/table_def/mysql_table_def_spec.rb +57 -0
  18. data/flydata-core/spec/table_def/mysql_to_redshift_table_def_spec.rb +174 -20
  19. data/flydata-core/spec/table_def/mysqldump_test_col_comment_with_AUTO_INCREMENT_keyword.dump +43 -0
  20. data/flydata-core/spec/table_def/mysqldump_test_col_comment_with_not_null_keyword.dump +43 -0
  21. data/flydata-core/spec/table_def/mysqldump_test_col_comment_with_unique_keyword.dump +43 -0
  22. data/flydata-core/spec/table_def/mysqldump_test_col_comment_with_unsigned_keyword.dump +43 -0
  23. data/flydata-core/spec/table_def/redshift_table_def_spec.rb +41 -8
  24. data/flydata.gemspec +0 -0
  25. data/lib/flydata/cli.rb +11 -5
  26. data/lib/flydata/command/base.rb +14 -1
  27. data/lib/flydata/command/exclusive_runnable.rb +42 -12
  28. data/lib/flydata/command/helper.rb +6 -6
  29. data/lib/flydata/command/sender.rb +4 -3
  30. data/lib/flydata/command/setup.rb +30 -381
  31. data/lib/flydata/command/stop.rb +1 -0
  32. data/lib/flydata/command/sync.rb +273 -301
  33. data/lib/flydata/compatibility_check.rb +24 -117
  34. data/lib/flydata/fluent-plugins/in_mysql_binlog_flydata.rb +3 -3
  35. data/lib/flydata/fluent-plugins/mysql/alter_table_query_handler.rb +2 -2
  36. data/lib/flydata/fluent-plugins/mysql/binlog_record_handler.rb +6 -6
  37. data/lib/flydata/fluent-plugins/mysql/truncate_table_query_handler.rb +0 -1
  38. data/lib/flydata/parser.rb +14 -0
  39. data/lib/flydata/{parser_provider.rb → parser/parser_provider.rb} +6 -4
  40. data/lib/flydata/parser/source_table.rb +33 -0
  41. data/lib/flydata/source.rb +105 -0
  42. data/lib/flydata/source/component.rb +21 -0
  43. data/lib/flydata/source/errors.rb +7 -0
  44. data/lib/flydata/source/generate_source_dump.rb +72 -0
  45. data/lib/flydata/source/parse_dump_and_send.rb +52 -0
  46. data/lib/flydata/source/setup.rb +31 -0
  47. data/lib/flydata/source/source_pos.rb +45 -0
  48. data/lib/flydata/source/sync.rb +56 -0
  49. data/lib/flydata/source/sync_generate_table_ddl.rb +43 -0
  50. data/lib/flydata/source_file/setup.rb +17 -0
  51. data/lib/flydata/source_file/sync.rb +14 -0
  52. data/lib/flydata/{command → source_mysql/command}/mysql.rb +2 -1
  53. data/lib/flydata/{command → source_mysql/command}/mysql_command_base.rb +2 -4
  54. data/lib/flydata/{command → source_mysql/command}/mysqlbinlog.rb +2 -1
  55. data/lib/flydata/{command → source_mysql/command}/mysqldump.rb +2 -1
  56. data/lib/flydata/source_mysql/generate_source_dump.rb +53 -0
  57. data/lib/flydata/source_mysql/mysql_compatibility_check.rb +114 -0
  58. data/lib/flydata/source_mysql/parse_dump_and_send.rb +28 -0
  59. data/lib/flydata/{parser/mysql → source_mysql/parser}/.gitignore +0 -0
  60. data/lib/flydata/{parser/mysql → source_mysql/parser}/dump_parser.rb +32 -67
  61. data/lib/flydata/{parser/mysql → source_mysql/parser}/mysql_alter_table.treetop +0 -0
  62. data/lib/flydata/source_mysql/setup.rb +24 -0
  63. data/lib/flydata/source_mysql/source_pos.rb +21 -0
  64. data/lib/flydata/source_mysql/sync.rb +45 -0
  65. data/lib/flydata/source_mysql/sync_generate_table_ddl.rb +40 -0
  66. data/lib/flydata/{mysql → source_mysql}/table_ddl.rb +6 -17
  67. data/lib/flydata/source_zendesk/sync_generate_table_ddl.rb +30 -0
  68. data/lib/flydata/source_zendesk/zendesk_flydata_tabledefs.rb +133 -0
  69. data/lib/flydata/sync_file_manager.rb +132 -73
  70. data/lib/flydata/table_ddl.rb +18 -0
  71. data/spec/flydata/cli_spec.rb +1 -0
  72. data/spec/flydata/command/exclusive_runnable_spec.rb +19 -8
  73. data/spec/flydata/command/sender_spec.rb +1 -1
  74. data/spec/flydata/command/setup_spec.rb +4 -4
  75. data/spec/flydata/command/sync_spec.rb +97 -134
  76. data/spec/flydata/compatibility_check_spec.rb +16 -289
  77. data/spec/flydata/fluent-plugins/mysql/alter_table_query_handler_spec.rb +3 -3
  78. data/spec/flydata/fluent-plugins/mysql/dml_record_handler_spec.rb +1 -1
  79. data/spec/flydata/fluent-plugins/mysql/shared_query_handler_context.rb +4 -2
  80. data/spec/flydata/fluent-plugins/mysql/truncate_query_handler_spec.rb +1 -1
  81. data/spec/flydata/source_mysql/generate_source_dump_spec.rb +69 -0
  82. data/spec/flydata/source_mysql/mysql_compatibility_check_spec.rb +280 -0
  83. data/spec/flydata/{parser/mysql → source_mysql/parser}/alter_table_parser_spec.rb +2 -2
  84. data/spec/flydata/{parser/mysql → source_mysql/parser}/dump_parser_spec.rb +75 -70
  85. data/spec/flydata/source_mysql/sync_generate_table_ddl_spec.rb +137 -0
  86. data/spec/flydata/{mysql → source_mysql}/table_ddl_spec.rb +2 -2
  87. data/spec/flydata/source_spec.rb +140 -0
  88. data/spec/flydata/source_zendesk/sync_generate_table_ddl_spec.rb +33 -0
  89. data/spec/flydata/sync_file_manager_spec.rb +157 -77
  90. data/tmpl/redshift_mysql_data_entry.conf.tmpl +1 -1
  91. metadata +56 -23
  92. data/ext/flydata/parser/mysql/extconf.rb +0 -3
  93. data/lib/flydata/mysql/binlog_position.rb +0 -22
  94. data/spec/flydata/mysql/binlog_position_spec.rb +0 -35
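Taken together, the moves above (parser/mysql → source_mysql/parser, command/mysql* → source_mysql/command, plus the new data/lib/flydata/source/*.rb files) introduce a source abstraction: the sync command now goes through accessors such as source.sync, source.source_pos and source.generate_source_dump instead of reading mysql_data_entry_preference directly. A minimal sketch of that call pattern, assuming a simplified Source class — the accessor names come from the hunks below, but the class body here is illustrative, not the actual contents of data/lib/flydata/source.rb:

    # Hypothetical sketch -- shows how command code consumes the new source
    # components; the real wiring lives in data/lib/flydata/source.rb.
    module Flydata
      class ExampleSource
        def initialize(data_entry)
          @data_entry = data_entry
        end

        # One lazily built component object per concern.
        def sync
          @sync ||= SourceMysql::Sync.new(@data_entry)            # assumed constructor
        end

        def source_pos
          @source_pos ||= SourceMysql::SourcePos.new               # assumed constructor
        end

        def generate_source_dump(data_port, options)
          SourceMysql::GenerateSourceDump.new(@data_entry, data_port, options)  # assumed
        end
      end
    end

    # Command code then stays source-agnostic, e.g. (lines from the diff below):
    #   handle_initial_sync(nil, options) if source.sync.supported?
    #   context = source.generate_source_dump(dp, options)
    #   context.run_compatibility_check(fp, sync_fm.backup_dir)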
@@ -8,6 +8,7 @@ module Flydata
8
8
  def self.slop
9
9
  Slop.new do
10
10
  on 'f', 'full', 'Stop all the processes'
11
+ on 'force-run', 'Run forcefully, ignoring exclusive run info'
11
12
  end
12
13
  end
13
14
  def run
@@ -1,5 +1,4 @@
1
1
  require 'msgpack'
2
- require 'mysql2'
3
2
  require 'rest_client'
4
3
  require 'sys/filesystem'
5
4
  require 'flydata/agent'
@@ -8,17 +7,16 @@ require 'flydata/command/conf'
8
7
  require 'flydata/command/sender'
9
8
  require 'flydata/compatibility_check'
10
9
  require 'flydata/errors'
10
+ require 'flydata/source/errors'
11
11
  require 'flydata/helpers'
12
12
  require 'flydata/json'
13
13
  require 'flydata/queueable_thread'
14
14
  require 'flydata/output/forwarder'
15
- require 'flydata/parser/mysql/dump_parser'
15
+ require 'flydata/parser'
16
16
  require 'flydata/preference/data_entry_preference'
17
17
  require 'flydata/sync_file_manager'
18
18
  require 'flydata-core/table_def'
19
- require 'flydata-core/mysql/binlog_pos'
20
- require 'flydata/mysql/table_ddl'
21
- require 'flydata-core/mysql/command_generator'
19
+ require 'flydata/table_ddl'
22
20
  require 'flydata/event/api_event_sender'
23
21
  require 'flydata-core/event/event_dictionary'
24
22
  require 'sigdump/setup'
@@ -41,7 +39,7 @@ module Flydata
41
39
  STATUS_COMPLETE = 'COMPLETE'
42
40
 
43
41
  attr_reader :full_initial_sync, # true if full initial sync
44
- :full_tables, # all tables (same as data_entry['mysql_data_entry_preference']['tables'])
42
+ :full_tables, # all tables (same as the value of the `tables` data entry preference)
45
43
  :new_tables, # tables for which initial sync is not finished (pos file doesn't exist)
46
44
  :ddl_tables, # tables with generated ddl
47
45
  :input_tables # tables specified by the user
@@ -49,9 +47,6 @@ module Flydata
49
47
  #target_tables # target tables for current command(sync/reset/generate_table_ddl)
50
48
  #target_tables_for_api # target tables for calling api(tables parameter needs to be empty for full_initial_sync)
51
49
 
52
- class SyncDataEntryError < StandardError
53
- end
54
-
55
50
  # Command: flydata sync
56
51
  # - Arguments
57
52
  def self.slop
@@ -59,8 +54,8 @@ module Flydata
59
54
  on 'c', 'skip-cleanup', 'Skip server cleanup'
60
55
  on 'f', 'skip-flush', 'Skip server flush'
61
56
  on 'y', 'yes', 'Skip command prompt assuming yes to all questions. Use this for batch operation.'
62
- on 'd', 'dump-file', 'Dump mysqldump into a file. Use this for debugging after making sure the free space.' # dummy for compatibility
63
- on 's', 'dump-stream', 'Dump mysqldump stream instead of saving dump file. It might cause timeout error if db size is larger than 10GB.'
57
+ on 'd', 'dump-file', 'Save dump result into a file.'
58
+ on 's', 'dump-stream', 'Stream dump result to a pipe instead of saving a dump file. It might cause a timeout error if the db size is larger than 10GB.'
64
59
  on 'n', 'no-flydata-start', 'Don\'t start the flydata agent after initial sync.'
65
60
  #TODO : This option is temp! Should remove soon.
66
61
  on 'ff', 'Skip checking query queue and flush'
@@ -76,10 +71,9 @@ module Flydata
76
71
 
77
72
  # Public method
78
73
  # - Called from Sender#start/restart
79
- def try_mysql_sync(options)
80
- # Start initial sync
81
- handle_mysql_sync(nil, options)
82
- rescue SyncDataEntryError
74
+ def try_initial_sync(options)
75
+ handle_initial_sync(nil, options) if source.sync.supported?
76
+ rescue Source::UnsupportedSourceError
83
77
  return
84
78
  end
85
79
 
@@ -89,6 +83,7 @@ module Flydata
89
83
  Slop.new do
90
84
  on 'f', 'skip-flush', 'Skip server flush'
91
85
  on 'y', 'yes', 'Skip command prompt assuming yes to all questions. Use this for batch operation.'
86
+ on 'force-run', 'Run forcefully, ignoring exclusive run info'
92
87
  end
93
88
  end
94
89
 
@@ -118,6 +113,8 @@ EOS
118
113
  on 'y', 'yes', 'Skip command prompt assuming yes to all questions. Use this for batch operation.'
119
114
  on 'a', 'all', 'Resets Sync for all tables'
120
115
  on 'i', 'init', 'Resets unfinished initial sync'
116
+ on 'force-run', 'Run forcefully, ignoring exclusive run info'
117
+ on 'f', 'force', "Resets tables including append only tables. Use this option only when you want to stop sync for the append only table permanently."
121
118
  end
122
119
  end
123
120
 
@@ -143,6 +140,9 @@ EOS
143
140
  reset_init = false
144
141
  end
145
142
  sync_resumed = set_current_tables(tables, resume: !opts[:all])
143
+ target_tables = opts[:all] ? @full_tables : @input_tables
144
+ target_append_only_tables = target_tables & @append_only_tables
145
+ target_full_sync_tables = target_tables - @append_only_tables
146
146
 
147
147
  return if !sync_resumed && reset_init
148
148
 
@@ -152,6 +152,26 @@ EOS
152
152
  ERROR! You cannot reset tables because the previous initial sync has not been completed. Reset the unfinished initial sync first with the following command:
153
153
 
154
154
  flydata sync:reset --init
155
+
156
+ EOS
157
+ return
158
+ end
159
+
160
+ if target_append_only_tables.size > 0 && !opts[:force]
161
+ log_info_stdout <<EOS
162
+ ERROR! Reset failed because the target tables include append only table(s). Sync can no longer continue if you reset an append only table.
163
+
164
+ Append only table(s): #{target_append_only_tables.join(", ")}
165
+
166
+ If you really want to reset append-only tables, use '--force' option.
167
+
168
+ EOS
169
+
170
+ log_info_stdout <<EOS unless target_full_sync_tables.empty?
171
+ To reset all tables except for append only tables, run the following command.
172
+
173
+ flydata sync:reset #{target_full_sync_tables.join(" ")}
174
+
155
175
  EOS
156
176
  return
157
177
  end
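The new guard above is plain set arithmetic over the configured table lists. A small worked example (the table names and lists are made up for illustration):

    # Illustration only -- assumed table lists.
    @full_tables        = %w(users orders events)
    @append_only_tables = %w(events)
    @input_tables       = %w(users events)      # tables passed on the command line

    target_tables             = @input_tables   # (@full_tables when --all is given)
    target_append_only_tables = target_tables & @append_only_tables   # => ["events"]
    target_full_sync_tables   = target_tables - @append_only_tables   # => ["users"]

    # Without --force the reset aborts because "events" is append only, and the
    # command suggests `flydata sync:reset users` for the remaining table.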
@@ -203,11 +223,11 @@ EOS
203
223
  def skip
204
224
  de = data_entry
205
225
  sync_fm = create_sync_file_manager(de)
206
- binlog_path = sync_fm.binlog_path
226
+ source_pos_path = sync_fm.source_pos_path
207
227
  sync_fm.close
208
- `touch #{binlog_path}`
209
- log_info_stdout("Created an empty binlog position file.")
210
- log_info_stdout("-> #{binlog_path}")
228
+ `touch #{source_pos_path}`
229
+ log_info_stdout("Created an empty source position file.")
230
+ log_info_stdout("-> #{source_pos_path}")
211
231
  log_info_stdout("Run 'flydata start' to start continuous sync.")
212
232
  end
213
233
  run_exclusive :skip
@@ -220,6 +240,7 @@ EOS
220
240
  on 'y', 'yes', 'Skip command prompt assuming yes to all questions. Use this for batch operation.'
221
241
  on 's', 'skip-primary-key-check', 'Skip primary key check when generating DDL'
222
242
  on 'all-tables', 'Generate all table schema'
243
+ # No 'force-run' option because stdout is often redirected to a file.
223
244
  end
224
245
  end
225
246
 
@@ -228,70 +249,22 @@ EOS
228
249
  def generate_table_ddl(*tables)
229
250
  # Compatibility check
230
251
  de = data_entry
231
- dp = flydata.data_port.get
232
- Flydata::MysqlCompatibilityCheck.new(dp, de['mysql_data_entry_preference']).check
252
+ context = source.sync_generate_table_ddl(flydata.data_port.get, opts)
253
+ context.run_compatibility_check
233
254
 
234
255
  # Set instance variables
235
256
  set_current_tables(tables, include_all_tables: true)
236
257
 
237
- do_generate_table_ddl(de)
258
+ do_generate_table_ddl(context, de)
238
259
  end
239
260
  run_exclusive :generate_table_ddl
240
261
 
241
- # Command: flydata sync:fix_binlogpos
242
- # - Arguments
243
- def self.slop_fix_binlogpos
244
- Slop.new do
245
- on 'f', 'force', 'update sent binlog position file forcibly'
246
- end
247
- end
248
-
249
- # Command: flydata sync:fix_binlogpos
250
- # Set binlog path
251
- # - Entry method
252
- def fix_binlogpos
253
- de = data_entry
254
- sync_fm = create_sync_file_manager(de)
255
-
256
- if File.exists?(sync_fm.sent_binlog_path) && !opts.force?
257
- log_info_stdout("Skip creating sent binlogpos because sent position file is exist already. (#{sync_fm.sent_binlog_path})")
258
- return
259
- end
260
-
261
- if Flydata::Command::Sender.new.process_exist?
262
- log_warn_stderr("flydata is running. flydata process needs to be stopped with 'flydata stop'.")
263
- return
264
- end
265
-
266
- binlog_info = sync_fm.load_binlog
267
- if binlog_info.nil?
268
- log_info_stdout("Skip creating sent binlogpos because binlog position file is empty or invalid. (#{sync_fm.sent_binlog_path})")
269
- return
270
- end
271
-
272
- say("Updating binlog position files...")
273
- log_info("Updating binlog position files... Original binlog_info:#{binlog_info}")
274
-
275
- # Update binlog.sent.pos file
276
- # -1 is because the position in binlog.pos is the next event's position.
277
- # on the other hand the position in sent position indicates already processed.
278
- binlog_info[:pos] -= 1
279
- log_info("Updating sent position file. #{binlog_info} -> #{sync_fm.sent_binlog_path}")
280
- sync_fm.save_sent_binlog(binlog_info)
281
-
282
- # Update binlog.pos file to start from head of the current binlog file
283
- new_binlog_info = binlog_info.dup.tap{|h| h[:pos] = 4} # 4 is the first position of binlog file.
284
- log_info("Updating original position file. #{new_binlog_info} -> #{sync_fm.binlog_path}")
285
- sync_fm.save_binlog(new_binlog_info)
286
- log_info_stdout("Done!")
287
- end
288
- run_exclusive :fix_binlogpos
289
-
290
262
  # Command: flydata sync:repair
291
263
  # - Arguments
292
264
  def self.slop_repair
293
265
  Slop.new do
294
266
  on 'y', 'yes', 'Skip command prompt assuming yes to all questions. Use this for batch operation.'
267
+ on 'force-run', 'Run forcefully, ignoring exclusive run info'
295
268
  end
296
269
  end
297
270
 
@@ -301,7 +274,8 @@ EOS
301
274
  run_exclusive :repair
302
275
 
303
276
  def check(options = {})
304
- status, pos_mismatch_tables, gap_tables = _check(options)
277
+ context = source.source_pos
278
+ status, pos_mismatch_tables, gap_tables = _check(context, options)
305
279
 
306
280
  if status.include? :OK
307
281
  message = "\nNo errors are found. Sync is clean.\n"
@@ -349,7 +323,7 @@ EOS
349
323
  JSON.generate(h)
350
324
  end
351
325
 
352
- def _check(options = {})
326
+ def _check(source_pos_context, options = {})
353
327
  options[:stop_agent] ||= false
354
328
 
355
329
  set_current_tables
@@ -361,14 +335,15 @@ EOS
361
335
  abnormal_shutdown = false
362
336
  begin
363
337
  begin
364
- flush_buffer_and_stop(@full_tables, force: false, timeout: 7200) # 2 hrs. It can take more but 2 hours are long enough to pull someone's attention.
338
+ flush_buffer_and_stop(@full_tables, force: false, timeout: 55) # A short timeout is set. Otherwise, the check command and the repair command takes forever to complete.
365
339
  rescue ServerDataProcessingTimeout => e
366
340
  data_stuck_at = e.state
367
341
  end
368
342
 
369
343
  # Agent is stopped but locked. There was an abnormal shutdown.
370
344
  abnormal_shutdown = sender.agent_locked?
371
- table_status_hash = get_table_status(@full_tables)
345
+ table_status_hash = get_table_status(@full_tables, source_pos_context)
346
+ corrupt_master_pos_files = check_master_position_files
372
347
  pos_mismatch_tables = check_position_files(table_status_hash)
373
348
  gap_tables = check_gaps(table_status_hash)
374
349
  ensure
@@ -382,6 +357,9 @@ EOS
382
357
  if data_stuck_at == :UPLOAD
383
358
  status << :STUCK_AT_UPLOAD
384
359
  end
360
+ if corrupt_master_pos_files
361
+ status << :CORRUPT_MASTER_POS
362
+ end
385
363
  if gap_tables
386
364
  status << :TABLE_GAPS
387
365
  end
@@ -394,15 +372,18 @@ EOS
394
372
  if status.empty?
395
373
  status << :OK
396
374
  end
397
- [status, pos_mismatch_tables, gap_tables, table_status_hash]
375
+ [status, corrupt_master_pos_files, pos_mismatch_tables, gap_tables, table_status_hash]
398
376
  end
399
377
 
400
378
  def _repair
401
379
  de = data_entry
380
+ sync_fm = create_sync_file_manager(de)
381
+ context = source.source_pos
402
382
  set_current_tables
403
383
  # Stop agent. Check sync and make sure the state is :STUCK_AT_UPLOAD
404
384
  # Get table status for the tables.
405
- status, pos_mismatch_tables, gap_tables, table_status_hash = _check(stop_agent:true)
385
+ status, corrupt_master_pos_files, pos_mismatch_tables, gap_tables, table_status_hash = _check(context, stop_agent:true)
386
+
406
387
  if status.include? :STUCK_AT_PROCESS
407
388
  e = AgentError.new("Data is stuck while processing")
408
389
  e.description = <<EOS
@@ -417,6 +398,17 @@ EOS
417
398
  return
418
399
  end
419
400
 
401
+ if status.include?(:ABNORMAL_SHUTDOWN) && status.uniq.length == 1
402
+ # abnormal shutdown occurred, but all other conditions are healthy.
403
+ log_info_stdout ""
404
+ log_info_stdout "Agent process was shut down unexpectedly"
405
+ log_info_stdout "but sync is in good condition. Nothing to repair."
406
+
407
+ # Remove the lock file if exists.
408
+ File.delete(FLYDATA_LOCK) if File.exists?(FLYDATA_LOCK)
409
+ return
410
+ end
411
+
420
412
  gt = []
421
413
  pt = []
422
414
  gt = gap_tables.collect{|bt| bt[:table] } if gap_tables
@@ -433,7 +425,11 @@ EOS
433
425
  #{tables.join("\n ")}
434
426
 
435
427
  EOS
428
+ log_info_stdout <<EOS if corrupt_master_pos_files
429
+ - Sync is broken due to corrupt master position files
430
+ #{corrupt_master_pos_files.join("\n ")}
436
431
 
432
+ EOS
437
433
  log_info_stdout <<EOS if status.include? :ABNORMAL_SHUTDOWN
438
434
  - Agent process was not shut down correctly. Files may be corrupt.
439
435
 
@@ -441,74 +437,73 @@ EOS
441
437
 
442
438
  return unless ask_yes_no("Proceed?")
443
439
 
444
- oldest_binlog = get_oldest_available_binlog
440
+ oldest_source_pos = get_oldest_available_source_pos
445
441
  unrepairable_tables = []
446
- # Determine the master binlog positions
447
- sent_binlog_pos = nil
442
+ # Determine the master source positions
443
+ sent_source_pos = nil
448
444
  @full_tables.each do |table|
449
- binlog_str = table_status_hash[table]["src_pos"]
450
- if binlog_str.nil? || binlog_str == "-"
445
+ table_source_pos = table_status_hash[table]["source_pos"]
446
+
447
+ if table_source_pos.nil? || table_source_pos.empty?
451
448
  # the table status has no src_pos (which is rare.) Skip the table
452
449
  next
453
450
  end
454
- binlog_pos = FlydataCore::Mysql::BinlogPos.new(binlog_str)
455
451
  if tables.empty?
456
- if sent_binlog_pos.nil? || sent_binlog_pos < binlog_pos
457
- sent_binlog_pos = binlog_pos
452
+ if sent_source_pos.nil? || sent_source_pos < table_source_pos
453
+ sent_source_pos = table_source_pos
458
454
  end
459
455
  else
460
456
  if tables.include?(table)
461
- if sent_binlog_pos.nil? || sent_binlog_pos > binlog_pos
462
- if oldest_binlog && binlog_pos < oldest_binlog
457
+ if sent_source_pos.nil? || sent_source_pos > table_source_pos
458
+ if oldest_source_pos && table_source_pos < oldest_source_pos
463
459
  unrepairable_tables << table
464
460
  else
465
- sent_binlog_pos = binlog_pos
461
+ sent_source_pos = table_source_pos
466
462
  end
467
463
  end
468
464
  end
469
465
  end
470
466
  end
471
- # if sent_binlog_pos is nil, it means sync has started for none of tables. No need
467
+ # if sent_source_pos is nil, it means sync has started for none of the tables. No need
472
468
  # to repair positions nor clean buffer data.
473
- if sent_binlog_pos
474
- if oldest_binlog && sent_binlog_pos < oldest_binlog
475
- e = AgentError.new("Repair failed due to expired binlog")
469
+ if sent_source_pos
470
+ if oldest_source_pos && sent_source_pos < oldest_source_pos
471
+ e = AgentError.new("Repair failed due to expired source position")
476
472
  e.description = <<EOS
477
- Repair failed because the starting binlog position `#{sent_binlog_pos} no longer exists. Run full initial sync instead.
473
+ Repair failed because the starting source position `#{sent_source_pos}` no longer exists. Run full initial sync instead.
478
474
  EOS
479
475
  raise e
480
476
  end
481
- master_binlog_pos = FlydataCore::Mysql::BinlogPos.new(sent_binlog_pos.filename, 4)
482
-
477
+ master_source_pos = context.resume_pos(sent_source_pos)
478
+
483
479
  # Delete agent buffer
484
480
  log_info_stdout "Deleting data in the agent buffer..."
485
481
  files = Flydata::Agent.new(FLYDATA_HOME).delete_buffer_files
486
482
  unless files.empty?
487
483
  $log.debug "Deleted buffer files\n " + files.join("\n ")
488
484
  end
489
-
485
+
490
486
  # Delete query queue items for the tables
491
487
  log_info_stdout "Deleting data stuck in the server buffer..."
492
488
  cleanup_sync_server(de, tables, queue_only: true) unless tables.empty?
493
-
494
- # Save the positions (binlog and seq)
489
+
490
+ # Save the positions (source_pos and seq)
495
491
  log_info_stdout "Fixing table positions..."
496
492
  @full_tables.each do |table|
497
- binlog_str = table_status_hash[table]["src_pos"]
498
- if binlog_str.nil? || binlog_str == "-"
493
+ table_source_pos = table_status_hash[table]["source_pos"]
494
+ if table_source_pos.nil? || table_source_pos.empty?
499
495
  # no init sync has happened to the table. No need to set positions.
500
496
  next
501
497
  end
502
- binlog_pos = FlydataCore::Mysql::BinlogPos.new(binlog_str)
503
498
  pos = table_status_hash[table]["seq"]
504
- old_binlog_pos, old_pos = save_table_positions(table, binlog_pos, pos)
499
+ old_source_pos, old_pos = save_table_positions(table, table_source_pos, pos, context)
505
500
  if pos.to_i != old_pos.to_i && !tables.include?(table)
506
501
  $log.debug "Fixed broken table position. table:#{table} pos:#{old_pos} -> #{pos}"
507
502
  end
508
503
  end
509
-
504
+
510
505
  log_info_stdout "Fixing the master position files..."
511
- save_master_binlog_positions(master_binlog_pos, sent_binlog_pos)
506
+ save_master_source_positions(master_source_pos, sent_source_pos, context)
512
507
  end
513
508
 
514
509
  # Remove the lock file if exists.
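The position-selection loop in this hunk boils down to: when repairing all tables, resume from the newest per-table sent position; when repairing only the broken tables, resume from the oldest of their positions, treating any table whose position pre-dates the oldest still-available source position as unrepairable. A hedged sketch of the same selection, with integers standing in for comparable source position objects:

    # Sketch only -- integers stand in for comparable source positions.
    table_pos = { "users" => 120, "orders" => 150, "events" => 90 }

    # Full repair: take the newest position that was already sent.
    sent_pos_all = table_pos.values.max                    # => 150

    # Targeted repair of broken tables: take the oldest usable position.
    broken           = %w(users events)
    oldest_available = 100
    usable           = table_pos.values_at(*broken).select { |p| p >= oldest_available }
    sent_pos_broken  = usable.min                          # => 120
    # "events" (position 90) would be reported as unrepairable because its
    # source position has already expired.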
@@ -519,13 +514,11 @@ EOS
519
514
 
520
515
  # Initial sync
521
516
 
522
- def handle_mysql_sync(tables = nil, options = {})
517
+ def handle_initial_sync(tables = nil, options = {})
523
518
  unless Flydata::Command::Sender.new.wait_until_server_ready
524
519
  raise Timeout::Error, "Timed out waiting for the server side to become active. Please try again later."
525
520
  end
526
521
 
527
- de = data_entry
528
-
529
522
  # Setup instance variables
530
523
  sync_resumed = set_current_tables(tables, resume: true)
531
524
 
@@ -533,7 +526,7 @@ EOS
533
526
  # skip confirmation prompts and resume sync right away.
534
527
  # #initial_sync knows where to resume from.
535
528
  log_info_stdout("Resuming the initial sync...")
536
- initial_sync(de, options.merge(sync_resumed: true))
529
+ initial_sync(options.merge(sync_resumed: true))
537
530
  elsif !@unsynced_tables.empty?
538
531
  show_purpose_name
539
532
  unsynced_table_message = "We've noticed that these tables have not been synced yet: #{@unsynced_tables.join(", ")}\n"
@@ -546,7 +539,7 @@ EOS
546
539
  end
547
540
  log_info_stdout(unsynced_table_message)
548
541
  if ask_yes_no("Do you want to run initial sync on all of these tables now?")
549
- initial_sync(de, options.merge(sync_resumed: false))
542
+ initial_sync(options.merge(sync_resumed: false))
550
543
  else
551
544
  #If generate_table_ddl has not been run for these tables, warn user
552
545
  unless @ddl_tables.empty?
@@ -559,7 +552,8 @@ EOS
559
552
  end
560
553
  end
561
554
 
562
- def initial_sync(de, opt)
555
+ def initial_sync(opt)
556
+ de = data_entry
563
557
  # Load sync information from file
564
558
  validate_initial_sync_status
565
559
  begin
@@ -576,7 +570,7 @@ EOS
576
570
  # flush is unnecessary for full initial sync because it's guaranteed
577
571
  # that agent is stopped with no leftover buffer.
578
572
  end
579
- sync_mysql_to_redshift(de, opt)
573
+ perform_initial_sync(de, opt)
580
574
  rescue ServerDataProcessingTimeout => e
581
575
  ee = ServerDataProcessingTimeout.new("Delayed Data Processing")
582
576
  ee.description = <<EOS
@@ -593,7 +587,7 @@ EOS
593
587
  end
594
588
 
595
589
  # Don't call the method unless init sync needs to be run for a table
596
- def sync_mysql_to_redshift(de, options = {})
590
+ def perform_initial_sync(de, options = {})
597
591
  dp = flydata.data_port.get
598
592
  sync_fm = create_sync_file_manager(de)
599
593
 
@@ -602,60 +596,54 @@ EOS
602
596
  Flydata::Command::Conf.new.copy_templates
603
597
  end
604
598
 
605
- generate_mysqldump(de, sync_fm, !opts.dump_stream?, options) do |mysqldump_io, binlog_pos, db_bytesize|
606
- parse_mysqldump_and_send(mysqldump_io, dp, de, sync_fm, binlog_pos, db_bytesize)
599
+ generate_source_dump(de, sync_fm, !opts.dump_stream?, options) do |dump_io, source_pos|
600
+ parse_dump_and_send(dump_io, dp, de, sync_fm, source_pos)
607
601
  end
608
- complete_mysqldump_processing(dp, de, sync_fm)
602
+ complete_dump_processing(sync_fm)
609
603
  end
610
604
 
611
- def generate_mysqldump(de, sync_fm, file_dump = true, options = {},
612
- &block)
613
- # validate parameter
614
- %w(host username database).each do |k|
615
- if de['mysql_data_entry_preference'][k].to_s.empty?
616
- raise "'#{k}' is required. Set the value in the conf file " +
617
- "-> #{Flydata::Preference::DataEntryPreference.conf_path(de)}"
618
- end
619
- end
605
+ def generate_source_dump(de, sync_fm, file_dump = true, options = {},
606
+ &dump_ready_callback)
607
+ dp = flydata.data_port.get
608
+ context = source.generate_source_dump(dp, options)
620
609
 
621
610
  # Status is parsed or complete -> skip dump and parse
622
611
  dump_pos_info = sync_fm.load_dump_pos
623
612
  if dump_pos_info[:status] == STATUS_PARSED || dump_pos_info[:status] == STATUS_COMPLETE
624
- initialize_binlog_positions_and_call_callback(
625
- nil, options[:binlog_ready_callback], sync_fm)
613
+ initialize_source_positions_and_call_callback(
614
+ nil, options[:source_pos_ready_callback], sync_fm)
615
+ return
626
616
  end
627
617
 
628
- # mysqldump file exists -> skip dump
629
- dp = flydata.data_port.get
618
+ # dump file exists -> skip dump
630
619
  fp = sync_fm.dump_file_path
631
620
  if file_dump && File.exists?(fp) && File.size(fp) > 0
632
- initialize_binlog_positions_and_call_callback(
633
- nil, options[:binlog_ready_callback], sync_fm)
634
- return call_block_or_return_io(fp, &block)
621
+ initialize_source_positions_and_call_callback(
622
+ nil, options[:source_pos_ready_callback], sync_fm)
623
+ return call_block_or_return_io(fp, &dump_ready_callback)
635
624
  end
636
625
 
637
- log_info_stdout("Checking MySQL server connection and configuration...")
638
- Flydata::MysqlCompatibilityCheck.new(dp, de['mysql_data_entry_preference'], dump_dir: fp, backup_dir: sync_fm.backup_dir).check
626
+ log_info_stdout("Checking the data source connection and configuration...")
627
+ context.run_compatibility_check(fp, sync_fm.backup_dir)
639
628
  log_info_stdout("Checking the database size...")
640
629
 
641
- db_bytesize = Flydata::Parser::Mysql::DatabaseSizeCheck.new(de['mysql_data_entry_preference'].merge('tables' => target_tables)).get_db_bytesize
630
+ db_bytesize = context.dump_size(target_tables)
642
631
 
643
632
  tables = target_tables
644
633
  tables ||= '<all tables>'
645
- data_servers = de['mysql_data_entry_preference']['data_servers'] ? "\n data servers: #{de['mysql_data_entry_preference']['data_servers']}" : ""
634
+ data_servers = source.sync.data_servers
635
+ data_servers = data_servers ? "\n data servers: #{data_servers}" : ""
646
636
 
647
637
  confirmation_text = <<-EOM
648
638
 
649
639
  FlyData Sync will start synchronizing the following database tables
650
- host: #{de['mysql_data_entry_preference']['host']}
651
- port: #{de['mysql_data_entry_preference']['port']}
652
- username: #{de['mysql_data_entry_preference']['username']}
653
- database: #{de['mysql_data_entry_preference']['database']}
654
- tables: #{tables.join(", ")}#{data_servers}
655
640
  EOM
641
+ context.confirmation_items.each do |name, value|
642
+ confirmation_text << " %-14s%s\n" % ["#{name}:", value.to_s]
643
+ end
656
644
 
657
- confirmation_text << <<-EOM if de['mysql_data_entry_preference']['ssl_ca']
658
- ssl: Yes
645
+ confirmation_text << <<-EOM
646
+ tables: #{tables.join(", ")}#{data_servers}
659
647
  EOM
660
648
  confirmation_text << <<-EOM if file_dump
661
649
  dump file: #{fp}
@@ -674,7 +662,7 @@ EOM
674
662
  log_warn_stderr("!!WARNING There may not be enough disk space for a DB dump. We recommend 1GB free disk space after the dump. free disk space:#{as_size(free_disk_bytesize)}(#{free_disk_bytesize} byte) /" +
675
663
  " db size:#{as_size(db_bytesize)}(#{db_bytesize} byte)")
676
664
  unless ask_yes_no('Do you want to continue?')
677
- log_warn_stderr("To change the dump file directory, delete '#' and modify the path of 'mysqldump_dir:' in '#{Preference::DataEntryPreference.conf_path(de)}'")
665
+ log_warn_stderr("To change the dump file directory, delete '#' and modify the path of 'dump_dir:' in '#{Preference::DataEntryPreference.conf_path(de)}'")
678
666
  exit 1
679
667
  end
680
668
  end
@@ -692,18 +680,17 @@ EOM
692
680
  data_entry_id: de['id'],
693
681
  data_port_id: de['data_port_id'])
694
682
  end
695
- dump_generator = Flydata::Parser::Mysql::MysqlDumpGeneratorNoMasterData.new(de['mysql_data_entry_preference'].merge('tables' => target_tables))
696
683
  if file_dump
697
- binlog_pos = nil
684
+ source_pos = nil
698
685
  begin
699
- dump_generator.dump(fp) do |_io, _binlog_pos|
700
- binlog_pos = _binlog_pos
701
- initialize_binlog_positions_and_call_callback(binlog_pos, options[:binlog_ready_callback], sync_fm)
686
+ context.dump(target_tables, fp) do |_io, _source_pos|
687
+ source_pos = _source_pos
688
+ initialize_source_positions_and_call_callback(source_pos, options[:source_pos_ready_callback], sync_fm)
702
689
  end
703
690
  log_info_stdout(" -> Database dump done")
704
691
  rescue Exception => e
705
692
  #Catch all exceptions including SystemExit and Interrupt.
706
- log_info_stdout "Quit while running mysqldump, deleting dump file..."
693
+ log_info_stdout "Quit while running dump, deleting dump file..."
707
694
  sync_fm.delete_dump_file
708
695
  log_info_stdout "Dump file deleted. To restart the FlyData Agent, Following tables may need reset : #{target_tables}"
709
696
  if (target_tables.nil? or target_tables.empty?)
@@ -713,11 +700,11 @@ EOM
713
700
  end
714
701
  raise e
715
702
  end
716
- call_block_or_return_io(fp, binlog_pos, &block)
703
+ call_block_or_return_io(fp, source_pos, &dump_ready_callback)
717
704
  else
718
- dump_generator.dump do |io, binlog_pos|
719
- initialize_binlog_positions_and_call_callback(binlog_pos, options[:binlog_ready_callback], sync_fm)
720
- block.call(io, binlog_pos, db_bytesize)
705
+ context.dump(target_tables) do |io, source_pos|
706
+ initialize_source_positions_and_call_callback(source_pos, options[:source_pos_ready_callback], sync_fm)
707
+ dump_ready_callback.call(io, source_pos)
721
708
  end
722
709
  end
723
710
  else
@@ -725,16 +712,16 @@ EOM
725
712
  end
726
713
  end
727
714
 
728
- def initialize_binlog_positions_and_call_callback(binlog_pos, callback, sync_fm)
729
- if binlog_pos
730
- initialize_positions(sync_fm, binlog_pos)
715
+ def initialize_source_positions_and_call_callback(source_pos, callback, sync_fm)
716
+ if source_pos
717
+ initialize_positions(sync_fm, source_pos)
731
718
  else
732
- # no binlog_pos was given because dump was completed in the
719
+ # no source_pos was given because dump was completed in the
733
720
  # previous init sync attempt. Position files must be there already
734
721
  # so no initialization is necessary.
735
722
  end
736
723
  if callback
737
- callback.call(binlog_pos)
724
+ callback.call
738
725
  end
739
726
  end
740
727
 
@@ -744,11 +731,11 @@ EOM
744
731
  stat.block_size * stat.blocks_available
745
732
  end
746
733
 
747
- def call_block_or_return_io(fp, binlog_pos = nil, &block)
748
- if block
734
+ def call_block_or_return_io(fp, source_pos= nil, &source_pos_block)
735
+ if source_pos_block
749
736
  f_io = open_file_io(fp)
750
737
  begin
751
- block.call(f_io, binlog_pos)
738
+ source_pos_block.call(f_io, source_pos)
752
739
  return nil
753
740
  ensure
754
741
  f_io.close rescue nil
@@ -781,35 +768,35 @@ EOM
781
768
  # <- checkpoint
782
769
  #...
783
770
  #CREATE TABLE ...
784
- def parse_mysqldump_and_send(mysqldump_io, dp, de, sync_fm, binlog_pos, db_bytesize = nil)
771
+ def parse_dump_and_send(dump_io, dp, de, sync_fm, source_pos)
772
+
773
+ context = source.parse_dump_and_send
785
774
  # Prepare forwarder
786
775
  de_tag_name = de["tag_name#{env_suffix}"]
787
776
  server_port = dp['server_port']
788
- servers = if de['mysql_data_entry_preference']['data_servers']
789
- de['mysql_data_entry_preference']['data_servers'].split(',')
790
- else
791
- dp["servers#{env_suffix}"].collect{|s| "#{s}:#{server_port}"}
792
- end
793
- forwarder_type = de['mysql_data_entry_preference']['forwarder'] ||
777
+ data_servers = source.sync.data_servers
778
+ servers = data_servers ? data_servers.split(',') :
779
+ dp["servers#{env_suffix}"].collect{|s| "#{s}:#{server_port}"}
780
+ forwarder_type = source.sync.forwarder ||
794
781
  (dp['ssl_enabled'] ? 'sslforwarder' : 'tcpforwarder')
795
782
  forwarder = Flydata::Output::ForwarderFactory.create(forwarder_type, de_tag_name, servers)
796
783
 
797
784
  # Load dump.pos file for resume
798
785
  dump_pos_info = sync_fm.load_dump_pos
799
- option = dump_pos_info || {}
800
- if option[:table_name] && option[:last_pos].to_i != -1
801
- binlog_pos = option[:binlog_pos]
802
- log_info_stdout("Resuming... Last processed table: #{option[:table_name]}")
786
+ dump_pos_info ||= {}
787
+ if dump_pos_info[:table_name] && dump_pos_info[:last_pos].to_i != -1
788
+ source_pos = dump_pos_info[:source_pos]
789
+ log_info_stdout("Resuming... Last processed table: #{dump_pos_info[:table_name]}")
803
790
  else
804
- option[:binlog_pos] = binlog_pos
791
+ dump_pos_info[:source_pos] = source_pos
805
792
  end
806
- if option[:mysql_table] && option[:mysql_table].value_converters.nil?
807
- # Old marshal dumped mysql_table object may not have value_converters
808
- option[:mysql_table].set_value_converters(FlydataCore::TableDef::MysqlTableDef::VALUE_CONVERTERS)
793
+ if dump_pos_info[:source_table] && dump_pos_info[:source_table].value_converters.nil?
794
+ # Old marshal dumped source_table object may not have value_converters
795
+ dump_pos_info[:source_table].set_value_converters(context.value_converters)
809
796
  end
810
- if option[:mysql_table] && option[:mysql_table].column_names.nil?
811
- # Old marshal dumped mysql_table object may not have column_names
812
- option[:mysql_table].set_column_names
797
+ if dump_pos_info[:source_table] && dump_pos_info[:source_table].column_names.nil?
798
+ # Old marshal dumped source_table object may not have column_names
799
+ dump_pos_info[:source_table].set_column_names
813
800
  end
814
801
  log_info_stdout("Sending data to FlyData Server...")
815
802
 
@@ -831,40 +818,41 @@ EOM
831
818
  send_record_counts_threads = []
832
819
  begin
833
820
  RubyProf.start if RUN_PROFILE and defined?(RubyProf) and not RubyProf.running?
834
- Flydata::Parser::Mysql::MysqlDumpParser.new(option).parse(
835
- mysqldump_io,
821
+ context.parse_dump(
822
+ dump_pos_info,
823
+ dump_io,
836
824
  # create table
837
- Proc.new { |mysql_table|
838
- mysql_table.set_value_converters(FlydataCore::TableDef::MysqlTableDef::VALUE_CONVERTERS)
839
- mt = mysql_table.clone
840
- mt.value_converters = {}
825
+ Proc.new { |source_table|
826
+ source_table.set_value_converters(context.value_converters)
827
+ st = source_table.clone
828
+ st.value_converters = {}
841
829
  # don't let the closure hold these objects as it causes memory leak
842
- mysql_table = nil
830
+ source_table = nil
843
831
  output_thread.run do
844
832
  tmp_num_inserted_record = 0
845
- # dump mysql_table for resume
833
+ # dump source_table for resume
846
834
  #TODO: make it option
847
- sync_fm.save_mysql_table_marshal_dump(mt)
848
- log_info_stdout(" -> Started sending data for table '#{mt.table_name}'")
835
+ sync_fm.save_source_table_marshal_dump(st)
836
+ log_info_stdout(" -> Started sending data for table '#{st.table_name}'")
849
837
  end
850
838
  },
851
839
  # insert record
852
- Proc.new { |mysql_table, values_set|
853
- mysql_table_name = mysql_table.table_name
840
+ Proc.new { |source_table, values_set|
841
+ source_table_name = source_table.table_name
854
842
  records = values_set.collect do |values|
855
- convert_to_flydata_values(mysql_table, values)
856
- json = JSON.generate_kv_pairs(mysql_table.column_names, values)
843
+ convert_to_flydata_values(source_table, values)
844
+ json = JSON.generate_kv_pairs(source_table.column_names, values)
857
845
  values.clear
858
846
  unless json.start_with? '{'
859
847
  raise DumpParseError.new("Broken JSON record json:#{json[0..100]}")
860
848
  end
861
- {table_name: mysql_table_name, log: json}
849
+ {table_name: source_table_name, log: json}
862
850
  end
863
851
  # release resources to prevent the closure from keeping it
864
852
  values_set.clear
865
853
  values_set = nil
866
- mysql_table = nil
867
- mysql_table_name = nil
854
+ source_table = nil
855
+ source_table_name = nil
868
856
  output_thread.run do
869
857
  forwarder.emit(records)
870
858
  tmp_num_inserted_record += 1
@@ -873,9 +861,9 @@ EOM
873
861
  true
874
862
  },
875
863
  # checkpoint
876
- Proc.new { |mysql_table, last_pos, bytesize, binlog_pos, state, substate|
877
- table_name = mysql_table.nil? ? '' : mysql_table.table_name
878
- mysql_table = nil
864
+ Proc.new { |source_table, last_pos, bytesize, source_pos, state, substate|
865
+ table_name = source_table.nil? ? '' : source_table.table_name
866
+ source_table = nil
879
867
  output_thread.run do
880
868
  skip = skip_checkpoint
881
869
  skip_checkpoint = false
@@ -894,12 +882,12 @@ EOM
894
882
  end
895
883
 
896
884
  # save check point
897
- sync_fm.save_dump_pos(STATUS_PARSING, table_name, last_pos, binlog_pos, state, substate)
885
+ sync_fm.save_dump_pos(STATUS_PARSING, table_name, last_pos, source_pos, state, substate)
898
886
 
899
887
  # send record count for the table
900
888
  if table_name.to_s != '' &&
901
- state == Flydata::Parser::Mysql::MysqlDumpParser::State::CREATE_TABLE
902
- # all records for `mysql_table` have been sent
889
+ state == Flydata::Parser::State::CREATE_TABLE
890
+ # all records for `source_table` have been sent
903
891
  send_record_counts(de, sync_fm, table_name,
904
892
  send_record_counts_threads)
905
893
  log_info_stdout(" -> Finished sending data for table '#{table_name}'...")
@@ -928,7 +916,7 @@ EOM
928
916
  log_info_stdout(" -> Done")
929
917
  #log_info_stdout(" -> Records sent to the server")
930
918
  #log_info_stdout(" -> #{sync_fm.load_stats}")
931
- sync_fm.save_dump_pos(STATUS_PARSED, '', dump_file_size, binlog_pos)
919
+ sync_fm.save_dump_pos(STATUS_PARSED, '', dump_file_size, source_pos)
932
920
 
933
921
  if ENV['FLYDATA_BENCHMARK']
934
922
  bench_end_time = Time.now
@@ -937,15 +925,15 @@ EOM
937
925
  end
938
926
  end
939
927
 
940
- def complete_mysqldump_processing(dp, de, sync_fm)
928
+ def complete_dump_processing(sync_fm)
941
929
  return if ENV['FLYDATA_BENCHMARK']
942
930
 
943
931
  # Status is not parsed -> don't complete
944
932
  dump_pos_info = sync_fm.load_dump_pos
945
933
  return unless dump_pos_info[:status] == STATUS_PARSED
946
- binlog_pos = dump_pos_info[:binlog_pos]
934
+ source_pos = dump_pos_info[:source_pos]
947
935
 
948
- sync_fm.save_dump_pos(STATUS_COMPLETE, '', -1, binlog_pos)
936
+ sync_fm.save_dump_pos(STATUS_COMPLETE, '', -1, source_pos)
949
937
  end
950
938
 
951
939
  # option: timeout, tables
@@ -1015,8 +1003,8 @@ EOM
1015
1003
 
1016
1004
  def complete(de)
1017
1005
  sync_fm = create_sync_file_manager(de)
1018
- info = sync_fm.load_dump_pos
1019
- if info[:status] == STATUS_COMPLETE
1006
+ dump_pos_info = sync_fm.load_dump_pos
1007
+ if dump_pos_info[:status] == STATUS_COMPLETE
1020
1008
  send_record_counts(de, sync_fm)
1021
1009
  sync_fm.delete_dump_file
1022
1010
  sync_fm.backup_dump_dir
@@ -1058,18 +1046,18 @@ EOM
1058
1046
  end
1059
1047
  end
1060
1048
 
1061
- def initialize_positions(sync_fm, binlog_pos)
1062
- sync_fm.save_table_binlog_pos(target_tables, binlog_pos)
1049
+ def initialize_positions(sync_fm, source_pos)
1050
+ sync_fm.save_table_source_pos(target_tables, source_pos)
1063
1051
 
1064
1052
  if @full_initial_sync
1065
- sync_fm.save_binlog(binlog_pos)
1053
+ sync_fm.save_source_pos(source_pos)
1066
1054
  end
1067
- sync_fm.install_table_binlog_files(target_tables)
1055
+ sync_fm.install_table_source_pos_files(target_tables)
1068
1056
  sync_fm.reset_table_position_files(target_tables)
1069
1057
  end
1070
1058
 
1071
- def convert_to_flydata_values(mysql_table, values)
1072
- vc = mysql_table.value_converters
1059
+ def convert_to_flydata_values(source_table, values)
1060
+ vc = source_table.value_converters
1073
1061
  return if vc.empty?
1074
1062
 
1075
1063
  vc.each_pair do |index, converter|
@@ -1127,40 +1115,26 @@ EOM
1127
1115
 
1128
1116
  # Generate table ddl
1129
1117
 
1130
- def do_generate_table_ddl(de)
1131
- if `which mysqldump`.empty?
1132
- raise "mysqldump is not installed. mysqldump is required to run the command"
1133
- end
1134
-
1118
+ def do_generate_table_ddl(context, de)
1135
1119
  error_list = []
1136
1120
  schema_name = (de['schema_name'] || nil)
1137
- mp = de['mysql_data_entry_preference']
1138
1121
 
1139
1122
  tables = opts.all_tables? ? @full_tables : (@input_tables.empty? ? @unsynced_tables : @input_tables)
1140
1123
 
1141
1124
  raise "There are no valid unsynced tables, if you want to just get ddl for all tables, please run \`flydata sync:generate_table_ddl --all-tables\`" if tables.empty?
1142
1125
 
1143
- %w(host username database).each do |conf_name|
1144
- raise "MySQL `#{conf_name}` is neither defined in the data entry nor the local config file" if mp[conf_name].to_s.empty?
1145
- end
1146
1126
  if tables.empty?
1147
1127
  raise "`tables` (or `tables_append_only`) is neither defined in the data entry nor the local config file"
1148
1128
  end
1149
1129
 
1130
+ flydata_tabledefs, error_list = context.generate_flydata_tabledef(tables,
1131
+ skip_primary_key_check: opts.skip_primary_key_check?)
1150
1132
  create_flydata_ctl_table = true
1151
- option = {skip_primary_key_check: opts.skip_primary_key_check?}.merge(mp)
1152
- missing_tables = FlydataCore::Mysql::CommandGenerator.each_mysql_tabledef(tables, option) do |mysql_tabledef, error|
1153
- if error
1154
- error_list << error.err_hash
1155
- next
1156
- end
1157
- flydata_tabledef = mysql_tabledef.to_flydata_tabledef
1158
- puts FlydataCore::TableDef::SyncRedshiftTableDef.from_flydata_tabledef(flydata_tabledef, flydata_ctl_table: create_flydata_ctl_table, schema_name: schema_name, ctl_only: opts.ctl_only?)
1133
+ append_only = tables & @append_only_tables
1134
+ flydata_tabledefs.each do |flydata_tabledef|
1135
+ puts FlydataCore::TableDef::SyncRedshiftTableDef.from_flydata_tabledef(flydata_tabledef, flydata_ctl_table: create_flydata_ctl_table, schema_name: schema_name, ctl_only: opts.ctl_only?, skip_drop_table: append_only.include?(flydata_tabledef[:table_name]))
1159
1136
  create_flydata_ctl_table = false
1160
1137
  end
1161
- if missing_tables
1162
- missing_tables.each {|missing_table| error_list << { error: 'table does not exist in the MySQL database', table: missing_table } }
1163
- end
1164
1138
  table_validity_hash = {}
1165
1139
  tables_without_error = tables
1166
1140
  unless error_list.empty?
@@ -1190,11 +1164,10 @@ EOS
1190
1164
  flydata.data_entry.update_table_validity(de['id'], {updated_tables: table_validity_hash}) unless table_validity_hash.empty?
1191
1165
 
1192
1166
  sync_fm = create_sync_file_manager(de)
1193
- sync_fm.save_generated_ddl(tables_without_error, Mysql::TableDdl::VERSION)
1167
+ sync_fm.save_generated_ddl(tables_without_error, TableDdl::VERSION)
1194
1168
  sync_fm.close
1195
1169
  end
1196
1170
 
1197
-
1198
1171
  ALL_DONE_MESSAGE_TEMPLATE = <<-EOM
1199
1172
 
1200
1173
  Congratulations! FlyData has started synchronizing your database tables.
@@ -1228,20 +1201,21 @@ Thank you for using FlyData!
1228
1201
  # Utility methods
1229
1202
 
1230
1203
  def set_current_tables(input_tables = nil, options = {})
1231
- de = data_entry
1232
- sync_fm = create_sync_file_manager(de)
1204
+ sync_fm = create_sync_file_manager
1233
1205
  sync_info = sync_fm.load_sync_info
1234
1206
  sync_resumed = options[:resume] && !!sync_info
1235
1207
 
1208
+ table_lists = source.sync.table_lists
1209
+
1236
1210
  #full_tables will either include all tables including invalid tables or all valid tables that aren't new tables
1237
1211
 
1238
1212
  # The 'new_tables' list may include tables which have already been
1239
1213
  # synced for backward compatibility reasons.
1240
1214
  # Filter out such tables so that we get a list of 'new' tables with no
1241
1215
  # position file.
1242
- real_new_tables = sync_fm.get_new_table_list(de['mysql_data_entry_preference']['new_tables'], "pos")
1243
- @full_tables = options[:include_all_tables] ? de['mysql_data_entry_preference']['tables'] + de['mysql_data_entry_preference']['invalid_tables'] :
1244
- de['mysql_data_entry_preference']['tables'] - real_new_tables
1216
+ real_new_tables = sync_fm.get_new_table_list(table_lists['new_tables'], "pos")
1217
+ @full_tables = options[:include_all_tables] ? table_lists['tables'] + table_lists['invalid_tables'] :
1218
+ table_lists['tables'] - real_new_tables
1245
1219
 
1246
1220
  @unsynced_tables = sync_fm.get_new_table_list(@full_tables, "pos") # Get list of tables that do not have a .pos file
1247
1221
  @ddl_tables = sync_fm.get_new_table_list(@full_tables, "generated_ddl")
@@ -1250,6 +1224,8 @@ Thank you for using FlyData!
1250
1224
  @input_tables ||= []
1251
1225
  @full_initial_sync = sync_resumed ? sync_info[:initial_sync] :
1252
1226
  (@unsynced_tables == @full_tables)
1227
+ @append_only_tables = table_lists['tables_append_only']
1228
+ @append_only_tables ||= []
1253
1229
 
1254
1230
  sync_fm.close
1255
1231
 
@@ -1292,38 +1268,15 @@ Thank you for using FlyData!
1292
1268
  end
1293
1269
 
1294
1270
  def data_entry
1295
- @de ||= retrieve_sync_data_entry
1296
- end
1297
-
1298
- def retrieve_sync_data_entry
1299
- de = retrieve_data_entries.first
1300
- raise "There are no data entries." unless de
1301
- case de['type']
1302
- when 'RedshiftMysqlDataEntry'
1303
- mp = de['mysql_data_entry_preference']
1304
-
1305
- if mp['tables_append_only']
1306
- mp['tables'] = (mp['tables'].split(",") + mp['tables_append_only'].split(",")).uniq
1307
- else
1308
- mp['tables'] = mp['tables'].split(",").uniq
1309
- end
1310
- mp['invalid_tables'] = mp['invalid_tables'].kind_of?(String) ? mp['invalid_tables'].split(",").uniq : []
1311
- mp['new_tables'] = mp['new_tables'].kind_of?(String) ? mp['new_tables'].split(",").uniq : []
1312
-
1313
- unless mp['ssl_ca_content'].to_s.strip.empty?
1314
- sync_fm = create_sync_file_manager(de)
1315
- sync_fm.save_ssl_ca(mp['ssl_ca_content'])
1316
- mp['ssl_ca'] = sync_fm.ssl_ca_path
1317
- mp['sslca'] = mp['ssl_ca']
1318
- end
1319
- else
1320
- raise SyncDataEntryError, "No supported data entry. Only mysql-redshift sync is supported."
1271
+ unless @sync_de
1272
+ @sync_de = super
1273
+ source.sync.setup # updates the data entry contents
1321
1274
  end
1322
- de
1275
+ @sync_de
1323
1276
  end
1324
1277
 
1325
1278
  def create_sync_file_manager(de = data_entry)
1326
- SyncFileManager.new(de)
1279
+ SyncFileManager.new(de, source)
1327
1280
  end
1328
1281
 
1329
1282
  def verify_input_tables(input_tables, all_tables)
@@ -1335,6 +1288,19 @@ Thank you for using FlyData!
1335
1288
  raise "These tables are not registered tables: #{inval_table.join(", ")}" unless inval_table.empty?
1336
1289
  end
1337
1290
 
1291
+ def check_master_position_files
1292
+ de = data_entry
1293
+ sync_fm = create_sync_file_manager(de)
1294
+ old_master_source_pos = sync_fm.load_source_pos
1295
+ old_sent_source_pos = sync_fm.load_sent_source_pos
1296
+
1297
+ corrupt_files = []
1298
+ corrupt_files << sync_fm.source_pos_path if old_master_source_pos.nil?
1299
+ corrupt_files << sync_fm.sent_source_pos_path if old_sent_source_pos.nil?
1300
+
1301
+ corrupt_files.empty? ? nil : corrupt_files
1302
+ end
1303
+
1338
1304
  def check_position_files(table_status_hash)
1339
1305
  de = data_entry
1340
1306
  sync_fm = create_sync_file_manager(de)
@@ -1359,73 +1325,79 @@ Thank you for using FlyData!
1359
1325
  gap_tables.empty? ? nil : gap_tables
1360
1326
  end
1361
1327
 
1362
- def save_table_positions(table, binlog_pos, pos)
1328
+ def save_table_positions(table, source_pos, pos, context)
1363
1329
  de = data_entry
1364
1330
  sync_fm = create_sync_file_manager(de)
1365
- s = sync_fm.get_table_binlog_pos(table)
1366
- old_binlog_pos = s ? FlydataCore::Mysql::BinlogPos.new(s) : nil
1331
+ old_source_pos = sync_fm.get_table_source_pos(table)
1367
1332
  old_pos = sync_fm.get_table_position(table)
1368
1333
  if pos.to_i != old_pos.to_i
1369
1334
  sync_fm.save_table_position(table, pos)
1370
1335
  $log.debug "table pos updated. table:#{table} pos:#{old_pos} -> #{pos}"
1371
1336
  end
1372
- if binlog_pos != old_binlog_pos
1373
- sync_fm.save_table_binlog_pos(table, binlog_pos.to_s, destination: :positions)
1374
- $log.debug "table binlog updated. table:#{table} binlog:`#{old_binlog_pos}` -> `#{binlog_pos}`"
1337
+ if source_pos != old_source_pos
1338
+ sync_fm.save_table_source_pos(table, source_pos, destination: :positions)
1339
+ $log.debug "table source_pos updated. table:#{table} source_pos:`#{old_source_pos}` -> `#{source_pos}`"
1375
1340
  end
1376
- [old_binlog_pos, old_pos]
1341
+ [old_source_pos, old_pos]
1377
1342
  end
1378
1343
 
1379
- def save_master_binlog_positions(master_binlog_pos, sent_binlog_pos)
1344
+ def save_master_source_positions(master_source_pos, sent_source_pos, context)
1380
1345
  de = data_entry
1381
1346
  sync_fm = create_sync_file_manager(de)
1382
- s = sync_fm.load_binlog
1383
- old_master_binlog_pos = s ? FlydataCore::Mysql::BinlogPos.new(s) : nil
1384
- s = sync_fm.load_sent_binlog
1385
- old_sent_binlog_pos = s ? FlydataCore::Mysql::BinlogPos.new(s) : nil
1386
- if master_binlog_pos != old_master_binlog_pos
1387
- sync_fm.save_binlog(master_binlog_pos.to_s)
1388
- $log.debug "master binlog positions updated. `#{old_master_binlog_pos}` -> `#{master_binlog_pos}`"
1389
- end
1390
- if sent_binlog_pos != old_sent_binlog_pos
1391
- sync_fm.save_sent_binlog(sent_binlog_pos.to_s)
1392
- $log.debug "sent binlog positions updated. `#{old_sent_binlog_pos}` -> `#{sent_binlog_pos}`"
1393
- end
1394
- [old_master_binlog_pos, old_sent_binlog_pos]
1347
+
1348
+ old_master_source_pos = sync_fm.load_source_pos
1349
+ old_sent_source_pos = sync_fm.load_sent_source_pos
1350
+
1351
+ if master_source_pos != old_master_source_pos
1352
+ sync_fm.save_source_pos(master_source_pos)
1353
+ $log.debug "master source positions updated. `#{old_master_source_pos}` -> `#{master_source_pos}`"
1354
+ end
1355
+ if sent_source_pos != old_sent_source_pos
1356
+ sync_fm.save_sent_source_pos(sent_source_pos.to_s)
1357
+ $log.debug "sent source positions updated. `#{old_sent_source_pos}` -> `#{sent_source_pos}`"
1358
+ end
1359
+
1360
+ [old_master_source_pos, old_sent_source_pos]
1395
1361
  end
1396
1362
 
1397
- def get_table_status(tables)
1363
+ def get_table_status(tables, source_pos_context)
1398
1364
  de = data_entry
1399
1365
  sync_fm = create_sync_file_manager(de)
1400
1366
  result = flydata.data_entry.table_status(de['id'], mode: env_mode, tables: tables)
1401
1367
  result = result["table_status"]
1402
1368
 
1403
1369
  table_status_hash = result.inject({}){|h, ts| h[ts["table_name"]] = ts; h}
1370
+ source_pos = nil
1371
+ table_status_hash.keys.each do |table|
1372
+ src_pos_str = table_status_hash[table].delete("src_pos")
1373
+
1374
+ source_pos = src_pos_str ? source_pos_context.create_source_pos(src_pos_str) : nil
1375
+ table_status_hash[table].merge!( { "source_pos" => source_pos } )
1376
+ end
1404
1377
  missing_tables = tables - table_status_hash.keys
1405
1378
  unless missing_tables.empty?
1406
1379
  raise "table status is not available for these table(s): #{missing_tables.join(",")}"
1407
1380
  end
1408
-
1409
- populate_initial_binlog_positions(table_status_hash, sync_fm)
1381
+ populate_initial_source_positions(table_status_hash, sync_fm)
1410
1382
  table_status_hash
1411
1383
  end
1412
1384
 
1413
- # table_status has no binlog position for sequence "0". Populate the info
1385
+ # table_status has no source position for sequence "0". Populate the info
1414
1386
  # from 'table.binlog.pos.init' file.
1415
- def populate_initial_binlog_positions(table_status_hash, sync_fm)
1387
+ def populate_initial_source_positions(table_status_hash, sync_fm)
1416
1388
  table_status_hash.keys.each do |table|
1417
- src_pos = table_status_hash[table]["src_pos"]
1418
- if src_pos.nil? || src_pos == "-"
1419
- init_binlog_pos = sync_fm.get_table_binlog_pos_init(table)
1420
- if init_binlog_pos
1421
- table_status_hash[table]["src_pos"] = init_binlog_pos
1389
+ table_source_pos = table_status_hash[table]["source_pos"]
1390
+ if table_source_pos.nil? || table_source_pos.empty?
1391
+ init_source_pos = sync_fm.get_table_source_pos_init(table)
1392
+ if init_source_pos
1393
+ table_status_hash[table]["source_pos"] = init_source_pos
1422
1394
  end
1423
1395
  end
1424
1396
  end
1425
1397
  end
1426
1398
 
1427
1399
  # TODO implement
1428
- def get_oldest_available_binlog
1400
+ def get_oldest_available_source_pos
1429
1401
  nil
1430
1402
  end
1431
1403
  end
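Since the repair and check paths above compare source positions with <, > and != (for example sent_source_pos < table_source_pos), the position objects returned by create_source_pos must be ordered. A minimal sketch of what such a comparable, binlog-style position could look like — this is an assumption for illustration, not the contents of data/lib/flydata/source_mysql/source_pos.rb:

    # Hypothetical comparable position, for illustration only.
    class ExampleBinlogPos
      include Comparable
      attr_reader :filename, :pos

      def initialize(filename, pos)
        @filename = filename
        @pos      = pos.to_i
      end

      # Order by binlog file first, then by offset within the file.
      def <=>(other)
        [filename, pos] <=> [other.filename, other.pos]
      end

      def to_s
        "#{filename}\t#{pos}"
      end
    end

    older = ExampleBinlogPos.new("mysql-bin.000123", 845)
    newer = ExampleBinlogPos.new("mysql-bin.000124", 4)
    older < newer   # => true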