flydata 0.6.3 → 0.6.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Rakefile +2 -2
- data/VERSION +1 -1
- data/bin/fdredshift +78 -0
- data/circle.yml +1 -1
- data/ext/flydata/{parser/mysql → source_mysql/parser}/.gitignore +0 -0
- data/ext/flydata/{parser/mysql → source_mysql/parser}/dump_parser_ext.cpp +3 -3
- data/ext/flydata/source_mysql/parser/extconf.rb +3 -0
- data/ext/flydata/{parser/mysql → source_mysql/parser}/parser.txt +0 -0
- data/ext/flydata/{parser/mysql → source_mysql/parser}/sql_parser.cpp +0 -0
- data/ext/flydata/{parser/mysql → source_mysql/parser}/sql_parser.h +0 -0
- data/flydata-core/lib/flydata-core/mysql/binlog_pos.rb +34 -32
- data/flydata-core/lib/flydata-core/mysql/compatibility_checker.rb +20 -0
- data/flydata-core/lib/flydata-core/table_def/mysql_table_def.rb +12 -4
- data/flydata-core/lib/flydata-core/table_def/redshift_table_def.rb +60 -6
- data/flydata-core/spec/mysql/binlog_pos_spec.rb +474 -0
- data/flydata-core/spec/table_def/mysql_table_def_spec.rb +57 -0
- data/flydata-core/spec/table_def/mysql_to_redshift_table_def_spec.rb +174 -20
- data/flydata-core/spec/table_def/mysqldump_test_col_comment_with_AUTO_INCREMENT_keyword.dump +43 -0
- data/flydata-core/spec/table_def/mysqldump_test_col_comment_with_not_null_keyword.dump +43 -0
- data/flydata-core/spec/table_def/mysqldump_test_col_comment_with_unique_keyword.dump +43 -0
- data/flydata-core/spec/table_def/mysqldump_test_col_comment_with_unsigned_keyword.dump +43 -0
- data/flydata-core/spec/table_def/redshift_table_def_spec.rb +41 -8
- data/flydata.gemspec +0 -0
- data/lib/flydata/cli.rb +11 -5
- data/lib/flydata/command/base.rb +14 -1
- data/lib/flydata/command/exclusive_runnable.rb +42 -12
- data/lib/flydata/command/helper.rb +6 -6
- data/lib/flydata/command/sender.rb +4 -3
- data/lib/flydata/command/setup.rb +30 -381
- data/lib/flydata/command/stop.rb +1 -0
- data/lib/flydata/command/sync.rb +273 -301
- data/lib/flydata/compatibility_check.rb +24 -117
- data/lib/flydata/fluent-plugins/in_mysql_binlog_flydata.rb +3 -3
- data/lib/flydata/fluent-plugins/mysql/alter_table_query_handler.rb +2 -2
- data/lib/flydata/fluent-plugins/mysql/binlog_record_handler.rb +6 -6
- data/lib/flydata/fluent-plugins/mysql/truncate_table_query_handler.rb +0 -1
- data/lib/flydata/parser.rb +14 -0
- data/lib/flydata/{parser_provider.rb → parser/parser_provider.rb} +6 -4
- data/lib/flydata/parser/source_table.rb +33 -0
- data/lib/flydata/source.rb +105 -0
- data/lib/flydata/source/component.rb +21 -0
- data/lib/flydata/source/errors.rb +7 -0
- data/lib/flydata/source/generate_source_dump.rb +72 -0
- data/lib/flydata/source/parse_dump_and_send.rb +52 -0
- data/lib/flydata/source/setup.rb +31 -0
- data/lib/flydata/source/source_pos.rb +45 -0
- data/lib/flydata/source/sync.rb +56 -0
- data/lib/flydata/source/sync_generate_table_ddl.rb +43 -0
- data/lib/flydata/source_file/setup.rb +17 -0
- data/lib/flydata/source_file/sync.rb +14 -0
- data/lib/flydata/{command → source_mysql/command}/mysql.rb +2 -1
- data/lib/flydata/{command → source_mysql/command}/mysql_command_base.rb +2 -4
- data/lib/flydata/{command → source_mysql/command}/mysqlbinlog.rb +2 -1
- data/lib/flydata/{command → source_mysql/command}/mysqldump.rb +2 -1
- data/lib/flydata/source_mysql/generate_source_dump.rb +53 -0
- data/lib/flydata/source_mysql/mysql_compatibility_check.rb +114 -0
- data/lib/flydata/source_mysql/parse_dump_and_send.rb +28 -0
- data/lib/flydata/{parser/mysql → source_mysql/parser}/.gitignore +0 -0
- data/lib/flydata/{parser/mysql → source_mysql/parser}/dump_parser.rb +32 -67
- data/lib/flydata/{parser/mysql → source_mysql/parser}/mysql_alter_table.treetop +0 -0
- data/lib/flydata/source_mysql/setup.rb +24 -0
- data/lib/flydata/source_mysql/source_pos.rb +21 -0
- data/lib/flydata/source_mysql/sync.rb +45 -0
- data/lib/flydata/source_mysql/sync_generate_table_ddl.rb +40 -0
- data/lib/flydata/{mysql → source_mysql}/table_ddl.rb +6 -17
- data/lib/flydata/source_zendesk/sync_generate_table_ddl.rb +30 -0
- data/lib/flydata/source_zendesk/zendesk_flydata_tabledefs.rb +133 -0
- data/lib/flydata/sync_file_manager.rb +132 -73
- data/lib/flydata/table_ddl.rb +18 -0
- data/spec/flydata/cli_spec.rb +1 -0
- data/spec/flydata/command/exclusive_runnable_spec.rb +19 -8
- data/spec/flydata/command/sender_spec.rb +1 -1
- data/spec/flydata/command/setup_spec.rb +4 -4
- data/spec/flydata/command/sync_spec.rb +97 -134
- data/spec/flydata/compatibility_check_spec.rb +16 -289
- data/spec/flydata/fluent-plugins/mysql/alter_table_query_handler_spec.rb +3 -3
- data/spec/flydata/fluent-plugins/mysql/dml_record_handler_spec.rb +1 -1
- data/spec/flydata/fluent-plugins/mysql/shared_query_handler_context.rb +4 -2
- data/spec/flydata/fluent-plugins/mysql/truncate_query_handler_spec.rb +1 -1
- data/spec/flydata/source_mysql/generate_source_dump_spec.rb +69 -0
- data/spec/flydata/source_mysql/mysql_compatibility_check_spec.rb +280 -0
- data/spec/flydata/{parser/mysql → source_mysql/parser}/alter_table_parser_spec.rb +2 -2
- data/spec/flydata/{parser/mysql → source_mysql/parser}/dump_parser_spec.rb +75 -70
- data/spec/flydata/source_mysql/sync_generate_table_ddl_spec.rb +137 -0
- data/spec/flydata/{mysql → source_mysql}/table_ddl_spec.rb +2 -2
- data/spec/flydata/source_spec.rb +140 -0
- data/spec/flydata/source_zendesk/sync_generate_table_ddl_spec.rb +33 -0
- data/spec/flydata/sync_file_manager_spec.rb +157 -77
- data/tmpl/redshift_mysql_data_entry.conf.tmpl +1 -1
- metadata +56 -23
- data/ext/flydata/parser/mysql/extconf.rb +0 -3
- data/lib/flydata/mysql/binlog_position.rb +0 -22
- data/spec/flydata/mysql/binlog_position_spec.rb +0 -35
data/lib/flydata/command/stop.rb
CHANGED
data/lib/flydata/command/sync.rb
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
require 'msgpack'
|
|
2
|
-
require 'mysql2'
|
|
3
2
|
require 'rest_client'
|
|
4
3
|
require 'sys/filesystem'
|
|
5
4
|
require 'flydata/agent'
|
|
@@ -8,17 +7,16 @@ require 'flydata/command/conf'
|
|
|
8
7
|
require 'flydata/command/sender'
|
|
9
8
|
require 'flydata/compatibility_check'
|
|
10
9
|
require 'flydata/errors'
|
|
10
|
+
require 'flydata/source/errors'
|
|
11
11
|
require 'flydata/helpers'
|
|
12
12
|
require 'flydata/json'
|
|
13
13
|
require 'flydata/queueable_thread'
|
|
14
14
|
require 'flydata/output/forwarder'
|
|
15
|
-
require 'flydata/parser
|
|
15
|
+
require 'flydata/parser'
|
|
16
16
|
require 'flydata/preference/data_entry_preference'
|
|
17
17
|
require 'flydata/sync_file_manager'
|
|
18
18
|
require 'flydata-core/table_def'
|
|
19
|
-
require 'flydata
|
|
20
|
-
require 'flydata/mysql/table_ddl'
|
|
21
|
-
require 'flydata-core/mysql/command_generator'
|
|
19
|
+
require 'flydata/table_ddl'
|
|
22
20
|
require 'flydata/event/api_event_sender'
|
|
23
21
|
require 'flydata-core/event/event_dictionary'
|
|
24
22
|
require 'sigdump/setup'
|
|
@@ -41,7 +39,7 @@ module Flydata
|
|
|
41
39
|
STATUS_COMPLETE = 'COMPLETE'
|
|
42
40
|
|
|
43
41
|
attr_reader :full_initial_sync, # true if full initial sync
|
|
44
|
-
:full_tables, # all tables (same as
|
|
42
|
+
:full_tables, # all tables (same as the value of `tables` data entry preference)
|
|
45
43
|
:new_tables, # tables which is not finihed initial-sync(pos file doesn't exist)
|
|
46
44
|
:ddl_tables, # tables generated ddl
|
|
47
45
|
:input_tables # tables which user put
|
|
@@ -49,9 +47,6 @@ module Flydata
|
|
|
49
47
|
#target_tables # target tables for current command(sync/reset/generate_table_ddl)
|
|
50
48
|
#target_tables_for_api # target tables for calling api(tables parameter needs to be empty for full_initial_sync)
|
|
51
49
|
|
|
52
|
-
class SyncDataEntryError < StandardError
|
|
53
|
-
end
|
|
54
|
-
|
|
55
50
|
# Command: flydata sync
|
|
56
51
|
# - Arguments
|
|
57
52
|
def self.slop
|
|
@@ -59,8 +54,8 @@ module Flydata
|
|
|
59
54
|
on 'c', 'skip-cleanup', 'Skip server cleanup'
|
|
60
55
|
on 'f', 'skip-flush', 'Skip server flush'
|
|
61
56
|
on 'y', 'yes', 'Skip command prompt assuming yes to all questions. Use this for batch operation.'
|
|
62
|
-
on 'd', 'dump-file', '
|
|
63
|
-
on 's', 'dump-stream', '
|
|
57
|
+
on 'd', 'dump-file', 'Save dump result into a file.'
|
|
58
|
+
on 's', 'dump-stream', 'Stream dump result to a pipe instead of saving dump file. It might cause timeout error if db size is larger than 10GB.'
|
|
64
59
|
on 'n', 'no-flydata-start', 'Don\'t start the flydata agent after initial sync.'
|
|
65
60
|
#TODO : This option is temp! Should remove soon.
|
|
66
61
|
on 'ff', 'Skip checking query queue and flush'
|
|
@@ -76,10 +71,9 @@ module Flydata
|
|
|
76
71
|
|
|
77
72
|
# Public method
|
|
78
73
|
# - Called from Sender#start/restart
|
|
79
|
-
def
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
rescue SyncDataEntryError
|
|
74
|
+
def try_initial_sync(options)
|
|
75
|
+
handle_initial_sync(nil, options) if source.sync.supported?
|
|
76
|
+
rescue Source::UnsupportedSourceError
|
|
83
77
|
return
|
|
84
78
|
end
|
|
85
79
|
|
|
@@ -89,6 +83,7 @@ module Flydata
|
|
|
89
83
|
Slop.new do
|
|
90
84
|
on 'f', 'skip-flush', 'Skip server flush'
|
|
91
85
|
on 'y', 'yes', 'Skip command prompt assuming yes to all questions. Use this for batch operation.'
|
|
86
|
+
on 'force-run', 'Run forcefully, ignoring exclusive run info'
|
|
92
87
|
end
|
|
93
88
|
end
|
|
94
89
|
|
|
@@ -118,6 +113,8 @@ EOS
|
|
|
118
113
|
on 'y', 'yes', 'Skip command prompt assuming yes to all questions. Use this for batch operation.'
|
|
119
114
|
on 'a', 'all', 'Resets Sync for all tables'
|
|
120
115
|
on 'i', 'init', 'Resets unfinished initial sync'
|
|
116
|
+
on 'force-run', 'Run forcefully, ignoring exclusive run info'
|
|
117
|
+
on 'f', 'force', "Resets tables including append only tables. Use this option only when you want to stop sync for the append only table permanently."
|
|
121
118
|
end
|
|
122
119
|
end
|
|
123
120
|
|
|
@@ -143,6 +140,9 @@ EOS
|
|
|
143
140
|
reset_init = false
|
|
144
141
|
end
|
|
145
142
|
sync_resumed = set_current_tables(tables, resume: !opts[:all])
|
|
143
|
+
target_tables = opts[:all] ? @full_tables : @input_tables
|
|
144
|
+
target_append_only_tables = target_tables & @append_only_tables
|
|
145
|
+
target_full_sync_tables = target_tables - @append_only_tables
|
|
146
146
|
|
|
147
147
|
return if !sync_resumed && reset_init
|
|
148
148
|
|
|
@@ -152,6 +152,26 @@ EOS
|
|
|
152
152
|
ERROR! You cannot reset tables because the previous initial sync has not been completed. Reset the unfinished initial sync first with the following command:
|
|
153
153
|
|
|
154
154
|
flydata sync:reset --init
|
|
155
|
+
|
|
156
|
+
EOS
|
|
157
|
+
return
|
|
158
|
+
end
|
|
159
|
+
|
|
160
|
+
if target_append_only_tables.size > 0 && !opts[:force]
|
|
161
|
+
log_info_stdout <<EOS
|
|
162
|
+
ERROR! Reset failed because it includes append only table(s). Sync can no longer continue if you reset an append only table.
|
|
163
|
+
|
|
164
|
+
Append only table(s): #{target_append_only_tables.join(", ")}
|
|
165
|
+
|
|
166
|
+
If you really want to reset append-only tables, use '--force' option.
|
|
167
|
+
|
|
168
|
+
EOS
|
|
169
|
+
|
|
170
|
+
log_info_stdout <<EOS unless target_full_sync_tables.empty?
|
|
171
|
+
To reset all tables except for append only tables, run the following command.
|
|
172
|
+
|
|
173
|
+
flydata sync:reset #{target_full_sync_tables.join(" ")}
|
|
174
|
+
|
|
155
175
|
EOS
|
|
156
176
|
return
|
|
157
177
|
end
|
|
@@ -203,11 +223,11 @@ EOS
|
|
|
203
223
|
def skip
|
|
204
224
|
de = data_entry
|
|
205
225
|
sync_fm = create_sync_file_manager(de)
|
|
206
|
-
|
|
226
|
+
source_pos_path = sync_fm.source_pos_path
|
|
207
227
|
sync_fm.close
|
|
208
|
-
`touch #{
|
|
209
|
-
log_info_stdout("Created an empty
|
|
210
|
-
log_info_stdout("-> #{
|
|
228
|
+
`touch #{source_pos_path}`
|
|
229
|
+
log_info_stdout("Created an empty source position file.")
|
|
230
|
+
log_info_stdout("-> #{source_pos_path}")
|
|
211
231
|
log_info_stdout("Run 'flydata start' to start continuous sync.")
|
|
212
232
|
end
|
|
213
233
|
run_exclusive :skip
|
|
@@ -220,6 +240,7 @@ EOS
|
|
|
220
240
|
on 'y', 'yes', 'Skip command prompt assuming yes to all questions. Use this for batch operation.'
|
|
221
241
|
on 's', 'skip-primary-key-check', 'Skip primary key check when generating DDL'
|
|
222
242
|
on 'all-tables', 'Generate all table schema'
|
|
243
|
+
#no 'force-run' option. because stdout is often redirected to a file.
|
|
223
244
|
end
|
|
224
245
|
end
|
|
225
246
|
|
|
@@ -228,70 +249,22 @@ EOS
|
|
|
228
249
|
def generate_table_ddl(*tables)
|
|
229
250
|
# Compatibility check
|
|
230
251
|
de = data_entry
|
|
231
|
-
|
|
232
|
-
|
|
252
|
+
context = source.sync_generate_table_ddl(flydata.data_port.get, opts)
|
|
253
|
+
context.run_compatibility_check
|
|
233
254
|
|
|
234
255
|
# Set instance variables
|
|
235
256
|
set_current_tables(tables, include_all_tables: true)
|
|
236
257
|
|
|
237
|
-
do_generate_table_ddl(de)
|
|
258
|
+
do_generate_table_ddl(context, de)
|
|
238
259
|
end
|
|
239
260
|
run_exclusive :generate_table_ddl
|
|
240
261
|
|
|
241
|
-
# Command: flydata sync:fix_binlogpos
|
|
242
|
-
# - Arguments
|
|
243
|
-
def self.slop_fix_binlogpos
|
|
244
|
-
Slop.new do
|
|
245
|
-
on 'f', 'force', 'update sent binlog position file forcibly'
|
|
246
|
-
end
|
|
247
|
-
end
|
|
248
|
-
|
|
249
|
-
# Command: flydata sync:fix_binlogpos
|
|
250
|
-
# Set binlog path
|
|
251
|
-
# - Entry method
|
|
252
|
-
def fix_binlogpos
|
|
253
|
-
de = data_entry
|
|
254
|
-
sync_fm = create_sync_file_manager(de)
|
|
255
|
-
|
|
256
|
-
if File.exists?(sync_fm.sent_binlog_path) && !opts.force?
|
|
257
|
-
log_info_stdout("Skip creating sent binlogpos because sent position file is exist already. (#{sync_fm.sent_binlog_path})")
|
|
258
|
-
return
|
|
259
|
-
end
|
|
260
|
-
|
|
261
|
-
if Flydata::Command::Sender.new.process_exist?
|
|
262
|
-
log_warn_stderr("flydata is running. flydata process needs to be stopped with 'flydata stop'.")
|
|
263
|
-
return
|
|
264
|
-
end
|
|
265
|
-
|
|
266
|
-
binlog_info = sync_fm.load_binlog
|
|
267
|
-
if binlog_info.nil?
|
|
268
|
-
log_info_stdout("Skip creating sent binlogpos because binlog position file is empty or invalid. (#{sync_fm.sent_binlog_path})")
|
|
269
|
-
return
|
|
270
|
-
end
|
|
271
|
-
|
|
272
|
-
say("Updating binlog position files...")
|
|
273
|
-
log_info("Updating binlog position files... Original binlog_info:#{binlog_info}")
|
|
274
|
-
|
|
275
|
-
# Update binlog.sent.pos file
|
|
276
|
-
# -1 is because the position in binlog.pos is the next event's position.
|
|
277
|
-
# on the other hand the position in sent position indicates already processed.
|
|
278
|
-
binlog_info[:pos] -= 1
|
|
279
|
-
log_info("Updating sent position file. #{binlog_info} -> #{sync_fm.sent_binlog_path}")
|
|
280
|
-
sync_fm.save_sent_binlog(binlog_info)
|
|
281
|
-
|
|
282
|
-
# Update binlog.pos file to start from head of the current binlog file
|
|
283
|
-
new_binlog_info = binlog_info.dup.tap{|h| h[:pos] = 4} # 4 is the first position of binlog file.
|
|
284
|
-
log_info("Updating original position file. #{new_binlog_info} -> #{sync_fm.binlog_path}")
|
|
285
|
-
sync_fm.save_binlog(new_binlog_info)
|
|
286
|
-
log_info_stdout("Done!")
|
|
287
|
-
end
|
|
288
|
-
run_exclusive :fix_binlogpos
|
|
289
|
-
|
|
290
262
|
# Command: flydata sync:repair
|
|
291
263
|
# - Arguments
|
|
292
264
|
def self.slop_repair
|
|
293
265
|
Slop.new do
|
|
294
266
|
on 'y', 'yes', 'Skip command prompt assuming yes to all questions. Use this for batch operation.'
|
|
267
|
+
on 'force-run', 'Run forcefully, ignoring exclusive run info'
|
|
295
268
|
end
|
|
296
269
|
end
|
|
297
270
|
|
|
@@ -301,7 +274,8 @@ EOS
|
|
|
301
274
|
run_exclusive :repair
|
|
302
275
|
|
|
303
276
|
def check(options = {})
|
|
304
|
-
|
|
277
|
+
context = source.source_pos
|
|
278
|
+
status, pos_mismatch_tables, gap_tables = _check(context, options)
|
|
305
279
|
|
|
306
280
|
if status.include? :OK
|
|
307
281
|
message = "\nNo errors are found. Sync is clean.\n"
|
|
@@ -349,7 +323,7 @@ EOS
|
|
|
349
323
|
JSON.generate(h)
|
|
350
324
|
end
|
|
351
325
|
|
|
352
|
-
def _check(options = {})
|
|
326
|
+
def _check(source_pos_context, options = {})
|
|
353
327
|
options[:stop_agent] ||= false
|
|
354
328
|
|
|
355
329
|
set_current_tables
|
|
@@ -361,14 +335,15 @@ EOS
|
|
|
361
335
|
abnormal_shutdown = false
|
|
362
336
|
begin
|
|
363
337
|
begin
|
|
364
|
-
flush_buffer_and_stop(@full_tables, force: false, timeout:
|
|
338
|
+
flush_buffer_and_stop(@full_tables, force: false, timeout: 55) # A short timeout is set. Otherwise, the check command and the repair command takes forever to complete.
|
|
365
339
|
rescue ServerDataProcessingTimeout => e
|
|
366
340
|
data_stuck_at = e.state
|
|
367
341
|
end
|
|
368
342
|
|
|
369
343
|
# Agent is stopped but locked. There was an abnormal shutdown.
|
|
370
344
|
abnormal_shutdown = sender.agent_locked?
|
|
371
|
-
table_status_hash = get_table_status(@full_tables)
|
|
345
|
+
table_status_hash = get_table_status(@full_tables, source_pos_context)
|
|
346
|
+
corrupt_master_pos_files = check_master_position_files
|
|
372
347
|
pos_mismatch_tables = check_position_files(table_status_hash)
|
|
373
348
|
gap_tables = check_gaps(table_status_hash)
|
|
374
349
|
ensure
|
|
@@ -382,6 +357,9 @@ EOS
|
|
|
382
357
|
if data_stuck_at == :UPLOAD
|
|
383
358
|
status << :STUCK_AT_UPLOAD
|
|
384
359
|
end
|
|
360
|
+
if corrupt_master_pos_files
|
|
361
|
+
status << :CORRUPT_MASTER_POS
|
|
362
|
+
end
|
|
385
363
|
if gap_tables
|
|
386
364
|
status << :TABLE_GAPS
|
|
387
365
|
end
|
|
@@ -394,15 +372,18 @@ EOS
|
|
|
394
372
|
if status.empty?
|
|
395
373
|
status << :OK
|
|
396
374
|
end
|
|
397
|
-
[status, pos_mismatch_tables, gap_tables, table_status_hash]
|
|
375
|
+
[status, corrupt_master_pos_files, pos_mismatch_tables, gap_tables, table_status_hash]
|
|
398
376
|
end
|
|
399
377
|
|
|
400
378
|
def _repair
|
|
401
379
|
de = data_entry
|
|
380
|
+
sync_fm = create_sync_file_manager(de)
|
|
381
|
+
context = source.source_pos
|
|
402
382
|
set_current_tables
|
|
403
383
|
# Stop agent. Check sync and make sure the state is :STUCK_AT_UPLOAD
|
|
404
384
|
# Get table status for the tables.
|
|
405
|
-
status, pos_mismatch_tables, gap_tables, table_status_hash = _check(stop_agent:true)
|
|
385
|
+
status, corrupt_master_pos_files, pos_mismatch_tables, gap_tables, table_status_hash = _check(context, stop_agent:true)
|
|
386
|
+
|
|
406
387
|
if status.include? :STUCK_AT_PROCESS
|
|
407
388
|
e = AgentError.new("Data is stuck while processing")
|
|
408
389
|
e.description = <<EOS
|
|
@@ -417,6 +398,17 @@ EOS
|
|
|
417
398
|
return
|
|
418
399
|
end
|
|
419
400
|
|
|
401
|
+
if status.include?(:ABNORMAL_SHUTDOWN) && status.uniq.length == 1
|
|
402
|
+
# abnormal shutdown occurred, but all other conditions are healthy.
|
|
403
|
+
log_info_stdout ""
|
|
404
|
+
log_info_stdout "Agent process was shut down unexpectedly"
|
|
405
|
+
log_info_stdout "but sync is in good condition. Nothing to repair."
|
|
406
|
+
|
|
407
|
+
# Remove the lock file if exists.
|
|
408
|
+
File.delete(FLYDATA_LOCK) if File.exists?(FLYDATA_LOCK)
|
|
409
|
+
return
|
|
410
|
+
end
|
|
411
|
+
|
|
420
412
|
gt = []
|
|
421
413
|
pt = []
|
|
422
414
|
gt = gap_tables.collect{|bt| bt[:table] } if gap_tables
|
|
@@ -433,7 +425,11 @@ EOS
|
|
|
433
425
|
#{tables.join("\n ")}
|
|
434
426
|
|
|
435
427
|
EOS
|
|
428
|
+
log_info_stdout <<EOS if corrupt_master_pos_files
|
|
429
|
+
- Sync is broken due to corrupt master position files
|
|
430
|
+
#{corrupt_master_pos_files.join("\n ")}
|
|
436
431
|
|
|
432
|
+
EOS
|
|
437
433
|
log_info_stdout <<EOS if status.include? :ABNORMAL_SHUTDOWN
|
|
438
434
|
- Agent process was not shut down correctly. Files may be corrupt.
|
|
439
435
|
|
|
@@ -441,74 +437,73 @@ EOS
|
|
|
441
437
|
|
|
442
438
|
return unless ask_yes_no("Proceed?")
|
|
443
439
|
|
|
444
|
-
|
|
440
|
+
oldest_source_pos = get_oldest_available_source_pos
|
|
445
441
|
unrepairable_tables = []
|
|
446
|
-
# Determine the master
|
|
447
|
-
|
|
442
|
+
# Determine the master source positions
|
|
443
|
+
sent_source_pos = nil
|
|
448
444
|
@full_tables.each do |table|
|
|
449
|
-
|
|
450
|
-
|
|
445
|
+
table_source_pos = table_status_hash[table]["source_pos"]
|
|
446
|
+
|
|
447
|
+
if table_source_pos.nil? || table_source_pos.empty?
|
|
451
448
|
# the table status has no src_pos (which is rare.) Skip the table
|
|
452
449
|
next
|
|
453
450
|
end
|
|
454
|
-
binlog_pos = FlydataCore::Mysql::BinlogPos.new(binlog_str)
|
|
455
451
|
if tables.empty?
|
|
456
|
-
if
|
|
457
|
-
|
|
452
|
+
if sent_source_pos.nil? || sent_source_pos < table_source_pos
|
|
453
|
+
sent_source_pos = table_source_pos
|
|
458
454
|
end
|
|
459
455
|
else
|
|
460
456
|
if tables.include?(table)
|
|
461
|
-
if
|
|
462
|
-
if
|
|
457
|
+
if sent_source_pos.nil? || sent_source_pos > table_source_pos
|
|
458
|
+
if oldest_source_pos && table_source_pos < oldest_source_pos
|
|
463
459
|
unrepairable_tables << table
|
|
464
460
|
else
|
|
465
|
-
|
|
461
|
+
sent_source_pos = table_source_pos
|
|
466
462
|
end
|
|
467
463
|
end
|
|
468
464
|
end
|
|
469
465
|
end
|
|
470
466
|
end
|
|
471
|
-
# if
|
|
467
|
+
# if sent_source_pos is nil, it means sync has started for none of tables. No need
|
|
472
468
|
# to repair positions nor clean buffer data.
|
|
473
|
-
if
|
|
474
|
-
if
|
|
475
|
-
e = AgentError.new("Repair failed due to expired
|
|
469
|
+
if sent_source_pos
|
|
470
|
+
if oldest_source_pos && sent_source_pos < oldest_source_pos
|
|
471
|
+
e = AgentError.new("Repair failed due to expired source position")
|
|
476
472
|
e.description = <<EOS
|
|
477
|
-
Repair failed because the starting
|
|
473
|
+
Repair failed because the starting source position `#{sent_source_pos} no longer exists. Run full initial sync instead.
|
|
478
474
|
EOS
|
|
479
475
|
raise e
|
|
480
476
|
end
|
|
481
|
-
|
|
482
|
-
|
|
477
|
+
master_source_pos = context.resume_pos(sent_source_pos)
|
|
478
|
+
|
|
483
479
|
# Delete agent buffer
|
|
484
480
|
log_info_stdout "Deleting data in the agent buffer..."
|
|
485
481
|
files = Flydata::Agent.new(FLYDATA_HOME).delete_buffer_files
|
|
486
482
|
unless files.empty?
|
|
487
483
|
$log.debug "Deleted buffer files\n " + files.join("\n ")
|
|
488
484
|
end
|
|
489
|
-
|
|
485
|
+
|
|
490
486
|
# Delete query queue items for the tables
|
|
491
487
|
log_info_stdout "Deleting data stuck in the server buffer..."
|
|
492
488
|
cleanup_sync_server(de, tables, queue_only: true) unless tables.empty?
|
|
493
|
-
|
|
494
|
-
# Save the positions (
|
|
489
|
+
|
|
490
|
+
# Save the positions (source_pos and seq)
|
|
495
491
|
log_info_stdout "Fixing table positions..."
|
|
496
492
|
@full_tables.each do |table|
|
|
497
|
-
|
|
498
|
-
if
|
|
493
|
+
table_source_pos = table_status_hash[table]["source_pos"]
|
|
494
|
+
if table_source_pos.nil? || table_source_pos.empty?
|
|
499
495
|
# no init sync has happened to the table. No need to set positions.
|
|
500
496
|
next
|
|
501
497
|
end
|
|
502
|
-
binlog_pos = FlydataCore::Mysql::BinlogPos.new(binlog_str)
|
|
503
498
|
pos = table_status_hash[table]["seq"]
|
|
504
|
-
|
|
499
|
+
old_source_pos, old_pos = save_table_positions(table, table_source_pos, pos, context)
|
|
505
500
|
if pos.to_i != old_pos.to_i && !tables.include?(table)
|
|
506
501
|
$log.debug "Fixed broken table position. table:#{table} pos:#{old_pos} -> #{pos}"
|
|
507
502
|
end
|
|
508
503
|
end
|
|
509
|
-
|
|
504
|
+
|
|
510
505
|
log_info_stdout "Fixing the master position files..."
|
|
511
|
-
|
|
506
|
+
save_master_source_positions(master_source_pos, sent_source_pos, context)
|
|
512
507
|
end
|
|
513
508
|
|
|
514
509
|
# Remove the lock file if exists.
|
|
@@ -519,13 +514,11 @@ EOS
|
|
|
519
514
|
|
|
520
515
|
# Initial sync
|
|
521
516
|
|
|
522
|
-
def
|
|
517
|
+
def handle_initial_sync(tables = nil, options = {})
|
|
523
518
|
unless Flydata::Command::Sender.new.wait_until_server_ready
|
|
524
519
|
raise Timeout::Error, "Timed out to wait for the server side to become active. Pleae try again later."
|
|
525
520
|
end
|
|
526
521
|
|
|
527
|
-
de = data_entry
|
|
528
|
-
|
|
529
522
|
# Setup instance variables
|
|
530
523
|
sync_resumed = set_current_tables(tables, resume: true)
|
|
531
524
|
|
|
@@ -533,7 +526,7 @@ EOS
|
|
|
533
526
|
# skip confirmation prompts and resume sync right away.
|
|
534
527
|
# #initial_sync knows where to resume from.
|
|
535
528
|
log_info_stdout("Resuming the initial sync...")
|
|
536
|
-
initial_sync(
|
|
529
|
+
initial_sync(options.merge(sync_resumed: true))
|
|
537
530
|
elsif !@unsynced_tables.empty?
|
|
538
531
|
show_purpose_name
|
|
539
532
|
unsynced_table_message = "We've noticed that these tables have not been synced yet: #{@unsynced_tables.join(", ")}\n"
|
|
@@ -546,7 +539,7 @@ EOS
|
|
|
546
539
|
end
|
|
547
540
|
log_info_stdout(unsynced_table_message)
|
|
548
541
|
if ask_yes_no("Do you want to run initial sync on all of these tables now?")
|
|
549
|
-
initial_sync(
|
|
542
|
+
initial_sync(options.merge(sync_resumed: false))
|
|
550
543
|
else
|
|
551
544
|
#If generate_table_ddl has not been run for these tables, warn user
|
|
552
545
|
unless @ddl_tables.empty?
|
|
@@ -559,7 +552,8 @@ EOS
|
|
|
559
552
|
end
|
|
560
553
|
end
|
|
561
554
|
|
|
562
|
-
def initial_sync(
|
|
555
|
+
def initial_sync(opt)
|
|
556
|
+
de = data_entry
|
|
563
557
|
# Load sync information from file
|
|
564
558
|
validate_initial_sync_status
|
|
565
559
|
begin
|
|
@@ -576,7 +570,7 @@ EOS
|
|
|
576
570
|
# flush is unnecessary for full initial sync because it's guaranteed
|
|
577
571
|
# that agent is stopped with no leftover buffer.
|
|
578
572
|
end
|
|
579
|
-
|
|
573
|
+
perform_initial_sync(de, opt)
|
|
580
574
|
rescue ServerDataProcessingTimeout => e
|
|
581
575
|
ee = ServerDataProcessingTimeout.new("Delayed Data Processing")
|
|
582
576
|
ee.description = <<EOS
|
|
@@ -593,7 +587,7 @@ EOS
|
|
|
593
587
|
end
|
|
594
588
|
|
|
595
589
|
# Don't call the method unless init sync needs to be run for a table
|
|
596
|
-
def
|
|
590
|
+
def perform_initial_sync(de, options = {})
|
|
597
591
|
dp = flydata.data_port.get
|
|
598
592
|
sync_fm = create_sync_file_manager(de)
|
|
599
593
|
|
|
@@ -602,60 +596,54 @@ EOS
|
|
|
602
596
|
Flydata::Command::Conf.new.copy_templates
|
|
603
597
|
end
|
|
604
598
|
|
|
605
|
-
|
|
606
|
-
|
|
599
|
+
generate_source_dump(de, sync_fm, !opts.dump_stream?, options) do |dump_io, source_pos|
|
|
600
|
+
parse_dump_and_send(dump_io, dp, de, sync_fm, source_pos)
|
|
607
601
|
end
|
|
608
|
-
|
|
602
|
+
complete_dump_processing(sync_fm)
|
|
609
603
|
end
|
|
610
604
|
|
|
611
|
-
def
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
if de['mysql_data_entry_preference'][k].to_s.empty?
|
|
616
|
-
raise "'#{k}' is required. Set the value in the conf file " +
|
|
617
|
-
"-> #{Flydata::Preference::DataEntryPreference.conf_path(de)}"
|
|
618
|
-
end
|
|
619
|
-
end
|
|
605
|
+
def generate_source_dump(de, sync_fm, file_dump = true, options = {},
|
|
606
|
+
&dump_ready_callback)
|
|
607
|
+
dp = flydata.data_port.get
|
|
608
|
+
context = source.generate_source_dump(dp, options)
|
|
620
609
|
|
|
621
610
|
# Status is parsed or complete -> skip dump and parse
|
|
622
611
|
dump_pos_info = sync_fm.load_dump_pos
|
|
623
612
|
if dump_pos_info[:status] == STATUS_PARSED || dump_pos_info[:status] == STATUS_COMPLETE
|
|
624
|
-
|
|
625
|
-
nil, options[:
|
|
613
|
+
initialize_source_positions_and_call_callback(
|
|
614
|
+
nil, options[:source_pos_ready_callback], sync_fm)
|
|
615
|
+
return
|
|
626
616
|
end
|
|
627
617
|
|
|
628
|
-
#
|
|
629
|
-
dp = flydata.data_port.get
|
|
618
|
+
# dump file exists -> skip dump
|
|
630
619
|
fp = sync_fm.dump_file_path
|
|
631
620
|
if file_dump && File.exists?(fp) && File.size(fp) > 0
|
|
632
|
-
|
|
633
|
-
nil, options[:
|
|
634
|
-
return call_block_or_return_io(fp, &
|
|
621
|
+
initialize_source_positions_and_call_callback(
|
|
622
|
+
nil, options[:source_pos_ready_callback], sync_fm)
|
|
623
|
+
return call_block_or_return_io(fp, &dump_ready_callback)
|
|
635
624
|
end
|
|
636
625
|
|
|
637
|
-
log_info_stdout("Checking
|
|
638
|
-
|
|
626
|
+
log_info_stdout("Checking the data source connection and configuration...")
|
|
627
|
+
context.run_compatibility_check(fp, sync_fm.backup_dir)
|
|
639
628
|
log_info_stdout("Checking the database size...")
|
|
640
629
|
|
|
641
|
-
db_bytesize =
|
|
630
|
+
db_bytesize = context.dump_size(target_tables)
|
|
642
631
|
|
|
643
632
|
tables = target_tables
|
|
644
633
|
tables ||= '<all tables>'
|
|
645
|
-
data_servers =
|
|
634
|
+
data_servers = source.sync.data_servers
|
|
635
|
+
data_servers = data_servers ? "\n data servers: #{data_servers}" : ""
|
|
646
636
|
|
|
647
637
|
confirmation_text = <<-EOM
|
|
648
638
|
|
|
649
639
|
FlyData Sync will start synchronizing the following database tables
|
|
650
|
-
host: #{de['mysql_data_entry_preference']['host']}
|
|
651
|
-
port: #{de['mysql_data_entry_preference']['port']}
|
|
652
|
-
username: #{de['mysql_data_entry_preference']['username']}
|
|
653
|
-
database: #{de['mysql_data_entry_preference']['database']}
|
|
654
|
-
tables: #{tables.join(", ")}#{data_servers}
|
|
655
640
|
EOM
|
|
641
|
+
context.confirmation_items.each do |name, value|
|
|
642
|
+
confirmation_text << " %-14s%s\n" % ["#{name}:", value.to_s]
|
|
643
|
+
end
|
|
656
644
|
|
|
657
|
-
confirmation_text << <<-EOM
|
|
658
|
-
|
|
645
|
+
confirmation_text << <<-EOM
|
|
646
|
+
tables: #{tables.join(", ")}#{data_servers}
|
|
659
647
|
EOM
|
|
660
648
|
confirmation_text << <<-EOM if file_dump
|
|
661
649
|
dump file: #{fp}
|
|
@@ -674,7 +662,7 @@ EOM
|
|
|
674
662
|
log_warn_stderr("!!WARNING There may not be enough disk space for a DB dump. We recommend 1GB free disk space after the dump. free disk space:#{as_size(free_disk_bytesize)}(#{free_disk_bytesize} byte) /" +
|
|
675
663
|
" db size:#{as_size(db_bytesize)}(#{db_bytesize} byte)")
|
|
676
664
|
unless ask_yes_no('Do you want to continue?')
|
|
677
|
-
log_warn_stderr("To change the dump file directory, delete '#' and modify the path of '
|
|
665
|
+
log_warn_stderr("To change the dump file directory, delete '#' and modify the path of 'dump_dir:' in '#{Preference::DataEntryPreference.conf_path(de)}'")
|
|
678
666
|
exit 1
|
|
679
667
|
end
|
|
680
668
|
end
|
|
@@ -692,18 +680,17 @@ EOM
|
|
|
692
680
|
data_entry_id: de['id'],
|
|
693
681
|
data_port_id: de['data_port_id'])
|
|
694
682
|
end
|
|
695
|
-
dump_generator = Flydata::Parser::Mysql::MysqlDumpGeneratorNoMasterData.new(de['mysql_data_entry_preference'].merge('tables' => target_tables))
|
|
696
683
|
if file_dump
|
|
697
|
-
|
|
684
|
+
source_pos = nil
|
|
698
685
|
begin
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
686
|
+
context.dump(target_tables, fp) do |_io, _source_pos|
|
|
687
|
+
source_pos = _source_pos
|
|
688
|
+
initialize_source_positions_and_call_callback(source_pos, options[:source_pos_ready_callback], sync_fm)
|
|
702
689
|
end
|
|
703
690
|
log_info_stdout(" -> Database dump done")
|
|
704
691
|
rescue Exception => e
|
|
705
692
|
#Catch all exceptions including SystemExit and Interrupt.
|
|
706
|
-
log_info_stdout "Quit while running
|
|
693
|
+
log_info_stdout "Quit while running dump, deleting dump file..."
|
|
707
694
|
sync_fm.delete_dump_file
|
|
708
695
|
log_info_stdout "Dump file deleted. To restart the FlyData Agent, Following tables may need reset : #{target_tables}"
|
|
709
696
|
if (target_tables.nil? or target_tables.empty?)
|
|
@@ -713,11 +700,11 @@ EOM
|
|
|
713
700
|
end
|
|
714
701
|
raise e
|
|
715
702
|
end
|
|
716
|
-
call_block_or_return_io(fp,
|
|
703
|
+
call_block_or_return_io(fp, source_pos, &dump_ready_callback)
|
|
717
704
|
else
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
705
|
+
context.dump(target_tables) do |io, source_pos|
|
|
706
|
+
initialize_source_positions_and_call_callback(source_pos, options[:source_pos_ready_callback], sync_fm)
|
|
707
|
+
dump_ready_callback.call(io, source_pos)
|
|
721
708
|
end
|
|
722
709
|
end
|
|
723
710
|
else
|
|
@@ -725,16 +712,16 @@ EOM
|
|
|
725
712
|
end
|
|
726
713
|
end
|
|
727
714
|
|
|
728
|
-
def
|
|
729
|
-
if
|
|
730
|
-
initialize_positions(sync_fm,
|
|
715
|
+
def initialize_source_positions_and_call_callback(source_pos, callback, sync_fm)
|
|
716
|
+
if source_pos
|
|
717
|
+
initialize_positions(sync_fm, source_pos)
|
|
731
718
|
else
|
|
732
|
-
# no
|
|
719
|
+
# no source_pos was given because dump was completed in the
|
|
733
720
|
# previous init sync attempt. Position files must be there already
|
|
734
721
|
# so no initialization is necessary.
|
|
735
722
|
end
|
|
736
723
|
if callback
|
|
737
|
-
callback.call
|
|
724
|
+
callback.call
|
|
738
725
|
end
|
|
739
726
|
end
|
|
740
727
|
|
|
@@ -744,11 +731,11 @@ EOM
|
|
|
744
731
|
stat.block_size * stat.blocks_available
|
|
745
732
|
end
|
|
746
733
|
|
|
747
|
-
def call_block_or_return_io(fp,
|
|
748
|
-
if
|
|
734
|
+
def call_block_or_return_io(fp, source_pos= nil, &source_pos_block)
|
|
735
|
+
if source_pos_block
|
|
749
736
|
f_io = open_file_io(fp)
|
|
750
737
|
begin
|
|
751
|
-
|
|
738
|
+
source_pos_block.call(f_io, source_pos)
|
|
752
739
|
return nil
|
|
753
740
|
ensure
|
|
754
741
|
f_io.close rescue nil
|
|
@@ -781,35 +768,35 @@ EOM
|
|
|
781
768
|
# <- checkpoint
|
|
782
769
|
#...
|
|
783
770
|
#CREATE TABLE ...
|
|
784
|
-
def
|
|
771
|
+
def parse_dump_and_send(dump_io, dp, de, sync_fm, source_pos)
|
|
772
|
+
|
|
773
|
+
context = source.parse_dump_and_send
|
|
785
774
|
# Prepare forwarder
|
|
786
775
|
de_tag_name = de["tag_name#{env_suffix}"]
|
|
787
776
|
server_port = dp['server_port']
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
end
|
|
793
|
-
forwarder_type = de['mysql_data_entry_preference']['forwarder'] ||
|
|
777
|
+
data_servers = source.sync.data_servers
|
|
778
|
+
servers = data_servers ? data_servers.split(',') :
|
|
779
|
+
dp["servers#{env_suffix}"].collect{|s| "#{s}:#{server_port}"}
|
|
780
|
+
forwarder_type = source.sync.forwarder ||
|
|
794
781
|
(dp['ssl_enabled'] ? 'sslforwarder' : 'tcpforwarder')
|
|
795
782
|
forwarder = Flydata::Output::ForwarderFactory.create(forwarder_type, de_tag_name, servers)
|
|
796
783
|
|
|
797
784
|
# Load dump.pos file for resume
|
|
798
785
|
dump_pos_info = sync_fm.load_dump_pos
|
|
799
|
-
|
|
800
|
-
if
|
|
801
|
-
|
|
802
|
-
log_info_stdout("Resuming... Last processed table: #{
|
|
786
|
+
dump_pos_info ||= {}
|
|
787
|
+
if dump_pos_info[:table_name] && dump_pos_info[:last_pos].to_i != -1
|
|
788
|
+
source_pos = dump_pos_info[:source_pos]
|
|
789
|
+
log_info_stdout("Resuming... Last processed table: #{dump_pos_info[:table_name]}")
|
|
803
790
|
else
|
|
804
|
-
|
|
791
|
+
dump_pos_info[:source_pos] = source_pos
|
|
805
792
|
end
|
|
806
|
-
if
|
|
807
|
-
# Old marshal dumped
|
|
808
|
-
|
|
793
|
+
if dump_pos_info[:source_table] && dump_pos_info[:source_table].value_converters.nil?
|
|
794
|
+
# Old marshal dumped source_table object may not have value_converters
|
|
795
|
+
dump_pos_info[:source_table].set_value_converters(context.value_converters)
|
|
809
796
|
end
|
|
810
|
-
if
|
|
811
|
-
# Old marshal dumped
|
|
812
|
-
|
|
797
|
+
if dump_pos_info[:source_table] && dump_pos_info[:source_table].column_names.nil?
|
|
798
|
+
# Old marshal dumped source_table object may not have column_names
|
|
799
|
+
dump_pos_info[:source_table].set_column_names
|
|
813
800
|
end
|
|
814
801
|
log_info_stdout("Sending data to FlyData Server...")
|
|
815
802
|
|
|
@@ -831,40 +818,41 @@ EOM
|
|
|
831
818
|
send_record_counts_threads = []
|
|
832
819
|
begin
|
|
833
820
|
RubyProf.start if RUN_PROFILE and defined?(RubyProf) and not RubyProf.running?
|
|
834
|
-
|
|
835
|
-
|
|
821
|
+
context.parse_dump(
|
|
822
|
+
dump_pos_info,
|
|
823
|
+
dump_io,
|
|
836
824
|
# create table
|
|
837
|
-
Proc.new { |
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
825
|
+
Proc.new { |source_table|
|
|
826
|
+
source_table.set_value_converters(context.value_converters)
|
|
827
|
+
st = source_table.clone
|
|
828
|
+
st.value_converters = {}
|
|
841
829
|
# don't let the closure hold these objects as it causes memory leak
|
|
842
|
-
|
|
830
|
+
source_table = nil
|
|
843
831
|
output_thread.run do
|
|
844
832
|
tmp_num_inserted_record = 0
|
|
845
|
-
# dump
|
|
833
|
+
# dump source_table for resume
|
|
846
834
|
#TODO: make it option
|
|
847
|
-
sync_fm.
|
|
848
|
-
log_info_stdout(" -> Started sending data for table '#{
|
|
835
|
+
sync_fm.save_source_table_marshal_dump(st)
|
|
836
|
+
log_info_stdout(" -> Started sending data for table '#{st.table_name}'")
|
|
849
837
|
end
|
|
850
838
|
},
|
|
851
839
|
# insert record
|
|
852
|
-
Proc.new { |
|
|
853
|
-
|
|
840
|
+
Proc.new { |source_table, values_set|
|
|
841
|
+
source_table_name = source_table.table_name
|
|
854
842
|
records = values_set.collect do |values|
|
|
855
|
-
convert_to_flydata_values(
|
|
856
|
-
json = JSON.generate_kv_pairs(
|
|
843
|
+
convert_to_flydata_values(source_table, values)
|
|
844
|
+
json = JSON.generate_kv_pairs(source_table.column_names, values)
|
|
857
845
|
values.clear
|
|
858
846
|
unless json.start_with? '{'
|
|
859
847
|
raise DumpParseError.new("Broken JSON record json:#{json[0..100]}")
|
|
860
848
|
end
|
|
861
|
-
{table_name:
|
|
849
|
+
{table_name: source_table_name, log: json}
|
|
862
850
|
end
|
|
863
851
|
# release resources to prevent the closure from keeping it
|
|
864
852
|
values_set.clear
|
|
865
853
|
values_set = nil
|
|
866
|
-
|
|
867
|
-
|
|
854
|
+
source_table = nil
|
|
855
|
+
source_table_name = nil
|
|
868
856
|
output_thread.run do
|
|
869
857
|
forwarder.emit(records)
|
|
870
858
|
tmp_num_inserted_record += 1
|
|
@@ -873,9 +861,9 @@ EOM
|
|
|
873
861
|
true
|
|
874
862
|
},
|
|
875
863
|
# checkpoint
|
|
876
|
-
Proc.new { |
|
|
877
|
-
table_name =
|
|
878
|
-
|
|
864
|
+
Proc.new { |source_table, last_pos, bytesize, source_pos, state, substate|
|
|
865
|
+
table_name = source_table.nil? ? '' : source_table.table_name
|
|
866
|
+
source_table = nil
|
|
879
867
|
output_thread.run do
|
|
880
868
|
skip = skip_checkpoint
|
|
881
869
|
skip_checkpoint = false
|
|
@@ -894,12 +882,12 @@ EOM
|
|
|
894
882
|
end
|
|
895
883
|
|
|
896
884
|
# save check point
|
|
897
|
-
sync_fm.save_dump_pos(STATUS_PARSING, table_name, last_pos,
|
|
885
|
+
sync_fm.save_dump_pos(STATUS_PARSING, table_name, last_pos, source_pos, state, substate)
|
|
898
886
|
|
|
899
887
|
# send record count for the table
|
|
900
888
|
if table_name.to_s != '' &&
|
|
901
|
-
state == Flydata::Parser::
|
|
902
|
-
# all records for `
|
|
889
|
+
state == Flydata::Parser::State::CREATE_TABLE
|
|
890
|
+
# all records for `source_table` have been sent
|
|
903
891
|
send_record_counts(de, sync_fm, table_name,
|
|
904
892
|
send_record_counts_threads)
|
|
905
893
|
log_info_stdout(" -> Finished sending data for table '#{table_name}'...")
|
|
@@ -928,7 +916,7 @@ EOM
|
|
|
928
916
|
log_info_stdout(" -> Done")
|
|
929
917
|
#log_info_stdout(" -> Records sent to the server")
|
|
930
918
|
#log_info_stdout(" -> #{sync_fm.load_stats}")
|
|
931
|
-
sync_fm.save_dump_pos(STATUS_PARSED, '', dump_file_size,
|
|
919
|
+
sync_fm.save_dump_pos(STATUS_PARSED, '', dump_file_size, source_pos)
|
|
932
920
|
|
|
933
921
|
if ENV['FLYDATA_BENCHMARK']
|
|
934
922
|
bench_end_time = Time.now
|
|
@@ -937,15 +925,15 @@ EOM
|
|
|
937
925
|
end
|
|
938
926
|
end
|
|
939
927
|
|
|
940
|
-
def
|
|
928
|
+
def complete_dump_processing(sync_fm)
|
|
941
929
|
return if ENV['FLYDATA_BENCHMARK']
|
|
942
930
|
|
|
943
931
|
# Status is not parsed -> don't complete
|
|
944
932
|
dump_pos_info = sync_fm.load_dump_pos
|
|
945
933
|
return unless dump_pos_info[:status] == STATUS_PARSED
|
|
946
|
-
|
|
934
|
+
source_pos = dump_pos_info[:source_pos]
|
|
947
935
|
|
|
948
|
-
sync_fm.save_dump_pos(STATUS_COMPLETE, '', -1,
|
|
936
|
+
sync_fm.save_dump_pos(STATUS_COMPLETE, '', -1, source_pos)
|
|
949
937
|
end
|
|
950
938
|
|
|
951
939
|
# option: timeout, tables
|
|
@@ -1015,8 +1003,8 @@ EOM
|
|
|
1015
1003
|
|
|
1016
1004
|
def complete(de)
|
|
1017
1005
|
sync_fm = create_sync_file_manager(de)
|
|
1018
|
-
|
|
1019
|
-
if
|
|
1006
|
+
dump_pos_info = sync_fm.load_dump_pos
|
|
1007
|
+
if dump_pos_info[:status] == STATUS_COMPLETE
|
|
1020
1008
|
send_record_counts(de, sync_fm)
|
|
1021
1009
|
sync_fm.delete_dump_file
|
|
1022
1010
|
sync_fm.backup_dump_dir
|
|
@@ -1058,18 +1046,18 @@ EOM
|
|
|
1058
1046
|
end
|
|
1059
1047
|
end
|
|
1060
1048
|
|
|
1061
|
-
def initialize_positions(sync_fm,
|
|
1062
|
-
sync_fm.
|
|
1049
|
+
def initialize_positions(sync_fm, source_pos)
|
|
1050
|
+
sync_fm.save_table_source_pos(target_tables, source_pos)
|
|
1063
1051
|
|
|
1064
1052
|
if @full_initial_sync
|
|
1065
|
-
sync_fm.
|
|
1053
|
+
sync_fm.save_source_pos(source_pos)
|
|
1066
1054
|
end
|
|
1067
|
-
sync_fm.
|
|
1055
|
+
sync_fm.install_table_source_pos_files(target_tables)
|
|
1068
1056
|
sync_fm.reset_table_position_files(target_tables)
|
|
1069
1057
|
end
|
|
1070
1058
|
|
|
1071
|
-
def convert_to_flydata_values(
|
|
1072
|
-
vc =
|
|
1059
|
+
def convert_to_flydata_values(source_table, values)
|
|
1060
|
+
vc = source_table.value_converters
|
|
1073
1061
|
return if vc.empty?
|
|
1074
1062
|
|
|
1075
1063
|
vc.each_pair do |index, converter|
|
|
@@ -1127,40 +1115,26 @@ EOM
|
|
|
1127
1115
|
|
|
1128
1116
|
# Generate table ddl
|
|
1129
1117
|
|
|
1130
|
-
def do_generate_table_ddl(de)
|
|
1131
|
-
if `which mysqldump`.empty?
|
|
1132
|
-
raise "mysqldump is not installed. mysqldump is required to run the command"
|
|
1133
|
-
end
|
|
1134
|
-
|
|
1118
|
+
def do_generate_table_ddl(context, de)
|
|
1135
1119
|
error_list = []
|
|
1136
1120
|
schema_name = (de['schema_name'] || nil)
|
|
1137
|
-
mp = de['mysql_data_entry_preference']
|
|
1138
1121
|
|
|
1139
1122
|
tables = opts.all_tables? ? @full_tables : (@input_tables.empty? ? @unsynced_tables : @input_tables)
|
|
1140
1123
|
|
|
1141
1124
|
raise "There are no valid unsynced tables, if you want to just get ddl for all tables, please run \`flydata sync:generate_table_ddl --all-tables\`" if tables.empty?
|
|
1142
1125
|
|
|
1143
|
-
%w(host username database).each do |conf_name|
|
|
1144
|
-
raise "MySQL `#{conf_name}` is neither defined in the data entry nor the local config file" if mp[conf_name].to_s.empty?
|
|
1145
|
-
end
|
|
1146
1126
|
if tables.empty?
|
|
1147
1127
|
raise "`tables` (or `tables_append_only`) is neither defined in the data entry nor the local config file"
|
|
1148
1128
|
end
|
|
1149
1129
|
|
|
1130
|
+
flydata_tabledefs, error_list = context.generate_flydata_tabledef(tables,
|
|
1131
|
+
skip_primary_key_check: opts.skip_primary_key_check?)
|
|
1150
1132
|
create_flydata_ctl_table = true
|
|
1151
|
-
|
|
1152
|
-
|
|
1153
|
-
|
|
1154
|
-
error_list << error.err_hash
|
|
1155
|
-
next
|
|
1156
|
-
end
|
|
1157
|
-
flydata_tabledef = mysql_tabledef.to_flydata_tabledef
|
|
1158
|
-
puts FlydataCore::TableDef::SyncRedshiftTableDef.from_flydata_tabledef(flydata_tabledef, flydata_ctl_table: create_flydata_ctl_table, schema_name: schema_name, ctl_only: opts.ctl_only?)
|
|
1133
|
+
append_only = tables & @append_only_tables
|
|
1134
|
+
flydata_tabledefs.each do |flydata_tabledef|
|
|
1135
|
+
puts FlydataCore::TableDef::SyncRedshiftTableDef.from_flydata_tabledef(flydata_tabledef, flydata_ctl_table: create_flydata_ctl_table, schema_name: schema_name, ctl_only: opts.ctl_only?, skip_drop_table: append_only.include?(flydata_tabledef[:table_name]))
|
|
1159
1136
|
create_flydata_ctl_table = false
|
|
1160
1137
|
end
|
|
1161
|
-
if missing_tables
|
|
1162
|
-
missing_tables.each {|missing_table| error_list << { error: 'table does not exist in the MySQL database', table: missing_table } }
|
|
1163
|
-
end
|
|
1164
1138
|
table_validity_hash = {}
|
|
1165
1139
|
tables_without_error = tables
|
|
1166
1140
|
unless error_list.empty?
|
|
@@ -1190,11 +1164,10 @@ EOS
|
|
|
1190
1164
|
flydata.data_entry.update_table_validity(de['id'], {updated_tables: table_validity_hash}) unless table_validity_hash.empty?
|
|
1191
1165
|
|
|
1192
1166
|
sync_fm = create_sync_file_manager(de)
|
|
1193
|
-
sync_fm.save_generated_ddl(tables_without_error,
|
|
1167
|
+
sync_fm.save_generated_ddl(tables_without_error, TableDdl::VERSION)
|
|
1194
1168
|
sync_fm.close
|
|
1195
1169
|
end
|
|
1196
1170
|
|
|
1197
|
-
|
|
1198
1171
|
ALL_DONE_MESSAGE_TEMPLATE = <<-EOM
|
|
1199
1172
|
|
|
1200
1173
|
Congratulations! FlyData has started synchronizing your database tables.
|
|
@@ -1228,20 +1201,21 @@ Thank you for using FlyData!
|
|
|
1228
1201
|
# Utility methods
|
|
1229
1202
|
|
|
1230
1203
|
def set_current_tables(input_tables = nil, options = {})
|
|
1231
|
-
|
|
1232
|
-
sync_fm = create_sync_file_manager(de)
|
|
1204
|
+
sync_fm = create_sync_file_manager
|
|
1233
1205
|
sync_info = sync_fm.load_sync_info
|
|
1234
1206
|
sync_resumed = options[:resume] && !!sync_info
|
|
1235
1207
|
|
|
1208
|
+
table_lists = source.sync.table_lists
|
|
1209
|
+
|
|
1236
1210
|
#full_tables will either include all tables including invalid tables or all valid tables that aren't new tables
|
|
1237
1211
|
|
|
1238
1212
|
# The 'new_tables' list may include tables which has already been
|
|
1239
1213
|
# synced for backward compatibility reason.
|
|
1240
1214
|
# Filter out such tables so that we get a list of 'new' tables with no
|
|
1241
1215
|
# position file.
|
|
1242
|
-
real_new_tables = sync_fm.get_new_table_list(
|
|
1243
|
-
@full_tables = options[:include_all_tables] ?
|
|
1244
|
-
|
|
1216
|
+
real_new_tables = sync_fm.get_new_table_list(table_lists['new_tables'], "pos")
|
|
1217
|
+
@full_tables = options[:include_all_tables] ? table_lists['tables'] + table_lists['invalid_tables'] :
|
|
1218
|
+
table_lists['tables'] - real_new_tables
|
|
1245
1219
|
|
|
1246
1220
|
@unsynced_tables = sync_fm.get_new_table_list(@full_tables, "pos") # Get list of tables that do not have a .pos file
|
|
1247
1221
|
@ddl_tables = sync_fm.get_new_table_list(@full_tables, "generated_ddl")
|
|
@@ -1250,6 +1224,8 @@ Thank you for using FlyData!
|
|
|
1250
1224
|
@input_tables ||= []
|
|
1251
1225
|
@full_initial_sync = sync_resumed ? sync_info[:initial_sync] :
|
|
1252
1226
|
(@unsynced_tables == @full_tables)
|
|
1227
|
+
@append_only_tables = table_lists['tables_append_only']
|
|
1228
|
+
@append_only_tables ||= []
|
|
1253
1229
|
|
|
1254
1230
|
sync_fm.close
|
|
1255
1231
|
|
|
@@ -1292,38 +1268,15 @@ Thank you for using FlyData!
|
|
|
1292
1268
|
end
|
|
1293
1269
|
|
|
1294
1270
|
def data_entry
|
|
1295
|
-
@
|
|
1296
|
-
|
|
1297
|
-
|
|
1298
|
-
def retrieve_sync_data_entry
|
|
1299
|
-
de = retrieve_data_entries.first
|
|
1300
|
-
raise "There are no data entries." unless de
|
|
1301
|
-
case de['type']
|
|
1302
|
-
when 'RedshiftMysqlDataEntry'
|
|
1303
|
-
mp = de['mysql_data_entry_preference']
|
|
1304
|
-
|
|
1305
|
-
if mp['tables_append_only']
|
|
1306
|
-
mp['tables'] = (mp['tables'].split(",") + mp['tables_append_only'].split(",")).uniq
|
|
1307
|
-
else
|
|
1308
|
-
mp['tables'] = mp['tables'].split(",").uniq
|
|
1309
|
-
end
|
|
1310
|
-
mp['invalid_tables'] = mp['invalid_tables'].kind_of?(String) ? mp['invalid_tables'].split(",").uniq : []
|
|
1311
|
-
mp['new_tables'] = mp['new_tables'].kind_of?(String) ? mp['new_tables'].split(",").uniq : []
|
|
1312
|
-
|
|
1313
|
-
unless mp['ssl_ca_content'].to_s.strip.empty?
|
|
1314
|
-
sync_fm = create_sync_file_manager(de)
|
|
1315
|
-
sync_fm.save_ssl_ca(mp['ssl_ca_content'])
|
|
1316
|
-
mp['ssl_ca'] = sync_fm.ssl_ca_path
|
|
1317
|
-
mp['sslca'] = mp['ssl_ca']
|
|
1318
|
-
end
|
|
1319
|
-
else
|
|
1320
|
-
raise SyncDataEntryError, "No supported data entry. Only mysql-redshift sync is supported."
|
|
1271
|
+
unless @sync_de
|
|
1272
|
+
@sync_de = super
|
|
1273
|
+
source.sync.setup # updates the data entry contents
|
|
1321
1274
|
end
|
|
1322
|
-
|
|
1275
|
+
@sync_de
|
|
1323
1276
|
end
|
|
1324
1277
|
|
|
1325
1278
|
def create_sync_file_manager(de = data_entry)
|
|
1326
|
-
SyncFileManager.new(de)
|
|
1279
|
+
SyncFileManager.new(de, source)
|
|
1327
1280
|
end
|
|
1328
1281
|
|
|
1329
1282
|
def verify_input_tables(input_tables, all_tables)
|
|
@@ -1335,6 +1288,19 @@ Thank you for using FlyData!
|
|
|
1335
1288
|
raise "These tables are not registered tables: #{inval_table.join(", ")}" unless inval_table.empty?
|
|
1336
1289
|
end
|
|
1337
1290
|
|
|
1291
|
+
def check_master_position_files
|
|
1292
|
+
de = data_entry
|
|
1293
|
+
sync_fm = create_sync_file_manager(de)
|
|
1294
|
+
old_master_source_pos = sync_fm.load_source_pos
|
|
1295
|
+
old_sent_source_pos = sync_fm.load_sent_source_pos
|
|
1296
|
+
|
|
1297
|
+
corrupt_files = []
|
|
1298
|
+
corrupt_files << sync_fm.source_pos_path if old_master_source_pos.nil?
|
|
1299
|
+
corrupt_files << sync_fm.sent_source_pos_path if old_sent_source_pos.nil?
|
|
1300
|
+
|
|
1301
|
+
corrupt_files.empty? ? nil : corrupt_files
|
|
1302
|
+
end
|
|
1303
|
+
|
|
1338
1304
|
def check_position_files(table_status_hash)
|
|
1339
1305
|
de = data_entry
|
|
1340
1306
|
sync_fm = create_sync_file_manager(de)
|
|
@@ -1359,73 +1325,79 @@ Thank you for using FlyData!
|
|
|
1359
1325
|
gap_tables.empty? ? nil : gap_tables
|
|
1360
1326
|
end
|
|
1361
1327
|
|
|
1362
|
-
def save_table_positions(table,
|
|
1328
|
+
def save_table_positions(table, source_pos, pos, context)
|
|
1363
1329
|
de = data_entry
|
|
1364
1330
|
sync_fm = create_sync_file_manager(de)
|
|
1365
|
-
|
|
1366
|
-
old_binlog_pos = s ? FlydataCore::Mysql::BinlogPos.new(s) : nil
|
|
1331
|
+
old_source_pos = sync_fm.get_table_source_pos(table)
|
|
1367
1332
|
old_pos = sync_fm.get_table_position(table)
|
|
1368
1333
|
if pos.to_i != old_pos.to_i
|
|
1369
1334
|
sync_fm.save_table_position(table, pos)
|
|
1370
1335
|
$log.debug "table pos updated. table:#{table} pos:#{old_pos} -> #{pos}"
|
|
1371
1336
|
end
|
|
1372
|
-
if
|
|
1373
|
-
sync_fm.
|
|
1374
|
-
$log.debug "table
|
|
1337
|
+
if source_pos != old_source_pos
|
|
1338
|
+
sync_fm.save_table_source_pos(table, source_pos, destination: :positions)
|
|
1339
|
+
$log.debug "table source_pos updated. table:#{table} source_pos:`#{old_source_pos}` -> `#{source_pos}`"
|
|
1375
1340
|
end
|
|
1376
|
-
[
|
|
1341
|
+
[old_source_pos, old_pos]
|
|
1377
1342
|
end
|
|
1378
1343
|
|
|
1379
|
-
def
|
|
1344
|
+
def save_master_source_positions(master_source_pos, sent_source_pos, context)
|
|
1380
1345
|
de = data_entry
|
|
1381
1346
|
sync_fm = create_sync_file_manager(de)
|
|
1382
|
-
|
|
1383
|
-
|
|
1384
|
-
|
|
1385
|
-
|
|
1386
|
-
if
|
|
1387
|
-
sync_fm.
|
|
1388
|
-
$log.debug "master
|
|
1389
|
-
end
|
|
1390
|
-
if
|
|
1391
|
-
sync_fm.
|
|
1392
|
-
$log.debug "sent
|
|
1393
|
-
end
|
|
1394
|
-
|
|
1347
|
+
|
|
1348
|
+
old_master_source_pos = sync_fm.load_source_pos
|
|
1349
|
+
old_sent_source_pos = sync_fm.load_sent_source_pos
|
|
1350
|
+
|
|
1351
|
+
if master_source_pos != old_master_source_pos
|
|
1352
|
+
sync_fm.save_source_pos(master_source_pos)
|
|
1353
|
+
$log.debug "master source positions updated. `#{old_master_source_pos}` -> `#{master_source_pos}`"
|
|
1354
|
+
end
|
|
1355
|
+
if sent_source_pos != old_sent_source_pos
|
|
1356
|
+
sync_fm.save_sent_source_pos(sent_source_pos.to_s)
|
|
1357
|
+
$log.debug "sent source positions updated. `#{old_sent_source_pos}` -> `#{sent_source_pos}`"
|
|
1358
|
+
end
|
|
1359
|
+
|
|
1360
|
+
[old_master_source_pos, old_sent_source_pos]
|
|
1395
1361
|
end
|
|
1396
1362
|
|
|
1397
|
-
def get_table_status(tables)
|
|
1363
|
+
def get_table_status(tables, source_pos_context)
|
|
1398
1364
|
de = data_entry
|
|
1399
1365
|
sync_fm = create_sync_file_manager(de)
|
|
1400
1366
|
result = flydata.data_entry.table_status(de['id'], mode: env_mode, tables: tables)
|
|
1401
1367
|
result = result["table_status"]
|
|
1402
1368
|
|
|
1403
1369
|
table_status_hash = result.inject({}){|h, ts| h[ts["table_name"]] = ts; h}
|
|
1370
|
+
source_pos = nil
|
|
1371
|
+
table_status_hash.keys.each do |table|
|
|
1372
|
+
src_pos_str = table_status_hash[table].delete("src_pos")
|
|
1373
|
+
|
|
1374
|
+
source_pos = src_pos_str ? source_pos_context.create_source_pos(src_pos_str) : nil
|
|
1375
|
+
table_status_hash[table].merge!( { "source_pos" => source_pos } )
|
|
1376
|
+
end
|
|
1404
1377
|
missing_tables = tables - table_status_hash.keys
|
|
1405
1378
|
unless missing_tables.empty?
|
|
1406
1379
|
raise "table status is not available for these table(s): #{missing_tables.join(",")}"
|
|
1407
1380
|
end
|
|
1408
|
-
|
|
1409
|
-
populate_initial_binlog_positions(table_status_hash, sync_fm)
|
|
1381
|
+
populate_initial_source_positions(table_status_hash, sync_fm)
|
|
1410
1382
|
table_status_hash
|
|
1411
1383
|
end
|
|
1412
1384
|
|
|
1413
|
-
# table_status has no
|
|
1385
|
+
# table_status has no source position for sequence "0". Populate the info
|
|
1414
1386
|
# from 'table.binlog.pos.init' file.
|
|
1415
|
-
def
|
|
1387
|
+
def populate_initial_source_positions(table_status_hash, sync_fm)
|
|
1416
1388
|
table_status_hash.keys.each do |table|
|
|
1417
|
-
|
|
1418
|
-
if
|
|
1419
|
-
|
|
1420
|
-
if
|
|
1421
|
-
table_status_hash[table]["
|
|
1389
|
+
table_source_pos = table_status_hash[table]["source_pos"]
|
|
1390
|
+
if table_source_pos.nil? || table_source_pos.empty?
|
|
1391
|
+
init_source_pos = sync_fm.get_table_source_pos_init(table)
|
|
1392
|
+
if init_source_pos
|
|
1393
|
+
table_status_hash[table]["source_pos"] = init_source_pos
|
|
1422
1394
|
end
|
|
1423
1395
|
end
|
|
1424
1396
|
end
|
|
1425
1397
|
end
|
|
1426
1398
|
|
|
1427
1399
|
# TODO implement
|
|
1428
|
-
def
|
|
1400
|
+
def get_oldest_available_source_pos
|
|
1429
1401
|
nil
|
|
1430
1402
|
end
|
|
1431
1403
|
end
|