flydata 0.3.4 → 0.3.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3eda267f32da9fd2ce6f6dd3ad583b2803d689ce
4
- data.tar.gz: 43876505915eee1ef232973a23de0d432afcb7e3
3
+ metadata.gz: 3aabc1598aa6770f9f4238bae95ef42b1577c0aa
4
+ data.tar.gz: 5ba322b53b34ffaf0986542a8071c103c4d2d881
5
5
  SHA512:
6
- metadata.gz: 927ee474b1b8567b25667d24e2bd876bbde521192566f59fd24c53c82ea1fe181fea48087183490674feaa283139f0379af481a07d6a6558720a719fe3ee39b4
7
- data.tar.gz: 17dcdec903a1a1c9bf137005b57495f272655c291103f377005ca8cf30208ecb7dba68ed0a564f1a2c11556764e0ddab545277c990439bc72e67fc2885226267
6
+ metadata.gz: a1b6059f259512e9d7f190f9bc37c51905979adab22310f4782e92b3ebbc7156673ce48259d7f7ba6a43ae2d40755b2836b59adffd3b36d035a8ceb0ff1b9c4b
7
+ data.tar.gz: 129df374f7bbba7ed5dc70eb23bd52b46ee683521cd391c7fa0c9440f9c56649d2761210be0eca31cb8d1c05c5ed55a9ba8aadacec84085d73460af1b950303f
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.3.4
1
+ 0.3.5
@@ -2,14 +2,12 @@
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
3
  # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
- # stub: flydata 0.3.4 ruby lib
6
5
 
7
6
  Gem::Specification.new do |s|
8
7
  s.name = "flydata"
9
- s.version = "0.3.4"
8
+ s.version = "0.3.5"
10
9
 
11
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
12
- s.require_paths = ["lib"]
13
11
  s.authors = ["Koichi Fujikawa", "Masashi Miyazaki", "Matthew Luu", "Mak Inada", "Sriram NS"]
14
12
  s.date = "2015-02-28"
15
13
  s.description = "FlyData Agent"
@@ -152,7 +150,8 @@ Gem::Specification.new do |s|
152
150
  ]
153
151
  s.homepage = "http://flydata.com/"
154
152
  s.licenses = ["All right reserved."]
155
- s.rubygems_version = "2.2.2"
153
+ s.require_paths = ["lib"]
154
+ s.rubygems_version = "2.0.14"
156
155
  s.summary = "FlyData Agent"
157
156
 
158
157
  if s.respond_to? :specification_version then
@@ -7,12 +7,13 @@ module Flydata
7
7
  super
8
8
  end
9
9
 
10
- def buffer_stat(data_entry_id, mode = nil)
11
- @client.get("/#{@model_name.pluralize}/#{data_entry_id}/buffer_stat/#{mode}")
10
+ def buffer_stat(data_entry_id, params = {})
11
+ tables = params[:tables] ? params[:tables].join(',') : ''
12
+ @client.get("/#{@model_name.pluralize}/#{data_entry_id}/buffer_stat/#{params[:mode]}?tables=#{tables}")
12
13
  end
13
-
14
+
14
15
  def cleanup_sync(data_entry_id, tables)
15
- @client.post("/#{@model_name.pluralize}/#{data_entry_id}/cleanup_sync", nil, {tables: tables.join(',')})
16
+ @client.post("/#{@model_name.pluralize}/#{data_entry_id}/cleanup_sync", nil, {tables: tables.join(',')})
16
17
  end
17
18
  end
18
19
  end
@@ -63,7 +63,7 @@ module Flydata
63
63
  if json_response.class == Hash and json_response["success"] == false
64
64
  err_msg = json_response['errors'] ? json_response['errors'].to_s : "Unkown error."
65
65
  raise err_msg
66
- elsif json_response.class == Hash and json_response["auth_token"]
66
+ elsif json_response.class == Hash and json_response["auth_token"]
67
67
  @credentials.write_credentials(json_response["auth_token"])
68
68
  end
69
69
  json_response
@@ -73,10 +73,11 @@ module Flydata
73
73
  parameters = {}
74
74
  parameters.merge(headers) if headers
75
75
  end
76
- def generate_auth_url(path)
76
+ def generate_auth_url(path)
77
77
  token = @credentials.token
78
78
  token = '' unless token
79
- "#{@flydata_api_host}#{path}?auth_token=#{token}"
79
+ c = (path && path.include?('?')) ? '&' : '?'
80
+ "#{@flydata_api_host}#{path}#{c}auth_token=#{token}"
80
81
  end
81
82
  end
82
83
  end
@@ -24,11 +24,13 @@ module Flydata
24
24
  STATUS_WAITING = 'WAITING'
25
25
  STATUS_COMPLETE = 'COMPLETE'
26
26
 
27
- attr_reader :initial_sync, :new_tables, :ddl_tables
27
+ attr_reader :full_initial_sync, :full_tables, :new_tables, :ddl_tables, :input_tables
28
28
 
29
29
  class SyncDataEntryError < StandardError
30
30
  end
31
31
 
32
+ # Command: flydata sync
33
+ # - Arguments
32
34
  def self.slop
33
35
  Slop.new do
34
36
  on 'c', 'skip-cleanup', 'Skip server cleanup'
@@ -42,7 +44,10 @@ module Flydata
42
44
  end
43
45
  end
44
46
 
47
+ # Command: flydata sync
48
+ # - Entry method
45
49
  def run(*tables)
50
+ # Process check
46
51
  sender = Flydata::Command::Sender.new
47
52
  if (sender.process_exist?)
48
53
  if tables.empty?
@@ -55,76 +60,40 @@ module Flydata
55
60
  exit 1
56
61
  end
57
62
 
58
- handle_mysql_sync(tables)
63
+ # Setup instance variables
64
+ set_current_tables(tables)
59
65
 
66
+ # Start initial sync with check
67
+ handle_mysql_sync
68
+
69
+ # Start continuous sync by starting fluentd process
60
70
  unless opts.no_flydata_start?
61
71
  log_info_stdout("Starting FlyData Agent...")
62
72
  Flydata::Command::Sender.new.start(quiet: true)
63
73
  log_info_stdout(" -> Done")
64
74
  end
65
75
 
76
+ # Show message
66
77
  dashboard_url = "#{flydata.flydata_api_host}/dashboard"
67
78
  redshift_console_url = "#{flydata.flydata_api_host}/redshift_clusters/query/new"
68
79
  last_message = ALL_DONE_MESSAGE_TEMPLATE % [redshift_console_url, dashboard_url]
69
80
  log_info_stdout(last_message)
70
81
  end
71
82
 
83
+ # Public method
84
+ # - Called from Sender#start/restart
72
85
  def try_mysql_sync
73
- handle_mysql_sync()
86
+ # Setup instance variables
87
+ set_current_tables
88
+
89
+ # Start initial sync
90
+ handle_mysql_sync
74
91
  rescue SyncDataEntryError
75
92
  return
76
93
  end
77
94
 
78
- def handle_mysql_sync(tables=nil)
79
- de = retrieve_sync_data_entry
80
-
81
- set_current_tables(de)
82
- verify_input_tables(tables, de['mysql_data_entry_preference']['tables'])
83
-
84
- unless @new_tables.empty?
85
- say("We've noticed that these tables have not been synced yet: #{@new_tables.join(", ")}")
86
- unless @ddl_tables.empty?
87
- say(" WARNING: We've noticed that at least one of these tables have not had their DDL generated yet.")
88
- say(" We recommend you run our 'flydata sync:generate_table_ddl > create_table.sql'")
89
- say(" to generate SQL to run on Redshift to create the correct tables")
90
- say(" Without running this sql on your Redshift cluster, there may be issues with your data")
91
- end
92
- if ask_yes_no("Do you want to run initial sync on all of these tables now?")
93
- tables = @initial_sync ? [] : @new_tables
94
- initial_sync(tables, de)
95
- else
96
- #If generate_table_ddl has not been run for these tables, warn user
97
- unless @ddl_tables.empty?
98
- say(" You can generate DDL SQL for your new tables by running this command")
99
- say(" $> flydata sync:generate_table_ddl > create_table.sql")
100
- end
101
- puts "Without syncing these tables, we cannot start the flydata process"
102
- raise "Please try again"
103
- end
104
- end
105
- end
106
- def initial_sync(tables, de=nil)
107
- de = retrieve_sync_data_entry unless de
108
- de = load_sync_info(de,tables)
109
- validate_initial_sync_status(de, tables)
110
- begin
111
- flush_buffer_and_stop unless @initial_sync
112
- sync_mysql_to_redshift(de)
113
- rescue ServerDataProcessingTimeout => e
114
- ee = ServerDataProcessingTimeout.new("Delayed Data Processing")
115
- ee.description = <<EOS
116
- Data processing is taking more than expected. Please contact support@flydata.com to check the system status.
117
- Once checked, you can resume sync with the following command.
118
-
119
- flydata start
120
-
121
- EOS
122
- ee.set_backtrace e.backtrace
123
- raise ee
124
- end
125
- complete
126
- end
127
-
95
+ # Command: flydata sync:flush
96
+ # - Arguments
128
97
  def self.slop_flush
129
98
  Slop.new do
130
99
  on 'f', 'skip-flush', 'Skip server flush'
@@ -132,9 +101,11 @@ EOS
132
101
  end
133
102
  end
134
103
 
135
- def flush
104
+ # Command: flydata sync:flush
105
+ # - Entry method
106
+ def flush(*tables)
136
107
  begin
137
- flush_buffer_and_stop
108
+ flush_buffer_and_stop(tables)
138
109
  rescue ServerDataProcessingTimeout => e
139
110
  ee = ServerDataProcessingTimeout.new("Delayed Data Processing")
140
111
  ee.description = <<EOS
@@ -147,6 +118,8 @@ EOS
147
118
  log_info_stdout("Buffers have been flushed and the sender process has been stopped.")
148
119
  end
149
120
 
121
+ # Command: flydata sync:reset
122
+ # - Arguments
150
123
  def self.slop_reset
151
124
  Slop.new do
152
125
  on 'c', 'client', 'Resets client only.'
@@ -154,19 +127,22 @@ EOS
154
127
  end
155
128
  end
156
129
 
130
+ # Command: flydata sync:reset
131
+ # - Entry method
157
132
  def reset(*tables)
133
+ # Flush client buffer
158
134
  msg = tables.empty? ? '' : " for these tables : #{tables.join(" ")}"
159
135
  return unless ask_yes_no("This resets the current sync#{msg}. Are you sure?")
160
136
  sender = Flydata::Command::Sender.new
161
137
  sender.flush_client_buffer # TODO We should rather delete buffer files
162
138
  sender.stop
163
139
 
164
- de = retrieve_sync_data_entry
165
- all_tables = de['mysql_data_entry_preference']['tables']
166
- verify_input_tables(tables, all_tables)
140
+ # Set instance variables
141
+ de = data_entry
142
+ set_current_tables(tables)
167
143
 
168
144
  begin
169
- wait_for_server_buffer(SERVER_DATA_PROCESSING_TIMEOUT)
145
+ wait_for_server_buffer(timeout: SERVER_DATA_PROCESSING_TIMEOUT, tables: target_tables_for_api)
170
146
  rescue ServerDataProcessingTimeout => e
171
147
  ee = ServerDataProcessingTimeout.new("Delayed Data Processing")
172
148
  ee.description = <<EOS
@@ -179,20 +155,24 @@ EOS
179
155
  ee.set_backtrace e.backtrace
180
156
  raise ee
181
157
  end
182
- cleanup_sync_server(de, tables) unless opts.client?
183
- sync_fm = Flydata::FileUtil::SyncFileManager.new(de)
158
+
159
+ # Cleanup tables on server
160
+ cleanup_sync_server(de, @input_tables) unless opts.client?
161
+
162
+ # Delete local files
163
+ sync_fm = create_sync_file_manager(de)
184
164
  delete_files = [
185
165
  sync_fm.dump_file_path,
186
166
  sync_fm.dump_pos_path,
187
167
  sync_fm.mysql_table_marshal_dump_path,
188
168
  sync_fm.sync_info_file,
189
- sync_fm.table_position_file_paths(*tables),
190
- sync_fm.table_binlog_pos_paths(*tables),
191
- sync_fm.table_binlog_pos_init_paths(*tables),
192
- sync_fm.table_rev_file_paths(*tables),
193
- sync_fm.table_ddl_file_paths(*tables)
169
+ sync_fm.table_position_file_paths(*@input_tables),
170
+ sync_fm.table_binlog_pos_paths(*@input_tables),
171
+ sync_fm.table_binlog_pos_init_paths(*@input_tables),
172
+ sync_fm.table_rev_file_paths(*@input_tables),
173
+ sync_fm.table_ddl_file_paths(*@input_tables)
194
174
  ]
195
- delete_files << sync_fm.binlog_path if tables.empty? or all_tables.empty?
175
+ delete_files << sync_fm.binlog_path if @input_tables.empty? or @full_tables.empty?
196
176
  delete_files.flatten.each do |path|
197
177
  FileUtils.rm(path) if File.exists?(path)
198
178
  end
@@ -200,75 +180,11 @@ EOS
200
180
  log_info_stdout("Reset completed successfully.")
201
181
  end
202
182
 
203
- def wait_for_server_buffer(timeout = 0)
204
- start_time = Time.now
205
- log_info_stdout("Waiting for the server buffer to get empty.")
206
- prev_message =nil
207
- while (status = check) && (status['state'] == 'processing')
208
- prev_message = status['message']
209
- if timeout > 0 && Time.now - start_time > timeout
210
- raise ServerDataProcessingTimeout.new
211
- end
212
- print_progress(status)
213
- sleep 10
214
- end
215
- end
216
-
217
- def wait_for_server_data_processing(timeout = 0)
218
- state = :PROCESS
219
- start_time = Time.now
220
- log_info_stdout("Uploading data to Redshift...")
221
- sleep 10
222
- status = nil
223
- prev_message =nil
224
- while (status = check)
225
- if state == :PROCESS && status['state'] == 'uploading'
226
- log_info_stdout(" -> Done")
227
- state = :UPLOAD
228
- log_info_stdout("Finishing data upload...")
229
- end
230
-
231
- #TODO This is based on a temporary option
232
- if state == :UPLOAD && opts.ff?
233
- log_info_stdout("Skip checking for pending uploads")
234
- break
235
- end
236
-
237
- if status['message'] != prev_message
238
- # making some progress. Reset timer
239
- start_time = Time.now
240
- end
241
- prev_message = status['message']
242
- if timeout > 0 && Time.now - start_time > timeout
243
- raise ServerDataProcessingTimeout.new
244
- end
245
- print_progress(status)
246
- sleep 10
247
- end
248
- if (state == :PROCESS)
249
- # :UPLOAD state was skipped due to no data
250
- log_info_stdout(" -> Done")
251
- log_info_stdout("Finishing data upload...")
252
- end
253
- log_info_stdout(" -> Done")
254
- end
255
-
256
- def check
257
- de = retrieve_sync_data_entry
258
- retry_on(RestClient::Exception) do
259
- status = do_check(de)
260
- if status['complete']
261
- nil
262
- else
263
- status
264
- end
265
- end
266
- end
267
-
183
+ # Deprecated Command: flydata sync:skip
268
184
  # skip initial sync
269
185
  def skip
270
- de = retrieve_sync_data_entry
271
- sync_fm = Flydata::FileUtil::SyncFileManager.new(de)
186
+ de = data_entry
187
+ sync_fm = create_sync_file_manager(de)
272
188
  binlog_path = sync_fm.binlog_path
273
189
  sync_fm.close
274
190
  `touch #{binlog_path}`
@@ -277,6 +193,8 @@ EOS
277
193
  log_info_stdout("Run 'flydata start' to start continuous sync.")
278
194
  end
279
195
 
196
+ # Command: flydata sync:generate_table_ddl
197
+ # - Arguments
280
198
  def self.slop_generate_table_ddl
281
199
  Slop.new do
282
200
  on 'c', 'ctl-only', 'Only generate FlyData Control definitions'
@@ -286,166 +204,77 @@ EOS
286
204
  end
287
205
  end
288
206
 
207
+ # Command: flydata sync:generate_table_ddl
208
+ # - Entry method
289
209
  def generate_table_ddl(*tables)
290
- de = retrieve_sync_data_entry
210
+ # Compatibility check
211
+ de = data_entry
291
212
  dp = flydata.data_port.get
292
213
  Flydata::MysqlCompatibilityCheck.new(dp, de['mysql_data_entry_preference']).check
293
- do_generate_table_ddl(de, tables)
294
- end
295
214
 
296
- private
215
+ # Set instance variables
216
+ set_current_tables(tables)
297
217
 
298
- def verify_input_tables(input_tables, all_tables)
299
- return unless input_tables
300
- inval_table = []
301
- input_tables.each do |tab|
302
- inval_table << tab unless all_tables.include?(tab)
303
- end
304
- raise "These tables are not registered tables: #{inval_table.join(", ")}" unless inval_table.empty?
218
+ do_generate_table_ddl(de)
305
219
  end
306
220
 
307
- def set_current_tables(de)
308
- sync_fm = Flydata::FileUtil::SyncFileManager.new(de)
309
- full_tables = de['mysql_data_entry_preference']['tables']
310
-
311
- @new_tables = sync_fm.get_new_table_list(full_tables, "pos")
312
- @ddl_tables = sync_fm.get_new_table_list(full_tables, "generated_ddl")
313
-
314
- @initial_sync = (@new_tables == full_tables)
315
- sync_fm.close
316
- end
221
+ private
317
222
 
318
- def validate_initial_sync_status(de, tables)
319
- sync_fm = Flydata::FileUtil::SyncFileManager.new(de)
320
- dump_pos_info = sync_fm.load_dump_pos
321
- fp = sync_fm.dump_file_path
322
- sync_fm.close
223
+ # Initial sync
323
224
 
324
- # status is parsing but dumpfile doesn't exist due to streaming -> raise error
325
- if dump_pos_info[:status] == STATUS_PARSING && !File.exists?(fp)
326
- raise "FlyData Sync was interrupted with invalid state. Run 'flydata sync:reset#{tables.empty? ? '' : ' ' + tables.join(' ')}' first."
327
- end
328
- end
225
+ def handle_mysql_sync
226
+ de = data_entry
329
227
 
330
- def retrieve_sync_data_entry
331
- de = retrieve_data_entries.first unless de
332
- raise "There are no data entries." unless de
333
- case de['type']
334
- when 'RedshiftMysqlDataEntry'
335
- mp = de['mysql_data_entry_preference']
336
- if mp['tables_append_only']
337
- mp['tables'] = (mp['tables'].split(",") + mp['tables_append_only'].split(",")).uniq
338
- else
339
- mp['tables'] = mp['tables'].split(",").uniq
228
+ unless @new_tables.empty?
229
+ say("We've noticed that these tables have not been synced yet: #{@new_tables.join(", ")}")
230
+ unless @ddl_tables.empty?
231
+ say(" WARNING: We've noticed that at least one of these tables have not had their DDL generated yet.")
232
+ say(" We recommend you run our 'flydata sync:generate_table_ddl > create_table.sql'")
233
+ say(" to generate SQL to run on Redshift to create the correct tables")
234
+ say(" Without running this sql on your Redshift cluster, there may be issues with your data")
340
235
  end
341
- else
342
- raise SyncDataEntryError, "No supported data entry. Only mysql-redshift sync is supported."
343
- end
344
- de
345
- end
346
-
347
- def cleanup_sync_server(de, tables = [])
348
- print "Cleaning the server."
349
- log_info("Cleaning the server.")
350
- worker = Thread.new do
351
- begin
352
- flydata.data_entry.cleanup_sync(de['id'], tables)
353
- rescue RestClient::RequestTimeout, RestClient::GatewayTimeout
354
- # server is taking time to cleanup. Try again
355
- retry
236
+ if ask_yes_no("Do you want to run initial sync on all of these tables now?")
237
+ initial_sync(de)
238
+ else
239
+ #If generate_table_ddl has not been run for these tables, warn user
240
+ unless @ddl_tables.empty?
241
+ say(" You can generate DDL SQL for your new tables by running this command")
242
+ say(" $> flydata sync:generate_table_ddl > create_table.sql")
243
+ end
244
+ puts "Without syncing these tables, we cannot start the flydata process"
245
+ raise "Please try again"
356
246
  end
357
247
  end
358
- until worker.join(5)
359
- print "."
360
- end
361
- puts
362
- log_info_stdout("Done.")
363
- end
364
-
365
- def do_check(de)
366
- flydata.data_entry.buffer_stat(de['id'], env_mode)
367
248
  end
368
249
 
369
- def print_progress(buffer_stat)
370
- message = buffer_stat['message']
371
- log_info_stdout(message) unless message.nil? || message.empty?
372
- end
373
-
374
- def do_generate_table_ddl(de, tables=[])
375
- if `which mysqldump`.empty?
376
- raise "mysqldump is not installed. mysqldump is required to run the command"
377
- end
378
-
379
- error_list = []
380
-
381
- schema_name = (de['schema_name'] || nil)
382
-
383
- mp = de['mysql_data_entry_preference']
384
-
385
- set_current_tables(de)
386
-
387
- tables = opts.all_tables? ? mp['tables'] : (tables.empty? ? @new_tables : tables)
388
- raise "There are no valid unsynced tables, if you want to just get ddl for all tables, please run \`flydata sync:generate_table_ddl --all-tables\`" if tables.empty?
389
-
390
- %w(host username database).each do |conf_name|
391
- raise "MySQL `#{conf_name}` is neither defined in the data entry nor the local config file" if mp[conf_name].to_s.empty?
392
- end
393
- if tables.empty?
394
- raise "`tables` (or `tables_append_only`) is neither defined in the data entry nor the local config file"
395
- end
250
+ def initial_sync(de)
251
+ # Load sync information from file
252
+ load_sync_info(de)
253
+ validate_initial_sync_status
254
+ begin
255
+ flush_buffer_and_stop(target_tables_for_api) unless @full_initial_sync
256
+ sync_mysql_to_redshift(de)
257
+ rescue ServerDataProcessingTimeout => e
258
+ ee = ServerDataProcessingTimeout.new("Delayed Data Processing")
259
+ ee.description = <<EOS
260
+ Data processing is taking more than expected. Please contact support@flydata.com to check the system status.
261
+ Once checked, you can resume sync with the following command.
396
262
 
397
- command = Util::MysqlUtil.generate_mysql_ddl_dump_cmd(mp.merge(tables: tables))
263
+ flydata start
398
264
 
399
- Open3.popen3(command) do |stdin, stdout, stderr|
400
- stdin.close
401
- stdout.set_encoding("utf-8") # mysqldump output must be in UTF-8
402
- create_flydata_ctl_table = @initial_sync
403
- while !stdout.eof?
404
- begin
405
- mysql_tabledef = FlydataCore::TableDef::MysqlTableDef.create(stdout, skip_primary_key_check: opts.skip_primary_key_check?)
406
- if mysql_tabledef.nil?
407
- # stream had no more create table definition
408
- break
409
- end
410
- flydata_tabledef = mysql_tabledef.to_flydata_tabledef
411
- puts FlydataCore::TableDef::RedshiftTableDef.from_flydata_tabledef(flydata_tabledef, flydata_ctl_table: create_flydata_ctl_table, schema_name: schema_name, ctl_only: opts.ctl_only?)
412
- rescue FlydataCore::TableDefError=> e
413
- error_list << e.err_hash
414
- next
415
- end
416
- create_flydata_ctl_table = false
417
- end
418
- errors = ""
419
- while !stderr.eof?
420
- line = stderr.gets.gsub('mysqldump: ', '')
421
- errors << line unless /Warning: Using a password on the command line interface can be insecure./ === line
422
- end
423
- raise errors unless errors.empty?
424
- end
425
- unless error_list.empty?
426
- log_error_stderr("We have noticed the following error(s):")
427
- group_error = error_list.group_by {|d| d[:error]}
428
- group_error.each_key do |a|
429
- log_error_stderr("The following table(s) have #{a}:")
430
- group_error[a].each do |hash|
431
- log_error_stderr(" - #{hash[:table]}") if hash[:table]
432
- end
433
- end
434
- log_error_stderr("Please fix the above error(s) to try to sync those table(s) or contact us for further help.")
265
+ EOS
266
+ ee.set_backtrace e.backtrace
267
+ raise ee
435
268
  end
436
- tables_without_error = tables - error_list.inject([]){|arr, err| arr << err[:table] if err[:table]}
437
-
438
- sync_fm = Flydata::FileUtil::SyncFileManager.new(de)
439
- sync_fm.mark_generated_tables(tables_without_error)
440
- sync_fm.close
269
+ complete(de)
441
270
  end
442
271
 
443
272
  def sync_mysql_to_redshift(de)
444
273
  dp = flydata.data_port.get
445
- sync_fm = Flydata::FileUtil::SyncFileManager.new(de)
274
+ sync_fm = create_sync_file_manager(de)
446
275
 
447
276
  # Check client condition
448
- if File.exists?(sync_fm.binlog_path) and @initial_sync
277
+ if File.exists?(sync_fm.binlog_path) and @full_initial_sync
449
278
  raise "Already synchronized. If you want to do initial sync, run 'flydata sync:reset'"
450
279
  end
451
280
 
@@ -454,7 +283,7 @@ EOS
454
283
  Flydata::Command::Conf.new.copy_templates
455
284
  end
456
285
  generate_mysqldump(de, sync_fm, !opts.dump_stream?) do |mysqldump_io, db_bytesize|
457
- sync_fm.save_sync_info(@initial_sync, de['mysql_data_entry_preference']['tables'])
286
+ sync_fm.save_sync_info(@full_initial_sync, de['mysql_data_entry_preference']['tables'])
458
287
  parse_mysqldump_and_send(mysqldump_io, dp, de, sync_fm, db_bytesize)
459
288
  end
460
289
  wait_for_mysqldump_processed(dp, de, sync_fm)
@@ -481,7 +310,7 @@ EOS
481
310
  return call_block_or_return_io(fp, &block)
482
311
  end
483
312
 
484
- tables = de['mysql_data_entry_preference']['tables']
313
+ tables = target_tables
485
314
  tables ||= '<all tables>'
486
315
  data_servers = de['mysql_data_entry_preference']['data_servers'] ? "\n data servers: #{de['mysql_data_entry_preference']['data_servers']}" : ""
487
316
 
@@ -507,7 +336,7 @@ EOM
507
336
  Flydata::MysqlCompatibilityCheck.new(dp, de['mysql_data_entry_preference'], dump_dir: fp, backup_dir: sync_fm.backup_dir).check
508
337
  log_info_stdout("Checking database size...")
509
338
 
510
- db_bytesize = Flydata::Parser::Mysql::DatabaseSizeCheck.new(de['mysql_data_entry_preference']).get_db_bytesize
339
+ db_bytesize = Flydata::Parser::Mysql::DatabaseSizeCheck.new(de['mysql_data_entry_preference'].merge('tables' => target_tables)).get_db_bytesize
511
340
  log_info_stdout(" -> #{as_size(db_bytesize)} (#{db_bytesize} byte)")
512
341
 
513
342
  if file_dump
@@ -523,17 +352,16 @@ EOM
523
352
  end
524
353
  end
525
354
 
526
-
527
355
  log_info_stdout("Exporting data from database.")
528
356
  log_info_stdout("This process can take hours depending on data size and load on your database. Please be patient...")
529
357
  if file_dump
530
358
  Flydata::Parser::Mysql::MysqlDumpGeneratorNoMasterData.
531
- new(de['mysql_data_entry_preference']).dump(fp)
359
+ new(de['mysql_data_entry_preference'].merge('tables' => target_tables)).dump(fp)
532
360
  log_info_stdout(" -> Done")
533
361
  call_block_or_return_io(fp, &block)
534
362
  else
535
363
  Flydata::Parser::Mysql::MysqlDumpGeneratorNoMasterData.
536
- new(de['mysql_data_entry_preference']).dump {|io| block.call(io, db_bytesize)}
364
+ new(de['mysql_data_entry_preference'].merge('tables' => target_tables)).dump {|io| block.call(io, db_bytesize)}
537
365
  end
538
366
  else
539
367
  exit 1
@@ -603,7 +431,7 @@ EOM
603
431
  log_info_stdout("Resuming... Last processed table: #{option[:table_name]}")
604
432
  else
605
433
  # If it's a new sync, ensure server-side resources are clean
606
- cleanup_sync_server(de, de['mysql_data_entry_preference']['tables']) unless opts.skip_cleanup?
434
+ cleanup_sync_server(de, target_tables_for_api) unless opts.skip_cleanup?
607
435
  end
608
436
  log_info_stdout("Sending data to FlyData Server...")
609
437
 
@@ -671,38 +499,85 @@ EOM
671
499
  return unless dump_pos_info[:status] == STATUS_WAITING
672
500
  binlog_pos = dump_pos_info[:binlog_pos]
673
501
 
674
- wait_for_server_data_processing(SERVER_DATA_PROCESSING_TIMEOUT)
675
- tables = de['mysql_data_entry_preference']['tables']
676
- sync_fm.save_table_binlog_pos(tables, binlog_pos)
502
+ wait_for_server_data_processing(
503
+ timeout: SERVER_DATA_PROCESSING_TIMEOUT, tables: target_tables_for_api)
504
+ sync_fm.save_table_binlog_pos(target_tables, binlog_pos)
677
505
  sync_fm.save_dump_pos(STATUS_COMPLETE, '', -1, binlog_pos)
678
506
  end
679
507
 
680
- ALL_DONE_MESSAGE_TEMPLATE = <<-EOM
508
+ # option: timeout, tables
509
+ def wait_for_server_data_processing(option = {})
510
+ timeout = option[:timeout] || 0
511
+ tables = option[:tables] || []
681
512
 
682
- Congratulations! FlyData has started synchronizing your database tables.
513
+ state = :PROCESS
514
+ start_time = Time.now
515
+ log_info_stdout("Uploading data to Redshift...")
516
+ sleep 10
517
+ status = nil
518
+ prev_message =nil
519
+ while (status = check_server_status(tables))
520
+ if state == :PROCESS && status['state'] == 'uploading'
521
+ log_info_stdout(" -> Done")
522
+ state = :UPLOAD
523
+ log_info_stdout("Finishing data upload...")
524
+ end
683
525
 
684
- What's next?
526
+ #TODO This is based on a temporary option
527
+ if state == :UPLOAD && opts.ff?
528
+ log_info_stdout("Skip checking for pending uploads")
529
+ break
530
+ end
685
531
 
686
- - Check data on Redshift (%s)
687
- - Check your FlyData usage on the FlyData Dashboard (%s)
688
- - To manage the FlyData Agent, use the 'flydata' command (type 'flydata' for help)
689
- - If you encounter an issue,
690
- please check our documentation (https://www.flydata.com/docs/) or
691
- contact our customer support team (support@flydata.com)
532
+ if status['message'] != prev_message
533
+ # making some progress. Reset timer
534
+ start_time = Time.now
535
+ end
536
+ prev_message = status['message']
537
+ if timeout > 0 && Time.now - start_time > timeout
538
+ raise ServerDataProcessingTimeout.new
539
+ end
540
+ print_progress(status)
541
+ sleep 10
542
+ end
543
+ if (state == :PROCESS)
544
+ # :UPLOAD state was skipped due to no data
545
+ log_info_stdout(" -> Done")
546
+ log_info_stdout("Finishing data upload...")
547
+ end
548
+ log_info_stdout(" -> Done")
549
+ end
692
550
 
693
- Thank you for using FlyData!
694
- EOM
551
+ def check_server_status(tables = [])
552
+ de = data_entry
553
+ retry_on(RestClient::Exception) do
554
+ status = do_check_server_status(de, tables)
555
+ if status['complete']
556
+ nil
557
+ else
558
+ status
559
+ end
560
+ end
561
+ end
695
562
 
696
- def complete
697
- de = load_sync_info(retrieve_sync_data_entry)
698
- sync_fm = Flydata::FileUtil::SyncFileManager.new(de)
563
+ def do_check_server_status(de, tables = [])
564
+ flydata.data_entry.buffer_stat(de['id'], mode: env_mode, tables: tables)
565
+ end
566
+
567
+ def print_progress(buffer_stat)
568
+ message = buffer_stat['message']
569
+ log_info_stdout(message) unless message.nil? || message.empty?
570
+ end
571
+
572
+ def complete(de)
573
+ sync_fm = create_sync_file_manager(de)
699
574
  info = sync_fm.load_dump_pos
700
575
  if info[:status] == STATUS_COMPLETE
701
- if @initial_sync
576
+ if @full_initial_sync
702
577
  sync_fm.save_binlog(info[:binlog_pos])
703
578
  end
704
- sync_fm.install_table_binlog_files(de['mysql_data_entry_preference']['tables'])
705
- sync_fm.reset_table_position_files(de['mysql_data_entry_preference']['tables'])
579
+ sync_fm.install_table_binlog_files(target_tables)
580
+ sync_fm.reset_table_position_files(target_tables)
706
581
  sync_fm.delete_dump_file
707
582
  sync_fm.backup_dump_dir
708
583
  else
@@ -719,29 +594,238 @@ Thank you for using FlyData!
719
594
  h.to_json
720
595
  end
721
596
 
722
- def load_sync_info(de ,additional_tables=[])
723
- sync_fm = Flydata::FileUtil::SyncFileManager.new(de)
597
+ # Sync reset
598
+
599
+ def wait_for_server_buffer(option = {})
600
+ timeout = option[:timeout] || 0
601
+ tables = option[:tables] || []
602
+
603
+ start_time = Time.now
604
+ log_info_stdout("Waiting for the server buffer to get empty.")
605
+ prev_message =nil
606
+ while (status = check_server_status(tables)) && (status['state'] == 'processing')
607
+ prev_message = status['message']
608
+ if timeout > 0 && Time.now - start_time > timeout
609
+ raise ServerDataProcessingTimeout.new
610
+ end
611
+ print_progress(status)
612
+ sleep 10
613
+ end
614
+ end
615
+
616
+ def cleanup_sync_server(de, tables = [])
617
+ print "Cleaning the server."
618
+ log_info("Cleaning the server.")
619
+ worker = Thread.new do
620
+ begin
621
+ flydata.data_entry.cleanup_sync(de['id'], tables)
622
+ rescue RestClient::RequestTimeout, RestClient::GatewayTimeout
623
+ # server is taking time to cleanup. Try again
624
+ retry
625
+ end
626
+ end
627
+ until worker.join(5)
628
+ print "."
629
+ end
630
+ puts
631
+ log_info_stdout("Done.")
632
+ end
633
+
634
+ # Generate table ddl
635
+
636
+ def do_generate_table_ddl(de)
637
+ if `which mysqldump`.empty?
638
+ raise "mysqldump is not installed. mysqldump is required to run the command"
639
+ end
640
+
641
+ error_list = []
642
+ schema_name = (de['schema_name'] || nil)
724
643
  mp = de['mysql_data_entry_preference']
725
- unless (rs = sync_fm.load_sync_info).nil?
726
- @initial_sync = rs[:initial_sync]
727
- mp['tables'] = rs[:tables]
728
- else
729
- mp['tables'] = additional_tables unless @initial_sync
644
+
645
+ tables = opts.all_tables? ? @full_tables : (@input_tables.empty? ? @new_tables : @input_tables)
646
+ raise "There are no valid unsynced tables, if you want to just get ddl for all tables, please run \`flydata sync:generate_table_ddl --all-tables\`" if tables.empty?
647
+
648
+ %w(host username database).each do |conf_name|
649
+ raise "MySQL `#{conf_name}` is neither defined in the data entry nor the local config file" if mp[conf_name].to_s.empty?
730
650
  end
651
+ if tables.empty?
652
+ raise "`tables` (or `tables_append_only`) is neither defined in the data entry nor the local config file"
653
+ end
654
+
655
+ command = Util::MysqlUtil.generate_mysql_ddl_dump_cmd(mp.merge(tables: tables))
656
+
657
+ Open3.popen3(command) do |stdin, stdout, stderr|
658
+ stdin.close
659
+ stdout.set_encoding("utf-8") # mysqldump output must be in UTF-8
660
+ create_flydata_ctl_table = @full_initial_sync
661
+ while !stdout.eof?
662
+ begin
663
+ mysql_tabledef = FlydataCore::TableDef::MysqlTableDef.create(stdout, skip_primary_key_check: opts.skip_primary_key_check?)
664
+ if mysql_tabledef.nil?
665
+ # stream had no more create table definition
666
+ break
667
+ end
668
+ flydata_tabledef = mysql_tabledef.to_flydata_tabledef
669
+ puts FlydataCore::TableDef::RedshiftTableDef.from_flydata_tabledef(flydata_tabledef, flydata_ctl_table: create_flydata_ctl_table, schema_name: schema_name, ctl_only: opts.ctl_only?)
670
+ rescue FlydataCore::TableDefError=> e
671
+ error_list << e.err_hash
672
+ next
673
+ end
674
+ create_flydata_ctl_table = false
675
+ end
676
+ errors = ""
677
+ while !stderr.eof?
678
+ line = stderr.gets.gsub('mysqldump: ', '')
679
+ errors << line unless /Warning: Using a password on the command line interface can be insecure./ === line
680
+ end
681
+ raise errors unless errors.empty?
682
+ end
683
+ unless error_list.empty?
684
+ log_error_stderr("We have noticed the following error(s):")
685
+ group_error = error_list.group_by {|d| d[:error]}
686
+ group_error.each_key do |a|
687
+ log_error_stderr("The following table(s) have #{a}:")
688
+ group_error[a].each do |hash|
689
+ log_error_stderr(" - #{hash[:table]}") if hash[:table]
690
+ end
691
+ end
692
+ log_error_stderr("Please fix the above error(s) to try to sync those table(s) or contact us for further help.")
693
+ end
694
+ tables_without_error = tables - error_list.inject([]){|arr, err| arr << err[:table] if err[:table]}
695
+
696
+ sync_fm = create_sync_file_manager(de)
697
+ sync_fm.mark_generated_tables(tables_without_error)
731
698
  sync_fm.close
732
- de
733
699
  end
734
700
 
735
- def flush_buffer_and_stop
701
+
702
+ ALL_DONE_MESSAGE_TEMPLATE = <<-EOM
703
+
704
+ Congratulations! FlyData has started synchronizing your database tables.
705
+
706
+ What's next?
707
+
708
+ - Check data on Redshift (%s)
709
+ - Check your FlyData usage on the FlyData Dashboard (%s)
710
+ - To manage the FlyData Agent, use the 'flydata' command (type 'flydata' for help)
711
+ - If you encounter an issue,
712
+ please check our documentation (https://www.flydata.com/docs/) or
713
+ contact our customer support team (support@flydata.com)
714
+
715
+ Thank you for using FlyData!
716
+ EOM
717
+
718
+ # Sync flush
719
+
720
+ def flush_buffer_and_stop(tables = [])
736
721
  sender = Flydata::Command::Sender.new
737
722
  sender.flush_client_buffer
738
723
  if opts.skip_flush?
739
724
  log_info_stdout("Skip waiting for server data processing.")
740
725
  else
741
- wait_for_server_data_processing(SERVER_DATA_PROCESSING_TIMEOUT)
726
+ wait_for_server_data_processing(
727
+ timeout: SERVER_DATA_PROCESSING_TIMEOUT, tables: tables)
742
728
  end
743
729
  sender.stop(quiet: true)
744
730
  end
731
+
732
+ # Utility methods
733
+
734
+ def set_current_tables(input_tables = nil)
735
+ de = data_entry
736
+ sync_fm = create_sync_file_manager(de)
737
+ @input_tables = input_tables || []
738
+ @full_tables = de['mysql_data_entry_preference']['tables']
739
+
740
+ @new_tables = sync_fm.get_new_table_list(@full_tables, "pos")
741
+ @ddl_tables = sync_fm.get_new_table_list(@full_tables, "generated_ddl")
742
+
743
+ @full_initial_sync = (@new_tables == @full_tables)
744
+
745
+ sync_fm.close
746
+
747
+ verify_input_tables(@input_tables, @full_tables)
748
+ end
749
+
750
+ def validate_initial_sync_status
751
+ sync_fm = create_sync_file_manager
752
+ dump_pos_info = sync_fm.load_dump_pos
753
+ fp = sync_fm.dump_file_path
754
+ sync_fm.close
755
+
756
+ # status is parsing but dumpfile doesn't exist due to streaming -> raise error
757
+ if dump_pos_info[:status] == STATUS_PARSING && !File.exists?(fp)
758
+ raise "FlyData Sync was interrupted with invalid state. Run 'flydata sync:reset#{@input_tables.join(',')}' first."
759
+ end
760
+ end
761
+
762
+ def load_sync_info(de)
763
+ # for debug
764
+ raise "!AssertionError. set_current_tables needs to be called in advance" if @full_tables.nil?
765
+
766
+ sync_fm = create_sync_file_manager(de)
767
+ if (rs = sync_fm.load_sync_info)
768
+ @full_initial_sync = rs[:initial_sync]
769
+ @input_tables = rs[:tables]
770
+ end
771
+ sync_fm.close
772
+ de
773
+ end
774
+
775
+ def target_tables
776
+ if @full_initial_sync
777
+ @full_tables
778
+ elsif !@input_tables.empty?
779
+ @input_tables
780
+ else
781
+ @new_tables
782
+ end
783
+ end
784
+
785
+ # return empty array if full sync
786
+ def target_tables_for_api
787
+ if @full_initial_sync
788
+ []
789
+ elsif !@input_tables.empty?
790
+ @input_tables
791
+ else
792
+ @new_tables
793
+ end
794
+ end
795
+
796
+ def data_entry
797
+ @de ||= retrieve_sync_data_entry
798
+ end
799
+
800
+ def retrieve_sync_data_entry
801
+ de = retrieve_data_entries.first unless de
802
+ raise "There are no data entries." unless de
803
+ case de['type']
804
+ when 'RedshiftMysqlDataEntry'
805
+ mp = de['mysql_data_entry_preference']
806
+ if mp['tables_append_only']
807
+ mp['tables'] = (mp['tables'].split(",") + mp['tables_append_only'].split(",")).uniq
808
+ else
809
+ mp['tables'] = mp['tables'].split(",").uniq
810
+ end
811
+ else
812
+ raise SyncDataEntryError, "No supported data entry. Only mysql-redshift sync is supported."
813
+ end
814
+ de
815
+ end
816
+
817
+ def create_sync_file_manager(de = data_entry)
818
+ Flydata::FileUtil::SyncFileManager.new(de)
819
+ end
820
+
821
+ def verify_input_tables(input_tables, all_tables)
822
+ return unless input_tables
823
+ inval_table = []
824
+ input_tables.each do |tab|
825
+ inval_table << tab unless all_tables.include?(tab)
826
+ end
827
+ raise "These tables are not registered tables: #{inval_table.join(", ")}" unless inval_table.empty?
828
+ end
745
829
  end
746
830
  end
747
831
  end
@@ -18,7 +18,7 @@ module Flydata
18
18
  allow(Kernel).to receive(:sleep)
19
19
  allow_any_instance_of(Flydata::Api::DataPort).to receive(:get).and_return("Wibble")
20
20
  allow_any_instance_of(Flydata::AgentCompatibilityCheck).to receive(:check).and_return(true)
21
- Flydata::Command::Sync.any_instance.should_receive(:handle_mysql_sync).and_return("Wobble")
21
+ Flydata::Command::Sync.any_instance.should_receive(:try_mysql_sync).and_return("Wobble")
22
22
  end
23
23
 
24
24
  context "as daemon" do
@@ -65,6 +65,10 @@ module Flydata
65
65
  end
66
66
  end
67
67
  describe '#do_generate_table_ddl' do
68
+ before do
69
+ allow(subject).to receive(:data_entry).and_return(default_data_entry)
70
+ subject.send(:set_current_tables)
71
+ end
68
72
  shared_examples 'throws an error' do
69
73
  it "throws an error" do
70
74
  expect {
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: flydata
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.4
4
+ version: 0.3.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Koichi Fujikawa
@@ -592,7 +592,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
592
592
  version: '0'
593
593
  requirements: []
594
594
  rubyforge_project:
595
- rubygems_version: 2.2.2
595
+ rubygems_version: 2.0.14
596
596
  signing_key:
597
597
  specification_version: 4
598
598
  summary: FlyData Agent