flydata 0.3.4 → 0.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3eda267f32da9fd2ce6f6dd3ad583b2803d689ce
4
- data.tar.gz: 43876505915eee1ef232973a23de0d432afcb7e3
3
+ metadata.gz: 3aabc1598aa6770f9f4238bae95ef42b1577c0aa
4
+ data.tar.gz: 5ba322b53b34ffaf0986542a8071c103c4d2d881
5
5
  SHA512:
6
- metadata.gz: 927ee474b1b8567b25667d24e2bd876bbde521192566f59fd24c53c82ea1fe181fea48087183490674feaa283139f0379af481a07d6a6558720a719fe3ee39b4
7
- data.tar.gz: 17dcdec903a1a1c9bf137005b57495f272655c291103f377005ca8cf30208ecb7dba68ed0a564f1a2c11556764e0ddab545277c990439bc72e67fc2885226267
6
+ metadata.gz: a1b6059f259512e9d7f190f9bc37c51905979adab22310f4782e92b3ebbc7156673ce48259d7f7ba6a43ae2d40755b2836b59adffd3b36d035a8ceb0ff1b9c4b
7
+ data.tar.gz: 129df374f7bbba7ed5dc70eb23bd52b46ee683521cd391c7fa0c9440f9c56649d2761210be0eca31cb8d1c05c5ed55a9ba8aadacec84085d73460af1b950303f
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.3.4
1
+ 0.3.5
@@ -2,14 +2,12 @@
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
3
  # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
- # stub: flydata 0.3.4 ruby lib
6
5
 
7
6
  Gem::Specification.new do |s|
8
7
  s.name = "flydata"
9
- s.version = "0.3.4"
8
+ s.version = "0.3.5"
10
9
 
11
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
12
- s.require_paths = ["lib"]
13
11
  s.authors = ["Koichi Fujikawa", "Masashi Miyazaki", "Matthew Luu", "Mak Inada", "Sriram NS"]
14
12
  s.date = "2015-02-28"
15
13
  s.description = "FlyData Agent"
@@ -152,7 +150,8 @@ Gem::Specification.new do |s|
152
150
  ]
153
151
  s.homepage = "http://flydata.com/"
154
152
  s.licenses = ["All right reserved."]
155
- s.rubygems_version = "2.2.2"
153
+ s.require_paths = ["lib"]
154
+ s.rubygems_version = "2.0.14"
156
155
  s.summary = "FlyData Agent"
157
156
 
158
157
  if s.respond_to? :specification_version then
@@ -7,12 +7,13 @@ module Flydata
7
7
  super
8
8
  end
9
9
 
10
- def buffer_stat(data_entry_id, mode = nil)
11
- @client.get("/#{@model_name.pluralize}/#{data_entry_id}/buffer_stat/#{mode}")
10
+ def buffer_stat(data_entry_id, params = {})
11
+ tables = params[:tables] ? params[:tables].join(',') : ''
12
+ @client.get("/#{@model_name.pluralize}/#{data_entry_id}/buffer_stat/#{params[:mode]}?tables=#{tables}")
12
13
  end
13
-
14
+
14
15
  def cleanup_sync(data_entry_id, tables)
15
- @client.post("/#{@model_name.pluralize}/#{data_entry_id}/cleanup_sync", nil, {tables: tables.join(',')})
16
+ @client.post("/#{@model_name.pluralize}/#{data_entry_id}/cleanup_sync", nil, {tables: tables.join(',')})
16
17
  end
17
18
  end
18
19
  end
@@ -63,7 +63,7 @@ module Flydata
63
63
  if json_response.class == Hash and json_response["success"] == false
64
64
  err_msg = json_response['errors'] ? json_response['errors'].to_s : "Unkown error."
65
65
  raise err_msg
66
- elsif json_response.class == Hash and json_response["auth_token"]
66
+ elsif json_response.class == Hash and json_response["auth_token"]
67
67
  @credentials.write_credentials(json_response["auth_token"])
68
68
  end
69
69
  json_response
@@ -73,10 +73,11 @@ module Flydata
73
73
  parameters = {}
74
74
  parameters.merge(headers) if headers
75
75
  end
76
- def generate_auth_url(path)
76
+ def generate_auth_url(path)
77
77
  token = @credentials.token
78
78
  token = '' unless token
79
- "#{@flydata_api_host}#{path}?auth_token=#{token}"
79
+ c = (path && path.include?('?')) ? '&' : '?'
80
+ "#{@flydata_api_host}#{path}#{c}auth_token=#{token}"
80
81
  end
81
82
  end
82
83
  end
@@ -24,11 +24,13 @@ module Flydata
24
24
  STATUS_WAITING = 'WAITING'
25
25
  STATUS_COMPLETE = 'COMPLETE'
26
26
 
27
- attr_reader :initial_sync, :new_tables, :ddl_tables
27
+ attr_reader :full_initial_sync, :full_tables, :new_tables, :ddl_tables, :input_tables
28
28
 
29
29
  class SyncDataEntryError < StandardError
30
30
  end
31
31
 
32
+ # Command: flydata sync
33
+ # - Arguments
32
34
  def self.slop
33
35
  Slop.new do
34
36
  on 'c', 'skip-cleanup', 'Skip server cleanup'
@@ -42,7 +44,10 @@ module Flydata
42
44
  end
43
45
  end
44
46
 
47
+ # Command: flydata sync
48
+ # - Entry method
45
49
  def run(*tables)
50
+ # Process check
46
51
  sender = Flydata::Command::Sender.new
47
52
  if (sender.process_exist?)
48
53
  if tables.empty?
@@ -55,76 +60,40 @@ module Flydata
55
60
  exit 1
56
61
  end
57
62
 
58
- handle_mysql_sync(tables)
63
+ # Setup instance variables
64
+ set_current_tables(tables)
59
65
 
66
+ # Start initial sync with check
67
+ handle_mysql_sync
68
+
69
+ # Start continuous sync by starting fluentd process
60
70
  unless opts.no_flydata_start?
61
71
  log_info_stdout("Starting FlyData Agent...")
62
72
  Flydata::Command::Sender.new.start(quiet: true)
63
73
  log_info_stdout(" -> Done")
64
74
  end
65
75
 
76
+ # Show message
66
77
  dashboard_url = "#{flydata.flydata_api_host}/dashboard"
67
78
  redshift_console_url = "#{flydata.flydata_api_host}/redshift_clusters/query/new"
68
79
  last_message = ALL_DONE_MESSAGE_TEMPLATE % [redshift_console_url, dashboard_url]
69
80
  log_info_stdout(last_message)
70
81
  end
71
82
 
83
+ # Public method
84
+ # - Called from Sender#start/restart
72
85
  def try_mysql_sync
73
- handle_mysql_sync()
86
+ # Setup instance variables
87
+ set_current_tables
88
+
89
+ # Start initial sync
90
+ handle_mysql_sync
74
91
  rescue SyncDataEntryError
75
92
  return
76
93
  end
77
94
 
78
- def handle_mysql_sync(tables=nil)
79
- de = retrieve_sync_data_entry
80
-
81
- set_current_tables(de)
82
- verify_input_tables(tables, de['mysql_data_entry_preference']['tables'])
83
-
84
- unless @new_tables.empty?
85
- say("We've noticed that these tables have not been synced yet: #{@new_tables.join(", ")}")
86
- unless @ddl_tables.empty?
87
- say(" WARNING: We've noticed that at least one of these tables have not had their DDL generated yet.")
88
- say(" We recommend you run our 'flydata sync:generate_table_ddl > create_table.sql'")
89
- say(" to generate SQL to run on Redshift to create the correct tables")
90
- say(" Without running this sql on your Redshift cluster, there may be issues with your data")
91
- end
92
- if ask_yes_no("Do you want to run initial sync on all of these tables now?")
93
- tables = @initial_sync ? [] : @new_tables
94
- initial_sync(tables, de)
95
- else
96
- #If generate_table_ddl has not been run for these tables, warn user
97
- unless @ddl_tables.empty?
98
- say(" You can generate DDL SQL for your new tables by running this command")
99
- say(" $> flydata sync:generate_table_ddl > create_table.sql")
100
- end
101
- puts "Without syncing these tables, we cannot start the flydata process"
102
- raise "Please try again"
103
- end
104
- end
105
- end
106
- def initial_sync(tables, de=nil)
107
- de = retrieve_sync_data_entry unless de
108
- de = load_sync_info(de,tables)
109
- validate_initial_sync_status(de, tables)
110
- begin
111
- flush_buffer_and_stop unless @initial_sync
112
- sync_mysql_to_redshift(de)
113
- rescue ServerDataProcessingTimeout => e
114
- ee = ServerDataProcessingTimeout.new("Delayed Data Processing")
115
- ee.description = <<EOS
116
- Data processing is taking more than expected. Please contact support@flydata.com to check the system status.
117
- Once checked, you can resume sync with the following command.
118
-
119
- flydata start
120
-
121
- EOS
122
- ee.set_backtrace e.backtrace
123
- raise ee
124
- end
125
- complete
126
- end
127
-
95
+ # Command: flydata sync:flush
96
+ # - Arguments
128
97
  def self.slop_flush
129
98
  Slop.new do
130
99
  on 'f', 'skip-flush', 'Skip server flush'
@@ -132,9 +101,11 @@ EOS
132
101
  end
133
102
  end
134
103
 
135
- def flush
104
+ # Command: flydata sync:flush
105
+ # - Entry method
106
+ def flush(*tables)
136
107
  begin
137
- flush_buffer_and_stop
108
+ flush_buffer_and_stop(tables)
138
109
  rescue ServerDataProcessingTimeout => e
139
110
  ee = ServerDataProcessingTimeout.new("Delayed Data Processing")
140
111
  ee.description = <<EOS
@@ -147,6 +118,8 @@ EOS
147
118
  log_info_stdout("Buffers have been flushed and the sender process has been stopped.")
148
119
  end
149
120
 
121
+ # Command: flydata sync:reset
122
+ # - Arguments
150
123
  def self.slop_reset
151
124
  Slop.new do
152
125
  on 'c', 'client', 'Resets client only.'
@@ -154,19 +127,22 @@ EOS
154
127
  end
155
128
  end
156
129
 
130
+ # Command: flydata sync:reset
131
+ # - Entry method
157
132
  def reset(*tables)
133
+ # Flush client buffer
158
134
  msg = tables.empty? ? '' : " for these tables : #{tables.join(" ")}"
159
135
  return unless ask_yes_no("This resets the current sync#{msg}. Are you sure?")
160
136
  sender = Flydata::Command::Sender.new
161
137
  sender.flush_client_buffer # TODO We should rather delete buffer files
162
138
  sender.stop
163
139
 
164
- de = retrieve_sync_data_entry
165
- all_tables = de['mysql_data_entry_preference']['tables']
166
- verify_input_tables(tables, all_tables)
140
+ # Set instance variables
141
+ de = data_entry
142
+ set_current_tables(tables)
167
143
 
168
144
  begin
169
- wait_for_server_buffer(SERVER_DATA_PROCESSING_TIMEOUT)
145
+ wait_for_server_buffer(timeout: SERVER_DATA_PROCESSING_TIMEOUT, tables: target_tables_for_api)
170
146
  rescue ServerDataProcessingTimeout => e
171
147
  ee = ServerDataProcessingTimeout.new("Delayed Data Processing")
172
148
  ee.description = <<EOS
@@ -179,20 +155,24 @@ EOS
179
155
  ee.set_backtrace e.backtrace
180
156
  raise ee
181
157
  end
182
- cleanup_sync_server(de, tables) unless opts.client?
183
- sync_fm = Flydata::FileUtil::SyncFileManager.new(de)
158
+
159
+ # Cleanup tables on server
160
+ cleanup_sync_server(de, @input_tables) unless opts.client?
161
+
162
+ # Delete local files
163
+ sync_fm = create_sync_file_manager(de)
184
164
  delete_files = [
185
165
  sync_fm.dump_file_path,
186
166
  sync_fm.dump_pos_path,
187
167
  sync_fm.mysql_table_marshal_dump_path,
188
168
  sync_fm.sync_info_file,
189
- sync_fm.table_position_file_paths(*tables),
190
- sync_fm.table_binlog_pos_paths(*tables),
191
- sync_fm.table_binlog_pos_init_paths(*tables),
192
- sync_fm.table_rev_file_paths(*tables),
193
- sync_fm.table_ddl_file_paths(*tables)
169
+ sync_fm.table_position_file_paths(*@input_tables),
170
+ sync_fm.table_binlog_pos_paths(*@input_tables),
171
+ sync_fm.table_binlog_pos_init_paths(*@input_tables),
172
+ sync_fm.table_rev_file_paths(*@input_tables),
173
+ sync_fm.table_ddl_file_paths(*@input_tables)
194
174
  ]
195
- delete_files << sync_fm.binlog_path if tables.empty? or all_tables.empty?
175
+ delete_files << sync_fm.binlog_path if @input_tables.empty? or @full_tables.empty?
196
176
  delete_files.flatten.each do |path|
197
177
  FileUtils.rm(path) if File.exists?(path)
198
178
  end
@@ -200,75 +180,11 @@ EOS
200
180
  log_info_stdout("Reset completed successfully.")
201
181
  end
202
182
 
203
- def wait_for_server_buffer(timeout = 0)
204
- start_time = Time.now
205
- log_info_stdout("Waiting for the server buffer to get empty.")
206
- prev_message =nil
207
- while (status = check) && (status['state'] == 'processing')
208
- prev_message = status['message']
209
- if timeout > 0 && Time.now - start_time > timeout
210
- raise ServerDataProcessingTimeout.new
211
- end
212
- print_progress(status)
213
- sleep 10
214
- end
215
- end
216
-
217
- def wait_for_server_data_processing(timeout = 0)
218
- state = :PROCESS
219
- start_time = Time.now
220
- log_info_stdout("Uploading data to Redshift...")
221
- sleep 10
222
- status = nil
223
- prev_message =nil
224
- while (status = check)
225
- if state == :PROCESS && status['state'] == 'uploading'
226
- log_info_stdout(" -> Done")
227
- state = :UPLOAD
228
- log_info_stdout("Finishing data upload...")
229
- end
230
-
231
- #TODO This is based on a temporary option
232
- if state == :UPLOAD && opts.ff?
233
- log_info_stdout("Skip checking for pending uploads")
234
- break
235
- end
236
-
237
- if status['message'] != prev_message
238
- # making some progress. Reset timer
239
- start_time = Time.now
240
- end
241
- prev_message = status['message']
242
- if timeout > 0 && Time.now - start_time > timeout
243
- raise ServerDataProcessingTimeout.new
244
- end
245
- print_progress(status)
246
- sleep 10
247
- end
248
- if (state == :PROCESS)
249
- # :UPLOAD state was skipped due to no data
250
- log_info_stdout(" -> Done")
251
- log_info_stdout("Finishing data upload...")
252
- end
253
- log_info_stdout(" -> Done")
254
- end
255
-
256
- def check
257
- de = retrieve_sync_data_entry
258
- retry_on(RestClient::Exception) do
259
- status = do_check(de)
260
- if status['complete']
261
- nil
262
- else
263
- status
264
- end
265
- end
266
- end
267
-
183
+ # Deprecated Command: flydata sync:skip
268
184
  # skip initial sync
269
185
  def skip
270
- de = retrieve_sync_data_entry
271
- sync_fm = Flydata::FileUtil::SyncFileManager.new(de)
186
+ de = data_entry
187
+ sync_fm = create_sync_file_manager(de)
272
188
  binlog_path = sync_fm.binlog_path
273
189
  sync_fm.close
274
190
  `touch #{binlog_path}`
@@ -277,6 +193,8 @@ EOS
277
193
  log_info_stdout("Run 'flydata start' to start continuous sync.")
278
194
  end
279
195
 
196
+ # Command: flydata sync:generate_table_ddl
197
+ # - Arguments
280
198
  def self.slop_generate_table_ddl
281
199
  Slop.new do
282
200
  on 'c', 'ctl-only', 'Only generate FlyData Control definitions'
@@ -286,166 +204,77 @@ EOS
286
204
  end
287
205
  end
288
206
 
207
+ # Command: flydata sync:generate_table_ddl
208
+ # - Entry method
289
209
  def generate_table_ddl(*tables)
290
- de = retrieve_sync_data_entry
210
+ # Compatibility check
211
+ de = data_entry
291
212
  dp = flydata.data_port.get
292
213
  Flydata::MysqlCompatibilityCheck.new(dp, de['mysql_data_entry_preference']).check
293
- do_generate_table_ddl(de, tables)
294
- end
295
214
 
296
- private
215
+ # Set instance variables
216
+ set_current_tables(tables)
297
217
 
298
- def verify_input_tables(input_tables, all_tables)
299
- return unless input_tables
300
- inval_table = []
301
- input_tables.each do |tab|
302
- inval_table << tab unless all_tables.include?(tab)
303
- end
304
- raise "These tables are not registered tables: #{inval_table.join(", ")}" unless inval_table.empty?
218
+ do_generate_table_ddl(de)
305
219
  end
306
220
 
307
- def set_current_tables(de)
308
- sync_fm = Flydata::FileUtil::SyncFileManager.new(de)
309
- full_tables = de['mysql_data_entry_preference']['tables']
310
-
311
- @new_tables = sync_fm.get_new_table_list(full_tables, "pos")
312
- @ddl_tables = sync_fm.get_new_table_list(full_tables, "generated_ddl")
313
-
314
- @initial_sync = (@new_tables == full_tables)
315
- sync_fm.close
316
- end
221
+ private
317
222
 
318
- def validate_initial_sync_status(de, tables)
319
- sync_fm = Flydata::FileUtil::SyncFileManager.new(de)
320
- dump_pos_info = sync_fm.load_dump_pos
321
- fp = sync_fm.dump_file_path
322
- sync_fm.close
223
+ # Initial sync
323
224
 
324
- # status is parsing but dumpfile doesn't exist due to streaming -> raise error
325
- if dump_pos_info[:status] == STATUS_PARSING && !File.exists?(fp)
326
- raise "FlyData Sync was interrupted with invalid state. Run 'flydata sync:reset#{tables.empty? ? '' : ' ' + tables.join(' ')}' first."
327
- end
328
- end
225
+ def handle_mysql_sync
226
+ de = data_entry
329
227
 
330
- def retrieve_sync_data_entry
331
- de = retrieve_data_entries.first unless de
332
- raise "There are no data entries." unless de
333
- case de['type']
334
- when 'RedshiftMysqlDataEntry'
335
- mp = de['mysql_data_entry_preference']
336
- if mp['tables_append_only']
337
- mp['tables'] = (mp['tables'].split(",") + mp['tables_append_only'].split(",")).uniq
338
- else
339
- mp['tables'] = mp['tables'].split(",").uniq
228
+ unless @new_tables.empty?
229
+ say("We've noticed that these tables have not been synced yet: #{@new_tables.join(", ")}")
230
+ unless @ddl_tables.empty?
231
+ say(" WARNING: We've noticed that at least one of these tables have not had their DDL generated yet.")
232
+ say(" We recommend you run our 'flydata sync:generate_table_ddl > create_table.sql'")
233
+ say(" to generate SQL to run on Redshift to create the correct tables")
234
+ say(" Without running this sql on your Redshift cluster, there may be issues with your data")
340
235
  end
341
- else
342
- raise SyncDataEntryError, "No supported data entry. Only mysql-redshift sync is supported."
343
- end
344
- de
345
- end
346
-
347
- def cleanup_sync_server(de, tables = [])
348
- print "Cleaning the server."
349
- log_info("Cleaning the server.")
350
- worker = Thread.new do
351
- begin
352
- flydata.data_entry.cleanup_sync(de['id'], tables)
353
- rescue RestClient::RequestTimeout, RestClient::GatewayTimeout
354
- # server is taking time to cleanup. Try again
355
- retry
236
+ if ask_yes_no("Do you want to run initial sync on all of these tables now?")
237
+ initial_sync(de)
238
+ else
239
+ #If generate_table_ddl has not been run for these tables, warn user
240
+ unless @ddl_tables.empty?
241
+ say(" You can generate DDL SQL for your new tables by running this command")
242
+ say(" $> flydata sync:generate_table_ddl > create_table.sql")
243
+ end
244
+ puts "Without syncing these tables, we cannot start the flydata process"
245
+ raise "Please try again"
356
246
  end
357
247
  end
358
- until worker.join(5)
359
- print "."
360
- end
361
- puts
362
- log_info_stdout("Done.")
363
- end
364
-
365
- def do_check(de)
366
- flydata.data_entry.buffer_stat(de['id'], env_mode)
367
248
  end
368
249
 
369
- def print_progress(buffer_stat)
370
- message = buffer_stat['message']
371
- log_info_stdout(message) unless message.nil? || message.empty?
372
- end
373
-
374
- def do_generate_table_ddl(de, tables=[])
375
- if `which mysqldump`.empty?
376
- raise "mysqldump is not installed. mysqldump is required to run the command"
377
- end
378
-
379
- error_list = []
380
-
381
- schema_name = (de['schema_name'] || nil)
382
-
383
- mp = de['mysql_data_entry_preference']
384
-
385
- set_current_tables(de)
386
-
387
- tables = opts.all_tables? ? mp['tables'] : (tables.empty? ? @new_tables : tables)
388
- raise "There are no valid unsynced tables, if you want to just get ddl for all tables, please run \`flydata sync:generate_table_ddl --all-tables\`" if tables.empty?
389
-
390
- %w(host username database).each do |conf_name|
391
- raise "MySQL `#{conf_name}` is neither defined in the data entry nor the local config file" if mp[conf_name].to_s.empty?
392
- end
393
- if tables.empty?
394
- raise "`tables` (or `tables_append_only`) is neither defined in the data entry nor the local config file"
395
- end
250
+ def initial_sync(de)
251
+ # Load sync information from file
252
+ load_sync_info(de)
253
+ validate_initial_sync_status
254
+ begin
255
+ flush_buffer_and_stop(target_tables_for_api) unless @full_initial_sync
256
+ sync_mysql_to_redshift(de)
257
+ rescue ServerDataProcessingTimeout => e
258
+ ee = ServerDataProcessingTimeout.new("Delayed Data Processing")
259
+ ee.description = <<EOS
260
+ Data processing is taking more than expected. Please contact support@flydata.com to check the system status.
261
+ Once checked, you can resume sync with the following command.
396
262
 
397
- command = Util::MysqlUtil.generate_mysql_ddl_dump_cmd(mp.merge(tables: tables))
263
+ flydata start
398
264
 
399
- Open3.popen3(command) do |stdin, stdout, stderr|
400
- stdin.close
401
- stdout.set_encoding("utf-8") # mysqldump output must be in UTF-8
402
- create_flydata_ctl_table = @initial_sync
403
- while !stdout.eof?
404
- begin
405
- mysql_tabledef = FlydataCore::TableDef::MysqlTableDef.create(stdout, skip_primary_key_check: opts.skip_primary_key_check?)
406
- if mysql_tabledef.nil?
407
- # stream had no more create table definition
408
- break
409
- end
410
- flydata_tabledef = mysql_tabledef.to_flydata_tabledef
411
- puts FlydataCore::TableDef::RedshiftTableDef.from_flydata_tabledef(flydata_tabledef, flydata_ctl_table: create_flydata_ctl_table, schema_name: schema_name, ctl_only: opts.ctl_only?)
412
- rescue FlydataCore::TableDefError=> e
413
- error_list << e.err_hash
414
- next
415
- end
416
- create_flydata_ctl_table = false
417
- end
418
- errors = ""
419
- while !stderr.eof?
420
- line = stderr.gets.gsub('mysqldump: ', '')
421
- errors << line unless /Warning: Using a password on the command line interface can be insecure./ === line
422
- end
423
- raise errors unless errors.empty?
424
- end
425
- unless error_list.empty?
426
- log_error_stderr("We have noticed the following error(s):")
427
- group_error = error_list.group_by {|d| d[:error]}
428
- group_error.each_key do |a|
429
- log_error_stderr("The following table(s) have #{a}:")
430
- group_error[a].each do |hash|
431
- log_error_stderr(" - #{hash[:table]}") if hash[:table]
432
- end
433
- end
434
- log_error_stderr("Please fix the above error(s) to try to sync those table(s) or contact us for further help.")
265
+ EOS
266
+ ee.set_backtrace e.backtrace
267
+ raise ee
435
268
  end
436
- tables_without_error = tables - error_list.inject([]){|arr, err| arr << err[:table] if err[:table]}
437
-
438
- sync_fm = Flydata::FileUtil::SyncFileManager.new(de)
439
- sync_fm.mark_generated_tables(tables_without_error)
440
- sync_fm.close
269
+ complete(de)
441
270
  end
442
271
 
443
272
  def sync_mysql_to_redshift(de)
444
273
  dp = flydata.data_port.get
445
- sync_fm = Flydata::FileUtil::SyncFileManager.new(de)
274
+ sync_fm = create_sync_file_manager(de)
446
275
 
447
276
  # Check client condition
448
- if File.exists?(sync_fm.binlog_path) and @initial_sync
277
+ if File.exists?(sync_fm.binlog_path) and @full_initial_sync
449
278
  raise "Already synchronized. If you want to do initial sync, run 'flydata sync:reset'"
450
279
  end
451
280
 
@@ -454,7 +283,7 @@ EOS
454
283
  Flydata::Command::Conf.new.copy_templates
455
284
  end
456
285
  generate_mysqldump(de, sync_fm, !opts.dump_stream?) do |mysqldump_io, db_bytesize|
457
- sync_fm.save_sync_info(@initial_sync, de['mysql_data_entry_preference']['tables'])
286
+ sync_fm.save_sync_info(@full_initial_sync, de['mysql_data_entry_preference']['tables'])
458
287
  parse_mysqldump_and_send(mysqldump_io, dp, de, sync_fm, db_bytesize)
459
288
  end
460
289
  wait_for_mysqldump_processed(dp, de, sync_fm)
@@ -481,7 +310,7 @@ EOS
481
310
  return call_block_or_return_io(fp, &block)
482
311
  end
483
312
 
484
- tables = de['mysql_data_entry_preference']['tables']
313
+ tables = target_tables
485
314
  tables ||= '<all tables>'
486
315
  data_servers = de['mysql_data_entry_preference']['data_servers'] ? "\n data servers: #{de['mysql_data_entry_preference']['data_servers']}" : ""
487
316
 
@@ -507,7 +336,7 @@ EOM
507
336
  Flydata::MysqlCompatibilityCheck.new(dp, de['mysql_data_entry_preference'], dump_dir: fp, backup_dir: sync_fm.backup_dir).check
508
337
  log_info_stdout("Checking database size...")
509
338
 
510
- db_bytesize = Flydata::Parser::Mysql::DatabaseSizeCheck.new(de['mysql_data_entry_preference']).get_db_bytesize
339
+ db_bytesize = Flydata::Parser::Mysql::DatabaseSizeCheck.new(de['mysql_data_entry_preference'].merge('tables' => target_tables)).get_db_bytesize
511
340
  log_info_stdout(" -> #{as_size(db_bytesize)} (#{db_bytesize} byte)")
512
341
 
513
342
  if file_dump
@@ -523,17 +352,16 @@ EOM
523
352
  end
524
353
  end
525
354
 
526
-
527
355
  log_info_stdout("Exporting data from database.")
528
356
  log_info_stdout("This process can take hours depending on data size and load on your database. Please be patient...")
529
357
  if file_dump
530
358
  Flydata::Parser::Mysql::MysqlDumpGeneratorNoMasterData.
531
- new(de['mysql_data_entry_preference']).dump(fp)
359
+ new(de['mysql_data_entry_preference'].merge('tables' => target_tables)).dump(fp)
532
360
  log_info_stdout(" -> Done")
533
361
  call_block_or_return_io(fp, &block)
534
362
  else
535
363
  Flydata::Parser::Mysql::MysqlDumpGeneratorNoMasterData.
536
- new(de['mysql_data_entry_preference']).dump {|io| block.call(io, db_bytesize)}
364
+ new(de['mysql_data_entry_preference'].merge('tables' => target_tables)).dump {|io| block.call(io, db_bytesize)}
537
365
  end
538
366
  else
539
367
  exit 1
@@ -603,7 +431,7 @@ EOM
603
431
  log_info_stdout("Resuming... Last processed table: #{option[:table_name]}")
604
432
  else
605
433
  #If it's a new sync, ensure server side resources are clean
606
- cleanup_sync_server(de, de['mysql_data_entry_preference']['tables']) unless opts.skip_cleanup?
434
+ cleanup_sync_server(de, target_tables_for_api) unless opts.skip_cleanup?
607
435
  end
608
436
  log_info_stdout("Sending data to FlyData Server...")
609
437
 
@@ -671,38 +499,85 @@ EOM
671
499
  return unless dump_pos_info[:status] == STATUS_WAITING
672
500
  binlog_pos = dump_pos_info[:binlog_pos]
673
501
 
674
- wait_for_server_data_processing(SERVER_DATA_PROCESSING_TIMEOUT)
675
- tables = de['mysql_data_entry_preference']['tables']
676
- sync_fm.save_table_binlog_pos(tables, binlog_pos)
502
+ wait_for_server_data_processing(
503
+ timeout: SERVER_DATA_PROCESSING_TIMEOUT, tables: target_tables_for_api)
504
+ sync_fm.save_table_binlog_pos(target_tables, binlog_pos)
677
505
  sync_fm.save_dump_pos(STATUS_COMPLETE, '', -1, binlog_pos)
678
506
  end
679
507
 
680
- ALL_DONE_MESSAGE_TEMPLATE = <<-EOM
508
+ # option: timeout, tables
509
+ def wait_for_server_data_processing(option = {})
510
+ timeout = option[:timeout] || 0
511
+ tables = option[:tables] || []
681
512
 
682
- Congratulations! FlyData has started synchronizing your database tables.
513
+ state = :PROCESS
514
+ start_time = Time.now
515
+ log_info_stdout("Uploading data to Redshift...")
516
+ sleep 10
517
+ status = nil
518
+ prev_message =nil
519
+ while (status = check_server_status(tables))
520
+ if state == :PROCESS && status['state'] == 'uploading'
521
+ log_info_stdout(" -> Done")
522
+ state = :UPLOAD
523
+ log_info_stdout("Finishing data upload...")
524
+ end
683
525
 
684
- What's next?
526
+ #TODO This is based on a temporary option
527
+ if state == :UPLOAD && opts.ff?
528
+ log_info_stdout("Skip checking for pending uploads")
529
+ break
530
+ end
685
531
 
686
- - Check data on Redshift (%s)
687
- - Check your FlyData usage on the FlyData Dashboard (%s)
688
- - To manage the FlyData Agent, use the 'flydata' command (type 'flydata' for help)
689
- - If you encounter an issue,
690
- please check our documentation (https://www.flydata.com/docs/) or
691
- contact our customer support team (support@flydata.com)
532
+ if status['message'] != prev_message
533
+ # making some progress. Reset timer
534
+ start_time = Time.now
535
+ end
536
+ prev_message = status['message']
537
+ if timeout > 0 && Time.now - start_time > timeout
538
+ raise ServerDataProcessingTimeout.new
539
+ end
540
+ print_progress(status)
541
+ sleep 10
542
+ end
543
+ if (state == :PROCESS)
544
+ # :UPLOAD state was skipped due to no data
545
+ log_info_stdout(" -> Done")
546
+ log_info_stdout("Finishing data upload...")
547
+ end
548
+ log_info_stdout(" -> Done")
549
+ end
692
550
 
693
- Thank you for using FlyData!
694
- EOM
551
+ def check_server_status(tables = [])
552
+ de = data_entry
553
+ retry_on(RestClient::Exception) do
554
+ status = do_check_server_status(de, tables)
555
+ if status['complete']
556
+ nil
557
+ else
558
+ status
559
+ end
560
+ end
561
+ end
695
562
 
696
- def complete
697
- de = load_sync_info(retrieve_sync_data_entry)
698
- sync_fm = Flydata::FileUtil::SyncFileManager.new(de)
563
+ def do_check_server_status(de, tables = [])
564
+ flydata.data_entry.buffer_stat(de['id'], mode: env_mode, tables: tables)
565
+ end
566
+
567
+ def print_progress(buffer_stat)
568
+ message = buffer_stat['message']
569
+ log_info_stdout(message) unless message.nil? || message.empty?
570
+ end
571
+
572
+ def complete(de)
573
+ sync_fm = create_sync_file_manager(de)
699
574
  info = sync_fm.load_dump_pos
700
575
  if info[:status] == STATUS_COMPLETE
701
- if @initial_sync
576
+ if @full_initial_sync
702
577
  sync_fm.save_binlog(info[:binlog_pos])
703
578
  end
704
- sync_fm.install_table_binlog_files(de['mysql_data_entry_preference']['tables'])
705
- sync_fm.reset_table_position_files(de['mysql_data_entry_preference']['tables'])
579
+ sync_fm.install_table_binlog_files(target_tables)
580
+ sync_fm.reset_table_position_files(target_tables)
706
581
  sync_fm.delete_dump_file
707
582
  sync_fm.backup_dump_dir
708
583
  else
@@ -719,29 +594,238 @@ Thank you for using FlyData!
719
594
  h.to_json
720
595
  end
721
596
 
722
- def load_sync_info(de ,additional_tables=[])
723
- sync_fm = Flydata::FileUtil::SyncFileManager.new(de)
597
+ # Sync reset
598
+
599
+ def wait_for_server_buffer(option = {})
600
+ timeout = option[:timeout] || 0
601
+ tables = option[:tables] || []
602
+
603
+ start_time = Time.now
604
+ log_info_stdout("Waiting for the server buffer to get empty.")
605
+ prev_message =nil
606
+ while (status = check_server_status(tables)) && (status['state'] == 'processing')
607
+ prev_message = status['message']
608
+ if timeout > 0 && Time.now - start_time > timeout
609
+ raise ServerDataProcessingTimeout.new
610
+ end
611
+ print_progress(status)
612
+ sleep 10
613
+ end
614
+ end
615
+
616
+ def cleanup_sync_server(de, tables = [])
617
+ print "Cleaning the server."
618
+ log_info("Cleaning the server.")
619
+ worker = Thread.new do
620
+ begin
621
+ flydata.data_entry.cleanup_sync(de['id'], tables)
622
+ rescue RestClient::RequestTimeout, RestClient::GatewayTimeout
623
+ # server is taking time to cleanup. Try again
624
+ retry
625
+ end
626
+ end
627
+ until worker.join(5)
628
+ print "."
629
+ end
630
+ puts
631
+ log_info_stdout("Done.")
632
+ end
633
+
634
+ # Generate table ddl
635
+
636
+ def do_generate_table_ddl(de)
637
+ if `which mysqldump`.empty?
638
+ raise "mysqldump is not installed. mysqldump is required to run the command"
639
+ end
640
+
641
+ error_list = []
642
+ schema_name = (de['schema_name'] || nil)
724
643
  mp = de['mysql_data_entry_preference']
725
- unless (rs = sync_fm.load_sync_info).nil?
726
- @initial_sync = rs[:initial_sync]
727
- mp['tables'] = rs[:tables]
728
- else
729
- mp['tables'] = additional_tables unless @initial_sync
644
+
645
+ tables = opts.all_tables? ? @full_tables : (@input_tables.empty? ? @new_tables : @input_tables)
646
+ raise "There are no valid unsynced tables, if you want to just get ddl for all tables, please run \`flydata sync:generate_table_ddl --all-tables\`" if tables.empty?
647
+
648
+ %w(host username database).each do |conf_name|
649
+ raise "MySQL `#{conf_name}` is neither defined in the data entry nor the local config file" if mp[conf_name].to_s.empty?
730
650
  end
651
+ if tables.empty?
652
+ raise "`tables` (or `tables_append_only`) is neither defined in the data entry nor the local config file"
653
+ end
654
+
655
+ command = Util::MysqlUtil.generate_mysql_ddl_dump_cmd(mp.merge(tables: tables))
656
+
657
+ Open3.popen3(command) do |stdin, stdout, stderr|
658
+ stdin.close
659
+ stdout.set_encoding("utf-8") # mysqldump output must be in UTF-8
660
+ create_flydata_ctl_table = @full_initial_sync
661
+ while !stdout.eof?
662
+ begin
663
+ mysql_tabledef = FlydataCore::TableDef::MysqlTableDef.create(stdout, skip_primary_key_check: opts.skip_primary_key_check?)
664
+ if mysql_tabledef.nil?
665
+ # stream had no more create table definition
666
+ break
667
+ end
668
+ flydata_tabledef = mysql_tabledef.to_flydata_tabledef
669
+ puts FlydataCore::TableDef::RedshiftTableDef.from_flydata_tabledef(flydata_tabledef, flydata_ctl_table: create_flydata_ctl_table, schema_name: schema_name, ctl_only: opts.ctl_only?)
670
+ rescue FlydataCore::TableDefError=> e
671
+ error_list << e.err_hash
672
+ next
673
+ end
674
+ create_flydata_ctl_table = false
675
+ end
676
+ errors = ""
677
+ while !stderr.eof?
678
+ line = stderr.gets.gsub('mysqldump: ', '')
679
+ errors << line unless /Warning: Using a password on the command line interface can be insecure./ === line
680
+ end
681
+ raise errors unless errors.empty?
682
+ end
683
+ unless error_list.empty?
684
+ log_error_stderr("We have noticed the following error(s):")
685
+ group_error = error_list.group_by {|d| d[:error]}
686
+ group_error.each_key do |a|
687
+ log_error_stderr("The following table(s) have #{a}:")
688
+ group_error[a].each do |hash|
689
+ log_error_stderr(" - #{hash[:table]}") if hash[:table]
690
+ end
691
+ end
692
+ log_error_stderr("Please fix the above error(s) to try to sync those table(s) or contact us for further help.")
693
+ end
694
+ tables_without_error = tables - error_list.inject([]){|arr, err| arr << err[:table] if err[:table]}
695
+
696
+ sync_fm = create_sync_file_manager(de)
697
+ sync_fm.mark_generated_tables(tables_without_error)
731
698
  sync_fm.close
732
- de
733
699
  end
734
700
 
735
- def flush_buffer_and_stop
701
+
702
+ ALL_DONE_MESSAGE_TEMPLATE = <<-EOM
703
+
704
+ Congratulations! FlyData has started synchronizing your database tables.
705
+
706
+ What's next?
707
+
708
+ - Check data on Redshift (%s)
709
+ - Check your FlyData usage on the FlyData Dashboard (%s)
710
+ - To manage the FlyData Agent, use the 'flydata' command (type 'flydata' for help)
711
+ - If you encounter an issue,
712
+ please check our documentation (https://www.flydata.com/docs/) or
713
+ contact our customer support team (support@flydata.com)
714
+
715
+ Thank you for using FlyData!
716
+ EOM
717
+
718
+ # Sync flush
719
+
720
+ def flush_buffer_and_stop(tables = [])
736
721
  sender = Flydata::Command::Sender.new
737
722
  sender.flush_client_buffer
738
723
  if opts.skip_flush?
739
724
  log_info_stdout("Skip waiting for server data processing.")
740
725
  else
741
- wait_for_server_data_processing(SERVER_DATA_PROCESSING_TIMEOUT)
726
+ wait_for_server_data_processing(
727
+ timeout: SERVER_DATA_PROCESSING_TIMEOUT, tables: tables)
742
728
  end
743
729
  sender.stop(quiet: true)
744
730
  end
731
+
732
+ # Utility methods
733
+
734
+ def set_current_tables(input_tables = nil)
735
+ de = data_entry
736
+ sync_fm = create_sync_file_manager(de)
737
+ @input_tables = input_tables || []
738
+ @full_tables = de['mysql_data_entry_preference']['tables']
739
+
740
+ @new_tables = sync_fm.get_new_table_list(@full_tables, "pos")
741
+ @ddl_tables = sync_fm.get_new_table_list(@full_tables, "generated_ddl")
742
+
743
+ @full_initial_sync = (@new_tables == @full_tables)
744
+
745
+ sync_fm.close
746
+
747
+ verify_input_tables(@input_tables, @full_tables)
748
+ end
749
+
750
+ def validate_initial_sync_status
751
+ sync_fm = create_sync_file_manager
752
+ dump_pos_info = sync_fm.load_dump_pos
753
+ fp = sync_fm.dump_file_path
754
+ sync_fm.close
755
+
756
+ # status is parsing but dumpfile doesn't exist due to streaming -> raise error
757
+ if dump_pos_info[:status] == STATUS_PARSING && !File.exists?(fp)
758
+ raise "FlyData Sync was interrupted with invalid state. Run 'flydata sync:reset#{@input_tables.join(',')}' first."
759
+ end
760
+ end
761
+
762
+ def load_sync_info(de)
763
+ # for debug
764
+ raise "!AssertionError. set_current_tables needs to be called in advance" if @full_tables.nil?
765
+
766
+ sync_fm = create_sync_file_manager(de)
767
+ if (rs = sync_fm.load_sync_info)
768
+ @full_initial_sync = rs[:initial_sync]
769
+ @input_tables = rs[:tables]
770
+ end
771
+ sync_fm.close
772
+ de
773
+ end
774
+
775
+ def target_tables
776
+ if @full_initial_sync
777
+ @full_tables
778
+ elsif !@input_tables.empty?
779
+ @input_tables
780
+ else
781
+ @new_tables
782
+ end
783
+ end
784
+
785
+ # return empty array if full sync
786
+ def target_tables_for_api
787
+ if @full_initial_sync
788
+ []
789
+ elsif !@input_tables.empty?
790
+ @input_tables
791
+ else
792
+ @new_tables
793
+ end
794
+ end
795
+
796
+ def data_entry
797
+ @de ||= retrieve_sync_data_entry
798
+ end
799
+
800
+ def retrieve_sync_data_entry
801
+ de = retrieve_data_entries.first unless de
802
+ raise "There are no data entries." unless de
803
+ case de['type']
804
+ when 'RedshiftMysqlDataEntry'
805
+ mp = de['mysql_data_entry_preference']
806
+ if mp['tables_append_only']
807
+ mp['tables'] = (mp['tables'].split(",") + mp['tables_append_only'].split(",")).uniq
808
+ else
809
+ mp['tables'] = mp['tables'].split(",").uniq
810
+ end
811
+ else
812
+ raise SyncDataEntryError, "No supported data entry. Only mysql-redshift sync is supported."
813
+ end
814
+ de
815
+ end
816
+
817
+ def create_sync_file_manager(de = data_entry)
818
+ Flydata::FileUtil::SyncFileManager.new(de)
819
+ end
820
+
821
+ def verify_input_tables(input_tables, all_tables)
822
+ return unless input_tables
823
+ inval_table = []
824
+ input_tables.each do |tab|
825
+ inval_table << tab unless all_tables.include?(tab)
826
+ end
827
+ raise "These tables are not registered tables: #{inval_table.join(", ")}" unless inval_table.empty?
828
+ end
745
829
  end
746
830
  end
747
831
  end
@@ -18,7 +18,7 @@ module Flydata
18
18
  allow(Kernel).to receive(:sleep)
19
19
  allow_any_instance_of(Flydata::Api::DataPort).to receive(:get).and_return("Wibble")
20
20
  allow_any_instance_of(Flydata::AgentCompatibilityCheck).to receive(:check).and_return(true)
21
- Flydata::Command::Sync.any_instance.should_receive(:handle_mysql_sync).and_return("Wobble")
21
+ Flydata::Command::Sync.any_instance.should_receive(:try_mysql_sync).and_return("Wobble")
22
22
  end
23
23
 
24
24
  context "as daemon" do
@@ -65,6 +65,10 @@ module Flydata
65
65
  end
66
66
  end
67
67
  describe '#do_generate_table_ddl' do
68
+ before do
69
+ allow(subject).to receive(:data_entry).and_return(default_data_entry)
70
+ subject.send(:set_current_tables)
71
+ end
68
72
  shared_examples 'throws an error' do
69
73
  it "throws an error" do
70
74
  expect {
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: flydata
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.4
4
+ version: 0.3.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Koichi Fujikawa
@@ -592,7 +592,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
592
592
  version: '0'
593
593
  requirements: []
594
594
  rubyforge_project:
595
- rubygems_version: 2.2.2
595
+ rubygems_version: 2.0.14
596
596
  signing_key:
597
597
  specification_version: 4
598
598
  summary: FlyData Agent