flydata 0.4.3 → 0.5.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: 325ec216ca30d8548887146aa97bde6daff26476
- data.tar.gz: 59a60554b1eea3751b0fc904d9df55fbbc1fab4d
+ metadata.gz: 0bbdc5f5871df8b3fec4c8c92abd1fdb6b6b24f6
+ data.tar.gz: 0be09c4771db44cac9c3229251598f6f5c3c23fd
  SHA512:
- metadata.gz: 34cf256c13392454624c461b5920bbf8f77540fbe99a470b4acc1249a31e49d3ab627934c1f6b54b32083edf520c8e2836622bb71228febd5236b3b02a36abba
- data.tar.gz: b2004689294e57c0dee428ea852fe421fbd945b1bf47ec130757c097212d4c0e0eb469543df32994365b5b21cd90b4721cc843f195b80c0f15437119c205fa06
+ metadata.gz: 3bdacc04e29100c441e7e1eb1660eb9763e2566f0ac05846a7cc85ac2067e4f0171c41d95e4a12ae5fa7b9c3720b5583040191a68b669b392e360fa58be79abb
+ data.tar.gz: 932a0de391228f576f389c731f312108931be9473875fe8265cc982d58771a6ea20671a762eeaf7b30a5aefbdfa0d60bb4572f409635f757e2bd900fad65e966
data/VERSION CHANGED
@@ -1 +1 @@
- 0.4.3
+ 0.5.0
@@ -289,6 +289,10 @@ class RedshiftAccessError < UserResourceError
  def err_code; ErrorCode::REDSHIFT_ACCESS_ERROR; end
  end
 
+ # error_content[:errors] has a list of errors
+ class MultipleErrors < DataDeliveryError
+ def err_code; -1; end
+ end
 
  ## Depricated errors
 
@@ -156,6 +156,8 @@ module FlydataCore
  "FROM information_schema.tables " +
  "WHERE table_schema = '%s' and table_name in (%s)"
 
+ UNSUPPORTED_ENGINES = %w(MEMORY BLACKHOLE)
+
  # option[:client] : Mysql2::Client object
  # option[:tables] : target table list
  def create_query(option = @option)
@@ -168,12 +170,12 @@ module FlydataCore
  def check_result(result, option = @option)
  invalid_tables = []
  result.each do |r|
- invalid_tables.push(r['table_name']) if r['table_type'] == 'VIEW' || r['engine'] == 'MEMORY'
+ invalid_tables.push(r['table_name']) if r['table_type'] == 'VIEW' || UNSUPPORTED_ENGINES.include?(r['engine'])
  end
 
  unless invalid_tables.empty?
  raise FlydataCore::MysqlCompatibilityError,
- "FlyData does not support VIEW and MEMORY ENGINE table. " +
+ "FlyData does not support VIEW and #{UNSUPPORTED_ENGINES.join(',')} STORAGE ENGINE table. " +
  "Remove following tables from data entry: #{invalid_tables.join(", ")}"
  end
  end
@@ -0,0 +1,38 @@
+ module FlydataCore
+
+ module OptionValidator
+ def self.included(base)
+ base.extend(ClassMethods)
+ end
+
+ def validate_options(o, options = nil)
+ self.class.validate_options(o, options)
+ end
+
+ module ClassMethods
+ def validate_options(o, options = nil)
+ if (options.nil?)
+ unless defined?(self::OPTIONS)
+ raise "validate_options needs OPTIONS hash defined"
+ end
+ options = self::OPTIONS
+ end
+
+ result = o.clone
+
+ options.keys.each do |k|
+ unless (result.has_key?(k))
+ if (options[k] != :mandatory)
+ result[k] = options[k]
+ else
+ raise "Mandatory option #{k} is missing"
+ end
+ end
+ end
+
+ result
+ end
+ end
+ end
+
+ end
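How the new OptionValidator mixin is meant to be consumed, as a minimal sketch grounded in the module above and its spec later in this diff (the Reader class and its option names are illustrative, not part of the gem):

require 'flydata-core/option_validator'

# Hypothetical consumer, for illustration only.
class Reader
  include FlydataCore::OptionValidator

  # :mandatory marks a required key; any other value is used as the default.
  OPTIONS = {
    host: :mandatory,
    port: 3306
  }

  def initialize(o)
    @o = validate_options(o) # fills defaults, raises if :host is missing
  end
  attr_reader :o
end

Reader.new(host: 'db.example.com').o # => {:host=>"db.example.com", :port=>3306}
Reader.new(port: 5432)               # raises "Mandatory option host is missing"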
@@ -2,6 +2,13 @@
  # known as Copy Handler)
  # SimpleDB related classes and methods defined in flydata and flydata-web will
  # be eventually consolidated into this library.
+ module FlydataCore
+ module QueryJob
+ # A special "sequence number" which kicks off sync handoff between init and cont syncs.
+ SYNC_FIRST_SEQ = '1.sync'
+ end
+ end
+
  Dir[File.join(File.dirname(__FILE__), "query_job/**/*.rb")].each do |f|
  require f
  end
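The constant above pairs with the SyncFileManager change later in this diff (seq = FlydataCore::QueryJob::SYNC_FIRST_SEQ if seq == 1): the very first position emitted for a table after an initial sync is tagged with the string '1.sync' instead of the integer 1, which is what marks the handoff point between the init and cont syncs. A rough illustration of the resulting sequence values:

require 'flydata-core/query_job'

# Illustration only: the first sequence becomes '1.sync', later ones stay numeric.
seq = ''                                                 # empty position file
seq = seq.to_i + 1                                       # => 1
seq = FlydataCore::QueryJob::SYNC_FIRST_SEQ if seq == 1  # => "1.sync"
next_seq = seq.to_i + 1                                  # => 2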
@@ -0,0 +1,44 @@
+ require 'flydata-core/query_job/redshift'
+ require 'flydata-core/table_def/redshift_table_def'
+
+ module FlydataCore
+ module QueryJob
+
+ class TableStatus
+ @@domain = 'flydata_redshift_table_status'
+
+ def self.domain=(domain)
+ @@domain = domain
+ end
+
+ def self.sdb=(sdb)
+ @@sdb = sdb
+ end
+
+ def self.where(*args)
+ table_status_items = {}
+ @@sdb.domains[@@domain].items.where(*args).each(select: :all) do |item|
+ class << item
+ # returns the corect (Redshift) table name
+ def table_name
+ FlydataCore::TableDef::RedshiftTableDef.convert_to_valid_table_name(attributes['table_name'].first)
+ end
+
+ def target_table_names
+ FlydataCore::QueryJob::Redshift.target_table_names(attributes['table_name'].first)
+ end
+ end
+ tn = item.attributes['table_name'].first
+ redshift_table_name = FlydataCore::TableDef::RedshiftTableDef.convert_to_valid_table_name(tn)
+ if redshift_table_name == tn
+ table_status_items[redshift_table_name] ||= item
+ else
+ table_status_items[redshift_table_name] = item
+ end
+ end
+ table_status_items.values
+ end
+ end
+
+ end
+ end
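A rough sketch of how TableStatus might be wired up and queried; this assumes the aws-sdk v1 style AWS::SimpleDB handle that the @@sdb class variable appears to expect, and the query condition is made up for illustration:

require 'aws-sdk-v1'
require 'flydata-core/query_job/table_status'

# Assumed setup; the domain shown is the class default.
FlydataCore::QueryJob::TableStatus.sdb = AWS::SimpleDB.new
FlydataCore::QueryJob::TableStatus.domain = 'flydata_redshift_table_status'

# Returns one item per converted (Redshift) table name; an item whose stored
# name already equals the converted name wins over a multi-target entry.
FlydataCore::QueryJob::TableStatus.where(:data_entry_id => '123').each do |item|
  puts item.table_name          # converted Redshift table name
  puts item.target_table_names  # target table names derived from the raw name
end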
@@ -285,7 +285,6 @@ EOS
  end
 
  MAX_COLUMNNAME_LENGTH = 127
-
  VALID_IDENTIFIER_CHARACTERS = [" "] + %w"a-z 0-9 ! # $ % & ' ( ) * + , . \\- \\/ : ; < = > ? @ \\[ \\\\ \\] ^ { \\| } ~ `"
 
  def self.convert_to_valid_name(key)
@@ -335,7 +334,7 @@ EOS
  elsif time_match = TIME_REGEXP.match(value_str)
  t = convert_time_into_timestamp(time_match)
  else
- t = DateTime.parse(value_str)
+ t = DateTime.parse(convert_date(value_str))
  end
  t = t.new_offset(0) # Redshift Plug-in uses UTC
  t.strftime('%Y-%m-%d %H:%M:%S.%6N')
@@ -0,0 +1,117 @@
+ require 'spec_helper'
+ require 'flydata-core/option_validator'
+
+ module FlydataCore
+
+ describe OptionValidator do
+ describe '#validate_options' do
+ context 'with OPTIONS' do
+ class GoodBoy
+ include OptionValidator
+ OPTIONS = {
+ :a => :mandatory,
+ :b => 'some default value'
+ }
+
+ def initialize(o)
+ @o = validate_options(o)
+ end
+ attr_reader :o
+ end
+
+ context 'valid options are passed through' do
+ subject{ GoodBoy.new(a: 'me', b: 'you') }
+
+ describe '#o' do
+ subject { super().o }
+ it { is_expected.to eq({a: 'me', b: 'you'}) }
+ end
+ end
+ context 'optional option may be omitted' do
+ subject{ GoodBoy.new(a: 'me') }
+
+ describe '#o' do
+ subject { super().o }
+ it { is_expected.to eq({a: 'me', b: 'some default value' }) }
+ end
+ end
+ context 'mandatory option must be there' do
+ it { expect{ GoodBoy.new(b: 'you') }.to raise_error }
+ end
+ context 'unknown options are passed through' do
+ subject{ GoodBoy.new(a: 'm', c: 'who') }
+
+ describe '#o' do
+ subject { super().o }
+ it { is_expected.to eq({a: 'm', b: 'some default value', c: 'who'}) }
+ end
+ end
+ end
+ context 'without OPTIONS' do
+ class BadBoy
+ include OptionValidator
+
+ def initialize(o)
+ @o = validate_options(o)
+ end
+ attr_reader :o
+ end
+
+ context 'OPTIONS must exist' do
+ it { expect{ BadBoy.new(a: 'me', b: 'you') }.to raise_error }
+ end
+ end
+ context 'nested classes' do
+ class Parent
+ include OptionValidator
+ OPTIONS = { :a => :mandatory }
+ end
+ context "Child's OPTIONS overrides parent's" do
+ class Child < Parent
+ OPTIONS = {:b => :mandatory}
+
+ def initialize(o)
+ @o = validate_options(o)
+ end
+ attr_reader :o
+ end
+ subject { Child.new(b: 'test') }
+
+ describe '#o' do
+ subject { super().o }
+ it { is_expected.to eq({b: 'test'}) }
+ end
+ end
+ context "Parent's OPTIONS carries over if child doesn't have its own" do
+ class Child2 < Parent
+ def initialize(o)
+ @o = validate_options(o)
+ end
+ attr_reader :o
+ end
+ it { expect { Child2.new(b: 'test') }.to raise_error }
+ end
+ end
+ end
+ describe "::validate_options" do
+ context 'with OPTIONS' do
+ class GoodBoy2
+ include OptionValidator
+ OPTIONS = {
+ :a => :mandatory,
+ :b => 'default value'
+ }
+
+ def self.test(o)
+ validate_options(o)
+ end
+ end
+
+ context 'valid options are passed through' do
+ it { expect(GoodBoy2.test(a: 'me')).to eq(a: 'me', b: 'default value') }
+ end
+ end
+ end
+ end
+
+ end
@@ -662,6 +662,21 @@ EOS
  end
  end
 
+ context 'with zero date' do
+ let(:value) { '0000-00-00 10:24:28' }
+ it { is_expected.to eq('0001-01-01 10:24:28.000000') }
+ end
+
+ context 'with zero month' do
+ let(:value) { '2005-00-19 10:19:25' }
+ it { is_expected.to eq('2005-01-19 10:19:25.000000') }
+ end
+
+ context 'with zero day and month' do
+ let(:value) { '2010-00-00 00:00:00' }
+ it { is_expected.to eq('2010-01-01 00:00:00.000000') }
+ end
+
  context 'with time value' do
  context 'when value is positive' do
  context 'with sec' do
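These expectations pin down what the convert_date step added earlier in this diff (DateTime.parse(convert_date(value_str))) has to achieve: DateTime.parse rejects MySQL zero dates such as '0000-00-00', so zero year/month/day fields must be bumped to parseable values first. A minimal sketch consistent with the expected values above; this is an illustration, not the gem's actual implementation:

require 'date'

# Hypothetical helper: replace zero year/month/day so DateTime.parse accepts it.
def normalize_mysql_zero_date(value_str)
  value_str.sub(/\A(\d{4})-(\d{2})-(\d{2})/) do
    year, month, day = $1, $2, $3
    year  = '0001' if year  == '0000'
    month = '01'   if month == '00'
    day   = '01'   if day   == '00'
    "#{year}-#{month}-#{day}"
  end
end

DateTime.parse(normalize_mysql_zero_date('0000-00-00 10:24:28')).
  new_offset(0).strftime('%Y-%m-%d %H:%M:%S.%6N')
# => "0001-01-01 10:24:28.000000"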
data/flydata.gemspec CHANGED
@@ -2,16 +2,16 @@
  # DO NOT EDIT THIS FILE DIRECTLY
  # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
  # -*- encoding: utf-8 -*-
- # stub: flydata 0.4.3 ruby lib
+ # stub: flydata 0.5.0 ruby lib
 
  Gem::Specification.new do |s|
  s.name = "flydata"
- s.version = "0.4.3"
+ s.version = "0.5.0"
 
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.require_paths = ["lib"]
  s.authors = ["Koichi Fujikawa", "Masashi Miyazaki", "Matthew Luu", "Mak Inada", "Sriram NS"]
- s.date = "2015-07-08"
+ s.date = "2015-07-24"
  s.description = "FlyData Agent"
  s.email = "sysadmin@flydata.com"
  s.executables = ["fdmysqldump", "flydata", "serverinfo"]
@@ -49,8 +49,10 @@ Gem::Specification.new do |s|
  "flydata-core/lib/flydata-core/logger.rb",
  "flydata-core/lib/flydata-core/mysql/command_generator.rb",
  "flydata-core/lib/flydata-core/mysql/compatibility_checker.rb",
+ "flydata-core/lib/flydata-core/option_validator.rb",
  "flydata-core/lib/flydata-core/query_job.rb",
  "flydata-core/lib/flydata-core/query_job/redshift.rb",
+ "flydata-core/lib/flydata-core/query_job/table_status.rb",
  "flydata-core/lib/flydata-core/record/record.rb",
  "flydata-core/lib/flydata-core/redshift/string.rb",
  "flydata-core/lib/flydata-core/table_def.rb",
@@ -64,6 +66,7 @@ Gem::Specification.new do |s|
  "flydata-core/spec/logger_spec.rb",
  "flydata-core/spec/mysql/command_generator_spec.rb",
  "flydata-core/spec/mysql/compatibility_checker.rb",
+ "flydata-core/spec/option_validator_spec.rb",
  "flydata-core/spec/query_job/redshift_spec.rb",
  "flydata-core/spec/redshift/string_spec.rb",
  "flydata-core/spec/spec_helper.rb",
@@ -20,12 +20,19 @@ module Flydata
  end
 
  # Update validity of tables
- # table_validity_hash: { "bad_table": "error reason", "good_table": nil }
+ # table_validity_hash: {"updated_tables":{ "bad_table": "error reason", "good_table": nil }}
  # table "bad_table" will be marked invalid with reason "error reason"
  # table "good table" will be marked valid, that is, clear its error reason if it's set.
  def update_table_validity(data_entry_id, table_validity_hash)
  @client.post("/#{@model_name.pluralize}/#{data_entry_id}/update_table_validity", {:headers => {:content_type => :json}}, table_validity_hash.to_json)
  end
+
+ # Tells the server that an initial sync has completed
+ # stats_hash: {"init_sync_stats":{"Table1": 100, "Table2": 12345}}
+ # Sent 100 records for Table1, Sent 12345 records for Table2
+ def complete_init_sync(data_entry_id, stats_hash)
+ @client.post("/#{@model_name.pluralize}/#{data_entry_id}/complete_init_sync", {:headers => {:content_type => :json}}, stats_hash.to_json)
+ end
  end
  end
  end
@@ -48,7 +48,6 @@ module Flydata
 
  def stop
  if running?
- log_info_stdout("Stopping Helper.")
  run_command(stop_cmd)
  else
  log_info_stdout("Helper is not running.")
@@ -136,10 +135,8 @@ EOS
  end
 
  def raw_start
- log_info_stdout("Starting Helper.")
  Flydata::Helper::Server.run(
  DEFAULT_OPTIONS.merge(create_config_with_args))
- log_info_stdout("Done")
  end
 
  def create_helper_dirs
@@ -32,14 +32,24 @@ module Flydata
  dp = flydata.data_port.get
  AgentCompatibilityCheck.new(dp).check
 
- Flydata::Command::Sync.new.try_mysql_sync
-
- # Start sender(fluentd) process
- log_info_stdout("Starting FlyData Agent sender process.") unless options[:quiet]
- raw_start(options)
+ fluentd_started = false
+ start_fluentd = Proc.new do
+ # Start sender(fluentd) process
+ log_info_stdout("Starting FlyData Agent sender process.") unless options[:quiet]
+ raw_start(options)
+
+ wait_until_client_ready(options)
+ #wait_until_logs_uploaded
+ fluentd_started = true
+ end
 
- wait_until_client_ready(options)
- #wait_until_logs_uploaded
+ quiet_option = options[:quiet]
+ # surpress messages if fluentd is started in #try_mysql_sync
+ options[:quiet] = true
+ Flydata::Command::Sync.new.try_mysql_sync(
+ binlog_ready_callback: start_fluentd)
+ options[:quiet] = quiet_option
+ start_fluentd.call unless fluentd_started
  if options[:show_final_message] && !options[:quiet]
  data_port = flydata.data_port.get
  log_info_stdout("Go to your Dashboard! #{flydata.flydata_api_host}/data_ports/#{data_port['id']}")
@@ -28,7 +28,7 @@ module Flydata
 
  # for dump.pos file
  STATUS_PARSING = 'PARSING'
- STATUS_WAITING = 'WAITING'
+ STATUS_PARSED = 'WAITING' # the value is different from the constant name on purpose for backward compatibility.
  STATUS_COMPLETE = 'COMPLETE'
 
  attr_reader :full_initial_sync, # true if full initial sync
@@ -74,16 +74,27 @@ module Flydata
  exit 1
  end
 
- # Start initial sync with check
- handle_mysql_sync(tables)
+ fluentd_started = false
 
- # Start continuous sync by starting fluentd process
- unless opts.no_flydata_start?
- log_info_stdout("Starting FlyData Agent...")
- Flydata::Command::Sender.new.start(quiet: true)
- log_info_stdout(" -> Done")
+ quiet_option = false
+
+ start_fluentd = Proc.new do |binlog_pos|
+ # Start continuous sync by starting fluentd process
+ unless opts.no_flydata_start?
+ log_info_stdout("Starting FlyData Agent...") unless quiet_option
+ Flydata::Command::Sender.new.start(quiet: true)
+ log_info_stdout(" -> Done") unless quiet_option
+ end
+ fluentd_started = true
  end
 
+ quiet_option = true
+ # Start initial sync with check
+ handle_mysql_sync(tables, binlog_ready_callback: start_fluentd)
+ quiet_option = false
+
+ start_fluentd.call unless fluentd_started
+
  # Show message
  dashboard_url = "#{flydata.flydata_api_host}/dashboard"
  redshift_console_url = "#{flydata.flydata_api_host}/redshift_clusters/query/new"
@@ -93,9 +104,9 @@ module Flydata
 
  # Public method
  # - Called from Sender#start/restart
- def try_mysql_sync
+ def try_mysql_sync(options)
  # Start initial sync
- handle_mysql_sync
+ handle_mysql_sync(nil, options)
  rescue SyncDataEntryError
  return
  end
@@ -113,7 +124,7 @@ module Flydata
  # - Entry method
  def flush(*tables)
  begin
- flush_buffer_and_stop(tables)
+ flush_buffer_and_stop(tables, skip_flush: opts.skip_flush?)
  rescue ServerDataProcessingTimeout => e
  ee = ServerDataProcessingTimeout.new("Delayed Data Processing")
  ee.description = <<EOS
@@ -139,17 +150,27 @@ EOS
  # - Entry method
  def reset(*tables)
  show_purpose_name
+
+ # Set instance variables
+ sync_resumed = set_current_tables(tables, resume: true)
+
+ message = ''
+ if sync_resumed && !tables.empty?
+ log_info_stdout <<EOS
+ Initial sync is in progress. In this case, you can only reset the initial sync.To reset specific table(s), please resume and complete the initial sync by running the 'flydata start' command first.
+ If you'd like to reset the initial sync in progress, run the 'flydata reset' command with no arguments.
+ EOS
+ return
+ end
+
  # Flush client buffer
- msg = tables.empty? ? '' : " for these tables : #{tables.join(" ")}"
- return unless ask_yes_no("This resets the current sync#{msg}. Are you sure?")
+ msg_tables = @input_tables.empty? ? '' : " for these tables : #{@input_tables.join(" ")}"
+ msg_sync_type = sync_resumed ? "the current initial sync" : "the current sync"
+ return unless ask_yes_no("This resets #{msg_sync_type}#{msg_tables}. Are you sure?")
  sender = Flydata::Command::Sender.new
  sender.flush_client_buffer # TODO We should rather delete buffer files
  sender.stop
 
- # Set instance variables
- de = data_entry
- set_current_tables(tables)
-
  begin
  wait_for_server_buffer(timeout: SERVER_DATA_PROCESSING_TIMEOUT, tables: target_tables_for_api)
  rescue ServerDataProcessingTimeout => e
@@ -166,6 +187,7 @@ EOS
  end
 
  # Cleanup tables on server
+ de = data_entry
  cleanup_sync_server(de, @input_tables) unless opts.client?
 
  # Delete local files
@@ -284,17 +306,17 @@ EOS
 
  # Initial sync
 
- def handle_mysql_sync(tables = nil)
+ def handle_mysql_sync(tables = nil, options = {})
  de = data_entry
 
  # Setup instance variables
- sync_resumed = set_current_tables(tables)
+ sync_resumed = set_current_tables(tables, resume: true)
 
  if sync_resumed
- # skip confirmation prompts and resume sync right away. #initial_sync knows
- # where to resume from.
+ # skip confirmation prompts and resume sync right away.
+ # #initial_sync knows where to resume from.
  log_info_stdout("Resuming the initial sync...")
- initial_sync(de, sync_resumed: true)
+ initial_sync(de, options.merge(sync_resumed: true))
  elsif !@new_tables.empty?
  show_purpose_name
  unsynced_table_message = "We've noticed that these tables have not been synced yet: #{@new_tables.join(", ")}\n"
@@ -307,7 +329,7 @@ EOS
  end
  log_info_stdout(unsynced_table_message)
  if ask_yes_no("Do you want to run initial sync on all of these tables now?")
- initial_sync(de, sync_resumed: false)
+ initial_sync(de, options.merge(sync_resumed: false))
  else
  #If generate_table_ddl has not been run for these tables, warn user
  unless @ddl_tables.empty?
@@ -324,13 +346,20 @@ EOS
  # Load sync information from file
  validate_initial_sync_status
  begin
- unless @full_initial_sync || opt[:sync_resumed]
- # flush is unnecessary for full initial sync or sync resume because in either
- # case it's guaranteed that agent is stopped with no leftover buffer.
+ if opt[:sync_resumed]
+ # parallel cont sync has sent buffer data by now so server buffer
+ # data will exist. Skip server data flush.
+ flush_buffer_and_stop(target_tables_for_api, skip_flush: true)
+ elsif !@full_initial_sync
+ # flush leftover data for tables being added.
  log_info_stdout("Sending the existing buffer data...")
- flush_buffer_and_stop(target_tables_for_api)
+ flush_buffer_and_stop(target_tables_for_api,
+ skip_flush: opts.skip_flush?)
+ else
+ # flush is unnecessary for full initial sync because it's guaranteed
+ # that agent is stopped with no leftover buffer.
  end
- sync_mysql_to_redshift(de)
+ sync_mysql_to_redshift(de, opt)
  rescue ServerDataProcessingTimeout => e
  ee = ServerDataProcessingTimeout.new("Delayed Data Processing")
  ee.description = <<EOS
@@ -346,12 +375,12 @@ EOS
  complete(de)
  end
 
- def sync_mysql_to_redshift(de)
+ def sync_mysql_to_redshift(de, options = {})
  dp = flydata.data_port.get
  sync_fm = create_sync_file_manager(de)
 
  # Check client condition
- if File.exists?(sync_fm.binlog_path) and @full_initial_sync
+ if @full_initial_sync && !options[:sync_resumed] && File.exists?(sync_fm.binlog_path)
  raise "Already synchronized. If you want to do initial sync, run 'flydata sync:reset'"
  end
 
@@ -359,14 +388,20 @@ EOS
  unless Flydata::Preference::DataEntryPreference.conf_exists?(de)
  Flydata::Command::Conf.new.copy_templates
  end
- generate_mysqldump(de, sync_fm, !opts.dump_stream?) do |mysqldump_io, binlog_pos, db_bytesize|
- sync_fm.save_sync_info(@full_initial_sync, target_tables)
+
+ # initialize stats
+ target_tables.each do |table_name|
+ sync_fm.save_record_count_stat(table_name, 0)
+ end
+
+ generate_mysqldump(de, sync_fm, !opts.dump_stream?, options) do |mysqldump_io, binlog_pos, db_bytesize|
  parse_mysqldump_and_send(mysqldump_io, dp, de, sync_fm, binlog_pos, db_bytesize)
  end
- wait_for_mysqldump_processed(dp, de, sync_fm)
+ complete_mysqldump_processing(dp, de, sync_fm)
  end
 
- def generate_mysqldump(de, sync_fm, file_dump = true, &block)
+ def generate_mysqldump(de, sync_fm, file_dump = true, options = {},
+ &block)
  # validate parameter
  %w(host username database).each do |k|
  if de['mysql_data_entry_preference'][k].to_s.empty?
@@ -375,15 +410,19 @@ EOS
  end
  end
 
- # Status is waiting or complete -> skip dump and parse
+ # Status is parsed or complete -> skip dump and parse
  dump_pos_info = sync_fm.load_dump_pos
- return if dump_pos_info[:status] == STATUS_WAITING || dump_pos_info[:status] == STATUS_COMPLETE
+ if dump_pos_info[:status] == STATUS_PARSED || dump_pos_info[:status] == STATUS_COMPLETE
+ initialize_binlog_positions_and_call_callback(
+ nil, options[:binlog_ready_callback], sync_fm)
+ end
 
  # mysqldump file exists -> skip dump
  dp = flydata.data_port.get
  fp = sync_fm.dump_file_path
  if file_dump && File.exists?(fp) && File.size(fp) > 0
- log_info_stdout(" -> Skip")
+ initialize_binlog_positions_and_call_callback(
+ nil, options[:binlog_ready_callback], sync_fm)
  return call_block_or_return_io(fp, &block)
  end
 
@@ -435,10 +474,16 @@ EOM
 
  log_info_stdout("Setting binary log position and exporting data from the database.")
  log_info_stdout("This process can take hours depending on data size and load on your database. Please be patient...")
+ sync_fm.save_sync_info(@full_initial_sync, target_tables)
  if file_dump
+ binlog_pos = nil
  begin
- binlog_pos = Flydata::Parser::Mysql::MysqlDumpGeneratorNoMasterData.
- new(de['mysql_data_entry_preference'].merge('tables' => target_tables)).dump(fp)
+ Flydata::Parser::Mysql::MysqlDumpGeneratorNoMasterData.
+ new(de['mysql_data_entry_preference'].merge('tables' => target_tables)).dump(fp) do |_io, _binlog_pos|
+ binlog_pos = _binlog_pos
+ initialize_binlog_positions_and_call_callback(
+ binlog_pos, options[:binlog_ready_callback], sync_fm)
+ end
  log_info_stdout(" -> Database dump done")
  rescue Exception => e
  #Catch all exceptions including SystemExit and Interrupt.
@@ -450,13 +495,31 @@ EOM
  call_block_or_return_io(fp, binlog_pos, &block)
  else
  Flydata::Parser::Mysql::MysqlDumpGeneratorNoMasterData.
- new(de['mysql_data_entry_preference'].merge('tables' => target_tables)).dump {|io, binlog_pos| block.call(io, binlog_pos, db_bytesize)}
+ new(de['mysql_data_entry_preference'].merge('tables' => target_tables)).dump do |io, binlog_pos|
+ initialize_binlog_positions_and_call_callback(
+ binlog_pos, options[:binlog_ready_callback], sync_fm)
+
+ block.call(io, binlog_pos, db_bytesize)
+ end
  end
  else
  exit 1
  end
  end
 
+ def initialize_binlog_positions_and_call_callback(binlog_pos, callback, sync_fm)
+ if binlog_pos
+ initialize_positions(sync_fm, binlog_pos)
+ else
+ # no binlog_pos was given because dump was completed in the
+ # previous init sync attempt. Position files must be there already
+ # so no initialization is necessary.
+ end
+ if callback
+ callback.call(binlog_pos)
+ end
+ end
+
  # return available disk size(byte)
  def free_disk_space(dump_path)
  stat = Sys::Filesystem.stat(dump_path)
@@ -584,7 +647,7 @@ EOM
  log_info_stdout(" -> Done")
  #log_info_stdout(" -> Records sent to the server")
  #log_info_stdout(" -> #{sync_fm.load_stats}")
- sync_fm.save_dump_pos(STATUS_WAITING, '', dump_file_size, binlog_pos)
+ sync_fm.save_dump_pos(STATUS_PARSED, '', dump_file_size, binlog_pos)
 
  if ENV['FLYDATA_BENCHMARK']
  bench_end_time = Time.now
@@ -593,17 +656,14 @@ EOM
  end
  end
 
- def wait_for_mysqldump_processed(dp, de, sync_fm)
+ def complete_mysqldump_processing(dp, de, sync_fm)
  return if ENV['FLYDATA_BENCHMARK']
 
- # Status is not waiting -> skip waiting
+ # Status is not parsed -> don't complete
  dump_pos_info = sync_fm.load_dump_pos
- return unless dump_pos_info[:status] == STATUS_WAITING
+ return unless dump_pos_info[:status] == STATUS_PARSED
  binlog_pos = dump_pos_info[:binlog_pos]
 
- wait_for_server_data_processing(
- timeout: SERVER_DATA_PROCESSING_TIMEOUT, tables: target_tables_for_api)
- sync_fm.save_table_binlog_pos(target_tables, binlog_pos)
  sync_fm.save_dump_pos(STATUS_COMPLETE, '', -1, binlog_pos)
  end
 
@@ -676,19 +736,48 @@ EOM
  sync_fm = create_sync_file_manager(de)
  info = sync_fm.load_dump_pos
  if info[:status] == STATUS_COMPLETE
- if @full_initial_sync
- sync_fm.save_binlog(info[:binlog_pos])
- end
- sync_fm.install_table_binlog_files(target_tables)
- sync_fm.reset_table_position_files(target_tables)
+ send_record_counts(de, sync_fm)
  sync_fm.delete_dump_file
  sync_fm.backup_dump_dir
  else
- raise "Initial sync status is not complete. Try running 'flydata sync'."
+ raise "Initial sync status is not complete. Try running 'flydata start' again."
  end
  sync_fm.close
  end
 
+ NUM_TABLES_IN_CHUNK = 30
+ def send_record_counts(de, sync_fm)
+ stats = sync_fm.load_stats
+ stats.each_slice(NUM_TABLES_IN_CHUNK) do |slice|
+ h = Hash[slice]
+ send_record_counts_chunk(de, h)
+ end
+ end
+
+ def send_record_counts_chunk(de, stats)
+ retry_count = 0
+ retry_interval = 3
+ begin
+ flydata.data_entry.complete_init_sync(de['id'],
+ {init_sync_stats: {record_counts:stats}})
+ rescue
+ retry_count += 1
+ raise if retry_count > 3
+ sleep retry_interval
+ retry_interval *= 2
+ end
+ end
+
+ def initialize_positions(sync_fm, binlog_pos)
+ sync_fm.save_table_binlog_pos(target_tables, binlog_pos)
+
+ if @full_initial_sync
+ sync_fm.save_binlog(binlog_pos)
+ end
+ sync_fm.install_table_binlog_files(target_tables)
+ sync_fm.reset_table_position_files(target_tables)
+ end
+
  def convert_to_flydata_values(mysql_table, values)
  types = mysql_table.columns.each_value.collect{|col_attrs| col_attrs[:format_type]}
  types.size.times.collect{|i| FlydataCore::TableDef::MysqlTableDef.convert_to_flydata_value(values[i], types[i]) }
@@ -828,16 +917,15 @@ Thank you for using FlyData!
 
  # Sync flush
 
- def flush_buffer_and_stop(tables = [])
+ def flush_buffer_and_stop(tables = [], options = {})
  sender = Flydata::Command::Sender.new
  sender.flush_client_buffer
  sender.stop(quiet: true)
- if opts.skip_flush?
- log_info_stdout("Skip waiting for server data processing.")
- else
- wait_for_server_data_processing(
- timeout: SERVER_DATA_PROCESSING_TIMEOUT, tables: tables)
- end
+
+ return if options[:skip_flush]
+
+ wait_for_server_data_processing(
+ timeout: SERVER_DATA_PROCESSING_TIMEOUT, tables: tables)
  end
 
  # Utility methods
@@ -845,16 +933,19 @@ Thank you for using FlyData!
  def set_current_tables(input_tables = nil, options = {})
  de = data_entry
  sync_fm = create_sync_file_manager(de)
- @input_tables = input_tables || []
+ sync_info = sync_fm.load_sync_info
+ sync_resumed = options[:resume] && !!sync_info
+
  @invalid_tables = de['mysql_data_entry_preference']['invalid_tables'] # tables marked as invalid as a result of previous check
  @full_tables = options[:include_invalid_tables] ? de['mysql_data_entry_preference']['tables'] + @invalid_tables : de['mysql_data_entry_preference']['tables']
 
  @new_tables = sync_fm.get_new_table_list(@full_tables, "pos")
  @ddl_tables = sync_fm.get_new_table_list(@full_tables, "generated_ddl")
 
- @full_initial_sync = (@new_tables == @full_tables)
-
- sync_resumed = load_sync_info(sync_fm)
+ @input_tables = sync_resumed ? sync_info[:tables] : input_tables
+ @input_tables ||= []
+ @full_initial_sync = sync_resumed ? sync_info[:initial_sync] :
+ (@new_tables == @full_tables)
 
  sync_fm.close
 
@@ -875,19 +966,6 @@ Thank you for using FlyData!
  end
  end
 
- def load_sync_info(sync_fm)
- sync_resumed = false
- # for debug
- raise "!AssertionError. set_current_tables needs to be called in advance" if @full_tables.nil?
-
- if (rs = sync_fm.load_sync_info)
- @full_initial_sync = rs[:initial_sync]
- @input_tables = rs[:tables]
- sync_resumed = true
- end
- sync_resumed
- end
-
  def target_tables
  if @full_initial_sync
  @full_tables
@@ -1,4 +1,5 @@
  require 'mysql2'
+ require 'flydata-core/query_job'
 
  module Flydata
  class SyncFileManager
@@ -69,7 +70,7 @@ module Flydata
  table_positions_dir_path = ENV['FLYDATA_TABLE_POSITIONS'] || File.join(FLYDATA_HOME, 'positions')
  new_tables = []
  tables.each do |table|
- new_tables << table unless File.exists?(File.join(table_positions_dir_path, "#{table}.#{file_type}"))
+ new_tables << table unless File.exists?(File.join(table_positions_dir_path, "#{table}.#{file_type}"))
  end
  new_tables
  end
@@ -184,6 +185,7 @@ module Flydata
  f = @table_position_files[table_name]
  seq = f.read
  seq = seq.to_i + 1
+ seq = FlydataCore::QueryJob::SYNC_FIRST_SEQ if seq == 1
  begin
  yield(seq)
  ensure
@@ -108,6 +108,7 @@ module Flydata
  end
  context 'with no stream option' do
  before do
+ expect(default_sync_fm).to receive(:save_sync_info).once
  expect(subject).to receive(:free_disk_space).and_return(disk_byte)
  expect(File).to receive(:dirname)
  end
@@ -128,6 +129,7 @@ module Flydata
  end
  context 'with stream option' do
  it 'will export to io' do
+ expect(default_sync_fm).to receive(:save_sync_info).once
  Flydata::Parser::Mysql::MysqlDumpGeneratorNoMasterData.any_instance.should_receive(:dump)
 
  subject.send(:generate_mysqldump, default_data_entry, default_sync_fm, false)
@@ -234,11 +234,12 @@ module Flydata
  end
  let(:normal_table) { {"table_name"=>"normal_table", "table_type"=>"BASE TABLE", "engine"=>"InnoDB"} }
  let(:engine_table) { {"table_name"=>"engine_table", "table_type"=>"BASE TABLE", "engine"=>"MEMORY"} }
+ let(:blackhole_table) { {"table_name"=>"blackhole_table", "table_type"=>"BASE TABLE", "engine"=>"BLACKHOLE"} }
  let(:view) { {"table_name"=>"view_table", "table_type"=>"VIEW", "engine"=>nil} }
  let(:client) { double('client') }
  let(:subject_object) { MysqlCompatibilityCheck.new(default_data_port,test_data_entry, {}) }
  let(:error) { FlydataCore::MysqlCompatibilityError }
- let(:base_error_msg) { "FlyData does not support VIEW and MEMORY ENGINE table. Remove following tables from data entry: %s" }
+ let(:base_error_msg) { "FlyData does not support VIEW and MEMORY,BLACKHOLE STORAGE ENGINE table. Remove following tables from data entry: %s" }
  subject { subject_object.check_mysql_table_types }
  before do
  allow(Mysql2::Client).to receive(:new).and_return(client)
@@ -256,6 +257,11 @@ module Flydata
  let(:table_list) { [ engine_table ] }
  it { expect{subject}.to raise_error(error, /#{error_msg}/) }
  end
+ context "where data entry has BLACKHOLE engine table" do
+ let(:error_msg) { base_error_msg % blackhole_table['table_name'] }
+ let(:table_list) { [ blackhole_table ] }
+ it { expect{subject}.to raise_error(error, /#{error_msg}/) }
+ end
  context "where data entry has the VIEW" do
  let(:error_msg) { base_error_msg % view['table_name'] }
  let(:table_list) { [ view ] }
@@ -236,8 +236,8 @@ module Flydata
  end
  subject { subject_object }
  context 'when an exception happens in the block' do
- let(:first_pos) { 1 }
- let(:last_pos) { first_pos + 1 }
+ let(:first_pos) { '1.sync' }
+ let(:last_pos) { first_pos.to_i + 1 }
  before do
  subject.increment_and_save_table_position(test_table) do |seq|
  expect(seq).to eq first_pos
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: flydata
  version: !ruby/object:Gem::Version
- version: 0.4.3
+ version: 0.5.0
  platform: ruby
  authors:
  - Koichi Fujikawa
@@ -12,7 +12,7 @@ authors:
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2015-07-08 00:00:00.000000000 Z
+ date: 2015-07-24 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: rest-client
@@ -486,8 +486,10 @@ files:
  - flydata-core/lib/flydata-core/logger.rb
  - flydata-core/lib/flydata-core/mysql/command_generator.rb
  - flydata-core/lib/flydata-core/mysql/compatibility_checker.rb
+ - flydata-core/lib/flydata-core/option_validator.rb
  - flydata-core/lib/flydata-core/query_job.rb
  - flydata-core/lib/flydata-core/query_job/redshift.rb
+ - flydata-core/lib/flydata-core/query_job/table_status.rb
  - flydata-core/lib/flydata-core/record/record.rb
  - flydata-core/lib/flydata-core/redshift/string.rb
  - flydata-core/lib/flydata-core/table_def.rb
@@ -501,6 +503,7 @@ files:
  - flydata-core/spec/logger_spec.rb
  - flydata-core/spec/mysql/command_generator_spec.rb
  - flydata-core/spec/mysql/compatibility_checker.rb
+ - flydata-core/spec/option_validator_spec.rb
  - flydata-core/spec/query_job/redshift_spec.rb
  - flydata-core/spec/redshift/string_spec.rb
  - flydata-core/spec/spec_helper.rb