flydata 0.7.17 → 0.7.18

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 340f31b8701b211b25628d5690237565ff5e46c6
4
- data.tar.gz: 25bbb20fa4a67ca4f25f20917a350e649a224b80
3
+ metadata.gz: 5f6e6673d1001075c0ba38d2c4551aceac38d5b5
4
+ data.tar.gz: ff03d98fe9137ff5e4a10176a0e9b0c000d68c72
5
5
  SHA512:
6
- metadata.gz: ae89812c8d23ad80ec1777ec1a6c39764808d7279ff54db865b7bc1cd090045cef87f426af834aaf50d147f1d7c2d9eef1a16150b6844d79701313f84c7b3a17
7
- data.tar.gz: 4f6c6c97c6c04ebee65780bf31930fc58a6316cf7830bcac5defd63bc8a9db68d3062577d142e09cd1d86f9cc66b7c4bd207d6b5094ae6c4c89f48b983c88d57
6
+ metadata.gz: abedde840231a9460445b0983a7ec8d9e1da3d043792352c626d8202b1792787a150571c502ccf39a3d6ea6f3840a0fd617b96922ae55d0c37c81d86cdd73556
7
+ data.tar.gz: dc1a691a6c514f75339b99ed7c8c46238b8ca6b32bedb3c923e182408382f0bbe13a1e4556c41175876bfa0423a588a9e5af412afb6fe4bd6a7f828f3dedb387
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.7.17
1
+ 0.7.18
@@ -10,14 +10,21 @@ module Postgresql
10
10
  class PGClient
11
11
  PG_CONNECT_TIMEOUT = 10.0
12
12
 
13
- def initialize(dbconf)
13
+ def initialize(dbconf, options = {})
14
14
  @dbconf = FlydataCore::Postgresql::Config.opts_for_pg(dbconf)
15
+ @options = options
15
16
  end
16
17
 
17
18
  attr_reader :dbconf
18
19
 
19
20
  def establish_connection
20
- @conn = create_connection if @conn.nil?
21
+ if @conn.nil?
22
+ @conn = create_connection
23
+ if @options[:notice_receiver]
24
+ @conn.set_notice_receiver{|r| @options[:notice_receiver].call(r) }
25
+ end
26
+ end
27
+ @conn
21
28
  end
22
29
 
23
30
  def query(query, params = [])
@@ -42,6 +49,11 @@ class PGClient
42
49
  : result
43
50
  end
44
51
 
52
+ def exec(query)
53
+ establish_connection
54
+ @conn.exec(query)
55
+ end
56
+
45
57
  def close
46
58
  if @conn
47
59
  @conn.finish
@@ -159,13 +159,14 @@ CREATE TABLE IF NOT EXISTS %s (
159
159
  EOS
160
160
 
161
161
  def self.remove_table_sql(flydata_tabledef, schema_name, options)
162
- return "" if options[:skip_drop_table]
163
162
  table_name = flydata_tabledef[:table_name]
164
163
  redshift_tbl = table_name_for_ddl(table_name, schema_name)
165
164
  sql = ""
166
165
  if options[:backup_postfix]
167
166
  # drop backup table if exists
168
- sql += drop_backup_table_sql(flydata_tabledef, schema_name, options)
167
+ unless options[:skip_drop_table]
168
+ sql += drop_backup_table_sql(flydata_tabledef, schema_name, options)
169
+ end
169
170
  # create an empty table to prevent RENAME TABLE query from failing
170
171
  sql += create_table_sql(flydata_tabledef, schema_name,
171
172
  options.merge(create_table_sql: CREATE_TABLE_IF_NOT_EXISTS_SQL))
@@ -173,6 +174,7 @@ EOS
173
174
  "#{table_name}#{options[:backup_postfix]}", nil)
174
175
  sql += RENAME_TABLE_SQL % [redshift_tbl, backup_tbl]
175
176
  else
177
+ return "" if options[:skip_drop_table]
176
178
  sql += DROP_TABLE_SQL % redshift_tbl
177
179
  end
178
180
  sql
@@ -117,7 +117,16 @@ EOT
117
117
  option.merge!(skip_drop_table: true)
118
118
  end
119
119
 
120
- let(:drop_table_subquery) { ""}
120
+ let(:drop_table_subquery) { <<EOT
121
+ CREATE TABLE IF NOT EXISTS #{schema_prefix}"test_table" (
122
+ "id" int4,
123
+ "age" int8,
124
+ "value" varchar(max),
125
+ PRIMARY KEY ("id")
126
+ ) DISTKEY("id") SORTKEY("id");
127
+ ALTER TABLE #{schema_prefix}"test_table" RENAME TO "test_table_flydata20160125232857";
128
+ EOT
129
+ }
121
130
  it 'should return ddl' do
122
131
  expect(subject).to eq "BEGIN;\n#{create_table_queries}\n#{flydata_ctl_update}\nCOMMIT;\n"
123
132
  end
data/flydata.gemspec CHANGED
Binary file
@@ -9,8 +9,10 @@ module Flydata
9
9
  super
10
10
  end
11
11
 
12
- def run_query(sql)
13
- @client.post("#{@url_path}/query", nil, {redshift_query: {body: sql}})
12
+ def show_default
13
+ # currently one user has one redshift_cluster
14
+ url_path = "#{@url_path}/show_default?password_required=1"
15
+ @client.get(url_path)
14
16
  end
15
17
  end
16
18
  end
@@ -4,6 +4,7 @@ require 'flydata/command_loggable'
4
4
  require 'flydata/source'
5
5
  require 'flydata/command/exclusive_runnable'
6
6
  require 'flydata/preference/data_entry_preference'
7
+ require 'flydata/util/encryptor'
7
8
 
8
9
 
9
10
  module Flydata
@@ -42,14 +43,35 @@ module Flydata
42
43
  log_info_stdout("Your current application name is '#{de['purpose_name']}'")
43
44
  end
44
45
 
45
- def data_entry
46
- @de ||= retrieve_data_entries.first
46
+ def data_entry(refresh: false)
47
+ if @de.nil? || refresh
48
+ @de = retrieve_data_entries.first
49
+ end
47
50
  raise "No data entry exists. Please set one up on the FlyData Console (#{dashboard_url})" unless @de
48
51
  @de
49
52
  end
50
53
 
51
- def source
52
- @source ||= Source.create(data_entry)
54
+ def data_port
55
+ return @data_port if @data_port
56
+ @data_port = flydata.data_port.get
57
+ end
58
+
59
+ def redshift_cluster
60
+ return @redshift_cluster if @redshift_cluster
61
+ @redshift_cluster = flydata.redshift_cluster.show_default
62
+ @redshift_cluster['password'] = Flydata::Util::Encryptor.decrypt(
63
+ @redshift_cluster['encrypted_password'],
64
+ data_port['key'],
65
+ 'redshift_cluster password')
66
+ @redshift_cluster
67
+ end
68
+
69
+ def source(refresh: false)
70
+ if @source.nil? || refresh
71
+ @source = nil
72
+ @source = Source.create(data_entry(refresh: refresh))
73
+ end
74
+ @source
53
75
  end
54
76
 
55
77
  def register_crontab
@@ -9,7 +9,9 @@ module Flydata
9
9
  Slop.new do
10
10
  on 'n', 'no-daemon', 'Start FlyData agent as a regular program'
11
11
  on 'e', 'no-email', 'Skip sending init-sync-start notification email'
12
+ on 'y', 'yes', 'Skip command prompt assuming yes to all questions. Use this for batch operation.'
12
13
  on 'force-run', 'Run forcefully, ignoring exclusive run info'
14
+ on 'auto-create', 'Create tables on Redshift automatically'
13
15
  end
14
16
  end
15
17
  def start(options_or_show_final_message = {show_final_message: true}) # For backward compatibility. Use only as options going forward
@@ -66,7 +68,10 @@ module Flydata
66
68
  options[:quiet] = true
67
69
  Flydata::Command::Sync.new.try_initial_sync(
68
70
  source_pos_ready_callback: start_fluentd,
69
- no_email: opts.no_email?)
71
+ no_email: opts.no_email?,
72
+ auto_create: opts.auto_create?,
73
+ slop_opts: opts,
74
+ )
70
75
  options[:quiet] = quiet_option
71
76
  start_fluentd.call unless fluentd_started
72
77
  if options[:show_final_message] && !options[:quiet]
@@ -12,6 +12,7 @@ require 'flydata/helpers'
12
12
  require 'flydata/json'
13
13
  require 'flydata/queueable_thread'
14
14
  require 'flydata/output/forwarder'
15
+ require 'flydata/output/ddl_runner'
15
16
  require 'flydata/parser'
16
17
  require 'flydata/preference/data_entry_preference'
17
18
  require 'flydata/sync_file_manager'
@@ -20,6 +21,7 @@ require 'flydata-core/table_def'
20
21
  require 'flydata/table_ddl'
21
22
  require 'flydata/event/api_event_sender'
22
23
  require 'flydata-core/event/event_dictionary'
24
+ require 'flydata-core/record/record'
23
25
  require 'sigdump/setup'
24
26
  #require 'ruby-prof' # to enable profiling, also set the class' RUN_PROFILE
25
27
 
@@ -34,6 +36,11 @@ module Flydata
34
36
  INSERT_PROGRESS_INTERVAL = 1000
35
37
  SERVER_DATA_PROCESSING_TIMEOUT = 3600 # seconds
36
38
 
39
+ # for sync_info file auto_create_status
40
+ AUTO_CREATE_STATUS_START = 'START'
41
+ #AUTO_CREATE_STATUS_SENT_DDL = 'SENT_DDL'
42
+ AUTO_CREATE_STATUS_CREATED_TABLES = 'CREATED_TABLES'
43
+
37
44
  # for dump.pos file
38
45
  STATUS_START = 'START' # only :source_pos is available at the beginning of parse
39
46
  STATUS_PARSING = 'PARSING'
@@ -74,9 +81,13 @@ module Flydata
74
81
  # Public method
75
82
  # - Called from Sender#start/restart
76
83
  def try_initial_sync(options)
84
+ @opts = options[:slop_opts] if options[:slop_opts]
77
85
  handle_initial_sync(options) if source.sync.supported?
78
86
  rescue Source::UnsupportedSourceError
79
87
  return
88
+ rescue => e
89
+ log_error("[error] Unexpcted error happend during inital sync. error:#{e}")
90
+ raise e
80
91
  end
81
92
 
82
93
  # Command: flydata sync:flush
@@ -130,7 +141,7 @@ EOS
130
141
  tables = []
131
142
  reset_init = false
132
143
  end
133
- sync_resumed = set_current_tables(tables, resume: !opts[:all])
144
+ sync_resumed, auto_create = set_current_tables(tables, resume: !opts[:all])
134
145
  target_tables = opts[:all] ? @full_tables : @input_tables
135
146
  target_append_only_tables = target_tables & @append_only_tables
136
147
  target_full_sync_tables = target_tables - @append_only_tables
@@ -607,7 +618,10 @@ EOS
607
618
  end
608
619
 
609
620
  # Setup instance variables
610
- sync_resumed = set_current_tables(nil, resume: true)
621
+ # Need to try the sync for all tables if auto create mode is on
622
+ include_all_tables = !!options[:auto_create]
623
+ sync_resumed, auto_create = set_current_tables(nil, resume: true, include_all_tables: include_all_tables)
624
+ options[:auto_create] ||= auto_create
611
625
 
612
626
  if sync_resumed
613
627
  # skip confirmation prompts and resume sync right away.
@@ -617,7 +631,7 @@ EOS
617
631
  elsif !@unsynced_tables.empty?
618
632
  show_purpose_name
619
633
  unsynced_table_message = "We've noticed that these tables have not been synced yet: #{@unsynced_tables.join(", ")}\n"
620
- unless @no_ddl_generated_tables.empty?
634
+ if !@no_ddl_generated_tables.empty? && !options[:auto_create]
621
635
  unsynced_table_message <<
622
636
  " WARNING: We've noticed that at least one of these tables have not had their DDL generated yet.\n" +
623
637
  " We recommend you run our 'flydata sync:generate_table_ddl > create_table.sql'\n" +
@@ -655,7 +669,7 @@ EOS
655
669
  _reset(recover_cmd, reset_client_only: false, delete_tbl_ddl: false)
656
670
 
657
671
  # Setup instance variables again
658
- sync_resumed = set_current_tables(nil, resume: true)
672
+ set_current_tables(nil, resume: true)
659
673
  end
660
674
 
661
675
  begin
@@ -675,6 +689,7 @@ EOS
675
689
  perform_initial_sync(de, opt)
676
690
  rescue ServerDataProcessingTimeout => e
677
691
  ee = ServerDataProcessingTimeout.new("Delayed Data Processing")
692
+ log_error("[error] Delayed Data Processing. Please check and resume the sync by running 'flydata start'")
678
693
  ee.description = <<EOS
679
694
  Data processing is taking more than expected. Please contact support@flydata.com to check the system status.
680
695
  Once checked, you can resume sync with the following command.
@@ -684,6 +699,14 @@ EOS
684
699
  EOS
685
700
  ee.set_backtrace e.backtrace
686
701
  raise ee
702
+ rescue AgentInternalError => e
703
+ case e.code
704
+ when AgentInternalError::NO_VALID_TABLE_ERR
705
+ # Proceed normal restart when no valid table exists for initial sync
706
+ return
707
+ else
708
+ raise e
709
+ end
687
710
  end
688
711
  complete(de)
689
712
  end
@@ -713,7 +736,7 @@ EOS
713
736
  dump_pos_info = sync_fm.load_dump_pos
714
737
  if dump_pos_info[:status] == STATUS_PARSED || dump_pos_info[:status] == STATUS_COMPLETE
715
738
  initialize_source_positions_and_call_callback(
716
- nil, options[:source_pos_ready_callback], sync_fm)
739
+ nil, options[:source_pos_ready_callback], sync_fm, options)
717
740
  return
718
741
  end
719
742
 
@@ -721,7 +744,7 @@ EOS
721
744
  fp = sync_fm.dump_file_path
722
745
  if file_dump && File.exists?(fp) && File.size(fp) > 0
723
746
  initialize_source_positions_and_call_callback(
724
- nil, options[:source_pos_ready_callback], sync_fm)
747
+ nil, options[:source_pos_ready_callback], sync_fm, options)
725
748
  return call_block_or_return_io(fp, &dump_ready_callback)
726
749
  end
727
750
 
@@ -770,9 +793,9 @@ EOM
770
793
  end
771
794
  end
772
795
 
773
- log_info_stdout("Setting binary log position and exporting data from the database.")
774
- log_info_stdout("This process can take hours depending on data size and load on your database. Please be patient...")
775
- sync_fm.save_sync_info(@full_initial_sync, target_tables)
796
+ if sync_fm.load_sync_info.nil?
797
+ sync_fm.save_sync_info(@full_initial_sync, target_tables, (options[:auto_create] ? AUTO_CREATE_STATUS_START : nil))
798
+ end
776
799
  # This notification will be uncommented after init_sync_finish email integration is released
777
800
  unless options[:sync_resumed]
778
801
  FlydataCore::Event::ApiEventSender.instance.send_event(
@@ -782,12 +805,19 @@ EOM
782
805
  data_entry_id: de['id'],
783
806
  data_port_id: de['data_port_id'])
784
807
  end
808
+
809
+ handle_auto_create(dp, de, sync_fm)
810
+
811
+ log_info_stdout("Setting binary log position and exporting data from the database.")
812
+ log_info_stdout("This process can take hours depending on data size and load on your database. Please be patient...")
813
+
785
814
  if file_dump
786
815
  source_pos = nil
816
+
787
817
  begin
788
818
  context.dump(target_tables, fp) do |_io, _source_pos|
789
819
  source_pos = _source_pos
790
- initialize_source_positions_and_call_callback(source_pos, options[:source_pos_ready_callback], sync_fm)
820
+ initialize_source_positions_and_call_callback(source_pos, options[:source_pos_ready_callback], sync_fm, options)
791
821
  end
792
822
  log_info_stdout(" -> Database dump done")
793
823
  rescue Exception => e
@@ -800,7 +830,7 @@ EOM
800
830
  call_block_or_return_io(fp, source_pos, &dump_ready_callback)
801
831
  else
802
832
  context.dump(target_tables) do |io, source_pos|
803
- initialize_source_positions_and_call_callback(source_pos, options[:source_pos_ready_callback], sync_fm)
833
+ initialize_source_positions_and_call_callback(source_pos, options[:source_pos_ready_callback], sync_fm, options)
804
834
  dump_ready_callback.call(io, source_pos)
805
835
  end
806
836
  end
@@ -809,9 +839,41 @@ EOM
809
839
  end
810
840
  end
811
841
 
812
- def initialize_source_positions_and_call_callback(source_pos, callback, sync_fm)
842
+ def handle_auto_create(dp, de, sync_fm)
843
+ auto_create_status = sync_fm.load_sync_info[:auto_create_status]
844
+ return if auto_create_status.nil?
845
+ loop do
846
+ case auto_create_status
847
+ when AUTO_CREATE_STATUS_START
848
+ log_info_stdout("Creating tables on Redshift...")
849
+ ddl_context = source.sync_generate_table_ddl(dp)
850
+ flydata_tabledefs = generate_and_run_table_ddl(ddl_context, de, auto_create: true) || []
851
+ if flydata_tabledefs.empty?
852
+ log_error_stderr("No valid table for sync. Please check table errors on Dashboard - tables:#{target_tables.join(", ")}")
853
+ sync_fm.delete_sync_info # No need to keep sync.info
854
+ raise AgentInternalError.new("No valid tables for sync - tables:#{target_tables.join(", ")}",
855
+ AgentInternalError::NO_VALID_TABLE_ERR)
856
+ end
857
+ # no error tables to create ddl
858
+ table_names = flydata_tabledefs.collect{|d| d[:table_name]}
859
+ # Refresh source and data_entry to reflect invalid tables
860
+ # Set only the tables whose DDL was sent as input tables, because new tables may be added while the target tables are being created
861
+ auto_create_status = AUTO_CREATE_STATUS_CREATED_TABLES
862
+ sync_fm.save_sync_info(@full_initial_sync, table_names, auto_create_status)
863
+ source(refresh: true)
864
+ set_current_tables(table_names, resume: true)
865
+ when AUTO_CREATE_STATUS_CREATED_TABLES
866
+ log_info_stdout("Tables are created on Redshift... tables:#{target_tables.join(", ")}")
867
+ break
868
+ else
869
+ raise "Invalid auto_create_status in dump/sync.info filedump."
870
+ end
871
+ end
872
+ end
873
+
874
+ def initialize_source_positions_and_call_callback(source_pos, callback, sync_fm, options = {})
813
875
  if source_pos
814
- initialize_positions(sync_fm, source_pos)
876
+ initialize_positions(sync_fm, source_pos, options)
815
877
  else
816
878
  # no source_pos was given because dump was completed in the
817
879
  # previous init sync attempt. Position files must be there already
@@ -1152,14 +1214,14 @@ EOM
1152
1214
  end
1153
1215
  end
1154
1216
 
1155
- def initialize_positions(sync_fm, source_pos)
1217
+ def initialize_positions(sync_fm, source_pos, options)
1156
1218
  sync_fm.save_table_source_pos(target_tables, source_pos)
1157
1219
 
1158
1220
  if @full_initial_sync
1159
1221
  sync_fm.save_source_pos(source_pos)
1160
1222
  end
1161
1223
  sync_fm.install_table_source_pos_files(target_tables)
1162
- sync_fm.reset_table_position_files(target_tables)
1224
+ sync_fm.reset_table_position_files(target_tables, options)
1163
1225
  end
1164
1226
 
1165
1227
  def convert_to_flydata_values(source_table, values)
@@ -1226,7 +1288,53 @@ EOM
1226
1288
  end
1227
1289
  end
1228
1290
 
1229
- def generate_flydata_tabledefs(context, de)
1291
+ # Generate and send table ddl to a data server
1292
+ def generate_and_send_table_ddl(context, de)
1293
+ flydata_tabledefs = generate_flydata_tabledefs(context, de)
1294
+ return flydata_tabledefs if flydata_tabledefs.nil? || flydata_tabledefs.empty?
1295
+
1296
+ #TODO: Check per-table position file and raise an error if pos > 0
1297
+
1298
+ base_record = {
1299
+ table_rev: 1,
1300
+ seq: SyncFileManager::INITIAL_SYNC_SEQ,
1301
+ respect_order: true,
1302
+ type: 'initial_sync',
1303
+ src_pos: '-',
1304
+ v: FlydataCore::Record::V2,
1305
+ }
1306
+
1307
+ records = flydata_tabledefs.collect do |flydata_tabledef|
1308
+ table_name = flydata_tabledef[:table_name]
1309
+ ddl_options = flydata_tabledef[:ddl_options] || {}
1310
+ base_record.merge(
1311
+ flydata_tabledef: flydata_tabledef.dup,
1312
+ skip_drop_table: ddl_options[:skip_drop_table],
1313
+ table_name: table_name
1314
+ )
1315
+ end
1316
+
1317
+ forwarder = build_forwarder(context.dp, de)
1318
+ forwarder.emit(records)
1319
+ forwarder.flush
1320
+
1321
+ flydata_tabledefs
1322
+ ensure
1323
+ if forwarder
1324
+ forwarder.close rescue nil
1325
+ end
1326
+ end
1327
+
1328
+ # Generate and run table ddl on Redshift
1329
+ def generate_and_run_table_ddl(context, de, options = {})
1330
+ flydata_tabledefs = generate_flydata_tabledefs(context, de, options)
1331
+ return flydata_tabledefs if flydata_tabledefs.nil? || flydata_tabledefs.empty?
1332
+ ddl_runner = Flydata::Output::RedshiftDDLRunner.new(redshift_cluster, de)
1333
+ ddl_runner.run_ddls(flydata_tabledefs)
1334
+ flydata_tabledefs
1335
+ end
1336
+
1337
+ def generate_flydata_tabledefs(context, de, options = {})
1230
1338
  schema_name = (de['schema_name'] || nil)
1231
1339
  tables = opts.all_tables? ? @full_tables : (@input_tables.empty? ? @unsynced_tables : @input_tables)
1232
1340
 
@@ -1258,25 +1366,32 @@ EOM
1258
1366
  table_validity_hash = Hash.new {|h,k| h[k] = {}}
1259
1367
  tables_without_error = tables
1260
1368
  unless error_list.empty?
1261
- log_error_stderr("\n\nERROR: FlyData Sync will not sync the following table(s) due to an error.")
1369
+ unless options[:auto_create]
1370
+ log_error_stderr("\n\nERROR: FlyData Sync will not sync the following table(s) due to an error.")
1371
+ end
1372
+
1262
1373
  group_error = error_list.group_by {|d| d[:error]}
1374
+ error_info_hash = {}
1263
1375
  group_error.each_key do |error|
1264
1376
  group_error[error].each do |hash|
1265
1377
  if table = hash[:table]
1378
+ error_info_hash[table] = error
1266
1379
  log_error_stderr(" - #{table} (#{error})")
1267
1380
  table_validity_hash[table][TableAttribute::INVALID_TABLE_REASON] = error
1268
1381
  end
1269
1382
  end
1270
1383
  end
1271
- log_error_stderr(<<EOS)
1272
- To sync these table(s), please fix the error(s) and run "flydata sync:generate_table_ddl" again.
1273
- EOS
1274
- tables_without_error = tables - error_list.inject([]){|arr, err| arr << err[:table] if err[:table]}
1275
- unless tables_without_error.empty?
1276
- log_error_stderr(<<EOS)
1277
1384
 
1278
- The other tables are ready to sync. To start sync, run the generated script on the Redshift cluster and run "flydata start".
1279
- EOS
1385
+ if options[:auto_create]
1386
+ log_error_stderr("[error] FlyData Sync will not sync the following table(s) due to an error. #{error_info_hash.collect{|k,v| "#{k}(#{v})"}.join(", ")}")
1387
+ log_warn_stderr("To sync these table(s), please fix the error(s) and run \"flydata restart --auto-create\" again.")
1388
+ else
1389
+ log_error_stderr("To sync these table(s), please fix the error(s) and run \"flydata sync:generate_table_ddl\" again.")
1390
+ end
1391
+
1392
+ tables_without_error = tables - error_list.inject([]){|arr, err| arr << err[:table] if err[:table]}
1393
+ unless tables_without_error.empty? || options[:auto_create]
1394
+ log_error_stderr("The other tables are ready to sync. To start sync, run the generated script on the Redshift cluster and run \"flydata start\".")
1280
1395
  end
1281
1396
  end
1282
1397
 
@@ -1346,7 +1461,11 @@ Thank you for using FlyData!
1346
1461
  sync_fm = create_sync_file_manager
1347
1462
  sync_info = sync_fm.load_sync_info
1348
1463
  sync_resumed = options[:resume] && !!sync_info
1349
-
1464
+ auto_create = if sync_resumed
1465
+ !!sync_info[:auto_create_status]
1466
+ else
1467
+ false
1468
+ end
1350
1469
  table_lists = source.sync.table_lists
1351
1470
 
1352
1471
  # `full_tables` will either
@@ -1383,13 +1502,14 @@ Thank you for using FlyData!
1383
1502
 
1384
1503
  verify_input_tables(@input_tables, @full_tables)
1385
1504
 
1386
- sync_resumed
1505
+ [sync_resumed, auto_create]
1387
1506
  end
1388
1507
 
1389
1508
  def validate_initial_sync_status
1390
1509
  sync_fm = create_sync_file_manager
1391
1510
  dump_pos_info = sync_fm.load_dump_pos
1392
- sync_info_exists = !!sync_fm.load_sync_info
1511
+ sync_info = sync_fm.load_sync_info || {}
1512
+ sync_info_exists = !sync_info.empty?
1393
1513
  dump_file_deleted = !File.exists?(sync_fm.dump_file_path)
1394
1514
  sync_fm.close
1395
1515
 
@@ -1399,7 +1519,7 @@ Thank you for using FlyData!
1399
1519
  end
1400
1520
 
1401
1521
  # check if the previous initial sync was aborted during dump.
1402
- sync_info_exists && dump_file_deleted
1522
+ sync_info_exists && sync_info[:auto_create_status] == AUTO_CREATE_STATUS_CREATED_TABLES && dump_file_deleted
1403
1523
  end
1404
1524
 
1405
1525
  def target_tables
@@ -1423,8 +1543,8 @@ Thank you for using FlyData!
1423
1543
  end
1424
1544
  end
1425
1545
 
1426
- def data_entry
1427
- unless @sync_de
1546
+ def data_entry(refresh: false)
1547
+ if @sync_de.nil? || refresh
1428
1548
  @sync_de = super
1429
1549
  source.sync.setup # updates the data entry contents
1430
1550
  end
@@ -39,4 +39,16 @@ end
39
39
  class DumpParseError < AgentError
40
40
  end
41
41
 
42
+ class AgentInternalError < AgentError
43
+ NO_VALID_TABLE_ERR = 101
44
+ UNKNOWN_ERR = 999
45
+
46
+ def initialize(message, code = UNKNOWN_ERR)
47
+ super("#{message} code:#{code}")
48
+ @code = code
49
+ end
50
+
51
+ attr_reader :code
52
+ end
53
+
42
54
  end
@@ -0,0 +1,70 @@
1
+ require 'flydata-core/postgresql/pg_client'
2
+ require 'flydata-core/table_def/sync_redshift_table_def'
3
+ require 'flydata/command_loggable'
4
+
5
+ module Flydata
6
+ module Output
7
+ class RedshiftDDLRunner
8
+ include CommandLoggable
9
+
10
+ def initialize(dbconf, de)
11
+ @pg_client = FlydataCore::Postgresql::PGClient.new(dbconf, notice_receiver: Proc.new{|result|
12
+ log_info_stdout(" #{result.error_message.to_s.strip}")
13
+ })
14
+ @schema_name = de['schema_name']
15
+ @schema_name = nil if @schema_name.to_s.strip.empty?
16
+ end
17
+
18
+ def run_ddls(flydata_tabledefs)
19
+ @pg_client.establish_connection
20
+ create_schema
21
+ create_ctl_tables
22
+ flydata_tabledefs.each.with_index(1) do |flydata_tabledef, index|
23
+ run_ddl(flydata_tabledef, index, flydata_tabledefs.size)
24
+ end
25
+ ensure
26
+ @pg_client.close
27
+ end
28
+
29
+ private
30
+
31
+ def create_schema
32
+ return unless @schema_name
33
+ query = FlydataCore::TableDef::SyncRedshiftTableDef.create_schema_sql(@schema_name)
34
+ begin
35
+ @pg_client.exec(query)
36
+ rescue PG::InsufficientPrivilege
37
+ # Ignore
38
+ end
39
+ end
40
+
41
+ def create_ctl_tables
42
+ query = FlydataCore::TableDef::SyncRedshiftTableDef.create_flydata_ctl_table_sql(@schema_name)
43
+ log_info_stdout(" -> Creating flydata ctl tables... schema:\"#{@schema_name}\"")
44
+ log_info("query:\n#{query}")
45
+ @pg_client.exec(query)
46
+ end
47
+
48
+ def run_ddl(flydata_tabledef, index, total)
49
+ ddl = FlydataCore::TableDef::SyncRedshiftTableDef.from_flydata_tabledef(
50
+ flydata_tabledef,
51
+ flydata_tabledef[:ddl_options].merge(flydata_ctl_table: false))
52
+ log_info_stdout(" -> Creating \"#{flydata_tabledef[:table_name]}\"... (#{index}/#{total})")
53
+ log_info("query:\n#{ddl}")
54
+ @pg_client.exec(ddl)
55
+ rescue PG::DependentObjectsStillExist => e
56
+ log_error_stderr(" [error] #{e.to_s.strip}")
57
+ # ignore this error
58
+ end
59
+
60
+ =begin
61
+ def wrap(query)
62
+ query = query.strip
63
+ query = "BEGIN;\n#{query}" unless /^BEGIN;/i.match(query)
64
+ query = "#{query}\nEND;" unless /END;$/i.match(query)
65
+ query
66
+ end
67
+ =end
68
+ end
69
+ end
70
+ end
@@ -11,6 +11,8 @@ module Flydata
11
11
  BACKUP_DIR = ENV['FLYDATA_BACKUP'] || File.join(FLYDATA_HOME, 'backup')
12
12
  TABLE_POSITIONS_DIR = FLYDATA_TABLE_POSITIONS_DIR
13
13
 
14
+ INITIAL_SYNC_SEQ = 1
15
+
14
16
  def initialize(data_entry, source = nil)
15
17
  @data_entry = data_entry
16
18
  @source = source #for Source dependent objects
@@ -171,10 +173,10 @@ module Flydata
171
173
  end
172
174
 
173
175
  # table files
174
- def reset_table_position_files(tables)
176
+ def reset_table_position_files(tables, options = {})
175
177
  tables.each do |table_name|
176
178
  file = File.join(table_positions_dir_path, table_name + ".pos")
177
- File.open(file, "w") {|f| f.write('0') }
179
+ File.open(file, "w") {|f| f.write("0") }
178
180
  end
179
181
  end
180
182
 
@@ -204,7 +206,7 @@ module Flydata
204
206
 
205
207
  def increment_table_position(seq)
206
208
  seq = seq.to_i + 1
207
- seq = FlydataCore::QueryJob::SYNC_FIRST_SEQ if seq == 1
209
+ seq = "#{seq}.sync" if [1,2].include?(seq)
208
210
  seq
209
211
  end
210
212
 
@@ -223,8 +225,12 @@ module Flydata
223
225
  # logical transaction ends
224
226
  end
225
227
 
228
+ def table_position_file_path(table_name)
229
+ File.join(table_positions_dir_path, table_name + ".pos")
230
+ end
231
+
226
232
  def open_table_position_file(table_name)
227
- file = File.join(table_positions_dir_path, table_name + ".pos")
233
+ file = table_position_file_path(table_name)
228
234
  retry_count = 0
229
235
  begin
230
236
  @table_position_files[table_name] ||= (f = File.open(file, File::RDWR); f.sync = true; f)
@@ -253,6 +259,15 @@ module Flydata
253
259
  f.rewind
254
260
  end
255
261
 
262
+ # This doesn't cache the File object
263
+ def save_table_positions(table_names, seq)
264
+ table_names = Array(table_names)
265
+ table_names.each do |table_name|
266
+ file_path = table_position_file_path(table_name)
267
+ File.write(file_path, seq.to_s)
268
+ end
269
+ end
270
+
256
271
  def get_table_position(table_name)
257
272
  f = open_table_position_file(table_name)
258
273
  seq = f.read
@@ -269,21 +284,41 @@ module Flydata
269
284
  FLYDATA_LOCK
270
285
  end
271
286
 
287
+ # "sync.info" file includes initial sync information for resuming
288
+ # - initial_sync: True if initial sync is full initial sync
289
+ # - tables: target tables for initial sync
290
+ # - auto_create_status: START|SENT_DDL|CREATED_TABLES
291
+
272
292
  def sync_info_file
273
293
  File.join(dump_dir, "sync.info")
274
294
  end
275
295
 
276
- def save_sync_info(initial_sync, tables)
296
+ def delete_sync_info
297
+ FileUtils.rm(sync_info_file)
298
+ end
299
+
300
+ def save_sync_info(initial_sync, tables, auto_create_status = nil)
277
301
  File.open(sync_info_file, "w") do |f|
278
- f.write([initial_sync, tables.join(" ")].join("\t"))
302
+ content = {
303
+ initial_sync: initial_sync,
304
+ tables: tables,
305
+ auto_create_status: auto_create_status,
306
+ }.to_json
307
+ f.write(content)
279
308
  end
280
309
  end
281
310
 
282
311
  def load_sync_info
283
312
  return nil unless File.exists?(sync_info_file)
284
- items = File.open(sync_info_file, 'r').readline.split("\t")
285
- { initial_sync: (items[0] == 'true'),
286
- tables: items[1].split(" ") }
313
+ line = File.open(sync_info_file, 'r').readline
314
+ begin
315
+ JSON.parse(line, symbolize_names: true)
316
+ rescue
317
+ # For compatibility
318
+ items = line.split("\t")
319
+ { initial_sync: (items[0] == 'true'),
320
+ tables: items[1].split(" ") }
321
+ end
287
322
  end
288
323
 
289
324
  def get_table_source_pos_init(table_name)
@@ -116,6 +116,7 @@ module Flydata
116
116
  context 'with no stream option' do
117
117
  before do
118
118
  expect(default_sync_fm).to receive(:save_sync_info).once
119
+ expect(default_sync_fm).to receive(:load_sync_info).and_return(nil, {})
119
120
  expect(subject).to receive(:free_disk_space).and_return(disk_byte)
120
121
  expect(File).to receive(:dirname)
121
122
  end
@@ -139,6 +140,7 @@ module Flydata
139
140
  context 'with stream option' do
140
141
  it 'will export to io' do
141
142
  expect(default_sync_fm).to receive(:save_sync_info).once
143
+ expect(default_sync_fm).to receive(:load_sync_info).and_return(nil, {})
142
144
  expect_any_instance_of(Flydata::SourceMysql::Parser::MysqlDumpGeneratorNoMasterData).to receive(:dump)
143
145
 
144
146
  subject.send(:generate_source_dump, default_data_entry, default_sync_fm, false)
@@ -176,8 +176,13 @@ EOT
176
176
  records = rows.collect do |row|
177
177
  record = row.kind_of?(Hash) && row.keys.include?(:row) ? row : { row: row }
178
178
  @seq += 1
179
+ seq = if [1,2].include?(@seq.to_i)
180
+ "#{@seq.to_i}.sync"
181
+ else
182
+ @seq
183
+ end
179
184
  record.merge({ :type=>type, :table_name=>table, :respect_order=>true,
180
- :seq=>@seq, :src_pos=>"#{binlog_file}\t#{position}", :table_rev=>1,
185
+ :seq=>seq, :src_pos=>"#{binlog_file}\t#{position}", :table_rev=>1,
181
186
  :v=>2 })
182
187
  end
183
188
  expect_emitted_records(event, records)
@@ -383,7 +388,7 @@ EOT
383
388
  respect_order: true,
384
389
  src_pos: "mysql-bin.000048\t689",
385
390
  table_rev: 2, # increment revision
386
- seq: 2,
391
+ seq: "2.sync",
387
392
  v: flydata_record_version,
388
393
  actions: [{
389
394
  action: :add_column, column: "sum", :type=>'int4(11)', :query=>'add column sum integer'}],
@@ -399,7 +404,7 @@ EOT
399
404
  respect_order: true,
400
405
  src_pos: "mysql-bin.000048\t800",
401
406
  table_rev: 2, # increment revision
402
- seq: 2,
407
+ seq: "2.sync",
403
408
  v: flydata_record_version,
404
409
  actions: [{
405
410
  action: :drop_column, column: "sum", :query=>'drop column sum'}],
@@ -416,7 +421,7 @@ EOT
416
421
  respect_order: true,
417
422
  src_pos: "mysql-bin.000048\t#{337 - 217}",
418
423
  table_rev: 1,
419
- seq: 2,
424
+ seq: "2.sync",
420
425
  v: flydata_record_version,
421
426
  actions: [{
422
427
  action: :add_index,
@@ -0,0 +1,80 @@
1
+ # coding: utf-8
2
+ require 'spec_helper'
3
+ require 'flydata/output/ddl_runner'
4
+
5
+ module Flydata
6
+ module Output
7
+ describe RedshiftDDLRunner do
8
+ let(:pg_client) { double('pg_client') }
9
+ let(:dbconf) {
10
+ {
11
+ host: 'dummy-host',
12
+ port: 54399,
13
+ username: 'dummy-user',
14
+ password: 'dummy-pass',
15
+ dbname: 'dummy-db',
16
+ }
17
+ }
18
+ let(:schema_name) { nil }
19
+ let(:de) {
20
+ {'schema_name' => schema_name}
21
+ }
22
+ let(:subject_object) do
23
+ described_class.new(dbconf, de)
24
+ end
25
+
26
+ before do
27
+ allow(FlydataCore::Postgresql::PGClient).to receive(:new).and_return(pg_client)
28
+ allow(pg_client).to receive(:establish_connection)
29
+ allow(pg_client).to receive(:exec)
30
+ allow(pg_client).to receive(:close)
31
+ end
32
+
33
+ describe '.run_ddls' do
34
+ let(:flydata_tabledef) {
35
+ {:table_name=>"test_table",
36
+ :columns=>
37
+ [{:column=>"id",
38
+ :type=>"int4(11)",
39
+ :auto_increment=>true,
40
+ :not_null=>true,
41
+ :primary_key=>true},
42
+ {:column=>"value", :type=>"int4(11)", :default=>nil}],
43
+ :default_charset=>"UTF_8",
44
+ :src_ddl=>
45
+ "CREATE TABLE `test_table` ( `id` int(11) NOT NULL AUTO_INCREMENT, `value` int(11) DEFAULT NULL, PRIMARY KEY (`id`)) ENGINE=InnoDB AUTO_INCREMENT=15 DEFAULT CHARSET=utf8;",
46
+ :pk_override=>nil,
47
+ :ddl_options=>
48
+ {:flydata_ctl_table=>true,
49
+ :schema_name=>schema_name,
50
+ :ctl_only=>false,
51
+ :skip_drop_table=>false,
52
+ :skip_primary_key_check=>false}}
53
+ }
54
+ let(:flydata_tabledefs) { [ flydata_tabledef ] }
55
+ subject { subject_object.run_ddls(flydata_tabledefs) }
56
+
57
+ context 'schema name is nil(not set)' do
58
+ it 'skips creating schema' do
59
+ expect(pg_client).to receive(:exec) do |query|
60
+ expect(query).not_to match(/CREATE SCHEMA/)
61
+ end
62
+ subject
63
+ end
64
+ end
65
+
66
+ context 'schema name is nil(not set)' do
67
+ let(:schema_name) { 'dummy-schema' }
68
+ it 'creates schema' do
69
+ expect(pg_client).to receive(:exec) do |query|
70
+ expect(query).to match(/CREATE SCHEMA/)
71
+ end
72
+ subject
73
+ end
74
+ end
75
+
76
+ end
77
+ end
78
+ end
79
+
80
+ end
@@ -273,7 +273,7 @@ module Flydata
273
273
  subject { subject_object }
274
274
  context 'when an exception happens in the block' do
275
275
  let(:first_pos) { '1.sync' }
276
- let(:last_pos) { first_pos.to_i + 1 }
276
+ let(:last_pos) { "2.sync" }
277
277
  before do
278
278
  subject.increment_and_save_table_position(test_table) do |seq|
279
279
  expect(seq).to eq first_pos
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: flydata
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.17
4
+ version: 0.7.18
5
5
  platform: ruby
6
6
  authors:
7
7
  - Koichi Fujikawa
@@ -12,7 +12,7 @@ authors:
12
12
  autorequire:
13
13
  bindir: bin
14
14
  cert_chain: []
15
- date: 2017-01-17 00:00:00.000000000 Z
15
+ date: 2017-01-26 00:00:00.000000000 Z
16
16
  dependencies:
17
17
  - !ruby/object:Gem::Dependency
18
18
  name: rest-client
@@ -700,6 +700,7 @@ files:
700
700
  - lib/flydata/json.rb
701
701
  - lib/flydata/json/.gitignore
702
702
  - lib/flydata/log_monitor.rb
703
+ - lib/flydata/output/ddl_runner.rb
703
704
  - lib/flydata/output/forwarder.rb
704
705
  - lib/flydata/parser.rb
705
706
  - lib/flydata/parser/parser_provider.rb
@@ -848,6 +849,7 @@ files:
848
849
  - spec/flydata/helper/worker_spec.rb
849
850
  - spec/flydata/heroku_spec.rb
850
851
  - spec/flydata/json/json_ext_spec.rb
852
+ - spec/flydata/output/ddl_runner_spec.rb
851
853
  - spec/flydata/output/forwarder_spec.rb
852
854
  - spec/flydata/plugin_support/context_spec.rb
853
855
  - spec/flydata/query_based_sync/client_spec.rb
@@ -910,7 +912,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
910
912
  version: '0'
911
913
  requirements: []
912
914
  rubyforge_project:
913
- rubygems_version: 2.4.3
915
+ rubygems_version: 2.0.14.1
914
916
  signing_key:
915
917
  specification_version: 4
916
918
  summary: FlyData Agent