flydata 0.7.17 → 0.7.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 340f31b8701b211b25628d5690237565ff5e46c6
4
- data.tar.gz: 25bbb20fa4a67ca4f25f20917a350e649a224b80
3
+ metadata.gz: 5f6e6673d1001075c0ba38d2c4551aceac38d5b5
4
+ data.tar.gz: ff03d98fe9137ff5e4a10176a0e9b0c000d68c72
5
5
  SHA512:
6
- metadata.gz: ae89812c8d23ad80ec1777ec1a6c39764808d7279ff54db865b7bc1cd090045cef87f426af834aaf50d147f1d7c2d9eef1a16150b6844d79701313f84c7b3a17
7
- data.tar.gz: 4f6c6c97c6c04ebee65780bf31930fc58a6316cf7830bcac5defd63bc8a9db68d3062577d142e09cd1d86f9cc66b7c4bd207d6b5094ae6c4c89f48b983c88d57
6
+ metadata.gz: abedde840231a9460445b0983a7ec8d9e1da3d043792352c626d8202b1792787a150571c502ccf39a3d6ea6f3840a0fd617b96922ae55d0c37c81d86cdd73556
7
+ data.tar.gz: dc1a691a6c514f75339b99ed7c8c46238b8ca6b32bedb3c923e182408382f0bbe13a1e4556c41175876bfa0423a588a9e5af412afb6fe4bd6a7f828f3dedb387
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.7.17
1
+ 0.7.18
@@ -10,14 +10,21 @@ module Postgresql
10
10
  class PGClient
11
11
  PG_CONNECT_TIMEOUT = 10.0
12
12
 
13
- def initialize(dbconf)
13
+ def initialize(dbconf, options = {})
14
14
  @dbconf = FlydataCore::Postgresql::Config.opts_for_pg(dbconf)
15
+ @options = options
15
16
  end
16
17
 
17
18
  attr_reader :dbconf
18
19
 
19
20
  def establish_connection
20
- @conn = create_connection if @conn.nil?
21
+ if @conn.nil?
22
+ @conn = create_connection
23
+ if @options[:notice_receiver]
24
+ @conn.set_notice_receiver{|r| @options[:notice_receiver].call(r) }
25
+ end
26
+ end
27
+ @conn
21
28
  end
22
29
 
23
30
  def query(query, params = [])
@@ -42,6 +49,11 @@ class PGClient
42
49
  : result
43
50
  end
44
51
 
52
+ def exec(query)
53
+ establish_connection
54
+ @conn.exec(query)
55
+ end
56
+
45
57
  def close
46
58
  if @conn
47
59
  @conn.finish
@@ -159,13 +159,14 @@ CREATE TABLE IF NOT EXISTS %s (
159
159
  EOS
160
160
 
161
161
  def self.remove_table_sql(flydata_tabledef, schema_name, options)
162
- return "" if options[:skip_drop_table]
163
162
  table_name = flydata_tabledef[:table_name]
164
163
  redshift_tbl = table_name_for_ddl(table_name, schema_name)
165
164
  sql = ""
166
165
  if options[:backup_postfix]
167
166
  # drop backup table if exists
168
- sql += drop_backup_table_sql(flydata_tabledef, schema_name, options)
167
+ unless options[:skip_drop_table]
168
+ sql += drop_backup_table_sql(flydata_tabledef, schema_name, options)
169
+ end
169
170
  # create an empty table to prevent RENAME TABLE query from failing
170
171
  sql += create_table_sql(flydata_tabledef, schema_name,
171
172
  options.merge(create_table_sql: CREATE_TABLE_IF_NOT_EXISTS_SQL))
@@ -173,6 +174,7 @@ EOS
173
174
  "#{table_name}#{options[:backup_postfix]}", nil)
174
175
  sql += RENAME_TABLE_SQL % [redshift_tbl, backup_tbl]
175
176
  else
177
+ return "" if options[:skip_drop_table]
176
178
  sql += DROP_TABLE_SQL % redshift_tbl
177
179
  end
178
180
  sql
@@ -117,7 +117,16 @@ EOT
117
117
  option.merge!(skip_drop_table: true)
118
118
  end
119
119
 
120
- let(:drop_table_subquery) { ""}
120
+ let(:drop_table_subquery) { <<EOT
121
+ CREATE TABLE IF NOT EXISTS #{schema_prefix}"test_table" (
122
+ "id" int4,
123
+ "age" int8,
124
+ "value" varchar(max),
125
+ PRIMARY KEY ("id")
126
+ ) DISTKEY("id") SORTKEY("id");
127
+ ALTER TABLE #{schema_prefix}"test_table" RENAME TO "test_table_flydata20160125232857";
128
+ EOT
129
+ }
121
130
  it 'should return ddl' do
122
131
  expect(subject).to eq "BEGIN;\n#{create_table_queries}\n#{flydata_ctl_update}\nCOMMIT;\n"
123
132
  end
data/flydata.gemspec CHANGED
Binary file
@@ -9,8 +9,10 @@ module Flydata
9
9
  super
10
10
  end
11
11
 
12
- def run_query(sql)
13
- @client.post("#{@url_path}/query", nil, {redshift_query: {body: sql}})
12
+ def show_default
13
+ # currently one user has one redshift_cluster
14
+ url_path = "#{@url_path}/show_default?password_required=1"
15
+ @client.get(url_path)
14
16
  end
15
17
  end
16
18
  end
@@ -4,6 +4,7 @@ require 'flydata/command_loggable'
4
4
  require 'flydata/source'
5
5
  require 'flydata/command/exclusive_runnable'
6
6
  require 'flydata/preference/data_entry_preference'
7
+ require 'flydata/util/encryptor'
7
8
 
8
9
 
9
10
  module Flydata
@@ -42,14 +43,35 @@ module Flydata
42
43
  log_info_stdout("Your current application name is '#{de['purpose_name']}'")
43
44
  end
44
45
 
45
- def data_entry
46
- @de ||= retrieve_data_entries.first
46
+ def data_entry(refresh: false)
47
+ if @de.nil? || refresh
48
+ @de = retrieve_data_entries.first
49
+ end
47
50
  raise "No data entry exists. Please set one up on the FlyData Console (#{dashboard_url})" unless @de
48
51
  @de
49
52
  end
50
53
 
51
- def source
52
- @source ||= Source.create(data_entry)
54
+ def data_port
55
+ return @data_port if @data_port
56
+ @data_port = flydata.data_port.get
57
+ end
58
+
59
+ def redshift_cluster
60
+ return @redshift_cluster if @redshift_cluster
61
+ @redshift_cluster = flydata.redshift_cluster.show_default
62
+ @redshift_cluster['password'] = Flydata::Util::Encryptor.decrypt(
63
+ @redshift_cluster['encrypted_password'],
64
+ data_port['key'],
65
+ 'redshift_cluster password')
66
+ @redshift_cluster
67
+ end
68
+
69
+ def source(refresh: false)
70
+ if @source.nil? || refresh
71
+ @source = nil
72
+ @source = Source.create(data_entry(refresh: refresh))
73
+ end
74
+ @source
53
75
  end
54
76
 
55
77
  def register_crontab
@@ -9,7 +9,9 @@ module Flydata
9
9
  Slop.new do
10
10
  on 'n', 'no-daemon', 'Start FlyData agent as a regular program'
11
11
  on 'e', 'no-email', 'Skip sending init-sync-start notification email'
12
+ on 'y', 'yes', 'Skip command prompt assuming yes to all questions. Use this for batch operation.'
12
13
  on 'force-run', 'Run forcefully, ignoring exclusive run info'
14
+ on 'auto-create', 'Create tables on Redshift automatically'
13
15
  end
14
16
  end
15
17
  def start(options_or_show_final_message = {show_final_message: true}) # For backward compatibility. Use only as options going forward
@@ -66,7 +68,10 @@ module Flydata
66
68
  options[:quiet] = true
67
69
  Flydata::Command::Sync.new.try_initial_sync(
68
70
  source_pos_ready_callback: start_fluentd,
69
- no_email: opts.no_email?)
71
+ no_email: opts.no_email?,
72
+ auto_create: opts.auto_create?,
73
+ slop_opts: opts,
74
+ )
70
75
  options[:quiet] = quiet_option
71
76
  start_fluentd.call unless fluentd_started
72
77
  if options[:show_final_message] && !options[:quiet]
@@ -12,6 +12,7 @@ require 'flydata/helpers'
12
12
  require 'flydata/json'
13
13
  require 'flydata/queueable_thread'
14
14
  require 'flydata/output/forwarder'
15
+ require 'flydata/output/ddl_runner'
15
16
  require 'flydata/parser'
16
17
  require 'flydata/preference/data_entry_preference'
17
18
  require 'flydata/sync_file_manager'
@@ -20,6 +21,7 @@ require 'flydata-core/table_def'
20
21
  require 'flydata/table_ddl'
21
22
  require 'flydata/event/api_event_sender'
22
23
  require 'flydata-core/event/event_dictionary'
24
+ require 'flydata-core/record/record'
23
25
  require 'sigdump/setup'
24
26
  #require 'ruby-prof' # to enable profiling, also set the class' RUN_PROFILE
25
27
 
@@ -34,6 +36,11 @@ module Flydata
34
36
  INSERT_PROGRESS_INTERVAL = 1000
35
37
  SERVER_DATA_PROCESSING_TIMEOUT = 3600 # seconds
36
38
 
39
+ # for sync_info file auto_create_status
40
+ AUTO_CREATE_STATUS_START = 'START'
41
+ #AUTO_CREATE_STATUS_SENT_DDL = 'SENT_DDL'
42
+ AUTO_CREATE_STATUS_CREATED_TABLES = 'CREATED_TABLES'
43
+
37
44
  # for dump.pos file
38
45
  STATUS_START = 'START' # only :source_pos is available at the begining of parse
39
46
  STATUS_PARSING = 'PARSING'
@@ -74,9 +81,13 @@ module Flydata
74
81
  # Public method
75
82
  # - Called from Sender#start/restart
76
83
  def try_initial_sync(options)
84
+ @opts = options[:slop_opts] if options[:slop_opts]
77
85
  handle_initial_sync(options) if source.sync.supported?
78
86
  rescue Source::UnsupportedSourceError
79
87
  return
88
+ rescue => e
89
+ log_error("[error] Unexpcted error happend during inital sync. error:#{e}")
90
+ raise e
80
91
  end
81
92
 
82
93
  # Command: flydata sync:flush
@@ -130,7 +141,7 @@ EOS
130
141
  tables = []
131
142
  reset_init = false
132
143
  end
133
- sync_resumed = set_current_tables(tables, resume: !opts[:all])
144
+ sync_resumed, auto_create = set_current_tables(tables, resume: !opts[:all])
134
145
  target_tables = opts[:all] ? @full_tables : @input_tables
135
146
  target_append_only_tables = target_tables & @append_only_tables
136
147
  target_full_sync_tables = target_tables - @append_only_tables
@@ -607,7 +618,10 @@ EOS
607
618
  end
608
619
 
609
620
  # Setup instance variables
610
- sync_resumed = set_current_tables(nil, resume: true)
621
+ # Need to try the sync for all tables if auto create mode is on
622
+ include_all_tables = !!options[:auto_create]
623
+ sync_resumed, auto_create = set_current_tables(nil, resume: true, include_all_tables: include_all_tables)
624
+ options[:auto_create] ||= auto_create
611
625
 
612
626
  if sync_resumed
613
627
  # skip confirmation prompts and resume sync right away.
@@ -617,7 +631,7 @@ EOS
617
631
  elsif !@unsynced_tables.empty?
618
632
  show_purpose_name
619
633
  unsynced_table_message = "We've noticed that these tables have not been synced yet: #{@unsynced_tables.join(", ")}\n"
620
- unless @no_ddl_generated_tables.empty?
634
+ if !@no_ddl_generated_tables.empty? && !options[:auto_create]
621
635
  unsynced_table_message <<
622
636
  " WARNING: We've noticed that at least one of these tables have not had their DDL generated yet.\n" +
623
637
  " We recommend you run our 'flydata sync:generate_table_ddl > create_table.sql'\n" +
@@ -655,7 +669,7 @@ EOS
655
669
  _reset(recover_cmd, reset_client_only: false, delete_tbl_ddl: false)
656
670
 
657
671
  # Setup instance variables again
658
- sync_resumed = set_current_tables(nil, resume: true)
672
+ set_current_tables(nil, resume: true)
659
673
  end
660
674
 
661
675
  begin
@@ -675,6 +689,7 @@ EOS
675
689
  perform_initial_sync(de, opt)
676
690
  rescue ServerDataProcessingTimeout => e
677
691
  ee = ServerDataProcessingTimeout.new("Delayed Data Processing")
692
+ log_error("[error] Delayed Data Processing. Please check and resume the sync by running 'flydata start'")
678
693
  ee.description = <<EOS
679
694
  Data processing is taking more than expected. Please contact support@flydata.com to check the system status.
680
695
  Once checked, you can resume sync with the following command.
@@ -684,6 +699,14 @@ EOS
684
699
  EOS
685
700
  ee.set_backtrace e.backtrace
686
701
  raise ee
702
+ rescue AgentInternalError => e
703
+ case e.code
704
+ when AgentInternalError::NO_VALID_TABLE_ERR
705
+ # Proceed normal restart when no valid table exists for initial sync
706
+ return
707
+ else
708
+ raise e
709
+ end
687
710
  end
688
711
  complete(de)
689
712
  end
@@ -713,7 +736,7 @@ EOS
713
736
  dump_pos_info = sync_fm.load_dump_pos
714
737
  if dump_pos_info[:status] == STATUS_PARSED || dump_pos_info[:status] == STATUS_COMPLETE
715
738
  initialize_source_positions_and_call_callback(
716
- nil, options[:source_pos_ready_callback], sync_fm)
739
+ nil, options[:source_pos_ready_callback], sync_fm, options)
717
740
  return
718
741
  end
719
742
 
@@ -721,7 +744,7 @@ EOS
721
744
  fp = sync_fm.dump_file_path
722
745
  if file_dump && File.exists?(fp) && File.size(fp) > 0
723
746
  initialize_source_positions_and_call_callback(
724
- nil, options[:source_pos_ready_callback], sync_fm)
747
+ nil, options[:source_pos_ready_callback], sync_fm, options)
725
748
  return call_block_or_return_io(fp, &dump_ready_callback)
726
749
  end
727
750
 
@@ -770,9 +793,9 @@ EOM
770
793
  end
771
794
  end
772
795
 
773
- log_info_stdout("Setting binary log position and exporting data from the database.")
774
- log_info_stdout("This process can take hours depending on data size and load on your database. Please be patient...")
775
- sync_fm.save_sync_info(@full_initial_sync, target_tables)
796
+ if sync_fm.load_sync_info.nil?
797
+ sync_fm.save_sync_info(@full_initial_sync, target_tables, (options[:auto_create] ? AUTO_CREATE_STATUS_START : nil))
798
+ end
776
799
  # This notification will be uncommented after init_sync_finish email integration is released
777
800
  unless options[:sync_resumed]
778
801
  FlydataCore::Event::ApiEventSender.instance.send_event(
@@ -782,12 +805,19 @@ EOM
782
805
  data_entry_id: de['id'],
783
806
  data_port_id: de['data_port_id'])
784
807
  end
808
+
809
+ handle_auto_create(dp, de, sync_fm)
810
+
811
+ log_info_stdout("Setting binary log position and exporting data from the database.")
812
+ log_info_stdout("This process can take hours depending on data size and load on your database. Please be patient...")
813
+
785
814
  if file_dump
786
815
  source_pos = nil
816
+
787
817
  begin
788
818
  context.dump(target_tables, fp) do |_io, _source_pos|
789
819
  source_pos = _source_pos
790
- initialize_source_positions_and_call_callback(source_pos, options[:source_pos_ready_callback], sync_fm)
820
+ initialize_source_positions_and_call_callback(source_pos, options[:source_pos_ready_callback], sync_fm, options)
791
821
  end
792
822
  log_info_stdout(" -> Database dump done")
793
823
  rescue Exception => e
@@ -800,7 +830,7 @@ EOM
800
830
  call_block_or_return_io(fp, source_pos, &dump_ready_callback)
801
831
  else
802
832
  context.dump(target_tables) do |io, source_pos|
803
- initialize_source_positions_and_call_callback(source_pos, options[:source_pos_ready_callback], sync_fm)
833
+ initialize_source_positions_and_call_callback(source_pos, options[:source_pos_ready_callback], sync_fm, options)
804
834
  dump_ready_callback.call(io, source_pos)
805
835
  end
806
836
  end
@@ -809,9 +839,41 @@ EOM
809
839
  end
810
840
  end
811
841
 
812
- def initialize_source_positions_and_call_callback(source_pos, callback, sync_fm)
842
+ def handle_auto_create(dp, de, sync_fm)
843
+ auto_create_status = sync_fm.load_sync_info[:auto_create_status]
844
+ return if auto_create_status.nil?
845
+ loop do
846
+ case auto_create_status
847
+ when AUTO_CREATE_STATUS_START
848
+ log_info_stdout("Creating tables on Redshift...")
849
+ ddl_context = source.sync_generate_table_ddl(dp)
850
+ flydata_tabledefs = generate_and_run_table_ddl(ddl_context, de, auto_create: true) || []
851
+ if flydata_tabledefs.empty?
852
+ log_error_stderr("No valid table for sync. Please check table errors on Dashboard - tables:#{target_tables.join(", ")}")
853
+ sync_fm.delete_sync_info # No need to keep sync.info
854
+ raise AgentInternalError.new("No valid tables for sync - tables:#{target_tables.join(", ")}",
855
+ AgentInternalError::NO_VALID_TABLE_ERR)
856
+ end
857
+ # no error tables to create ddl
858
+ table_names = flydata_tabledefs.collect{|d| d[:table_name]}
859
+ # Refresh source and data_entry to reflect invalid tables
860
+ # Set only the tables whose ddl was sent as input tables, because new tables may have been added while creating the target tables
861
+ auto_create_status = AUTO_CREATE_STATUS_CREATED_TABLES
862
+ sync_fm.save_sync_info(@full_initial_sync, table_names, auto_create_status)
863
+ source(refresh: true)
864
+ set_current_tables(table_names, resume: true)
865
+ when AUTO_CREATE_STATUS_CREATED_TABLES
866
+ log_info_stdout("Tables are created on Redshift... tables:#{target_tables.join(", ")}")
867
+ break
868
+ else
869
+ raise "Invalid auto_create_status in dump/sync.info filedump."
870
+ end
871
+ end
872
+ end
873
+
874
+ def initialize_source_positions_and_call_callback(source_pos, callback, sync_fm, options = {})
813
875
  if source_pos
814
- initialize_positions(sync_fm, source_pos)
876
+ initialize_positions(sync_fm, source_pos, options)
815
877
  else
816
878
  # no source_pos was given because dump was completed in the
817
879
  # previous init sync attempt. Position files must be there already
@@ -1152,14 +1214,14 @@ EOM
1152
1214
  end
1153
1215
  end
1154
1216
 
1155
- def initialize_positions(sync_fm, source_pos)
1217
+ def initialize_positions(sync_fm, source_pos, options)
1156
1218
  sync_fm.save_table_source_pos(target_tables, source_pos)
1157
1219
 
1158
1220
  if @full_initial_sync
1159
1221
  sync_fm.save_source_pos(source_pos)
1160
1222
  end
1161
1223
  sync_fm.install_table_source_pos_files(target_tables)
1162
- sync_fm.reset_table_position_files(target_tables)
1224
+ sync_fm.reset_table_position_files(target_tables, options)
1163
1225
  end
1164
1226
 
1165
1227
  def convert_to_flydata_values(source_table, values)
@@ -1226,7 +1288,53 @@ EOM
1226
1288
  end
1227
1289
  end
1228
1290
 
1229
- def generate_flydata_tabledefs(context, de)
1291
+ # Generate and send table ddl to a data server
1292
+ def generate_and_send_table_ddl(context, de)
1293
+ flydata_tabledefs = generate_flydata_tabledefs(context, de)
1294
+ return flydata_tabledefs if flydata_tabledefs.nil? || flydata_tabledefs.empty?
1295
+
1296
+ #TODO: Check per-table position file and raise an error if pos > 0
1297
+
1298
+ base_record = {
1299
+ table_rev: 1,
1300
+ seq: SyncFileManager::INITIAL_SYNC_SEQ,
1301
+ respect_order: true,
1302
+ type: 'initial_sync',
1303
+ src_pos: '-',
1304
+ v: FlydataCore::Record::V2,
1305
+ }
1306
+
1307
+ records = flydata_tabledefs.collect do |flydata_tabledef|
1308
+ table_name = flydata_tabledef[:table_name]
1309
+ ddl_options = flydata_tabledef[:ddl_options] || {}
1310
+ base_record.merge(
1311
+ flydata_tabledef: flydata_tabledef.dup,
1312
+ skip_drop_table: ddl_options[:skip_drop_table],
1313
+ table_name: table_name
1314
+ )
1315
+ end
1316
+
1317
+ forwarder = build_forwarder(context.dp, de)
1318
+ forwarder.emit(records)
1319
+ forwarder.flush
1320
+
1321
+ flydata_tabledefs
1322
+ ensure
1323
+ if forwarder
1324
+ forwarder.close rescue nil
1325
+ end
1326
+ end
1327
+
1328
+ # Generate and run table ddl on Redshift
1329
+ def generate_and_run_table_ddl(context, de, options = {})
1330
+ flydata_tabledefs = generate_flydata_tabledefs(context, de, options)
1331
+ return flydata_tabledefs if flydata_tabledefs.nil? || flydata_tabledefs.empty?
1332
+ ddl_runner = Flydata::Output::RedshiftDDLRunner.new(redshift_cluster, de)
1333
+ ddl_runner.run_ddls(flydata_tabledefs)
1334
+ flydata_tabledefs
1335
+ end
1336
+
1337
+ def generate_flydata_tabledefs(context, de, options = {})
1230
1338
  schema_name = (de['schema_name'] || nil)
1231
1339
  tables = opts.all_tables? ? @full_tables : (@input_tables.empty? ? @unsynced_tables : @input_tables)
1232
1340
 
@@ -1258,25 +1366,32 @@ EOM
1258
1366
  table_validity_hash = Hash.new {|h,k| h[k] = {}}
1259
1367
  tables_without_error = tables
1260
1368
  unless error_list.empty?
1261
- log_error_stderr("\n\nERROR: FlyData Sync will not sync the following table(s) due to an error.")
1369
+ unless options[:auto_create]
1370
+ log_error_stderr("\n\nERROR: FlyData Sync will not sync the following table(s) due to an error.")
1371
+ end
1372
+
1262
1373
  group_error = error_list.group_by {|d| d[:error]}
1374
+ error_info_hash = {}
1263
1375
  group_error.each_key do |error|
1264
1376
  group_error[error].each do |hash|
1265
1377
  if table = hash[:table]
1378
+ error_info_hash[table] = error
1266
1379
  log_error_stderr(" - #{table} (#{error})")
1267
1380
  table_validity_hash[table][TableAttribute::INVALID_TABLE_REASON] = error
1268
1381
  end
1269
1382
  end
1270
1383
  end
1271
- log_error_stderr(<<EOS)
1272
- To sync these table(s), please fix the error(s) and run "flydata sync:generate_table_ddl" again.
1273
- EOS
1274
- tables_without_error = tables - error_list.inject([]){|arr, err| arr << err[:table] if err[:table]}
1275
- unless tables_without_error.empty?
1276
- log_error_stderr(<<EOS)
1277
1384
 
1278
- The other tables are ready to sync. To start sync, run the generated script on the Redshift cluster and run "flydata start".
1279
- EOS
1385
+ if options[:auto_create]
1386
+ log_error_stderr("[error] FlyData Sync will not sync the following table(s) due to an error. #{error_info_hash.collect{|k,v| "#{k}(#{v})"}.join(", ")}")
1387
+ log_warn_stderr("To sync these table(s), please fix the error(s) and run \"flydata restart --auto-create\" again.")
1388
+ else
1389
+ log_error_stderr("To sync these table(s), please fix the error(s) and run \"flydata sync:generate_table_ddl\" again.")
1390
+ end
1391
+
1392
+ tables_without_error = tables - error_list.inject([]){|arr, err| arr << err[:table] if err[:table]}
1393
+ unless tables_without_error.empty? || options[:auto_create]
1394
+ log_error_stderr("The other tables are ready to sync. To start sync, run the generated script on the Redshift cluster and run \"flydata start\".")
1280
1395
  end
1281
1396
  end
1282
1397
 
@@ -1346,7 +1461,11 @@ Thank you for using FlyData!
1346
1461
  sync_fm = create_sync_file_manager
1347
1462
  sync_info = sync_fm.load_sync_info
1348
1463
  sync_resumed = options[:resume] && !!sync_info
1349
-
1464
+ auto_create = if sync_resumed
1465
+ !!sync_info[:auto_create_status]
1466
+ else
1467
+ false
1468
+ end
1350
1469
  table_lists = source.sync.table_lists
1351
1470
 
1352
1471
  # `full_tables` will either
@@ -1383,13 +1502,14 @@ Thank you for using FlyData!
1383
1502
 
1384
1503
  verify_input_tables(@input_tables, @full_tables)
1385
1504
 
1386
- sync_resumed
1505
+ [sync_resumed, auto_create]
1387
1506
  end
1388
1507
 
1389
1508
  def validate_initial_sync_status
1390
1509
  sync_fm = create_sync_file_manager
1391
1510
  dump_pos_info = sync_fm.load_dump_pos
1392
- sync_info_exists = !!sync_fm.load_sync_info
1511
+ sync_info = sync_fm.load_sync_info || {}
1512
+ sync_info_exists = !sync_info.empty?
1393
1513
  dump_file_deleted = !File.exists?(sync_fm.dump_file_path)
1394
1514
  sync_fm.close
1395
1515
 
@@ -1399,7 +1519,7 @@ Thank you for using FlyData!
1399
1519
  end
1400
1520
 
1401
1521
  # check if the previous initial sync was aborted during dump.
1402
- sync_info_exists && dump_file_deleted
1522
+ sync_info_exists && sync_info[:auto_create_status] == AUTO_CREATE_STATUS_CREATED_TABLES && dump_file_deleted
1403
1523
  end
1404
1524
 
1405
1525
  def target_tables
@@ -1423,8 +1543,8 @@ Thank you for using FlyData!
1423
1543
  end
1424
1544
  end
1425
1545
 
1426
- def data_entry
1427
- unless @sync_de
1546
+ def data_entry(refresh: false)
1547
+ if @sync_de.nil? || refresh
1428
1548
  @sync_de = super
1429
1549
  source.sync.setup # updates the data entry contents
1430
1550
  end
@@ -39,4 +39,16 @@ end
39
39
  class DumpParseError < AgentError
40
40
  end
41
41
 
42
+ class AgentInternalError < AgentError
43
+ NO_VALID_TABLE_ERR = 101
44
+ UNKNOWN_ERR = 999
45
+
46
+ def initialize(message, code = UNKNOWN_ERR)
47
+ super("#{message} code:#{code}")
48
+ @code = code
49
+ end
50
+
51
+ attr_reader :code
52
+ end
53
+
42
54
  end
@@ -0,0 +1,70 @@
1
+ require 'flydata-core/postgresql/pg_client'
2
+ require 'flydata-core/table_def/sync_redshift_table_def'
3
+ require 'flydata/command_loggable'
4
+
5
+ module Flydata
6
+ module Output
7
+ class RedshiftDDLRunner
8
+ include CommandLoggable
9
+
10
+ def initialize(dbconf, de)
11
+ @pg_client = FlydataCore::Postgresql::PGClient.new(dbconf, notice_receiver: Proc.new{|result|
12
+ log_info_stdout(" #{result.error_message.to_s.strip}")
13
+ })
14
+ @schema_name = de['schema_name']
15
+ @schema_name = nil if @schema_name.to_s.strip.empty?
16
+ end
17
+
18
+ def run_ddls(flydata_tabledefs)
19
+ @pg_client.establish_connection
20
+ create_schema
21
+ create_ctl_tables
22
+ flydata_tabledefs.each.with_index(1) do |flydata_tabledef, index|
23
+ run_ddl(flydata_tabledef, index, flydata_tabledefs.size)
24
+ end
25
+ ensure
26
+ @pg_client.close
27
+ end
28
+
29
+ private
30
+
31
+ def create_schema
32
+ return unless @schema_name
33
+ query = FlydataCore::TableDef::SyncRedshiftTableDef.create_schema_sql(@schema_name)
34
+ begin
35
+ @pg_client.exec(query)
36
+ rescue PG::InsufficientPrivilege
37
+ # Ignore
38
+ end
39
+ end
40
+
41
+ def create_ctl_tables
42
+ query = FlydataCore::TableDef::SyncRedshiftTableDef.create_flydata_ctl_table_sql(@schema_name)
43
+ log_info_stdout(" -> Creating flydata ctl tables... schema:\"#{@schema_name}\"")
44
+ log_info("query:\n#{query}")
45
+ @pg_client.exec(query)
46
+ end
47
+
48
+ def run_ddl(flydata_tabledef, index, total)
49
+ ddl = FlydataCore::TableDef::SyncRedshiftTableDef.from_flydata_tabledef(
50
+ flydata_tabledef,
51
+ flydata_tabledef[:ddl_options].merge(flydata_ctl_table: false))
52
+ log_info_stdout(" -> Creating \"#{flydata_tabledef[:table_name]}\"... (#{index}/#{total})")
53
+ log_info("query:\n#{ddl}")
54
+ @pg_client.exec(ddl)
55
+ rescue PG::DependentObjectsStillExist => e
56
+ log_error_stderr(" [error] #{e.to_s.strip}")
57
+ # ignore this error
58
+ end
59
+
60
+ =begin
61
+ def wrap(query)
62
+ query = query.strip
63
+ query = "BEGIN;\n#{query}" unless /^BEGIN;/i.match(query)
64
+ query = "#{query}\nEND;" unless /END;$/i.match(query)
65
+ query
66
+ end
67
+ =end
68
+ end
69
+ end
70
+ end
@@ -11,6 +11,8 @@ module Flydata
11
11
  BACKUP_DIR = ENV['FLYDATA_BACKUP'] || File.join(FLYDATA_HOME, 'backup')
12
12
  TABLE_POSITIONS_DIR = FLYDATA_TABLE_POSITIONS_DIR
13
13
 
14
+ INITIAL_SYNC_SEQ = 1
15
+
14
16
  def initialize(data_entry, source = nil)
15
17
  @data_entry = data_entry
16
18
  @source = source #for Source dependent objects
@@ -171,10 +173,10 @@ module Flydata
171
173
  end
172
174
 
173
175
  # table files
174
- def reset_table_position_files(tables)
176
+ def reset_table_position_files(tables, options = {})
175
177
  tables.each do |table_name|
176
178
  file = File.join(table_positions_dir_path, table_name + ".pos")
177
- File.open(file, "w") {|f| f.write('0') }
179
+ File.open(file, "w") {|f| f.write("0") }
178
180
  end
179
181
  end
180
182
 
@@ -204,7 +206,7 @@ module Flydata
204
206
 
205
207
  def increment_table_position(seq)
206
208
  seq = seq.to_i + 1
207
- seq = FlydataCore::QueryJob::SYNC_FIRST_SEQ if seq == 1
209
+ seq = "#{seq}.sync" if [1,2].include?(seq)
208
210
  seq
209
211
  end
210
212
 
@@ -223,8 +225,12 @@ module Flydata
223
225
  # logical transaction ends
224
226
  end
225
227
 
228
+ def table_position_file_path(table_name)
229
+ File.join(table_positions_dir_path, table_name + ".pos")
230
+ end
231
+
226
232
  def open_table_position_file(table_name)
227
- file = File.join(table_positions_dir_path, table_name + ".pos")
233
+ file = table_position_file_path(table_name)
228
234
  retry_count = 0
229
235
  begin
230
236
  @table_position_files[table_name] ||= (f = File.open(file, File::RDWR); f.sync = true; f)
@@ -253,6 +259,15 @@ module Flydata
253
259
  f.rewind
254
260
  end
255
261
 
262
+ # This doesn't cache the File object
263
+ def save_table_positions(table_names, seq)
264
+ table_names = Array(table_names)
265
+ table_names.each do |table_name|
266
+ file_path = table_position_file_path(table_name)
267
+ File.write(file_path, seq.to_s)
268
+ end
269
+ end
270
+
256
271
  def get_table_position(table_name)
257
272
  f = open_table_position_file(table_name)
258
273
  seq = f.read
@@ -269,21 +284,41 @@ module Flydata
269
284
  FLYDATA_LOCK
270
285
  end
271
286
 
287
+ # "sync.info" file includes initial sync information for resuming
288
+ # - initial_sync: True if initial sync is full initial sync
289
+ # - tables: target tables for initial sync
290
+ # - auto_create_status: START|SENT_DDL|CREATED_TABLES
291
+
272
292
  def sync_info_file
273
293
  File.join(dump_dir, "sync.info")
274
294
  end
275
295
 
276
- def save_sync_info(initial_sync, tables)
296
+ def delete_sync_info
297
+ FileUtils.rm(sync_info_file)
298
+ end
299
+
300
+ def save_sync_info(initial_sync, tables, auto_create_status = nil)
277
301
  File.open(sync_info_file, "w") do |f|
278
- f.write([initial_sync, tables.join(" ")].join("\t"))
302
+ content = {
303
+ initial_sync: initial_sync,
304
+ tables: tables,
305
+ auto_create_status: auto_create_status,
306
+ }.to_json
307
+ f.write(content)
279
308
  end
280
309
  end
281
310
 
282
311
  def load_sync_info
283
312
  return nil unless File.exists?(sync_info_file)
284
- items = File.open(sync_info_file, 'r').readline.split("\t")
285
- { initial_sync: (items[0] == 'true'),
286
- tables: items[1].split(" ") }
313
+ line = File.open(sync_info_file, 'r').readline
314
+ begin
315
+ JSON.parse(line, symbolize_names: true)
316
+ rescue
317
+ # For compatibility
318
+ items = line.split("\t")
319
+ { initial_sync: (items[0] == 'true'),
320
+ tables: items[1].split(" ") }
321
+ end
287
322
  end
288
323
 
289
324
  def get_table_source_pos_init(table_name)
@@ -116,6 +116,7 @@ module Flydata
116
116
  context 'with no stream option' do
117
117
  before do
118
118
  expect(default_sync_fm).to receive(:save_sync_info).once
119
+ expect(default_sync_fm).to receive(:load_sync_info).and_return(nil, {})
119
120
  expect(subject).to receive(:free_disk_space).and_return(disk_byte)
120
121
  expect(File).to receive(:dirname)
121
122
  end
@@ -139,6 +140,7 @@ module Flydata
139
140
  context 'with stream option' do
140
141
  it 'will export to io' do
141
142
  expect(default_sync_fm).to receive(:save_sync_info).once
143
+ expect(default_sync_fm).to receive(:load_sync_info).and_return(nil, {})
142
144
  expect_any_instance_of(Flydata::SourceMysql::Parser::MysqlDumpGeneratorNoMasterData).to receive(:dump)
143
145
 
144
146
  subject.send(:generate_source_dump, default_data_entry, default_sync_fm, false)
@@ -176,8 +176,13 @@ EOT
176
176
  records = rows.collect do |row|
177
177
  record = row.kind_of?(Hash) && row.keys.include?(:row) ? row : { row: row }
178
178
  @seq += 1
179
+ seq = if [1,2].include?(@seq.to_i)
180
+ "#{@seq.to_i}.sync"
181
+ else
182
+ @seq
183
+ end
179
184
  record.merge({ :type=>type, :table_name=>table, :respect_order=>true,
180
- :seq=>@seq, :src_pos=>"#{binlog_file}\t#{position}", :table_rev=>1,
185
+ :seq=>seq, :src_pos=>"#{binlog_file}\t#{position}", :table_rev=>1,
181
186
  :v=>2 })
182
187
  end
183
188
  expect_emitted_records(event, records)
@@ -383,7 +388,7 @@ EOT
383
388
  respect_order: true,
384
389
  src_pos: "mysql-bin.000048\t689",
385
390
  table_rev: 2, # increment revision
386
- seq: 2,
391
+ seq: "2.sync",
387
392
  v: flydata_record_version,
388
393
  actions: [{
389
394
  action: :add_column, column: "sum", :type=>'int4(11)', :query=>'add column sum integer'}],
@@ -399,7 +404,7 @@ EOT
399
404
  respect_order: true,
400
405
  src_pos: "mysql-bin.000048\t800",
401
406
  table_rev: 2, # increment revision
402
- seq: 2,
407
+ seq: "2.sync",
403
408
  v: flydata_record_version,
404
409
  actions: [{
405
410
  action: :drop_column, column: "sum", :query=>'drop column sum'}],
@@ -416,7 +421,7 @@ EOT
416
421
  respect_order: true,
417
422
  src_pos: "mysql-bin.000048\t#{337 - 217}",
418
423
  table_rev: 1,
419
- seq: 2,
424
+ seq: "2.sync",
420
425
  v: flydata_record_version,
421
426
  actions: [{
422
427
  action: :add_index,
@@ -0,0 +1,80 @@
1
+ # coding: utf-8
2
+ require 'spec_helper'
3
+ require 'flydata/output/ddl_runner'
4
+
5
+ module Flydata
6
+ module Output
7
+ describe RedshiftDDLRunner do
8
+ let(:pg_client) { double('pg_client') }
9
+ let(:dbconf) {
10
+ {
11
+ host: 'dummy-host',
12
+ port: 54399,
13
+ username: 'dummy-user',
14
+ password: 'dummy-pass',
15
+ dbname: 'dummy-db',
16
+ }
17
+ }
18
+ let(:schema_name) { nil }
19
+ let(:de) {
20
+ {'schema_name' => schema_name}
21
+ }
22
+ let(:subject_object) do
23
+ described_class.new(dbconf, de)
24
+ end
25
+
26
+ before do
27
+ allow(FlydataCore::Postgresql::PGClient).to receive(:new).and_return(pg_client)
28
+ allow(pg_client).to receive(:establish_connection)
29
+ allow(pg_client).to receive(:exec)
30
+ allow(pg_client).to receive(:close)
31
+ end
32
+
33
+ describe '.run_ddls' do
34
+ let(:flydata_tabledef) {
35
+ {:table_name=>"test_table",
36
+ :columns=>
37
+ [{:column=>"id",
38
+ :type=>"int4(11)",
39
+ :auto_increment=>true,
40
+ :not_null=>true,
41
+ :primary_key=>true},
42
+ {:column=>"value", :type=>"int4(11)", :default=>nil}],
43
+ :default_charset=>"UTF_8",
44
+ :src_ddl=>
45
+ "CREATE TABLE `test_table` ( `id` int(11) NOT NULL AUTO_INCREMENT, `value` int(11) DEFAULT NULL, PRIMARY KEY (`id`)) ENGINE=InnoDB AUTO_INCREMENT=15 DEFAULT CHARSET=utf8;",
46
+ :pk_override=>nil,
47
+ :ddl_options=>
48
+ {:flydata_ctl_table=>true,
49
+ :schema_name=>schema_name,
50
+ :ctl_only=>false,
51
+ :skip_drop_table=>false,
52
+ :skip_primary_key_check=>false}}
53
+ }
54
+ let(:flydata_tabledefs) { [ flydata_tabledef ] }
55
+ subject { subject_object.run_ddls(flydata_tabledefs) }
56
+
57
+ context 'schema name is nil(not set)' do
58
+ it 'skips creating schema' do
59
+ expect(pg_client).to receive(:exec) do |query|
60
+ expect(query).not_to match(/CREATE SCHEMA/)
61
+ end
62
+ subject
63
+ end
64
+ end
65
+
66
+ context 'schema name is nil(not set)' do
67
+ let(:schema_name) { 'dummy-schema' }
68
+ it 'creates schema' do
69
+ expect(pg_client).to receive(:exec) do |query|
70
+ expect(query).to match(/CREATE SCHEMA/)
71
+ end
72
+ subject
73
+ end
74
+ end
75
+
76
+ end
77
+ end
78
+ end
79
+
80
+ end
@@ -273,7 +273,7 @@ module Flydata
273
273
  subject { subject_object }
274
274
  context 'when an exception happens in the block' do
275
275
  let(:first_pos) { '1.sync' }
276
- let(:last_pos) { first_pos.to_i + 1 }
276
+ let(:last_pos) { "2.sync" }
277
277
  before do
278
278
  subject.increment_and_save_table_position(test_table) do |seq|
279
279
  expect(seq).to eq first_pos
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: flydata
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.17
4
+ version: 0.7.18
5
5
  platform: ruby
6
6
  authors:
7
7
  - Koichi Fujikawa
@@ -12,7 +12,7 @@ authors:
12
12
  autorequire:
13
13
  bindir: bin
14
14
  cert_chain: []
15
- date: 2017-01-17 00:00:00.000000000 Z
15
+ date: 2017-01-26 00:00:00.000000000 Z
16
16
  dependencies:
17
17
  - !ruby/object:Gem::Dependency
18
18
  name: rest-client
@@ -700,6 +700,7 @@ files:
700
700
  - lib/flydata/json.rb
701
701
  - lib/flydata/json/.gitignore
702
702
  - lib/flydata/log_monitor.rb
703
+ - lib/flydata/output/ddl_runner.rb
703
704
  - lib/flydata/output/forwarder.rb
704
705
  - lib/flydata/parser.rb
705
706
  - lib/flydata/parser/parser_provider.rb
@@ -848,6 +849,7 @@ files:
848
849
  - spec/flydata/helper/worker_spec.rb
849
850
  - spec/flydata/heroku_spec.rb
850
851
  - spec/flydata/json/json_ext_spec.rb
852
+ - spec/flydata/output/ddl_runner_spec.rb
851
853
  - spec/flydata/output/forwarder_spec.rb
852
854
  - spec/flydata/plugin_support/context_spec.rb
853
855
  - spec/flydata/query_based_sync/client_spec.rb
@@ -910,7 +912,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
910
912
  version: '0'
911
913
  requirements: []
912
914
  rubyforge_project:
913
- rubygems_version: 2.4.3
915
+ rubygems_version: 2.0.14.1
914
916
  signing_key:
915
917
  specification_version: 4
916
918
  summary: FlyData Agent