flydata 0.7.12 → 0.7.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +1 -0
  3. data/VERSION +1 -1
  4. data/flydata-core/lib/flydata-core/oracle/config.rb +25 -0
  5. data/flydata-core/lib/flydata-core/oracle/oracle_client.rb +48 -0
  6. data/flydata-core/lib/flydata-core/oracle/query_helper.rb +20 -0
  7. data/flydata-core/lib/flydata-core/oracle/source_pos.rb +63 -0
  8. data/flydata-core/lib/flydata-core/table_def/oracle_table_def.rb +167 -0
  9. data/flydata-core/spec/oracle/config_spec.rb +45 -0
  10. data/flydata-core/spec/oracle/source_pos_spec.rb +101 -0
  11. data/flydata.gemspec +0 -0
  12. data/lib/flydata/command/sync.rb +14 -4
  13. data/lib/flydata/source.rb +1 -0
  14. data/lib/flydata/source/sync_repair.rb +25 -0
  15. data/lib/flydata/source_mysql/generate_source_dump.rb +2 -1
  16. data/lib/flydata/source_mysql/mysql_accessible.rb +30 -0
  17. data/lib/flydata/source_mysql/parser/dump_parser.rb +0 -40
  18. data/lib/flydata/source_mysql/sync_database_size_check.rb +29 -0
  19. data/lib/flydata/source_mysql/sync_repair.rb +26 -0
  20. data/lib/flydata/source_oracle/data_entry.rb +24 -0
  21. data/lib/flydata/source_oracle/generate_source_dump.rb +184 -0
  22. data/lib/flydata/source_oracle/oracle_component.rb +12 -0
  23. data/lib/flydata/source_oracle/parse_dump_and_send.rb +128 -0
  24. data/lib/flydata/source_oracle/plugin_support/context.rb +13 -0
  25. data/lib/flydata/source_oracle/plugin_support/source_position_file.rb +14 -0
  26. data/lib/flydata/source_oracle/query_based_sync/diff_query_generator.rb +122 -0
  27. data/lib/flydata/source_oracle/setup.rb +24 -0
  28. data/lib/flydata/source_oracle/source_pos.rb +18 -0
  29. data/lib/flydata/source_oracle/sync.rb +15 -0
  30. data/lib/flydata/source_oracle/sync_generate_table_ddl.rb +64 -0
  31. data/lib/flydata/source_oracle/table_meta.rb +220 -0
  32. data/lib/flydata/source_postgresql/sync_repair.rb +13 -0
  33. data/spec/flydata/source_mysql/generate_source_dump_spec.rb +2 -2
  34. metadata +27 -3
data/flydata.gemspec CHANGED
Binary file
@@ -436,6 +436,7 @@ EOS
436
436
  sync_fm = create_sync_file_manager(de)
437
437
  context = source.source_pos
438
438
  set_current_tables
439
+
439
440
  # Stop agent. Check sync and make sure the state is :STUCK_AT_UPLOAD
440
441
  # Get table status for the tables.
441
442
  status, corrupt_master_pos_files, pos_mismatch_tables, gap_tables, table_status_hash =
@@ -538,9 +539,19 @@ EOS
538
539
  end
539
540
  end
540
541
  end
541
- # if sent_source_pos is nil, it means sync has started for none of tables. No need
542
- # to repair positions nor clean buffer data.
542
+
543
+ unless unrepairable_tables.empty?
544
+ # Report tables whose source position has expired to us through the error logs
545
+ log_error_stderr "[error]: Failed to repair tables due to expired source position. These tables need to be re-synced - #{unrepairable_tables.join(", ")}"
546
+ end
547
+
548
+ # If sent_source_pos is nil, it means:
549
+ # - Sync has started for none of tables
550
+ # - None of tables are broken
551
+ # - All of broken tables have an expired source position
552
+ # No need to repair positions nor clean buffer data.
543
553
  if sent_source_pos
554
+ # The branch below is unreachable since sent_source_pos cannot be less than oldest_source_pos
544
555
  if oldest_source_pos && sent_source_pos < oldest_source_pos
545
556
  e = AgentError.new("Repair failed due to expired source position")
546
557
  e.description = <<EOS
@@ -1512,9 +1523,8 @@ Thank you for using FlyData!
1512
1523
  end
1513
1524
  end
1514
1525
 
1515
- # TODO implement
1516
1526
  def get_oldest_available_source_pos
1517
- nil
1527
+ source.sync_repair.get_oldest_available_source_pos
1518
1528
  end
1519
1529
  end
1520
1530
  end
@@ -24,6 +24,7 @@ module Source
24
24
  "FileDataEntry" => :source_file,
25
25
  "RedshiftPostgresqlDataEntry" => :source_postgresql,
26
26
  "RedshiftZendeskDataEntry" => :source_zendesk,
27
+ "RedshiftOracleDataEntry" => :source_oracle,
27
28
  }
28
29
  def self.component_class_for(component_sym, de)
29
30
  source_sym = DATA_ENTRY_TYPE_MAP[de['type']]
@@ -0,0 +1,25 @@
1
+ require 'flydata/source'
2
+ require 'flydata/source/component'
3
+ require 'flydata/source/errors'
4
+
5
+ module Flydata
6
+ module Source
7
+
8
+ class SyncRepair < Component
9
+ def self.inherited(child_class)
10
+ Source.register(child_class, self)
11
+ end
12
+
13
+ # Public Interface: Get oldest available source pos information
14
+ #
15
+ # Called from sync:repair command to determine a master position for repair.
16
+ # If this returns nil, sync:repair sets any oldest source position to source pos.
17
+ #
18
+ # Raises exception when failing to get source positions
19
+ def self.get_oldest_available_source_pos
20
+ nil
21
+ end
22
+ end
23
+
24
+ end
25
+ end
@@ -3,6 +3,7 @@ require 'flydata/preference/data_entry_preference'
3
3
  require 'flydata/source_mysql/mysql_compatibility_check'
4
4
  require 'flydata/source_mysql/parser/dump_parser'
5
5
  require 'flydata-core/mysql/binlog_pos'
6
+ require 'flydata/source_mysql/sync_database_size_check'
6
7
 
7
8
  module Flydata
8
9
  module SourceMysql
@@ -34,7 +35,7 @@ class GenerateSourceDump < Source::GenerateSourceDump
34
35
 
35
36
  def dump_size(tables)
36
37
  opts = de['mysql_data_entry_preference'].merge({"tables" => tables})
37
- Parser::DatabaseSizeCheck.new(opts).get_db_bytesize
38
+ SyncDatabaseSizeCheck.new(opts).get_db_bytesize
38
39
  end
39
40
 
40
41
  def dump(tables, file_path = nil, &src_pos_callback)
@@ -0,0 +1,30 @@
1
+ require 'mysql2'
2
+ require 'flydata-core/mysql/config'
3
+
4
+ module Flydata
5
+ module SourceMysql
6
+ module MysqlAccessible
7
+ def initialize(conf)
8
+ @conf = conf
9
+ @mysql_db_opts = build_mysql_db_opts(conf)
10
+ end
11
+
12
+ def exec_mysql_query(query)
13
+ cli = mysql_client
14
+ cli.query(query)
15
+ ensure
16
+ if cli
17
+ cli.close rescue nil
18
+ end
19
+ end
20
+
21
+ def build_mysql_db_opts(conf)
22
+ FlydataCore::Mysql::Config.build_mysql_db_opts(conf)
23
+ end
24
+
25
+ def mysql_client
26
+ Mysql2::Client.new(@mysql_db_opts)
27
+ end
28
+ end
29
+ end
30
+ end
@@ -12,18 +12,6 @@ module Flydata
12
12
  module SourceMysql
13
13
  module Parser
14
14
 
15
- module MysqlAccessible
16
- def mysql_conf(conf)
17
- @mysql_conf = FlydataCore::Mysql::Config.build_mysql_db_opts(conf)
18
- end
19
-
20
- def mysql_cli(conf = nil)
21
- mysql_conf(conf) if conf
22
- return FlydataMysqlClient.new(@mysql_conf) if @mysql_conf
23
- nil
24
- end
25
- end
26
-
27
15
  module DumpStreamIO
28
16
  # return position
29
17
  # sync command doesn't resume if pos is -1 in dump position file
@@ -625,34 +613,6 @@ EOS
625
613
  end
626
614
  end
627
615
 
628
- class DatabaseSizeCheck
629
- include MysqlAccessible
630
-
631
- SIZE_CHECK_QUERY = <<EOT
632
- SELECT
633
- SUM(data_length) bytesize
634
- FROM
635
- information_schema.tables
636
- WHERE
637
- table_schema = '%s' AND table_name in (%s);
638
- EOT
639
-
640
- def initialize(de_conf)
641
- @de_conf = de_conf
642
- @database = de_conf['database']
643
- @tables = de_conf['tables']
644
- @query = SIZE_CHECK_QUERY % [@database, @tables.collect{|t| "'#{t}'"}.join(',')]
645
- end
646
-
647
- def get_db_bytesize
648
- client = mysql_cli(@de_conf)
649
- result = client.query(@query)
650
- return result.first['bytesize'].to_i
651
- ensure
652
- client.close rescue nil
653
- end
654
- end
655
-
656
616
  # Read and buffer data in a separate thread
657
617
  class AsyncIO
658
618
  MAX_ITEMS = 200
@@ -0,0 +1,29 @@
1
+ require 'flydata/source_mysql/mysql_accessible'
2
+
3
+ module Flydata
4
+ module SourceMysql
5
+ class SyncDatabaseSizeCheck
6
+ include MysqlAccessible
7
+
8
+ SIZE_CHECK_QUERY = <<EOT
9
+ SELECT
10
+ SUM(data_length) bytesize
11
+ FROM
12
+ information_schema.tables
13
+ WHERE
14
+ table_schema = '%s' AND table_name in (%s);
15
+ EOT
16
+
17
+ def initialize(de_conf)
18
+ super
19
+ @query = SIZE_CHECK_QUERY % [
20
+ de_conf['database'],
21
+ tables = de_conf['tables'].collect{|t| "'#{t}'"}.join(',')]
22
+ end
23
+
24
+ def get_db_bytesize
25
+ exec_mysql_query(@query).first['bytesize'].to_i
26
+ end
27
+ end
28
+ end
29
+ end
@@ -0,0 +1,26 @@
1
+ require 'flydata/source/sync_repair'
2
+ require 'flydata/source_mysql/mysql_accessible'
3
+ require 'flydata/source_mysql/source_pos'
4
+
5
+ module Flydata
6
+ module SourceMysql
7
+
8
+ class SyncRepair < Source::SyncRepair
9
+ def get_oldest_available_source_pos
10
+ binary_logs = BinarylogsCheck.new(de[Sync::SOURCE_PREFERENCE_NAME]).get_binary_logs
11
+ oldest_binlog_str = binary_logs.first.values.join("\t")
12
+ oldest_binlog = source.source_pos.create_source_pos(oldest_binlog_str)
13
+ end
14
+
15
+ class BinarylogsCheck
16
+ include MysqlAccessible
17
+
18
+ SHOW_BINARY_LOGS_QUERY = "SHOW BINARY LOGS;"
19
+ def get_binary_logs
20
+ exec_mysql_query(SHOW_BINARY_LOGS_QUERY)
21
+ end
22
+ end
23
+ end
24
+
25
+ end
26
+ end
@@ -0,0 +1,24 @@
1
+ require 'flydata/source/data_entry'
2
+
3
+ module Flydata
4
+ module SourceOracle
5
+
6
+ class DataEntry < Source::DataEntry
7
+ CONFIG_PARAMS = {
8
+ oracle_data_entry_preference: {
9
+ database: {},
10
+ tables: {},
11
+ tables_append_only: {},
12
+ pk_override: {},
13
+ table_attributes: {},
14
+ host: {},
15
+ port: {},
16
+ username: {},
17
+ password: {encrypted: true},
18
+ schema: {},
19
+ }
20
+ }
21
+ end
22
+
23
+ end
24
+ end
@@ -0,0 +1,184 @@
1
+ require 'flydata/source/generate_source_dump'
2
+ require 'flydata/preference/data_entry_preference'
3
+ require 'flydata/source_oracle/oracle_component'
4
+ require 'flydata/source_oracle/query_based_sync/diff_query_generator'
5
+ require 'flydata/source_oracle/table_meta'
6
+ require 'flydata-core/oracle/source_pos'
7
+ require 'flydata-core/oracle/oracle_client'
8
+ require 'flydata-core/oracle/query_helper'
9
+ require 'msgpack'
10
+
11
+ module Flydata
12
+ module SourceOracle
13
+
14
+ class GenerateSourceDump < Source::GenerateSourceDump
15
+ include OracleComponent
16
+
17
+ def run_compatibility_check(dump_dir, backup_dir)
18
+ %w(host username database).each do |k|
19
+ if de_prefs[k].to_s.empty?
20
+ raise "'#{k}' is required. Set the value in the conf file " +
21
+ "-> #{Flydata::Preference::DataEntryPreference.conf_path(de)}"
22
+ end
23
+ end
24
+ end
25
+
26
+ def confirmation_items
27
+ items = {
28
+ "host" => de_prefs['host'],
29
+ "port" => de_prefs['port'],
30
+ "username" => de_prefs['username'],
31
+ "database" => de_prefs['database'],
32
+ "schema" => de_prefs['schema'],
33
+ }
34
+
35
+ items
36
+ end
37
+
38
+ DUMP_SIZE_QUERY = <<EOS
39
+ SELECT
40
+ sum(bytes) as total_size
41
+ FROM
42
+ (SELECT segment_name table_name, owner, bytes
43
+ FROM dba_segments
44
+ WHERE segment_type IN ('TABLE','TABLE PARTITION','TABLE SUBPARTITION')
45
+ UNION ALL
46
+ SELECT i.table_name, i.owner, s.bytes
47
+ FROM dba_indexes i, dba_segments s
48
+ WHERE s.segment_name = i.index_name
49
+ AND s.owner = i.owner
50
+ AND s.segment_type IN ('INDEX','INDEX PARTITION','INDEX SUBPARTITION')
51
+ UNION ALL
52
+ SELECT l.table_name, l.owner, s.bytes
53
+ FROM dba_lobs l, dba_segments s
54
+ WHERE s.segment_name = l.segment_name
55
+ AND s.owner = l.owner
56
+ AND s.segment_type IN ('LOBSEGMENT','LOB PARTITION')
57
+ UNION ALL
58
+ SELECT l.table_name, l.owner, s.bytes
59
+ FROM dba_lobs l, dba_segments s
60
+ WHERE s.segment_name = l.index_name
61
+ AND s.owner = l.owner
62
+ AND s.segment_type = 'LOBINDEX')
63
+ WHERE owner = %{schema} and table_name in (%{tables})
64
+ EOS
65
+
66
+ def dump_size(tables)
67
+ cli = FlydataCore::Oracle::OracleClient.new(de_prefs)
68
+
69
+ query = DUMP_SIZE_QUERY % {
70
+ schema: FlydataCore::Oracle::QueryHelper.schema_as_value(de_prefs['schema'],
71
+ de_prefs['username']),
72
+ tables: FlydataCore::Oracle::QueryHelper.tables_as_value(tables)
73
+ }
74
+
75
+ cursor = cli.query(query)
76
+ cursor.fetch_hash['TOTAL_SIZE'].to_i
77
+ ensure
78
+ cursor.close rescue nil if cursor
79
+ cli.close if cli
80
+ end
81
+
82
+ def dump(tables, file_path = nil, &src_pos_callback)
83
+ io = nil
84
+ if file_path
85
+ io = File.open(file_path, "w")
86
+ else
87
+ raise "dump via pipe has not been implemented yet"
88
+ end
89
+
90
+ table_meta = Flydata::SourceOracle::TableMeta.new(de_prefs, tables)
91
+ cli = FlydataCore::Oracle::OracleClient.new(de_prefs)
92
+ table_meta.reload(cli)
93
+
94
+ tables_missing_meta = tables.select{|t| tm = table_meta[t]; tm.nil? || tm.empty?}
95
+ unless tables_missing_meta.empty?
96
+ raise "Tables are not available. Check if the following table(s) exist and are visible: #{tables_missing_meta.join(",")}"
97
+ end
98
+
99
+ context = source.sync_generate_table_ddl(dp, nil)
100
+ source_pos = get_source_pos(table_meta.current_scn, &src_pos_callback)
101
+
102
+ options = de_prefs.merge(table_meta: table_meta)
103
+ missing_tables = context.each_source_tabledef(tables, options) do |tabledef, error|
104
+ dump_table(tabledef, source_pos, io, cli) if tabledef
105
+ end
106
+
107
+ nil
108
+ ensure
109
+ cli.close if cli
110
+ io.close if io
111
+ end
112
+
113
+ private
114
+
115
+ def get_source_pos(snapshot, &src_pos_callback)
116
+ src_pos = FlydataCore::Oracle::SourcePos.new(snapshot)
117
+ src_pos_callback.call(nil, src_pos)
118
+
119
+ src_pos
120
+ end
121
+
122
+ NUM_ROWS = 50000
123
+
124
+ def dump_table(tabledef, source_pos, io, cli)
125
+ dump_source_table(tabledef, io)
126
+
127
+ last_pks = nil
128
+ loop do
129
+ num_rows, last_pks =
130
+ dump_table_chunk(tabledef.table_name, de_prefs['schema'], source_pos, NUM_ROWS,
131
+ tabledef, tabledef.pk_columns, last_pks, io, cli)
132
+ break if num_rows < NUM_ROWS
133
+ end
134
+ end
135
+
136
+ def dump_source_table(tabledef, io)
137
+ columns = {}
138
+ tabledef.column_def.each do |column_name, columndef_str|
139
+ columndef_hash = eval columndef_str
140
+ columns[column_name] = {"column_name" => column_name,
141
+ "format_type" => columndef_hash["data_type"]}
142
+ end
143
+ source_table_hash = { "table_name" => tabledef.table_name,
144
+ "columns" => columns }
145
+
146
+ io.write(source_table_hash.to_msgpack)
147
+ end
148
+
149
+ def dump_table_chunk(table, schema, source_pos, num_rows,
150
+ tabledef, pk_columns, last_pks, io, cli)
151
+ query = Flydata::SourceOracle::QueryBasedSync::DiffQueryGenerator.new(
152
+ table, schema,
153
+ columns: tabledef.columns,
154
+ to_scn: source_pos.scn,
155
+ pk_columns: pk_columns,
156
+ last_pks: last_pks,
157
+ limit: num_rows).build_query
158
+
159
+ column_names = tabledef.columns.inject([]) do |ary, h|
160
+ ary << h[:column]
161
+ end
162
+
163
+ res = cli.query(query)
164
+ count = 0
165
+ last_row = nil
166
+ while (row = res.fetch_hash)
167
+ count += 1
168
+ last_row = row
169
+ dump_row(row, column_names, io)
170
+ end
171
+
172
+ last_pks = last_row ? pk_columns.collect{|col| last_row[col]} : nil
173
+
174
+ [count, last_pks]
175
+ end
176
+
177
+ def dump_row(row, columns, io)
178
+ data = columns.collect{|col| row[col] }.to_msgpack
179
+ io.write(data)
180
+ end
181
+ end
182
+
183
+ end
184
+ end