flydata 0.7.12 → 0.7.13

Sign up to get free protection for your applications and to get access to all the features.
Files changed (34) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +1 -0
  3. data/VERSION +1 -1
  4. data/flydata-core/lib/flydata-core/oracle/config.rb +25 -0
  5. data/flydata-core/lib/flydata-core/oracle/oracle_client.rb +48 -0
  6. data/flydata-core/lib/flydata-core/oracle/query_helper.rb +20 -0
  7. data/flydata-core/lib/flydata-core/oracle/source_pos.rb +63 -0
  8. data/flydata-core/lib/flydata-core/table_def/oracle_table_def.rb +167 -0
  9. data/flydata-core/spec/oracle/config_spec.rb +45 -0
  10. data/flydata-core/spec/oracle/source_pos_spec.rb +101 -0
  11. data/flydata.gemspec +0 -0
  12. data/lib/flydata/command/sync.rb +14 -4
  13. data/lib/flydata/source.rb +1 -0
  14. data/lib/flydata/source/sync_repair.rb +25 -0
  15. data/lib/flydata/source_mysql/generate_source_dump.rb +2 -1
  16. data/lib/flydata/source_mysql/mysql_accessible.rb +30 -0
  17. data/lib/flydata/source_mysql/parser/dump_parser.rb +0 -40
  18. data/lib/flydata/source_mysql/sync_database_size_check.rb +29 -0
  19. data/lib/flydata/source_mysql/sync_repair.rb +26 -0
  20. data/lib/flydata/source_oracle/data_entry.rb +24 -0
  21. data/lib/flydata/source_oracle/generate_source_dump.rb +184 -0
  22. data/lib/flydata/source_oracle/oracle_component.rb +12 -0
  23. data/lib/flydata/source_oracle/parse_dump_and_send.rb +128 -0
  24. data/lib/flydata/source_oracle/plugin_support/context.rb +13 -0
  25. data/lib/flydata/source_oracle/plugin_support/source_position_file.rb +14 -0
  26. data/lib/flydata/source_oracle/query_based_sync/diff_query_generator.rb +122 -0
  27. data/lib/flydata/source_oracle/setup.rb +24 -0
  28. data/lib/flydata/source_oracle/source_pos.rb +18 -0
  29. data/lib/flydata/source_oracle/sync.rb +15 -0
  30. data/lib/flydata/source_oracle/sync_generate_table_ddl.rb +64 -0
  31. data/lib/flydata/source_oracle/table_meta.rb +220 -0
  32. data/lib/flydata/source_postgresql/sync_repair.rb +13 -0
  33. data/spec/flydata/source_mysql/generate_source_dump_spec.rb +2 -2
  34. metadata +27 -3
data/flydata.gemspec CHANGED
Binary file
@@ -436,6 +436,7 @@ EOS
436
436
  sync_fm = create_sync_file_manager(de)
437
437
  context = source.source_pos
438
438
  set_current_tables
439
+
439
440
  # Stop agent. Check sync and make sure the state is :STUCK_AT_UPLOAD
440
441
  # Get table status for the tables.
441
442
  status, corrupt_master_pos_files, pos_mismatch_tables, gap_tables, table_status_hash =
@@ -538,9 +539,19 @@ EOS
538
539
  end
539
540
  end
540
541
  end
541
- # if sent_source_pos is nil, it means sync has started for none of tables. No need
542
- # to repair positions nor clean buffer data.
542
+
543
+ unless unrepairable_tables.empty?
544
+ # Notify us, through error logs, of tables whose source position has expired
545
+ log_error_stderr "[error]: Failed to repair tables due to expired source position. These tables need to be re-synced - #{unrepairable_tables.join(", ")}"
546
+ end
547
+
548
+ # If sent_source_pos is nil, it means:
549
+ # - Sync has started for none of the tables
550
+ # - None of the tables are broken
551
+ # - All of the broken tables have an expired source position
552
+ # No need to repair positions nor clean buffer data.
543
553
  if sent_source_pos
554
+ # This logic is unreachable since sent_source_pos cannot be less than oldest_source_pos
544
555
  if oldest_source_pos && sent_source_pos < oldest_source_pos
545
556
  e = AgentError.new("Repair failed due to expired source position")
546
557
  e.description = <<EOS
@@ -1512,9 +1523,8 @@ Thank you for using FlyData!
1512
1523
  end
1513
1524
  end
1514
1525
 
1515
- # TODO implement
1516
1526
  def get_oldest_available_source_pos
1517
- nil
1527
+ source.sync_repair.get_oldest_available_source_pos
1518
1528
  end
1519
1529
  end
1520
1530
  end
@@ -24,6 +24,7 @@ module Source
24
24
  "FileDataEntry" => :source_file,
25
25
  "RedshiftPostgresqlDataEntry" => :source_postgresql,
26
26
  "RedshiftZendeskDataEntry" => :source_zendesk,
27
+ "RedshiftOracleDataEntry" => :source_oracle,
27
28
  }
28
29
  def self.component_class_for(component_sym, de)
29
30
  source_sym = DATA_ENTRY_TYPE_MAP[de['type']]
@@ -0,0 +1,25 @@
1
+ require 'flydata/source'
2
+ require 'flydata/source/component'
3
+ require 'flydata/source/errors'
4
+
5
+ module Flydata
6
+ module Source
7
+
8
+ class SyncRepair < Component
9
+ def self.inherited(child_class)
10
+ Source.register(child_class, self)
11
+ end
12
+
13
+ # Public Interface: Get the oldest available source pos information
14
+ #
15
+ # Called from sync:repair command to determine a master position for repair.
16
+ # If this returns nil, sync:repair sets any oldest source position to source pos.
17
+ #
18
+ # Raises exception when failing to get source positions
19
+ def self.get_oldest_available_source_pos
20
+ nil
21
+ end
22
+ end
23
+
24
+ end
25
+ end
@@ -3,6 +3,7 @@ require 'flydata/preference/data_entry_preference'
3
3
  require 'flydata/source_mysql/mysql_compatibility_check'
4
4
  require 'flydata/source_mysql/parser/dump_parser'
5
5
  require 'flydata-core/mysql/binlog_pos'
6
+ require 'flydata/source_mysql/sync_database_size_check'
6
7
 
7
8
  module Flydata
8
9
  module SourceMysql
@@ -34,7 +35,7 @@ class GenerateSourceDump < Source::GenerateSourceDump
34
35
 
35
36
  def dump_size(tables)
36
37
  opts = de['mysql_data_entry_preference'].merge({"tables" => tables})
37
- Parser::DatabaseSizeCheck.new(opts).get_db_bytesize
38
+ SyncDatabaseSizeCheck.new(opts).get_db_bytesize
38
39
  end
39
40
 
40
41
  def dump(tables, file_path = nil, &src_pos_callback)
@@ -0,0 +1,30 @@
1
+ require 'mysql2'
2
+ require 'flydata-core/mysql/config'
3
+
4
+ module Flydata
5
+ module SourceMysql
6
+ module MysqlAccessible
7
+ def initialize(conf)
8
+ @conf = conf
9
+ @mysql_db_opts = build_mysql_db_opts(conf)
10
+ end
11
+
12
+ def exec_mysql_query(query)
13
+ cli = mysql_client
14
+ cli.query(query)
15
+ ensure
16
+ if cli
17
+ cli.close rescue nil
18
+ end
19
+ end
20
+
21
+ def build_mysql_db_opts(conf)
22
+ FlydataCore::Mysql::Config.build_mysql_db_opts(conf)
23
+ end
24
+
25
+ def mysql_client
26
+ Mysql2::Client.new(@mysql_db_opts)
27
+ end
28
+ end
29
+ end
30
+ end
@@ -12,18 +12,6 @@ module Flydata
12
12
  module SourceMysql
13
13
  module Parser
14
14
 
15
- module MysqlAccessible
16
- def mysql_conf(conf)
17
- @mysql_conf = FlydataCore::Mysql::Config.build_mysql_db_opts(conf)
18
- end
19
-
20
- def mysql_cli(conf = nil)
21
- mysql_conf(conf) if conf
22
- return FlydataMysqlClient.new(@mysql_conf) if @mysql_conf
23
- nil
24
- end
25
- end
26
-
27
15
  module DumpStreamIO
28
16
  # return position
29
17
  # sync command doesn't resume if pos is -1 in dump position file
@@ -625,34 +613,6 @@ EOS
625
613
  end
626
614
  end
627
615
 
628
- class DatabaseSizeCheck
629
- include MysqlAccessible
630
-
631
- SIZE_CHECK_QUERY = <<EOT
632
- SELECT
633
- SUM(data_length) bytesize
634
- FROM
635
- information_schema.tables
636
- WHERE
637
- table_schema = '%s' AND table_name in (%s);
638
- EOT
639
-
640
- def initialize(de_conf)
641
- @de_conf = de_conf
642
- @database = de_conf['database']
643
- @tables = de_conf['tables']
644
- @query = SIZE_CHECK_QUERY % [@database, @tables.collect{|t| "'#{t}'"}.join(',')]
645
- end
646
-
647
- def get_db_bytesize
648
- client = mysql_cli(@de_conf)
649
- result = client.query(@query)
650
- return result.first['bytesize'].to_i
651
- ensure
652
- client.close rescue nil
653
- end
654
- end
655
-
656
616
  # Read and buffer data in a separate thread
657
617
  class AsyncIO
658
618
  MAX_ITEMS = 200
@@ -0,0 +1,29 @@
1
+ require 'flydata/source_mysql/mysql_accessible'
2
+
3
+ module Flydata
4
+ module SourceMysql
5
+ class SyncDatabaseSizeCheck
6
+ include MysqlAccessible
7
+
8
+ SIZE_CHECK_QUERY = <<EOT
9
+ SELECT
10
+ SUM(data_length) bytesize
11
+ FROM
12
+ information_schema.tables
13
+ WHERE
14
+ table_schema = '%s' AND table_name in (%s);
15
+ EOT
16
+
17
+ def initialize(de_conf)
18
+ super
19
+ @query = SIZE_CHECK_QUERY % [
20
+ de_conf['database'],
21
+ tables = de_conf['tables'].collect{|t| "'#{t}'"}.join(',')]
22
+ end
23
+
24
+ def get_db_bytesize
25
+ exec_mysql_query(@query).first['bytesize'].to_i
26
+ end
27
+ end
28
+ end
29
+ end
@@ -0,0 +1,26 @@
1
+ require 'flydata/source/sync_repair'
2
+ require 'flydata/source_mysql/mysql_accessible'
3
+ require 'flydata/source_mysql/source_pos'
4
+
5
+ module Flydata
6
+ module SourceMysql
7
+
8
+ class SyncRepair < Source::SyncRepair
9
+ def get_oldest_available_source_pos
10
+ binary_logs = BinarylogsCheck.new(de[Sync::SOURCE_PREFERENCE_NAME]).get_binary_logs
11
+ oldest_binlog_str = binary_logs.first.values.join("\t")
12
+ oldest_binlog = source.source_pos.create_source_pos(oldest_binlog_str)
13
+ end
14
+
15
+ class BinarylogsCheck
16
+ include MysqlAccessible
17
+
18
+ SHOW_BINARY_LOGS_QUERY = "SHOW BINARY LOGS;"
19
+ def get_binary_logs
20
+ exec_mysql_query(SHOW_BINARY_LOGS_QUERY)
21
+ end
22
+ end
23
+ end
24
+
25
+ end
26
+ end
@@ -0,0 +1,24 @@
1
+ require 'flydata/source/data_entry'
2
+
3
+ module Flydata
4
+ module SourceOracle
5
+
6
+ class DataEntry < Source::DataEntry
7
+ CONFIG_PARAMS = {
8
+ oracle_data_entry_preference: {
9
+ database: {},
10
+ tables: {},
11
+ tables_append_only: {},
12
+ pk_override: {},
13
+ table_attributes: {},
14
+ host: {},
15
+ port: {},
16
+ username: {},
17
+ password: {encrypted: true},
18
+ schema: {},
19
+ }
20
+ }
21
+ end
22
+
23
+ end
24
+ end
@@ -0,0 +1,184 @@
1
+ require 'flydata/source/generate_source_dump'
2
+ require 'flydata/preference/data_entry_preference'
3
+ require 'flydata/source_oracle/oracle_component'
4
+ require 'flydata/source_oracle/query_based_sync/diff_query_generator'
5
+ require 'flydata/source_oracle/table_meta'
6
+ require 'flydata-core/oracle/source_pos'
7
+ require 'flydata-core/oracle/oracle_client'
8
+ require 'flydata-core/oracle/query_helper'
9
+ require 'msgpack'
10
+
11
+ module Flydata
12
+ module SourceOracle
13
+
14
+ class GenerateSourceDump < Source::GenerateSourceDump
15
+ include OracleComponent
16
+
17
+ def run_compatibility_check(dump_dir, backup_dir)
18
+ %w(host username database).each do |k|
19
+ if de_prefs[k].to_s.empty?
20
+ raise "'#{k}' is required. Set the value in the conf file " +
21
+ "-> #{Flydata::Preference::DataEntryPreference.conf_path(de)}"
22
+ end
23
+ end
24
+ end
25
+
26
+ def confirmation_items
27
+ items = {
28
+ "host" => de_prefs['host'],
29
+ "port" => de_prefs['port'],
30
+ "username" => de_prefs['username'],
31
+ "database" => de_prefs['database'],
32
+ "schema" => de_prefs['schema'],
33
+ }
34
+
35
+ items
36
+ end
37
+
38
+ DUMP_SIZE_QUERY = <<EOS
39
+ SELECT
40
+ sum(bytes) as total_size
41
+ FROM
42
+ (SELECT segment_name table_name, owner, bytes
43
+ FROM dba_segments
44
+ WHERE segment_type IN ('TABLE','TABLE PARTITION','TABLE SUBPARTITION')
45
+ UNION ALL
46
+ SELECT i.table_name, i.owner, s.bytes
47
+ FROM dba_indexes i, dba_segments s
48
+ WHERE s.segment_name = i.index_name
49
+ AND s.owner = i.owner
50
+ AND s.segment_type IN ('INDEX','INDEX PARTITION','INDEX SUBPARTITION')
51
+ UNION ALL
52
+ SELECT l.table_name, l.owner, s.bytes
53
+ FROM dba_lobs l, dba_segments s
54
+ WHERE s.segment_name = l.segment_name
55
+ AND s.owner = l.owner
56
+ AND s.segment_type IN ('LOBSEGMENT','LOB PARTITION')
57
+ UNION ALL
58
+ SELECT l.table_name, l.owner, s.bytes
59
+ FROM dba_lobs l, dba_segments s
60
+ WHERE s.segment_name = l.index_name
61
+ AND s.owner = l.owner
62
+ AND s.segment_type = 'LOBINDEX')
63
+ WHERE owner = %{schema} and table_name in (%{tables})
64
+ EOS
65
+
66
+ def dump_size(tables)
67
+ cli = FlydataCore::Oracle::OracleClient.new(de_prefs)
68
+
69
+ query = DUMP_SIZE_QUERY % {
70
+ schema: FlydataCore::Oracle::QueryHelper.schema_as_value(de_prefs['schema'],
71
+ de_prefs['username']),
72
+ tables: FlydataCore::Oracle::QueryHelper.tables_as_value(tables)
73
+ }
74
+
75
+ cursor = cli.query(query)
76
+ cursor.fetch_hash['TOTAL_SIZE'].to_i
77
+ ensure
78
+ cursor.close rescue nil if cursor
79
+ cli.close if cli
80
+ end
81
+
82
+ def dump(tables, file_path = nil, &src_pos_callback)
83
+ io = nil
84
+ if file_path
85
+ io = File.open(file_path, "w")
86
+ else
87
+ raise "dump via pipe has not been implemented yet"
88
+ end
89
+
90
+ table_meta = Flydata::SourceOracle::TableMeta.new(de_prefs, tables)
91
+ cli = FlydataCore::Oracle::OracleClient.new(de_prefs)
92
+ table_meta.reload(cli)
93
+
94
+ tables_missing_meta = tables.select{|t| tm = table_meta[t]; tm.nil? || tm.empty?}
95
+ unless tables_missing_meta.empty?
96
+ raise "Tables are not available. Check if the following table(s) exist and are visible: #{tables_missing_meta.join(",")}"
97
+ end
98
+
99
+ context = source.sync_generate_table_ddl(dp, nil)
100
+ source_pos = get_source_pos(table_meta.current_scn, &src_pos_callback)
101
+
102
+ options = de_prefs.merge(table_meta: table_meta)
103
+ missing_tables = context.each_source_tabledef(tables, options) do |tabledef, error|
104
+ dump_table(tabledef, source_pos, io, cli) if tabledef
105
+ end
106
+
107
+ nil
108
+ ensure
109
+ cli.close if cli
110
+ io.close if io
111
+ end
112
+
113
+ private
114
+
115
+ def get_source_pos(snapshot, &src_pos_callback)
116
+ src_pos = FlydataCore::Oracle::SourcePos.new(snapshot)
117
+ src_pos_callback.call(nil, src_pos)
118
+
119
+ src_pos
120
+ end
121
+
122
+ NUM_ROWS = 50000
123
+
124
+ def dump_table(tabledef, source_pos, io, cli)
125
+ dump_source_table(tabledef, io)
126
+
127
+ last_pks = nil
128
+ loop do
129
+ num_rows, last_pks =
130
+ dump_table_chunk(tabledef.table_name, de_prefs['schema'], source_pos, NUM_ROWS,
131
+ tabledef, tabledef.pk_columns, last_pks, io, cli)
132
+ break if num_rows < NUM_ROWS
133
+ end
134
+ end
135
+
136
+ def dump_source_table(tabledef, io)
137
+ columns = {}
138
+ tabledef.column_def.each do |column_name, columndef_str|
139
+ columndef_hash = eval columndef_str
140
+ columns[column_name] = {"column_name" => column_name,
141
+ "format_type" => columndef_hash["data_type"]}
142
+ end
143
+ source_table_hash = { "table_name" => tabledef.table_name,
144
+ "columns" => columns }
145
+
146
+ io.write(source_table_hash.to_msgpack)
147
+ end
148
+
149
+ def dump_table_chunk(table, schema, source_pos, num_rows,
150
+ tabledef, pk_columns, last_pks, io, cli)
151
+ query = Flydata::SourceOracle::QueryBasedSync::DiffQueryGenerator.new(
152
+ table, schema,
153
+ columns: tabledef.columns,
154
+ to_scn: source_pos.scn,
155
+ pk_columns: pk_columns,
156
+ last_pks: last_pks,
157
+ limit: num_rows).build_query
158
+
159
+ column_names = tabledef.columns.inject([]) do |ary, h|
160
+ ary << h[:column]
161
+ end
162
+
163
+ res = cli.query(query)
164
+ count = 0
165
+ last_row = nil
166
+ while (row = res.fetch_hash)
167
+ count += 1
168
+ last_row = row
169
+ dump_row(row, column_names, io)
170
+ end
171
+
172
+ last_pks = last_row ? pk_columns.collect{|col| last_row[col]} : nil
173
+
174
+ [count, last_pks]
175
+ end
176
+
177
+ def dump_row(row, columns, io)
178
+ data = columns.collect{|col| row[col] }.to_msgpack
179
+ io.write(data)
180
+ end
181
+ end
182
+
183
+ end
184
+ end