flydata 0.7.12 → 0.7.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +1 -0
- data/VERSION +1 -1
- data/flydata-core/lib/flydata-core/oracle/config.rb +25 -0
- data/flydata-core/lib/flydata-core/oracle/oracle_client.rb +48 -0
- data/flydata-core/lib/flydata-core/oracle/query_helper.rb +20 -0
- data/flydata-core/lib/flydata-core/oracle/source_pos.rb +63 -0
- data/flydata-core/lib/flydata-core/table_def/oracle_table_def.rb +167 -0
- data/flydata-core/spec/oracle/config_spec.rb +45 -0
- data/flydata-core/spec/oracle/source_pos_spec.rb +101 -0
- data/flydata.gemspec +0 -0
- data/lib/flydata/command/sync.rb +14 -4
- data/lib/flydata/source.rb +1 -0
- data/lib/flydata/source/sync_repair.rb +25 -0
- data/lib/flydata/source_mysql/generate_source_dump.rb +2 -1
- data/lib/flydata/source_mysql/mysql_accessible.rb +30 -0
- data/lib/flydata/source_mysql/parser/dump_parser.rb +0 -40
- data/lib/flydata/source_mysql/sync_database_size_check.rb +29 -0
- data/lib/flydata/source_mysql/sync_repair.rb +26 -0
- data/lib/flydata/source_oracle/data_entry.rb +24 -0
- data/lib/flydata/source_oracle/generate_source_dump.rb +184 -0
- data/lib/flydata/source_oracle/oracle_component.rb +12 -0
- data/lib/flydata/source_oracle/parse_dump_and_send.rb +128 -0
- data/lib/flydata/source_oracle/plugin_support/context.rb +13 -0
- data/lib/flydata/source_oracle/plugin_support/source_position_file.rb +14 -0
- data/lib/flydata/source_oracle/query_based_sync/diff_query_generator.rb +122 -0
- data/lib/flydata/source_oracle/setup.rb +24 -0
- data/lib/flydata/source_oracle/source_pos.rb +18 -0
- data/lib/flydata/source_oracle/sync.rb +15 -0
- data/lib/flydata/source_oracle/sync_generate_table_ddl.rb +64 -0
- data/lib/flydata/source_oracle/table_meta.rb +220 -0
- data/lib/flydata/source_postgresql/sync_repair.rb +13 -0
- data/spec/flydata/source_mysql/generate_source_dump_spec.rb +2 -2
- metadata +27 -3
data/flydata.gemspec
CHANGED
Binary file
|
data/lib/flydata/command/sync.rb
CHANGED
@@ -436,6 +436,7 @@ EOS
|
|
436
436
|
sync_fm = create_sync_file_manager(de)
|
437
437
|
context = source.source_pos
|
438
438
|
set_current_tables
|
439
|
+
|
439
440
|
# Stop agent. Check sync and make sure the state is :STUCK_AT_UPLOAD
|
440
441
|
# Get table status for the tables.
|
441
442
|
status, corrupt_master_pos_files, pos_mismatch_tables, gap_tables, table_status_hash =
|
@@ -538,9 +539,19 @@ EOS
|
|
538
539
|
end
|
539
540
|
end
|
540
541
|
end
|
541
|
-
|
542
|
-
|
542
|
+
|
543
|
+
unless unrepairable_tables.empty?
|
544
|
+
# Report tables whose source position has expired to us through the error log
|
545
|
+
log_error_stderr "[error]: Failed to repair tables due to expired source position. These tables need to be re-synced - #{unrepairable_tables.join(", ")}"
|
546
|
+
end
|
547
|
+
|
548
|
+
# If sent_source_pos is nil, it means:
|
549
|
+
# - Sync has not started for any of the tables
|
550
|
+
# - None of the tables are broken
|
551
|
+
# - All of the broken tables have an expired source position
|
552
|
+
# No need to repair positions or clean buffer data.
|
543
553
|
if sent_source_pos
|
554
|
+
# This branch should be unreachable since sent_source_pos cannot be less than oldest_source_pos
|
544
555
|
if oldest_source_pos && sent_source_pos < oldest_source_pos
|
545
556
|
e = AgentError.new("Repair failed due to expired source position")
|
546
557
|
e.description = <<EOS
|
@@ -1512,9 +1523,8 @@ Thank you for using FlyData!
|
|
1512
1523
|
end
|
1513
1524
|
end
|
1514
1525
|
|
1515
|
-
# TODO implement
|
1516
1526
|
def get_oldest_available_source_pos
|
1517
|
-
|
1527
|
+
source.sync_repair.get_oldest_available_source_pos
|
1518
1528
|
end
|
1519
1529
|
end
|
1520
1530
|
end
|
data/lib/flydata/source.rb
CHANGED
@@ -24,6 +24,7 @@ module Source
|
|
24
24
|
"FileDataEntry" => :source_file,
|
25
25
|
"RedshiftPostgresqlDataEntry" => :source_postgresql,
|
26
26
|
"RedshiftZendeskDataEntry" => :source_zendesk,
|
27
|
+
"RedshiftOracleDataEntry" => :source_oracle,
|
27
28
|
}
|
28
29
|
def self.component_class_for(component_sym, de)
|
29
30
|
source_sym = DATA_ENTRY_TYPE_MAP[de['type']]
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'flydata/source'
|
2
|
+
require 'flydata/source/component'
|
3
|
+
require 'flydata/source/errors'
|
4
|
+
|
5
|
+
module Flydata
|
6
|
+
module Source
|
7
|
+
|
8
|
+
class SyncRepair < Component
|
9
|
+
def self.inherited(child_class)
|
10
|
+
Source.register(child_class, self)
|
11
|
+
end
|
12
|
+
|
13
|
+
# Public Interface: Get oldest available source pos information
|
14
|
+
#
|
15
|
+
# Called from sync:repair command to determine a master position for repair.
|
16
|
+
# If this returns nil, sync:repair sets any oldest source position to source pos.
|
17
|
+
#
|
18
|
+
# Raises an exception when it fails to get source positions
|
19
|
+
def self.get_oldest_available_source_pos
|
20
|
+
nil
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
end
|
25
|
+
end
|
@@ -3,6 +3,7 @@ require 'flydata/preference/data_entry_preference'
|
|
3
3
|
require 'flydata/source_mysql/mysql_compatibility_check'
|
4
4
|
require 'flydata/source_mysql/parser/dump_parser'
|
5
5
|
require 'flydata-core/mysql/binlog_pos'
|
6
|
+
require 'flydata/source_mysql/sync_database_size_check'
|
6
7
|
|
7
8
|
module Flydata
|
8
9
|
module SourceMysql
|
@@ -34,7 +35,7 @@ class GenerateSourceDump < Source::GenerateSourceDump
|
|
34
35
|
|
35
36
|
def dump_size(tables)
|
36
37
|
opts = de['mysql_data_entry_preference'].merge({"tables" => tables})
|
37
|
-
|
38
|
+
SyncDatabaseSizeCheck.new(opts).get_db_bytesize
|
38
39
|
end
|
39
40
|
|
40
41
|
def dump(tables, file_path = nil, &src_pos_callback)
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'mysql2'
|
2
|
+
require 'flydata-core/mysql/config'
|
3
|
+
|
4
|
+
module Flydata
|
5
|
+
module SourceMysql
|
6
|
+
module MysqlAccessible
|
7
|
+
def initialize(conf)
|
8
|
+
@conf = conf
|
9
|
+
@mysql_db_opts = build_mysql_db_opts(conf)
|
10
|
+
end
|
11
|
+
|
12
|
+
def exec_mysql_query(query)
|
13
|
+
cli = mysql_client
|
14
|
+
cli.query(query)
|
15
|
+
ensure
|
16
|
+
if cli
|
17
|
+
cli.close rescue nil
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
def build_mysql_db_opts(conf)
|
22
|
+
FlydataCore::Mysql::Config.build_mysql_db_opts(conf)
|
23
|
+
end
|
24
|
+
|
25
|
+
def mysql_client
|
26
|
+
Mysql2::Client.new(@mysql_db_opts)
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
@@ -12,18 +12,6 @@ module Flydata
|
|
12
12
|
module SourceMysql
|
13
13
|
module Parser
|
14
14
|
|
15
|
-
module MysqlAccessible
|
16
|
-
def mysql_conf(conf)
|
17
|
-
@mysql_conf = FlydataCore::Mysql::Config.build_mysql_db_opts(conf)
|
18
|
-
end
|
19
|
-
|
20
|
-
def mysql_cli(conf = nil)
|
21
|
-
mysql_conf(conf) if conf
|
22
|
-
return FlydataMysqlClient.new(@mysql_conf) if @mysql_conf
|
23
|
-
nil
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
15
|
module DumpStreamIO
|
28
16
|
# return position
|
29
17
|
# sync command doesn't resume if pos is -1 in dump position file
|
@@ -625,34 +613,6 @@ EOS
|
|
625
613
|
end
|
626
614
|
end
|
627
615
|
|
628
|
-
class DatabaseSizeCheck
|
629
|
-
include MysqlAccessible
|
630
|
-
|
631
|
-
SIZE_CHECK_QUERY = <<EOT
|
632
|
-
SELECT
|
633
|
-
SUM(data_length) bytesize
|
634
|
-
FROM
|
635
|
-
information_schema.tables
|
636
|
-
WHERE
|
637
|
-
table_schema = '%s' AND table_name in (%s);
|
638
|
-
EOT
|
639
|
-
|
640
|
-
def initialize(de_conf)
|
641
|
-
@de_conf = de_conf
|
642
|
-
@database = de_conf['database']
|
643
|
-
@tables = de_conf['tables']
|
644
|
-
@query = SIZE_CHECK_QUERY % [@database, @tables.collect{|t| "'#{t}'"}.join(',')]
|
645
|
-
end
|
646
|
-
|
647
|
-
def get_db_bytesize
|
648
|
-
client = mysql_cli(@de_conf)
|
649
|
-
result = client.query(@query)
|
650
|
-
return result.first['bytesize'].to_i
|
651
|
-
ensure
|
652
|
-
client.close rescue nil
|
653
|
-
end
|
654
|
-
end
|
655
|
-
|
656
616
|
# Read and buffer data in a separate thread
|
657
617
|
class AsyncIO
|
658
618
|
MAX_ITEMS = 200
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'flydata/source_mysql/mysql_accessible'
|
2
|
+
|
3
|
+
module Flydata
|
4
|
+
module SourceMysql
|
5
|
+
class SyncDatabaseSizeCheck
|
6
|
+
include MysqlAccessible
|
7
|
+
|
8
|
+
SIZE_CHECK_QUERY = <<EOT
|
9
|
+
SELECT
|
10
|
+
SUM(data_length) bytesize
|
11
|
+
FROM
|
12
|
+
information_schema.tables
|
13
|
+
WHERE
|
14
|
+
table_schema = '%s' AND table_name in (%s);
|
15
|
+
EOT
|
16
|
+
|
17
|
+
def initialize(de_conf)
|
18
|
+
super
|
19
|
+
@query = SIZE_CHECK_QUERY % [
|
20
|
+
de_conf['database'],
|
21
|
+
tables = de_conf['tables'].collect{|t| "'#{t}'"}.join(',')]
|
22
|
+
end
|
23
|
+
|
24
|
+
def get_db_bytesize
|
25
|
+
exec_mysql_query(@query).first['bytesize'].to_i
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'flydata/source/sync_repair'
|
2
|
+
require 'flydata/source_mysql/mysql_accessible'
|
3
|
+
require 'flydata/source_mysql/source_pos'
|
4
|
+
|
5
|
+
module Flydata
|
6
|
+
module SourceMysql
|
7
|
+
|
8
|
+
class SyncRepair < Source::SyncRepair
|
9
|
+
def get_oldest_available_source_pos
|
10
|
+
binary_logs = BinarylogsCheck.new(de[Sync::SOURCE_PREFERENCE_NAME]).get_binary_logs
|
11
|
+
oldest_binlog_str = binary_logs.first.values.join("\t")
|
12
|
+
oldest_binlog = source.source_pos.create_source_pos(oldest_binlog_str)
|
13
|
+
end
|
14
|
+
|
15
|
+
class BinarylogsCheck
|
16
|
+
include MysqlAccessible
|
17
|
+
|
18
|
+
SHOW_BINARY_LOGS_QUERY = "SHOW BINARY LOGS;"
|
19
|
+
def get_binary_logs
|
20
|
+
exec_mysql_query(SHOW_BINARY_LOGS_QUERY)
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
end
|
26
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'flydata/source/data_entry'
|
2
|
+
|
3
|
+
module Flydata
|
4
|
+
module SourceOracle
|
5
|
+
|
6
|
+
class DataEntry < Source::DataEntry
|
7
|
+
CONFIG_PARAMS = {
|
8
|
+
oracle_data_entry_preference: {
|
9
|
+
database: {},
|
10
|
+
tables: {},
|
11
|
+
tables_append_only: {},
|
12
|
+
pk_override: {},
|
13
|
+
table_attributes: {},
|
14
|
+
host: {},
|
15
|
+
port: {},
|
16
|
+
username: {},
|
17
|
+
password: {encrypted: true},
|
18
|
+
schema: {},
|
19
|
+
}
|
20
|
+
}
|
21
|
+
end
|
22
|
+
|
23
|
+
end
|
24
|
+
end
|
@@ -0,0 +1,184 @@
|
|
1
|
+
require 'flydata/source/generate_source_dump'
|
2
|
+
require 'flydata/preference/data_entry_preference'
|
3
|
+
require 'flydata/source_oracle/oracle_component'
|
4
|
+
require 'flydata/source_oracle/query_based_sync/diff_query_generator'
|
5
|
+
require 'flydata/source_oracle/table_meta'
|
6
|
+
require 'flydata-core/oracle/source_pos'
|
7
|
+
require 'flydata-core/oracle/oracle_client'
|
8
|
+
require 'flydata-core/oracle/query_helper'
|
9
|
+
require 'msgpack'
|
10
|
+
|
11
|
+
module Flydata
|
12
|
+
module SourceOracle
|
13
|
+
|
14
|
+
class GenerateSourceDump < Source::GenerateSourceDump
|
15
|
+
include OracleComponent
|
16
|
+
|
17
|
+
def run_compatibility_check(dump_dir, backup_dir)
|
18
|
+
%w(host username database).each do |k|
|
19
|
+
if de_prefs[k].to_s.empty?
|
20
|
+
raise "'#{k}' is required. Set the value in the conf file " +
|
21
|
+
"-> #{Flydata::Preference::DataEntryPreference.conf_path(de)}"
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
def confirmation_items
|
27
|
+
items = {
|
28
|
+
"host" => de_prefs['host'],
|
29
|
+
"port" => de_prefs['port'],
|
30
|
+
"username" => de_prefs['username'],
|
31
|
+
"database" => de_prefs['database'],
|
32
|
+
"schema" => de_prefs['schema'],
|
33
|
+
}
|
34
|
+
|
35
|
+
items
|
36
|
+
end
|
37
|
+
|
38
|
+
DUMP_SIZE_QUERY = <<EOS
|
39
|
+
SELECT
|
40
|
+
sum(bytes) as total_size
|
41
|
+
FROM
|
42
|
+
(SELECT segment_name table_name, owner, bytes
|
43
|
+
FROM dba_segments
|
44
|
+
WHERE segment_type IN ('TABLE','TABLE PARTITION','TABLE SUBPARTITION')
|
45
|
+
UNION ALL
|
46
|
+
SELECT i.table_name, i.owner, s.bytes
|
47
|
+
FROM dba_indexes i, dba_segments s
|
48
|
+
WHERE s.segment_name = i.index_name
|
49
|
+
AND s.owner = i.owner
|
50
|
+
AND s.segment_type IN ('INDEX','INDEX PARTITION','INDEX SUBPARTITION')
|
51
|
+
UNION ALL
|
52
|
+
SELECT l.table_name, l.owner, s.bytes
|
53
|
+
FROM dba_lobs l, dba_segments s
|
54
|
+
WHERE s.segment_name = l.segment_name
|
55
|
+
AND s.owner = l.owner
|
56
|
+
AND s.segment_type IN ('LOBSEGMENT','LOB PARTITION')
|
57
|
+
UNION ALL
|
58
|
+
SELECT l.table_name, l.owner, s.bytes
|
59
|
+
FROM dba_lobs l, dba_segments s
|
60
|
+
WHERE s.segment_name = l.index_name
|
61
|
+
AND s.owner = l.owner
|
62
|
+
AND s.segment_type = 'LOBINDEX')
|
63
|
+
WHERE owner = %{schema} and table_name in (%{tables})
|
64
|
+
EOS
|
65
|
+
|
66
|
+
def dump_size(tables)
|
67
|
+
cli = FlydataCore::Oracle::OracleClient.new(de_prefs)
|
68
|
+
|
69
|
+
query = DUMP_SIZE_QUERY % {
|
70
|
+
schema: FlydataCore::Oracle::QueryHelper.schema_as_value(de_prefs['schema'],
|
71
|
+
de_prefs['username']),
|
72
|
+
tables: FlydataCore::Oracle::QueryHelper.tables_as_value(tables)
|
73
|
+
}
|
74
|
+
|
75
|
+
cursor = cli.query(query)
|
76
|
+
cursor.fetch_hash['TOTAL_SIZE'].to_i
|
77
|
+
ensure
|
78
|
+
cursor.close rescue nil if cursor
|
79
|
+
cli.close if cli
|
80
|
+
end
|
81
|
+
|
82
|
+
def dump(tables, file_path = nil, &src_pos_callback)
|
83
|
+
io = nil
|
84
|
+
if file_path
|
85
|
+
io = File.open(file_path, "w")
|
86
|
+
else
|
87
|
+
raise "dump via pipe has not been implemented yet"
|
88
|
+
end
|
89
|
+
|
90
|
+
table_meta = Flydata::SourceOracle::TableMeta.new(de_prefs, tables)
|
91
|
+
cli = FlydataCore::Oracle::OracleClient.new(de_prefs)
|
92
|
+
table_meta.reload(cli)
|
93
|
+
|
94
|
+
tables_missing_meta = tables.select{|t| tm = table_meta[t]; tm.nil? || tm.empty?}
|
95
|
+
unless tables_missing_meta.empty?
|
96
|
+
raise "Tables are not available. Check if the following table(s) exist and are visible: #{tables_missing_meta.join(",")}"
|
97
|
+
end
|
98
|
+
|
99
|
+
context = source.sync_generate_table_ddl(dp, nil)
|
100
|
+
source_pos = get_source_pos(table_meta.current_scn, &src_pos_callback)
|
101
|
+
|
102
|
+
options = de_prefs.merge(table_meta: table_meta)
|
103
|
+
missing_tables = context.each_source_tabledef(tables, options) do |tabledef, error|
|
104
|
+
dump_table(tabledef, source_pos, io, cli) if tabledef
|
105
|
+
end
|
106
|
+
|
107
|
+
nil
|
108
|
+
ensure
|
109
|
+
cli.close if cli
|
110
|
+
io.close if io
|
111
|
+
end
|
112
|
+
|
113
|
+
private
|
114
|
+
|
115
|
+
def get_source_pos(snapshot, &src_pos_callback)
|
116
|
+
src_pos = FlydataCore::Oracle::SourcePos.new(snapshot)
|
117
|
+
src_pos_callback.call(nil, src_pos)
|
118
|
+
|
119
|
+
src_pos
|
120
|
+
end
|
121
|
+
|
122
|
+
NUM_ROWS = 50000
|
123
|
+
|
124
|
+
def dump_table(tabledef, source_pos, io, cli)
|
125
|
+
dump_source_table(tabledef, io)
|
126
|
+
|
127
|
+
last_pks = nil
|
128
|
+
loop do
|
129
|
+
num_rows, last_pks =
|
130
|
+
dump_table_chunk(tabledef.table_name, de_prefs['schema'], source_pos, NUM_ROWS,
|
131
|
+
tabledef, tabledef.pk_columns, last_pks, io, cli)
|
132
|
+
break if num_rows < NUM_ROWS
|
133
|
+
end
|
134
|
+
end
|
135
|
+
|
136
|
+
def dump_source_table(tabledef, io)
|
137
|
+
columns = {}
|
138
|
+
tabledef.column_def.each do |column_name, columndef_str|
|
139
|
+
columndef_hash = eval columndef_str
|
140
|
+
columns[column_name] = {"column_name" => column_name,
|
141
|
+
"format_type" => columndef_hash["data_type"]}
|
142
|
+
end
|
143
|
+
source_table_hash = { "table_name" => tabledef.table_name,
|
144
|
+
"columns" => columns }
|
145
|
+
|
146
|
+
io.write(source_table_hash.to_msgpack)
|
147
|
+
end
|
148
|
+
|
149
|
+
def dump_table_chunk(table, schema, source_pos, num_rows,
|
150
|
+
tabledef, pk_columns, last_pks, io, cli)
|
151
|
+
query = Flydata::SourceOracle::QueryBasedSync::DiffQueryGenerator.new(
|
152
|
+
table, schema,
|
153
|
+
columns: tabledef.columns,
|
154
|
+
to_scn: source_pos.scn,
|
155
|
+
pk_columns: pk_columns,
|
156
|
+
last_pks: last_pks,
|
157
|
+
limit: num_rows).build_query
|
158
|
+
|
159
|
+
column_names = tabledef.columns.inject([]) do |ary, h|
|
160
|
+
ary << h[:column]
|
161
|
+
end
|
162
|
+
|
163
|
+
res = cli.query(query)
|
164
|
+
count = 0
|
165
|
+
last_row = nil
|
166
|
+
while (row = res.fetch_hash)
|
167
|
+
count += 1
|
168
|
+
last_row = row
|
169
|
+
dump_row(row, column_names, io)
|
170
|
+
end
|
171
|
+
|
172
|
+
last_pks = last_row ? pk_columns.collect{|col| last_row[col]} : nil
|
173
|
+
|
174
|
+
[count, last_pks]
|
175
|
+
end
|
176
|
+
|
177
|
+
def dump_row(row, columns, io)
|
178
|
+
data = columns.collect{|col| row[col] }.to_msgpack
|
179
|
+
io.write(data)
|
180
|
+
end
|
181
|
+
end
|
182
|
+
|
183
|
+
end
|
184
|
+
end
|