sq-dbsync 1.0.7 → 1.0.8
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/sq/dbsync/database/common.rb +18 -1
- data/lib/sq/dbsync/database/connection.rb +3 -5
- data/lib/sq/dbsync/database/mysql.rb +12 -46
- data/lib/sq/dbsync/database/postgres.rb +2 -10
- data/lib/sq/dbsync/manager.rb +4 -3
- data/lib/sq/dbsync/version.rb +1 -1
- data/spec/database_helper.rb +4 -2
- data/spec/integration/batch_load_action_spec.rb +19 -15
- data/spec/integration/database_connection_spec.rb +7 -9
- data/spec/integration/incremental_load_action_spec.rb +14 -1
- data/spec/integration_helper.rb +4 -4
- metadata +2 -2
@@ -5,6 +5,17 @@ module Sq::Dbsync::Database
|
|
5
5
|
|
6
6
|
SQD = ::Sq::Dbsync
|
7
7
|
|
8
|
+
def initialize(opts, source_or_target)
|
9
|
+
db = Sequel.connect(opts)
|
10
|
+
super(db)
|
11
|
+
@db, @source_or_target = db, source_or_target
|
12
|
+
@charset = opts[:charset] if opts[:charset]
|
13
|
+
end
|
14
|
+
|
15
|
+
def inspect
|
16
|
+
"#<Database::#{self.class.name} #{source_or_target} #{opts[:database]}>"
|
17
|
+
end
|
18
|
+
|
8
19
|
def extract_to_file(table_name, columns, file_name)
|
9
20
|
extract_sql_to_file("SELECT %s FROM %s" % [
|
10
21
|
columns.join(', '),
|
@@ -34,7 +45,11 @@ module Sq::Dbsync::Database
|
|
34
45
|
|
35
46
|
def hash_schema(plan)
|
36
47
|
ensure_connection
|
37
|
-
Hash[schema(plan.source_table_name)]
|
48
|
+
Hash[schema(source? ? plan.source_table_name : plan.table_name)]
|
49
|
+
end
|
50
|
+
|
51
|
+
def source?
|
52
|
+
source_or_target == :source
|
38
53
|
end
|
39
54
|
|
40
55
|
def name
|
@@ -59,6 +74,8 @@ module Sq::Dbsync::Database
|
|
59
74
|
|
60
75
|
protected
|
61
76
|
|
77
|
+
attr_reader :db, :source_or_target, :charset
|
78
|
+
|
62
79
|
def execute!(cmd)
|
63
80
|
# psql doesn't return a non-zero error code when executing commands from
|
64
81
|
# a file. The best way I can come up with is to raise if anything is
|
@@ -9,14 +9,12 @@ module Sq::Dbsync::Database
|
|
9
9
|
# Factory class to abstract selection of a decorator to faciliate databases
|
10
10
|
# other than MySQL.
|
11
11
|
class Connection
|
12
|
-
def self.create(opts)
|
12
|
+
def self.create(opts, direction)
|
13
13
|
case opts[:brand]
|
14
14
|
when 'mysql'
|
15
|
-
|
16
|
-
db.charset = opts[:charset] if opts[:charset]
|
17
|
-
db
|
15
|
+
Sq::Dbsync::Database::Mysql.new(opts, direction)
|
18
16
|
when 'postgresql'
|
19
|
-
Sq::Dbsync::Database::Postgres.new(
|
17
|
+
Sq::Dbsync::Database::Postgres.new(opts, direction)
|
20
18
|
else
|
21
19
|
raise "Unsupported database: #{opts.inspect}"
|
22
20
|
end
|
@@ -13,28 +13,17 @@ module Sq::Dbsync::Database
|
|
13
13
|
# Decorator around a Sequel database object, providing some non-standard
|
14
14
|
# extensions required for effective ETL with MySQL.
|
15
15
|
class Mysql < Delegator
|
16
|
+
# 2 days is chosen as an arbitrary buffer
|
17
|
+
AUX_TIME_BUFFER = 60 * 60 * 24 * 2 # 2 days
|
18
|
+
LOAD_SQL = "LOAD DATA INFILE '%s' %s INTO TABLE %s %s (%s)"
|
16
19
|
|
17
20
|
include Common
|
18
21
|
|
19
|
-
attr_accessor :charset
|
20
|
-
|
21
|
-
def initialize(db)
|
22
|
-
super
|
23
|
-
@db = db
|
24
|
-
end
|
25
|
-
|
26
|
-
def inspect; "#<Database::Mysql #{opts[:database]}>"; end
|
27
|
-
|
28
22
|
def load_from_file(table_name, columns, file_name)
|
29
23
|
ensure_connection
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
table_name,
|
34
|
-
character_set,
|
35
|
-
escape_columns(columns)
|
36
|
-
]
|
37
|
-
db.run sql
|
24
|
+
db.run(LOAD_SQL % [
|
25
|
+
file_name, 'IGNORE', table_name, character_set, escape_columns(columns)
|
26
|
+
])
|
38
27
|
end
|
39
28
|
|
40
29
|
def set_lock_timeout(seconds)
|
@@ -46,13 +35,9 @@ module Sq::Dbsync::Database
|
|
46
35
|
# Very low lock wait timeout, since we don't want loads to be blocked
|
47
36
|
# waiting for long queries.
|
48
37
|
set_lock_timeout(10)
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
table_name,
|
53
|
-
character_set,
|
54
|
-
escape_columns(columns)
|
55
|
-
]
|
38
|
+
db.run(LOAD_SQL % [
|
39
|
+
file_name, 'REPLACE', table_name, character_set, escape_columns(columns)
|
40
|
+
])
|
56
41
|
rescue Sequel::DatabaseError => e
|
57
42
|
transient_regex =
|
58
43
|
/Lock wait timeout exceeded|Deadlock found when trying to get lock/
|
@@ -64,9 +49,6 @@ module Sq::Dbsync::Database
|
|
64
49
|
end
|
65
50
|
end
|
66
51
|
|
67
|
-
# 2 days is chosen as an arbitrary buffer
|
68
|
-
AUX_TIME_BUFFER = 60 * 60 * 24 * 2 # 2 days
|
69
|
-
|
70
52
|
# Deletes recent rows based on timestamp, but also allows filtering by an
|
71
53
|
# auxilary timestamp column for the case where the primary one is not
|
72
54
|
# indexed on the target (such as the DFR reports, where imported_at is not
|
@@ -93,21 +75,6 @@ module Sq::Dbsync::Database
|
|
93
75
|
count
|
94
76
|
end
|
95
77
|
|
96
|
-
# Overriden because the Sequel implementation does not work with partial
|
97
|
-
# permissions on a table. See:
|
98
|
-
# https://github.com/jeremyevans/sequel/issues/422
|
99
|
-
def table_exists?(table_name)
|
100
|
-
begin
|
101
|
-
!!db.schema(table_name, reload: true)
|
102
|
-
rescue Sequel::DatabaseError
|
103
|
-
false
|
104
|
-
end
|
105
|
-
end
|
106
|
-
|
107
|
-
def drop_table(table_name)
|
108
|
-
db.drop_table(table_name)
|
109
|
-
end
|
110
|
-
|
111
78
|
def switch_table(to_replace, new_table)
|
112
79
|
ensure_connection
|
113
80
|
|
@@ -131,8 +98,6 @@ module Sq::Dbsync::Database
|
|
131
98
|
|
132
99
|
protected
|
133
100
|
|
134
|
-
attr_reader :db
|
135
|
-
|
136
101
|
def extract_sql_to_file(sql, file_name)
|
137
102
|
file = sql_to_file(connection_settings + sql)
|
138
103
|
cmd = "set -o pipefail; mysql --skip-column-names"
|
@@ -147,7 +112,7 @@ module Sq::Dbsync::Database
|
|
147
112
|
]
|
148
113
|
end
|
149
114
|
|
150
|
-
cmd += " --default-character-set %s" %
|
115
|
+
cmd += " --default-character-set %s" % charset if charset
|
151
116
|
|
152
117
|
cmd += " %s" % opts.fetch(:database)
|
153
118
|
|
@@ -170,9 +135,10 @@ module Sq::Dbsync::Database
|
|
170
135
|
lock_timeout_sql(10)
|
171
136
|
end
|
172
137
|
|
138
|
+
def character_set; charset ? " character set #{charset}" : "" end
|
139
|
+
|
173
140
|
def lock_timeout_sql(seconds)
|
174
141
|
"SET SESSION innodb_lock_wait_timeout = %i;" % seconds
|
175
142
|
end
|
176
|
-
|
177
143
|
end
|
178
144
|
end
|
@@ -29,13 +29,6 @@ module Sq::Dbsync::Database
|
|
29
29
|
|
30
30
|
include Sq::Dbsync::Database::Common
|
31
31
|
|
32
|
-
def initialize(db)
|
33
|
-
super
|
34
|
-
@db = db
|
35
|
-
end
|
36
|
-
|
37
|
-
def inspect; "#<Database::Postgres #{opts[:database]}>"; end
|
38
|
-
|
39
32
|
def set_lock_timeout(seconds)
|
40
33
|
# Unimplemented
|
41
34
|
end
|
@@ -44,7 +37,8 @@ module Sq::Dbsync::Database
|
|
44
37
|
type_casts = plan.type_casts || {}
|
45
38
|
ensure_connection
|
46
39
|
|
47
|
-
|
40
|
+
table_name = source? ? plan.source_table_name : plan.table_name
|
41
|
+
result = schema(table_name).each do |col, metadata|
|
48
42
|
metadata[:source_db_type] ||= metadata[:db_type]
|
49
43
|
metadata[:db_type] = cast_psql_to_mysql(
|
50
44
|
metadata[:db_type], type_casts[col.to_s]
|
@@ -56,8 +50,6 @@ module Sq::Dbsync::Database
|
|
56
50
|
|
57
51
|
protected
|
58
52
|
|
59
|
-
attr_reader :db
|
60
|
-
|
61
53
|
def cast_psql_to_mysql(db_type, cast=nil)
|
62
54
|
CASTS.fetch(db_type, cast || db_type)
|
63
55
|
end
|
data/lib/sq/dbsync/manager.rb
CHANGED
@@ -105,7 +105,8 @@ class Sq::Dbsync::Manager
|
|
105
105
|
end
|
106
106
|
|
107
107
|
def target
|
108
|
-
|
108
|
+
opts = config[:target]
|
109
|
+
@target ||= Sq::Dbsync::Database::Connection.create(opts, :target)
|
109
110
|
end
|
110
111
|
|
111
112
|
def tables_to_load
|
@@ -124,7 +125,7 @@ class Sq::Dbsync::Manager
|
|
124
125
|
|
125
126
|
def sources
|
126
127
|
@sources ||= Hash[config[:sources].map do |name, opts|
|
127
|
-
[name, Sq::Dbsync::Database::Connection.create(opts)]
|
128
|
+
[name, Sq::Dbsync::Database::Connection.create(opts, :source)]
|
128
129
|
end]
|
129
130
|
end
|
130
131
|
|
@@ -217,7 +218,7 @@ class Sq::Dbsync::Manager
|
|
217
218
|
end
|
218
219
|
|
219
220
|
def db
|
220
|
-
@db ||= Database::Connection.create(config[:target])
|
221
|
+
@db ||= Database::Connection.create(config[:target], :target)
|
221
222
|
end
|
222
223
|
|
223
224
|
def transient_exceptions
|
data/lib/sq/dbsync/version.rb
CHANGED
data/spec/database_helper.rb
CHANGED
@@ -48,12 +48,14 @@ MB4_TEST_TARGET = db_options(database: 'sq_dbsync_test_target', charset:"utf8mb4
|
|
48
48
|
|
49
49
|
$target = nil
|
50
50
|
def test_target
|
51
|
-
$target ||= SQD::Database::Connection.create(TEST_TARGET)
|
51
|
+
$target ||= SQD::Database::Connection.create(TEST_TARGET, :target)
|
52
52
|
end
|
53
53
|
|
54
54
|
$sources = {}
|
55
55
|
def test_source(name)
|
56
|
-
$sources[name] ||= SQD::Database::Connection.create(
|
56
|
+
$sources[name] ||= SQD::Database::Connection.create(
|
57
|
+
TEST_SOURCES.fetch(name), :source
|
58
|
+
)
|
57
59
|
end
|
58
60
|
|
59
61
|
RSpec.configure do |config|
|
@@ -11,8 +11,9 @@ describe SQD::BatchLoadAction do
|
|
11
11
|
let!(:now) { @now = Time.now.utc }
|
12
12
|
let(:last_synced_at) { now - 10 }
|
13
13
|
let(:target) { test_target }
|
14
|
+
let(:target_table_name) { :test_table }
|
14
15
|
let(:table_plan) {{
|
15
|
-
table_name:
|
16
|
+
table_name: target_table_name,
|
16
17
|
source_table_name: :test_table,
|
17
18
|
columns: [:id, :col1, :updated_at],
|
18
19
|
source_db: source,
|
@@ -20,7 +21,7 @@ describe SQD::BatchLoadAction do
|
|
20
21
|
}}
|
21
22
|
let(:index) {{
|
22
23
|
index_on_col1: { columns: [:col1], unique: false }
|
23
|
-
}
|
24
|
+
}}
|
24
25
|
let(:registry) { SQD::TableRegistry.new(target) }
|
25
26
|
let(:action) { SQD::BatchLoadAction.new(
|
26
27
|
target,
|
@@ -59,14 +60,18 @@ describe SQD::BatchLoadAction do
|
|
59
60
|
end
|
60
61
|
end
|
61
62
|
|
62
|
-
|
63
|
-
|
63
|
+
describe 'when the source and destination table names differ' do
|
64
|
+
let(:target_table_name) { :target_test_table }
|
64
65
|
|
65
|
-
|
66
|
+
it 'copies source tables to target with matching schemas' do
|
67
|
+
start_time = now.to_f
|
66
68
|
|
67
|
-
|
68
|
-
|
69
|
-
|
69
|
+
action.call
|
70
|
+
|
71
|
+
verify_schema
|
72
|
+
verify_data
|
73
|
+
verify_metadata(start_time)
|
74
|
+
end
|
70
75
|
end
|
71
76
|
|
72
77
|
it 'handles column that does not exist in source' do
|
@@ -139,13 +144,12 @@ describe SQD::BatchLoadAction do
|
|
139
144
|
|
140
145
|
def test_tables
|
141
146
|
{
|
142
|
-
test_table: source,
|
147
|
+
test_table: [source, :target_test_table],
|
143
148
|
}
|
144
149
|
end
|
145
150
|
|
146
151
|
def verify_schema
|
147
|
-
test_tables.each do |table_name, source_db|
|
148
|
-
target_table_name = table_name
|
152
|
+
test_tables.each do |table_name, (source_db, target_table_name)|
|
149
153
|
target.tables.should include(target_table_name)
|
150
154
|
source_test_table_schema =
|
151
155
|
source_db.schema(table_name).map do |column, hash|
|
@@ -177,8 +181,8 @@ describe SQD::BatchLoadAction do
|
|
177
181
|
end
|
178
182
|
|
179
183
|
def verify_data
|
180
|
-
test_tables.each do |table_name,
|
181
|
-
data = target[
|
184
|
+
test_tables.each do |table_name, (source_db, target_table_name)|
|
185
|
+
data = target[target_table_name].all
|
182
186
|
data.count.should == 1
|
183
187
|
data = data[0]
|
184
188
|
data.keys.length.should == 3
|
@@ -189,8 +193,8 @@ describe SQD::BatchLoadAction do
|
|
189
193
|
end
|
190
194
|
|
191
195
|
def verify_metadata(start_time)
|
192
|
-
test_tables.each do |table_name,
|
193
|
-
meta = registry.get(
|
196
|
+
test_tables.each do |table_name, (source_db, target_table_name)|
|
197
|
+
meta = registry.get(target_table_name)
|
194
198
|
meta[:last_synced_at].should_not be_nil
|
195
199
|
meta[:last_batch_synced_at].should_not be_nil
|
196
200
|
meta[:last_batch_synced_at].to_i.should == start_time.to_i
|
@@ -17,15 +17,13 @@ shared_examples_for 'a decorated database adapter' do
|
|
17
17
|
end
|
18
18
|
|
19
19
|
describe SQD::Database::Postgres do
|
20
|
-
let(:
|
21
|
-
let(:db) { SQD::Database::Postgres.new(source) }
|
20
|
+
let(:db) { test_source(:postgres) }
|
22
21
|
|
23
22
|
it_should_behave_like 'a decorated database adapter'
|
24
23
|
end
|
25
24
|
|
26
25
|
describe SQD::Database::Mysql do
|
27
|
-
let(:
|
28
|
-
let(:db) { SQD::Database::Mysql.new(source) }
|
26
|
+
let(:db) { test_source(:source) }
|
29
27
|
|
30
28
|
it_should_behave_like 'a decorated database adapter'
|
31
29
|
|
@@ -34,26 +32,26 @@ describe SQD::Database::Mysql do
|
|
34
32
|
|
35
33
|
before { @file = Tempfile.new('bogus') }
|
36
34
|
|
37
|
-
def
|
38
|
-
|
35
|
+
def sequel_with_exception(exception_message)
|
36
|
+
db.send(:db).stub(:run).and_raise(
|
39
37
|
Sequel::DatabaseError.new(exception_message)
|
40
38
|
)
|
41
39
|
end
|
42
40
|
|
43
41
|
it 're-raises deadlock related exceptions as TransientError' do
|
44
|
-
|
42
|
+
sequel_with_exception("Deadlock found when trying to get lock")
|
45
43
|
-> { db.load_incrementally_from_file('bogus', ['bogus'], path) }.
|
46
44
|
should raise_error(SQD::Database::TransientError)
|
47
45
|
end
|
48
46
|
|
49
47
|
it 're-raises lock wait timeout exceptions as TransientError' do
|
50
|
-
|
48
|
+
sequel_with_exception("Lock wait timeout exceeded")
|
51
49
|
-> { db.load_incrementally_from_file('bogus', ['bogus'], path) }.
|
52
50
|
should raise_error(SQD::Database::TransientError)
|
53
51
|
end
|
54
52
|
|
55
53
|
it 'does not translate unknown errors' do
|
56
|
-
|
54
|
+
sequel_with_exception("Unknown")
|
57
55
|
-> { db.load_incrementally_from_file('bogus', ['bogus'], path) }.
|
58
56
|
should raise_error(Sequel::DatabaseError)
|
59
57
|
end
|
@@ -11,8 +11,9 @@ describe SQD::IncrementalLoadAction do
|
|
11
11
|
let(:last_synced_at) { now - 10 }
|
12
12
|
let(:source) { test_source(:source) }
|
13
13
|
let(:target) { test_target }
|
14
|
+
let(:target_table_name) { :test_table }
|
14
15
|
let(:table_plan) {{
|
15
|
-
table_name:
|
16
|
+
table_name: target_table_name,
|
16
17
|
source_table_name: :test_table,
|
17
18
|
columns: [:id, :col1, :updated_at],
|
18
19
|
source_db: source,
|
@@ -60,6 +61,18 @@ describe SQD::IncrementalLoadAction do
|
|
60
61
|
end
|
61
62
|
end
|
62
63
|
|
64
|
+
describe 'when source and target are differently named' do
|
65
|
+
let(:target_table_name) { :target_test_table }
|
66
|
+
|
67
|
+
it 'copies all columns to the correctly named target' do
|
68
|
+
setup_target_table(last_synced_at, target_table_name)
|
69
|
+
|
70
|
+
action.call
|
71
|
+
|
72
|
+
target[target_table_name].map { |row| row.values_at(:id, :col1) }.
|
73
|
+
should == [[2, 'new record']]
|
74
|
+
end
|
75
|
+
end
|
63
76
|
|
64
77
|
it 'copies null data to the target' do
|
65
78
|
source[:test_table].update(col1: nil)
|
data/spec/integration_helper.rb
CHANGED
@@ -48,18 +48,18 @@ def create_pg_source_table_with(*rows)
|
|
48
48
|
end
|
49
49
|
end
|
50
50
|
|
51
|
-
def setup_target_table(last_synced_at)
|
52
|
-
target.create_table!
|
51
|
+
def setup_target_table(last_synced_at, name=:test_table)
|
52
|
+
target.create_table! name do
|
53
53
|
Integer :id
|
54
54
|
String :col1
|
55
55
|
DateTime :updated_at
|
56
56
|
DateTime :created_at
|
57
57
|
end
|
58
58
|
|
59
|
-
target.add_index
|
59
|
+
target.add_index name, :id, :unique => true
|
60
60
|
|
61
61
|
registry.ensure_storage_exists
|
62
|
-
registry.set(
|
62
|
+
registry.set(name,
|
63
63
|
last_synced_at: last_synced_at,
|
64
64
|
last_row_at: last_synced_at,
|
65
65
|
last_batch_synced_at: last_synced_at
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sq-dbsync
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.8
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2013-05-
|
13
|
+
date: 2013-05-31 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: rspec
|