sq-dbsync 1.0.7 → 1.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/sq/dbsync/database/common.rb +18 -1
- data/lib/sq/dbsync/database/connection.rb +3 -5
- data/lib/sq/dbsync/database/mysql.rb +12 -46
- data/lib/sq/dbsync/database/postgres.rb +2 -10
- data/lib/sq/dbsync/manager.rb +4 -3
- data/lib/sq/dbsync/version.rb +1 -1
- data/spec/database_helper.rb +4 -2
- data/spec/integration/batch_load_action_spec.rb +19 -15
- data/spec/integration/database_connection_spec.rb +7 -9
- data/spec/integration/incremental_load_action_spec.rb +14 -1
- data/spec/integration_helper.rb +4 -4
- metadata +2 -2
@@ -5,6 +5,17 @@ module Sq::Dbsync::Database
|
|
5
5
|
|
6
6
|
SQD = ::Sq::Dbsync
|
7
7
|
|
8
|
+
def initialize(opts, source_or_target)
|
9
|
+
db = Sequel.connect(opts)
|
10
|
+
super(db)
|
11
|
+
@db, @source_or_target = db, source_or_target
|
12
|
+
@charset = opts[:charset] if opts[:charset]
|
13
|
+
end
|
14
|
+
|
15
|
+
def inspect
|
16
|
+
"#<Database::#{self.class.name} #{source_or_target} #{opts[:database]}>"
|
17
|
+
end
|
18
|
+
|
8
19
|
def extract_to_file(table_name, columns, file_name)
|
9
20
|
extract_sql_to_file("SELECT %s FROM %s" % [
|
10
21
|
columns.join(', '),
|
@@ -34,7 +45,11 @@ module Sq::Dbsync::Database
|
|
34
45
|
|
35
46
|
def hash_schema(plan)
|
36
47
|
ensure_connection
|
37
|
-
Hash[schema(plan.source_table_name)]
|
48
|
+
Hash[schema(source? ? plan.source_table_name : plan.table_name)]
|
49
|
+
end
|
50
|
+
|
51
|
+
def source?
|
52
|
+
source_or_target == :source
|
38
53
|
end
|
39
54
|
|
40
55
|
def name
|
@@ -59,6 +74,8 @@ module Sq::Dbsync::Database
|
|
59
74
|
|
60
75
|
protected
|
61
76
|
|
77
|
+
attr_reader :db, :source_or_target, :charset
|
78
|
+
|
62
79
|
def execute!(cmd)
|
63
80
|
# psql doesn't return a non-zero error code when executing commands from
|
64
81
|
# a file. The best way I can come up with is to raise if anything is
|
@@ -9,14 +9,12 @@ module Sq::Dbsync::Database
|
|
9
9
|
# Factory class to abstract selection of a decorator to facilitate databases
|
10
10
|
# other than MySQL.
|
11
11
|
class Connection
|
12
|
-
def self.create(opts)
|
12
|
+
def self.create(opts, direction)
|
13
13
|
case opts[:brand]
|
14
14
|
when 'mysql'
|
15
|
-
|
16
|
-
db.charset = opts[:charset] if opts[:charset]
|
17
|
-
db
|
15
|
+
Sq::Dbsync::Database::Mysql.new(opts, direction)
|
18
16
|
when 'postgresql'
|
19
|
-
Sq::Dbsync::Database::Postgres.new(
|
17
|
+
Sq::Dbsync::Database::Postgres.new(opts, direction)
|
20
18
|
else
|
21
19
|
raise "Unsupported database: #{opts.inspect}"
|
22
20
|
end
|
@@ -13,28 +13,17 @@ module Sq::Dbsync::Database
|
|
13
13
|
# Decorator around a Sequel database object, providing some non-standard
|
14
14
|
# extensions required for effective ETL with MySQL.
|
15
15
|
class Mysql < Delegator
|
16
|
+
# 2 days is chosen as an arbitrary buffer
|
17
|
+
AUX_TIME_BUFFER = 60 * 60 * 24 * 2 # 2 days
|
18
|
+
LOAD_SQL = "LOAD DATA INFILE '%s' %s INTO TABLE %s %s (%s)"
|
16
19
|
|
17
20
|
include Common
|
18
21
|
|
19
|
-
attr_accessor :charset
|
20
|
-
|
21
|
-
def initialize(db)
|
22
|
-
super
|
23
|
-
@db = db
|
24
|
-
end
|
25
|
-
|
26
|
-
def inspect; "#<Database::Mysql #{opts[:database]}>"; end
|
27
|
-
|
28
22
|
def load_from_file(table_name, columns, file_name)
|
29
23
|
ensure_connection
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
table_name,
|
34
|
-
character_set,
|
35
|
-
escape_columns(columns)
|
36
|
-
]
|
37
|
-
db.run sql
|
24
|
+
db.run(LOAD_SQL % [
|
25
|
+
file_name, 'IGNORE', table_name, character_set, escape_columns(columns)
|
26
|
+
])
|
38
27
|
end
|
39
28
|
|
40
29
|
def set_lock_timeout(seconds)
|
@@ -46,13 +35,9 @@ module Sq::Dbsync::Database
|
|
46
35
|
# Very low lock wait timeout, since we don't want loads to be blocked
|
47
36
|
# waiting for long queries.
|
48
37
|
set_lock_timeout(10)
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
table_name,
|
53
|
-
character_set,
|
54
|
-
escape_columns(columns)
|
55
|
-
]
|
38
|
+
db.run(LOAD_SQL % [
|
39
|
+
file_name, 'REPLACE', table_name, character_set, escape_columns(columns)
|
40
|
+
])
|
56
41
|
rescue Sequel::DatabaseError => e
|
57
42
|
transient_regex =
|
58
43
|
/Lock wait timeout exceeded|Deadlock found when trying to get lock/
|
@@ -64,9 +49,6 @@ module Sq::Dbsync::Database
|
|
64
49
|
end
|
65
50
|
end
|
66
51
|
|
67
|
-
# 2 days is chosen as an arbitrary buffer
|
68
|
-
AUX_TIME_BUFFER = 60 * 60 * 24 * 2 # 2 days
|
69
|
-
|
70
52
|
# Deletes recent rows based on timestamp, but also allows filtering by an
|
71
53
|
# auxiliary timestamp column for the case where the primary one is not
|
72
54
|
# indexed on the target (such as the DFR reports, where imported_at is not
|
@@ -93,21 +75,6 @@ module Sq::Dbsync::Database
|
|
93
75
|
count
|
94
76
|
end
|
95
77
|
|
96
|
-
# Overridden because the Sequel implementation does not work with partial
|
97
|
-
# permissions on a table. See:
|
98
|
-
# https://github.com/jeremyevans/sequel/issues/422
|
99
|
-
def table_exists?(table_name)
|
100
|
-
begin
|
101
|
-
!!db.schema(table_name, reload: true)
|
102
|
-
rescue Sequel::DatabaseError
|
103
|
-
false
|
104
|
-
end
|
105
|
-
end
|
106
|
-
|
107
|
-
def drop_table(table_name)
|
108
|
-
db.drop_table(table_name)
|
109
|
-
end
|
110
|
-
|
111
78
|
def switch_table(to_replace, new_table)
|
112
79
|
ensure_connection
|
113
80
|
|
@@ -131,8 +98,6 @@ module Sq::Dbsync::Database
|
|
131
98
|
|
132
99
|
protected
|
133
100
|
|
134
|
-
attr_reader :db
|
135
|
-
|
136
101
|
def extract_sql_to_file(sql, file_name)
|
137
102
|
file = sql_to_file(connection_settings + sql)
|
138
103
|
cmd = "set -o pipefail; mysql --skip-column-names"
|
@@ -147,7 +112,7 @@ module Sq::Dbsync::Database
|
|
147
112
|
]
|
148
113
|
end
|
149
114
|
|
150
|
-
cmd += " --default-character-set %s" %
|
115
|
+
cmd += " --default-character-set %s" % charset if charset
|
151
116
|
|
152
117
|
cmd += " %s" % opts.fetch(:database)
|
153
118
|
|
@@ -170,9 +135,10 @@ module Sq::Dbsync::Database
|
|
170
135
|
lock_timeout_sql(10)
|
171
136
|
end
|
172
137
|
|
138
|
+
def character_set; charset ? " character set #{charset}" : "" end
|
139
|
+
|
173
140
|
def lock_timeout_sql(seconds)
|
174
141
|
"SET SESSION innodb_lock_wait_timeout = %i;" % seconds
|
175
142
|
end
|
176
|
-
|
177
143
|
end
|
178
144
|
end
|
@@ -29,13 +29,6 @@ module Sq::Dbsync::Database
|
|
29
29
|
|
30
30
|
include Sq::Dbsync::Database::Common
|
31
31
|
|
32
|
-
def initialize(db)
|
33
|
-
super
|
34
|
-
@db = db
|
35
|
-
end
|
36
|
-
|
37
|
-
def inspect; "#<Database::Postgres #{opts[:database]}>"; end
|
38
|
-
|
39
32
|
def set_lock_timeout(seconds)
|
40
33
|
# Unimplemented
|
41
34
|
end
|
@@ -44,7 +37,8 @@ module Sq::Dbsync::Database
|
|
44
37
|
type_casts = plan.type_casts || {}
|
45
38
|
ensure_connection
|
46
39
|
|
47
|
-
|
40
|
+
table_name = source? ? plan.source_table_name : plan.table_name
|
41
|
+
result = schema(table_name).each do |col, metadata|
|
48
42
|
metadata[:source_db_type] ||= metadata[:db_type]
|
49
43
|
metadata[:db_type] = cast_psql_to_mysql(
|
50
44
|
metadata[:db_type], type_casts[col.to_s]
|
@@ -56,8 +50,6 @@ module Sq::Dbsync::Database
|
|
56
50
|
|
57
51
|
protected
|
58
52
|
|
59
|
-
attr_reader :db
|
60
|
-
|
61
53
|
def cast_psql_to_mysql(db_type, cast=nil)
|
62
54
|
CASTS.fetch(db_type, cast || db_type)
|
63
55
|
end
|
data/lib/sq/dbsync/manager.rb
CHANGED
@@ -105,7 +105,8 @@ class Sq::Dbsync::Manager
|
|
105
105
|
end
|
106
106
|
|
107
107
|
def target
|
108
|
-
|
108
|
+
opts = config[:target]
|
109
|
+
@target ||= Sq::Dbsync::Database::Connection.create(opts, :target)
|
109
110
|
end
|
110
111
|
|
111
112
|
def tables_to_load
|
@@ -124,7 +125,7 @@ class Sq::Dbsync::Manager
|
|
124
125
|
|
125
126
|
def sources
|
126
127
|
@sources ||= Hash[config[:sources].map do |name, opts|
|
127
|
-
[name, Sq::Dbsync::Database::Connection.create(opts)]
|
128
|
+
[name, Sq::Dbsync::Database::Connection.create(opts, :source)]
|
128
129
|
end]
|
129
130
|
end
|
130
131
|
|
@@ -217,7 +218,7 @@ class Sq::Dbsync::Manager
|
|
217
218
|
end
|
218
219
|
|
219
220
|
def db
|
220
|
-
@db ||= Database::Connection.create(config[:target])
|
221
|
+
@db ||= Database::Connection.create(config[:target], :target)
|
221
222
|
end
|
222
223
|
|
223
224
|
def transient_exceptions
|
data/lib/sq/dbsync/version.rb
CHANGED
data/spec/database_helper.rb
CHANGED
@@ -48,12 +48,14 @@ MB4_TEST_TARGET = db_options(database: 'sq_dbsync_test_target', charset:"utf8mb4
|
|
48
48
|
|
49
49
|
$target = nil
|
50
50
|
def test_target
|
51
|
-
$target ||= SQD::Database::Connection.create(TEST_TARGET)
|
51
|
+
$target ||= SQD::Database::Connection.create(TEST_TARGET, :target)
|
52
52
|
end
|
53
53
|
|
54
54
|
$sources = {}
|
55
55
|
def test_source(name)
|
56
|
-
$sources[name] ||= SQD::Database::Connection.create(
|
56
|
+
$sources[name] ||= SQD::Database::Connection.create(
|
57
|
+
TEST_SOURCES.fetch(name), :source
|
58
|
+
)
|
57
59
|
end
|
58
60
|
|
59
61
|
RSpec.configure do |config|
|
@@ -11,8 +11,9 @@ describe SQD::BatchLoadAction do
|
|
11
11
|
let!(:now) { @now = Time.now.utc }
|
12
12
|
let(:last_synced_at) { now - 10 }
|
13
13
|
let(:target) { test_target }
|
14
|
+
let(:target_table_name) { :test_table }
|
14
15
|
let(:table_plan) {{
|
15
|
-
table_name:
|
16
|
+
table_name: target_table_name,
|
16
17
|
source_table_name: :test_table,
|
17
18
|
columns: [:id, :col1, :updated_at],
|
18
19
|
source_db: source,
|
@@ -20,7 +21,7 @@ describe SQD::BatchLoadAction do
|
|
20
21
|
}}
|
21
22
|
let(:index) {{
|
22
23
|
index_on_col1: { columns: [:col1], unique: false }
|
23
|
-
}
|
24
|
+
}}
|
24
25
|
let(:registry) { SQD::TableRegistry.new(target) }
|
25
26
|
let(:action) { SQD::BatchLoadAction.new(
|
26
27
|
target,
|
@@ -59,14 +60,18 @@ describe SQD::BatchLoadAction do
|
|
59
60
|
end
|
60
61
|
end
|
61
62
|
|
62
|
-
|
63
|
-
|
63
|
+
describe 'when the source and destination table names differ' do
|
64
|
+
let(:target_table_name) { :target_test_table }
|
64
65
|
|
65
|
-
|
66
|
+
it 'copies source tables to target with matching schemas' do
|
67
|
+
start_time = now.to_f
|
66
68
|
|
67
|
-
|
68
|
-
|
69
|
-
|
69
|
+
action.call
|
70
|
+
|
71
|
+
verify_schema
|
72
|
+
verify_data
|
73
|
+
verify_metadata(start_time)
|
74
|
+
end
|
70
75
|
end
|
71
76
|
|
72
77
|
it 'handles column that does not exist in source' do
|
@@ -139,13 +144,12 @@ describe SQD::BatchLoadAction do
|
|
139
144
|
|
140
145
|
def test_tables
|
141
146
|
{
|
142
|
-
test_table: source,
|
147
|
+
test_table: [source, :target_test_table],
|
143
148
|
}
|
144
149
|
end
|
145
150
|
|
146
151
|
def verify_schema
|
147
|
-
test_tables.each do |table_name, source_db|
|
148
|
-
target_table_name = table_name
|
152
|
+
test_tables.each do |table_name, (source_db, target_table_name)|
|
149
153
|
target.tables.should include(target_table_name)
|
150
154
|
source_test_table_schema =
|
151
155
|
source_db.schema(table_name).map do |column, hash|
|
@@ -177,8 +181,8 @@ describe SQD::BatchLoadAction do
|
|
177
181
|
end
|
178
182
|
|
179
183
|
def verify_data
|
180
|
-
test_tables.each do |table_name,
|
181
|
-
data = target[
|
184
|
+
test_tables.each do |table_name, (source_db, target_table_name)|
|
185
|
+
data = target[target_table_name].all
|
182
186
|
data.count.should == 1
|
183
187
|
data = data[0]
|
184
188
|
data.keys.length.should == 3
|
@@ -189,8 +193,8 @@ describe SQD::BatchLoadAction do
|
|
189
193
|
end
|
190
194
|
|
191
195
|
def verify_metadata(start_time)
|
192
|
-
test_tables.each do |table_name,
|
193
|
-
meta = registry.get(
|
196
|
+
test_tables.each do |table_name, (source_db, target_table_name)|
|
197
|
+
meta = registry.get(target_table_name)
|
194
198
|
meta[:last_synced_at].should_not be_nil
|
195
199
|
meta[:last_batch_synced_at].should_not be_nil
|
196
200
|
meta[:last_batch_synced_at].to_i.should == start_time.to_i
|
@@ -17,15 +17,13 @@ shared_examples_for 'a decorated database adapter' do
|
|
17
17
|
end
|
18
18
|
|
19
19
|
describe SQD::Database::Postgres do
|
20
|
-
let(:
|
21
|
-
let(:db) { SQD::Database::Postgres.new(source) }
|
20
|
+
let(:db) { test_source(:postgres) }
|
22
21
|
|
23
22
|
it_should_behave_like 'a decorated database adapter'
|
24
23
|
end
|
25
24
|
|
26
25
|
describe SQD::Database::Mysql do
|
27
|
-
let(:
|
28
|
-
let(:db) { SQD::Database::Mysql.new(source) }
|
26
|
+
let(:db) { test_source(:source) }
|
29
27
|
|
30
28
|
it_should_behave_like 'a decorated database adapter'
|
31
29
|
|
@@ -34,26 +32,26 @@ describe SQD::Database::Mysql do
|
|
34
32
|
|
35
33
|
before { @file = Tempfile.new('bogus') }
|
36
34
|
|
37
|
-
def
|
38
|
-
|
35
|
+
def sequel_with_exception(exception_message)
|
36
|
+
db.send(:db).stub(:run).and_raise(
|
39
37
|
Sequel::DatabaseError.new(exception_message)
|
40
38
|
)
|
41
39
|
end
|
42
40
|
|
43
41
|
it 're-raises deadlock related exceptions as TransientError' do
|
44
|
-
|
42
|
+
sequel_with_exception("Deadlock found when trying to get lock")
|
45
43
|
-> { db.load_incrementally_from_file('bogus', ['bogus'], path) }.
|
46
44
|
should raise_error(SQD::Database::TransientError)
|
47
45
|
end
|
48
46
|
|
49
47
|
it 're-raises lock wait timeout exceptions as TransientError' do
|
50
|
-
|
48
|
+
sequel_with_exception("Lock wait timeout exceeded")
|
51
49
|
-> { db.load_incrementally_from_file('bogus', ['bogus'], path) }.
|
52
50
|
should raise_error(SQD::Database::TransientError)
|
53
51
|
end
|
54
52
|
|
55
53
|
it 'does not translate unknown errors' do
|
56
|
-
|
54
|
+
sequel_with_exception("Unknown")
|
57
55
|
-> { db.load_incrementally_from_file('bogus', ['bogus'], path) }.
|
58
56
|
should raise_error(Sequel::DatabaseError)
|
59
57
|
end
|
@@ -11,8 +11,9 @@ describe SQD::IncrementalLoadAction do
|
|
11
11
|
let(:last_synced_at) { now - 10 }
|
12
12
|
let(:source) { test_source(:source) }
|
13
13
|
let(:target) { test_target }
|
14
|
+
let(:target_table_name) { :test_table }
|
14
15
|
let(:table_plan) {{
|
15
|
-
table_name:
|
16
|
+
table_name: target_table_name,
|
16
17
|
source_table_name: :test_table,
|
17
18
|
columns: [:id, :col1, :updated_at],
|
18
19
|
source_db: source,
|
@@ -60,6 +61,18 @@ describe SQD::IncrementalLoadAction do
|
|
60
61
|
end
|
61
62
|
end
|
62
63
|
|
64
|
+
describe 'when source and target are differently named' do
|
65
|
+
let(:target_table_name) { :target_test_table }
|
66
|
+
|
67
|
+
it 'copies all columns to the correctly named target' do
|
68
|
+
setup_target_table(last_synced_at, target_table_name)
|
69
|
+
|
70
|
+
action.call
|
71
|
+
|
72
|
+
target[target_table_name].map { |row| row.values_at(:id, :col1) }.
|
73
|
+
should == [[2, 'new record']]
|
74
|
+
end
|
75
|
+
end
|
63
76
|
|
64
77
|
it 'copies null data to the target' do
|
65
78
|
source[:test_table].update(col1: nil)
|
data/spec/integration_helper.rb
CHANGED
@@ -48,18 +48,18 @@ def create_pg_source_table_with(*rows)
|
|
48
48
|
end
|
49
49
|
end
|
50
50
|
|
51
|
-
def setup_target_table(last_synced_at)
|
52
|
-
target.create_table!
|
51
|
+
def setup_target_table(last_synced_at, name=:test_table)
|
52
|
+
target.create_table! name do
|
53
53
|
Integer :id
|
54
54
|
String :col1
|
55
55
|
DateTime :updated_at
|
56
56
|
DateTime :created_at
|
57
57
|
end
|
58
58
|
|
59
|
-
target.add_index
|
59
|
+
target.add_index name, :id, :unique => true
|
60
60
|
|
61
61
|
registry.ensure_storage_exists
|
62
|
-
registry.set(
|
62
|
+
registry.set(name,
|
63
63
|
last_synced_at: last_synced_at,
|
64
64
|
last_row_at: last_synced_at,
|
65
65
|
last_batch_synced_at: last_synced_at
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sq-dbsync
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.7
|
4
|
+
version: 1.0.8
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2013-05-
|
13
|
+
date: 2013-05-31 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: rspec
|