sq-dbsync 1.0.0
- data/HISTORY.md +5 -0
- data/LICENSE +14 -0
- data/README.md +218 -0
- data/lib/sq/dbsync/all_tables_plan.rb +51 -0
- data/lib/sq/dbsync/batch_load_action.rb +95 -0
- data/lib/sq/dbsync/config.rb +12 -0
- data/lib/sq/dbsync/consistency_verifier.rb +70 -0
- data/lib/sq/dbsync/database/common.rb +91 -0
- data/lib/sq/dbsync/database/connection.rb +23 -0
- data/lib/sq/dbsync/database/mysql.rb +163 -0
- data/lib/sq/dbsync/database/postgres.rb +77 -0
- data/lib/sq/dbsync/error_handler.rb +59 -0
- data/lib/sq/dbsync/example_record_destroyer.rb +77 -0
- data/lib/sq/dbsync/incremental_load_action.rb +95 -0
- data/lib/sq/dbsync/load_action.rb +156 -0
- data/lib/sq/dbsync/loggers.rb +135 -0
- data/lib/sq/dbsync/manager.rb +241 -0
- data/lib/sq/dbsync/pipeline/simple_context.rb +15 -0
- data/lib/sq/dbsync/pipeline/threaded_context.rb +95 -0
- data/lib/sq/dbsync/pipeline.rb +80 -0
- data/lib/sq/dbsync/refresh_recent_load_action.rb +71 -0
- data/lib/sq/dbsync/schema_maker.rb +87 -0
- data/lib/sq/dbsync/static_table_plan.rb +42 -0
- data/lib/sq/dbsync/table_registry.rb +75 -0
- data/lib/sq/dbsync/tempfile_factory.rb +41 -0
- data/lib/sq/dbsync/version.rb +5 -0
- data/lib/sq/dbsync.rb +9 -0
- data/spec/acceptance/loading_spec.rb +237 -0
- data/spec/acceptance_helper.rb +2 -0
- data/spec/database_helper.rb +86 -0
- data/spec/integration/all_tables_plan_spec.rb +36 -0
- data/spec/integration/batch_load_action_spec.rb +229 -0
- data/spec/integration/consistency_verifier_spec.rb +54 -0
- data/spec/integration/database_connection_spec.rb +61 -0
- data/spec/integration/incremental_load_action_spec.rb +196 -0
- data/spec/integration/manager_spec.rb +109 -0
- data/spec/integration/schema_maker_spec.rb +119 -0
- data/spec/integration_helper.rb +43 -0
- data/spec/spec_helper.rb +27 -0
- data/spec/unit/config_spec.rb +18 -0
- data/spec/unit/error_handler_spec.rb +52 -0
- data/spec/unit/pipeline_spec.rb +42 -0
- data/spec/unit/stream_logger_spec.rb +33 -0
- data/spec/unit_helper.rb +1 -0
- data/sq-dbsync.gemspec +32 -0
- metadata +188 -0
data/HISTORY.md
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,14 @@
Copyright 2012 Square Inc.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
data/README.md
ADDED
@@ -0,0 +1,218 @@
Square Dbsync
=============

An extract and load system to shunt data between databases.

It uses timestamp-based replication, which is fast and easy to keep running
but has some caveats. Most notably, it does not handle deletes well (see the
documentation below for details).

This was useful to us at Square because we needed partial (only select
columns), continuous replication from both MySQL and PostgreSQL databases to a
single target database, with some basic ETL logic along the way. None of the
existing solutions were able to do this adequately.

At some point you will need to bite the bullet and implement a real ETL system,
but `sq-dbsync` can tide you over until you get there.

Usage
-----

``` ruby
include Sq::Dbsync

# Config will typically differ per environment.
config = {
  sources: {
    db_a: {
      database: 'db_a_production',
      user: 'sqdbsync-ro',
      password: 'password',
      host: 'db-a-host',
      brand: 'mysql',
      port: 3306,
    },
    db_b: {
      database: 'db_b_production',
      user: 'sqdbsync-ro',
      password: 'password',
      host: 'db-b-host',
      brand: 'postgresql',
      port: 5432,
    }
  },
  target: {
    database: 'replica',
    user: 'sqdbsync',
    password: 'password',

    # Only localhost is supported, since `LOAD DATA INFILE` is used, which
    # requires a shared temp directory.
    host: 'localhost',
    brand: 'mysql',
    port: 3306,
  },

  # Optional configuration
  logger: Loggers::Stream.new,     # A graphite logger is provided, see source.
  clock: ->{ Time.now.utc },       # In the test env it can be useful to fix this.
  error_handler: ->(e) { puts(e) } # Notify your exception system
}

# Write plans that specify how data is replicated.
DB_A_PLAN = [{
  table_name: :users,
  columns: [
    # You must replicate the primary key.
    :id,

    # You must replicate a timestamp column, and it should be indexed on the
    # target system.
    :updated_at,

    # Then whatever other columns you require.
    :name,
    :account_type,
    :created_at,
  ],
  indexes: {
    # Indexing it on the source system is optional.
    index_users_on_updated_at: { columns: [:updated_at], unique: false },
  },

  # Basic schema transformations are supported.
  db_types: {
    account_type: [:enum, %w(
      bronze
      silver
      gold
    )]
  }
},{
  table_name: :account_types,
  source_table_name: :user_account_types,
  columns: :all
}]

plans = [
  [StaticTablePlan.new(DB_A_PLAN), :db_a],
  [AllTablesPlan.new, :db_b]
]

manager = Manager.new(config, plans)

# Run a batch load nightly
manager.batch(ALL_TABLES)

# Run an incremental load continuously
manager.increment

# You can load a subset of tables if necessary
manager.batch([:users])
```

Documentation
-------------

### Plan Options

* `batch_load` Whether or not to batch load this table in the default batch
  load. If the table is specifically requested, it will be loaded regardless
  of this setting. (default: true)
* `charset` Charset to use when creating the table. Passed directly through to
  [Sequel::MySQL::Database#connect](http://sequel.rubyforge.org/rdoc-adapters/classes/Sequel/MySQL/Database.html).
  MySQL only, ignored for Postgres. (default: 'utf8')
* `columns` Either an array of columns to replicate, or `:all`, indicating
  that all columns should be replicated. (required)
* `consistency` Perform a basic consistency check on the table regularly
  during the incremental load by comparing recent counts of the source and
  target tables. Make sure you have a timestamp index on both tables! This was
  particularly useful when developing the project, but honestly probably isn't
  that useful now; I can't remember the last time I saw an error from this.
  (default: false)
* `db_types` A hash that allows you to modify the target schema from the
  source. See the example in the usage section above. (default: `{}`)
* `indexes` A hash defining desired indexes on the target table. Indexes are
  *not* copied from source tables. See the example in the usage section above.
  (default: `{}`)
* `refresh_recent` Some tables are too large to batch load regularly, but
  modifications are known to be recent. This setting will cause the last two
  days of data to be dropped and recreated as part of the nightly batch load.
  (default: false)
* `source_table_name` Allows the source and target tables to be named
  differently. (default: `table_name` configuration option)
* `timestamp_table_name` A hack to work around the Postgres query planner
  failing to use indexes correctly for `MAX()` on a view that uses `UNION`
  under the covers. If this describes your source view, and one of the
  underlying tables is guaranteed to contain the latest record, you can set
  this value to that table and it will be used for all timestamp-related
  queries. If not, you must provide a custom view that supports a `MAX` query
  with a sane query plan. (default: nil)
* `table_name` The name of the table to be replicated. If `source_table_name`
  is specified, this option defines the name of the table in the target
  database only.
* `primary_key` Usually the primary key can be inferred from the source
  schema, but if you are replicating from a view you will need to specify it
  explicitly with this option. Should be an array of symbols. (default: nil,
  will auto-detect from source schema)
* `timestamp` The column to treat as a timestamp. Must be a member of the
  `:columns` option. (default: select `updated_at` or `created_at`, in that
  order)
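
For illustration, a hypothetical plan entry combining several of these options
might look like the following; the table, columns, and index name are made up:

``` ruby
# Hypothetical plan entry exercising several of the options above.
{
  table_name:        :payments_summary,  # name in the target database
  source_table_name: :payments,          # name in the source database
  columns:           [:id, :state, :amount_cents, :created_at],
  timestamp:         :created_at,        # this table has no updated_at column
  batch_load:        false,              # too large for the nightly batch load
  refresh_recent:    true,               # but the last two days can be reloaded
  consistency:       true,               # compare recent source/target counts
  indexes: {
    index_payments_summary_on_created_at: {
      columns: [:created_at], unique: false
    }
  }
}
```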

### Handling Deletes

The incremental load has no way of detecting deleted records. The nightly batch
load will reload all tables, so there will be at most a one day turn-around on
deletes. Some tables will be too big to batch load every night, however, so
this is not a great solution in that case.

If you have an "audit" table that contains enough data for you to reconstruct
deletes in other tables, then you can provide a custom subclass to the
incremental loader that will be able to run this logic.

``` ruby
class IncrementalLoadWithDeletes < Sq::Dbsync::IncrementalLoadAction
  def process_deletes
    if plan.table_name == :audit_logs
      ExampleRecordDestroyer.run(db, registry, :audit_logs, :other_table)
    end
  end
end

CONFIG = {
  # ...
  incremental_action: IncrementalLoadWithDeletes,
}
```

See `lib/sq/dbsync/example_record_destroyer` for a sample implementation.
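
As a rough sketch of the idea (not the bundled implementation), a destroyer
might collect deleted ids from the audit table and remove them from the
target. The `audit_logs` schema assumed here (`record_id` and `action`
columns) is hypothetical:

``` ruby
# Minimal sketch only; assumes a hypothetical audit_logs schema with
# record_id and action columns. See the bundled example for a real one.
class NaiveRecordDestroyer
  def self.run(db, registry, audit_table, target_table)
    # registry is unused in this sketch, kept only to mirror the call site.
    deleted_ids = db[audit_table].
      where(action: 'delete').
      select_map(:record_id)

    db[target_table].where(id: deleted_ids).delete unless deleted_ids.empty?
  end
end
```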

### Database Settings

If your target database is MySQL, we recommend that you ensure it is running
under the `READ COMMITTED` isolation level. This makes it much harder for an
analyst to lock a table and block replication. (Statements like `CREATE TABLE
AS SELECT FROM ...` tend to be the culprit.)
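
For example, the isolation level can be switched at runtime from a connection
to the target (`target_db` below stands in for any Sequel handle); persist it
with `transaction-isolation = READ-COMMITTED` in `my.cnf` so it survives
restarts:

``` ruby
# Sketch: put the target MySQL server into READ COMMITTED at runtime.
target_db.run("SET GLOBAL TRANSACTION ISOLATION LEVEL READ COMMITTED")
```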

Developing
----------

    bundle
    bundle exec rake

Compatibility
-------------

Requires Ruby 1.9. Tested on CRuby 1.9.3 and JRuby.

## Support

Make a [new github issue](https://github.com/square/sq-dbsync/issues/new).

## Contributing

Fork and patch! Before any changes are merged to master, we need you to sign an
[Individual Contributor
Agreement](https://spreadsheets.google.com/a/squareup.com/spreadsheet/viewform?formkey=dDViT2xzUHAwRkI3X3k5Z0lQM091OGc6MQ&ndplr=1)
(Google Form).
data/lib/sq/dbsync/all_tables_plan.rb
ADDED
@@ -0,0 +1,51 @@
module Sq::Dbsync
  # Fetches all tables from the given source, retrieving tables and columns.
  # Indexes are currently ignored.
  class AllTablesPlan
    def tables(source)
      source.ensure_connection

      source.tables.map do |t|
        schema_for_table(source, t)
      end.compact
    end

    private

    def schema_for_table(source, t)
      schema = source.schema(t, reload: true)

      return unless has_primary_key?(schema)
      return unless has_timestamp?(schema)

      cols = schema.map do |col|
        col[0]
      end

      {
        source_db: source,
        source_table_name: t,
        table_name: t,
        columns: cols,
        indexes: {},
        always_sync: true
      }
    rescue Sequel::DatabaseError
      # This handles a race condition where the table is deleted between us
      # selecting the list of tables and fetching the schema.
      nil
    end

    # Each schema entry is a [column_name, info] pair, as returned by
    # Sequel::Database#schema.
    def has_primary_key?(schema)
      schema.any? do |col|
        col[1][:primary_key]
      end
    end

    def has_timestamp?(schema)
      schema.any? do |col|
        [:updated_at, :created_at].include?(col[0])
      end
    end
  end
end
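
For reference, a sketch of the plan hash `tables` produces for a source
containing a `users` table with a primary key and an `updated_at` column
(the column list is illustrative):

``` ruby
AllTablesPlan.new.tables(source).first
# => {
#      source_db:         source,
#      source_table_name: :users,
#      table_name:        :users,
#      columns:           [:id, :updated_at, :name],
#      indexes:           {},
#      always_sync:       true
#    }
```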
data/lib/sq/dbsync/batch_load_action.rb
ADDED
@@ -0,0 +1,95 @@
require 'sq/dbsync/load_action'

module Sq::Dbsync
  # Load action to reload an entire table in full. The table will be loaded in
  # parallel to the existing one, then atomically swapped in on completion.
  class BatchLoadAction < LoadAction
    MAX_LAG = 60 * 5

    def operation; 'batch'; end

    def prepare
      return false if plan.batch_load == false

      if super
        if target.table_exists?(plan.prefixed_table_name)
          target.drop_table(plan.prefixed_table_name)
        end
        true
      end
    end

    def extract_data
      @start_time = now.call
      @file, @last_row_at = measure(:extract) { extract_to_file(nil) }
      self
    end

    def load_data
      measure(:load) do
        TempfileFactory.split(@file, 1_000_000, logger) do |path|
          db.load_from_file(
            plan.prefixed_table_name,
            plan.columns,
            path
          )
        end
        @file.close!
      end
      self
    end

    def post_load
      # The full extract can take a long time, so replay rows that changed
      # while it ran until the new table is within MAX_LAG seconds of now.
      while @start_time <= now.call - MAX_LAG
        @start_time = now.call
        catchup
      end

      switch_tables
      self
    end

    private

    def filter_columns
      source = plan.source_db
      source_columns = source.hash_schema(plan.source_table_name).keys

      plan.columns = resolve_columns(plan, source_columns)
    end

    def prefix
      'new_'
    end

    def catchup
      # Re-extract from slightly before the last seen row (the overlap), so
      # rows committed with older timestamps during the previous pass are not
      # missed.
      file, @last_row_at = measure(:catchup_extract) {
        extract_to_file(@last_row_at ? @last_row_at - overlap : nil)
      }
      measure(:catchup_load) do
        db.load_incrementally_from_file(
          plan.prefixed_table_name,
          plan.columns,
          file.path
        )
        file.close!
      end
    end

    def switch_tables
      measure(:switch) do
        registry.delete(plan.table_name)
        db.switch_table(
          plan.table_name,
          plan.prefixed_table_name
        )
        registry.set(plan.table_name,
          last_synced_at: @start_time,
          last_batch_synced_at: @start_time,
          last_row_at: @last_row_at
        )
      end
    end
  end
end
data/lib/sq/dbsync/config.rb
ADDED
@@ -0,0 +1,12 @@
require 'sq/dbsync/loggers'

# Helper class to provide sane defaults to user-supplied config.
class Sq::Dbsync::Config
  def self.make(hash)
    {
      clock: ->{ Time.now.utc },
      logger: Sq::Dbsync::Loggers::Stream.new,
      error_handler: ->(e) { $stderr.puts(e.message, e.backtrace) }
    }.merge(hash)
  end
end
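
A brief usage sketch: defaults are filled in, and any key you pass wins
(`my_logger` is a stand-in for your own logger object):

``` ruby
config = Sq::Dbsync::Config.make(logger: my_logger)
config[:logger]      # => my_logger, overriding the default Loggers::Stream
config[:clock].call  # => current UTC time, from the default clock
```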
data/lib/sq/dbsync/consistency_verifier.rb
ADDED
@@ -0,0 +1,70 @@
require 'sq/dbsync/load_action' # For overlap, not ideal

module Sq::Dbsync

  # Performs a cheap check to verify that the number of records present for a
  # recent time slice are the same across source and target tables.
  #
  # This checks consistency on the current tables, not the new_ set.
  class ConsistencyVerifier
    def initialize(target, registry)
      @target = target
      @registry = registry
    end

    def check_consistency!(tables)
      tables.each do |tplan|
        next unless tplan[:consistency]
        verify_consistency!(tplan)
      end
    end

    def verify_consistency!(tplan)
      last_row_at = registry.get(tplan[:table_name])[:last_row_at]
      return unless last_row_at

      now = registry.get(tplan[:table_name])[:last_row_at] - LoadAction.overlap

      counts = [
        tplan[:source_db],
        target
      ].map do |x|
        x.consistency_check(tplan[:table_name], now)
      end

      delta = counts.reduce(:-)

      unless delta == 0
        raise ConsistencyError.new(
          tplan[:table_name],
          delta,
          "source: #{tplan[:source_db].name} (count: #{counts[0]}), " +
          "sink: #{target.name} (count: #{counts[1]})"
        )
      end
    end

    attr_reader :target, :registry
  end

  # Used to signal an observed error in the number of records between source
  # and target tables. There are no current known situations in which this
  # occurs, though in the past buggy handling of replication lag was normally
  # the culprit.
  #
  # If it does occur, a good first response is to set `last_sync_time` to the
  # last batch time (usually within 24 hours), which will force the
  # incremental load to reconsider all recent records.
  class ConsistencyError < RuntimeError
    def initialize(table_name, delta, description="")
      @table_name = table_name
      @delta = delta
      @description = description
    end

    def message
      output = "%s had a count difference of %i" % [@table_name, @delta]
      output += "; " + @description unless @description.empty?
      output # Always return the message, even without a description.
    end
  end
end
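
A usage sketch, with `target`, `registry`, and `table_plans` standing in for
the objects used elsewhere in this gem:

``` ruby
verifier = Sq::Dbsync::ConsistencyVerifier.new(target, registry)

begin
  verifier.check_consistency!(table_plans) # only plans with consistency: true
rescue Sq::Dbsync::ConsistencyError => e
  $stderr.puts(e.message)
end
```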
data/lib/sq/dbsync/database/common.rb
ADDED
@@ -0,0 +1,91 @@
require 'sq/dbsync/tempfile_factory'

module Sq::Dbsync::Database
  module Common

    SQD = ::Sq::Dbsync

    def extract_to_file(table_name, columns, file_name)
      extract_sql_to_file("SELECT %s FROM %s" % [
        columns.join(', '),
        table_name
      ], file_name)
    end

    def extract_incrementally_to_file(plan, file_name, last_row_at, overlap)
      table_name = plan.source_table_name.to_sym
      db_columns = db.schema(table_name).map(&:first)

      query = self[table_name].select(*plan.columns)
      if last_row_at
        query = query.filter("#{plan.timestamp} > ?", last_row_at - overlap)
      end

      extract_sql_to_file(query.sql, file_name)
    end

    def hash_schema(table_name)
      ensure_connection
      Hash[schema(table_name)]
    end

    def name
      self['SELECT database()'].first.fetch(:'database()')
    end

    # Since we go so long without using connections (during a batch load), they
    # go stale and raise DatabaseDisconnectError when we try to use them. This
    # method ensures that the connection is fresh even after a long time
    # between drinks.
    def ensure_connection
      db.disconnect
    end

    def __getobj__
      db
    end

    def __setobj__(db)
      @db = db
    end

    protected

    def execute!(cmd)
      # psql doesn't return a non-zero error code when executing commands from
      # a file. The best way I can come up with is to raise if anything is
      # present on stderr.
      errors_file = SQD::TempfileFactory.make('extract_sql_to_file_errors')

      cmd = %{bash -c "#{cmd.gsub(/"/, '\\"')}"}

      result = run_shell(cmd, errors_file)

      unless result.exitstatus == 0 && File.size(errors_file.path) == 0
        raise(ExtractError, "Command failed: #{cmd}")
      end
    ensure
      errors_file.close! if errors_file
    end

    def sql_to_file(sql)
      SQD::TempfileFactory.make_with_content('extract_sql_to_file', sql)
    end

    private

    def run_shell(cmd, errors_file)
      if RUBY_PLATFORM == 'java'
        IO.popen4(cmd) {|_, _, _, stderr|
          errors_file.write(stderr.read)
          errors_file.flush
        }
        $?
      else
        pid = Process.spawn(cmd, STDERR => errors_file.path)
        Process.waitpid2(pid)[1]
      end
    end

  end
end
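
For orientation, a sketch of how the extract helper is typically driven
(`source` stands in for a connected database decorator that mixes in
`Common`; the output path is illustrative):

``` ruby
# Dump the selected columns of a table to a flat file, ready for loading.
source.extract_to_file(:users, [:id, :updated_at, :name], '/tmp/users.dump')
```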
data/lib/sq/dbsync/database/connection.rb
ADDED
@@ -0,0 +1,23 @@
require 'sequel/no_core_ext'

Sequel.default_timezone = :utc

require 'sq/dbsync/database/mysql'
require 'sq/dbsync/database/postgres'

module Sq::Dbsync::Database
  # Factory class to abstract selection of a decorator to facilitate databases
  # other than MySQL.
  class Connection
    def self.create(opts)
      case opts[:brand]
      when 'mysql'
        Sq::Dbsync::Database::Mysql.new(Sequel.connect(opts))
      when 'postgresql'
        Sq::Dbsync::Database::Postgres.new(Sequel.connect(opts))
      else
        raise "Unsupported database: #{opts.inspect}"
      end
    end
  end
end
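
A usage sketch, reusing the `config` hash shape from the README above; note
that `brand` must be `'mysql'` or `'postgresql'`:

``` ruby
source = Sq::Dbsync::Database::Connection.create(config[:sources][:db_a])
target = Sq::Dbsync::Database::Connection.create(config[:target])
source.ensure_connection
```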