zdm 1.0.3 → 1.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/gemfiles/4.1.gemfile.lock +1 -1
- data/gemfiles/4.2.gemfile.lock +1 -1
- data/gemfiles/5.0.gemfile.lock +1 -1
- data/lib/version.rb +1 -1
- data/lib/zdm.rb +76 -59
- data/spec/spec_helper.rb +0 -3
- data/spec/zdm_spec.rb +50 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 83bf151ca9cb0909f5e1232b0d78f04dbef8cce2
|
4
|
+
data.tar.gz: 60d49fe72d7e6d6c122d101505e3ad29dafe190a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d3db65316fcd480937bdb39fe7f69b1a725aa7a03de4b677a201abaf35227a4ab43e338618452ebe84c680b236532753d9013fa80eaf631b157e968b4e296ac7
|
7
|
+
data.tar.gz: 6afe1ee9375edc2b7cd61208cf530fe54906dacab4957db70f37d94a5fca8839e05062c4c1e211aae4d1c0d92dec28ae71d4d4e9f7dd99c1054cb61848c63312
|
data/gemfiles/4.1.gemfile.lock
CHANGED
data/gemfiles/4.2.gemfile.lock
CHANGED
data/gemfiles/5.0.gemfile.lock
CHANGED
data/lib/version.rb
CHANGED
data/lib/zdm.rb
CHANGED
@@ -12,22 +12,89 @@ module Zdm
|
|
12
12
|
end
|
13
13
|
|
14
14
|
def cleanup(before: nil)
|
15
|
-
|
16
|
-
zdm_tables = conn.send(tables_method).select { |name| name.starts_with?('zdm_') }
|
15
|
+
zdm_tables = connection.send(tables_method).select { |name| name.starts_with?('zdm_') }
|
17
16
|
zdm_tables.each { |name| Migrator.new(Table.new(name.sub(/^zdm_/, ''))).cleanup }
|
18
17
|
|
19
|
-
zdm_archive_tables =
|
18
|
+
zdm_archive_tables = connection.send(tables_method).select { |name| name.starts_with?('zdma_') }
|
20
19
|
if before
|
21
20
|
zdm_archive_tables.select! { |table|
|
22
21
|
Time.strptime(table, 'zdma_%Y%m%d_%H%M%S%N') <= before
|
23
22
|
}
|
24
23
|
end
|
25
|
-
zdm_archive_tables.each { |name|
|
24
|
+
zdm_archive_tables.each { |name| execute('DROP TABLE `%s`' % name) }
|
26
25
|
end
|
27
26
|
|
28
27
|
def tables_method
|
29
28
|
ActiveRecord.version.to_s =~ /^5/ ? :data_sources : :tables
|
30
29
|
end
|
30
|
+
|
31
|
+
BATCH_SIZE = 40_000
|
32
|
+
DECREASE_THROTTLER = 4 # seconds
|
33
|
+
DECREASE_SIZE = 5_000
|
34
|
+
MIN_BATCH_SIZE = 10_000
|
35
|
+
PROGRESS_EVERY = 30 # seconds
|
36
|
+
def execute_in_batches(table_name, start: nil, finish: nil, batch_size: BATCH_SIZE, progress_every: PROGRESS_EVERY, &block)
|
37
|
+
min = start || connection.select_value('SELECT MIN(`id`) FROM %s' % table_name)
|
38
|
+
return unless min
|
39
|
+
|
40
|
+
max = finish || connection.select_value('SELECT MAX(`id`) FROM %s' % table_name)
|
41
|
+
todo = max - min + 1
|
42
|
+
return unless todo > 0
|
43
|
+
|
44
|
+
batch_end = min - 1
|
45
|
+
start_time = last_progress = Time.now
|
46
|
+
while true
|
47
|
+
batch_start = batch_end + 1
|
48
|
+
batch_end = [batch_start + batch_size - 1, max].min
|
49
|
+
start_batch_time = Time.now
|
50
|
+
|
51
|
+
sql = yield batch_start, batch_end
|
52
|
+
execute(sql) if sql
|
53
|
+
|
54
|
+
if $exit
|
55
|
+
write(table_name, 'Received SIGTERM, exiting...')
|
56
|
+
cleanup
|
57
|
+
exit 1
|
58
|
+
end
|
59
|
+
|
60
|
+
# The end!
|
61
|
+
break if batch_end >= max
|
62
|
+
|
63
|
+
# Throttle
|
64
|
+
current_time = Time.now
|
65
|
+
if (current_time - start_batch_time) > DECREASE_THROTTLER
|
66
|
+
batch_size = [(batch_size - DECREASE_SIZE).to_i, MIN_BATCH_SIZE].max
|
67
|
+
end
|
68
|
+
|
69
|
+
# Periodically show progress
|
70
|
+
if (current_time - last_progress) >= progress_every
|
71
|
+
last_progress = current_time
|
72
|
+
done = batch_end - min + 1
|
73
|
+
write(table_name, "%.2f%% (#{done}/#{todo})" % (done.to_f / todo * 100.0))
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
duration = Time.now - start_time
|
78
|
+
duration = (duration < 2*60) ? "#{duration.to_i} secs" : "#{(duration / 60).to_i} mins"
|
79
|
+
write(table_name, "Completed (#{duration})")
|
80
|
+
end
|
81
|
+
|
82
|
+
private
|
83
|
+
|
84
|
+
def connection
|
85
|
+
ActiveRecord::Base.connection
|
86
|
+
end
|
87
|
+
|
88
|
+
def execute(stmt)
|
89
|
+
connection.execute(stmt)
|
90
|
+
end
|
91
|
+
|
92
|
+
def write(table_name, msg)
|
93
|
+
return if Zdm.io == false
|
94
|
+
io = Zdm.io || $stderr
|
95
|
+
io.puts("#{table_name}: #{msg}")
|
96
|
+
io.flush
|
97
|
+
end
|
31
98
|
end
|
32
99
|
|
33
100
|
class Table
|
@@ -80,7 +147,7 @@ module Zdm
|
|
80
147
|
drop_copy_indexes
|
81
148
|
apply_ddl_statements
|
82
149
|
create_triggers
|
83
|
-
|
150
|
+
copy_in_batches
|
84
151
|
create_copy_indexes
|
85
152
|
atomic_switcharoo!
|
86
153
|
ensure
|
@@ -180,7 +247,7 @@ module Zdm
|
|
180
247
|
"zdmt_#{trigger_type}_#{table.origin}"[0...64]
|
181
248
|
end
|
182
249
|
|
183
|
-
# Drop indexes to speed up
|
250
|
+
# Drop indexes to speed up copy_in_batches.
|
184
251
|
#
|
185
252
|
# "Online DDL support for adding secondary indexes means that you can
|
186
253
|
# generally speed the overall process of creating and loading a table
|
@@ -215,18 +282,7 @@ module Zdm
|
|
215
282
|
execute('ALTER TABLE `%s` %s' % [table.copy, indexes.join(', ')])
|
216
283
|
end
|
217
284
|
|
218
|
-
|
219
|
-
DECREASE_THROTTLER = 4 # seconds
|
220
|
-
DECREASE_SIZE = 5_000
|
221
|
-
MIN_BATCH_SIZE = 10_000
|
222
|
-
PROGRESS_EVERY = 30 # seconds
|
223
|
-
def batched_copy
|
224
|
-
min = connection.select_value('SELECT MIN(`id`) FROM %s' % table.origin)
|
225
|
-
return unless min
|
226
|
-
|
227
|
-
max = connection.select_value('SELECT MAX(`id`) FROM %s' % table.origin)
|
228
|
-
todo = max - min + 1
|
229
|
-
|
285
|
+
def copy_in_batches
|
230
286
|
insert_columns = common_columns.map {|c| "`#{c}`"}.join(', ')
|
231
287
|
select_columns = common_columns.map {|c| "`#{table.origin}`.`#{c}`"}.join(', ')
|
232
288
|
sql = <<-SQL.squish
|
@@ -236,50 +292,11 @@ module Zdm
|
|
236
292
|
WHERE `#{table.origin}`.`id` BETWEEN %s AND %s
|
237
293
|
SQL
|
238
294
|
|
239
|
-
|
240
|
-
|
241
|
-
start_time = last_progress = Time.now
|
242
|
-
while true
|
243
|
-
batch_start = batch_end + 1
|
244
|
-
batch_end = [batch_start + batch_size - 1, max].min
|
245
|
-
start_batch_time = Time.now
|
246
|
-
|
247
|
-
execute(sql % [batch_start, batch_end])
|
248
|
-
|
249
|
-
if $exit
|
250
|
-
write('Received SIGTERM, exiting...')
|
251
|
-
cleanup
|
252
|
-
exit 1
|
253
|
-
end
|
254
|
-
|
255
|
-
# The end!
|
256
|
-
break if batch_end >= max
|
257
|
-
|
258
|
-
# Throttle
|
259
|
-
current_time = Time.now
|
260
|
-
if (current_time - start_batch_time) > DECREASE_THROTTLER
|
261
|
-
batch_size = [(batch_size - DECREASE_SIZE).to_i, MIN_BATCH_SIZE].max
|
262
|
-
end
|
263
|
-
|
264
|
-
# Periodically show progress
|
265
|
-
if (current_time - last_progress) >= PROGRESS_EVERY
|
266
|
-
last_progress = current_time
|
267
|
-
done = batch_end - min + 1
|
268
|
-
write("%.2f%% (#{done}/#{todo})" % (done.to_f / todo * 100.0))
|
269
|
-
end
|
295
|
+
Zdm.execute_in_batches(table.origin) do |batch_start, batch_end|
|
296
|
+
sql % [batch_start, batch_end]
|
270
297
|
end
|
271
|
-
|
272
|
-
duration = Time.now - start_time
|
273
|
-
duration = (duration < 2*60) ? "#{duration.to_i} secs" : "#{(duration / 60).to_i} mins"
|
274
|
-
write("Completed (#{duration})")
|
275
298
|
end
|
276
299
|
|
277
|
-
def write(msg)
|
278
|
-
return if Zdm.io == false
|
279
|
-
io = Zdm.io || $stderr
|
280
|
-
io.puts("#{table.origin}: #{msg}")
|
281
|
-
io.flush
|
282
|
-
end
|
283
300
|
end
|
284
301
|
end
|
285
302
|
trap('TERM') { $exit = true }
|
data/spec/spec_helper.rb
CHANGED
@@ -34,8 +34,5 @@ ActiveRecord::Schema.define version: 0 do
|
|
34
34
|
end
|
35
35
|
end
|
36
36
|
|
37
|
-
ActiveRecord::Base.connection.execute(%[INSERT INTO people(account_id, name, code, created_at) VALUES (10,'foo','bar','2017-03-01 23:59:59')])
|
38
|
-
ActiveRecord::Base.connection.execute(%[INSERT INTO people(account_id, name, code, created_at) VALUES (20,'foo2','bar2','2017-03-02 23:59:59')])
|
39
|
-
|
40
37
|
# ActiveRecord::Base.logger = Logger.new($stdout)
|
41
38
|
|
data/spec/zdm_spec.rb
CHANGED
@@ -5,6 +5,11 @@ describe Zdm do
|
|
5
5
|
before(:example) {
|
6
6
|
Zdm.io = false
|
7
7
|
Zdm.cleanup
|
8
|
+
|
9
|
+
conn = ActiveRecord::Base.connection
|
10
|
+
conn.execute(%[TRUNCATE people])
|
11
|
+
conn.execute(%[INSERT INTO people(account_id, name, code, created_at) VALUES (10,'foo','bar','2017-03-01 23:59:59')])
|
12
|
+
conn.execute(%[INSERT INTO people(account_id, name, code, created_at) VALUES (20,'foo2','bar2','2017-03-02 23:59:59')])
|
8
13
|
}
|
9
14
|
|
10
15
|
it 'requires an autoincrement primary key `id` field' do
|
@@ -87,4 +92,49 @@ describe Zdm do
|
|
87
92
|
expect(archive_tables.length).to eq(2)
|
88
93
|
end
|
89
94
|
|
95
|
+
context 'execute_in_batches' do
|
96
|
+
before(:example) do
|
97
|
+
@conn = ActiveRecord::Base.connection
|
98
|
+
(1..20).each do |idx|
|
99
|
+
@conn.execute(%[INSERT INTO people(account_id, name, code, created_at) VALUES (10,'person-#{idx}','P#{idx}','2017-03-01 23:59:59')])
|
100
|
+
end
|
101
|
+
Zdm.io = StringIO.new
|
102
|
+
@sql = "UPDATE people SET code = CONCAT(code, 'U') WHERE id BETWEEN %s AND %s"
|
103
|
+
end
|
104
|
+
|
105
|
+
after(:example) do
|
106
|
+
@conn.execute(%[DELETE FROM people WHERE name LIKE 'person%'])
|
107
|
+
end
|
108
|
+
|
109
|
+
it 'updates a table in batches' do
|
110
|
+
Zdm.execute_in_batches('people', batch_size: 4, progress_every: 1) do |batch_start, batch_end|
|
111
|
+
sleep(0.6)
|
112
|
+
@sql % [batch_start, batch_end]
|
113
|
+
end
|
114
|
+
expect(Zdm.io.string).to eq(%[people: 36.36% (8/22)\npeople: 72.73% (16/22)\npeople: Completed (3 secs)\n])
|
115
|
+
expect(@conn.select_value(%[SELECT COUNT(*) FROM people WHERE code LIKE '%U'])).to eq(22)
|
116
|
+
end
|
117
|
+
|
118
|
+
it 'updates part of a table in batches' do
|
119
|
+
batches = []
|
120
|
+
Zdm.execute_in_batches('people', start: 5, finish: 18, batch_size: 4, progress_every: 1) do |batch_start, batch_end|
|
121
|
+
sleep(0.6)
|
122
|
+
batches << @sql % [batch_start, batch_end]
|
123
|
+
@sql % [batch_start, batch_end]
|
124
|
+
end
|
125
|
+
expect(Zdm.io.string).to eq(%[people: 57.14% (8/14)\npeople: Completed (2 secs)\n])
|
126
|
+
expect(batches).to eq([
|
127
|
+
%[UPDATE people SET code = CONCAT(code, 'U') WHERE id BETWEEN 5 AND 8],
|
128
|
+
%[UPDATE people SET code = CONCAT(code, 'U') WHERE id BETWEEN 9 AND 12],
|
129
|
+
%[UPDATE people SET code = CONCAT(code, 'U') WHERE id BETWEEN 13 AND 16],
|
130
|
+
%[UPDATE people SET code = CONCAT(code, 'U') WHERE id BETWEEN 17 AND 18],
|
131
|
+
])
|
132
|
+
expect(@conn.select_values(%[SELECT code FROM people WHERE code LIKE '%U'])).to eq([
|
133
|
+
'P3U', 'P4U', 'P5U', 'P6U', 'P7U', 'P8U', 'P9U', 'P10U', 'P11U', 'P12U', 'P13U', 'P14U', 'P15U', 'P16U'
|
134
|
+
])
|
135
|
+
end
|
136
|
+
|
137
|
+
end
|
138
|
+
|
139
|
+
|
90
140
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: zdm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.3
|
4
|
+
version: 1.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ITRP Institute, Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-05-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|