zdm 1.0.3 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/gemfiles/4.1.gemfile.lock +1 -1
- data/gemfiles/4.2.gemfile.lock +1 -1
- data/gemfiles/5.0.gemfile.lock +1 -1
- data/lib/version.rb +1 -1
- data/lib/zdm.rb +76 -59
- data/spec/spec_helper.rb +0 -3
- data/spec/zdm_spec.rb +50 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 83bf151ca9cb0909f5e1232b0d78f04dbef8cce2
|
4
|
+
data.tar.gz: 60d49fe72d7e6d6c122d101505e3ad29dafe190a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d3db65316fcd480937bdb39fe7f69b1a725aa7a03de4b677a201abaf35227a4ab43e338618452ebe84c680b236532753d9013fa80eaf631b157e968b4e296ac7
|
7
|
+
data.tar.gz: 6afe1ee9375edc2b7cd61208cf530fe54906dacab4957db70f37d94a5fca8839e05062c4c1e211aae4d1c0d92dec28ae71d4d4e9f7dd99c1054cb61848c63312
|
data/gemfiles/4.1.gemfile.lock
CHANGED
data/gemfiles/4.2.gemfile.lock
CHANGED
data/gemfiles/5.0.gemfile.lock
CHANGED
data/lib/version.rb
CHANGED
data/lib/zdm.rb
CHANGED
@@ -12,22 +12,89 @@ module Zdm
|
|
12
12
|
end
|
13
13
|
|
14
14
|
def cleanup(before: nil)
|
15
|
-
|
16
|
-
zdm_tables = conn.send(tables_method).select { |name| name.starts_with?('zdm_') }
|
15
|
+
zdm_tables = connection.send(tables_method).select { |name| name.starts_with?('zdm_') }
|
17
16
|
zdm_tables.each { |name| Migrator.new(Table.new(name.sub(/^zdm_/, ''))).cleanup }
|
18
17
|
|
19
|
-
zdm_archive_tables =
|
18
|
+
zdm_archive_tables = connection.send(tables_method).select { |name| name.starts_with?('zdma_') }
|
20
19
|
if before
|
21
20
|
zdm_archive_tables.select! { |table|
|
22
21
|
Time.strptime(table, 'zdma_%Y%m%d_%H%M%S%N') <= before
|
23
22
|
}
|
24
23
|
end
|
25
|
-
zdm_archive_tables.each { |name|
|
24
|
+
zdm_archive_tables.each { |name| execute('DROP TABLE `%s`' % name) }
|
26
25
|
end
|
27
26
|
|
28
27
|
def tables_method
|
29
28
|
ActiveRecord.version.to_s =~ /^5/ ? :data_sources : :tables
|
30
29
|
end
|
30
|
+
|
31
|
+
BATCH_SIZE = 40_000
|
32
|
+
DECREASE_THROTTLER = 4 # seconds
|
33
|
+
DECREASE_SIZE = 5_000
|
34
|
+
MIN_BATCH_SIZE = 10_000
|
35
|
+
PROGRESS_EVERY = 30 # seconds
|
36
|
+
def execute_in_batches(table_name, start: nil, finish: nil, batch_size: BATCH_SIZE, progress_every: PROGRESS_EVERY, &block)
|
37
|
+
min = start || connection.select_value('SELECT MIN(`id`) FROM %s' % table_name)
|
38
|
+
return unless min
|
39
|
+
|
40
|
+
max = finish || connection.select_value('SELECT MAX(`id`) FROM %s' % table_name)
|
41
|
+
todo = max - min + 1
|
42
|
+
return unless todo > 0
|
43
|
+
|
44
|
+
batch_end = min - 1
|
45
|
+
start_time = last_progress = Time.now
|
46
|
+
while true
|
47
|
+
batch_start = batch_end + 1
|
48
|
+
batch_end = [batch_start + batch_size - 1, max].min
|
49
|
+
start_batch_time = Time.now
|
50
|
+
|
51
|
+
sql = yield batch_start, batch_end
|
52
|
+
execute(sql) if sql
|
53
|
+
|
54
|
+
if $exit
|
55
|
+
write(table_name, 'Received SIGTERM, exiting...')
|
56
|
+
cleanup
|
57
|
+
exit 1
|
58
|
+
end
|
59
|
+
|
60
|
+
# The end!
|
61
|
+
break if batch_end >= max
|
62
|
+
|
63
|
+
# Throttle
|
64
|
+
current_time = Time.now
|
65
|
+
if (current_time - start_batch_time) > DECREASE_THROTTLER
|
66
|
+
batch_size = [(batch_size - DECREASE_SIZE).to_i, MIN_BATCH_SIZE].max
|
67
|
+
end
|
68
|
+
|
69
|
+
# Periodically show progress
|
70
|
+
if (current_time - last_progress) >= progress_every
|
71
|
+
last_progress = current_time
|
72
|
+
done = batch_end - min + 1
|
73
|
+
write(table_name, "%.2f%% (#{done}/#{todo})" % (done.to_f / todo * 100.0))
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
duration = Time.now - start_time
|
78
|
+
duration = (duration < 2*60) ? "#{duration.to_i} secs" : "#{(duration / 60).to_i} mins"
|
79
|
+
write(table_name, "Completed (#{duration})")
|
80
|
+
end
|
81
|
+
|
82
|
+
private
|
83
|
+
|
84
|
+
def connection
|
85
|
+
ActiveRecord::Base.connection
|
86
|
+
end
|
87
|
+
|
88
|
+
def execute(stmt)
|
89
|
+
connection.execute(stmt)
|
90
|
+
end
|
91
|
+
|
92
|
+
def write(table_name, msg)
|
93
|
+
return if Zdm.io == false
|
94
|
+
io = Zdm.io || $stderr
|
95
|
+
io.puts("#{table_name}: #{msg}")
|
96
|
+
io.flush
|
97
|
+
end
|
31
98
|
end
|
32
99
|
|
33
100
|
class Table
|
@@ -80,7 +147,7 @@ module Zdm
|
|
80
147
|
drop_copy_indexes
|
81
148
|
apply_ddl_statements
|
82
149
|
create_triggers
|
83
|
-
|
150
|
+
copy_in_batches
|
84
151
|
create_copy_indexes
|
85
152
|
atomic_switcharoo!
|
86
153
|
ensure
|
@@ -180,7 +247,7 @@ module Zdm
|
|
180
247
|
"zdmt_#{trigger_type}_#{table.origin}"[0...64]
|
181
248
|
end
|
182
249
|
|
183
|
-
# Drop indexes to speed up batched_copy.
|
250
|
+
# Drop indexes to speed up copy_in_batches.
|
184
251
|
#
|
185
252
|
# "Online DDL support for adding secondary indexes means that you can
|
186
253
|
# generally speed the overall process of creating and loading a table
|
@@ -215,18 +282,7 @@ module Zdm
|
|
215
282
|
execute('ALTER TABLE `%s` %s' % [table.copy, indexes.join(', ')])
|
216
283
|
end
|
217
284
|
|
218
|
-
|
219
|
-
DECREASE_THROTTLER = 4 # seconds
|
220
|
-
DECREASE_SIZE = 5_000
|
221
|
-
MIN_BATCH_SIZE = 10_000
|
222
|
-
PROGRESS_EVERY = 30 # seconds
|
223
|
-
def batched_copy
|
224
|
-
min = connection.select_value('SELECT MIN(`id`) FROM %s' % table.origin)
|
225
|
-
return unless min
|
226
|
-
|
227
|
-
max = connection.select_value('SELECT MAX(`id`) FROM %s' % table.origin)
|
228
|
-
todo = max - min + 1
|
229
|
-
|
285
|
+
def copy_in_batches
|
230
286
|
insert_columns = common_columns.map {|c| "`#{c}`"}.join(', ')
|
231
287
|
select_columns = common_columns.map {|c| "`#{table.origin}`.`#{c}`"}.join(', ')
|
232
288
|
sql = <<-SQL.squish
|
@@ -236,50 +292,11 @@ module Zdm
|
|
236
292
|
WHERE `#{table.origin}`.`id` BETWEEN %s AND %s
|
237
293
|
SQL
|
238
294
|
|
239
|
-
|
240
|
-
|
241
|
-
start_time = last_progress = Time.now
|
242
|
-
while true
|
243
|
-
batch_start = batch_end + 1
|
244
|
-
batch_end = [batch_start + batch_size - 1, max].min
|
245
|
-
start_batch_time = Time.now
|
246
|
-
|
247
|
-
execute(sql % [batch_start, batch_end])
|
248
|
-
|
249
|
-
if $exit
|
250
|
-
write('Received SIGTERM, exiting...')
|
251
|
-
cleanup
|
252
|
-
exit 1
|
253
|
-
end
|
254
|
-
|
255
|
-
# The end!
|
256
|
-
break if batch_end >= max
|
257
|
-
|
258
|
-
# Throttle
|
259
|
-
current_time = Time.now
|
260
|
-
if (current_time - start_batch_time) > DECREASE_THROTTLER
|
261
|
-
batch_size = [(batch_size - DECREASE_SIZE).to_i, MIN_BATCH_SIZE].max
|
262
|
-
end
|
263
|
-
|
264
|
-
# Periodically show progress
|
265
|
-
if (current_time - last_progress) >= PROGRESS_EVERY
|
266
|
-
last_progress = current_time
|
267
|
-
done = batch_end - min + 1
|
268
|
-
write("%.2f%% (#{done}/#{todo})" % (done.to_f / todo * 100.0))
|
269
|
-
end
|
295
|
+
Zdm.execute_in_batches(table.origin) do |batch_start, batch_end|
|
296
|
+
sql % [batch_start, batch_end]
|
270
297
|
end
|
271
|
-
|
272
|
-
duration = Time.now - start_time
|
273
|
-
duration = (duration < 2*60) ? "#{duration.to_i} secs" : "#{(duration / 60).to_i} mins"
|
274
|
-
write("Completed (#{duration})")
|
275
298
|
end
|
276
299
|
|
277
|
-
def write(msg)
|
278
|
-
return if Zdm.io == false
|
279
|
-
io = Zdm.io || $stderr
|
280
|
-
io.puts("#{table.origin}: #{msg}")
|
281
|
-
io.flush
|
282
|
-
end
|
283
300
|
end
|
284
301
|
end
|
285
302
|
trap('TERM') { $exit = true }
|
data/spec/spec_helper.rb
CHANGED
@@ -34,8 +34,5 @@ ActiveRecord::Schema.define version: 0 do
|
|
34
34
|
end
|
35
35
|
end
|
36
36
|
|
37
|
-
ActiveRecord::Base.connection.execute(%[INSERT INTO people(account_id, name, code, created_at) VALUES (10,'foo','bar','2017-03-01 23:59:59')])
|
38
|
-
ActiveRecord::Base.connection.execute(%[INSERT INTO people(account_id, name, code, created_at) VALUES (20,'foo2','bar2','2017-03-02 23:59:59')])
|
39
|
-
|
40
37
|
# ActiveRecord::Base.logger = Logger.new($stdout)
|
41
38
|
|
data/spec/zdm_spec.rb
CHANGED
@@ -5,6 +5,11 @@ describe Zdm do
|
|
5
5
|
before(:example) {
|
6
6
|
Zdm.io = false
|
7
7
|
Zdm.cleanup
|
8
|
+
|
9
|
+
conn = ActiveRecord::Base.connection
|
10
|
+
conn.execute(%[TRUNCATE people])
|
11
|
+
conn.execute(%[INSERT INTO people(account_id, name, code, created_at) VALUES (10,'foo','bar','2017-03-01 23:59:59')])
|
12
|
+
conn.execute(%[INSERT INTO people(account_id, name, code, created_at) VALUES (20,'foo2','bar2','2017-03-02 23:59:59')])
|
8
13
|
}
|
9
14
|
|
10
15
|
it 'requires an autoincrement primary key `id` field' do
|
@@ -87,4 +92,49 @@ describe Zdm do
|
|
87
92
|
expect(archive_tables.length).to eq(2)
|
88
93
|
end
|
89
94
|
|
95
|
+
context 'execute_in_batches' do
|
96
|
+
before(:example) do
|
97
|
+
@conn = ActiveRecord::Base.connection
|
98
|
+
(1..20).each do |idx|
|
99
|
+
@conn.execute(%[INSERT INTO people(account_id, name, code, created_at) VALUES (10,'person-#{idx}','P#{idx}','2017-03-01 23:59:59')])
|
100
|
+
end
|
101
|
+
Zdm.io = StringIO.new
|
102
|
+
@sql = "UPDATE people SET code = CONCAT(code, 'U') WHERE id BETWEEN %s AND %s"
|
103
|
+
end
|
104
|
+
|
105
|
+
after(:example) do
|
106
|
+
@conn.execute(%[DELETE FROM people WHERE name LIKE 'person%'])
|
107
|
+
end
|
108
|
+
|
109
|
+
it 'updates a table in batches' do
|
110
|
+
Zdm.execute_in_batches('people', batch_size: 4, progress_every: 1) do |batch_start, batch_end|
|
111
|
+
sleep(0.6)
|
112
|
+
@sql % [batch_start, batch_end]
|
113
|
+
end
|
114
|
+
expect(Zdm.io.string).to eq(%[people: 36.36% (8/22)\npeople: 72.73% (16/22)\npeople: Completed (3 secs)\n])
|
115
|
+
expect(@conn.select_value(%[SELECT COUNT(*) FROM people WHERE code LIKE '%U'])).to eq(22)
|
116
|
+
end
|
117
|
+
|
118
|
+
it 'updates part of a table in batches' do
|
119
|
+
batches = []
|
120
|
+
Zdm.execute_in_batches('people', start: 5, finish: 18, batch_size: 4, progress_every: 1) do |batch_start, batch_end|
|
121
|
+
sleep(0.6)
|
122
|
+
batches << @sql % [batch_start, batch_end]
|
123
|
+
@sql % [batch_start, batch_end]
|
124
|
+
end
|
125
|
+
expect(Zdm.io.string).to eq(%[people: 57.14% (8/14)\npeople: Completed (2 secs)\n])
|
126
|
+
expect(batches).to eq([
|
127
|
+
%[UPDATE people SET code = CONCAT(code, 'U') WHERE id BETWEEN 5 AND 8],
|
128
|
+
%[UPDATE people SET code = CONCAT(code, 'U') WHERE id BETWEEN 9 AND 12],
|
129
|
+
%[UPDATE people SET code = CONCAT(code, 'U') WHERE id BETWEEN 13 AND 16],
|
130
|
+
%[UPDATE people SET code = CONCAT(code, 'U') WHERE id BETWEEN 17 AND 18],
|
131
|
+
])
|
132
|
+
expect(@conn.select_values(%[SELECT code FROM people WHERE code LIKE '%U'])).to eq([
|
133
|
+
'P3U', 'P4U', 'P5U', 'P6U', 'P7U', 'P8U', 'P9U', 'P10U', 'P11U', 'P12U', 'P13U', 'P14U', 'P15U', 'P16U'
|
134
|
+
])
|
135
|
+
end
|
136
|
+
|
137
|
+
end
|
138
|
+
|
139
|
+
|
90
140
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: zdm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.3
|
4
|
+
version: 1.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ITRP Institute, Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-05-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|