zdm 1.0.3 → 1.0.4

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5e342d256ff439757a08cda9ad15897c90278527
4
- data.tar.gz: 72894ff47ca9aaca42c6cd0a3b07bc3d0f492490
3
+ metadata.gz: 83bf151ca9cb0909f5e1232b0d78f04dbef8cce2
4
+ data.tar.gz: 60d49fe72d7e6d6c122d101505e3ad29dafe190a
5
5
  SHA512:
6
- metadata.gz: a57d77a2afd53a03508aac7a253d7521ced90318e52ce0c8583e8a2cc5960804b4631f4e94fb6e6424599bc0b7b2c8c578beea3c2ec6124203f1b0232ae11514
7
- data.tar.gz: ff0c63a2eab00afa7fc4ec348fd1088348cdec28ecd23214b42818d2c3c6e9da01b96c452257292248234f30a983ba60e6b8e1cf031a3e778d132ebda75103fc
6
+ metadata.gz: d3db65316fcd480937bdb39fe7f69b1a725aa7a03de4b677a201abaf35227a4ab43e338618452ebe84c680b236532753d9013fa80eaf631b157e968b4e296ac7
7
+ data.tar.gz: 6afe1ee9375edc2b7cd61208cf530fe54906dacab4957db70f37d94a5fca8839e05062c4c1e211aae4d1c0d92dec28ae71d4d4e9f7dd99c1054cb61848c63312
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: ..
3
3
  specs:
4
- zdm (1.0.3)
4
+ zdm (1.0.4)
5
5
  activerecord (>= 4.0)
6
6
 
7
7
  GEM
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: ..
3
3
  specs:
4
- zdm (1.0.3)
4
+ zdm (1.0.4)
5
5
  activerecord (>= 4.0)
6
6
 
7
7
  GEM
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: ..
3
3
  specs:
4
- zdm (1.0.3)
4
+ zdm (1.0.4)
5
5
  activerecord (>= 4.0)
6
6
 
7
7
  GEM
@@ -1,3 +1,3 @@
1
1
  module Zdm
2
- VERSION = '1.0.3'
2
+ VERSION = '1.0.4'
3
3
  end
data/lib/zdm.rb CHANGED
@@ -12,22 +12,89 @@ module Zdm
12
12
  end
13
13
 
14
14
  def cleanup(before: nil)
15
- conn = ActiveRecord::Base.connection
16
- zdm_tables = conn.send(tables_method).select { |name| name.starts_with?('zdm_') }
15
+ zdm_tables = connection.send(tables_method).select { |name| name.starts_with?('zdm_') }
17
16
  zdm_tables.each { |name| Migrator.new(Table.new(name.sub(/^zdm_/, ''))).cleanup }
18
17
 
19
- zdm_archive_tables = conn.send(tables_method).select { |name| name.starts_with?('zdma_') }
18
+ zdm_archive_tables = connection.send(tables_method).select { |name| name.starts_with?('zdma_') }
20
19
  if before
21
20
  zdm_archive_tables.select! { |table|
22
21
  Time.strptime(table, 'zdma_%Y%m%d_%H%M%S%N') <= before
23
22
  }
24
23
  end
25
- zdm_archive_tables.each { |name| conn.execute('DROP TABLE `%s`' % name) }
24
+ zdm_archive_tables.each { |name| execute('DROP TABLE `%s`' % name) }
26
25
  end
27
26
 
28
27
  def tables_method
29
28
  ActiveRecord.version.to_s =~ /^5/ ? :data_sources : :tables
30
29
  end
30
+
31
+ BATCH_SIZE = 40_000
32
+ DECREASE_THROTTLER = 4 # seconds
33
+ DECREASE_SIZE = 5_000
34
+ MIN_BATCH_SIZE = 10_000
35
+ PROGRESS_EVERY = 30 # seconds
36
+ def execute_in_batches(table_name, start: nil, finish: nil, batch_size: BATCH_SIZE, progress_every: PROGRESS_EVERY, &block)
37
+ min = start || connection.select_value('SELECT MIN(`id`) FROM %s' % table_name)
38
+ return unless min
39
+
40
+ max = finish || connection.select_value('SELECT MAX(`id`) FROM %s' % table_name)
41
+ todo = max - min + 1
42
+ return unless todo > 0
43
+
44
+ batch_end = min - 1
45
+ start_time = last_progress = Time.now
46
+ while true
47
+ batch_start = batch_end + 1
48
+ batch_end = [batch_start + batch_size - 1, max].min
49
+ start_batch_time = Time.now
50
+
51
+ sql = yield batch_start, batch_end
52
+ execute(sql) if sql
53
+
54
+ if $exit
55
+ write(table_name, 'Received SIGTERM, exiting...')
56
+ cleanup
57
+ exit 1
58
+ end
59
+
60
+ # The end!
61
+ break if batch_end >= max
62
+
63
+ # Throttle
64
+ current_time = Time.now
65
+ if (current_time - start_batch_time) > DECREASE_THROTTLER
66
+ batch_size = [(batch_size - DECREASE_SIZE).to_i, MIN_BATCH_SIZE].max
67
+ end
68
+
69
+ # Periodically show progress
70
+ if (current_time - last_progress) >= progress_every
71
+ last_progress = current_time
72
+ done = batch_end - min + 1
73
+ write(table_name, "%.2f%% (#{done}/#{todo})" % (done.to_f / todo * 100.0))
74
+ end
75
+ end
76
+
77
+ duration = Time.now - start_time
78
+ duration = (duration < 2*60) ? "#{duration.to_i} secs" : "#{(duration / 60).to_i} mins"
79
+ write(table_name, "Completed (#{duration})")
80
+ end
81
+
82
+ private
83
+
84
+ def connection
85
+ ActiveRecord::Base.connection
86
+ end
87
+
88
+ def execute(stmt)
89
+ connection.execute(stmt)
90
+ end
91
+
92
+ def write(table_name, msg)
93
+ return if Zdm.io == false
94
+ io = Zdm.io || $stderr
95
+ io.puts("#{table_name}: #{msg}")
96
+ io.flush
97
+ end
31
98
  end
32
99
 
33
100
  class Table
@@ -80,7 +147,7 @@ module Zdm
80
147
  drop_copy_indexes
81
148
  apply_ddl_statements
82
149
  create_triggers
83
- batched_copy
150
+ copy_in_batches
84
151
  create_copy_indexes
85
152
  atomic_switcharoo!
86
153
  ensure
@@ -180,7 +247,7 @@ module Zdm
180
247
  "zdmt_#{trigger_type}_#{table.origin}"[0...64]
181
248
  end
182
249
 
183
- # Drop indexes to speed up batched_copy.
250
+ # Drop indexes to speed up copy_in_batches.
184
251
  #
185
252
  # "Online DDL support for adding secondary indexes means that you can
186
253
  # generally speed the overall process of creating and loading a table
@@ -215,18 +282,7 @@ module Zdm
215
282
  execute('ALTER TABLE `%s` %s' % [table.copy, indexes.join(', ')])
216
283
  end
217
284
 
218
- BATCH_SIZE = 40_000
219
- DECREASE_THROTTLER = 4 # seconds
220
- DECREASE_SIZE = 5_000
221
- MIN_BATCH_SIZE = 10_000
222
- PROGRESS_EVERY = 30 # seconds
223
- def batched_copy
224
- min = connection.select_value('SELECT MIN(`id`) FROM %s' % table.origin)
225
- return unless min
226
-
227
- max = connection.select_value('SELECT MAX(`id`) FROM %s' % table.origin)
228
- todo = max - min + 1
229
-
285
+ def copy_in_batches
230
286
  insert_columns = common_columns.map {|c| "`#{c}`"}.join(', ')
231
287
  select_columns = common_columns.map {|c| "`#{table.origin}`.`#{c}`"}.join(', ')
232
288
  sql = <<-SQL.squish
@@ -236,50 +292,11 @@ module Zdm
236
292
  WHERE `#{table.origin}`.`id` BETWEEN %s AND %s
237
293
  SQL
238
294
 
239
- batch_size = BATCH_SIZE
240
- batch_end = min - 1
241
- start_time = last_progress = Time.now
242
- while true
243
- batch_start = batch_end + 1
244
- batch_end = [batch_start + batch_size - 1, max].min
245
- start_batch_time = Time.now
246
-
247
- execute(sql % [batch_start, batch_end])
248
-
249
- if $exit
250
- write('Received SIGTERM, exiting...')
251
- cleanup
252
- exit 1
253
- end
254
-
255
- # The end!
256
- break if batch_end >= max
257
-
258
- # Throttle
259
- current_time = Time.now
260
- if (current_time - start_batch_time) > DECREASE_THROTTLER
261
- batch_size = [(batch_size - DECREASE_SIZE).to_i, MIN_BATCH_SIZE].max
262
- end
263
-
264
- # Periodically show progress
265
- if (current_time - last_progress) >= PROGRESS_EVERY
266
- last_progress = current_time
267
- done = batch_end - min + 1
268
- write("%.2f%% (#{done}/#{todo})" % (done.to_f / todo * 100.0))
269
- end
295
+ Zdm.execute_in_batches(table.origin) do |batch_start, batch_end|
296
+ sql % [batch_start, batch_end]
270
297
  end
271
-
272
- duration = Time.now - start_time
273
- duration = (duration < 2*60) ? "#{duration.to_i} secs" : "#{(duration / 60).to_i} mins"
274
- write("Completed (#{duration})")
275
298
  end
276
299
 
277
- def write(msg)
278
- return if Zdm.io == false
279
- io = Zdm.io || $stderr
280
- io.puts("#{table.origin}: #{msg}")
281
- io.flush
282
- end
283
300
  end
284
301
  end
285
302
  trap('TERM') { $exit = true }
@@ -34,8 +34,5 @@ ActiveRecord::Schema.define version: 0 do
34
34
  end
35
35
  end
36
36
 
37
- ActiveRecord::Base.connection.execute(%[INSERT INTO people(account_id, name, code, created_at) VALUES (10,'foo','bar','2017-03-01 23:59:59')])
38
- ActiveRecord::Base.connection.execute(%[INSERT INTO people(account_id, name, code, created_at) VALUES (20,'foo2','bar2','2017-03-02 23:59:59')])
39
-
40
37
  # ActiveRecord::Base.logger = Logger.new($stdout)
41
38
 
@@ -5,6 +5,11 @@ describe Zdm do
5
5
  before(:example) {
6
6
  Zdm.io = false
7
7
  Zdm.cleanup
8
+
9
+ conn = ActiveRecord::Base.connection
10
+ conn.execute(%[TRUNCATE people])
11
+ conn.execute(%[INSERT INTO people(account_id, name, code, created_at) VALUES (10,'foo','bar','2017-03-01 23:59:59')])
12
+ conn.execute(%[INSERT INTO people(account_id, name, code, created_at) VALUES (20,'foo2','bar2','2017-03-02 23:59:59')])
8
13
  }
9
14
 
10
15
  it 'requires an autoincrement primary key `id` field' do
@@ -87,4 +92,49 @@ describe Zdm do
87
92
  expect(archive_tables.length).to eq(2)
88
93
  end
89
94
 
95
+ context 'execute_in_batches' do
96
+ before(:example) do
97
+ @conn = ActiveRecord::Base.connection
98
+ (1..20).each do |idx|
99
+ @conn.execute(%[INSERT INTO people(account_id, name, code, created_at) VALUES (10,'person-#{idx}','P#{idx}','2017-03-01 23:59:59')])
100
+ end
101
+ Zdm.io = StringIO.new
102
+ @sql = "UPDATE people SET code = CONCAT(code, 'U') WHERE id BETWEEN %s AND %s"
103
+ end
104
+
105
+ after(:example) do
106
+ @conn.execute(%[DELETE FROM people WHERE name LIKE 'person%'])
107
+ end
108
+
109
+ it 'updates a table in batches' do
110
+ Zdm.execute_in_batches('people', batch_size: 4, progress_every: 1) do |batch_start, batch_end|
111
+ sleep(0.6)
112
+ @sql % [batch_start, batch_end]
113
+ end
114
+ expect(Zdm.io.string).to eq(%[people: 36.36% (8/22)\npeople: 72.73% (16/22)\npeople: Completed (3 secs)\n])
115
+ expect(@conn.select_value(%[SELECT COUNT(*) FROM people WHERE code LIKE '%U'])).to eq(22)
116
+ end
117
+
118
+ it 'updates part of a table in batches' do
119
+ batches = []
120
+ Zdm.execute_in_batches('people', start: 5, finish: 18, batch_size: 4, progress_every: 1) do |batch_start, batch_end|
121
+ sleep(0.6)
122
+ batches << @sql % [batch_start, batch_end]
123
+ @sql % [batch_start, batch_end]
124
+ end
125
+ expect(Zdm.io.string).to eq(%[people: 57.14% (8/14)\npeople: Completed (2 secs)\n])
126
+ expect(batches).to eq([
127
+ %[UPDATE people SET code = CONCAT(code, 'U') WHERE id BETWEEN 5 AND 8],
128
+ %[UPDATE people SET code = CONCAT(code, 'U') WHERE id BETWEEN 9 AND 12],
129
+ %[UPDATE people SET code = CONCAT(code, 'U') WHERE id BETWEEN 13 AND 16],
130
+ %[UPDATE people SET code = CONCAT(code, 'U') WHERE id BETWEEN 17 AND 18],
131
+ ])
132
+ expect(@conn.select_values(%[SELECT code FROM people WHERE code LIKE '%U'])).to eq([
133
+ 'P3U', 'P4U', 'P5U', 'P6U', 'P7U', 'P8U', 'P9U', 'P10U', 'P11U', 'P12U', 'P13U', 'P14U', 'P15U', 'P16U'
134
+ ])
135
+ end
136
+
137
+ end
138
+
139
+
90
140
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zdm
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.3
4
+ version: 1.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - ITRP Institute, Inc.
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-04-23 00:00:00.000000000 Z
11
+ date: 2017-05-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activerecord