zdm 1.0.3 → 1.0.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5e342d256ff439757a08cda9ad15897c90278527
4
- data.tar.gz: 72894ff47ca9aaca42c6cd0a3b07bc3d0f492490
3
+ metadata.gz: 83bf151ca9cb0909f5e1232b0d78f04dbef8cce2
4
+ data.tar.gz: 60d49fe72d7e6d6c122d101505e3ad29dafe190a
5
5
  SHA512:
6
- metadata.gz: a57d77a2afd53a03508aac7a253d7521ced90318e52ce0c8583e8a2cc5960804b4631f4e94fb6e6424599bc0b7b2c8c578beea3c2ec6124203f1b0232ae11514
7
- data.tar.gz: ff0c63a2eab00afa7fc4ec348fd1088348cdec28ecd23214b42818d2c3c6e9da01b96c452257292248234f30a983ba60e6b8e1cf031a3e778d132ebda75103fc
6
+ metadata.gz: d3db65316fcd480937bdb39fe7f69b1a725aa7a03de4b677a201abaf35227a4ab43e338618452ebe84c680b236532753d9013fa80eaf631b157e968b4e296ac7
7
+ data.tar.gz: 6afe1ee9375edc2b7cd61208cf530fe54906dacab4957db70f37d94a5fca8839e05062c4c1e211aae4d1c0d92dec28ae71d4d4e9f7dd99c1054cb61848c63312
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: ..
3
3
  specs:
4
- zdm (1.0.3)
4
+ zdm (1.0.4)
5
5
  activerecord (>= 4.0)
6
6
 
7
7
  GEM
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: ..
3
3
  specs:
4
- zdm (1.0.3)
4
+ zdm (1.0.4)
5
5
  activerecord (>= 4.0)
6
6
 
7
7
  GEM
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: ..
3
3
  specs:
4
- zdm (1.0.3)
4
+ zdm (1.0.4)
5
5
  activerecord (>= 4.0)
6
6
 
7
7
  GEM
@@ -1,3 +1,3 @@
1
1
  module Zdm
2
- VERSION = '1.0.3'
2
+ VERSION = '1.0.4'
3
3
  end
data/lib/zdm.rb CHANGED
@@ -12,22 +12,89 @@ module Zdm
12
12
  end
13
13
 
14
14
  def cleanup(before: nil)
15
- conn = ActiveRecord::Base.connection
16
- zdm_tables = conn.send(tables_method).select { |name| name.starts_with?('zdm_') }
15
+ zdm_tables = connection.send(tables_method).select { |name| name.starts_with?('zdm_') }
17
16
  zdm_tables.each { |name| Migrator.new(Table.new(name.sub(/^zdm_/, ''))).cleanup }
18
17
 
19
- zdm_archive_tables = conn.send(tables_method).select { |name| name.starts_with?('zdma_') }
18
+ zdm_archive_tables = connection.send(tables_method).select { |name| name.starts_with?('zdma_') }
20
19
  if before
21
20
  zdm_archive_tables.select! { |table|
22
21
  Time.strptime(table, 'zdma_%Y%m%d_%H%M%S%N') <= before
23
22
  }
24
23
  end
25
- zdm_archive_tables.each { |name| conn.execute('DROP TABLE `%s`' % name) }
24
+ zdm_archive_tables.each { |name| execute('DROP TABLE `%s`' % name) }
26
25
  end
27
26
 
28
27
  def tables_method
29
28
  ActiveRecord.version.to_s =~ /^5/ ? :data_sources : :tables
30
29
  end
30
+
31
+ BATCH_SIZE = 40_000
32
+ DECREASE_THROTTLER = 4 # seconds
33
+ DECREASE_SIZE = 5_000
34
+ MIN_BATCH_SIZE = 10_000
35
+ PROGRESS_EVERY = 30 # seconds
36
+ def execute_in_batches(table_name, start: nil, finish: nil, batch_size: BATCH_SIZE, progress_every: PROGRESS_EVERY, &block)
37
+ min = start || connection.select_value('SELECT MIN(`id`) FROM %s' % table_name)
38
+ return unless min
39
+
40
+ max = finish || connection.select_value('SELECT MAX(`id`) FROM %s' % table_name)
41
+ todo = max - min + 1
42
+ return unless todo > 0
43
+
44
+ batch_end = min - 1
45
+ start_time = last_progress = Time.now
46
+ while true
47
+ batch_start = batch_end + 1
48
+ batch_end = [batch_start + batch_size - 1, max].min
49
+ start_batch_time = Time.now
50
+
51
+ sql = yield batch_start, batch_end
52
+ execute(sql) if sql
53
+
54
+ if $exit
55
+ write(table_name, 'Received SIGTERM, exiting...')
56
+ cleanup
57
+ exit 1
58
+ end
59
+
60
+ # The end!
61
+ break if batch_end >= max
62
+
63
+ # Throttle
64
+ current_time = Time.now
65
+ if (current_time - start_batch_time) > DECREASE_THROTTLER
66
+ batch_size = [(batch_size - DECREASE_SIZE).to_i, MIN_BATCH_SIZE].max
67
+ end
68
+
69
+ # Periodically show progress
70
+ if (current_time - last_progress) >= progress_every
71
+ last_progress = current_time
72
+ done = batch_end - min + 1
73
+ write(table_name, "%.2f%% (#{done}/#{todo})" % (done.to_f / todo * 100.0))
74
+ end
75
+ end
76
+
77
+ duration = Time.now - start_time
78
+ duration = (duration < 2*60) ? "#{duration.to_i} secs" : "#{(duration / 60).to_i} mins"
79
+ write(table_name, "Completed (#{duration})")
80
+ end
81
+
82
+ private
83
+
84
+ def connection
85
+ ActiveRecord::Base.connection
86
+ end
87
+
88
+ def execute(stmt)
89
+ connection.execute(stmt)
90
+ end
91
+
92
+ def write(table_name, msg)
93
+ return if Zdm.io == false
94
+ io = Zdm.io || $stderr
95
+ io.puts("#{table_name}: #{msg}")
96
+ io.flush
97
+ end
31
98
  end
32
99
 
33
100
  class Table
@@ -80,7 +147,7 @@ module Zdm
80
147
  drop_copy_indexes
81
148
  apply_ddl_statements
82
149
  create_triggers
83
- batched_copy
150
+ copy_in_batches
84
151
  create_copy_indexes
85
152
  atomic_switcharoo!
86
153
  ensure
@@ -180,7 +247,7 @@ module Zdm
180
247
  "zdmt_#{trigger_type}_#{table.origin}"[0...64]
181
248
  end
182
249
 
183
- # Drop indexes to speed up batched_copy.
250
+ # Drop indexes to speed up copy_in_batches.
184
251
  #
185
252
  # "Online DDL support for adding secondary indexes means that you can
186
253
  # generally speed the overall process of creating and loading a table
@@ -215,18 +282,7 @@ module Zdm
215
282
  execute('ALTER TABLE `%s` %s' % [table.copy, indexes.join(', ')])
216
283
  end
217
284
 
218
- BATCH_SIZE = 40_000
219
- DECREASE_THROTTLER = 4 # seconds
220
- DECREASE_SIZE = 5_000
221
- MIN_BATCH_SIZE = 10_000
222
- PROGRESS_EVERY = 30 # seconds
223
- def batched_copy
224
- min = connection.select_value('SELECT MIN(`id`) FROM %s' % table.origin)
225
- return unless min
226
-
227
- max = connection.select_value('SELECT MAX(`id`) FROM %s' % table.origin)
228
- todo = max - min + 1
229
-
285
+ def copy_in_batches
230
286
  insert_columns = common_columns.map {|c| "`#{c}`"}.join(', ')
231
287
  select_columns = common_columns.map {|c| "`#{table.origin}`.`#{c}`"}.join(', ')
232
288
  sql = <<-SQL.squish
@@ -236,50 +292,11 @@ module Zdm
236
292
  WHERE `#{table.origin}`.`id` BETWEEN %s AND %s
237
293
  SQL
238
294
 
239
- batch_size = BATCH_SIZE
240
- batch_end = min - 1
241
- start_time = last_progress = Time.now
242
- while true
243
- batch_start = batch_end + 1
244
- batch_end = [batch_start + batch_size - 1, max].min
245
- start_batch_time = Time.now
246
-
247
- execute(sql % [batch_start, batch_end])
248
-
249
- if $exit
250
- write('Received SIGTERM, exiting...')
251
- cleanup
252
- exit 1
253
- end
254
-
255
- # The end!
256
- break if batch_end >= max
257
-
258
- # Throttle
259
- current_time = Time.now
260
- if (current_time - start_batch_time) > DECREASE_THROTTLER
261
- batch_size = [(batch_size - DECREASE_SIZE).to_i, MIN_BATCH_SIZE].max
262
- end
263
-
264
- # Periodically show progress
265
- if (current_time - last_progress) >= PROGRESS_EVERY
266
- last_progress = current_time
267
- done = batch_end - min + 1
268
- write("%.2f%% (#{done}/#{todo})" % (done.to_f / todo * 100.0))
269
- end
295
+ Zdm.execute_in_batches(table.origin) do |batch_start, batch_end|
296
+ sql % [batch_start, batch_end]
270
297
  end
271
-
272
- duration = Time.now - start_time
273
- duration = (duration < 2*60) ? "#{duration.to_i} secs" : "#{(duration / 60).to_i} mins"
274
- write("Completed (#{duration})")
275
298
  end
276
299
 
277
- def write(msg)
278
- return if Zdm.io == false
279
- io = Zdm.io || $stderr
280
- io.puts("#{table.origin}: #{msg}")
281
- io.flush
282
- end
283
300
  end
284
301
  end
285
302
  trap('TERM') { $exit = true }
@@ -34,8 +34,5 @@ ActiveRecord::Schema.define version: 0 do
34
34
  end
35
35
  end
36
36
 
37
- ActiveRecord::Base.connection.execute(%[INSERT INTO people(account_id, name, code, created_at) VALUES (10,'foo','bar','2017-03-01 23:59:59')])
38
- ActiveRecord::Base.connection.execute(%[INSERT INTO people(account_id, name, code, created_at) VALUES (20,'foo2','bar2','2017-03-02 23:59:59')])
39
-
40
37
  # ActiveRecord::Base.logger = Logger.new($stdout)
41
38
 
@@ -5,6 +5,11 @@ describe Zdm do
5
5
  before(:example) {
6
6
  Zdm.io = false
7
7
  Zdm.cleanup
8
+
9
+ conn = ActiveRecord::Base.connection
10
+ conn.execute(%[TRUNCATE people])
11
+ conn.execute(%[INSERT INTO people(account_id, name, code, created_at) VALUES (10,'foo','bar','2017-03-01 23:59:59')])
12
+ conn.execute(%[INSERT INTO people(account_id, name, code, created_at) VALUES (20,'foo2','bar2','2017-03-02 23:59:59')])
8
13
  }
9
14
 
10
15
  it 'requires an autoincrement primary key `id` field' do
@@ -87,4 +92,49 @@ describe Zdm do
87
92
  expect(archive_tables.length).to eq(2)
88
93
  end
89
94
 
95
+ context 'execute_in_batches' do
96
+ before(:example) do
97
+ @conn = ActiveRecord::Base.connection
98
+ (1..20).each do |idx|
99
+ @conn.execute(%[INSERT INTO people(account_id, name, code, created_at) VALUES (10,'person-#{idx}','P#{idx}','2017-03-01 23:59:59')])
100
+ end
101
+ Zdm.io = StringIO.new
102
+ @sql = "UPDATE people SET code = CONCAT(code, 'U') WHERE id BETWEEN %s AND %s"
103
+ end
104
+
105
+ after(:example) do
106
+ @conn.execute(%[DELETE FROM people WHERE name LIKE 'person%'])
107
+ end
108
+
109
+ it 'updates a table in batches' do
110
+ Zdm.execute_in_batches('people', batch_size: 4, progress_every: 1) do |batch_start, batch_end|
111
+ sleep(0.6)
112
+ @sql % [batch_start, batch_end]
113
+ end
114
+ expect(Zdm.io.string).to eq(%[people: 36.36% (8/22)\npeople: 72.73% (16/22)\npeople: Completed (3 secs)\n])
115
+ expect(@conn.select_value(%[SELECT COUNT(*) FROM people WHERE code LIKE '%U'])).to eq(22)
116
+ end
117
+
118
+ it 'updates part of a table in batches' do
119
+ batches = []
120
+ Zdm.execute_in_batches('people', start: 5, finish: 18, batch_size: 4, progress_every: 1) do |batch_start, batch_end|
121
+ sleep(0.6)
122
+ batches << @sql % [batch_start, batch_end]
123
+ @sql % [batch_start, batch_end]
124
+ end
125
+ expect(Zdm.io.string).to eq(%[people: 57.14% (8/14)\npeople: Completed (2 secs)\n])
126
+ expect(batches).to eq([
127
+ %[UPDATE people SET code = CONCAT(code, 'U') WHERE id BETWEEN 5 AND 8],
128
+ %[UPDATE people SET code = CONCAT(code, 'U') WHERE id BETWEEN 9 AND 12],
129
+ %[UPDATE people SET code = CONCAT(code, 'U') WHERE id BETWEEN 13 AND 16],
130
+ %[UPDATE people SET code = CONCAT(code, 'U') WHERE id BETWEEN 17 AND 18],
131
+ ])
132
+ expect(@conn.select_values(%[SELECT code FROM people WHERE code LIKE '%U'])).to eq([
133
+ 'P3U', 'P4U', 'P5U', 'P6U', 'P7U', 'P8U', 'P9U', 'P10U', 'P11U', 'P12U', 'P13U', 'P14U', 'P15U', 'P16U'
134
+ ])
135
+ end
136
+
137
+ end
138
+
139
+
90
140
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zdm
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.3
4
+ version: 1.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - ITRP Institute, Inc.
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-04-23 00:00:00.000000000 Z
11
+ date: 2017-05-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activerecord