tapsoob 0.5.29 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/tapsoob/cli/root.rb +3 -0
- data/lib/tapsoob/multi_progress_bar.rb +234 -0
- data/lib/tapsoob/operation.rb +130 -11
- data/lib/tapsoob/progress_bar.rb +3 -2
- data/lib/tapsoob/schema.rb +39 -4
- data/lib/tapsoob/utils.rb +10 -3
- data/lib/tapsoob/version.rb +1 -1
- metadata +2 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 682ebd67a432169d8f72eb47412aaface0df4ba058be6849fba6e91ab96888c1
|
|
4
|
+
data.tar.gz: 3aba7ecf086fc98b21a9c2c6b2e9ae3413fd71d98106de415a3e3223732b0947
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: d3b49e8f114d4c2b693a524b60ee3959b36cdddb3dd7e2f6378a9f72a6f86a1ca44ab26b609cf4a4d88747813fffdd233a70efa34465df613ceea0af18af6aa2
|
|
7
|
+
data.tar.gz: 9793f681463d9f92ef4eae1015ed11b35d351a7f5938b2ad79e5d3cc67793fc421a02e195536957a56acde9b8d8cc81ab429a5a0852ae6b2d2eddcf38445222b
|
data/lib/tapsoob/cli/root.rb
CHANGED
|
@@ -22,6 +22,7 @@ module Tapsoob
|
|
|
22
22
|
option :"exclude-tables", desc: "Shortcut to exclude a list of tables", type: :array, aliases: "-e"
|
|
23
23
|
option :"indexes", type: :boolean, default: false
|
|
24
24
|
option :"same-db", type: :boolean, default: false
|
|
25
|
+
option :parallel, desc: "Number of parallel workers for table processing (default: 1)", default: 1, type: :numeric, aliases: "-j"
|
|
25
26
|
option :progress, desc: "Show progress", default: true, type: :boolean
|
|
26
27
|
option :debug, desc: "Enable debug messages", default: false, type: :boolean
|
|
27
28
|
def pull(dump_path, database_url)
|
|
@@ -47,6 +48,7 @@ module Tapsoob
|
|
|
47
48
|
option :purge, desc: "Purge data in tables prior to performing the import", default: false, type: :boolean, aliases: "-p"
|
|
48
49
|
option :"skip-duplicates", desc: "Remove duplicates when loading data", default: false, type: :boolean
|
|
49
50
|
option :"discard-identity", desc: "Remove identity when pushing data (may result in creating duplicates)", default: false, type: :boolean
|
|
51
|
+
option :parallel, desc: "Number of parallel workers for table processing (default: 1)", default: 1, type: :numeric, aliases: "-j"
|
|
50
52
|
option :progress, desc: "Show progress", default: true, type: :boolean
|
|
51
53
|
option :debug, desc: "Enable debug messages", default: false, type: :boolean
|
|
52
54
|
def push(dump_path, database_url)
|
|
@@ -82,6 +84,7 @@ module Tapsoob
|
|
|
82
84
|
indexes_first: options[:"indexes_first"],
|
|
83
85
|
disable_compression: options[:"disable-compression"],
|
|
84
86
|
tables: options[:tables],
|
|
87
|
+
parallel: options[:parallel],
|
|
85
88
|
progress: options[:progress],
|
|
86
89
|
debug: options[:debug]
|
|
87
90
|
})
|
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
|
2
|
+
require 'tapsoob/progress_bar'
|
|
3
|
+
|
|
4
|
+
# MultiProgressBar manages multiple progress bars in parallel.
# Each bar gets its own line in the terminal. Bars are created with
# #create_bar and report back through #update / #finish_bar; every write to
# the output stream happens while holding a single mutex and uses ANSI cursor
# escapes (cursor up, erase line) to repaint the reserved lines in place.
class MultiProgressBar
  # Minimum number of seconds between two throttled redraws (10 updates/sec).
  REDRAW_INTERVAL = 0.1

  # Current width of the title column, used by bars for alignment.
  # Starts at 14 and grows with longer titles.
  # Note: bars read this while being rendered from inside a synchronized
  # redraw, so no additional locking is performed here.
  attr_reader :max_title_width

  # max_bars - the number of reserved terminal lines is capped at 2 * max_bars.
  # out      - IO-like object receiving the output (defaults to STDOUT).
  def initialize(max_bars = 4, out = STDOUT)
    @max_bars = max_bars
    @bars = []
    @mutex = Mutex.new
    @active = true
    @out = out
    @last_update = monotonic_now
    @reserved_lines = 0 # Track how many lines we've actually reserved
    @max_title_width = 14 # Minimum width, will grow with longer titles
  end

  # Create a new progress bar and return it.
  #
  # title - label of the bar; an existing bar with the same title is replaced.
  # total - number of steps the bar represents.
  def create_bar(title, total)
    @mutex.synchronize do
      # Remove any existing bar with the same title to prevent duplicates
      @bars.reject! { |b| b.title == title }

      # Update max title width to accommodate longer titles
      @max_title_width = [@max_title_width, title.length].max

      bar = ThreadSafeProgressBar.new(title, total, self)

      # Reserve a line for this new bar during active updates.
      # Cap at 2 * max_bars to show active workers + some recent finished bars.
      if @reserved_lines < @max_bars * 2
        @out.print "\n"
        @out.flush
        @reserved_lines += 1
      end

      @bars << bar
      bar
    end
  end

  # Called by individual bars when they update. Redraws at most once per
  # REDRAW_INTERVAL and does nothing once #stop has been called.
  def update
    @mutex.synchronize do
      next unless @active
      next unless should_redraw?

      @last_update = monotonic_now
      redraw_all
    end
  end

  # Finish a specific bar - mark it as completed.
  def finish_bar(bar)
    @mutex.synchronize do
      next unless @active

      bar.mark_finished

      # Respect throttle when finishing to avoid spamming redraws.
      # If throttled, the next regular update (or #stop) shows the finished state.
      if should_redraw?
        @last_update = monotonic_now
        redraw_all
      end
    end
  end

  # Stop all progress bars and keep them visible.
  def stop
    @mutex.synchronize do
      @active = false

      # Final cleanup: remove any duplicate titles (keep the last occurrence of each unique title)
      @bars = @bars.reverse.uniq(&:title).reverse

      # Final redraw to show completed state (skip active check)
      redraw_all(true)
      # Move cursor past all bars
      @out.print "\n"
      @out.flush
    end
  end

  private

  # Monotonic timestamp in seconds; unlike Time.now it is not affected by
  # wall-clock adjustments, so the redraw throttle cannot stall or burst.
  def monotonic_now
    Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end

  # Check if enough time has passed to redraw (throttle to 10 updates/sec)
  def should_redraw?
    monotonic_now - @last_update >= REDRAW_INTERVAL
  end

  # Repaint the bars. With force = true the repaint also happens after #stop,
  # in which case the final (complete) display is rendered.
  def redraw_all(force = false)
    return unless force || @active
    return if @bars.empty?

    if force && !@active
      render_final_display
    else
      render_active_display
    end
  end

  # Final display: show all completed bars
  def render_final_display
    if @reserved_lines > 0
      # Clear the reserved lines first...
      @out.print "\r\e[#{@reserved_lines}A"
      @reserved_lines.times { @out.print "\r\e[K\n" }
      # ...then go back to the top of the reserved area. Clearing moved the
      # cursor down again; without this step the bars would be printed below
      # a gap of blank lines instead of reusing the reserved space.
      @out.print "\r\e[#{@reserved_lines}A"
    end

    # Print all bars (adds new lines as needed)
    @bars.each do |bar|
      @out.print "\r\e[K"
      bar.render_to(@out)
      @out.print "\n"
    end

    @out.flush
  end

  # Normal operation: show active bars + recent finished in reserved space
  def render_active_display
    return if @reserved_lines == 0

    active_bars, finished_bars = @bars.partition { |b| !b.finished? }

    # Active bars first, then the most recently finished ones fill what is
    # left; never request a negative count from #last.
    remaining_space = [@reserved_lines - active_bars.length, 0].max
    bars_to_draw = active_bars + finished_bars.last(remaining_space)

    # If we have more bars than reserved lines, show only the most recent
    bars_to_draw = bars_to_draw.last(@reserved_lines) if bars_to_draw.length > @reserved_lines

    # Move up and redraw in reserved space; unused lines are just cleared
    @out.print "\r\e[#{@reserved_lines}A"
    @reserved_lines.times do |i|
      @out.print "\r\e[K"
      bars_to_draw[i].render_to(@out) if i < bars_to_draw.length
      @out.print "\n"
    end

    @out.flush
  end
end
|
|
153
|
+
|
|
154
|
+
# Thread-safe progress bar that reports to a MultiProgressBar.
# It does not draw itself: progress changes are forwarded to the owning
# MultiProgressBar, which later calls #render_to to obtain this bar's line.
class ThreadSafeProgressBar < ProgressBar
  attr_reader :title

  # title              - label shown in front of the bar.
  # total              - value at which the bar is complete.
  # multi_progress_bar - coordinator that is notified on every change.
  def initialize(title, total, multi_progress_bar)
    # The parent initializer is intentionally not called; output is managed
    # by the coordinator, so the state is set up by hand here.
    @title = title
    @total = total
    @multi_progress_bar = multi_progress_bar
    @out = STDOUT # Need this for get_width to work
    @terminal_width = 80
    @bar_mark = "="
    @current = 0
    @previous = 0
    @finished_p = false
    started_at = ::Time.now
    @start_time = started_at
    @previous_time = started_at
    @format_arguments = [:title, :percentage, :bar, :stat]
  end

  # Instead of printing, tell the coordinator that something changed.
  def show
    @previous_time = ::Time.now # Update to prevent time-based refresh spam
    @multi_progress_bar.update
  end

  # Write this bar's line (without a trailing newline) to +out+.
  def render_to(out)
    # Shared title width from the coordinator keeps all bars aligned; it is
    # kept in an instance variable so the fmt_* methods can use it.
    @title_width = @multi_progress_bar.max_title_width

    # Re-read the terminal width on every render to follow resizes.
    width = get_width

    # Fixed part of a line: title + " " + percentage(4) + " " + "|" + "|" +
    # " " + timer(15) = title_width + 25, plus 3 characters of safety padding.
    reserved = @title_width + 28
    @terminal_width = [width - reserved, 20].max

    template = "%-#{@title_width}s %3d%% %s %s"
    values = @format_arguments.map { |name| send("fmt_#{name}") }
    line = template % values

    # Cut the line 2 characters short of the terminal width to prevent wrapping.
    limit = width - 2
    line = line[0, limit] if line.length > limit

    out.print(line)
  end

  # Clearing is handled by the MultiProgressBar, so this does nothing.
  def clear
    # no-op
  end

  # Flag this bar as finished (for tracking).
  def mark_finished
    @finished_p = true
  end

  # Reports the same @finished_p flag set by #mark_finished.
  def finished?
    @finished_p
  end

  # Jump to the total and let the coordinator record the completion.
  def finish
    @current = @total
    @multi_progress_bar.finish_bar(self)
  end

  # Advance by +step+ (never beyond the total) and refresh if needed.
  def inc(step = 1)
    advanced = @current + step
    @current = advanced > @total ? @total : advanced
    show_if_needed
    @previous = @current
  end
end
|
data/lib/tapsoob/operation.rb
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
# -*- encoding : utf-8 -*-
|
|
2
2
|
require 'sequel'
|
|
3
|
+
require 'thread'
|
|
3
4
|
|
|
4
5
|
require 'tapsoob/data_stream'
|
|
5
6
|
require 'tapsoob/log'
|
|
6
7
|
require 'tapsoob/progress_bar'
|
|
8
|
+
require 'tapsoob/multi_progress_bar'
|
|
7
9
|
require 'tapsoob/schema'
|
|
8
10
|
|
|
9
11
|
module Tapsoob
|
|
@@ -117,7 +119,7 @@ module Tapsoob
|
|
|
117
119
|
end
|
|
118
120
|
|
|
119
121
|
def db
|
|
120
|
-
@db ||= Sequel.connect(database_url)
|
|
122
|
+
@db ||= Sequel.connect(database_url, max_connections: parallel_workers * 2)
|
|
121
123
|
@db.extension :schema_dumper
|
|
122
124
|
@db.loggers << Tapsoob.log if opts[:debug]
|
|
123
125
|
|
|
@@ -130,6 +132,24 @@ module Tapsoob
|
|
|
130
132
|
@db
|
|
131
133
|
end
|
|
132
134
|
|
|
135
|
+
# True when more than one worker is configured, i.e. when tables are
# processed by the parallel code path instead of the serial one.
def parallel?
  worker_count = parallel_workers
  worker_count > 1
end
|
|
138
|
+
|
|
139
|
+
# Number of worker threads to use: the :parallel option converted with
# to_i, but never less than 1. The value is computed once and memoized.
def parallel_workers
  return @parallel_workers if @parallel_workers

  requested = opts[:parallel].to_i
  @parallel_workers = requested < 1 ? 1 : requested
end
|
|
142
|
+
|
|
143
|
+
# Mutex guarding the list of completed tables; created on first use and
# reused afterwards.
# NOTE(review): the lazy creation itself is not synchronized, so the first
# call should happen before any worker thread can reach it - confirm callers.
def completed_tables_mutex
  return @completed_tables_mutex if @completed_tables_mutex

  @completed_tables_mutex = Mutex.new
end
|
|
146
|
+
|
|
147
|
+
# Record a table as completed. The name is stored as a String and the
# append happens while holding completed_tables_mutex, so several worker
# threads can report completions.
def add_completed_table(table_name)
  name = table_name.to_s
  completed_tables_mutex.synchronize { completed_tables << name }
end
|
|
152
|
+
|
|
133
153
|
def format_number(num)
|
|
134
154
|
num.to_s.gsub(/(\d)(?=(\d\d\d)+(?!\d))/, "\\1,")
|
|
135
155
|
end
|
|
@@ -183,7 +203,8 @@ module Tapsoob
|
|
|
183
203
|
|
|
184
204
|
progress = ProgressBar.new('Schema', tables.size)
|
|
185
205
|
tables.each do |table_name, count|
|
|
186
|
-
|
|
206
|
+
# Reuse existing db connection for better performance
|
|
207
|
+
schema_data = Tapsoob::Schema.dump_table(db, table_name, @opts.slice(:indexes, :same_db))
|
|
187
208
|
log.debug "Table: #{table_name}\n#{schema_data}\n"
|
|
188
209
|
output = Tapsoob::Utils.export_schema(dump_path, table_name, schema_data)
|
|
189
210
|
puts output if dump_path.nil? && output
|
|
@@ -197,6 +218,14 @@ module Tapsoob
|
|
|
197
218
|
|
|
198
219
|
log.info "#{tables.size} tables, #{format_number(record_count)} records"
|
|
199
220
|
|
|
221
|
+
if parallel?
|
|
222
|
+
pull_data_parallel
|
|
223
|
+
else
|
|
224
|
+
pull_data_serial
|
|
225
|
+
end
|
|
226
|
+
end
|
|
227
|
+
|
|
228
|
+
def pull_data_serial
|
|
200
229
|
tables.each do |table_name, count|
|
|
201
230
|
stream = Tapsoob::DataStream.factory(db, {
|
|
202
231
|
:chunksize => default_chunksize,
|
|
@@ -208,6 +237,38 @@ module Tapsoob
|
|
|
208
237
|
end
|
|
209
238
|
end
|
|
210
239
|
|
|
240
|
+
# Pull the data of all tables using parallel_workers threads.
#
# Every [table_name, count] pair from +tables+ is put on a queue that the
# workers drain; each table is handed to pull_data_from_table together with
# its own progress bar (or nil when progress output is disabled).
# An exception raised inside a worker propagates to the caller via join.
def pull_data_parallel
  log.info "Using #{parallel_workers} parallel workers"

  multi_progress = opts[:progress] ? MultiProgressBar.new(parallel_workers) : nil
  table_queue = Queue.new
  tables.each { |table_name, count| table_queue << [table_name, count] }

  # The mutex is created lazily; touch it here, before any thread exists, so
  # two workers can never race to create two different mutexes.
  completed_tables_mutex

  workers = Array.new(parallel_workers) do
    Thread.new do
      loop do
        begin
          # Non-blocking pop: raises ThreadError once the queue is drained.
          table_name, count = table_queue.pop(true)
        rescue ThreadError
          break
        end

        # Each thread gets its own connection from the pool
        stream = Tapsoob::DataStream.factory(db, {
          :chunksize => default_chunksize,
          :table_name => table_name
        }, { :debug => opts[:debug] })

        # At least one chunk so that empty tables still get a valid bar
        estimated_chunks = [(count.to_f / default_chunksize).ceil, 1].max
        progress = multi_progress ? multi_progress.create_bar(table_name.to_s, estimated_chunks) : nil

        pull_data_from_table(stream, progress)
      end
    end
  end

  workers.each(&:join)
ensure
  # Always leave the terminal in a clean state, even when a worker failed.
  multi_progress.stop if multi_progress
end
|
|
271
|
+
|
|
211
272
|
def pull_partial_data
|
|
212
273
|
return if stream_state == {}
|
|
213
274
|
|
|
@@ -279,7 +340,7 @@ module Tapsoob
|
|
|
279
340
|
end
|
|
280
341
|
|
|
281
342
|
progress.finish if progress
|
|
282
|
-
|
|
343
|
+
add_completed_table(stream.table_name)
|
|
283
344
|
self.stream_state = {}
|
|
284
345
|
end
|
|
285
346
|
|
|
@@ -332,9 +393,12 @@ module Tapsoob
|
|
|
332
393
|
raw_idxs = Tapsoob::Schema.indexes_individual(database_url)
|
|
333
394
|
idxs = (raw_idxs && raw_idxs.length >= 2 ? JSON.parse(raw_idxs) : {})
|
|
334
395
|
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
396
|
+
# Calculate max title width for consistent alignment
|
|
397
|
+
filtered_idxs = apply_table_filter(idxs).select { |table, indexes| indexes.size > 0 }
|
|
398
|
+
max_title_width = filtered_idxs.keys.map { |table| "#{table} indexes".length }.max || 14
|
|
399
|
+
|
|
400
|
+
filtered_idxs.each do |table, indexes|
|
|
401
|
+
progress = ProgressBar.new("#{table} indexes", indexes.size, STDOUT, max_title_width)
|
|
338
402
|
indexes.each do |idx|
|
|
339
403
|
output = Tapsoob::Utils.export_indexes(dump_path, table, idx)
|
|
340
404
|
puts output if dump_path.nil? && output
|
|
@@ -388,9 +452,12 @@ module Tapsoob
|
|
|
388
452
|
|
|
389
453
|
log.info "Sending indexes"
|
|
390
454
|
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
455
|
+
# Calculate max title width for consistent alignment
|
|
456
|
+
filtered_idxs = apply_table_filter(idxs).select { |table, indexes| indexes.size > 0 }
|
|
457
|
+
max_title_width = filtered_idxs.keys.map { |table| "#{table} indexes".length }.max || 14
|
|
458
|
+
|
|
459
|
+
filtered_idxs.each do |table, indexes|
|
|
460
|
+
progress = ProgressBar.new("#{table} indexes", indexes.size, STDOUT, max_title_width)
|
|
394
461
|
indexes.each do |idx|
|
|
395
462
|
Tapsoob::Utils.load_indexes(database_url, idx)
|
|
396
463
|
progress.inc(1)
|
|
@@ -405,7 +472,8 @@ module Tapsoob
|
|
|
405
472
|
progress = ProgressBar.new('Schema', tables.size)
|
|
406
473
|
tables.each do |table, count|
|
|
407
474
|
log.debug "Loading '#{table}' schema\n"
|
|
408
|
-
|
|
475
|
+
# Reuse existing db connection for better performance
|
|
476
|
+
Tapsoob::Utils.load_schema(dump_path, db, table)
|
|
409
477
|
progress.inc(1)
|
|
410
478
|
end
|
|
411
479
|
progress.finish
|
|
@@ -435,6 +503,14 @@ module Tapsoob
|
|
|
435
503
|
|
|
436
504
|
log.info "#{tables.size} tables, #{format_number(record_count)} records"
|
|
437
505
|
|
|
506
|
+
if parallel?
|
|
507
|
+
push_data_parallel
|
|
508
|
+
else
|
|
509
|
+
push_data_serial
|
|
510
|
+
end
|
|
511
|
+
end
|
|
512
|
+
|
|
513
|
+
def push_data_serial
|
|
438
514
|
tables.each do |table_name, count|
|
|
439
515
|
# Skip if data file doesn't exist or has no data
|
|
440
516
|
data_file = File.join(dump_path, "data", "#{table_name}.json")
|
|
@@ -455,6 +531,49 @@ module Tapsoob
|
|
|
455
531
|
end
|
|
456
532
|
end
|
|
457
533
|
|
|
534
|
+
# Push the data of all tables using parallel_workers threads.
#
# Only tables that have a data file under <dump_path>/data/<table>.json and
# a positive row count are queued. Each worker optionally truncates the
# table (:purge), builds a data stream and hands it to push_data_from_file
# together with its own progress bar (or nil when progress is disabled).
# An exception raised inside a worker propagates to the caller via join.
def push_data_parallel
  log.info "Using #{parallel_workers} parallel workers"

  multi_progress = opts[:progress] ? MultiProgressBar.new(parallel_workers) : nil
  table_queue = Queue.new

  tables.each do |table_name, count|
    data_file = File.join(dump_path, "data", "#{table_name}.json")
    next unless File.exist?(data_file) && count > 0
    table_queue << [table_name, count]
  end

  # The mutex is created lazily; touch it here, before any thread exists, so
  # two workers can never race to create two different mutexes.
  completed_tables_mutex

  workers = Array.new(parallel_workers) do
    Thread.new do
      loop do
        begin
          # Non-blocking pop: raises ThreadError once the queue is drained.
          table_name, count = table_queue.pop(true)
        rescue ThreadError
          break
        end

        # Each thread gets its own connection from the pool
        db[table_name.to_sym].truncate if @opts[:purge]
        stream = Tapsoob::DataStream.factory(db, {
          :table_name => table_name,
          :chunksize => default_chunksize
        }, {
          :"skip-duplicates" => opts[:"skip-duplicates"] || false,
          :"discard-identity" => opts[:"discard-identity"] || false,
          :purge => opts[:purge] || false,
          :debug => opts[:debug]
        })

        estimated_chunks = [(count.to_f / default_chunksize).ceil, 1].max
        progress = multi_progress ? multi_progress.create_bar(table_name.to_s, estimated_chunks) : nil

        push_data_from_file(stream, progress)
      end
    end
  end

  workers.each(&:join)
ensure
  # Always leave the terminal in a clean state, even when a worker failed.
  multi_progress.stop if multi_progress
end
|
|
576
|
+
|
|
458
577
|
def push_data_from_file(stream, progress)
|
|
459
578
|
loop do
|
|
460
579
|
if exiting?
|
|
@@ -508,7 +627,7 @@ module Tapsoob
|
|
|
508
627
|
end
|
|
509
628
|
|
|
510
629
|
progress.finish if progress
|
|
511
|
-
|
|
630
|
+
add_completed_table(stream.table_name)
|
|
512
631
|
self.stream_state = {}
|
|
513
632
|
end
|
|
514
633
|
|
data/lib/tapsoob/progress_bar.rb
CHANGED
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
class ProgressBar
|
|
14
14
|
VERSION = "0.9"
|
|
15
15
|
|
|
16
|
-
def initialize (title, total, out = STDOUT)
|
|
16
|
+
def initialize (title, total, out = STDOUT, title_width = nil)
|
|
17
17
|
@title = title
|
|
18
18
|
@total = total
|
|
19
19
|
@out = out
|
|
@@ -24,7 +24,8 @@ class ProgressBar
|
|
|
24
24
|
@finished_p = false
|
|
25
25
|
@start_time = ::Time.now
|
|
26
26
|
@previous_time = @start_time
|
|
27
|
-
|
|
27
|
+
# Set title width: use provided width, or accommodate the title, with a minimum of 14
|
|
28
|
+
@title_width = title_width || [title.length, 14].max
|
|
28
29
|
@format = "%-#{@title_width}s %3d%% %s %s"
|
|
29
30
|
@format_arguments = [:title, :percentage, :bar, :stat]
|
|
30
31
|
clear
|
data/lib/tapsoob/schema.rb
CHANGED
|
@@ -31,9 +31,11 @@ END_MIG
|
|
|
31
31
|
template.result(binding)
|
|
32
32
|
end
|
|
33
33
|
|
|
34
|
-
def dump_table(
|
|
34
|
+
def dump_table(database_url_or_db, table, options)
|
|
35
35
|
table = table.to_sym
|
|
36
|
-
|
|
36
|
+
# Accept either a database URL or an existing connection object
|
|
37
|
+
if database_url_or_db.is_a?(Sequel::Database)
|
|
38
|
+
db = database_url_or_db
|
|
37
39
|
db.extension :schema_dumper
|
|
38
40
|
<<END_MIG
|
|
39
41
|
Class.new(Sequel::Migration) do
|
|
@@ -46,6 +48,21 @@ Class.new(Sequel::Migration) do
|
|
|
46
48
|
end
|
|
47
49
|
end
|
|
48
50
|
END_MIG
|
|
51
|
+
else
|
|
52
|
+
Sequel.connect(database_url_or_db) do |db|
|
|
53
|
+
db.extension :schema_dumper
|
|
54
|
+
<<END_MIG
|
|
55
|
+
Class.new(Sequel::Migration) do
|
|
56
|
+
def up
|
|
57
|
+
#{db.dump_table_schema(table, options)}
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
def down
|
|
61
|
+
drop_table("#{table}", if_exists: true)
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
END_MIG
|
|
65
|
+
end
|
|
49
66
|
end
|
|
50
67
|
end
|
|
51
68
|
|
|
@@ -86,8 +103,10 @@ END_MIG
|
|
|
86
103
|
JSON.generate(idxs)
|
|
87
104
|
end
|
|
88
105
|
|
|
89
|
-
def load(
|
|
90
|
-
|
|
106
|
+
def load(database_url_or_db, schema, options = { drop: false })
|
|
107
|
+
# Accept either a database URL or an existing connection object
|
|
108
|
+
if database_url_or_db.is_a?(Sequel::Database)
|
|
109
|
+
db = database_url_or_db
|
|
91
110
|
db.extension :schema_dumper
|
|
92
111
|
klass = eval(schema)
|
|
93
112
|
if options[:drop]
|
|
@@ -101,6 +120,22 @@ END_MIG
|
|
|
101
120
|
db.run("SET foreign_key_checks = 1") if [:mysql, :mysql2].include?(db.adapter_scheme)
|
|
102
121
|
end
|
|
103
122
|
klass.apply(db, :up)
|
|
123
|
+
else
|
|
124
|
+
Sequel.connect(database_url_or_db) do |db|
|
|
125
|
+
db.extension :schema_dumper
|
|
126
|
+
klass = eval(schema)
|
|
127
|
+
if options[:drop]
|
|
128
|
+
# Start special hack for MySQL
|
|
129
|
+
db.run("SET foreign_key_checks = 0") if [:mysql, :mysql2].include?(db.adapter_scheme)
|
|
130
|
+
|
|
131
|
+
# Run down migration
|
|
132
|
+
klass.apply(db, :down)
|
|
133
|
+
|
|
134
|
+
# End special hack for MySQL
|
|
135
|
+
db.run("SET foreign_key_checks = 1") if [:mysql, :mysql2].include?(db.adapter_scheme)
|
|
136
|
+
end
|
|
137
|
+
klass.apply(db, :up)
|
|
138
|
+
end
|
|
104
139
|
end
|
|
105
140
|
end
|
|
106
141
|
|
data/lib/tapsoob/utils.rb
CHANGED
|
@@ -161,9 +161,16 @@ Data : #{data}
|
|
|
161
161
|
end
|
|
162
162
|
end
|
|
163
163
|
|
|
164
|
-
def load_schema(dump_path,
|
|
165
|
-
|
|
166
|
-
|
|
164
|
+
# Load the schema of +table+ from <dump_path>/schemas/<table>.rb.
#
# dump_path          - directory of the dump.
# database_url_or_db - either an existing Sequel::Database connection, which
#                      is used directly, or a database URL, which is passed
#                      to schema_bin together with the schema file path.
# table              - table name (used to build the schema file name).
def load_schema(dump_path, database_url_or_db, table)
  schema_file = File.join(dump_path, "schemas", "#{table}.rb")

  if database_url_or_db.is_a?(Sequel::Database)
    # Only this branch needs the file content, so the file is read here and
    # not up front; the URL branch passes the path on untouched.
    Tapsoob::Schema.load(database_url_or_db, File.read(schema_file))
  else
    schema_bin(:load, database_url_or_db, schema_file)
  end
end
|
|
168
175
|
|
|
169
176
|
def load_indexes(database_url, index)
|
data/lib/tapsoob/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: tapsoob
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.5.29
|
|
4
|
+
version: 0.6.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Félix Bellanger
|
|
@@ -104,6 +104,7 @@ files:
|
|
|
104
104
|
- lib/tapsoob/data_stream.rb
|
|
105
105
|
- lib/tapsoob/errors.rb
|
|
106
106
|
- lib/tapsoob/log.rb
|
|
107
|
+
- lib/tapsoob/multi_progress_bar.rb
|
|
107
108
|
- lib/tapsoob/operation.rb
|
|
108
109
|
- lib/tapsoob/progress_bar.rb
|
|
109
110
|
- lib/tapsoob/railtie.rb
|