tapsoob 0.5.31 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bff0c57641151d932d224ec7b245a23c4b993eac3c8da25f10724208b46f5953
4
- data.tar.gz: 186003a5f0156be1ebaac8f80ef44a3f985b23544a33a1e127067b776cf50d88
3
+ metadata.gz: 682ebd67a432169d8f72eb47412aaface0df4ba058be6849fba6e91ab96888c1
4
+ data.tar.gz: 3aba7ecf086fc98b21a9c2c6b2e9ae3413fd71d98106de415a3e3223732b0947
5
5
  SHA512:
6
- metadata.gz: c78e9d29d6247823deae5f9f1a86c59792b4e52c34f4d1536cdd16bc6168a2d38b3bfba434ec96b2be72d633e927f672578e70904387435a4152bc212ac544d5
7
- data.tar.gz: 30a3507296d49349a3049fc2e24b61cbc98f49fdd0c38ce05f1714aefa0a598eed832ac5826c213f982b549708b566cc263053acf085b86ecc9fa7beb4ac6837
6
+ metadata.gz: d3b49e8f114d4c2b693a524b60ee3959b36cdddb3dd7e2f6378a9f72a6f86a1ca44ab26b609cf4a4d88747813fffdd233a70efa34465df613ceea0af18af6aa2
7
+ data.tar.gz: 9793f681463d9f92ef4eae1015ed11b35d351a7f5938b2ad79e5d3cc67793fc421a02e195536957a56acde9b8d8cc81ab429a5a0852ae6b2d2eddcf38445222b
@@ -22,6 +22,7 @@ module Tapsoob
22
22
  option :"exclude-tables", desc: "Shortcut to exclude a list of tables", type: :array, aliases: "-e"
23
23
  option :"indexes", type: :boolean, default: false
24
24
  option :"same-db", type: :boolean, default: false
25
+ option :parallel, desc: "Number of parallel workers for table processing (default: 1)", default: 1, type: :numeric, aliases: "-j"
25
26
  option :progress, desc: "Show progress", default: true, type: :boolean
26
27
  option :debug, desc: "Enable debug messages", default: false, type: :boolean
27
28
  def pull(dump_path, database_url)
@@ -47,6 +48,7 @@ module Tapsoob
47
48
  option :purge, desc: "Purge data in tables prior to performing the import", default: false, type: :boolean, aliases: "-p"
48
49
  option :"skip-duplicates", desc: "Remove duplicates when loading data", default: false, type: :boolean
49
50
  option :"discard-identity", desc: "Remove identity when pushing data (may result in creating duplicates)", default: false, type: :boolean
51
+ option :parallel, desc: "Number of parallel workers for table processing (default: 1)", default: 1, type: :numeric, aliases: "-j"
50
52
  option :progress, desc: "Show progress", default: true, type: :boolean
51
53
  option :debug, desc: "Enable debug messages", default: false, type: :boolean
52
54
  def push(dump_path, database_url)
@@ -82,6 +84,7 @@ module Tapsoob
82
84
  indexes_first: options[:"indexes_first"],
83
85
  disable_compression: options[:"disable-compression"],
84
86
  tables: options[:tables],
87
+ parallel: options[:parallel],
85
88
  progress: options[:progress],
86
89
  debug: options[:debug]
87
90
  })
@@ -0,0 +1,234 @@
1
+ # -*- encoding : utf-8 -*-
2
+ require 'tapsoob/progress_bar'
3
+
4
+ # MultiProgressBar manages multiple progress bars in parallel
5
+ # Each bar gets its own line in the terminal
6
+ class MultiProgressBar
7
+ def initialize(max_bars = 4)
8
+ @max_bars = max_bars
9
+ @bars = []
10
+ @mutex = Mutex.new
11
+ @active = true
12
+ @out = STDOUT
13
+ @last_update = Time.now
14
+ @reserved_lines = 0 # Track how many lines we've actually reserved
15
+ @max_title_width = 14 # Minimum width, will grow with longer titles
16
+ end
17
+
18
+ # Create a new progress bar and return it
19
+ def create_bar(title, total)
20
+ @mutex.synchronize do
21
+ # Remove any existing bar with the same title to prevent duplicates
22
+ @bars.reject! { |b| b.title == title }
23
+
24
+ # Update max title width to accommodate longer titles
25
+ @max_title_width = [@max_title_width, title.length].max
26
+
27
+ bar = ThreadSafeProgressBar.new(title, total, self)
28
+
29
+ # Reserve a line for this new bar during active updates
30
+ # Cap at 2 * max_bars to show active workers + some recent finished bars
31
+ if @reserved_lines < @max_bars * 2
32
+ @out.print "\n"
33
+ @out.flush
34
+ @reserved_lines += 1
35
+ end
36
+
37
+ @bars << bar
38
+ bar
39
+ end
40
+ end
41
+
42
+ # Get the current maximum title width for alignment
43
+ # Note: Always called from within synchronized methods, so no mutex needed
44
+ def max_title_width
45
+ @max_title_width
46
+ end
47
+
48
+ # Called by individual bars when they update
49
+ def update
50
+ @mutex.synchronize do
51
+ return unless @active
52
+ return unless should_redraw?
53
+
54
+ @last_update = Time.now
55
+ redraw_all
56
+ end
57
+ end
58
+
59
+ # Finish a specific bar - mark it as completed
60
+ def finish_bar(bar)
61
+ @mutex.synchronize do
62
+ return unless @active
63
+
64
+ bar.mark_finished
65
+
66
+ # Respect throttle when finishing to avoid spamming redraws
67
+ if should_redraw?
68
+ @last_update = Time.now
69
+ redraw_all
70
+ end
71
+ # If throttled, the next regular update will show the finished state
72
+ end
73
+ end
74
+
75
+ # Stop all progress bars and keep them visible
76
+ def stop
77
+ @mutex.synchronize do
78
+ @active = false
79
+
80
+ # Final cleanup: remove any duplicate titles (keep the last occurrence of each unique title)
81
+ @bars = @bars.reverse.uniq { |bar| bar.title }.reverse
82
+
83
+ # Final redraw to show completed state (skip active check)
84
+ redraw_all(true)
85
+ # Move cursor past all bars
86
+ @out.print "\n"
87
+ @out.flush
88
+ end
89
+ end
90
+
91
+ private
92
+
93
+ # Check if enough time has passed to redraw (throttle to 10 updates/sec)
94
+ def should_redraw?
95
+ Time.now - @last_update >= 0.1
96
+ end
97
+
98
+ def redraw_all(force = false)
99
+ return unless force || @active
100
+ return if @bars.empty?
101
+
102
+ if force && !@active
103
+ render_final_display
104
+ else
105
+ render_active_display
106
+ end
107
+ end
108
+
109
+ # Final display: show all completed bars
110
+ def render_final_display
111
+ # Clear the reserved lines first
112
+ if @reserved_lines > 0
113
+ @out.print "\r\e[#{@reserved_lines}A"
114
+ @reserved_lines.times { @out.print "\r\e[K\n" }
115
+ end
116
+
117
+ # Print all bars (adds new lines as needed)
118
+ @bars.each do |bar|
119
+ @out.print "\r\e[K"
120
+ bar.render_to(@out)
121
+ @out.print "\n"
122
+ end
123
+
124
+ @out.flush
125
+ end
126
+
127
+ # Normal operation: show active bars + recent finished in reserved space
128
+ def render_active_display
129
+ return if @reserved_lines == 0
130
+
131
+ # Partition bars in a single pass for efficiency
132
+ active_bars, finished_bars = @bars.partition { |b| !b.finished? }
133
+
134
+ # Build display: active bars first, then recent finished to fill remaining space
135
+ # Ensure we don't request negative count from .last()
136
+ remaining_space = [@reserved_lines - active_bars.length, 0].max
137
+ bars_to_draw = active_bars + finished_bars.last(remaining_space)
138
+
139
+ # If we have more bars than reserved lines, show only the most recent
140
+ bars_to_draw = bars_to_draw.last(@reserved_lines) if bars_to_draw.length > @reserved_lines
141
+
142
+ # Move up and redraw in reserved space
143
+ @out.print "\r\e[#{@reserved_lines}A"
144
+ @reserved_lines.times do |i|
145
+ @out.print "\r\e[K"
146
+ bars_to_draw[i].render_to(@out) if i < bars_to_draw.length
147
+ @out.print "\n"
148
+ end
149
+
150
+ @out.flush
151
+ end
152
+ end
153
+
154
+ # Thread-safe progress bar that reports to a MultiProgressBar
155
+ class ThreadSafeProgressBar < ProgressBar
156
+ attr_reader :title
157
+
158
+ def initialize(title, total, multi_progress_bar)
159
+ @multi_progress_bar = multi_progress_bar
160
+ @out = STDOUT # Need this for get_width to work
161
+ # Don't call parent initialize, we'll manage output ourselves
162
+ @title = title
163
+ @total = total
164
+ @terminal_width = 80
165
+ @bar_mark = "="
166
+ @current = 0
167
+ @previous = 0
168
+ @finished_p = false
169
+ @start_time = ::Time.now
170
+ @previous_time = @start_time
171
+ @format_arguments = [:title, :percentage, :bar, :stat]
172
+ end
173
+
174
+ # Override show to notify multi-progress instead of direct output
175
+ def show
176
+ @previous_time = ::Time.now # Update to prevent time-based refresh spam
177
+ @multi_progress_bar.update
178
+ end
179
+
180
+ # Render this bar to the given output stream
181
+ def render_to(out)
182
+ # Get dynamic title width from MultiProgressBar for consistent alignment
183
+ # Store as instance variable so parent class fmt_* methods can use it
184
+ @title_width = @multi_progress_bar.max_title_width
185
+
186
+ # Recalculate terminal width to handle resizes and use full width
187
+ width = get_width
188
+ # Calculate bar width: total_width - fixed_elements - padding
189
+ # Fixed: title(variable) + " "(1) + percentage(4) + " "(1) + "|"(1) + "|"(1) + " "(1) + timer(15) = title_width + 25
190
+ # Padding: +3 for timer fluctuations and safety
191
+ fixed_chars = @title_width + 28
192
+ @terminal_width = [width - fixed_chars, 20].max
193
+
194
+ # Build format string with dynamic title width
195
+ format = "%-#{@title_width}s %3d%% %s %s"
196
+ arguments = @format_arguments.map { |method| send("fmt_#{method}") }
197
+ line = sprintf(format, *arguments)
198
+
199
+ # Ensure line doesn't exceed terminal width to prevent wrapping
200
+ # Leave 2 chars margin for safety
201
+ line = line[0, width - 2] if line.length > width - 2
202
+
203
+ out.print(line)
204
+ end
205
+
206
+ # Override clear to do nothing (managed by MultiProgressBar)
207
+ def clear
208
+ # no-op
209
+ end
210
+
211
+ # Mark this bar as finished (for tracking)
212
+ def mark_finished
213
+ @finished_p = true
214
+ end
215
+
216
+ # Override to use the same @finished_p flag
217
+ def finished?
218
+ @finished_p
219
+ end
220
+
221
+ # Override finish to notify multi-progress
222
+ def finish
223
+ @current = @total
224
+ @multi_progress_bar.finish_bar(self)
225
+ end
226
+
227
+ # Override inc to check if we need to update
228
+ def inc(step = 1)
229
+ @current += step
230
+ @current = @total if @current > @total
231
+ show_if_needed
232
+ @previous = @current
233
+ end
234
+ end
@@ -1,9 +1,11 @@
1
1
  # -*- encoding : utf-8 -*-
2
2
  require 'sequel'
3
+ require 'thread'
3
4
 
4
5
  require 'tapsoob/data_stream'
5
6
  require 'tapsoob/log'
6
7
  require 'tapsoob/progress_bar'
8
+ require 'tapsoob/multi_progress_bar'
7
9
  require 'tapsoob/schema'
8
10
 
9
11
  module Tapsoob
@@ -117,7 +119,7 @@ module Tapsoob
117
119
  end
118
120
 
119
121
  def db
120
- @db ||= Sequel.connect(database_url)
122
+ @db ||= Sequel.connect(database_url, max_connections: parallel_workers * 2)
121
123
  @db.extension :schema_dumper
122
124
  @db.loggers << Tapsoob.log if opts[:debug]
123
125
 
@@ -130,6 +132,24 @@ module Tapsoob
130
132
  @db
131
133
  end
132
134
 
135
+ def parallel?
136
+ parallel_workers > 1
137
+ end
138
+
139
+ def parallel_workers
140
+ @parallel_workers ||= [opts[:parallel].to_i, 1].max
141
+ end
142
+
143
+ def completed_tables_mutex
144
+ @completed_tables_mutex ||= Mutex.new
145
+ end
146
+
147
+ def add_completed_table(table_name)
148
+ completed_tables_mutex.synchronize do
149
+ completed_tables << table_name.to_s
150
+ end
151
+ end
152
+
133
153
  def format_number(num)
134
154
  num.to_s.gsub(/(\d)(?=(\d\d\d)+(?!\d))/, "\\1,")
135
155
  end
@@ -198,6 +218,14 @@ module Tapsoob
198
218
 
199
219
  log.info "#{tables.size} tables, #{format_number(record_count)} records"
200
220
 
221
+ if parallel?
222
+ pull_data_parallel
223
+ else
224
+ pull_data_serial
225
+ end
226
+ end
227
+
228
+ def pull_data_serial
201
229
  tables.each do |table_name, count|
202
230
  stream = Tapsoob::DataStream.factory(db, {
203
231
  :chunksize => default_chunksize,
@@ -209,6 +237,38 @@ module Tapsoob
209
237
  end
210
238
  end
211
239
 
240
+ def pull_data_parallel
241
+ log.info "Using #{parallel_workers} parallel workers"
242
+
243
+ multi_progress = opts[:progress] ? MultiProgressBar.new(parallel_workers) : nil
244
+ table_queue = Queue.new
245
+ tables.each { |table_name, count| table_queue << [table_name, count] }
246
+
247
+ workers = (1..parallel_workers).map do
248
+ Thread.new do
249
+ loop do
250
+ break if table_queue.empty?
251
+
252
+ table_name, count = table_queue.pop(true) rescue break
253
+
254
+ # Each thread gets its own connection from the pool
255
+ stream = Tapsoob::DataStream.factory(db, {
256
+ :chunksize => default_chunksize,
257
+ :table_name => table_name
258
+ }, { :debug => opts[:debug] })
259
+
260
+ estimated_chunks = [(count.to_f / default_chunksize).ceil, 1].max
261
+ progress = multi_progress ? multi_progress.create_bar(table_name.to_s, estimated_chunks) : nil
262
+
263
+ pull_data_from_table(stream, progress)
264
+ end
265
+ end
266
+ end
267
+
268
+ workers.each(&:join)
269
+ multi_progress.stop if multi_progress
270
+ end
271
+
212
272
  def pull_partial_data
213
273
  return if stream_state == {}
214
274
 
@@ -280,7 +340,7 @@ module Tapsoob
280
340
  end
281
341
 
282
342
  progress.finish if progress
283
- completed_tables << stream.table_name.to_s
343
+ add_completed_table(stream.table_name)
284
344
  self.stream_state = {}
285
345
  end
286
346
 
@@ -333,9 +393,12 @@ module Tapsoob
333
393
  raw_idxs = Tapsoob::Schema.indexes_individual(database_url)
334
394
  idxs = (raw_idxs && raw_idxs.length >= 2 ? JSON.parse(raw_idxs) : {})
335
395
 
336
- apply_table_filter(idxs).each do |table, indexes|
337
- next unless indexes.size > 0
338
- progress = ProgressBar.new(table, indexes.size)
396
+ # Calculate max title width for consistent alignment
397
+ filtered_idxs = apply_table_filter(idxs).select { |table, indexes| indexes.size > 0 }
398
+ max_title_width = filtered_idxs.keys.map { |table| "#{table} indexes".length }.max || 14
399
+
400
+ filtered_idxs.each do |table, indexes|
401
+ progress = ProgressBar.new("#{table} indexes", indexes.size, STDOUT, max_title_width)
339
402
  indexes.each do |idx|
340
403
  output = Tapsoob::Utils.export_indexes(dump_path, table, idx)
341
404
  puts output if dump_path.nil? && output
@@ -389,9 +452,12 @@ module Tapsoob
389
452
 
390
453
  log.info "Sending indexes"
391
454
 
392
- apply_table_filter(idxs).each do |table, indexes|
393
- next unless indexes.size > 0
394
- progress = ProgressBar.new(table, indexes.size)
455
+ # Calculate max title width for consistent alignment
456
+ filtered_idxs = apply_table_filter(idxs).select { |table, indexes| indexes.size > 0 }
457
+ max_title_width = filtered_idxs.keys.map { |table| "#{table} indexes".length }.max || 14
458
+
459
+ filtered_idxs.each do |table, indexes|
460
+ progress = ProgressBar.new("#{table} indexes", indexes.size, STDOUT, max_title_width)
395
461
  indexes.each do |idx|
396
462
  Tapsoob::Utils.load_indexes(database_url, idx)
397
463
  progress.inc(1)
@@ -437,6 +503,14 @@ module Tapsoob
437
503
 
438
504
  log.info "#{tables.size} tables, #{format_number(record_count)} records"
439
505
 
506
+ if parallel?
507
+ push_data_parallel
508
+ else
509
+ push_data_serial
510
+ end
511
+ end
512
+
513
+ def push_data_serial
440
514
  tables.each do |table_name, count|
441
515
  # Skip if data file doesn't exist or has no data
442
516
  data_file = File.join(dump_path, "data", "#{table_name}.json")
@@ -457,6 +531,49 @@ module Tapsoob
457
531
  end
458
532
  end
459
533
 
534
+ def push_data_parallel
535
+ log.info "Using #{parallel_workers} parallel workers"
536
+
537
+ multi_progress = opts[:progress] ? MultiProgressBar.new(parallel_workers) : nil
538
+ table_queue = Queue.new
539
+
540
+ tables.each do |table_name, count|
541
+ data_file = File.join(dump_path, "data", "#{table_name}.json")
542
+ next unless File.exist?(data_file) && count > 0
543
+ table_queue << [table_name, count]
544
+ end
545
+
546
+ workers = (1..parallel_workers).map do
547
+ Thread.new do
548
+ loop do
549
+ break if table_queue.empty?
550
+
551
+ table_name, count = table_queue.pop(true) rescue break
552
+
553
+ # Each thread gets its own connection from the pool
554
+ db[table_name.to_sym].truncate if @opts[:purge]
555
+ stream = Tapsoob::DataStream.factory(db, {
556
+ :table_name => table_name,
557
+ :chunksize => default_chunksize
558
+ }, {
559
+ :"skip-duplicates" => opts[:"skip-duplicates"] || false,
560
+ :"discard-identity" => opts[:"discard-identity"] || false,
561
+ :purge => opts[:purge] || false,
562
+ :debug => opts[:debug]
563
+ })
564
+
565
+ estimated_chunks = [(count.to_f / default_chunksize).ceil, 1].max
566
+ progress = multi_progress ? multi_progress.create_bar(table_name.to_s, estimated_chunks) : nil
567
+
568
+ push_data_from_file(stream, progress)
569
+ end
570
+ end
571
+ end
572
+
573
+ workers.each(&:join)
574
+ multi_progress.stop if multi_progress
575
+ end
576
+
460
577
  def push_data_from_file(stream, progress)
461
578
  loop do
462
579
  if exiting?
@@ -510,7 +627,7 @@ module Tapsoob
510
627
  end
511
628
 
512
629
  progress.finish if progress
513
- completed_tables << stream.table_name.to_s
630
+ add_completed_table(stream.table_name)
514
631
  self.stream_state = {}
515
632
  end
516
633
 
@@ -13,7 +13,7 @@
13
13
  class ProgressBar
14
14
  VERSION = "0.9"
15
15
 
16
- def initialize (title, total, out = STDOUT)
16
+ def initialize (title, total, out = STDOUT, title_width = nil)
17
17
  @title = title
18
18
  @total = total
19
19
  @out = out
@@ -24,7 +24,8 @@ class ProgressBar
24
24
  @finished_p = false
25
25
  @start_time = ::Time.now
26
26
  @previous_time = @start_time
27
- @title_width = 14
27
+ # Set title width: use provided width, or accommodate the title, with a minimum of 14
28
+ @title_width = title_width || [title.length, 14].max
28
29
  @format = "%-#{@title_width}s %3d%% %s %s"
29
30
  @format_arguments = [:title, :percentage, :bar, :stat]
30
31
  clear
@@ -1,4 +1,4 @@
1
1
  # -*- encoding : utf-8 -*-
2
2
  module Tapsoob
3
- VERSION = "0.5.31".freeze
3
+ VERSION = "0.6.0".freeze
4
4
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tapsoob
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.31
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Félix Bellanger
@@ -104,6 +104,7 @@ files:
104
104
  - lib/tapsoob/data_stream.rb
105
105
  - lib/tapsoob/errors.rb
106
106
  - lib/tapsoob/log.rb
107
+ - lib/tapsoob/multi_progress_bar.rb
107
108
  - lib/tapsoob/operation.rb
108
109
  - lib/tapsoob/progress_bar.rb
109
110
  - lib/tapsoob/railtie.rb