tapsoob 0.3.26 → 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/tapsoob/cli/data_stream.rb +10 -3
- data/lib/tapsoob/cli/root.rb +2 -0
- data/lib/tapsoob/data_stream.rb +20 -20
- data/lib/tapsoob/operation.rb +14 -6
- data/lib/tapsoob/progress_bar.rb +6 -6
- data/lib/tapsoob/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7931ff4460451152355f05ac09c59013ca76b7afe46893620c1da7fdf30288fe
|
4
|
+
data.tar.gz: 0257a6e05c923148436d122f0c2592fd9551cbe23401989e18f720365469d11e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1251f19e8812daec0ed94c7cbd55d3a22beb3112c91cab288159ccd59fd0144b99888ed9b082df3a14182469f20cbc482656108db75c36c4859d1a889f4165ad
|
7
|
+
data.tar.gz: c440040cf75ea377161394335a32bc9f4bc9cb635775d048ae076efeec69af9cb499601c7f63280cb1d76ecc14d6948f7d2a3b32f746094dfed2aec06bfe4747
|
data/lib/tapsoob/cli/data_stream.rb
CHANGED
@@ -28,6 +28,9 @@ module Tapsoob
|
|
28
28
|
option :tables, desc: "Shortcut to filter on a list of tables", type: :array, aliases: "-t"
|
29
29
|
option :"exclude-tables", desc: "Shortcut to exclude a list of tables", type: :array, aliases: "-e"
|
30
30
|
option :progress, desc: "Show progress", default: true, type: :boolean, aliases: "-p"
|
31
|
+
option :purge, desc: "Purge data in tables prior to performing the import", default: false, type: :boolean, aliases: "-p"
|
32
|
+
option :"skip-duplicates", desc: "Remove duplicates when loading data", default: false, type: :boolean
|
33
|
+
option :"discard-identity", desc: "Remove identity when pushing data (may result in creating duplicates)", default: false, type: :boolean
|
31
34
|
option :debug, desc: "Enable debug messages", default: false, type: :boolean, aliases: "-d"
|
32
35
|
def push(database_url, dump_path = nil)
|
33
36
|
# instantiate stuff
|
@@ -46,9 +49,8 @@ module Tapsoob
|
|
46
49
|
data.each do |table|
|
47
50
|
stream = Tapsoob::DataStream.factory(db(database_url, opts), {
|
48
51
|
table_name: table[:table_name],
|
49
|
-
chunksize: opts[:default_chunksize]
|
50
|
-
|
51
|
-
})
|
52
|
+
chunksize: opts[:default_chunksize]
|
53
|
+
}, { :"discard-identity" => opts[:"discard-identity"] || false, :purge => opts[:purge] || false, :debug => opts[:debug] })
|
52
54
|
|
53
55
|
begin
|
54
56
|
stream.import_rows(table)
|
@@ -66,6 +68,11 @@ module Tapsoob
|
|
66
68
|
debug: options[:debug]
|
67
69
|
}
|
68
70
|
|
71
|
+
# Push only options
|
72
|
+
opts[:purge] = options[:purge] if options.key?(:purge)
|
73
|
+
opts[:"skip-duplicates"] = options[:"skip-duplicates"] if options.key?(:"skip-duplicates")
|
74
|
+
opts[:"discard-identity"] = options[:"discard-identity"] if options.key?(:"discard-identity")
|
75
|
+
|
69
76
|
# Default chunksize
|
70
77
|
if options[:chunksize]
|
71
78
|
opts[:default_chunksize] = (options[:chunksize] < 10 ? 10 : options[:chunksize])
|
data/lib/tapsoob/cli/root.rb
CHANGED
@@ -39,6 +39,7 @@ module Tapsoob
|
|
39
39
|
option :tables, desc: "Shortcut to filter on a list of tables", type: :array, aliases: "-t"
|
40
40
|
option :"exclude-tables", desc: "Shortcut to exclude a list of tables", type: :array, aliases: "-e"
|
41
41
|
option :purge, desc: "Purge data in tables prior to performing the import", default: false, type: :boolean, aliases: "-p"
|
42
|
+
option :"skip-duplicates", desc: "Remove duplicates when loading data", default: false, type: :boolean
|
42
43
|
option :"discard-identity", desc: "Remove identity when pushing data (may result in creating duplicates)", default: false, type: :boolean
|
43
44
|
option :debug, desc: "Enable debug messages", default: false, type: :boolean, aliases: "-d"
|
44
45
|
def push(dump_path, database_url)
|
@@ -74,6 +75,7 @@ module Tapsoob
|
|
74
75
|
|
75
76
|
# Push only options
|
76
77
|
opts[:purge] = options[:purge] if options.key?(:purge)
|
78
|
+
opts[:"skip-duplicates"] = options[:"skip-duplicates"] if options.key?(:"skip-duplicates")
|
77
79
|
opts[:"discard-identity"] = options[:"discard-identity"] if options.key?(:"discard-identity")
|
78
80
|
|
79
81
|
# Resume
|
data/lib/tapsoob/data_stream.rb
CHANGED
@@ -1,7 +1,4 @@
|
|
1
1
|
# -*- encoding : utf-8 -*-
|
2
|
-
require 'date'
|
3
|
-
require 'time'
|
4
|
-
|
5
2
|
require 'tapsoob/log'
|
6
3
|
require 'tapsoob/utils'
|
7
4
|
|
@@ -94,7 +91,8 @@ module Tapsoob
|
|
94
91
|
rows = {
|
95
92
|
:table_name => ds["table_name"],
|
96
93
|
:header => ds["header"],
|
97
|
-
:data => ds["data"][state[:offset], (state[:offset] + state[:chunksize])] || [ ]
|
94
|
+
:data => ((@options[:"skip-duplicates"] ? ds["data"].uniq : ds["data"])[state[:offset], (state[:offset] + state[:chunksize])] || [ ]),
|
95
|
+
:types => ds["types"]
|
98
96
|
}
|
99
97
|
update_chunksize_stats
|
100
98
|
rows
|
@@ -225,12 +223,24 @@ module Tapsoob
|
|
225
223
|
columns = rows[:header]
|
226
224
|
data = rows[:data]
|
227
225
|
|
226
|
+
# Only import existing columns
|
227
|
+
if table.columns.size != columns.size
|
228
|
+
existing_columns = table.columns.map(&:to_s)
|
229
|
+
additional_columns = columns - existing_columns
|
230
|
+
additional_columns_idxs = additional_columns.map { |c| columns.index(c) }
|
231
|
+
additional_columns_idxs.reverse.each do |idx|
|
232
|
+
columns.delete_at(idx)
|
233
|
+
rows[:types].delete_at(idx)
|
234
|
+
end
|
235
|
+
data.each_index { |didx| additional_columns_idxs.reverse.each { |idx| data[didx].delete_at(idx) } }
|
236
|
+
end
|
237
|
+
|
228
238
|
# Decode blobs
|
229
239
|
if rows.has_key?(:types) && rows[:types].include?("blob")
|
230
240
|
blob_indices = rows[:types].each_index.select { |idx| rows[:types][idx] == "blob" }
|
231
|
-
|
241
|
+
data.each_index do |idx|
|
232
242
|
blob_indices.each do |bi|
|
233
|
-
|
243
|
+
data[idx][bi] = Sequel::SQL::Blob.new(Tapsoob::Utils.base64decode(data[idx][bi])) unless data[idx][bi].nil?
|
234
244
|
end
|
235
245
|
end
|
236
246
|
end
|
@@ -239,20 +249,10 @@ module Tapsoob
|
|
239
249
|
if rows.has_key?(:types)
|
240
250
|
%w(date datetime time).each do |type|
|
241
251
|
if rows[:types].include?(type)
|
242
|
-
klass = case type
|
243
|
-
when "date"
|
244
|
-
Date
|
245
|
-
when "datetime"
|
246
|
-
DateTime
|
247
|
-
when "time"
|
248
|
-
Time
|
249
|
-
end
|
250
|
-
|
251
|
-
|
252
252
|
type_indices = rows[:types].each_index.select { |idx| rows[:types][idx] == type }
|
253
|
-
|
253
|
+
data.each_index do |idx|
|
254
254
|
type_indices.each do |ti|
|
255
|
-
|
255
|
+
data[idx][ti] = Sequel.send("string_to_#{type}".to_sym, data[idx][ti]) unless data[idx][ti].nil?
|
256
256
|
end
|
257
257
|
end
|
258
258
|
end
|
@@ -260,11 +260,11 @@ module Tapsoob
|
|
260
260
|
end
|
261
261
|
|
262
262
|
# Remove id column
|
263
|
-
if @options[:"discard-identity"]
|
263
|
+
if @options[:"discard-identity"] && rows[:header].include?("id")
|
264
264
|
columns = rows[:header] - ["id"]
|
265
265
|
data = data.map { |d| d[1..-1] }
|
266
266
|
end
|
267
|
-
|
267
|
+
|
268
268
|
table.import(columns, data, :commit_every => 100)
|
269
269
|
state[:offset] += rows[:data].size
|
270
270
|
rescue Exception => ex
|
data/lib/tapsoob/operation.rb
CHANGED
@@ -198,7 +198,7 @@ module Tapsoob
|
|
198
198
|
stream = Tapsoob::DataStream.factory(db, {
|
199
199
|
:chunksize => default_chunksize,
|
200
200
|
:table_name => table_name
|
201
|
-
})
|
201
|
+
}, { :debug => opts[:debug] })
|
202
202
|
pull_data_from_table(stream, progress)
|
203
203
|
end
|
204
204
|
end
|
@@ -227,8 +227,6 @@ module Tapsoob
|
|
227
227
|
Tapsoob::Utils.export_rows(dump_path, stream.table_name, rows)
|
228
228
|
end
|
229
229
|
end
|
230
|
-
break if stream.complete?
|
231
|
-
progress.inc(size) if progress && !exiting?
|
232
230
|
stream.error = false
|
233
231
|
self.stream_state = stream.to_hash
|
234
232
|
rescue Tapsoob::CorruptedData => e
|
@@ -236,6 +234,9 @@ module Tapsoob
|
|
236
234
|
stream.error = true
|
237
235
|
next
|
238
236
|
end
|
237
|
+
|
238
|
+
progress.inc(size) if progress && !exiting?
|
239
|
+
break if stream.complete?
|
239
240
|
end
|
240
241
|
|
241
242
|
progress.finish if progress
|
@@ -396,7 +397,13 @@ module Tapsoob
|
|
396
397
|
db[table_name.to_sym].truncate if @opts[:purge]
|
397
398
|
stream = Tapsoob::DataStream.factory(db, {
|
398
399
|
:table_name => table_name,
|
399
|
-
:chunksize => default_chunksize
|
400
|
+
:chunksize => default_chunksize
|
401
|
+
}, {
|
402
|
+
:"skip-duplicates" => opts[:"skip-duplicates"] || false,
|
403
|
+
:"discard-identity" => opts[:"discard-identity"] || false,
|
404
|
+
:purge => opts[:purge] || false,
|
405
|
+
:debug => opts[:debug]
|
406
|
+
})
|
400
407
|
progress = ProgressBar.new(table_name.to_s, count)
|
401
408
|
push_data_from_file(stream, progress)
|
402
409
|
end
|
@@ -429,7 +436,8 @@ module Tapsoob
|
|
429
436
|
}
|
430
437
|
end
|
431
438
|
|
432
|
-
|
439
|
+
row_size = stream.fetch_data_in_database({ :encoded_data => encoded_data, :checksum => data[:checksum] })
|
440
|
+
log.debug "row size: #{row_size}"
|
433
441
|
self.stream_state = stream.to_hash
|
434
442
|
|
435
443
|
c.idle_secs = (d1 + d2)
|
@@ -481,7 +489,7 @@ module Tapsoob
|
|
481
489
|
tbls.each do |table|
|
482
490
|
if File.exists?(File.join(dump_path, "data", "#{table}.json"))
|
483
491
|
data = JSON.parse(File.read(File.join(dump_path, "data", "#{table}.json")))
|
484
|
-
tables_with_counts[table] = data.size
|
492
|
+
tables_with_counts[table] = data["data"].size
|
485
493
|
else
|
486
494
|
tables_with_counts[table] = 0
|
487
495
|
end
|
data/lib/tapsoob/progress_bar.rb
CHANGED
@@ -22,7 +22,7 @@ class ProgressBar
|
|
22
22
|
@current = 0
|
23
23
|
@previous = 0
|
24
24
|
@finished_p = false
|
25
|
-
@start_time = Time.now
|
25
|
+
@start_time = ::Time.now
|
26
26
|
@previous_time = @start_time
|
27
27
|
@title_width = 14
|
28
28
|
@format = "%-#{@title_width}s %3d%% %s %s"
|
@@ -76,7 +76,7 @@ class ProgressBar
|
|
76
76
|
end
|
77
77
|
|
78
78
|
def transfer_rate
|
79
|
-
bytes_per_second = @current.to_f / (Time.now - @start_time)
|
79
|
+
bytes_per_second = @current.to_f / (::Time.now - @start_time)
|
80
80
|
sprintf("%s/s", convert_bytes(bytes_per_second))
|
81
81
|
end
|
82
82
|
|
@@ -97,14 +97,14 @@ class ProgressBar
|
|
97
97
|
if @current == 0
|
98
98
|
"ETA: --:--:--"
|
99
99
|
else
|
100
|
-
elapsed = Time.now - @start_time
|
100
|
+
elapsed = ::Time.now - @start_time
|
101
101
|
eta = elapsed * @total / @current - elapsed;
|
102
102
|
sprintf("ETA: %s", format_time(eta))
|
103
103
|
end
|
104
104
|
end
|
105
105
|
|
106
106
|
def elapsed
|
107
|
-
elapsed = Time.now - @start_time
|
107
|
+
elapsed = ::Time.now - @start_time
|
108
108
|
sprintf("Time: %s", format_time(elapsed))
|
109
109
|
end
|
110
110
|
|
@@ -155,7 +155,7 @@ class ProgressBar
|
|
155
155
|
@terminal_width += width - line.length + 1
|
156
156
|
show
|
157
157
|
end
|
158
|
-
@previous_time = Time.now
|
158
|
+
@previous_time = ::Time.now
|
159
159
|
end
|
160
160
|
|
161
161
|
def show_if_needed
|
@@ -169,7 +169,7 @@ class ProgressBar
|
|
169
169
|
|
170
170
|
# Use "!=" instead of ">" to support negative changes
|
171
171
|
if cur_percentage != prev_percentage ||
|
172
|
-
Time.now - @previous_time >= 1 || @finished_p
|
172
|
+
::Time.now - @previous_time >= 1 || @finished_p
|
173
173
|
show
|
174
174
|
end
|
175
175
|
end
|
data/lib/tapsoob/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tapsoob
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.26
|
4
|
+
version: 0.4.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Félix Bellanger
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2021-06-
|
12
|
+
date: 2021-06-30 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: ripl
|