tapsoob 0.3.27 → 0.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/tapsoob/cli/data_stream.rb +10 -3
- data/lib/tapsoob/cli/root.rb +2 -0
- data/lib/tapsoob/data_stream.rb +21 -8
- data/lib/tapsoob/operation.rb +14 -6
- data/lib/tapsoob/progress_bar.rb +6 -6
- data/lib/tapsoob/utils.rb +2 -1
- data/lib/tapsoob/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 537cc5719c3cb41a1faa9276fec218384620348237b718c8971bab89f0c489de
|
4
|
+
data.tar.gz: f015c3030091f388472f11f0694e9e188cc01583451098a06eb0d9b7528c1897
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 130eacfe744be18347fba69d82d0caa5760b6e33669c1a3b3fa5708ebaabe7bcda5d2da3ca37fdc90740954f659d73b6b3a4175cb6b8c208093478b9c9a689b1
|
7
|
+
data.tar.gz: 020efbddc48cae06d17edb37ca14f8b6d27312476e3281451d0760b554d7727093d0cf830df51d7359cbda5ff23374ad536355468c4e81ed6d0155a9939b3145
|
data/lib/tapsoob/cli/data_stream.rb
CHANGED
@@ -28,6 +28,9 @@ module Tapsoob
|
|
28
28
|
option :tables, desc: "Shortcut to filter on a list of tables", type: :array, aliases: "-t"
|
29
29
|
option :"exclude-tables", desc: "Shortcut to exclude a list of tables", type: :array, aliases: "-e"
|
30
30
|
option :progress, desc: "Show progress", default: true, type: :boolean, aliases: "-p"
|
31
|
+
option :purge, desc: "Purge data in tables prior to performing the import", default: false, type: :boolean, aliases: "-p"
|
32
|
+
option :"skip-duplicates", desc: "Remove duplicates when loading data", default: false, type: :boolean
|
33
|
+
option :"discard-identity", desc: "Remove identity when pushing data (may result in creating duplicates)", default: false, type: :boolean
|
31
34
|
option :debug, desc: "Enable debug messages", default: false, type: :boolean, aliases: "-d"
|
32
35
|
def push(database_url, dump_path = nil)
|
33
36
|
# instantiate stuff
|
@@ -46,9 +49,8 @@ module Tapsoob
|
|
46
49
|
data.each do |table|
|
47
50
|
stream = Tapsoob::DataStream.factory(db(database_url, opts), {
|
48
51
|
table_name: table[:table_name],
|
49
|
-
chunksize: opts[:default_chunksize]
|
50
|
-
|
51
|
-
})
|
52
|
+
chunksize: opts[:default_chunksize]
|
53
|
+
}, { :"discard-identity" => opts[:"discard-identity"] || false, :purge => opts[:purge] || false, :debug => opts[:debug] })
|
52
54
|
|
53
55
|
begin
|
54
56
|
stream.import_rows(table)
|
@@ -66,6 +68,11 @@ module Tapsoob
|
|
66
68
|
debug: options[:debug]
|
67
69
|
}
|
68
70
|
|
71
|
+
# Push only options
|
72
|
+
opts[:purge] = options[:purge] if options.key?(:purge)
|
73
|
+
opts[:"skip-duplicates"] = options[:"skip-duplicates"] if options.key?(:"skip-duplicates")
|
74
|
+
opts[:"discard-identity"] = options[:"discard-identity"] if options.key?(:"discard-identity")
|
75
|
+
|
69
76
|
# Default chunksize
|
70
77
|
if options[:chunksize]
|
71
78
|
opts[:default_chunksize] = (options[:chunksize] < 10 ? 10 : options[:chunksize])
|
data/lib/tapsoob/cli/root.rb
CHANGED
@@ -39,6 +39,7 @@ module Tapsoob
|
|
39
39
|
option :tables, desc: "Shortcut to filter on a list of tables", type: :array, aliases: "-t"
|
40
40
|
option :"exclude-tables", desc: "Shortcut to exclude a list of tables", type: :array, aliases: "-e"
|
41
41
|
option :purge, desc: "Purge data in tables prior to performing the import", default: false, type: :boolean, aliases: "-p"
|
42
|
+
option :"skip-duplicates", desc: "Remove duplicates when loading data", default: false, type: :boolean
|
42
43
|
option :"discard-identity", desc: "Remove identity when pushing data (may result in creating duplicates)", default: false, type: :boolean
|
43
44
|
option :debug, desc: "Enable debug messages", default: false, type: :boolean, aliases: "-d"
|
44
45
|
def push(dump_path, database_url)
|
@@ -74,6 +75,7 @@ module Tapsoob
|
|
74
75
|
|
75
76
|
# Push only options
|
76
77
|
opts[:purge] = options[:purge] if options.key?(:purge)
|
78
|
+
opts[:"skip-duplicates"] = options[:"skip-duplicates"] if options.key?(:"skip-duplicates")
|
77
79
|
opts[:"discard-identity"] = options[:"discard-identity"] if options.key?(:"discard-identity")
|
78
80
|
|
79
81
|
# Resume
|
data/lib/tapsoob/data_stream.rb
CHANGED
@@ -91,7 +91,8 @@ module Tapsoob
|
|
91
91
|
rows = {
|
92
92
|
:table_name => ds["table_name"],
|
93
93
|
:header => ds["header"],
|
94
|
-
:data => ds["data"][state[:offset], (state[:offset] + state[:chunksize])] || [ ]
|
94
|
+
:data => ((@options[:"skip-duplicates"] ? ds["data"].uniq : ds["data"])[state[:offset], (state[:offset] + state[:chunksize])] || [ ]),
|
95
|
+
:types => ds["types"]
|
95
96
|
}
|
96
97
|
update_chunksize_stats
|
97
98
|
rows
|
@@ -151,7 +152,7 @@ module Tapsoob
|
|
151
152
|
|
152
153
|
rows = parse_encoded_data(encoded_data, json[:checksum])
|
153
154
|
|
154
|
-
@complete = rows ==
|
155
|
+
@complete = rows[:data] == [ ]
|
155
156
|
|
156
157
|
# update local state
|
157
158
|
state.merge!(json[:state].merge(:chunksize => state[:chunksize]))
|
@@ -222,12 +223,24 @@ module Tapsoob
|
|
222
223
|
columns = rows[:header]
|
223
224
|
data = rows[:data]
|
224
225
|
|
226
|
+
# Only import existing columns
|
227
|
+
if table.columns.size != columns.size
|
228
|
+
existing_columns = table.columns.map(&:to_s)
|
229
|
+
additional_columns = columns - existing_columns
|
230
|
+
additional_columns_idxs = additional_columns.map { |c| columns.index(c) }
|
231
|
+
additional_columns_idxs.reverse.each do |idx|
|
232
|
+
columns.delete_at(idx)
|
233
|
+
rows[:types].delete_at(idx)
|
234
|
+
end
|
235
|
+
data.each_index { |didx| additional_columns_idxs.reverse.each { |idx| data[didx].delete_at(idx) } }
|
236
|
+
end
|
237
|
+
|
225
238
|
# Decode blobs
|
226
239
|
if rows.has_key?(:types) && rows[:types].include?("blob")
|
227
240
|
blob_indices = rows[:types].each_index.select { |idx| rows[:types][idx] == "blob" }
|
228
|
-
|
241
|
+
data.each_index do |idx|
|
229
242
|
blob_indices.each do |bi|
|
230
|
-
|
243
|
+
data[idx][bi] = Sequel::SQL::Blob.new(Tapsoob::Utils.base64decode(data[idx][bi])) unless data[idx][bi].nil?
|
231
244
|
end
|
232
245
|
end
|
233
246
|
end
|
@@ -237,9 +250,9 @@ module Tapsoob
|
|
237
250
|
%w(date datetime time).each do |type|
|
238
251
|
if rows[:types].include?(type)
|
239
252
|
type_indices = rows[:types].each_index.select { |idx| rows[:types][idx] == type }
|
240
|
-
|
253
|
+
data.each_index do |idx|
|
241
254
|
type_indices.each do |ti|
|
242
|
-
|
255
|
+
data[idx][ti] = Sequel.send("string_to_#{type}".to_sym, data[idx][ti]) unless data[idx][ti].nil?
|
243
256
|
end
|
244
257
|
end
|
245
258
|
end
|
@@ -247,11 +260,11 @@ module Tapsoob
|
|
247
260
|
end
|
248
261
|
|
249
262
|
# Remove id column
|
250
|
-
if @options[:"discard-identity"]
|
263
|
+
if @options[:"discard-identity"] && rows[:header].include?("id")
|
251
264
|
columns = rows[:header] - ["id"]
|
252
265
|
data = data.map { |d| d[1..-1] }
|
253
266
|
end
|
254
|
-
|
267
|
+
|
255
268
|
table.import(columns, data, :commit_every => 100)
|
256
269
|
state[:offset] += rows[:data].size
|
257
270
|
rescue Exception => ex
|
data/lib/tapsoob/operation.rb
CHANGED
@@ -198,7 +198,7 @@ module Tapsoob
|
|
198
198
|
stream = Tapsoob::DataStream.factory(db, {
|
199
199
|
:chunksize => default_chunksize,
|
200
200
|
:table_name => table_name
|
201
|
-
})
|
201
|
+
}, { :debug => opts[:debug] })
|
202
202
|
pull_data_from_table(stream, progress)
|
203
203
|
end
|
204
204
|
end
|
@@ -227,8 +227,6 @@ module Tapsoob
|
|
227
227
|
Tapsoob::Utils.export_rows(dump_path, stream.table_name, rows)
|
228
228
|
end
|
229
229
|
end
|
230
|
-
break if stream.complete?
|
231
|
-
progress.inc(size) if progress && !exiting?
|
232
230
|
stream.error = false
|
233
231
|
self.stream_state = stream.to_hash
|
234
232
|
rescue Tapsoob::CorruptedData => e
|
@@ -236,6 +234,9 @@ module Tapsoob
|
|
236
234
|
stream.error = true
|
237
235
|
next
|
238
236
|
end
|
237
|
+
|
238
|
+
progress.inc(size) if progress && !exiting?
|
239
|
+
break if stream.complete?
|
239
240
|
end
|
240
241
|
|
241
242
|
progress.finish if progress
|
@@ -396,7 +397,13 @@ module Tapsoob
|
|
396
397
|
db[table_name.to_sym].truncate if @opts[:purge]
|
397
398
|
stream = Tapsoob::DataStream.factory(db, {
|
398
399
|
:table_name => table_name,
|
399
|
-
:chunksize => default_chunksize
|
400
|
+
:chunksize => default_chunksize
|
401
|
+
}, {
|
402
|
+
:"skip-duplicates" => opts[:"skip-duplicates"] || false,
|
403
|
+
:"discard-identity" => opts[:"discard-identity"] || false,
|
404
|
+
:purge => opts[:purge] || false,
|
405
|
+
:debug => opts[:debug]
|
406
|
+
})
|
400
407
|
progress = ProgressBar.new(table_name.to_s, count)
|
401
408
|
push_data_from_file(stream, progress)
|
402
409
|
end
|
@@ -429,7 +436,8 @@ module Tapsoob
|
|
429
436
|
}
|
430
437
|
end
|
431
438
|
|
432
|
-
|
439
|
+
row_size = stream.fetch_data_in_database({ :encoded_data => encoded_data, :checksum => data[:checksum] })
|
440
|
+
log.debug "row size: #{row_size}"
|
433
441
|
self.stream_state = stream.to_hash
|
434
442
|
|
435
443
|
c.idle_secs = (d1 + d2)
|
@@ -481,7 +489,7 @@ module Tapsoob
|
|
481
489
|
tbls.each do |table|
|
482
490
|
if File.exists?(File.join(dump_path, "data", "#{table}.json"))
|
483
491
|
data = JSON.parse(File.read(File.join(dump_path, "data", "#{table}.json")))
|
484
|
-
tables_with_counts[table] = data.size
|
492
|
+
tables_with_counts[table] = data["data"].size
|
485
493
|
else
|
486
494
|
tables_with_counts[table] = 0
|
487
495
|
end
|
data/lib/tapsoob/progress_bar.rb
CHANGED
@@ -22,7 +22,7 @@ class ProgressBar
|
|
22
22
|
@current = 0
|
23
23
|
@previous = 0
|
24
24
|
@finished_p = false
|
25
|
-
@start_time = Time.now
|
25
|
+
@start_time = ::Time.now
|
26
26
|
@previous_time = @start_time
|
27
27
|
@title_width = 14
|
28
28
|
@format = "%-#{@title_width}s %3d%% %s %s"
|
@@ -76,7 +76,7 @@ class ProgressBar
|
|
76
76
|
end
|
77
77
|
|
78
78
|
def transfer_rate
|
79
|
-
bytes_per_second = @current.to_f / (Time.now - @start_time)
|
79
|
+
bytes_per_second = @current.to_f / (::Time.now - @start_time)
|
80
80
|
sprintf("%s/s", convert_bytes(bytes_per_second))
|
81
81
|
end
|
82
82
|
|
@@ -97,14 +97,14 @@ class ProgressBar
|
|
97
97
|
if @current == 0
|
98
98
|
"ETA: --:--:--"
|
99
99
|
else
|
100
|
-
elapsed = Time.now - @start_time
|
100
|
+
elapsed = ::Time.now - @start_time
|
101
101
|
eta = elapsed * @total / @current - elapsed;
|
102
102
|
sprintf("ETA: %s", format_time(eta))
|
103
103
|
end
|
104
104
|
end
|
105
105
|
|
106
106
|
def elapsed
|
107
|
-
elapsed = Time.now - @start_time
|
107
|
+
elapsed = ::Time.now - @start_time
|
108
108
|
sprintf("Time: %s", format_time(elapsed))
|
109
109
|
end
|
110
110
|
|
@@ -155,7 +155,7 @@ class ProgressBar
|
|
155
155
|
@terminal_width += width - line.length + 1
|
156
156
|
show
|
157
157
|
end
|
158
|
-
@previous_time = Time.now
|
158
|
+
@previous_time = ::Time.now
|
159
159
|
end
|
160
160
|
|
161
161
|
def show_if_needed
|
@@ -169,7 +169,7 @@ class ProgressBar
|
|
169
169
|
|
170
170
|
# Use "!=" instead of ">" to support negative changes
|
171
171
|
if cur_percentage != prev_percentage ||
|
172
|
-
Time.now - @previous_time >= 1 || @finished_p
|
172
|
+
::Time.now - @previous_time >= 1 || @finished_p
|
173
173
|
show
|
174
174
|
end
|
175
175
|
end
|
data/lib/tapsoob/utils.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
# -*- encoding : utf-8 -*-
|
2
2
|
require 'zlib'
|
3
|
+
require 'active_support/core_ext/file/atomic.rb'
|
3
4
|
|
4
5
|
require 'tapsoob/errors'
|
5
6
|
require 'tapsoob/chunksize'
|
@@ -146,7 +147,7 @@ Data : #{data}
|
|
146
147
|
data[:data] = previous_data["data"] + row_data[:data]
|
147
148
|
end
|
148
149
|
|
149
|
-
File.
|
150
|
+
File.atomic_write(File.join(dump_path, "data", "#{table}.json")) do |file|
|
150
151
|
file.write(JSON.generate(data))
|
151
152
|
end
|
152
153
|
end
|
data/lib/tapsoob/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tapsoob
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.27
|
4
|
+
version: 0.4.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Félix Bellanger
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2021-
|
12
|
+
date: 2021-07-01 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: ripl
|