tapsoob 0.3.26 → 0.4.3

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 59b4084ddb364d71b2409a23fdd8604b3ef46d5dac7a96518fdb664e8f743943
4
- data.tar.gz: 4d72e5bae7bd8fe4acf116c81c1a139e5fdde4d7bbd5731bf25f896b71a00a90
3
+ metadata.gz: 7931ff4460451152355f05ac09c59013ca76b7afe46893620c1da7fdf30288fe
4
+ data.tar.gz: 0257a6e05c923148436d122f0c2592fd9551cbe23401989e18f720365469d11e
5
5
  SHA512:
6
- metadata.gz: b9ca0470b23447547e1967588c1230ed3a2f6f141d5d9b3f14854bdeb155329d604082895dd344a27a69540cbe9ac6e3b630e46ae61bb3e23467cc5194011948
7
- data.tar.gz: ab7f6656768aa0e078a921530e25f8da5498504d9c5e4abd90175acaeeba4540393b6905021b40187ab9070c479d9fc2db1f88c10a82aa283ac7488c25778e7d
6
+ metadata.gz: 1251f19e8812daec0ed94c7cbd55d3a22beb3112c91cab288159ccd59fd0144b99888ed9b082df3a14182469f20cbc482656108db75c36c4859d1a889f4165ad
7
+ data.tar.gz: c440040cf75ea377161394335a32bc9f4bc9cb635775d048ae076efeec69af9cb499601c7f63280cb1d76ecc14d6948f7d2a3b32f746094dfed2aec06bfe4747
@@ -28,6 +28,9 @@ module Tapsoob
28
28
  option :tables, desc: "Shortcut to filter on a list of tables", type: :array, aliases: "-t"
29
29
  option :"exclude-tables", desc: "Shortcut to exclude a list of tables", type: :array, aliases: "-e"
30
30
  option :progress, desc: "Show progress", default: true, type: :boolean, aliases: "-p"
31
+ option :purge, desc: "Purge data in tables prior to performing the import", default: false, type: :boolean, aliases: "-p"
32
+ option :"skip-duplicates", desc: "Remove duplicates when loading data", default: false, type: :boolean
33
+ option :"discard-identity", desc: "Remove identity when pushing data (may result in creating duplicates)", default: false, type: :boolean
31
34
  option :debug, desc: "Enable debug messages", default: false, type: :boolean, aliases: "-d"
32
35
  def push(database_url, dump_path = nil)
33
36
  # instantiate stuff
@@ -46,9 +49,8 @@ module Tapsoob
46
49
  data.each do |table|
47
50
  stream = Tapsoob::DataStream.factory(db(database_url, opts), {
48
51
  table_name: table[:table_name],
49
- chunksize: opts[:default_chunksize],
50
- debug: opts[:debug]
51
- })
52
+ chunksize: opts[:default_chunksize]
53
+ }, { :"discard-identity" => opts[:"discard-identity"] || false, :purge => opts[:purge] || false, :debug => opts[:debug] })
52
54
 
53
55
  begin
54
56
  stream.import_rows(table)
@@ -66,6 +68,11 @@ module Tapsoob
66
68
  debug: options[:debug]
67
69
  }
68
70
 
71
+ # Push only options
72
+ opts[:purge] = options[:purge] if options.key?(:purge)
73
+ opts[:"skip-duplicates"] = options[:"skip-duplicates"] if options.key?(:"skip-duplicates")
74
+ opts[:"discard-identity"] = options[:"discard-identity"] if options.key?(:"discard-identity")
75
+
69
76
  # Default chunksize
70
77
  if options[:chunksize]
71
78
  opts[:default_chunksize] = (options[:chunksize] < 10 ? 10 : options[:chunksize])
@@ -39,6 +39,7 @@ module Tapsoob
39
39
  option :tables, desc: "Shortcut to filter on a list of tables", type: :array, aliases: "-t"
40
40
  option :"exclude-tables", desc: "Shortcut to exclude a list of tables", type: :array, aliases: "-e"
41
41
  option :purge, desc: "Purge data in tables prior to performing the import", default: false, type: :boolean, aliases: "-p"
42
+ option :"skip-duplicates", desc: "Remove duplicates when loading data", default: false, type: :boolean
42
43
  option :"discard-identity", desc: "Remove identity when pushing data (may result in creating duplicates)", default: false, type: :boolean
43
44
  option :debug, desc: "Enable debug messages", default: false, type: :boolean, aliases: "-d"
44
45
  def push(dump_path, database_url)
@@ -74,6 +75,7 @@ module Tapsoob
74
75
 
75
76
  # Push only options
76
77
  opts[:purge] = options[:purge] if options.key?(:purge)
78
+ opts[:"skip-duplicates"] = options[:"skip-duplicates"] if options.key?(:"skip-duplicates")
77
79
  opts[:"discard-identity"] = options[:"discard-identity"] if options.key?(:"discard-identity")
78
80
 
79
81
  # Resume
@@ -1,7 +1,4 @@
1
1
  # -*- encoding : utf-8 -*-
2
- require 'date'
3
- require 'time'
4
-
5
2
  require 'tapsoob/log'
6
3
  require 'tapsoob/utils'
7
4
 
@@ -94,7 +91,8 @@ module Tapsoob
94
91
  rows = {
95
92
  :table_name => ds["table_name"],
96
93
  :header => ds["header"],
97
- :data => ds["data"][state[:offset], (state[:offset] + state[:chunksize])] || [ ]
94
+ :data => ((@options[:"skip-duplicates"] ? ds["data"].uniq : ds["data"])[state[:offset], (state[:offset] + state[:chunksize])] || [ ]),
95
+ :types => ds["types"]
98
96
  }
99
97
  update_chunksize_stats
100
98
  rows
@@ -225,12 +223,24 @@ module Tapsoob
225
223
  columns = rows[:header]
226
224
  data = rows[:data]
227
225
 
226
+ # Only import existing columns
227
+ if table.columns.size != columns.size
228
+ existing_columns = table.columns.map(&:to_s)
229
+ additional_columns = columns - existing_columns
230
+ additional_columns_idxs = additional_columns.map { |c| columns.index(c) }
231
+ additional_columns_idxs.reverse.each do |idx|
232
+ columns.delete_at(idx)
233
+ rows[:types].delete_at(idx)
234
+ end
235
+ data.each_index { |didx| additional_columns_idxs.reverse.each { |idx| data[didx].delete_at(idx) } }
236
+ end
237
+
228
238
  # Decode blobs
229
239
  if rows.has_key?(:types) && rows[:types].include?("blob")
230
240
  blob_indices = rows[:types].each_index.select { |idx| rows[:types][idx] == "blob" }
231
- rows[:data].each_index do |idx|
241
+ data.each_index do |idx|
232
242
  blob_indices.each do |bi|
233
- rows[:data][idx][bi] = Sequel::SQL::Blob.new(Tapsoob::Utils.base64decode(rows[:data][idx][bi])) unless rows[:data][idx][bi].nil?
243
+ data[idx][bi] = Sequel::SQL::Blob.new(Tapsoob::Utils.base64decode(data[idx][bi])) unless data[idx][bi].nil?
234
244
  end
235
245
  end
236
246
  end
@@ -239,20 +249,10 @@ module Tapsoob
239
249
  if rows.has_key?(:types)
240
250
  %w(date datetime time).each do |type|
241
251
  if rows[:types].include?(type)
242
- klass = case type
243
- when "date"
244
- Date
245
- when "datetime"
246
- DateTime
247
- when "time"
248
- Time
249
- end
250
-
251
-
252
252
  type_indices = rows[:types].each_index.select { |idx| rows[:types][idx] == type }
253
- rows[:data].each_index do |idx|
253
+ data.each_index do |idx|
254
254
  type_indices.each do |ti|
255
- rows[:data][idx][ti] = klass.parse(rows[:data][idx][ti])
255
+ data[idx][ti] = Sequel.send("string_to_#{type}".to_sym, data[idx][ti]) unless data[idx][ti].nil?
256
256
  end
257
257
  end
258
258
  end
@@ -260,11 +260,11 @@ module Tapsoob
260
260
  end
261
261
 
262
262
  # Remove id column
263
- if @options[:"discard-identity"]
263
+ if @options[:"discard-identity"] && rows[:header].include?("id")
264
264
  columns = rows[:header] - ["id"]
265
265
  data = data.map { |d| d[1..-1] }
266
266
  end
267
-
267
+
268
268
  table.import(columns, data, :commit_every => 100)
269
269
  state[:offset] += rows[:data].size
270
270
  rescue Exception => ex
@@ -198,7 +198,7 @@ module Tapsoob
198
198
  stream = Tapsoob::DataStream.factory(db, {
199
199
  :chunksize => default_chunksize,
200
200
  :table_name => table_name
201
- })
201
+ }, { :debug => opts[:debug] })
202
202
  pull_data_from_table(stream, progress)
203
203
  end
204
204
  end
@@ -227,8 +227,6 @@ module Tapsoob
227
227
  Tapsoob::Utils.export_rows(dump_path, stream.table_name, rows)
228
228
  end
229
229
  end
230
- break if stream.complete?
231
- progress.inc(size) if progress && !exiting?
232
230
  stream.error = false
233
231
  self.stream_state = stream.to_hash
234
232
  rescue Tapsoob::CorruptedData => e
@@ -236,6 +234,9 @@ module Tapsoob
236
234
  stream.error = true
237
235
  next
238
236
  end
237
+
238
+ progress.inc(size) if progress && !exiting?
239
+ break if stream.complete?
239
240
  end
240
241
 
241
242
  progress.finish if progress
@@ -396,7 +397,13 @@ module Tapsoob
396
397
  db[table_name.to_sym].truncate if @opts[:purge]
397
398
  stream = Tapsoob::DataStream.factory(db, {
398
399
  :table_name => table_name,
399
- :chunksize => default_chunksize }, { :"discard-identity" => @opts[:"discard-identity"] || false })
400
+ :chunksize => default_chunksize
401
+ }, {
402
+ :"skip-duplicates" => opts[:"skip-duplicates"] || false,
403
+ :"discard-identity" => opts[:"discard-identity"] || false,
404
+ :purge => opts[:purge] || false,
405
+ :debug => opts[:debug]
406
+ })
400
407
  progress = ProgressBar.new(table_name.to_s, count)
401
408
  push_data_from_file(stream, progress)
402
409
  end
@@ -429,7 +436,8 @@ module Tapsoob
429
436
  }
430
437
  end
431
438
 
432
- size = stream.fetch_data_in_database({ :encoded_data => encoded_data, :checksum => data[:checksum] })
439
+ row_size = stream.fetch_data_in_database({ :encoded_data => encoded_data, :checksum => data[:checksum] })
440
+ log.debug "row size: #{row_size}"
433
441
  self.stream_state = stream.to_hash
434
442
 
435
443
  c.idle_secs = (d1 + d2)
@@ -481,7 +489,7 @@ module Tapsoob
481
489
  tbls.each do |table|
482
490
  if File.exists?(File.join(dump_path, "data", "#{table}.json"))
483
491
  data = JSON.parse(File.read(File.join(dump_path, "data", "#{table}.json")))
484
- tables_with_counts[table] = data.size
492
+ tables_with_counts[table] = data["data"].size
485
493
  else
486
494
  tables_with_counts[table] = 0
487
495
  end
@@ -22,7 +22,7 @@ class ProgressBar
22
22
  @current = 0
23
23
  @previous = 0
24
24
  @finished_p = false
25
- @start_time = Time.now
25
+ @start_time = ::Time.now
26
26
  @previous_time = @start_time
27
27
  @title_width = 14
28
28
  @format = "%-#{@title_width}s %3d%% %s %s"
@@ -76,7 +76,7 @@ class ProgressBar
76
76
  end
77
77
 
78
78
  def transfer_rate
79
- bytes_per_second = @current.to_f / (Time.now - @start_time)
79
+ bytes_per_second = @current.to_f / (::Time.now - @start_time)
80
80
  sprintf("%s/s", convert_bytes(bytes_per_second))
81
81
  end
82
82
 
@@ -97,14 +97,14 @@ class ProgressBar
97
97
  if @current == 0
98
98
  "ETA: --:--:--"
99
99
  else
100
- elapsed = Time.now - @start_time
100
+ elapsed = ::Time.now - @start_time
101
101
  eta = elapsed * @total / @current - elapsed;
102
102
  sprintf("ETA: %s", format_time(eta))
103
103
  end
104
104
  end
105
105
 
106
106
  def elapsed
107
- elapsed = Time.now - @start_time
107
+ elapsed = ::Time.now - @start_time
108
108
  sprintf("Time: %s", format_time(elapsed))
109
109
  end
110
110
 
@@ -155,7 +155,7 @@ class ProgressBar
155
155
  @terminal_width += width - line.length + 1
156
156
  show
157
157
  end
158
- @previous_time = Time.now
158
+ @previous_time = ::Time.now
159
159
  end
160
160
 
161
161
  def show_if_needed
@@ -169,7 +169,7 @@ class ProgressBar
169
169
 
170
170
  # Use "!=" instead of ">" to support negative changes
171
171
  if cur_percentage != prev_percentage ||
172
- Time.now - @previous_time >= 1 || @finished_p
172
+ ::Time.now - @previous_time >= 1 || @finished_p
173
173
  show
174
174
  end
175
175
  end
@@ -1,4 +1,4 @@
1
1
  # -*- encoding : utf-8 -*-
2
2
  module Tapsoob
3
- VERSION = "0.3.26".freeze
3
+ VERSION = "0.4.3".freeze
4
4
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tapsoob
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.26
4
+ version: 0.4.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Félix Bellanger
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2021-06-11 00:00:00.000000000 Z
12
+ date: 2021-06-30 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: ripl