tapsoob 0.3.25 → 0.4.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cfc8db7ed412920a9e55e43d5ee66c57b0dd09a86e322c0b6f37bd90762ef261
4
- data.tar.gz: 46dba518c294c033bf7c12c06f79278700be456d00554ad30d9149decaf8b62e
3
+ metadata.gz: 661f0fd554e2cd6781c9a174c4051331024879fb1abecb9f48f19b9e365c1e46
4
+ data.tar.gz: 10535d8376543d13e16a35b8d7e098cdb4c4757b518e75068d6a83884196cd98
5
5
  SHA512:
6
- metadata.gz: c2ef008de5be1f5e169055026c65b04518163e7987e34f2e1be5c554d7114be96d99f4fe7ae3284798f4e11225c1554f461db8c9cc487d1befb09ef654e8d0ad
7
- data.tar.gz: b532c43909abc6afda5314e55da082ddbceb3a3ed285b5ba133f024f969120ec0d17b3d6f75c0423588b3fb61c015763038219f7b6c025d224c340416f7abf6c
6
+ metadata.gz: 171152e81bcb75da25c9865b8b8211256e7fce176bca88f576b4eb9cf159d67a66badacc51344b3c3a4efa137f16dd57384f606780f0b35984ecfb1d6bfbe96c
7
+ data.tar.gz: a9a59c2a045419d2b580b9d3932055f012f9b192daf6feef8e35602546b0b8dcbd77fc87e56ffa728c1818c6de5004b8027cc0051c9e44d313acc1c5673bc862
@@ -28,6 +28,9 @@ module Tapsoob
28
28
  option :tables, desc: "Shortcut to filter on a list of tables", type: :array, aliases: "-t"
29
29
  option :"exclude-tables", desc: "Shortcut to exclude a list of tables", type: :array, aliases: "-e"
30
30
  option :progress, desc: "Show progress", default: true, type: :boolean, aliases: "-p"
31
+ option :purge, desc: "Purge data in tables prior to performing the import", default: false, type: :boolean, aliases: "-p"
32
+ option :"skip-duplicates", desc: "Remove duplicates when loading data", default: false, type: :boolean
33
+ option :"discard-identity", desc: "Remove identity when pushing data (may result in creating duplicates)", default: false, type: :boolean
31
34
  option :debug, desc: "Enable debug messages", default: false, type: :boolean, aliases: "-d"
32
35
  def push(database_url, dump_path = nil)
33
36
  # instantiate stuff
@@ -46,9 +49,8 @@ module Tapsoob
46
49
  data.each do |table|
47
50
  stream = Tapsoob::DataStream.factory(db(database_url, opts), {
48
51
  table_name: table[:table_name],
49
- chunksize: opts[:default_chunksize],
50
- debug: opts[:debug]
51
- })
52
+ chunksize: opts[:default_chunksize]
53
+ }, { :"discard-identity" => opts[:"discard-identity"] || false, :purge => opts[:purge] || false, :debug => opts[:debug] })
52
54
 
53
55
  begin
54
56
  stream.import_rows(table)
@@ -66,6 +68,11 @@ module Tapsoob
66
68
  debug: options[:debug]
67
69
  }
68
70
 
71
+ # Push only options
72
+ opts[:purge] = options[:purge] if options.key?(:purge)
73
+ opts[:"skip-duplicates"] = options[:"skip-duplicates"] if options.key?(:"skip-duplicates")
74
+ opts[:"discard-identity"] = options[:"discard-identity"] if options.key?(:"discard-identity")
75
+
69
76
  # Default chunksize
70
77
  if options[:chunksize]
71
78
  opts[:default_chunksize] = (options[:chunksize] < 10 ? 10 : options[:chunksize])
@@ -39,6 +39,7 @@ module Tapsoob
39
39
  option :tables, desc: "Shortcut to filter on a list of tables", type: :array, aliases: "-t"
40
40
  option :"exclude-tables", desc: "Shortcut to exclude a list of tables", type: :array, aliases: "-e"
41
41
  option :purge, desc: "Purge data in tables prior to performing the import", default: false, type: :boolean, aliases: "-p"
42
+ option :"skip-duplicates", desc: "Remove duplicates when loading data", default: false, type: :boolean
42
43
  option :"discard-identity", desc: "Remove identity when pushing data (may result in creating duplicates)", default: false, type: :boolean
43
44
  option :debug, desc: "Enable debug messages", default: false, type: :boolean, aliases: "-d"
44
45
  def push(dump_path, database_url)
@@ -74,6 +75,7 @@ module Tapsoob
74
75
 
75
76
  # Push only options
76
77
  opts[:purge] = options[:purge] if options.key?(:purge)
78
+ opts[:"skip-duplicates"] = options[:"skip-duplicates"] if options.key?(:"skip-duplicates")
77
79
  opts[:"discard-identity"] = options[:"discard-identity"] if options.key?(:"discard-identity")
78
80
 
79
81
  # Resume
@@ -91,7 +91,8 @@ module Tapsoob
91
91
  rows = {
92
92
  :table_name => ds["table_name"],
93
93
  :header => ds["header"],
94
- :data => ds["data"][state[:offset], (state[:offset] + state[:chunksize])] || [ ]
94
+ :data => ((@options[:"skip-duplicates"] ? ds["data"].uniq : ds["data"])[state[:offset], (state[:offset] + state[:chunksize])] || [ ]),
95
+ :types => ds["types"]
95
96
  }
96
97
  update_chunksize_stats
97
98
  rows
@@ -222,22 +223,48 @@ module Tapsoob
222
223
  columns = rows[:header]
223
224
  data = rows[:data]
224
225
 
226
+ # Only import existing columns
227
+ if table.columns.size != columns.size
228
+ existing_columns = table.columns.map(&:to_s)
229
+ additional_columns = columns - existing_columns
230
+ additional_columns_idxs = additional_columns.map { |c| columns.index(c) }
231
+ additional_columns_idxs.reverse.each do |idx|
232
+ columns.delete_at(idx)
233
+ rows[:types].delete_at(idx)
234
+ end
235
+ data.each_index { |didx| additional_columns_idxs.reverse.each { |idx| data[didx].delete_at(idx) } }
236
+ end
237
+
225
238
  # Decode blobs
226
239
  if rows.has_key?(:types) && rows[:types].include?("blob")
227
240
  blob_indices = rows[:types].each_index.select { |idx| rows[:types][idx] == "blob" }
228
- rows[:data].each_index do |idx|
241
+ data.each_index do |idx|
229
242
  blob_indices.each do |bi|
230
- rows[:data][idx][bi] = Sequel::SQL::Blob.new(Tapsoob::Utils.base64decode(rows[:data][idx][bi])) unless rows[:data][idx][bi].nil?
243
+ data[idx][bi] = Sequel::SQL::Blob.new(Tapsoob::Utils.base64decode(data[idx][bi])) unless data[idx][bi].nil?
244
+ end
245
+ end
246
+ end
247
+
248
+ # Parse date/datetime/time columns
249
+ if rows.has_key?(:types)
250
+ %w(date datetime time).each do |type|
251
+ if rows[:types].include?(type)
252
+ type_indices = rows[:types].each_index.select { |idx| rows[:types][idx] == type }
253
+ data.each_index do |idx|
254
+ type_indices.each do |ti|
255
+ data[idx][ti] = Sequel.send("string_to_#{type}".to_sym, data[idx][ti]) unless data[idx][ti].nil?
256
+ end
257
+ end
231
258
  end
232
259
  end
233
260
  end
234
261
 
235
262
  # Remove id column
236
- if @options[:"discard-identity"]
263
+ if @options[:"discard-identity"] && rows[:header].include?("id")
237
264
  columns = rows[:header] - ["id"]
238
265
  data = data.map { |d| d[1..-1] }
239
266
  end
240
-
267
+
241
268
  table.import(columns, data, :commit_every => 100)
242
269
  state[:offset] += rows[:data].size
243
270
  rescue Exception => ex
@@ -198,7 +198,7 @@ module Tapsoob
198
198
  stream = Tapsoob::DataStream.factory(db, {
199
199
  :chunksize => default_chunksize,
200
200
  :table_name => table_name
201
- })
201
+ }, { :debug => opts[:debug] })
202
202
  pull_data_from_table(stream, progress)
203
203
  end
204
204
  end
@@ -396,7 +396,13 @@ module Tapsoob
396
396
  db[table_name.to_sym].truncate if @opts[:purge]
397
397
  stream = Tapsoob::DataStream.factory(db, {
398
398
  :table_name => table_name,
399
- :chunksize => default_chunksize }, { :"discard-identity" => @opts[:"discard-identity"] || false })
399
+ :chunksize => default_chunksize
400
+ }, {
401
+ :"skip-duplicates" => opts[:"skip-duplicates"] || false,
402
+ :"discard-identity" => opts[:"discard-identity"] || false,
403
+ :purge => opts[:purge] || false,
404
+ :debug => opts[:debug]
405
+ })
400
406
  progress = ProgressBar.new(table_name.to_s, count)
401
407
  push_data_from_file(stream, progress)
402
408
  end
@@ -429,7 +435,8 @@ module Tapsoob
429
435
  }
430
436
  end
431
437
 
432
- size = stream.fetch_data_in_database({ :encoded_data => encoded_data, :checksum => data[:checksum] })
438
+ row_size = stream.fetch_data_in_database({ :encoded_data => encoded_data, :checksum => data[:checksum] })
439
+ log.debug "row size: #{row_size}"
433
440
  self.stream_state = stream.to_hash
434
441
 
435
442
  c.idle_secs = (d1 + d2)
@@ -481,7 +488,7 @@ module Tapsoob
481
488
  tbls.each do |table|
482
489
  if File.exists?(File.join(dump_path, "data", "#{table}.json"))
483
490
  data = JSON.parse(File.read(File.join(dump_path, "data", "#{table}.json")))
484
- tables_with_counts[table] = data.size
491
+ tables_with_counts[table] = data["data"].size
485
492
  else
486
493
  tables_with_counts[table] = 0
487
494
  end
@@ -22,7 +22,7 @@ class ProgressBar
22
22
  @current = 0
23
23
  @previous = 0
24
24
  @finished_p = false
25
- @start_time = Time.now
25
+ @start_time = ::Time.now
26
26
  @previous_time = @start_time
27
27
  @title_width = 14
28
28
  @format = "%-#{@title_width}s %3d%% %s %s"
@@ -76,7 +76,7 @@ class ProgressBar
76
76
  end
77
77
 
78
78
  def transfer_rate
79
- bytes_per_second = @current.to_f / (Time.now - @start_time)
79
+ bytes_per_second = @current.to_f / (::Time.now - @start_time)
80
80
  sprintf("%s/s", convert_bytes(bytes_per_second))
81
81
  end
82
82
 
@@ -97,14 +97,14 @@ class ProgressBar
97
97
  if @current == 0
98
98
  "ETA: --:--:--"
99
99
  else
100
- elapsed = Time.now - @start_time
100
+ elapsed = ::Time.now - @start_time
101
101
  eta = elapsed * @total / @current - elapsed;
102
102
  sprintf("ETA: %s", format_time(eta))
103
103
  end
104
104
  end
105
105
 
106
106
  def elapsed
107
- elapsed = Time.now - @start_time
107
+ elapsed = ::Time.now - @start_time
108
108
  sprintf("Time: %s", format_time(elapsed))
109
109
  end
110
110
 
@@ -155,7 +155,7 @@ class ProgressBar
155
155
  @terminal_width += width - line.length + 1
156
156
  show
157
157
  end
158
- @previous_time = Time.now
158
+ @previous_time = ::Time.now
159
159
  end
160
160
 
161
161
  def show_if_needed
@@ -169,7 +169,7 @@ class ProgressBar
169
169
 
170
170
  # Use "!=" instead of ">" to support negative changes
171
171
  if cur_percentage != prev_percentage ||
172
- Time.now - @previous_time >= 1 || @finished_p
172
+ ::Time.now - @previous_time >= 1 || @finished_p
173
173
  show
174
174
  end
175
175
  end
@@ -1,4 +1,4 @@
1
1
  # -*- encoding : utf-8 -*-
2
2
  module Tapsoob
3
- VERSION = "0.3.25".freeze
3
+ VERSION = "0.4.2".freeze
4
4
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tapsoob
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.25
4
+ version: 0.4.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Félix Bellanger
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2021-06-11 00:00:00.000000000 Z
12
+ date: 2021-06-25 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: ripl