tapsoob 0.3.27-java → 0.4.4-java

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 2c121ad1d7de6889a3cc1e79d39ab7844099a88a3da79654c1bfba1ddaba87f6
-  data.tar.gz: fac74e80043e1e2418f11541db774650f483a866e52fdfbe7282d4eb3f63a47a
+  metadata.gz: f6a5cdcb682019d19aea7c707ceeb795d91aa0678ecc1849b6c66aecf82f7e65
+  data.tar.gz: c814f1fd9b3adaf30056959381b1957ff742e4a0f1e2992177154bc0b05c492e
 SHA512:
-  metadata.gz: 4091d6a83bd692f59661236077028ee8c082b9fbe46d8adc7bb38afe736a6f26b3fcb404e4eeb9e2f0597cad75438cfbdeea5dabe9ad70f34f1582f202b7f54e
-  data.tar.gz: 3faea33ccd48dc605cd0f70e8f8fd9c68ed9a0d77ae057af193051a004124a91a9d2b65f3265878368e0aa9d035631ce6af4eb4142b2bd6bcc0070b4a9060152
+  metadata.gz: d14de53579df3774b8656d81f1b7d3404fe763ea287d40de7e46d0b275ce760650f29e9bde3f05d704619baf0e742468cd27e8317743f6542ed28f76d8943b99
+  data.tar.gz: f2fa89c744a914878e2b4538c36ed4b061178d901cd64438d87780d187e208eeeef05ea01d673eadbfaa3f0eeb41fe191afdef3b7e899b24fdce063d9147176c
@@ -28,6 +28,9 @@ module Tapsoob
     option :tables, desc: "Shortcut to filter on a list of tables", type: :array, aliases: "-t"
     option :"exclude-tables", desc: "Shortcut to exclude a list of tables", type: :array, aliases: "-e"
     option :progress, desc: "Show progress", default: true, type: :boolean, aliases: "-p"
+    option :purge, desc: "Purge data in tables prior to performing the import", default: false, type: :boolean, aliases: "-p"
+    option :"skip-duplicates", desc: "Remove duplicates when loading data", default: false, type: :boolean
+    option :"discard-identity", desc: "Remove identity when pushing data (may result in creating duplicates)", default: false, type: :boolean
     option :debug, desc: "Enable debug messages", default: false, type: :boolean, aliases: "-d"
     def push(database_url, dump_path = nil)
       # instantiate stuff
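Note on the three new push flags above: a minimal standalone Thor sketch (not part of the gem; the class name and command signature here are invented) showing how dashed option names such as --skip-duplicates surface in Thor's options hash:

  # fake_push_cli.rb — illustrative only
  require "thor"

  class FakePushCLI < Thor
    desc "push DATABASE_URL [DUMP_PATH]", "demo of the new boolean flags"
    option :purge, type: :boolean, default: false
    option :"skip-duplicates", type: :boolean, default: false
    option :"discard-identity", type: :boolean, default: false
    def push(database_url, dump_path = nil)
      # Thor's options hash accepts string or symbol keys, dashes included.
      puts options[:"skip-duplicates"]   # true when --skip-duplicates is passed
      puts options[:purge]               # true when --purge is passed
    end
  end

  FakePushCLI.start(ARGV)

Example invocation (paths and URL made up): ruby fake_push_cli.rb push postgres://localhost/app ./dump --skip-duplicates --purge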
@@ -46,9 +49,8 @@ module Tapsoob
       data.each do |table|
         stream = Tapsoob::DataStream.factory(db(database_url, opts), {
           table_name: table[:table_name],
-          chunksize: opts[:default_chunksize],
-          debug: opts[:debug]
-        })
+          chunksize: opts[:default_chunksize]
+        }, { :"discard-identity" => opts[:"discard-identity"] || false, :purge => opts[:purge] || false, :debug => opts[:debug] })

         begin
           stream.import_rows(table)
@@ -66,6 +68,11 @@ module Tapsoob
         debug: options[:debug]
       }

+      # Push only options
+      opts[:purge] = options[:purge] if options.key?(:purge)
+      opts[:"skip-duplicates"] = options[:"skip-duplicates"] if options.key?(:"skip-duplicates")
+      opts[:"discard-identity"] = options[:"discard-identity"] if options.key?(:"discard-identity")
+
       # Default chunksize
       if options[:chunksize]
         opts[:default_chunksize] = (options[:chunksize] < 10 ? 10 : options[:chunksize])
@@ -39,6 +39,7 @@ module Tapsoob
     option :tables, desc: "Shortcut to filter on a list of tables", type: :array, aliases: "-t"
     option :"exclude-tables", desc: "Shortcut to exclude a list of tables", type: :array, aliases: "-e"
     option :purge, desc: "Purge data in tables prior to performing the import", default: false, type: :boolean, aliases: "-p"
+    option :"skip-duplicates", desc: "Remove duplicates when loading data", default: false, type: :boolean
     option :"discard-identity", desc: "Remove identity when pushing data (may result in creating duplicates)", default: false, type: :boolean
     option :debug, desc: "Enable debug messages", default: false, type: :boolean, aliases: "-d"
     def push(dump_path, database_url)
@@ -74,6 +75,7 @@ module Tapsoob

       # Push only options
       opts[:purge] = options[:purge] if options.key?(:purge)
+      opts[:"skip-duplicates"] = options[:"skip-duplicates"] if options.key?(:"skip-duplicates")
       opts[:"discard-identity"] = options[:"discard-identity"] if options.key?(:"discard-identity")

       # Resume
@@ -91,7 +91,8 @@ module Tapsoob
       rows = {
         :table_name => ds["table_name"],
         :header => ds["header"],
-        :data => ds["data"][state[:offset], (state[:offset] + state[:chunksize])] || [ ]
+        :data => ((@options[:"skip-duplicates"] ? ds["data"].uniq : ds["data"])[state[:offset], (state[:offset] + state[:chunksize])] || [ ]),
+        :types => ds["types"]
       }
       update_chunksize_stats
       rows
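A minimal sketch (made-up rows, not gem code) of what the new :data expression does: de-duplicate the full row set first when skip-duplicates is on, then take the Array#[start, length] slice, falling back to an empty array when the start index is past the end:

  rows      = [[1, "a"], [2, "b"], [1, "a"], [3, "c"]]
  offset    = 0
  chunksize = 2

  deduped = rows.uniq                                  # [[1, "a"], [2, "b"], [3, "c"]]
  chunk   = deduped[offset, offset + chunksize] || []  # Array#[start, length] form
  # => [[1, "a"], [2, "b"]]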
@@ -151,7 +152,7 @@ module Tapsoob

       rows = parse_encoded_data(encoded_data, json[:checksum])

-      @complete = rows == { }
+      @complete = rows[:data] == [ ]

       # update local state
       state.merge!(json[:state].merge(:chunksize => state[:chunksize]))
@@ -222,12 +223,24 @@ module Tapsoob
       columns = rows[:header]
       data = rows[:data]

+      # Only import existing columns
+      if table.columns.size != columns.size
+        existing_columns = table.columns.map(&:to_s)
+        additional_columns = columns - existing_columns
+        additional_columns_idxs = additional_columns.map { |c| columns.index(c) }
+        additional_columns_idxs.reverse.each do |idx|
+          columns.delete_at(idx)
+          rows[:types].delete_at(idx)
+        end
+        data.each_index { |didx| additional_columns_idxs.reverse.each { |idx| data[didx].delete_at(idx) } }
+      end
+
       # Decode blobs
       if rows.has_key?(:types) && rows[:types].include?("blob")
         blob_indices = rows[:types].each_index.select { |idx| rows[:types][idx] == "blob" }
-        rows[:data].each_index do |idx|
+        data.each_index do |idx|
           blob_indices.each do |bi|
-            rows[:data][idx][bi] = Sequel::SQL::Blob.new(Tapsoob::Utils.base64decode(rows[:data][idx][bi])) unless rows[:data][idx][bi].nil?
+            data[idx][bi] = Sequel::SQL::Blob.new(Tapsoob::Utils.base64decode(data[idx][bi])) unless data[idx][bi].nil?
           end
         end
       end
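A small sketch of the column-trimming block above, with hypothetical column names: any dump column missing from the destination table is dropped from the header, the types list, and every data row, walking the indices in reverse so earlier deletions don't shift later ones:

  columns = ["id", "name", "legacy_flag"]          # from the dump header
  types   = ["integer", "string", "boolean"]
  data    = [[1, "foo", true], [2, "bar", false]]
  existing_columns = ["id", "name"]                # table.columns.map(&:to_s)

  extra_idxs = (columns - existing_columns).map { |c| columns.index(c) }
  extra_idxs.reverse.each do |idx|
    columns.delete_at(idx)
    types.delete_at(idx)
  end
  data.each { |row| extra_idxs.reverse.each { |idx| row.delete_at(idx) } }
  # columns => ["id", "name"]; data => [[1, "foo"], [2, "bar"]]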
@@ -237,9 +250,9 @@ module Tapsoob
       %w(date datetime time).each do |type|
         if rows[:types].include?(type)
           type_indices = rows[:types].each_index.select { |idx| rows[:types][idx] == type }
-          rows[:data].each_index do |idx|
+          data.each_index do |idx|
             type_indices.each do |ti|
-              rows[:data][idx][ti] = Sequel.send("string_to_#{type}".to_sym, rows[:data][idx][ti])
+              data[idx][ti] = Sequel.send("string_to_#{type}".to_sym, data[idx][ti]) unless data[idx][ti].nil?
             end
           end
         end
@@ -247,11 +260,11 @@ module Tapsoob
       end

       # Remove id column
-      if @options[:"discard-identity"]
+      if @options[:"discard-identity"] && rows[:header].include?("id")
         columns = rows[:header] - ["id"]
         data = data.map { |d| d[1..-1] }
       end
-
+
       table.import(columns, data, :commit_every => 100)
       state[:offset] += rows[:data].size
     rescue Exception => ex
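Sketch of the guarded discard-identity branch with toy data; note the branch relies on "id" being the first column, since d[1..-1] drops each row's first element:

  header = ["id", "name"]
  data   = [[10, "foo"], [11, "bar"]]

  if header.include?("id")                  # the new guard
    columns = header - ["id"]               # ["name"]
    data    = data.map { |d| d[1..-1] }     # [["foo"], ["bar"]]
  end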
@@ -198,7 +198,7 @@ module Tapsoob
         stream = Tapsoob::DataStream.factory(db, {
           :chunksize => default_chunksize,
           :table_name => table_name
-        })
+        }, { :debug => opts[:debug] })
         pull_data_from_table(stream, progress)
       end
     end
@@ -227,8 +227,6 @@ module Tapsoob
           Tapsoob::Utils.export_rows(dump_path, stream.table_name, rows)
         end
       end
-      break if stream.complete?
-      progress.inc(size) if progress && !exiting?
       stream.error = false
       self.stream_state = stream.to_hash
     rescue Tapsoob::CorruptedData => e
@@ -236,6 +234,9 @@ module Tapsoob
       stream.error = true
       next
     end
+
+    progress.inc(size) if progress && !exiting?
+    break if stream.complete?
   end

   progress.finish if progress
@@ -396,7 +397,13 @@ module Tapsoob
         db[table_name.to_sym].truncate if @opts[:purge]
         stream = Tapsoob::DataStream.factory(db, {
           :table_name => table_name,
-          :chunksize => default_chunksize }, { :"discard-identity" => @opts[:"discard-identity"] || false })
+          :chunksize => default_chunksize
+        }, {
+          :"skip-duplicates" => opts[:"skip-duplicates"] || false,
+          :"discard-identity" => opts[:"discard-identity"] || false,
+          :purge => opts[:purge] || false,
+          :debug => opts[:debug]
+        })
         progress = ProgressBar.new(table_name.to_s, count)
         push_data_from_file(stream, progress)
       end
@@ -429,7 +436,8 @@ module Tapsoob
         }
       end

-      size = stream.fetch_data_in_database({ :encoded_data => encoded_data, :checksum => data[:checksum] })
+      row_size = stream.fetch_data_in_database({ :encoded_data => encoded_data, :checksum => data[:checksum] })
+      log.debug "row size: #{row_size}"
       self.stream_state = stream.to_hash

       c.idle_secs = (d1 + d2)
@@ -481,7 +489,7 @@ module Tapsoob
       tbls.each do |table|
         if File.exists?(File.join(dump_path, "data", "#{table}.json"))
           data = JSON.parse(File.read(File.join(dump_path, "data", "#{table}.json")))
-          tables_with_counts[table] = data.size
+          tables_with_counts[table] = data["data"].size
         else
           tables_with_counts[table] = 0
         end
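The row-count change above implies each <table>.json dump is now a single object rather than a bare array of rows. A hypothetical file matching the keys this diff reads elsewhere (table_name, header, types, data):

  require "json"

  dump = JSON.parse(<<~JSON)
    {
      "table_name": "users",
      "header": ["id", "name"],
      "types": ["integer", "string"],
      "data": [[1, "foo"], [2, "bar"]]
    }
  JSON

  dump["data"].size   # => 2, the per-table row count
  dump.size           # => 4, i.e. the key count, which is why a bare data.size no longer works under this shape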
@@ -22,7 +22,7 @@ class ProgressBar
     @current = 0
     @previous = 0
     @finished_p = false
-    @start_time = Time.now
+    @start_time = ::Time.now
     @previous_time = @start_time
     @title_width = 14
     @format = "%-#{@title_width}s %3d%% %s %s"
@@ -76,7 +76,7 @@ class ProgressBar
   end

   def transfer_rate
-    bytes_per_second = @current.to_f / (Time.now - @start_time)
+    bytes_per_second = @current.to_f / (::Time.now - @start_time)
     sprintf("%s/s", convert_bytes(bytes_per_second))
   end

@@ -97,14 +97,14 @@ class ProgressBar
     if @current == 0
       "ETA: --:--:--"
     else
-      elapsed = Time.now - @start_time
+      elapsed = ::Time.now - @start_time
       eta = elapsed * @total / @current - elapsed;
       sprintf("ETA: %s", format_time(eta))
     end
   end

   def elapsed
-    elapsed = Time.now - @start_time
+    elapsed = ::Time.now - @start_time
     sprintf("Time: %s", format_time(elapsed))
   end

@@ -155,7 +155,7 @@ class ProgressBar
       @terminal_width += width - line.length + 1
       show
     end
-    @previous_time = Time.now
+    @previous_time = ::Time.now
   end

   def show_if_needed
@@ -169,7 +169,7 @@ class ProgressBar

     # Use "!=" instead of ">" to support negative changes
     if cur_percentage != prev_percentage ||
-        Time.now - @previous_time >= 1 || @finished_p
+        ::Time.now - @previous_time >= 1 || @finished_p
       show
     end
   end
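On the repeated Time.now → ::Time.now change in ProgressBar: a toy sketch (the shadowing constant is invented for illustration, not taken from the gem) of why the top-level qualifier can matter; an unqualified Time inside the class resolves lexically first, while ::Time always reaches Ruby's built-in clock:

  class ProgressBarDemo
    Time = "shadowing constant"             # hypothetical local constant

    def clocks
      [Time, ::Time.now.class]              # => ["shadowing constant", Time]
    end
  end

  p ProgressBarDemo.new.clocks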
data/lib/tapsoob/utils.rb CHANGED
@@ -1,5 +1,6 @@
 # -*- encoding : utf-8 -*-
 require 'zlib'
+require 'active_support/core_ext/file/atomic.rb'

 require 'tapsoob/errors'
 require 'tapsoob/chunksize'
@@ -146,7 +147,7 @@ Data : #{data}
       data[:data] = previous_data["data"] + row_data[:data]
     end

-    File.open(File.join(dump_path, "data", "#{table}.json"), 'w') do |file|
+    File.atomic_write(File.join(dump_path, "data", "#{table}.json")) do |file|
       file.write(JSON.generate(data))
     end
   end
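The File.open → File.atomic_write switch uses ActiveSupport's atomic write helper (hence the new require at the top of the file). A small standalone sketch with a made-up path: the block writes to a temp file that is then renamed over the target, so a concurrent reader never sees a partially written dump:

  require "json"
  require "active_support/core_ext/file/atomic"

  payload = { "table_name" => "users", "header" => ["id"], "data" => [[1]] }

  File.atomic_write("/tmp/users.json") do |file|
    file.write(JSON.generate(payload))
  end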
@@ -1,4 +1,4 @@
 # -*- encoding : utf-8 -*-
 module Tapsoob
-  VERSION = "0.3.27".freeze
+  VERSION = "0.4.4".freeze
 end
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: tapsoob
 version: !ruby/object:Gem::Version
-  version: 0.3.27
+  version: 0.4.4
 platform: java
 authors:
 - Félix Bellanger
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2021-06-11 00:00:00.000000000 Z
+date: 2021-07-01 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   requirement: !ruby/object:Gem::Requirement