tapsoob 0.4.22-java → 0.5.2-java

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 24ebcbf2da1819077b890ed4cd70f997c2198ea2199721ff082f3b27838b86f3
-  data.tar.gz: 4374df62d0b72a66144d7cc96ce73f66c65e77aad417d85eae9e029f3c729d5c
+  metadata.gz: 60db77b693b29afa6f92e80b3236b903cd93bf9ceb110d54095ecde174f5207c
+  data.tar.gz: 741dbca318123fc2a9d27b81ebcbbe5ea096c3c2d679ba56394e7596197e4e22
 SHA512:
-  metadata.gz: 16630f8226647e49146656d6d6a93602aba3d2154e50bd73f7744fbf11bb942004292fe8e5a2a43e1d2f4c4686cbc3fd6694abc24f241b8c56c9e165455d8d37
-  data.tar.gz: 48ced3dcfc19dcba1196961ee2fa0544a490f56a4357d3c5cec52ea5759ba3e499fd9fa4fc89e3a260549326034938c9c0bd29840f05f8ba5148af2564c09114
+  metadata.gz: a58ca6c767b9cb9f1e35719af347d4dd5971a736bc56c7c979f846e0a2b7e22383f7670a2ef624c04b9d1068830b856b33ed51960c76df7ead65cbd52f4fb22d
+  data.tar.gz: b07264f21368c3ce71f876651e65f111e65cfd801757a364b8fd37aba1bffc4f4cec77e2a77f125c06b84b38fab9a998e7c7e56aa5f1238907fab8e67506f943
@@ -12,7 +12,6 @@ module Tapsoob
     class DataStream < Thor
       desc "pull DATABASE_URL [DUMP_PATH]", "Pull data from a database."
       option :chunksize, desc: "Initial chunksize", default: 1000, type: :numeric, aliases: "-c"
-      option :filter, desc: "Regex Filter for tables", type: :string, aliases: "-f"
       option :tables, desc: "Shortcut to filter on a list of tables", type: :array, aliases: "-t"
       option :"exclude-tables", desc: "Shortcut to exclude a list of tables", type: :array, aliases: "-e"
       option :progress, desc: "Show progress", default: true, type: :boolean, aliases: "-p"
@@ -24,7 +23,6 @@ module Tapsoob
 
       desc "push DATABASE_URL [DUMP_PATH]", "Push data to a database."
       option :chunksize, desc: "Initial chunksize", default: 1000, type: :numeric, aliases: "-c"
-      option :filter, desc: "Regex Filter for tables", type: :string, aliases: "-f"
       option :tables, desc: "Shortcut to filter on a list of tables", type: :array, aliases: "-t"
       option :"exclude-tables", desc: "Shortcut to exclude a list of tables", type: :array, aliases: "-e"
       option :progress, desc: "Show progress", default: true, type: :boolean, aliases: "-p"
@@ -65,6 +63,7 @@ module Tapsoob
         # Default options
         opts = {
           progress: options[:progress],
+          tables: options[:tables],
           debug: options[:debug]
         }
 
@@ -78,15 +77,6 @@ module Tapsoob
           opts[:default_chunksize] = (options[:chunksize] < 10 ? 10 : options[:chunksize])
         end
 
-        # Regex filter
-        opts[:table_filter] = options[:filter] if options[:filter]
-
-        # Table filter
-        if options[:tables]
-          r_tables = options[:tables].collect { |t| "^#{t}" }.join("|")
-          opts[:table_filter] = "#{r_tables}"
-        end
-
         # Exclude tables
         opts[:exclude_tables] = options[:"exclude-tables"] if options[:"exclude-tables"]
 
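Note on this file: the regex-based `filter`/`-f` option is removed from both `pull` and `push`, and the `--tables` list is now forwarded verbatim as `opts[:tables]` instead of being compiled into an anchored regex. A minimal sketch of the behavioral difference (table names are illustrative):

    # 0.4.x compiled --tables into an anchored alternation:
    ["users", "posts"].collect { |t| "^#{t}" }.join("|")  # => "^users|^posts"
    # ...which also matched prefixed names such as "users_archive".

    # 0.5.x keeps the list untouched and matches names exactly later on:
    opts[:tables] = ["users", "posts"]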
@@ -10,14 +10,15 @@ module Tapsoob
   module CLI
     class Root < Thor
       desc "pull DUMP_PATH DATABASE_URL", "Pull a dump from a database to a folder"
+      option :"schema-only", desc: "Don't transfer the data just schema", default: false, type: :boolean
       option :"skip-schema", desc: "Don't transfer the schema just data", default: false, type: :boolean, aliases: "-s"
       option :"indexes-first", desc: "Transfer indexes first before data", default: false, type: :boolean, aliases: "-i"
       option :resume, desc: "Resume a Tapsoob Session from a stored file", type: :string, aliases: "-r"
       option :chunksize, desc: "Initial chunksize", default: 1000, type: :numeric, aliases: "-c"
       option :"disable-compression", desc: "Disable Compression", default: false, type: :boolean, aliases: "-g"
-      option :filter, desc: "Regex Filter for tables", type: :string, aliases: "-f"
       option :tables, desc: "Shortcut to filter on a list of tables", type: :array, aliases: "-t"
       option :"exclude-tables", desc: "Shortcut to exclude a list of tables", type: :array, aliases: "-e"
+      option :progress, desc: "Show progress", default: true, type: :boolean
       option :debug, desc: "Enable debug messages", default: false, type: :boolean, aliases: "-d"
       def pull(dump_path, database_url)
         opts = parse_opts(options)
@@ -30,17 +31,18 @@ module Tapsoob
       end
 
       desc "push DUMP_PATH DATABASE_URL", "Push a previously tapsoob dump to a database"
+      option :"schema-only", desc: "Don't transfer the data just schema", default: false, type: :boolean
       option :"skip-schema", desc: "Don't transfer the schema just data", default: false, type: :boolean, aliases: "-s"
       option :"indexes-first", desc: "Transfer indexes first before data", default: false, type: :boolean, aliases: "-i"
       option :resume, desc: "Resume a Tapsoob Session from a stored file", type: :string, aliases: "-r"
       option :chunksize, desc: "Initial chunksize", default: 1000, type: :numeric, aliases: "-c"
       option :"disable-compression", desc: "Disable Compression", default: false, type: :boolean, aliases: "-g"
-      option :filter, desc: "Regex Filter for tables", type: :string, aliases: "-f"
       option :tables, desc: "Shortcut to filter on a list of tables", type: :array, aliases: "-t"
       option :"exclude-tables", desc: "Shortcut to exclude a list of tables", type: :array, aliases: "-e"
       option :purge, desc: "Purge data in tables prior to performing the import", default: false, type: :boolean, aliases: "-p"
       option :"skip-duplicates", desc: "Remove duplicates when loading data", default: false, type: :boolean
       option :"discard-identity", desc: "Remove identity when pushing data (may result in creating duplicates)", default: false, type: :boolean
+      option :progress, desc: "Show progress", default: true, type: :boolean
       option :debug, desc: "Enable debug messages", default: false, type: :boolean, aliases: "-d"
       def push(dump_path, database_url)
         opts = parse_opts(options)
@@ -67,9 +69,12 @@ module Tapsoob
       def parse_opts(options)
         # Default options
         opts = {
+          schema_only: options[:"schema-only"],
           skip_schema: options[:"skip-schema"],
           indexes_first: options[:"indexes_first"],
           disable_compression: options[:"disable-compression"],
+          tables: options[:tables],
+          progress: options[:progress],
           debug: options[:debug]
         }
 
@@ -92,15 +97,6 @@ module Tapsoob
           opts[:default_chunksize] = (options[:chunksize] < 10 ? 10 : options[:chunksize])
         end
 
-        # Regex filter
-        opts[:table_filter] = options[:filter] if options[:filter]
-
-        # Table filter
-        if options[:tables]
-          r_tables = options[:tables].collect { |t| "^#{t}" }.join("|")
-          opts[:table_filter] = "#{r_tables}"
-        end
-
         # Exclude tables
         opts[:exclude_tables] = options[:"exclude-tables"] if options[:"exclude-tables"]
 
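Both Root commands gain a `--schema-only` flag (the inverse of the existing `--skip-schema`) and an explicit `--progress` toggle, and `parse_opts` now forwards `tables` and `progress` to the operation. A hedged usage sketch, assuming the gem's `tapsoob` executable and Thor's standard `--no-` negation for boolean options (URL and table names are placeholders):

    # Dump only the schema and indexes, skipping all rows:
    tapsoob pull dump/ postgres://localhost/mydb --schema-only

    # Pull rows for exactly these two tables, without progress output:
    tapsoob pull dump/ postgres://localhost/mydb -t users posts --no-progress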
@@ -74,6 +74,7 @@ module Tapsoob
     def fetch_rows
       state[:chunksize] = fetch_chunksize
       ds = table.order(*order_by).limit(state[:chunksize], state[:offset])
+      state[:size] = table.count
       log.debug "DataStream#fetch_rows SQL -> #{ds.sql}"
       rows = Tapsoob::Utils.format_data(db, ds.all,
         :string_columns => string_columns,
@@ -87,6 +88,7 @@ module Tapsoob
     def fetch_file(dump_path)
       state[:chunksize] = fetch_chunksize
       ds = JSON.parse(File.read(File.join(dump_path, "data", "#{table_name}.json")))
+      state[:size] = ds["data"].size
       log.debug "DataStream#fetch_file"
       rows = {
         :table_name => ds["table_name"],
@@ -132,71 +134,34 @@ module Tapsoob
       t2 = Time.now
       elapsed_time = t2 - t1
 
-      if opts[:type] == "file"
-        @complete = rows[:data] == [ ]
-      else
-        @complete = rows == { }
-      end
+      state[:offset] += (rows == {} ? 0 : rows[:data].size)
 
-      [encoded_data, (@complete ? 0 : rows[:data].size), elapsed_time]
+      [encoded_data, (rows == {} ? 0 : rows[:data].size), elapsed_time]
     end
 
     def complete?
-      @complete
+      state[:offset] >= state[:size]
     end
 
-    def fetch_database
-      params = fetch_from_database
+    def fetch_data_from_database(params)
       encoded_data = params[:encoded_data]
-      json = params[:json]
 
-      rows = parse_encoded_data(encoded_data, json[:checksum])
-
-      @complete = rows == { }
+      rows = parse_encoded_data(encoded_data, params[:checksum])
 
       # update local state
-      state.merge!(json[:state].merge(:chunksize => state[:chunksize]))
-
-      unless @complete
-        yield rows if block_given?
-        state[:offset] += rows[:data].size
-        rows[:data].size
-      else
-        0
-      end
-    end
+      state.merge!(params[:state].merge(:chunksize => state[:chunksize]))
 
-    def fetch_from_database
-      res = nil
-      log.debug "DataStream#fetch_from_database state -> #{state.inspect}"
-      state[:chunksize] = Tapsoob::Utils.calculate_chunksize(state[:chunksize]) do |c|
-        state[:chunksize] = c.to_i
-        encoded_data = fetch.first
-
-        checksum = Tapsoob::Utils.checksum(encoded_data).to_s
-
-        res = {
-          :json => { :checksum => checksum, :state => to_hash },
-          :encoded_data => encoded_data
-        }
-      end
-
-      res
+      yield rows if block_given?
+      (rows == {} ? 0 : rows[:data].size)
     end
 
-    def fetch_data_in_database(params)
+    def fetch_data_to_database(params)
       encoded_data = params[:encoded_data]
 
       rows = parse_encoded_data(encoded_data, params[:checksum])
-
-      @complete = rows[:data] == [ ]
-
-      unless @complete
-        import_rows(rows)
-        rows[:data].size
-      else
-        0
-      end
+
+      import_rows(rows)
+      (rows == {} ? 0 : rows[:data].size)
     end
 
     def self.parse_json(json)
@@ -266,7 +231,6 @@ module Tapsoob
       end
 
       table.import(columns, data, :commit_every => 100)
-      state[:offset] += rows[:data].size
     rescue Exception => ex
       case ex.message
       when /integer out of range/ then
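
The `@complete` flag is retired in this class: `fetch_rows` and `fetch_file` record the table's total row count in `state[:size]` up front, `fetch` advances `state[:offset]` by each chunk's size, and completion becomes a plain cursor comparison, condensed below from the hunks above:

    def complete?
      # done once the offset cursor has swept past the recorded total
      state[:offset] >= state[:size]
    end

In the same spirit, `fetch_database`/`fetch_from_database` collapse into `fetch_data_from_database(params)`, which receives an already-fetched payload (state, checksum, encoded data) from the caller instead of fetching internally, and `fetch_data_in_database` becomes `fetch_data_to_database` to mirror it.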
@@ -21,6 +21,10 @@ module Tapsoob
       "op"
     end
 
+    def schema_only?
+      !!opts[:schema_only]
+    end
+
     def skip_schema?
       !!opts[:skip_schema]
     end
@@ -30,7 +34,7 @@ module Tapsoob
     end
 
     def table_filter
-      opts[:table_filter]
+      opts[:tables] || []
     end
 
     def exclude_tables
@@ -38,19 +42,18 @@ module Tapsoob
     end
 
     def apply_table_filter(tables)
-      return tables unless table_filter || exclude_tables
+      return tables if table_filter.empty? && exclude_tables.empty?
 
-      re = table_filter ? Regexp.new(table_filter) : nil
       if tables.kind_of?(Hash)
         ntables = {}
         tables.each do |t, d|
-          if !exclude_tables.include?(t.to_s) && (!re || !re.match(t.to_s).nil?)
+          if !exclude_tables.include?(t.to_s) && (!table_filter.empty? && table_filter.include?(t.to_s))
            ntables[t] = d
          end
        end
        ntables
      else
-        tables.reject { |t| exclude_tables.include?(t.to_s) || (re && re.match(t.to_s).nil?) }
+        tables.reject { |t| exclude_tables.include?(t.to_s) }.select { |t| table_filter.include?(t.to_s) }
      end
    end
 
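With `table_filter` now an exact-match array (defaulting to `[]`), the array branch of `apply_table_filter` reduces to a reject/select chain. One nuance visible in the new code: when only `exclude-tables` is given, the early return is skipped and the trailing `select` runs against an empty filter list:

    tables.reject { |t| exclude_tables.include?(t.to_s) }
          .select { |t| table_filter.include?(t.to_s) }  # an empty table_filter selects nothing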
@@ -168,7 +171,7 @@ module Tapsoob
       end
       setup_signal_trap
       pull_partial_data if resuming?
-      pull_data
+      pull_data unless schema_only?
       pull_indexes if !indexes_first? && !skip_schema?
       pull_reset_sequences
     end
@@ -182,7 +185,7 @@ module Tapsoob
         schema_data = Tapsoob::Schema.dump_table(database_url, table_name)
         log.debug "Table: #{table_name}\n#{schema_data}\n"
         output = Tapsoob::Utils.export_schema(dump_path, table_name, schema_data)
-        puts output if output
+        puts output if dump_path.nil? && output
         progress.inc(1)
       end
       progress.finish
@@ -217,25 +220,56 @@ module Tapsoob
 
     def pull_data_from_table(stream, progress)
       loop do
-        begin
-          exit 0 if exiting?
+        if exiting?
+          store_session
+          exit 0
+        end
 
-          size = stream.fetch_database do |rows|
-            if dump_path.nil?
-              puts JSON.generate(rows)
-            else
-              Tapsoob::Utils.export_rows(dump_path, stream.table_name, rows)
+        row_size = 0
+        chunksize = stream.state[:chunksize]
+
+        begin
+          chunksize = Tapsoob::Utils.calculate_chunksize(chunksize) do |c|
+            stream.state[:chunksize] = c.to_i
+            encoded_data, row_size, elapsed_time = nil
+            d1 = c.time_delta do
+              encoded_data, row_size, elapsed_time = stream.fetch
+            end
+
+            data = nil
+            d2 = c.time_delta do
+              data = {
+                :state => stream.to_hash,
+                :checksum => Tapsoob::Utils.checksum(encoded_data).to_s,
+                :encoded_data => encoded_data
+              }
+            end
+
+            stream.fetch_data_from_database(data) do |rows|
+              next if rows == {}
+
+              if dump_path.nil?
+                puts JSON.generate(rows)
+              else
+                Tapsoob::Utils.export_rows(dump_path, stream.table_name, rows)
+              end
             end
+            log.debug "row size: #{row_size}"
+            stream.error = false
+            self.stream_state = stream.to_hash
+
+            c.idle_secs = (d1 + d2)
+
+            elapsed_time
           end
-          stream.error = false
-          self.stream_state = stream.to_hash
        rescue Tapsoob::CorruptedData => e
          log.info "Corrupted Data Received #{e.message}, retrying..."
          stream.error = true
          next
        end
 
-        progress.inc(size) if progress && !exiting?
+        progress.inc(row_size) if progress
+
        break if stream.complete?
      end
 
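`pull_data_from_table` now drives the adaptive chunking that the removed `fetch_database`/`fetch_from_database` pair used to hide inside the stream: each pass times the fetch (`d1`) and the checksum/packaging step (`d2`), reports `d1 + d2` back to `Tapsoob::Utils.calculate_chunksize` through `c.idle_secs`, and returns `elapsed_time` from the block as the timing signal for the next chunk size; `store_session` now runs before `exit 0`, so an interrupted pull can be resumed. One iteration, condensed (`package` and `write_or_print` are shorthand for the inline hash literal and the stdout/file branch above, not real methods):

    chunksize = Tapsoob::Utils.calculate_chunksize(chunksize) do |c|
      stream.state[:chunksize] = c.to_i
      encoded_data = row_size = elapsed_time = data = nil
      d1 = c.time_delta { encoded_data, row_size, elapsed_time = stream.fetch }
      d2 = c.time_delta { data = package(encoded_data) }  # state + checksum + payload
      stream.fetch_data_from_database(data) { |rows| write_or_print(rows) }
      c.idle_secs = (d1 + d2)  # local bookkeeping time, reported as idle
      elapsed_time             # the block's return value feeds the next chunk size
    end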
@@ -290,7 +324,7 @@ module Tapsoob
     def pull_indexes
       log.info "Receiving indexes"
 
-      raw_idxs = Tapsoob::Utils.schema_bin(:indexes_individual, database_url)
+      raw_idxs = Tapsoob::Schema.indexes_individual(database_url)
       idxs = (raw_idxs && raw_idxs.length >= 2 ? JSON.parse(raw_idxs) : {})
 
       apply_table_filter(idxs).each do |table, indexes|
@@ -298,7 +332,7 @@ module Tapsoob
         progress = ProgressBar.new(table, indexes.size)
         indexes.each do |idx|
           output = Tapsoob::Utils.export_indexes(dump_path, table, idx)
-          puts output if output
+          puts output if dump_path.nil? && output
           progress.inc(1)
         end
         progress.finish
@@ -309,7 +343,7 @@ module Tapsoob
       log.info "Resetting sequences"
 
       output = Tapsoob::Utils.schema_bin(:reset_db_sequences, database_url)
-      puts output if output
+      puts output if dump_path.nil? && output
     end
   end
 
@@ -330,7 +364,7 @@ module Tapsoob
       end
       setup_signal_trap
       push_partial_data if resuming?
-      push_data
+      push_data unless schema_only?
       push_indexes if !indexes_first? && !skip_schema?
       push_reset_sequences
     end
@@ -393,7 +427,7 @@ module Tapsoob
       log.info "#{tables.size} tables, #{format_number(record_count)} records"
 
       tables.each do |table_name, count|
-        next unless File.exists?(File.join(dump_path, "data", "#{table_name}.json")) || JSON.parse(File.read(File.join(dump_path, "data", "#{table_name}.json")))["data"].size == 0
+        next unless File.exists?(File.join(dump_path, "data", "#{table_name}.json")) || File.exists?(File.join(dump_path, "data", "#{table_name}.json")) && JSON.parse(File.read(File.join(dump_path, "data", "#{table_name}.json")))["data"].size == 0
         db[table_name.to_sym].truncate if @opts[:purge]
         stream = Tapsoob::DataStream.factory(db, {
           :table_name => table_name,
@@ -404,7 +438,7 @@ module Tapsoob
          :purge => opts[:purge] || false,
          :debug => opts[:debug]
        })
-        progress = ProgressBar.new(table_name.to_s, count)
+        progress = (opts[:progress] ? ProgressBar.new(table_name.to_s, count) : nil)
        push_data_from_file(stream, progress)
      end
    end
@@ -426,17 +460,17 @@ module Tapsoob
           d1 = c.time_delta do
             encoded_data, row_size, elapsed_time = stream.fetch({ :type => "file", :source => dump_path })
           end
-          break if stream.complete?
 
           data = nil
           d2 = c.time_delta do
             data = {
-              :state => stream.to_hash,
-              :checksum => Tapsoob::Utils.checksum(encoded_data).to_s
+              :state        => stream.to_hash,
+              :checksum     => Tapsoob::Utils.checksum(encoded_data).to_s,
+              :encoded_data => encoded_data
             }
           end
 
-          row_size = stream.fetch_data_in_database({ :encoded_data => encoded_data, :checksum => data[:checksum] })
+          stream.fetch_data_to_database(data)
           log.debug "row size: #{row_size}"
           self.stream_state = stream.to_hash
 
@@ -455,13 +489,12 @@ module Tapsoob
         end
         stream.state[:chunksize] = chunksize
 
-        progress.inc(row_size)
+        progress.inc(row_size) if progress
 
-        stream.increment(row_size)
         break if stream.complete?
       end
 
-      progress.finish
+      progress.finish if progress
       completed_tables << stream.table_name.to_s
       self.stream_state = {}
     end
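
Two themes run through these hunks: progress reporting becomes optional end to end (the `ProgressBar` is built only when `opts[:progress]` is set, and every `inc`/`finish` call is nil-guarded), and the pull-side `puts` calls only echo schema, index, and sequence output when streaming to stdout (`dump_path.nil?`). On the push side, `fetch_data_to_database` now receives the full `data` hash, encoded payload included, and the early `break if stream.complete?` between fetch and import is dropped in favor of the offset-based check at the end of the loop. The guarded-progress pattern, condensed:

    progress = (opts[:progress] ? ProgressBar.new(table_name.to_s, count) : nil)
    # ...transfer a chunk...
    progress.inc(row_size) if progress
    # ...after the loop...
    progress.finish if progress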
@@ -1,4 +1,4 @@
 # -*- encoding : utf-8 -*-
 module Tapsoob
-  VERSION = "0.4.22".freeze
+  VERSION = "0.5.2".freeze
 end
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: tapsoob
 version: !ruby/object:Gem::Version
-  version: 0.4.22
+  version: 0.5.2
 platform: java
 authors:
 - Félix Bellanger
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2021-10-20 00:00:00.000000000 Z
+date: 2022-02-01 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   requirement: !ruby/object:Gem::Requirement