tapsoob 0.4.23 → 0.5.3

This diff compares the contents of two publicly released versions of this package, as published to their public registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the registry.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 02ae7de93cd8aeddb465cd48bafe2e33eec932a72a946d6dbde9c05e5d8232d1
- data.tar.gz: c7e6a34e96cfebb2a0cd16fdd3f6f30d84abd918b17f1a78392a08e53783762b
+ metadata.gz: af1c77e65193e1c885e35d4e0b7e2c28b933f915b9b2ad0d21398fe5fa7abd8c
+ data.tar.gz: 0a286d60df2babfe2282a708fb802d561afa9960ca7e8287030c1e9d9df3500e
  SHA512:
- metadata.gz: af637e1c4486956987ac94dbac997b77d48d34789333161701618588849db8ef50396b607c29f74613bba88f410a608f7ae4f4856296e4a33e1672f2540146a4
- data.tar.gz: 70b0f436ed34583a25a8715acd7c60dea54eac1728c7e615bc64ba4d9341ab066944806fd63bfcecf46c1b2120a10ad39e71109ea4b6225e676c9fe3fc2d03db
+ metadata.gz: 4fad86e7e1ec643b8d3f3a86ea06a69f2b5878cf20ae0c866f48bbbceae5af09f8d1da55533d875045b6e75470e3bf73be82512db90ef86be931616207c86a30
+ data.tar.gz: e4740da5ddbdbe77248707db361cb825afb9130a6db63a6dd212cad06fc0a266a11f6424503ecec6c676ba41d62f68c9c108a38c8b43109890da15f9512f83ed
@@ -12,7 +12,6 @@ module Tapsoob
  class DataStream < Thor
  desc "pull DATABASE_URL [DUMP_PATH]", "Pull data from a database."
  option :chunksize, desc: "Initial chunksize", default: 1000, type: :numeric, aliases: "-c"
- option :filter, desc: "Regex Filter for tables", type: :string, aliases: "-f"
  option :tables, desc: "Shortcut to filter on a list of tables", type: :array, aliases: "-t"
  option :"exclude-tables", desc: "Shortcut to exclude a list of tables", type: :array, aliases: "-e"
  option :progress, desc: "Show progress", default: true, type: :boolean, aliases: "-p"
@@ -24,11 +23,10 @@ module Tapsoob

  desc "push DATABASE_URL [DUMP_PATH]", "Push data to a database."
  option :chunksize, desc: "Initial chunksize", default: 1000, type: :numeric, aliases: "-c"
- option :filter, desc: "Regex Filter for tables", type: :string, aliases: "-f"
  option :tables, desc: "Shortcut to filter on a list of tables", type: :array, aliases: "-t"
  option :"exclude-tables", desc: "Shortcut to exclude a list of tables", type: :array, aliases: "-e"
  option :progress, desc: "Show progress", default: true, type: :boolean, aliases: "-p"
- option :purge, desc: "Purge data in tables prior to performing the import", default: false, type: :boolean, aliases: "-p"
+ option :purge, desc: "Purge data in tables prior to performing the import", default: false, type: :boolean
  option :"skip-duplicates", desc: "Remove duplicates when loading data", default: false, type: :boolean
  option :"discard-identity", desc: "Remove identity when pushing data (may result in creating duplicates)", default: false, type: :boolean
  option :debug, desc: "Enable debug messages", default: false, type: :boolean, aliases: "-d"
@@ -65,6 +63,7 @@ module Tapsoob
  # Default options
  opts = {
  progress: options[:progress],
+ tables: options[:tables],
  debug: options[:debug]
  }

@@ -78,15 +77,6 @@ module Tapsoob
  opts[:default_chunksize] = (options[:chunksize] < 10 ? 10 : options[:chunksize])
  end

- # Regex filter
- opts[:table_filter] = options[:filter] if options[:filter]
-
- # Table filter
- if options[:tables]
- r_tables = options[:tables].collect { |t| "^#{t}" }.join("|")
- opts[:table_filter] = "#{r_tables}"
- end
-
  # Exclude tables
  opts[:exclude_tables] = options[:"exclude-tables"] if options[:"exclude-tables"]

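Both data_stream subcommands drop the `--filter` regex option, and `parse_opts` no longer compiles `--tables` into a `^name1|^name2` pattern: the list is now forwarded verbatim as `opts[:tables]`. A minimal sketch of the semantic difference, with illustrative table names (not part of the gem):

    tables = ["user"]
    candidates = ["user", "users", "user_profiles", "orders"]

    # 0.4.x: prefix-anchored regex, so "user" also matched "users" and "user_profiles".
    re = Regexp.new(tables.collect { |t| "^#{t}" }.join("|"))
    candidates.select { |t| re.match(t) }        # => ["user", "users", "user_profiles"]

    # 0.5.x: exact membership against opts[:tables].
    candidates.select { |t| tables.include?(t) } # => ["user"]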
@@ -10,15 +10,16 @@ module Tapsoob
  module CLI
  class Root < Thor
  desc "pull DUMP_PATH DATABASE_URL", "Pull a dump from a database to a folder"
- option :"skip-schema", desc: "Don't transfer the schema just data", default: false, type: :boolean, aliases: "-s"
+ option :data, desc: "Pull the data to the database", default: true, type: :boolean, aliases: '-d'
+ option :schema, desc: "Pull the schema to the database", default: true, type: :boolean, aliases: "-s"
  option :"indexes-first", desc: "Transfer indexes first before data", default: false, type: :boolean, aliases: "-i"
  option :resume, desc: "Resume a Tapsoob Session from a stored file", type: :string, aliases: "-r"
  option :chunksize, desc: "Initial chunksize", default: 1000, type: :numeric, aliases: "-c"
  option :"disable-compression", desc: "Disable Compression", default: false, type: :boolean, aliases: "-g"
- option :filter, desc: "Regex Filter for tables", type: :string, aliases: "-f"
  option :tables, desc: "Shortcut to filter on a list of tables", type: :array, aliases: "-t"
  option :"exclude-tables", desc: "Shortcut to exclude a list of tables", type: :array, aliases: "-e"
- option :debug, desc: "Enable debug messages", default: false, type: :boolean, aliases: "-d"
+ option :progress, desc: "Show progress", default: true, type: :boolean
+ option :debug, desc: "Enable debug messages", default: false, type: :boolean
  def pull(dump_path, database_url)
  opts = parse_opts(options)
  Tapsoob.log.level = Logger::DEBUG if opts[:debug]
@@ -30,18 +31,19 @@ module Tapsoob
  end

  desc "push DUMP_PATH DATABASE_URL", "Push a previously tapsoob dump to a database"
- option :"skip-schema", desc: "Don't transfer the schema just data", default: false, type: :boolean, aliases: "-s"
+ option :data, desc: "Push the data to the database", default: true, type: :boolean, aliases: '-d'
+ option :schema, desc: "Push the schema to the database", default: true, type: :boolean, aliases: "-s"
  option :"indexes-first", desc: "Transfer indexes first before data", default: false, type: :boolean, aliases: "-i"
  option :resume, desc: "Resume a Tapsoob Session from a stored file", type: :string, aliases: "-r"
  option :chunksize, desc: "Initial chunksize", default: 1000, type: :numeric, aliases: "-c"
  option :"disable-compression", desc: "Disable Compression", default: false, type: :boolean, aliases: "-g"
- option :filter, desc: "Regex Filter for tables", type: :string, aliases: "-f"
  option :tables, desc: "Shortcut to filter on a list of tables", type: :array, aliases: "-t"
  option :"exclude-tables", desc: "Shortcut to exclude a list of tables", type: :array, aliases: "-e"
  option :purge, desc: "Purge data in tables prior to performing the import", default: false, type: :boolean, aliases: "-p"
  option :"skip-duplicates", desc: "Remove duplicates when loading data", default: false, type: :boolean
  option :"discard-identity", desc: "Remove identity when pushing data (may result in creating duplicates)", default: false, type: :boolean
- option :debug, desc: "Enable debug messages", default: false, type: :boolean, aliases: "-d"
+ option :progress, desc: "Show progress", default: true, type: :boolean
+ option :debug, desc: "Enable debug messages", default: false, type: :boolean
  def push(dump_path, database_url)
  opts = parse_opts(options)
  Tapsoob.log.level = Logger::DEBUG if opts[:debug]
@@ -67,9 +69,12 @@ module Tapsoob
  def parse_opts(options)
  # Default options
  opts = {
- skip_schema: options[:"skip-schema"],
+ data: options[:data],
+ schema: options[:schema],
  indexes_first: options[:"indexes_first"],
  disable_compression: options[:"disable-compression"],
+ tables: options[:tables],
+ progress: options[:progress],
  debug: options[:debug]
  }

@@ -92,15 +97,6 @@ module Tapsoob
  opts[:default_chunksize] = (options[:chunksize] < 10 ? 10 : options[:chunksize])
  end

- # Regex filter
- opts[:table_filter] = options[:filter] if options[:filter]
-
- # Table filter
- if options[:tables]
- r_tables = options[:tables].collect { |t| "^#{t}" }.join("|")
- opts[:table_filter] = "#{r_tables}"
- end
-
  # Exclude tables
  opts[:exclude_tables] = options[:"exclude-tables"] if options[:"exclude-tables"]

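The `--skip-schema` flag (default false) is replaced by two independent booleans, `--data` and `--schema`, both defaulting to true; with Thor's usual boolean negation the old behavior becomes, e.g., `tapsoob pull db_dump/ postgres://localhost/app --no-schema`. Note that `-d` now aliases `--data`, so `--debug` must be spelled out. A condensed view of how these flags gate the run sequence in the `Operation#run` hunks further down (illustrative Ruby, not the gem's code):

    def run_steps(schema:, data:, indexes_first:)
      steps = []
      steps << :schema  if schema
      steps << :indexes if schema && indexes_first
      steps << :data    if data
      steps << :indexes if schema && !indexes_first
      steps << :reset_sequences   # always runs
      steps
    end

    run_steps(schema: true,  data: true, indexes_first: false)
    # => [:schema, :data, :indexes, :reset_sequences]
    run_steps(schema: false, data: true, indexes_first: false)  # old --skip-schema
    # => [:data, :reset_sequences]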
@@ -74,6 +74,7 @@ module Tapsoob
  def fetch_rows
  state[:chunksize] = fetch_chunksize
  ds = table.order(*order_by).limit(state[:chunksize], state[:offset])
+ state[:size] = table.count
  log.debug "DataStream#fetch_rows SQL -> #{ds.sql}"
  rows = Tapsoob::Utils.format_data(db, ds.all,
  :string_columns => string_columns,
@@ -87,6 +88,7 @@ module Tapsoob
  def fetch_file(dump_path)
  state[:chunksize] = fetch_chunksize
  ds = JSON.parse(File.read(File.join(dump_path, "data", "#{table_name}.json")))
+ state[:size] = ds["data"].size
  log.debug "DataStream#fetch_file"
  rows = {
  :table_name => ds["table_name"],
@@ -132,71 +134,34 @@ module Tapsoob
  t2 = Time.now
  elapsed_time = t2 - t1

- if opts[:type] == "file"
- @complete = rows[:data] == [ ]
- else
- @complete = rows == { }
- end
+ state[:offset] += (rows == {} ? 0 : rows[:data].size)

- [encoded_data, (@complete ? 0 : rows[:data].size), elapsed_time]
+ [encoded_data, (rows == {} ? 0 : rows[:data].size), elapsed_time]
  end

  def complete?
- @complete
+ state[:offset] >= state[:size]
  end

- def fetch_database
- params = fetch_from_database
+ def fetch_data_from_database(params)
  encoded_data = params[:encoded_data]
- json = params[:json]

- rows = parse_encoded_data(encoded_data, json[:checksum])
-
- @complete = rows == { }
+ rows = parse_encoded_data(encoded_data, params[:checksum])

  # update local state
- state.merge!(json[:state].merge(:chunksize => state[:chunksize]))
-
- unless @complete
- yield rows if block_given?
- state[:offset] += rows[:data].size
- rows[:data].size
- else
- 0
- end
- end
+ state.merge!(params[:state].merge(:chunksize => state[:chunksize]))

- def fetch_from_database
- res = nil
- log.debug "DataStream#fetch_from_database state -> #{state.inspect}"
- state[:chunksize] = Tapsoob::Utils.calculate_chunksize(state[:chunksize]) do |c|
- state[:chunksize] = c.to_i
- encoded_data = fetch.first
-
- checksum = Tapsoob::Utils.checksum(encoded_data).to_s
-
- res = {
- :json => { :checksum => checksum, :state => to_hash },
- :encoded_data => encoded_data
- }
- end
-
- res
+ yield rows if block_given?
+ (rows == {} ? 0 : rows[:data].size)
  end

- def fetch_data_in_database(params)
+ def fetch_data_to_database(params)
  encoded_data = params[:encoded_data]

  rows = parse_encoded_data(encoded_data, params[:checksum])
-
- @complete = rows[:data] == [ ]
-
- unless @complete
- import_rows(rows)
- rows[:data].size
- else
- 0
- end
+
+ import_rows(rows)
+ (rows == {} ? 0 : rows[:data].size)
  end

  def self.parse_json(json)
@@ -266,7 +231,6 @@ module Tapsoob
  end

  table.import(columns, data, :commit_every => 100)
- state[:offset] += rows[:data].size
  rescue Exception => ex
  case ex.message
  when /integer out of range/ then
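With `state[:size]` now captured up front (`table.count` on pull, the dump file's `data` array size on push) and `state[:offset]` advanced inside `fetch` itself, `complete?` reduces to an offset/size comparison; the old `@complete` flag required one extra, empty read to flip. Illustrative arithmetic:

    # Illustrative only: the new completion bookkeeping.
    state = { offset: 0, size: 25, chunksize: 10 }
    reads = 0
    until state[:offset] >= state[:size]   # DataStream#complete?
      reads += 1
      state[:offset] += [state[:chunksize], state[:size] - state[:offset]].min
    end
    reads  # => 3; the 0.4.x flag needed a fourth, empty fetch to set @complete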
@@ -21,8 +21,12 @@ module Tapsoob
  "op"
  end

- def skip_schema?
- !!opts[:skip_schema]
+ def data?
+ opts[:data]
+ end
+
+ def schema?
+ opts[:schema]
  end

  def indexes_first?
@@ -30,7 +34,7 @@ module Tapsoob
  end

  def table_filter
- opts[:table_filter]
+ opts[:tables] || []
  end

  def exclude_tables
@@ -38,19 +42,18 @@ module Tapsoob
  end

  def apply_table_filter(tables)
- return tables unless table_filter || exclude_tables
+ return tables if table_filter.empty? && exclude_tables.empty?

- re = table_filter ? Regexp.new(table_filter) : nil
  if tables.kind_of?(Hash)
  ntables = {}
  tables.each do |t, d|
- if !exclude_tables.include?(t.to_s) && (!re || !re.match(t.to_s).nil?)
+ if !exclude_tables.include?(t.to_s) && (!table_filter.empty? && table_filter.include?(t.to_s))
  ntables[t] = d
  end
  end
  ntables
  else
- tables.reject { |t| exclude_tables.include?(t.to_s) || (re && re.match(t.to_s).nil?) }
+ tables.reject { |t| exclude_tables.include?(t.to_s) }.select { |t| table_filter.include?(t.to_s) }
  end
  end

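`table_filter` is now an exact allow-list (`opts[:tables] || []`) rather than a regex. One consequence visible in the new code: the early return only fires when both lists are empty, and both branches then require `table_filter.include?(t)`, so a run that passes only `--exclude-tables` selects nothing. Reduced to plain Ruby with illustrative names:

    table_filter   = []          # no --tables given
    exclude_tables = ["logs"]
    tables = ["users", "logs", "orders"]

    tables.reject { |t| exclude_tables.include?(t) }
          .select { |t| table_filter.include?(t) }
    # => [] (every table is dropped when only an exclusion list is supplied)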
@@ -163,13 +166,13 @@ module Tapsoob
  def run
  catch_errors do
  unless resuming?
- pull_schema if !skip_schema?
- pull_indexes if indexes_first? && !skip_schema?
+ pull_schema if schema?
+ pull_indexes if indexes_first? && schema?
  end
  setup_signal_trap
- pull_partial_data if resuming?
- pull_data
- pull_indexes if !indexes_first? && !skip_schema?
+ pull_partial_data if data? && resuming?
+ pull_data if data?
+ pull_indexes if !indexes_first? && schema?
  pull_reset_sequences
  end
  end
@@ -182,7 +185,7 @@ module Tapsoob
  schema_data = Tapsoob::Schema.dump_table(database_url, table_name)
  log.debug "Table: #{table_name}\n#{schema_data}\n"
  output = Tapsoob::Utils.export_schema(dump_path, table_name, schema_data)
- puts output if output
+ puts output if dump_path.nil? && output
  progress.inc(1)
  end
  progress.finish
@@ -217,25 +220,56 @@ module Tapsoob

  def pull_data_from_table(stream, progress)
  loop do
- begin
- exit 0 if exiting?
+ if exiting?
+ store_session
+ exit 0
+ end

- size = stream.fetch_database do |rows|
- if dump_path.nil?
- puts JSON.generate(rows)
- else
- Tapsoob::Utils.export_rows(dump_path, stream.table_name, rows)
+ row_size = 0
+ chunksize = stream.state[:chunksize]
+
+ begin
+ chunksize = Tapsoob::Utils.calculate_chunksize(chunksize) do |c|
+ stream.state[:chunksize] = c.to_i
+ encoded_data, row_size, elapsed_time = nil
+ d1 = c.time_delta do
+ encoded_data, row_size, elapsed_time = stream.fetch
+ end
+
+ data = nil
+ d2 = c.time_delta do
+ data = {
+ :state => stream.to_hash,
+ :checksum => Tapsoob::Utils.checksum(encoded_data).to_s,
+ :encoded_data => encoded_data
+ }
+ end
+
+ stream.fetch_data_from_database(data) do |rows|
+ next if rows == {}
+
+ if dump_path.nil?
+ puts JSON.generate(rows)
+ else
+ Tapsoob::Utils.export_rows(dump_path, stream.table_name, rows)
+ end
  end
+ log.debug "row size: #{row_size}"
+ stream.error = false
+ self.stream_state = stream.to_hash
+
+ c.idle_secs = (d1 + d2)
+
+ elapsed_time
  end
- stream.error = false
- self.stream_state = stream.to_hash
  rescue Tapsoob::CorruptedData => e
  log.info "Corrupted Data Received #{e.message}, retrying..."
  stream.error = true
  next
  end

- progress.inc(size) if progress && !exiting?
+ progress.inc(row_size) if progress
+
  break if stream.complete?
  end

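The pull loop now mirrors the push loop: `Tapsoob::Utils.calculate_chunksize` adapts the chunk size from timing feedback, with `c.time_delta` wrapping each phase, `c.idle_secs` reporting time spent outside the database, and the block returning the stream's own `elapsed_time`. The shape of that protocol, reduced to plain Ruby (illustrative; the real accounting lives in `Tapsoob::Utils`):

    # Illustrative only: time each phase, report non-database time as idle.
    def time_delta
      t0 = Time.now
      yield
      Time.now - t0
    end

    d1 = time_delta { sleep 0.02 }  # stand-in for stream.fetch
    d2 = time_delta { sleep 0.01 }  # stand-in for checksum/encode bookkeeping
    idle_secs = d1 + d2             # credited back via c.idle_secs, so the sizer
                                    # scales on in-DB elapsed_time, not loop time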
@@ -290,7 +324,7 @@ module Tapsoob
  def pull_indexes
  log.info "Receiving indexes"

- raw_idxs = Tapsoob::Utils.schema_bin(:indexes_individual, database_url)
+ raw_idxs = Tapsoob::Schema.indexes_individual(database_url)
  idxs = (raw_idxs && raw_idxs.length >= 2 ? JSON.parse(raw_idxs) : {})

  apply_table_filter(idxs).each do |table, indexes|
@@ -298,7 +332,7 @@ module Tapsoob
  progress = ProgressBar.new(table, indexes.size)
  indexes.each do |idx|
  output = Tapsoob::Utils.export_indexes(dump_path, table, idx)
- puts output if output
+ puts output if dump_path.nil? && output
  progress.inc(1)
  end
  progress.finish
@@ -309,7 +343,7 @@ module Tapsoob
  log.info "Resetting sequences"

  output = Tapsoob::Utils.schema_bin(:reset_db_sequences, database_url)
- puts output if output
+ puts output if dump_path.nil? && output
  end
  end

@@ -325,13 +359,13 @@ module Tapsoob
  def run
  catch_errors do
  unless resuming?
- push_schema if !skip_schema?
- push_indexes if indexes_first? && !skip_schema?
+ push_schema if schema?
+ push_indexes if indexes_first? && schema?
  end
  setup_signal_trap
- push_partial_data if resuming?
- push_data
- push_indexes if !indexes_first? && !skip_schema?
+ push_partial_data if data? && resuming?
+ push_data if data?
+ push_indexes if !indexes_first? && schema?
  push_reset_sequences
  end
  end
@@ -404,7 +438,7 @@ module Tapsoob
  :purge => opts[:purge] || false,
  :debug => opts[:debug]
  })
- progress = ProgressBar.new(table_name.to_s, count)
+ progress = (opts[:progress] ? ProgressBar.new(table_name.to_s, count) : nil)
  push_data_from_file(stream, progress)
  end
  end
@@ -426,17 +460,17 @@ module Tapsoob
  d1 = c.time_delta do
  encoded_data, row_size, elapsed_time = stream.fetch({ :type => "file", :source => dump_path })
  end
- break if stream.complete?

  data = nil
  d2 = c.time_delta do
  data = {
- :state => stream.to_hash,
- :checksum => Tapsoob::Utils.checksum(encoded_data).to_s
+ :state => stream.to_hash,
+ :checksum => Tapsoob::Utils.checksum(encoded_data).to_s,
+ :encoded_data => encoded_data
  }
  end

- row_size = stream.fetch_data_in_database({ :encoded_data => encoded_data, :checksum => data[:checksum] })
+ stream.fetch_data_to_database(data)
  log.debug "row size: #{row_size}"
  self.stream_state = stream.to_hash

@@ -455,13 +489,12 @@ module Tapsoob
  end
  stream.state[:chunksize] = chunksize

- progress.inc(row_size)
+ progress.inc(row_size) if progress

- stream.increment(row_size)
  break if stream.complete?
  end

- progress.finish
+ progress.finish if progress
  completed_tables << stream.table_name.to_s
  self.stream_state = {}
  end
@@ -1,4 +1,4 @@
  # -*- encoding : utf-8 -*-
  module Tapsoob
- VERSION = "0.4.23".freeze
+ VERSION = "0.5.3".freeze
  end
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: tapsoob
  version: !ruby/object:Gem::Version
- version: 0.4.23
+ version: 0.5.3
  platform: ruby
  authors:
  - Félix Bellanger
@@ -9,7 +9,7 @@ authors:
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2021-10-20 00:00:00.000000000 Z
+ date: 2022-02-01 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: ripl
@@ -149,7 +149,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
  - !ruby/object:Gem::Version
  version: '0'
  requirements: []
- rubygems_version: 3.1.6
+ rubygems_version: 3.2.32
  signing_key:
  specification_version: 4
  summary: Simple tool to import/export databases.