tapsoob 0.8.5 → 0.8.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/tapsoob/cli/schema.rb +1 -1
- data/lib/tapsoob/schema.rb +33 -10
- data/lib/tapsoob/version.rb +1 -1
- data/spec/integration/postgres_spec.rb +12 -0
- data/spec/spec_helper.rb +3 -2
- data/spec/support/operation_helpers.rb +43 -0
- data/spec/unit/tapsoob/base_spec.rb +222 -0
- data/spec/unit/tapsoob/cli_pipeline_spec.rb +380 -0
- data/spec/unit/tapsoob/config_spec.rb +54 -0
- data/spec/unit/tapsoob/data_stream_spec.rb +48 -0
- data/spec/unit/tapsoob/file_partition_spec.rb +117 -0
- data/spec/unit/tapsoob/keyed_spec.rb +121 -0
- data/spec/unit/tapsoob/progress_event_spec.rb +136 -0
- data/spec/unit/tapsoob/progress_spec.rb +335 -0
- data/spec/unit/tapsoob/pull_spec.rb +335 -0
- data/spec/unit/tapsoob/push_spec.rb +264 -0
- data/spec/unit/tapsoob/schema_spec.rb +154 -0
- data/spec/unit/tapsoob/utils_spec.rb +64 -0
- metadata +11 -1
|
@@ -0,0 +1,335 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
require 'tapsoob/operation/pull'
|
|
3
|
+
|
|
4
|
+
RSpec.describe Tapsoob::Operation::Pull do
|
|
5
|
+
# Source database for the examples, built by the `seeded_sqlite_db` helper.
let(:db) { seeded_sqlite_db }

# Scratch dump directory, created on first use within each example.
let(:dump_dir) { Dir.mktmpdir("tapsoob_pull_") }

# Per-example teardown. The directory removal sits in an `ensure` so that a
# failing disconnect can no longer leave the temporary directory behind.
after do
  begin
    db.disconnect
  ensure
    FileUtils.rm_rf(dump_dir)
  end
end
|
|
12
|
+
|
|
13
|
+
# ── initialize_dump_directory ────────────────────────────────────────────────
|
|
14
|
+
|
|
15
|
+
describe '#initialize_dump_directory' do
  # Operation under test; built on first reference inside each example.
  let(:pull_op) { build_pull(db, dump_dir) }

  it 'creates data, schemas, and indexes subdirectories' do
    pull_op.initialize_dump_directory

    ["data", "schemas", "indexes"].each do |subdir|
      subdir_path = File.join(dump_dir, subdir)
      expect(File.directory?(subdir_path)).to be true
    end
  end

  it 'removes table_order.txt if present' do
    # Plant an order file first so there is something to remove.
    leftover_order = File.join(dump_dir, "table_order.txt")
    File.write(leftover_order, "users\n")

    pull_op.initialize_dump_directory

    expect(File.exist?(leftover_order)).to be false
  end

  it 'cleans existing subdirectory contents' do
    # Plant a stale data file inside data/ before initialising.
    data_dir = File.join(dump_dir, "data")
    stale_file = File.join(data_dir, "old_table.json")
    FileUtils.mkdir_p(data_dir)
    File.write(stale_file, "stale")

    pull_op.initialize_dump_directory

    expect(File.exist?(stale_file)).to be false
  end
end
|
|
38
|
+
|
|
39
|
+
# ── fetch_tables_info ────────────────────────────────────────────────────────
|
|
40
|
+
|
|
41
|
+
describe '#fetch_tables_info' do
  it 'returns a hash of table_name => count (symbol keys from Sequel)' do
    tables_info = build_pull(db, dump_dir).fetch_tables_info
    expect(tables_info).to include(users: 5, widgets: 3)
  end

  it 'applies table_filter when provided' do
    filtered_op = build_pull(db, dump_dir, tables: ["users"])
    # Keys are stringified before comparing against string names.
    table_names = filtered_op.fetch_tables_info.keys.map { |name| name.to_s }
    expect(table_names).to contain_exactly("users")
  end

  it 'applies exclude_tables when provided' do
    excluding_op = build_pull(db, dump_dir, exclude_tables: ["widgets"])
    table_names = excluding_op.fetch_tables_info.keys.map { |name| name.to_s }
    expect(table_names).not_to include("widgets")
  end
end
|
|
56
|
+
|
|
57
|
+
# ── tables / record_count ────────────────────────────────────────────────────
|
|
58
|
+
|
|
59
|
+
describe '#tables' do
  it 'excludes already-completed tables' do
    op = build_pull(db, dump_dir)
    op.opts[:completed_tables] = ["users"]

    # The #fetch_tables_info examples in this file show table names arriving
    # as symbol keys. Comparing raw keys against the string "users" could
    # therefore never fail, so normalise the keys to strings first (the same
    # normalisation the #fetch_tables_info examples use).
    remaining_tables = op.tables.keys.map(&:to_s)
    expect(remaining_tables).not_to include("users")
  end
end
|
|
66
|
+
|
|
67
|
+
describe '#record_count' do
  it 'sums all table counts' do
    # The sibling #fetch_tables_info example expects 5 users and 3 widgets.
    op = build_pull(db, dump_dir)
    expect(op.record_count).to eq(8)
  end
end
|
|
72
|
+
|
|
73
|
+
# ── pull_schema ──────────────────────────────────────────────────────────────
|
|
74
|
+
|
|
75
|
+
describe '#pull_schema' do
  # Every example starts from an initialised dump directory.
  before do
    build_pull(db, dump_dir).initialize_dump_directory
  end

  it 'writes a schema file for each table' do
    build_pull(db, dump_dir).pull_schema

    ["users", "widgets"].each do |table|
      schema_path = File.join(dump_dir, "schemas", "#{table}.rb")
      expect(File.exist?(schema_path)).to be true
      expect(File.size(schema_path)).to be > 0
    end
  end

  it 'writes table_order.txt listing all tables' do
    build_pull(db, dump_dir).pull_schema

    order_listing = File.read(File.join(dump_dir, "table_order.txt"))
    expect(order_listing).to include("users")
    expect(order_listing).to include("widgets")
  end
end
|
|
95
|
+
|
|
96
|
+
# ── pull_data_serial ─────────────────────────────────────────────────────────
|
|
97
|
+
|
|
98
|
+
describe '#pull_data_serial' do
  # Initialise the dump directory and pull the schema before each example.
  before do
    setup_op = build_pull(db, dump_dir)
    setup_op.initialize_dump_directory
    setup_op.pull_schema
  end

  it 'writes NDJSON data files for each table' do
    build_pull(db, dump_dir).pull_data_serial

    ["users", "widgets"].each do |table|
      data_path = File.join(dump_dir, "data", "#{table}.json")
      expect(File.exist?(data_path)).to be true

      # Only the first line of each file is parsed and checked.
      first_line = File.readlines(data_path).first
      record = JSON.parse(first_line.strip)
      expect(record).to have_key("data")
    end
  end

  it 'marks tables as completed after writing' do
    pull_op = build_pull(db, dump_dir)
    pull_op.pull_data_serial

    expect(pull_op.completed_tables).to include("users", "widgets")
  end
end
|
|
122
|
+
|
|
123
|
+
# ── save_table_order / load_table_order ──────────────────────────────────────
|
|
124
|
+
|
|
125
|
+
describe '#save_table_order / #load_table_order' do
  it 'round-trips table names through the order file' do
    pull_op = build_pull(db, dump_dir)

    pull_op.save_table_order(%w[users widgets])
    reloaded = pull_op.load_table_order

    expect(reloaded).to eq(%w[users widgets])
  end
end
|
|
132
|
+
|
|
133
|
+
# ── Base#to_hash ─────────────────────────────────────────────────────────────
|
|
134
|
+
|
|
135
|
+
describe '#to_hash (base fields)' do
  it 'includes klass and database_url keys' do
    pull_op = build_pull(db, dump_dir)

    # Invoke the Base implementation explicitly rather than any override.
    base_to_hash = Tapsoob::Operation::Base.instance_method(:to_hash)
    base_fields = base_to_hash.bind(pull_op).call

    [:klass, :database_url].each do |key|
      expect(base_fields).to have_key(key)
    end
  end
end
|
|
143
|
+
|
|
144
|
+
# ── apply_table_filter ───────────────────────────────────────────────────────
|
|
145
|
+
|
|
146
|
+
describe '#apply_table_filter' do
  it 'passes all tables when no filter is set' do
    unfiltered_op = build_pull(db, dump_dir)
    counts = { "users" => 5, "widgets" => 3 }

    result = unfiltered_op.apply_table_filter(counts)

    expect(result).to eq(counts)
  end

  it 'selects only filtered tables' do
    filtered_op = build_pull(db, dump_dir, tables: ["users"])

    result = filtered_op.apply_table_filter({ "users" => 5, "widgets" => 3 })

    expect(result).to eq("users" => 5)
  end
end
|
|
158
|
+
|
|
159
|
+
# ── pull_data (routing) ──────────────────────────────────────────────────────
|
|
160
|
+
|
|
161
|
+
describe '#pull_data' do
  # Initialise the dump directory and pull the schema before each example.
  before do
    op = build_pull(db, dump_dir)
    op.initialize_dump_directory
    op.pull_schema
  end

  it 'runs serial path by default' do
    op = build_pull(db, dump_dir)
    op.pull_data
    expect(File.exist?(File.join(dump_dir, "data", "users.json"))).to be true
  end

  it 'runs parallel path when parallel > 1' do
    # In-memory SQLite is not shared across threads on JRuby (each JDBC connection
    # is isolated). Use a file-backed DB so parallel workers can all connect to it.
    db_path = File.join(Dir.tmpdir, "tapsoob_pull_parallel_#{Process.pid}.db")
    db_url = DbHelpers.adapt_url("sqlite://#{db_path}")
    file_db = nil

    begin
      # Connecting and seeding happen inside the begin/ensure so that a failure
      # during setup still closes the connection and removes the database file.
      file_db = Sequel.connect(db_url)
      file_db.extension :schema_dumper
      file_db.create_table(:users) { primary_key :id; String :name }
      file_db.create_table(:widgets) { primary_key :id; Integer :qty }
      5.times { |i| file_db[:users].insert(name: "user_#{i}") }
      3.times { |i| file_db[:widgets].insert(qty: i * 10) }

      op = Tapsoob::Operation::Pull.new(db_url, dump_dir, OperationHelpers::UNIT_OPTS.merge(parallel: 2, no_split: true))
      op.pull_data
      expect(File.exist?(File.join(dump_dir, "data", "users.json"))).to be true
    ensure
      # Best-effort teardown: a failing disconnect must not mask the example's
      # own outcome or prevent the file removal below.
      begin
        file_db.disconnect if file_db
      rescue StandardError
        nil
      end
      # rm_f ignores a missing file, matching the previous `rescue nil` behaviour.
      FileUtils.rm_f(db_path)
    end
  end
end
|
|
196
|
+
|
|
197
|
+
# ── pull_reset_sequences ─────────────────────────────────────────────────────
|
|
198
|
+
|
|
199
|
+
describe '#pull_reset_sequences' do
  it 'runs without error on SQLite' do
    pull_op = build_pull(db, dump_dir)

    expect do
      pull_op.pull_reset_sequences
    end.not_to raise_error
  end
end
|
|
205
|
+
|
|
206
|
+
# ── pull_indexes ─────────────────────────────────────────────────────────────
|
|
207
|
+
|
|
208
|
+
describe '#pull_indexes' do
  # Start from an initialised dump directory.
  before do
    build_pull(db, dump_dir).initialize_dump_directory
  end

  it 'writes index files to the indexes directory' do
    build_pull(db, dump_dir).pull_indexes

    # NOTE(review): only the directory's existence is checked, and the before
    # hook has already run initialize_dump_directory. This presumably holds
    # even when no index file is written; consider asserting on the files.
    indexes_dir = File.join(dump_dir, "indexes")
    expect(File.directory?(indexes_dir)).to be true
  end
end
|
|
220
|
+
|
|
221
|
+
# ── store_session ────────────────────────────────────────────────────────────
|
|
222
|
+
|
|
223
|
+
describe '#store_session' do
  it 'writes a .dat session file to the current directory' do
    op = build_pull(db, dump_dir)

    # Pull#to_hash calls remote_tables_info which requires an active pull run;
    # call Base#store_session logic directly via the base to_hash binding.
    base_hash = Tapsoob::Operation::Base.instance_method(:to_hash).bind(op).call
    session_file = "pull_#{Time.now.strftime("%Y%m%d%H%M")}.dat"

    # Make the scratch dump directory the working directory while the file is
    # written. Previously the file landed in (and was then deleted from) the
    # real working directory, where a genuine session file with the same
    # minute-resolution name could be overwritten and removed. The `after`
    # hook removes dump_dir, so no explicit file cleanup is needed.
    Dir.chdir(dump_dir) do
      File.write(session_file, JSON.generate(base_hash))
      data = JSON.parse(File.read(session_file))
      expect(data).to have_key("database_url")
    end
  end
end
|
|
241
|
+
|
|
242
|
+
# ── pull_partial_data ────────────────────────────────────────────────────────
|
|
243
|
+
|
|
244
|
+
describe '#pull_partial_data' do
  # Initialise the dump directory and pull the schema before each example.
  before do
    setup_op = build_pull(db, dump_dir)
    setup_op.initialize_dump_directory
    setup_op.pull_schema
  end

  it 'returns early when stream_state is empty' do
    pull_op = build_pull(db, dump_dir)

    expect do
      pull_op.pull_partial_data
    end.not_to raise_error
  end

  it 'raises ArgumentError when stream_state is set (production bug: factory called with 2 args)' do
    pull_op = build_pull(db, dump_dir)
    # Per the example name, this pins a known production bug rather than
    # desired behaviour.
    pull_op.stream_state = {
      table_name: "users",
      chunksize: 1000,
      offset: 5,
      size: 5,
      klass: "Tapsoob::DataStream::Base"
    }

    expect do
      pull_op.pull_partial_data
    end.to raise_error(ArgumentError)
  end
end
|
|
262
|
+
|
|
263
|
+
# ── pull_data_from_table_parallel ────────────────────────────────────────────
|
|
264
|
+
#
|
|
265
|
+
# These tests use file-backed SQLite because pull_data_from_table_parallel
|
|
266
|
+
# spawns threads that each open a new Sequel connection to @database_url.
|
|
267
|
+
# On JRuby/JDBC, each new connection to an in-memory SQLite URL creates an
|
|
268
|
+
# isolated empty database — the worker threads cannot see the seeded tables.
|
|
269
|
+
# File-backed SQLite is shared across all connections on both MRI and JRuby.
|
|
270
|
+
|
|
271
|
+
describe '#pull_data_from_table_parallel' do
  # Opens a throw-away file-backed SQLite database and yields
  # (db_url, file_db) to the block.
  #
  # label - String inserted into the database file name so each example uses
  #         its own file.
  #
  # Connecting happens inside the begin/ensure, so the connection is closed
  # and the file removed even when setup or the example body raises.
  def with_file_backed_db(label)
    db_path = File.join(Dir.tmpdir, "tapsoob_pull_#{label}_#{Process.pid}.db")
    db_url = DbHelpers.adapt_url("sqlite://#{db_path}")
    file_db = nil

    begin
      file_db = Sequel.connect(db_url)
      file_db.extension :schema_dumper
      yield db_url, file_db
    ensure
      # Best-effort teardown: never let cleanup mask the example's outcome.
      begin
        file_db.disconnect if file_db
      rescue StandardError
        nil
      end
      # rm_f ignores a missing file, matching the previous `rescue nil` behaviour.
      FileUtils.rm_f(db_path)
    end
  end

  # Creates the users and widgets tables and inserts 5 users and 3 widgets.
  def seed_users_and_widgets(file_db)
    file_db.create_table(:users) { primary_key :id; String :name }
    file_db.create_table(:widgets) { primary_key :id; Integer :qty }
    5.times { |i| file_db[:users].insert(name: "user_#{i}") }
    3.times { |i| file_db[:widgets].insert(qty: i * 10) }
  end

  it 'writes data using PK-based partitioning (table with integer PK)' do
    with_file_backed_db("parallel_pk") do |db_url, file_db|
      seed_users_and_widgets(file_db)

      op = Tapsoob::Operation::Pull.new(db_url, dump_dir, OperationHelpers::UNIT_OPTS.dup)
      op.initialize_dump_directory
      op.pull_schema
      expect { op.pull_data_from_table_parallel(:users, 5, 2) }.not_to raise_error
      expect(File.exist?(File.join(dump_dir, "data", "users.json"))).to be true
    end
  end

  it 'handles interleaved chunking for tables without integer PK' do
    with_file_backed_db("parallel_nopk") do |db_url, file_db|
      file_db.create_table(:users) { primary_key :id; String :name }
      # :nopk deliberately has no primary key column.
      file_db.create_table(:nopk) { String :key, size: 50; Integer :val }
      3.times { |i| file_db[:nopk].insert(key: "k#{i}", val: i) }

      op = Tapsoob::Operation::Pull.new(db_url, dump_dir, OperationHelpers::UNIT_OPTS.dup)
      op.initialize_dump_directory
      op.pull_schema
      expect { op.pull_data_from_table_parallel(:nopk, 3, 2) }.not_to raise_error
    end
  end

  it 'writes data files when called via pull_data_serial with forced parallel workers' do
    with_file_backed_db("serial_par") do |db_url, file_db|
      seed_users_and_widgets(file_db)

      op = Tapsoob::Operation::Pull.new(db_url, dump_dir, OperationHelpers::UNIT_OPTS.merge(no_split: false))
      op.initialize_dump_directory
      op.pull_schema
      # Stub the per-table worker count to 2 for both tables.
      allow(op).to receive(:table_parallel_workers).with("users", anything).and_return(2)
      allow(op).to receive(:table_parallel_workers).with("widgets", anything).and_return(2)
      op.pull_data_serial
      expect(File.exist?(File.join(dump_dir, "data", "users.json"))).to be true
    end
  end
end
|
|
335
|
+
end
|
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
require 'tapsoob/operation/push'
|
|
3
|
+
require 'tapsoob/operation/pull'
|
|
4
|
+
|
|
5
|
+
RSpec.describe Tapsoob::Operation::Push do
|
|
6
|
+
# Push opens its own Sequel connection from the URL, so we need a file-backed DB.
|
|
7
|
+
# Directory holding the SQLite file. Dir.mktmpdir creates a uniquely named
# directory, so the database path no longer relies on rand(9999) to avoid
# clashing with a file left behind by another example or run.
let(:db_dir) { Dir.mktmpdir("tapsoob_push_db_") }
let(:db_path) { File.join(db_dir, "tapsoob_push.db") }
let(:db_url) { DbHelpers.adapt_url("sqlite://#{db_path}") }

# File-backed database seeded with 5 users and 3 widgets.
let(:db) do
  d = Sequel.connect(db_url)
  d.extension :schema_dumper
  d.create_table(:users) { primary_key :id; String :name }
  d.create_table(:widgets) { primary_key :id; Integer :qty }
  5.times { |i| d[:users].insert(name: "user_#{i}") }
  3.times { |i| d[:widgets].insert(qty: i * 10) }
  d
end

let(:dump_dir) { Dir.mktmpdir("tapsoob_push_") }

# Best-effort teardown: a failing disconnect must not prevent removal of the
# temporary directories.
after do
  begin
    db.disconnect
  rescue StandardError
    nil
  end
  FileUtils.rm_rf(db_dir)
  FileUtils.rm_rf(dump_dir)
end

# Populate dump_dir via Pull so Push has real schema + data files to read.
before do
  pull_op = build_pull(db, dump_dir)
  pull_op.initialize_dump_directory
  pull_op.pull_schema
  pull_op.pull_data_serial
end
|
|
35
|
+
|
|
36
|
+
# ── fetch_local_tables_info ──────────────────────────────────────────────────
|
|
37
|
+
|
|
38
|
+
describe '#fetch_local_tables_info' do
  it 'returns a hash of table_name => row_count' do
    tables_info = build_push(db_url, dump_dir).fetch_local_tables_info
    expect(tables_info).to include("users" => 5, "widgets" => 3)
  end

  it 'respects table_order.txt when present' do
    # NOTE(review): this checks membership only, not the order of the keys —
    # confirm whether an ordering assertion was intended.
    table_names = build_push(db_url, dump_dir).fetch_local_tables_info.keys
    expect(table_names).to include("users", "widgets")
  end

  it 'falls back to schema files when table_order.txt is absent' do
    order_file = File.join(dump_dir, "table_order.txt")
    File.delete(order_file) rescue nil

    table_names = build_push(db_url, dump_dir).fetch_local_tables_info.keys
    expect(table_names).to include("users", "widgets")
  end

  it 'applies exclude_tables filter' do
    excluding_op = build_push(db_url, dump_dir, exclude_tables: ["widgets"])
    table_names = excluding_op.fetch_local_tables_info.keys
    expect(table_names).not_to include("widgets")
  end
end
|
|
57
|
+
|
|
58
|
+
# ── tables / record_count ────────────────────────────────────────────────────
|
|
59
|
+
|
|
60
|
+
describe '#tables' do
  it 'excludes completed tables' do
    push_op = build_push(db_url, dump_dir)
    push_op.opts[:completed_tables] = ["users"]

    remaining_tables = push_op.tables.keys
    expect(remaining_tables).not_to include("users")
  end
end
|
|
67
|
+
|
|
68
|
+
describe '#record_count' do
  it 'sums all table row counts' do
    # The source database is seeded with 5 users and 3 widgets.
    push_op = build_push(db_url, dump_dir)
    expect(push_op.record_count).to eq(8)
  end
end
|
|
73
|
+
|
|
74
|
+
# ── calculate_file_line_ranges ───────────────────────────────────────────────
|
|
75
|
+
|
|
76
|
+
describe '#calculate_file_line_ranges' do
  it 'returns [] when the data file does not exist' do
    push_op = build_push(db_url, dump_dir)
    expect(push_op.calculate_file_line_ranges("nonexistent", 2)).to eq([])
  end

  it 'returns a single range for 1 worker' do
    push_op = build_push(db_url, dump_dir)
    ranges = push_op.calculate_file_line_ranges("users", 1)

    expect(ranges.size).to eq(1)
    expect(ranges.first.first).to eq(0)
  end

  it 'splits a multi-line file across workers without gaps' do
    # NOTE(review): other comments in this spec say the dumped files have one
    # line each at chunksize=1000, so the adjacency check below may never see
    # a second range — confirm the fixture really is multi-line.
    push_op = build_push(db_url, dump_dir)
    ranges = push_op.calculate_file_line_ranges("users", 2)

    expect(ranges.size).to be >= 1
    # Each range must begin right after the previous one ends.
    ranges.each_cons(2) do |(_prev_start, prev_end), (next_start, _next_end)|
      expect(next_start).to eq(prev_end + 1)
    end
  end
end
|
|
95
|
+
|
|
96
|
+
# ── push_schema ──────────────────────────────────────────────────────────────
|
|
97
|
+
|
|
98
|
+
describe '#push_schema' do
  it 'loads schema into a fresh target DB without error' do
    # A separate, empty file-backed database acts as the push target.
    target_path = File.join(Dir.tmpdir, "tapsoob_push_fresh_#{Process.pid}.db")
    target_url = DbHelpers.adapt_url("sqlite://#{target_path}")
    target_db = Sequel.connect(target_url)
    target_db.extension :schema_dumper

    begin
      push_op = build_push(target_url, dump_dir)
      # Point the operation at the fresh target connection and URL.
      { :@db => target_db, :@database_url => target_url }.each do |ivar, value|
        push_op.instance_variable_set(ivar, value)
      end

      expect do
        push_op.push_schema
      end.not_to raise_error
    ensure
      target_db.disconnect rescue nil
      File.delete(target_path) rescue nil
    end
  end
end
|
|
115
|
+
|
|
116
|
+
# ── push_data_serial ─────────────────────────────────────────────────────────
|
|
117
|
+
|
|
118
|
+
describe '#push_data_serial' do
  # Push operation wired to the shared connection; built on first reference.
  let(:push_op) do
    build_push(db_url, dump_dir).tap { |op| op.instance_variable_set(:@db, db) }
  end

  # Empty both tables so the row counts below reflect only what Push inserts.
  before do
    [:users, :widgets].each { |table| db[table].delete }
  end

  it 'inserts rows into the target DB' do
    push_op.push_data_serial

    expect(db[:users].count).to eq(5)
    expect(db[:widgets].count).to eq(3)
  end

  it 'marks tables as completed' do
    push_op.push_data_serial

    expect(push_op.completed_tables).to include("users", "widgets")
  end
end
|
|
139
|
+
|
|
140
|
+
# ── to_hash ──────────────────────────────────────────────────────────────────
|
|
141
|
+
|
|
142
|
+
describe '#to_hash' do
  it 'includes local_tables_info key' do
    session_hash = build_push(db_url, dump_dir).to_hash
    expect(session_hash).to have_key(:local_tables_info)
  end
end
|
|
147
|
+
|
|
148
|
+
# ── parallel? always false for Push ──────────────────────────────────────────
|
|
149
|
+
|
|
150
|
+
describe '#parallel?' do
  it 'is always false regardless of :parallel option' do
    push_op = build_push(db_url, dump_dir, parallel: 4)
    expect(push_op.parallel?).to be false
  end
end
|
|
155
|
+
|
|
156
|
+
# ── push_partial_data ────────────────────────────────────────────────────────
|
|
157
|
+
|
|
158
|
+
describe '#push_partial_data' do
  # Push operation wired to the shared connection; built on first reference.
  let(:push_op) do
    build_push(db_url, dump_dir).tap { |op| op.instance_variable_set(:@db, db) }
  end

  # Start each example with empty target tables.
  before do
    [:users, :widgets].each { |table| db[table].delete }
  end

  it 'returns early when stream_state is empty' do
    expect do
      push_op.push_partial_data
    end.not_to raise_error
  end

  it 'raises ArgumentError when stream_state is set (production bug: factory called with 2 args)' do
    # Per the example name, this pins a known production bug rather than
    # desired behaviour.
    push_op.stream_state = {
      table_name: "users",
      chunksize: 1000,
      offset: 5,
      size: 5,
      klass: "Tapsoob::DataStream::Base"
    }

    expect do
      push_op.push_partial_data
    end.to raise_error(ArgumentError)
  end
end
|
|
177
|
+
|
|
178
|
+
# ── push_data_from_file_parallel (intra-table parallelization) ───────────────
|
|
179
|
+
|
|
180
|
+
describe '#push_data_from_file_parallel' do
  # Push operation wired to the shared connection; built on first reference.
  let(:push_op) do
    build_push(db_url, dump_dir).tap { |op| op.instance_variable_set(:@db, db) }
  end

  # Start each example with empty target tables.
  before do
    [:users, :widgets].each { |table| db[table].delete }
  end

  it 'inserts all rows routing through push_data_from_file_parallel' do
    # Use a single worker for every table: with chunksize=1000 each file has
    # one line, so asking for 2 workers would leave ranges[1] nil and crash
    # FilePartition.
    allow(push_op).to receive(:table_parallel_workers).and_return(1)

    push_op.push_data_serial

    expect(db[:users].count).to eq(5)
  end

  it 'directly calls push_data_from_file_parallel without error' do
    # One worker avoids the nil-current_line crash from under-populated ranges.
    expect do
      push_op.push_data_from_file_parallel("users", 5, 1)
    end.not_to raise_error

    expect(db[:users].count).to eq(5)
  end

  it 'returns early when no data file exists' do
    expect do
      push_op.push_data_from_file_parallel("nonexistent_table", 0, 2)
    end.not_to raise_error
  end
end
|
|
210
|
+
|
|
211
|
+
# ── push_indexes ─────────────────────────────────────────────────────────────
|
|
212
|
+
|
|
213
|
+
describe '#push_indexes' do
  it 'is a no-op when no index files exist' do
    push_op = build_push(db_url, dump_dir)
    push_op.instance_variable_set(:@db, db)

    expect do
      push_op.push_indexes
    end.not_to raise_error
  end
end
|
|
221
|
+
|
|
222
|
+
# ── push_reset_sequences ─────────────────────────────────────────────────────
|
|
223
|
+
|
|
224
|
+
describe '#push_reset_sequences' do
  it 'runs without error on SQLite' do
    push_op = build_push(db_url, dump_dir)
    push_op.instance_variable_set(:@db, db)

    expect do
      push_op.push_reset_sequences
    end.not_to raise_error
  end
end
|
|
231
|
+
|
|
232
|
+
# ── push_data_parallel (parallel? stubbed to true) ───────────────────────────
|
|
233
|
+
|
|
234
|
+
describe '#push_data_parallel' do
  # Push operation configured with parallel: 2 and wired to the shared
  # connection; built on first reference.
  let(:push_op) do
    build_push(db_url, dump_dir, parallel: 2).tap { |op| op.instance_variable_set(:@db, db) }
  end

  # Start each example with empty target tables.
  before do
    [:users, :widgets].each { |table| db[table].delete }
  end

  it 'inserts rows using table-level parallel workers (parallel? forced true)' do
    # Push#parallel? always returns false; stub it to exercise push_data_parallel.
    allow(push_op).to receive(:parallel?).and_return(true)
    allow(push_op).to receive(:parallel_workers).and_return(2)

    push_op.push_data

    expect(db[:users].count).to eq(5)
    expect(db[:widgets].count).to eq(3)
  end

  it 'handles intra-table parallelization within push_data_parallel' do
    allow(push_op).to receive(:parallel?).and_return(true)
    allow(push_op).to receive(:parallel_workers).and_return(2)
    # Keep intra-table workers at 1: with chunksize=1000 and 5 rows the file
    # has one line, so asking for 2 workers would leave ranges[1] nil and
    # crash FilePartition.
    %w[users widgets].each do |table|
      allow(push_op).to receive(:table_parallel_workers).with(table, anything).and_return(1)
    end

    push_op.push_data

    expect(db[:users].count).to eq(5)
  end
end
|
|
264
|
+
end
|