tapsoob 0.8.5 → 0.8.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/tapsoob/cli/schema.rb +1 -1
- data/lib/tapsoob/schema.rb +33 -10
- data/lib/tapsoob/version.rb +1 -1
- data/spec/integration/postgres_spec.rb +12 -0
- data/spec/spec_helper.rb +3 -2
- data/spec/support/operation_helpers.rb +43 -0
- data/spec/unit/tapsoob/base_spec.rb +222 -0
- data/spec/unit/tapsoob/cli_pipeline_spec.rb +380 -0
- data/spec/unit/tapsoob/config_spec.rb +54 -0
- data/spec/unit/tapsoob/data_stream_spec.rb +48 -0
- data/spec/unit/tapsoob/file_partition_spec.rb +117 -0
- data/spec/unit/tapsoob/keyed_spec.rb +121 -0
- data/spec/unit/tapsoob/progress_event_spec.rb +136 -0
- data/spec/unit/tapsoob/progress_spec.rb +335 -0
- data/spec/unit/tapsoob/pull_spec.rb +335 -0
- data/spec/unit/tapsoob/push_spec.rb +264 -0
- data/spec/unit/tapsoob/schema_spec.rb +154 -0
- data/spec/unit/tapsoob/utils_spec.rb +64 -0
- metadata +11 -1
|
@@ -0,0 +1,380 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
require 'tapsoob/cli'
|
|
3
|
+
|
|
4
|
+
# CLI pipeline specs — invoke Thor commands the same way clients do in rake tasks:
|
|
5
|
+
#
|
|
6
|
+
# tapsoob schema dump <src_url>
|
|
7
|
+
# tapsoob schema load <dst_url>
|
|
8
|
+
# tapsoob schema indexes <src_url>
|
|
9
|
+
# tapsoob schema load_indexes <dst_url>
|
|
10
|
+
# tapsoob schema foreign_keys <src_url>
|
|
11
|
+
# tapsoob schema load_foreign_keys <dst_url>
|
|
12
|
+
# tapsoob data pull <src_url> [dump_path]
|
|
13
|
+
# tapsoob data push <dst_url> [dump_path]
|
|
14
|
+
# tapsoob schema reset_db_sequences <dst_url>
|
|
15
|
+
# tapsoob pull <dump_path> <src_url>
|
|
16
|
+
# tapsoob push <dump_path> <dst_url>
|
|
17
|
+
# tapsoob version
|
|
18
|
+
|
|
19
|
+
RSpec.describe "CLI pipelines" do
|
|
20
|
+
# ── helpers ──────────────────────────────────────────────────────────────────
|
|
21
|
+
|
|
22
|
+
# Opens a Sequel connection to the SQLite file at +path+ and enables the
# schema_dumper extension on it.
#
# The URL goes through DbHelpers.adapt_url before connecting.
# Returns the connected database; callers disconnect it themselves.
def make_db(path)
  Sequel.connect(DbHelpers.adapt_url("sqlite://#{path}")).tap do |connection|
    connection.extension :schema_dumper
  end
end
|
|
28
|
+
|
|
29
|
+
# Creates the two fixture tables on +db+ and fills them with rows:
#   users   (id, name NOT NULL)  -> "user_0", "user_1", "user_2"
#   widgets (id, qty default 0)  -> qty 0 and qty 5
def seed_db(db)
  db.create_table(:users) do
    primary_key :id
    String :name, null: false
  end
  db.create_table(:widgets) do
    primary_key :id
    Integer :qty, default: 0
  end

  3.times { |n| db[:users].insert(name: "user_#{n}") }
  2.times { |n| db[:widgets].insert(qty: n * 5) }
end
|
|
35
|
+
|
|
36
|
+
# Invokes a Thor command class in-process with the given arguments.
#
# klass - Thor subclass to run (e.g. Tapsoob::CLI::Schema).
# argv  - Array of command-line words, command name first.
#
# Output is NOT captured or silenced here: wrap the call in capture_stdout
# (or an `output` matcher) when the printed text matters. `debug: false` is
# handed to Thor as the config argument of `start`.
#
# Returns whatever klass.start returns.
def run_cli(klass, argv)
  klass.start(argv, debug: false)
end
|
|
40
|
+
|
|
41
|
+
# ── shared setup ─────────────────────────────────────────────────────────────
|
|
42
|
+
|
|
43
|
+
# Scratch directory that holds the SQLite files and dump output of one example.
let(:tmp) { Dir.mktmpdir("tapsoob_cli_") }

# File locations inside the scratch directory.
let(:src_path) { File.join(tmp, "src.db") }
let(:dst_path) { File.join(tmp, "dst.db") }
let(:dump_dir) { File.join(tmp, "dump") }

# Connection URLs for the two databases, passed through DbHelpers.adapt_url.
let(:src_url) { DbHelpers.adapt_url("sqlite://#{src_path}") }
let(:dst_url) { DbHelpers.adapt_url("sqlite://#{dst_path}") }

# Source database, created and seeded on first access.
let(:src_db) { make_db(src_path).tap { |database| seed_db(database) } }

# `let` is lazy, so touch src_db up front to make sure the source database
# exists before any CLI command runs against it.
before { src_db }

after do
  begin
    src_db.disconnect
  rescue StandardError
    nil # best-effort: a failed disconnect must not block the directory cleanup
  end
  FileUtils.rm_rf(tmp)
end
|
|
62
|
+
|
|
63
|
+
# ── tapsoob version ───────────────────────────────────────────────────────────
|
|
64
|
+
|
|
65
|
+
describe "tapsoob version" do
  it 'prints the version string' do
    # Only the shape is checked: at least "<digits>.<digits>" on stdout.
    expect {
      run_cli(Tapsoob::CLI::Root, ["version"])
    }.to output(/\d+\.\d+/).to_stdout
  end
end
|
|
70
|
+
|
|
71
|
+
# ── schema dump | schema load pipeline ───────────────────────────────────────
|
|
72
|
+
|
|
73
|
+
describe "schema dump → load pipeline" do
  # Text printed by `schema dump` for the seeded source database.
  let(:schema_text) { capture_stdout { run_cli(Tapsoob::CLI::Schema, ["dump", src_url]) } }

  # The dumped schema written to a file that `schema load` can read.
  let(:schema_file) do
    File.join(tmp, "schema.rb").tap { |path| File.write(path, schema_text) }
  end

  # Runs `schema load` against the destination using schema_file and yields
  # an open connection to the destination for assertions. The connection is
  # always disconnected afterwards.
  def load_schema_into_destination
    file = schema_file # dump and write the schema before opening the destination
    dst_db = make_db(dst_path)
    begin
      run_cli(Tapsoob::CLI::Schema, ["load", dst_url, file])
      yield dst_db
    ensure
      dst_db.disconnect
    end
  end

  it 'dumps schema to stdout and loads it into a fresh DB' do
    expect(schema_text).to include("users", "widgets")

    load_schema_into_destination do |dst_db|
      expect(dst_db.table_exists?(:users)).to be true
      expect(dst_db.table_exists?(:widgets)).to be true
    end
  end

  # NOTE(review): despite its title this example performs a single load into
  # a fresh destination, exactly like the example above; it does not run
  # `schema load` twice. Confirm the intended behaviour before tightening it.
  it 'schema load is idempotent when destination is fresh' do
    load_schema_into_destination do |dst_db|
      expect(dst_db.table_exists?(:users)).to be true
      expect(dst_db.table_exists?(:widgets)).to be true
    end
  end
end
|
|
106
|
+
|
|
107
|
+
# ── indexes pipeline ──────────────────────────────────────────────────────────
|
|
108
|
+
|
|
109
|
+
describe "schema indexes → load_indexes pipeline" do
  it 'dumps indexes and loads them without error' do
    # Capture the source index definitions and park them in a file.
    dumped_indexes = capture_stdout { run_cli(Tapsoob::CLI::Schema, ["indexes", src_url]) }
    index_file = File.join(tmp, "indexes.rb").tap { |path| File.write(path, dumped_indexes) }

    # The destination needs its tables before indexes can be applied.
    dumped_schema = capture_stdout { run_cli(Tapsoob::CLI::Schema, ["dump", src_url]) }
    schema_file = File.join(tmp, "schema.rb").tap { |path| File.write(path, dumped_schema) }
    run_cli(Tapsoob::CLI::Schema, ["load", dst_url, schema_file])

    expect {
      run_cli(Tapsoob::CLI::Schema, ["load_indexes", dst_url, index_file])
    }.not_to raise_error
  end
end
|
|
124
|
+
|
|
125
|
+
# ── foreign_keys pipeline ─────────────────────────────────────────────────────
|
|
126
|
+
|
|
127
|
+
describe "schema foreign_keys → load_foreign_keys pipeline" do
  it 'dumps foreign keys and loads them without error' do
    # Export whatever `schema foreign_keys` prints for the source into a file.
    dumped_fks = capture_stdout { run_cli(Tapsoob::CLI::Schema, ["foreign_keys", src_url]) }
    fk_file = File.join(tmp, "fk.rb").tap { |path| File.write(path, dumped_fks) }

    # Create the destination tables first.
    dumped_schema = capture_stdout { run_cli(Tapsoob::CLI::Schema, ["dump", src_url]) }
    schema_file = File.join(tmp, "schema.rb").tap { |path| File.write(path, dumped_schema) }
    run_cli(Tapsoob::CLI::Schema, ["load", dst_url, schema_file])

    expect {
      run_cli(Tapsoob::CLI::Schema, ["load_foreign_keys", dst_url, fk_file])
    }.not_to raise_error
  end
end
|
|
141
|
+
|
|
142
|
+
# ── reset_db_sequences ────────────────────────────────────────────────────────
|
|
143
|
+
|
|
144
|
+
describe "schema reset_db_sequences" do
  it 'resets sequences on the destination DB without error' do
    # Give the destination the same tables as the source.
    dumped_schema = capture_stdout { run_cli(Tapsoob::CLI::Schema, ["dump", src_url]) }
    schema_file = File.join(tmp, "schema.rb").tap { |path| File.write(path, dumped_schema) }
    run_cli(Tapsoob::CLI::Schema, ["load", dst_url, schema_file])

    expect {
      run_cli(Tapsoob::CLI::Schema, ["reset_db_sequences", dst_url])
    }.not_to raise_error
  end
end
|
|
154
|
+
|
|
155
|
+
# ── data pull → push pipeline (dump_path mode) ───────────────────────────────
|
|
156
|
+
|
|
157
|
+
describe "data pull → push pipeline" do
  before do
    # Lay out the dump directory skeleton before the data commands run.
    %w[data schemas indexes].each do |subdir|
      FileUtils.mkdir_p(File.join(dump_dir, subdir))
    end

    # Record the table order as computed by the schema_dumper extension's
    # (private) sort_dumped_tables, one table name per line.
    table_names = src_db.send(:sort_dumped_tables, src_db.tables, {}).map(&:to_s)
    File.write(File.join(dump_dir, "table_order.txt"), table_names.join("\n") + "\n")
  end

  it 'pulls data into dump_dir and pushes it to destination' do
    # The destination needs the schema before rows can be pushed.
    dumped_schema = capture_stdout { run_cli(Tapsoob::CLI::Schema, ["dump", src_url]) }
    schema_file = File.join(tmp, "schema.rb").tap { |path| File.write(path, dumped_schema) }
    run_cli(Tapsoob::CLI::Schema, ["load", dst_url, schema_file])

    stream_flags = ["--progress=false", "--chunksize=1000"]
    run_cli(Tapsoob::CLI::DataStream, ["pull", src_url, dump_dir, *stream_flags])
    run_cli(Tapsoob::CLI::DataStream, ["push", dst_url, dump_dir, *stream_flags])

    # Row counts must match what seed_db inserted into the source.
    destination = make_db(dst_path)
    begin
      expect(destination[:users].count).to eq(3)
      expect(destination[:widgets].count).to eq(2)
    ensure
      destination.disconnect
    end
  end
end
|
|
183
|
+
|
|
184
|
+
# ── tapsoob pull → push (Root command, full round-trip) ──────────────────────
|
|
185
|
+
|
|
186
|
+
describe "tapsoob pull → push (Root commands)" do
  it 'performs a full schema+data round-trip via pull/push commands' do
    shared_flags = ["--progress=false", "--chunksize=1000"]

    run_cli(Tapsoob::CLI::Root, ["pull", dump_dir, src_url, *shared_flags, "--no-split"])
    run_cli(Tapsoob::CLI::Root, ["push", dump_dir, dst_url, *shared_flags])

    # Both the schema and the seeded rows must have arrived.
    destination = make_db(dst_path)
    begin
      expect(destination.table_exists?(:users)).to be(true)
      expect(destination[:users].count).to eq(3)
      expect(destination[:widgets].count).to eq(2)
    ensure
      destination.disconnect
    end
  end
end
|
|
203
|
+
|
|
204
|
+
# ── schema dump_table ─────────────────────────────────────────────────────────
|
|
205
|
+
|
|
206
|
+
describe "schema dump_table" do
  it 'dumps a single table schema to stdout' do
    table_schema = capture_stdout do
      run_cli(Tapsoob::CLI::Schema, ["dump_table", src_url, "users"])
    end

    expect(table_schema).to include("users")
  end
end
|
|
212
|
+
|
|
213
|
+
# ── data push --purge flag ────────────────────────────────────────────────────
|
|
214
|
+
|
|
215
|
+
describe "data push --purge" do
  before do
    # Lay out the dump directory skeleton before the data commands run.
    %w[data schemas indexes].each do |subdir|
      FileUtils.mkdir_p(File.join(dump_dir, subdir))
    end

    table_names = src_db.send(:sort_dumped_tables, src_db.tables, {}).map(&:to_s)
    File.write(File.join(dump_dir, "table_order.txt"), table_names.join("\n") + "\n")
  end

  # NOTE(review): the destination holds no rows before the push, so the
  # count below would be 3 with or without truncation. Seeding a stale row
  # first would make this example actually exercise --purge — confirm.
  it 'truncates destination tables before inserting' do
    dumped_schema = capture_stdout { run_cli(Tapsoob::CLI::Schema, ["dump", src_url]) }
    schema_file = File.join(tmp, "schema.rb").tap { |path| File.write(path, dumped_schema) }
    run_cli(Tapsoob::CLI::Schema, ["load", dst_url, schema_file])

    run_cli(Tapsoob::CLI::DataStream, ["pull", src_url, dump_dir, "--progress=false"])
    run_cli(Tapsoob::CLI::DataStream, ["push", dst_url, dump_dir, "--progress=false", "--purge"])

    destination = make_db(dst_path)
    begin
      expect(destination[:users].count).to eq(3)
    ensure
      destination.disconnect
    end
  end
end
|
|
239
|
+
|
|
240
|
+
# ── schema indexes_individual ─────────────────────────────────────────────────
|
|
241
|
+
|
|
242
|
+
describe "schema indexes_individual" do
  it 'dumps per-table index JSON without error' do
    # The printed text is captured only to keep the spec output clean; the
    # example asserts nothing about its content.
    dump_each_index = lambda do
      capture_stdout { run_cli(Tapsoob::CLI::Schema, ["indexes_individual", src_url]) }
    end

    expect { dump_each_index.call }.not_to raise_error
  end
end
|
|
249
|
+
|
|
250
|
+
# ── schema load via STDIN ────────────────────────────────────────────────────
|
|
251
|
+
|
|
252
|
+
describe "schema load via STDIN" do
  it 'reads schema from STDIN when no filename is given' do
    dumped_schema = capture_stdout { run_cli(Tapsoob::CLI::Schema, ["dump", src_url]) }

    # Swap the STDIN constant for an in-memory stream carrying the schema.
    stub_const("STDIN", StringIO.new(dumped_schema))

    destination = make_db(dst_path)
    begin
      run_cli(Tapsoob::CLI::Schema, ["load", dst_url])
      expect(destination.table_exists?(:users)).to be(true)
    ensure
      destination.disconnect
    end
  end
end
|
|
267
|
+
|
|
268
|
+
# ── schema load_foreign_keys via STDIN ───────────────────────────────────────
|
|
269
|
+
|
|
270
|
+
describe "schema load_foreign_keys via STDIN" do
  it 'reads foreign keys from STDIN when no filename is given' do
    dumped_fks = capture_stdout { run_cli(Tapsoob::CLI::Schema, ["foreign_keys", src_url]) }

    # Create the destination tables from a schema file first.
    dumped_schema = capture_stdout { run_cli(Tapsoob::CLI::Schema, ["dump", src_url]) }
    schema_file = File.join(tmp, "schema.rb").tap { |path| File.write(path, dumped_schema) }
    run_cli(Tapsoob::CLI::Schema, ["load", dst_url, schema_file])

    # Feed the dumped foreign keys through a stand-in STDIN.
    stub_const("STDIN", StringIO.new(dumped_fks))

    expect {
      run_cli(Tapsoob::CLI::Schema, ["load_foreign_keys", dst_url])
    }.not_to raise_error
  end
end
|
|
283
|
+
|
|
284
|
+
# ── schema load_indexes via STDIN ────────────────────────────────────────────
|
|
285
|
+
|
|
286
|
+
describe "schema load_indexes via STDIN" do
  it 'reads indexes from STDIN when no filename is given' do
    dumped_indexes = capture_stdout { run_cli(Tapsoob::CLI::Schema, ["indexes", src_url]) }

    # Create the destination tables from a schema file first.
    dumped_schema = capture_stdout { run_cli(Tapsoob::CLI::Schema, ["dump", src_url]) }
    schema_file = File.join(tmp, "schema.rb").tap { |path| File.write(path, dumped_schema) }
    run_cli(Tapsoob::CLI::Schema, ["load", dst_url, schema_file])

    # Feed the dumped indexes through a stand-in STDIN.
    stub_const("STDIN", StringIO.new(dumped_indexes))

    expect {
      run_cli(Tapsoob::CLI::Schema, ["load_indexes", dst_url])
    }.not_to raise_error
  end
end
|
|
299
|
+
|
|
300
|
+
# ── root pull --resume with missing file (parse_opts error path) ─────────────
|
|
301
|
+
|
|
302
|
+
describe "root pull --resume with non-existent file" do
  it 'raises when the resume file does not exist' do
    # Point inside the per-example scratch directory instead of a hard-coded
    # /tmp path: the directory is freshly created, so the file cannot exist,
    # and the path works on platforms without /tmp.
    missing_resume_file = File.join(tmp, "nonexistent_resume.dat")

    expect {
      run_cli(Tapsoob::CLI::Root, ["pull", dump_dir, src_url,
                                   "--resume=#{missing_resume_file}",
                                   "--progress=false"])
    }.to raise_error(RuntimeError, /Unable to find resume file/)
  end
end
|
|
311
|
+
|
|
312
|
+
# ── root pull --config option ─────────────────────────────────────────────────
|
|
313
|
+
|
|
314
|
+
describe "root pull --config with a YAML config file" do
  it 'loads options from a config YAML file' do
    settings = { "progress" => false }
    config_file = File.join(tmp, "tapsoob.yml").tap { |path| File.write(path, settings.to_yaml) }

    pull_argv = ["pull", dump_dir, src_url,
                 "--config=#{config_file}", "--progress=false", "--chunksize=1000", "--no-split"]

    # Only asserts that the command completes when --config is supplied.
    expect { run_cli(Tapsoob::CLI::Root, pull_argv) }.not_to raise_error
  end
end
|
|
325
|
+
|
|
326
|
+
# ── data push via STDIN ───────────────────────────────────────────────────────
|
|
327
|
+
|
|
328
|
+
describe "data push via STDIN" do
  it 'imports rows from STDIN JSON when no dump_path is given' do
    # The destination needs the users table before rows can be imported.
    dumped_schema = capture_stdout { run_cli(Tapsoob::CLI::Schema, ["dump", src_url]) }
    schema_file = File.join(tmp, "schema.rb").tap { |path| File.write(path, dumped_schema) }
    run_cli(Tapsoob::CLI::Schema, ["load", dst_url, schema_file])

    # One NDJSON line describing a single users row.
    payload = {
      table_name: "users",
      header: ["id", "name"],
      types: ["integer", "string"],
      data: [[100, "stdin_user"]]
    }
    stub_const("STDIN", StringIO.new(JSON.generate(payload) + "\n"))

    run_cli(Tapsoob::CLI::DataStream, ["push", dst_url, "--progress=false"])

    destination = make_db(dst_path)
    begin
      expect(destination[:users].where(id: 100).first).not_to be_nil
    ensure
      destination.disconnect
    end
  end
end
|
|
357
|
+
|
|
358
|
+
# ── data pull --parallel warning ──────────────────────────────────────────────
|
|
359
|
+
|
|
360
|
+
describe "data pull parallel-to-STDOUT warning" do
  it 'falls back to serial (no error) when parallel > 1 and no dump_path' do
    # No dump_path is passed, so the pull writes to stdout; the example only
    # checks that asking for --parallel=2 in that mode does not raise.
    parallel_pull = lambda do
      capture_stdout do
        run_cli(Tapsoob::CLI::DataStream, ["pull", src_url, "--parallel=2", "--progress=false"])
      end
    end

    expect { parallel_pull.call }.not_to raise_error
  end
end
|
|
368
|
+
|
|
369
|
+
# ── helper ───────────────────────────────────────────────────────────────────
|
|
370
|
+
|
|
371
|
+
# Runs the given block with $stdout temporarily replaced by an in-memory
# buffer and returns everything written to it as a String.
# The previous $stdout is put back even when the block raises.
def capture_stdout(&block)
  previous_stdout = $stdout
  begin
    $stdout = StringIO.new
    block.call
    $stdout.string
  ensure
    $stdout = previous_stdout
  end
end
|
|
379
|
+
|
|
380
|
+
end
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
require 'tapsoob/config'
|
|
3
|
+
|
|
4
|
+
RSpec.describe Tapsoob::Config do
  describe '.verify_database_url' do
    it 'connects and lists tables without error for a valid URL' do
      expect { described_class.verify_database_url(sqlite_memory_url) }.not_to raise_error
    end

    it 'uses self.database_url when no argument is given' do
      # database_url is class-level state; put the previous value back so
      # this example does not leak configuration into later specs.
      previous_url = described_class.database_url
      begin
        described_class.database_url = sqlite_memory_url
        expect { described_class.verify_database_url }.not_to raise_error
      ensure
        described_class.database_url = previous_url
      end
    end

    it 'prints an error and exits on an invalid URL' do
      # Outer expectation checks the message on stdout, inner one the exit.
      expect {
        expect { described_class.verify_database_url("sqlite:///nonexistent/bad/path.db") }.to raise_error(SystemExit)
      }.to output(/Failed to connect/).to_stdout
    end
  end
end
|
|
22
|
+
|
|
23
|
+
RSpec.describe Tapsoob do
  describe '.exiting= / #exiting?' do
    it 'sets and reads the exiting flag' do
      # Any object extended with the module can read the module-level flag.
      host = Object.new
      host.extend(Tapsoob)

      begin
        Tapsoob.exiting = true
        expect(host.exiting?).to be true
        Tapsoob.exiting = false
        expect(host.exiting?).to be false
      ensure
        # The flag is global: clear it even if an expectation above fails,
        # so a failure here cannot leave it set for later specs.
        Tapsoob.exiting = false
      end
    end
  end
end
|
|
35
|
+
|
|
36
|
+
RSpec.describe Tapsoob do
  describe '.log' do
    it 'returns a Logger instance' do
      expect(Tapsoob.log).to be_a(Logger)
    end

    it 'returns the same instance on repeated calls' do
      expect(Tapsoob.log).to equal(Tapsoob.log)
    end

    it 'accepts a custom logger' do
      custom = Logger.new(StringIO.new)
      begin
        Tapsoob.log = custom
        expect(Tapsoob.log).to equal(custom)
      ensure
        # Reset to default for other tests, even when the expectation fails.
        Tapsoob.log = nil
      end
    end
  end
end
|
|
@@ -83,6 +83,54 @@ RSpec.describe Tapsoob::DataStream do
|
|
|
83
83
|
end
|
|
84
84
|
end
|
|
85
85
|
|
|
86
|
+
describe '#error / #error=' do
  # A stream that has not been touched reports no error.
  it 'defaults to false' do
    expect(stream.error).to be(false)
  end

  # The writer's value is what the reader hands back.
  it 'stores and retrieves error state' do
    stream.error = true

    expect(stream.error).to be(true)
  end
end
|
|
96
|
+
|
|
97
|
+
describe '#to_json' do
  it 'returns a JSON string including the class name' do
    serialized = stream.to_json

    expect(serialized).to be_a(String)
    # The parsed document must carry a "klass" entry.
    expect(JSON.parse(serialized)).to have_key("klass")
  end
end
|
|
105
|
+
|
|
106
|
+
describe '#increment' do
  it 'advances the state offset by the row count' do
    offset_before = stream.state[:offset]

    stream.increment(5)

    expect(stream.state[:offset]).to eq(offset_before + 5)
  end
end
|
|
113
|
+
|
|
114
|
+
describe '#verify_stream' do
  it 'sets offset to the current table row count' do
    stream.verify_stream

    rows_in_table = db[:stream_test].count
    expect(stream.state[:offset]).to eq(rows_in_table)
  end
end
|
|
120
|
+
|
|
121
|
+
describe '#import_rows with extra columns' do
  it 'drops unknown columns and imports the rest' do
    # "ghost_column" is the extra column this example feeds in alongside
    # id, label and value.
    payload = {
      table_name: "stream_test",
      header: ["id", "label", "value", "ghost_column"],
      types: ["integer", "string", "integer", "string"],
      data: [[999, "imported", 42, "extra"]]
    }

    expect { stream.import_rows(payload) }.not_to raise_error

    imported = db[:stream_test].where(id: 999).first
    expect(imported).not_to be_nil
  end
end
|
|
133
|
+
|
|
86
134
|
describe '#parse_encoded_data' do
|
|
87
135
|
it 'raises CorruptedData on checksum mismatch' do
|
|
88
136
|
encoded, _, _ = stream.fetch
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
require 'tapsoob/data_stream/file_partition'
|
|
3
|
+
|
|
4
|
+
RSpec.describe Tapsoob::DataStream::FilePartition do
|
|
5
|
+
# SQLite connection with the schema_dumper extension and a widgets table
# (id primary key, name string).
let(:db) do
  connect_sqlite.tap do |conn|
    conn.extension :schema_dumper
    conn.create_table(:widgets) do
      primary_key :id
      String :name
    end
  end
end

after { db.disconnect }
|
|
13
|
+
|
|
14
|
+
# Writes +chunks+ as newline-delimited JSON to <dir>/data/<table>.json,
# one JSON document per line, creating the data directory if needed.
#
# Returns the path of the written file.
def make_ndjson(dir, table, chunks)
  data_dir = File.join(dir, 'data')
  FileUtils.mkdir_p(data_dir)

  File.join(data_dir, "#{table}.json").tap do |ndjson_path|
    File.write(ndjson_path, chunks.map { |chunk| "#{JSON.generate(chunk)}\n" }.join)
  end
end
|
|
22
|
+
|
|
23
|
+
# Builds the stream under test for the widgets table with a chunksize of
# 10; entries in +state_overrides+ replace or extend that default state.
def new_stream(state_overrides = {})
  defaults = { table_name: :widgets, chunksize: 10 }
  state = defaults.merge(state_overrides)
  described_class.new(db, state)
end
|
|
27
|
+
|
|
28
|
+
# ── initialize ────────────────────────────────────────────────────────────────
|
|
29
|
+
|
|
30
|
+
describe '#initialize' do
  it 'sets current_line to start_line when line_range is provided' do
    ranged = new_stream(line_range: [3, 7])

    # The first element of line_range becomes the starting position.
    expect(ranged.state[:current_line]).to eq(3)
  end

  it 'leaves current_line unset when no line_range' do
    unranged = new_stream

    expect(unranged.state[:current_line]).to be_nil
  end
end
|
|
41
|
+
|
|
42
|
+
# ── complete? ────────────────────────────────────────────────────────────────
|
|
43
|
+
|
|
44
|
+
describe '#complete?' do
  it 'returns true when line_range is nil' do
    unranged = new_stream

    expect(unranged.complete?).to be(true)
  end

  it 'returns false when current_line is at start' do
    fresh = new_stream(line_range: [0, 4])

    expect(fresh.complete?).to be(false)
  end

  it 'returns true when current_line exceeds end_line' do
    exhausted = new_stream(line_range: [0, 2])
    # Move the cursor one past the last line of the range.
    exhausted.state[:current_line] = 3

    expect(exhausted.complete?).to be(true)
  end
end
|
|
60
|
+
|
|
61
|
+
# ── fetch_file ───────────────────────────────────────────────────────────────
|
|
62
|
+
|
|
63
|
+
describe '#fetch_file' do
  let(:dir) { Dir.mktmpdir }
  after { FileUtils.rm_rf(dir) }

  # Builds one NDJSON chunk hash for the widgets table.
  def widget_chunk(header, types, data)
    { "table_name" => "widgets", "header" => header, "types" => types, "data" => data }
  end

  it 'returns {} when line_range is nil' do
    unranged = new_stream

    expect(unranged.fetch_file(dir)).to eq({})
  end

  it 'reads rows within the assigned line range' do
    # Three single-row chunks, one per file line; only lines 0..1 are assigned.
    chunks = [[1, "a"], [2, "b"], [3, "c"]].map do |row|
      widget_chunk(["id", "name"], ["integer", "string"], [row])
    end
    make_ndjson(dir, :widgets, chunks)

    fetched = new_stream(line_range: [0, 1]).fetch_file(dir)

    expect(fetched[:table_name]).to eq("widgets")
    expect(fetched[:header]).to eq(["id", "name"])
    expect(fetched[:data]).to eq([[1, "a"], [2, "b"]])
  end

  it 'advances current_line after each fetch' do
    chunks = Array.new(3) { |i| widget_chunk(["id"], ["integer"], [[i]]) }
    make_ndjson(dir, :widgets, chunks)

    partition = new_stream(line_range: [0, 2], chunksize: 2)
    partition.fetch_file(dir)

    expect(partition.state[:current_line]).to eq(2)
  end

  it 'marks complete after reading all lines in range' do
    chunks = Array.new(2) { |i| widget_chunk(["id"], ["integer"], [[i]]) }
    make_ndjson(dir, :widgets, chunks)

    partition = new_stream(line_range: [0, 1], chunksize: 10)
    partition.fetch_file(dir)

    expect(partition.complete?).to be(true)
  end

  it 'respects skip-duplicates option' do
    # A single chunk whose data repeats the row [1].
    make_ndjson(dir, :widgets, [widget_chunk(["id"], ["integer"], [[1], [1], [2]])])

    state = { table_name: :widgets, chunksize: 10, line_range: [0, 0] }
    partition = described_class.new(db, state, { "skip-duplicates": true })
    fetched = partition.fetch_file(dir)

    # The returned data must already be free of repeated rows.
    expect(fetched[:data].uniq).to eq(fetched[:data])
  end
end
|
|
117
|
+
end
|