tapsoob 0.7.17 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitlab-ci.yml +150 -7
- data/Gemfile +5 -2
- data/README.md +11 -7
- data/lib/tapsoob/operation/base.rb +4 -8
- data/lib/tapsoob/operation/pull.rb +4 -1
- data/lib/tapsoob/utils.rb +3 -3
- data/lib/tapsoob/version.rb +1 -1
- data/spec/integration/mysql_spec.rb +89 -0
- data/spec/integration/postgres_spec.rb +97 -0
- data/spec/integration/sqlite_spec.rb +119 -0
- data/spec/spec_helper.rb +40 -78
- data/spec/support/db_helpers.rb +115 -0
- data/spec/support/fixtures.rb +304 -0
- data/spec/support/round_trip_helper.rb +70 -0
- data/spec/support/shared_examples/round_trip.rb +83 -0
- data/spec/system/large_dataset_spec.rb +163 -0
- data/spec/unit/tapsoob/chunksize_spec.rb +105 -0
- data/spec/unit/tapsoob/data_stream_spec.rb +220 -0
- data/spec/unit/tapsoob/operation_base_spec.rb +134 -0
- data/spec/unit/tapsoob/schema_spec.rb +102 -0
- data/spec/unit/tapsoob/utils_spec.rb +260 -0
- data/spec/unit/tapsoob/version_spec.rb +8 -0
- metadata +15 -3
- data/spec/lib/tapsoob/chunksize_spec.rb +0 -92
- data/spec/lib/tapsoob/version_spec.rb +0 -7
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
# Shared examples that any adapter-specific integration suite can include.
|
|
2
|
+
# The including example group must define a before(:all) hook that sets:
|
|
3
|
+
# @src_url, @dst_url — Sequel connection URLs
|
|
4
|
+
# @src_db, @dst_db — connected Sequel::Database objects
|
|
5
|
+
# Individual examples access these via the src_url/dst_url/src_db/dst_db helpers
|
|
6
|
+
# defined in DbHelpers (which delegate to the ivars set in before(:all)).
|
|
7
|
+
|
|
8
|
+
RSpec.shared_examples 'a complete round-trip' do
  # Adapter-agnostic pull/push checks. The helpers called below (pull, push,
  # round_trip, expect_same_counts, dump_dir, src_url, dst_url, src_db, dst_db)
  # are not defined in this file; the including group / support files must
  # provide them.

  it 'pulls without error' do
    expect { pull(src_url, dump_dir) }.not_to raise_error
  end

  it 'creates schema dump files for every table' do
    pull(src_url, dump_dir)
    src_db.tables.each do |table|
      expect(File).to exist(File.join(dump_dir, 'schemas', "#{table}.rb"))
    end
  end

  it 'creates data dump files for every seeded table' do
    pull(src_url, dump_dir)
    %i[users orders products documents attachments events large_table null_heavy].each do |table|
      expect(File).to exist(File.join(dump_dir, 'data', "#{table}.json"))
    end
  end

  it 'pushes without error' do
    pull(src_url, dump_dir)
    expect { push(dst_url, dump_dir) }.not_to raise_error
  end

  it 'preserves row counts for all tables' do
    round_trip(src_url, dst_url, dump_dir)
    expect_same_counts(src_db, dst_db)
  end

  it 'preserves NULL values in null_heavy' do
    round_trip(src_url, dst_url, dump_dir)
    src_nulls = src_db[:null_heavy].where(maybe_name: nil).count
    dst_nulls = dst_db[:null_heavy].where(maybe_name: nil).count
    # Guard against a vacuous pass: the check is only meaningful when the
    # destination actually contains NULLs.
    expect(dst_nulls).to be > 0
    # "Preserved" means the same number of NULLs on both sides, not merely some.
    expect(dst_nulls).to eq(src_nulls)
  end

  it 'preserves string content in users.email' do
    round_trip(src_url, dst_url, dump_dir)
    src_emails = src_db[:users].select_map(:email).sort
    dst_emails = dst_db[:users].select_map(:email).sort
    expect(dst_emails).to eq(src_emails)
  end

  it 'preserves BLOB payloads in attachments' do
    round_trip(src_url, dst_url, dump_dir)
    src_db[:attachments].order(:id).each do |src_row|
      dst_row = dst_db[:attachments][id: src_row[:id]]
      expect(dst_row).not_to be_nil
      # Compare raw bytes so string encoding differences cannot mask or fake a match.
      expect(dst_row[:payload].to_s.bytes).to eq(src_row[:payload].to_s.bytes)
    end
  end

  it 'preserves large TEXT bodies in documents' do
    round_trip(src_url, dst_url, dump_dir)
    src_db[:documents].order(:id).each do |src_row|
      dst_row = dst_db[:documents][id: src_row[:id]]
      # A missing row should be reported as a failed expectation, not as a
      # NoMethodError on nil (same guard as the attachments example).
      expect(dst_row).not_to be_nil
      expect(dst_row[:body]).to eq(src_row[:body])
    end
  end

  it 'handles the no-PK events table' do
    round_trip(src_url, dst_url, dump_dir)
    expect(dst_db[:events].count).to eq(src_db[:events].count)
  end
end
|
|
72
|
+
|
|
73
|
+
RSpec.shared_examples 'a parallel round-trip' do |workers:|
  # Round-trip checks parameterised by the worker count handed to round_trip
  # through its `parallel:` option. The helpers used here are supplied by the
  # including example group / support files.

  it "preserves row counts with #{workers} parallel workers" do
    round_trip(src_url, dst_url, dump_dir, parallel: workers)

    expect_same_counts(src_db, dst_db)
  end

  it "handles the large_table (>100K rows) with #{workers} workers" do
    round_trip(src_url, dst_url, dump_dir, parallel: workers)

    copied_rows = dst_db[:large_table].count
    expect(copied_rows).to eq(Fixtures::LARGE_TABLE_ROWS)
  end
end
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
RSpec.describe 'Large dataset system tests', :system do
  # System-level round-trip checks against the databases named by
  # SRC_DATABASE_URL / DST_DATABASE_URL (file-backed SQLite under tmp/ by default).
  # The source is created and seeded once per group; the destination tables are
  # dropped before every example.

  before(:all) do
    @src_url = DbHelpers.adapt_url(ENV.fetch('SRC_DATABASE_URL', 'sqlite://tmp/tapsoob_system_src.db'))
    @dst_url = DbHelpers.adapt_url(ENV.fetch('DST_DATABASE_URL', 'sqlite://tmp/tapsoob_system_dst.db'))

    FileUtils.mkdir_p('tmp')
    # rm_f tolerates files that do not exist, so no inline `rescue nil` is needed.
    FileUtils.rm_f(%w[tmp/tapsoob_system_src.db tmp/tapsoob_system_dst.db])

    @src_db = DbHelpers.connect(@src_url)
    @dst_db = DbHelpers.connect(@dst_url)

    Fixtures.create_tables(@src_db)
    Fixtures.seed(@src_db)
  end

  before(:each) do
    Fixtures.drop_tables(@dst_db)
  end

  after(:all) do
    Fixtures.drop_tables(@src_db)
    Fixtures.drop_tables(@dst_db)
    DbHelpers.disconnect_all
    FileUtils.rm_f(%w[tmp/tapsoob_system_src.db tmp/tapsoob_system_dst.db])
  end

  # ── large_table: intra-table parallelization threshold ───────────────────────

  describe 'large_table (150K rows)' do
    it 'transfers all rows in serial mode' do
      round_trip(src_url, dst_url, dump_dir)
      expect(dst_db[:large_table].count).to eq(Fixtures::LARGE_TABLE_ROWS)
    end

    it 'transfers all rows with parallel: 2' do
      round_trip(src_url, dst_url, dump_dir, parallel: 2)
      expect(dst_db[:large_table].count).to eq(Fixtures::LARGE_TABLE_ROWS)
    end

    it 'transfers all rows with parallel: 4' do
      round_trip(src_url, dst_url, dump_dir, parallel: 4)
      expect(dst_db[:large_table].count).to eq(Fixtures::LARGE_TABLE_ROWS)
    end

    it 'has no duplicate rows after parallel pull' do
      round_trip(src_url, dst_url, dump_dir, parallel: 4)
      total    = dst_db[:large_table].count
      distinct = dst_db[:large_table].select(:id).distinct.count
      expect(distinct).to eq(total)
    end
  end

  # ── documents: large TEXT columns ────────────────────────────────────────────

  describe 'documents table (large TEXT)' do
    it 'preserves body content exactly' do
      round_trip(src_url, dst_url, dump_dir)
      src_db[:documents].order(:id).each do |src_row|
        dst_row = dst_db[:documents][id: src_row[:id]]
        # Report a missing row as a failure instead of crashing on nil below.
        expect(dst_row).not_to be_nil, "document #{src_row[:id]} is missing from the destination"
        # bytesize (not length) so the "bytes" label in the message is accurate.
        expect(dst_row[:body]).to eq(src_row[:body]),
          "body mismatch for document #{src_row[:id]}: " \
          "src=#{src_row[:body]&.bytesize} bytes dst=#{dst_row[:body]&.bytesize} bytes"
      end
    end

    it 'handles documents with nil body' do
      round_trip(src_url, dst_url, dump_dir)
      expect(dst_db[:documents].where(body: nil).count).to eq(src_db[:documents].where(body: nil).count)
    end
  end

  # ── attachments: BLOB encoding/decoding ──────────────────────────────────────

  describe 'attachments table (binary BLOBs up to 256 KB)' do
    it 'preserves every byte of every payload' do
      round_trip(src_url, dst_url, dump_dir)
      # Collect offending ids rather than a bare counter so a failure names the rows.
      mismatched_ids = []
      src_db[:attachments].order(:id).each do |src_row|
        dst_row = dst_db[:attachments][id: src_row[:id]]
        # A row that is missing on the destination counts as a mismatch.
        intact = dst_row && dst_row[:payload].to_s.bytes == src_row[:payload].to_s.bytes
        mismatched_ids << src_row[:id] unless intact
      end
      expect(mismatched_ids).to eq([])
    end

    it 'preserves size_bytes metadata' do
      round_trip(src_url, dst_url, dump_dir)
      src_db[:attachments].order(:id).each do |src_row|
        dst_row = dst_db[:attachments][id: src_row[:id]]
        expect(dst_row).not_to be_nil, "attachment #{src_row[:id]} is missing from the destination"
        expect(dst_row[:size_bytes]).to eq(src_row[:size_bytes])
      end
    end
  end

  # ── null_heavy: NULL preservation ────────────────────────────────────────────

  describe 'null_heavy table' do
    it 'preserves NULLs in every nullable column' do
      round_trip(src_url, dst_url, dump_dir)
      %i[maybe_name maybe_number maybe_score maybe_date maybe_text].each do |col|
        src_nulls = src_db[:null_heavy].where(col => nil).count
        dst_nulls = dst_db[:null_heavy].where(col => nil).count
        expect(dst_nulls).to eq(src_nulls),
          "NULL count mismatch for null_heavy.#{col}: src=#{src_nulls} dst=#{dst_nulls}"
      end
    end
  end

  # ── events: table without primary key ────────────────────────────────────────

  describe 'events table (no primary key)' do
    it 'uses the Base (non-keyed) stream' do
      round_trip(src_url, dst_url, dump_dir)
      expect(dst_db[:events].count).to eq(src_db[:events].count)
    end
  end

  # ── adaptive chunksize: very small chunks ────────────────────────────────────

  describe 'adaptive chunksize under load' do
    it 'completes with chunksize=1 (extreme case)' do
      small_src = DbHelpers.adapt_url('sqlite://tmp/tapsoob_small_src.db')
      small_dst = DbHelpers.adapt_url('sqlite://tmp/tapsoob_small_dst.db')
      small_dir = Dir.mktmpdir

      begin
        sdb = DbHelpers.connect(small_src)
        sdb.create_table!(:small_test) { primary_key :id; String :v, size: 50 }
        100.times { |i| sdb[:small_test].insert(v: "row_#{i}") }

        round_trip(small_src, small_dst, small_dir, default_chunksize: 1)
        expect(DbHelpers.connect(small_dst)[:small_test].count).to eq(100)
      ensure
        begin
          # Close connections before removing the database files they point at.
          DbHelpers.disconnect_all
        ensure
          FileUtils.rm_rf(small_dir)
          FileUtils.rm_f(%w[tmp/tapsoob_small_src.db tmp/tapsoob_small_dst.db])
        end
        # @src_db / @dst_db are deliberately not reassigned here: RSpec copies
        # before(:all) ivars into each example instance, so rebinding them in an
        # example never reaches later examples or after(:all).
      end
    end
  end

  # ── FK order: orders depends on users ────────────────────────────────────────

  describe 'foreign key dependency ordering' do
    it 'pushes users before orders (table_order.txt respected)' do
      pull(src_url, dump_dir)
      order_file = File.join(dump_dir, 'table_order.txt')
      # The ordering check is intentionally lenient: it only applies when the
      # dump contains table_order.txt and that file lists both tables.
      if File.exist?(order_file)
        order      = File.readlines(order_file).map(&:strip)
        users_idx  = order.index('users')
        orders_idx = order.index('orders')
        expect(users_idx).to be < orders_idx if users_idx && orders_idx
      end
      expect { push(dst_url, dump_dir) }.not_to raise_error
    end
  end
end
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
require 'tapsoob/chunksize'
|
|
3
|
+
|
|
4
|
+
RSpec.describe Tapsoob::Chunksize do
  # Unit tests for the adaptive chunk-size calculator, exercised purely through
  # its public accessors (no database involved).
  subject(:cs) { described_class.new(1000) }

  describe '#initialize' do
    it 'stores the initial chunksize' do
      expect(cs.to_i).to eq(1000)
    end

    it 'starts with zero retries' do
      expect(cs.retries).to eq(0)
    end

    it 'starts with zero idle_secs' do
      expect(cs.idle_secs).to eq(0.0)
    end
  end

  describe '#reset_chunksize' do
    context 'with 0 retries (first failure)' do
      it 'resets to 10' do
        expect(cs.reset_chunksize).to eq(10)
      end
    end

    context 'with 1 retry' do
      it 'resets to 10' do
        cs.retries = 1
        expect(cs.reset_chunksize).to eq(10)
      end
    end

    context 'with 2+ retries' do
      it 'resets to 1' do
        cs.retries = 2
        expect(cs.reset_chunksize).to eq(1)
      end
    end
  end

  describe '#diff' do
    before do
      cs.start_time = 0.0
      cs.end_time   = 10.0
      cs.time_in_db = 3.0
      cs.idle_secs  = 2.0
    end

    it 'returns end_time - start_time - time_in_db - idle_secs' do
      # 10.0 - 0.0 - 3.0 - 2.0
      expect(cs.diff).to eq(5.0)
    end
  end

  describe '#calc_new_chunksize' do
    # Builds a Chunksize whose #diff equals diff_val: the timing window starts at
    # zero and the time_in_db / idle_secs deductions are zeroed out.
    def make(chunksize, diff_val)
      c = described_class.new(chunksize)
      c.start_time = 0.0
      c.end_time   = diff_val
      c.time_in_db = 0.0
      c.idle_secs  = 0.0
      c
    end

    # The expectation below is a division by three, so the description says so
    # (it previously claimed the size was "halved").
    it 'divides by three (rounding up) when diff > 3.0' do
      c = make(900, 3.5)
      expect(c.calc_new_chunksize).to eq((900 / 3.0).ceil)
    end

    it 'decrements by 100 when diff is 1.1..3.0' do
      c = make(900, 2.0)
      expect(c.calc_new_chunksize).to eq(800)
    end

    it 'doubles when diff < 0.8' do
      c = make(500, 0.5)
      expect(c.calc_new_chunksize).to eq(1000)
    end

    it 'increments by 100 when diff is 0.8..1.1' do
      c = make(500, 0.9)
      expect(c.calc_new_chunksize).to eq(600)
    end

    it 'never returns less than 1' do
      c = make(1, 5.0)
      expect(c.calc_new_chunksize).to be >= 1
    end

    it 'holds chunksize unchanged when retries > 0' do
      # diff 0.5 doubles the size in the example above; with a retry recorded
      # the size is expected to stay put.
      c = make(500, 0.5)
      c.retries = 1
      expect(c.calc_new_chunksize).to eq(500)
    end
  end

  describe '#time_delta' do
    it 'returns elapsed seconds for the block' do
      delta = cs.time_delta { sleep 0.01 }
      # Wide bounds keep the check stable on slow or busy machines.
      expect(delta).to be_between(0.005, 1.0)
    end
  end
end
|
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
require 'tapsoob/data_stream'
|
|
3
|
+
|
|
4
|
+
# Shared low-level stream tests run against an in-process SQLite connection.
|
|
5
|
+
# They exercise the fetch/encode/decode cycle, completion logic, and the
|
|
6
|
+
# factory method – all without any filesystem dump.
|
|
7
|
+
|
|
8
|
+
RSpec.describe Tapsoob::DataStream do
  # ── shared DB setup ──────────────────────────────────────────────────────────

  # Opens a SQLite connection via the connect_sqlite helper (defined outside this
  # file), loads the schema_dumper extension and records the connection so the
  # `after` hook can close exactly the connections an example opened.
  def open_db
    connect_sqlite.tap do |conn|
      conn.extension :schema_dumper
      (@opened_dbs ||= []) << conn
    end
  end

  # Decodes one chunk returned by stream.fetch and returns the :id values in it.
  def chunk_ids(stream, encoded)
    data_params = {
      state:        stream.to_hash,
      checksum:     Tapsoob::Utils.checksum(encoded).to_s,
      encoded_data: encoded
    }
    ids = []
    stream.fetch_data_from_database(data_params) do |rows|
      id_idx = rows[:header].index(:id)
      ids.concat(rows[:data].map { |r| r[id_idx] })
    end
    ids
  end

  # Fetches from the stream until it reports completion (or yields an empty
  # chunk) and returns every :id seen, in fetch order.
  def drain_ids(stream)
    ids = []
    until stream.complete?
      encoded, count, = stream.fetch
      break if count == 0
      ids.concat(chunk_ids(stream, encoded))
    end
    ids
  end

  # 50-row table with an integer primary key.
  let(:db) do
    open_db.tap do |d|
      d.create_table(:stream_test) do
        primary_key :id
        String  :label, size: 50
        Integer :value
      end
      50.times { |i| d[:stream_test].insert(label: "row_#{i}", value: i) }
    end
  end

  # 20-row table without a primary key.
  let(:db_nopk) do
    open_db.tap do |d|
      d.create_table(:nopk_test) do
        String  :key, size: 50
        Integer :val
      end
      20.times { |i| d[:nopk_test].insert(key: "k#{i}", val: i) }
    end
  end

  # Only disconnect what was opened. Calling `db` / `db_nopk` here would force
  # both lazy fixtures to be built in every example just to tear them down.
  after { @opened_dbs&.each(&:disconnect) }

  # ── Base ─────────────────────────────────────────────────────────────────────

  describe Tapsoob::DataStream::Base do
    subject(:stream) do
      described_class.new(db, { table_name: :stream_test, chunksize: 10 })
    end

    describe '#fetch' do
      it 'returns [encoded_data, row_count, elapsed]' do
        encoded, count, elapsed = stream.fetch
        expect(encoded).to be_a(String)
        expect(count).to eq(10)
        expect(elapsed).to be_a(Float)
      end

      it 'advances offset on each call' do
        stream.fetch
        expect(stream.state[:offset]).to eq(10)
        stream.fetch
        expect(stream.state[:offset]).to eq(20)
      end
    end

    describe '#complete?' do
      it 'is false before all rows are fetched' do
        stream.fetch
        expect(stream.complete?).to be false
      end

      it 'is true after all rows are fetched' do
        # 50 rows at chunksize 10 => five fetches
        5.times { stream.fetch }
        expect(stream.complete?).to be true
      end
    end

    describe '#fetch_data_from_database' do
      # Description lists exactly the keys asserted below.
      it 'yields a hash with :table_name, :header and :data' do
        encoded, = stream.fetch
        data_params = {
          state:        stream.to_hash,
          checksum:     Tapsoob::Utils.checksum(encoded).to_s,
          encoded_data: encoded
        }
        yielded = nil
        stream.fetch_data_from_database(data_params) { |rows| yielded = rows }
        expect(yielded).to include(:table_name, :header, :data)
      end
    end

    describe '#parse_encoded_data' do
      it 'raises CorruptedData on checksum mismatch' do
        encoded, = stream.fetch
        expect {
          stream.parse_encoded_data(encoded, '0')
        }.to raise_error(Tapsoob::CorruptedData)
      end
    end

    describe '.factory' do
      it 'returns Keyed stream for table with integer PK' do
        result = described_class.factory(db, { table_name: :stream_test, chunksize: 10 }, {})
        expect(result).to be_a(Tapsoob::DataStream::Keyed)
      end

      it 'returns Base stream for table without integer PK' do
        result = described_class.factory(db_nopk, { table_name: :nopk_test, chunksize: 10 }, {})
        expect(result).to be_a(Tapsoob::DataStream::Base)
        # be_a also matches subclasses, so additionally rule out the keyed
        # stream; otherwise this example could not tell the two apart.
        expect(result).not_to be_a(Tapsoob::DataStream::Keyed)
      end
    end
  end

  # ── Keyed ────────────────────────────────────────────────────────────────────

  describe Tapsoob::DataStream::Keyed do
    subject(:stream) do
      described_class.new(db, { table_name: :stream_test, chunksize: 10 })
    end

    it 'fetches all 50 rows without duplicates' do
      all_ids = []
      # Fetch first, then test for completion (kept as in the original example:
      # complete? is only consulted after at least one fetch).
      loop do
        encoded, count, = stream.fetch
        break if count == 0
        all_ids.concat(chunk_ids(stream, encoded))
        break if stream.complete?
      end
      expect(all_ids.uniq.size).to eq(50)
      expect(all_ids.size).to eq(50)
    end

    describe '.calculate_pk_ranges' do
      it 'returns the right number of ranges' do
        ranges = described_class.calculate_pk_ranges(db, :stream_test, 4)
        expect(ranges.size).to eq(4)
      end

      it 'covers the full PK range' do
        min    = db[:stream_test].min(:id)
        max    = db[:stream_test].max(:id)
        ranges = described_class.calculate_pk_ranges(db, :stream_test, 4)
        expect(ranges.first.first).to eq(min)
        expect(ranges.last.last).to eq(max)
      end
    end
  end

  # ── KeyedPartition ───────────────────────────────────────────────────────────

  describe Tapsoob::DataStream::KeyedPartition do
    it 'fetches only rows within its assigned PK range' do
      min = db[:stream_test].min(:id)
      max = db[:stream_test].max(:id)
      mid = (min + max) / 2

      stream = described_class.new(db, {
        table_name:      :stream_test,
        chunksize:       100,
        partition_range: [min, mid]
      })

      all_ids = drain_ids(stream)

      # `all` passes vacuously on an empty array, so require some rows first.
      expect(all_ids).not_to be_empty
      expect(all_ids).to all(be_between(min, mid))
    end
  end

  # ── Interleaved ──────────────────────────────────────────────────────────────

  describe Tapsoob::DataStream::Interleaved do
    it 'two workers together cover all rows without overlap' do
      # Build both workers before draining either one.
      workers = [0, 1].map do |worker_id|
        described_class.new(db, {
          table_name: :stream_test, chunksize: 10, worker_id: worker_id, num_workers: 2
        })
      end

      combined = workers.flat_map { |worker| drain_ids(worker) }

      expect(combined.sort).to eq(combined.uniq.sort)
      expect(combined.size).to eq(50)
    end
  end
end
|