tapsoob 0.7.17 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitlab-ci.yml +150 -7
- data/Gemfile +5 -2
- data/README.md +11 -7
- data/lib/tapsoob/operation/base.rb +4 -8
- data/lib/tapsoob/operation/pull.rb +4 -1
- data/lib/tapsoob/utils.rb +3 -3
- data/lib/tapsoob/version.rb +1 -1
- data/spec/integration/mysql_spec.rb +89 -0
- data/spec/integration/postgres_spec.rb +97 -0
- data/spec/integration/sqlite_spec.rb +119 -0
- data/spec/spec_helper.rb +40 -78
- data/spec/support/db_helpers.rb +115 -0
- data/spec/support/fixtures.rb +304 -0
- data/spec/support/round_trip_helper.rb +70 -0
- data/spec/support/shared_examples/round_trip.rb +83 -0
- data/spec/system/large_dataset_spec.rb +163 -0
- data/spec/unit/tapsoob/chunksize_spec.rb +105 -0
- data/spec/unit/tapsoob/data_stream_spec.rb +220 -0
- data/spec/unit/tapsoob/operation_base_spec.rb +134 -0
- data/spec/unit/tapsoob/schema_spec.rb +102 -0
- data/spec/unit/tapsoob/utils_spec.rb +260 -0
- data/spec/unit/tapsoob/version_spec.rb +8 -0
- metadata +15 -3
- data/spec/lib/tapsoob/chunksize_spec.rb +0 -92
- data/spec/lib/tapsoob/version_spec.rb +0 -7
data/spec/spec_helper.rb
CHANGED
|
@@ -1,91 +1,53 @@
|
|
|
1
1
|
require 'simplecov'
|
|
2
|
-
SimpleCov.start
|
|
2
|
+
SimpleCov.start do
|
|
3
|
+
add_filter '/spec/'
|
|
4
|
+
add_group 'Operation', 'lib/tapsoob/operation'
|
|
5
|
+
add_group 'DataStream', 'lib/tapsoob/data_stream'
|
|
6
|
+
add_group 'CLI', 'lib/tapsoob/cli'
|
|
7
|
+
end
|
|
8
|
+
|
|
9
|
+
require 'tapsoob'
|
|
10
|
+
require 'sequel'
|
|
11
|
+
require 'fileutils'
|
|
12
|
+
require 'tmpdir'
|
|
13
|
+
|
|
14
|
+
Dir[File.join(__dir__, 'support', '**', '*.rb')].sort.each { |f| require f }
|
|
15
|
+
|
|
16
|
+
# ── JRuby-aware SQLite helpers ───────────────────────────────────────────────
|
|
17
|
+
# Use these everywhere instead of Sequel.sqlite / 'sqlite::memory:' so the
|
|
18
|
+
# unit specs run identically under MRI and JRuby.
|
|
19
|
+
|
|
20
|
+
# Returns the connection URL for an in-memory SQLite database, passed through
# DbHelpers.adapt_url so the scheme suits the running Ruby implementation.
def sqlite_memory_url
  memory_url = 'sqlite::memory:'
  DbHelpers.adapt_url(memory_url)
end
|
|
23
|
+
|
|
24
|
+
# Opens a Sequel connection to the in-memory SQLite URL, loads the
# :schema_dumper extension on it, and returns the database handle.
def connect_sqlite
  Sequel.connect(sqlite_memory_url).tap do |database|
    database.extension :schema_dumper
  end
end
|
|
3
29
|
|
|
4
|
-
# This file was generated by the `rspec --init` command. Conventionally, all
|
|
5
|
-
# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
|
|
6
|
-
# The generated `.rspec` file contains
|
|
7
|
-
# `--require spec_helper` which will cause this
|
|
8
|
-
# file to always be loaded, without
|
|
9
|
-
# a need to explicitly require it in any files.
|
|
10
|
-
#
|
|
11
|
-
# Given that it is always loaded, you are encouraged to keep this file as
|
|
12
|
-
# light-weight as possible. Requiring heavyweight dependencies from this file
|
|
13
|
-
# will add to the boot time of your test suite on EVERY test run, even for an
|
|
14
|
-
# individual file that may not need all of that loaded. Instead, consider making
|
|
15
|
-
# a separate helper file that requires the additional dependencies and performs
|
|
16
|
-
# the additional setup,
|
|
17
|
-
# and require it from the spec files that actually need it.
|
|
18
|
-
#
|
|
19
|
-
# The `.rspec` file also contains a few flags that are not defaults but that
|
|
20
|
-
# users commonly want.
|
|
21
|
-
#
|
|
22
|
-
# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
|
|
23
30
|
RSpec.configure do |config|
|
|
24
|
-
# rspec-expectations config goes here. You can use an alternate
|
|
25
|
-
# assertion/expectation library such as wrong or the stdlib/minitest
|
|
26
|
-
# assertions if you prefer.
|
|
27
31
|
config.expect_with :rspec do |expectations|
|
|
28
|
-
# This option will default to `true` in RSpec 4. It makes the `description`
|
|
29
|
-
# and `failure_message` of custom matchers include text for helper methods
|
|
30
|
-
# defined using `chain`, e.g.:
|
|
31
|
-
# be_bigger_than(2).and_smaller_than(4).description
|
|
32
|
-
# # => "be bigger than 2 and smaller than 4"
|
|
33
|
-
# ...rather than:
|
|
34
|
-
# # => "be bigger than 2"
|
|
35
32
|
expectations.include_chain_clauses_in_custom_matcher_descriptions = true
|
|
36
33
|
end
|
|
37
34
|
|
|
38
|
-
# rspec-mocks config goes here. You can use an alternate test double
|
|
39
|
-
# library (such as bogus or mocha) by changing the `mock_with` option here.
|
|
40
35
|
config.mock_with :rspec do |mocks|
|
|
41
|
-
# Prevents you from mocking or stubbing a method that does not exist on
|
|
42
|
-
# a real object. This is generally recommended, and will default to
|
|
43
|
-
# `true` in RSpec 4.
|
|
44
36
|
mocks.verify_partial_doubles = true
|
|
45
37
|
end
|
|
46
38
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
#
|
|
54
|
-
|
|
55
|
-
|
|
39
|
+
config.shared_context_metadata_behavior = :apply_to_host_groups
|
|
40
|
+
config.filter_run_when_matching :focus
|
|
41
|
+
config.disable_monkey_patching!
|
|
42
|
+
config.order = :random
|
|
43
|
+
Kernel.srand config.seed
|
|
44
|
+
|
|
45
|
+
# Integration/system tests require a real DB — skip unless env vars are set.
|
|
46
|
+
config.filter_run_excluding :integration unless ENV['INTEGRATION_TESTS'] || ENV['SRC_DATABASE_URL']
|
|
47
|
+
config.filter_run_excluding :system unless ENV['SYSTEM_TESTS'] || ENV['SRC_DATABASE_URL']
|
|
56
48
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
# config.warnings = true
|
|
62
|
-
#
|
|
63
|
-
# # Many RSpec users commonly either run the entire suite or an individual
|
|
64
|
-
# # file, and it's useful to allow more verbose output when running an
|
|
65
|
-
# # individual spec file.
|
|
66
|
-
# if config.files_to_run.one?
|
|
67
|
-
# # Use the documentation formatter for detailed output,
|
|
68
|
-
# # unless a formatter has already been configured
|
|
69
|
-
# # (e.g. via a command-line flag).
|
|
70
|
-
# config.default_formatter = 'doc'
|
|
71
|
-
# end
|
|
72
|
-
#
|
|
73
|
-
# # Print the 10 slowest examples and example groups at the
|
|
74
|
-
# # end of the spec run, to help surface which specs are running
|
|
75
|
-
# # particularly slow.
|
|
76
|
-
# config.profile_examples = 10
|
|
77
|
-
#
|
|
78
|
-
# # Run specs in random order to surface order dependencies. If you find an
|
|
79
|
-
# # order dependency and want to debug it,
|
|
80
|
-
# # you can fix the order by providing
|
|
81
|
-
# # the seed, which is printed after each run.
|
|
82
|
-
# # --seed 1234
|
|
83
|
-
# config.order = :random
|
|
84
|
-
#
|
|
85
|
-
# # Seed global randomization in this process using the `--seed` CLI option.
|
|
86
|
-
# # Setting this allows you to use `--seed` to deterministically reproduce
|
|
87
|
-
# # test failures
|
|
88
|
-
# # related to randomization by passing the same `--seed` value
|
|
89
|
-
# # as the one that triggered the failure.
|
|
90
|
-
# Kernel.srand config.seed
|
|
49
|
+
config.include DbHelpers, :integration
|
|
50
|
+
config.include DbHelpers, :system
|
|
51
|
+
config.include RoundTripHelper, :integration
|
|
52
|
+
config.include RoundTripHelper, :system
|
|
91
53
|
end
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
require 'sequel'
|
|
2
|
+
require 'database_cleaner/sequel'
|
|
3
|
+
|
|
4
|
+
# Helpers included in all :integration and :system examples.
|
|
5
|
+
# Provides URL adaptation, a shared connection cache, and table lifecycle helpers.
|
|
6
|
+
module DbHelpers
  # Hook run when the module is included into an example group: registers a
  # per-example temporary dump directory and removes it after each example.
  def self.included(base)
    base.instance_eval do
      # dump_dir is per-example (let is fine here — it's never used in before/after(:all))
      let(:dump_dir) { Dir.mktmpdir('tapsoob_dump_') }
      after(:each) { FileUtils.rm_rf(dump_dir) }
    end
  end

  # Instance-method accessors used by examples and shared examples.
  # Host specs must assign @src_url / @dst_url / @src_db / @dst_db in before(:all).
  attr_reader :src_url, :dst_url, :src_db, :dst_db

  # ── URL normalisation ────────────────────────────────────────────────────────
  #
  # CI and local env vars always carry MRI-style URLs (mysql2://, postgres://, sqlite://).
  # Under JRuby, Sequel requires JDBC-style URLs (jdbc:mysql://, jdbc:postgresql://, jdbc:sqlite:).
  # This method rewrites the URL transparently so every caller gets the right scheme.
  #
  # Mapping:
  #   sqlite://path/to/file   → jdbc:sqlite:path/to/file       (JRuby)
  #   sqlite::memory:         → jdbc:sqlite::memory:           (JRuby)
  #   mysql2://host/db        → jdbc:mysql://host/db           (JRuby)
  #   postgres://host/db      → jdbc:postgresql://host/db      (JRuby)
  #   postgresql://host/db    → jdbc:postgresql://host/db      (JRuby)
  #   anything jdbc:*         → left unchanged (already JDBC)
  #
  JRUBY = (RUBY_PLATFORM =~ /java/)

  # Rewrites +url+ to its JDBC form when running under JRuby.
  #
  # @param url [String] connection URL in MRI or JDBC style
  # @param jruby [Object] truthy to force the JDBC rewrite; defaults to the
  #   detected platform, so existing single-argument callers are unaffected
  # @return [String] the rewritten URL, or +url+ itself when no rewrite applies
  def self.adapt_url(url, jruby: JRUBY)
    return url unless jruby
    return url if url.start_with?('jdbc:')

    case url
    when %r{\Asqlite:(?://)?(.*)\z}       then "jdbc:sqlite:#{Regexp.last_match(1)}"
    when %r{\Amysql2?://(.*)\z}           then "jdbc:mysql://#{Regexp.last_match(1)}"
    when %r{\Apostgres(?:ql)?://(.*)\z}   then "jdbc:postgresql://#{Regexp.last_match(1)}"
    else url
    end
  end

  # ── connection pool ──────────────────────────────────────────────────────────

  # Cache of Sequel databases keyed by URL; intentionally mutable and guarded
  # by CONNECTIONS_MUTEX.
  CONNECTIONS       = {}
  CONNECTIONS_MUTEX = Mutex.new

  # Returns the cached database for +url+, connecting (with the
  # :schema_dumper extension loaded) on first use.
  def self.connect(url)
    CONNECTIONS_MUTEX.synchronize do
      CONNECTIONS[url] ||= begin
        db = Sequel.connect(url, max_connections: 10)
        db.extension :schema_dumper
        db
      end
    end
  end

  # Disconnects every cached database and empties the cache.
  def self.disconnect_all
    CONNECTIONS_MUTEX.synchronize do
      CONNECTIONS.each_value(&:disconnect)
      CONNECTIONS.clear
    end
  end

  # ── table lifecycle helpers ──────────────────────────────────────────────────

  # Evaluates +block+ with +db+ as self, so the block can call create_table,
  # drop_table, etc. directly. Nothing is dropped unless the block does it.
  def drop_and_create(db, &block)
    db.instance_eval(&block)
  end

  # Truncate every test table on db after each example (faster than drop/create).
  # Tables that do not exist are skipped.
  #
  # The backend is identified with db.database_type rather than adapter_scheme,
  # because under JRuby the adapter scheme is :jdbc for every backend.
  # CASCADE is only requested on PostgreSQL, the only backend here whose
  # truncate accepts options.
  def truncate_tables(db, *tables)
    tables.each do |table|
      next unless db.table_exists?(table)

      case db.database_type
      when :mysql
        without_mysql_fk_checks(db) { db[table].truncate }
      when :postgres
        db[table].truncate(cascade: true)
      else
        db[table].truncate
      end
    end
  end

  # ── generic row-count assertion helpers ─────────────────────────────────────

  # Number of rows currently in +table+ on +db+.
  def row_count(db, table)
    db[table].count
  end

  # Whether +table+ exists on +db+.
  def table_exists?(db, table)
    db.table_exists?(table)
  end

  # ── schema helpers ───────────────────────────────────────────────────────────

  # Drop a table if it exists, accounting for FK checks on MySQL.
  # CASCADE is only requested on PostgreSQL; other backends get a plain DROP.
  def safe_drop(db, table)
    return unless db.table_exists?(table)

    case db.database_type
    when :mysql
      without_mysql_fk_checks(db) { db.drop_table(table) }
    when :postgres
      db.drop_table(table, cascade: true)
    else
      db.drop_table(table)
    end
  end

  private

  # Runs the block with MySQL foreign-key checks disabled.
  #
  # foreign_key_checks is a session variable, so the whole sequence is pinned
  # to one pooled connection via db.synchronize, and the checks are switched
  # back on in an ensure so a failing statement cannot leave them disabled.
  def without_mysql_fk_checks(db)
    db.synchronize do
      db.run("SET foreign_key_checks = 0")
      begin
        yield
      ensure
        db.run("SET foreign_key_checks = 1")
      end
    end
  end
end
|
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
require 'faker'
|
|
2
|
+
require 'sequel'
|
|
3
|
+
|
|
4
|
+
# Fixtures.create_tables(db) creates all test tables; Fixtures.seed(db) fills them with
|
|
5
|
+
# realistic data. Each table targets a distinct edge-case scenario.
|
|
6
|
+
#
|
|
7
|
+
# Tables created:
|
|
8
|
+
# users – common mixed-type table (name, email, timestamps)
|
|
9
|
+
# orders – FK relationship to users, decimal amounts
|
|
10
|
+
# products – nullable fields, boolean flag, float price
|
|
11
|
+
# documents – large TEXT body (up to 64 KB per row)
|
|
12
|
+
# attachments – BLOB payloads (up to 256 KB per row)
|
|
13
|
+
# events – no primary key (exercises Base stream, not Keyed)
|
|
14
|
+
# large_table – 150 000+ integer rows (triggers intra-table parallelization)
|
|
15
|
+
# null_heavy – every nullable column is NULL for half the rows
|
|
16
|
+
#
|
|
17
|
+
module Fixtures
  LARGE_TABLE_ROWS = 150_000
  DOCUMENT_ROWS    = 500
  ATTACHMENT_ROWS  = 200
  STANDARD_ROWS    = 1_000
  NULL_HEAVY_ROWS  = 400

  # ── schema ───────────────────────────────────────────────────────────────────

  # (Re)creates every fixture table on +db+.
  #
  # All fixture tables are dropped child-first before anything is created:
  # create_table!(:users) on its own would try to drop the parent table while
  # a leftover orders table from a previous run still references it.
  def self.create_tables(db)
    drop_tables(db)

    db.create_table!(:users) do
      primary_key :id
      String   :name,       null: false, size: 100
      String   :email,      null: false, size: 255
      String   :locale,     size: 10
      Integer  :age
      Date     :birthday
      DateTime :created_at
      DateTime :updated_at
    end

    db.create_table!(:orders) do
      primary_key :id
      foreign_key :user_id, :users, null: false
      String   :reference, null: false, size: 50
      Float    :amount
      String   :status,    size: 20, default: 'pending'
      DateTime :placed_at
    end

    db.create_table!(:products) do
      primary_key :id
      String    :sku,         null: false, size: 50
      String    :name,        null: false, size: 200
      Float     :price
      Integer   :stock,       default: 0
      TrueClass :available,   default: true
      String    :description, text: true
    end

    # MySQL gets MEDIUMTEXT for the document body; other backends use TEXT.
    body_type = db.database_type == :mysql ? :mediumtext : :text
    db.create_table!(:documents) do
      primary_key :id
      String   :title,  null: false, size: 255
      column   :body,   body_type
      String   :author, size: 100
      DateTime :published_at
    end

    # Binary column type differs per backend.
    payload_type = case db.database_type
                   when :mysql    then :mediumblob
                   when :postgres then :bytea
                   else                :blob
                   end
    db.create_table!(:attachments) do
      primary_key :id
      String  :filename,  null: false, size: 255
      String  :mime_type, size: 100
      Integer :size_bytes
      column  :payload,   payload_type
    end

    db.create_table!(:events) do
      String   :event_type, null: false, size: 50
      String   :actor,      size: 100
      String   :target,     size: 100
      DateTime :occurred_at
    end

    db.create_table!(:large_table) do
      primary_key :id
      String  :data,     null: false, size: 100
      Integer :sequence
      Float   :value
    end

    db.create_table!(:null_heavy) do
      primary_key :id
      String  :maybe_name,   size: 100
      Integer :maybe_number
      Float   :maybe_score
      Date    :maybe_date
      String  :maybe_text,   text: true
    end
  end

  # Drops every fixture table that exists on +db+.
  def self.drop_tables(db)
    # Drop in reverse FK order
    [:null_heavy, :large_table, :events, :attachments,
     :documents, :products, :orders, :users].each do |t|
      db.drop_table(t, if_exists: true)
    end
  end

  # ── seeding ──────────────────────────────────────────────────────────────────

  # Seeds every fixture table. Users are seeded before orders because orders
  # pick their user_id from the rows already present in users.
  def self.seed(db)
    seed_users(db)
    seed_orders(db)
    seed_products(db)
    seed_documents(db)
    seed_attachments(db)
    seed_events(db)
    seed_large_table(db)
    seed_null_heavy(db)
  end

  # ── users ────────────────────────────────────────────────────────────────────
  # Realistic person records: names from multiple locales, valid emails,
  # age range 18–80, random birthdays, ISO timestamps.

  def self.seed_users(db)
    Faker::Config.locale = :en
    rows = STANDARD_ROWS.times.map do
      now = Faker::Time.between(from: DateTime.new(2020, 1, 1), to: DateTime.now)
      {
        name:       Faker::Name.name,
        email:      Faker::Internet.unique.email,
        locale:     %w[en fr de es ja pt it nl].sample,
        age:        Faker::Number.between(from: 18, to: 80),
        birthday:   Faker::Date.birthday(min_age: 18, max_age: 80).to_s,
        created_at: now.strftime('%Y-%m-%d %H:%M:%S'),
        updated_at: now.strftime('%Y-%m-%d %H:%M:%S')
      }
    end
    db[:users].multi_insert(rows)
  ensure
    # Reset Faker's uniqueness tracking even when generation or the insert fails.
    Faker::UniqueGenerator.clear
  end

  # ── orders ───────────────────────────────────────────────────────────────────
  # Each order references a real user.  Amounts use realistic e-commerce prices
  # (0.99 – 9999.99).  Statuses mirror a typical order lifecycle.

  STATUSES = %w[pending processing shipped delivered cancelled refunded].freeze

  def self.seed_orders(db)
    user_ids = db[:users].select_map(:id)
    rows = STANDARD_ROWS.times.map do
      placed = Faker::Time.between(from: DateTime.new(2022, 1, 1), to: DateTime.now)
      {
        user_id:   user_ids.sample,
        reference: "ORD-#{Faker::Alphanumeric.unique.alphanumeric(number: 10).upcase}",
        amount:    (Faker::Commerce.price(range: 0.99..9999.99) * 100).round / 100.0,
        status:    STATUSES.sample,
        placed_at: placed.strftime('%Y-%m-%d %H:%M:%S')
      }
    end
    db[:orders].multi_insert(rows)
  ensure
    # Reset Faker's uniqueness tracking even when generation or the insert fails.
    Faker::UniqueGenerator.clear
  end

  # ── products ─────────────────────────────────────────────────────────────────
  # SKUs follow a realistic pattern (3-letter category + 6-digit number).
  # Descriptions are nullable: each row gets either nil or 1–3 lorem paragraphs.

  def self.seed_products(db)
    categories = %w[ELC CLT FRN SPT HOM KIT OFC]
    rows = STANDARD_ROWS.times.map do
      {
        sku:         "#{categories.sample}-#{Faker::Number.number(digits: 6)}",
        name:        Faker::Commerce.product_name,
        price:       (Faker::Commerce.price * 100).round / 100.0,
        stock:       Faker::Number.between(from: 0, to: 5_000),
        available:   [true, true, true, false].sample,
        description: [nil, Faker::Lorem.paragraphs(number: rand(1..3)).join("\n\n")].sample
      }
    end
    db[:products].multi_insert(rows)
  end

  # ── documents ────────────────────────────────────────────────────────────────
  # Bodies range from a few hundred bytes up to ~64 KB to stress TEXT columns
  # and the adaptive chunksize logic.

  # 1 000 random characters, generated once at load time and repeated to build
  # document bodies of the required length.
  LOREM_BASE = Faker::Lorem.characters(number: 1_000)

  def self.seed_documents(db)
    rows = DOCUMENT_ROWS.times.map do
      target_bytes = [256, 1_024, 4_096, 16_384, 65_536].sample
      # Repeat the base text past the target length, then cut to exactly target_bytes characters.
      body = (LOREM_BASE * ((target_bytes / LOREM_BASE.size) + 2))[0, target_bytes]
      published = Faker::Time.between(from: DateTime.new(2010, 1, 1), to: DateTime.now)
      {
        title:        Faker::Lorem.sentence(word_count: rand(4..10)).chomp('.'),
        body:         body,
        author:       Faker::Name.name,
        published_at: published.strftime('%Y-%m-%d %H:%M:%S')
      }
    end
    db[:documents].multi_insert(rows)
  end

  # ── attachments ──────────────────────────────────────────────────────────────
  # Binary payloads: realistic mix of small thumbnails (1–4 KB), medium images
  # (~50 KB), and occasional large files (~256 KB).  Uses Random.bytes so the
  # data is genuinely binary and exercises base64 encode/decode faithfully.

  MIME_TYPES = {
    'image/png'       => '.png',
    'image/jpeg'      => '.jpg',
    'application/pdf' => '.pdf',
    'application/zip' => '.zip',
    'video/mp4'       => '.mp4'
  }.freeze

  def self.seed_attachments(db)
    rows = ATTACHMENT_ROWS.times.map do
      size = [
        rand(1_024..4_096),       # thumbnail (1–4 KB)
        rand(40_000..60_000),     # image (~50 KB)
        rand(200_000..262_144)    # large file (~256 KB)
      ].sample
      mime, ext = MIME_TYPES.to_a.sample
      {
        filename:   Faker::File.file_name(dir: '', ext: ext.delete('.')),
        mime_type:  mime,
        size_bytes: size,
        payload:    Sequel::SQL::Blob.new(Random.bytes(size))
      }
    end
    db[:attachments].multi_insert(rows)
  end

  # ── events ───────────────────────────────────────────────────────────────────
  # No primary key — exercises DataStream::Base (non-keyed path) for both
  # pull and push.

  EVENT_TYPES = %w[login logout purchase refund view search click signup].freeze

  def self.seed_events(db)
    rows = STANDARD_ROWS.times.map do
      {
        event_type:  EVENT_TYPES.sample,
        actor:       Faker::Internet.username,
        target:      "/#{Faker::Internet.slug}",
        occurred_at: Faker::Time.between(
          from: DateTime.new(2023, 1, 1),
          to:   DateTime.now
        ).strftime('%Y-%m-%d %H:%M:%S')
      }
    end
    db[:events].multi_insert(rows)
  end

  # ── large_table ──────────────────────────────────────────────────────────────
  # 150 000 rows to push past the 100 K intra-table parallelization threshold.
  # Inserted in batches of 5 000 to keep memory low.

  LARGE_TABLE_BATCH = 5_000

  def self.seed_large_table(db)
    total   = LARGE_TABLE_ROWS
    batches = (total.to_f / LARGE_TABLE_BATCH).ceil
    batches.times do |b|
      # The final batch may be smaller than LARGE_TABLE_BATCH.
      count = [LARGE_TABLE_BATCH, total - b * LARGE_TABLE_BATCH].min
      rows  = count.times.map do |i|
        seq = b * LARGE_TABLE_BATCH + i
        {
          data:     Faker::Lorem.characters(number: rand(20..80)),
          sequence: seq,
          value:    rand * 10_000.0
        }
      end
      db[:large_table].multi_insert(rows)
    end
  end

  # ── null_heavy ───────────────────────────────────────────────────────────────
  # Half the rows have every nullable column set to NULL.
  # Ensures NULL survives Marshal encode/decode and DB round-trips.

  def self.seed_null_heavy(db)
    rows = NULL_HEAVY_ROWS.times.map do |i|
      if i.even?
        { maybe_name: nil, maybe_number: nil, maybe_score: nil,
          maybe_date: nil, maybe_text: nil }
      else
        {
          maybe_name:   Faker::Name.name,
          maybe_number: rand(-1_000_000..1_000_000),
          maybe_score:  rand * 100.0,
          maybe_date:   Faker::Date.between(from: '2000-01-01', to: '2030-12-31').to_s,
          maybe_text:   Faker::Lorem.paragraph
        }
      end
    end
    db[:null_heavy].multi_insert(rows)
  end
end
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
require 'tapsoob/operation/pull'
|
|
2
|
+
require 'tapsoob/operation/push'
|
|
3
|
+
|
|
4
|
+
# Helpers for running pull and push operations in specs.
|
|
5
|
+
# Included automatically in :integration and :system examples.
|
|
6
|
+
module RoundTripHelper
  # Baseline options handed to every Pull/Push operation; per-call opts are
  # merged on top of these.
  DEFAULT_OPTS = {
    data:              true,
    schema:            true,
    indexes:           true,
    indexes_first:     false,
    progress:          false,
    default_chunksize: 1000
  }.freeze

  # Builds a Pull operation for +url+ / +dir+, runs it, and returns the operation.
  def pull(url, dir, opts = {})
    Tapsoob::Operation::Pull.new(url, dir, DEFAULT_OPTS.merge(opts)).tap(&:run)
  end

  # Builds a Push operation for +url+ / +dir+, runs it, and returns the operation.
  def push(url, dir, opts = {})
    Tapsoob::Operation::Push.new(url, dir, DEFAULT_OPTS.merge(opts)).tap(&:run)
  end

  # Runs a pull from +src_url+ into +dir+ followed by a push from +dir+ into
  # +dst_url+, using the same +opts+ for both.
  # Returns [pull_op, push_op].
  def round_trip(src_url, dst_url, dir, opts = {})
    [pull(src_url, dir, opts), push(dst_url, dir, opts)]
  end

  # For each table listed by src_db, expects the table to exist in dst_db and
  # to hold the same number of rows.
  def expect_same_counts(src_db, dst_db)
    src_db.tables.each do |table|
      present = dst_db.table_exists?(table)
      expect(present).to be(true),
        "expected table #{table} to exist in destination"

      src_count = src_db[table].count
      dst_count = dst_db[table].count
      expect(dst_count).to eq(src_count),
        "row count mismatch for #{table}: src=#{src_count} dst=#{dst_count}"
    end
  end

  # Loads both sides of +table+ ordered by +order_col+ and expects each source
  # column value to equal the destination value at the same position.
  # Binary-encoded strings are compared byte by byte. Both tables are loaded
  # fully into memory, so this is only suitable for small tables.
  def expect_identical_rows(src_db, dst_db, table, order_col: :id)
    src_rows = src_db[table].order(order_col).all
    dst_rows = dst_db[table].order(order_col).all
    expect(dst_rows.size).to eq(src_rows.size)

    src_rows.each_with_index do |src, i|
      dst = dst_rows[i]
      src.each do |col, val|
        binary = val.is_a?(String) && val.encoding == Encoding::ASCII_8BIT
        if binary
          # binary – compare as bytes
          expect(dst[col].to_s.bytes).to eq(val.bytes),
            "blob mismatch in #{table}[#{i}].#{col}"
        else
          expect(dst[col]).to eq(val),
            "value mismatch in #{table}[#{i}].#{col}: expected #{val.inspect}, got #{dst[col].inspect}"
        end
      end
    end
  end
end
|