tapsoob 0.8.5 → 0.8.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 64dd145a81db1be94164524f8ec22cef232c76d7a3d455646c5289496c8e1ee2
4
- data.tar.gz: 678fe127e48278964db8475c2d8001a085fb7a56546b4912b6fdf08a90ea2718
3
+ metadata.gz: 612a3ebed6eb1091017de69eafd6907418e3d23685d0dfd0e88c1ec4fc1589ab
4
+ data.tar.gz: 4de16885893a76191c7e372f36884ae34af6c4b87f629c18d568f9371a79e6ac
5
5
  SHA512:
6
- metadata.gz: 4263e7bdec44a5c31af5ebeba0fe1cc2b969e69f93a206a2c1b580a8141583eb94fd305626acc923d7499b20f38871294e41e1cdb9e14666fc2581e2fbbe8966
7
- data.tar.gz: ebc49b6130cdcaa98ffe70405e8aac442012e451409a5d9e902c7c0518cf23dd7c3a158c850aac9392a9b349bae004b324efa42ad688d5dca10fd260f663fc2c
6
+ metadata.gz: 474583bb8a802d100f6603f4de125e60ed9583addf961252a3bc2496e00174d9aed54012c317a3df92077db5d806b3a4f700541d17bd7a5b134bd7c3162e86c5
7
+ data.tar.gz: b65389a1279e6b86bf132e0f2878913b868a91e49405eeebfaf4c998f941f0dadbf80b8f74d5c388aa90c7dcd45473577f1619d64e5ccf6a988c46b2b694d8f0
@@ -29,7 +29,7 @@ module Tapsoob
29
29
 
30
30
  desc "dump_table DATABASE_URL TABLE", "Dump a table from a database using a database URL"
31
31
  def dump_table(database_url, table)
32
- puts Tapsoob::Schema.dump_table(database_url, table)
32
+ puts Tapsoob::Schema.dump_table(database_url, table, {})
33
33
  end
34
34
 
35
35
  desc "foreign_keys DATABASE_URL", "Dump foreign_keys from a database using a database URL"
@@ -11,9 +11,9 @@ module Tapsoob
11
11
  extend self
12
12
 
13
13
  def dump(database_url, options = {})
14
- db = Sequel.connect(database_url)
15
- db.extension :schema_dumper
16
- template = ERB.new <<-END_MIG
14
+ Sequel.connect(database_url) do |db|
15
+ db.extension :schema_dumper
16
+ template = ERB.new <<-END_MIG
17
17
  Class.new(Sequel::Migration) do
18
18
  def up
19
19
  <% db.send(:sort_dumped_tables, db.tables, {}).each do |table| %>
@@ -29,7 +29,8 @@ Class.new(Sequel::Migration) do
29
29
  end
30
30
  END_MIG
31
31
 
32
- template.result(binding)
32
+ template.result(binding)
33
+ end
33
34
  end
34
35
 
35
36
  def dump_table(database_url_or_db, table, options)
@@ -68,15 +69,17 @@ END_MIG
68
69
  end
69
70
 
70
71
  def foreign_keys(database_url)
71
- db = Sequel.connect(database_url)
72
- db.extension :schema_dumper
73
- db.dump_foreign_key_migration
72
+ Sequel.connect(database_url) do |db|
73
+ db.extension :schema_dumper
74
+ db.dump_foreign_key_migration
75
+ end
74
76
  end
75
77
 
76
78
  def indexes(database_url)
77
- db = Sequel.connect(database_url)
78
- db.extension :schema_dumper
79
- db.dump_indexes_migration
79
+ Sequel.connect(database_url) do |db|
80
+ db.extension :schema_dumper
81
+ db.dump_indexes_migration
82
+ end
80
83
  end
81
84
 
82
85
  def indexes_individual(database_url)
@@ -105,6 +108,7 @@ END_MIG
105
108
  end
106
109
 
107
110
  def load(database_url_or_db, schema, options = { drop: false })
111
+ schema = rewrite_non_integer_primary_keys(schema)
108
112
  # Accept either a database URL or an existing connection object
109
113
  if database_url_or_db.is_a?(Sequel::Database)
110
114
  db = database_url_or_db
@@ -158,6 +162,25 @@ END_MIG
158
162
  end
159
163
  end
160
164
 
165
+ NON_INTEGER_PK_PATTERN = /^(\s*)primary_key\s+(:?\w+),\s*:type=>"([^"]+)"(.*)$/
166
+ INTEGER_DB_TYPES = /\A(?:int(?:eger|\d+)?|bigint|smallint|serial|bigserial|smallserial)/i
167
+
168
+ # On PG 10+, Sequel's CreateTableGenerator injects `identity: true` into
169
+ # every primary_key call via serial_primary_key_options. PG rejects IDENTITY
170
+ # on non-integer types. Rewrite `primary_key :col, :type=>"varchar..."` to
171
+ # `column :col, "varchar...", primary_key: true, null: false` which bypasses
172
+ # that code path entirely.
173
+ def rewrite_non_integer_primary_keys(schema_str)
174
+ schema_str.gsub(NON_INTEGER_PK_PATTERN) do
175
+ indent, col, db_type, rest = $1, $2, $3, $4
176
+ if db_type =~ INTEGER_DB_TYPES
177
+ "#{indent}primary_key #{col}, :type=>\"#{db_type}\"#{rest}"
178
+ else
179
+ "#{indent}column #{col}, \"#{db_type}\", primary_key: true, null: false#{rest}"
180
+ end
181
+ end
182
+ end
183
+
161
184
  def reset_db_sequences(database_url)
162
185
  Sequel.connect(database_url) do |db|
163
186
  db.extension :schema_dumper
@@ -1,4 +1,4 @@
1
1
  # -*- encoding : utf-8 -*-
2
2
  module Tapsoob
3
- VERSION = "0.8.5".freeze
3
+ VERSION = "0.8.7".freeze
4
4
  end
@@ -105,12 +105,24 @@ RSpec.describe 'PostgreSQL round-trip', :integration do
105
105
 
106
106
  after(:each) do
107
107
  @src_db.run("DROP TABLE IF EXISTS varchar_pk_table")
108
+ @dst_db.run("DROP TABLE IF EXISTS varchar_pk_table")
108
109
  end
109
110
 
110
111
  it 'skips reset without logging a warning (no sequence attached to varchar PK)' do
111
112
  expect(Tapsoob.log).not_to receive(:warn)
112
113
  Tapsoob::Schema.reset_db_sequences(@src_url)
113
114
  end
115
+
116
+ it 'round-trips the table without identity column errors' do
117
+ dump_dir = Dir.mktmpdir
118
+ begin
119
+ pull(src_url, dump_dir)
120
+ expect { push(dst_url, dump_dir) }.not_to raise_error
121
+ expect(dst_db[:varchar_pk_table].where(id: 'abc-123').count).to eq(1)
122
+ ensure
123
+ FileUtils.rm_rf(dump_dir)
124
+ end
125
+ end
114
126
  end
115
127
 
116
128
  context 'when reset_primary_key_sequence raises a DatabaseError' do
data/spec/spec_helper.rb CHANGED
@@ -45,6 +45,7 @@ RSpec.configure do |config|
45
45
  # Integration tests require a real DB — skip unless env vars are set.
46
46
  config.filter_run_excluding :integration unless ENV['INTEGRATION_TESTS'] || ENV['SRC_DATABASE_URL']
47
47
 
48
- config.include DbHelpers, :integration
49
- config.include RoundTripHelper, :integration
48
+ config.include DbHelpers, :integration
49
+ config.include RoundTripHelper, :integration
50
+ config.include OperationHelpers
50
51
  end
@@ -0,0 +1,43 @@
1
+ require 'tapsoob/operation/pull'
2
+ require 'tapsoob/operation/push'
3
+
4
+ # Shared helpers for unit specs that exercise Pull / Push / Base.
5
+ # Included automatically via spec_helper for all unit specs.
6
+ module OperationHelpers
7
+ # Default opts used across pull/push/base unit tests.
8
+ UNIT_OPTS = {
9
+ data: true,
10
+ schema: true,
11
+ indexes: false,
12
+ progress: false,
13
+ default_chunksize: 1000,
14
+ no_split: true,
15
+ }.freeze
16
+
17
+ # A pre-seeded in-memory SQLite DB with :users (5 rows) and :widgets (3 rows).
18
+ # Returns a new connection each call — callers own disconnection.
19
+ def seeded_sqlite_db
20
+ d = connect_sqlite
21
+ d.create_table(:users) { primary_key :id; String :name }
22
+ d.create_table(:widgets) { primary_key :id; Integer :qty }
23
+ 5.times { |i| d[:users].insert(name: "user_#{i}") }
24
+ 3.times { |i| d[:widgets].insert(qty: i * 10) }
25
+ d
26
+ end
27
+
28
+ def build_pull(db, dump_dir, extra_opts = {})
29
+ op = Tapsoob::Operation::Pull.new(sqlite_memory_url, dump_dir, UNIT_OPTS.merge(extra_opts))
30
+ op.instance_variable_set(:@db, db)
31
+ op
32
+ end
33
+
34
+ def build_push(db_url, dump_dir, extra_opts = {})
35
+ Tapsoob::Operation::Push.new(db_url, dump_dir, UNIT_OPTS.merge(extra_opts))
36
+ end
37
+
38
+ def build_base(db, dump_dir, extra_opts = {})
39
+ op = Tapsoob::Operation::Pull.new(sqlite_memory_url, dump_dir, UNIT_OPTS.merge(extra_opts))
40
+ op.instance_variable_set(:@db, db)
41
+ op
42
+ end
43
+ end
@@ -0,0 +1,222 @@
1
+ require 'spec_helper'
2
+ require 'tapsoob/operation/base'
3
+ require 'tapsoob/operation/pull'
4
+ require 'tapsoob/operation/push'
5
+
6
+ RSpec.describe Tapsoob::Operation::Base do
7
+ let(:db) { seeded_sqlite_db }
8
+ let(:dump_dir) { Dir.mktmpdir("tapsoob_base_") }
9
+
10
+ after do
11
+ db.disconnect
12
+ FileUtils.rm_rf(dump_dir)
13
+ end
14
+
15
+ # ── format_number ────────────────────────────────────────────────────────────
16
+
17
+ describe '#format_number' do
18
+ it 'formats numbers with commas' do
19
+ op = build_base(db, dump_dir)
20
+ expect(op.format_number(1_000_000)).to eq("1,000,000")
21
+ expect(op.format_number(1234)).to eq("1,234")
22
+ expect(op.format_number(999)).to eq("999")
23
+ end
24
+ end
25
+
26
+ # ── resuming? ────────────────────────────────────────────────────────────────
27
+
28
+ describe '#resuming?' do
29
+ it 'returns false by default' do
30
+ expect(build_base(db, dump_dir).resuming?).to be false
31
+ end
32
+
33
+ it 'returns true when :resume is set' do
34
+ expect(build_base(db, dump_dir, resume: true).resuming?).to be true
35
+ end
36
+ end
37
+
38
+ # ── parallel? / parallel_workers ─────────────────────────────────────────────
39
+
40
+ describe '#parallel?' do
41
+ it 'returns false when parallel is 1' do
42
+ expect(build_base(db, dump_dir, parallel: 1).parallel?).to be false
43
+ end
44
+
45
+ it 'returns true when parallel > 1' do
46
+ expect(build_base(db, dump_dir, parallel: 2).parallel?).to be true
47
+ end
48
+ end
49
+
50
+ describe '#parallel_workers' do
51
+ it 'defaults to 1' do
52
+ expect(build_base(db, dump_dir).parallel_workers).to eq(1)
53
+ end
54
+
55
+ it 'returns the requested count' do
56
+ expect(build_base(db, dump_dir, parallel: 4).parallel_workers).to eq(4)
57
+ end
58
+ end
59
+
60
+ # ── table_parallel_workers ───────────────────────────────────────────────────
61
+
62
+ describe '#table_parallel_workers' do
63
+ it 'returns 1 when no_split is set' do
64
+ expect(build_base(db, dump_dir, no_split: true).table_parallel_workers(:users, 5_000_000)).to eq(1)
65
+ end
66
+
67
+ it 'returns 1 when dump_path is nil' do
68
+ op = Tapsoob::Operation::Pull.new(sqlite_memory_url, nil, { default_chunksize: 1000 })
69
+ expect(op.table_parallel_workers(:users, 5_000_000)).to eq(1)
70
+ end
71
+
72
+ it 'returns 1 when row_count is below threshold' do
73
+ expect(build_base(db, dump_dir, no_split: false).table_parallel_workers(:users, 50_000)).to eq(1)
74
+ end
75
+
76
+ it 'returns >= 2 for a very large table' do
77
+ expect(build_base(db, dump_dir, no_split: false).table_parallel_workers(:users, 5_000_000)).to be >= 2
78
+ end
79
+
80
+ it 'returns >= 2 for a 1M+ row table' do
81
+ expect(build_base(db, dump_dir, no_split: false).table_parallel_workers(:users, 1_000_000)).to be >= 2
82
+ end
83
+
84
+ it 'returns >= 2 for a 500K+ row table' do
85
+ expect(build_base(db, dump_dir, no_split: false).table_parallel_workers(:users, 500_000)).to be >= 2
86
+ end
87
+
88
+ it 'returns 2 for a table just over the 100K threshold' do
89
+ expect(build_base(db, dump_dir, no_split: false).table_parallel_workers(:users, 150_000)).to eq(2)
90
+ end
91
+ end
92
+
93
+ # ── stream_state ─────────────────────────────────────────────────────────────
94
+
95
+ describe '#stream_state / #stream_state=' do
96
+ it 'defaults to empty hash' do
97
+ expect(build_base(db, dump_dir).stream_state).to eq({})
98
+ end
99
+
100
+ it 'stores and retrieves state' do
101
+ op = build_base(db, dump_dir)
102
+ op.stream_state = { table_name: :users }
103
+ expect(op.stream_state).to eq({ table_name: :users })
104
+ end
105
+ end
106
+
107
+ # ── add_completed_table ──────────────────────────────────────────────────────
108
+
109
+ describe '#add_completed_table' do
110
+ it 'appends to completed_tables thread-safely' do
111
+ op = build_base(db, dump_dir)
112
+ op.add_completed_table(:users)
113
+ op.add_completed_table(:widgets)
114
+ expect(op.completed_tables).to include("users", "widgets")
115
+ end
116
+ end
117
+
118
+ # ── max_intra_table_workers ──────────────────────────────────────────────────
119
+
120
+ describe '#max_intra_table_workers' do
121
+ it 'returns at least 2' do
122
+ expect(build_base(db, dump_dir).max_intra_table_workers).to be >= 2
123
+ end
124
+ end
125
+
126
+ # ── catch_errors ─────────────────────────────────────────────────────────────
127
+
128
+ describe '#catch_errors' do
129
+ it 'yields and returns the block result' do
130
+ expect(build_base(db, dump_dir).send(:catch_errors) { 42 }).to eq(42)
131
+ end
132
+
133
+ it 're-raises exceptions' do
134
+ op = build_base(db, dump_dir)
135
+ expect { op.send(:catch_errors) { raise ArgumentError, "boom" } }.to raise_error(ArgumentError, "boom")
136
+ end
137
+ end
138
+
139
+ # ── apply_table_filter (array form) ──────────────────────────────────────────
140
+
141
+ describe '#apply_table_filter' do
142
+ it 'filters an array by table_filter' do
143
+ op = build_base(db, dump_dir, tables: ["users"])
144
+ expect(op.apply_table_filter(["users", "widgets"])).to eq(["users"])
145
+ end
146
+
147
+ it 'excludes tables from an array' do
148
+ op = build_base(db, dump_dir, exclude_tables: ["widgets"])
149
+ expect(op.apply_table_filter(["users", "widgets"])).to eq(["users"])
150
+ end
151
+ end
152
+
153
+ # ── Base.factory ─────────────────────────────────────────────────────────────
154
+
155
+ describe '.factory' do
156
+ it 'returns a Pull instance for :pull type' do
157
+ expect(described_class.factory(:pull, sqlite_memory_url, dump_dir, { default_chunksize: 1000 })).to be_a(Tapsoob::Operation::Pull)
158
+ end
159
+
160
+ it 'returns a Push instance for :push type' do
161
+ expect(described_class.factory(:push, sqlite_memory_url, dump_dir, { default_chunksize: 1000 })).to be_a(Tapsoob::Operation::Push)
162
+ end
163
+
164
+ it 'raises for unknown type' do
165
+ expect { described_class.factory(:unknown, sqlite_memory_url, dump_dir, {}) }
166
+ .to raise_error(RuntimeError, /Unknown Operation Type/)
167
+ end
168
+
169
+ it 'returns a resume instance when opts[:resume] is true' do
170
+ op = build_pull(db, dump_dir)
171
+ op.initialize_dump_directory
172
+ op.pull_schema
173
+
174
+ # Pull#to_hash calls remote_tables_info which requires an active pull run;
175
+ # use the base to_hash binding to get just the serializable fields.
176
+ hash = Tapsoob::Operation::Base.instance_method(:to_hash).bind(op).call
177
+ resumed = described_class.factory(:pull, sqlite_memory_url, dump_dir,
178
+ hash.merge(resume: true, klass: "Tapsoob::Operation::Pull", default_chunksize: 1000))
179
+ expect(resumed).to be_a(Tapsoob::Operation::Pull)
180
+ end
181
+ end
182
+
183
+ # ── exiting? / setup_signal_trap ─────────────────────────────────────────────
184
+
185
+ describe '#exiting?' do
186
+ it 'returns false initially' do
187
+ expect(build_base(db, dump_dir).exiting?).to be false
188
+ end
189
+ end
190
+
191
+ describe '#setup_signal_trap' do
192
+ it 'registers signal handlers without error' do
193
+ op = build_base(db, dump_dir)
194
+ expect { op.setup_signal_trap }.not_to raise_error
195
+ end
196
+ end
197
+
198
+ # ── can_use_pk_partitioning? ─────────────────────────────────────────────────
199
+
200
+ describe '#can_use_pk_partitioning?' do
201
+ it 'returns true for a table with a single integer PK' do
202
+ op = build_base(db, dump_dir)
203
+ expect(op.can_use_pk_partitioning?(:users)).to be true
204
+ end
205
+ end
206
+
207
+ # ── db / default_chunksize ───────────────────────────────────────────────────
208
+
209
+ describe '#default_chunksize' do
210
+ it 'returns the value from opts' do
211
+ expect(build_base(db, dump_dir, default_chunksize: 500).default_chunksize).to eq(500)
212
+ end
213
+ end
214
+
215
+ describe '#table_filter / #exclude_tables' do
216
+ it 'returns empty arrays by default' do
217
+ op = build_base(db, dump_dir)
218
+ expect(op.table_filter).to eq([])
219
+ expect(op.exclude_tables).to eq([])
220
+ end
221
+ end
222
+ end