tapsoob 0.8.6 → 0.8.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,54 @@
1
+ require 'spec_helper'
2
+ require 'tapsoob/config'
3
+
4
+ RSpec.describe Tapsoob::Config do
5
+ describe '.verify_database_url' do
6
+ it 'connects and lists tables without error for a valid URL' do
7
+ expect { described_class.verify_database_url(sqlite_memory_url) }.not_to raise_error
8
+ end
9
+
10
+ it 'uses self.database_url when no argument is given' do
11
+ described_class.database_url = sqlite_memory_url
12
+ expect { described_class.verify_database_url }.not_to raise_error
13
+ end
14
+
15
+ it 'prints an error and exits on an invalid URL' do
16
+ expect {
17
+ expect { described_class.verify_database_url("sqlite:///nonexistent/bad/path.db") }.to raise_error(SystemExit)
18
+ }.to output(/Failed to connect/).to_stdout
19
+ end
20
+ end
21
+ end
22
+
23
+ RSpec.describe Tapsoob do
24
+ describe '.exiting= / #exiting?' do
25
+ it 'sets and reads the exiting flag' do
26
+ obj = Object.new
27
+ obj.extend(Tapsoob)
28
+ Tapsoob.exiting = true
29
+ expect(obj.exiting?).to be true
30
+ Tapsoob.exiting = false
31
+ expect(obj.exiting?).to be false
32
+ end
33
+ end
34
+ end
35
+
36
+ RSpec.describe Tapsoob do
37
+ describe '.log' do
38
+ it 'returns a Logger instance' do
39
+ expect(Tapsoob.log).to be_a(Logger)
40
+ end
41
+
42
+ it 'returns the same instance on repeated calls' do
43
+ expect(Tapsoob.log).to equal(Tapsoob.log)
44
+ end
45
+
46
+ it 'accepts a custom logger' do
47
+ custom = Logger.new(StringIO.new)
48
+ Tapsoob.log = custom
49
+ expect(Tapsoob.log).to equal(custom)
50
+ # Reset to default for other tests
51
+ Tapsoob.log = nil
52
+ end
53
+ end
54
+ end
@@ -83,6 +83,54 @@ RSpec.describe Tapsoob::DataStream do
83
83
  end
84
84
  end
85
85
 
86
+ describe '#error / #error=' do
87
+ it 'defaults to false' do
88
+ expect(stream.error).to be false
89
+ end
90
+
91
+ it 'stores and retrieves error state' do
92
+ stream.error = true
93
+ expect(stream.error).to be true
94
+ end
95
+ end
96
+
97
+ describe '#to_json' do
98
+ it 'returns a JSON string including the class name' do
99
+ json = stream.to_json
100
+ expect(json).to be_a(String)
101
+ parsed = JSON.parse(json)
102
+ expect(parsed).to have_key("klass")
103
+ end
104
+ end
105
+
106
+ describe '#increment' do
107
+ it 'advances the state offset by the row count' do
108
+ initial = stream.state[:offset]
109
+ stream.increment(5)
110
+ expect(stream.state[:offset]).to eq(initial + 5)
111
+ end
112
+ end
113
+
114
+ describe '#verify_stream' do
115
+ it 'sets offset to the current table row count' do
116
+ stream.verify_stream
117
+ expect(stream.state[:offset]).to eq(db[:stream_test].count)
118
+ end
119
+ end
120
+
121
+ describe '#import_rows with extra columns' do
122
+ it 'drops unknown columns and imports the rest' do
123
+ rows = {
124
+ table_name: "stream_test",
125
+ header: ["id", "label", "value", "ghost_column"],
126
+ types: ["integer", "string", "integer", "string"],
127
+ data: [[999, "imported", 42, "extra"]]
128
+ }
129
+ expect { stream.import_rows(rows) }.not_to raise_error
130
+ expect(db[:stream_test].where(id: 999).first).not_to be_nil
131
+ end
132
+ end
133
+
86
134
  describe '#parse_encoded_data' do
87
135
  it 'raises CorruptedData on checksum mismatch' do
88
136
  encoded, _, _ = stream.fetch
@@ -0,0 +1,117 @@
1
+ require 'spec_helper'
2
+ require 'tapsoob/data_stream/file_partition'
3
+
4
+ RSpec.describe Tapsoob::DataStream::FilePartition do
5
+ let(:db) do
6
+ d = connect_sqlite
7
+ d.extension :schema_dumper
8
+ d.create_table(:widgets) { primary_key :id; String :name }
9
+ d
10
+ end
11
+
12
+ after { db.disconnect }
13
+
14
+ def make_ndjson(dir, table, chunks)
15
+ FileUtils.mkdir_p(File.join(dir, 'data'))
16
+ path = File.join(dir, 'data', "#{table}.json")
17
+ File.open(path, 'w') do |f|
18
+ chunks.each { |chunk| f.puts(JSON.generate(chunk)) }
19
+ end
20
+ path
21
+ end
22
+
23
+ def new_stream(state_overrides = {})
24
+ base_state = { table_name: :widgets, chunksize: 10 }
25
+ described_class.new(db, base_state.merge(state_overrides))
26
+ end
27
+
28
+ # ── initialize ────────────────────────────────────────────────────────────────
29
+
30
+ describe '#initialize' do
31
+ it 'sets current_line to start_line when line_range is provided' do
32
+ stream = new_stream(line_range: [3, 7])
33
+ expect(stream.state[:current_line]).to eq(3)
34
+ end
35
+
36
+ it 'leaves current_line unset when no line_range' do
37
+ stream = new_stream
38
+ expect(stream.state[:current_line]).to be_nil
39
+ end
40
+ end
41
+
42
+ # ── complete? ────────────────────────────────────────────────────────────────
43
+
44
+ describe '#complete?' do
45
+ it 'returns true when line_range is nil' do
46
+ expect(new_stream.complete?).to be true
47
+ end
48
+
49
+ it 'returns false when current_line is at start' do
50
+ stream = new_stream(line_range: [0, 4])
51
+ expect(stream.complete?).to be false
52
+ end
53
+
54
+ it 'returns true when current_line exceeds end_line' do
55
+ stream = new_stream(line_range: [0, 2])
56
+ stream.state[:current_line] = 3
57
+ expect(stream.complete?).to be true
58
+ end
59
+ end
60
+
61
+ # ── fetch_file ───────────────────────────────────────────────────────────────
62
+
63
+ describe '#fetch_file' do
64
+ let(:dir) { Dir.mktmpdir }
65
+ after { FileUtils.rm_rf(dir) }
66
+
67
+ it 'returns {} when line_range is nil' do
68
+ stream = new_stream
69
+ expect(stream.fetch_file(dir)).to eq({})
70
+ end
71
+
72
+ it 'reads rows within the assigned line range' do
73
+ chunks = [
74
+ { "table_name" => "widgets", "header" => ["id", "name"], "types" => ["integer", "string"], "data" => [[1, "a"]] },
75
+ { "table_name" => "widgets", "header" => ["id", "name"], "types" => ["integer", "string"], "data" => [[2, "b"]] },
76
+ { "table_name" => "widgets", "header" => ["id", "name"], "types" => ["integer", "string"], "data" => [[3, "c"]] },
77
+ ]
78
+ make_ndjson(dir, :widgets, chunks)
79
+
80
+ stream = new_stream(line_range: [0, 1])
81
+ result = stream.fetch_file(dir)
82
+
83
+ expect(result[:table_name]).to eq("widgets")
84
+ expect(result[:header]).to eq(["id", "name"])
85
+ expect(result[:data]).to eq([[1, "a"], [2, "b"]])
86
+ end
87
+
88
+ it 'advances current_line after each fetch' do
89
+ chunks = 3.times.map { |i| { "table_name" => "widgets", "header" => ["id"], "types" => ["integer"], "data" => [[i]] } }
90
+ make_ndjson(dir, :widgets, chunks)
91
+
92
+ stream = new_stream(line_range: [0, 2], chunksize: 2)
93
+ stream.fetch_file(dir)
94
+ expect(stream.state[:current_line]).to eq(2)
95
+ end
96
+
97
+ it 'marks complete after reading all lines in range' do
98
+ chunks = 2.times.map { |i| { "table_name" => "widgets", "header" => ["id"], "types" => ["integer"], "data" => [[i]] } }
99
+ make_ndjson(dir, :widgets, chunks)
100
+
101
+ stream = new_stream(line_range: [0, 1], chunksize: 10)
102
+ stream.fetch_file(dir)
103
+ expect(stream.complete?).to be true
104
+ end
105
+
106
+ it 'respects skip-duplicates option' do
107
+ chunks = [
108
+ { "table_name" => "widgets", "header" => ["id"], "types" => ["integer"], "data" => [[1], [1], [2]] },
109
+ ]
110
+ make_ndjson(dir, :widgets, chunks)
111
+
112
+ stream = described_class.new(db, { table_name: :widgets, chunksize: 10, line_range: [0, 0] }, { "skip-duplicates": true })
113
+ result = stream.fetch_file(dir)
114
+ expect(result[:data].uniq).to eq(result[:data])
115
+ end
116
+ end
117
+ end
@@ -0,0 +1,121 @@
1
+ require 'spec_helper'
2
+ require 'tapsoob/data_stream/keyed'
3
+
4
+ Sequel.extension :core_extensions
5
+
6
+ RSpec.describe Tapsoob::DataStream::Keyed do
7
+ let(:db) do
8
+ d = connect_sqlite
9
+ d.extension :schema_dumper
10
+ d.create_table(:keyed_items) { primary_key :id; String :label }
11
+ 10.times { |i| d[:keyed_items].insert(label: "item_#{i}") }
12
+ d
13
+ end
14
+
15
+ after { db.disconnect }
16
+
17
+ subject(:stream) { described_class.new(db, { table_name: :keyed_items, chunksize: 4 }) }
18
+
19
+ # ── primary_key / buffer_limit ───────────────────────────────────────────────
20
+
21
+ describe '#primary_key' do
22
+ it 'returns :id for this table' do
23
+ expect(stream.primary_key).to eq(:id)
24
+ end
25
+ end
26
+
27
+ describe '#buffer_limit' do
28
+ it 'returns filter when buffer is non-empty' do
29
+ stream.state[:filter] = 5
30
+ stream.buffer << { id: 1 }
31
+ expect(stream.buffer_limit).to eq(5)
32
+ end
33
+
34
+ it 'returns last_fetched when it is less than filter and buffer is empty' do
35
+ stream.state[:filter] = 10
36
+ stream.state[:last_fetched] = 3
37
+ stream.buffer.clear
38
+ expect(stream.buffer_limit).to eq(3)
39
+ end
40
+
41
+ it 'returns filter when last_fetched >= filter' do
42
+ stream.state[:filter] = 5
43
+ stream.state[:last_fetched] = 5
44
+ stream.buffer.clear
45
+ expect(stream.buffer_limit).to eq(5)
46
+ end
47
+ end
48
+
49
+ # ── calc_limit ───────────────────────────────────────────────────────────────
50
+
51
+ describe '#calc_limit' do
52
+ it 'returns chunksize * 3 outside Sinatra' do
53
+ expect(stream.calc_limit(100)).to eq(300)
54
+ end
55
+ end
56
+
57
+ # ── load_buffer ──────────────────────────────────────────────────────────────
58
+
59
+ describe '#load_buffer' do
60
+ it 'loads rows into the buffer' do
61
+ stream.load_buffer(4)
62
+ expect(stream.buffer.size).to be >= 4
63
+ end
64
+
65
+ it 'updates state[:filter] to the last PK in the buffer' do
66
+ stream.load_buffer(4)
67
+ expect(stream.state[:filter]).to eq(stream.buffer.last[:id])
68
+ end
69
+
70
+ it 'stops early when the table is exhausted' do
71
+ stream.load_buffer(1000)
72
+ expect(stream.buffer.size).to eq(10)
73
+ end
74
+ end
75
+
76
+ # ── fetch_buffered ───────────────────────────────────────────────────────────
77
+
78
+ describe '#fetch_buffered' do
79
+ it 'returns up to chunksize rows' do
80
+ rows = stream.fetch_buffered(4)
81
+ expect(rows.size).to eq(4)
82
+ end
83
+
84
+ it 'records last_fetched as the PK of the last returned row' do
85
+ rows = stream.fetch_buffered(4)
86
+ expect(stream.state[:last_fetched]).to eq(rows.last[:id])
87
+ end
88
+
89
+ it 'returns nil last_fetched when no rows available' do
90
+ # drain the table
91
+ stream.load_buffer(100)
92
+ stream.buffer.clear
93
+ rows = stream.fetch_buffered(4)
94
+ expect(rows).to be_empty
95
+ expect(stream.state[:last_fetched]).to be_nil
96
+ end
97
+ end
98
+
99
+ # ── increment ────────────────────────────────────────────────────────────────
100
+
101
+ describe '#increment' do
102
+ it 'removes n rows from the front of the buffer' do
103
+ stream.load_buffer(6)
104
+ original_fourth = stream.buffer[3]
105
+ stream.increment(3)
106
+ expect(stream.buffer.first).to eq(original_fourth)
107
+ end
108
+ end
109
+
110
+ # ── verify_stream ────────────────────────────────────────────────────────────
111
+
112
+ describe '#verify_stream' do
113
+ it 'sets filter to the max PK and clears last_fetched' do
114
+ stream.state[:last_fetched] = 3
115
+ stream.verify_stream
116
+ max_id = db[:keyed_items].max(:id)
117
+ expect(stream.state[:filter]).to eq(max_id)
118
+ expect(stream.state[:last_fetched]).to be_nil
119
+ end
120
+ end
121
+ end
@@ -0,0 +1,136 @@
1
+ require 'spec_helper'
2
+ require 'tapsoob/progress_event'
3
+
4
+ RSpec.describe Tapsoob::ProgressEvent do
5
+ before do
6
+ described_class.enabled = true
7
+ # Reset throttle state between examples
8
+ described_class.instance_variable_set(:@last_progress_time, {})
9
+ end
10
+
11
+ after { described_class.enabled = false }
12
+
13
+ # ── enabled? ─────────────────────────────────────────────────────────────────
14
+
15
+ describe '.enabled?' do
16
+ it 'reflects the value set by .enabled=' do
17
+ described_class.enabled = false
18
+ expect(described_class.enabled?).to be false
19
+ described_class.enabled = true
20
+ expect(described_class.enabled?).to be true
21
+ end
22
+ end
23
+
24
+ # ── emit ─────────────────────────────────────────────────────────────────────
25
+
26
+ describe '.emit' do
27
+ it 'writes a PROGRESS: JSON line to STDERR when enabled' do
28
+ err = StringIO.new
29
+ stub_const('STDERR', err)
30
+ described_class.emit('test_event', foo: 'bar')
31
+ output = err.string
32
+ expect(output).to include('PROGRESS:')
33
+ data = JSON.parse(output.sub('PROGRESS: ', ''))
34
+ expect(data['event']).to eq('test_event')
35
+ expect(data['foo']).to eq('bar')
36
+ end
37
+
38
+ it 'does nothing when disabled' do
39
+ described_class.enabled = false
40
+ err = StringIO.new
41
+ stub_const('STDERR', err)
42
+ described_class.emit('ignored')
43
+ expect(err.string).to be_empty
44
+ end
45
+ end
46
+
47
+ # ── throttle helpers ─────────────────────────────────────────────────────────
48
+
49
+ describe '.should_emit_progress?' do
50
+ it 'returns true on the first call for a table' do
51
+ expect(described_class.should_emit_progress?(:my_table)).to be true
52
+ end
53
+
54
+ it 'returns false when called again immediately' do
55
+ described_class.should_emit_progress?(:my_table)
56
+ expect(described_class.should_emit_progress?(:my_table)).to be false
57
+ end
58
+ end
59
+
60
+ describe '.clear_throttle' do
61
+ it 'resets state so the next call returns true' do
62
+ described_class.should_emit_progress?(:my_table)
63
+ described_class.clear_throttle(:my_table)
64
+ expect(described_class.should_emit_progress?(:my_table)).to be true
65
+ end
66
+ end
67
+
68
+ # ── high-level event helpers ──────────────────────────────────────────────────
69
+
70
+ {
71
+ schema_start: [3],
72
+ schema_complete: [3],
73
+ data_start: [3, 100],
74
+ data_complete: [3, 100],
75
+ indexes_start: [3],
76
+ indexes_complete: [3],
77
+ sequences_start: [],
78
+ sequences_complete: [],
79
+ }.each do |method, args|
80
+ describe ".#{method}" do
81
+ it 'does not raise' do
82
+ expect { described_class.send(method, *args) }.not_to raise_error
83
+ end
84
+ end
85
+ end
86
+
87
+ describe '.table_start' do
88
+ it 'resets throttle and emits without error' do
89
+ expect { described_class.table_start(:users, 500, workers: 2) }.not_to raise_error
90
+ end
91
+ end
92
+
93
+ describe '.table_progress' do
94
+ it 'emits when throttle allows' do
95
+ described_class.clear_throttle(:users)
96
+ err = StringIO.new
97
+ stub_const('STDERR', err)
98
+ described_class.table_progress(:users, 50, 200)
99
+ expect(err.string).to include('table_progress')
100
+ end
101
+
102
+ it 'skips emission when throttled' do
103
+ described_class.should_emit_progress?(:users) # consume the token
104
+ err = StringIO.new
105
+ stub_const('STDERR', err)
106
+ described_class.table_progress(:users, 50, 200)
107
+ expect(err.string).to be_empty
108
+ end
109
+
110
+ it 'emits 0% when total is 0' do
111
+ described_class.clear_throttle(:empty_table)
112
+ err = StringIO.new
113
+ stub_const('STDERR', err)
114
+ described_class.table_progress(:empty_table, 0, 0)
115
+ data = JSON.parse(err.string.sub('PROGRESS: ', '').strip)
116
+ expect(data['percentage']).to eq(0)
117
+ end
118
+ end
119
+
120
+ describe '.table_complete' do
121
+ it 'clears throttle and emits without error' do
122
+ expect { described_class.table_complete(:users, 500) }.not_to raise_error
123
+ end
124
+ end
125
+
126
+ describe '.error' do
127
+ it 'emits an error event with the message' do
128
+ err = StringIO.new
129
+ stub_const('STDERR', err)
130
+ described_class.error('something broke', table: 'users')
131
+ data = JSON.parse(err.string.sub('PROGRESS: ', '').strip)
132
+ expect(data['event']).to eq('error')
133
+ expect(data['message']).to eq('something broke')
134
+ end
135
+ end
136
+ end