staging_table 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/workflows/rbs.yml +30 -0
- data/.github/workflows/test.yml +124 -0
- data/.gitignore +40 -0
- data/.rspec +3 -0
- data/Gemfile +14 -0
- data/README.md +327 -0
- data/Rakefile +19 -0
- data/lib/staging_table/adapters/base.rb +36 -0
- data/lib/staging_table/adapters/mysql.rb +14 -0
- data/lib/staging_table/adapters/postgresql.rb +16 -0
- data/lib/staging_table/adapters/sqlite.rb +54 -0
- data/lib/staging_table/bulk_inserter.rb +43 -0
- data/lib/staging_table/configuration.rb +12 -0
- data/lib/staging_table/errors.rb +20 -0
- data/lib/staging_table/instrumentation.rb +71 -0
- data/lib/staging_table/model_factory.rb +24 -0
- data/lib/staging_table/session.rb +186 -0
- data/lib/staging_table/transfer_result.rb +36 -0
- data/lib/staging_table/transfer_strategies/insert.rb +33 -0
- data/lib/staging_table/transfer_strategies/upsert.rb +159 -0
- data/lib/staging_table/version.rb +5 -0
- data/lib/staging_table.rb +70 -0
- data/rbs_collection.yaml +18 -0
- data/sig/manifest.yaml +5 -0
- data/sig/staging_table/adapters/base.rbs +18 -0
- data/sig/staging_table/adapters/mysql.rbs +7 -0
- data/sig/staging_table/adapters/postgresql.rbs +7 -0
- data/sig/staging_table/adapters/sqlite.rbs +11 -0
- data/sig/staging_table/bulk_inserter.rbs +16 -0
- data/sig/staging_table/configuration.rbs +8 -0
- data/sig/staging_table/errors.rbs +25 -0
- data/sig/staging_table/instrumentation.rbs +19 -0
- data/sig/staging_table/model_factory.rbs +6 -0
- data/sig/staging_table/session.rbs +40 -0
- data/sig/staging_table/transfer_result.rbs +22 -0
- data/sig/staging_table/transfer_strategies/insert.rbs +15 -0
- data/sig/staging_table/transfer_strategies/upsert.rbs +26 -0
- data/sig/staging_table/version.rbs +3 -0
- data/sig/staging_table.rbs +9 -0
- data/staging_table.gemspec +35 -0
- metadata +195 -0
|
# frozen_string_literal: true

module StagingTable
  # Performs batched multi-row INSERT statements against a staging model's
  # table. Records must be hashes; values are quoted through the model's
  # connection, and the column list is taken from the first record's keys.
  class BulkInserter
    attr_reader :model, :batch_size

    # @param model [Class] ActiveRecord-like model whose table receives rows
    # @param batch_size [Integer] maximum rows per INSERT statement
    def initialize(model, batch_size: 1000)
      @model = model
      @batch_size = batch_size
    end

    # Inserts the given hashes in slices of +batch_size+.
    #
    # @param records [Array<Hash>] rows keyed by column (String or Symbol)
    # @raise [RecordError] when any element is not a Hash
    # @return [nil]
    def insert(records)
      return if records.empty?

      unless records.all? { |r| r.is_a?(Hash) }
        raise RecordError, "All records must be hashes. If passing ActiveRecord objects, use Session#insert which normalizes them automatically."
      end

      columns = records.first.keys.map(&:to_s)
      quoted_columns = columns.map { |name| connection.quote_column_name(name) }.join(", ")
      quoted_table = connection.quote_table_name(model.table_name)

      records.each_slice(batch_size) do |slice|
        tuples = slice.map { |record| value_tuple(record, columns) }.join(", ")
        connection.execute("INSERT INTO #{quoted_table} (#{quoted_columns}) VALUES #{tuples}")
      end
    end

    private

    # Builds one "(v1, v2, ...)" tuple for a record, preferring the symbol
    # form of each column key and falling back to the string form.
    def value_tuple(record, columns)
      quoted = columns.map do |col|
        key = col.to_sym
        quote(record.key?(key) ? record[key] : record[col])
      end
      "(" + quoted.join(", ") + ")"
    end

    def connection
      model.connection
    end

    def quote(value)
      connection.quote(value)
    end
  end
end
# frozen_string_literal: true

module StagingTable
  # Root of the StagingTable exception hierarchy; rescue this to catch
  # any error raised by the gem.
  Error = Class.new(StandardError)

  # Raised when configuration options are invalid.
  ConfigurationError = Class.new(Error)

  # Raised when the database adapter is not supported.
  AdapterError = Class.new(Error)

  # Raised when staging table operations fail.
  TableError = Class.new(Error)

  # Raised when a transfer strategy fails or is misconfigured.
  TransferError = Class.new(Error)

  # Raised when record data is invalid for insertion.
  RecordError = Class.new(Error)
end
# frozen_string_literal: true

require "active_support/notifications"

module StagingTable
  # Provides ActiveSupport::Notifications instrumentation for StagingTable operations.
  #
  # Available events:
  # - staging_table.create_table - When a staging table is created
  # - staging_table.drop_table - When a staging table is dropped
  # - staging_table.insert - When records are inserted into staging
  # - staging_table.transfer - When data is transferred to target table
  # - staging_table.stage - Wraps the entire staging block operation
  #
  # Example:
  #   ActiveSupport::Notifications.subscribe('staging_table.transfer') do |event|
  #     Rails.logger.info "Transfer completed in #{event.duration}ms"
  #     StatsD.measure('staging_table.transfer.duration', event.duration)
  #   end
  #
  module Instrumentation
    NAMESPACE = "staging_table"

    EVENTS = %i[
      create_table
      drop_table
      insert
      transfer
      stage
    ].freeze

    class << self
      # Instruments a block with the given event name.
      #
      # @param event_name [Symbol] The event name (without namespace)
      # @param payload [Hash] Additional payload data
      # @yield The block to instrument
      # @return The result of the block
      def instrument(event_name, payload = {}, &block)
        full_event_name = "#{NAMESPACE}.#{event_name}"
        ActiveSupport::Notifications.instrument(full_event_name, payload, &block)
      end

      # Subscribe to a StagingTable event.
      #
      # Accepts either a bare event name (:transfer) or an already namespaced
      # one ("staging_table.transfer"). The name is always normalized to a
      # String before being handed to ActiveSupport — previously a namespaced
      # Symbol was passed through unchanged, and Notifications only matches
      # String/Regexp patterns reliably.
      #
      # @param event_name [Symbol, String] Event name (with or without namespace)
      # @yield [event] Block called for each event
      # @yieldparam event [ActiveSupport::Notifications::Event]
      # @return [ActiveSupport::Notifications::Fanout::Subscribers::Evented]
      def subscribe(event_name, &block)
        name = event_name.to_s
        full_name = name.start_with?("#{NAMESPACE}.") ? name : "#{NAMESPACE}.#{name}"
        ActiveSupport::Notifications.subscribe(full_name, &block)
      end

      # Unsubscribe from a StagingTable event.
      #
      # @param subscriber [Object] The subscriber to remove
      def unsubscribe(subscriber)
        ActiveSupport::Notifications.unsubscribe(subscriber)
      end

      # Subscribe to all StagingTable events.
      #
      # @yield [event] Block called for each event
      # @return [ActiveSupport::Notifications::Fanout::Subscribers::Evented]
      def subscribe_all(&block)
        ActiveSupport::Notifications.subscribe(/^#{NAMESPACE}\./o, &block)
      end
    end
  end
end
# frozen_string_literal: true

module StagingTable
  # Builds an anonymous ActiveRecord subclass of a source model pointed at a
  # staging table, so staged rows can be queried/written with the source
  # model's column handling without defining a named constant.
  class ModelFactory
    # @param source_model [Class] the ActiveRecord model to mirror
    # @param table_name [String] the staging table the subclass reads/writes
    # @param excluded_columns [Array] columns marked as ignored on the subclass
    # @return [Class] an anonymous subclass of source_model
    def self.build(source_model, table_name, excluded_columns: [])
      Class.new(source_model) do
        self.table_name = table_name
        self.ignored_columns = excluded_columns

        # Ensure we don't inherit STI behavior unless intended for the temp table
        # NOTE(review): STI is preserved only for the default "type" column with a
        # real "type" column present; a model using a custom inheritance_column is
        # demoted to non-STI here — confirm that is intended.
        self.inheritance_column = nil unless source_model.inheritance_column == "type" && source_model.columns_hash["type"]

        # Report the parent's model name so form helpers / routing that inspect
        # model_name behave as if they were given the source model.
        def self.model_name
          ActiveModel::Name.new(self, nil, superclass.name)
        end

        # Prevent the dynamic class from being added to the global constant namespace
        def self.name
          "#{superclass.name}::Staging_#{table_name}"
        end
      end
    end
  end
end
# frozen_string_literal: true

require "securerandom"

module StagingTable
  # Manages the lifecycle of one staging run for a model: create a uniquely
  # named staging table, bulk-insert rows into it, transfer them to the real
  # table via a strategy, and drop the staging table again. Unknown methods
  # are delegated to the staging model so the session can be queried directly.
  class Session
    attr_reader :source_model, :staging_model, :options

    # Supported callback options:
    # - before_insert: ->(session) { ... }
    # - after_insert: ->(session, records) { ... }
    # - before_transfer: ->(session) { ... }
    # - after_transfer: ->(session, result) { ... }
    CALLBACK_OPTIONS = %i[before_insert after_insert before_transfer after_transfer].freeze

    # @param source_model [Class] ActiveRecord model whose table is the target
    # @param options [Hash] batch_size, transfer_strategy, conflict options and
    #   callbacks; unset values fall back to StagingTable.configuration defaults
    def initialize(source_model, **options)
      @source_model = source_model
      config = StagingTable.configuration
      @callbacks = options.slice(*CALLBACK_OPTIONS)
      @options = {
        batch_size: config.default_batch_size,
        transfer_strategy: config.default_transfer_strategy
      }.merge(options.except(*CALLBACK_OPTIONS))
      @table_created = false
    end

    # Creates the staging table and builds the staging model. Idempotent:
    # a second call is a no-op until #drop_table is called.
    def create_table
      return if @table_created

      payload = {
        source_model: source_model,
        source_table: source_model.table_name,
        staging_table: staging_table_name
      }

      Instrumentation.instrument(:create_table, payload) do
        adapter.create_table(staging_table_name, source_model.table_name, options)
        @staging_model = ModelFactory.build(source_model, staging_table_name, excluded_columns: options[:excluded_columns] || [])
        @table_created = true
      end
    end

    # Drops the staging table; no-op if it was never created (or already dropped).
    def drop_table
      return unless @table_created

      payload = {
        source_model: source_model,
        source_table: source_model.table_name,
        staging_table: staging_table_name
      }

      Instrumentation.instrument(:drop_table, payload) do
        adapter.drop_table(staging_table_name)
        @table_created = false
        @staging_model = nil
      end
    end

    # Normalizes and bulk-inserts records into the staging table, firing the
    # before_insert/after_insert callbacks around the work.
    #
    # @param records [Array<Hash>, Array<ActiveRecord::Base>, ActiveRecord::Relation]
    # @raise [TableError] if the staging table has not been created
    def insert(records)
      ensure_table_created!

      run_callback(:before_insert, self)

      normalized_records = normalize_records(records)

      payload = {
        source_model: source_model,
        source_table: source_model.table_name,
        staging_table: staging_table_name,
        record_count: normalized_records.size,
        batch_size: options[:batch_size] || 1000
      }

      Instrumentation.instrument(:insert, payload) do
        BulkInserter.new(staging_model, batch_size: options[:batch_size] || 1000).insert(normalized_records)
      end

      run_callback(:after_insert, self, normalized_records)
    end

    # Inserts the rows of an ActiveRecord relation into the staging table in
    # batches. Note: currently materializes every batch's attribute hashes in
    # memory (for the after_insert callback) rather than doing a direct
    # INSERT INTO ... SELECT.
    #
    # @param relation [ActiveRecord::Relation]
    # @raise [TableError] if the staging table has not been created
    def insert_from_query(relation)
      ensure_table_created!

      run_callback(:before_insert, self)

      # TODO: Implement direct INSERT INTO SELECT for query-based insertion
      # For now, we'll iterate, but this should be optimized
      all_records = []

      payload = {
        source_model: source_model,
        source_table: source_model.table_name,
        staging_table: staging_table_name,
        batch_size: options[:batch_size] || 1000
      }

      Instrumentation.instrument(:insert, payload) do |instrumentation_payload|
        relation.find_in_batches(batch_size: options[:batch_size] || 1000) do |batch|
          records = batch.map(&:attributes)
          all_records.concat(records)
          BulkInserter.new(staging_model, batch_size: options[:batch_size] || 1000).insert(records)
        end
        instrumentation_payload[:record_count] = all_records.size
      end

      run_callback(:after_insert, self, all_records)
    end

    # Moves staged rows to the target table using the configured strategy.
    #
    # @return [TransferResult]
    # @raise [TableError] if the staging table has not been created
    # @raise [ConfigurationError] for an unknown transfer strategy
    def transfer
      ensure_table_created!

      run_callback(:before_transfer, self)

      strategy_name = options[:transfer_strategy].to_s.camelize
      begin
        strategy_class = TransferStrategies.const_get(strategy_name)
      rescue NameError
        raise ConfigurationError, "Invalid transfer strategy: #{options[:transfer_strategy]}. Available strategies: insert, upsert."
      end

      payload = {
        source_model: source_model,
        source_table: source_model.table_name,
        staging_table: staging_table_name,
        strategy: options[:transfer_strategy],
        staged_count: staging_model.count
      }

      result = Instrumentation.instrument(:transfer, payload) do |instrumentation_payload|
        transfer_result = strategy_class.new(source_model, staging_model, options).transfer
        instrumentation_payload[:result] = transfer_result
        transfer_result
      end

      run_callback(:after_transfer, self, result)

      result
    end

    # Delegate unknown methods to the staging model (e.g. for querying).
    # Keyword arguments are captured and forwarded explicitly: with a bare
    # `*args` signature, Ruby 3 delegation collapses kwargs into a trailing
    # positional hash, breaking keyword-taking query methods.
    def method_missing(method, *args, **kwargs, &block)
      if staging_model.respond_to?(method)
        staging_model.send(method, *args, **kwargs, &block)
      else
        super
      end
    end

    def respond_to_missing?(method, include_private = false)
      staging_model.respond_to?(method, include_private) || super
    end

    private

    # Invokes the named callback (if configured) with the given arguments.
    def run_callback(name, *args)
      callback = @callbacks[name]
      return unless callback

      callback.call(*args)
    end

    def adapter
      @adapter ||= Adapters::Base.for(source_model.connection)
    end

    # Unique per-session table name; the random suffix avoids collisions
    # between concurrent sessions staging the same table.
    def staging_table_name
      @staging_table_name ||= "staging_#{source_model.table_name}_#{SecureRandom.hex(8)}"
    end

    def ensure_table_created!
      raise TableError, "Staging table has not been created. You must call #create_table or use StagingTable.stage with a block before inserting or transferring data." unless @table_created
    end

    # Converts relations/AR objects to attribute hashes; plain hashes (and
    # anything else) pass through for BulkInserter to validate.
    def normalize_records(records)
      if records.is_a?(ActiveRecord::Relation)
        records.map(&:attributes)
      elsif records.respond_to?(:to_a)
        records.to_a.map do |record|
          record.is_a?(ActiveRecord::Base) ? record.attributes : record
        end
      else
        records
      end
    end
  end
end
# frozen_string_literal: true

module StagingTable
  # Summary of a transfer operation: how many rows were inserted, updated,
  # and skipped, plus the derived total. Counts are fixed at construction.
  class TransferResult
    attr_reader :inserted, :updated, :skipped, :total

    def initialize(inserted: 0, updated: 0, skipped: 0)
      @inserted = inserted
      @updated  = updated
      @skipped  = skipped
      @total    = inserted + updated + skipped
    end

    # @return [Hash] the counts keyed by :inserted, :updated, :skipped, :total
    def to_h
      { inserted: inserted, updated: updated, skipped: skipped, total: total }
    end

    # True when at least one row was actually written (inserted or updated).
    def success?
      inserted > 0 || updated > 0
    end

    # True when the transfer touched no rows at all.
    def empty?
      total.zero?
    end

    def inspect
      "#<StagingTable::TransferResult " \
        "inserted=#{inserted} updated=#{updated} skipped=#{skipped} total=#{total}>"
    end
  end
end
# frozen_string_literal: true

module StagingTable
  module TransferStrategies
    # Moves every staged row into the target table with a single
    # INSERT INTO ... SELECT statement.
    class Insert
      def initialize(source_model, staging_model, options = {})
        @source_model = source_model
        @staging_model = staging_model
        @options = options
        @connection = source_model.connection
      end

      # @return [TransferResult] reports every staged row as inserted; a
      #   constraint violation would surface as an adapter exception instead.
      def transfer
        staged = @staging_model.count
        return TransferResult.new if staged.zero?

        column_sql = @staging_model.column_names
                                   .map { |name| @connection.quote_column_name(name) }
                                   .join(", ")
        target_table = @connection.quote_table_name(@source_model.table_name)
        staging_table = @connection.quote_table_name(@staging_model.table_name)

        @connection.execute(<<~SQL)
          INSERT INTO #{target_table} (#{column_sql})
          SELECT #{column_sql} FROM #{staging_table}
        SQL

        # For plain INSERT, all staged records are inserted (assuming no constraint violations)
        TransferResult.new(inserted: staged)
      end
    end
  end
end
# frozen_string_literal: true

module StagingTable
  module TransferStrategies
    # Transfers staged rows into the target table with adapter-specific
    # upsert SQL (ON CONFLICT / ON DUPLICATE KEY / INSERT OR IGNORE).
    #
    # NOTE(review): inserted/updated/skipped counts are derived from a
    # count-before/count-after diff on the target table, so concurrent
    # writers to the target during a transfer would skew the numbers —
    # confirm transfers run inside a transaction or exclusive window.
    class Upsert
      def initialize(source_model, staging_model, options = {})
        @source_model = source_model
        @staging_model = staging_model
        @options = options
        @connection = source_model.connection
      end

      # Dispatches to the adapter-specific upsert implementation.
      #
      # @return [TransferResult]
      # @raise [AdapterError] for adapters other than PostgreSQL/MySQL/SQLite
      def transfer
        @staged_count = @staging_model.count
        return TransferResult.new if @staged_count.zero?

        adapter_name = @connection.adapter_name.downcase
        case adapter_name
        when /postgresql/
          postgresql_upsert
        when /mysql/
          mysql_upsert
        when /sqlite/
          sqlite_upsert
        else
          raise AdapterError, "Upsert strategy not supported for adapter: #{adapter_name}. Supported adapters are PostgreSQL, MySQL, and SQLite."
        end
      end

      private

      # INSERT ... ON CONFLICT for PostgreSQL. Requires :conflict_target;
      # :conflict_action == :ignore maps to DO NOTHING, otherwise every
      # non-conflict-target, non-id column is updated from EXCLUDED.
      def postgresql_upsert
        conflict_target = Array(@options[:conflict_target])
        if conflict_target.empty?
          raise ConfigurationError, "PostgreSQL upsert requires :conflict_target option specifying the unique constraint columns. Example: transfer_strategy: :upsert, conflict_target: [:email]"
        end

        columns = column_names.map { |c| quote_column(c) }.join(", ")
        conflict_target_sql = conflict_target.map { |c| quote_column(c) }.join(", ")
        source_table = quote_table(@source_model.table_name)
        staging_table = quote_table(@staging_model.table_name)

        if @options[:conflict_action] == :ignore
          # Use RETURNING to count actual inserts (rows where xmax = 0 are new inserts)
          # NOTE(review): the comment above is stale — the code counts via a
          # before/after COUNT diff, not RETURNING; confirm which is intended.
          sql = "INSERT INTO #{source_table} (#{columns}) SELECT #{columns} FROM #{staging_table}"
          sql += " ON CONFLICT (#{conflict_target_sql}) DO NOTHING"

          count_before = @source_model.count
          @connection.execute(sql)
          count_after = @source_model.count

          inserted = count_after - count_before
          skipped = @staged_count - inserted
          TransferResult.new(inserted: inserted, skipped: skipped)
        else
          updates = column_names.reject { |c| conflict_target.map(&:to_s).include?(c.to_s) || c == "id" }
                                .map { |c| "#{quote_column(c)} = EXCLUDED.#{quote_column(c)}" }.join(", ")

          # Count existing records that match conflict target before upsert
          count_before = @source_model.count
          @connection.execute(
            "INSERT INTO #{source_table} (#{columns}) SELECT #{columns} FROM #{staging_table} " \
            "ON CONFLICT (#{conflict_target_sql}) DO UPDATE SET #{updates}"
          )
          count_after = @source_model.count

          # Rows that didn't grow the table are assumed to have been updated.
          inserted = count_after - count_before
          updated = @staged_count - inserted
          TransferResult.new(inserted: inserted, updated: updated)
        end
      end

      # INSERT IGNORE / ON DUPLICATE KEY UPDATE for MySQL. Unlike the
      # PostgreSQL/SQLite paths, no :conflict_target is needed — MySQL
      # resolves conflicts against any unique index.
      def mysql_upsert
        columns = column_names.map { |c| quote_column(c) }.join(", ")
        source_table = quote_table(@source_model.table_name)
        staging_table = quote_table(@staging_model.table_name)

        count_before = @source_model.count

        if @options[:conflict_action] == :ignore
          sql = "INSERT IGNORE INTO #{source_table} (#{columns}) SELECT #{columns} FROM #{staging_table}"
          @connection.execute(sql)

          count_after = @source_model.count
          inserted = count_after - count_before
          skipped = @staged_count - inserted
          TransferResult.new(inserted: inserted, skipped: skipped)
        else
          sql = "INSERT INTO #{source_table} (#{columns}) SELECT #{columns} FROM #{staging_table}"
          # VALUES(col) refers to the value that would have been inserted.
          updates = column_names.reject { |c| c == "id" }
                                .map { |c| "#{quote_column(c)} = VALUES(#{quote_column(c)})" }.join(", ")
          sql += " ON DUPLICATE KEY UPDATE #{updates}"
          @connection.execute(sql)

          count_after = @source_model.count
          inserted = count_after - count_before
          updated = @staged_count - inserted
          TransferResult.new(inserted: inserted, updated: updated)
        end
      end

      # INSERT OR IGNORE / ON CONFLICT DO UPDATE for SQLite. The update path
      # executes one statement per staged row (O(n) round trips).
      def sqlite_upsert
        conflict_target = Array(@options[:conflict_target])
        if conflict_target.empty?
          raise ConfigurationError, "SQLite upsert requires :conflict_target option specifying the unique constraint columns. Example: transfer_strategy: :upsert, conflict_target: [:email]"
        end

        columns = column_names.map { |c| quote_column(c) }.join(", ")
        source_table = quote_table(@source_model.table_name)
        staging_table = quote_table(@staging_model.table_name)

        count_before = @source_model.count

        if @options[:conflict_action] == :ignore
          sql = "INSERT OR IGNORE INTO #{source_table} (#{columns}) SELECT #{columns} FROM #{staging_table}"
          @connection.execute(sql)

          count_after = @source_model.count
          inserted = count_after - count_before
          skipped = @staged_count - inserted
          TransferResult.new(inserted: inserted, skipped: skipped)
        else
          conflict_target_sql = conflict_target.map { |c| quote_column(c) }.join(", ")
          updates = column_names.reject { |c| conflict_target.map(&:to_s).include?(c.to_s) || c == "id" }
                                .map { |c| "#{quote_column(c)} = excluded.#{quote_column(c)}" }.join(", ")

          # Build individual upsert statements for each record
          @staging_model.all.each do |record|
            values_sql = column_names.map { |c| quote(record[c]) }.join(", ")
            upsert_sql = "INSERT INTO #{source_table} (#{columns}) VALUES (#{values_sql}) " \
                         "ON CONFLICT (#{conflict_target_sql}) DO UPDATE SET #{updates}"
            @connection.execute(upsert_sql)
          end

          count_after = @source_model.count
          inserted = count_after - count_before
          updated = @staged_count - inserted
          TransferResult.new(inserted: inserted, updated: updated)
        end
      end

      def quote(value)
        @connection.quote(value)
      end

      def column_names
        @staging_model.column_names
      end

      def quote_column(name)
        @connection.quote_column_name(name)
      end

      def quote_table(name)
        @connection.quote_table_name(name)
      end
    end
  end
end
# frozen_string_literal: true

require "active_record"
require "staging_table/version"
require "staging_table/errors"
require "staging_table/configuration"
require "staging_table/instrumentation"
require "staging_table/transfer_result"
require "staging_table/session"
require "staging_table/model_factory"
require "staging_table/bulk_inserter"
require "staging_table/adapters/base"
require "staging_table/adapters/postgresql"
require "staging_table/adapters/mysql"
require "staging_table/adapters/sqlite"
require "staging_table/transfer_strategies/insert"
require "staging_table/transfer_strategies/upsert"

module StagingTable
  class << self
    # Lazily-built global configuration object.
    def configuration
      @configuration ||= Configuration.new
    end

    # Yields the global configuration for block-style setup.
    def configure
      yield(configuration)
    end

    # Stage data for bulk import into a model's table.
    #
    # @param source_model [Class] The ActiveRecord model to stage data for
    # @param options [Hash] Configuration options
    # @option options [Integer] :batch_size Number of records per batch (default: 1000)
    # @option options [Symbol] :transfer_strategy :insert or :upsert (default: :insert)
    # @option options [Array<Symbol>] :conflict_target Columns for upsert conflict detection
    # @option options [Symbol] :conflict_action :update or :ignore for upsert conflicts
    # @option options [Proc] :before_insert Called before inserting into staging
    # @option options [Proc] :after_insert Called after inserting into staging
    # @option options [Proc] :before_transfer Called before transferring to target
    # @option options [Proc] :after_transfer Called after transferring to target
    #
    # @yield [session] Block for staging operations
    # @yieldparam session [Session] The staging session
    # @return [TransferResult, Session] TransferResult when block given, Session otherwise
    def stage(source_model, **options, &block)
      session = Session.new(source_model, **options)

      # Without a block the caller owns the session lifecycle: the staging
      # table is created and the session is returned for manual use.
      return session.tap(&:create_table) unless block

      payload = {
        source_model: source_model,
        source_table: source_model.table_name,
        options: options.except(*Session::CALLBACK_OPTIONS)
      }

      Instrumentation.instrument(:stage, payload) do |instrumentation_payload|
        begin
          session.create_table
          yield(session)
          result = session.transfer
          instrumentation_payload[:result] = result
          result
        ensure
          # Always clean up the staging table, even if the block or the
          # transfer raised.
          session.drop_table
        end
      end
    end
  end
end
data/rbs_collection.yaml
ADDED
|
# Run `rbs collection install` to install required type definitions
# See: https://github.com/ruby/rbs/blob/master/docs/collection.md

sources:
  - type: git
    name: ruby/gem_rbs_collection
    remote: https://github.com/ruby/gem_rbs_collection.git
    revision: main
    repo_dir: gems

path: .gem_rbs_collection

gems:
  - name: activerecord
  - name: activesupport
  # Ignore gems without RBS definitions in the collection
  - name: prism
    ignore: true