staging_table 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. checksums.yaml +7 -0
  2. data/.github/workflows/rbs.yml +30 -0
  3. data/.github/workflows/test.yml +124 -0
  4. data/.gitignore +40 -0
  5. data/.rspec +3 -0
  6. data/Gemfile +14 -0
  7. data/README.md +327 -0
  8. data/Rakefile +19 -0
  9. data/lib/staging_table/adapters/base.rb +36 -0
  10. data/lib/staging_table/adapters/mysql.rb +14 -0
  11. data/lib/staging_table/adapters/postgresql.rb +16 -0
  12. data/lib/staging_table/adapters/sqlite.rb +54 -0
  13. data/lib/staging_table/bulk_inserter.rb +43 -0
  14. data/lib/staging_table/configuration.rb +12 -0
  15. data/lib/staging_table/errors.rb +20 -0
  16. data/lib/staging_table/instrumentation.rb +71 -0
  17. data/lib/staging_table/model_factory.rb +24 -0
  18. data/lib/staging_table/session.rb +186 -0
  19. data/lib/staging_table/transfer_result.rb +36 -0
  20. data/lib/staging_table/transfer_strategies/insert.rb +33 -0
  21. data/lib/staging_table/transfer_strategies/upsert.rb +159 -0
  22. data/lib/staging_table/version.rb +5 -0
  23. data/lib/staging_table.rb +70 -0
  24. data/rbs_collection.yaml +18 -0
  25. data/sig/manifest.yaml +5 -0
  26. data/sig/staging_table/adapters/base.rbs +18 -0
  27. data/sig/staging_table/adapters/mysql.rbs +7 -0
  28. data/sig/staging_table/adapters/postgresql.rbs +7 -0
  29. data/sig/staging_table/adapters/sqlite.rbs +11 -0
  30. data/sig/staging_table/bulk_inserter.rbs +16 -0
  31. data/sig/staging_table/configuration.rbs +8 -0
  32. data/sig/staging_table/errors.rbs +25 -0
  33. data/sig/staging_table/instrumentation.rbs +19 -0
  34. data/sig/staging_table/model_factory.rbs +6 -0
  35. data/sig/staging_table/session.rbs +40 -0
  36. data/sig/staging_table/transfer_result.rbs +22 -0
  37. data/sig/staging_table/transfer_strategies/insert.rbs +15 -0
  38. data/sig/staging_table/transfer_strategies/upsert.rbs +26 -0
  39. data/sig/staging_table/version.rbs +3 -0
  40. data/sig/staging_table.rbs +9 -0
  41. data/staging_table.gemspec +35 -0
  42. metadata +195 -0
@@ -0,0 +1,43 @@
1
# frozen_string_literal: true

module StagingTable
  # Writes hash records into a model's table using batched multi-row
  # INSERT statements, quoting identifiers and values through the
  # model's connection.
  class BulkInserter
    attr_reader :model, :batch_size

    # @param model [Class] ActiveRecord-like model providing #connection and #table_name
    # @param batch_size [Integer] maximum number of rows per INSERT statement
    def initialize(model, batch_size: 1000)
      @model = model
      @batch_size = batch_size
    end

    # Inserts the given records in slices of +batch_size+. The column
    # order comes from the first record's keys; each record may use
    # symbol or string keys (symbol is tried first).
    #
    # @param records [Array<Hash>] rows to insert
    # @raise [RecordError] when any element is not a Hash
    def insert(records)
      return if records.empty?

      if records.any? { |entry| !entry.is_a?(Hash) }
        raise RecordError, "All records must be hashes. If passing ActiveRecord objects, use Session#insert which normalizes them automatically."
      end

      column_list = records.first.keys.map(&:to_s)
      header_sql = column_list.map { |name| connection.quote_column_name(name) }.join(", ")
      table_sql = connection.quote_table_name(model.table_name)

      records.each_slice(batch_size) do |slice|
        tuples = slice.map { |row| row_tuple(column_list, row) }.join(", ")
        connection.execute("INSERT INTO #{table_sql} (#{header_sql}) VALUES #{tuples}")
      end
    end

    private

    # Builds one "(v1, v2, ...)" values tuple for a single record.
    def row_tuple(column_list, row)
      quoted = column_list.map do |name|
        raw = row.key?(name.to_sym) ? row[name.to_sym] : row[name]
        quote(raw)
      end
      "(" + quoted.join(", ") + ")"
    end

    def connection
      model.connection
    end

    def quote(value)
      connection.quote(value)
    end
  end
end
@@ -0,0 +1,12 @@
1
# frozen_string_literal: true

module StagingTable
  # Gem-wide defaults, normally tuned via StagingTable.configure.
  class Configuration
    # Rows per INSERT batch unless a session overrides it.
    attr_accessor :default_batch_size
    # Strategy Session#transfer uses when none is given (:insert or :upsert).
    attr_accessor :default_transfer_strategy

    def initialize
      self.default_batch_size = 1000
      self.default_transfer_strategy = :insert
    end
  end
end
@@ -0,0 +1,20 @@
1
# frozen_string_literal: true

module StagingTable
  # Base class for every error raised by this gem; rescue this to
  # catch anything StagingTable raises.
  class Error < StandardError; end

  # Raised when configuration options are invalid
  class ConfigurationError < Error; end

  # Raised when the database adapter is not supported
  class AdapterError < Error; end

  # Raised when staging table operations fail
  class TableError < Error; end

  # Raised when transfer strategy fails or is misconfigured
  class TransferError < Error; end

  # Raised when record data is invalid for insertion
  class RecordError < Error; end
end
@@ -0,0 +1,71 @@
1
# frozen_string_literal: true

require "active_support/notifications"

module StagingTable
  # Provides ActiveSupport::Notifications instrumentation for StagingTable operations.
  #
  # Available events:
  # - staging_table.create_table - When a staging table is created
  # - staging_table.drop_table - When a staging table is dropped
  # - staging_table.insert - When records are inserted into staging
  # - staging_table.transfer - When data is transferred to target table
  # - staging_table.stage - Wraps the entire staging block operation
  #
  # Example:
  #   ActiveSupport::Notifications.subscribe('staging_table.transfer') do |event|
  #     Rails.logger.info "Transfer completed in #{event.duration}ms"
  #     StatsD.measure('staging_table.transfer.duration', event.duration)
  #   end
  #
  module Instrumentation
    NAMESPACE = "staging_table"

    # Advertised event names (without namespace) emitted by the gem.
    EVENTS = %i[
      create_table
      drop_table
      insert
      transfer
      stage
    ].freeze

    class << self
      # Instruments a block with the given event name.
      #
      # @param event_name [Symbol] The event name (without namespace)
      # @param payload [Hash] Additional payload data
      # @yield The block to instrument
      # @return The result of the block
      def instrument(event_name, payload = {}, &block)
        full_event_name = "#{NAMESPACE}.#{event_name}"
        ActiveSupport::Notifications.instrument(full_event_name, payload, &block)
      end

      # Subscribe to a StagingTable event.
      #
      # Accepts either a bare event name (:transfer) or a fully
      # namespaced one ("staging_table.transfer").
      #
      # @param event_name [Symbol, String] Event name (with or without namespace)
      # @yield [event] Block called for each event
      # @yieldparam event [ActiveSupport::Notifications::Event]
      # @return [ActiveSupport::Notifications::Fanout::Subscribers::Evented]
      def subscribe(event_name, &block)
        # Normalize Symbols to Strings: AS::Notifications patterns are
        # matched against String event names, so a Symbol pattern would
        # never fire. Also require the full "staging_table." prefix —
        # a bare start_with?(NAMESPACE) check wrongly accepted names
        # like "staging_tablex.foo" as already namespaced.
        name = event_name.to_s
        full_name = name.start_with?("#{NAMESPACE}.") ? name : "#{NAMESPACE}.#{name}"
        ActiveSupport::Notifications.subscribe(full_name, &block)
      end

      # Unsubscribe from a StagingTable event.
      #
      # @param subscriber [Object] The subscriber to remove
      def unsubscribe(subscriber)
        ActiveSupport::Notifications.unsubscribe(subscriber)
      end

      # Subscribe to all StagingTable events.
      #
      # @yield [event] Block called for each event
      # @return [ActiveSupport::Notifications::Fanout::Subscribers::Evented]
      def subscribe_all(&block)
        ActiveSupport::Notifications.subscribe(/^#{NAMESPACE}\./o, &block)
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
# frozen_string_literal: true

module StagingTable
  # Builds anonymous ActiveRecord model subclasses bound to a staging table.
  class ModelFactory
    # Creates an anonymous subclass of +source_model+ that reads from and
    # writes to +table_name+ instead of the source table.
    #
    # @param source_model [Class] ActiveRecord model to subclass
    # @param table_name [String] staging table the subclass points at
    # @param excluded_columns [Array] columns the staging model should ignore
    # @return [Class] the anonymous staging model class
    def self.build(source_model, table_name, excluded_columns: [])
      Class.new(source_model) do
        self.table_name = table_name
        self.ignored_columns = excluded_columns

        # Ensure we don't inherit STI behavior unless intended for the temp table:
        # keep the inheritance column only when the source genuinely uses a
        # "type" column for STI.
        self.inheritance_column = nil unless source_model.inheritance_column == "type" && source_model.columns_hash["type"]

        # Reuse the parent's model name so naming-based behavior (routes,
        # i18n, form helpers) acts like the source model rather than an
        # anonymous class.
        def self.model_name
          ActiveModel::Name.new(self, nil, superclass.name)
        end

        # Give the anonymous class a stable, informative name for logs and
        # inspection without registering any global constant.
        def self.name
          "#{superclass.name}::Staging_#{table_name}"
        end
      end
    end
  end
end
@@ -0,0 +1,186 @@
1
# frozen_string_literal: true

require "securerandom"

module StagingTable
  # Orchestrates one staging run for a model: creates a uniquely named
  # staging table, bulk-loads records into it, transfers them into the
  # source model's table via a configurable strategy, then drops the
  # staging table. Instances are normally built by StagingTable.stage.
  class Session
    attr_reader :source_model, :staging_model, :options

    # Supported callback options:
    # - before_insert: ->(session) { ... }
    # - after_insert: ->(session, records) { ... }
    # - before_transfer: ->(session) { ... }
    # - after_transfer: ->(session, result) { ... }
    CALLBACK_OPTIONS = %i[before_insert after_insert before_transfer after_transfer].freeze

    # @param source_model [Class] ActiveRecord model whose table is the transfer target
    # @param options [Hash] session options; callback keys (CALLBACK_OPTIONS) are
    #   held separately, everything else is merged over the configuration defaults
    def initialize(source_model, **options)
      @source_model = source_model
      config = StagingTable.configuration
      # Callbacks are split out so they are never forwarded to adapters/strategies.
      @callbacks = options.slice(*CALLBACK_OPTIONS)
      @options = {
        batch_size: config.default_batch_size,
        transfer_strategy: config.default_transfer_strategy
      }.merge(options.except(*CALLBACK_OPTIONS))
      @table_created = false
    end

    # Creates the staging table and its backing model. Idempotent: a
    # second call returns without touching the database.
    def create_table
      return if @table_created

      payload = {
        source_model: source_model,
        source_table: source_model.table_name,
        staging_table: staging_table_name
      }

      Instrumentation.instrument(:create_table, payload) do
        adapter.create_table(staging_table_name, source_model.table_name, options)
        @staging_model = ModelFactory.build(source_model, staging_table_name, excluded_columns: options[:excluded_columns] || [])
        @table_created = true
      end
    end

    # Drops the staging table and discards the staging model. No-op when
    # the table was never created (or was already dropped).
    def drop_table
      return unless @table_created

      payload = {
        source_model: source_model,
        source_table: source_model.table_name,
        staging_table: staging_table_name
      }

      Instrumentation.instrument(:drop_table, payload) do
        adapter.drop_table(staging_table_name)
        @table_created = false
        @staging_model = nil
      end
    end

    # Bulk-inserts records into the staging table.
    #
    # @param records [Array<Hash>, Array<ActiveRecord::Base>, ActiveRecord::Relation]
    #   anything normalize_records can coerce to an array of attribute hashes
    # @raise [TableError] if the staging table has not been created yet
    def insert(records)
      ensure_table_created!

      run_callback(:before_insert, self)

      normalized_records = normalize_records(records)

      payload = {
        source_model: source_model,
        source_table: source_model.table_name,
        staging_table: staging_table_name,
        record_count: normalized_records.size,
        batch_size: options[:batch_size] || 1000
      }

      Instrumentation.instrument(:insert, payload) do
        BulkInserter.new(staging_model, batch_size: options[:batch_size] || 1000).insert(normalized_records)
      end

      run_callback(:after_insert, self, normalized_records)
    end

    # Loads the results of an ActiveRecord relation into the staging table
    # batch by batch.
    #
    # NOTE(review): every batch's attributes are also accumulated in memory
    # (for the after_insert callback), so very large relations pay a
    # memory cost proportional to the relation size.
    #
    # @param relation [ActiveRecord::Relation]
    def insert_from_query(relation)
      ensure_table_created!

      run_callback(:before_insert, self)

      # TODO: Implement direct INSERT INTO SELECT for query-based insertion
      # For now, we'll iterate, but this should be optimized
      all_records = []

      payload = {
        source_model: source_model,
        source_table: source_model.table_name,
        staging_table: staging_table_name,
        batch_size: options[:batch_size] || 1000
      }

      Instrumentation.instrument(:insert, payload) do |instrumentation_payload|
        relation.find_in_batches(batch_size: options[:batch_size] || 1000) do |batch|
          records = batch.map(&:attributes)
          all_records.concat(records)
          BulkInserter.new(staging_model, batch_size: options[:batch_size] || 1000).insert(records)
        end
        # The total is only known after all batches have been walked.
        instrumentation_payload[:record_count] = all_records.size
      end

      run_callback(:after_insert, self, all_records)
    end

    # Moves staged rows into the source table using the configured
    # transfer strategy (:insert or :upsert).
    #
    # @return [TransferResult]
    # @raise [ConfigurationError] when the strategy name resolves to no class
    # @raise [TableError] if the staging table has not been created yet
    def transfer
      ensure_table_created!

      run_callback(:before_transfer, self)

      strategy_name = options[:transfer_strategy].to_s.camelize
      begin
        strategy_class = TransferStrategies.const_get(strategy_name)
      rescue NameError
        raise ConfigurationError, "Invalid transfer strategy: #{options[:transfer_strategy]}. Available strategies: insert, upsert."
      end

      payload = {
        source_model: source_model,
        source_table: source_model.table_name,
        staging_table: staging_table_name,
        strategy: options[:transfer_strategy],
        staged_count: staging_model.count
      }

      result = Instrumentation.instrument(:transfer, payload) do |instrumentation_payload|
        transfer_result = strategy_class.new(source_model, staging_model, options).transfer
        instrumentation_payload[:result] = transfer_result
        transfer_result
      end

      run_callback(:after_transfer, self, result)

      result
    end

    # Delegate unknown methods to the staging model (e.g. for querying)
    def method_missing(method, *args, &block)
      if staging_model.respond_to?(method)
        staging_model.send(method, *args, &block)
      else
        super
      end
    end

    def respond_to_missing?(method, include_private = false)
      staging_model.respond_to?(method, include_private) || super
    end

    private

    # Invokes the named callback, if one was configured, with the given args.
    def run_callback(name, *args)
      callback = @callbacks[name]
      return unless callback

      callback.call(*args)
    end

    # Adapter matching the source model's database connection (memoized).
    def adapter
      @adapter ||= Adapters::Base.for(source_model.connection)
    end

    # Random-suffixed name so concurrent sessions never collide on a table.
    def staging_table_name
      @staging_table_name ||= "staging_#{source_model.table_name}_#{SecureRandom.hex(8)}"
    end

    def ensure_table_created!
      raise TableError, "Staging table has not been created. You must call #create_table or use StagingTable.stage with a block before inserting or transferring data." unless @table_created
    end

    # Coerces supported inputs (relation, AR objects, hashes) into an
    # array of attribute hashes; anything else is passed through unchanged.
    def normalize_records(records)
      if records.is_a?(ActiveRecord::Relation)
        records.map(&:attributes)
      elsif records.respond_to?(:to_a)
        records.to_a.map do |record|
          record.is_a?(ActiveRecord::Base) ? record.attributes : record
        end
      else
        records
      end
    end
  end
end
@@ -0,0 +1,36 @@
1
# frozen_string_literal: true

module StagingTable
  # Holds statistics about a transfer operation
  class TransferResult
    attr_reader :inserted, :updated, :skipped, :total

    # @param inserted [Integer] rows newly added to the target table
    # @param updated [Integer] rows that replaced existing target rows
    # @param skipped [Integer] rows dropped due to conflicts
    def initialize(inserted: 0, updated: 0, skipped: 0)
      @inserted = inserted
      @updated = updated
      @skipped = skipped
      @total = inserted + updated + skipped
    end

    # @return [Hash] all four counters keyed by symbol
    def to_h
      { inserted: @inserted, updated: @updated, skipped: @skipped, total: @total }
    end

    # True when at least one row was inserted or updated.
    def success?
      inserted.positive? || updated.positive?
    end

    # True when the transfer touched no rows at all.
    def empty?
      total == 0
    end

    def inspect
      "#<StagingTable::TransferResult inserted=#{inserted} updated=#{updated} skipped=#{skipped} total=#{total}>"
    end
  end
end
@@ -0,0 +1,33 @@
1
# frozen_string_literal: true

module StagingTable
  module TransferStrategies
    # Copies every staged row into the target table with a single
    # INSERT INTO ... SELECT statement.
    class Insert
      # @param source_model [Class] target model (receives the rows)
      # @param staging_model [Class] staging model (provides the rows)
      # @param options [Hash] unused by this strategy; kept for interface parity
      def initialize(source_model, staging_model, options = {})
        @source_model = source_model
        @staging_model = staging_model
        @options = options
        @connection = source_model.connection
      end

      # Executes the bulk copy.
      #
      # @return [TransferResult] inserted equals the staged row count
      def transfer
        staged = @staging_model.count
        return TransferResult.new if staged.zero?

        column_sql = @staging_model.column_names
                                   .map { |name| @connection.quote_column_name(name) }
                                   .join(", ")
        target = @connection.quote_table_name(@source_model.table_name)
        source = @connection.quote_table_name(@staging_model.table_name)

        sql = <<~SQL
          INSERT INTO #{target} (#{column_sql})
          SELECT #{column_sql} FROM #{source}
        SQL

        @connection.execute(sql)

        # For plain INSERT, all staged records are inserted (assuming no
        # constraint violations).
        TransferResult.new(inserted: staged)
      end
    end
  end
end
@@ -0,0 +1,159 @@
1
# frozen_string_literal: true

module StagingTable
  module TransferStrategies
    # Transfers staged rows into the target table with upsert semantics,
    # dispatching to adapter-specific SQL: PostgreSQL ON CONFLICT, MySQL
    # ON DUPLICATE KEY UPDATE, SQLite ON CONFLICT / INSERT OR IGNORE.
    class Upsert
      # @param source_model [Class] target model (receives the rows)
      # @param staging_model [Class] staging model (provides the rows)
      # @param options [Hash] :conflict_target (unique-constraint columns,
      #   required for PostgreSQL/SQLite), :conflict_action (:ignore to
      #   skip conflicting rows instead of updating)
      def initialize(source_model, staging_model, options = {})
        @source_model = source_model
        @staging_model = staging_model
        @options = options
        @connection = source_model.connection
      end

      # Runs the adapter-appropriate upsert.
      #
      # @return [TransferResult]
      # @raise [AdapterError] for unsupported adapters
      def transfer
        @staged_count = @staging_model.count
        return TransferResult.new if @staged_count.zero?

        adapter_name = @connection.adapter_name.downcase
        case adapter_name
        when /postgresql/
          postgresql_upsert
        when /mysql/
          mysql_upsert
        when /sqlite/
          sqlite_upsert
        else
          raise AdapterError, "Upsert strategy not supported for adapter: #{adapter_name}. Supported adapters are PostgreSQL, MySQL, and SQLite."
        end
      end

      private

      # INSERT ... ON CONFLICT upsert for PostgreSQL. Requires :conflict_target.
      #
      # NOTE(review): inserted/updated/skipped counts are derived from
      # before/after COUNT(*) diffs, so concurrent writes to the target
      # table can skew the statistics (the transfer itself is unaffected).
      def postgresql_upsert
        conflict_target = Array(@options[:conflict_target])
        if conflict_target.empty?
          raise ConfigurationError, "PostgreSQL upsert requires :conflict_target option specifying the unique constraint columns. Example: transfer_strategy: :upsert, conflict_target: [:email]"
        end

        columns = column_names.map { |c| quote_column(c) }.join(", ")
        conflict_target_sql = conflict_target.map { |c| quote_column(c) }.join(", ")
        source_table = quote_table(@source_model.table_name)
        staging_table = quote_table(@staging_model.table_name)

        if @options[:conflict_action] == :ignore
          # DO NOTHING drops conflicting rows; the count diff tells us how
          # many actually landed, the remainder were skipped.
          sql = "INSERT INTO #{source_table} (#{columns}) SELECT #{columns} FROM #{staging_table}"
          sql += " ON CONFLICT (#{conflict_target_sql}) DO NOTHING"

          count_before = @source_model.count
          @connection.execute(sql)
          count_after = @source_model.count

          inserted = count_after - count_before
          skipped = @staged_count - inserted
          TransferResult.new(inserted: inserted, skipped: skipped)
        else
          # Update every non-key column from the staged row on conflict;
          # "id" is excluded so surrogate keys are never overwritten.
          updates = column_names.reject { |c| conflict_target.map(&:to_s).include?(c.to_s) || c == "id" }
                                .map { |c| "#{quote_column(c)} = EXCLUDED.#{quote_column(c)}" }.join(", ")

          # Count existing records that match conflict target before upsert
          count_before = @source_model.count
          @connection.execute(
            "INSERT INTO #{source_table} (#{columns}) SELECT #{columns} FROM #{staging_table} " \
            "ON CONFLICT (#{conflict_target_sql}) DO UPDATE SET #{updates}"
          )
          count_after = @source_model.count

          # Every staged row either inserts or updates under DO UPDATE,
          # so the non-inserted remainder is the updated count.
          inserted = count_after - count_before
          updated = @staged_count - inserted
          TransferResult.new(inserted: inserted, updated: updated)
        end
      end

      # INSERT IGNORE / ON DUPLICATE KEY UPDATE upsert for MySQL.
      # MySQL infers the conflict columns from the table's unique keys,
      # so no :conflict_target is required here.
      #
      # NOTE(review): counts use the same before/after COUNT(*) diff as
      # the PostgreSQL path and share its concurrency caveat.
      def mysql_upsert
        columns = column_names.map { |c| quote_column(c) }.join(", ")
        source_table = quote_table(@source_model.table_name)
        staging_table = quote_table(@staging_model.table_name)

        count_before = @source_model.count

        if @options[:conflict_action] == :ignore
          sql = "INSERT IGNORE INTO #{source_table} (#{columns}) SELECT #{columns} FROM #{staging_table}"
          @connection.execute(sql)

          count_after = @source_model.count
          inserted = count_after - count_before
          skipped = @staged_count - inserted
          TransferResult.new(inserted: inserted, skipped: skipped)
        else
          sql = "INSERT INTO #{source_table} (#{columns}) SELECT #{columns} FROM #{staging_table}"
          # VALUES(col) refers to the would-be inserted value; "id" is
          # excluded so surrogate keys are never overwritten.
          updates = column_names.reject { |c| c == "id" }
                                .map { |c| "#{quote_column(c)} = VALUES(#{quote_column(c)})" }.join(", ")
          sql += " ON DUPLICATE KEY UPDATE #{updates}"
          @connection.execute(sql)

          count_after = @source_model.count
          inserted = count_after - count_before
          updated = @staged_count - inserted
          TransferResult.new(inserted: inserted, updated: updated)
        end
      end

      # Upsert for SQLite. Requires :conflict_target.
      #
      # NOTE(review): the update branch issues one INSERT ... ON CONFLICT
      # per staged row (O(n) round trips); acceptable for SQLite-sized
      # workloads but a candidate for batching.
      def sqlite_upsert
        conflict_target = Array(@options[:conflict_target])
        if conflict_target.empty?
          raise ConfigurationError, "SQLite upsert requires :conflict_target option specifying the unique constraint columns. Example: transfer_strategy: :upsert, conflict_target: [:email]"
        end

        columns = column_names.map { |c| quote_column(c) }.join(", ")
        source_table = quote_table(@source_model.table_name)
        staging_table = quote_table(@staging_model.table_name)

        count_before = @source_model.count

        if @options[:conflict_action] == :ignore
          sql = "INSERT OR IGNORE INTO #{source_table} (#{columns}) SELECT #{columns} FROM #{staging_table}"
          @connection.execute(sql)

          count_after = @source_model.count
          inserted = count_after - count_before
          skipped = @staged_count - inserted
          TransferResult.new(inserted: inserted, skipped: skipped)
        else
          conflict_target_sql = conflict_target.map { |c| quote_column(c) }.join(", ")
          updates = column_names.reject { |c| conflict_target.map(&:to_s).include?(c.to_s) || c == "id" }
                                .map { |c| "#{quote_column(c)} = excluded.#{quote_column(c)}" }.join(", ")

          # Build individual upsert statements for each record
          @staging_model.all.each do |record|
            values_sql = column_names.map { |c| quote(record[c]) }.join(", ")
            upsert_sql = "INSERT INTO #{source_table} (#{columns}) VALUES (#{values_sql}) " \
                         "ON CONFLICT (#{conflict_target_sql}) DO UPDATE SET #{updates}"
            @connection.execute(upsert_sql)
          end

          count_after = @source_model.count
          inserted = count_after - count_before
          updated = @staged_count - inserted
          TransferResult.new(inserted: inserted, updated: updated)
        end
      end

      def quote(value)
        @connection.quote(value)
      end

      # Columns are taken from the staging model; assumes its schema
      # matches the target table (both built from the same source).
      def column_names
        @staging_model.column_names
      end

      def quote_column(name)
        @connection.quote_column_name(name)
      end

      def quote_table(name)
        @connection.quote_table_name(name)
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
# frozen_string_literal: true

module StagingTable
  # Gem version (semantic versioning).
  VERSION = "0.1.0"
end
@@ -0,0 +1,70 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "active_record"
4
+ require "staging_table/version"
5
+ require "staging_table/errors"
6
+ require "staging_table/configuration"
7
+ require "staging_table/instrumentation"
8
+ require "staging_table/transfer_result"
9
+ require "staging_table/session"
10
+ require "staging_table/model_factory"
11
+ require "staging_table/bulk_inserter"
12
+ require "staging_table/adapters/base"
13
+ require "staging_table/adapters/postgresql"
14
+ require "staging_table/adapters/mysql"
15
+ require "staging_table/adapters/sqlite"
16
+ require "staging_table/transfer_strategies/insert"
17
+ require "staging_table/transfer_strategies/upsert"
18
+
19
module StagingTable
  class << self
    # Global configuration object, built lazily on first access.
    #
    # @return [Configuration]
    def configuration
      @configuration ||= Configuration.new
    end

    # Yields the global configuration for block-style setup:
    #
    #   StagingTable.configure { |c| c.default_batch_size = 500 }
    def configure
      yield(configuration)
    end

    # Stage data for bulk import into a model's table.
    #
    # @param source_model [Class] The ActiveRecord model to stage data for
    # @param options [Hash] Configuration options
    # @option options [Integer] :batch_size Number of records per batch (default: 1000)
    # @option options [Symbol] :transfer_strategy :insert or :upsert (default: :insert)
    # @option options [Array<Symbol>] :conflict_target Columns for upsert conflict detection
    # @option options [Symbol] :conflict_action :update or :ignore for upsert conflicts
    # @option options [Proc] :before_insert Called before inserting into staging
    # @option options [Proc] :after_insert Called after inserting into staging
    # @option options [Proc] :before_transfer Called before transferring to target
    # @option options [Proc] :after_transfer Called after transferring to target
    #
    # @yield [session] Block for staging operations
    # @yieldparam session [Session] The staging session
    # @return [TransferResult, Session] TransferResult when block given, Session otherwise
    def stage(source_model, **options, &block)
      session = Session.new(source_model, **options)

      if block
        payload = {
          source_model: source_model,
          source_table: source_model.table_name,
          # Callbacks are procs — keep them out of the notification payload.
          options: options.except(*Session::CALLBACK_OPTIONS)
        }

        Instrumentation.instrument(:stage, payload) do |instrumentation_payload|
          session.create_table
          yield(session)
          result = session.transfer
          instrumentation_payload[:result] = result
          result
        ensure
          # Always drop the staging table, even if the block or transfer raises.
          session.drop_table
        end
      else
        # Without a block the caller owns the session lifecycle and must
        # call #drop_table when finished.
        session.create_table
        session
      end
    end
  end
end
@@ -0,0 +1,18 @@
1
+ # Run `rbs collection install` to install required type definitions
2
+ # See: https://github.com/ruby/rbs/blob/master/docs/collection.md
3
+
4
+ sources:
5
+ - type: git
6
+ name: ruby/gem_rbs_collection
7
+ remote: https://github.com/ruby/gem_rbs_collection.git
8
+ revision: main
9
+ repo_dir: gems
10
+
11
+ path: .gem_rbs_collection
12
+
13
+ gems:
14
+ - name: activerecord
15
+ - name: activesupport
16
+ # Ignore gems without RBS definitions in the collection
17
+ - name: prism
18
+ ignore: true
data/sig/manifest.yaml ADDED
@@ -0,0 +1,5 @@
1
+ # RBS manifest file
2
+ # Defines gem dependencies for type checking
3
+ dependencies:
4
+ - name: activerecord
5
+ - name: activesupport