staging_table 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/workflows/rbs.yml +30 -0
- data/.github/workflows/test.yml +124 -0
- data/.gitignore +40 -0
- data/.rspec +3 -0
- data/Gemfile +14 -0
- data/README.md +327 -0
- data/Rakefile +19 -0
- data/lib/staging_table/adapters/base.rb +36 -0
- data/lib/staging_table/adapters/mysql.rb +14 -0
- data/lib/staging_table/adapters/postgresql.rb +16 -0
- data/lib/staging_table/adapters/sqlite.rb +54 -0
- data/lib/staging_table/bulk_inserter.rb +43 -0
- data/lib/staging_table/configuration.rb +12 -0
- data/lib/staging_table/errors.rb +20 -0
- data/lib/staging_table/instrumentation.rb +71 -0
- data/lib/staging_table/model_factory.rb +24 -0
- data/lib/staging_table/session.rb +186 -0
- data/lib/staging_table/transfer_result.rb +36 -0
- data/lib/staging_table/transfer_strategies/insert.rb +33 -0
- data/lib/staging_table/transfer_strategies/upsert.rb +159 -0
- data/lib/staging_table/version.rb +5 -0
- data/lib/staging_table.rb +70 -0
- data/rbs_collection.yaml +18 -0
- data/sig/manifest.yaml +5 -0
- data/sig/staging_table/adapters/base.rbs +18 -0
- data/sig/staging_table/adapters/mysql.rbs +7 -0
- data/sig/staging_table/adapters/postgresql.rbs +7 -0
- data/sig/staging_table/adapters/sqlite.rbs +11 -0
- data/sig/staging_table/bulk_inserter.rbs +16 -0
- data/sig/staging_table/configuration.rbs +8 -0
- data/sig/staging_table/errors.rbs +25 -0
- data/sig/staging_table/instrumentation.rbs +19 -0
- data/sig/staging_table/model_factory.rbs +6 -0
- data/sig/staging_table/session.rbs +40 -0
- data/sig/staging_table/transfer_result.rbs +22 -0
- data/sig/staging_table/transfer_strategies/insert.rbs +15 -0
- data/sig/staging_table/transfer_strategies/upsert.rbs +26 -0
- data/sig/staging_table/version.rbs +3 -0
- data/sig/staging_table.rbs +9 -0
- data/staging_table.gemspec +35 -0
- metadata +195 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: c89644e006af09c46ea8ea69f9a5687b20c88e400fbb1436615c10982ce63eb6
|
|
4
|
+
data.tar.gz: 90a9f0c62c6a63b60fb512411c2a74a26f927cb66a8407b082a8bb1e6421ca6c
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: fc32f9c5d72b5971002d503bf3969c8941e6675d1764d59976a366a14a95134c2ca22259b70c1a84b29c04f085df9c217c4b9451f506094d7c32150b902cc309
|
|
7
|
+
data.tar.gz: bb3e29593966192d22fc8168656100ba7880d5f0cfa14f520667dec0ad0822c0efd0212c4d4d53e4e0c1bb49fd06f3b0ad63431a78111ee2ddd535efc0f7cf31
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
name: RBS
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
validate:
|
|
11
|
+
name: Validate Type Signatures
|
|
12
|
+
runs-on: ubuntu-latest
|
|
13
|
+
|
|
14
|
+
steps:
|
|
15
|
+
- uses: actions/checkout@v4
|
|
16
|
+
|
|
17
|
+
- name: Set up Ruby
|
|
18
|
+
uses: ruby/setup-ruby@v1
|
|
19
|
+
with:
|
|
20
|
+
ruby-version: '3.3'
|
|
21
|
+
bundler-cache: true
|
|
22
|
+
|
|
23
|
+
- name: Install RBS
|
|
24
|
+
run: gem install rbs
|
|
25
|
+
|
|
26
|
+
- name: Install RBS collection
|
|
27
|
+
run: rbs collection install
|
|
28
|
+
|
|
29
|
+
- name: Validate RBS signatures
|
|
30
|
+
run: rbs -I sig validate
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
name: Tests
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test-sqlite:
|
|
11
|
+
name: Ruby ${{ matrix.ruby }} - SQLite
|
|
12
|
+
runs-on: ubuntu-latest
|
|
13
|
+
strategy:
|
|
14
|
+
fail-fast: false
|
|
15
|
+
matrix:
|
|
16
|
+
ruby: ['3.1', '3.2', '3.3']
|
|
17
|
+
|
|
18
|
+
steps:
|
|
19
|
+
- uses: actions/checkout@v4
|
|
20
|
+
|
|
21
|
+
- name: Set up Ruby ${{ matrix.ruby }}
|
|
22
|
+
uses: ruby/setup-ruby@v1
|
|
23
|
+
with:
|
|
24
|
+
ruby-version: ${{ matrix.ruby }}
|
|
25
|
+
bundler-cache: true
|
|
26
|
+
|
|
27
|
+
- name: Run tests with SQLite
|
|
28
|
+
run: bundle exec rspec
|
|
29
|
+
|
|
30
|
+
test-postgresql:
|
|
31
|
+
name: Ruby ${{ matrix.ruby }} - PostgreSQL
|
|
32
|
+
runs-on: ubuntu-latest
|
|
33
|
+
strategy:
|
|
34
|
+
fail-fast: false
|
|
35
|
+
matrix:
|
|
36
|
+
ruby: ['3.1', '3.2', '3.3']
|
|
37
|
+
|
|
38
|
+
services:
|
|
39
|
+
postgres:
|
|
40
|
+
image: postgres:15
|
|
41
|
+
env:
|
|
42
|
+
POSTGRES_USER: postgres
|
|
43
|
+
POSTGRES_PASSWORD: postgres
|
|
44
|
+
POSTGRES_DB: staging_table_test
|
|
45
|
+
ports:
|
|
46
|
+
- 5432:5432
|
|
47
|
+
options: >-
|
|
48
|
+
--health-cmd pg_isready
|
|
49
|
+
--health-interval 10s
|
|
50
|
+
--health-timeout 5s
|
|
51
|
+
--health-retries 5
|
|
52
|
+
|
|
53
|
+
steps:
|
|
54
|
+
- uses: actions/checkout@v4
|
|
55
|
+
|
|
56
|
+
- name: Set up Ruby ${{ matrix.ruby }}
|
|
57
|
+
uses: ruby/setup-ruby@v1
|
|
58
|
+
with:
|
|
59
|
+
ruby-version: ${{ matrix.ruby }}
|
|
60
|
+
bundler-cache: true
|
|
61
|
+
|
|
62
|
+
- name: Run tests with PostgreSQL
|
|
63
|
+
run: bundle exec rspec
|
|
64
|
+
env:
|
|
65
|
+
POSTGRES_HOST: localhost
|
|
66
|
+
POSTGRES_PORT: 5432
|
|
67
|
+
POSTGRES_USER: postgres
|
|
68
|
+
POSTGRES_PASSWORD: postgres
|
|
69
|
+
POSTGRES_DB: staging_table_test
|
|
70
|
+
|
|
71
|
+
test-mysql:
|
|
72
|
+
name: Ruby ${{ matrix.ruby }} - MySQL
|
|
73
|
+
runs-on: ubuntu-latest
|
|
74
|
+
strategy:
|
|
75
|
+
fail-fast: false
|
|
76
|
+
matrix:
|
|
77
|
+
ruby: ['3.1', '3.2', '3.3']
|
|
78
|
+
|
|
79
|
+
services:
|
|
80
|
+
mysql:
|
|
81
|
+
image: mysql:8.0
|
|
82
|
+
env:
|
|
83
|
+
MYSQL_ROOT_PASSWORD: root
|
|
84
|
+
MYSQL_DATABASE: staging_table_test
|
|
85
|
+
ports:
|
|
86
|
+
- 3306:3306
|
|
87
|
+
options: >-
|
|
88
|
+
--health-cmd "mysqladmin ping -h localhost"
|
|
89
|
+
--health-interval 10s
|
|
90
|
+
--health-timeout 5s
|
|
91
|
+
--health-retries 5
|
|
92
|
+
|
|
93
|
+
steps:
|
|
94
|
+
- uses: actions/checkout@v4
|
|
95
|
+
|
|
96
|
+
- name: Set up Ruby ${{ matrix.ruby }}
|
|
97
|
+
uses: ruby/setup-ruby@v1
|
|
98
|
+
with:
|
|
99
|
+
ruby-version: ${{ matrix.ruby }}
|
|
100
|
+
bundler-cache: true
|
|
101
|
+
|
|
102
|
+
- name: Run tests with MySQL
|
|
103
|
+
run: bundle exec rspec
|
|
104
|
+
env:
|
|
105
|
+
MYSQL_HOST: 127.0.0.1
|
|
106
|
+
MYSQL_PORT: 3306
|
|
107
|
+
MYSQL_USER: root
|
|
108
|
+
MYSQL_PASSWORD: root
|
|
109
|
+
MYSQL_DB: staging_table_test
|
|
110
|
+
|
|
111
|
+
lint:
|
|
112
|
+
name: Standard Ruby
|
|
113
|
+
runs-on: ubuntu-latest
|
|
114
|
+
steps:
|
|
115
|
+
- uses: actions/checkout@v4
|
|
116
|
+
|
|
117
|
+
- name: Set up Ruby
|
|
118
|
+
uses: ruby/setup-ruby@v1
|
|
119
|
+
with:
|
|
120
|
+
ruby-version: '3.3'
|
|
121
|
+
bundler-cache: true
|
|
122
|
+
|
|
123
|
+
- name: Run Standard Ruby
|
|
124
|
+
run: bundle exec standardrb
|
data/.gitignore
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# Gem build artifacts
|
|
2
|
+
*.gem
|
|
3
|
+
pkg/
|
|
4
|
+
|
|
5
|
+
# Bundle
|
|
6
|
+
/.bundle/
|
|
7
|
+
vendor/bundle
|
|
8
|
+
|
|
9
|
+
# Ruby version managers
|
|
10
|
+
.ruby-version
|
|
11
|
+
.ruby-gemset
|
|
12
|
+
.rvmrc
|
|
13
|
+
|
|
14
|
+
# RSpec
|
|
15
|
+
spec/examples.txt
|
|
16
|
+
|
|
17
|
+
# Coverage
|
|
18
|
+
/coverage/
|
|
19
|
+
|
|
20
|
+
# IDE
|
|
21
|
+
.idea/
|
|
22
|
+
*.swp
|
|
23
|
+
*.swo
|
|
24
|
+
.vscode/
|
|
25
|
+
|
|
26
|
+
# macOS
|
|
27
|
+
.DS_Store
|
|
28
|
+
|
|
29
|
+
# Logs
|
|
30
|
+
*.log
|
|
31
|
+
|
|
32
|
+
# Temporary files
|
|
33
|
+
tmp/
|
|
34
|
+
*.tmp
|
|
35
|
+
|
|
36
|
+
Gemfile.lock
|
|
37
|
+
|
|
38
|
+
# RBS type collection
|
|
39
|
+
.gem_rbs_collection/
|
|
40
|
+
rbs_collection.lock.yaml
|
data/.rspec
ADDED
data/Gemfile
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# Gems are resolved from the public RubyGems registry.
source "https://rubygems.org"

# Specify your gem's dependencies in staging_table.gemspec
gemspec

# Gems needed only while developing and testing the gem itself: the test
# framework, one database driver per supported adapter (SQLite, PostgreSQL,
# MySQL), Ruby LSP editor tooling, and the Standard linter.
group :development, :test do
  gem "rspec", "~> 3.0"
  gem "sqlite3"
  gem "pg"
  gem "mysql2"
  gem "ruby-lsp"
  gem "ruby-lsp-rspec"
  gem "standard"
end
|
data/README.md
ADDED
|
@@ -0,0 +1,327 @@
|
|
|
1
|
+
# 🎭 StagingTable
|
|
2
|
+
|
|
3
|
+
**The red carpet for your data before it hits the main stage.**
|
|
4
|
+
|
|
5
|
+
[![Gem Version](https://badge.fury.io/rb/staging_table.svg)](https://badge.fury.io/rb/staging_table)
|
|
6
|
+
[![Tests](https://github.com/eagerworks/staging_table/actions/workflows/test.yml/badge.svg)](https://github.com/eagerworks/staging_table/actions)
|
|
7
|
+
[![RBS](https://github.com/eagerworks/staging_table/actions/workflows/rbs.yml/badge.svg)](https://github.com/eagerworks/staging_table/actions/workflows/rbs.yml)
|
|
8
|
+
|
|
9
|
+
Stop shoving data directly into your production tables like a savage. Give it a dressing room first!
|
|
10
|
+
|
|
11
|
+
`StagingTable` lets you bulk import data into a temporary "staging" table, validate/massage it, and *then* gracefully transfer it to your real tables using efficient SQL strategies.
|
|
12
|
+
|
|
13
|
+
It's like `INSERT INTO ... SELECT` but with a Ruby DSL that makes you smile.
|
|
14
|
+
|
|
15
|
+
## 🌟 Why?
|
|
16
|
+
|
|
17
|
+
Importing large datasets is hard.
|
|
18
|
+
- **Direct inserts** are slow and bypass validations.
|
|
19
|
+
- **ActiveRecord** is safe but slow for millions of records.
|
|
20
|
+
- **Raw SQL** is fast but messy and hard to maintain.
|
|
21
|
+
|
|
22
|
+
**StagingTable** gives you the best of both worlds:
|
|
23
|
+
1. **🚀 Speed**: Bulk insert into a temp table (no index overhead yet).
|
|
24
|
+
2. **🛡️ Safety**: Validate or query the data *before* it touches your real table.
|
|
25
|
+
3. **🧹 Cleanliness**: Automatic cleanup of temp tables.
|
|
26
|
+
4. **🔄 Power**: Built-in support for `UPSERT` (INSERT ON CONFLICT) and duplicate handling.
|
|
27
|
+
|
|
28
|
+
---
|
|
29
|
+
|
|
30
|
+
## 📦 Installation
|
|
31
|
+
|
|
32
|
+
Add this line to your application's Gemfile:
|
|
33
|
+
|
|
34
|
+
```ruby
|
|
35
|
+
gem 'staging_table'
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
And then execute:
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
bundle install
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
---
|
|
45
|
+
|
|
46
|
+
## 🛠️ Usage
|
|
47
|
+
|
|
48
|
+
### The "Happy Path" (Block Syntax)
|
|
49
|
+
|
|
50
|
+
The simplest way to use StagingTable. It handles the creation and cleanup of the temporary table automatically.
|
|
51
|
+
|
|
52
|
+
```ruby
|
|
53
|
+
# 1. Create a staging table that mirrors the 'users' table
|
|
54
|
+
StagingTable.stage(User) do |staging|
|
|
55
|
+
|
|
56
|
+
# 2. Bulk insert data (Hashes, AR Objects, or Relations)
|
|
57
|
+
staging.insert([
|
|
58
|
+
{ name: 'John Doe', email: 'john@example.com' },
|
|
59
|
+
{ name: 'Jane Doe', email: 'jane@example.com' }
|
|
60
|
+
])
|
|
61
|
+
|
|
62
|
+
# 3. The 'staging' object is a real ActiveRecord model!
|
|
63
|
+
# You can query it, validate it, or massage data.
|
|
64
|
+
puts "Staged count: #{staging.count}"
|
|
65
|
+
staging.where(email: nil).delete_all
|
|
66
|
+
|
|
67
|
+
# 4. When the block exits, data is automatically transferred
|
|
68
|
+
# to the 'users' table using a single SQL statement.
|
|
69
|
+
end
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
### 📥 Importing Data
|
|
73
|
+
|
|
74
|
+
The `insert` method is flexible. Feed it whatever you have:
|
|
75
|
+
|
|
76
|
+
```ruby
|
|
77
|
+
StagingTable.stage(User) do |staging|
|
|
78
|
+
# 🍎 Array of Hashes
|
|
79
|
+
staging.insert([
|
|
80
|
+
{ name: 'John', email: 'john@example.com' },
|
|
81
|
+
{ name: 'Jane', email: 'jane@example.com' }
|
|
82
|
+
])
|
|
83
|
+
|
|
84
|
+
# 🍊 Array of ActiveRecord objects
|
|
85
|
+
staging.insert(User.where(active: true).to_a)
|
|
86
|
+
|
|
87
|
+
# 🍇 ActiveRecord::Relation (Lazy loading)
|
|
88
|
+
staging.insert(User.where(role: 'admin'))
|
|
89
|
+
end
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
For massive datasets, use `insert_from_query` to process in batches and keep memory usage low:
|
|
93
|
+
|
|
94
|
+
```ruby
|
|
95
|
+
StagingTable.stage(User) do |staging|
|
|
96
|
+
# Processes in batches of 1000 (configurable)
|
|
97
|
+
staging.insert_from_query(User.where(needs_migration: true))
|
|
98
|
+
end
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
### ⚔️ Handling Duplicates (Upsert)
|
|
102
|
+
|
|
103
|
+
Don't let duplicates crash your party. Configure the transfer strategy to handle conflicts gracefully.
|
|
104
|
+
|
|
105
|
+
```ruby
|
|
106
|
+
StagingTable.stage(User,
|
|
107
|
+
transfer_strategy: :upsert, # Default is :insert
|
|
108
|
+
conflict_target: [:email], # Column(s) to check for conflicts
|
|
109
|
+
conflict_action: :update # :update (overwrite) or :ignore (skip)
|
|
110
|
+
) do |staging|
|
|
111
|
+
staging.insert(records)
|
|
112
|
+
end
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
### 📊 Transfer Results
|
|
116
|
+
|
|
117
|
+
Every transfer returns a `TransferResult` with detailed statistics:
|
|
118
|
+
|
|
119
|
+
```ruby
|
|
120
|
+
result = StagingTable.stage(User) do |staging|
|
|
121
|
+
staging.insert(records)
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
puts result.inserted # => 450 (new records)
|
|
125
|
+
puts result.updated # => 50 (updated via upsert)
|
|
126
|
+
puts result.skipped # => 10 (ignored conflicts)
|
|
127
|
+
puts result.total # => 510 (total processed)
|
|
128
|
+
puts result.success? # => true (any inserts or updates?)
|
|
129
|
+
|
|
130
|
+
# Also available as a hash
|
|
131
|
+
result.to_h # => { inserted: 450, updated: 50, skipped: 10, total: 510 }
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
### 🪝 Callbacks
|
|
135
|
+
|
|
136
|
+
Hook into the staging lifecycle to validate, transform, or log:
|
|
137
|
+
|
|
138
|
+
```ruby
|
|
139
|
+
StagingTable.stage(User,
|
|
140
|
+
before_insert: ->(session) {
|
|
141
|
+
Rails.logger.info "Starting import..."
|
|
142
|
+
},
|
|
143
|
+
after_insert: ->(session, records) {
|
|
144
|
+
Rails.logger.info "Staged #{records.count} records"
|
|
145
|
+
},
|
|
146
|
+
before_transfer: ->(session) {
|
|
147
|
+
# Clean up invalid data before transfer
|
|
148
|
+
session.where(email: nil).delete_all
|
|
149
|
+
session.where(status: 'banned').delete_all
|
|
150
|
+
},
|
|
151
|
+
after_transfer: ->(session, result) {
|
|
152
|
+
Rails.logger.info "Imported #{result.inserted} new, updated #{result.updated}"
|
|
153
|
+
}
|
|
154
|
+
) do |staging|
|
|
155
|
+
staging.insert(records)
|
|
156
|
+
end
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
### 📡 Instrumentation (ActiveSupport::Notifications)
|
|
160
|
+
|
|
161
|
+
Monitor and debug your imports in production with built-in instrumentation:
|
|
162
|
+
|
|
163
|
+
```ruby
|
|
164
|
+
# Subscribe to transfer events
|
|
165
|
+
StagingTable::Instrumentation.subscribe(:transfer) do |event|
|
|
166
|
+
Rails.logger.info "[StagingTable] Transfer to #{event.payload[:source_table]} " \
|
|
167
|
+
"completed in #{event.duration.round(2)}ms"
|
|
168
|
+
StatsD.measure('staging_table.transfer.duration', event.duration)
|
|
169
|
+
StatsD.increment('staging_table.transfer.inserted', event.payload[:result].inserted)
|
|
170
|
+
end
|
|
171
|
+
|
|
172
|
+
# Subscribe to all StagingTable events
|
|
173
|
+
StagingTable::Instrumentation.subscribe_all do |event|
|
|
174
|
+
Rails.logger.debug "[StagingTable] #{event.name}: #{event.duration.round(2)}ms"
|
|
175
|
+
end
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
**Available Events:**
|
|
179
|
+
|
|
180
|
+
| Event | Payload | Description |
|
|
181
|
+
|-------|---------|-------------|
|
|
182
|
+
| `staging_table.stage` | `source_model`, `source_table`, `options`, `result` | Wraps the entire staging block |
|
|
183
|
+
| `staging_table.create_table` | `source_model`, `source_table`, `staging_table` | When staging table is created |
|
|
184
|
+
| `staging_table.insert` | `source_model`, `source_table`, `staging_table`, `record_count`, `batch_size` | When records are inserted |
|
|
185
|
+
| `staging_table.transfer` | `source_model`, `source_table`, `staging_table`, `strategy`, `staged_count`, `result` | When data is transferred |
|
|
186
|
+
| `staging_table.drop_table` | `source_model`, `source_table`, `staging_table` | When staging table is dropped |
|
|
187
|
+
|
|
188
|
+
You can also use standard ActiveSupport::Notifications directly:
|
|
189
|
+
|
|
190
|
+
```ruby
|
|
191
|
+
ActiveSupport::Notifications.subscribe('staging_table.transfer') do |event|
|
|
192
|
+
# Your monitoring code here
|
|
193
|
+
end
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
### 🎛️ Manual Control
|
|
197
|
+
|
|
198
|
+
Need to keep the staging table alive across multiple background jobs? We got you.
|
|
199
|
+
|
|
200
|
+
**Note:** Temporary tables in PostgreSQL are session-specific. This only works if you stay in the same DB connection!
|
|
201
|
+
|
|
202
|
+
```ruby
|
|
203
|
+
# Create the session
|
|
204
|
+
session = StagingTable::Session.new(User, excluded_columns: %w[created_at updated_at])
|
|
205
|
+
session.create_table
|
|
206
|
+
|
|
207
|
+
begin
|
|
208
|
+
# Insert data in chunks
|
|
209
|
+
session.insert(batch_1)
|
|
210
|
+
session.insert(batch_2)
|
|
211
|
+
|
|
212
|
+
# Run some sanity checks
|
|
213
|
+
if session.where(status: 'banned').exists?
|
|
214
|
+
raise "Whoa there! No banned users allowed."
|
|
215
|
+
end
|
|
216
|
+
|
|
217
|
+
# Commit to the real table
|
|
218
|
+
result = session.transfer
|
|
219
|
+
puts "Transferred #{result.total} records"
|
|
220
|
+
ensure
|
|
221
|
+
# Always clean up your mess
|
|
222
|
+
session.drop_table
|
|
223
|
+
end
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
---
|
|
227
|
+
|
|
228
|
+
## ⚙️ Configuration
|
|
229
|
+
|
|
230
|
+
Set global defaults in an initializer (e.g., `config/initializers/staging_table.rb`):
|
|
231
|
+
|
|
232
|
+
```ruby
|
|
233
|
+
StagingTable.configure do |config|
|
|
234
|
+
config.default_batch_size = 2000
|
|
235
|
+
config.default_transfer_strategy = :insert # or :upsert
|
|
236
|
+
end
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
---
|
|
240
|
+
|
|
241
|
+
## 🔍 Type Checking (RBS)
|
|
242
|
+
|
|
243
|
+
This gem ships with [RBS](https://github.com/ruby/rbs) type signatures for static type checking. The signatures are located in the `sig/` directory and are validated in CI.
|
|
244
|
+
|
|
245
|
+
### Using the Type Signatures
|
|
246
|
+
|
|
247
|
+
If you want to type-check your own code that uses StagingTable:
|
|
248
|
+
|
|
249
|
+
```bash
|
|
250
|
+
# Install RBS and the collection for dependencies
|
|
251
|
+
gem install rbs
|
|
252
|
+
rbs collection install
|
|
253
|
+
|
|
254
|
+
# Validate signatures
|
|
255
|
+
rbs -I sig validate
|
|
256
|
+
```
|
|
257
|
+
|
|
258
|
+
### Using with Steep
|
|
259
|
+
|
|
260
|
+
For full type checking with [Steep](https://github.com/soutaro/steep):
|
|
261
|
+
|
|
262
|
+
```ruby
|
|
263
|
+
# Gemfile
|
|
264
|
+
gem 'steep', group: :development
|
|
265
|
+
```
|
|
266
|
+
|
|
267
|
+
```ruby
|
|
268
|
+
# Steepfile
|
|
269
|
+
target :lib do
|
|
270
|
+
signature "sig"
|
|
271
|
+
check "lib"
|
|
272
|
+
library "activerecord"
|
|
273
|
+
library "activesupport"
|
|
274
|
+
end
|
|
275
|
+
```
|
|
276
|
+
|
|
277
|
+
```bash
|
|
278
|
+
bundle exec steep check
|
|
279
|
+
```
|
|
280
|
+
|
|
281
|
+
---
|
|
282
|
+
|
|
283
|
+
## 💾 Supported Databases
|
|
284
|
+
|
|
285
|
+
We speak your language.
|
|
286
|
+
|
|
287
|
+
| Database | Strategy |
|
|
288
|
+
|----------|----------|
|
|
289
|
+
| **PostgreSQL** | `CREATE TABLE ... (LIKE ... INCLUDING DEFAULTS)` + `INSERT ... ON CONFLICT` |
|
|
290
|
+
| **MySQL** | `CREATE TABLE ... LIKE ...` + `INSERT ... ON DUPLICATE KEY UPDATE` |
|
|
291
|
+
| **SQLite** | Copies structure from `sqlite_master` + `INSERT ... ON CONFLICT` |
|
|
292
|
+
|
|
293
|
+
---
|
|
294
|
+
|
|
295
|
+
## 🤝 Contributing
|
|
296
|
+
|
|
297
|
+
Found a bug? Want to add support for Oracle? (Please don't, but if you must...)
|
|
298
|
+
|
|
299
|
+
1. Fork it
|
|
300
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
|
301
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
|
302
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
|
303
|
+
5. Create new Pull Request
|
|
304
|
+
|
|
305
|
+
### Running Tests
|
|
306
|
+
|
|
307
|
+
We support the big three. Set up your environment variables for PG/MySQL or just run SQLite tests out of the box.
|
|
308
|
+
|
|
309
|
+
```bash
|
|
310
|
+
# Run everything
|
|
311
|
+
bundle exec rake spec
|
|
312
|
+
|
|
313
|
+
# Pick your poison
|
|
314
|
+
bundle exec rake spec:postgresql
|
|
315
|
+
bundle exec rake spec:mysql
|
|
316
|
+
bundle exec rspec --tag sqlite
|
|
317
|
+
```
|
|
318
|
+
|
|
319
|
+
---
|
|
320
|
+
|
|
321
|
+
## 🙏 Special Thanks
|
|
322
|
+
|
|
323
|
+
Special thanks to [agustin-peluffo](https://github.com/agustin-peluffo) who created the first implementation for a project!
|
|
324
|
+
|
|
325
|
+
---
|
|
326
|
+
|
|
327
|
+
*Made with ❤️ by [eagerworks](https://eagerworks.com)*
|
data/Rakefile
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
require "bundler/gem_tasks"
|
|
2
|
+
require "rspec/core/rake_task"
|
|
3
|
+
require "standard/rake"
|
|
4
|
+
|
|
5
|
+
# `rake spec` runs the whole RSpec suite.
RSpec::Core::RakeTask.new(:spec)

namespace :spec do
  # One task per database-specific tag; each runs only the examples carrying
  # that tag (`rake spec:postgresql`, `rake spec:mysql`).
  {postgresql: "PostgreSQL", mysql: "MySQL"}.each do |tag, label|
    desc "Run #{label} specs only"
    RSpec::Core::RakeTask.new(tag) { |task| task.rspec_opts = "--tag #{tag}" }
  end
end

# A bare `rake` runs the full suite.
task default: :spec
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module StagingTable
  module Adapters
    # Shared behaviour for the database-specific staging-table adapters.
    #
    # Holds the database connection, provides a generic +drop_table+, and acts
    # as the factory (+Base.for+) that selects the concrete adapter class.
    class Base
      attr_reader :connection

      # Builds the adapter that matches the connection's adapter name.
      #
      # The name is down-cased and tested, in order, for the substrings
      # "postgresql", "mysql" and "sqlite".
      #
      # @param connection [#adapter_name] connection the adapter will wrap
      # @return [Base] a Postgresql, Mysql or Sqlite adapter instance
      # @raise [AdapterError] when the name matches none of the three
      def self.for(connection)
        name = connection.adapter_name.downcase

        return Postgresql.new(connection) if name.match?(/postgresql/)
        return Mysql.new(connection) if name.match?(/mysql/)
        return Sqlite.new(connection) if name.match?(/sqlite/)

        raise AdapterError, "Unsupported adapter: #{name}. StagingTable supports PostgreSQL, MySQL, and SQLite adapters."
      end

      # @param connection [Object] connection used to quote identifiers and
      #   execute SQL
      def initialize(connection)
        @connection = connection
      end

      # Abstract hook: every concrete adapter overrides this.
      #
      # @raise [NotImplementedError] always, in this base class
      def create_table(temp_table_name, source_table_name, options = {})
        raise NotImplementedError
      end

      # Drops +temp_table_name+ if it exists; the name is quoted by the
      # connection before being placed in the statement.
      #
      # @return whatever the connection's +execute+ returns
      def drop_table(temp_table_name)
        connection.execute("DROP TABLE IF EXISTS #{connection.quote_table_name(temp_table_name)}")
      end
    end
  end
end
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module StagingTable
  module Adapters
    # MySQL adapter: clones the source table's definition with
    # CREATE TABLE ... LIKE.
    class Mysql < Base
      # Creates +temp_table_name+ as a structural copy of +source_table_name+.
      #
      # +options+ is accepted but not consulted here: MySQL's LIKE copies
      # structure and indexes by default.
      #
      # @return whatever the connection's +execute+ returns
      def create_table(temp_table_name, source_table_name, options = {})
        target, source = [temp_table_name, source_table_name].map do |table_name|
          connection.quote_table_name(table_name)
        end

        connection.execute("CREATE TABLE #{target} LIKE #{source}")
      end
    end
  end
end
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module StagingTable
  module Adapters
    # PostgreSQL adapter: clones the source table with
    # CREATE TABLE ... (LIKE ... INCLUDING DEFAULTS).
    class Postgresql < Base
      # Creates +temp_table_name+ with the same column definitions and
      # defaults as +source_table_name+.
      #
      # @param options [Hash] when +:include_indexes+ is truthy, the
      #   INCLUDING INDEXES clause is added as well
      # @return whatever the connection's +execute+ returns
      def create_table(temp_table_name, source_table_name, options = {})
        target = connection.quote_table_name(temp_table_name)
        source = connection.quote_table_name(source_table_name)

        like_clauses = ["INCLUDING DEFAULTS"]
        like_clauses << "INCLUDING INDEXES" if options[:include_indexes]

        connection.execute("CREATE TABLE #{target} (LIKE #{source} #{like_clauses.join(" ")})")
      end
    end
  end
end
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module StagingTable
  module Adapters
    # SQLite adapter.
    #
    # SQLite doesn't support CREATE TABLE ... LIKE, so the structure is copied
    # by reading the source table's stored CREATE TABLE statement from
    # +sqlite_master+ and re-running it under the staging table's name.
    class Sqlite < Base
      # Creates +temp_table_name+ with the same definition as
      # +source_table_name+.
      #
      # @param temp_table_name [String] name of the staging table to create
      # @param source_table_name [String] name of the table to copy
      # @param options [Hash] when +:include_indexes+ is truthy, the source
      #   table's indexes are re-created on the staging table too
      # @raise [TableError] when +sqlite_master+ has no entry for the source table
      def create_table(temp_table_name, source_table_name, options = {})
        create_sql = connection.select_value(
          "SELECT sql FROM sqlite_master WHERE type='table' AND name=#{connection.quote(source_table_name)}"
        )
        raise TableError, "Source table '#{source_table_name}' does not exist" if create_sql.nil?

        # Swap only the leading "CREATE TABLE <source>" (optionally quoted)
        # for the quoted staging-table name; the column list is left as is.
        quoted_temp = connection.quote_table_name(temp_table_name)
        new_create_sql = create_sql.sub(
          /CREATE\s+TABLE\s+["'`]?#{Regexp.escape(source_table_name)}["'`]?/i,
          "CREATE TABLE #{quoted_temp}"
        )
        connection.execute(new_create_sql)

        copy_indexes(temp_table_name, source_table_name) if options[:include_indexes]
      end

      private

      # Re-creates, on the staging table, every index of the source table
      # that has stored SQL (rows whose +sql+ is NULL are filtered out by the
      # query).
      #
      # NOTE(review): the rename step captures the index name with \w+, so it
      # assumes index names consist of word characters only - confirm.
      def copy_indexes(temp_table_name, source_table_name)
        indexes = connection.select_all(
          "SELECT sql FROM sqlite_master WHERE type='index' AND tbl_name=#{connection.quote(source_table_name)} AND sql IS NOT NULL"
        )
        # Loop-invariant: whole-word occurrences of the source table name.
        source_reference = /\b#{Regexp.escape(source_table_name)}\b/

        indexes.each do |row|
          index_sql = row["sql"]
          next if index_sql.nil?

          # Point the statement at the staging table ...
          retargeted_sql = index_sql.gsub(source_reference, temp_table_name)
          # ... and make the index name unique by appending the last
          # underscore-separated segment of the staging table name.
          new_index_sql = retargeted_sql.sub(/INDEX\s+["'`]?(\w+)["'`]?/i) do
            "INDEX #{Regexp.last_match(1)}_#{temp_table_name.split("_").last}"
          end

          connection.execute(new_index_sql)
        end
      end
    end
  end
end
|