sequelizer 0.1.4 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.beads/.gitignore +54 -0
- data/.beads/.jsonl.lock +0 -0
- data/.beads/.migration-hint-ts +1 -0
- data/.beads/README.md +81 -0
- data/.beads/config.yaml +42 -0
- data/.beads/issues.jsonl +20 -0
- data/.beads/metadata.json +7 -0
- data/.coderabbit.yaml +94 -0
- data/.devcontainer/.p10k.zsh +1713 -0
- data/.devcontainer/.zshrc +29 -0
- data/.devcontainer/Dockerfile +137 -0
- data/.devcontainer/copy-claude-credentials.sh +32 -0
- data/.devcontainer/devcontainer.json +102 -0
- data/.devcontainer/init-firewall.sh +123 -0
- data/.devcontainer/setup-credentials.sh +95 -0
- data/.github/dependabot.yml +18 -0
- data/.github/workflows/dependabot-auto-merge.yml +36 -0
- data/.github/workflows/test.yml +44 -9
- data/.gitignore +6 -1
- data/.overcommit.yml +73 -0
- data/.rubocop.yml +167 -0
- data/AGENTS.md +126 -0
- data/CHANGELOG.md +41 -0
- data/CLAUDE.md +230 -0
- data/Gemfile +6 -2
- data/Gemfile.lock +189 -0
- data/Guardfile +1 -1
- data/Rakefile +28 -3
- data/config/platforms/base.csv +5 -0
- data/config/platforms/rdbms/athena.csv +4 -0
- data/config/platforms/rdbms/postgres.csv +3 -0
- data/config/platforms/rdbms/snowflake.csv +1 -0
- data/config/platforms/rdbms/spark.csv +3 -0
- data/lib/sequel/extensions/cold_col.rb +436 -0
- data/lib/sequel/extensions/db_opts.rb +65 -4
- data/lib/sequel/extensions/funky.rb +136 -0
- data/lib/sequel/extensions/make_readyable.rb +146 -30
- data/lib/sequel/extensions/more_sql.rb +76 -0
- data/lib/sequel/extensions/platform.rb +301 -0
- data/lib/sequel/extensions/settable.rb +64 -0
- data/lib/sequel/extensions/sql_recorder.rb +85 -0
- data/lib/sequel/extensions/unionize.rb +169 -0
- data/lib/sequel/extensions/usable.rb +30 -1
- data/lib/sequelizer/cli.rb +61 -18
- data/lib/sequelizer/connection_maker.rb +54 -72
- data/lib/sequelizer/env_config.rb +6 -6
- data/lib/sequelizer/gemfile_modifier.rb +23 -21
- data/lib/sequelizer/monkey_patches/database_in_after_connect.rb +7 -5
- data/lib/sequelizer/options.rb +102 -19
- data/lib/sequelizer/options_hash.rb +2 -0
- data/lib/sequelizer/version.rb +3 -1
- data/lib/sequelizer/yaml_config.rb +9 -4
- data/lib/sequelizer.rb +65 -9
- data/sequelizer.gemspec +20 -12
- data/test/lib/sequel/extensions/test_cold_col.rb +251 -0
- data/test/lib/sequel/extensions/test_db_opts.rb +10 -8
- data/test/lib/sequel/extensions/test_make_readyable.rb +198 -28
- data/test/lib/sequel/extensions/test_more_sql.rb +132 -0
- data/test/lib/sequel/extensions/test_platform.rb +222 -0
- data/test/lib/sequel/extensions/test_settable.rb +109 -0
- data/test/lib/sequel/extensions/test_sql_recorder.rb +231 -0
- data/test/lib/sequel/extensions/test_unionize.rb +76 -0
- data/test/lib/sequel/extensions/test_usable.rb +5 -2
- data/test/lib/sequelizer/test_connection_maker.rb +21 -17
- data/test/lib/sequelizer/test_env_config.rb +5 -2
- data/test/lib/sequelizer/test_gemfile_modifier.rb +7 -6
- data/test/lib/sequelizer/test_options.rb +42 -9
- data/test/lib/sequelizer/test_yaml_config.rb +13 -12
- data/test/test_helper.rb +37 -8
- metadata +196 -39
- data/lib/sequel/extensions/sqls.rb +0 -31
|
@@ -0,0 +1,301 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
#
|
|
4
|
+
# The platform extension provides a unified interface for platform-specific
|
|
5
|
+
# database behavior. It uses KVCSV configuration files to define capabilities
|
|
6
|
+
# and preferences, with Ruby classes only for function translations.
|
|
7
|
+
#
|
|
8
|
+
# DB.extension :platform
|
|
9
|
+
# DB.platform.supports?(:cte) # => true (from config)
|
|
10
|
+
# DB.platform.prefers?(:cte) # => true (from config)
|
|
11
|
+
# DB.platform[:interim_schema] # => nil (from config)
|
|
12
|
+
# DB.platform.date_diff(from, to) # => Sequel expression (from code)
|
|
13
|
+
#
|
|
14
|
+
# Configuration is loaded from CSV files in config/platforms/:
|
|
15
|
+
# - base.csv: conservative defaults
|
|
16
|
+
# - rdbms/<adapter>.csv: RDBMS-specific overrides
|
|
17
|
+
# - Additional configs can be stacked via platform_configs option
|
|
18
|
+
#
|
|
19
|
+
# Related module: Sequel::Platform
|
|
20
|
+
|
|
21
|
+
require 'kvcsv'
|
|
22
|
+
|
|
23
|
+
module Sequel
|
|
24
|
+
|
|
25
|
+
# The Platform module provides database platform abstraction through
|
|
26
|
+
# configuration-driven capabilities and code-driven function translations.
|
|
27
|
+
module Platform
|
|
28
|
+
|
|
29
|
+
# Default platform. Capabilities and preferences are read from stacked KVCSV
# config files; SQL function translations are plain methods that the
# platform-specific subclasses override where their dialect differs.
class Base
  # @return [Sequel::Database] the database this platform was built for
  attr_reader :db

  # @return [Hash, KVCSV::Settings] loaded settings; an empty Hash when no
  #   config paths were supplied
  attr_reader :config

  # @param db [Sequel::Database] The database connection
  # @param config_paths [Array<String>] CSV config files, handed to KVCSV in the given order
  def initialize(db, *config_paths)
    @db = db
    @config = if config_paths.empty?
                {}
              else
                KVCSV::Settings.new(*config_paths)
              end
  end

  # Looks up the +supports_<feature>+ config entry.
  #
  # @param feature [Symbol] Feature name (e.g., :cte, :temp_tables)
  # @return [Object] the configured value, or false when the entry is missing/nil
  #
  # @example
  #   platform.supports?(:cte)          # reads supports_cte
  #   platform.supports?(:temp_tables)  # reads supports_temp_tables
  def supports?(feature)
    flag('supports', feature)
  end

  # Looks up the +prefers_<feature>+ config entry. A platform may support a
  # feature without preferring it.
  #
  # @param feature [Symbol] Feature name (e.g., :cte, :parquet)
  # @return [Object] the configured value, or false when the entry is missing/nil
  #
  # @example
  #   platform.prefers?(:parquet)  # reads prefers_parquet
  def prefers?(feature)
    flag('prefers', feature)
  end

  # Reads an arbitrary config value.
  #
  # @param key [Symbol] Config key
  # @return [Object] Config value or nil
  #
  # @example
  #   platform[:interim_schema]  # => "scratch"
  def [](key)
    config[key]
  end

  # Reads a config value, falling back to +default+.
  #
  # Delegates to the config object's own +fetch+ when it has one; otherwise
  # falls back to +[]+ and substitutes +default+ for nil/false values.
  #
  # @param key [Symbol] Config key
  # @param default [Object] Value to use when the key is not found
  # @return [Object] Config value or default
  def fetch(key, default = nil)
    return config.fetch(key, default) if config.respond_to?(:fetch)

    config[key] || default
  end

  # ---- Function translations (override in subclasses) ----

  # Difference between two dates, expressed as +datediff(from, to)+.
  #
  # @param from [Symbol, Sequel::SQL::Expression] Start date
  # @param to [Symbol, Sequel::SQL::Expression] End date
  # @return [Sequel::SQL::Expression] Date difference expression
  def date_diff(from, to)
    Sequel.function(:datediff, from, to)
  end

  # Casts an expression to the date type.
  #
  # @param expr [Object] Expression to cast
  # @return [Sequel::SQL::Cast] Cast expression
  def cast_date(expr)
    Sequel.cast(expr, Date)
  end

  # Parses a string into a date via +to_date(value, format)+.
  #
  # @param value [Object] String value to parse
  # @param format [String] Date format string
  # @return [Sequel::SQL::Expression] Parsed date expression
  def str_to_date(value, format)
    Sequel.function(:to_date, value, format)
  end

  # Days between two dates; by default this is whatever #date_diff produces.
  #
  # @param from [Symbol, Sequel::SQL::Expression] Start date
  # @param to [Symbol, Sequel::SQL::Expression] End date
  # @return [Sequel::SQL::Expression] Days between expression
  def days_between(from, to)
    date_diff(from, to)
  end

  private

  # Shared lookup behind #supports? and #prefers?: reads
  # +<prefix>_<feature>+ and maps a missing/nil entry to false.
  def flag(prefix, feature)
    config[:"#{prefix}_#{feature}"] || false
  end
end
|
|
127
|
+
|
|
128
|
+
# PostgreSQL platform: date differences are written as a subtraction
# rather than a function call.
class Postgres < Base
  # Builds the literal SQL +(to - from)+.
  # Postgres date subtraction returns integer days.
  #
  # @param from [Symbol, Sequel::SQL::Expression] Start date
  # @param to [Symbol, Sequel::SQL::Expression] End date
  # @return [Object] Sequel literal expression with +to+ and +from+ bound as placeholders
  def date_diff(from, to)
    Sequel.lit('(? - ?)', to, from)
  end

  # Days between two dates is the same subtraction, so it shares the
  # implementation above.
  alias_method :days_between, :date_diff
end
|
|
142
|
+
|
|
143
|
+
# Spark SQL platform.
class Spark < Base
  # Spark's datediff takes its arguments as (end, start), so the two dates
  # are swapped relative to the base implementation.
  #
  # @param from [Symbol, Sequel::SQL::Expression] Start date
  # @param to [Symbol, Sequel::SQL::Expression] End date
  # @return [Sequel::SQL::Expression] +datediff(to, from)+
  def date_diff(from, to)
    Sequel.function(:datediff, to, from)
  end

  # Parses a date after first casting the value to a string.
  #
  # @param value [Object] Value to parse
  # @param format [String] Date format string
  # @return [Sequel::SQL::Expression] +to_date(CAST(value AS string type), format)+
  def str_to_date(value, format)
    string_value = Sequel.cast_string(value)
    Sequel.function(:to_date, string_value, format)
  end
end
|
|
156
|
+
|
|
157
|
+
# Snowflake platform.
class Snowflake < Base
  # Snowflake's datediff requires a unit as its first argument; 'day' is used.
  #
  # @param from [Symbol, Sequel::SQL::Expression] Start date
  # @param to [Symbol, Sequel::SQL::Expression] End date
  # @return [Sequel::SQL::Expression] +datediff('day', from, to)+
  def date_diff(from, to)
    Sequel.function(:datediff, 'day', from, to)
  end

  # The day-unit datediff already is "days between", so both names share
  # one implementation.
  alias_method :days_between, :date_diff
end
|
|
170
|
+
|
|
171
|
+
# Athena platform (Presto/Trino based).
class Athena < Base
  # Athena/Presto spells the function date_diff and takes the unit first.
  #
  # @param from [Symbol, Sequel::SQL::Expression] Start date
  # @param to [Symbol, Sequel::SQL::Expression] End date
  # @return [Sequel::SQL::Expression] +date_diff('day', from, to)+
  def date_diff(from, to)
    Sequel.function(:date_diff, 'day', from, to)
  end

  # The day-unit date_diff already is "days between", so both names share
  # one implementation.
  alias_method :days_between, :date_diff
end
|
|
184
|
+
|
|
185
|
+
# Adapter scheme => platform class. Written grouped by class so the
# aliases of each platform sit together, then flattened into the
# scheme-keyed lookup table.
PLATFORM_CLASSES = {
  Postgres => %i[postgres postgresql],
  Spark => %i[spark],
  Athena => %i[athena presto trino],
  Snowflake => %i[snowflake],
}.each_with_object({}) do |(platform_class, schemes), lookup|
  schemes.each { |scheme| lookup[scheme] = platform_class }
end.freeze
|
|
195
|
+
|
|
196
|
+
# Adapter scheme => base name of its rdbms/<name>.csv config file.
# Written grouped by config name so aliases sit together, then flattened
# into the scheme-keyed lookup table.
ADAPTER_CONFIG_NAMES = {
  'postgres' => %i[postgres postgresql],
  'spark' => %i[spark],
  'athena' => %i[athena presto trino],
  'snowflake' => %i[snowflake],
}.each_with_object({}) do |(config_name, schemes), lookup|
  schemes.each { |scheme| lookup[scheme] = config_name }
end.freeze
|
|
206
|
+
|
|
207
|
+
class << self
  # Allow overriding config dir for testing
  attr_writer :config_dir

  # Directory containing the platform CSV files.
  #
  # The lookup result is remembered once a directory has been found (or
  # assigned); while nothing is found the search is repeated on each call.
  #
  # @return [String, nil] directory path, or nil when none exists
  def config_dir
    @config_dir ||= find_config_dir
  end

  private

  # Picks the first existing directory of:
  # 1. config/platforms three levels above this file's directory (the gem's own config)
  # 2. config/platforms under the current working directory
  #
  # @return [String, nil] the directory found, or nil
  def find_config_dir
    bundled = File.expand_path('../../../config/platforms', __dir__)
    if File.directory?(bundled)
      bundled
    else
      # Working directory is consulted only when the bundled config is absent
      local = File.join(Dir.pwd, 'config/platforms')
      local if File.directory?(local)
    end
  end
end
|
|
232
|
+
|
|
233
|
+
# Build config paths for the given adapter.
#
# Paths are returned in stacking order: base.csv, then the adapter's
# rdbms/<name>.csv, then any extra configs. Files that do not exist are
# left out. When no config directory is available only the extra configs
# are considered.
#
# @param adapter_scheme [Symbol] Database adapter scheme
# @param extra_configs [Array<String>, String, nil] Additional config path(s) to stack
# @return [Array<String>] Ordered config paths
def self.config_paths_for(adapter_scheme, extra_configs = [])
  paths = []

  if config_dir
    base_config = File.join(config_dir, 'base.csv')
    paths << base_config if File.exist?(base_config)

    adapter_name = ADAPTER_CONFIG_NAMES[adapter_scheme]
    if adapter_name
      rdbms_config = File.join(config_dir, 'rdbms', "#{adapter_name}.csv")
      paths << rdbms_config if File.exist?(rdbms_config)
    end
  end

  # Array() lets callers pass nil or a single path (e.g. an option read
  # from a config file) instead of failing on a missing #select.
  paths.concat(Array(extra_configs).select { |path| File.exist?(path) })
end
|
|
255
|
+
|
|
256
|
+
# Instantiate the platform object for a database.
#
# The effective adapter selects both the platform class (Base when the
# adapter has no dedicated class) and the config files to load.
#
# @param db [Sequel::Database] Database connection
# @param extra_configs [Array<String>] Additional config paths
# @return [Platform::Base] Platform instance
def self.build_platform(db, extra_configs = [])
  adapter = effective_adapter(db)
  PLATFORM_CLASSES
    .fetch(adapter, Base)
    .new(db, *config_paths_for(adapter, extra_configs))
end
|
|
267
|
+
|
|
268
|
+
# Detect the effective adapter for platform selection.
#
# Real connections report their adapter_scheme directly. Mock databases
# report :mock, so their database_type is used when it names a concrete
# database, and otherwise the :host option (for types the mock adapter
# does not recognise, like snowflake or athena).
#
# @param db [Sequel::Database] Database connection
# @return [Symbol, nil] Effective adapter type; nil for a mock database
#   with no database type and no (or an empty) host
def self.effective_adapter(db)
  scheme = db.adapter_scheme
  return scheme unless scheme == :mock

  # Mock databases: try database_type first (set for known types like postgres, spark)
  db_type = db.database_type
  return db_type if db_type && db_type != :mock

  # The host may arrive as a String (e.g. parsed from "mock://snowflake");
  # the lookup tables are keyed by Symbol, so normalise before returning.
  host = db.opts[:host].to_s
  host.empty? ? nil : host.to_sym
end
|
|
283
|
+
|
|
284
|
+
# Extension hook, run when a database is extended with this module.
#
# Builds the platform for +db+, stacking any paths given in the database's
# :platform_configs option, and stores it on the database as @platform.
#
# @param db [Sequel::Database] the database being extended
def self.extended(db)
  built = build_platform(db, db.opts[:platform_configs] || [])
  db.instance_variable_set(:@platform, built)
end
|
|
289
|
+
|
|
290
|
+
# The platform object stored in @platform for this database.
#
# @return [Platform::Base, nil] Platform instance, or nil if none has been set
def platform
  @platform
end
|
|
296
|
+
|
|
297
|
+
end
|
|
298
|
+
|
|
299
|
+
Database.register_extension(:platform, Platform)
|
|
300
|
+
|
|
301
|
+
end
|
|
@@ -1,5 +1,56 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
#
|
|
4
|
+
# The settable extension adds a convenient +set+ method to database connections
|
|
5
|
+
# for executing SET statements with key-value pairs. This is particularly useful
|
|
6
|
+
# for configuring database session parameters.
|
|
7
|
+
#
|
|
8
|
+
# DB.extension :settable
|
|
9
|
+
# DB.set(search_path: 'public', timezone: 'UTC')
|
|
10
|
+
# # Executes: SET search_path=public
|
|
11
|
+
# # SET timezone=UTC
|
|
12
|
+
#
|
|
13
|
+
# DB.set(work_mem: '256MB')
|
|
14
|
+
# # Executes: SET work_mem=256MB
|
|
15
|
+
#
|
|
16
|
+
# The extension works with any database adapter and supports various value types
|
|
17
|
+
# including strings, numbers, booleans, and nil values.
|
|
18
|
+
#
|
|
19
|
+
# Related module: Sequel::Settable
|
|
20
|
+
|
|
1
21
|
module Sequel
|
|
22
|
+
|
|
23
|
+
# The Settable module provides database configuration functionality through
|
|
24
|
+
# SET statements. When loaded as an extension, it adds the +set+ method to
|
|
25
|
+
# database connections.
|
|
2
26
|
module Settable
|
|
27
|
+
|
|
28
|
+
# Execute SET statements for the given options hash.
|
|
29
|
+
#
|
|
30
|
+
# Each key-value pair in the options hash is converted to a SET statement
|
|
31
|
+
# and executed against the database. Multiple options result in multiple
|
|
32
|
+
# SET statements being executed in sequence.
|
|
33
|
+
#
|
|
34
|
+
# @param opts [Hash] Hash of configuration options to set
|
|
35
|
+
# @option opts [Object] key The configuration parameter name
|
|
36
|
+
# @option opts [Object] value The value to set for the parameter
|
|
37
|
+
#
|
|
38
|
+
# @example Set a single parameter
|
|
39
|
+
# DB.set(timezone: 'UTC')
|
|
40
|
+
# # Executes: SET timezone=UTC
|
|
41
|
+
#
|
|
42
|
+
# @example Set multiple parameters
|
|
43
|
+
# DB.set(search_path: 'public', work_mem: '256MB')
|
|
44
|
+
# # Executes: SET search_path=public
|
|
45
|
+
# # SET work_mem=256MB
|
|
46
|
+
#
|
|
47
|
+
# @example Different value types
|
|
48
|
+
# DB.set(port: 5432, autocommit: true, custom_setting: nil)
|
|
49
|
+
# # Executes: SET port=5432
|
|
50
|
+
# # SET autocommit=true
|
|
51
|
+
# # SET custom_setting=
|
|
52
|
+
#
|
|
53
|
+
# @return [void]
|
|
3
54
|
def set(opts = {})
|
|
4
55
|
set_sql(opts).each do |sql|
|
|
5
56
|
run(sql)
|
|
@@ -8,10 +59,23 @@ module Sequel
|
|
|
8
59
|
|
|
9
60
|
private
|
|
10
61
|
|
|
62
|
+
# Build one SET statement per option.
#
# Keys and values are interpolated as-is (via to_s): no quoting or
# escaping is applied, and a nil value yields an empty right-hand side.
#
# @param opts [Hash] Hash of options to convert to SET statements
# @return [Array<String>] Array of SET SQL statement strings
#
# @example
#   set_sql(timezone: 'UTC', port: 5432)
#   # => ["SET timezone=UTC", "SET port=5432"]
def set_sql(opts)
  opts.each_with_object([]) do |(setting, value), statements|
    statements << "SET #{setting}=#{value}"
  end
end
|
|
76
|
+
|
|
14
77
|
end
|
|
15
78
|
|
|
16
79
|
Database.register_extension(:settable, Settable)
|
|
80
|
+
|
|
17
81
|
end
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# == Overview
|
|
4
|
+
#
|
|
5
|
+
# The sql_recorder extension records each SQL statement sent to the database
|
|
6
|
+
# in a thread-safe array accessible via the +sql_recorder+ method.
|
|
7
|
+
#
|
|
8
|
+
# == Usage
|
|
9
|
+
#
|
|
10
|
+
# DB.extension :sql_recorder
|
|
11
|
+
# DB[:users].all
|
|
12
|
+
# DB[:posts].where(id: 1).first
|
|
13
|
+
#
|
|
14
|
+
# # Access recorded SQL statements
|
|
15
|
+
# DB.sql_recorder
|
|
16
|
+
# # => ["SELECT * FROM users", "SELECT * FROM posts WHERE (id = 1) LIMIT 1"]
|
|
17
|
+
#
|
|
18
|
+
# # Clear the recorded statements
|
|
19
|
+
# DB.sql_recorder.clear
|
|
20
|
+
#
|
|
21
|
+
# == Thread Safety
|
|
22
|
+
#
|
|
23
|
+
# The extension is thread-safe and uses a mutex to synchronize access to the
|
|
24
|
+
# SQL recording array when multiple threads are executing queries simultaneously.
|
|
25
|
+
#
|
|
26
|
+
# == Compatibility
|
|
27
|
+
#
|
|
28
|
+
# This extension is designed to work alongside mock databases and other SQL
|
|
29
|
+
# recording mechanisms. It uses the method name +sql_recorder+ to avoid
|
|
30
|
+
# conflicts with existing +sqls+ methods that may be present in test frameworks.
|
|
31
|
+
#
|
|
32
|
+
# Related module: Sequel::SqlRecorder
|
|
33
|
+
|
|
34
|
+
module Sequel
|
|
35
|
+
|
|
36
|
+
# Database extension that keeps a list of every SQL string passed through
# +log_connection_yield+. The list is exposed via +sql_recorder+; appends to
# it are guarded by a per-database mutex.
module SqlRecorder
  # Sets up recording state on the database being extended.
  #
  # Creates the mutex and the (initially empty) statement list. State that
  # already exists is kept, so extending the same database again does not
  # discard recorded statements.
  #
  # @param db [Sequel::Database] the database instance being extended
  def self.extended(db)
    db.instance_eval do
      @sql_recorder_mutex ||= Mutex.new
      @sql_recorder ||= []
    end
  end

  # The recorded SQL statements, oldest first.
  #
  # This is the live array, so callers can empty it with +clear+.
  #
  # @return [Array<String>] array of SQL statement strings
  # @example
  #   DB.extension :sql_recorder
  #   DB[:users].all
  #   DB.sql_recorder #=> ["SELECT * FROM users"]
  def sql_recorder
    @sql_recorder
  end

  # Records +sql+, then hands over to the underlying +log_connection_yield+
  # with the original arguments and block.
  #
  # @param sql [String] the SQL statement being executed
  # @param conn [Object] the database connection object
  # @param args [Object] additional arguments (optional)
  # @return [Object] result from the parent +log_connection_yield+ method
  def log_connection_yield(sql, conn, args = nil)
    @sql_recorder_mutex.synchronize do
      sql_recorder << sql
    end
    super
  end
end
|
|
82
|
+
|
|
83
|
+
Database.register_extension(:sql_recorder, SqlRecorder)
|
|
84
|
+
|
|
85
|
+
end
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'digest'
|
|
4
|
+
|
|
5
|
+
module Sequel
|
|
6
|
+
|
|
7
|
+
# Provides efficient handling of large UNION operations.
#
# The unionize extension allows combining many datasets through UNION operations
# by chunking them into manageable temporary tables or views. This is particularly
# useful when dealing with databases that have limitations on the number of UNION
# operations in a single query (e.g., Spark SQL, DuckDB).
#
# @example Load the extension
#   DB.extension :unionize
#
# @example Basic usage
#   DB.unionize([dataset1, dataset2, dataset3, dataset4])
#
# @example With options
#   DB.unionize(datasets, chunk_size: 50, all: true, temp_table_prefix: 'my_union')
module Unionize
  # Splits a collection of datasets into chunks, materialises each chunk as
  # a temporary table/view, and repeats on the results until everything
  # fits into a single UNION.
  class Unionizer
    # Default number of datasets to combine in each chunk
    DEFAULT_CHUNK_SIZE = 100

    # One slice of the datasets: knows how to UNION its members, how to name
    # the temporary object holding the result, and how to create it.
    class Chunk
      # @!attribute [r] db
      #   @return [Sequel::Database] The database connection
      # @!attribute [r] dses
      #   @return [Array<Sequel::Dataset>] The datasets in this chunk
      # @!attribute [r] opts
      #   @return [Hash] Options for the union operation
      attr_reader :db, :dses, :opts

      # @param db [Sequel::Database] The database connection
      # @param dses [Array<Sequel::Dataset>] The datasets to combine
      # @param opts [Hash] Options for the union operation
      def initialize(db, dses, opts)
        @db = db
        @dses = dses
        @opts = opts
      end

      # The chunk's datasets folded together with UNION (memoized).
      # A chunk holding a single dataset returns that dataset unchanged.
      #
      # @return [Sequel::Dataset] The combined dataset
      def union
        @union ||= dses.reduce { |a, b| a.union(b, all: opts[:all], from_self: opts[:from_self]) }
      end

      # Name of the temporary table/view: the configured prefix followed by
      # the SHA1 of the union's SQL, so the same SQL always maps to the same name.
      #
      # @return [Symbol] The temporary table/view name
      def name
        @name ||= :"#{opts[:temp_table_prefix]}_#{Digest::SHA1.hexdigest(union.sql)}"
      end

      # Materialises the union under #name.
      #
      # - Spark: creates a temporary view
      # - DuckDB: creates a temporary table
      #
      # @raise [RuntimeError] If the database type is not supported
      # @return [void]
      def create
        case db.database_type
        when :spark
          db.create_view(name, union, temp: true)
        when :duckdb
          db.create_table(name, temp: true, as: union)
        else
          raise "Unsupported database type: #{db.database_type}"
        end
      end
    end

    # @!attribute [r] db
    #   @return [Sequel::Database] The database connection
    attr_reader :db

    # @param db [Sequel::Database] The database connection
    # @param ds_set [Array<Sequel::Dataset>] The datasets to combine
    # @param opts [Hash] Options for the union operation; not modified
    # @option opts [Integer] :chunk_size (100) Number of datasets per chunk
    # @option opts [String] :temp_table_prefix ('temp_union') Prefix for temporary tables
    # @option opts [Boolean] :all (false) Use UNION ALL instead of UNION
    # @option opts [Boolean] :from_self (true) Wrap individual datasets in subqueries
    def initialize(db, ds_set, opts = {})
      @db = db
      @ds_set = ds_set
      # Defaults are applied to a copy so the caller's hash (which may be
      # frozen or reused) is left untouched.
      @opts = opts.merge(
        chunk_size: opts[:chunk_size] || DEFAULT_CHUNK_SIZE,
        temp_table_prefix: opts[:temp_table_prefix] || 'temp_union',
        all: opts[:all] || false,
        from_self: opts.fetch(:from_self, true)
      )
    end

    # Performs the unionization of datasets.
    #
    # Chunks the datasets; if they fit in one chunk, returns its union.
    # Otherwise creates a temporary table/view per chunk and recurses on
    # datasets selecting from those temporary objects.
    #
    # @param dses [Array<Sequel::Dataset>] The datasets to combine (defaults to @ds_set)
    # @return [Sequel::Dataset] The final combined dataset
    # @raise [ArgumentError] If there are no datasets, or if several datasets
    #   must be combined with a chunk size below 2
    def unionize(dses = @ds_set)
      chunk_size = @opts[:chunk_size]
      chunks = dses.each_slice(chunk_size).map do |chunk_of_dses|
        Chunk.new(db, chunk_of_dses, @opts)
      end

      # Zero chunks would never reach the single-chunk exit below
      raise ArgumentError, 'unionize requires at least one dataset' if chunks.empty?
      return chunks.first.union if chunks.size == 1

      # With one dataset per chunk each pass yields as many datasets as it
      # started with, so the recursion would never terminate.
      if chunk_size < 2
        raise ArgumentError, "chunk_size must be at least 2 to combine multiple datasets (got #{chunk_size})"
      end

      unionize(chunks.each(&:create).map { |chunk| db[chunk.name] })
    end
  end

  # Efficiently combines multiple datasets using UNION operations.
  #
  # Large collections are handled by chunking them into manageable groups,
  # creating temporary tables/views for intermediate results, and
  # recursively combining them until a single dataset is produced.
  #
  # @param ds_set [Array<Sequel::Dataset>] The datasets to combine via UNION
  # @param opts [Hash] Options for the union operation
  # @option opts [Integer] :chunk_size (100) Number of datasets to combine in each chunk
  # @option opts [String] :temp_table_prefix ('temp_union') Prefix for temporary table names
  # @option opts [Boolean] :all (false) Use UNION ALL instead of UNION (keeps duplicates)
  # @option opts [Boolean] :from_self (true) Wrap individual datasets in subqueries
  #
  # @return [Sequel::Dataset] The combined dataset
  # @raise [ArgumentError] If +ds_set+ is empty, or if several datasets are
  #   given with a chunk size below 2
  #
  # @example Basic union of datasets
  #   db.unionize([ds1, ds2, ds3, ds4])
  #
  # @example Union all with custom chunk size
  #   db.unionize(datasets, all: true, chunk_size: 50)
  #
  # @example Custom temporary table prefix
  #   db.unionize(datasets, temp_table_prefix: 'my_union_batch')
  def unionize(ds_set, opts = {})
    Unionizer.new(self, ds_set, opts).unionize
  end
end
|
|
166
|
+
|
|
167
|
+
Database.register_extension(:unionize, Unionize)
|
|
168
|
+
|
|
169
|
+
end
|
|
@@ -1,15 +1,44 @@
|
|
|
1
1
|
module Sequel
|
|
2
|
+
|
|
3
|
+
# = Usable
#
# Sequel extension that provides a convenient +use+ method for switching
# the current database/schema context. This is particularly useful for
# databases that support the USE statement like MySQL, SQL Server, and
# some big data engines.
#
# @example
#   db.extension :usable
#   db.use(:my_schema)
#   # Executes: USE `my_schema`
module Usable
  # Switches to the specified database or schema.
  #
  # Executes a USE statement to change the current database context.
  # The schema name is quoted as an identifier using the database's
  # quoting rules, whether it is given as a Symbol or a String.
  #
  # @param schema_name [Symbol, String] the name of the schema/database to use
  # @example
  #   db.use(:production_db)
  #   db.use('test_schema')
  def use(schema_name)
    run(use_sql(schema_name))
  end

  private

  # Generates the USE SQL statement for the given schema name.
  #
  # @param schema_name [Symbol, String] the schema name to use
  # @return [String] the USE SQL statement
  def use_sql(schema_name)
    # literal() renders a plain String as a quoted string value, which is
    # not a valid USE target, so plain Strings are wrapped as identifiers.
    # instance_of? is deliberate: String subclasses (e.g. literal SQL
    # strings) keep whatever literalization they already had.
    target = schema_name.instance_of?(String) ? Sequel.identifier(schema_name) : schema_name
    "USE #{literal(target)}"
  end
end
|
|
13
41
|
|
|
14
42
|
Database.register_extension(:usable, Usable)
|
|
43
|
+
|
|
15
44
|
end
|