sequel-duckdb 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.kiro/specs/advanced-sql-features-implementation/design.md +24 -0
- data/.kiro/specs/advanced-sql-features-implementation/requirements.md +43 -0
- data/.kiro/specs/advanced-sql-features-implementation/tasks.md +24 -0
- data/.kiro/specs/duckdb-sql-syntax-compatibility/design.md +258 -0
- data/.kiro/specs/duckdb-sql-syntax-compatibility/requirements.md +84 -0
- data/.kiro/specs/duckdb-sql-syntax-compatibility/tasks.md +94 -0
- data/.kiro/specs/edge-cases-and-validation-fixes/requirements.md +32 -0
- data/.kiro/specs/integration-test-database-setup/design.md +0 -0
- data/.kiro/specs/integration-test-database-setup/requirements.md +117 -0
- data/.kiro/specs/sequel-duckdb-adapter/design.md +542 -0
- data/.kiro/specs/sequel-duckdb-adapter/requirements.md +202 -0
- data/.kiro/specs/sequel-duckdb-adapter/tasks.md +247 -0
- data/.kiro/specs/sql-expression-handling-fix/design.md +298 -0
- data/.kiro/specs/sql-expression-handling-fix/requirements.md +86 -0
- data/.kiro/specs/sql-expression-handling-fix/tasks.md +22 -0
- data/.kiro/specs/test-infrastructure-improvements/requirements.md +106 -0
- data/.kiro/steering/product.md +22 -0
- data/.kiro/steering/structure.md +88 -0
- data/.kiro/steering/tech.md +124 -0
- data/.kiro/steering/testing.md +192 -0
- data/.rubocop.yml +103 -0
- data/.yardopts +8 -0
- data/API_DOCUMENTATION.md +919 -0
- data/CHANGELOG.md +131 -0
- data/LICENSE +21 -0
- data/MIGRATION_EXAMPLES.md +740 -0
- data/PERFORMANCE_OPTIMIZATIONS.md +723 -0
- data/README.md +692 -0
- data/Rakefile +27 -0
- data/TASK_10.2_IMPLEMENTATION_SUMMARY.md +164 -0
- data/docs/DUCKDB_SQL_PATTERNS.md +410 -0
- data/docs/TASK_12_VERIFICATION_SUMMARY.md +122 -0
- data/lib/sequel/adapters/duckdb.rb +256 -0
- data/lib/sequel/adapters/shared/duckdb.rb +2349 -0
- data/lib/sequel/duckdb/version.rb +16 -0
- data/lib/sequel/duckdb.rb +43 -0
- data/sig/sequel/duckdb.rbs +6 -0
- metadata +235 -0
@@ -0,0 +1,2349 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "duckdb"
|
4
|
+
|
5
|
+
# Sequel is the database toolkit for Ruby
|
6
|
+
module Sequel
|
7
|
+
module DuckDB
|
8
|
+
# DatabaseMethods module provides shared database functionality for DuckDB adapter
|
9
|
+
#
|
10
|
+
# This module is included by the main Database class to provide connection management,
|
11
|
+
# schema introspection, and SQL execution capabilities. It implements the core
|
12
|
+
# database operations required by Sequel's adapter interface.
|
13
|
+
#
|
14
|
+
# Key responsibilities:
|
15
|
+
# - Connection management (connect, disconnect, validation)
|
16
|
+
# - SQL execution with proper error handling and logging
|
17
|
+
# - Schema introspection (tables, columns, indexes, constraints)
|
18
|
+
# - Transaction support with commit/rollback capabilities
|
19
|
+
# - Data type mapping between Ruby and DuckDB types
|
20
|
+
# - Performance optimizations for analytical workloads
|
21
|
+
#
|
22
|
+
# @example Connection management
|
23
|
+
# db = Sequel.connect('duckdb:///path/to/database.duckdb')
|
24
|
+
# db.test_connection # => true
|
25
|
+
# db.disconnect
|
26
|
+
#
|
27
|
+
# @example Schema introspection
|
28
|
+
# db.tables # => [:users, :products, :orders]
|
29
|
+
# db.schema(:users) # => [[:id, {...}], [:name, {...}]]
|
30
|
+
# db.indexes(:users) # => {:users_email_index => {...}}
|
31
|
+
# db.table_exists?(:users) # => true
|
32
|
+
#
|
33
|
+
# @example SQL execution
|
34
|
+
# db.execute("SELECT COUNT(*) FROM users")
|
35
|
+
# db.execute("INSERT INTO users (name) VALUES (?)", ["John"])
|
36
|
+
#
|
37
|
+
# @example Transactions
|
38
|
+
# db.transaction do
|
39
|
+
# db[:users].insert(name: 'Alice')
|
40
|
+
# db[:orders].insert(user_id: db[:users].max(:id), total: 100)
|
41
|
+
# end
|
42
|
+
#
|
43
|
+
# @see Database
|
44
|
+
# @since 0.1.0
|
45
|
+
module DatabaseMethods
|
46
|
+
# Symbol identifying this adapter's database type.
#
# @return [Symbol] always :duckdb
def database_type = :duckdb
|
50
|
+
|
51
|
+
# DuckDB has no AUTOINCREMENT column modifier (sequences are used
# instead), so auto-increment support is reported as unavailable.
#
# @return [Boolean] always false
def supports_autoincrement? = false
|
55
|
+
|
56
|
+
# Identifiers are quoted by default for this database.
#
# @return [Boolean] always true
def quote_identifiers_default = true # rubocop:disable Naming/PredicateMethod
|
60
|
+
|
61
|
+
private
|
62
|
+
|
63
|
+
# DuckDB preserves the case of unquoted identifiers rather than folding
# them to uppercase.
#
# @return [Boolean] always false
def folds_unquoted_identifiers_to_uppercase? = false
|
67
|
+
|
68
|
+
public
|
69
|
+
|
70
|
+
# Execute a SQL statement on a checked-out connection.
#
# The second argument may be an options Hash (optionally carrying
# :params), or — for convenience — a bare Array of bind parameters.
#
# @param sql [String] SQL statement to execute
# @param opts [Hash, Array] execution options, or the bind-parameter array
# @yield [row] yields each result row when a block is given
# @return [Object] the driver result; for UPDATE/DELETE without a block,
#   the number of affected rows
def execute(sql, opts = {}, &block)
  # Normalize the second argument into (params, opts).
  case opts
  when Array
    params = opts
    opts = {}
  when Hash
    params = opts[:params] || []
  else
    # Anything else (e.g. a string) is treated as "no params, no opts".
    params = []
    opts = {}
  end

  synchronize(opts[:server]) do |conn|
    result = execute_statement(conn, sql, params, opts, &block)

    # Sequel models expect UPDATE/DELETE (when no row block was given)
    # to report the affected-row count rather than the raw result.
    head = sql.strip.upcase
    if !block && result.is_a?(::DuckDB::Result) &&
       (head.start_with?("UPDATE ") || head.start_with?("DELETE "))
      result.rows_changed
    else
      result
    end
  end
end
|
102
|
+
|
103
|
+
# Execute an INSERT statement.
#
# DuckDB has no AUTOINCREMENT, so no generated key can be reported back
# to Sequel; nil is always returned after executing the statement.
#
# @param sql [String] INSERT SQL statement
# @param opts [Hash] execution options
# @return [nil]
def execute_insert(sql, opts = {})
  execute(sql, opts)
  nil
end
|
115
|
+
|
116
|
+
# Execute an UPDATE (or DELETE) statement and report the affected rows.
#
# @param sql [String] UPDATE SQL statement
# @param opts [Hash] execution options
# @return [Integer] number of affected rows (0 when it cannot be determined)
def execute_update(sql, opts = {})
  result = execute(sql, opts)
  # DuckDB::Result exposes the affected-row count via #rows_changed.
  return result.rows_changed if result.respond_to?(:rows_changed)

  # execute may already have reduced the result to an Integer count.
  result.is_a?(Integer) ? result : 0
end
|
132
|
+
|
133
|
+
private
|
134
|
+
|
135
|
+
# Driver exception classes that Sequel should catch and translate into
# its own exception hierarchy.
#
# @return [Array<Class>] the DuckDB driver's error class
def database_error_classes = [::DuckDB::Error]
|
141
|
+
|
142
|
+
# DuckDB exceptions do not reliably carry SQLSTATE codes, so none can be
# extracted here.
#
# @param _exception [::DuckDB::Error] the raised driver error (unused)
# @param _opts [Hash] additional options (unused)
# @return [nil]
def database_exception_sqlstate(_exception, _opts) = nil
|
152
|
+
|
153
|
+
# SQLSTATE-based exception classification is disabled; errors are mapped
# by message pattern instead.
#
# @return [Boolean] always false
def database_exception_use_sqlstates? = false
|
159
|
+
|
160
|
+
# Map DuckDB errors to appropriate Sequel exception types (Requirements 8.1, 8.2, 8.3, 8.7)
#
# Classification is purely by message-pattern matching, so `when` clause
# order is load-bearing: NOT NULL patterns are checked before the generic
# "constraint" patterns, and the generic constraint branch sits last among
# the constraint cases so the specific violations win.
#
# @param exception [::DuckDB::Error] The DuckDB exception
# @param _opts [Hash] Additional options
# @return [Class] Sequel exception class to use
def database_exception_class(exception, _opts)
  message = exception.message.to_s

  # Map specific DuckDB error patterns to appropriate Sequel exceptions
  case message
  when /connection/i, /database.*not.*found/i, /cannot.*open/i
    # Connection-related errors (Requirement 8.1)
    Sequel::DatabaseConnectionError
  when /violates.*not.*null/i, /not.*null.*constraint/i, /null.*value.*not.*allowed/i
    # NOT NULL constraint violations (Requirement 8.3) — checked before
    # the generic constraint patterns so they take priority.
    Sequel::NotNullConstraintViolation
  when /unique.*constraint/i, /duplicate.*key/i, /already.*exists/i,
       /primary.*key.*constraint/i, /duplicate.*primary.*key/i
    # UNIQUE and PRIMARY KEY constraint violations (Requirement 8.3);
    # primary key violations are treated as a kind of unique violation.
    Sequel::UniqueConstraintViolation
  when /foreign.*key.*constraint/i, /violates.*foreign.*key/i
    # Foreign key constraint violations (Requirement 8.3)
    Sequel::ForeignKeyConstraintViolation
  when /check.*constraint/i, /violates.*check/i
    # CHECK constraint violations (Requirement 8.3)
    Sequel::CheckConstraintViolation
  when /constraint.*violation/i, /violates.*constraint/i
    # Generic constraint violations (Requirement 8.3) — lowest priority
    # among the constraint branches.
    Sequel::ConstraintViolation
  else
    # Everything else (Requirements 8.2, 8.7): syntax/parse errors,
    # missing table/column/schema/function, type-cast failures,
    # permission errors, etc. all map to the generic DatabaseError.
    Sequel::DatabaseError
  end
end
|
209
|
+
|
210
|
+
# Build an enhanced error message for a DuckDB exception, appending the
# offending SQL and bind parameters when available (Requirements 8.2, 8.7).
#
# @param exception [::DuckDB::Error] the raised driver error
# @param opts [Hash] execution context; :sql and :params are included
#   in the message when present
# @return [String] formatted error message
def database_exception_message(exception, opts)
  parts = ["DuckDB error: #{exception.message}"]
  parts << " -- SQL: #{opts[:sql]}" if opts[:sql]
  params = opts[:params]
  parts << " -- Parameters: #{params.inspect}" if params && !params.empty?
  parts.join
end
|
226
|
+
|
227
|
+
# Build the appropriate Sequel constraint exception for a DuckDB error
# (Requirement 8.3).
#
# @param exception [::DuckDB::Error] the raised driver error
# @param opts [Hash] execution context used to enrich the message
# @return [Exception] instance of the mapped Sequel exception class
def handle_constraint_violation(exception, opts = {})
  klass = database_exception_class(exception, opts)
  klass.new(database_exception_message(exception, opts))
end
|
239
|
+
|
240
|
+
# Schema introspection methods
|
241
|
+
|
242
|
+
# List the base tables in a schema via information_schema.
#
# @param opts [Hash] options; :schema selects the schema (default "main")
# @return [Array<Symbol>] table names as symbols
def schema_parse_tables(opts = {})
  sql = "SELECT table_name FROM information_schema.tables WHERE table_schema = ? AND table_type = 'BASE TABLE'"

  names = []
  execute(sql, [opts[:schema] || "main"]) do |row|
    names << row[:table_name].to_sym
  end
  names
end
|
258
|
+
|
259
|
+
# Parse table schema information.
#
# Queries information_schema.columns for every column of the table and
# converts each row into Sequel's [column_name, column_info] format,
# then back-fills primary key flags.
#
# @param table_name [Symbol, String] Name of the table
# @param opts [Hash] Options; :schema selects the schema (default "main")
# @return [Array<Array>] Array of [column_name, column_info] pairs
# @raise [Sequel::DatabaseError] if the table does not exist
def schema_parse_table(table_name, opts = {})
  schema_name = opts[:schema] || "main"

  # Fail fast with a clear error when the table is missing.
  raise Sequel::DatabaseError, "Table '#{table_name}' does not exist" unless table_exists?(table_name, opts)

  # information_schema.columns carries type, nullability, default and
  # size/precision metadata for each column.
  sql = <<~SQL
    SELECT
      column_name,
      ordinal_position,
      column_default,
      is_nullable,
      data_type,
      character_maximum_length,
      numeric_precision,
      numeric_scale
    FROM information_schema.columns
    WHERE table_schema = ? AND table_name = ?
    ORDER BY ordinal_position
  SQL

  columns = []
  execute(sql, [schema_name, table_name.to_s]) do |row|
    column_name = row[:column_name].to_sym

    # Translate the DuckDB type name into Sequel's generic type symbol.
    sequel_type = map_duckdb_type_to_sequel(row[:data_type])

    # information_schema reports nullability as the string "YES"/"NO".
    allow_null = row[:is_nullable] == "YES"

    # Defaults come back as SQL literal strings and must be parsed.
    default_value = parse_default_value(row[:column_default])

    column_info = {
      type: sequel_type,
      db_type: row[:data_type],
      allow_null: allow_null,
      default: default_value,
      primary_key: false # corrected by update_primary_key_info below
    }

    # Optional size metadata for string types.
    column_info[:max_length] = row[:character_maximum_length] if row[:character_maximum_length]

    # Optional precision/scale metadata for numeric types.
    column_info[:precision] = row[:numeric_precision] if row[:numeric_precision]
    column_info[:scale] = row[:numeric_scale] if row[:numeric_scale]

    columns << [column_name, column_info]
  end

  # Mark primary key columns (mutates the column_info hashes in place).
  update_primary_key_info(table_name, columns, opts)

  columns
end
|
322
|
+
|
323
|
+
# Parse index information for a table using DuckDB's duckdb_indexes()
# table function.
#
# @param table_name [Symbol, String] name of the table
# @param opts [Hash] options; :schema selects the schema (default "main")
# @return [Hash] index_name (Symbol) => { columns:, unique:, primary: }
# @raise [Sequel::DatabaseError] if the table does not exist
def schema_parse_indexes(table_name, opts = {})
  schema_name = opts[:schema] || "main"

  # Fail fast with a clear error when the table is missing.
  raise Sequel::DatabaseError, "Table '#{table_name}' does not exist" unless table_exists?(table_name, opts)

  sql = <<~SQL
    SELECT
      index_name,
      is_unique,
      is_primary,
      expressions,
      sql
    FROM duckdb_indexes()
    WHERE schema_name = ? AND table_name = ?
  SQL

  result = {}
  execute(sql, [schema_name, table_name.to_s]) do |row|
    # expressions arrives as a JSON-array-like string of column names.
    result[row[:index_name].to_sym] = {
      columns: parse_index_columns(row[:expressions]),
      unique: row[:is_unique],
      primary: row[:is_primary]
    }
  end
  result
end
|
364
|
+
|
365
|
+
public
|
366
|
+
|
367
|
+
# Configuration convenience methods (Requirements 3.1, 3.2)
|
368
|
+
|
369
|
+
# Set a DuckDB PRAGMA setting.
#
# User-friendly wrapper around DuckDB's PRAGMA statements, which
# configure settings such as memory limits, thread counts, and feature
# toggles.
#
# @param key [String, Symbol] the pragma setting name
# @param value [Object] the value to set; strings are single-quoted with
#   embedded quotes escaped, booleans/numbers are emitted bare, anything
#   else is stringified and quoted
# @return [void]
# @raise [Sequel::DatabaseError] if applying the pragma fails
#
# @example
#   db.set_pragma("memory_limit", "2GB")
#   db.set_pragma(:threads, 4)
#   db.set_pragma("enable_progress_bar", true)
#
# @see configure_duckdb
# @since 0.1.0
def set_pragma(key, value)
  formatted_value =
    case value
    when String
      "'#{value.gsub("'", "''")}'" # escape embedded single quotes
    when TrueClass, FalseClass, Numeric
      value.to_s
    else
      "'#{value}'"
    end

  begin
    execute("PRAGMA #{key} = #{formatted_value}")
  rescue StandardError => e
    raise Sequel::DatabaseError, "Failed to set pragma #{key}: #{e.message}"
  end
end
|
416
|
+
|
417
|
+
# Apply several DuckDB PRAGMA settings in one call.
#
# Convenience wrapper that invokes set_pragma once per entry.
#
# @param options [Hash] pragma_name => value pairs (string or symbol keys)
# @return [void]
# @raise [Sequel::DatabaseError] if any pragma setting fails
#
# @example
#   db.configure_duckdb(memory_limit: "2GB", threads: 8)
#
# @see set_pragma
# @since 0.1.0
def configure_duckdb(options = {})
  return if options.empty?

  options.each { |key, value| set_pragma(key, value) }
end
|
451
|
+
|
452
|
+
# Check whether a table exists in the given schema.
#
# @param table_name [Symbol, String] name of the table
# @param opts [Hash] options; :schema selects the schema (default "main")
# @return [Boolean] true if the table exists
def table_exists?(table_name, opts = {})
  sql = "SELECT 1 FROM information_schema.tables WHERE table_schema = ? AND table_name = ? LIMIT 1"

  found = false
  execute(sql, [opts[:schema] || "main", table_name.to_s]) { |_row| found = true }
  found
end
|
469
|
+
|
470
|
+
# List the tables in the database.
#
# @param opts [Hash] options forwarded to schema_parse_tables
# @return [Array<Symbol>] table names
def tables(opts = {}) = schema_parse_tables(opts)
|
477
|
+
|
478
|
+
# Get schema information for a table.
#
# Also populates @schema_cache, a per-table column-info lookup kept for
# type conversion elsewhere in the adapter.
#
# @param table_name [Symbol, String, Dataset] table name, or a Dataset
#   whose FROM clause names the table
# @param opts [Hash] Options forwarded to schema_parse_table
# @return [Array<Array>] Array of [column_name, column_info] pairs
# @raise [Sequel::Error] if no table name can be derived from a Dataset
def schema(table_name, opts = {})
  # Sequel sometimes passes a Dataset instead of a plain table name.
  if table_name.is_a?(Sequel::Dataset)
    # Prefer the dataset's FROM option when present.
    if table_name.opts[:from]&.first
      actual_table_name = table_name.opts[:from].first
      # Unwrap identifier objects that respond to #value.
      actual_table_name = actual_table_name.value if actual_table_name.respond_to?(:value)
    else
      # Fallback: scrape the table name out of the generated SQL.
      # NOTE(review): \w+ only matches simple unquoted names; quoted or
      # schema-qualified tables would not be recognized here — confirm.
      sql = table_name.sql
      raise Sequel::Error, "Cannot determine table name from dataset: #{table_name}" unless sql =~ /FROM\s+(\w+)/i

      actual_table_name = ::Regexp.last_match(1).to_sym

    end
  else
    actual_table_name = table_name
  end

  # Cache schema information for type conversion (rebuilt on each call).
  schema_info = schema_parse_table(actual_table_name, opts)
  @schema_cache ||= {}
  @schema_cache[actual_table_name] = {}

  schema_info.each do |column_name, column_info|
    @schema_cache[actual_table_name][column_name] = column_info
  end

  schema_info
end
|
514
|
+
|
515
|
+
# Get index information for a table.
#
# @param table_name [Symbol, String] name of the table
# @param opts [Hash] options forwarded to schema_parse_indexes
# @return [Hash] index_name => index_info
def indexes(table_name, opts = {}) = schema_parse_indexes(table_name, opts)
|
523
|
+
|
524
|
+
private
|
525
|
+
|
526
|
+
# Translate a DuckDB type name into Sequel's generic type symbol.
#
# Unrecognized types (including VARCHAR/TEXT/STRING) fall back to
# :string.
#
# @param duckdb_type [String] DuckDB data type name (any case)
# @return [Symbol] Sequel type symbol
def map_duckdb_type_to_sequel(duckdb_type)
  type = duckdb_type.upcase
  case type
  when "BIGINT", "INT8"                                           then :bigint
  when "INTEGER", "INT", "INT4", "SMALLINT", "INT2", "TINYINT", "INT1" then :integer
  when "REAL", "FLOAT4", "DOUBLE", "FLOAT8"                       then :float
  when "BOOLEAN", "BOOL"                                          then :boolean
  when "DATE"                                                     then :date
  when "TIMESTAMP", "DATETIME"                                    then :datetime
  when "TIME"                                                     then :time
  when "BLOB", "BYTEA"                                            then :blob
  when "UUID"                                                     then :uuid
  else
    # DECIMAL/NUMERIC carry precision suffixes, e.g. "DECIMAL(10,2)".
    type.start_with?("DECIMAL", "NUMERIC") ? :decimal : :string
  end
end
|
557
|
+
|
558
|
+
# Parse a column default value from DuckDB's SQL-literal format.
#
# Fixes over the previous version: negative integer/float literals are
# now recognized (previously "-7" fell through and was returned as a
# string), and the empty string literal '' now parses to "" instead of
# being returned verbatim.
#
# @param default_str [String, nil] default value string from DuckDB
# @return [Object, nil] parsed Ruby value; complex expressions are
#   returned unchanged as strings
def parse_default_value(default_str)
  return nil if default_str.nil? || default_str.empty?

  case default_str
  when /^CAST\('(.+)' AS BOOLEAN\)$/
    # Boolean defaults are serialized as CAST('t'/'f' AS BOOLEAN).
    ::Regexp.last_match(1) == "t"
  when /^'(.*)'$/
    ::Regexp.last_match(1) # String literal (may be empty)
  when /^-?\d+$/
    default_str.to_i # Integer literal, possibly negative
  when /^-?\d+\.\d+$/
    default_str.to_f # Float literal, possibly negative
  when "NULL"
    nil
  else
    default_str # Return as-is for complex expressions (e.g. now())
  end
end
|
581
|
+
|
582
|
+
# Back-fill primary key flags in a parsed column list.
#
# Looks up PRIMARY KEY constraint members via information_schema and
# mutates the matching column_info hashes in place (primary_key: true,
# allow_null: false).
#
# @param table_name [Symbol, String] table name
# @param columns [Array] array of [column_name, column_info] pairs
# @param opts [Hash] options; :schema selects the schema (default "main")
# @return [void]
def update_primary_key_info(table_name, columns, opts = {})
  schema_name = opts[:schema] || "main"

  sql = <<~SQL
    SELECT column_name
    FROM information_schema.table_constraints tc
    JOIN information_schema.key_column_usage kcu
      ON tc.constraint_name = kcu.constraint_name
      AND tc.table_schema = kcu.table_schema
      AND tc.table_name = kcu.table_name
    WHERE tc.constraint_type = 'PRIMARY KEY'
      AND tc.table_schema = ?
      AND tc.table_name = ?
  SQL

  pk_columns = []
  execute(sql, [schema_name, table_name.to_s]) do |row|
    pk_columns << row[:column_name].to_sym
  end

  columns.each do |name, info|
    next unless pk_columns.include?(name)

    info[:primary_key] = true
    info[:allow_null] = false # primary keys cannot be null
  end
end
|
616
|
+
|
617
|
+
# Parse index column expressions from DuckDB's duckdb_indexes() output.
#
# DuckDB reports expressions as a JSON-array-like string such as
# "[column_name]" or "['\"column_name\"']"; the surrounding brackets and
# quotes are stripped and the remainder split on commas.
#
# @param expressions_str [String, nil] expressions string from DuckDB
# @return [Array<Symbol>] column names
def parse_index_columns(expressions_str)
  return [] if expressions_str.nil? || expressions_str.empty?

  cleaned = expressions_str.sub(/^\[/, "").sub(/\]$/, "").tr("'\"", "")
  cleaned.split(",").map { |column| column.strip.to_sym }
end
|
629
|
+
|
630
|
+
public
|
631
|
+
|
632
|
+
# Advanced transaction support methods (Requirements 5.5, 5.6, 5.7)
|
633
|
+
|
634
|
+
# DuckDB does not currently support SAVEPOINT / ROLLBACK TO SAVEPOINT,
# so nested transactions fall back to Sequel's default behavior.
#
# @return [Boolean] always false
def supports_savepoints? = false
|
642
|
+
|
643
|
+
# DuckDB does not currently allow setting transaction isolation levels,
# so no level is reported as supported.
#
# @param _level [Symbol] requested isolation level (ignored)
# @return [Boolean] always false
def supports_transaction_isolation_level?(_level) = false
|
652
|
+
|
653
|
+
# DuckDB supports explicit BEGIN, COMMIT, and ROLLBACK statements.
#
# @return [Boolean] always true
def supports_manual_transaction_control? = true
|
660
|
+
|
661
|
+
# DuckDB autocommits by default but offers only limited control over
# that behavior, so autocommit control is reported as unsupported.
#
# @return [Boolean] always false
def supports_autocommit_control? = false
|
668
|
+
|
669
|
+
# DuckDB's autocommit mode cannot be disabled.
#
# @return [Boolean] always false
def supports_autocommit_disable? = false
|
676
|
+
|
677
|
+
# Whether a transaction is currently open.
#
# Relies on Sequel's internal transaction bookkeeping (@transactions).
# Fixed to return a real boolean: the previous expression
# (@transactions && !@transactions.empty?) returned nil — not false —
# when the tracking structure had not yet been initialized.
#
# @return [Boolean] true if at least one transaction is active
def in_transaction?
  !(@transactions.nil? || @transactions.empty?)
end
|
685
|
+
|
686
|
+
# Begin a transaction manually.
# Sequel calls this with (conn, opts) arguments.
#
# NOTE(review): an isolation clause is honored here even though
# supports_transaction_isolation_level? reports false — confirm whether
# DuckDB actually accepts these SET TRANSACTION statements.
#
# @param conn [::DuckDB::Connection] Database connection
# @param opts [Hash] Transaction options; :isolation may be
#   :read_uncommitted or :read_committed
# @return [void]
# @raise [Sequel::DatabaseError] for any other isolation level
def begin_transaction(conn, opts = {})
  if opts[:isolation]
    isolation_sql = case opts[:isolation]
                    when :read_uncommitted
                      "SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED"
                    when :read_committed
                      "SET TRANSACTION ISOLATION LEVEL READ COMMITTED"
                    else
                      raise Sequel::DatabaseError, "Unsupported isolation level: #{opts[:isolation]}"
                    end
    conn.query(isolation_sql)
  end

  conn.query("BEGIN TRANSACTION")
end
|
707
|
+
|
708
|
+
# Commit the current transaction manually.
# Sequel calls this with (conn, opts) arguments.
#
# @param conn [::DuckDB::Connection] database connection
# @param _opts [Hash] options (unused)
# @return [void]
def commit_transaction(conn, _opts = {}) = conn.query("COMMIT")
|
717
|
+
|
718
|
+
# Roll back the current transaction manually.
# Sequel calls this with (conn, opts) arguments.
#
# @param conn [::DuckDB::Connection] database connection
# @param _opts [Hash] options (unused)
# @return [void]
def rollback_transaction(conn, _opts = {}) = conn.query("ROLLBACK")
|
727
|
+
|
728
|
+
# Override Sequel's transaction method to support advanced features.
#
# NOTE(review): supports_savepoints? and
# supports_transaction_isolation_level? both currently return false, so
# the two special-case branches below are effectively dead code and
# every call falls through to Sequel's standard handling via super.
def transaction(opts = {}, &)
  # Handle savepoint transactions (nested transactions)
  return savepoint_transaction(opts, &) if opts[:savepoint] && supports_savepoints?

  # Handle isolation level setting
  if opts[:isolation] && supports_transaction_isolation_level?(opts[:isolation])
    return isolation_transaction(
      opts,
      &
    )
  end

  # Fall back to standard Sequel transaction handling
  super
end
|
744
|
+
|
745
|
+
private
|
746
|
+
|
747
|
+
# Handle savepoint-based nested transactions.
#
# Creates a uniquely named savepoint, runs the block, and releases the
# savepoint on success. Sequel::Rollback rolls back to the savepoint
# and returns nil; any other exception rolls back (best effort) and is
# re-raised.
#
# NOTE(review): supports_savepoints? returns false, so this path is not
# reachable through #transaction at present.
#
# @param opts [Hash] Transaction options (:server selects the shard)
# @return [Object, nil] Result of the transaction block, or nil on
#   explicit rollback
def savepoint_transaction(opts = {})
  # Timestamp-derived name keeps concurrent savepoints distinct.
  savepoint_name = "sp_#{Time.now.to_f.to_s.gsub(".", "_")}"

  synchronize(opts[:server]) do |conn|
    # Create savepoint
    conn.query("SAVEPOINT #{savepoint_name}")

    # Execute the block
    result = yield

    # Release savepoint on success
    conn.query("RELEASE SAVEPOINT #{savepoint_name}")

    result
  rescue Sequel::Rollback
    # Rollback to savepoint on explicit rollback; swallowed by design.
    conn.query("ROLLBACK TO SAVEPOINT #{savepoint_name}")
    conn.query("RELEASE SAVEPOINT #{savepoint_name}")
    nil
  rescue StandardError => e
    # Rollback to savepoint on any other exception, then re-raise.
    begin
      conn.query("ROLLBACK TO SAVEPOINT #{savepoint_name}")
      conn.query("RELEASE SAVEPOINT #{savepoint_name}")
    rescue ::DuckDB::Error
      # Ignore errors during rollback cleanup
    end
    raise e
  end
end
|
782
|
+
|
783
|
+
# Handle transactions with specific isolation levels.
#
# Issues SET TRANSACTION ISOLATION LEVEL before BEGIN, runs the block,
# and commits on success. Sequel::Rollback rolls back and returns nil;
# any other exception rolls back (best effort) and is re-raised.
#
# NOTE(review): supports_transaction_isolation_level? returns false, so
# this path is not reachable through #transaction at present.
#
# @param opts [Hash] Transaction options including :isolation
#   (:read_uncommitted or :read_committed)
# @return [Object, nil] Result of the transaction block, or nil on
#   explicit rollback
# @raise [Sequel::DatabaseError] for any other isolation level
def isolation_transaction(opts = {})
  synchronize(opts[:server]) do |conn|
    # Set isolation level before beginning transaction
    isolation_sql = case opts[:isolation]
                    when :read_uncommitted
                      "SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED"
                    when :read_committed
                      "SET TRANSACTION ISOLATION LEVEL READ COMMITTED"
                    else
                      raise Sequel::DatabaseError, "Unsupported isolation level: #{opts[:isolation]}"
                    end

    conn.query(isolation_sql)
    conn.query("BEGIN TRANSACTION")

    # Execute the block
    result = yield

    # Commit on success
    conn.query("COMMIT")

    result
  rescue Sequel::Rollback
    # Rollback on explicit rollback; swallowed by design.
    conn.query("ROLLBACK")
    nil
  rescue StandardError => e
    # Rollback on any other exception, then re-raise.
    begin
      conn.query("ROLLBACK")
    rescue ::DuckDB::Error
      # Ignore errors during rollback cleanup
    end
    raise e
  end
end
|
823
|
+
|
824
|
+
# DuckDB-specific schema generation methods

# Generate SQL for a primary key column.
#
# @param column [Symbol] Column name
# @param _opts [Hash] Column options (unused)
# @return [String] SQL fragment for the primary key column
def primary_key_column_sql(column, _opts)
  # DuckDB has no AUTOINCREMENT keyword, so a plain INTEGER PRIMARY KEY
  # is emitted.
  quoted = String.new
  quote_identifier_append(quoted, column)
  "#{quoted} INTEGER PRIMARY KEY"
end

# Override to prevent AUTOINCREMENT from ever being appended to column
# definitions.
def auto_increment_sql
  ""
end

# Generate SQL for an auto-incrementing column.
# DuckDB relies on sequences rather than AUTOINCREMENT, so primary keys are
# emitted as plain INTEGER PRIMARY KEY.
#
# @param column [Symbol] Column name
# @param _opts [Hash] Column options (unused)
# @return [String] SQL fragment for the column
def auto_increment_column_sql(column, _opts)
  quoted = String.new
  quote_identifier_append(quoted, column)
  "#{quoted} INTEGER PRIMARY KEY"
end
|
857
|
+
|
858
|
+
# Map Ruby/Sequel column types to DuckDB type names.
#
# @param opts [Hash] Column options; reads :type and optional :size
# @return [String] DuckDB type name (falls back to Sequel's default mapping)
def type_literal(opts)
  # Fixed, parameterless mappings handled by a simple lookup table.
  fixed = {
    primary_key: "INTEGER", integer: "INTEGER", bigint: "BIGINT",
    float: "REAL", real: "REAL", double: "DOUBLE",
    boolean: "BOOLEAN", date: "DATE",
    datetime: "TIMESTAMP", timestamp: "TIMESTAMP", time: "TIME",
    blob: "BLOB", binary: "BLOB"
  }

  type = opts[:type]
  case type
  when :string, :text
    # Sized strings become VARCHAR(n); unsized default to bare VARCHAR.
    opts[:size] ? "VARCHAR(#{opts[:size]})" : "VARCHAR"
  when :decimal, :numeric
    # :size may be a scalar or a [precision, scale] pair.
    opts[:size] ? "DECIMAL(#{Array(opts[:size]).join(",")})" : "DECIMAL"
  else
    # Unknown types defer to Sequel's standard type mapping.
    fixed.fetch(type) { super }
  end
end
|
898
|
+
|
899
|
+
# Execute SQL statement against DuckDB connection
#
# Runs the statement — as a prepared statement when params are given — while
# logging the query, its timing, and any error. With a block, each result row
# is yielded as a symbol-keyed Hash; without one, the raw DuckDB result
# object is returned.
#
# @param conn [::DuckDB::Connection] Database connection (already connected)
# @param sql [String] SQL statement to execute
# @param params [Array] Parameters for prepared statement
# @param _opts [Hash] Options for execution (currently unused)
# @return [Object] Raw DuckDB result, or the result of row iteration when a
#   block is given
# @raise [StandardError] mapped Sequel exception class for DuckDB errors;
#   other errors are re-raised unchanged
def execute_statement(conn, sql, params = [], _opts = {})
  # Log the SQL query with timing information (Requirements 8.4, 8.5)
  start_time = Time.now

  begin
    # Log the SQL query before execution
    log_sql_query(sql, params)

    # Handle parameterized queries
    if params && !params.empty?
      # Prepare statement with ? placeholders.
      # NOTE(review): the prepared statement is never explicitly destroyed —
      # confirm the duckdb gem releases it on GC or add explicit cleanup.
      stmt = conn.prepare(sql)

      # Bind parameters using 1-based indexing
      params.each_with_index do |param, index|
        stmt.bind(index + 1, param)
      end

      # Execute the prepared statement
      result = stmt.execute
    else
      # Execute directly without parameters
      result = conn.query(sql)
    end

    # Log timing information for the operation
    end_time = Time.now
    execution_time = end_time - start_time
    log_sql_timing(sql, execution_time)

    if block_given?
      # Get column names from the result
      columns = result.columns

      # Iterate through each row
      result.each do |row_array|
        # Convert the positional row array to a hash keyed by column symbol
        row_hash = {}
        columns.each_with_index do |column, index|
          # DuckDB::Column objects have a name method; fall back to to_s
          column_name = column.respond_to?(:name) ? column.name : column.to_s
          row_hash[column_name.to_sym] = row_array[index]
        end
        yield row_hash
      end
    else
      result
    end
  rescue ::DuckDB::Error => e
    # Log the error for debugging (Requirement 8.6)
    end_time = Time.now
    execution_time = end_time - start_time
    log_sql_error(sql, params, e, execution_time)

    # Use enhanced error mapping for better exception categorization
    # (Requirements 8.1, 8.2, 8.3, 8.7)
    error_opts = { sql: sql, params: params }
    exception_class = database_exception_class(e, error_opts)
    enhanced_message = database_exception_message(e, error_opts)

    raise exception_class, enhanced_message
  rescue StandardError => e
    # Log unexpected errors, then re-raise them unchanged
    end_time = Time.now
    execution_time = end_time - start_time
    log_sql_error(sql, params, e, execution_time)
    raise e
  end
end
|
974
|
+
|
975
|
+
# Log SQL query execution (Requirement 8.4)
#
# @param sql [String] SQL statement
# @param params [Array] Parameters for the query
def log_sql_query(sql, params = [])
  return unless log_connection_info?

  message = +"SQL Query: #{sql}"
  # Bound parameters are appended only for parameterized queries.
  message << " -- Parameters: #{params.inspect}" if params && !params.empty?
  log_info(message)
end

# Log SQL query timing information (Requirement 8.5)
#
# @param sql [String] SQL statement
# @param execution_time [Float] Time taken to execute in seconds
def log_sql_timing(sql, execution_time)
  return unless log_connection_info?

  time_ms = (execution_time * 1000).round(2)

  # Queries slower than one second are surfaced as warnings with their SQL.
  return log_warn("SLOW SQL Query (#{time_ms}ms): #{sql}") if execution_time > 1.0

  log_info("SQL Query completed in #{time_ms}ms")
end

# Log SQL query errors (Requirement 8.6)
#
# @param sql [String] SQL statement that failed
# @param params [Array] Parameters for the query
# @param error [Exception] The error that occurred
# @param execution_time [Float] Time taken before the error
def log_sql_error(sql, params, error, execution_time)
  return unless log_connection_info?

  time_ms = (execution_time * 1000).round(2)
  message = +"SQL Error after #{time_ms}ms: #{error.message} -- SQL: #{sql}"
  message << " -- Parameters: #{params.inspect}" if params && !params.empty?
  log_error(message)
end

# Check if connection info should be logged.
#
# @return [Boolean] true when at least one Sequel logger is attached
def log_connection_info?
  return false if loggers.empty?

  true
end
|
1033
|
+
|
1034
|
+
# Log an informational message through Sequel's logging system.
#
# @param message [String] Message to log
def log_info(message)
  log_connection_yield(message, nil) { nil }
end

# Log a warning message through Sequel's logging system.
#
# @param message [String] Message to log
def log_warn(message)
  prefixed = "WARNING: #{message}"
  log_connection_yield(prefixed, nil) { nil }
end

# Log an error message through Sequel's logging system.
#
# @param message [String] Message to log
def log_error(message)
  prefixed = "ERROR: #{message}"
  log_connection_yield(prefixed, nil) { nil }
end
|
1054
|
+
|
1055
|
+
public
|
1056
|
+
|
1057
|
+
# EXPLAIN functionality access for query plans (Requirement 9.6)
#
# @param sql [String] SQL query to explain
# @return [Array<Hash>] Query plan rows as returned by DuckDB
def explain_query(sql)
  rows = []
  execute("EXPLAIN #{sql}") { |row| rows << row }
  rows
end

# Get the query plan for a SQL statement as readable text.
#
# @param sql [String] SQL statement to analyze
# @return [String] Query plan string, or a placeholder when empty
def query_plan(sql)
  rows = explain_query(sql)
  return "No query plan available" if rows.empty?

  # Join each plan row's values into a single pipe-separated line.
  rows.map { |row| row.values.join(" | ") }.join("\n")
end

# Check if EXPLAIN functionality is supported.
#
# @return [Boolean] always true — DuckDB supports EXPLAIN
def supports_explain?
  true
end

# Get detailed query analysis information.
#
# @param sql [String] SQL statement to analyze
# @return [Hash] :plan (String), :explain_output (Array<Hash>),
#   :supports_explain (Boolean)
def analyze_query(sql)
  {
    plan: query_plan(sql),
    explain_output: explain_query(sql),
    supports_explain: supports_explain?
  }
end
|
1105
|
+
|
1106
|
+
# DuckDB configuration methods for performance optimization

# Set DuckDB configuration value via PRAGMA.
#
# NOTE(review): key and value are interpolated directly into the PRAGMA
# statement — callers must only pass trusted, adapter-controlled values.
#
# @param key [String] Configuration key
# @param value [Object] Configuration value
def set_config_value(key, value)
  synchronize do |conn|
    # Use PRAGMA for DuckDB configuration
    conn.query("PRAGMA #{key} = #{value}")
  end
end

# Get DuckDB configuration value via PRAGMA.
#
# @param key [String] Configuration key
# @return [Object, nil] First value of the first result row, or nil when
#   the PRAGMA yields no rows
def get_config_value(key)
  result = nil
  synchronize do |conn|
    # NOTE(review): assumes conn.query yields result rows when given a
    # block — verify against the duckdb gem version in use.
    conn.query("PRAGMA #{key}") do |row|
      result = row.values.first
      break
    end
  end
  result
end
|
1134
|
+
|
1135
|
+
# Configure DuckDB for optimal parallel execution.
#
# @param thread_count [Integer, nil] Number of threads; defaults to
#   min(4, available CPU cores)
def configure_parallel_execution(thread_count = nil)
  threads = thread_count || [4, cpu_count].min

  set_config_value("threads", threads)
  set_config_value("enable_optimizer", true)
  # Profiling adds overhead, so it is switched off for throughput.
  set_config_value("enable_profiling", false)
end

# Configure DuckDB for memory-efficient operations.
#
# @param memory_limit [String] Memory limit (e.g. "1GB", "512MB")
def configure_memory_optimization(memory_limit = "1GB")
  # PRAGMA string values must be single-quoted.
  set_config_value("memory_limit", "'#{memory_limit}'")
  set_config_value("temp_directory", "'/tmp'")
end

# Configure DuckDB for columnar storage optimization: keep the optimizer
# on while disabling profiling and the progress bar.
def configure_columnar_optimization
  set_config_value("enable_optimizer", true)
  set_config_value("enable_profiling", false)
  set_config_value("enable_progress_bar", false)
end
|
1160
|
+
|
1161
|
+
private
|
1162
|
+
|
1163
|
+
# Number of processors available, used to size DuckDB's thread pool.
# Falls back to 4 when processor detection is unavailable.
def cpu_count
  begin
    require "etc"
    Etc.nprocessors
  rescue StandardError
    # Conservative default when Etc is missing or nprocessors fails.
    4
  end
end
|
1170
|
+
|
1171
|
+
# Type conversion methods for DuckDB-specific handling

# Convert DuckDB TIME values to Ruby time-of-day objects.
# DuckDB TIME columns carry no date, so results are normalized onto the
# epoch date (1970-01-01), keeping only hour/min/sec/usec.
def typecast_value_time(value)
  case value
  when Time
    # Drop the date portion, keeping the wall-clock time on the epoch date.
    Time.local(1970, 1, 1, value.hour, value.min, value.sec, value.usec)
  when String
    if (m = /\A(\d{1,2}):(\d{2}):(\d{2})(?:\.(\d+))?\z/.match(value))
      # Fractional seconds are right-padded to microseconds ("5" -> 500000).
      usec = (m[4] || "0").ljust(6, "0").to_i
      Time.local(1970, 1, 1, m[1].to_i, m[2].to_i, m[3].to_i, usec)
    else
      # Fall back to a full parse, then strip the date portion.
      parsed = Time.parse(value.to_s)
      Time.local(1970, 1, 1, parsed.hour, parsed.min, parsed.sec, parsed.usec)
    end
  else
    # Non-time values pass through untouched.
    value
  end
end
|
1198
|
+
|
1199
|
+
# Override the default type conversion to use our custom TIME handling
|
1200
|
+
# This method needs to be public for Sequel models to access it
|
1201
|
+
public
|
1202
|
+
|
1203
|
+
# Route TIME-typed columns through typecast_value_time; everything else
# uses Sequel's default conversion. Public so Sequel models can call it.
#
# @param column [Symbol] Column name used to look up the cached schema type
# @param value [Object] Raw value from DuckDB
# @return [Object] Converted value (nil passes through unchanged)
def typecast_value(column, value)
  return value if value.nil?

  # Consult the cached schema (when present) for this column's type.
  cached = @schema_cache && @schema_cache[column]
  return typecast_value_time(value) if cached && cached[:type] == :time

  # Fall back to default Sequel type conversion.
  super
end
|
1218
|
+
end
|
1219
|
+
|
1220
|
+
# DatasetMethods module provides shared dataset functionality for DuckDB adapter
|
1221
|
+
# This module is included by the main Dataset class to provide SQL generation
|
1222
|
+
# and query execution capabilities.
|
1223
|
+
module DatasetMethods
|
1224
|
+
# DuckDB reserved words that must be quoted when used as identifiers.
# The list mixes SQL keywords, join/DDL terms, and type names; membership
# is checked case-insensitively by identifier_needs_quoting?.
DUCKDB_RESERVED_WORDS = %w[
  order group select from where having limit offset union all distinct
  case when then else end and or not in like between is null true false
  join inner left right full outer on using as with recursive
  create table view index drop alter insert update delete
  primary key foreign references constraint unique check default
  auto_increment serial bigserial smallserial
  integer int bigint smallint tinyint boolean bool
  varchar char text string blob
  date time timestamp datetime interval
  float double real decimal numeric
  array struct map
].freeze
|
1238
|
+
|
1239
|
+
private
|
1240
|
+
|
1241
|
+
# DuckDB uses lowercase identifiers; pass input names through unchanged.
def input_identifier(value)
  value.to_s
end

# Convert an identifier returned by DuckDB into a Ruby symbol.
# An empty name maps to :untitled.
def output_identifier(value)
  return :untitled if value == ""

  value.to_sym
end
|
1250
|
+
|
1251
|
+
public
|
1252
|
+
|
1253
|
+
# Delegate quote_identifiers_default to the database object.
def quote_identifiers_default
  db.quote_identifiers_default
end

# Check whether an identifier needs quoting: either Sequel's default rules
# require it, or the name is a DuckDB reserved word.
def identifier_needs_quoting?(name)
  default_says_so = super
  return true if default_says_so

  DUCKDB_RESERVED_WORDS.include?(name.to_s.downcase)
end
|
1264
|
+
|
1265
|
+
# Generate INSERT SQL statement.
#
# Supports three call shapes:
# - no values / a single empty collection -> INSERT ... DEFAULT VALUES
# - a single Hash                         -> single-row INSERT
# - a single Array of Hashes              -> multi-row INSERT (column list
#   taken from the first record)
#
# @param values [Array] Values to insert
# @return [String] The INSERT SQL statement
def insert_sql(*values)
  return @opts[:sql] if @opts[:sql]

  prefix = "INSERT INTO #{table_name_sql}"

  # No values, or a single empty hash/array -> DEFAULT VALUES.
  return "#{prefix} DEFAULT VALUES" if values.empty? || (values.length == 1 && values.first.empty?)

  payload = values.length == 1 ? values.first : nil
  case payload
  when Hash
    columns = payload.keys
    "#{prefix} #{literal(columns)} VALUES #{literal(columns.map { |c| payload[c] })}"
  when Array
    # Multi-record insert: every record is projected onto the first
    # record's column list.
    columns = payload.first.keys
    tuples = payload.map { |record| literal(columns.map { |c| record[c] }) }
    "#{prefix} #{literal(columns)} VALUES #{tuples.join(", ")}"
  else
    # Any other call shape falls back to DEFAULT VALUES.
    "#{prefix} DEFAULT VALUES"
  end
end
|
1306
|
+
|
1307
|
+
# Generate UPDATE SQL statement.
#
# @param values [Hash] Column => value pairs for the SET clause
# @return [String] The UPDATE SQL statement
def update_sql(values = {})
  return @opts[:sql] if @opts[:sql]

  # Build "col = literal" assignments with properly quoted column names.
  assignments = values.map do |column, value|
    quoted = String.new
    quote_identifier_append(quoted, column)
    "#{quoted} = #{literal(value)}"
  end

  sql = "UPDATE #{table_name_sql} SET "
  sql << assignments.join(", ")

  # Append the WHERE clause when a filter is present.
  select_where_sql(sql) if @opts[:where]

  sql
end

# Generate DELETE SQL statement.
#
# @return [String] The DELETE SQL statement
def delete_sql
  return @opts[:sql] if @opts[:sql]

  sql = "DELETE FROM #{table_name_sql}"

  # Append the WHERE clause when a filter is present.
  select_where_sql(sql) if @opts[:where]

  sql
end
|
1343
|
+
|
1344
|
+
# DuckDB capability flags

# Window functions (OVER clauses) are available.
def supports_window_functions?
  true
end

# Common table expressions (WITH clauses) are available.
def supports_cte?
  true
end

# The adapter reports no RETURNING clause support.
def supports_returning?(_type = nil)
  false
end

# SELECT with OFFSET is accepted.
def supports_select_all_and_offset?
  true
end

# JOIN ... USING (col) syntax is supported.
def supports_join_using?
  true
end
|
1364
|
+
|
1365
|
+
# Validate table names in the FROM clause for SELECT operations.
# Raises when any source is nil or blank; no-op without a FROM clause.
def validate_table_name_for_select
  sources = @opts[:from]
  return unless sources

  sources.each do |table|
    blank = table.respond_to?(:to_s) && table.to_s.strip.empty?
    raise ArgumentError, "Table name cannot be nil or empty" if table.nil? || blank
  end
end

# Check if a word is a core SQL reserved word that needs quoting.
def reserved_word?(word)
  keywords = %w[order group select from where having limit offset]
  keywords.include?(word.downcase)
end

# Get the properly quoted name of the first FROM table.
#
# @return [String] Quoted table name
# @raise [ArgumentError] when the FROM clause is missing, nil, or empty
def table_name_sql
  sources = @opts[:from]
  raise ArgumentError, "Table name cannot be nil or empty" if sources.nil? || sources.empty?

  first_source = sources.first
  raise ArgumentError, "Table name cannot be nil" if first_source.nil?

  name = first_source.to_s
  raise ArgumentError, "Table name cannot be empty" if name.empty?

  # quote_identifier_append respects the quote_identifiers? setting.
  quoted = String.new
  quote_identifier_append(quoted, name)
  quoted
end
|
1398
|
+
|
1399
|
+
private
|
1400
|
+
|
1401
|
+
# Override the WITH clause generation to support the RECURSIVE keyword.
# Emits "WITH RECURSIVE" when any CTE is either explicitly marked
# recursive or auto-detected as self-referencing (see cte_is_recursive?).
#
# NOTE(review): CTE column lists (w[:args]) are not emitted here — confirm
# no caller relies on them.
def select_with_sql(sql)
  return unless opts[:with]

  # Check if any WITH clause is recursive (explicitly marked or detected)
  has_recursive = opts[:with].any? { |w| w[:recursive] || cte_is_recursive?(w) }

  # Add WITH or WITH RECURSIVE prefix
  sql << (has_recursive ? "WITH RECURSIVE " : "WITH ")

  # Add each CTE as "name AS (subquery SQL)", comma-separated
  opts[:with].each_with_index do |w, i|
    sql << ", " if i.positive?
    name_sql = String.new
    quote_identifier_append(name_sql, w[:name])
    sql << "#{name_sql} AS (#{w[:dataset].sql})"
  end

  # Trailing space separates the WITH clause from the main query
  sql << " "
end
|
1421
|
+
|
1422
|
+
# Auto-detect whether a CTE is recursive by scanning its SQL for
# references to its own name in FROM/JOIN position.
#
# @param cte_info [Hash] CTE information hash with :name and :dataset
# @return [Boolean] true if the CTE appears to reference itself
def cte_is_recursive?(cte_info)
  dataset = cte_info[:dataset]
  return false unless dataset

  # Word boundaries avoid false positives on partial name matches
  # (e.g. "tree" inside "treehouse").
  name = Regexp.escape(cte_info[:name].to_s)
  dataset.sql.match?(/\b(?:FROM|JOIN)\s+#{name}\b/i)
end
|
1439
|
+
|
1440
|
+
public
|
1441
|
+
|
1442
|
+
# Override select_from_sql to reject nil table names before delegating
# to Sequel's standard FROM-clause generation.
def select_from_sql(sql)
  tables = @opts[:from]
  tables&.each do |table|
    raise ArgumentError, "Table name cannot be nil" if table.nil?
  end

  # Delegate actual SQL generation to the parent implementation.
  super
end
|
1454
|
+
|
1455
|
+
# Add JOIN clauses to SQL (Requirement 6.9)
#
# Dispatches on Sequel's three join clause flavors:
# - JoinOnClause:    <TYPE> JOIN <table> ON <conditions>
# - JoinUsingClause: <TYPE> JOIN <table> [AS alias] USING (cols)
# - JoinClause:      CROSS/NATURAL/INNER JOIN with no conditions
def select_join_sql(sql)
  return unless @opts[:join]

  @opts[:join].each do |join|
    case join
    when Sequel::SQL::JoinOnClause
      append_join_on_clause(sql, join)
    when Sequel::SQL::JoinUsingClause
      append_join_using_clause(sql, join)
    when Sequel::SQL::JoinClause
      append_plain_join_clause(sql, join)
    end
  end
end

# Map a Sequel join type symbol to its SQL keyword for ON/USING joins.
# Shared by the ON and USING branches (previously duplicated inline).
def duckdb_join_keyword(join_type)
  case join_type
  when :left, :left_outer then "LEFT JOIN"
  when :right, :right_outer then "RIGHT JOIN"
  when :full, :full_outer then "FULL JOIN"
  else "INNER JOIN" # :inner and anything unrecognized
  end
end

# Append "<TYPE> JOIN <table> ON <conditions>" for a JoinOnClause.
def append_join_on_clause(sql, join)
  sql << " #{duckdb_join_keyword(join.join_type || :inner)} "

  table = join.table
  sql << if table.is_a?(Sequel::Dataset)
           # Derived table: alias it so the join target is addressable.
           alias_sql = String.new
           quote_identifier_append(alias_sql, join.table_alias || "subquery")
           "(#{table.sql}) AS #{alias_sql}"
         else
           literal(table)
         end

  return unless join.on

  sql << " ON "
  literal_append(sql, join.on)
end

# Append "<TYPE> JOIN <table> [AS alias] USING (cols)" for a JoinUsingClause.
def append_join_using_clause(sql, join)
  sql << " #{duckdb_join_keyword(join.join_type || :inner)} "

  table = join.table
  sql << (table.is_a?(Sequel::Dataset) ? "(#{table.sql})" : literal(table))

  if join.table_alias
    sql << " AS "
    quote_identifier_append(sql, join.table_alias)
  end

  return unless join.using

  sql << " USING ("
  Array(join.using).each_with_index do |col, i|
    sql << ", " if i.positive?
    quote_identifier_append(sql, col)
  end
  sql << ")"
end

# Append a condition-less join (CROSS, NATURAL, or default INNER).
def append_plain_join_clause(sql, join)
  keyword = case join.join_type || :inner
            when :cross then "CROSS JOIN"
            when :natural then "NATURAL JOIN"
            else "INNER JOIN"
            end
  sql << " #{keyword} "
  sql << literal(join.table)
end
|
1557
|
+
|
1558
|
+
# Add WHERE clause to SQL (Requirement 6.4)
def select_where_sql(sql)
  condition = @opts[:where]
  return unless condition

  sql << " WHERE "
  literal_append(sql, condition)
end

# Add GROUP BY clause to SQL (Requirement 6.7)
def select_group_sql(sql)
  group = @opts[:group]
  return unless group

  sql << " GROUP BY "
  if group.is_a?(Array)
    # Multiple grouping expressions are comma-joined.
    sql << group.map { |col| literal(col) }.join(", ")
  else
    literal_append(sql, group)
  end
end

# Add HAVING clause to SQL (Requirement 6.8)
def select_having_sql(sql)
  having = @opts[:having]
  return unless having

  sql << " HAVING "
  literal_append(sql, having)
end

# Add ORDER BY clause to SQL (Requirement 6.5)
def select_order_sql(sql)
  order = @opts[:order]
  return unless order

  sql << " ORDER BY "
  sql << if order.is_a?(Array)
           order.map { |col| order_column_sql(col) }.join(", ")
         else
           order_column_sql(order)
         end
end

# Format a single ORDER BY term, handling ASC/DESC and NULLS FIRST/LAST.
def order_column_sql(column)
  return literal(column) unless column.is_a?(Sequel::SQL::OrderedExpression)

  term = literal(column.expression)
  term << (column.descending ? " DESC" : " ASC")
  # :nulls may be absent in older Sequel versions, so probe before reading.
  if column.respond_to?(:nulls) && column.nulls
    term << (column.nulls == :first ? " NULLS FIRST" : " NULLS LAST")
  end
  term
end
|
1613
|
+
|
1614
|
+
# DuckDB-specific SQL generation enhancements

# Override complex_expression_sql_append for DuckDB-specific operators.
# Mirror-image operators share a branch and differ only in the emitted
# keyword/suffix.
def complex_expression_sql_append(sql, operator, args)
  case operator
  when :LIKE, :"NOT LIKE"
    # Clean (NOT) LIKE without an ESCAPE clause (Requirement 1.1)
    sql << "("
    literal_append(sql, args.first)
    sql << (operator == :LIKE ? " LIKE " : " NOT LIKE ")
    literal_append(sql, args.last)
    sql << ")"
  when :ILIKE, :"NOT ILIKE"
    # Emulate case-insensitive matching with UPPER() on both operands,
    # keeping explicit parentheses (Requirement 1.3)
    sql << "(UPPER("
    literal_append(sql, args.first)
    sql << (operator == :ILIKE ? ") LIKE UPPER(" : ") NOT LIKE UPPER(")
    literal_append(sql, args.last)
    sql << "))"
  when :~, :"~*"
    # Regular expression matching via regexp_matches(), which handles
    # anchors more reliably than DuckDB's ~ operator; the 'i' flag gives
    # the case-insensitive variant (Requirements 4.1-4.3)
    sql << "(regexp_matches("
    literal_append(sql, args.first)
    sql << ", "
    literal_append(sql, args.last)
    sql << (operator == :"~*" ? ", 'i'))" : "))")
  else
    super
  end
end
|
1668
|
+
|
1669
|
+
# Override join to support the :using option, producing JOIN ... USING (cols).
#
# @param table [Object] Table to join
# @param expr [Object, nil] Join expression (standard Sequel semantics)
# @param options [Hash] :using (column list) and :type (join type symbol)
def join(table, expr = nil, options = {})
  using_cols = options.is_a?(Hash) ? options[:using] : nil
  # Without :using, defer entirely to Sequel's standard join handling.
  return super unless using_cols

  clause = Sequel::SQL::JoinUsingClause.new(
    Array(using_cols),
    options[:type] || :inner,
    table
  )
  clone(join: (@opts[:join] || []) + [clause])
end
|
1682
|
+
|
1683
|
+
# Append a single-quoted SQL string literal, doubling embedded quotes.
def literal_string_append(sql, string)
  escaped = string.gsub("'", "''")
  sql << "'#{escaped}'"
end
|
1687
|
+
|
1688
|
+
def literal_date(date)
|
1689
|
+
"'#{date.strftime("%Y-%m-%d")}'"
|
1690
|
+
end
|
1691
|
+
|
1692
|
+
def literal_datetime(datetime)
|
1693
|
+
"'#{datetime.strftime("%Y-%m-%d %H:%M:%S")}'"
|
1694
|
+
end
|
1695
|
+
|
1696
|
+
# Format a time value as a quoted DuckDB TIME literal, e.g. "'23:04:05'".
def literal_time(time)
  time.strftime("'%H:%M:%S'")
end
|
1699
|
+
|
1700
|
+
# Override symbol literal handling to prevent asterisk from being quoted
#
# The :* symbol is emitted as a bare asterisk so COUNT(*) is not
# rendered as COUNT("*"); every other symbol goes through Sequel's
# standard symbol quoting.
def literal_symbol_append(sql, value)
  return super unless value == :*

  sql << "*"
end
|
1711
|
+
|
1712
|
+
# Render a Ruby truthy/falsy value as DuckDB's TRUE/FALSE keywords.
def literal_boolean(value)
  if value
    "TRUE"
  else
    "FALSE"
  end
end
|
1715
|
+
|
1716
|
+
# SQL keyword DuckDB uses for a literal true value.
def literal_true = "TRUE"
|
1719
|
+
|
1720
|
+
# SQL keyword DuckDB uses for a literal false value.
def literal_false = "FALSE"
|
1723
|
+
|
1724
|
+
# Override literal_append to handle DuckDB-specific type conversions
# Only handles cases that differ from Sequel's default behavior
#
# Dispatch order matters here: Time is checked before DateTime (they are
# distinct classes), and binary Strings are intercepted before Sequel's
# String handling (which covers LiteralString, a String subclass).
#
# @param sql [String] buffer the literal is appended to
# @param value [Object] value to render as a SQL literal
def literal_append(sql, value)
  case value
  when Time
    # Special handling for time-only values (year 1970 indicates time-only)
    # NOTE(review): any Time falling on 1970-01-01 is treated as time-only —
    # confirm genuine epoch-date timestamps cannot reach this path.
    if value.year == 1970 && value.month == 1 && value.day == 1
      # This is a time-only value, use TIME format
      sql << "'#{value.strftime("%H:%M:%S")}'"
    else
      # Use our custom datetime formatting for consistency
      literal_datetime_append(sql, value)
    end
  when DateTime
    # Use our custom datetime formatting for consistency
    literal_datetime_append(sql, value)
  when String
    # Only handle binary data differently for DuckDB's hex format
    if value.encoding == Encoding::ASCII_8BIT
      literal_blob_append(sql, value)
    else
      # Let Sequel handle LiteralString and regular strings
      super
    end
  else
    super
  end
end
|
1752
|
+
|
1753
|
+
# Helper method for datetime literal appending
#
# Appends a quoted timestamp literal for +datetime+ to +sql+.
def literal_datetime_append(sql, datetime)
  sql << datetime.strftime("'%Y-%m-%d %H:%M:%S'")
end
|
1757
|
+
|
1758
|
+
# Helper method for binary data literal appending
#
# DuckDB expects BLOB literals as quoted lowercase hex without a \x prefix.
def literal_blob_append(sql, blob)
  sql << "'" << blob.unpack1("H*") << "'"
end
|
1763
|
+
|
1764
|
+
# Literal conversion for binary data (BLOB type)
#
# @param blob [String] binary string to render
# @return [String] quoted lowercase-hex literal, e.g. "'01ab'"
def literal_blob(blob)
  hex = blob.unpack1("H*")
  "'#{hex}'"
end
|
1768
|
+
|
1769
|
+
# Dataset operation methods (Requirements 6.1, 6.2, 6.3, 9.5)
|
1770
|
+
|
1771
|
+
# Override all method to ensure proper model instantiation
# Sequel's default all method doesn't always apply row_proc correctly
#
# Fetches every row of the dataset's SELECT and materializes them into
# an Array, passing each row through the dataset's row_proc (used by
# Sequel::Model to wrap hashes in model instances) when one is set.
#
# @return [Array<Hash, Object>] all rows, transformed by row_proc if present
def all
  records = []
  fetch_rows(select_sql) do |row|
    # Apply row_proc if it exists (for model instantiation);
    # check the ivar first, then the dataset options.
    row_proc = @row_proc || opts[:row_proc]
    processed_row = row_proc ? row_proc.call(row) : row
    records << processed_row
  end
  records
end
|
1783
|
+
|
1784
|
+
# Insert a record into the dataset's table
#
# @param values [Hash] Column values to insert
# @return [Integer, nil] Number of affected rows (always nil for DuckDB due to no AUTOINCREMENT)
def insert(values = {})
  sql = insert_sql(values)
  result = db.execute(sql)

  # For DuckDB, we need to return the number of affected rows
  # Since DuckDB doesn't support AUTOINCREMENT, we return nil for the ID
  # but we should return 1 to indicate successful insertion
  if result.is_a?(::DuckDB::Result)
    # DuckDB::Result doesn't have a direct way to get affected rows for INSERT
    # For INSERT operations, if no error occurred, assume 1 row was affected
    # NOTE(review): a multi-row INSERT also reports 1 here — confirm callers
    # rely only on truthiness, not the exact count.
    1
  else
    result
  end
end
|
1803
|
+
|
1804
|
+
# Update records in the dataset
#
# Builds the UPDATE statement and runs it through execute_update, which
# returns the affected-row count.
#
# @param values [Hash] Column values to update
# @return [Integer] Number of affected rows
def update(values = {})
  db.execute_update(update_sql(values))
end
|
1813
|
+
|
1814
|
+
# Delete records from the dataset
#
# Builds the DELETE statement and runs it through execute_update, which
# returns the affected-row count.
#
# @return [Integer] Number of affected rows
def delete
  db.execute_update(delete_sql)
end
|
1822
|
+
|
1823
|
+
# Streaming result support where possible (Requirement 9.5)
#
# With a block, rows are streamed one at a time via fetch_rows; without
# one, a lazy enumerator over the same SQL is returned.
#
# @param sql [String] SQL to execute
# @yield [Hash] Block to process each row
# @return [Enumerator] If no block given, returns enumerator
def stream(sql = select_sql, &)
  return enum_for(:stream, sql) unless block_given?

  fetch_rows(sql, &)
end
|
1837
|
+
|
1838
|
+
# Performance optimization methods (Requirements 9.1, 9.2, 9.3, 9.4)
|
1839
|
+
# These methods are public to provide enhanced performance capabilities
|
1840
|
+
|
1841
|
+
# Optimized fetch_rows method for large result sets (Requirement 9.1)
# This method provides efficient row fetching with streaming capabilities
# Override the existing fetch_rows method to make it public and optimized
#
# @param sql [String] SQL statement to execute
# @yield [Hash] each result row, with TIME column values normalized via
#   #convert_row_types
# @return [Enumerator] when no block is given
def fetch_rows(sql)
  # Use streaming approach to avoid loading all results into memory at once
  # This is particularly important for large result sets
  if block_given?
    # Get schema information for type conversion (nil when unavailable,
    # which disables conversion)
    table_schema = table_schema_for_conversion

    # Execute with type conversion
    db.execute(sql) do |row|
      # Apply type conversion for TIME columns
      converted_row = convert_row_types(row, table_schema)
      yield converted_row
    end
  else
    # Return enumerator if no block given (for compatibility)
    enum_for(:fetch_rows, sql)
  end
end
|
1862
|
+
|
1863
|
+
private
|
1864
|
+
|
1865
|
+
# Get table schema information for type conversion
#
# Resolves the dataset's source table and returns its schema indexed by
# column name. Returns nil when there is no FROM table or the schema
# lookup raises, which disables row type conversion.
#
# @return [Hash, nil] column_name => column_info, or nil
def table_schema_for_conversion
  return nil unless @opts[:from]&.first

  table_name = @opts[:from].first
  # Unwrap identifier objects to get the raw table name
  table_name = table_name.value if table_name.respond_to?(:value)

  begin
    # db.schema yields [column_name, column_info] pairs; index them by name
    db.schema(table_name).to_h
  rescue StandardError
    # If schema lookup fails, return nil to skip type conversion
    nil
  end
end
|
1885
|
+
|
1886
|
+
# Convert row values based on column types
#
# Only TIME columns need adjustment: their Time values are re-anchored
# onto 1970-01-01 so they read as time-of-day values. All other values
# pass through untouched; with no schema the row is returned as-is.
#
# @param row [Hash] raw result row
# @param table_schema [Hash, nil] column_name => column_info
# @return [Hash] row with TIME values normalized
def convert_row_types(row, table_schema)
  return row unless table_schema

  row.each_with_object({}) do |(column_name, value), converted|
    info = table_schema[column_name]
    converted[column_name] =
      if info && info[:type] == :time && value.is_a?(Time)
        Time.local(1970, 1, 1, value.hour, value.min, value.sec, value.usec)
      else
        value
      end
  end
end
|
1902
|
+
|
1903
|
+
public
|
1904
|
+
|
1905
|
+
# Enhanced bulk insert optimization (Requirement 9.3)
# Override multi_insert to use DuckDB's efficient bulk loading capabilities
#
# Non-empty arrays of hashes take the optimized bulk path; everything
# else falls through to Sequel's standard multi_insert.
def multi_insert(columns = nil, &)
  optimizable = columns.is_a?(Array) && !columns.empty? && columns.first.is_a?(Hash)
  return super unless optimizable

  bulk_insert_optimized(columns)
end
|
1916
|
+
|
1917
|
+
# Optimized bulk insert implementation using DuckDB's capabilities
#
# Builds a single multi-row INSERT ... VALUES statement with `?`
# placeholders and executes it with all row values bound in order.
# Column order is taken from the first row's keys.
#
# @param rows [Array<Hash>] rows to insert (same key set per row)
# @return [Integer] number of rows inserted
def bulk_insert_optimized(rows)
  return 0 if rows.empty?

  columns = rows.first.keys
  table_name = @opts[:from].first

  # One "(?, ?, ...)" group per row; DuckDB executes multi-row VALUES efficiently
  row_placeholders = "(#{Array.new(columns.size, "?").join(", ")})"
  values_clause = Array.new(rows.size, row_placeholders).join(", ")

  table_sql = String.new
  quote_identifier_append(table_sql, table_name)

  quoted_columns = columns.map do |column|
    buffer = String.new
    quote_identifier_append(buffer, column)
    buffer
  end

  statement = "INSERT INTO #{table_sql} (#{quoted_columns.join(", ")}) VALUES #{values_clause}"

  # Bind values row-major, matching the placeholder order
  bind_values = rows.flat_map { |row| columns.map { |column| row[column] } }

  db.execute(statement, bind_values)

  rows.length
end
|
1947
|
+
|
1948
|
+
# Prepared statement support for performance (Requirement 9.2)
# Enhanced prepare method that leverages DuckDB's prepared statement capabilities
#
# @param type [Symbol] statement type (:select, :all, :first, :insert,
#   :update, :delete)
# @param name [Symbol, nil] statement name (unused on the native path)
# @param values [Array] values used to build INSERT/UPDATE SQL
# @return [Proc, Object] a lambda wrapping the native prepared statement,
#   or whatever Sequel's standard prepare returns on the fallback path
# @raise [ArgumentError] for an unsupported statement type
def prepare(type, name = nil, *values)
  # Check if DuckDB connection supports prepared statements
  if db.respond_to?(:prepare_statement)
    # Use DuckDB's native prepared statement support
    sql = case type
          when :select, :all
            select_sql
          when :first
            clone(limit: 1).select_sql
          when :insert
            insert_sql(*values)
          when :update
            update_sql(*values)
          when :delete
            delete_sql
          else
            raise ArgumentError, "Unsupported prepared statement type: #{type}"
          end

    # Create and cache prepared statement
    # NOTE(review): the statement is created here but never stored under
    # +name+ — confirm whether per-name caching was intended.
    prepared_stmt = db.prepare_statement(sql)

    # Return a callable object that executes the prepared statement
    lambda do |*params|
      case type
      when :select, :all
        prepared_stmt.execute(*params).to_a
      when :first
        result = prepared_stmt.execute(*params).first
        result
      else
        prepared_stmt.execute(*params)
      end
    end
  else
    # Fall back to standard Sequel prepared statement handling
    super
  end
end
|
1989
|
+
|
1990
|
+
# Connection pooling optimization (Requirement 9.4)
# Enhanced connection management for better performance
#
# Checks a connection out of the pool, validates it, and yields it.
#
# @yield [Object] a checked-out, validated DuckDB connection
# @return [Object] the block's return value
def with_connection_pooling
  # Ensure efficient connection reuse
  db.synchronize do |conn|
    # Verify connection is still valid before use
    unless db.valid_connection?(conn)
      # Reconnect if connection is invalid
      # NOTE(review): the replacement connection is local to this block and
      # is not checked back into the pool — confirm the pool reclaims the
      # stale connection elsewhere.
      conn = db.connect(db.opts)
    end

    yield conn
  end
end
|
2004
|
+
|
2005
|
+
# Memory-efficient streaming for large result sets (Requirement 9.5)
# Enhanced each method with better memory management
#
# Yields every row of the dataset. Queries without an explicit
# LIMIT/OFFSET are fetched in batches (default 1000 rows; configurable
# via #stream_batch_size) by appending LIMIT/OFFSET pagination.
#
# @yield [Hash] each result row
# @return [Dataset, Enumerator] self, or an enumerator when no block given
def each(&)
  return enum_for(:each) unless block_given?

  # Use streaming approach to minimize memory usage
  sql = select_sql

  # Check if SQL already has LIMIT/OFFSET - if so, don't add batching
  if sql.match?(/\bLIMIT\b/i) || sql.match?(/\bOFFSET\b/i)
    # SQL already has LIMIT/OFFSET, execute directly without batching
    fetch_rows(sql, &)
    return self
  end

  # Process results in batches to balance memory usage and performance
  # NOTE(review): OFFSET pagination without a deterministic ORDER BY can
  # repeat or skip rows between batches — confirm ordering guarantees.
  batch_size = @opts[:stream_batch_size] || 1000
  offset = 0

  loop do
    # Fetch a batch of results
    batch_sql = "#{sql} LIMIT #{batch_size} OFFSET #{offset}"
    batch_count = 0

    fetch_rows(batch_sql) do |row|
      yield row
      batch_count += 1
    end

    # Break if we got fewer rows than the batch size (end of results)
    break if batch_count < batch_size

    offset += batch_size
  end

  self
end
|
2042
|
+
|
2043
|
+
# Set custom batch size for streaming operations (Requirement 9.5)
#
# Consumed by #each and #stream_with_memory_limit when paginating
# results with LIMIT/OFFSET.
#
# @param size [Integer] Batch size for streaming
# @return [Dataset] New dataset with custom batch size
def stream_batch_size(size)
  clone(stream_batch_size: size)
end
|
2050
|
+
|
2051
|
+
# Stream results with memory limit enforcement (Requirement 9.5)
#
# Like #each, but monitors approximate heap growth between batches and
# halves the batch size (floor 100) when growth approaches the limit.
#
# @param memory_limit [Integer] Maximum memory growth allowed in bytes
# @yield [Hash] Block to process each row
# @return [Enumerator] If no block given
def stream_with_memory_limit(memory_limit, &)
  return enum_for(:stream_with_memory_limit, memory_limit) unless block_given?

  sql = select_sql

  # Check if SQL already has LIMIT/OFFSET - if so, don't add batching
  if sql.match?(/\bLIMIT\b/i) || sql.match?(/\bOFFSET\b/i)
    # SQL already has LIMIT/OFFSET, execute directly without batching
    fetch_rows(sql, &)
    return self
  end

  initial_memory = memory_usage
  batch_size = @opts[:stream_batch_size] || 500
  offset = 0

  loop do
    # Check memory usage before processing batch
    current_memory = memory_usage
    memory_growth = current_memory - initial_memory

    # Reduce batch size if memory usage is high (within 80% of the limit)
    batch_size = [batch_size / 2, 100].max if memory_growth > memory_limit * 0.8

    batch_sql = "#{sql} LIMIT #{batch_size} OFFSET #{offset}"
    batch_count = 0

    fetch_rows(batch_sql) do |row|
      yield row
      batch_count += 1

      # Force garbage collection periodically to manage memory
      GC.start if (batch_count % 100).zero?
    end

    # Fewer rows than requested means the result set is exhausted
    break if batch_count < batch_size

    offset += batch_size
  end

  self
end
|
2098
|
+
|
2099
|
+
private
|
2100
|
+
|
2101
|
+
# Get approximate memory usage for streaming optimization
#
# Runs a GC first so the count reflects live objects rather than garbage
# awaiting collection, then estimates bytes as live heap slots times an
# average slot size (40 bytes on common MRI builds).
#
# Fix: the previous version multiplied :TOTAL, which includes FREE heap
# slots — empty pages inflated the estimate. Subtract :FREE so only live
# slots are counted.
#
# @return [Integer] rough estimate of live heap memory in bytes
def memory_usage
  GC.start
  counts = ObjectSpace.count_objects
  (counts[:TOTAL] - counts[:FREE]) * 40
end
|
2106
|
+
|
2107
|
+
public
|
2108
|
+
|
2109
|
+
# Optimized count method for DuckDB
# Provides fast path for simple COUNT(*) queries on base tables
# Falls back to Sequel's implementation for complex scenarios
#
# Fix: the fast path previously ignored :join, :limit and :offset, so
# e.g. ds.limit(5).count or a joined dataset returned the full base-table
# row count. Those cases now fall back to Sequel's correct behavior.
#
# @param args [Array] column/expression arguments (fast path requires none)
# @param block [Proc] virtual-row block (fast path requires none)
# @return [Integer] number of matching rows
def count(*args, &block)
  # Only optimize if:
  # - No arguments or block provided
  # - No grouping, having, distinct, where, join, limit, or offset clauses
  # - Has a from clause with a table
  if args.empty? && !block && !@opts[:group] && !@opts[:having] &&
     !@opts[:distinct] && !@opts[:where] && !@opts[:join] &&
     !@opts[:limit] && !@opts[:offset] && @opts[:from]&.first
    # Use optimized COUNT(*) for simple cases
    table_name = @opts[:from].first
    table_sql = String.new
    quote_identifier_append(table_sql, table_name)
    single_value("SELECT COUNT(*) FROM #{table_sql}")
  else
    # Fall back to standard Sequel count behavior for complex cases
    super
  end
end
|
2129
|
+
|
2130
|
+
private
|
2131
|
+
|
2132
|
+
# Get a single value from a SQL query (used by count)
#
# Returns the first column of the first row, or nil when the query
# yields no rows.
def single_value(sql)
  fetch_rows(sql) do |row|
    return row.values.first
  end
  nil
end
|
2141
|
+
|
2142
|
+
# Helper method to check if bulk operations should be used
#
# Bulk loading only pays off beyond 10 rows.
def should_use_bulk_operations?(row_count) = row_count > 10
|
2147
|
+
|
2148
|
+
# Helper method to optimize query execution based on result set size
#
# DuckDB's optimizer already adapts to result-set size, so no hint is
# injected and the SQL is returned unchanged. (The previous
# implementation tested @opts[:small_result_set] with an empty branch
# body — dead code removed; reintroduce the check when a real hint
# exists.)
#
# @param sql [String] SQL to optimize
# @return [String] the SQL, unchanged
def optimize_for_result_size(sql)
  sql
end
|
2156
|
+
|
2157
|
+
public
|
2158
|
+
|
2159
|
+
# Index-aware query generation methods (Requirement 9.7)
|
2160
|
+
|
2161
|
+
# Get query execution plan with index usage information
#
# Runs EXPLAIN on the dataset's SELECT and joins each plan row's values
# into a newline-terminated line.
#
# @return [String] Query execution plan
def explain
  plan = String.new
  fetch_rows("EXPLAIN #{select_sql}") do |row|
    plan << row.values.join(" ") << "\n"
  end
  plan
end
|
2174
|
+
|
2175
|
+
# Get detailed query analysis including index usage
#
# Fix: #explain was previously called twice (once for :plan, once for
# :indexes_used), executing the EXPLAIN statement against the database
# twice per analysis. The plan is now fetched once and reused.
#
# @return [Hash] :plan, :indexes_used and :optimization_hints
def analyze_query
  plan = explain
  {
    plan: plan,
    indexes_used: extract_indexes_from_plan(plan),
    optimization_hints: generate_optimization_hints
  }
end
|
2185
|
+
|
2186
|
+
# Override where method to add index-aware optimization hints
#
# A single hash condition exposes the filtered columns directly, so the
# resulting dataset is tagged with any indexes covering those columns.
def where(*cond, &)
  ds = super
  if cond.length == 1 && cond.first.is_a?(Hash)
    ds = ds.add_index_hints(cond.first.keys)
  end
  ds
end
|
2195
|
+
|
2196
|
+
# Override order method to leverage index optimization
#
# Unwraps ASC/DESC wrappers so index hints are keyed on bare columns.
def order(*columns)
  ds = super

  bare_columns = columns.map do |col|
    col.is_a?(Sequel::SQL::OrderedExpression) ? col.expression : col
  end

  ds.add_index_hints(bare_columns)
end
|
2212
|
+
|
2213
|
+
# Add index optimization hints to the dataset
#
# Looks up the source table's indexes and records (under :index_hints)
# the names of those whose columns intersect +columns+. Index lookup is
# best-effort: any introspection failure is treated as "no indexes".
#
# @param columns [Array] Columns that might benefit from index usage
# @return [Dataset] Dataset with index hints
def add_index_hints(columns)
  # Get available indexes for the table
  table_name = @opts[:from]&.first
  return self unless table_name

  available_indexes = begin
    db.indexes(table_name)
  rescue StandardError
    {}
  end

  # Find indexes that match the columns
  # NOTE(review): col.to_sym assumes symbol/string columns — confirm SQL
  # expression objects cannot reach this path.
  matching_indexes = available_indexes.select do |_index_name, index_info|
    index_columns = index_info[:columns] || []
    columns.any? { |col| index_columns.include?(col.to_sym) }
  end

  # Add index hints to options
  clone(index_hints: matching_indexes.keys)
end
|
2237
|
+
|
2238
|
+
# Columnar storage optimization methods (Requirement 9.7)
|
2239
|
+
|
2240
|
+
# Override select method to add columnar optimization
#
# Narrow projections (1..9 explicit columns) benefit most from DuckDB's
# columnar storage, so such datasets are tagged for downstream hints.
def select(*columns)
  ds = super
  ds = ds.clone(columnar_optimized: true) if (1...10).cover?(columns.length)
  ds
end
|
2249
|
+
|
2250
|
+
# Optimize aggregation queries for columnar storage
#
# Tags grouped datasets so the parallel/columnar heuristics can see them.
def group(*columns)
  super.clone(columnar_aggregation: true)
end
|
2257
|
+
|
2258
|
+
# Parallel query execution support (Requirement 9.7)
|
2259
|
+
|
2260
|
+
# Enable parallel execution for the query
#
# @param thread_count [Integer] Number of threads to use (optional)
# @return [Dataset] Dataset configured for parallel execution
def parallel(thread_count = nil)
  settings = { parallel_execution: true }
  settings = settings.merge(parallel_threads: thread_count) if thread_count
  clone(settings)
end
|
2269
|
+
|
2270
|
+
private
|
2271
|
+
|
2272
|
+
# Extract index names from query execution plan
#
# Recognizes both bare "idx_*" names and "index NAME" phrases
# (case-insensitive), deduplicated in order of appearance.
#
# Fix: the previous regex /idx_\w+|index\s+(\w+)/ had one capture group,
# so String#scan yielded capture arrays — an "idx_*" match yielded [nil]
# and was dropped by compact, meaning bare index names were never
# collected. Both alternatives are now captured explicitly.
#
# @param plan [String] EXPLAIN output
# @return [Array<String>] unique index names found in the plan
def extract_indexes_from_plan(plan)
  indexes = []
  plan.scan(/(idx_\w+)|index\s+(\w+)/i) do |bare_name, phrase_name|
    indexes << (bare_name || phrase_name)
  end
  indexes.compact.uniq
end
|
2280
|
+
|
2281
|
+
# Generate optimization hints based on query structure
#
# Maps the presence of WHERE / ORDER BY / GROUP BY clauses in the
# dataset options to human-readable advice strings.
#
# @return [Array<String>] applicable hints, possibly empty
def generate_optimization_hints
  {
    where: "Consider adding indexes on WHERE clause columns",
    order: "ORDER BY may benefit from index on ordered columns",
    group: "GROUP BY operations are optimized for columnar storage"
  }.filter_map { |clause, hint| hint if @opts[clause] }
end
|
2296
|
+
|
2297
|
+
# Optimize SQL for columnar projection
#
# DuckDB already performs projection pushdown for columnar storage, so
# no rewrite is applied and the SQL passes through unchanged. (The
# former @opts[:columnar_optimized] conditional had an empty body — dead
# code removed; reintroduce the check when a real hint exists.)
#
# @param sql [String] SQL to optimize
# @return [String] the SQL, unchanged
def optimize_for_columnar_projection(sql)
  sql
end
|
2305
|
+
|
2306
|
+
# Determine if parallel execution should be used
#
# True for explicit parallel requests, columnar aggregations, multi-way
# joins, or queries containing window functions. The window-function
# check generates SQL, so the cheap option checks short-circuit first.
def should_use_parallel_execution?
  explicit = @opts[:parallel_execution]
  columnar_agg = @opts[:group] && @opts[:columnar_aggregation]
  multi_join = @opts[:join] && @opts[:join].length > 1
  !!(explicit || columnar_agg || multi_join || sql.downcase.include?("over("))
end
|
2321
|
+
|
2322
|
+
# Add parallel execution hints to SQL
#
# DuckDB parallelizes query execution automatically. Honoring
# @opts[:parallel_threads] would require connection-level configuration
# (e.g. `SET threads = N`), which is not wired up, so the SQL is
# returned unchanged. (The former conditional had an empty body — dead
# code removed.)
#
# @param sql [String] SQL to annotate
# @return [String] the SQL, unchanged
def add_parallel_hints(sql)
  sql
end
|
2332
|
+
end
|
2333
|
+
end
|
2334
|
+
|
2335
|
+
# Setup mock adapter when using Sequel.mock(host: :duckdb)
#
# Installs a no-op schema_parse_table on the mock database instance so
# schema introspection returns an empty column list instead of needing a
# real DuckDB connection.
#
# @param db [Sequel::Database] the mock database being configured
def self.mock_adapter_setup(db)
  db.instance_exec do
    # Just do the minimal setup like SQLite
    def schema_parse_table(*)
      []
    end
    # Keep the stub private, matching the real adapter's visibility
    singleton_class.send(:private, :schema_parse_table)
  end
end
|
2345
|
+
|
2346
|
+
# Register DuckDB adapter for mock databases
# This allows Sequel.mock(host: :duckdb) to work properly
# (presumably this causes Sequel to invoke DuckDB.mock_adapter_setup when
# a mock database is created with host: :duckdb — verify against Sequel's
# shared-adapter documentation)
Sequel::Database.set_shared_adapter_scheme(:duckdb, Sequel::DuckDB)
|
2349
|
+
end
|