sequelizer 0.1.4 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.beads/.gitignore +54 -0
- data/.beads/.jsonl.lock +0 -0
- data/.beads/.migration-hint-ts +1 -0
- data/.beads/README.md +81 -0
- data/.beads/config.yaml +42 -0
- data/.beads/issues.jsonl +20 -0
- data/.beads/metadata.json +7 -0
- data/.coderabbit.yaml +94 -0
- data/.devcontainer/.p10k.zsh +1713 -0
- data/.devcontainer/.zshrc +29 -0
- data/.devcontainer/Dockerfile +137 -0
- data/.devcontainer/copy-claude-credentials.sh +32 -0
- data/.devcontainer/devcontainer.json +102 -0
- data/.devcontainer/init-firewall.sh +123 -0
- data/.devcontainer/setup-credentials.sh +95 -0
- data/.github/dependabot.yml +18 -0
- data/.github/workflows/dependabot-auto-merge.yml +36 -0
- data/.github/workflows/test.yml +44 -9
- data/.gitignore +6 -1
- data/.overcommit.yml +73 -0
- data/.rubocop.yml +167 -0
- data/AGENTS.md +126 -0
- data/CHANGELOG.md +41 -0
- data/CLAUDE.md +230 -0
- data/Gemfile +6 -2
- data/Gemfile.lock +189 -0
- data/Guardfile +1 -1
- data/Rakefile +28 -3
- data/config/platforms/base.csv +5 -0
- data/config/platforms/rdbms/athena.csv +4 -0
- data/config/platforms/rdbms/postgres.csv +3 -0
- data/config/platforms/rdbms/snowflake.csv +1 -0
- data/config/platforms/rdbms/spark.csv +3 -0
- data/lib/sequel/extensions/cold_col.rb +436 -0
- data/lib/sequel/extensions/db_opts.rb +65 -4
- data/lib/sequel/extensions/funky.rb +136 -0
- data/lib/sequel/extensions/make_readyable.rb +146 -30
- data/lib/sequel/extensions/more_sql.rb +76 -0
- data/lib/sequel/extensions/platform.rb +301 -0
- data/lib/sequel/extensions/settable.rb +64 -0
- data/lib/sequel/extensions/sql_recorder.rb +85 -0
- data/lib/sequel/extensions/unionize.rb +169 -0
- data/lib/sequel/extensions/usable.rb +30 -1
- data/lib/sequelizer/cli.rb +61 -18
- data/lib/sequelizer/connection_maker.rb +54 -72
- data/lib/sequelizer/env_config.rb +6 -6
- data/lib/sequelizer/gemfile_modifier.rb +23 -21
- data/lib/sequelizer/monkey_patches/database_in_after_connect.rb +7 -5
- data/lib/sequelizer/options.rb +102 -19
- data/lib/sequelizer/options_hash.rb +2 -0
- data/lib/sequelizer/version.rb +3 -1
- data/lib/sequelizer/yaml_config.rb +9 -4
- data/lib/sequelizer.rb +65 -9
- data/sequelizer.gemspec +20 -12
- data/test/lib/sequel/extensions/test_cold_col.rb +251 -0
- data/test/lib/sequel/extensions/test_db_opts.rb +10 -8
- data/test/lib/sequel/extensions/test_make_readyable.rb +198 -28
- data/test/lib/sequel/extensions/test_more_sql.rb +132 -0
- data/test/lib/sequel/extensions/test_platform.rb +222 -0
- data/test/lib/sequel/extensions/test_settable.rb +109 -0
- data/test/lib/sequel/extensions/test_sql_recorder.rb +231 -0
- data/test/lib/sequel/extensions/test_unionize.rb +76 -0
- data/test/lib/sequel/extensions/test_usable.rb +5 -2
- data/test/lib/sequelizer/test_connection_maker.rb +21 -17
- data/test/lib/sequelizer/test_env_config.rb +5 -2
- data/test/lib/sequelizer/test_gemfile_modifier.rb +7 -6
- data/test/lib/sequelizer/test_options.rb +42 -9
- data/test/lib/sequelizer/test_yaml_config.rb +13 -12
- data/test/test_helper.rb +37 -8
- metadata +196 -39
- data/lib/sequel/extensions/sqls.rb +0 -31
|
@@ -1,41 +1,102 @@
|
|
|
1
1
|
module Sequel
|
|
2
|
+
|
|
3
|
+
# = DbOpts
|
|
4
|
+
#
|
|
5
|
+
# Sequel extension that provides database-specific options handling.
|
|
6
|
+
# This extension allows setting database-specific configuration options
|
|
7
|
+
# that get applied during connection establishment.
|
|
8
|
+
#
|
|
9
|
+
# The extension looks for options in the database configuration that match
|
|
10
|
+
# the pattern `{database_type}_db_opt_{option_name}` and converts them to
|
|
11
|
+
# appropriate SQL SET statements.
|
|
12
|
+
#
|
|
13
|
+
# @example
|
|
14
|
+
# # For PostgreSQL, options like:
|
|
15
|
+
# postgres_db_opt_work_mem: '256MB'
|
|
16
|
+
# postgres_db_opt_shared_preload_libraries: 'pg_stat_statements'
|
|
17
|
+
#
|
|
18
|
+
# # Will generate:
|
|
19
|
+
# SET work_mem='256MB'
|
|
20
|
+
# SET shared_preload_libraries='pg_stat_statements'
|
|
2
21
|
module DbOpts
|
|
22
|
+
|
|
23
|
+
# Handles extraction and application of database-specific options.
|
|
3
24
|
class DbOptions
|
|
4
|
-
|
|
25
|
+
|
|
26
|
+
# @!attribute [r] db
|
|
27
|
+
# @return [Sequel::Database] the database instance
|
|
28
|
+
attr_reader :db
|
|
29
|
+
|
|
30
|
+
# Creates a new DbOptions instance for the given database.
|
|
31
|
+
#
|
|
32
|
+
# @param db [Sequel::Database] the database to configure
|
|
5
33
|
def initialize(db)
|
|
6
34
|
db.extension :settable
|
|
7
35
|
@db = db
|
|
8
36
|
end
|
|
9
37
|
|
|
38
|
+
# Returns a hash of database-specific options extracted from the database configuration.
|
|
39
|
+
#
|
|
40
|
+
# @return [Hash] hash of option names to values
|
|
10
41
|
def to_hash
|
|
11
42
|
@_to_hash ||= extract_db_opts
|
|
12
43
|
end
|
|
13
44
|
|
|
45
|
+
# Extracts database-specific options from the database configuration.
|
|
46
|
+
#
|
|
47
|
+
# Looks for options matching the pattern `{database_type}_db_opt_{option_name}`
|
|
48
|
+
# and extracts the option name and value.
|
|
49
|
+
#
|
|
50
|
+
# @return [Hash] extracted options with symbolic keys
|
|
14
51
|
def extract_db_opts
|
|
15
52
|
opt_regexp = /^#{db.database_type}_db_opt_/i
|
|
16
53
|
|
|
17
|
-
|
|
54
|
+
db.opts.select do |k, _|
|
|
55
|
+
k.to_s.match(opt_regexp)
|
|
56
|
+
end.to_h { |k, v| [k.to_s.gsub(opt_regexp, '').to_sym, prep_value(k, v)] }
|
|
18
57
|
end
|
|
19
58
|
|
|
59
|
+
# Applies the database options to the given connection.
|
|
60
|
+
#
|
|
61
|
+
# Executes the SQL statements generated from the options on the connection.
|
|
62
|
+
#
|
|
63
|
+
# @param c [Object] the database connection
|
|
20
64
|
def apply(c)
|
|
21
65
|
sql_statements.each do |stmt|
|
|
22
66
|
db.send(:log_connection_execute, c, stmt)
|
|
23
67
|
end
|
|
24
68
|
end
|
|
25
69
|
|
|
26
|
-
|
|
27
|
-
|
|
70
|
+
# Prepares a value for use in SQL statements.
|
|
71
|
+
#
|
|
72
|
+
# Values containing non-word characters are treated as literals and quoted,
|
|
73
|
+
# while simple values are used as-is.
|
|
74
|
+
#
|
|
75
|
+
# @param _k [Symbol] the option key (unused)
|
|
76
|
+
# @param v [Object] the option value
|
|
77
|
+
# @return [String] the prepared value
|
|
78
|
+
def prep_value(_k, v)
|
|
79
|
+
v =~ /\W/ ? db.literal(v.to_s) : v
|
|
28
80
|
end
|
|
29
81
|
|
|
82
|
+
# Generates SQL SET statements for the database options.
|
|
83
|
+
#
|
|
84
|
+
# @return [Array<String>] array of SQL SET statements
|
|
30
85
|
def sql_statements
|
|
31
86
|
db.send(:set_sql, to_hash)
|
|
32
87
|
end
|
|
88
|
+
|
|
33
89
|
end
|
|
34
90
|
|
|
91
|
+
# Returns a DbOptions instance for this database.
|
|
92
|
+
#
|
|
93
|
+
# @return [DbOptions] the database options handler
|
|
35
94
|
def db_opts
|
|
36
95
|
@db_opts ||= DbOptions.new(self)
|
|
37
96
|
end
|
|
97
|
+
|
|
38
98
|
end
|
|
39
99
|
|
|
40
100
|
Database.register_extension(:db_opts, DbOpts)
|
|
101
|
+
|
|
41
102
|
end
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
module Sequel
|
|
2
|
+
|
|
3
|
+
module Funky
|
|
4
|
+
|
|
5
|
+
class FunkyBase
|
|
6
|
+
|
|
7
|
+
def initialize(db)
|
|
8
|
+
@db = db
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
def to_strptime(format)
|
|
12
|
+
return format if format =~ /%/
|
|
13
|
+
|
|
14
|
+
format.gsub('yyyy', '%Y').gsub('MM', '%m').gsub('dd', '%d')
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
def from_strptime(format)
|
|
18
|
+
return format unless format =~ /%/
|
|
19
|
+
|
|
20
|
+
format.gsub('%Y', 'yyyy').gsub('%m', 'MM').gsub('%d', 'dd')
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
class FunkySpark < FunkyBase
|
|
26
|
+
|
|
27
|
+
def str_to_date(value, format, try: false) # rubocop:disable Lint/UnusedMethodArgument
|
|
28
|
+
Sequel.function(:to_date, Sequel.cast_string(value), format)
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
def hash(*)
|
|
32
|
+
Sequel.function(:xxhash64, *)
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
def make_json_column(ds, key_column, value_column)
|
|
36
|
+
json_object_col = Sequel.function(
|
|
37
|
+
:named_struct,
|
|
38
|
+
'key',
|
|
39
|
+
key_column,
|
|
40
|
+
'value',
|
|
41
|
+
value_column,
|
|
42
|
+
).then do |json_object_col|
|
|
43
|
+
Sequel.function(
|
|
44
|
+
:collect_list,
|
|
45
|
+
json_object_col,
|
|
46
|
+
)
|
|
47
|
+
end.then do |list_col|
|
|
48
|
+
Sequel.function(
|
|
49
|
+
:map_from_entries,
|
|
50
|
+
list_col,
|
|
51
|
+
)
|
|
52
|
+
end.then do |map_from_entries_col|
|
|
53
|
+
Sequel.function(
|
|
54
|
+
:to_json,
|
|
55
|
+
map_from_entries_col,
|
|
56
|
+
)
|
|
57
|
+
end
|
|
58
|
+
ds.from_self
|
|
59
|
+
.select(json_object_col)
|
|
60
|
+
.limit(1)
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
def collect_list(column)
|
|
64
|
+
Sequel.function(:collect_list, column)
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
class FunkyDuckDB < FunkyBase
|
|
70
|
+
|
|
71
|
+
def str_to_date(value, format, try: false)
|
|
72
|
+
strptime_func = try ? :try_strptime : :strptime
|
|
73
|
+
Sequel.function(strptime_func, Sequel.cast_string(value), to_strptime(format))
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
def hash(*)
|
|
77
|
+
Sequel.function(:hash, concat(*)).then do |hash_val|
|
|
78
|
+
Sequel.case(
|
|
79
|
+
{ hash_val <= 9_223_372_036_854_775_807 => hash_val },
|
|
80
|
+
hash_val - 18_446_744_073_709_551_616,
|
|
81
|
+
).cast(:bigint)
|
|
82
|
+
end
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
def concat(*)
|
|
86
|
+
Sequel.function(:concat, *)
|
|
87
|
+
end
|
|
88
|
+
|
|
89
|
+
def make_json_column(ds, key_column, value_column)
|
|
90
|
+
json_object_col = Sequel.function(
|
|
91
|
+
:json_object,
|
|
92
|
+
key_column,
|
|
93
|
+
value_column,
|
|
94
|
+
).then do |json_object_col|
|
|
95
|
+
Sequel.function(
|
|
96
|
+
:list,
|
|
97
|
+
json_object_col,
|
|
98
|
+
)
|
|
99
|
+
end
|
|
100
|
+
ds.from_self
|
|
101
|
+
.select(json_object_col)
|
|
102
|
+
.limit(1)
|
|
103
|
+
end
|
|
104
|
+
|
|
105
|
+
def collect_list(column)
|
|
106
|
+
Sequel.function(:list, column)
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
def self.extended(db)
|
|
112
|
+
db.instance_exec do
|
|
113
|
+
@funky = get_funky(db.database_type)
|
|
114
|
+
end
|
|
115
|
+
end
|
|
116
|
+
|
|
117
|
+
def funky
|
|
118
|
+
@funky
|
|
119
|
+
end
|
|
120
|
+
|
|
121
|
+
def get_funky(database_type)
|
|
122
|
+
case database_type
|
|
123
|
+
when :spark
|
|
124
|
+
FunkySpark.new(self)
|
|
125
|
+
when :duckdb
|
|
126
|
+
FunkyDuckDB.new(self)
|
|
127
|
+
else
|
|
128
|
+
raise "No known functions for #{database_type}"
|
|
129
|
+
end
|
|
130
|
+
end
|
|
131
|
+
|
|
132
|
+
end
|
|
133
|
+
|
|
134
|
+
Database.register_extension(:funky, Funky)
|
|
135
|
+
|
|
136
|
+
end
|
|
@@ -1,60 +1,99 @@
|
|
|
1
1
|
module Sequel
|
|
2
|
+
|
|
3
|
+
# = MakeReadyable
|
|
4
|
+
#
|
|
5
|
+
# Sequel extension that provides database readiness functionality,
|
|
6
|
+
# primarily geared towards Spark SQL-based databases. This extension
|
|
7
|
+
# allows setting up temporary views and schema configurations to prepare
|
|
8
|
+
# a database for use.
|
|
9
|
+
#
|
|
10
|
+
# @example Basic schema usage
|
|
11
|
+
# db.extension :make_readyable
|
|
12
|
+
# db.make_ready(use_schema: :my_schema)
|
|
13
|
+
#
|
|
14
|
+
# @example Search path with schema precedence
|
|
15
|
+
# db.make_ready(search_path: [:schema1, :schema2])
|
|
16
|
+
#
|
|
17
|
+
# @example External file sources
|
|
18
|
+
# db.make_ready(search_path: [Pathname.new('data.parquet')])
|
|
2
19
|
module MakeReadyable
|
|
3
|
-
|
|
4
|
-
#
|
|
20
|
+
|
|
21
|
+
# Prepares the database by setting up schemas, views, and external data sources.
|
|
5
22
|
#
|
|
23
|
+
# This method is primarily geared towards Spark SQL-based databases.
|
|
6
24
|
# Given some options, prepares a set of views to represent a set
|
|
7
25
|
# of tables across a collection of different schemas and external,
|
|
8
26
|
# unmanaged tables.
|
|
9
27
|
#
|
|
28
|
+
# @param opts [Hash] the options used to prepare the database
|
|
29
|
+
# @option opts [Symbol] :use_schema The schema to be used as the primary schema
|
|
30
|
+
# @option opts [Array] :search_path A set of symbols (schemas) or Pathnames (external files)
|
|
31
|
+
# @option opts [Array] :only Limit view creation to these tables only
|
|
32
|
+
# @option opts [Array] :except Skip view creation for these tables
|
|
33
|
+
#
|
|
34
|
+
# @example Set primary schema
|
|
10
35
|
# DB.make_ready(use_schema: :schema)
|
|
11
36
|
# # => USE `schema`
|
|
12
37
|
#
|
|
13
|
-
#
|
|
14
|
-
#
|
|
15
|
-
# the PATH variable for programs.
|
|
16
|
-
#
|
|
17
|
-
# Assuming the following tables: schema1.a, schema2.a, schema2.b
|
|
18
|
-
#
|
|
38
|
+
# @example Search path with precedence
|
|
39
|
+
# # Assuming tables: schema1.a, schema2.a, schema2.b
|
|
19
40
|
# DB.make_ready(search_path: [:schema1, :schema2])
|
|
20
41
|
# # => CREATE TEMPORARY VIEW `a` AS SELECT * FROM `schema1`.`a`;
|
|
21
42
|
# # => CREATE TEMPORARY VIEW `b` AS SELECT * FROM `schema2`.`b`;
|
|
22
43
|
#
|
|
23
|
-
#
|
|
24
|
-
# to try to read from the file.
|
|
25
|
-
#
|
|
44
|
+
# @example External file sources
|
|
26
45
|
# DB.make_ready(search_path: [Pathname.new("c.parquet"), Pathname.new("d.orc")])
|
|
27
46
|
# # => CREATE TEMPORARY VIEW `c` USING parquet OPTIONS ('path'='c.parquet')
|
|
28
47
|
# # => CREATE TEMPORARY VIEW `d` USING orc OPTIONS ('path'='d.orc')
|
|
29
|
-
#
|
|
30
|
-
# @param [Hash] opts the options used to prepare the database
|
|
31
|
-
# @option opts [String] :use_schema The schema to be used as the primary schema
|
|
32
|
-
# @option opts [Array] :search_path A set of sympbols (to represent schemas) or Pathnames (to represent externally managed data files)
|
|
33
48
|
def make_ready(opts = {})
|
|
34
49
|
ReadyMaker.new(self, opts).run
|
|
35
50
|
end
|
|
51
|
+
|
|
36
52
|
end
|
|
37
53
|
|
|
38
|
-
|
|
54
|
+
# = ReadyMaker
|
|
55
|
+
#
|
|
56
|
+
# Internal class that handles the actual database preparation logic.
|
|
57
|
+
# This class processes the make_ready options and executes the necessary
|
|
58
|
+
# SQL statements to set up schemas, views, and external data sources.
|
|
39
59
|
class ReadyMaker
|
|
60
|
+
|
|
61
|
+
# @!attribute [r] db
|
|
62
|
+
# @return [Sequel::Database] the database instance
|
|
63
|
+
# @!attribute [r] opts
|
|
64
|
+
# @return [Hash] the preparation options
|
|
40
65
|
attr_reader :db, :opts
|
|
41
66
|
|
|
67
|
+
# Creates a new ReadyMaker instance.
|
|
68
|
+
#
|
|
69
|
+
# @param db [Sequel::Database] the database to prepare
|
|
70
|
+
# @param opts [Hash] the preparation options
|
|
42
71
|
def initialize(db, opts)
|
|
43
72
|
@db = db
|
|
44
73
|
@opts = opts
|
|
45
74
|
end
|
|
46
|
-
|
|
75
|
+
|
|
76
|
+
# Executes the database preparation process.
|
|
77
|
+
#
|
|
78
|
+
# This method handles:
|
|
79
|
+
# 1. Setting the primary schema if specified
|
|
80
|
+
# 2. Processing the search path to create views
|
|
81
|
+
# 3. Handling table filtering (only/except options)
|
|
47
82
|
def run
|
|
48
83
|
if opts[:use_schema]
|
|
49
84
|
db.extension :usable
|
|
50
85
|
db.use(opts[:use_schema])
|
|
51
86
|
end
|
|
52
87
|
only_tables = Array(opts[:only])
|
|
53
|
-
created_views =
|
|
54
|
-
(opts[:search_path] || []).each do |schema|
|
|
55
|
-
schema = schema.is_a?(Pathname)
|
|
88
|
+
created_views = Array(opts[:except]) || []
|
|
89
|
+
(opts[:search_path] || []).flatten.each do |schema|
|
|
90
|
+
schema = schema.to_sym unless schema.is_a?(Pathname)
|
|
56
91
|
source = get_source(db, schema)
|
|
57
|
-
tables =
|
|
92
|
+
tables = if schema.is_a?(Pathname)
|
|
93
|
+
source.tables - created_views
|
|
94
|
+
else
|
|
95
|
+
source.tables(schema: schema) - created_views
|
|
96
|
+
end
|
|
58
97
|
tables &= only_tables unless only_tables.empty?
|
|
59
98
|
tables.each do |table|
|
|
60
99
|
create_view(source, table, schema)
|
|
@@ -63,46 +102,123 @@ module Sequel
|
|
|
63
102
|
end
|
|
64
103
|
end
|
|
65
104
|
|
|
105
|
+
# Creates a temporary view for the given table.
|
|
106
|
+
#
|
|
107
|
+
# @param source [Object] the source (database or FileSourcerer)
|
|
108
|
+
# @param table [Symbol] the table name
|
|
109
|
+
# @param schema [Symbol, Pathname] the schema or file path
|
|
66
110
|
def create_view(source, table, schema)
|
|
67
111
|
if schema.to_s =~ %r{/}
|
|
68
112
|
source.create_view(table, temp: true)
|
|
69
113
|
else
|
|
114
|
+
# For schema-based tables, just create temporary views
|
|
115
|
+
# This extension is primarily for Spark SQL-based databases
|
|
70
116
|
source.create_view(table, db[Sequel.qualify(schema, table)], temp: true)
|
|
71
117
|
end
|
|
72
118
|
end
|
|
73
119
|
|
|
120
|
+
# Gets the appropriate source handler for the schema.
|
|
121
|
+
#
|
|
122
|
+
# @param db [Sequel::Database] the database instance
|
|
123
|
+
# @param schema [Symbol, Pathname] the schema or file path
|
|
124
|
+
# @return [Sequel::Database, FileSourcerer] the source handler
|
|
74
125
|
def get_source(db, schema)
|
|
75
126
|
if schema.to_s =~ %r{/}
|
|
76
|
-
FileSourcerer.new(db, Pathname.new(schema))
|
|
127
|
+
FileSourcerer.new(db, Pathname.new(schema.to_s))
|
|
77
128
|
else
|
|
78
129
|
db
|
|
79
130
|
end
|
|
80
131
|
end
|
|
81
132
|
|
|
133
|
+
# = FileSourcerer
|
|
134
|
+
#
|
|
135
|
+
# Handles external file sources for the make_ready functionality.
|
|
136
|
+
# This class creates temporary views that read from external files
|
|
137
|
+
# like Parquet, ORC, etc.
|
|
82
138
|
class FileSourcerer
|
|
139
|
+
|
|
140
|
+
# @!attribute [r] db
|
|
141
|
+
# @return [Sequel::Database] the database instance
|
|
142
|
+
# @!attribute [r] schema
|
|
143
|
+
# @return [Pathname] the file path
|
|
83
144
|
attr_reader :db, :schema
|
|
145
|
+
|
|
146
|
+
# Creates a new FileSourcerer instance.
|
|
147
|
+
#
|
|
148
|
+
# @param db [Sequel::Database] the database instance
|
|
149
|
+
# @param schema [Pathname] the file path
|
|
84
150
|
def initialize(db, schema)
|
|
85
151
|
@db = db
|
|
86
152
|
@schema = schema
|
|
87
153
|
end
|
|
88
154
|
|
|
89
|
-
|
|
90
|
-
|
|
155
|
+
# Returns the table name derived from the file name.
|
|
156
|
+
#
|
|
157
|
+
# @param _opts [Hash] unused options parameter
|
|
158
|
+
# @return [Array<Symbol>] array containing the table name
|
|
159
|
+
def tables(_opts = {})
|
|
160
|
+
[schema.basename(schema.extname).to_s.to_sym]
|
|
91
161
|
end
|
|
92
162
|
|
|
163
|
+
# Creates a temporary view that reads from the external file.
|
|
164
|
+
#
|
|
165
|
+
# @param table [Symbol] the table/view name
|
|
166
|
+
# @param opts [Hash] additional options to merge
|
|
93
167
|
def create_view(table, opts = {})
|
|
94
|
-
db.
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
168
|
+
case db.database_type
|
|
169
|
+
when :spark
|
|
170
|
+
# Spark SQL uses USING clause for external tables
|
|
171
|
+
db.create_view(table, {
|
|
172
|
+
temp: true,
|
|
173
|
+
using: format,
|
|
174
|
+
options: { path: schema.expand_path },
|
|
175
|
+
}.merge(opts))
|
|
176
|
+
when :duckdb
|
|
177
|
+
# DuckDB uses direct file reading with read_* functions
|
|
178
|
+
create_duckdb_view(table, opts)
|
|
179
|
+
else
|
|
180
|
+
raise Sequel::Error, "External file sources are not supported on #{db.database_type}"
|
|
181
|
+
end
|
|
99
182
|
end
|
|
100
183
|
|
|
184
|
+
private
|
|
185
|
+
|
|
186
|
+
# Creates a view for DuckDB to read external files
|
|
187
|
+
#
|
|
188
|
+
# @param table [Symbol] the table/view name
|
|
189
|
+
# @param _opts [Hash] additional options to merge (currently unused for DuckDB)
|
|
190
|
+
def create_duckdb_view(table, _opts)
|
|
191
|
+
file_path = if schema.directory?
|
|
192
|
+
schema.expand_path.join('**').join("*.#{format}").to_s
|
|
193
|
+
else
|
|
194
|
+
schema.expand_path.to_s
|
|
195
|
+
end
|
|
196
|
+
read_function = case format
|
|
197
|
+
when 'parquet'
|
|
198
|
+
:read_parquet
|
|
199
|
+
when 'csv'
|
|
200
|
+
:read_csv_auto
|
|
201
|
+
when 'json'
|
|
202
|
+
:read_json_auto
|
|
203
|
+
else
|
|
204
|
+
raise Sequel::Error, "Unsupported file format '#{format}' for DuckDB"
|
|
205
|
+
end
|
|
206
|
+
|
|
207
|
+
# DuckDB doesn't support TEMPORARY views, use regular CREATE VIEW
|
|
208
|
+
db.create_view(table, db.from(Sequel.function(read_function, file_path)))
|
|
209
|
+
end
|
|
210
|
+
|
|
211
|
+
# Returns the file format based on the file extension.
|
|
212
|
+
#
|
|
213
|
+
# @return [String] the file format (e.g., 'parquet', 'orc')
|
|
101
214
|
def format
|
|
102
|
-
schema.extname[1
|
|
215
|
+
schema.extname[1..]
|
|
103
216
|
end
|
|
217
|
+
|
|
104
218
|
end
|
|
219
|
+
|
|
105
220
|
end
|
|
106
221
|
|
|
107
222
|
Database.register_extension(:make_readyable, MakeReadyable)
|
|
223
|
+
|
|
108
224
|
end
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Sequel
|
|
4
|
+
|
|
5
|
+
# Provides additional SQL helper methods for database operations.
|
|
6
|
+
#
|
|
7
|
+
# The more_sql extension adds convenience methods for SQL operations that
|
|
8
|
+
# aren't covered by Sequel's core functionality, particularly schema-related
|
|
9
|
+
# operations like CREATE SCHEMA.
|
|
10
|
+
#
|
|
11
|
+
# @example Load the extension
|
|
12
|
+
# DB.extension :more_sql
|
|
13
|
+
#
|
|
14
|
+
# @example Create a schema
|
|
15
|
+
# DB.create_schema(:analytics)
|
|
16
|
+
# # Executes: CREATE SCHEMA "analytics"
|
|
17
|
+
#
|
|
18
|
+
# @example Create schema if it doesn't exist
|
|
19
|
+
# DB.create_schema(:staging, if_not_exists: true)
|
|
20
|
+
# # Executes: CREATE SCHEMA IF NOT EXISTS "staging"
|
|
21
|
+
module MoreSql
|
|
22
|
+
|
|
23
|
+
# Creates a database schema.
|
|
24
|
+
#
|
|
25
|
+
# Generates and executes a CREATE SCHEMA statement with optional
|
|
26
|
+
# IF NOT EXISTS clause for idempotent schema creation.
|
|
27
|
+
#
|
|
28
|
+
# @param schema_name [Symbol, String] The name of the schema to create
|
|
29
|
+
# @param opts [Hash] Options for schema creation
|
|
30
|
+
# @option opts [Boolean] :if_not_exists (false) Only create the schema if it doesn't already exist
|
|
31
|
+
#
|
|
32
|
+
# @return [nil]
|
|
33
|
+
#
|
|
34
|
+
# @example Basic schema creation
|
|
35
|
+
# DB.create_schema(:reports)
|
|
36
|
+
# # Executes: CREATE SCHEMA "reports"
|
|
37
|
+
#
|
|
38
|
+
# @example Idempotent schema creation
|
|
39
|
+
# DB.create_schema(:analytics, if_not_exists: true)
|
|
40
|
+
# # Executes: CREATE SCHEMA IF NOT EXISTS "analytics"
|
|
41
|
+
#
|
|
42
|
+
# @example With string schema name
|
|
43
|
+
# DB.create_schema('user_data')
|
|
44
|
+
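# # Executes: CREATE SCHEMA "user_data" (NOTE(review): a String is passed through #literal and is likely rendered as the string literal 'user_data' rather than a quoted identifier; confirm, or pass a Symbol)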
|
|
45
|
+
def create_schema(schema_name, opts = {})
|
|
46
|
+
run(create_schema_sql(schema_name, opts))
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
private
|
|
50
|
+
|
|
51
|
+
# Generates the SQL for creating a schema.
|
|
52
|
+
#
|
|
53
|
+
# Builds a CREATE SCHEMA statement with proper identifier quoting
|
|
54
|
+
# and optional IF NOT EXISTS clause.
|
|
55
|
+
#
|
|
56
|
+
# @param schema_name [Symbol, String] The name of the schema to create
|
|
57
|
+
# @param opts [Hash] Options for schema creation
|
|
58
|
+
# @option opts [Boolean] :if_not_exists (false) Include IF NOT EXISTS clause
|
|
59
|
+
#
|
|
60
|
+
# @return [String] The CREATE SCHEMA SQL statement
|
|
61
|
+
#
|
|
62
|
+
# @example
|
|
63
|
+
# create_schema_sql(:test, if_not_exists: true)
|
|
64
|
+
# # => 'CREATE SCHEMA IF NOT EXISTS "test"'
|
|
65
|
+
def create_schema_sql(schema_name, opts)
|
|
66
|
+
sql = 'CREATE SCHEMA '
|
|
67
|
+
sql += 'IF NOT EXISTS ' if opts[:if_not_exists]
|
|
68
|
+
sql += literal(schema_name)
|
|
69
|
+
sql
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
Database.register_extension(:more_sql, MoreSql)
|
|
75
|
+
|
|
76
|
+
end
|