quill-sql 0.1.5 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/db/databricks.rb +207 -0
- data/lib/db/db_helper.rb +20 -2
- data/lib/db/postgres.rb +60 -25
- data/lib/quill-sql.rb +1 -3
- metadata +17 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b3d503d9223878a246289559453c27d45e4393620f9b548433ea5c072fc4c396
|
4
|
+
data.tar.gz: 1820f9f817ab7a29c947555a564e6981b283486cb0cdd0f65396cd01aaaecffd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b25af374393d2d9f6bc083f7cb426f631802df7582c0e4bc540c574e54123884e723030ca879dc9c937da21e40e6047be35c9ee59c001ee9bce78339db353ed9
|
7
|
+
data.tar.gz: 7a3251a382f48867d20e7600a4d00f69b95963a435328a4ae3348505ceeb1fe9671834b2715e0f3545f53cee507bad086d87f1acf75150dde218a9005d758d80
|
@@ -0,0 +1,207 @@
|
|
1
|
+
require 'uri'
|
2
|
+
require 'json'
|
3
|
+
require 'net/http'
|
4
|
+
require 'active_support/inflector'
|
5
|
+
|
6
|
+
# Helpers for talking to a Databricks SQL warehouse through the REST
# Statement Execution API (POST /api/2.0/sql/statements).
#
# There is no persistent connection: the "client" handed around is a plain
# Hash of :host, :warehouse_id and :token that is used to build each request.
module DatabricksHelper
  # Databricks type name => PostgreSQL type OID, reported as `dataTypeID`.
  DATABRICKS_DATATYPE_NAME_MAP = {
    # Numeric Types
    'BIGINT' => 20, # int8
    'BINARY' => 17, # bytea
    'BOOLEAN' => 16, # bool
    'DATE' => 1082, # date
    'DECIMAL' => 1700, # numeric
    'DOUBLE' => 701, # float8
    'FLOAT' => 701, # float8
    'INT' => 23, # int4
    'INTERVAL' => 1186, # interval
    'VOID' => 2278, # void
    'SMALLINT' => 21, # int2
    'STRING' => 1043, # varchar
    'TIMESTAMP' => 1114, # timestamp
    'TIMESTAMP_NTZ' => 1114, # timestamp without timezone
    'TINYINT' => 21, # mapped to smallint (int2) as postgres has no tinyint

    # Complex Types - mapping to closest PostgreSQL equivalents
    'ARRAY' => 2277, # anyarray
    'MAP' => 3802, # jsonb
    'STRUCT' => 3802, # jsonb
    'VARIANT' => 3802, # jsonb
    'OBJECT' => 3802 # jsonb
  }.freeze

  # OID used when a type name is not present in the map (varchar).
  DEFAULT_DATATYPE_ID = 1043

  # Column types whose String values are decoded with JSON.parse.
  JSON_LIKE_TYPE_NAMES = %w[ARRAY MAP STRUCT JSON].freeze

  class << self
    # Builds the client Hash used by every other method in this module.
    #
    # @param config [Hash] expects :host, :path and :token; the warehouse id
    #   is taken to be the last '/'-separated segment of :path
    # @return [Hash] { host:, warehouse_id:, token: }
    def connect_to_databricks(config)
      {
        host: config[:host],
        warehouse_id: config[:path].split('/').last,
        token: config[:token]
      }
    end

    # No-op: requests are stateless REST calls, so there is nothing to close.
    def disconnect_from_databricks(client)
      # No-op as we're using REST API
    end

    # Executes one SQL statement and returns its result set.
    #
    # @param sql [String] statement text; may contain named parameter markers
    #   (`:name`) when +parameters+ is given
    # @param client [Hash] value returned by connect_to_databricks
    # @param parameters [Array<Hash>, nil] optional statement parameters, each
    #   { name:, value:, type: }; sent as the request's `parameters` field and
    #   omitted from the request when nil or empty
    # @return [Hash] { fields: [{ name:, dataTypeID: }], rows: [Hash] }
    # @raise [RuntimeError] on a non-2xx HTTP response, or when the statement
    #   state is anything other than 'SUCCEEDED'
    def run_query_databricks(sql, client, parameters: nil)
      uri = URI("https://#{client[:host]}/api/2.0/sql/statements")
      http = Net::HTTP.new(uri.host, uri.port)
      http.use_ssl = true

      payload = {
        statement: sql,
        warehouse_id: client[:warehouse_id],
        wait_timeout: '50s'
      }
      payload[:parameters] = parameters unless parameters.nil? || parameters.empty?

      request = Net::HTTP::Post.new(uri)
      request['Authorization'] = "Bearer #{client[:token]}"
      request['Content-Type'] = 'application/json'
      request.body = JSON.generate(payload)

      response = http.request(request)
      raise "Query failed: #{response.body}" unless response.is_a?(Net::HTTPSuccess)

      result = JSON.parse(response.body)
      state = result.dig('status', 'state')
      raise "Query state: #{state}" unless state == 'SUCCEEDED'

      build_query_results(result)
    end

    # Lists every schema name except information_schema.
    #
    # @return [Array<String>]
    def get_schemas_databricks(client)
      # Single-quoted so the value is always parsed as a string literal.
      sql = <<~SQL
        SELECT schema_name
        FROM system.information_schema.schemata
        WHERE schema_name != 'information_schema'
      SQL

      run_query_databricks(sql, client)[:rows].map { |row| row['schema_name'] }
    end

    # Lists the tables of each given schema (one query per schema).
    #
    # @param schema_names [Array<String>, String] one or more schema names
    # @return [Array<Hash>] { tableName:, schemaName:, catalogName: } entries
    def get_tables_by_schema_databricks(client, schema_names)
      sql = <<~SQL
        SELECT
          table_name as tableName,
          table_schema as schemaName,
          table_catalog as catalogName
        FROM system.information_schema.tables
        WHERE table_schema = :target_schema
      SQL

      Array(schema_names).flat_map do |schema|
        results = run_query_databricks(
          sql, client, parameters: [string_parameter('target_schema', schema)]
        )
        results[:rows].map do |row|
          {
            tableName: row['tableName'],
            schemaName: row['schemaName'],
            catalogName: row['catalogName']
          }
        end
      end
    end

    # Returns the column names of one table, ordered by ordinal position.
    #
    # @return [Array<String>]
    def get_columns_by_table_databricks(client, schema_name, table_name)
      sql = <<~SQL
        SELECT
          column_name as columnName
        FROM system.information_schema.columns
        WHERE table_schema = :target_schema
          AND table_name = :target_table
        ORDER BY ordinal_position
      SQL

      results = run_query_databricks(
        sql, client, parameters: table_parameters(schema_name, table_name)
      )
      results[:rows].map { |row| row['columnName'] }
    end

    # Always returns an empty list.
    def get_foreign_keys_databricks(client, schema_name, table_name, primary_key)
      # Databricks doesn't support foreign keys
      []
    end

    # Describes the columns of each table (one query per table).
    #
    # @param schema_name [String] not used; each table carries its own schema
    # @param table_names [Array<Hash>] entries with :schemaName and :tableName
    # @return [Array<Hash>] { tableName: "schema.table", columns: [...] } where
    #   each column is { columnName:, displayName:, fieldType:, dataTypeID: }
    def get_schema_column_info_databricks(client, schema_name, table_names)
      sql = <<~SQL
        SELECT
          column_name as columnName,
          data_type as dataType
        FROM system.information_schema.columns
        WHERE table_schema = :target_schema
          AND table_name = :target_table
        ORDER BY ordinal_position
      SQL

      table_names.map do |table|
        results = run_query_databricks(
          sql, client, parameters: table_parameters(table[:schemaName], table[:tableName])
        )
        {
          tableName: "#{table[:schemaName]}.#{table[:tableName]}",
          columns: results[:rows].map { |row| describe_column(row) }
        }
      end
    end

    # Splits a connection string into the config Hash used by
    # connect_to_databricks.
    #
    # @param connection_string [String] URI whose host, path and password
    #   components carry the workspace host, warehouse path and access token
    # @return [Hash] { host:, path:, token: }
    def format_databricks_config(connection_string)
      parsed = URI(connection_string)
      {
        host: parsed.host,
        path: parsed.path[1..], # Remove leading slash
        token: parsed.password
      }
    end

    private

    # Converts a parsed statement response into { fields:, rows: }.
    # A missing column list or data array is treated as empty.
    def build_query_results(result)
      columns = result.dig('manifest', 'schema', 'columns') || []
      data = result.dig('result', 'data_array') || []

      fields = columns.map do |column|
        {
          name: column['name'],
          # default to varchar if type not found
          dataTypeID: DATABRICKS_DATATYPE_NAME_MAP[column['type_name']] || DEFAULT_DATATYPE_ID
        }
      end

      rows = data.map do |row_array|
        columns.each_with_index.map do |column, idx|
          [column['name'], decode_value(row_array[idx], column['type_name'])]
        end.to_h
      end

      { fields: fields, rows: rows }
    end

    # Decodes String values of JSON-like columns; decoded arrays have every
    # element stringified. Anything else, or unparseable text, is returned as is.
    def decode_value(value, type_name)
      return value unless value.is_a?(String) && JSON_LIKE_TYPE_NAMES.include?(type_name.to_s.upcase)

      parsed = JSON.parse(value)
      parsed.is_a?(Array) ? parsed.map(&:to_s) : parsed
    rescue JSON::ParserError
      value
    end

    # Builds one column description from an information_schema.columns row.
    def describe_column(row)
      # Keep only the base type name, e.g. "array<string>" => "ARRAY".
      base_type = row['dataType'].to_s[/\A[^<(]*/].strip.upcase
      {
        columnName: row['columnName'],
        displayName: row['columnName'],
        fieldType: row['dataType'],
        dataTypeID: DATABRICKS_DATATYPE_NAME_MAP[base_type] || DEFAULT_DATATYPE_ID
      }
    end

    # Parameters for the :target_schema / :target_table markers.
    def table_parameters(schema_name, table_name)
      [
        string_parameter('target_schema', schema_name),
        string_parameter('target_table', table_name)
      ]
    end

    # One STRING-typed statement parameter.
    def string_parameter(name, value)
      { name: name, value: value.to_s, type: 'STRING' }
    end
  end
end
|
data/lib/db/db_helper.rb
CHANGED
@@ -2,9 +2,9 @@ require 'json'
|
|
2
2
|
require 'uri'
|
3
3
|
require_relative 'clickhouse'
|
4
4
|
require_relative 'postgres'
|
5
|
-
|
5
|
+
require_relative 'databricks'
|
6
6
|
module DatabaseHelper
|
7
|
-
SUPPORTED_DATABASES = ['clickhouse', 'postgresql'].freeze
|
7
|
+
SUPPORTED_DATABASES = ['clickhouse', 'postgresql', 'databricks'].freeze
|
8
8
|
|
9
9
|
class QuillQueryResults
|
10
10
|
attr_reader :fields, :rows
|
@@ -23,6 +23,8 @@ module DatabaseHelper
|
|
23
23
|
ClickHouseHelper.format_clickhouse_config(connection_string)
|
24
24
|
when 'postgresql'
|
25
25
|
PostgresHelper.format_postgres_config(connection_string)
|
26
|
+
when 'databricks'
|
27
|
+
DatabricksHelper.format_databricks_config(connection_string)
|
26
28
|
else
|
27
29
|
raise DatabaseError, "Invalid database type: #{database_type}"
|
28
30
|
end
|
@@ -34,6 +36,8 @@ module DatabaseHelper
|
|
34
36
|
ClickHouseHelper.connect_to_clickhouse(config)
|
35
37
|
when 'postgresql'
|
36
38
|
PostgresHelper.connect_to_postgres(config)
|
39
|
+
when 'databricks'
|
40
|
+
DatabricksHelper.connect_to_databricks(config)
|
37
41
|
else
|
38
42
|
raise DatabaseError, "Invalid database type: #{database_type}"
|
39
43
|
end
|
@@ -57,6 +61,8 @@ module DatabaseHelper
|
|
57
61
|
ClickHouseHelper.run_query_clickhouse(sql, connection)
|
58
62
|
when 'postgresql'
|
59
63
|
PostgresHelper.run_query_postgres(sql, connection)
|
64
|
+
when 'databricks'
|
65
|
+
DatabricksHelper.run_query_databricks(sql, connection)
|
60
66
|
else
|
61
67
|
raise DatabaseError, "Invalid database type: #{database_type}"
|
62
68
|
end
|
@@ -74,6 +80,8 @@ module DatabaseHelper
|
|
74
80
|
ClickHouseHelper.disconnect_from_clickhouse(database)
|
75
81
|
when 'postgresql'
|
76
82
|
PostgresHelper.disconnect_from_postgres(database)
|
83
|
+
when 'databricks'
|
84
|
+
DatabricksHelper.disconnect_from_databricks(database)
|
77
85
|
end
|
78
86
|
end
|
79
87
|
|
@@ -83,6 +91,8 @@ module DatabaseHelper
|
|
83
91
|
ClickHouseHelper.get_schemas_clickhouse(connection)
|
84
92
|
when 'postgresql'
|
85
93
|
PostgresHelper.get_schemas_postgres(connection)
|
94
|
+
when 'databricks'
|
95
|
+
DatabricksHelper.get_schemas_databricks(connection)
|
86
96
|
else
|
87
97
|
raise DatabaseError, "Invalid database type: #{database_type}"
|
88
98
|
end
|
@@ -94,6 +104,8 @@ module DatabaseHelper
|
|
94
104
|
ClickHouseHelper.get_tables_by_schema_clickhouse(connection, schema_name)
|
95
105
|
when 'postgresql'
|
96
106
|
PostgresHelper.get_tables_by_schema_postgres(connection, schema_name)
|
107
|
+
when 'databricks'
|
108
|
+
DatabricksHelper.get_tables_by_schema_databricks(connection, schema_name)
|
97
109
|
else
|
98
110
|
raise DatabaseError, "Invalid database type: #{database_type}"
|
99
111
|
end
|
@@ -105,6 +117,8 @@ module DatabaseHelper
|
|
105
117
|
ClickHouseHelper.get_columns_by_table_clickhouse(connection, schema_name, table_name)
|
106
118
|
when 'postgresql'
|
107
119
|
PostgresHelper.get_columns_by_table_postgres(connection, schema_name, table_name)
|
120
|
+
when 'databricks'
|
121
|
+
DatabricksHelper.get_columns_by_table_databricks(connection, schema_name, table_name)
|
108
122
|
else
|
109
123
|
raise DatabaseError, "Invalid database type: #{database_type}"
|
110
124
|
end
|
@@ -116,6 +130,8 @@ module DatabaseHelper
|
|
116
130
|
ClickHouseHelper.get_foreign_keys_clickhouse(connection, schema_name, table_name, primary_key)
|
117
131
|
when 'postgresql'
|
118
132
|
PostgresHelper.get_foreign_keys_postgres(connection, schema_name, table_name, primary_key)
|
133
|
+
when 'databricks'
|
134
|
+
DatabricksHelper.get_foreign_keys_databricks(connection, schema_name, table_name, primary_key)
|
119
135
|
else
|
120
136
|
raise DatabaseError, "Invalid database type: #{database_type}"
|
121
137
|
end
|
@@ -127,6 +143,8 @@ module DatabaseHelper
|
|
127
143
|
ClickHouseHelper.get_schema_column_info_clickhouse(connection, schema_name, tables)
|
128
144
|
when 'postgresql'
|
129
145
|
PostgresHelper.get_schema_column_info_postgres(connection, schema_name, tables)
|
146
|
+
when 'databricks'
|
147
|
+
DatabricksHelper.get_schema_column_info_databricks(connection, schema_name, tables)
|
130
148
|
else
|
131
149
|
raise DatabaseError, "Invalid database type: #{database_type}"
|
132
150
|
end
|
data/lib/db/postgres.rb
CHANGED
@@ -2,42 +2,77 @@ require 'pg'
|
|
2
2
|
require 'json'
|
3
3
|
require 'uri'
|
4
4
|
require 'active_support/inflector'
|
5
|
+
require 'connection_pool'
|
5
6
|
|
6
7
|
module PostgresHelper
|
7
8
|
class << self
|
8
9
|
# Builds a pool of PostgreSQL connections; each pooled connection is opened
# with PG.connect from the given config.
#
# The pool holds 5 connections and waits up to 5 seconds for a free one unless
# config[:pool_size] / config[:pool_timeout] say otherwise, so callers can
# size it to their own server's thread count.
#
# @param config [Hash] :host, :port, :database, :username, :password, and
#   optionally :sslmode (default 'prefer'), :pool_size, :pool_timeout
# @return [ConnectionPool]
def connect_to_postgres(config)
  pool_size = config[:pool_size] || 5
  pool_timeout = config[:pool_timeout] || 5

  ConnectionPool.new(size: pool_size, timeout: pool_timeout) do
    PG.connect(
      host: config[:host],
      port: config[:port],
      dbname: config[:database],
      user: config[:username],
      password: config[:password],
      sslmode: config[:sslmode] || 'prefer',
      gssencmode: 'disable', # Disable GSSAPI encryption
      krbsrvname: nil, # Disable Kerberos service name
      target_session_attrs: 'read-write'
    )
  end
end
|
22
25
|
|
23
|
-
def disconnect_from_postgres(
|
24
|
-
|
26
|
+
# Releases the resources behind +pool+.
#
# An object that responds to `shutdown` is shut down, closing every connection
# that responds to `close`. An object that only responds to `close` (a bare
# connection rather than a pool) is closed directly instead of being left
# open. Anything else is ignored. PG errors raised while closing are reported
# on stderr and not propagated.
#
# @param pool [#shutdown, #close, nil]
def disconnect_from_postgres(pool)
  if pool.respond_to?(:shutdown)
    pool.shutdown { |conn| conn.close if conn.respond_to?(:close) }
  elsif pool.respond_to?(:close)
    pool.close
  end
rescue PG::Error => e
  warn "Error closing connection pool: #{e.message}"
end
|
26
32
|
|
27
|
-
def run_query_postgres(sql,
|
28
|
-
|
29
|
-
|
30
|
-
|
33
|
+
# Runs +sql+ on a connection checked out of +pool+ and returns the result set.
#
# Column types are resolved by position, so columns that share a name each
# report their own type OID. Values of JSON/JSONB columns are decoded. Rows
# are Hashes keyed by column name, so when names repeat the last column wins.
#
# @param sql [String] statement to execute
# @param pool [#with] pool yielding a client that responds to `exec`
# @return [Hash] { fields: [{ name:, dataTypeID: }], rows: [Hash] }
def run_query_postgres(sql, pool)
  pool.with do |client|
    result = client.exec(sql)

    column_names = result.fields
    column_oids = column_names.each_index.map { |index| result.ftype(index) }

    fields = column_names.zip(column_oids).map do |name, oid|
      { name: name, dataTypeID: oid }
    end

    rows = result.values.map do |row|
      column_names.each_with_index.map do |name, index|
        [name, parse_postgres_json_value(row[index], column_oids[index])]
      end.to_h
    end

    {
      fields: fields,
      rows: rows
    }
  ensure
    # `result` is nil here when exec itself raised.
    result.clear if result.respond_to?(:clear)
  end
end

# Decodes +value+ when +oid+ is the JSON (114) or JSONB (3802) type OID.
# Decoded arrays have every element stringified; nil, non-JSON columns and
# unparseable text are returned unchanged.
def parse_postgres_json_value(value, oid)
  return value if value.nil? || ![114, 3802].include?(oid)

  decoded = JSON.parse(value.to_s)
  decoded.is_a?(Array) ? decoded.map(&:to_s) : decoded
rescue JSON::ParserError
  value
end
|
42
77
|
|
43
78
|
def get_schemas_postgres(client)
|
data/lib/quill-sql.rb
CHANGED
@@ -14,10 +14,8 @@ Dotenv.load(File.expand_path('../.env', __dir__))
|
|
14
14
|
|
15
15
|
module DatabaseType
|
16
16
|
POSTGRESQL = 'postgresql'.freeze
|
17
|
-
SNOWFLAKE = 'snowflake'.freeze
|
18
|
-
BIGQUERY = 'bigquery'.freeze
|
19
|
-
MYSQL = 'mysql'.freeze
|
20
17
|
CLICKHOUSE = 'clickhouse'.freeze
|
18
|
+
DATABRICKS = 'databricks'.freeze
|
21
19
|
|
22
20
|
def self.valid?(type)
|
23
21
|
constants.map { |c| const_get(c) }.include?(type)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: quill-sql
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shawn Magee, Albert Yan
|
8
8
|
- Sam Bishop
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-
|
11
|
+
date: 2025-05-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: json
|
@@ -94,6 +94,20 @@ dependencies:
|
|
94
94
|
- - ">="
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: connection_pool
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :runtime
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
97
111
|
- !ruby/object:Gem::Dependency
|
98
112
|
name: rubocop
|
99
113
|
requirement: !ruby/object:Gem::Requirement
|
@@ -148,6 +162,7 @@ files:
|
|
148
162
|
- lib/assets/pg_types.rb
|
149
163
|
- lib/db/cached_connection.rb
|
150
164
|
- lib/db/clickhouse.rb
|
165
|
+
- lib/db/databricks.rb
|
151
166
|
- lib/db/db_helper.rb
|
152
167
|
- lib/db/postgres.rb
|
153
168
|
- lib/models/filters.rb
|