quill-sql 0.1.6 → 0.1.8

This diff shows the content of publicly released package versions as published to their public registry, and is provided for informational purposes only.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
-   metadata.gz: 2a0a484270d6c53d3aace8649706262abc4025f036b2fcbe8b65e9f0e15d017e
-   data.tar.gz: 350c28eb59616ef2f23e23015fbb5c4909d01f90e218730e20b3cae48cd613ad
+   metadata.gz: 5b91688c1cc7cc762aabc68cf5a68306f0fca7b45fcdd8adfef93e845f2c1d1d
+   data.tar.gz: f69676f9e1f4f1046ddef72ae2a68c476f8dc2c28ad3cd856d05e68a4c72c768
  SHA512:
-   metadata.gz: c7952deda10956bebd8bbb43e6f5a38f1f58f2a488e8ca2d250a2b68b5bde1701f869f27e2e661461a19a7538994f5b3ca6104f6df820fb72ef607c8016afeeb
-   data.tar.gz: 94fddf190d5903b22b90d21f3f2f886592df35440d8babf4c499f59c8e57033834294acb02a8f6f4f1ec25484713981b489042540607cbba39cfd148a2691d0d
+   metadata.gz: 9541d51c08c50c7f03dc34358aba4b48635a2a6f2acdbcda78e6fbcdbdd85bb9bd02db4045398db13445266101576128fb7d121ffafdc5ddc718ab0d49f830df
+   data.tar.gz: 27006da9ca08d16c6ee34fb1c6fa1449b5e448ad140ed7b393c1b3f67c72a472711ec57e58e3eb227cc28ace871744b5f2d339a4cfdc1de4e6fd3f2ca37f6ddc
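To check a downloaded copy of the gem against the new digests above, something like the following sketch works; the filename is hypothetical, and it relies on the fact that a .gem file is a tar archive whose members include metadata.gz and data.tar.gz:

    require 'digest'
    require 'rubygems/package'

    # Hash the two members of the gem archive and compare against checksums.yaml.
    File.open('quill-sql-0.1.8.gem', 'rb') do |io|
      Gem::Package::TarReader.new(io).each do |entry|
        next unless ['metadata.gz', 'data.tar.gz'].include?(entry.full_name)
        puts "#{entry.full_name}: #{Digest::SHA256.hexdigest(entry.read)}"
      end
    end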
data/lib/db/databricks.rb ADDED
@@ -0,0 +1,210 @@
+ require 'uri'
+ require 'json'
+ require 'net/http'
+ require 'active_support/inflector'
+
+ module DatabricksHelper
+   DATABRICKS_DATATYPE_NAME_MAP = {
+     # Numeric Types
+     'BIGINT' => 20,          # int8
+     'BINARY' => 17,          # bytea
+     'BOOLEAN' => 16,         # bool
+     'DATE' => 1082,          # date
+     'DECIMAL' => 1700,       # numeric
+     'DOUBLE' => 701,         # float8
+     'FLOAT' => 701,          # float8
+     'INT' => 23,             # int4
+     'INTERVAL' => 1186,      # interval
+     'VOID' => 2278,          # void
+     'SMALLINT' => 21,        # int2
+     'STRING' => 1043,        # varchar
+     'TIMESTAMP' => 1114,     # timestamp
+     'TIMESTAMP_NTZ' => 1114, # timestamp without time zone
+     'TINYINT' => 21,         # mapped to smallint (int2) as Postgres has no tinyint
+
+     # Complex Types - mapped to the closest PostgreSQL equivalents
+     'ARRAY' => 2277,   # anyarray
+     'MAP' => 3802,     # jsonb
+     'STRUCT' => 3802,  # jsonb
+     'VARIANT' => 3802, # jsonb
+     'OBJECT' => 3802   # jsonb
+   }.freeze
+
+   class << self
+     def connect_to_databricks(config)
+       warehouse_id = config[:path].split('/').last
+
+       {
+         host: config[:host],
+         warehouse_id: warehouse_id,
+         token: config[:token]
+       }
+     end
+
+     def disconnect_from_databricks(client)
+       # No-op as we're using the REST API
+     end
+
+     def run_query_databricks(sql, client)
+       uri = URI("https://#{client[:host]}/api/2.0/sql/statements")
+       http = Net::HTTP.new(uri.host, uri.port)
+       http.use_ssl = true
+
+       request = Net::HTTP::Post.new(uri)
+       request['Authorization'] = "Bearer #{client[:token]}"
+       request['Content-Type'] = 'application/json'
+       request.body = JSON.generate({
+         statement: sql,
+         warehouse_id: client[:warehouse_id],
+         wait_timeout: '50s'
+       })
+
+       response = http.request(request)
+
+       raise "Query failed: #{response.body}" unless response.is_a?(Net::HTTPSuccess)
+
+       result = JSON.parse(response.body)
+
+       raise "Query state: #{result['status']['state']}" unless result['status']['state'] == 'SUCCEEDED'
+
+       columns = result['manifest']['schema']['columns']
+       column_names = columns.map { |col| col['name'] }
+       column_types = columns.map { |col| col['type_name'] }
+
+       json_like_types = ['ARRAY', 'MAP', 'STRUCT', 'JSON']
+
+       fields = columns.map do |column|
+         {
+           name: column['name'],
+           dataTypeID: DATABRICKS_DATATYPE_NAME_MAP[column['type_name']] || 1043 # default to varchar if type not found
+         }
+       end
+
+       # Handle empty results
+       rows = if result['result'].nil? || result['result']['data_array'].nil?
+         []
+       else
+         # Transform the data array into rows of hashes
+         result['result']['data_array'].map do |row_array|
+           row_hash = column_names.zip(row_array).to_h
+           transformed_row = {}
+
+           column_names.each_with_index do |key, idx|
+             value = row_hash[key]
+             type = column_types[idx]
+             # Parse JSON only if the field is a JSON-like type
+             transformed_value = if value.is_a?(String) && json_like_types.include?(type.upcase)
+               begin
+                 parsed = JSON.parse(value)
+                 parsed.is_a?(Array) ? parsed.map(&:to_s) : parsed
+               rescue JSON::ParserError
+                 value
+               end
+             else
+               value
+             end
+             transformed_row[key] = transformed_value
+           end
+
+           transformed_row
+         end
+       end
+
+       {
+         fields: fields,
+         rows: rows
+       }
+     end
+
+     def get_schemas_databricks(client)
+       sql = <<~SQL
+         SELECT schema_name
+         FROM system.information_schema.schemata
+         WHERE schema_name != "information_schema"
+       SQL
+
+       results = run_query_databricks(sql, client)
+       results[:rows].map { |row| row['schema_name'] }
+     end
+
+     def get_tables_by_schema_databricks(client, schema_names)
+       all_tables = schema_names.flat_map do |schema|
+         sql = <<~SQL
+           SELECT
+             table_name as tableName,
+             table_schema as schemaName,
+             table_catalog as catalogName
+           FROM system.information_schema.tables
+           WHERE table_schema = '#{schema}'
+         SQL
+
+         results = run_query_databricks(sql, client)
+         results[:rows].map do |row|
+           {
+             tableName: row['tableName'],
+             schemaName: row['schemaName'],
+             catalogName: row['catalogName']
+           }
+         end
+       end
+
+       all_tables
+     end
+
+     def get_columns_by_table_databricks(client, schema_name, table_name)
+       sql = <<~SQL
+         SELECT
+           column_name as columnName
+         FROM system.information_schema.columns
+         WHERE table_schema = '#{schema_name}'
+           AND table_name = '#{table_name}'
+         ORDER BY ordinal_position
+       SQL
+
+       results = run_query_databricks(sql, client)
+       results[:rows].map { |row| row['columnName'] }
+     end
+
+     def get_foreign_keys_databricks(client, schema_name, table_name, primary_key)
+       # Databricks doesn't support foreign keys
+       []
+     end
+
+     def get_schema_column_info_databricks(client, schema_name, table_names)
+       table_names.map do |table|
+         sql = <<~SQL
+           SELECT
+             column_name as columnName,
+             data_type as dataType
+           FROM system.information_schema.columns
+           WHERE table_schema = '#{table[:schemaName]}'
+             AND table_name = '#{table[:tableName]}'
+           ORDER BY ordinal_position
+         SQL
+
+         results = run_query_databricks(sql, client)
+         {
+           tableName: "#{table[:schemaName]}.#{table[:tableName]}",
+           columns: results[:rows].map do |row|
+             data_type = row['dataType'].split('<')[0].upcase
+             {
+               columnName: row['columnName'],
+               displayName: row['columnName'],
+               fieldType: row['dataType'],
+               dataTypeID: DATABRICKS_DATATYPE_NAME_MAP[data_type] || 1043
+             }
+           end
+         }
+       end
+     end
+
+     def format_databricks_config(connection_string)
+       parsed = URI(connection_string)
+       {
+         host: parsed.host,
+         path: parsed.path[1..], # Remove leading slash
+         token: parsed.password
+       }
+     end
+   end
+ end
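The new helper is self-contained: format_databricks_config parses a connection string, connect_to_databricks reduces it to a host/warehouse/token triple, and run_query_databricks drives the Databricks SQL statement endpoint (/api/2.0/sql/statements) shown above. A minimal usage sketch; the host, warehouse ID, and token are made-up placeholders, and the commented return values assume the API reports type_name 'INT' and string-typed data_array cells:

    conn_str = 'databricks://token:dapiEXAMPLE@dbc-example.cloud.databricks.com/sql/1.0/warehouses/abc123'

    config = DatabricksHelper.format_databricks_config(conn_str)
    # => { host: 'dbc-example.cloud.databricks.com', path: 'sql/1.0/warehouses/abc123', token: 'dapiEXAMPLE' }

    client = DatabricksHelper.connect_to_databricks(config)
    # => { host: 'dbc-example.cloud.databricks.com', warehouse_id: 'abc123', token: 'dapiEXAMPLE' }

    results = DatabricksHelper.run_query_databricks('SELECT 1 AS one', client)
    results[:fields] # => [{ name: 'one', dataTypeID: 23 }]
    results[:rows]   # => [{ 'one' => '1' }] (values pass through unconverted)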
data/lib/db/db_helper.rb CHANGED
@@ -2,9 +2,9 @@ require 'json'
  require 'uri'
  require_relative 'clickhouse'
  require_relative 'postgres'
-
+ require_relative 'databricks'
  module DatabaseHelper
-   SUPPORTED_DATABASES = ['clickhouse', 'postgresql'].freeze
+   SUPPORTED_DATABASES = ['clickhouse', 'postgresql', 'databricks'].freeze
 
    class QuillQueryResults
      attr_reader :fields, :rows
@@ -23,6 +23,8 @@ module DatabaseHelper
        ClickHouseHelper.format_clickhouse_config(connection_string)
      when 'postgresql'
        PostgresHelper.format_postgres_config(connection_string)
+     when 'databricks'
+       DatabricksHelper.format_databricks_config(connection_string)
      else
        raise DatabaseError, "Invalid database type: #{database_type}"
      end
@@ -34,6 +36,8 @@ module DatabaseHelper
        ClickHouseHelper.connect_to_clickhouse(config)
      when 'postgresql'
        PostgresHelper.connect_to_postgres(config)
+     when 'databricks'
+       DatabricksHelper.connect_to_databricks(config)
      else
        raise DatabaseError, "Invalid database type: #{database_type}"
      end
@@ -57,6 +61,8 @@ module DatabaseHelper
        ClickHouseHelper.run_query_clickhouse(sql, connection)
      when 'postgresql'
        PostgresHelper.run_query_postgres(sql, connection)
+     when 'databricks'
+       DatabricksHelper.run_query_databricks(sql, connection)
      else
        raise DatabaseError, "Invalid database type: #{database_type}"
      end
@@ -74,6 +80,8 @@ module DatabaseHelper
        ClickHouseHelper.disconnect_from_clickhouse(database)
      when 'postgresql'
        PostgresHelper.disconnect_from_postgres(database)
+     when 'databricks'
+       DatabricksHelper.disconnect_from_databricks(database)
      end
    end
 
@@ -83,6 +91,8 @@ module DatabaseHelper
        ClickHouseHelper.get_schemas_clickhouse(connection)
      when 'postgresql'
        PostgresHelper.get_schemas_postgres(connection)
+     when 'databricks'
+       DatabricksHelper.get_schemas_databricks(connection)
      else
        raise DatabaseError, "Invalid database type: #{database_type}"
      end
@@ -94,6 +104,8 @@ module DatabaseHelper
        ClickHouseHelper.get_tables_by_schema_clickhouse(connection, schema_name)
      when 'postgresql'
        PostgresHelper.get_tables_by_schema_postgres(connection, schema_name)
+     when 'databricks'
+       DatabricksHelper.get_tables_by_schema_databricks(connection, schema_name)
      else
        raise DatabaseError, "Invalid database type: #{database_type}"
      end
@@ -105,6 +117,8 @@ module DatabaseHelper
        ClickHouseHelper.get_columns_by_table_clickhouse(connection, schema_name, table_name)
      when 'postgresql'
        PostgresHelper.get_columns_by_table_postgres(connection, schema_name, table_name)
+     when 'databricks'
+       DatabricksHelper.get_columns_by_table_databricks(connection, schema_name, table_name)
      else
        raise DatabaseError, "Invalid database type: #{database_type}"
      end
@@ -116,6 +130,8 @@ module DatabaseHelper
        ClickHouseHelper.get_foreign_keys_clickhouse(connection, schema_name, table_name, primary_key)
      when 'postgresql'
        PostgresHelper.get_foreign_keys_postgres(connection, schema_name, table_name, primary_key)
+     when 'databricks'
+       DatabricksHelper.get_foreign_keys_databricks(connection, schema_name, table_name, primary_key)
      else
        raise DatabaseError, "Invalid database type: #{database_type}"
      end
@@ -127,6 +143,8 @@ module DatabaseHelper
        ClickHouseHelper.get_schema_column_info_clickhouse(connection, schema_name, tables)
      when 'postgresql'
        PostgresHelper.get_schema_column_info_postgres(connection, schema_name, tables)
+     when 'databricks'
+       DatabricksHelper.get_schema_column_info_databricks(connection, schema_name, tables)
      else
        raise DatabaseError, "Invalid database type: #{database_type}"
      end
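Every operation in DatabaseHelper follows the same shape, a case on database_type with one branch per backend, so the Databricks support above is purely additive. The frozen guard list is the quickest way to see what a given build accepts:

    DatabaseHelper::SUPPORTED_DATABASES.include?('databricks') # => true as of 0.1.8
    DatabaseHelper::SUPPORTED_DATABASES.include?('mysql')      # => false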
data/lib/db/postgres.rb CHANGED
@@ -41,25 +41,26 @@ module PostgresHelper
        }
      end
 
+     # Create a map of field names to their types
+     field_types = result.fields.map.with_index { |field, i| [field, result.ftype(i)] }.to_h
+     json_types = [114, 3802] # JSON and JSONB OIDs
+
      rows = result.values.map do |row|
        result.fields.zip(row).map do |field_name, value|
-         # First convert all values to strings
-         string_value = value.to_s
-
-         # Then try to parse JSON if it looks like JSON
-         parsed_value = begin
-           if string_value.start_with?('[') || string_value.start_with?('{')
-             json_value = JSON.parse(string_value)
+         # only parse JSON if the field is a JSON type
+         parsed_value = if json_types.include?(field_types[field_name])
+           begin
+             json_value = JSON.parse(value.to_s)
              if json_value.is_a?(Array)
                json_value.map(&:to_s)
              else
                json_value
              end
-           else
-             string_value
+           rescue JSON::ParserError
+             value
            end
-         rescue JSON::ParserError
-           string_value
+         else
+           value
          end
          [field_name, parsed_value]
        end.to_h
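The old code sniffed every value for a leading '[' or '{' and attempted JSON.parse, so a text column that happened to contain valid JSON (say '["a","b"]') came back as a Ruby array. The rewrite keys parsing off the column's type OID via PG::Result#ftype, which is exactly what the new field_types map captures. A standalone sketch of the same check with the pg gem; the database name is hypothetical:

    require 'pg'
    require 'json'

    JSON_OIDS = [114, 3802].freeze # json and jsonb type OIDs

    conn = PG.connect(dbname: 'example_db') # hypothetical connection
    result = conn.exec(%q{SELECT '{"a":1}'::jsonb AS doc, '["a","b"]'::text AS plain})

    result.fields.each_with_index do |field, i|
      oid = result.ftype(i)        # type OID of column i
      value = result.getvalue(0, i)
      # Parse only genuine json/jsonb columns; leave text alone.
      parsed = JSON_OIDS.include?(oid) ? JSON.parse(value) : value
      puts "#{field} (oid #{oid}): #{parsed.class}"
    end
    # doc (oid 3802): Hash
    # plain (oid 25): String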
data/lib/quill-sql.rb CHANGED
@@ -14,10 +14,8 @@ Dotenv.load(File.expand_path('../.env', __dir__))
 
  module DatabaseType
    POSTGRESQL = 'postgresql'.freeze
-   SNOWFLAKE = 'snowflake'.freeze
-   BIGQUERY = 'bigquery'.freeze
-   MYSQL = 'mysql'.freeze
    CLICKHOUSE = 'clickhouse'.freeze
+   DATABRICKS = 'databricks'.freeze
 
    def self.valid?(type)
      constants.map { |c| const_get(c) }.include?(type)
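Because DatabaseType.valid? reflects over the module's constants (constants.map { |c| const_get(c) }), removing SNOWFLAKE, BIGQUERY, and MYSQL and adding DATABRICKS changes its answers without touching the method itself:

    DatabaseType.valid?('databricks') # => true
    DatabaseType.valid?('snowflake')  # => false (constant removed in this release)
    DatabaseType.valid?('postgresql') # => true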
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: quill-sql
  version: !ruby/object:Gem::Version
-   version: 0.1.6
+   version: 0.1.8
  platform: ruby
  authors:
  - Shawn Magee, Albert Yan
  - Sam Bishop
  bindir: bin
  cert_chain: []
- date: 2025-04-17 00:00:00.000000000 Z
+ date: 2025-05-29 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: json
@@ -162,6 +162,7 @@ files:
  - lib/assets/pg_types.rb
  - lib/db/cached_connection.rb
  - lib/db/clickhouse.rb
+ - lib/db/databricks.rb
  - lib/db/db_helper.rb
  - lib/db/postgres.rb
  - lib/models/filters.rb