dwh 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. checksums.yaml +7 -0
  2. data/.rubocop.yml +36 -0
  3. data/CHANGELOG.md +5 -0
  4. data/LICENSE +21 -0
  5. data/README.md +130 -0
  6. data/Rakefile +42 -0
  7. data/docs/DWH/Adapters/Adapter.html +3053 -0
  8. data/docs/DWH/Adapters/Athena.html +1704 -0
  9. data/docs/DWH/Adapters/Boolean.html +121 -0
  10. data/docs/DWH/Adapters/Druid.html +1626 -0
  11. data/docs/DWH/Adapters/DuckDb.html +2012 -0
  12. data/docs/DWH/Adapters/MySql.html +1704 -0
  13. data/docs/DWH/Adapters/OpenAuthorizable/ClassMethods.html +265 -0
  14. data/docs/DWH/Adapters/OpenAuthorizable.html +1102 -0
  15. data/docs/DWH/Adapters/Postgres.html +2000 -0
  16. data/docs/DWH/Adapters/Snowflake.html +1662 -0
  17. data/docs/DWH/Adapters/SqlServer.html +2084 -0
  18. data/docs/DWH/Adapters/Trino.html +1835 -0
  19. data/docs/DWH/Adapters.html +129 -0
  20. data/docs/DWH/AuthenticationError.html +142 -0
  21. data/docs/DWH/Behaviors.html +767 -0
  22. data/docs/DWH/Capabilities.html +748 -0
  23. data/docs/DWH/Column.html +1115 -0
  24. data/docs/DWH/ConfigError.html +143 -0
  25. data/docs/DWH/ConnectionError.html +143 -0
  26. data/docs/DWH/DWHError.html +138 -0
  27. data/docs/DWH/ExecutionError.html +143 -0
  28. data/docs/DWH/Factory.html +1133 -0
  29. data/docs/DWH/Functions/Arrays.html +505 -0
  30. data/docs/DWH/Functions/Dates.html +1644 -0
  31. data/docs/DWH/Functions/ExtractDatePart.html +804 -0
  32. data/docs/DWH/Functions/Nulls.html +377 -0
  33. data/docs/DWH/Functions.html +846 -0
  34. data/docs/DWH/Logger.html +258 -0
  35. data/docs/DWH/OAuthError.html +138 -0
  36. data/docs/DWH/Settings.html +658 -0
  37. data/docs/DWH/StreamingStats.html +804 -0
  38. data/docs/DWH/Table.html +1260 -0
  39. data/docs/DWH/TableStats.html +583 -0
  40. data/docs/DWH/TokenExpiredError.html +142 -0
  41. data/docs/DWH/UnsupportedCapability.html +135 -0
  42. data/docs/DWH.html +220 -0
  43. data/docs/_index.html +471 -0
  44. data/docs/class_list.html +54 -0
  45. data/docs/css/common.css +1 -0
  46. data/docs/css/full_list.css +58 -0
  47. data/docs/css/style.css +503 -0
  48. data/docs/file.README.html +210 -0
  49. data/docs/file.adapters.html +514 -0
  50. data/docs/file.creating-adapters.html +497 -0
  51. data/docs/file.getting-started.html +288 -0
  52. data/docs/file.usage.html +446 -0
  53. data/docs/file_list.html +79 -0
  54. data/docs/frames.html +22 -0
  55. data/docs/guides/adapters.md +445 -0
  56. data/docs/guides/creating-adapters.md +430 -0
  57. data/docs/guides/getting-started.md +225 -0
  58. data/docs/guides/usage.md +378 -0
  59. data/docs/index.html +210 -0
  60. data/docs/js/app.js +344 -0
  61. data/docs/js/full_list.js +242 -0
  62. data/docs/js/jquery.js +4 -0
  63. data/docs/method_list.html +2038 -0
  64. data/docs/top-level-namespace.html +110 -0
  65. data/lib/dwh/adapters/athena.rb +359 -0
  66. data/lib/dwh/adapters/druid.rb +267 -0
  67. data/lib/dwh/adapters/duck_db.rb +235 -0
  68. data/lib/dwh/adapters/my_sql.rb +235 -0
  69. data/lib/dwh/adapters/open_authorizable.rb +215 -0
  70. data/lib/dwh/adapters/postgres.rb +250 -0
  71. data/lib/dwh/adapters/snowflake.rb +489 -0
  72. data/lib/dwh/adapters/sql_server.rb +257 -0
  73. data/lib/dwh/adapters/trino.rb +213 -0
  74. data/lib/dwh/adapters.rb +363 -0
  75. data/lib/dwh/behaviors.rb +67 -0
  76. data/lib/dwh/capabilities.rb +39 -0
  77. data/lib/dwh/column.rb +79 -0
  78. data/lib/dwh/errors.rb +29 -0
  79. data/lib/dwh/factory.rb +125 -0
  80. data/lib/dwh/functions/arrays.rb +42 -0
  81. data/lib/dwh/functions/dates.rb +162 -0
  82. data/lib/dwh/functions/extract_date_part.rb +70 -0
  83. data/lib/dwh/functions/nulls.rb +31 -0
  84. data/lib/dwh/functions.rb +86 -0
  85. data/lib/dwh/logger.rb +50 -0
  86. data/lib/dwh/settings/athena.yml +77 -0
  87. data/lib/dwh/settings/base.yml +81 -0
  88. data/lib/dwh/settings/databricks.yml +51 -0
  89. data/lib/dwh/settings/druid.yml +59 -0
  90. data/lib/dwh/settings/duckdb.yml +44 -0
  91. data/lib/dwh/settings/mysql.yml +67 -0
  92. data/lib/dwh/settings/postgres.yml +30 -0
  93. data/lib/dwh/settings/redshift.yml +52 -0
  94. data/lib/dwh/settings/snowflake.yml +45 -0
  95. data/lib/dwh/settings/sqlserver.yml +80 -0
  96. data/lib/dwh/settings/trino.yml +77 -0
  97. data/lib/dwh/settings.rb +79 -0
  98. data/lib/dwh/streaming_stats.rb +69 -0
  99. data/lib/dwh/table.rb +105 -0
  100. data/lib/dwh/table_stats.rb +51 -0
  101. data/lib/dwh/version.rb +5 -0
  102. data/lib/dwh.rb +54 -0
  103. data/sig/dwh.rbs +4 -0
  104. metadata +231 -0
@@ -0,0 +1,257 @@
1
+ module DWH
2
+ module Adapters
3
+ # Microsoft SQL Server adapter. This adapter requires the {https://github.com/rails-sqlserver/tiny_tds tiny_tds}
4
+ # gem. For Mac OS and Linux, you will need to install FreeTDS and will most likely need OpenSSL. Please follow
5
+ # the instructions there before using the adapter.
6
+ #
7
+ # Create an adapter instance using {DWH::Factory#create DWH.create}.
8
+ #
9
+ # @example Basic connection with required only options
10
+ # DWH.create(:sqlserver, {host: 'localhost', database: 'my_db', username: 'sa'})
11
+ #
12
+ # @example Connect to Azure SQL Server
13
+ # DWH.create(:sqlserver, {host: 'localhost', database: 'my_db', username: 'sa', azure: true})
14
+ # @example Connection sending custom application name
15
+ # DWH.create(:sqlserver, {host: 'localhost', database: 'my_db', username: 'sa', client_name: 'Strata CLI'})
16
+ #
17
+ # @example Pass extra connection params
18
+ # DWH.create(:sqlserver, {host: 'localhost', database: 'my_db',
19
+ # username: 'sa', client_name: 'Strata CLI',
20
+ # extra_connection_params: {
21
+ # container: true,
22
+ # use_utf16: false
23
+ # })
24
+ #
25
+ # @example fetch tables in database
26
+ # adapter.tables
27
+ #
28
+ # @example fetch tables from another database
29
+ # adapter.tables(catalog: 'other_db')
30
+ #
31
+ # @example get table metadata for table in another db
32
+ # adapter.metadata('other_db.dbo.my_table') or adapter.metadata('my_table', catalog: 'other_db')
33
+ class SqlServer < Adapter
34
# Settings accepted by the SQL Server adapter.
config :host, String, required: true, message: 'server host ip address or domain name'
config :port, Integer, required: false, default: 1433, message: 'port to connect to'
config :database, String, required: true, message: 'name of database to connect to'
config :username, String, required: true, message: 'connection username'
config :password, String, required: false, default: nil, message: 'connection password'
# Declared as Integer so the type agrees with the numeric default; the value
# is handed to the client as its `timeout` option in #connection.
config :query_timeout, Integer, required: false, default: 3600, message: 'query execution timeout in seconds'
config :client_name, String, required: false, default: 'DWH Ruby Gem', message: 'The name of the connecting app'
config :azure, Boolean, required: false, default: false, message: 'signal whether this is an azure connection'
42
+
43
# (see Adapter#connection)
#
# Builds the TinyTds client on first use and memoizes it in @connection.
# Adapter config values are mapped onto the client's option names and then
# merged with extra_connection_params.
#
# @raise [ConfigError] when building the client raises any StandardError
def connection
  @connection ||= begin
    client_options = {
      host: config[:host],
      port: config[:port],
      database: config[:database],
      username: config[:username],
      password: config[:password],
      appname: config[:client_name],
      timeout: config[:query_timeout],
      azure: config[:azure]
    }
    TinyTds::Client.new(**client_options.merge(extra_connection_params))
  end
rescue StandardError => e
  raise ConfigError, e.message
end
64
+
65
# (see Adapter#test_connection)
#
# Tries to obtain the connection and reports whether that worked.
#
# @param raise_exception [Boolean] re-raise failures as ConnectionError
#   instead of returning false
# @return [Boolean] true when #connection succeeds, false otherwise
# @raise [ConnectionError] on failure when raise_exception is set
def test_connection(raise_exception: false)
  connection
  true
rescue StandardError => e
  return false unless raise_exception

  raise ConnectionError, e.message
end
74
+
75
# (see Adapter#tables)
#
# Lists table names from information_schema.tables, filtered by catalog
# (defaults to the configured database) and optionally by schema. The
# session is switched to the requested catalog for the query and switched
# back afterwards.
#
# @param qualifiers [Hash] optional :catalog and :schema filters
# @return [Array] flattened result rows (one table name per row)
def tables(**qualifiers)
  change_current_database(qualifiers[:catalog])

  # Double embedded single quotes so a qualifier value cannot terminate the
  # SQL string literal it is interpolated into.
  escape = ->(value) { value.to_s.gsub("'", "''") }
  table_catalog = escape.call(qualifiers[:catalog] || config[:database])
  schema_filter = qualifiers[:schema] ? " AND table_schema = '#{escape.call(qualifiers[:schema])}'" : ''

  sql = <<~SQL
    SELECT table_name
    FROM information_schema.tables
    WHERE table_catalog = '#{table_catalog}'
    #{schema_filter}
  SQL

  execute(sql).flatten
ensure
  reset_current_database
end
93
+
94
# Switches the session to another database and records the switch in
# @current_database. Does nothing when no catalog is given or when the
# catalog is already the configured database.
# @param catalog [String] new database to use
def change_current_database(catalog = nil)
  switching = catalog && catalog != config[:database]
  return unless switching

  @current_database = catalog
  use(catalog)
end
103
+
104
# Switches the session back to the configured database if
# #change_current_database moved it elsewhere.
#
# The recorded switch is cleared once the reset succeeds, so later calls do
# not issue a redundant USE statement. If the USE fails the marker is kept,
# allowing a later call to try again.
#
# @return [Object, nil] result of the USE statement, or nil when no reset
#   was needed
def reset_current_database
  return if @current_database.nil? || @current_database == config[:database]

  outcome = use(config[:database])
  @current_database = nil
  outcome
end
111
+
112
# (see Adapter#table?)
#
# Checks whether a table name appears in the list returned by #tables.
#
# @param table_name [String] name to look for
# @param qualifiers [Hash] optional :catalog / :schema, forwarded to #tables
#   so tables outside the default database or schema can be checked
# @return [Boolean]
def table?(table_name, **qualifiers)
  tables(**qualifiers).include?(table_name)
end
116
+
117
# (see Adapter#stats)
#
# Runs a single aggregate query returning the row count and, when
# date_column is given, the min/max of that column. The session is switched
# to qualifiers[:catalog] for the query and switched back afterwards.
#
# @param table [String] table name
# @param date_column [String, nil] column used for DATE_START / DATE_END
# @param qualifiers [Hash] optional :catalog and :schema
# @return [TableStats]
def stats(table, date_column: nil, **qualifiers)
  change_current_database(qualifiers[:catalog])

  schema = qualifiers[:schema]
  target = schema ? "#{schema}.#{table}" : table
  # Without a date column both fragments are empty and only the count is selected.
  min_part = date_column.nil? ? '' : ", min(#{date_column}) DATE_START"
  max_part = date_column.nil? ? '' : ", max(#{date_column}) DATE_END"

  sql = <<-SQL
    SELECT count(*) ROW_COUNT
    #{min_part}
    #{max_part}
    FROM #{quote(target)}
  SQL

  first_row = connection.execute(sql).to_a(empty_sets: true).first
  TableStats.new(
    row_count: first_row['ROW_COUNT'],
    date_start: first_row['DATE_START'],
    date_end: first_row['DATE_END']
  )
ensure
  reset_current_database
end
138
+
139
# (see Adapter#metadata)
#
# Reads column definitions from information_schema.columns and appends one
# Column per row to a Table built from the arguments. The session is
# switched to the table's catalog for the query and switched back afterwards.
#
# @param table [String] table name (qualifiers may also be passed separately)
# @param qualifiers [Hash] passed through to Table.new
# @return [Table] the table populated with its columns
def metadata(table, **qualifiers)
  db_table = Table.new(table, **qualifiers)
  change_current_database(db_table.catalog)

  schema_where = db_table.schema.present? ? "AND table_schema = '#{db_table.schema}'" : ''

  sql = <<-SQL
    SELECT column_name, data_type, character_maximum_length, numeric_precision,numeric_scale
    FROM information_schema.columns
    WHERE table_name = '#{db_table.physical_name}'
    #{schema_where}
  SQL

  execute(sql, format: 'object').each_with_object(db_table) do |col, tbl|
    tbl << Column.new(
      name: col['column_name'],
      data_type: col['data_type'],
      precision: col['numeric_precision'],
      scale: col['numeric_scale'],
      max_char_length: col['character_maximum_length']
    )
  end
ensure
  reset_current_database
end
168
+
169
# (see Adapter#execute)
#
# Runs the statement and converts the result to the requested format.
# The format may be a Symbol or a String; strings are downcased first.
#
# @param sql [String] statement to run
# @param format [Symbol, String] :array, :object, :csv or :native
# @param retries [Integer] passed to with_retry
# @raise [UnsupportedCapability] for any other format (raised after the
#   statement has already been run)
# @raise [ExecutionError] when TinyTds::Error is raised
def execute(sql, format: :array, retries: 0)
  result = with_debug(sql) do
    with_retry(retries) { connection.execute(sql) }
  end

  requested = format.is_a?(String) ? format.downcase : format
  row_options = { empty_sets: true, timezone: :utc }

  case requested.to_sym
  when :array  then result.to_a(as: :array, **row_options)
  when :object then result.to_a(as: :hash, **row_options)
  when :csv    then result_to_csv(result)
  when :native then result
  else
    raise UnsupportedCapability, "Unsupported format: #{requested} for this #{name}"
  end
rescue TinyTds::Error => e
  raise ExecutionError, e.message
end
189
+
190
# (see Adapter#execute_stream)
#
# Writes the result as CSV to io: a header line from the result's fields,
# then one line per row. Rows are also pushed onto stats when it is given.
# The io is rewound before being returned.
#
# @param sql [String] statement to run
# @param io [IO] destination; must respond to write and rewind
# @param stats [#<<, nil] optional collector that receives every row
# @param retries [Integer] passed to with_retry
# @return [IO] the rewound io
# @raise [ExecutionError] when any StandardError is raised
def execute_stream(sql, io, stats: nil, retries: 0)
  row_options = { as: :array, empty_sets: true, cache_rows: false, timezone: :utc }

  with_debug(sql) do
    with_retry(retries) do
      rows = connection.execute(sql)
      io.write(CSV.generate_line(rows.fields))
      rows.each(**row_options) do |values|
        stats << values unless stats.nil?
        io.write(CSV.generate_line(values))
      end
    end
  end

  io.tap(&:rewind)
rescue StandardError => e
  raise ExecutionError, e.message
end
208
+
209
# (see Adapter#stream)
#
# Runs the statement and hands each row to the given block. Rows are read
# with cache_rows: false.
#
# @param sql [String] statement to run
# @yield [row] each result row
def stream(sql, &block)
  with_debug(sql) do
    rows = connection.execute(sql)
    rows.each(empty_sets: true, cache_rows: false, timezone: :utc) { |row| block.call(row) }
  end
end
218
+
219
# Builds the day-name expression via the parent implementation and
# downcases the resulting SQL string. The input is wrapped in a date cast
# unless it already mentions "cast" (case-insensitive).
#
# @param exp [String] SQL expression
# @param abbreviate [Boolean] forwarded to the parent implementation
# @return [String] downcased SQL expression
def extract_day_name(exp, abbreviate: false)
  date_exp = /cast/i.match?(exp) ? exp : cast(exp, 'date')
  super(date_exp, abbreviate: abbreviate).downcase
end
223
+
224
# Builds an upper-cased month-name expression using DATENAME. The input is
# wrapped in a date cast unless it already mentions "cast"
# (case-insensitive).
#
# @param exp [String] SQL expression
# @param abbreviate [Boolean] when true, keep only the first three letters
# @return [String] SQL expression
def extract_month_name(exp, abbreviate: false)
  date_exp = /cast/i.match?(exp) ? exp : cast(exp, 'date')
  return "UPPER(LEFT(DATENAME(month, #{date_exp}), 3))" if abbreviate

  "UPPER(DATENAME(month, #{date_exp}))"
end
232
+
233
# Runs the parent validation, then makes sure the tiny_tds gem can be
# loaded.
#
# `require` returns false when the library is already loaded, so its value
# must not be used as this predicate's result; true is returned explicitly
# once both steps pass.
#
# @return [true]
# @raise [ConfigError] when the tiny_tds gem cannot be loaded
def valid_config?
  super
  require 'tiny_tds'
  true
rescue LoadError
  raise ConfigError, "Required 'tiny_tds' gem missing. Please add it to your Gemfile."
end
239
+
240
+ private
241
+
242
# Issues a USE statement for the given database on the current connection.
# @param new_database [String] database to switch to (passed through quote)
def use(new_database)
  connection.execute("use #{quote(new_database)}").do
end
246
+
247
# Renders a query result as a CSV string: a header line built from the
# result's fields, followed by one line per row.
# @param result [#fields, #each] query result
# @return [String] CSV text
def result_to_csv(result)
  CSV.generate do |out|
    out << result.fields
    result.each(as: :array, empty_sets: true) { |values| out << values }
  end
end
255
+ end
256
+ end
257
+ end
@@ -0,0 +1,213 @@
1
+ module DWH
2
+ module Adapters
3
+ # Trino adapter. This should work for Presto as well.
4
+ # This adapter requires the {https://github.com/treasure-data/trino-client-ruby trino-client-ruby} gem.
5
+ #
6
+ # Create adapter instances using {DWH::Factory#create DWH.create}.
7
+ #
8
+ # @example Basic connection with required only options
9
+ # DWH.create(:trino, {host: 'localhost', catalog: 'native', username: 'Ajo'})
10
+ #
11
+ # @example Connect with extra http headers
12
+ # DWH.create(:trino, {host: 'localhost', port: 8080,
13
+ # catalog: 'native', username: 'Ajo',
14
+ # extra_connection_params: {
15
+ # http_headers: {
16
+ # 'X-Trino-User' => 'True User Name',
17
+ # 'X-Forwarded-Request' => '<request passed down from client>'
18
+ # }
19
+ # }
20
+ # })
21
+ class Trino < Adapter
22
+ # Settings accepted by the Trino adapter. Each declaration names a key, its
+ # type, whether it is required, an optional default and a help message.
+ # host and port are combined into the client's `server` option in #connection.
+ config :host, String, required: true, message: 'server host ip address or domain name'
23
+ config :port, Integer, required: true, default: 8080, message: 'port to connect to'
24
+ config :ssl, Boolean, required: false, default: false, message: 'use ssl?'
25
+ config :catalog, String, required: true, message: 'catalog to connect to'
26
+ config :schema, String, required: false, message: 'default schema'
27
+ config :username, String, required: true, message: 'connection username'
28
+ config :password, String, required: false, default: nil, message: 'connection password'
29
+ config :query_timeout, Integer, required: false, default: 3600, message: 'query execution timeout in seconds'
30
+ config :client_name, String, required: false, default: 'DWH Ruby Gem', message: 'client name for tracking'
31
+
32
# (see Adapter#connection)
#
# Builds the Trino client on first use and memoizes it in @connection.
# Adapter config values are mapped onto the client's option names and then
# merged with extra_connection_params.
#
# NOTE(review): when config[:ssl] is truthy the client is given
# `ssl: { verify: false }` — this looks like it turns off certificate
# verification; confirm that is intended.
#
# @raise [ConfigError] when building the client raises any StandardError
def connection
  @connection ||= ::Trino::Client.new(
    {
      server: "#{config[:host]}:#{config[:port]}",
      ssl: config[:ssl] && { verify: false },
      schema: config[:schema],
      catalog: config[:catalog],
      user: config[:username],
      password: config[:password],
      query_timeout: config[:query_timeout],
      source: config[:client_name]
    }.merge(extra_connection_params)
  )
rescue StandardError => e
  raise ConfigError, e.message
end
52
+
53
# (see Adapter#test_connection)
#
# Runs `select 1` and reports whether it succeeded. Only Trino HTTP errors
# and Faraday connection failures are treated as a failed test; other
# errors propagate unchanged.
#
# @param raise_exception [Boolean] re-raise failures as ConnectionError
#   instead of returning false
# @return [Boolean]
# @raise [ConnectionError] on failure when raise_exception is set
def test_connection(raise_exception: false)
  connection.run('select 1')
  true
rescue ::Trino::Client::TrinoHttpError, Faraday::ConnectionFailed => e
  return false unless raise_exception

  raise ConnectionError, e.message
end
62
+
63
# (see Adapter#tables)
#
# Builds a SHOW TABLES statement. With both :catalog and :schema the FROM
# target is "catalog.schema"; with only one of them that single value is
# used; with neither there is no FROM clause.
#
# @param qualifiers [Hash] optional :catalog and :schema
# @return [Array] flattened result rows
def tables(**qualifiers)
  catalog, schema = qualifiers.values_at(:catalog, :schema)

  parts = ['SHOW TABLES']
  parts << 'FROM' if catalog || schema
  parts.concat(catalog && schema ? ["#{catalog}.#{schema}"] : [catalog, schema])

  execute(parts.compact.join(' '), retries: 1).flatten
end
79
+
80
# (see Adapter#table?)
#
# Checks for the table with SHOW TABLES ... LIKE and then confirms that one
# of the returned names is exactly the requested table (ignoring case).
#
# LIKE treats `_` and `%` in the name as wildcards, so on its own
# `LIKE 'my_table'` would also report `myxtable` as present; the explicit
# name comparison removes those false positives.
#
# @param table [String] table name
# @param qualifiers [Hash] passed through to Table.new
# @return [Boolean]
def table?(table, **qualifiers)
  db_table = Table.new(table, **qualifiers)

  query = ['SHOW TABLES']
  query.push('FROM', db_table.fully_qualified_schema_name) if db_table.catalog_or_schema?
  query << "LIKE '#{db_table.physical_name}'"

  rows = execute(query.compact.join(' '), retries: 1)
  wanted = db_table.physical_name.to_s
  rows.flatten.any? { |found| found.to_s.casecmp?(wanted) }
end
95
+
96
# (see Adapter#stats)
#
# Runs a single aggregate query returning the row count and, when
# date_column is given, the min/max of that column.
#
# @param table [String] table name
# @param date_column [String, nil] column used for DATE_START / DATE_END
# @param qualifiers [Hash] passed through to Table.new
# @return [TableStats]
def stats(table, date_column: nil, **qualifiers)
  target = Table.new(table, **qualifiers)
  # Without a date column both fragments are empty and only the count is selected.
  min_part = date_column.nil? ? '' : ", min(#{date_column}) DATE_START"
  max_part = date_column.nil? ? '' : ", max(#{date_column}) DATE_END"

  sql = <<-SQL
    SELECT count(*) ROW_COUNT
    #{min_part}
    #{max_part}
    FROM #{target.fully_qualified_table_name}
  SQL

  # Values are read by position: count, then min, then max.
  first_row = execute(sql, retries: 1)[0]
  TableStats.new(
    date_start: first_row[1],
    date_end: first_row[2],
    row_count: first_row[0]
  )
end
115
+
116
# (see Adapter#metadata)
#
# Runs SHOW COLUMNS for the table and appends one Column per returned row.
# A type starting with "row(" is reported as 'struct'.
#
# @param table [String] table name
# @param qualifiers [Hash] passed through to Table.new
# @return [Table] the table populated with its columns
def metadata(table, **qualifiers)
  db_table = Table.new(table, **qualifiers)
  statement = "SHOW COLUMNS FROM #{db_table.fully_qualified_table_name}"

  # The native result is a pair; only its second element (the rows) is used.
  _header, column_rows = execute(statement, format: :native, retries: 1)

  column_rows.each_with_object(db_table) do |col, tbl|
    type_name = col[1]
    tbl << Column.new(
      name: col[0],
      data_type: type_name.start_with?('row(') ? 'struct' : type_name
    )
  end
end
133
+
134
+ # Reports whether the adapter config contains a :schema key.
+ # Only the presence of the key is checked; its value is not inspected.
+ # @return [Boolean] presumably true/false from config.key? — confirm config is Hash-like
+ def schema?
135
+ config.key?(:schema)
136
+ end
137
+
138
# (see Adapter#execute)
#
# Runs the statement and returns the result in the requested format.
# The format may be given as a Symbol or a String in any letter case
# (e.g. :object, 'object', 'OBJECT').
#
# @param sql [String] statement to run
# @param format [Symbol, String] :native, :array, :object or :csv
# @param retries [Integer] passed to with_retry
# @return [Object] for :native the client's raw result, for :array its
#   second element, for :object the result of run_with_names, for :csv a
#   CSV string
# @raise [UnsupportedCapability] for any other format (raised after the
#   statement has already been run)
# @raise [ExecutionError] when the client raises TrinoQueryError
def execute(sql, format: :array, retries: 2)
  # Normalize so string formats behave like their symbol equivalents.
  requested = format.to_s.downcase.to_sym

  result = with_debug(sql) do
    with_retry(retries) do
      requested == :object ? connection.run_with_names(sql) : connection.run(sql)
    end
  end

  case requested
  when :native, :object
    result
  when :csv
    result_to_csv(result)
  when :array
    result[1]
  else
    raise UnsupportedCapability, "Unknown format type: #{format}. Should be :native, :array, :object, or :csv"
  end
rescue ::Trino::Client::TrinoQueryError => e
  raise ExecutionError, e.message
end
165
+
166
# (see Adapter#execute_stream)
#
# Writes the result as CSV to io: a header line from the column names,
# then one line per row. Rows are also pushed onto stats when it is given.
# The io is rewound before being returned.
#
# @param sql [String] statement to run
# @param io [IO] destination; must respond to write, << and rewind
# @param stats [#<<, nil] optional collector that receives every row
# @param retries [Integer] passed to with_retry
# @return [IO] the rewound io
def execute_stream(sql, io, stats: nil, retries: 1)
  with_debug(sql) do
    with_retry(retries) do
      connection.query(sql) do |result|
        header = result.columns.map(&:name)
        io.write(CSV.generate_line(header))

        result.each_row do |values|
          stats << values if stats
          io << CSV.generate_line(values)
        end
      end
    end
  end

  io.tap(&:rewind)
end
183
+
184
# (see Adapter#stream)
#
# Runs the statement and passes each row to the given block.
#
# @param sql [String] statement to run
# @yield [row] each result row
def stream(sql, &block)
  with_debug(sql) do
    connection.query(sql) { |result| result.each_row(&block) }
  end
end
192
+
193
# Runs the parent validation, then makes sure the trino-client gem can be
# loaded.
#
# `require` returns false when the library is already loaded, so its value
# must not be used as this predicate's result; true is returned explicitly
# once both steps pass.
#
# @return [true]
# @raise [ConfigError] when the trino-client gem cannot be loaded
def valid_config?
  super
  require 'trino-client'
  true
rescue LoadError
  raise ConfigError, "Required 'trino-client' gem missing. Please add it to your Gemfile."
end
199
+
200
+ private
201
+
202
# Renders a native query result as a CSV string: a header line built from
# the column names, followed by one line per row.
# @param result [Array] pair of (columns, rows); each column responds to name
# @return [String] CSV text
def result_to_csv(result)
  header, data = result
  CSV.generate do |out|
    out << header.map(&:name)
    data.each { |values| out << values }
  end
end
211
+ end
212
+ end
213
+ end