dwh 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. checksums.yaml +7 -0
  2. data/.rubocop.yml +36 -0
  3. data/CHANGELOG.md +5 -0
  4. data/LICENSE +21 -0
  5. data/README.md +130 -0
  6. data/Rakefile +42 -0
  7. data/docs/DWH/Adapters/Adapter.html +3053 -0
  8. data/docs/DWH/Adapters/Athena.html +1704 -0
  9. data/docs/DWH/Adapters/Boolean.html +121 -0
  10. data/docs/DWH/Adapters/Druid.html +1626 -0
  11. data/docs/DWH/Adapters/DuckDb.html +2012 -0
  12. data/docs/DWH/Adapters/MySql.html +1704 -0
  13. data/docs/DWH/Adapters/OpenAuthorizable/ClassMethods.html +265 -0
  14. data/docs/DWH/Adapters/OpenAuthorizable.html +1102 -0
  15. data/docs/DWH/Adapters/Postgres.html +2000 -0
  16. data/docs/DWH/Adapters/Snowflake.html +1662 -0
  17. data/docs/DWH/Adapters/SqlServer.html +2084 -0
  18. data/docs/DWH/Adapters/Trino.html +1835 -0
  19. data/docs/DWH/Adapters.html +129 -0
  20. data/docs/DWH/AuthenticationError.html +142 -0
  21. data/docs/DWH/Behaviors.html +767 -0
  22. data/docs/DWH/Capabilities.html +748 -0
  23. data/docs/DWH/Column.html +1115 -0
  24. data/docs/DWH/ConfigError.html +143 -0
  25. data/docs/DWH/ConnectionError.html +143 -0
  26. data/docs/DWH/DWHError.html +138 -0
  27. data/docs/DWH/ExecutionError.html +143 -0
  28. data/docs/DWH/Factory.html +1133 -0
  29. data/docs/DWH/Functions/Arrays.html +505 -0
  30. data/docs/DWH/Functions/Dates.html +1644 -0
  31. data/docs/DWH/Functions/ExtractDatePart.html +804 -0
  32. data/docs/DWH/Functions/Nulls.html +377 -0
  33. data/docs/DWH/Functions.html +846 -0
  34. data/docs/DWH/Logger.html +258 -0
  35. data/docs/DWH/OAuthError.html +138 -0
  36. data/docs/DWH/Settings.html +658 -0
  37. data/docs/DWH/StreamingStats.html +804 -0
  38. data/docs/DWH/Table.html +1260 -0
  39. data/docs/DWH/TableStats.html +583 -0
  40. data/docs/DWH/TokenExpiredError.html +142 -0
  41. data/docs/DWH/UnsupportedCapability.html +135 -0
  42. data/docs/DWH.html +220 -0
  43. data/docs/_index.html +471 -0
  44. data/docs/class_list.html +54 -0
  45. data/docs/css/common.css +1 -0
  46. data/docs/css/full_list.css +58 -0
  47. data/docs/css/style.css +503 -0
  48. data/docs/file.README.html +210 -0
  49. data/docs/file.adapters.html +514 -0
  50. data/docs/file.creating-adapters.html +497 -0
  51. data/docs/file.getting-started.html +288 -0
  52. data/docs/file.usage.html +446 -0
  53. data/docs/file_list.html +79 -0
  54. data/docs/frames.html +22 -0
  55. data/docs/guides/adapters.md +445 -0
  56. data/docs/guides/creating-adapters.md +430 -0
  57. data/docs/guides/getting-started.md +225 -0
  58. data/docs/guides/usage.md +378 -0
  59. data/docs/index.html +210 -0
  60. data/docs/js/app.js +344 -0
  61. data/docs/js/full_list.js +242 -0
  62. data/docs/js/jquery.js +4 -0
  63. data/docs/method_list.html +2038 -0
  64. data/docs/top-level-namespace.html +110 -0
  65. data/lib/dwh/adapters/athena.rb +359 -0
  66. data/lib/dwh/adapters/druid.rb +267 -0
  67. data/lib/dwh/adapters/duck_db.rb +235 -0
  68. data/lib/dwh/adapters/my_sql.rb +235 -0
  69. data/lib/dwh/adapters/open_authorizable.rb +215 -0
  70. data/lib/dwh/adapters/postgres.rb +250 -0
  71. data/lib/dwh/adapters/snowflake.rb +489 -0
  72. data/lib/dwh/adapters/sql_server.rb +257 -0
  73. data/lib/dwh/adapters/trino.rb +213 -0
  74. data/lib/dwh/adapters.rb +363 -0
  75. data/lib/dwh/behaviors.rb +67 -0
  76. data/lib/dwh/capabilities.rb +39 -0
  77. data/lib/dwh/column.rb +79 -0
  78. data/lib/dwh/errors.rb +29 -0
  79. data/lib/dwh/factory.rb +125 -0
  80. data/lib/dwh/functions/arrays.rb +42 -0
  81. data/lib/dwh/functions/dates.rb +162 -0
  82. data/lib/dwh/functions/extract_date_part.rb +70 -0
  83. data/lib/dwh/functions/nulls.rb +31 -0
  84. data/lib/dwh/functions.rb +86 -0
  85. data/lib/dwh/logger.rb +50 -0
  86. data/lib/dwh/settings/athena.yml +77 -0
  87. data/lib/dwh/settings/base.yml +81 -0
  88. data/lib/dwh/settings/databricks.yml +51 -0
  89. data/lib/dwh/settings/druid.yml +59 -0
  90. data/lib/dwh/settings/duckdb.yml +44 -0
  91. data/lib/dwh/settings/mysql.yml +67 -0
  92. data/lib/dwh/settings/postgres.yml +30 -0
  93. data/lib/dwh/settings/redshift.yml +52 -0
  94. data/lib/dwh/settings/snowflake.yml +45 -0
  95. data/lib/dwh/settings/sqlserver.yml +80 -0
  96. data/lib/dwh/settings/trino.yml +77 -0
  97. data/lib/dwh/settings.rb +79 -0
  98. data/lib/dwh/streaming_stats.rb +69 -0
  99. data/lib/dwh/table.rb +105 -0
  100. data/lib/dwh/table_stats.rb +51 -0
  101. data/lib/dwh/version.rb +5 -0
  102. data/lib/dwh.rb +54 -0
  103. data/sig/dwh.rbs +4 -0
  104. metadata +231 -0
@@ -0,0 +1,445 @@
1
+ <!--
2
+ # @title Adapter Configuration
3
+ -->
4
+ # Adapter Configuration
5
+
6
+ This guide covers all the database adapters supported by DWH and their specific configuration options. Each adapter is designed to work with specific database clients and provides database-specific optimizations.
7
+
8
+ ## PostgreSQL Adapter
9
+
10
+ The PostgreSQL adapter uses the `pg` gem and provides full-featured RDBMS support.
11
+
12
+ ### Basic Configuration
13
+
14
+ ```ruby
15
+ postgres = DWH.create(:postgres, {
16
+ host: 'localhost',
17
+ port: 5432, # Default: 5432
18
+ database: 'mydb',
19
+ schema: 'public', # Default: 'public'
20
+ username: 'user',
21
+ password: 'password',
22
+ client_name: 'My Application' # Default: 'DWH Ruby Gem'
23
+ })
24
+ ```
25
+
26
+ ### SSL Configuration
27
+
28
+ ```ruby
29
+ # Basic SSL
30
+ postgres = DWH.create(:postgres, {
31
+ host: 'localhost',
32
+ database: 'mydb',
33
+ username: 'user',
34
+ password: 'password',
35
+ ssl: true,
36
+ extra_connection_params: {
37
+ sslmode: 'require' # disable, prefer, require, verify-ca, verify-full
38
+ }
39
+ })
40
+
41
+ # Certificate-based SSL
42
+ postgres = DWH.create(:postgres, {
43
+ host: 'localhost',
44
+ database: 'mydb',
45
+ username: 'user',
46
+ ssl: true,
47
+ extra_connection_params: {
48
+ sslmode: 'verify-full',
49
+ sslrootcert: '/path/to/ca-cert.pem',
50
+ sslcert: '/path/to/client-cert.pem',
51
+ sslkey: '/path/to/client-key.pem'
52
+ }
53
+ })
54
+ ```
55
+
56
+ ### Advanced Configuration
57
+
58
+ ```ruby
59
+ postgres = DWH.create(:postgres, {
60
+ host: 'localhost',
61
+ database: 'mydb',
62
+ username: 'user',
63
+ password: 'password',
64
+ query_timeout: 3600, # seconds, default: 3600
65
+ extra_connection_params: {
66
+ application_name: 'Data Analysis Tool',
67
+ connect_timeout: 10,
68
+ options: '-c maintenance_work_mem=256MB'
69
+ }
70
+ })
71
+ ```
72
+
73
+ ## Snowflake
74
+
75
+ The Snowflake adapter uses the REST APIs (HTTPS) to connect and query. This adapter also supports Multi-Database
76
+ authentication methods: Personal Access Token, Key Pair, and OAuth.
77
+
78
+ ### Basic connection with Personal Access Token
79
+
80
+ ```ruby
81
+ DWH.create(:snowflake, {
82
+ auth_mode: 'pat',
83
+ account_identifier: 'myorg-myaccount',
84
+ personal_access_token: 'your-token-here',
85
+ warehouse: 'COMPUTE_WH',
86
+ database: 'ANALYTICS',
87
+ schema: 'PUBLIC'
88
+ })
89
+
90
+ ```
91
+
92
+ ### Connection with Key Pair Authentication
93
+
94
+ ```ruby
95
+ DWH.create(:snowflake, {
96
+ auth_mode: 'kp',
97
+ account_identifier: 'myorg-myaccount.us-east-1',
98
+ username: 'john_doe',
99
+ private_key: '/path/to/private_key.pem',
100
+ warehouse: 'COMPUTE_WH',
101
+ database: 'ANALYTICS'
102
+ })
103
+
104
+ ```
105
+
106
+ ### Connecting with OAuth
107
+
108
+ This is the Snowflake OAuth mechanism, not the External OAuth one. You must first create an OAuth security integration and apply it to the relevant roles.
109
+ Follow this [document](https://docs.snowflake.com/en/user-guide/oauth-custom) for more.
110
+
111
+ ```ruby
112
+ adapter = DWH.create(:snowflake, {
113
+ auth_mode: 'oauth',
114
+ account_identifier: 'myorg-myaccount.us-east-1',
115
+ oauth_client_id: '<YOUR_CLIENT_ID>',
116
+ oauth_client_secret: '<YOUR_CLIENT_SECRET>',
117
+ oauth_redirect_url: 'https://localhost:3030/some/path',
118
+ database: 'ANALYTICS',
119
+ client_name: 'myapp' # sent as user agent header value
120
+ })
121
+ ```
122
+
123
+ To successfully use OAuth you have to pass the adapter valid access and refresh tokens. Or, it can generate them from a valid authorization code.
124
+
125
+ The typical flow is like so:
126
+
127
+ 1. Generate an authorization code by visiting the URL generated by `adapter.authorization_url`. This will redirect to the configured `oauth_redirect_url`. You must be able to retrieve the `code` from there.
128
+ 2. Take the code from above and generate new access tokens: `adapter.generate_oauth_tokens(code)`. This will return a Hash with `access_token` and `refresh_token`. You can cache and reuse these until the refresh token expires. This method will also apply the tokens to the current adapter instance.
129
+ 3. You can apply an existing set of tokens like so: `adapter.apply_oauth_tokens(access_token: token, refresh_token: token, expires_at: Time.now)`
130
+
131
+ ## MySQL Adapter
132
+
133
+ The MySQL adapter uses the `mysql2` gem. Note that MySQL's concept of "database" maps to "schema" in DWH.
134
+
135
+ ### Basic Configuration
136
+
137
+ ```ruby
138
+ mysql = DWH.create(:mysql, {
139
+ host: '127.0.0.1', # Use 127.0.0.1 for local Docker instances
140
+ port: 3306, # Default: 3306
141
+ database: 'mydb',
142
+ username: 'user',
143
+ password: 'password',
144
+ client_name: 'My Application' # Default: 'DWH Ruby Gem'
145
+ })
146
+ ```
147
+
148
+ ### SSL Configuration
149
+
150
+ ```ruby
151
+ # Basic SSL
152
+ mysql = DWH.create(:mysql, {
153
+ host: '127.0.0.1',
154
+ database: 'mydb',
155
+ username: 'user',
156
+ password: 'password',
157
+ ssl: true, # Defaults ssl_mode to 'required'
158
+ extra_connection_params: {
159
+ ssl_mode: 'verify_identity', # disabled, preferred, required, verify_ca, verify_identity
160
+ sslca: '/path/to/ca-cert.pem',
161
+ sslcert: '/path/to/client-cert.pem',
162
+ sslkey: '/path/to/client-key.pem'
163
+ }
164
+ })
165
+ ```
166
+
167
+ ### Advanced Configuration
168
+
169
+ ```ruby
170
+ mysql = DWH.create(:mysql, {
171
+ host: 'mysql.example.com',
172
+ database: 'analytics',
173
+ username: 'analyst',
174
+ password: 'password',
175
+ client_name: "My App", # defaults to 'DWH Ruby Gem'
176
+ query_timeout: 1800, # seconds, default: 3600
177
+ extra_connection_params: {
178
+ encoding: 'utf8mb4',
179
+ read_timeout: 60,
180
+ write_timeout: 60,
181
+ connect_timeout: 10
182
+ }
183
+ })
184
+ ```
185
+
186
+ ## SQL Server Adapter
187
+
188
+ The SQL Server adapter uses the `tiny_tds` gem and supports both on-premises and Azure SQL Server.
189
+
190
+ ### Basic Configuration
191
+
192
+ ```ruby
193
+ sqlserver = DWH.create(:sqlserver, {
194
+ host: 'localhost',
195
+ port: 1433, # Default: 1433
196
+ database: 'mydb',
197
+ username: 'sa',
198
+ password: 'password',
199
+ client_name: 'My Application' # Default: 'DWH Ruby Gem'
200
+ })
201
+ ```
202
+
203
+ ### Azure SQL Server
204
+
205
+ ```ruby
206
+ azure_sql = DWH.create(:sqlserver, {
207
+ host: 'myserver.database.windows.net',
208
+ database: 'mydb',
209
+ username: 'myuser@myserver',
210
+ password: 'password',
211
+ azure: true,
212
+ client_name: 'My Application'
213
+ })
214
+ ```
215
+
216
+ ### Advanced Configuration
217
+
218
+ ```ruby
219
+ sqlserver = DWH.create(:sqlserver, {
220
+ host: 'sql.example.com',
221
+ database: 'analytics',
222
+ username: 'analyst',
223
+ password: 'password',
224
+ query_timeout: 1800, # seconds, default: 3600
225
+ extra_connection_params: {
226
+ container: true, # For SQL Server running in containers
227
+ use_utf16: false, # Character encoding options
228
+ timeout: 60, # Connection timeout
229
+ login_timeout: 60 # Login timeout
230
+ }
231
+ })
232
+ ```
233
+
234
+ ### Multi-Database Operations
235
+
236
+ ```ruby
237
+ # List tables in another database
238
+ tables = sqlserver.tables(catalog: 'other_database')
239
+
240
+ # Get metadata for table in another database
241
+ metadata = sqlserver.metadata('other_database.dbo.my_table')
242
+ # OR
243
+ metadata = sqlserver.metadata('my_table', catalog: 'other_database')
244
+ ```
245
+
246
+ ## DuckDB Adapter
247
+
248
+ The DuckDB adapter uses the `ruby-duckdb` gem for in-process analytical queries. This requires DuckDB header files and library to already be installed.
249
+
250
+ ### Basic Configuration
251
+
252
+ ```ruby
253
+ # File-based database
254
+ duckdb = DWH.create(:duckdb, {
255
+ file: '/path/to/my/database.duckdb',
256
+ schema: 'main' # Default: 'main'
257
+ })
258
+
259
+ # In-memory database
260
+ duckdb = DWH.create(:duckdb, {
261
+ file: ':memory:'
262
+ })
263
+ ```
264
+
265
+ ### Read-Only Mode
266
+
267
+ ```ruby
268
+ duckdb = DWH.create(:duckdb, {
269
+ file: '/path/to/readonly/database.duckdb',
270
+ duck_config: {
271
+ access_mode: 'READ_ONLY'
272
+ }
273
+ })
274
+ ```
275
+
276
+ ### Advanced Configuration
277
+
278
+ ```ruby
279
+ duckdb = DWH.create(:duckdb, {
280
+ file: '/path/to/my/database.duckdb',
281
+ duck_config: {
282
+ access_mode: 'READ_WRITE',
283
+ max_memory: '2GB',
284
+ threads: 4,
285
+ temp_directory: '/tmp/duckdb'
286
+ }
287
+ })
288
+ ```
289
+
290
+ ## Trino Adapter
291
+
292
+ The Trino adapter requires the `trino-client-ruby` gem and works with both Trino and Presto.
293
+
294
+ ### Basic Configuration
295
+
296
+ ```ruby
297
+ trino = DWH.create(:trino, {
298
+ host: 'localhost',
299
+ port: 8080, # Default: 8080
300
+ catalog: 'hive', # Required
301
+ schema: 'default', # Optional
302
+ username: 'analyst',
303
+ password: 'password', # Optional
304
+ client_name: 'My Application' # Default: 'DWH Ruby Gem'
305
+ })
306
+ ```
307
+
308
+ ### SSL Configuration
309
+
310
+ ```ruby
311
+ trino = DWH.create(:trino, {
312
+ host: 'trino.example.com',
313
+ port: 443,
314
+ ssl: true, # will set {ssl: {verify: false}}
315
+ catalog: 'hive',
316
+ username: 'analyst',
317
+ password: 'password',
318
+ client_name: "My App"
319
+ })
320
+ ```
321
+
322
+ ### Advanced Configuration with Headers
323
+
324
+ ```ruby
325
+ trino = DWH.create(:trino, {
326
+ host: 'trino.example.com',
327
+ port: 8080,
328
+ catalog: 'delta_lake',
329
+ schema: 'analytics',
330
+ username: 'analyst',
331
+ query_timeout: 1800, # seconds, default: 3600
332
+ extra_connection_params: {
333
+ http_headers: {
334
+ 'X-Trino-User' => 'Real User Name',
335
+ 'X-Trino-Source' => 'Analytics Dashboard',
336
+ 'X-Forwarded-Request' => 'client-request-id'
337
+ },
338
+ ssl: {
339
+ verify: true,
340
+ }
341
+ }
342
+ })
343
+ ```
344
+
345
+ ## Apache Druid Adapter
346
+
347
+ The Druid adapter uses HTTP API calls via the `faraday` gem for real-time analytics.
348
+
349
+ ### Basic Configuration
350
+
351
+ ```ruby
352
+ druid = DWH.create(:druid, {
353
+ protocol: 'http', # 'http' or 'https'
354
+ host: 'localhost',
355
+ port: 8080, # Default: 8081
356
+ client_name: 'My Application' # Default: 'DWH Ruby Gem'
357
+ })
358
+ ```
359
+
360
+ ### HTTPS with Basic Authentication
361
+
362
+ ```ruby
363
+ druid = DWH.create(:druid, {
364
+ protocol: 'https',
365
+ host: 'druid.example.com',
366
+ port: 443,
367
+ basic_auth: 'base64_encoded_credentials', # Base64 encoded username:password
368
+ query_timeout: 600, # seconds, default: 600
369
+ open_timeout: 30 # connection timeout, default: nil
370
+ })
371
+ ```
372
+
373
+ ### Advanced Configuration with Context
374
+
375
+ ```ruby
376
+ druid = DWH.create(:druid, {
377
+ protocol: 'https',
378
+ host: 'druid.example.com',
379
+ port: 8080,
380
+ basic_auth: 'dXNlcjpwYXNz', # base64 for 'user:pass'
381
+ extra_connection_params: {
382
+ context: {
383
+ user: 'analyst_name',
384
+ team: 'data_engineering',
385
+ priority: 10,
386
+ useCache: true
387
+ }
388
+ }
389
+ })
390
+ ```
391
+
392
+ ## AWS Athena Adapter
393
+
394
+ The Athena adapter requires the `aws-sdk-athena` gem and works with both Trino and Presto.
395
+
396
+ ### Basic Configuration
397
+
398
+ ```ruby
399
+ athena = DWH.create(:athena, {
400
+ region: 'us-east-1',
401
+ database: 'default',
402
+ s3_output_location: 's3://my-athena-results-bucket/queries/',
403
+ access_key_id: 'AKIAIOSFODNN7EXAMPLE',
404
+ secret_access_key: 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY',
405
+ catalog: 'hive', # optional will default to awsdatacatalog
406
+ database: 'default', # Optional. Db or schema
407
+ workgroup: 'my-dept-strata' # optional workgroup
408
+ })
409
+ ```
410
+
411
+ ### SSL Configuration
412
+
413
+ ```ruby
414
+ athena = DWH.create(:athena, {
415
+ region: 'us-east-1',
416
+ database: 'default',
417
+ s3_output_location: 's3://my-athena-results-bucket/queries/',
418
+ access_key_id: 'AKIAIOSFODNN7EXAMPLE',
419
+ secret_access_key: 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY',
420
+ catalog: 'hive', # optional will default to awsdatacatalog
421
+ database: 'default', # Optional. Db or schema
422
+ workgroup: 'my-dept-strata', # optional workgroup
423
+ extra_connection_params: {
424
+ ssl_ca_directory: 'path/to/certs/'
425
+ }
426
+ })
427
+ ```
428
+
429
+ ### Advanced Configuration
430
+
431
+ See full list of config options here: [athena-api](https://docs.aws.amazon.com/sdk-for-ruby/v2/api/Aws/Athena/Client.html#initialize-instance_method)
432
+
433
+ ## Configuration Validation
434
+
435
+ DWH validates configuration parameters at creation time:
436
+
437
+ ```ruby
438
+ begin
439
+ adapter = DWH.create(:postgres, { host: 'localhost' }) # Missing required database
440
+ rescue DWH::ConfigError => e
441
+ puts "Configuration error: #{e.message}"
442
+ end
443
+ ```
444
+
445
+ Each adapter defines required and optional parameters with validation rules. Check the adapter-specific sections above for the complete list of supported parameters.