dwh 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rubocop.yml +36 -0
- data/CHANGELOG.md +5 -0
- data/LICENSE +21 -0
- data/README.md +130 -0
- data/Rakefile +42 -0
- data/docs/DWH/Adapters/Adapter.html +3053 -0
- data/docs/DWH/Adapters/Athena.html +1704 -0
- data/docs/DWH/Adapters/Boolean.html +121 -0
- data/docs/DWH/Adapters/Druid.html +1626 -0
- data/docs/DWH/Adapters/DuckDb.html +2012 -0
- data/docs/DWH/Adapters/MySql.html +1704 -0
- data/docs/DWH/Adapters/OpenAuthorizable/ClassMethods.html +265 -0
- data/docs/DWH/Adapters/OpenAuthorizable.html +1102 -0
- data/docs/DWH/Adapters/Postgres.html +2000 -0
- data/docs/DWH/Adapters/Snowflake.html +1662 -0
- data/docs/DWH/Adapters/SqlServer.html +2084 -0
- data/docs/DWH/Adapters/Trino.html +1835 -0
- data/docs/DWH/Adapters.html +129 -0
- data/docs/DWH/AuthenticationError.html +142 -0
- data/docs/DWH/Behaviors.html +767 -0
- data/docs/DWH/Capabilities.html +748 -0
- data/docs/DWH/Column.html +1115 -0
- data/docs/DWH/ConfigError.html +143 -0
- data/docs/DWH/ConnectionError.html +143 -0
- data/docs/DWH/DWHError.html +138 -0
- data/docs/DWH/ExecutionError.html +143 -0
- data/docs/DWH/Factory.html +1133 -0
- data/docs/DWH/Functions/Arrays.html +505 -0
- data/docs/DWH/Functions/Dates.html +1644 -0
- data/docs/DWH/Functions/ExtractDatePart.html +804 -0
- data/docs/DWH/Functions/Nulls.html +377 -0
- data/docs/DWH/Functions.html +846 -0
- data/docs/DWH/Logger.html +258 -0
- data/docs/DWH/OAuthError.html +138 -0
- data/docs/DWH/Settings.html +658 -0
- data/docs/DWH/StreamingStats.html +804 -0
- data/docs/DWH/Table.html +1260 -0
- data/docs/DWH/TableStats.html +583 -0
- data/docs/DWH/TokenExpiredError.html +142 -0
- data/docs/DWH/UnsupportedCapability.html +135 -0
- data/docs/DWH.html +220 -0
- data/docs/_index.html +471 -0
- data/docs/class_list.html +54 -0
- data/docs/css/common.css +1 -0
- data/docs/css/full_list.css +58 -0
- data/docs/css/style.css +503 -0
- data/docs/file.README.html +210 -0
- data/docs/file.adapters.html +514 -0
- data/docs/file.creating-adapters.html +497 -0
- data/docs/file.getting-started.html +288 -0
- data/docs/file.usage.html +446 -0
- data/docs/file_list.html +79 -0
- data/docs/frames.html +22 -0
- data/docs/guides/adapters.md +445 -0
- data/docs/guides/creating-adapters.md +430 -0
- data/docs/guides/getting-started.md +225 -0
- data/docs/guides/usage.md +378 -0
- data/docs/index.html +210 -0
- data/docs/js/app.js +344 -0
- data/docs/js/full_list.js +242 -0
- data/docs/js/jquery.js +4 -0
- data/docs/method_list.html +2038 -0
- data/docs/top-level-namespace.html +110 -0
- data/lib/dwh/adapters/athena.rb +359 -0
- data/lib/dwh/adapters/druid.rb +267 -0
- data/lib/dwh/adapters/duck_db.rb +235 -0
- data/lib/dwh/adapters/my_sql.rb +235 -0
- data/lib/dwh/adapters/open_authorizable.rb +215 -0
- data/lib/dwh/adapters/postgres.rb +250 -0
- data/lib/dwh/adapters/snowflake.rb +489 -0
- data/lib/dwh/adapters/sql_server.rb +257 -0
- data/lib/dwh/adapters/trino.rb +213 -0
- data/lib/dwh/adapters.rb +363 -0
- data/lib/dwh/behaviors.rb +67 -0
- data/lib/dwh/capabilities.rb +39 -0
- data/lib/dwh/column.rb +79 -0
- data/lib/dwh/errors.rb +29 -0
- data/lib/dwh/factory.rb +125 -0
- data/lib/dwh/functions/arrays.rb +42 -0
- data/lib/dwh/functions/dates.rb +162 -0
- data/lib/dwh/functions/extract_date_part.rb +70 -0
- data/lib/dwh/functions/nulls.rb +31 -0
- data/lib/dwh/functions.rb +86 -0
- data/lib/dwh/logger.rb +50 -0
- data/lib/dwh/settings/athena.yml +77 -0
- data/lib/dwh/settings/base.yml +81 -0
- data/lib/dwh/settings/databricks.yml +51 -0
- data/lib/dwh/settings/druid.yml +59 -0
- data/lib/dwh/settings/duckdb.yml +44 -0
- data/lib/dwh/settings/mysql.yml +67 -0
- data/lib/dwh/settings/postgres.yml +30 -0
- data/lib/dwh/settings/redshift.yml +52 -0
- data/lib/dwh/settings/snowflake.yml +45 -0
- data/lib/dwh/settings/sqlserver.yml +80 -0
- data/lib/dwh/settings/trino.yml +77 -0
- data/lib/dwh/settings.rb +79 -0
- data/lib/dwh/streaming_stats.rb +69 -0
- data/lib/dwh/table.rb +105 -0
- data/lib/dwh/table_stats.rb +51 -0
- data/lib/dwh/version.rb +5 -0
- data/lib/dwh.rb +54 -0
- data/sig/dwh.rbs +4 -0
- metadata +231 -0
@@ -0,0 +1,430 @@
|
|
1
|
+
<!--
|
2
|
+
# @title Creating Custom Adapters
|
3
|
+
-->
|
4
|
+
# Creating Custom Adapters
|
5
|
+
|
6
|
+
The whole point of this library is to make adding a new database integration easy. With a few steps you can create your own adapter. If it's generic, please contribute it back to the project via a PR.
|
7
|
+
|
8
|
+
This guide walks you through creating your own custom database adapter for DWH. Creating a new adapter involves extending the base adapter class, implementing required methods, and optionally creating custom settings.
|
9
|
+
|
10
|
+
## Understanding DWH Architecture
|
11
|
+
|
12
|
+
DWH adapters have a simple, focused architecture:
|
13
|
+
|
14
|
+
- **5 Core Methods**: Every adapter must implement 5 essential methods
|
15
|
+
- **YAML Settings**: Database-specific behavior controlled by YAML configuration
|
16
|
+
- **Configuration Validation**: Automatic validation of connection parameters
|
17
|
+
- **Function Translation**: SQL functions automatically translated to database-specific syntax
|
18
|
+
|
19
|
+
## Minimal Adapter Example
|
20
|
+
|
21
|
+
Here's a minimal adapter implementation:
|
22
|
+
|
23
|
+
```ruby
|
24
|
+
module DWH
|
25
|
+
module Adapters
|
26
|
+
class MyCustomAdapter < Adapter
|
27
|
+
# Define required configuration parameters
|
28
|
+
config :host, String, required: true, message: 'server host ip address or domain name'
|
29
|
+
config :port, Integer, required: false, default: 1234, message: 'port to connect to'
|
30
|
+
config :database, String, required: true, message: 'name of database to connect to'
|
31
|
+
config :username, String, required: true, message: 'connection username'
|
32
|
+
config :password, String, required: false, default: nil, message: 'connection password'
|
33
|
+
|
34
|
+
# Implement required methods
|
35
|
+
def connection
|
36
|
+
# Return your database connection object
|
37
|
+
# This is cached, so implement connection reuse here
|
38
|
+
@connection ||= create_connection
|
39
|
+
end
|
40
|
+
|
41
|
+
def tables(catalog: nil, schema: nil)
|
42
|
+
# Return array of DWH::Table objects
|
43
|
+
# Use catalog/schema for filtering if supported
|
44
|
+
end
|
45
|
+
|
46
|
+
def metadata(table_name, catalog: nil, schema: nil)
|
47
|
+
# Return single DWH::Table object with column information
|
48
|
+
end
|
49
|
+
|
50
|
+
def stats(table_name, date_column: nil, catalog: nil, schema: nil)
|
51
|
+
# Return DWH::TableStats object with row counts and date ranges
|
52
|
+
end
|
53
|
+
|
54
|
+
def execute(sql, format: :array, retries: 0)
|
55
|
+
# Execute SQL and return results in specified format
|
56
|
+
# Formats: :array, :object, :csv, :native
|
57
|
+
end
|
58
|
+
|
59
|
+
def execute_stream(sql, io, stats: nil)
|
60
|
+
# Execute SQL and stream results directly to IO object
|
61
|
+
end
|
62
|
+
|
63
|
+
def stream(sql, &block)
|
64
|
+
# Execute SQL and yield chunks to block
|
65
|
+
end
|
66
|
+
|
67
|
+
private
|
68
|
+
|
69
|
+
def create_connection
|
70
|
+
# Your database-specific connection logic
|
71
|
+
MyDatabaseClient.connect(
|
72
|
+
host: config[:host],
|
73
|
+
port: config[:port],
|
74
|
+
database: config[:database],
|
75
|
+
username: config[:username],
|
76
|
+
password: config[:password]
|
77
|
+
)
|
78
|
+
end
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
# Register your adapter
|
84
|
+
DWH.register(:mycustom, DWH::Adapters::MyCustomAdapter)
|
85
|
+
```
|
86
|
+
|
87
|
+
## Step-by-Step Implementation
|
88
|
+
|
89
|
+
### 1. Define Configuration Parameters
|
90
|
+
|
91
|
+
Use the `config` class method to define connection parameters:
|
92
|
+
|
93
|
+
```ruby
|
94
|
+
class MyCustomAdapter < Adapter
|
95
|
+
# Required parameters
|
96
|
+
config :host, String, required: true, message: 'server host ip address or domain name'
|
97
|
+
config :database, String, required: true, message: 'name of database to connect to'
|
98
|
+
|
99
|
+
# Optional parameters with defaults
|
100
|
+
config :port, Integer, required: false, default: 5432, message: 'port to connect to'
|
101
|
+
config :timeout, Integer, required: false, default: 30, message: 'connection timeout'
|
102
|
+
|
103
|
+
# Boolean parameters
|
104
|
+
config :ssl, Boolean, required: false, default: false, message: 'use ssl connection'
|
105
|
+
|
106
|
+
# Parameters with allowed values
|
107
|
+
config :auth_type, String, required: false, default: 'basic',
|
108
|
+
message: 'authentication type', allowed: %w[basic oauth token]
|
109
|
+
end
|
110
|
+
```
|
111
|
+
|
112
|
+
### 2. Implement Connection Management
|
113
|
+
|
114
|
+
```ruby
|
115
|
+
def connection
|
116
|
+
return @connection if @connection && connection_valid?
|
117
|
+
|
118
|
+
@connection = create_connection
|
119
|
+
end
|
120
|
+
|
121
|
+
private
|
122
|
+
|
123
|
+
def create_connection
|
124
|
+
# Example for HTTP-based database
|
125
|
+
Faraday.new(
|
126
|
+
url: "#{protocol}://#{config[:host]}:#{config[:port]}",
|
127
|
+
headers: build_headers,
|
128
|
+
request: {
|
129
|
+
timeout: config[:timeout]
|
130
|
+
}
|
131
|
+
)
|
132
|
+
end
|
133
|
+
|
134
|
+
def build_headers
|
135
|
+
headers = { 'Content-Type' => 'application/json' }
|
136
|
+
headers['Authorization'] = "Bearer #{config[:token]}" if config[:token]
|
137
|
+
headers
|
138
|
+
end
|
139
|
+
|
140
|
+
def connection_valid?
|
141
|
+
# Implement connection health check
|
142
|
+
@connection&.get('/health')&.success?
|
143
|
+
rescue
|
144
|
+
false
|
145
|
+
end
|
146
|
+
```
|
147
|
+
|
148
|
+
### 3. Implement Table Discovery
|
149
|
+
|
150
|
+
```ruby
|
151
|
+
def tables(catalog: nil, schema: nil)
|
152
|
+
query = build_tables_query(catalog: catalog, schema: schema)
|
153
|
+
results = execute(query, format: :array)
|
154
|
+
|
155
|
+
results.map do |row|
|
156
|
+
DWH::Table.new(
|
157
|
+
physical_name: row[0],
|
158
|
+
schema: row[1] || 'default',
|
159
|
+
catalog: row[2],
|
160
|
+
table_type: row[3] || 'TABLE'
|
161
|
+
)
|
162
|
+
end
|
163
|
+
end
|
164
|
+
|
165
|
+
private
|
166
|
+
|
167
|
+
def build_tables_query(catalog: nil, schema: nil)
|
168
|
+
query = "SHOW TABLES"
|
169
|
+
|
170
|
+
conditions = []
|
171
|
+
conditions << "FROM #{catalog}" if catalog
|
172
|
+
conditions << "LIKE '#{schema}.%'" if schema
|
173
|
+
|
174
|
+
query += " #{conditions.join(' ')}" unless conditions.empty?
|
175
|
+
query
|
176
|
+
end
|
177
|
+
```
|
178
|
+
|
179
|
+
### 4. Implement Metadata Extraction
|
180
|
+
|
181
|
+
```ruby
|
182
|
+
def metadata(table_name, catalog: nil, schema: nil)
|
183
|
+
# Parse table name if it includes schema/catalog
|
184
|
+
parsed = parse_table_name(table_name, catalog: catalog, schema: schema)
|
185
|
+
|
186
|
+
query = build_describe_query(parsed[:table], parsed[:schema], parsed[:catalog])
|
187
|
+
results = execute(query, format: :array)
|
188
|
+
|
189
|
+
columns = results.map do |row|
|
190
|
+
DWH::Column.new(
|
191
|
+
name: row[0],
|
192
|
+
data_type: row[1],
|
193
|
+
normalized_data_type: normalize_data_type(row[1]),
|
194
|
+
nullable: row[2] != 'NO',
|
195
|
+
default_value: row[3],
|
196
|
+
character_maximum_length: row[4],
|
197
|
+
numeric_precision: row[5],
|
198
|
+
numeric_scale: row[6]
|
199
|
+
)
|
200
|
+
end
|
201
|
+
|
202
|
+
DWH::Table.new(
|
203
|
+
physical_name: parsed[:table],
|
204
|
+
schema: parsed[:schema],
|
205
|
+
catalog: parsed[:catalog],
|
206
|
+
columns: columns
|
207
|
+
)
|
208
|
+
end
|
209
|
+
```
|
210
|
+
|
211
|
+
### 5. Implement Statistics Collection
|
212
|
+
|
213
|
+
```ruby
|
214
|
+
def stats(table_name, date_column: nil, catalog: nil, schema: nil)
|
215
|
+
parsed = parse_table_name(table_name, catalog: catalog, schema: schema)
|
216
|
+
full_table_name = build_full_table_name(parsed)
|
217
|
+
|
218
|
+
# Get row count
|
219
|
+
count_query = "SELECT COUNT(*) FROM #{full_table_name}"
|
220
|
+
row_count = execute(count_query, format: :array).first.first
|
221
|
+
|
222
|
+
# Get date range if date column provided
|
223
|
+
date_start = date_end = nil
|
224
|
+
if date_column
|
225
|
+
date_query = "SELECT MIN(#{date_column}), MAX(#{date_column}) FROM #{full_table_name}"
|
226
|
+
date_result = execute(date_query, format: :array).first
|
227
|
+
date_start, date_end = date_result
|
228
|
+
end
|
229
|
+
|
230
|
+
DWH::TableStats.new(
|
231
|
+
row_count: row_count,
|
232
|
+
date_start: date_start,
|
233
|
+
date_end: date_end
|
234
|
+
)
|
235
|
+
end
|
236
|
+
```
|
237
|
+
|
238
|
+
### 6. Implement Query Execution
|
239
|
+
|
240
|
+
```ruby
|
241
|
+
def execute(sql, format: :array, retries: 0)
|
242
|
+
response = connection.post('/query', { sql: sql }.to_json)
|
243
|
+
|
244
|
+
raise DWH::ExecutionError, "Query failed: #{response.body}" unless response.success?
|
245
|
+
|
246
|
+
raw_data = JSON.parse(response.body)
|
247
|
+
format_results(raw_data, format)
|
248
|
+
rescue => e
|
249
|
+
if retries > 0
|
250
|
+
sleep(1)
|
251
|
+
execute(sql, format: format, retries: retries - 1)
|
252
|
+
else
|
253
|
+
raise DWH::ExecutionError, "Query execution failed: #{e.message}"
|
254
|
+
end
|
255
|
+
end
|
256
|
+
|
257
|
+
def execute_stream(sql, io, stats: nil)
|
258
|
+
# For HTTP APIs, you might need to paginate or use streaming endpoints
|
259
|
+
offset = 0
|
260
|
+
limit = 10_000
|
261
|
+
|
262
|
+
loop do
|
263
|
+
paginated_sql = "#{sql} LIMIT #{limit} OFFSET #{offset}"
|
264
|
+
results = execute(paginated_sql, format: :array)
|
265
|
+
|
266
|
+
break if results.empty?
|
267
|
+
|
268
|
+
results.each do |row|
|
269
|
+
csv_row = CSV.generate_line(row)
|
270
|
+
io.write(csv_row)
|
271
|
+
stats&.add_row(row)
|
272
|
+
end
|
273
|
+
|
274
|
+
offset += limit
|
275
|
+
end
|
276
|
+
end
|
277
|
+
|
278
|
+
def stream(sql, &block)
|
279
|
+
# Similar to execute_stream but yields chunks to block
|
280
|
+
offset = 0
|
281
|
+
limit = 10_000
|
282
|
+
|
283
|
+
loop do
|
284
|
+
paginated_sql = "#{sql} LIMIT #{limit} OFFSET #{offset}"
|
285
|
+
chunk = execute(paginated_sql, format: :array)
|
286
|
+
|
287
|
+
break if chunk.empty?
|
288
|
+
|
289
|
+
yield chunk
|
290
|
+
offset += limit
|
291
|
+
end
|
292
|
+
end
|
293
|
+
|
294
|
+
private
|
295
|
+
|
296
|
+
def format_results(raw_data, format)
|
297
|
+
case format
|
298
|
+
when :array
|
299
|
+
raw_data['rows']
|
300
|
+
when :object
|
301
|
+
columns = raw_data['columns']
|
302
|
+
raw_data['rows'].map { |row| columns.zip(row).to_h }
|
303
|
+
when :csv
|
304
|
+
CSV.generate do |csv|
|
305
|
+
raw_data['rows'].each { |row| csv << row }
|
306
|
+
end
|
307
|
+
when :native
|
308
|
+
raw_data
|
309
|
+
else
|
310
|
+
raise ArgumentError, "Unsupported format: #{format}"
|
311
|
+
end
|
312
|
+
end
|
313
|
+
```
|
314
|
+
|
315
|
+
## Creating Custom Settings
|
316
|
+
|
317
|
+
### 1. Create Settings File
|
318
|
+
|
319
|
+
Create your settings file by copying the [base settings file](https://github.com/stratasite/dwh/blob/main/lib/dwh/settings/base.yml) to a relative directory, like so: `settings/mycustom.yml`
|
320
|
+
|
321
|
+
```yaml
|
322
|
+
# Override base settings for your database
|
323
|
+
|
324
|
+
# Function mappings
|
325
|
+
truncate_date: "DATE_TRUNC('@unit', @exp)"
|
326
|
+
date_literal: "DATE('@val')"
|
327
|
+
cast: "CAST(@exp AS @type)"
|
328
|
+
|
329
|
+
# String functions
|
330
|
+
trim: "LTRIM(RTRIM(@exp))"
|
331
|
+
upper_case: "UPPER(@exp)"
|
332
|
+
lower_case: "LOWER(@exp)"
|
333
|
+
|
334
|
+
# Null handling
|
335
|
+
if_null: "ISNULL(@exp, @when_null)"
|
336
|
+
null_if: "CASE WHEN @exp = @target THEN NULL ELSE @exp END"
|
337
|
+
|
338
|
+
# Capabilities
|
339
|
+
supports_window_functions: true
|
340
|
+
supports_array_functions: false
|
341
|
+
supports_common_table_expressions: true
|
342
|
+
supports_temp_tables: false
|
343
|
+
|
344
|
+
# Query behavior
|
345
|
+
temp_table_type: "subquery" # options: cte, subquery, temp
|
346
|
+
final_pass_measure_join_type: "inner" # inner, left, right, full
|
347
|
+
|
348
|
+
# Custom settings for your database
|
349
|
+
custom_query_prefix: "/* Generated by DWH */"
|
350
|
+
max_query_length: 1000000
|
351
|
+
```
|
352
|
+
|
353
|
+
### 2. Custom Settings Location
|
354
|
+
|
355
|
+
```ruby
|
356
|
+
class MyCustomAdapter < Adapter
|
357
|
+
# Specify custom settings file location
|
358
|
+
settings_file_path "/path/to/my_custom_settings.yml"
|
359
|
+
|
360
|
+
# ... rest of implementation
|
361
|
+
end
|
362
|
+
```
|
363
|
+
|
364
|
+
## Advanced Features
|
365
|
+
|
366
|
+
### Error Handling
|
367
|
+
|
368
|
+
```ruby
|
369
|
+
def execute(sql, format: :array, retries: 0)
|
370
|
+
# Your execution logic
|
371
|
+
rescue MyDatabaseClient::ConnectionError => e
|
372
|
+
raise DWH::ConnectionError, "Database connection failed: #{e.message}"
|
373
|
+
rescue MyDatabaseClient::QueryError => e
|
374
|
+
raise DWH::ExecutionError, "Query execution failed: #{e.message}"
|
375
|
+
rescue => e
|
376
|
+
raise DWH::DWHError, "Unexpected error: #{e.message}"
|
377
|
+
end
|
378
|
+
```
|
379
|
+
|
380
|
+
### Custom Function Translation
|
381
|
+
|
382
|
+
```ruby
|
383
|
+
def custom_function(expression, param1, param2)
|
384
|
+
# Access settings for function templates
|
385
|
+
template = settings[:custom_function] || "CUSTOM_FUNC(@exp, @p1, @p2)"
|
386
|
+
|
387
|
+
template.gsub('@exp', expression)
|
388
|
+
.gsub('@p1', param1.to_s)
|
389
|
+
.gsub('@p2', param2.to_s)
|
390
|
+
end
|
391
|
+
```
|
392
|
+
|
393
|
+
## Registration and Usage
|
394
|
+
|
395
|
+
### Register Your Adapter
|
396
|
+
|
397
|
+
```ruby
|
398
|
+
# In your gem or application initialization
|
399
|
+
require 'dwh'
|
400
|
+
require 'my_custom_adapter'
|
401
|
+
|
402
|
+
DWH.register(:mycustom, DWH::Adapters::MyCustomAdapter)
|
403
|
+
```
|
404
|
+
|
405
|
+
### Use Your Adapter
|
406
|
+
|
407
|
+
```ruby
|
408
|
+
# Create adapter instance
|
409
|
+
adapter = DWH.create(:mycustom, {
|
410
|
+
host: 'database.example.com',
|
411
|
+
port: 1234,
|
412
|
+
database: 'analytics',
|
413
|
+
username: 'analyst',
|
414
|
+
password: 'secret'
|
415
|
+
})
|
416
|
+
|
417
|
+
# Use standard DWH interface
|
418
|
+
tables = adapter.tables
|
419
|
+
metadata = adapter.metadata('users')
|
420
|
+
results = adapter.execute("SELECT COUNT(*) FROM users")
|
421
|
+
```
|
422
|
+
|
423
|
+
## Examples to Study
|
424
|
+
|
425
|
+
Look at existing adapters for implementation patterns:
|
426
|
+
|
427
|
+
- **PostgreSQL** (`lib/dwh/adapters/postgres.rb`) - RDBMS with full SQL support
|
428
|
+
- **Druid** (`lib/dwh/adapters/druid.rb`) - HTTP API-based adapter
|
429
|
+
- **DuckDB** (`lib/dwh/adapters/duck_db.rb`) - Embedded database adapter
|
430
|
+
|
@@ -0,0 +1,225 @@
|
|
1
|
+
<!--
|
2
|
+
# @title Getting Started
|
3
|
+
-->
|
4
|
+
# Getting Started with DWH
|
5
|
+
|
6
|
+
DWH is a lightweight library that provides a unified interface to connect, introspect, and query popular databases. This guide will help you get up and running quickly.
|
7
|
+
|
8
|
+
## Installation
|
9
|
+
|
10
|
+
Add this line to your application's Gemfile:
|
11
|
+
|
12
|
+
```ruby
|
13
|
+
gem 'dwh'
|
14
|
+
```
|
15
|
+
|
16
|
+
And then execute:
|
17
|
+
|
18
|
+
```bash
|
19
|
+
bundle install
|
20
|
+
```
|
21
|
+
|
22
|
+
Or install it yourself as:
|
23
|
+
|
24
|
+
```bash
|
25
|
+
gem install dwh
|
26
|
+
```
|
27
|
+
|
28
|
+
## Basic Usage
|
29
|
+
|
30
|
+
### Creating Your First Connection
|
31
|
+
|
32
|
+
```ruby
|
33
|
+
require 'dwh'
|
34
|
+
|
35
|
+
# Connect to PostgreSQL
|
36
|
+
postgres = DWH.create(:postgres, {
|
37
|
+
host: 'localhost',
|
38
|
+
database: 'mydb',
|
39
|
+
username: 'user',
|
40
|
+
password: 'password'
|
41
|
+
})
|
42
|
+
|
43
|
+
# Connect to DuckDB (in-memory)
|
44
|
+
duckdb = DWH.create(:duckdb, {
|
45
|
+
database: ':memory:'
|
46
|
+
})
|
47
|
+
```
|
48
|
+
|
49
|
+
### Your First Query
|
50
|
+
|
51
|
+
```ruby
|
52
|
+
# Execute a simple query
|
53
|
+
results = postgres.execute("SELECT * FROM users LIMIT 10")
|
54
|
+
|
55
|
+
# Results are returned as arrays by default
|
56
|
+
results.each do |row|
|
57
|
+
puts row.inspect
|
58
|
+
end
|
59
|
+
```
|
60
|
+
|
61
|
+
### Exploring Your Database
|
62
|
+
|
63
|
+
```ruby
|
64
|
+
# List all tables
|
65
|
+
tables = postgres.tables
|
66
|
+
puts "Available tables: #{tables.map(&:physical_name)}"
|
67
|
+
|
68
|
+
# Get detailed information about a table
|
69
|
+
table_info = postgres.metadata('users')
|
70
|
+
puts "Table: #{table_info.physical_name}"
|
71
|
+
puts "Schema: #{table_info.schema}"
|
72
|
+
puts "Columns:"
|
73
|
+
table_info.columns.each do |column|
|
74
|
+
puts " #{column.name} (#{column.normalized_data_type})"
|
75
|
+
end
|
76
|
+
|
77
|
+
# Get table statistics
|
78
|
+
stats = postgres.stats('users', date_column: 'created_at')
|
79
|
+
puts "Row count: #{stats.row_count}"
|
80
|
+
puts "Date range: #{stats.date_start} to #{stats.date_end}"
|
81
|
+
```
|
82
|
+
|
83
|
+
### Different Output Formats
|
84
|
+
|
85
|
+
```ruby
|
86
|
+
# Get results as arrays (default)
|
87
|
+
array_results = postgres.execute("SELECT id, name FROM users LIMIT 5")
|
88
|
+
|
89
|
+
# Get results as hashes/objects
|
90
|
+
hash_results = postgres.execute("SELECT id, name FROM users LIMIT 5", format: :object)
|
91
|
+
|
92
|
+
# Get results as CSV string
|
93
|
+
csv_results = postgres.execute("SELECT id, name FROM users LIMIT 5", format: :csv)
|
94
|
+
|
95
|
+
# Stream large results to a file
|
96
|
+
postgres.execute_stream("SELECT * FROM large_table", File.open('output.csv', 'w'))
|
97
|
+
```
|
98
|
+
|
99
|
+
### Streaming Large Datasets
|
100
|
+
|
101
|
+
```ruby
|
102
|
+
# Stream data while tracking stats and previewing data in a separate thread
|
103
|
+
stats = DWH::StreamingStats.new(10000) # num of rows to keep in memory for previewing
|
104
|
+
exec_thread = Thread.new {
|
105
|
+
postgres.execute_stream("SELECT * FROM large_table", File.open('output.csv', 'w'), stats: stats)
|
106
|
+
}
|
107
|
+
|
108
|
+
mon_thread = Thread.new{
|
109
|
+
loop do
|
110
|
+
break unless exec_thread.alive?
|
111
|
+
|
112
|
+
puts stats.data.last
|
113
|
+
end
|
114
|
+
}
|
115
|
+
|
116
|
+
[exec_thread, mon_thread].each(&:join)
|
117
|
+
|
118
|
+
# Stream with block processing
|
119
|
+
postgres.stream("SELECT * FROM large_table") do |chunk|
|
120
|
+
process_chunk(chunk)
|
121
|
+
end
|
122
|
+
|
123
|
+
```
|
124
|
+
|
125
|
+
## Advanced Usage
|
126
|
+
|
127
|
+
### Connection Pooling
|
128
|
+
|
129
|
+
```ruby
|
130
|
+
# Create a connection pool
|
131
|
+
pool = DWH.pool('my_postgres_pool', :postgres, {
|
132
|
+
host: 'localhost',
|
133
|
+
database: 'mydb',
|
134
|
+
username: 'user',
|
135
|
+
password: 'password'
|
136
|
+
}, size: 10, timeout: 5)
|
137
|
+
|
138
|
+
# Use the pool
|
139
|
+
pool.with do |connection|
|
140
|
+
results = connection.execute("SELECT COUNT(*) FROM users")
|
141
|
+
end
|
142
|
+
|
143
|
+
# Shutdown the pool when done
|
144
|
+
DWH.shutdown('my_postgres_pool')
|
145
|
+
```
|
146
|
+
|
147
|
+
### Using Extra Connection Params
|
148
|
+
|
149
|
+
DWH uses an existing Ruby gem where possible to connect to each target database. When that is not possible and the db supports a REST endpoint, we will use Faraday.
|
150
|
+
|
151
|
+
Using the `extra_connection_params` key, you can pass in a Hash of options that the target connector supports but that DWH doesn't expose as first-class config options. The main config options in DWH are based on required and common needs.
|
152
|
+
|
153
|
+
#### Sending Postgres 'connect_timeout' property supported by the PG gem
|
154
|
+
|
155
|
+
```ruby
|
156
|
+
pg = DWH.create(:postgres, {
|
157
|
+
host: 'localhost',
|
158
|
+
database: 'mydb',
|
159
|
+
username: 'user',
|
160
|
+
password: 'password',
|
161
|
+
extra_connection_params: {
|
162
|
+
connect_timeout: 5
|
163
|
+
}
|
164
|
+
})
|
165
|
+
|
166
|
+
```
|
167
|
+
|
168
|
+
### Database Functions
|
169
|
+
|
170
|
+
DWH provides a function translation layer that converts common SQL functions to database-specific syntax:
|
171
|
+
|
172
|
+
```ruby
|
173
|
+
# Date truncation
|
174
|
+
postgres.truncate_date('week', 'created_at') # => DATE_TRUNC('week', created_at)
|
175
|
+
sqlserver.truncate_date('week', 'created_at') # => DATETRUNC(week, created_at)
|
176
|
+
|
177
|
+
# Date literals
|
178
|
+
postgres.date_literal('2025-01-01') # => '2025-01-01'::DATE
|
179
|
+
sqlserver.date_literal('2025-01-01') # => '2025-01-01'
|
180
|
+
|
181
|
+
# Null handling
|
182
|
+
adapter.coalesce('column1', 'column2', "'default'") # => COALESCE(column1, column2, 'default')
|
183
|
+
adapter.null_if('column1', "'empty'") # => NULLIF(column1, 'empty')
|
184
|
+
|
185
|
+
# String functions
|
186
|
+
adapter.trim('column_name') # => TRIM(column_name)
|
187
|
+
adapter.upper_case('column_name') # => UPPER(column_name)
|
188
|
+
adapter.lower_case('column_name') # => LOWER(column_name)
|
189
|
+
```
|
190
|
+
|
191
|
+
## Core API
|
192
|
+
|
193
|
+
Standardized API across adapters:
|
194
|
+
|
195
|
+
connection
|
196
|
+
: creates a reusable connection based on the config hash passed in
|
197
|
+
|
198
|
+
tables(schema: nil, catalog: nil)
|
199
|
+
: returns a list of tables from the default connection or from the specified schema and catalog
|
200
|
+
|
201
|
+
metadata(table_name, schema: nil, catalog: nil)
|
202
|
+
: provides metadata about a table
|
203
|
+
|
204
|
+
stats(table_name, date_column: nil, catalog: nil, schema: nil)
|
205
|
+
: provides table row count and date range
|
206
|
+
|
207
|
+
execute(sql, format: :array, retries: 0)
|
208
|
+
: runs a query and returns in given format
|
209
|
+
|
210
|
+
execute_stream(sql, io, stats: nil)
|
211
|
+
: runs a query and streams it as csv into the given io
|
212
|
+
|
213
|
+
## Error Handling
|
214
|
+
|
215
|
+
```ruby
|
216
|
+
begin
|
217
|
+
results = adapter.execute("SELECT * FROM non_existent_table")
|
218
|
+
rescue DWH::ExecutionError => e
|
219
|
+
puts "Query failed: #{e.message}"
|
220
|
+
rescue DWH::ConnectionError => e
|
221
|
+
puts "Connection failed: #{e.message}"
|
222
|
+
rescue DWH::ConfigError => e
|
223
|
+
puts "Configuration error: #{e.message}"
|
224
|
+
end
|
225
|
+
```
|