dwh 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. checksums.yaml +7 -0
  2. data/.rubocop.yml +36 -0
  3. data/CHANGELOG.md +5 -0
  4. data/LICENSE +21 -0
  5. data/README.md +130 -0
  6. data/Rakefile +42 -0
  7. data/docs/DWH/Adapters/Adapter.html +3053 -0
  8. data/docs/DWH/Adapters/Athena.html +1704 -0
  9. data/docs/DWH/Adapters/Boolean.html +121 -0
  10. data/docs/DWH/Adapters/Druid.html +1626 -0
  11. data/docs/DWH/Adapters/DuckDb.html +2012 -0
  12. data/docs/DWH/Adapters/MySql.html +1704 -0
  13. data/docs/DWH/Adapters/OpenAuthorizable/ClassMethods.html +265 -0
  14. data/docs/DWH/Adapters/OpenAuthorizable.html +1102 -0
  15. data/docs/DWH/Adapters/Postgres.html +2000 -0
  16. data/docs/DWH/Adapters/Snowflake.html +1662 -0
  17. data/docs/DWH/Adapters/SqlServer.html +2084 -0
  18. data/docs/DWH/Adapters/Trino.html +1835 -0
  19. data/docs/DWH/Adapters.html +129 -0
  20. data/docs/DWH/AuthenticationError.html +142 -0
  21. data/docs/DWH/Behaviors.html +767 -0
  22. data/docs/DWH/Capabilities.html +748 -0
  23. data/docs/DWH/Column.html +1115 -0
  24. data/docs/DWH/ConfigError.html +143 -0
  25. data/docs/DWH/ConnectionError.html +143 -0
  26. data/docs/DWH/DWHError.html +138 -0
  27. data/docs/DWH/ExecutionError.html +143 -0
  28. data/docs/DWH/Factory.html +1133 -0
  29. data/docs/DWH/Functions/Arrays.html +505 -0
  30. data/docs/DWH/Functions/Dates.html +1644 -0
  31. data/docs/DWH/Functions/ExtractDatePart.html +804 -0
  32. data/docs/DWH/Functions/Nulls.html +377 -0
  33. data/docs/DWH/Functions.html +846 -0
  34. data/docs/DWH/Logger.html +258 -0
  35. data/docs/DWH/OAuthError.html +138 -0
  36. data/docs/DWH/Settings.html +658 -0
  37. data/docs/DWH/StreamingStats.html +804 -0
  38. data/docs/DWH/Table.html +1260 -0
  39. data/docs/DWH/TableStats.html +583 -0
  40. data/docs/DWH/TokenExpiredError.html +142 -0
  41. data/docs/DWH/UnsupportedCapability.html +135 -0
  42. data/docs/DWH.html +220 -0
  43. data/docs/_index.html +471 -0
  44. data/docs/class_list.html +54 -0
  45. data/docs/css/common.css +1 -0
  46. data/docs/css/full_list.css +58 -0
  47. data/docs/css/style.css +503 -0
  48. data/docs/file.README.html +210 -0
  49. data/docs/file.adapters.html +514 -0
  50. data/docs/file.creating-adapters.html +497 -0
  51. data/docs/file.getting-started.html +288 -0
  52. data/docs/file.usage.html +446 -0
  53. data/docs/file_list.html +79 -0
  54. data/docs/frames.html +22 -0
  55. data/docs/guides/adapters.md +445 -0
  56. data/docs/guides/creating-adapters.md +430 -0
  57. data/docs/guides/getting-started.md +225 -0
  58. data/docs/guides/usage.md +378 -0
  59. data/docs/index.html +210 -0
  60. data/docs/js/app.js +344 -0
  61. data/docs/js/full_list.js +242 -0
  62. data/docs/js/jquery.js +4 -0
  63. data/docs/method_list.html +2038 -0
  64. data/docs/top-level-namespace.html +110 -0
  65. data/lib/dwh/adapters/athena.rb +359 -0
  66. data/lib/dwh/adapters/druid.rb +267 -0
  67. data/lib/dwh/adapters/duck_db.rb +235 -0
  68. data/lib/dwh/adapters/my_sql.rb +235 -0
  69. data/lib/dwh/adapters/open_authorizable.rb +215 -0
  70. data/lib/dwh/adapters/postgres.rb +250 -0
  71. data/lib/dwh/adapters/snowflake.rb +489 -0
  72. data/lib/dwh/adapters/sql_server.rb +257 -0
  73. data/lib/dwh/adapters/trino.rb +213 -0
  74. data/lib/dwh/adapters.rb +363 -0
  75. data/lib/dwh/behaviors.rb +67 -0
  76. data/lib/dwh/capabilities.rb +39 -0
  77. data/lib/dwh/column.rb +79 -0
  78. data/lib/dwh/errors.rb +29 -0
  79. data/lib/dwh/factory.rb +125 -0
  80. data/lib/dwh/functions/arrays.rb +42 -0
  81. data/lib/dwh/functions/dates.rb +162 -0
  82. data/lib/dwh/functions/extract_date_part.rb +70 -0
  83. data/lib/dwh/functions/nulls.rb +31 -0
  84. data/lib/dwh/functions.rb +86 -0
  85. data/lib/dwh/logger.rb +50 -0
  86. data/lib/dwh/settings/athena.yml +77 -0
  87. data/lib/dwh/settings/base.yml +81 -0
  88. data/lib/dwh/settings/databricks.yml +51 -0
  89. data/lib/dwh/settings/druid.yml +59 -0
  90. data/lib/dwh/settings/duckdb.yml +44 -0
  91. data/lib/dwh/settings/mysql.yml +67 -0
  92. data/lib/dwh/settings/postgres.yml +30 -0
  93. data/lib/dwh/settings/redshift.yml +52 -0
  94. data/lib/dwh/settings/snowflake.yml +45 -0
  95. data/lib/dwh/settings/sqlserver.yml +80 -0
  96. data/lib/dwh/settings/trino.yml +77 -0
  97. data/lib/dwh/settings.rb +79 -0
  98. data/lib/dwh/streaming_stats.rb +69 -0
  99. data/lib/dwh/table.rb +105 -0
  100. data/lib/dwh/table_stats.rb +51 -0
  101. data/lib/dwh/version.rb +5 -0
  102. data/lib/dwh.rb +54 -0
  103. data/sig/dwh.rbs +4 -0
  104. metadata +231 -0
data/lib/dwh/table.rb ADDED
@@ -0,0 +1,105 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'column'
4
+
5
require 'json'

module DWH
  # Container to map to a data warehouse table.
  # If you initialize with a fully qualified table name,
  # it will automatically create catalog and schema components.
  #
  # This is the object returned from +metadata+ method call of an adapter
  #
  # ==== Examples
  #    Table.new("dwh.public.hello_world_table")
  #
  #    table_stats_instance = adapter.stats("my_table", schema: "dwh")
  #    Table.new("my_table", schema: "dwh", table_stats: table_stats_instance)
  class Table
    attr_reader :physical_name, :schema, :catalog, :columns, :table_stats

    # Short alias for {#table_stats}.
    alias stats table_stats

    # @param physical_name [String] table name, optionally qualified as
    #   "schema.table" or "catalog.schema.table"
    # @param schema [String, nil] explicit schema; wins over the one in the name
    # @param catalog [String, nil] explicit catalog; wins over the one in the name
    # @param table_stats [TableStats, nil] stats object attached to the table
    def initialize(physical_name, schema: nil, catalog: nil, table_stats: nil)
      parts = physical_name.split('.')

      @physical_name = parts.last
      @table_stats = table_stats
      @catalog = catalog
      @schema = schema
      @columns = []

      # Only fall back to the name's components when nothing explicit was given.
      @catalog = parts.first if @catalog.nil? && parts.length > 2
      return unless @schema.nil? && parts.length > 1

      @schema = parts.length == 2 ? parts.first : parts[1]
    end

    # Appends a column to the table.
    # @return [Array] the table's column list
    def <<(column)
      @columns << column
    end

    # @return [String] catalog, schema and table name joined by dots,
    #   skipping components that are nil
    def fully_qualified_table_name
      [catalog, schema, physical_name].compact.join('.')
    end

    # @return [String] catalog and schema joined by a dot, skipping nils
    def fully_qualified_schema_name
      [catalog, schema].compact.join('.')
    end

    # @return [String, nil] truthy when both catalog and schema are set
    def catalog_and_schema?
      catalog && schema
    end

    # @return [String, nil] truthy when either catalog or schema is set
    def catalog_or_schema?
      catalog || schema
    end

    # @return [Hash] plain-hash representation; +:stats+ is nil when the
    #   table has no stats attached
    def to_h
      {
        physical_name: physical_name,
        schema: schema,
        catalog: catalog,
        columns: columns.map(&:to_h),
        stats: table_stats&.to_h
      }
    end

    # @return [Integer] row count from the attached stats, or 0 when unknown
    def size
      table_stats&.row_count || 0
    end

    # Case-insensitive column lookup.
    # @param name [String, Symbol]
    # @return [Column, nil] the first matching column
    def find_column(name)
      wanted = name.to_s.downcase
      columns.find { |col| col.name.to_s.downcase == wanted }
    end

    # Builds a table from a Hash (string or symbol keys) or a JSON string
    # shaped like the output of {#to_h}. The input is never mutated.
    # Catalog and schema are derived from +physical_name+ only.
    #
    # @param physical_name [String] table name, optionally qualified
    # @param metadata [Hash, String] may carry +:columns+ and +:stats+
    # @return [Table]
    def self.from_hash_or_json(physical_name, metadata)
      metadata = JSON.parse(metadata) if metadata.is_a?(String)
      metadata = symbolize(metadata)

      # +to_h+ emits +stats: nil+ for tables without stats, so the key being
      # present does not imply there is anything to build.
      stats_attrs = metadata[:stats]
      stats = TableStats.new(**symbolize(stats_attrs)) if stats_attrs
      table = new(physical_name, table_stats: stats)

      metadata[:columns]&.each do |raw|
        attrs = symbolize(raw)
        table << Column.new(
          name: attrs[:name],
          data_type: attrs[:data_type],
          precision: attrs[:precision],
          scale: attrs[:scale],
          max_char_length: attrs[:max_char_length],
          schema_type: attrs[:schema_type]
        )
      end

      table
    end

    # Returns a copy of +hash+ with keys converted to symbols where possible.
    def self.symbolize(hash)
      hash.transform_keys { |key| key.respond_to?(:to_sym) ? key.to_sym : key }
    end
    private_class_method :symbolize
  end
end
data/lib/dwh/table_stats.rb ADDED
@@ -0,0 +1,51 @@
1
require 'date'

module DWH
  # TableStats is returned when calling Adapter#stats.
  # This currently just provides basic stats,
  # but could be enhanced in the future to provide more
  # introspection on the table. For example, indexes and
  # partition settings, cardinality of various columns,
  # etc.
  class TableStats
    # @return [Integer] total rows in table
    attr_accessor :row_count

    # @return [DateTime] when a date column is passed to {Adapters::Adapter#stats} it
    #   will return date of first record in the table
    attr_accessor :date_start

    # @return [DateTime] when a date column is passed to {Adapters::Adapter#stats} it
    #   returns the date of the last record in the table
    attr_accessor :date_end

    # @param row_count [#to_i, nil] coerced with +to_i+; nil becomes 0
    # @param date_start [String, Time, Date, nil] normalized to DateTime
    # @param date_end [String, Time, Date, nil] normalized to DateTime
    # @raise [ConfigError] when a date is of an unsupported class
    def initialize(row_count: nil, date_start: nil, date_end: nil)
      @row_count = row_count.to_i
      @date_start = parse_date(date_start)
      @date_end = parse_date(date_end)
    end

    # Hash of the stats attributes
    # @return [Hash] of the attributes
    def to_h
      {
        row_count: row_count,
        date_start: date_start,
        date_end: date_end
      }
    end

    private

    # Normalizes the supported date inputs to a DateTime (nil passes through).
    def parse_date(date)
      case date
      when nil
        nil
      when String
        DateTime.parse(date)
      when Time, Date
        # Direct conversion keeps the offset and sub-second precision that a
        # to_s / parse round trip would drop.
        date.to_datetime
      else
        raise ConfigError, "Unexpected date class: #{date.class.name}"
      end
    end
  end
end
data/lib/dwh/version.rb ADDED
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DWH
4
+ VERSION = '0.1.0' # Version string of the dwh gem.
5
+ end
data/lib/dwh.rb ADDED
@@ -0,0 +1,54 @@
1
+ require 'faraday'
2
+ require 'active_support/core_ext/string/inflections'
3
+ require 'active_support/core_ext/hash/keys'
4
+ require 'active_support/core_ext/object/blank'
5
+ require 'active_support/duration'
6
+
7
+ require_relative 'dwh/version'
8
+ require_relative 'dwh/errors'
9
+ require_relative 'dwh/logger'
10
+ require_relative 'dwh/streaming_stats'
11
+ require_relative 'dwh/factory'
12
+ require_relative 'dwh/adapters'
13
+ require_relative 'dwh/table'
14
+ require_relative 'dwh/table_stats'
15
+ require_relative 'dwh/adapters/druid'
16
+ require_relative 'dwh/adapters/trino'
17
+ require_relative 'dwh/adapters/postgres'
18
+ require_relative 'dwh/adapters/snowflake'
19
+ require_relative 'dwh/adapters/my_sql'
20
+ require_relative 'dwh/adapters/sql_server'
21
+ require_relative 'dwh/adapters/duck_db'
22
+ require_relative 'dwh/adapters/athena'
23
+
24
+ # DWH encapsulates the full functionality of this gem.
25
+ #
26
+ # @example Create an instance of an existing registered adapter:
27
+ # DWH.create("snowflake", {warehouse: "wh", account_id: "myid"})
28
+ #
29
+ # @example Check if an adapter exists:
30
+ # DWH.adapter?(:redshift)
31
+ #
32
+ # @example Register your own adapter:
33
+ # DWH.register(:my_adapter, MyLib::MyAdapter)
34
+ module DWH
35
+ INT_TYPES = %w[int integer bigint tinyint smallint].freeze # integer type names
36
+ DEC_TYPES = %w[real float double decimal].freeze # decimal / floating point type names
37
+ STRING_TYPES = %w[string char varchar varbinary json].freeze # type names grouped as strings
38
+ TIMESTAMP_TYPES = ['timestamp with time zone', 'timestamp(p)', 'timestamp'].freeze # timestamp type names
39
+ DATE_TYPES = %w[date].freeze # date-only type names
40
+
41
+ extend Factory # Factory's methods become callable on DWH itself; `register` below is expected to come from it
42
+
43
+ # Register the default adapters under their lookup keys
44
+ register(:druid, Adapters::Druid)
45
+ register(:postgres, Adapters::Postgres)
46
+ register(:trino, Adapters::Trino)
47
+ register(:snowflake, Adapters::Snowflake)
48
+ register(:mysql, Adapters::MySql)
49
+ register(:sqlserver, Adapters::SqlServer)
50
+ register(:duckdb, Adapters::DuckDb)
51
+ register(:athena, Adapters::Athena)
52
+
53
+ # start_reaper -- NOTE(review): call is intentionally left disabled here; its purpose is not visible in this file, confirm before enabling
54
+ end
data/sig/dwh.rbs ADDED
@@ -0,0 +1,4 @@
1
# Signatures for the dwh gem. The module name must match the Ruby
# constant, which is DWH (see lib/dwh/version.rb), not Dwh.
module DWH
  VERSION: String
  # See the writing guide of rbs: https://github.com/ruby/rbs#guides
end
metadata ADDED
@@ -0,0 +1,231 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: dwh
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Ajo Abraham
8
+ bindir: exe
9
+ cert_chain: []
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: activesupport
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - "~>"
17
+ - !ruby/object:Gem::Version
18
+ version: 8.0.2
19
+ type: :runtime
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - "~>"
24
+ - !ruby/object:Gem::Version
25
+ version: 8.0.2
26
+ - !ruby/object:Gem::Dependency
27
+ name: connection_pool
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - "~>"
31
+ - !ruby/object:Gem::Version
32
+ version: '2.4'
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '2.4'
40
+ - !ruby/object:Gem::Dependency
41
+ name: csv
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: 3.3.5
47
+ type: :runtime
48
+ prerelease: false
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - "~>"
52
+ - !ruby/object:Gem::Version
53
+ version: 3.3.5
54
+ - !ruby/object:Gem::Dependency
55
+ name: faraday
56
+ requirement: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ type: :runtime
62
+ prerelease: false
63
+ version_requirements: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ version: '0'
68
+ - !ruby/object:Gem::Dependency
69
+ name: jwt
70
+ requirement: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - "~>"
73
+ - !ruby/object:Gem::Version
74
+ version: 2.10.1
75
+ type: :runtime
76
+ prerelease: false
77
+ version_requirements: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - "~>"
80
+ - !ruby/object:Gem::Version
81
+ version: 2.10.1
82
+ - !ruby/object:Gem::Dependency
83
+ name: logger
84
+ requirement: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - ">="
87
+ - !ruby/object:Gem::Version
88
+ version: '0'
89
+ type: :runtime
90
+ prerelease: false
91
+ version_requirements: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - ">="
94
+ - !ruby/object:Gem::Version
95
+ version: '0'
96
+ description: |
97
+ Provides a unified interface across data warehouses to connect, execute, and introspect. This is not an ORM but a fast
98
+ integration solution. It is quite easy to add new database adapters. Supports popular cloud warehouses too.
99
+ email:
100
+ - ajo@strata.site
101
+ executables: []
102
+ extensions: []
103
+ extra_rdoc_files: []
104
+ files:
105
+ - ".rubocop.yml"
106
+ - CHANGELOG.md
107
+ - LICENSE
108
+ - README.md
109
+ - Rakefile
110
+ - docs/DWH.html
111
+ - docs/DWH/Adapters.html
112
+ - docs/DWH/Adapters/Adapter.html
113
+ - docs/DWH/Adapters/Athena.html
114
+ - docs/DWH/Adapters/Boolean.html
115
+ - docs/DWH/Adapters/Druid.html
116
+ - docs/DWH/Adapters/DuckDb.html
117
+ - docs/DWH/Adapters/MySql.html
118
+ - docs/DWH/Adapters/OpenAuthorizable.html
119
+ - docs/DWH/Adapters/OpenAuthorizable/ClassMethods.html
120
+ - docs/DWH/Adapters/Postgres.html
121
+ - docs/DWH/Adapters/Snowflake.html
122
+ - docs/DWH/Adapters/SqlServer.html
123
+ - docs/DWH/Adapters/Trino.html
124
+ - docs/DWH/AuthenticationError.html
125
+ - docs/DWH/Behaviors.html
126
+ - docs/DWH/Capabilities.html
127
+ - docs/DWH/Column.html
128
+ - docs/DWH/ConfigError.html
129
+ - docs/DWH/ConnectionError.html
130
+ - docs/DWH/DWHError.html
131
+ - docs/DWH/ExecutionError.html
132
+ - docs/DWH/Factory.html
133
+ - docs/DWH/Functions.html
134
+ - docs/DWH/Functions/Arrays.html
135
+ - docs/DWH/Functions/Dates.html
136
+ - docs/DWH/Functions/ExtractDatePart.html
137
+ - docs/DWH/Functions/Nulls.html
138
+ - docs/DWH/Logger.html
139
+ - docs/DWH/OAuthError.html
140
+ - docs/DWH/Settings.html
141
+ - docs/DWH/StreamingStats.html
142
+ - docs/DWH/Table.html
143
+ - docs/DWH/TableStats.html
144
+ - docs/DWH/TokenExpiredError.html
145
+ - docs/DWH/UnsupportedCapability.html
146
+ - docs/_index.html
147
+ - docs/class_list.html
148
+ - docs/css/common.css
149
+ - docs/css/full_list.css
150
+ - docs/css/style.css
151
+ - docs/file.README.html
152
+ - docs/file.adapters.html
153
+ - docs/file.creating-adapters.html
154
+ - docs/file.getting-started.html
155
+ - docs/file.usage.html
156
+ - docs/file_list.html
157
+ - docs/frames.html
158
+ - docs/guides/adapters.md
159
+ - docs/guides/creating-adapters.md
160
+ - docs/guides/getting-started.md
161
+ - docs/guides/usage.md
162
+ - docs/index.html
163
+ - docs/js/app.js
164
+ - docs/js/full_list.js
165
+ - docs/js/jquery.js
166
+ - docs/method_list.html
167
+ - docs/top-level-namespace.html
168
+ - lib/dwh.rb
169
+ - lib/dwh/adapters.rb
170
+ - lib/dwh/adapters/athena.rb
171
+ - lib/dwh/adapters/druid.rb
172
+ - lib/dwh/adapters/duck_db.rb
173
+ - lib/dwh/adapters/my_sql.rb
174
+ - lib/dwh/adapters/open_authorizable.rb
175
+ - lib/dwh/adapters/postgres.rb
176
+ - lib/dwh/adapters/snowflake.rb
177
+ - lib/dwh/adapters/sql_server.rb
178
+ - lib/dwh/adapters/trino.rb
179
+ - lib/dwh/behaviors.rb
180
+ - lib/dwh/capabilities.rb
181
+ - lib/dwh/column.rb
182
+ - lib/dwh/errors.rb
183
+ - lib/dwh/factory.rb
184
+ - lib/dwh/functions.rb
185
+ - lib/dwh/functions/arrays.rb
186
+ - lib/dwh/functions/dates.rb
187
+ - lib/dwh/functions/extract_date_part.rb
188
+ - lib/dwh/functions/nulls.rb
189
+ - lib/dwh/logger.rb
190
+ - lib/dwh/settings.rb
191
+ - lib/dwh/settings/athena.yml
192
+ - lib/dwh/settings/base.yml
193
+ - lib/dwh/settings/databricks.yml
194
+ - lib/dwh/settings/druid.yml
195
+ - lib/dwh/settings/duckdb.yml
196
+ - lib/dwh/settings/mysql.yml
197
+ - lib/dwh/settings/postgres.yml
198
+ - lib/dwh/settings/redshift.yml
199
+ - lib/dwh/settings/snowflake.yml
200
+ - lib/dwh/settings/sqlserver.yml
201
+ - lib/dwh/settings/trino.yml
202
+ - lib/dwh/streaming_stats.rb
203
+ - lib/dwh/table.rb
204
+ - lib/dwh/table_stats.rb
205
+ - lib/dwh/version.rb
206
+ - sig/dwh.rbs
207
+ homepage: https://www.strata.site
208
+ licenses: []
209
+ metadata:
210
+ allowed_push_host: https://rubygems.org
211
+ homepage_uri: https://www.strata.site
212
+ source_code_uri: https://github.com/stratasite/dwh.git
213
+ changelog_uri: https://github.com/stratasite/dwh/blob/main/CHANGELOG.md
214
+ rdoc_options: []
215
+ require_paths:
216
+ - lib
217
+ required_ruby_version: !ruby/object:Gem::Requirement
218
+ requirements:
219
+ - - ">="
220
+ - !ruby/object:Gem::Version
221
+ version: 3.4.4
222
+ required_rubygems_version: !ruby/object:Gem::Requirement
223
+ requirements:
224
+ - - ">="
225
+ - !ruby/object:Gem::Version
226
+ version: '0'
227
+ requirements: []
228
+ rubygems_version: 3.6.7
229
+ specification_version: 4
230
+ summary: Data warehouse adapters for interacting with popular data warehouses.
231
+ test_files: []