laketower 0.5.1__py3-none-any.whl → 0.6.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of laketower might be problematic. Click here for more details.

Files changed (33) hide show
  1. laketower/__about__.py +1 -1
  2. laketower/cli.py +269 -101
  3. laketower/config.py +96 -14
  4. laketower/static/datatables.bundle.js +27931 -0
  5. laketower/static/datatables.js +55 -0
  6. laketower/static/editor.bundle.js +27433 -0
  7. laketower/static/editor.js +74 -0
  8. laketower/static/vendor/bootstrap/bootstrap.bundle.min.js +7 -0
  9. laketower/static/vendor/bootstrap-icons/bootstrap-icons.min.css +5 -0
  10. laketower/static/vendor/bootstrap-icons/fonts/bootstrap-icons.woff +0 -0
  11. laketower/static/vendor/bootstrap-icons/fonts/bootstrap-icons.woff2 +0 -0
  12. laketower/static/vendor/datatables.net-bs5/dataTables.bootstrap5.css +610 -0
  13. laketower/static/vendor/datatables.net-columncontrol-bs5/columnControl.bootstrap5.min.css +1 -0
  14. laketower/static/vendor/halfmoon/halfmoon.min.css +22 -0
  15. laketower/static/vendor/halfmoon/halfmoon.modern.css +282 -0
  16. laketower/tables.py +218 -16
  17. laketower/templates/_base.html +99 -20
  18. laketower/templates/queries/view.html +50 -8
  19. laketower/templates/tables/_macros.html +3 -0
  20. laketower/templates/tables/history.html +6 -0
  21. laketower/templates/tables/import.html +71 -0
  22. laketower/templates/tables/index.html +6 -0
  23. laketower/templates/tables/query.html +53 -7
  24. laketower/templates/tables/statistics.html +10 -4
  25. laketower/templates/tables/view.html +48 -42
  26. laketower/web.py +253 -30
  27. {laketower-0.5.1.dist-info → laketower-0.6.5.dist-info}/METADATA +189 -5
  28. laketower-0.6.5.dist-info/RECORD +35 -0
  29. laketower-0.6.5.dist-info/entry_points.txt +2 -0
  30. laketower-0.5.1.dist-info/RECORD +0 -22
  31. laketower-0.5.1.dist-info/entry_points.txt +0 -2
  32. {laketower-0.5.1.dist-info → laketower-0.6.5.dist-info}/WHEEL +0 -0
  33. {laketower-0.5.1.dist-info → laketower-0.6.5.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,282 @@
1
+
2
+ /*!
3
+ * ----------------------------------------------------------------------------
4
+ * Halfmoon CSS - Modern theme
5
+ * Copyright (c) 2023, Tahmid Khan | MIT License | https://www.gethalfmoon.com
6
+ * ----------------------------------------------------------------------------
7
+ * The above notice must be included in its entirety when this file is used.
8
+ */
9
+
10
+ /* Color palette */
11
+
12
+ [data-bs-core=modern] {
13
+ /* Gray */
14
+
15
+ --bs-slate-hue: 216;
16
+ --bs-slate-saturation: 20%;
17
+
18
+ /* Light gray */
19
+
20
+ --bs-lightgray-hue: var(--bs-slate-hue);
21
+ --bs-lightgray-saturation: var(--bs-slate-saturation);
22
+
23
+ /* Sable (almost black) */
24
+
25
+ --bs-sable-hue: var(--bs-darkgray-hue);
26
+ --bs-sable-saturation: var(--bs-darkgray-saturation);
27
+ --bs-sable-100-hsl: var(--bs-sable-hue), var(--bs-sable-saturation), 31%;
28
+ --bs-sable-200-hsl: var(--bs-sable-hue), var(--bs-sable-saturation), 29%;
29
+ --bs-sable-300-hsl: var(--bs-sable-hue), var(--bs-sable-saturation), 27%;
30
+ --bs-sable-400-hsl: var(--bs-sable-hue), var(--bs-sable-saturation), 25%;
31
+ --bs-sable-500-hsl: var(--bs-sable-hue), var(--bs-sable-saturation), 23%;
32
+ --bs-sable-600-hsl: var(--bs-sable-hue), var(--bs-sable-saturation), 21%;
33
+ --bs-sable-700-hsl: var(--bs-sable-hue), var(--bs-sable-saturation), 19%;
34
+ --bs-sable-800-hsl: var(--bs-sable-hue), var(--bs-sable-saturation), 17%;
35
+ --bs-sable-900-hsl: var(--bs-sable-hue), var(--bs-sable-saturation), 15%;
36
+ --bs-sable-100: hsl(var(--bs-sable-100-hsl));
37
+ --bs-sable-200: hsl(var(--bs-sable-200-hsl));
38
+ --bs-sable-300: hsl(var(--bs-sable-300-hsl));
39
+ --bs-sable-400: hsl(var(--bs-sable-400-hsl));
40
+ --bs-sable-500: hsl(var(--bs-sable-500-hsl));
41
+ --bs-sable-600: hsl(var(--bs-sable-600-hsl));
42
+ --bs-sable-700: hsl(var(--bs-sable-700-hsl));
43
+ --bs-sable-800: hsl(var(--bs-sable-800-hsl));
44
+ --bs-sable-900: hsl(var(--bs-sable-900-hsl));
45
+ --bs-sable-hsl: var(--bs-sable-500-hsl);
46
+ --bs-sable: hsl(var(--bs-sable-hsl));
47
+ --bs-sable-foreground-hsl: var(--bs-white-hsl);
48
+ --bs-sable-foreground: hsl(var(--bs-sable-foreground-hsl));
49
+ --bs-sable-text-emphasis-hsl: var(--bs-sable-600-hsl);
50
+ --bs-sable-text-emphasis: hsl(var(--bs-sable-text-emphasis-hsl));
51
+ --bs-sable-hover-bg: var(--bs-sable-600);
52
+ --bs-sable-active-bg: var(--bs-sable-700);
53
+ --bs-sable-bg-subtle: hsl(var(--bs-sable-hue), var(--bs-sable-saturation), 70%);
54
+ --bs-sable-border-subtle: var(--bs-sable-400);
55
+ --bs-sable-checkbox-svg: var(--bs-checkbox-svg-light);
56
+ --bs-sable-dash-svg: var(--bs-dash-svg-light);
57
+ --bs-sable-radio-svg: var(--bs-radio-svg-light);
58
+ --bs-sable-switch-svg: var(--bs-switch-svg-light);
59
+
60
+ /* Primary */
61
+
62
+ --bs-primary-hue: var(--bs-navy-hue);
63
+ --bs-primary-saturation: var(--bs-navy-saturation);
64
+ --bs-primary-100-hsl: var(--bs-navy-100-hsl);
65
+ --bs-primary-200-hsl: var(--bs-navy-200-hsl);
66
+ --bs-primary-300-hsl: var(--bs-navy-300-hsl);
67
+ --bs-primary-400-hsl: var(--bs-navy-400-hsl);
68
+ --bs-primary-500-hsl: var(--bs-navy-500-hsl);
69
+ --bs-primary-600-hsl: var(--bs-navy-600-hsl);
70
+ --bs-primary-700-hsl: var(--bs-navy-700-hsl);
71
+ --bs-primary-800-hsl: var(--bs-navy-800-hsl);
72
+ --bs-primary-900-hsl: var(--bs-navy-900-hsl);
73
+ --bs-primary-100: var(--bs-navy-100);
74
+ --bs-primary-200: var(--bs-navy-200);
75
+ --bs-primary-300: var(--bs-navy-300);
76
+ --bs-primary-400: var(--bs-navy-400);
77
+ --bs-primary-500: var(--bs-navy-500);
78
+ --bs-primary-600: var(--bs-navy-600);
79
+ --bs-primary-700: var(--bs-navy-700);
80
+ --bs-primary-800: var(--bs-navy-800);
81
+ --bs-primary-900: var(--bs-navy-900);
82
+ --bs-primary-hsl: var(--bs-navy-hsl);
83
+ --bs-primary: var(--bs-navy);
84
+ --bs-primary-foreground-hsl: var(--bs-navy-foreground-hsl);
85
+ --bs-primary-foreground: var(--bs-navy-foreground);
86
+ --bs-primary-text-emphasis-hsl: var(--bs-navy-text-emphasis-hsl);
87
+ --bs-primary-text-emphasis: var(--bs-navy-text-emphasis);
88
+ --bs-primary-hover-bg: var(--bs-navy-hover-bg);
89
+ --bs-primary-active-bg: var(--bs-navy-active-bg);
90
+ --bs-primary-bg-subtle: var(--bs-navy-bg-subtle);
91
+ --bs-primary-border-subtle: var(--bs-navy-border-subtle);
92
+ --bs-primary-checkbox-svg: var(--bs-navy-checkbox-svg);
93
+ --bs-primary-dash-svg: var(--bs-navy-dash-svg);
94
+ --bs-primary-radio-svg: var(--bs-navy-radio-svg);
95
+ --bs-primary-switch-svg: var(--bs-navy-switch-svg);
96
+
97
+ /* Info */
98
+
99
+ --bs-info-hue: var(--bs-blue-hue);
100
+ --bs-info-saturation: var(--bs-blue-saturation);
101
+ --bs-info-100-hsl: var(--bs-blue-100-hsl);
102
+ --bs-info-200-hsl: var(--bs-blue-200-hsl);
103
+ --bs-info-300-hsl: var(--bs-blue-300-hsl);
104
+ --bs-info-400-hsl: var(--bs-blue-400-hsl);
105
+ --bs-info-500-hsl: var(--bs-blue-500-hsl);
106
+ --bs-info-600-hsl: var(--bs-blue-600-hsl);
107
+ --bs-info-700-hsl: var(--bs-blue-700-hsl);
108
+ --bs-info-800-hsl: var(--bs-blue-800-hsl);
109
+ --bs-info-900-hsl: var(--bs-blue-900-hsl);
110
+ --bs-info-100: var(--bs-blue-100);
111
+ --bs-info-200: var(--bs-blue-200);
112
+ --bs-info-300: var(--bs-blue-300);
113
+ --bs-info-400: var(--bs-blue-400);
114
+ --bs-info-500: var(--bs-blue-500);
115
+ --bs-info-600: var(--bs-blue-600);
116
+ --bs-info-700: var(--bs-blue-700);
117
+ --bs-info-800: var(--bs-blue-800);
118
+ --bs-info-900: var(--bs-blue-900);
119
+ --bs-info-hsl: var(--bs-blue-hsl);
120
+ --bs-info: var(--bs-blue);
121
+ --bs-info-foreground-hsl: var(--bs-blue-foreground-hsl);
122
+ --bs-info-foreground: var(--bs-blue-foreground);
123
+ --bs-info-text-emphasis-hsl: var(--bs-blue-text-emphasis-hsl);
124
+ --bs-info-text-emphasis: var(--bs-blue-text-emphasis);
125
+ --bs-info-hover-bg: var(--bs-blue-hover-bg);
126
+ --bs-info-active-bg: var(--bs-blue-active-bg);
127
+ --bs-info-bg-subtle: var(--bs-blue-bg-subtle);
128
+ --bs-info-border-subtle: var(--bs-blue-border-subtle);
129
+ --bs-info-checkbox-svg: var(--bs-blue-checkbox-svg);
130
+ --bs-info-dash-svg: var(--bs-blue-dash-svg);
131
+ --bs-info-radio-svg: var(--bs-blue-radio-svg);
132
+ --bs-info-switch-svg: var(--bs-blue-switch-svg);
133
+ }
134
+
135
+ [data-bs-core=modern][data-bs-theme=dark] {
136
+ /* Dark gray */
137
+
138
+ --bs-darkgray-text-emphasis-hsl: var(--bs-darkgray-200-hsl);
139
+ --bs-darkgray-text-emphasis: hsl(var(--bs-darkgray-text-emphasis-hsl));
140
+
141
+ /* Sable (black) */
142
+
143
+ --bs-sable-text-emphasis-hsl: var(--bs-sable-400-hsl);
144
+ --bs-sable-text-emphasis: hsl(var(--bs-sable-text-emphasis-hsl));
145
+ --bs-sable-bg-subtle: hsl(var(--bs-sable-hue), var(--bs-sable-saturation), 14%);
146
+ --bs-sable-border-subtle: var(--bs-sable-600);
147
+
148
+ /* Blue */
149
+
150
+ --bs-blue-text-emphasis-hsl: var(--bs-blue-300-hsl);
151
+ --bs-blue-text-emphasis: hsl(var(--bs-blue-text-emphasis-hsl));
152
+
153
+ /* Primary */
154
+
155
+ --bs-primary-hue: var(--bs-sky-hue);
156
+ --bs-primary-saturation: var(--bs-sky-saturation);
157
+ --bs-primary-100-hsl: var(--bs-sky-100-hsl);
158
+ --bs-primary-200-hsl: var(--bs-sky-200-hsl);
159
+ --bs-primary-300-hsl: var(--bs-sky-300-hsl);
160
+ --bs-primary-400-hsl: var(--bs-sky-400-hsl);
161
+ --bs-primary-500-hsl: var(--bs-sky-500-hsl);
162
+ --bs-primary-600-hsl: var(--bs-sky-600-hsl);
163
+ --bs-primary-700-hsl: var(--bs-sky-700-hsl);
164
+ --bs-primary-800-hsl: var(--bs-sky-800-hsl);
165
+ --bs-primary-900-hsl: var(--bs-sky-900-hsl);
166
+ --bs-primary-100: var(--bs-sky-100);
167
+ --bs-primary-200: var(--bs-sky-200);
168
+ --bs-primary-300: var(--bs-sky-300);
169
+ --bs-primary-400: var(--bs-sky-400);
170
+ --bs-primary-500: var(--bs-sky-500);
171
+ --bs-primary-600: var(--bs-sky-600);
172
+ --bs-primary-700: var(--bs-sky-700);
173
+ --bs-primary-800: var(--bs-sky-800);
174
+ --bs-primary-900: var(--bs-sky-900);
175
+ --bs-primary-hsl: var(--bs-sky-hsl);
176
+ --bs-primary: var(--bs-sky);
177
+ --bs-primary-foreground-hsl: var(--bs-sky-foreground-hsl);
178
+ --bs-primary-foreground: var(--bs-sky-foreground);
179
+ --bs-primary-text-emphasis-hsl: var(--bs-sky-text-emphasis-hsl);
180
+ --bs-primary-text-emphasis: var(--bs-sky-text-emphasis);
181
+ --bs-primary-hover-bg: var(--bs-sky-hover-bg);
182
+ --bs-primary-active-bg: var(--bs-sky-active-bg);
183
+ --bs-primary-bg-subtle: var(--bs-sky-bg-subtle);
184
+ --bs-primary-border-subtle: var(--bs-sky-border-subtle);
185
+ --bs-primary-checkbox-svg: var(--bs-sky-checkbox-svg);
186
+ --bs-primary-dash-svg: var(--bs-sky-dash-svg);
187
+ --bs-primary-radio-svg: var(--bs-sky-radio-svg);
188
+ --bs-primary-switch-svg: var(--bs-sky-switch-svg);
189
+
190
+ /* Info */
191
+
192
+ --bs-info-text-emphasis-hsl: var(--bs-blue-text-emphasis-hsl);
193
+ --bs-info-text-emphasis: var(--bs-blue-text-emphasis);
194
+ --bs-info-bg-subtle: var(--bs-blue-bg-subtle);
195
+ --bs-info-border-subtle: var(--bs-blue-border-subtle);
196
+ }
197
+
198
+ /* Variables */
199
+
200
+ [data-bs-core=modern] {
201
+ /* Link */
202
+
203
+ --bs-link-color-hsl: var(--bs-info-text-emphasis-hsl);
204
+ --bs-link-hover-color-hsl: var(--bs-info-hsl);
205
+
206
+ /* Content (used as needed in cards, panels, menus, etc.) */
207
+
208
+ --bs-content-bg-hsl: var(--bs-body-bg-hsl);
209
+ --bs-content-border-color: var(--bs-border-color);
210
+
211
+ /* Form */
212
+
213
+ --bs-form-focus-border-color: var(--bs-info-border-subtle);
214
+ --bs-form-focus-shadow-hsl: var(--bs-info-hsl);
215
+ --bs-form-check-focus-border-color: var(--bs-info-border-subtle);
216
+ }
217
+
218
+ [data-bs-core=modern]:not([data-bs-theme=dark]) {
219
+ /* Background */
220
+
221
+ --bs-body-bg-hsl: var(--bs-white-hsl);
222
+ --bs-secondary-bg-hsl: var(--bs-lightgray-hue), var(--bs-lightgray-saturation), 98.75%;
223
+ --bs-tertiary-bg-hsl: var(--bs-lightgray-hue), var(--bs-lightgray-saturation), 97.5%;
224
+
225
+ /* Border */
226
+
227
+ --bs-border-color: var(--bs-lightgray-700);
228
+ --bs-border-color-light: var(--bs-lightgray-500);
229
+ }
230
+
231
+ [data-bs-core=modern][data-bs-theme=dark] {
232
+ /* Background */
233
+
234
+ --bs-body-bg-hsl: var(--bs-sable-900-hsl);
235
+ --bs-secondary-bg-hsl: var(--bs-sable-800-hsl);
236
+ --bs-tertiary-bg-hsl: var(--bs-sable-700-hsl);
237
+
238
+ /* Border */
239
+
240
+ --bs-border-color: var(--bs-gray-900);
241
+
242
+ /* Content (used as needed in cards, panels, menus, etc.) */
243
+
244
+ --bs-content-floating-bg-hsl: var(--bs-sable-hue), var(--bs-sable-saturation), 16.5%;
245
+
246
+ /* Action (used as needed in buttons, inputs, menu items, page links, etc.) */
247
+
248
+ --bs-action-border-color: var(--bs-border-color);
249
+
250
+ /* Contextual buttons */
251
+
252
+ --bs-ctx-btn-border-color: transparent;
253
+ --bs-ctx-btn-bg-clip: border-box;
254
+
255
+ /* Action bar (used as needed in range, progress, etc.) */
256
+
257
+ --bs-actionbar-border-color: hsla(var(--bs-white-hsl), 0.075);
258
+ --bs-progresstrack-border-width: 0;
259
+ --bs-progresstrack-box-shadow: inset 0 0 0 var(--bs-border-width) var(--bs-actionbar-border-color);
260
+ --bs-progresstrack-bg-clip: border-box;
261
+ }
262
+
263
+ /* Sidebar */
264
+
265
+ [data-bs-core=modern] .sidebar {
266
+ --bs-sidebar-item-padding-x: 1rem;
267
+ --bs-sidebar-item-padding-y: 0.25rem;
268
+ --bs-sidebar-header-font-weight: var(--bs-font-weight-bold);
269
+ --bs-sidebar-divider-bg: var(--bs-sidebar-bg);
270
+ }
271
+
272
+ [data-bs-core=modern] .sidebar-nav .nav-link {
273
+ border-left: var(--bs-border-width) solid var(--bs-border-color-light);
274
+ }
275
+
276
+ [data-bs-core=modern] .sidebar-nav .nav-link.active,
277
+ [data-bs-core=modern] .sidebar-nav .nav-link.show {
278
+ font-weight: var(--bs-font-weight-bold);
279
+ border-color: currentColor;
280
+ -webkit-font-smoothing: antialiased;
281
+ -moz-osx-font-smoothing: grayscale;
282
+ }
laketower/tables.py CHANGED
@@ -1,14 +1,16 @@
1
+ import enum
1
2
  from datetime import datetime, timezone
2
- from typing import Any, Protocol
3
+ from typing import Any, BinaryIO, Protocol, TextIO
3
4
 
4
5
  import deltalake
5
6
  import duckdb
6
- import pandas as pd
7
7
  import pyarrow as pa
8
+ import pyarrow.csv as csv
8
9
  import pyarrow.dataset as padataset
9
10
  import pydantic
10
11
  import sqlglot
11
12
  import sqlglot.dialects.duckdb
13
+ import sqlglot.errors
12
14
  import sqlglot.expressions
13
15
 
14
16
  from laketower.config import ConfigTable, TableFormats
@@ -17,6 +19,15 @@ from laketower.config import ConfigTable, TableFormats
17
19
  DEFAULT_LIMIT = 10
18
20
 
19
21
 
22
class ImportModeEnum(str, enum.Enum):
    """Write modes that can be requested when importing data into a table."""

    append = "append"
    overwrite = "overwrite"
25
+
26
+
27
class ImportFileFormatEnum(str, enum.Enum):
    """File formats that can be requested when importing a file into a table."""

    csv = "csv"
29
+
30
+
20
31
  class TableMetadata(pydantic.BaseModel):
21
32
  table_format: TableFormats
22
33
  name: str | None = None
@@ -43,17 +54,112 @@ class TableHistory(pydantic.BaseModel):
43
54
 
44
55
 
class TableProtocol(Protocol):  # pragma: no cover
    """Structural interface implemented by the table-format handlers."""

    @classmethod
    def is_valid(cls, table_config: ConfigTable) -> bool:
        """Tell whether ``table_config`` refers to a valid table."""

    def __init__(self, table_config: ConfigTable) -> None:
        """Create the handler for the table described by ``table_config``."""

    def metadata(self) -> TableMetadata:
        """Return the table metadata."""

    def schema(self) -> pa.Schema:
        """Return the table schema."""

    def history(self) -> TableHistory:
        """Return the table history."""

    def dataset(self, version: int | str | None = None) -> padataset.Dataset:
        """Return the table as a dataset, optionally for a given ``version``."""

    def import_data(
        self, data: pa.Table, mode: ImportModeEnum = ImportModeEnum.append
    ) -> None:
        """Import ``data`` into the table using the given ``mode``."""
50
67
 
51
68
 
52
69
  class DeltaTable:
def __init__(self, table_config: ConfigTable):
    """Open the Delta table located at ``table_config.uri``.

    The storage options generated from the table configuration are passed
    along to ``deltalake.DeltaTable``.
    """
    super().__init__()
    self.table_config = table_config
    self._impl = deltalake.DeltaTable(
        table_config.uri,
        storage_options=self._generate_storage_options(table_config),
    )
77
+
78
@classmethod
def _generate_storage_options(
    cls, table_config: ConfigTable
) -> dict[str, str] | None:
    """Build the ``storage_options`` mapping for the table's connection.

    An S3 connection takes precedence over an ADLS one. Optional settings
    are only included when they are set (truthy) on the connection.

    Returns:
        The options dictionary, or ``None`` when the table has neither an
        S3 nor an ADLS connection configured.
    """
    # documentation from `object-store` Rust crate:
    # - s3: https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html
    # - adls: https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html
    connection = table_config.connection
    if not connection:
        return None

    s3 = connection.s3
    if s3:
        options = {
            "aws_access_key_id": s3.s3_access_key_id,
            "aws_secret_access_key": s3.s3_secret_access_key.get_secret_value(),
            "aws_allow_http": str(s3.s3_allow_http).lower(),
        }
        if s3.s3_region:
            options["aws_region"] = s3.s3_region
        if s3.s3_endpoint_url:
            options["aws_endpoint_url"] = str(s3.s3_endpoint_url).rstrip("/")
        return options

    adls = connection.adls
    if adls:
        options = {
            "azure_storage_account_name": adls.adls_account_name,
            "azure_use_azure_cli": str(adls.use_azure_cli).lower(),
        }
        if adls.adls_access_key:
            options["azure_storage_access_key"] = (
                adls.adls_access_key.get_secret_value()
            )
        if adls.adls_sas_key:
            options["azure_storage_sas_key"] = adls.adls_sas_key.get_secret_value()
        if adls.adls_tenant_id:
            options["azure_storage_tenant_id"] = adls.adls_tenant_id
        if adls.adls_client_id:
            options["azure_storage_client_id"] = adls.adls_client_id
        if adls.adls_client_secret:
            options["azure_storage_client_secret"] = (
                adls.adls_client_secret.get_secret_value()
            )
        if adls.azure_msi_endpoint:
            options["azure_msi_endpoint"] = str(adls.azure_msi_endpoint).rstrip("/")
        return options

    return None
156
+
157
@classmethod
def is_valid(cls, table_config: ConfigTable) -> bool:
    """Return whether ``table_config.uri`` is reported as a Delta table.

    The check is delegated to ``deltalake.DeltaTable.is_deltatable`` with
    the storage options generated from ``table_config``.
    """
    return deltalake.DeltaTable.is_deltatable(
        table_config.uri,
        storage_options=cls._generate_storage_options(table_config),
    )
57
163
 
58
164
  def metadata(self) -> TableMetadata:
59
165
  metadata = self._impl.metadata()
@@ -96,10 +202,47 @@ class DeltaTable:
96
202
  self._impl.load_as_version(version)
97
203
  return self._impl.to_pyarrow_dataset()
98
204
 
205
def import_data(
    self, data: pa.Table, mode: ImportModeEnum = ImportModeEnum.append
) -> None:
    """Write ``data`` into the Delta table at ``self.table_config.uri``.

    Args:
        data: Arrow table holding the rows to write.
        mode: Write mode; its ``value`` is forwarded to
            ``deltalake.write_deltalake``.

    The write uses ``schema_mode="merge"`` and the same storage options
    that are generated from the table configuration when the table is
    opened.
    """
    deltalake.write_deltalake(
        self.table_config.uri,
        data,
        mode=mode.value,
        schema_mode="merge",
        # Forward the connection settings (S3 / ADLS) derived from the table
        # configuration; otherwise the write would not receive them even
        # though reading the same table does.
        storage_options=self._generate_storage_options(self.table_config),
    )
211
+
99
212
 
def load_table(table_config: ConfigTable) -> TableProtocol:
    """Instantiate the handler matching ``table_config.table_format``.

    Raises:
        ValueError: If the handler reports the table as invalid.
    """
    handlers: dict[TableFormats, type[TableProtocol]] = {
        TableFormats.delta: DeltaTable
    }
    handler_cls = handlers[table_config.table_format]
    if handler_cls.is_valid(table_config):
        return handler_cls(table_config)
    raise ValueError(f"Invalid table: {table_config.uri}")
221
+
222
+
223
def load_datasets(table_configs: list[ConfigTable]) -> dict[str, padataset.Dataset]:
    """Load the dataset of every table that can be loaded, keyed by table name.

    Tables for which loading raises ``ValueError`` are left out of the result.
    """
    datasets_by_name = {}
    for config in table_configs:
        try:
            dataset = load_table(config).dataset()
        except ValueError:
            # Best effort: a table that fails to load is skipped.
            continue
        datasets_by_name[config.name] = dataset
    return datasets_by_name
231
+
232
+
233
def extract_query_parameter_names(sql: str) -> set[str]:
    """Collect the names of the placeholders found in ``sql``.

    The SQL text is parsed with the DuckDB dialect and every
    ``Placeholder`` node of every parsed statement contributes
    ``str(node.this)`` to the result.

    Raises:
        ValueError: If ``sqlglot`` fails to parse ``sql``.
    """
    try:
        statements = sqlglot.parse(sql, dialect=sqlglot.dialects.duckdb.DuckDB)
    except sqlglot.errors.SqlglotError as e:
        raise ValueError(f"Error: {e}") from e

    names: set[str] = set()
    for statement in statements:
        if statement is None:
            continue
        for node in statement.walk():
            if isinstance(node, sqlglot.expressions.Placeholder):
                names.add(str(node.this))
    return names
103
246
 
104
247
 
105
248
  def generate_table_query(
@@ -110,32 +253,91 @@ def generate_table_query(
110
253
  sort_desc: str | None = None,
111
254
  ) -> str:
112
255
  query_expr = (
113
- sqlglot.select(*(cols or ["*"])).from_(table_name).limit(limit or DEFAULT_LIMIT)
256
+ sqlglot.select(*([f'"{col}"' for col in cols] if cols else ["*"]))
257
+ .from_(f'"{table_name}"')
258
+ .limit(limit or DEFAULT_LIMIT)
114
259
  )
115
260
  if sort_asc:
116
261
  query_expr = query_expr.order_by(f"{sort_asc} asc")
117
262
  elif sort_desc:
118
263
  query_expr = query_expr.order_by(f"{sort_desc} desc")
119
- return sqlglot.Generator(dialect=sqlglot.dialects.duckdb.DuckDB).generate(
120
- query_expr
121
- )
264
+ return query_expr.sql(dialect=sqlglot.dialects.duckdb.DuckDB, identify="always")
122
265
 
123
266
 
def generate_table_statistics_query(table_name: str) -> str:
    """Build the DuckDB query selecting statistics from ``SUMMARIZE`` of a table.

    The table name is wrapped in double quotes before being handed to
    ``sqlglot``.
    """
    quoted_table = sqlglot.expressions.Table(this=f'"{table_name}"')
    summarize = sqlglot.expressions.Summarize(this=quoted_table)
    statistics_columns = ("column_name", "count", "avg", "std", "min", "max")
    statement = sqlglot.select(*statistics_columns).from_(
        sqlglot.expressions.Subquery(this=summarize)
    )
    return statement.sql(dialect=sqlglot.dialects.duckdb.DuckDB, identify="always")
276
+
277
+
278
def limit_query(sql_query: str, max_limit: int) -> str:
    """Wrap the last statement of ``sql_query`` in a ``LIMIT max_limit`` select.

    The wrapping only happens when the last parsed statement is a ``Select``
    expression; every statement is then rendered with the DuckDB dialect and
    the results are joined with ``"; "``.

    Raises:
        ValueError: If ``sqlglot`` fails to parse ``sql_query``.
    """
    try:
        statements = sqlglot.parse(sql_query, dialect=sqlglot.dialects.duckdb.DuckDB)
    except sqlglot.errors.SqlglotError as e:
        raise ValueError(f"Error: {e}") from e

    if statements and isinstance(statements[-1], sqlglot.expressions.Select):
        inner = sqlglot.expressions.Subquery(this=statements[-1])
        statements[-1] = sqlglot.select("*").from_(inner).limit(max_limit)

    rendered = []
    for statement in statements:
        if statement is None:
            continue
        rendered.append(
            statement.sql(dialect=sqlglot.dialects.duckdb.DuckDB, identify="always")
        )
    return "; ".join(rendered)
128
299
 
129
300
 
def execute_query(
    tables_datasets: dict[str, padataset.Dataset],
    sql_query: str,
    sql_params: dict[str, str] | None = None,
) -> pa.Table:
    """Run ``sql_query`` with DuckDB against the given datasets.

    Each dataset is registered on a new DuckDB connection as
    ``<table_name>_view`` and exposed to the query through a view named
    ``<table_name>``.

    Args:
        tables_datasets: Datasets to expose, keyed by table name.
        sql_query: SQL text to execute; must not be empty.
        sql_params: Query parameters passed to DuckDB. ``None`` (the
            default) is treated as an empty mapping.

    Returns:
        The query result as an Arrow table.

    Raises:
        ValueError: If ``sql_query`` is empty or DuckDB raises an error.
    """
    if not sql_query:
        raise ValueError("Error: Cannot execute empty SQL query")

    # Create the mapping per call instead of using a mutable default
    # argument, which would be one dict object shared by all calls.
    parameters = {} if sql_params is None else sql_params

    try:
        # The context manager closes the connection once the result has been
        # fetched as an Arrow table.
        with duckdb.connect() as conn:
            for table_name, table_dataset in tables_datasets.items():
                # ATTACH IF NOT EXISTS ':memory:' AS {catalog.name};
                # CREATE SCHEMA IF NOT EXISTS {catalog.name}.{database.name};
                # USE {catalog.name}.{database.name};
                # CREATE VIEW IF NOT EXISTS {table.name} AS FROM {table.name}_dataset;

                view_name = f"{table_name}_view"
                conn.register(view_name, table_dataset)
                conn.execute(f'create view "{table_name}" as select * from "{view_name}"')  # nosec B608
            return conn.execute(sql_query, parameters=parameters).fetch_arrow_table()
    except duckdb.Error as e:
        raise ValueError(f"Error: {e}") from e
323
+
324
+
325
def import_file_to_table(
    table_config: ConfigTable,
    file_path: BinaryIO | TextIO,
    mode: ImportModeEnum = ImportModeEnum.append,
    file_format: ImportFileFormatEnum = ImportFileFormatEnum.csv,
    delimiter: str = ",",
    encoding: str = "utf-8",
) -> int:
    """Read a file and import its content into the configured table.

    Args:
        table_config: Configuration of the destination table.
        file_path: Open file object handed to the reader of ``file_format``.
        mode: Import mode forwarded to the table's ``import_data``.
        file_format: Format of the file; selects the reader to use.
        delimiter: Field delimiter passed to the CSV parse options.
        encoding: Text encoding passed to the CSV read options.

    Returns:
        The length of the data read from the file (``len`` of the result).
    """

    def _read_csv(source, separator, charset):
        # Reads CSV content with the requested encoding and delimiter.
        return csv.read_csv(
            source,
            read_options=csv.ReadOptions(encoding=charset),
            parse_options=csv.ParseOptions(delimiter=separator),
        )

    readers = {ImportFileFormatEnum.csv: _read_csv}
    target_table = load_table(table_config)
    imported = readers[file_format](file_path, delimiter, encoding)
    target_table.import_data(imported, mode=mode)
    return len(imported)