laketower 0.5.1__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of laketower might be problematic. Click here for more details.

laketower/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.5.1"
1
+ __version__ = "0.6.0"
laketower/cli.py CHANGED
@@ -12,9 +12,13 @@ import uvicorn
12
12
  from laketower.__about__ import __version__
13
13
  from laketower.config import load_yaml_config
14
14
  from laketower.tables import (
15
+ ImportFileFormatEnum,
16
+ ImportModeEnum,
15
17
  execute_query,
16
18
  generate_table_query,
17
19
  generate_table_statistics_query,
20
+ import_file_to_table,
21
+ load_datasets,
18
22
  load_table,
19
23
  )
20
24
 
@@ -47,77 +51,96 @@ def list_tables(config_path: Path) -> None:
47
51
 
48
52
 
49
53
  def table_metadata(config_path: Path, table_name: str) -> None:
50
- config = load_yaml_config(config_path)
51
- table_config = next(filter(lambda x: x.name == table_name, config.tables))
52
- table = load_table(table_config)
53
- metadata = table.metadata()
54
-
55
- tree = rich.tree.Tree(table_name)
56
- tree.add(f"name: {metadata.name}")
57
- tree.add(f"description: {metadata.description}")
58
- tree.add(f"format: {metadata.table_format.value}")
59
- tree.add(f"uri: {metadata.uri}")
60
- tree.add(f"id: {metadata.id}")
61
- tree.add(f"version: {metadata.version}")
62
- tree.add(f"created at: {metadata.created_at}")
63
- tree.add(f"partitions: {', '.join(metadata.partitions)}")
64
- tree.add(f"configuration: {metadata.configuration}")
54
+ out: rich.jupyter.JupyterMixin
55
+ try:
56
+ config = load_yaml_config(config_path)
57
+ table_config = next(filter(lambda x: x.name == table_name, config.tables))
58
+ table = load_table(table_config)
59
+ metadata = table.metadata()
60
+
61
+ out = rich.tree.Tree(table_name)
62
+ out.add(f"name: {metadata.name}")
63
+ out.add(f"description: {metadata.description}")
64
+ out.add(f"format: {metadata.table_format.value}")
65
+ out.add(f"uri: {metadata.uri}")
66
+ out.add(f"id: {metadata.id}")
67
+ out.add(f"version: {metadata.version}")
68
+ out.add(f"created at: {metadata.created_at}")
69
+ out.add(f"partitions: {', '.join(metadata.partitions)}")
70
+ out.add(f"configuration: {metadata.configuration}")
71
+ except Exception as e:
72
+ out = rich.panel.Panel.fit(f"[red]{e}")
73
+
65
74
  console = rich.get_console()
66
- console.print(tree)
75
+ console.print(out)
67
76
 
68
77
 
69
78
  def table_schema(config_path: Path, table_name: str) -> None:
70
- config = load_yaml_config(config_path)
71
- table_config = next(filter(lambda x: x.name == table_name, config.tables))
72
- table = load_table(table_config)
73
- schema = table.schema()
74
-
75
- tree = rich.tree.Tree(table_name)
76
- for field in schema:
77
- nullable = "" if field.nullable else " not null"
78
- tree.add(f"{field.name}: {field.type}{nullable}")
79
+ out: rich.jupyter.JupyterMixin
80
+ try:
81
+ config = load_yaml_config(config_path)
82
+ table_config = next(filter(lambda x: x.name == table_name, config.tables))
83
+ table = load_table(table_config)
84
+ schema = table.schema()
85
+
86
+ out = rich.tree.Tree(table_name)
87
+ for field in schema:
88
+ nullable = "" if field.nullable else " not null"
89
+ out.add(f"{field.name}: {field.type}{nullable}")
90
+ except Exception as e:
91
+ out = rich.panel.Panel.fit(f"[red]{e}")
92
+
79
93
  console = rich.get_console()
80
- console.print(tree, markup=False) # disable markup to allow bracket characters
94
+ console.print(out, markup=False) # disable markup to allow bracket characters
81
95
 
82
96
 
83
97
  def table_history(config_path: Path, table_name: str) -> None:
84
- config = load_yaml_config(config_path)
85
- table_config = next(filter(lambda x: x.name == table_name, config.tables))
86
- table = load_table(table_config)
87
- history = table.history()
88
-
89
- tree = rich.tree.Tree(table_name)
90
- for rev in history.revisions:
91
- tree_version = tree.add(f"version: {rev.version}")
92
- tree_version.add(f"timestamp: {rev.timestamp}")
93
- tree_version.add(f"client version: {rev.client_version}")
94
- tree_version.add(f"operation: {rev.operation}")
95
- tree_op_params = tree_version.add("operation parameters")
96
- for param_key, param_val in rev.operation_parameters.items():
97
- tree_op_params.add(f"{param_key}: {param_val}")
98
- tree_op_metrics = tree_version.add("operation metrics")
99
- for metric_key, metric_val in rev.operation_metrics.items():
100
- tree_op_metrics.add(f"{metric_key}: {metric_val}")
98
+ out: rich.jupyter.JupyterMixin
99
+ try:
100
+ config = load_yaml_config(config_path)
101
+ table_config = next(filter(lambda x: x.name == table_name, config.tables))
102
+ table = load_table(table_config)
103
+ history = table.history()
104
+
105
+ out = rich.tree.Tree(table_name)
106
+ for rev in history.revisions:
107
+ tree_version = out.add(f"version: {rev.version}")
108
+ tree_version.add(f"timestamp: {rev.timestamp}")
109
+ tree_version.add(f"client version: {rev.client_version}")
110
+ tree_version.add(f"operation: {rev.operation}")
111
+ tree_op_params = tree_version.add("operation parameters")
112
+ for param_key, param_val in rev.operation_parameters.items():
113
+ tree_op_params.add(f"{param_key}: {param_val}")
114
+ tree_op_metrics = tree_version.add("operation metrics")
115
+ for metric_key, metric_val in rev.operation_metrics.items():
116
+ tree_op_metrics.add(f"{metric_key}: {metric_val}")
117
+ except Exception as e:
118
+ out = rich.panel.Panel.fit(f"[red]{e}")
119
+
101
120
  console = rich.get_console()
102
- console.print(tree, markup=False)
121
+ console.print(out, markup=False)
103
122
 
104
123
 
105
124
  def table_statistics(
106
125
  config_path: Path, table_name: str, version: int | None = None
107
126
  ) -> None:
108
- config = load_yaml_config(config_path)
109
- table_config = next(filter(lambda x: x.name == table_name, config.tables))
110
- table = load_table(table_config)
111
- table_dataset = table.dataset(version=version)
112
- sql_query = generate_table_statistics_query(table_name)
113
- results = execute_query({table_name: table_dataset}, sql_query)
114
-
115
- out = rich.table.Table()
116
- for column in results.columns:
117
- out.add_column(column)
118
- for value_list in results.to_numpy().tolist():
119
- row = [str(x) for x in value_list]
120
- out.add_row(*row)
127
+ out: rich.jupyter.JupyterMixin
128
+ try:
129
+ config = load_yaml_config(config_path)
130
+ table_config = next(filter(lambda x: x.name == table_name, config.tables))
131
+ table = load_table(table_config)
132
+ table_dataset = table.dataset(version=version)
133
+ sql_query = generate_table_statistics_query(table_name)
134
+ results = execute_query({table_name: table_dataset}, sql_query)
135
+
136
+ out = rich.table.Table()
137
+ for column in results.columns:
138
+ out.add_column(column)
139
+ for value_list in results.to_numpy().tolist():
140
+ row = [str(x) for x in value_list]
141
+ out.add_row(*row)
142
+ except Exception as e:
143
+ out = rich.panel.Panel.fit(f"[red]{e}")
121
144
 
122
145
  console = rich.get_console()
123
146
  console.print(out, markup=False) # disable markup to allow bracket characters
@@ -132,42 +155,51 @@ def view_table(
132
155
  sort_desc: str | None = None,
133
156
  version: int | None = None,
134
157
  ) -> None:
135
- config = load_yaml_config(config_path)
136
- table_config = next(filter(lambda x: x.name == table_name, config.tables))
137
- table = load_table(table_config)
138
- table_dataset = table.dataset(version=version)
139
- sql_query = generate_table_query(
140
- table_name, limit=limit, cols=cols, sort_asc=sort_asc, sort_desc=sort_desc
141
- )
142
- results = execute_query({table_name: table_dataset}, sql_query)
158
+ out: rich.jupyter.JupyterMixin
159
+ try:
160
+ config = load_yaml_config(config_path)
161
+ table_config = next(filter(lambda x: x.name == table_name, config.tables))
162
+ table = load_table(table_config)
163
+ table_dataset = table.dataset(version=version)
164
+ sql_query = generate_table_query(
165
+ table_name, limit=limit, cols=cols, sort_asc=sort_asc, sort_desc=sort_desc
166
+ )
167
+ results = execute_query({table_name: table_dataset}, sql_query)
143
168
 
144
- out = rich.table.Table()
145
- for column in results.columns:
146
- out.add_column(column)
147
- for value_list in results.to_numpy().tolist():
148
- row = [str(x) for x in value_list]
149
- out.add_row(*row)
169
+ out = rich.table.Table()
170
+ for column in results.columns:
171
+ out.add_column(column)
172
+ for value_list in results.to_numpy().tolist():
173
+ row = [str(x) for x in value_list]
174
+ out.add_row(*row)
175
+ except Exception as e:
176
+ out = rich.panel.Panel.fit(f"[red]{e}")
150
177
 
151
178
  console = rich.get_console()
152
179
  console.print(out)
153
180
 
154
181
 
155
- def query_table(config_path: Path, sql_query: str) -> None:
156
- config = load_yaml_config(config_path)
157
- tables_dataset = {
158
- table_config.name: load_table(table_config).dataset()
159
- for table_config in config.tables
160
- }
161
-
182
+ def query_table(
183
+ config_path: Path, sql_query: str, output_path: Path | None = None
184
+ ) -> None:
162
185
  out: rich.jupyter.JupyterMixin
163
186
  try:
187
+ config = load_yaml_config(config_path)
188
+ tables_dataset = load_datasets(config.tables)
164
189
  results = execute_query(tables_dataset, sql_query)
190
+
165
191
  out = rich.table.Table()
166
192
  for column in results.columns:
167
193
  out.add_column(column)
168
194
  for value_list in results.values.tolist():
169
195
  row = [str(x) for x in value_list]
170
196
  out.add_row(*row)
197
+
198
+ if output_path is not None:
199
+ results.to_csv(
200
+ output_path, header=True, index=False, sep=",", encoding="utf-8"
201
+ )
202
+ out = rich.text.Text(f"Query results written to: {output_path}")
171
203
  except ValueError as e:
172
204
  out = rich.panel.Panel.fit(f"[red]{e}")
173
205
 
@@ -175,6 +207,33 @@ def query_table(config_path: Path, sql_query: str) -> None:
175
207
  console.print(out)
176
208
 
177
209
 
210
+ def import_table(
211
+ config_path: Path,
212
+ table_name: str,
213
+ file_path: Path,
214
+ mode: ImportModeEnum,
215
+ file_format: ImportFileFormatEnum,
216
+ delimiter: str,
217
+ encoding: str,
218
+ ) -> None:
219
+ out: rich.jupyter.JupyterMixin
220
+ try:
221
+ config = load_yaml_config(config_path)
222
+ table_config = next(filter(lambda x: x.name == table_name, config.tables))
223
+ with open(file_path, "rb") as file_content:
224
+ rows_imported = import_file_to_table(
225
+ table_config, file_content, mode, file_format, delimiter, encoding
226
+ )
227
+ out = rich.text.Text(
228
+ f"Successfully imported {rows_imported} rows into table '{table_name}' in '{mode.value}' mode"
229
+ )
230
+ except Exception as e:
231
+ out = rich.panel.Panel.fit(f"[red]{e}")
232
+
233
+ console = rich.get_console()
234
+ console.print(out)
235
+
236
+
178
237
  def list_queries(config_path: Path) -> None:
179
238
  config = load_yaml_config(config_path)
180
239
  tree = rich.tree.Tree("queries")
@@ -185,17 +244,14 @@ def list_queries(config_path: Path) -> None:
185
244
 
186
245
 
187
246
  def view_query(config_path: Path, query_name: str) -> None:
188
- config = load_yaml_config(config_path)
189
- query_config = next(filter(lambda x: x.name == query_name, config.queries))
190
- sql_query = query_config.sql
191
- tables_dataset = {
192
- table_config.name: load_table(table_config).dataset()
193
- for table_config in config.tables
194
- }
195
-
196
247
  out: rich.jupyter.JupyterMixin
197
248
  try:
249
+ config = load_yaml_config(config_path)
250
+ tables_dataset = load_datasets(config.tables)
251
+ query_config = next(filter(lambda x: x.name == query_name, config.queries))
252
+ sql_query = query_config.sql
198
253
  results = execute_query(tables_dataset, sql_query)
254
+
199
255
  out = rich.table.Table()
200
256
  for column in results.columns:
201
257
  out.add_column(column)
@@ -310,8 +366,46 @@ def cli() -> None:
310
366
  parser_tables_query = subsparsers_tables.add_parser(
311
367
  "query", help="Query registered tables"
312
368
  )
369
+ parser_tables_query.add_argument(
370
+ "--output", help="Output query results to a file (default format: CSV)"
371
+ )
313
372
  parser_tables_query.add_argument("sql", help="SQL query to execute")
314
- parser_tables_query.set_defaults(func=lambda x: query_table(x.config, x.sql))
373
+ parser_tables_query.set_defaults(
374
+ func=lambda x: query_table(x.config, x.sql, x.output)
375
+ )
376
+
377
+ parser_tables_import = subsparsers_tables.add_parser(
378
+ "import", help="Import data into a table"
379
+ )
380
+ parser_tables_import.add_argument("table", help="Name of the table")
381
+ parser_tables_import.add_argument(
382
+ "--file", type=Path, required=True, help="Path to file to import"
383
+ )
384
+ parser_tables_import.add_argument(
385
+ "--mode",
386
+ choices=[mode.value for mode in ImportModeEnum],
387
+ default=ImportModeEnum.append.value,
388
+ type=ImportModeEnum,
389
+ help=f"Import mode (default: {ImportModeEnum.append.value})",
390
+ )
391
+ parser_tables_import.add_argument(
392
+ "--format",
393
+ choices=[file_format.value for file_format in ImportFileFormatEnum],
394
+ default=ImportFileFormatEnum.csv.value,
395
+ type=ImportFileFormatEnum,
396
+ help=f"File format (default: {ImportFileFormatEnum.csv.value})",
397
+ )
398
+ parser_tables_import.add_argument(
399
+ "--delimiter", default=",", help="Column delimiter to use (default: ',')"
400
+ )
401
+ parser_tables_import.add_argument(
402
+ "--encoding", default="utf-8", help="File encoding to use (default: 'utf-8')"
403
+ )
404
+ parser_tables_import.set_defaults(
405
+ func=lambda x: import_table(
406
+ x.config, x.table, x.file, x.mode, x.format, x.delimiter, x.encoding
407
+ )
408
+ )
315
409
 
316
410
  parser_queries = subparsers.add_parser("queries", help="Work with queries")
317
411
  subsparsers_queries = parser_queries.add_subparsers(required=True)
laketower/config.py CHANGED
@@ -1,40 +1,105 @@
1
1
  import enum
2
+ import json
3
+ import os
2
4
  from pathlib import Path
5
+ from typing import Any
3
6
 
4
- import deltalake
5
7
  import pydantic
6
8
  import yaml
7
9
 
8
10
 
11
+ def substitute_env_vars(config_data: Any) -> Any:
12
+ """
13
+ Substitute environment variables within the input payload.
14
+
15
+ Only allowed format:
16
+ ```python
17
+ {
18
+ "some_key": {"env": "VAR_NAME"}
19
+ }
20
+
21
+ The "env" key MUST BE the only key in the dict to be processed.
22
+
23
+ The content of the environment variable will be loaded with a JSON parser,
24
+ so it can contain complex and nested structures (default is a string).
25
+ ```
26
+ """
27
+ match config_data:
28
+ case {"env": str(var_name)} if len(config_data) == 1:
29
+ # Handle environment variable substitution
30
+ env_value = os.getenv(var_name)
31
+ if env_value is None:
32
+ raise ValueError(f"environment variable '{var_name}' is not set")
33
+
34
+ try:
35
+ return json.loads(env_value)
36
+ except json.JSONDecodeError:
37
+ return env_value
38
+
39
+ case dict() as config_dict:
40
+ # Process dictionary recursively
41
+ return {
42
+ key: substitute_env_vars(value) for key, value in config_dict.items()
43
+ }
44
+
45
+ case list() as config_list:
46
+ # Process list recursively
47
+ return [substitute_env_vars(item) for item in config_list]
48
+
49
+ case _:
50
+ # Return primitive values unchanged
51
+ return config_data
52
+
53
+
9
54
  class TableFormats(str, enum.Enum):
10
55
  delta = "delta"
11
56
 
12
57
 
13
- class ConfigTable(pydantic.BaseModel):
14
- name: str
15
- uri: str
16
- table_format: TableFormats = pydantic.Field(alias="format")
58
+ class ConfigTableConnectionS3(pydantic.BaseModel):
59
+ s3_access_key_id: str
60
+ s3_secret_access_key: pydantic.SecretStr
61
+ s3_region: str | None = None
62
+ s3_endpoint_url: pydantic.AnyHttpUrl | None = None
63
+ s3_allow_http: bool = False
17
64
 
18
- @pydantic.model_validator(mode="after")
19
- def check_table(self) -> "ConfigTable":
20
- def check_delta_table(table_uri: str) -> None:
21
- if not deltalake.DeltaTable.is_deltatable(table_uri):
22
- raise ValueError(f"{table_uri} is not a valid Delta table")
23
65
 
24
- format_check = {TableFormats.delta: check_delta_table}
25
- format_check[self.table_format](self.uri)
66
+ class ConfigTableConnectionADLS(pydantic.BaseModel):
67
+ adls_account_name: str
68
+ adls_access_key: pydantic.SecretStr | None = None
69
+ adls_sas_key: pydantic.SecretStr | None = None
70
+ adls_tenant_id: str | None = None
71
+ adls_client_id: str | None = None
72
+ adls_client_secret: pydantic.SecretStr | None = None
73
+ azure_msi_endpoint: pydantic.AnyHttpUrl | None = None
74
+ use_azure_cli: bool = False
75
+
26
76
 
77
+ class ConfigTableConnection(pydantic.BaseModel):
78
+ s3: ConfigTableConnectionS3 | None = None
79
+ adls: ConfigTableConnectionADLS | None = None
80
+
81
+ @pydantic.model_validator(mode="after")
82
+ def mutually_exclusive_connectors(self) -> "ConfigTableConnection":
83
+ connectors = [self.s3, self.adls]
84
+ non_null_connectors = list(filter(None, connectors))
85
+ if len(non_null_connectors) > 1:
86
+ raise ValueError(
87
+ "only one connection type can be specified among: 's3', 'adls'"
88
+ )
27
89
  return self
28
90
 
29
91
 
30
- class ConfigQuery(pydantic.BaseModel):
92
+ class ConfigTable(pydantic.BaseModel):
31
93
  name: str
32
- title: str
33
- sql: str
94
+ uri: str
95
+ table_format: TableFormats = pydantic.Field(alias="format")
96
+ connection: ConfigTableConnection | None = None
34
97
 
35
98
 
36
- class ConfigDashboard(pydantic.BaseModel):
99
+ class ConfigQuery(pydantic.BaseModel):
37
100
  name: str
101
+ title: str
102
+ sql: str
38
103
 
39
104
 
40
105
  class Config(pydantic.BaseModel):
@@ -44,4 +109,5 @@ class Config(pydantic.BaseModel):
44
109
 
45
110
  def load_yaml_config(config_path: Path) -> Config:
46
111
  config_dict = yaml.safe_load(config_path.read_text())
112
+ config_dict = substitute_env_vars(config_dict)
47
113
  return Config.model_validate(config_dict)