laketower 0.5.1__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of laketower might be problematic. Click here for more details.

laketower/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.5.1"
1
+ __version__ = "0.6.1"
laketower/cli.py CHANGED
@@ -12,9 +12,14 @@ import uvicorn
12
12
  from laketower.__about__ import __version__
13
13
  from laketower.config import load_yaml_config
14
14
  from laketower.tables import (
15
+ ImportFileFormatEnum,
16
+ ImportModeEnum,
15
17
  execute_query,
18
+ extract_query_parameter_names,
16
19
  generate_table_query,
17
20
  generate_table_statistics_query,
21
+ import_file_to_table,
22
+ load_datasets,
18
23
  load_table,
19
24
  )
20
25
 
@@ -47,77 +52,96 @@ def list_tables(config_path: Path) -> None:
47
52
 
48
53
 
49
54
  def table_metadata(config_path: Path, table_name: str) -> None:
50
- config = load_yaml_config(config_path)
51
- table_config = next(filter(lambda x: x.name == table_name, config.tables))
52
- table = load_table(table_config)
53
- metadata = table.metadata()
54
-
55
- tree = rich.tree.Tree(table_name)
56
- tree.add(f"name: {metadata.name}")
57
- tree.add(f"description: {metadata.description}")
58
- tree.add(f"format: {metadata.table_format.value}")
59
- tree.add(f"uri: {metadata.uri}")
60
- tree.add(f"id: {metadata.id}")
61
- tree.add(f"version: {metadata.version}")
62
- tree.add(f"created at: {metadata.created_at}")
63
- tree.add(f"partitions: {', '.join(metadata.partitions)}")
64
- tree.add(f"configuration: {metadata.configuration}")
55
+ out: rich.jupyter.JupyterMixin
56
+ try:
57
+ config = load_yaml_config(config_path)
58
+ table_config = next(filter(lambda x: x.name == table_name, config.tables))
59
+ table = load_table(table_config)
60
+ metadata = table.metadata()
61
+
62
+ out = rich.tree.Tree(table_name)
63
+ out.add(f"name: {metadata.name}")
64
+ out.add(f"description: {metadata.description}")
65
+ out.add(f"format: {metadata.table_format.value}")
66
+ out.add(f"uri: {metadata.uri}")
67
+ out.add(f"id: {metadata.id}")
68
+ out.add(f"version: {metadata.version}")
69
+ out.add(f"created at: {metadata.created_at}")
70
+ out.add(f"partitions: {', '.join(metadata.partitions)}")
71
+ out.add(f"configuration: {metadata.configuration}")
72
+ except Exception as e:
73
+ out = rich.panel.Panel.fit(f"[red]{e}")
74
+
65
75
  console = rich.get_console()
66
- console.print(tree)
76
+ console.print(out)
67
77
 
68
78
 
69
79
  def table_schema(config_path: Path, table_name: str) -> None:
70
- config = load_yaml_config(config_path)
71
- table_config = next(filter(lambda x: x.name == table_name, config.tables))
72
- table = load_table(table_config)
73
- schema = table.schema()
74
-
75
- tree = rich.tree.Tree(table_name)
76
- for field in schema:
77
- nullable = "" if field.nullable else " not null"
78
- tree.add(f"{field.name}: {field.type}{nullable}")
80
+ out: rich.jupyter.JupyterMixin
81
+ try:
82
+ config = load_yaml_config(config_path)
83
+ table_config = next(filter(lambda x: x.name == table_name, config.tables))
84
+ table = load_table(table_config)
85
+ schema = table.schema()
86
+
87
+ out = rich.tree.Tree(table_name)
88
+ for field in schema:
89
+ nullable = "" if field.nullable else " not null"
90
+ out.add(f"{field.name}: {field.type}{nullable}")
91
+ except Exception as e:
92
+ out = rich.panel.Panel.fit(f"[red]{e}")
93
+
79
94
  console = rich.get_console()
80
- console.print(tree, markup=False) # disable markup to allow bracket characters
95
+ console.print(out, markup=False) # disable markup to allow bracket characters
81
96
 
82
97
 
83
98
  def table_history(config_path: Path, table_name: str) -> None:
84
- config = load_yaml_config(config_path)
85
- table_config = next(filter(lambda x: x.name == table_name, config.tables))
86
- table = load_table(table_config)
87
- history = table.history()
88
-
89
- tree = rich.tree.Tree(table_name)
90
- for rev in history.revisions:
91
- tree_version = tree.add(f"version: {rev.version}")
92
- tree_version.add(f"timestamp: {rev.timestamp}")
93
- tree_version.add(f"client version: {rev.client_version}")
94
- tree_version.add(f"operation: {rev.operation}")
95
- tree_op_params = tree_version.add("operation parameters")
96
- for param_key, param_val in rev.operation_parameters.items():
97
- tree_op_params.add(f"{param_key}: {param_val}")
98
- tree_op_metrics = tree_version.add("operation metrics")
99
- for metric_key, metric_val in rev.operation_metrics.items():
100
- tree_op_metrics.add(f"{metric_key}: {metric_val}")
99
+ out: rich.jupyter.JupyterMixin
100
+ try:
101
+ config = load_yaml_config(config_path)
102
+ table_config = next(filter(lambda x: x.name == table_name, config.tables))
103
+ table = load_table(table_config)
104
+ history = table.history()
105
+
106
+ out = rich.tree.Tree(table_name)
107
+ for rev in history.revisions:
108
+ tree_version = out.add(f"version: {rev.version}")
109
+ tree_version.add(f"timestamp: {rev.timestamp}")
110
+ tree_version.add(f"client version: {rev.client_version}")
111
+ tree_version.add(f"operation: {rev.operation}")
112
+ tree_op_params = tree_version.add("operation parameters")
113
+ for param_key, param_val in rev.operation_parameters.items():
114
+ tree_op_params.add(f"{param_key}: {param_val}")
115
+ tree_op_metrics = tree_version.add("operation metrics")
116
+ for metric_key, metric_val in rev.operation_metrics.items():
117
+ tree_op_metrics.add(f"{metric_key}: {metric_val}")
118
+ except Exception as e:
119
+ out = rich.panel.Panel.fit(f"[red]{e}")
120
+
101
121
  console = rich.get_console()
102
- console.print(tree, markup=False)
122
+ console.print(out, markup=False)
103
123
 
104
124
 
105
125
  def table_statistics(
106
126
  config_path: Path, table_name: str, version: int | None = None
107
127
  ) -> None:
108
- config = load_yaml_config(config_path)
109
- table_config = next(filter(lambda x: x.name == table_name, config.tables))
110
- table = load_table(table_config)
111
- table_dataset = table.dataset(version=version)
112
- sql_query = generate_table_statistics_query(table_name)
113
- results = execute_query({table_name: table_dataset}, sql_query)
114
-
115
- out = rich.table.Table()
116
- for column in results.columns:
117
- out.add_column(column)
118
- for value_list in results.to_numpy().tolist():
119
- row = [str(x) for x in value_list]
120
- out.add_row(*row)
128
+ out: rich.jupyter.JupyterMixin
129
+ try:
130
+ config = load_yaml_config(config_path)
131
+ table_config = next(filter(lambda x: x.name == table_name, config.tables))
132
+ table = load_table(table_config)
133
+ table_dataset = table.dataset(version=version)
134
+ sql_query = generate_table_statistics_query(table_name)
135
+ results = execute_query({table_name: table_dataset}, sql_query)
136
+
137
+ out = rich.table.Table()
138
+ for column in results.columns:
139
+ out.add_column(column)
140
+ for value_list in results.to_numpy().tolist():
141
+ row = [str(x) for x in value_list]
142
+ out.add_row(*row)
143
+ except Exception as e:
144
+ out = rich.panel.Panel.fit(f"[red]{e}")
121
145
 
122
146
  console = rich.get_console()
123
147
  console.print(out, markup=False) # disable markup to allow bracket characters
@@ -132,42 +156,59 @@ def view_table(
132
156
  sort_desc: str | None = None,
133
157
  version: int | None = None,
134
158
  ) -> None:
135
- config = load_yaml_config(config_path)
136
- table_config = next(filter(lambda x: x.name == table_name, config.tables))
137
- table = load_table(table_config)
138
- table_dataset = table.dataset(version=version)
139
- sql_query = generate_table_query(
140
- table_name, limit=limit, cols=cols, sort_asc=sort_asc, sort_desc=sort_desc
141
- )
142
- results = execute_query({table_name: table_dataset}, sql_query)
159
+ out: rich.jupyter.JupyterMixin
160
+ try:
161
+ config = load_yaml_config(config_path)
162
+ table_config = next(filter(lambda x: x.name == table_name, config.tables))
163
+ table = load_table(table_config)
164
+ table_dataset = table.dataset(version=version)
165
+ sql_query = generate_table_query(
166
+ table_name, limit=limit, cols=cols, sort_asc=sort_asc, sort_desc=sort_desc
167
+ )
168
+ results = execute_query({table_name: table_dataset}, sql_query)
143
169
 
144
- out = rich.table.Table()
145
- for column in results.columns:
146
- out.add_column(column)
147
- for value_list in results.to_numpy().tolist():
148
- row = [str(x) for x in value_list]
149
- out.add_row(*row)
170
+ out = rich.table.Table()
171
+ for column in results.columns:
172
+ out.add_column(column)
173
+ for value_list in results.to_numpy().tolist():
174
+ row = [str(x) for x in value_list]
175
+ out.add_row(*row)
176
+ except Exception as e:
177
+ out = rich.panel.Panel.fit(f"[red]{e}")
150
178
 
151
179
  console = rich.get_console()
152
180
  console.print(out)
153
181
 
154
182
 
155
- def query_table(config_path: Path, sql_query: str) -> None:
156
- config = load_yaml_config(config_path)
157
- tables_dataset = {
158
- table_config.name: load_table(table_config).dataset()
159
- for table_config in config.tables
160
- }
161
-
183
+ def query_table(
184
+ config_path: Path,
185
+ sql_query: str,
186
+ sql_params: list[list[str]] = [],
187
+ output_path: Path | None = None,
188
+ ) -> None:
162
189
  out: rich.jupyter.JupyterMixin
163
190
  try:
164
- results = execute_query(tables_dataset, sql_query)
191
+ config = load_yaml_config(config_path)
192
+ tables_dataset = load_datasets(config.tables)
193
+ sql_params_dict = {param[0]: param[1] for param in sql_params}
194
+ query_param_names = extract_query_parameter_names(sql_query)
195
+ query_params = {
196
+ name: sql_params_dict.get(name) or "" for name in query_param_names
197
+ }
198
+ results = execute_query(tables_dataset, sql_query, sql_params=query_params)
199
+
165
200
  out = rich.table.Table()
166
201
  for column in results.columns:
167
202
  out.add_column(column)
168
203
  for value_list in results.values.tolist():
169
204
  row = [str(x) for x in value_list]
170
205
  out.add_row(*row)
206
+
207
+ if output_path is not None:
208
+ results.to_csv(
209
+ output_path, header=True, index=False, sep=",", encoding="utf-8"
210
+ )
211
+ out = rich.text.Text(f"Query results written to: {output_path}")
171
212
  except ValueError as e:
172
213
  out = rich.panel.Panel.fit(f"[red]{e}")
173
214
 
@@ -175,6 +216,33 @@ def query_table(config_path: Path, sql_query: str) -> None:
175
216
  console.print(out)
176
217
 
177
218
 
219
+ def import_table(
220
+ config_path: Path,
221
+ table_name: str,
222
+ file_path: Path,
223
+ mode: ImportModeEnum,
224
+ file_format: ImportFileFormatEnum,
225
+ delimiter: str,
226
+ encoding: str,
227
+ ) -> None:
228
+ out: rich.jupyter.JupyterMixin
229
+ try:
230
+ config = load_yaml_config(config_path)
231
+ table_config = next(filter(lambda x: x.name == table_name, config.tables))
232
+ with open(file_path, "rb") as file_content:
233
+ rows_imported = import_file_to_table(
234
+ table_config, file_content, mode, file_format, delimiter, encoding
235
+ )
236
+ out = rich.text.Text(
237
+ f"Successfully imported {rows_imported} rows into table '{table_name}' in '{mode.value}' mode"
238
+ )
239
+ except Exception as e:
240
+ out = rich.panel.Panel.fit(f"[red]{e}")
241
+
242
+ console = rich.get_console()
243
+ console.print(out)
244
+
245
+
178
246
  def list_queries(config_path: Path) -> None:
179
247
  config = load_yaml_config(config_path)
180
248
  tree = rich.tree.Tree("queries")
@@ -184,18 +252,24 @@ def list_queries(config_path: Path) -> None:
184
252
  console.print(tree)
185
253
 
186
254
 
187
- def view_query(config_path: Path, query_name: str) -> None:
188
- config = load_yaml_config(config_path)
189
- query_config = next(filter(lambda x: x.name == query_name, config.queries))
190
- sql_query = query_config.sql
191
- tables_dataset = {
192
- table_config.name: load_table(table_config).dataset()
193
- for table_config in config.tables
194
- }
195
-
255
+ def view_query(
256
+ config_path: Path, query_name: str, query_params: list[list[str]] = []
257
+ ) -> None:
196
258
  out: rich.jupyter.JupyterMixin
197
259
  try:
198
- results = execute_query(tables_dataset, sql_query)
260
+ config = load_yaml_config(config_path)
261
+ tables_dataset = load_datasets(config.tables)
262
+ query_config = next(filter(lambda x: x.name == query_name, config.queries))
263
+ default_parameters = {k: v.default for k, v in query_config.parameters.items()}
264
+ sql_query = query_config.sql
265
+ query_params_dict = {param[0]: param[1] for param in query_params}
266
+ sql_param_names = extract_query_parameter_names(sql_query)
267
+ sql_params = {
268
+ name: query_params_dict.get(name) or default_parameters.get(name) or ""
269
+ for name in sql_param_names
270
+ }
271
+ results = execute_query(tables_dataset, sql_query, sql_params=sql_params)
272
+
199
273
  out = rich.table.Table()
200
274
  for column in results.columns:
201
275
  out.add_column(column)
@@ -310,8 +384,54 @@ def cli() -> None:
310
384
  parser_tables_query = subsparsers_tables.add_parser(
311
385
  "query", help="Query registered tables"
312
386
  )
387
+ parser_tables_query.add_argument(
388
+ "--output", help="Output query results to a file (default format: CSV)"
389
+ )
390
+ parser_tables_query.add_argument(
391
+ "--param",
392
+ "-p",
393
+ nargs=2,
394
+ action="append",
395
+ default=[],
396
+ help="Inject query named parameters values",
397
+ )
313
398
  parser_tables_query.add_argument("sql", help="SQL query to execute")
314
- parser_tables_query.set_defaults(func=lambda x: query_table(x.config, x.sql))
399
+ parser_tables_query.set_defaults(
400
+ func=lambda x: query_table(x.config, x.sql, x.param, x.output)
401
+ )
402
+
403
+ parser_tables_import = subsparsers_tables.add_parser(
404
+ "import", help="Import data into a table"
405
+ )
406
+ parser_tables_import.add_argument("table", help="Name of the table")
407
+ parser_tables_import.add_argument(
408
+ "--file", type=Path, required=True, help="Path to file to import"
409
+ )
410
+ parser_tables_import.add_argument(
411
+ "--mode",
412
+ choices=[mode.value for mode in ImportModeEnum],
413
+ default=ImportModeEnum.append.value,
414
+ type=ImportModeEnum,
415
+ help=f"Import mode (default: {ImportModeEnum.append.value})",
416
+ )
417
+ parser_tables_import.add_argument(
418
+ "--format",
419
+ choices=[file_format.value for file_format in ImportFileFormatEnum],
420
+ default=ImportFileFormatEnum.csv.value,
421
+ type=ImportFileFormatEnum,
422
+ help=f"File format (default: {ImportFileFormatEnum.csv.value})",
423
+ )
424
+ parser_tables_import.add_argument(
425
+ "--delimiter", default=",", help="Column delimiter to use (default: ',')"
426
+ )
427
+ parser_tables_import.add_argument(
428
+ "--encoding", default="utf-8", help="File encoding to use (default: 'utf-8')"
429
+ )
430
+ parser_tables_import.set_defaults(
431
+ func=lambda x: import_table(
432
+ x.config, x.table, x.file, x.mode, x.format, x.delimiter, x.encoding
433
+ )
434
+ )
315
435
 
316
436
  parser_queries = subparsers.add_parser("queries", help="Work with queries")
317
437
  subsparsers_queries = parser_queries.add_subparsers(required=True)
@@ -325,7 +445,17 @@ def cli() -> None:
325
445
  "view", help="View a given query"
326
446
  )
327
447
  parser_queries_view.add_argument("query", help="Name of the query")
328
- parser_queries_view.set_defaults(func=lambda x: view_query(x.config, x.query))
448
+ parser_queries_view.add_argument(
449
+ "--param",
450
+ "-p",
451
+ nargs=2,
452
+ action="append",
453
+ default=[],
454
+ help="Inject query named parameters values",
455
+ )
456
+ parser_queries_view.set_defaults(
457
+ func=lambda x: view_query(x.config, x.query, x.param)
458
+ )
329
459
 
330
460
  args = parser.parse_args()
331
461
  args.func(args)
laketower/config.py CHANGED
@@ -1,42 +1,113 @@
1
1
  import enum
2
+ import json
3
+ import os
2
4
  from pathlib import Path
5
+ from typing import Any
3
6
 
4
- import deltalake
5
7
  import pydantic
6
8
  import yaml
7
9
 
8
10
 
11
+ def substitute_env_vars(config_data: Any) -> Any:
12
+ """
13
+ Substitute environment variables within the input payload.
14
+
15
+ Only allowed format:
16
+ ```python
17
+ {
18
+ "some_key": {"env": "VAR_NAME"}
19
+ }
20
+
21
+ The "env" key MUST BE the only key in the dict to be processed.
22
+
23
+ The content of the environment variable will be loaded with a JSON parser,
24
+ so it can contain complex and nested structures (default is a string).
25
+ ```
26
+ """
27
+ match config_data:
28
+ case {"env": str(var_name)} if len(config_data) == 1:
29
+ # Handle environment variable substitution
30
+ env_value = os.getenv(var_name)
31
+ if env_value is None:
32
+ raise ValueError(f"environment variable '{var_name}' is not set")
33
+
34
+ try:
35
+ return json.loads(env_value)
36
+ except json.JSONDecodeError:
37
+ return env_value
38
+
39
+ case dict() as config_dict:
40
+ # Process dictionary recursively
41
+ return {
42
+ key: substitute_env_vars(value) for key, value in config_dict.items()
43
+ }
44
+
45
+ case list() as config_list:
46
+ # Process list recursively
47
+ return [substitute_env_vars(item) for item in config_list]
48
+
49
+ case _:
50
+ # Return primitive values unchanged
51
+ return config_data
52
+
53
+
9
54
  class TableFormats(str, enum.Enum):
10
55
  delta = "delta"
11
56
 
12
57
 
58
+ class ConfigTableConnectionS3(pydantic.BaseModel):
59
+ s3_access_key_id: str
60
+ s3_secret_access_key: pydantic.SecretStr
61
+ s3_region: str | None = None
62
+ s3_endpoint_url: pydantic.AnyHttpUrl | None = None
63
+ s3_allow_http: bool = False
64
+
65
+
66
+ class ConfigTableConnectionADLS(pydantic.BaseModel):
67
+ adls_account_name: str
68
+ adls_access_key: pydantic.SecretStr | None = None
69
+ adls_sas_key: pydantic.SecretStr | None = None
70
+ adls_tenant_id: str | None = None
71
+ adls_client_id: str | None = None
72
+ adls_client_secret: pydantic.SecretStr | None = None
73
+ azure_msi_endpoint: pydantic.AnyHttpUrl | None = None
74
+ use_azure_cli: bool = False
75
+
76
+
77
+ class ConfigTableConnection(pydantic.BaseModel):
78
+ s3: ConfigTableConnectionS3 | None = None
79
+ adls: ConfigTableConnectionADLS | None = None
80
+
81
+ @pydantic.model_validator(mode="after")
82
+ def mutually_exclusive_connectors(self) -> "ConfigTableConnection":
83
+ connectors = [self.s3, self.adls]
84
+ non_null_connectors = list(filter(None, connectors))
85
+ if len(non_null_connectors) > 1:
86
+ raise ValueError(
87
+ "only one connection type can be specified among: 's3', 'adls'"
88
+ )
89
+ return self
90
+
91
+
13
92
  class ConfigTable(pydantic.BaseModel):
14
93
  name: str
15
94
  uri: str
16
95
  table_format: TableFormats = pydantic.Field(alias="format")
96
+ connection: ConfigTableConnection | None = None
17
97
 
18
- @pydantic.model_validator(mode="after")
19
- def check_table(self) -> "ConfigTable":
20
- def check_delta_table(table_uri: str) -> None:
21
- if not deltalake.DeltaTable.is_deltatable(table_uri):
22
- raise ValueError(f"{table_uri} is not a valid Delta table")
23
-
24
- format_check = {TableFormats.delta: check_delta_table}
25
- format_check[self.table_format](self.uri)
26
98
 
27
- return self
99
+ class ConfigQueryParameter(pydantic.BaseModel):
100
+ default: str
28
101
 
29
102
 
30
103
  class ConfigQuery(pydantic.BaseModel):
31
104
  name: str
32
105
  title: str
106
+ description: str | None = None
107
+ parameters: dict[str, ConfigQueryParameter] = {}
33
108
  sql: str
34
109
 
35
110
 
36
- class ConfigDashboard(pydantic.BaseModel):
37
- name: str
38
-
39
-
40
111
  class Config(pydantic.BaseModel):
41
112
  tables: list[ConfigTable] = []
42
113
  queries: list[ConfigQuery] = []
@@ -44,4 +115,5 @@ class Config(pydantic.BaseModel):
44
115
 
45
116
  def load_yaml_config(config_path: Path) -> Config:
46
117
  config_dict = yaml.safe_load(config_path.read_text())
118
+ config_dict = substitute_env_vars(config_dict)
47
119
  return Config.model_validate(config_dict)