laketower 0.5.1__py3-none-any.whl → 0.6.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- laketower/__about__.py +1 -1
- laketower/cli.py +269 -101
- laketower/config.py +96 -14
- laketower/static/datatables.bundle.js +27931 -0
- laketower/static/datatables.js +55 -0
- laketower/static/editor.bundle.js +27433 -0
- laketower/static/editor.js +74 -0
- laketower/static/vendor/bootstrap/bootstrap.bundle.min.js +7 -0
- laketower/static/vendor/bootstrap-icons/bootstrap-icons.min.css +5 -0
- laketower/static/vendor/bootstrap-icons/fonts/bootstrap-icons.woff +0 -0
- laketower/static/vendor/bootstrap-icons/fonts/bootstrap-icons.woff2 +0 -0
- laketower/static/vendor/datatables.net-bs5/dataTables.bootstrap5.css +610 -0
- laketower/static/vendor/datatables.net-columncontrol-bs5/columnControl.bootstrap5.min.css +1 -0
- laketower/static/vendor/halfmoon/halfmoon.min.css +22 -0
- laketower/static/vendor/halfmoon/halfmoon.modern.css +282 -0
- laketower/tables.py +218 -16
- laketower/templates/_base.html +99 -20
- laketower/templates/queries/view.html +50 -8
- laketower/templates/tables/_macros.html +3 -0
- laketower/templates/tables/history.html +6 -0
- laketower/templates/tables/import.html +71 -0
- laketower/templates/tables/index.html +6 -0
- laketower/templates/tables/query.html +53 -7
- laketower/templates/tables/statistics.html +10 -4
- laketower/templates/tables/view.html +48 -42
- laketower/web.py +253 -30
- {laketower-0.5.1.dist-info → laketower-0.6.5.dist-info}/METADATA +189 -5
- laketower-0.6.5.dist-info/RECORD +35 -0
- laketower-0.6.5.dist-info/entry_points.txt +2 -0
- laketower-0.5.1.dist-info/RECORD +0 -22
- laketower-0.5.1.dist-info/entry_points.txt +0 -2
- {laketower-0.5.1.dist-info → laketower-0.6.5.dist-info}/WHEEL +0 -0
- {laketower-0.5.1.dist-info → laketower-0.6.5.dist-info}/licenses/LICENSE +0 -0
laketower/__about__.py
CHANGED
@@ -1 +1 @@
-__version__ = "0.5.1"
+__version__ = "0.6.5"
laketower/cli.py
CHANGED
@@ -1,20 +1,29 @@
 import argparse
 import os
+import time
 from pathlib import Path

 import rich.jupyter
 import rich.panel
+import rich.style
 import rich.table
 import rich.text
 import rich.tree
 import uvicorn
+import pyarrow.csv as pacsv

 from laketower.__about__ import __version__
 from laketower.config import load_yaml_config
 from laketower.tables import (
+    ImportFileFormatEnum,
+    ImportModeEnum,
     execute_query,
+    extract_query_parameter_names,
     generate_table_query,
     generate_table_statistics_query,
+    import_file_to_table,
+    limit_query,
+    load_datasets,
     load_table,
 )

@@ -47,77 +56,95 @@ def list_tables(config_path: Path) -> None:


 def table_metadata(config_path: Path, table_name: str) -> None:
-    …
+    out: rich.jupyter.JupyterMixin
+    try:
+        config = load_yaml_config(config_path)
+        table_config = next(filter(lambda x: x.name == table_name, config.tables))
+        table = load_table(table_config)
+        metadata = table.metadata()
+
+        out = rich.tree.Tree(table_name)
+        out.add(f"name: {metadata.name}")
+        out.add(f"description: {metadata.description}")
+        out.add(f"format: {metadata.table_format.value}")
+        out.add(f"uri: {metadata.uri}")
+        out.add(f"id: {metadata.id}")
+        out.add(f"version: {metadata.version}")
+        out.add(f"created at: {metadata.created_at}")
+        out.add(f"partitions: {', '.join(metadata.partitions)}")
+        out.add(f"configuration: {metadata.configuration}")
+    except Exception as e:
+        out = rich.panel.Panel.fit(f"[red]{e}")
+
     console = rich.get_console()
-    console.print(…)
+    console.print(out)


 def table_schema(config_path: Path, table_name: str) -> None:
-    …
+    out: rich.jupyter.JupyterMixin
+    try:
+        config = load_yaml_config(config_path)
+        table_config = next(filter(lambda x: x.name == table_name, config.tables))
+        table = load_table(table_config)
+        schema = table.schema()
+
+        out = rich.tree.Tree(table_name)
+        for field in schema:
+            nullable = "" if field.nullable else " not null"
+            out.add(f"{field.name}: {field.type}{nullable}")
+    except Exception as e:
+        out = rich.panel.Panel.fit(f"[red]{e}")
+
     console = rich.get_console()
-    console.print(…)
+    console.print(out, markup=False)  # disable markup to allow bracket characters


 def table_history(config_path: Path, table_name: str) -> None:
-    …
-    tree_op_params.add(…)
-    …
-    tree_op_metrics.add(…)
+    out: rich.jupyter.JupyterMixin
+    try:
+        config = load_yaml_config(config_path)
+        table_config = next(filter(lambda x: x.name == table_name, config.tables))
+        table = load_table(table_config)
+        history = table.history()
+
+        out = rich.tree.Tree(table_name)
+        for rev in history.revisions:
+            tree_version = out.add(f"version: {rev.version}")
+            tree_version.add(f"timestamp: {rev.timestamp}")
+            tree_version.add(f"client version: {rev.client_version}")
+            tree_version.add(f"operation: {rev.operation}")
+            tree_op_params = tree_version.add("operation parameters")
+            for param_key, param_val in rev.operation_parameters.items():
+                tree_op_params.add(f"{param_key}: {param_val}")
+            tree_op_metrics = tree_version.add("operation metrics")
+            for metric_key, metric_val in rev.operation_metrics.items():
+                tree_op_metrics.add(f"{metric_key}: {metric_val}")
+    except Exception as e:
+        out = rich.panel.Panel.fit(f"[red]{e}")
+
     console = rich.get_console()
-    console.print(…)
+    console.print(out, markup=False)


 def table_statistics(
     config_path: Path, table_name: str, version: int | None = None
 ) -> None:
-    …
+    out: rich.jupyter.JupyterMixin
+    try:
+        config = load_yaml_config(config_path)
+        table_config = next(filter(lambda x: x.name == table_name, config.tables))
+        table = load_table(table_config)
+        table_dataset = table.dataset(version=version)
+        sql_query = generate_table_statistics_query(table_name)
+        results = execute_query({table_name: table_dataset}, sql_query)
+
+        out = rich.table.Table()
+        for column in results.column_names:
+            out.add_column(column)
+        for row_dict in results.to_pylist():
+            out.add_row(*[str(row_dict[col]) for col in results.column_names])
+    except Exception as e:
+        out = rich.panel.Panel.fit(f"[red]{e}")

     console = rich.get_console()
     console.print(out, markup=False)  # disable markup to allow bracket characters
@@ -132,42 +159,77 @@ def view_table(
     sort_desc: str | None = None,
     version: int | None = None,
 ) -> None:
-    …
+    out: rich.jupyter.JupyterMixin
+    try:
+        config = load_yaml_config(config_path)
+        table_config = next(filter(lambda x: x.name == table_name, config.tables))
+        table = load_table(table_config)
+        table_dataset = table.dataset(version=version)
+        sql_query = generate_table_query(
+            table_name, limit=limit, cols=cols, sort_asc=sort_asc, sort_desc=sort_desc
+        )
+        results = execute_query({table_name: table_dataset}, sql_query)

-    …
+        out = rich.table.Table()
+        for column in results.column_names:
+            out.add_column(column)
+        for row_dict in results.to_pylist():
+            out.add_row(*[str(row_dict[col]) for col in results.column_names])
+    except Exception as e:
+        out = rich.panel.Panel.fit(f"[red]{e}")

     console = rich.get_console()
     console.print(out)


-def query_table(
-    …
+def query_table(
+    config_path: Path,
+    sql_query: str,
+    sql_params: list[list[str]] = [],
+    output_path: Path | None = None,
+) -> None:
     out: rich.jupyter.JupyterMixin
     try:
-        …
+        config = load_yaml_config(config_path)
+        tables_dataset = load_datasets(config.tables)
+        sql_params_dict = {param[0]: param[1] for param in sql_params}
+        query_param_names = extract_query_parameter_names(sql_query)
+        query_params = {
+            name: sql_params_dict.get(name) or "" for name in query_param_names
+        }
+        limited_sql_query = limit_query(sql_query, config.settings.max_query_rows + 1)
+
+        start_time = time.perf_counter()
+        results = execute_query(
+            tables_dataset, limited_sql_query, sql_params=query_params
+        )
+        execution_time_ms = (time.perf_counter() - start_time) * 1000
+
+        truncated = results.num_rows > config.settings.max_query_rows
+        results = results.slice(
+            0, min(results.num_rows, config.settings.max_query_rows)
+        )
+
+        out = rich.table.Table(
+            caption=(
+                f"{results.num_rows} rows returned{' (truncated)' if truncated else ''}"
+                f"\nExecution time: {execution_time_ms:.2f}ms"
+            ),
+            caption_justify="left",
+            caption_style=rich.style.Style(dim=True),
+        )
+        for column in results.column_names:
             out.add_column(column)
-        for …
-            …
+        for row_dict in results.to_pylist():
+            out.add_row(*[str(row_dict[col]) for col in results.column_names])
+
+        if output_path is not None:
+            pacsv.write_csv(
+                results,
+                output_path,
+                pacsv.WriteOptions(include_header=True, delimiter=","),
+            )
+            out = rich.text.Text(f"Query results written to: {output_path}")
     except ValueError as e:
         out = rich.panel.Panel.fit(f"[red]{e}")

@@ -175,6 +237,33 @@ def query_table(config_path: Path, sql_query: str) -> None:
     console.print(out)


+def import_table(
+    config_path: Path,
+    table_name: str,
+    file_path: Path,
+    mode: ImportModeEnum,
+    file_format: ImportFileFormatEnum,
+    delimiter: str,
+    encoding: str,
+) -> None:
+    out: rich.jupyter.JupyterMixin
+    try:
+        config = load_yaml_config(config_path)
+        table_config = next(filter(lambda x: x.name == table_name, config.tables))
+        with open(file_path, "rb") as file_content:
+            rows_imported = import_file_to_table(
+                table_config, file_content, mode, file_format, delimiter, encoding
+            )
+        out = rich.text.Text(
+            f"Successfully imported {rows_imported} rows into table '{table_name}' in '{mode.value}' mode"
+        )
+    except Exception as e:
+        out = rich.panel.Panel.fit(f"[red]{e}")
+
+    console = rich.get_console()
+    console.print(out)
+
+
 def list_queries(config_path: Path) -> None:
     config = load_yaml_config(config_path)
     tree = rich.tree.Tree("queries")
@@ -184,24 +273,47 @@ def list_queries(config_path: Path) -> None:
     console.print(tree)


-def view_query(
-    …
-    sql_query = query_config.sql
-    tables_dataset = {
-        table_config.name: load_table(table_config).dataset()
-        for table_config in config.tables
-    }
-
+def view_query(
+    config_path: Path, query_name: str, query_params: list[list[str]] = []
+) -> None:
     out: rich.jupyter.JupyterMixin
     try:
-        …
+        config = load_yaml_config(config_path)
+        tables_dataset = load_datasets(config.tables)
+        query_config = next(filter(lambda x: x.name == query_name, config.queries))
+        default_parameters = {k: v.default for k, v in query_config.parameters.items()}
+        sql_query = query_config.sql
+        query_params_dict = {param[0]: param[1] for param in query_params}
+        sql_param_names = extract_query_parameter_names(sql_query)
+        sql_params = {
+            name: query_params_dict.get(name) or default_parameters.get(name) or ""
+            for name in sql_param_names
+        }
+        limited_sql_query = limit_query(sql_query, config.settings.max_query_rows + 1)
+
+        start_time = time.perf_counter()
+        results = execute_query(
+            tables_dataset, limited_sql_query, sql_params=sql_params
+        )
+        execution_time_ms = (time.perf_counter() - start_time) * 1000
+
+        truncated = results.num_rows > config.settings.max_query_rows
+        results = results.slice(
+            0, min(results.num_rows, config.settings.max_query_rows)
+        )
+
+        out = rich.table.Table(
+            caption=(
+                f"{results.num_rows} rows returned{' (truncated)' if truncated else ''}"
+                f"\nExecution time: {execution_time_ms:.2f}ms"
+            ),
+            caption_justify="left",
+            caption_style=rich.style.Style(dim=True),
+        )
+        for column in results.column_names:
             out.add_column(column)
-        for …
-            out.add_row(*row)
+        for row_dict in results.to_pylist():
+            out.add_row(*[str(row_dict[col]) for col in results.column_names])
     except ValueError as e:
         out = rich.panel.Panel.fit(f"[red]{e}")

@@ -310,8 +422,54 @@ def cli() -> None:
     parser_tables_query = subsparsers_tables.add_parser(
         "query", help="Query registered tables"
     )
+    parser_tables_query.add_argument(
+        "--output", help="Output query results to a file (default format: CSV)"
+    )
+    parser_tables_query.add_argument(
+        "--param",
+        "-p",
+        nargs=2,
+        action="append",
+        default=[],
+        help="Inject query named parameters values",
+    )
     parser_tables_query.add_argument("sql", help="SQL query to execute")
-    parser_tables_query.set_defaults(…)
+    parser_tables_query.set_defaults(
+        func=lambda x: query_table(x.config, x.sql, x.param, x.output)
+    )
+
+    parser_tables_import = subsparsers_tables.add_parser(
+        "import", help="Import data into a table"
+    )
+    parser_tables_import.add_argument("table", help="Name of the table")
+    parser_tables_import.add_argument(
+        "--file", type=Path, required=True, help="Path to file to import"
+    )
+    parser_tables_import.add_argument(
+        "--mode",
+        choices=[mode.value for mode in ImportModeEnum],
+        default=ImportModeEnum.append.value,
+        type=ImportModeEnum,
+        help=f"Import mode (default: {ImportModeEnum.append.value})",
+    )
+    parser_tables_import.add_argument(
+        "--format",
+        choices=[file_format.value for file_format in ImportFileFormatEnum],
+        default=ImportFileFormatEnum.csv.value,
+        type=ImportFileFormatEnum,
+        help=f"File format (default: {ImportFileFormatEnum.csv.value})",
+    )
+    parser_tables_import.add_argument(
+        "--delimiter", default=",", help="Column delimiter to use (default: ',')"
+    )
+    parser_tables_import.add_argument(
+        "--encoding", default="utf-8", help="File encoding to use (default: 'utf-8')"
+    )
+    parser_tables_import.set_defaults(
+        func=lambda x: import_table(
+            x.config, x.table, x.file, x.mode, x.format, x.delimiter, x.encoding
+        )
+    )

     parser_queries = subparsers.add_parser("queries", help="Work with queries")
     subsparsers_queries = parser_queries.add_subparsers(required=True)
@@ -325,7 +483,17 @@ def cli() -> None:
         "view", help="View a given query"
     )
     parser_queries_view.add_argument("query", help="Name of the query")
-    parser_queries_view.…
+    parser_queries_view.add_argument(
+        "--param",
+        "-p",
+        nargs=2,
+        action="append",
+        default=[],
+        help="Inject query named parameters values",
+    )
+    parser_queries_view.set_defaults(
+        func=lambda x: view_query(x.config, x.query, x.param)
+    )

     args = parser.parse_args()
     args.func(args)
laketower/config.py
CHANGED
@@ -1,47 +1,129 @@
 import enum
+import json
+import os
 from pathlib import Path
+from typing import Any

-import deltalake
 import pydantic
 import yaml


+def substitute_env_vars(config_data: Any) -> Any:
+    """
+    Substitute environment variables within the input payload.
+
+    Only allowed format:
+    ```python
+    {
+        "some_key": {"env": "VAR_NAME"}
+    }
+    ```
+
+    The "env" key MUST BE the only key in the dict to be processed.
+
+    The content of the environment variable will be loaded with a JSON parser,
+    so it can contain complex and nested structures (default is a string).
+    """
+    match config_data:
+        case {"env": str(var_name)} if len(config_data) == 1:
+            # Handle environment variable substitution
+            env_value = os.getenv(var_name)
+            if env_value is None:
+                raise ValueError(f"environment variable '{var_name}' is not set")
+
+            try:
+                return json.loads(env_value)
+            except json.JSONDecodeError:
+                return env_value
+
+        case dict() as config_dict:
+            # Process dictionary recursively
+            return {
+                key: substitute_env_vars(value) for key, value in config_dict.items()
+            }
+
+        case list() as config_list:
+            # Process list recursively
+            return [substitute_env_vars(item) for item in config_list]
+
+        case _:
+            # Return primitive values unchanged
+            return config_data
+
+
 class TableFormats(str, enum.Enum):
     delta = "delta"


+class ConfigTableConnectionS3(pydantic.BaseModel):
+    s3_access_key_id: str
+    s3_secret_access_key: pydantic.SecretStr
+    s3_region: str | None = None
+    s3_endpoint_url: pydantic.AnyHttpUrl | None = None
+    s3_allow_http: bool = False
+
+
+class ConfigTableConnectionADLS(pydantic.BaseModel):
+    adls_account_name: str
+    adls_access_key: pydantic.SecretStr | None = None
+    adls_sas_key: pydantic.SecretStr | None = None
+    adls_tenant_id: str | None = None
+    adls_client_id: str | None = None
+    adls_client_secret: pydantic.SecretStr | None = None
+    azure_msi_endpoint: pydantic.AnyHttpUrl | None = None
+    use_azure_cli: bool = False
+
+
+class ConfigTableConnection(pydantic.BaseModel):
+    s3: ConfigTableConnectionS3 | None = None
+    adls: ConfigTableConnectionADLS | None = None
+
+    @pydantic.model_validator(mode="after")
+    def mutually_exclusive_connectors(self) -> "ConfigTableConnection":
+        connectors = [self.s3, self.adls]
+        non_null_connectors = list(filter(None, connectors))
+        if len(non_null_connectors) > 1:
+            raise ValueError(
+                "only one connection type can be specified among: 's3', 'adls'"
+            )
+        return self
+
+
+class ConfigSettingsWeb(pydantic.BaseModel):
+    hide_tables: bool = False
+
+
+class ConfigSettings(pydantic.BaseModel):
+    max_query_rows: int = 1_000
+    web: ConfigSettingsWeb = ConfigSettingsWeb()
+
+
 class ConfigTable(pydantic.BaseModel):
     name: str
     uri: str
     table_format: TableFormats = pydantic.Field(alias="format")
+    connection: ConfigTableConnection | None = None

-    @pydantic.model_validator(mode="after")
-    def check_table(self) -> "ConfigTable":
-        def check_delta_table(table_uri: str) -> None:
-            if not deltalake.DeltaTable.is_deltatable(table_uri):
-                raise ValueError(f"{table_uri} is not a valid Delta table")
-
-        format_check = {TableFormats.delta: check_delta_table}
-        format_check[self.table_format](self.uri)

-
+class ConfigQueryParameter(pydantic.BaseModel):
+    default: str


 class ConfigQuery(pydantic.BaseModel):
     name: str
     title: str
+    description: str | None = None
+    parameters: dict[str, ConfigQueryParameter] = {}
     sql: str


-class ConfigDashboard(pydantic.BaseModel):
-    name: str
-
-
 class Config(pydantic.BaseModel):
+    settings: ConfigSettings = ConfigSettings()
     tables: list[ConfigTable] = []
     queries: list[ConfigQuery] = []


 def load_yaml_config(config_path: Path) -> Config:
     config_dict = yaml.safe_load(config_path.read_text())
+    config_dict = substitute_env_vars(config_dict)
     return Config.model_validate(config_dict)