PyPI - cloe-nessy - Versions diffs - 0.3.3__py3-none-any.whl → 0.3.8__py3-none-any.whl - Mend

cloe-nessy 0.3.3__py3-none-any.whl → 0.3.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (88) hide show

cloe_nessy/__init__.py +0 -0
cloe_nessy/clients/__init__.py +0 -0
cloe_nessy/clients/api_client/__init__.py +0 -0
cloe_nessy/clients/api_client/api_client.py +0 -0
cloe_nessy/clients/api_client/api_response.py +0 -0
cloe_nessy/clients/api_client/auth.py +0 -0
cloe_nessy/clients/api_client/exceptions.py +0 -0
cloe_nessy/file_utilities/__init__.py +0 -0
cloe_nessy/file_utilities/exceptions.py +0 -0
cloe_nessy/file_utilities/factory.py +0 -0
cloe_nessy/file_utilities/get_file_paths.py +0 -0
cloe_nessy/file_utilities/location_types.py +0 -0
cloe_nessy/file_utilities/strategies/__init__.py +0 -0
cloe_nessy/file_utilities/strategies/base_strategy.py +0 -0
cloe_nessy/file_utilities/strategies/local_strategy.py +0 -0
cloe_nessy/file_utilities/strategies/onelake_strategy.py +0 -0
cloe_nessy/file_utilities/strategies/utils_strategy.py +0 -0
cloe_nessy/integration/__init__.py +0 -0
cloe_nessy/integration/reader/__init__.py +0 -0
cloe_nessy/integration/reader/api_reader.py +0 -0
cloe_nessy/integration/reader/catalog_reader.py +0 -0
cloe_nessy/integration/reader/excel_reader.py +0 -0
cloe_nessy/integration/reader/exceptions.py +0 -0
cloe_nessy/integration/reader/file_reader.py +7 -1
cloe_nessy/integration/reader/reader.py +0 -0
cloe_nessy/integration/writer/__init__.py +0 -0
cloe_nessy/integration/writer/catalog_writer.py +1 -1
cloe_nessy/logging/__init__.py +0 -0
cloe_nessy/logging/logger_mixin.py +0 -0
cloe_nessy/models/__init__.py +4 -0
cloe_nessy/models/adapter/__init__.py +3 -0
cloe_nessy/models/adapter/unity_catalog_adapter.py +292 -0
cloe_nessy/models/catalog.py +10 -0
cloe_nessy/models/column.py +0 -0
cloe_nessy/models/constraint.py +0 -0
cloe_nessy/models/foreign_key.py +0 -0
cloe_nessy/models/mixins/__init__.py +0 -0
cloe_nessy/models/mixins/read_instance_mixin.py +0 -0
cloe_nessy/models/mixins/template_loader_mixin.py +0 -0
cloe_nessy/models/schema.py +19 -0
cloe_nessy/models/table.py +50 -5
cloe_nessy/models/types.py +0 -0
cloe_nessy/models/volume.py +67 -0
cloe_nessy/object_manager/__init__.py +7 -2
cloe_nessy/object_manager/table_manager.py +183 -7
cloe_nessy/object_manager/volume_manager.py +70 -0
cloe_nessy/pipeline/__init__.py +0 -0
cloe_nessy/pipeline/actions/__init__.py +2 -0
cloe_nessy/pipeline/actions/read_api.py +69 -45
cloe_nessy/pipeline/actions/read_catalog_table.py +9 -9
cloe_nessy/pipeline/actions/read_excel.py +14 -10
cloe_nessy/pipeline/actions/read_files.py +54 -28
cloe_nessy/pipeline/actions/read_metadata_yaml.py +9 -9
cloe_nessy/pipeline/actions/transform_change_datatype.py +13 -8
cloe_nessy/pipeline/actions/transform_clean_column_names.py +4 -0
cloe_nessy/pipeline/actions/transform_concat_columns.py +25 -11
cloe_nessy/pipeline/actions/transform_decode.py +18 -7
cloe_nessy/pipeline/actions/transform_deduplication.py +9 -9
cloe_nessy/pipeline/actions/transform_distinct.py +8 -8
cloe_nessy/pipeline/actions/transform_filter.py +6 -6
cloe_nessy/pipeline/actions/transform_generic_sql.py +12 -6
cloe_nessy/pipeline/actions/transform_group_aggregate.py +20 -26
cloe_nessy/pipeline/actions/transform_hash_columns.py +209 -0
cloe_nessy/pipeline/actions/transform_join.py +17 -10
cloe_nessy/pipeline/actions/transform_json_normalize.py +19 -6
cloe_nessy/pipeline/actions/transform_rename_columns.py +7 -7
cloe_nessy/pipeline/actions/transform_replace_values.py +8 -8
cloe_nessy/pipeline/actions/transform_select_columns.py +38 -9
cloe_nessy/pipeline/actions/transform_union.py +12 -8
cloe_nessy/pipeline/actions/write_catalog_table.py +11 -10
cloe_nessy/pipeline/pipeline.py +44 -2
cloe_nessy/pipeline/pipeline_action.py +0 -0
cloe_nessy/pipeline/pipeline_config.py +0 -0
cloe_nessy/pipeline/pipeline_context.py +0 -0
cloe_nessy/pipeline/pipeline_parsing_service.py +0 -0
cloe_nessy/pipeline/pipeline_step.py +0 -0
cloe_nessy/py.typed +0 -0
cloe_nessy/session/__init__.py +0 -0
cloe_nessy/session/session_manager.py +27 -0
cloe_nessy/settings/__init__.py +0 -0
cloe_nessy/settings/settings.py +0 -0
cloe_nessy/utils/__init__.py +0 -0
cloe_nessy/utils/file_and_directory_handler.py +0 -0
cloe_nessy-0.3.8.dist-info/METADATA +46 -0
{cloe_nessy-0.3.3.dist-info → cloe_nessy-0.3.8.dist-info}/RECORD +41 -35
{cloe_nessy-0.3.3.dist-info → cloe_nessy-0.3.8.dist-info}/WHEEL +1 -1
{cloe_nessy-0.3.3.dist-info → cloe_nessy-0.3.8.dist-info}/top_level.txt +0 -0
cloe_nessy-0.3.3.dist-info/METADATA +0 -26

cloe_nessy/__init__.py CHANGED Viewed

File without changes

cloe_nessy/clients/__init__.py CHANGED Viewed

File without changes

cloe_nessy/clients/api_client/__init__.py CHANGED Viewed

File without changes

cloe_nessy/clients/api_client/api_client.py CHANGED Viewed

File without changes

cloe_nessy/clients/api_client/api_response.py CHANGED Viewed

File without changes

cloe_nessy/clients/api_client/auth.py CHANGED Viewed

File without changes

cloe_nessy/clients/api_client/exceptions.py CHANGED Viewed

File without changes

cloe_nessy/file_utilities/__init__.py CHANGED Viewed

File without changes

cloe_nessy/file_utilities/exceptions.py CHANGED Viewed

File without changes

cloe_nessy/file_utilities/factory.py CHANGED Viewed

File without changes

cloe_nessy/file_utilities/get_file_paths.py CHANGED Viewed

File without changes

cloe_nessy/file_utilities/location_types.py CHANGED Viewed

File without changes

cloe_nessy/file_utilities/strategies/__init__.py CHANGED Viewed

File without changes

cloe_nessy/file_utilities/strategies/base_strategy.py CHANGED Viewed

File without changes

cloe_nessy/file_utilities/strategies/local_strategy.py CHANGED Viewed

File without changes

cloe_nessy/file_utilities/strategies/onelake_strategy.py CHANGED Viewed

File without changes

cloe_nessy/file_utilities/strategies/utils_strategy.py CHANGED Viewed

File without changes

cloe_nessy/integration/__init__.py CHANGED Viewed

File without changes

cloe_nessy/integration/reader/__init__.py CHANGED Viewed

File without changes

cloe_nessy/integration/reader/api_reader.py CHANGED Viewed

File without changes

cloe_nessy/integration/reader/catalog_reader.py CHANGED Viewed

File without changes

cloe_nessy/integration/reader/excel_reader.py CHANGED Viewed

File without changes

cloe_nessy/integration/reader/exceptions.py CHANGED Viewed

File without changes

cloe_nessy/integration/reader/file_reader.py CHANGED Viewed

@@ -46,7 +46,13 @@ class FileReader(BaseReader):
         if not spark_format and not extension:
             raise ValueError("Either spark_format or extension must be provided.")
         self._console_logger.debug(f"Reading files from [ '{location}' ] ...")
-        extension_to_datatype_dict = {"csv": "csv", "json": "json", "parquet": "parquet", "txt": "text", "xml": "xml"}
+        extension_to_datatype_dict = {
+            "csv": "csv",
+            "json": "json",
+            "parquet": "parquet",
+            "txt": "text",
+            "xml": "xml",
+        }
         if extension and not spark_format:
             if extension not in extension_to_datatype_dict:

cloe_nessy/integration/reader/reader.py CHANGED Viewed

File without changes

cloe_nessy/integration/writer/__init__.py CHANGED Viewed

File without changes

cloe_nessy/integration/writer/catalog_writer.py CHANGED Viewed

@@ -20,7 +20,7 @@ class CatalogWriter:
                               format 'catalog.schema.table'.
             mode: The write mode. One of append, overwrite, error, errorifexists, ignore.
             partition_by: Names of the partitioning columns.
-            options: All other string options.
+            options: PySpark options for the DataFrame.saveAsTable operation (e.g. mergeSchema:true).
         Notes:
             append: Append contents of this DataFrame to existing data.

cloe_nessy/logging/__init__.py CHANGED Viewed

File without changes

cloe_nessy/logging/logger_mixin.py CHANGED Viewed

File without changes

cloe_nessy/models/__init__.py CHANGED Viewed

@@ -1,13 +1,17 @@
+from .catalog import Catalog
 from .column import Column
 from .constraint import Constraint
 from .foreign_key import ForeignKey
 from .schema import Schema
 from .table import Table
+from .volume import Volume
 __all__ = [
+    "Catalog",
     "Column",
     "Constraint",
     "Table",
     "Schema",
     "ForeignKey",
+    "Volume",
 ]

cloe_nessy/models/adapter/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+from .unity_catalog_adapter import UnityCatalogAdapter
+__all__ = ["UnityCatalogAdapter"]

cloe_nessy/models/adapter/unity_catalog_adapter.py ADDED Viewed

@@ -0,0 +1,292 @@
+from pyspark.sql import SparkSession
+from pyspark.sql import functions as F
+from cloe_nessy.logging.logger_mixin import LoggerMixin
+from cloe_nessy.models import ForeignKey
+from ...session import SessionManager
+from ..catalog import Catalog
+from ..column import Column
+from ..schema import Schema
+from ..table import Table
+class UnityCatalogAdapter(LoggerMixin):
+    """Acts as a translator between Unity Catalog metadata and Nessy Models."""
+    def __init__(self, spark: SparkSession | None = None):
+        """Initializes the UnityCatalogAdapter class."""
+        self._spark = spark or SessionManager.get_spark_session()
+        self._console_logger = self.get_console_logger()
+        self._catalogs = self.get_catalogs()
+    def _execute_sql(self, query):
+        """Execute a SQL query and return a DataFrame.
+        This wrapper is used for better testability.
+        Returns:
+            The resulting DataFrame after executing the SQL query.
+        """
+        return self._spark.sql(query)
+    def get_catalogs(self) -> list[Catalog]:
+        """Retrieve a list of catalogs with their associated metadata.
+        Runs `SHOW CATALOGS` and then `DESCRIBE CATALOG EXTENDED` once per catalog
+        to read its owner and comment.
+        Returns:
+            A list of `Catalog` objects.
+        """
+        df = self._execute_sql("SHOW CATALOGS")
+        catalogs = []
+        for catalog in df.collect():
+            name = catalog["catalog"]
+            catalog_metadata = self._execute_sql(f"DESCRIBE CATALOG EXTENDED {name}")
+            # Pivot the (info_name, info_value) rows into a single row with one column per info_name.
+            pivoted_metadata = catalog_metadata.withColumn("dummy", F.lit("dummy"))
+            pivoted_df = pivoted_metadata.groupBy("dummy").pivot("info_name").agg(F.first("info_value"))
+            # Collect once: each collect() call executes the pivot again.
+            metadata_row = pivoted_df.collect()[0]
+            catalogs.append(Catalog(name=name, owner=metadata_row["Owner"], comment=metadata_row["Comment"]))
+        return catalogs
+    def get_catalog_by_name(self, name: str) -> Catalog | None:
+        """Returns a Catalog by its name.
+        Args:
+            name: The name of the Catalog.
+        Returns:
+            The Catalog with the specified name.
+        """
+        for catalog in self._catalogs:
+            if catalog.name == name:
+                return catalog
+        self._console_logger.warning(f"No catalog found with name: {name}")
+        return None
+    def get_catalog_schemas(self, catalog: str | Catalog) -> list[Schema]:
+        """Collects all schemas in a given catalog.
+        Args:
+            catalog: The catalog from which the schemas are to be collected.
+        Returns:
+            A list of `Schema` objects.
+        """
+        schemas = []
+        if isinstance(catalog, Catalog):
+            catalog = catalog.name
+        schemas_df = self._execute_sql(f"SELECT * FROM {catalog}.information_schema.schemata").collect()
+        for schema in schemas_df:
+            schemas.append(
+                Schema(name=schema["schema_name"], catalog=catalog, comment=schema["comment"]),
+            )
+        return schemas
+    def get_schema_by_name(self, catalog: str | Catalog, name: str) -> Schema | None:
+        """Retrieve a schema by its name from a specified catalog.
+        Args:
+            catalog: The catalog from which to retrieve the schema.
+                This can be either a string representing the catalog name or a
+                `Catalog` object.
+            name: The name of the schema to retrieve.
+        Returns:
+            The `Schema` object if found, otherwise `None`.
+        """
+        if isinstance(catalog, Catalog):
+            catalog = catalog.name
+        schemas = self.get_catalog_schemas(catalog)
+        for schema in schemas:
+            if schema.name == name:
+                schema = self.add_tables_to_schema(catalog, schema)
+                return schema
+        self._console_logger.warning(f"No Schema in Catalog [ '{catalog}' ] found with name [ '{name}' ]")
+        return None
+    def get_table_by_name(self, table_identifier: str) -> Table | None:
+        """Retrieve a table by its name.
+        Views are excluded from the lookup.
+        Args:
+            table_identifier: The table identifier in the format 'catalog.schema.table'.
+        Returns:
+            The `Table` object with its tags and columns, or `None` if no
+            matching table exists.
+        Raises:
+            ValueError: If the identifier does not consist of exactly three
+                dot-separated parts.
+        """
+        identifier_parts = table_identifier.split(".")
+        if len(identifier_parts) != 3:
+            raise ValueError("The identifier must be in the format 'catalog.schema.table'")
+        catalog_name, schema_name, table_name = identifier_parts
+        table_metadata_df = self._execute_sql(
+            f"""
+            SELECT * FROM {catalog_name}.information_schema.tables
+                WHERE table_catalog == '{catalog_name}'
+                AND table_schema == '{schema_name}'
+                AND table_name == '{table_name}'
+                AND table_type <> 'VIEW'
+            """,
+        )
+        # Fetch the row once; head(1) followed by collect() would execute the query twice.
+        table_metadata_rows = table_metadata_df.head(1)
+        if not table_metadata_rows:
+            return None
+        table_metadata = table_metadata_rows[0]
+        table_tags_list = self._execute_sql(
+            f"""
+            SELECT tag_name, tag_value FROM {catalog_name}.information_schema.table_tags
+                WHERE catalog_name == '{catalog_name}'
+                AND schema_name == '{schema_name}'
+                AND table_name == '{table_name}'
+            """,
+        ).collect()
+        table_tags = {r["tag_name"]: r["tag_value"] for r in table_tags_list}
+        table = Table(
+            identifier=table_identifier,
+            data_source_format=table_metadata["data_source_format"],
+            business_properties=table_tags,
+            storage_path=table_metadata["storage_path"],
+            columns=[],
+            is_external=table_metadata["table_type"] != "MANAGED",
+        )
+        return self.add_columns_to_table(table)
+    def add_tables_to_schema(self, catalog: str | Catalog, schema: str | Schema) -> Schema:
+        """Add tables to a schema within a specified catalog.
+        This method retrieves all tables (views excluded) within the specified
+        schema and catalog, and adds them to the `Schema` object. The schema is
+        updated with `Table` objects containing details about each table.
+        Args:
+            catalog: The catalog containing the schema. This can be
+                either a string representing the catalog name or a `Catalog` object.
+            schema: The schema to which tables will be added. This
+                can be either a string representing the schema name or a `Schema`
+                object.
+        Returns:
+            The updated `Schema` object with tables added.
+        Raises:
+            ValueError: If `schema` is given by name and is not found in the catalog.
+        """
+        catalog_name = catalog.name if isinstance(catalog, Catalog) else catalog
+        if isinstance(schema, str):
+            schema_obj = self.get_schema_by_name(catalog_name, schema)
+            if schema_obj is None:
+                raise ValueError(f"Schema {schema} not found in catalog {catalog_name}.")
+            # get_schema_by_name already adds the tables; adding them again would duplicate them.
+            return schema_obj
+        schema_obj = schema
+        tables_df = self._execute_sql(
+            f"SELECT * FROM {catalog_name}.information_schema.tables WHERE table_catalog == '{catalog_name}' AND table_schema == '{schema_obj.name}' AND table_type <> 'VIEW'",
+        ).collect()
+        for table_row in tables_df:
+            table_name = table_row["table_name"]
+            table_tags_list = self._execute_sql(
+                f"""SELECT tag_name, tag_value FROM {catalog_name}.information_schema.table_tags
+                                           WHERE
+                                                catalog_name == '{catalog_name}'
+                                            AND schema_name == '{schema_obj.name}'
+                                            AND table_name == '{table_name}'
+                                           """,
+            ).collect()
+            table_tags = {r["tag_name"]: r["tag_value"] for r in table_tags_list}
+            table = Table(
+                data_source_format=table_row["data_source_format"],
+                # Use the resolved name: `catalog` may be a `Catalog` object rather than a string.
+                identifier=f"{catalog_name}.{schema_obj.name}.{table_name}",
+                business_properties=table_tags,
+                columns=[],
+            )
+            table = self.add_columns_to_table(table)
+            schema_obj.add_table(table)
+        return schema_obj
+    def add_columns_to_table(self, table: Table) -> Table:
+        """Add columns to a table by retrieving column metadata from the information schema.
+        This method retrieves column details for the specified `table` from the
+        information schema and adds `Column` objects to the `Table`. It also sets
+        the partition columns (when present) and identifies the primary key
+        columns and foreign key constraints of the table.
+        Args:
+            table: The `Table` object to which columns will be added. The
+                `Table` object must have its `identifier` attribute set.
+        Returns:
+            The updated `Table` object with columns added.
+        Raises:
+            ValueError: If the table has no identifier set.
+        """
+        if not table.identifier:
+            raise ValueError("Please set the Identifier of the Table to use this method.")
+        cols_df = self._execute_sql(
+            f"""
+            SELECT * FROM {table.catalog}.information_schema.columns
+                WHERE table_name == '{table.name}'
+                AND table_schema == '{table.schema}'
+                ORDER BY ordinal_position
+            """,
+        ).collect()
+        partition_cols_indexed = {}
+        for col_row in cols_df:
+            generated = "GENERATED ALWAYS AS IDENTITY" if col_row["is_identity"] == "YES" else None
+            table.add_column(
+                Column(
+                    name=col_row["column_name"],
+                    data_type=col_row["data_type"],
+                    default_value=col_row["column_default"],
+                    generated=generated,
+                    nullable=col_row["is_nullable"] == "YES",
+                ),
+            )
+            if col_row["partition_index"] is not None:
+                # Key by the numeric index: string keys would sort "10" before "2".
+                partition_cols_indexed[int(col_row["partition_index"])] = col_row["column_name"]
+        partitioned_by = [val for _, val in sorted(partition_cols_indexed.items())]
+        if partitioned_by:
+            table.liquid_clustering = False
+            table.partition_by = partitioned_by
+        table = self._identify_pk_columns(table)
+        table = self._identify_fk_constraints(table)
+        return table
+    def _identify_pk_columns(self, table: Table) -> Table:
+        result = self._execute_sql(
+            f"""
+                SELECT A.column_name
+                FROM {table.catalog}.information_schema.key_column_usage AS A
+                JOIN {table.catalog}.information_schema.table_constraints AS B
+                    USING (constraint_catalog, constraint_schema, constraint_name)
+                WHERE
+                    A.table_catalog = '{table.catalog}'
+                AND A.table_schema = '{table.schema}'
+                AND A.table_name = '{table.name}'
+                AND B.constraint_type = 'PRIMARY KEY'
+            """,
+        ).collect()
+        table.composite_primary_key = [col_row["column_name"] for col_row in result]
+        return table
+    def _identify_fk_constraints(self, table: Table) -> Table:
+        result = self._execute_sql(
+            f"""
+                SELECT
+                concat_ws(".", C.table_catalog, C.table_schema, C.table_name) as source_table_identifier,
+                C.column_name as source_column,
+                concat_ws(".", B.table_catalog, B.table_schema, B.table_name) as parent_table_identifier,
+                B.column_name as parent_column
+                -- fk_option currently not supported
+                -- ,concat_ws(" ",D.match_option, "ON UPDATE", D.update_rule, "ON DELETE", D.delete_rule) AS fk_options
+                FROM {table.catalog}.information_schema.table_constraints AS A
+                LEFT JOIN {table.catalog}.information_schema.constraint_column_usage AS B USING(constraint_name)
+                LEFT JOIN {table.catalog}.information_schema.key_column_usage AS C USING(constraint_name)
+                -- LEFT JOIN {table.catalog}.information_schema.referential_constraints AS D USING(constraint_name)
+                WHERE
+                    A.table_catalog == '{table.catalog}'
+                AND A.table_schema = '{table.schema}'
+                AND A.table_name == '{table.name}'
+                AND A.constraint_type == "FOREIGN KEY"
+            """,
+        ).collect()
+        table.foreign_keys = [
+            ForeignKey(
+                foreign_key_columns=fk_row["source_column"],
+                parent_table=fk_row["parent_table_identifier"],
+                parent_columns=fk_row["parent_column"],
+            )
+            for fk_row in result
+        ]
+        return table

cloe_nessy/models/catalog.py ADDED Viewed

@@ -0,0 +1,10 @@
+from dataclasses import dataclass
+@dataclass
+class Catalog:
+    """A class representing a Unity Catalog - Catalog."""
+    name: str
+    owner: str = ""
+    comment: str = ""

cloe_nessy/models/column.py CHANGED Viewed

File without changes

cloe_nessy/models/constraint.py CHANGED Viewed

File without changes

cloe_nessy/models/foreign_key.py CHANGED Viewed

File without changes

cloe_nessy/models/mixins/__init__.py CHANGED Viewed

File without changes

cloe_nessy/models/mixins/read_instance_mixin.py CHANGED Viewed

File without changes

cloe_nessy/models/mixins/template_loader_mixin.py CHANGED Viewed

File without changes

cloe_nessy/models/schema.py CHANGED Viewed

@@ -17,6 +17,7 @@ class Schema(ReadInstancesMixin):
     storage_path: str | None = None
     tables: list[Table] = Field(default_factory=list)
     properties: dict[str, Any] = Field(default_factory=dict)
+    comment: str | None = None
     @classmethod
     def read_instance_from_file(
@@ -74,3 +75,21 @@ class Schema(ReadInstancesMixin):
             raise ValueError(f"Table {table_name} not found in {self.catalog}.{self.name} metadata.")
         return table
+    def add_table(self, table: Table):
+        """Adds a table to the schema and sets the table identifier accordingly.
+        Args:
+            table: A Table object that is added to the Schema tables.
+        """
+        table.identifier = f"{self.catalog}.{self.name}.{table.name}"
+        self.tables.append(table)
+    def add_tables(self, tables: list[Table]) -> None:
+        """Adds tables to the schema.
+        Args:
+            tables: A list of Table objects that are added to the Schema tables.
+        """
+        for table in tables:
+            self.add_table(table)

cloe_nessy/models/table.py CHANGED Viewed

@@ -24,11 +24,14 @@ class Table(TemplateLoaderMixin, ReadInstancesMixin, LoggerMixin):
     is_external: bool | None = None
     partition_by: list[str] = Field(default_factory=list)
     liquid_clustering: bool | None = None
+    composite_primary_key: list[str] = Field(default_factory=list)
     properties: dict[str, str] = Field(default_factory=dict)
     constraints: list[Constraint] = Field(default_factory=list)
     foreign_keys: list[ForeignKey] = Field(default_factory=list)
     storage_path: Path | None = None
+    business_properties: dict[str, str] = Field(default_factory=dict)
     comment: str | None = None
+    data_source_format: str | None = None
     def model_post_init(self, __context: Any) -> None:
         """Post init method for the Table model."""
@@ -87,10 +90,8 @@ class Table(TemplateLoaderMixin, ReadInstancesMixin, LoggerMixin):
     @model_validator(mode="after")
     def _validate_is_external(cls, table: Self):
-        """If is_external is set to False, storage_path has to be None."""
-        if not table.is_external and table.storage_path is not None:
-            raise ValueError("is_external cannot be false while storage_path is set.")
-        elif table.is_external and table.storage_path is None:
+        """If is_external is set to True, storage_path has to be set."""
+        if table.is_external and table.storage_path is None:
             raise ValueError("is_external cannot be true while storage_path is None.")
     @classmethod
@@ -222,7 +223,7 @@ class Table(TemplateLoaderMixin, ReadInstancesMixin, LoggerMixin):
     def get_create_statement(
         self,
-        templates: Path = Path("./templates"),
+        templates: Path = Path("./src/cloe_nessy/models/templates/"),
         template_name: str = "create_table.sql.j2",
         replace: bool = True,
     ):
@@ -234,3 +235,47 @@ class Table(TemplateLoaderMixin, ReadInstancesMixin, LoggerMixin):
             raise err
         render = template.render(table=self, replace=replace)
         return render
+    def get_column_by_name(self, column_name: str) -> Column | None:
+        """Get a column by name.
+        Args:
+            column_name: The name of the column to get.
+        Returns:
+            The column if found, else None.
+        """
+        for column in self.columns:
+            if column.name == column_name:
+                return column
+        return None
+    def update_column(self, column: Column) -> None:
+        """Replaces a Column with a new Column object to update it.
+        Args:
+            column: The new column object, to replace the old one.
+        """
+        self.remove_column(column)
+        self.add_column(column)
+    def add_column(self, column: Column):
+        """Adds a column to the table.
+        Args:
+            column: The column to be added.
+        """
+        self.columns.append(column)
+    def remove_column(self, column: str | Column) -> None:
+        """Remove a column from the Table.
+        Args:
+            column: The column to be removed.
+        """
+        if isinstance(column, Column):
+            column_name = column.name
+        else:
+            column_name = column
+        self.columns = [col for col in self.columns if col.name != column_name]

cloe_nessy/models/types.py CHANGED Viewed

File without changes

cloe_nessy/models/volume.py ADDED Viewed

@@ -0,0 +1,67 @@
+from pathlib import Path
+from typing import Any
+from jinja2 import TemplateNotFound
+from pydantic import BaseModel, field_validator
+from ..logging import LoggerMixin
+from .mixins.template_loader_mixin import TemplateLoaderMixin
+class Volume(TemplateLoaderMixin, LoggerMixin, BaseModel):
+    """Model of a volume addressed by a 'catalog.schema.volume_name' identifier."""
+    identifier: str
+    storage_path: str | Path
+    comment: str | None = None
+    @field_validator("identifier")
+    def check_identifier(cls, value):
+        """Ensure the identifier has exactly three dot-separated parts.
+        Raises:
+            ValueError: If the identifier does not contain exactly two dots.
+        """
+        if value.count(".") != 2:
+            raise ValueError("The identifier must be in the format 'catalog.schema.volume_name'.")
+        return value
+    @property
+    def storage_identifier(self) -> str:
+        """Return the `/Volumes/<catalog>/<schema>/<name>/` path of the volume."""
+        # Use `schema_name`: this model defines no `schema` property, so `self.schema`
+        # would resolve to pydantic's `BaseModel.schema` method instead of the schema name.
+        return f"/Volumes/{self.catalog}/{self.schema_name}/{self.name}/"
+    @property
+    def catalog(self) -> str:
+        """Return the catalog name."""
+        return self.identifier.split(".")[0]
+    @property
+    def schema_name(self) -> str:
+        """Return the schema name."""
+        return self.identifier.split(".")[1]
+    @property
+    def name(self) -> str:
+        """Return the volume name."""
+        return self.identifier.split(".")[2]
+    @property
+    def escaped_identifier(self) -> str:
+        """Return the identifier with each part wrapped in backticks."""
+        return f"`{self.catalog}`.`{self.schema_name}`.`{self.name}`"
+    def model_post_init(self, __context: Any) -> None:
+        """Post init method for the Volume model."""
+        self._console_logger = self.get_console_logger()
+        self._console_logger.debug(f"Model for volume [ '{self.identifier}' ] has been initialized.")
+    def get_create_statement(
+        self,
+        templates: Path = Path("./src/cloe_nessy/models/templates/"),
+        template_name: str = "create_volume.sql.j2",
+    ):
+        """Render the create statement for the Volume from a template.
+        Args:
+            templates: The directory containing the templates.
+            template_name: The file name of the template to render.
+        Returns:
+            The rendered create statement.
+        Raises:
+            TemplateNotFound: If the template cannot be found; the error is
+                logged before it is re-raised.
+        """
+        try:
+            template = self.get_template(templates, template_name)
+        except TemplateNotFound as err:
+            self._console_logger.error(f"Template [ {template_name} ] not found.")
+            raise err
+        render = template.render(volume=self)
+        return render

cloe_nessy/object_manager/__init__.py CHANGED Viewed

@@ -1,3 +1,8 @@
-from .table_manager import TableManager
+from .table_manager import TableManager, table_log_decorator
+from .volume_manager import VolumeManager
-__all__ = ["TableManager"]
+__all__ = [
+    "TableManager",
+    "table_log_decorator",
+    "VolumeManager",
+]

cloe-nessy 0.3.3__py3-none-any.whl → 0.3.8__py3-none-any.whl

cloe-nessy 0.3.3__py3-none-any.whl → 0.3.8__py3-none-any.whl