cloe-nessy 0.3.17.0__py3-none-any.whl → 0.3.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -102,7 +102,7 @@ class DeltaLoader(ABC, LoggerMixin):
  ),
  )
  catalog_writer = CatalogWriter()
- catalog_writer.write_table(
+ catalog_writer.write(
  df=metadata_df,
  table_identifier=self.metadata_table_identifier,
  mode="append",
@@ -71,9 +71,46 @@ class CatalogReader(BaseReader):
  except AnalysisException as err:
  raise ValueError(f"Table not found: {table_identifier}") from err
  except Exception as err:
- if delta_load_options:
- raise ReadOperationFailedError(f"Delta load failed for table '{table_identifier}': {err}") from err
- else:
- raise ReadOperationFailedError(
- f"An error occurred while reading the table '{table_identifier}': {err}"
- ) from err
+ raise ReadOperationFailedError(
+ f"An error occurred while reading the table '{table_identifier}': {err}"
+ ) from err
+
+ def read_stream(
+ self,
+ table_identifier: str = "",
+ *,
+ options: dict[str, str] | None = None,
+ **kwargs: Any,
+ ) -> DataFrame:
+ """Reads a streaming table from the Unity Catalog.
+
+ Args:
+ table_identifier: The table identifier in the Unity Catalog in the format 'catalog.schema.table'.
+ options: PySpark options for the read stream operation.
+ **kwargs: Additional keyword arguments to maintain compatibility with the base class method.
+
+ Returns:
+ The Spark Streaming DataFrame containing the read data.
+
+ Raises:
+ ValueError: If the table_identifier is not provided, is not a string, or is not in the correct format.
+ Exception: For any other unexpected errors during streaming read operation.
+ """
+ if options is None:
+ options = {}
+ if not table_identifier:
+ raise ValueError("table_identifier is required")
+ if not isinstance(table_identifier, str):
+ raise ValueError("table_identifier must be a string")
+ if len(table_identifier.split(".")) != 3:
+ raise ValueError("table_identifier must be in the format 'catalog.schema.table'")
+
+ try:
+ df = self._spark.readStream.table(table_identifier, **options)
+ return df
+ except AnalysisException as err:
+ raise ValueError(f"Table not found or not streamable: {table_identifier}") from err
+ except Exception as err:
+ raise ReadOperationFailedError(
+ f"An error occurred while reading the stream from table '{table_identifier}': {err}"
+ ) from err
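
The new `read_stream` sits alongside the batch `read` on `CatalogReader`. A minimal usage sketch, assuming an active Spark session and the import path implied by the wheel's RECORD (`cloe_nessy/integration/reader/catalog_reader.py`); the table identifier is illustrative, not from the package docs:

```python
from cloe_nessy.integration.reader import CatalogReader

reader = CatalogReader()
stream_df = reader.read_stream(
    table_identifier="my_catalog.business_schema.sales_table",  # must be catalog.schema.table
    options={},  # forwarded to the underlying spark.readStream call
)
# stream_df is an unbounded streaming DataFrame; pair it with the
# CatalogWriter.write_stream method added in the same release to persist it.
```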
@@ -5,7 +5,7 @@ class CatalogWriter:
  """A writer for Catalog tables."""

  @staticmethod
- def write_table(
+ def write(
  df: DataFrame | None,
  table_identifier: str | None,
  partition_by: str | list[str] | None = None,
@@ -46,3 +46,65 @@ class CatalogWriter:
  if options is None:
  options = {}
  df.write.saveAsTable(table_identifier, mode=mode, partitionBy=partition_by, **options)
+
+ @staticmethod
+ def write_stream(
+ df: DataFrame | None,
+ table_identifier: str | None,
+ checkpoint_location: str | None = None,
+ trigger_dict: dict | None = None,
+ options: dict[str, str] | None = None,
+ mode: str = "append",
+ await_termination: bool = False,
+ ) -> None:
+ """Write a streaming DataFrame to a Unity Catalog table.
+
+ Args:
+ df: The streaming DataFrame to write.
+ table_identifier: The table identifier in the Unity Catalog in the
+ format 'catalog.schema.table'.
+ checkpoint_location: Location for checkpointing. Required for stream recovery.
+ trigger_dict: A dictionary specifying the trigger configuration for the streaming query.
+ Supported keys include:
+ - "processingTime": Specifies a time interval (e.g., "10 seconds") for micro-batch processing.
+ - "once": Processes all available data once and then stops.
+ - "continuous": Specifies a time interval (e.g., "1 second") for continuous processing.
+ - "availableNow": Processes all available data immediately and then stops.
+ If nothing is provided, the default is {"availableNow": True}.
+ options: PySpark options for the DataFrame streaming write operation.
+ mode: The write mode. For streaming, typically "append".
+ await_termination: If True, the function will wait for the streaming
+ query to finish before returning.
+
+ Raises:
+ ValueError: If the mode is not supported for streaming operations.
+ ValueError: If the table_identifier is not a string or not in the format 'catalog.schema.table'.
+ ValueError: If the DataFrame is None.
+ ValueError: If checkpoint_location is not provided.
+ """
+ if mode not in ("append", "complete", "update"):
+ raise ValueError("mode must be one of append, complete, update for streaming operations")
+ if not table_identifier:
+ raise ValueError("table_identifier is required")
+ elif not isinstance(table_identifier, str):
+ raise ValueError("table_identifier must be a string")
+ elif len(table_identifier.split(".")) != 3:
+ raise ValueError("table_identifier must be in the format 'catalog.schema.table'")
+ if not df:
+ raise ValueError("df is required, but was None.")
+ if not checkpoint_location:
+ raise ValueError("checkpoint_location is required for streaming operations")
+
+ if options is None:
+ options = {}
+ if trigger_dict is None:
+ trigger_dict = {"availableNow": True}
+
+ stream_writer = df.writeStream.format("delta").outputMode(mode)
+ stream_writer.options(**options).option("checkpointLocation", checkpoint_location)
+ stream_writer.trigger(**trigger_dict)
+
+ query = stream_writer.toTable(table_identifier)
+
+ if await_termination:
+ query.awaitTermination()
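
A hedged sketch of calling the new `write_stream` static method; the rate source stands in for a real streaming DataFrame (in a pipeline it would come from `CatalogReader.read_stream`), and the identifier and checkpoint path are placeholders:

```python
from pyspark.sql import SparkSession

from cloe_nessy.integration.writer import CatalogWriter

spark = SparkSession.builder.getOrCreate()
# Toy streaming source used only for illustration.
stream_df = spark.readStream.format("rate").option("rowsPerSecond", 5).load()

CatalogWriter.write_stream(
    df=stream_df,
    table_identifier="my_catalog.business_schema.sales_table",  # placeholder identifier
    checkpoint_location="/tmp/checkpoints/sales_table",  # placeholder path
    trigger_dict={"availableNow": True},  # also the default when omitted
    mode="append",
    await_termination=True,  # block until the availableNow batch finishes
)
```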
@@ -196,7 +196,11 @@ class DeltaMergeWriter(BaseDeltaWriter):

  config = DeltaMergeConfig(dataframe_columns=data_frame.columns, **kwargs)

- delta_table = self.table_manager.get_delta_table(location=storage_path, spark=data_frame.sparkSession)
+ delta_table = self.table_manager.get_delta_table(
+ table=table,
+ location=storage_path if not table else None,
+ spark=data_frame.sparkSession,
+ )

  match_conditions = self._build_match_conditions(data_frame, config)

@@ -5,6 +5,7 @@ from pydantic import BaseModel, Field, field_validator, model_validator

  COLUMN_DATA_TYPE_LIST = {
  "string",
+ "decimal",
  "integer",
  "int",
  "smallint",
@@ -31,7 +32,7 @@ class Column(BaseModel):
  nullable: bool = True
  default_value: Any = None
  generated: str | None = None
- properties: dict[str, Any] = Field(default_factory=dict)
+ business_properties: dict[str, Any] = Field(default_factory=dict)
  comment: str | None = None

  @field_validator("data_type", mode="before")
@@ -43,7 +44,7 @@ class Column(BaseModel):
  """
  val = raw.lower()
  base_data_types = re.findall(r"\b[a-z]+\b", val)
- forbidden_characters = re.findall(r"[^a-z\<\>)]+", val)
+ forbidden_characters = re.findall(r"[^a-z0-9\(\)\<\>, ]+", val)

  if forbidden_characters:
  raise ValueError(f"Forbidden characters in data type definition [ '{val}' ]: [' {forbidden_characters} ']")
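
The widened character class is what lets parameterized types such as `decimal(10,2)` pass validation. A quick standalone comparison of the two patterns:

```python
import re

val = "decimal(10,2)"
# Old pattern: digits, parentheses and commas were reported as forbidden.
print(re.findall(r"[^a-z\<\>)]+", val))          # ['(10,2']
# New pattern: 0-9, (), <>, comma and space are allowed, so nothing is flagged.
print(re.findall(r"[^a-z0-9\(\)\<\>, ]+", val))  # []
```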
@@ -43,6 +43,7 @@ class Schema(ReadInstancesMixin):
  raise FileNotFoundError("Schema file not found.")

  schema, schema_errors = super().read_instance_from_file(processed_instance_path)
+ table_errors: list[ValidationErrorType] = []
  if schema:
  schema.storage_path = "" if not schema.storage_path else schema.storage_path
  tables, table_errors = Table.read_instances_from_directory(
@@ -13,6 +13,14 @@ USING delta
  {% if table.storage_path %}
  LOCATION '{{ table.storage_path }}'
  {% endif %}
+ {% if table.properties %}
+ TBLPROPERTIES (
+ {%- for key, value in table.properties.items() %}
+ {%- if not loop.first %}, {% endif -%}
+ '{{key}}' = '{{value}}'
+ {%- endfor -%}
+ )
+ {% endif %}
  {% if table.partition_by -%}
  {%- if table.liquid_clustering -%} CLUSTER {%- else -%} PARTITIONED {%- endif %} BY (
  {%- for column in table.partition_by -%}
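
For reference, the comma handling of the added block can be checked with a trimmed-down inline copy of the loop (the real template ships as `cloe_nessy/models/templates/create_table.sql.j2`); the property keys below are purely illustrative:

```python
from jinja2 import Template

snippet = Template(
    "TBLPROPERTIES (\n"
    "{%- for key, value in properties.items() %}"
    "{%- if not loop.first %}, {% endif -%}"
    "'{{key}}' = '{{value}}'"
    "{%- endfor -%}\n"
    ")"
)
print(snippet.render(properties={"delta.enableChangeDataFeed": "true", "quality": "bronze"}))
# -> TBLPROPERTIES ('delta.enableChangeDataFeed' = 'true', 'quality' = 'bronze')
```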
@@ -34,3 +42,17 @@ ALTER TABLE {{ table.escaped_identifier }} ADD CONSTRAINT {{constraint.name}} CH
  {%- if table.comment %}
  COMMENT ON TABLE {{ table.escaped_identifier }} IS '{{ table.comment }}';
  {%- endif %}
+ {# Tags do not yet work in Databricks
+ {%- if table.business_properties %}
+ {%- for tag_key, tag_value in table.business_properties.items() %}
+ SET TAG ON TABLE {{ table.escaped_identifier }} `{{ tag_key }}`{% if tag_value %} = `{{ tag_value }}`{% endif %};
+ {%- endfor %}
+ {%- endif %}
+
+ {%- for column in table.columns %}
+ {%- if column.business_properties %}
+ {%- for tag_key, tag_value in column.business_properties.items() %}
+ SET TAG ON COLUMN {{ table.escaped_identifier }}.`{{ column.name }}` `{{ tag_key }}`{% if tag_value %} = `{{ tag_value }}`{% endif %};
+ {%- endfor %}
+ {%- endif %}
+ {%- endfor %} #}
@@ -110,7 +110,7 @@ class TableManager(LoggerMixin):
  self._spark.sql(f"USE CATALOG {table.catalog};")
  self._spark.sql(f"USE SCHEMA {table.schema};")
  for statement in table.get_create_statement(replace=replace).split(";"):
- if statement and statement != "\n":
+ if statement and statement.strip():
  self._spark.sql(statement)

  def drop_table(
@@ -186,6 +186,9 @@ class TableManager(LoggerMixin):
  def get_delta_table(self, table: Table | None = None, location: str | None = None, spark=None) -> DeltaTable:
  """Get the DeltaTable object from the Table objects location or a location string.

+ For managed tables, uses the table identifier to access the DeltaTable.
+ For external tables or when a location is provided, uses the storage path.
+
  Args:
  table: A Table object representing the Delta table.
  location: A string representing the table location.
@@ -195,17 +198,35 @@ class TableManager(LoggerMixin):
  The DeltaTable object corresponding to the given Table object or location string.

  Raises:
- ValueError: If neither table nor location is provided, or if both are provided.
+ ValueError: If neither table nor location is provided.
  """
- if (table is None and location is None) or (table is not None and location is not None):
+ if table is None and location is None:
  raise ValueError(
- f"Either table or location must be provided, but not both. Table: {table}, location: {location}",
+ f"Either table or location must be provided. Table: {table}, location: {location}",
+ )
+
+ spark_session = spark or self._spark
+
+ if table is not None and location is not None:
+ self._console_logger.info(
+ f"Both table ({table.identifier}) and location ({location}) provided. Using table object as priority."
  )

  if table is not None:
- location = str(table.storage_path)
+ if table.is_external is False:
+ self._console_logger.info(f"Getting DeltaTable object for managed table: {table.identifier}")
+ return DeltaTable.forName(spark_session, table.identifier)
+
+ table_location = str(table.storage_path)
+ self._console_logger.info(f"Getting DeltaTable object for external table location: {table_location}")
+ return DeltaTable.forPath(spark_session, table_location)
+
+ self._console_logger.info(f"No table object provided, using location: {location}")
+ if location is None:
+ self._console_logger.error("Location is None - this should not happen!")
+ raise ValueError("Location cannot be None when no table object is provided")
  self._console_logger.info(f"Getting DeltaTable object for location: {location}")
- return DeltaTable.forPath(spark or self._spark, str(location))
+ return DeltaTable.forPath(spark_session, str(location))

  def table_exists(self, table: Table | None = None, table_identifier: str | None = None) -> bool:
  """Checks if a table exists in the catalog.
@@ -235,9 +256,10 @@ class TableManager(LoggerMixin):
  raise ValueError("Invalid table identifier format. Expected 'catalog.schema.table'.")

  query_result = self._spark.sql(
+ # Using both upper and lower case to ensure compatibility with case changes in Databricks
  f"""
  SELECT 1 FROM {catalog}.information_schema.tables
- WHERE table_name = '{table_name}'
+ WHERE table_name in ('{table_name}', '{table_name.lower()}')
  AND table_schema = '{schema}'
  LIMIT 1""",
  )
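
Taken together, the `TableManager` hunks mean callers can pass either a `Table` object or a bare location to `get_delta_table`, and `table_exists` now also matches the lowercased table name. A hedged sketch of the location-based branch, assuming an active Spark session with Delta support (the path is illustrative):

```python
from cloe_nessy.object_manager import TableManager

manager = TableManager()
# Path-based access, e.g. for an external table location:
delta_table = manager.get_delta_table(
    location="abfss://lake@storage.dfs.core.windows.net/sales_data/sales_table"
)
# Passing a Table object instead resolves managed tables (is_external is False) via
# DeltaTable.forName(spark, table.identifier) and external ones via DeltaTable.forPath.
```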
@@ -23,13 +23,44 @@ class ReadCatalogTableAction(PipelineAction):
  options:
  table_identifier: my_catalog.business_schema.sales_table
  options: <options for the CatalogReader read method>
- delta_load_options:
- strategy: CDF
- delta_load_identifier: my_delta_load_id
- strategy_options:
- deduplication_columns: ["id"]
- enable_full_load: true
+ delta_load_options:
+ strategy: CDF
+ delta_load_identifier: my_delta_load_id
+ strategy_options:
+ deduplication_columns: ["id"]
+ enable_full_load: true
  ```
+ === "Batch Read"
+ ```yaml
+ Read Sales Table:
+ action: READ_CATALOG_TABLE
+ options:
+ table_identifier: my_catalog.business_schema.sales_table
+ options: <options for the CatalogReader read method>
+ ```
+ === "Streaming Read"
+ ```yaml
+ Read Sales Table Stream:
+ action: READ_CATALOG_TABLE
+ options:
+ table_identifier: my_catalog.business_schema.sales_table
+ stream: true
+ options: <options for the CatalogReader read_stream method>
+ ```
+ === "Delta Load Read"
+ ```yaml
+ Read Sales Table:
+ action: READ_CATALOG_TABLE
+ options:
+ table_identifier: my_catalog.business_schema.sales_table
+ options: <options for the CatalogReader read method>
+ delta_load_options:
+ strategy: CDF
+ delta_load_identifier: my_delta_load_id
+ strategy_options:
+ deduplication_columns: ["id"]
+ enable_full_load: true
+ ```
  """

  name: str = "READ_CATALOG_TABLE"
@@ -41,6 +72,7 @@ class ReadCatalogTableAction(PipelineAction):
  table_identifier: str | None = None,
  options: dict[str, str] | None = None,
  delta_load_options: dict[Any, Any] | DeltaLoadOptions | None = None,
+ stream: bool = False,
  **_: Any, # define kwargs to match the base class signature
  ) -> PipelineContext:
  """Reads a table from Unity Catalog using a specified table identifier and optional reader configurations.
@@ -56,6 +88,16 @@ class ReadCatalogTableAction(PipelineAction):
  behavior, such as filters or reading modes. Defaults to None.
  delta_load_options: Options for delta loading, if applicable.
  Configures the [`DeltaLoader`][cloe_nessy.integration.delta_loader].
+ behavior, such as filters or reading modes.
+ stream: If True, the action will read the table as a stream.
+ checkpoint_location: The location for storing
+ checkpoints if streaming is enabled.
+ trigger_dict: A dictionary specifying the trigger
+ configuration for the streaming query, such as processing time or
+ continuous processing.
+ behavior, such as filters or reading modes. Defaults to None.
+ delta_load_options: Options for delta loading, if applicable.
+ Configures the [`DeltaLoader`][cloe_nessy.integration.delta_loader].

  Raises:
  ValueError: If neither `table_identifier` nor `table_metadata.identifier` in the `context` is provided.
@@ -89,10 +131,31 @@ class ReadCatalogTableAction(PipelineAction):
  runtime_info=context.runtime_info or {},
  )

- table_reader = CatalogReader()
- df = table_reader.read(
+ if isinstance(delta_load_options, dict):
+ delta_options_dict = delta_load_options
+ if delta_load_options:
+ delta_load_options = DeltaLoadOptions(**delta_load_options)
+ else:
+ delta_load_options = None
+ else:
+ delta_options_dict = delta_load_options.model_dump() if delta_load_options else {}
+
+ runtime_info = set_delta_load_info(
  table_identifier=table_identifier,
- options=options,
- delta_load_options=delta_load_options,
+ delta_load_options=delta_options_dict,
+ runtime_info=context.runtime_info or {},
  )
+
+ table_reader = CatalogReader()
+
+ if stream:
+ context.runtime_info = (context.runtime_info or {}) | {"streaming": True}
+ df = table_reader.read_stream(table_identifier=table_identifier, options=options)
+ else:
+ df = table_reader.read(
+ table_identifier=table_identifier,
+ options=options,
+ delta_load_options=delta_load_options,
+ )
+
  return context.from_existing(data=df, runtime_info=runtime_info)
@@ -1,66 +1,94 @@
- import pathlib
+ from pathlib import Path
  from typing import Any

- from ...models import Schema
+ from ...models import Table
  from ..pipeline_action import PipelineAction
  from ..pipeline_context import PipelineContext


  class ReadMetadataYAMLAction(PipelineAction):
- """Reads schema metadata from a yaml file using the [`Schema`][cloe_nessy.models.schema] model.
+ """Reads table metadata from a yaml file using the [`Table`][cloe_nessy.models.table] model.

  Example:
- ```yaml
- Read Schema Metadata:
- action: READ_METADATA_YAML_ACTION
- options:
- path: excel_file_folder/excel_files_june/
- file_name: sales_schema.yml
- table_name: sales
- ```
+ === "Managed Table"
+ ```yaml
+ Read Table Metadata:
+ action: READ_METADATA_YAML_ACTION
+ options:
+ file_path: metadata/schemas/bronze/sales_table.yml
+ catalog_name: production
+ schema_name: sales_data
+ ```
+ === "External Table"
+ ```yaml
+ Read Table Metadata:
+ action: READ_METADATA_YAML_ACTION
+ options:
+ file_path: metadata/schemas/bronze/sales_table.yml
+ catalog_name: production
+ schema_name: sales_data
+ storage_path: abfs://external_storage/sales_data/sales_table
+ ```
  """

  name: str = "READ_METADATA_YAML_ACTION"

- @staticmethod
  def run(
+ self,
  context: PipelineContext,
  *,
- path: str | None = None,
- file_name: str | None = None,
- table_name: str | None = None,
+ file_path: str | None = None,
+ catalog_name: str | None = None,
+ schema_name: str | None = None,
+ storage_path: str | None = None,
  **_: Any,
  ) -> PipelineContext:
- """Reads schema metadata from a yaml file using the [`Schema`][cloe_nessy.models.schema] model.
+ """Reads table metadata from a yaml file using the [`Table`][cloe_nessy.models.table] model.

  Args:
  context: The context in which this Action is executed.
- path: The path to the data contract directory.
- file_name: The name of the file that defines the schema.
- table_name: The name of the table for which to retrieve metadata.
+ file_path: The path to the file that defines the table.
+ catalog_name: The name of the catalog for the table.
+ schema_name: The name of the schema for the table.
+ storage_path: The storage path for the table, if applicable. If not
+ provided, the table will be considered a managed table.

  Raises:
- ValueError: If any issues occur while reading the schema, such as an invalid schema,
- missing file, or missing path.
+ ValueError: If any issues occur while reading the table metadata, such as an invalid table,
+ missing file, missing path, or missing catalog/schema names.

  Returns:
  The context after the execution of this Action, containing the table metadata.
  """
- if not path:
- raise ValueError("No path provided. Please specify path to schema metadata.")
- if not file_name:
- raise ValueError("No file_name provided. Please specify file name.")
- if not table_name:
- raise ValueError("No table_name provided. Please specify table name.")
+ missing_params = []
+ if not file_path:
+ missing_params.append("file_path")
+ if not catalog_name:
+ missing_params.append("catalog_name")
+ if not schema_name:
+ missing_params.append("schema_name")

- path_obj = pathlib.Path(path)
+ if missing_params:
+ raise ValueError(
+ f"Missing required parameters: {', '.join(missing_params)}. Please specify all required parameters."
+ )

- schema, errors = Schema.read_instance_from_file(path_obj / file_name)
+ final_file_path = Path(file_path) if file_path else Path()
+
+ table, errors = Table.read_instance_from_file(
+ final_file_path,
+ catalog_name=catalog_name,
+ schema_name=schema_name,
+ )
  if errors:
- raise ValueError(f"Errors while reading schema metadata: {errors}")
- if not schema:
- raise ValueError("No schema found in metadata.")
+ raise ValueError(f"Errors while reading table metadata: {errors}")
+ if not table:
+ raise ValueError("No table found in metadata.")

- table = schema.get_table_by_name(table_name=table_name)
+ if not table.storage_path and storage_path:
+ self._console_logger.info(f"Setting storage path for table [ '{table.name}' ] to [ '{storage_path}' ]")
+ table.storage_path = storage_path
+ table.is_external = True

+ self._console_logger.info(f"Table [ '{table.name}' ] metadata read successfully from [ '{file_path}' ]")
  return context.from_existing(table_metadata=table)
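
A hedged sketch of the metadata read the reworked action now performs, mirroring the calls visible in the hunk; the file path, catalog and schema names are illustrative, and the YAML file must be parseable by the `Table` model:

```python
from pathlib import Path

from cloe_nessy.models import Table

table, errors = Table.read_instance_from_file(
    Path("metadata/schemas/bronze/sales_table.yml"),  # illustrative path
    catalog_name="production",
    schema_name="sales_data",
)
if errors:
    raise ValueError(f"Errors while reading table metadata: {errors}")
```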
@@ -1,5 +1,7 @@
  from typing import Any

+ from pyspark.sql import functions as F
+
  from ..pipeline_action import PipelineAction
  from ..pipeline_context import PipelineContext
  from ..pipeline_step import PipelineStep
@@ -13,20 +15,74 @@ class TransformJoinAction(PipelineAction):
  from [PySpark
  documentation](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.join.html)

- Example:
- ```yaml
- Join Tables:
- action: TRANSFORM_JOIN
- options:
- joined_data: ((step:Transform First Table))
- join_on: id
- how: anti
- ```
+ Examples:
+ === "Simple Column Join"
+ ```yaml
+ Join Tables:
+ action: TRANSFORM_JOIN
+ options:
+ joined_data: ((step:Transform First Table))
+ join_on: id
+ how: inner
+ ```
+
+ === "Multiple Columns Join"
+ ```yaml
+ Join Tables:
+ action: TRANSFORM_JOIN
+ options:
+ joined_data: ((step:Transform First Table))
+ join_on: [customer_id, order_date]
+ how: left
+ ```
+
+ === "Dictionary Join (Different Column Names)"
+ ```yaml
+ Join Tables:
+ action: TRANSFORM_JOIN
+ options:
+ joined_data: ((step:Transform First Table))
+ join_on:
+ customer_id: cust_id
+ order_date: date
+ how: inner
+ ```
+
+ === "Complex Join with Literals and Expressions"
+ ```yaml
+ Join Tables:
+ action: TRANSFORM_JOIN
+ options:
+ joined_data: ((step:Load Conditions Table))
+ join_condition: |
+ left.material = right.material
+ AND right.sales_org = '10'
+ AND right.distr_chan = '10'
+ AND right.knart = 'ZUVP'
+ AND right.lovmkond <> 'X'
+ AND right.sales_unit = 'ST'
+ AND left.calday BETWEEN
+ to_date(right.date_from, 'yyyyMMdd') AND
+ to_date(right.date_to, 'yyyyMMdd')
+ how: left
+ ```

  !!! note "Referencing a DataFrame from another step"
  The `joined_data` parameter is a reference to the DataFrame from another step.
  The DataFrame is accessed using the `result` attribute of the PipelineStep. The syntax
  for referencing the DataFrame is `((step:Step Name))`, mind the double parentheses.
+
+ !!! tip "Dictionary Join Syntax"
+ When using a dictionary for `join_on`, the keys represent columns
+ from the DataFrame in context and the values represent columns from
+ the DataFrame in `joined_data`. This is useful when joining tables
+ with different column names for the same logical entity.
+
+ !!! tip "Complex Join Conditions"
+ Use `join_condition` instead of `join_on` for complex joins with literals,
+ expressions, and multiple conditions. Reference columns using `left.column_name`
+ for the main DataFrame and `right.column_name` for the joined DataFrame.
+ Supports all PySpark functions and operators.
  """

  name: str = "TRANSFORM_JOIN"
@@ -37,6 +93,7 @@ class TransformJoinAction(PipelineAction):
  *,
  joined_data: PipelineStep | None = None,
  join_on: list[str] | str | dict[str, str] | None = None,
+ join_condition: str | None = None,
  how: str = "inner",
  **_: Any,
  ) -> PipelineContext:
@@ -49,13 +106,17 @@ class TransformJoinAction(PipelineAction):
  join_on: A string for the join column
  name, a list of column names, or a dictionary mapping columns from the
  left DataFrame to the right DataFrame. This defines the condition for the
- join operation.
+ join operation. Mutually exclusive with join_condition.
+ join_condition: A string containing a complex join expression with literals,
+ functions, and multiple conditions. Use 'left.' and 'right.' prefixes
+ to reference columns from respective DataFrames. Mutually exclusive with join_on.
  how: The type of join to perform. Must be one of: inner, cross, outer,
  full, fullouter, left, leftouter, right, rightouter, semi, anti, etc.

  Raises:
  ValueError: If no joined_data is provided.
- ValueError: If no join_on is provided.
+ ValueError: If neither join_on nor join_condition is provided.
+ ValueError: If both join_on and join_condition are provided.
  ValueError: If the data from context is None.
  ValueError: If the data from the joined_data is None.

@@ -64,8 +125,12 @@ class TransformJoinAction(PipelineAction):
  """
  if joined_data is None or joined_data.result is None or joined_data.result.data is None:
  raise ValueError("No joined_data provided.")
- if not join_on:
- raise ValueError("No join_on provided.")
+
+ if not join_on and not join_condition:
+ raise ValueError("Either join_on or join_condition must be provided.")
+
+ if join_on and join_condition:
+ raise ValueError("Cannot specify both join_on and join_condition. Use one or the other.")

  if context.data is None:
  raise ValueError("Data from the context is required for the operation.")
@@ -73,16 +138,25 @@ class TransformJoinAction(PipelineAction):
  df_right = joined_data.result.data.alias("right") # type: ignore
  df_left = context.data.alias("left") # type: ignore

- if isinstance(join_on, str):
- join_condition = [join_on]
- elif isinstance(join_on, list):
- join_condition = join_on
- else:
- join_condition = [
- df_left[left_column] == df_right[right_column] # type: ignore
- for left_column, right_column in join_on.items()
- ]
-
- df = df_left.join(df_right, on=join_condition, how=how) # type: ignore
+ if join_condition:
+ try:
+ condition = F.expr(join_condition)
+ except Exception as e:
+ # this will not raise an error in most cases, because the evaluation of the expression is lazy
+ raise ValueError(f"Failed to parse join condition '{join_condition}': {str(e)}") from e
+ df = df_left.join(df_right, on=condition, how=how) # type: ignore
+
+ if join_on:
+ if isinstance(join_on, str):
+ join_condition_list = [join_on]
+ elif isinstance(join_on, list):
+ join_condition_list = join_on
+ else:
+ join_condition_list = [
+ df_left[left_column] == df_right[right_column] # type: ignore
+ for left_column, right_column in join_on.items()
+ ]
+
+ df = df_left.join(df_right, on=join_condition_list, how=how) # type: ignore

  return context.from_existing(data=df) # type: ignore
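
A self-contained sketch of the join style the new `join_condition` option maps to: both DataFrames are aliased as `left`/`right` and the condition string is parsed with `F.expr`. The sample columns echo the docstring example; the data is made up:

```python
from pyspark.sql import SparkSession, functions as F

spark = SparkSession.builder.getOrCreate()
left = spark.createDataFrame(
    [(100, "20240615")], ["material", "calday"]
).alias("left")
right = spark.createDataFrame(
    [(100, "10", "20240101", "20241231")],
    ["material", "sales_org", "date_from", "date_to"],
).alias("right")

condition = F.expr(
    "left.material = right.material "
    "AND right.sales_org = '10' "
    "AND left.calday BETWEEN right.date_from AND right.date_to"
)
left.join(right, on=condition, how="left").show()
```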
@@ -22,8 +22,8 @@ class TransformUnionAction(PipelineAction):
  action: TRANSFORM_UNION
  options:
  union_data:
- - ((step: Filter First Table))
- - ((step: SQL Transform Second Table))
+ - ((step:Filter First Table))
+ - ((step:SQL Transform Second Table))
  ```
  !!! note "Referencing a DataFrame from another step"
  The `union_data` parameter is a reference to the DataFrame from another step.
@@ -2,6 +2,7 @@ from typing import Any

  from ...integration.delta_loader import consume_delta_load
  from ...integration.writer import CatalogWriter
+ from ...object_manager import TableManager
  from ..pipeline_action import PipelineAction
  from ..pipeline_context import PipelineContext

@@ -9,17 +10,31 @@ from ..pipeline_context import PipelineContext
  class WriteCatalogTableAction(PipelineAction):
  """Writes a DataFrame to a specified catalog table using [CatalogWriter][cloe_nessy.integration.writer.CatalogWriter].

- Example:
- ```yaml
- Write Table to Catalog:
- action: WRITE_CATALOG_TABLE
- options:
- table_identifier: my_catalog.business_schema.sales_table
- mode: append
- partition_by: day
+ Examples:
+ === "Batch Write"
+ ```yaml
+ Write Table to Catalog:
+ action: WRITE_CATALOG_TABLE
  options:
- mergeSchema: true
- ```
+ table_identifier: my_catalog.business_schema.sales_table
+ mode: append
+ partition_by: day
+ options:
+ mergeSchema: true
+ ```
+ === "Streaming Write"
+ ```yaml
+ Write Table to Catalog Stream:
+ action: WRITE_CATALOG_TABLE
+ options:
+ table_identifier: my_catalog.business_schema.sales_table
+ mode: append
+ checkpoint_location: /path/to/checkpoint
+ trigger_dict:
+ processingTime: 10 seconds
+ options:
+ mergeSchema: true
+ ```
  """

  name: str = "WRITE_CATALOG_TABLE"
@@ -32,6 +47,9 @@ class WriteCatalogTableAction(PipelineAction):
  mode: str = "append",
  partition_by: str | list[str] | None = None,
  options: dict[str, str] | None = None,
+ checkpoint_location: str | None = None,
+ trigger_dict: dict | None = None,
+ await_termination: bool = False,
  **_: Any,
  ) -> PipelineContext:
  """Writes a DataFrame to a specified catalog table.
@@ -44,7 +62,11 @@ class WriteCatalogTableAction(PipelineAction):
  mode: The write mode. One of 'append', 'overwrite', 'error',
  'errorifexists', or 'ignore'.
  partition_by: Names of the partitioning columns.
- options: PySpark options for the DataFrame.saveAsTable operation (e.g. mergeSchema:true).
+ checkpoint_location: Location for checkpointing.
+ trigger_dict: A dictionary specifying the trigger configuration for the streaming query.
+ await_termination: If True, the function will wait for the streaming
+ query to finish before returning.
+ options: Additional options for the DataFrame write operation.

  Raises:
  ValueError: If the table name is not specified or cannot be inferred from
@@ -55,25 +77,48 @@ class WriteCatalogTableAction(PipelineAction):
  """
  if not options:
  options = dict()
- if partition_by is None:
- if hasattr(context.table_metadata, "partition_by"):
- partition_by = context.table_metadata.partition_by # type: ignore
+ streaming = context.runtime_info and context.runtime_info.get("streaming")
+ if streaming and not checkpoint_location:
+ raise ValueError("Checkpoint location must be specified for streaming writes.")
+ if (
+ partition_by is None
+ and context.table_metadata is not None
+ and hasattr(context.table_metadata, "partition_by")
+ and not context.table_metadata.liquid_clustering
+ ):
+ partition_by = context.table_metadata.partition_by # type: ignore

  if (table_metadata := context.table_metadata) and table_identifier is None:
  table_identifier = table_metadata.identifier
  if table_identifier is None:
  raise ValueError("Table name must be specified or a valid Table object with identifier must be set.")

+ if table_metadata:
+ manager = TableManager()
+ manager.create_table(table=table_metadata, ignore_if_exists=True, replace=False)
+
  runtime_info = getattr(context, "runtime_info", None)
  if runtime_info and runtime_info.get("is_delta_load"):
  consume_delta_load(runtime_info)

  writer = CatalogWriter()
- writer.write_table(
- df=context.data, # type: ignore
- table_identifier=table_identifier,
- mode=mode,
- partition_by=partition_by,
- options=options,
- )
+
+ if streaming:
+ writer.write_stream(
+ df=context.data, # type: ignore
+ table_identifier=table_identifier,
+ checkpoint_location=checkpoint_location,
+ trigger_dict=trigger_dict,
+ options=options,
+ mode=mode,
+ await_termination=await_termination,
+ )
+ else:
+ writer.write(
+ df=context.data, # type: ignore
+ table_identifier=table_identifier,
+ mode=mode,
+ partition_by=partition_by,
+ options=options,
+ )
  return context.from_existing()
@@ -117,6 +117,7 @@ class WriteDeltaMergeAction(PipelineAction):

  delta_merge_writer.write(
  table_identifier=context.table_metadata.identifier,
+ table=context.table_metadata,
  storage_path=str(context.table_metadata.storage_path),
  data_frame=context.data,
  key_columns=key_columns,
@@ -1,36 +1,36 @@
  Metadata-Version: 2.4
  Name: cloe-nessy
- Version: 0.3.17.0
+ Version: 0.3.18
  Summary: Your friendly datalake monster.
+ Project-URL: homepage, https://initions.com/
  Author-email: initions <ICSMC_EXT_PYPIORG@accenture.com>
  License: MIT
- Project-URL: homepage, https://initions.com/
  Classifier: Development Status :: 5 - Production/Stable
  Classifier: Environment :: Console
- Classifier: License :: OSI Approved :: MIT License
  Classifier: Intended Audience :: Developers
+ Classifier: License :: OSI Approved :: MIT License
  Classifier: Operating System :: OS Independent
  Classifier: Programming Language :: Python :: 3
  Classifier: Topic :: Database
  Requires-Python: <3.13,>=3.11
- Description-Content-Type: text/markdown
- Requires-Dist: pydantic<3.0.0,>=2.7.2
- Requires-Dist: pyyaml<7.0.0,>=6.0.1
- Requires-Dist: types-pyyaml<7.0.0.0,>=6.0.12.20240311
- Requires-Dist: jinja2<4.0.0,>=3.1.4
- Requires-Dist: pydantic-settings<3.0.0,>=2.4.0
- Requires-Dist: openpyxl<4.0.0,>=3.1.5
- Requires-Dist: requests<3.0.0,>=2.32.3
- Requires-Dist: types-requests<3.0.0.0,>=2.32.0.20240712
- Requires-Dist: pandas-stubs<3.0.0.0,>=2.2.2.240807
  Requires-Dist: azure-identity<2.0.0,>=1.19.0
- Requires-Dist: httpx<1.0.0,>=0.27.2
+ Requires-Dist: cloe-logging[databricks,log-analytics]<0.4,>=0.3.8
  Requires-Dist: databricks-sdk<1.0.0,>=0.36.0
- Requires-Dist: networkx<4.0,>=3.3
+ Requires-Dist: fsspec<2025.7.1,>=2025.7.0
+ Requires-Dist: httpx<1.0.0,>=0.27.2
+ Requires-Dist: jinja2<4.0.0,>=3.1.4
  Requires-Dist: matplotlib<4.0.0,>=3.9.2
+ Requires-Dist: networkx<4.0,>=3.3
+ Requires-Dist: openpyxl<4.0.0,>=3.1.5
+ Requires-Dist: pandas-stubs<3.0.0.0,>=2.2.2.240807
+ Requires-Dist: pydantic-settings<3.0.0,>=2.4.0
+ Requires-Dist: pydantic<3.0.0,>=2.7.2
+ Requires-Dist: pyyaml<7.0.0,>=6.0.1
+ Requires-Dist: requests<3.0.0,>=2.32.3
  Requires-Dist: types-networkx<4.0.0.0,>=3.2.1.20240820
- Requires-Dist: fsspec<2025.7.1,>=2025.7.0
- Requires-Dist: cloe-logging[databricks,log-analytics]<0.4,>=0.3.8
+ Requires-Dist: types-pyyaml<7.0.0.0,>=6.0.12.20240311
+ Requires-Dist: types-requests<3.0.0.0,>=2.32.0.20240712
+ Description-Content-Type: text/markdown

  # cloe-nessy

@@ -19,7 +19,7 @@ cloe_nessy/file_utilities/strategies/utils_strategy.py,sha256=urayKfOUpSaXKgTs1K
  cloe_nessy/integration/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  cloe_nessy/integration/delta_loader/__init__.py,sha256=ZdBDde1uPtTCL_KAhilVmtVmmGvH5dHb05QsOozkteE,438
  cloe_nessy/integration/delta_loader/delta_load_options.py,sha256=bbPGhC0n8L6CmcmV91Xqq6fWRimxlUHUkr22uVqG0g4,1363
- cloe_nessy/integration/delta_loader/delta_loader.py,sha256=D5oOvVLWRwl0z0iQScXVOapErAl6Z5Kt3qXedchgq0s,6878
+ cloe_nessy/integration/delta_loader/delta_loader.py,sha256=WOl44Udvo6hZ5PVFgabpehs8tt5nl9AYyDnnYBba5Ck,6872
  cloe_nessy/integration/delta_loader/delta_loader_factory.py,sha256=vB1cL6-Nc3SkLH1xtazMbMF1MnNYq8-g3GHZzRE3QmE,2251
  cloe_nessy/integration/delta_loader/delta_loader_metadata_table.py,sha256=G_EWUY76ZlbsPZB9LCGlOLVezk7DK6peYXEgt7-sTQE,1683
  cloe_nessy/integration/delta_loader/strategies/__init__.py,sha256=1o5fRWenL5KnUg1hf7kmTuTpG9pbMxchiQTub52Qvwo,255
@@ -27,18 +27,18 @@ cloe_nessy/integration/delta_loader/strategies/delta_cdf_loader.py,sha256=FOOZqt
  cloe_nessy/integration/delta_loader/strategies/delta_timestamp_loader.py,sha256=YYFH0DkdRPvITUc1JMgkmgIHjwDyZDCjqvEk2qhBMfE,6185
  cloe_nessy/integration/reader/__init__.py,sha256=J5vlORqHLBpHEvzIwfIjzN5xEdOat-8jlmdLcGj8nsA,239
  cloe_nessy/integration/reader/api_reader.py,sha256=3Mf-txOTJ1dXCzdNtRTLC8UKftKms4NxOoLVgzcc2eo,5691
- cloe_nessy/integration/reader/catalog_reader.py,sha256=7jFuqIPpuz03opULh2I0TCLPfW6AqkxjaW2kCc0oM1g,3292
+ cloe_nessy/integration/reader/catalog_reader.py,sha256=w-oUHpyiIwJppa-BW5E_HaMxpNgVWaCQVNSTvuEr9qA,4815
  cloe_nessy/integration/reader/excel_reader.py,sha256=8KCqKBYFE6RGCiahJimQOAtbYZzaUzlnoslW9yca5P8,8035
  cloe_nessy/integration/reader/exceptions.py,sha256=_A9jFpe_RIDZCGY76qzjic9bsshxns6yXPSl141dq1c,203
  cloe_nessy/integration/reader/file_reader.py,sha256=Za_DZKUq1vATp8kIS8uY9IDHiaReZO0k80rrPHAhi5A,8132
  cloe_nessy/integration/reader/reader.py,sha256=e2KVPePQme8SBQJEbL-3zpGasOgTiEvKFTslow2wGPw,1034
  cloe_nessy/integration/writer/__init__.py,sha256=3yzCAGiWZdQWtsbzlTih01sxVTJV2DDYwvl34lEAUlE,243
- cloe_nessy/integration/writer/catalog_writer.py,sha256=Gb-hMdADgO_uUJ7mZPHBYyNme2qXsdFFnzwo7GcShHM,2192
+ cloe_nessy/integration/writer/catalog_writer.py,sha256=Z26FOL3D9KK6I7Y3rgl4c88rToKZnVXlELTYH2xQsHY,5289
  cloe_nessy/integration/writer/file_writer.py,sha256=SUDbN13ZzDhbM8DpOGFgM_Gkg70To4L6Q182pXx2HRM,5454
  cloe_nessy/integration/writer/writer.py,sha256=elFPLFrWR-qVE9qnBtzzzhyRALLQcRVuOsPS0rNmRt4,1741
  cloe_nessy/integration/writer/delta_writer/__init__.py,sha256=h2CT6Hllmk0nodlek27uqwniCzVZKMkYcPGyG9K2Z24,164
  cloe_nessy/integration/writer/delta_writer/delta_append_writer.py,sha256=TbpW-j87_H9dcUza34uR6VWslJez406y3_5N1ip0SnM,4740
- cloe_nessy/integration/writer/delta_writer/delta_merge_writer.py,sha256=no2GOLqMAJd0fEy2mqMevMj_CvutcJPRmXJC2tD4icA,10112
+ cloe_nessy/integration/writer/delta_writer/delta_merge_writer.py,sha256=Yp_q_ycasW2_wwmzty_6fZeBVcW_0o8gLrr6F1gaUjQ,10195
  cloe_nessy/integration/writer/delta_writer/delta_table_operation_type.py,sha256=m4YFY9_WgaOcnpBviVt3Km-w3wf3NF25wPS-n0NBGcE,970
  cloe_nessy/integration/writer/delta_writer/delta_writer_base.py,sha256=upUtDZMzwYFU0kzmkelVgkpFToXkrypcR3h_jvGjz14,8596
  cloe_nessy/integration/writer/delta_writer/exceptions.py,sha256=xPmGiYV0xQXauln5Oh34E5vbm0rVcs6xCh-SJSb2bw0,107
@@ -46,10 +46,10 @@ cloe_nessy/logging/__init__.py,sha256=ySVCVbdyR3Dno_tl2ZfiER_7EVaDoQMHVkNyfdMZum
  cloe_nessy/logging/logger_mixin.py,sha256=H8MyMEyb_kEDP0Ow5QStAFLuOkTIeUnneGaj916fKlU,7443
  cloe_nessy/models/__init__.py,sha256=-FmWEJ1Oq1njSopjc0R7GmT64mLSmALkm8PkHNzy9Y8,327
  cloe_nessy/models/catalog.py,sha256=ayC1sMp4cNLAZtu0ICVV3Us6-o4hn8U9tpzzvxC9RAs,177
- cloe_nessy/models/column.py,sha256=t-MX9GMs7l5W0APvsUxiE1TI9SWkKdFKblmz24s4IHY,1995
+ cloe_nessy/models/column.py,sha256=W4V1Ls1d60VyZ1Ko9Yu9eSipcMbxSzKicn0aloHPiR0,2027
  cloe_nessy/models/constraint.py,sha256=hsFlhn4n928z81O3dl3v5bMetewPWzMjkJK3_4kASSM,178
  cloe_nessy/models/foreign_key.py,sha256=DwRVHs9sShqqPV-NL7ow_3AmPPWX0Od26yZn_I565pU,1001
- cloe_nessy/models/schema.py,sha256=yUrjjEhAH5zbCymE67Az_jPnVB8hGO-_UNfqzeZCD_Y,3376
+ cloe_nessy/models/schema.py,sha256=cNSrH7K4hLRrkg1E6fW6DUIBMZdR2A5B21POj5iQ4GA,3429
  cloe_nessy/models/table.py,sha256=3AUBUKLJv1x-xN9KYc5Ndjf-lAlT83rUYdhRKy8wFU4,12105
  cloe_nessy/models/types.py,sha256=XRbuJGdTNa6aXyE3IAzs_J9gVjbfkzMDLfGl-k6jI_4,223
  cloe_nessy/models/volume.py,sha256=51BE06FrL1Wv6zblFwJ_HTiR6WQqH7pSmrdH90rqwLg,2444
@@ -58,10 +58,10 @@ cloe_nessy/models/adapter/unity_catalog_adapter.py,sha256=a-14Ys-AevVYQd0xeJU1sy
  cloe_nessy/models/mixins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  cloe_nessy/models/mixins/read_instance_mixin.py,sha256=j5Y4aNWOh1jlskEaxNooZFJgPyxRmik00gAVLJnAaRs,4507
  cloe_nessy/models/mixins/template_loader_mixin.py,sha256=5MXhEGBFlq3dwZvINEyBowSlipNnVun2H_TmhI_fsS4,549
- cloe_nessy/models/templates/create_table.sql.j2,sha256=QWbiTXwmGaIlZUAIGL4pAlHkDbP9mq1vGAkdKCPOqm4,1669
+ cloe_nessy/models/templates/create_table.sql.j2,sha256=71JpUyUZ_ZYO2M0tfIrTXHR7JycypAGsELt2-2d3oO0,2479
  cloe_nessy/models/templates/create_volume.sql.j2,sha256=XIUf1cHcvAxcGTyhzUiv4xpQ1cfDw_ra3_FKmOuLoBs,289
  cloe_nessy/object_manager/__init__.py,sha256=3sle0vNpPwBOkycxA3XVS9m4XZf5LD3Qd4NGxdqcHno,186
- cloe_nessy/object_manager/table_manager.py,sha256=m6u_KFYCPoqq1hagwt3s7eQopjV2oOJNlmXDVAfku-k,12703
+ cloe_nessy/object_manager/table_manager.py,sha256=4eQG-zMiuBpeJmvWdL3KdhHRiPFf8TS0RFNRp8Yz6rY,13887
  cloe_nessy/object_manager/volume_manager.py,sha256=6epd3KXzcNH04EvaKubAfLsaUm9qBMeT3KNvMK04gGs,2727
  cloe_nessy/pipeline/__init__.py,sha256=sespmJ5JsgyiFyZiedTiL2kg--zGIX7cjTYsD5vemEg,325
  cloe_nessy/pipeline/pipeline.py,sha256=L4wk3b06LNWRj01nnAkuQpeRrwFTyaV1xTpgYAg4sak,10819
@@ -73,10 +73,10 @@ cloe_nessy/pipeline/pipeline_plotting_service.py,sha256=goMQj73FzUVchKn5c2SsPcWR
  cloe_nessy/pipeline/pipeline_step.py,sha256=UlnmpS6gm_dZ7m9dD1mZvye7mvUF_DA7HjOZo0oGYDU,1977
  cloe_nessy/pipeline/actions/__init__.py,sha256=RZ1UVSn9v88F4GKgHy6UYDzx8zSAMQScJLCeiHO5f8A,2802
  cloe_nessy/pipeline/actions/read_api.py,sha256=RBv5XeHtjTXuCP09Fqo6JNx6iIhQQI-nuAHCuSaGs2s,7778
- cloe_nessy/pipeline/actions/read_catalog_table.py,sha256=TBlJaXJAQwLtwvh7dXsX9ebNN3rS6En6951MnT8xGG8,4101
+ cloe_nessy/pipeline/actions/read_catalog_table.py,sha256=EkP3JSI7VQMkvUsb97ieUeGnnfvyyUI7egvqNWMqK0I,6894
  cloe_nessy/pipeline/actions/read_excel.py,sha256=Mhl3r_2Hqk2XN7Fl5WqqAyE4JdnwSiivbhWMglyBtkE,7961
  cloe_nessy/pipeline/actions/read_files.py,sha256=hRcM7wG35vxxLVajW3SK5euHW02qxiXCYSkIl11xiQ0,7308
- cloe_nessy/pipeline/actions/read_metadata_yaml.py,sha256=3ZDy9qiDYtM1oDQzHPC23hLOvHjhdk5zg1wVHE60m9k,2295
+ cloe_nessy/pipeline/actions/read_metadata_yaml.py,sha256=i8fQceV63eAqx_x0ANisCkXWfMHyhqsfFHVFH5yP2po,3544
  cloe_nessy/pipeline/actions/transform_change_datatype.py,sha256=24Tn6R3TvUkWCh8V6naLdyNbCbqvyPOOoer-hy_Ebq4,2077
  cloe_nessy/pipeline/actions/transform_clean_column_names.py,sha256=VxvWqENW63c50L96JA1V_ioe4By6gGzx_iY86njOXEM,3044
  cloe_nessy/pipeline/actions/transform_concat_columns.py,sha256=Nk8YbhxDnFZsWzW9Dj5Yl76Uq6VrcMlevQPHGms65L8,3777
@@ -88,15 +88,15 @@ cloe_nessy/pipeline/actions/transform_filter.py,sha256=Nz_ggRfKIcNzYFfFOsgq1Qeat
  cloe_nessy/pipeline/actions/transform_generic_sql.py,sha256=_naWfmPdYAUKjPNeHu5qJAohOL7DHCSYz_kwoeRv3OI,2741
  cloe_nessy/pipeline/actions/transform_group_aggregate.py,sha256=KUHeeP-RIDi34dpbsPEJkzea5zFJA6MuyjNpOsFud9o,4045
  cloe_nessy/pipeline/actions/transform_hash_columns.py,sha256=H8j_Xadnm3npVNA_nu7Be7v0bJV20ELKMxSsVHHl6CY,8407
- cloe_nessy/pipeline/actions/transform_join.py,sha256=e_tvMk8YJTAWcUK_EmOgNt0s31ICZoMX_MKOTWx4lBY,3645
+ cloe_nessy/pipeline/actions/transform_join.py,sha256=ez1M1wVc9khOZj1swMArJbBKXxEpjenUHrW1wL8H330,7200
  cloe_nessy/pipeline/actions/transform_json_normalize.py,sha256=petF7pnNq1EKc8MqVdG0weFALAHNILSe_eAu4Z5XxIo,4833
  cloe_nessy/pipeline/actions/transform_rename_columns.py,sha256=4zJcPCONMU4C67qeuzsrX3AORRRHoq_selUI7FJyeg0,1952
  cloe_nessy/pipeline/actions/transform_replace_values.py,sha256=1OPHTrjcphfyGepcO7ozYfeqfwA18pjlyHpVKUS_AAU,2049
  cloe_nessy/pipeline/actions/transform_select_columns.py,sha256=-GhSEsb7iNnZIsYRm3BG9BX4_qUDJMbpj1DsKPY046w,4574
- cloe_nessy/pipeline/actions/transform_union.py,sha256=s81Vge0AbYPc7VkskCYfOQ_LEjqcmfNFyDkytfjcZyo,2720
- cloe_nessy/pipeline/actions/write_catalog_table.py,sha256=vZ7bZcrZY47P_EVYNshMNZ34l7Orhs8Q9--5Ud5hhLI,2906
+ cloe_nessy/pipeline/actions/transform_union.py,sha256=SZtEzh567CIExUj9yMEgshE28h4dXKT7Wr2TDj4zB4k,2718
+ cloe_nessy/pipeline/actions/write_catalog_table.py,sha256=FyC0scQU8Ul3Uigpk6IN2IJpf_4jRjAqF5yHtDVwG00,4852
  cloe_nessy/pipeline/actions/write_delta_append.py,sha256=2F5qnKPsY_F-2672Ce4Gub7qdna157jEqHHc429fO2A,2962
- cloe_nessy/pipeline/actions/write_delta_merge.py,sha256=kQE4xLbVEUnpYImZLnpZxp88Tuf6VNSeU1W-zI8Wuvw,5805
+ cloe_nessy/pipeline/actions/write_delta_merge.py,sha256=zcOk4ytZFUxyGY8U2fdFPLFnw2g_yhaS_vOx_e3wCuE,5847
  cloe_nessy/pipeline/actions/write_file.py,sha256=JZ8UZslxUn_ttYt5wDyvtHFq2FqYk3vOR8kvExJI8pk,3212
  cloe_nessy/pipeline/utils/__init__.py,sha256=xi02UjBMiXWD7b9gDvww4gyRyowb0eRd_6Wbu0F_cro,118
  cloe_nessy/pipeline/utils/delta_load_utils.py,sha256=KitMNruxePEkecI0h4Jint1JwJpaEog5mCOchMkgan8,1495
@@ -107,7 +107,6 @@ cloe_nessy/settings/settings.py,sha256=I4n129lrujriW-d8q4as2Kb4_kI932ModfZ5Ow_Up
  cloe_nessy/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  cloe_nessy/utils/column_names.py,sha256=dCNtm61mc5aLkY2oE4rlfN3VLCrpot6fOESjAZmCmhA,361
  cloe_nessy/utils/file_and_directory_handler.py,sha256=r2EVt9xG81p6ScaJCwETC5an6pMT6WseB0jMOR-JlpU,602
- cloe_nessy-0.3.17.0.dist-info/METADATA,sha256=hR0GqdboYwzBrbZY_ese9kt250DIOHgMlAj3QOqLhF8,3292
- cloe_nessy-0.3.17.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- cloe_nessy-0.3.17.0.dist-info/top_level.txt,sha256=Z7izn8HmQpg2wBUb-0jzaKlYKMU7Ypzuc9__9vPtW_I,11
- cloe_nessy-0.3.17.0.dist-info/RECORD,,
+ cloe_nessy-0.3.18.dist-info/METADATA,sha256=Sc5JD6FrXR1GwPA9VHv4guNxs-hPHa9GBZz31zOQbL8,3290
+ cloe_nessy-0.3.18.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ cloe_nessy-0.3.18.dist-info/RECORD,,
@@ -1,5 +1,4 @@
  Wheel-Version: 1.0
- Generator: setuptools (80.9.0)
+ Generator: hatchling 1.27.0
  Root-Is-Purelib: true
  Tag: py3-none-any
-
@@ -1 +0,0 @@
- cloe_nessy