cloe-nessy 0.3.19__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
--- a/cloe_nessy/file_utilities/get_file_paths.py
+++ b/cloe_nessy/file_utilities/get_file_paths.py
@@ -1,4 +1,5 @@
 import os
+from typing import Any
 
 from ..logging.logger_mixin import LoggerMixin
 from .factory import FileRetrievalFactory
@@ -9,7 +10,7 @@ def get_file_paths(
     location: str,
     file_name_pattern: str | None = None,
     search_subdirs: bool = True,
-    **kwargs,
+    **kwargs: Any,
 ) -> list[str]:
     """Retrieves file paths from a specified location based on the provided criteria.
 
--- a/cloe_nessy/file_utilities/strategies/base_strategy.py
+++ b/cloe_nessy/file_utilities/strategies/base_strategy.py
@@ -1,4 +1,5 @@
 from abc import ABC, abstractmethod
+from typing import Any
 
 
 class FileRetrievalStrategy(ABC):
@@ -15,7 +16,7 @@ class FileRetrievalStrategy(ABC):
         location: str,
         extension: str | None = None,
         search_subdirs: bool = True,
-        **kwargs,
+        **kwargs: Any,
     ) -> list[str]:
         """Retrieves a list of file paths based on the specified criteria.
 
--- a/cloe_nessy/file_utilities/strategies/local_strategy.py
+++ b/cloe_nessy/file_utilities/strategies/local_strategy.py
@@ -1,4 +1,5 @@
 import os
+from typing import Any
 
 from ..exceptions import FileUtilitiesError
 from .base_strategy import FileRetrievalStrategy
@@ -16,7 +17,7 @@ class LocalDirectoryStrategy(FileRetrievalStrategy):
         location: str,
         extension: str | None = None,
         search_subdirs: bool = True,
-        **kwargs,  # noqa: ARG004
+        **kwargs: Any,  # noqa: ARG004
     ) -> list[str]:
         """Recursively retrieves all files with a specified extension from a given directory and its subdirectories.
 
--- a/cloe_nessy/file_utilities/strategies/onelake_strategy.py
+++ b/cloe_nessy/file_utilities/strategies/onelake_strategy.py
@@ -1,3 +1,5 @@
+from typing import Any
+
 from .base_strategy import FileRetrievalStrategy
 from .local_strategy import LocalDirectoryStrategy
 
@@ -10,7 +12,7 @@ class OneLakeStrategy(FileRetrievalStrategy):
         location: str,
         extension: str | None = None,
         search_subdirs: bool = True,
-        **kwargs,
+        **kwargs: Any,
     ) -> list:
         """Recursively retrieves all files with a specified extension from a given directory and its subdirectories.
 
--- a/cloe_nessy/file_utilities/strategies/utils_strategy.py
+++ b/cloe_nessy/file_utilities/strategies/utils_strategy.py
@@ -1,3 +1,5 @@
+from typing import Any
+
 from ...session import SessionManager
 from ..exceptions import FileUtilitiesError
 from .base_strategy import FileRetrievalStrategy
@@ -15,7 +17,7 @@ class UtilsStrategy(FileRetrievalStrategy):
         location: str,
         extension: str | None = None,
         search_subdirs: bool = True,
-        **kwargs,  # noqa: ARG004
+        **kwargs: Any,  # noqa: ARG004
     ) -> list:
         """Recursively retrieves all files with a specified extension from a given directory and its subdirectories.
 
--- a/cloe_nessy/integration/reader/excel_reader.py
+++ b/cloe_nessy/integration/reader/excel_reader.py
@@ -160,7 +160,7 @@ class ExcelDataFrameReader(BaseReader):
             "__metadata",
             F.create_map(
                 F.lit("timestamp"),
-                F.current_timestamp(),
+                F.current_timestamp().cast("string"),
                 F.lit("file_location"),
                 F.lit(location),
                 F.lit("sheet_name"),
--- a/cloe_nessy/integration/reader/file_reader.py
+++ b/cloe_nessy/integration/reader/file_reader.py
@@ -192,7 +192,8 @@ class FileReader(BaseReader):
         """Add all metadata columns to the DataFrame."""
         metadata_columns = df.select("_metadata.*").columns
 
-        entries = [(F.lit(field), F.col(f"_metadata.{field}")) for field in metadata_columns]
+        # Cast all metadata values to strings to ensure type consistency in the map
+        entries = [(F.lit(field), F.col(f"_metadata.{field}").cast("string")) for field in metadata_columns]
        flat_list = [item for tup in entries for item in tup]
 
         df = df.withColumn("__metadata", F.create_map(flat_list))
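Note on the two `.cast("string")` changes above: `F.create_map` produces a typed map column, so keeping every value a string gives a plain `map<string,string>` instead of relying on Spark's implicit type coercion across the mixed string, long, and timestamp fields of `_metadata`. A minimal standalone sketch of the same pattern, not taken from the package and with illustrative column names:

```python
# Sketch: building a map<string,string> metadata column from mixed-type fields.
# Assumption: "file_path" and "file_size" are example fields, not the real _metadata schema.
from pyspark.sql import SparkSession, functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([("a.csv", 42)], ["file_path", "file_size"])

# Cast every value to string so create_map sees a single, consistent value type.
entries = [
    (F.lit("file_path"), F.col("file_path").cast("string")),
    (F.lit("file_size"), F.col("file_size").cast("string")),  # bigint -> string
]
flat_list = [item for tup in entries for item in tup]

df.withColumn("__metadata", F.create_map(flat_list)).show(truncate=False)
```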
--- a/cloe_nessy/integration/writer/delta_writer/delta_append_writer.py
+++ b/cloe_nessy/integration/writer/delta_writer/delta_append_writer.py
@@ -62,7 +62,7 @@ class DeltaAppendWriter(BaseDeltaWriter):
         trigger_dict: dict | None = None,
         options: dict[str, str] | None = None,
         await_termination: bool = False,
-    ):
+    ) -> None:
         """Appends the provided DataFrame to a Delta table.
 
         Args:
--- a/cloe_nessy/integration/writer/delta_writer/delta_writer_base.py
+++ b/cloe_nessy/integration/writer/delta_writer/delta_writer_base.py
@@ -151,7 +151,7 @@ class BaseDeltaWriter(BaseWriter, ABC):
         return " AND ".join([f"target.`{c}` <=> source.`{c}`" for c in columns])
 
     @staticmethod
-    def _partition_pruning_conditions(df, partition_cols: list[str] | None) -> str:
+    def _partition_pruning_conditions(df: "DataFrame", partition_cols: list[str] | None) -> str:
         """Generates partition pruning conditions for an SQL query.
 
         This function is used to optimize the performance of an SQL query by only scanning the
--- a/cloe_nessy/pipeline/__init__.py
+++ b/cloe_nessy/pipeline/__init__.py
@@ -1,7 +1,15 @@
 from .pipeline import Pipeline
 from .pipeline_action import PipelineAction
+from .pipeline_builder import PipelineBuilder
 from .pipeline_context import PipelineContext
 from .pipeline_parsing_service import PipelineParsingService
 from .pipeline_step import PipelineStep
 
-__all__ = ["Pipeline", "PipelineParsingService", "PipelineContext", "PipelineAction", "PipelineStep"]
+__all__ = [
+    "Pipeline",
+    "PipelineBuilder",
+    "PipelineParsingService",
+    "PipelineContext",
+    "PipelineAction",
+    "PipelineStep",
+]
--- a/cloe_nessy/pipeline/actions/__init__.py
+++ b/cloe_nessy/pipeline/actions/__init__.py
@@ -19,10 +19,12 @@ from .transform_group_aggregate import TransformGroupAggregate
 from .transform_hash_columns import TransformHashColumnsAction
 from .transform_join import TransformJoinAction
 from .transform_json_normalize import TransformJsonNormalize
+from .transform_regex_extract import TransformRegexExtract
 from .transform_rename_columns import TransformRenameColumnsAction
 from .transform_replace_values import TransformReplaceValuesAction
 from .transform_select_columns import TransformSelectColumnsAction
 from .transform_union import TransformUnionAction
+from .transform_with_column import TransformWithColumnAction
 from .write_catalog_table import WriteCatalogTableAction
 from .write_delta_append import WriteDeltaAppendAction
 from .write_delta_merge import WriteDeltaMergeAction
@@ -55,9 +57,11 @@ __all__ = [
     "TransformGroupAggregate",
     "TransformJoinAction",
     "TransformJsonNormalize",
+    "TransformRegexExtract",
     "TransformRenameColumnsAction",
     "TransformReplaceValuesAction",
     "TransformSelectColumnsAction",
+    "TransformWithColumnAction",
     "WriteCatalogTableAction",
     "WriteDeltaAppendAction",
     "WriteDeltaMergeAction",
--- a/cloe_nessy/pipeline/actions/read_catalog_table.py
+++ b/cloe_nessy/pipeline/actions/read_catalog_table.py
@@ -96,8 +96,6 @@ class ReadCatalogTableAction(PipelineAction):
                 configuration for the streaming query, such as processing time or
                 continuous processing.
                 behavior, such as filters or reading modes. Defaults to None.
-            delta_load_options: Options for delta loading, if applicable.
-                Configures the [`DeltaLoader`][cloe_nessy.integration.delta_loader].
 
         Raises:
             ValueError: If neither `table_identifier` nor `table_metadata.identifier` in the `context` is provided.
--- a/cloe_nessy/pipeline/actions/transform_hash_columns.py
+++ b/cloe_nessy/pipeline/actions/transform_hash_columns.py
@@ -25,7 +25,8 @@ class HashSettings(BaseModel):
     bits: int | None = Field(default=None, description="Only required for sha2")
 
     @model_validator(mode="before")
-    def validate_all(cls, values):
+    @classmethod
+    def validate_all(cls: type["HashSettings"], values: Any) -> Any:
         """Validates the input values for a hashing operation before model instantiation.
 
         This method performs the following checks:
@@ -91,7 +92,8 @@ class HashConfig(BaseModel):
     hash_config: dict[str, HashSettings]
 
     @model_validator(mode="before")
-    def validate_config(cls, values):
+    @classmethod
+    def validate_config(cls: type["HashConfig"], values: Any) -> Any:
         """Validates the hash configuration provided in the model.
 
         This method is executed in "before" mode to ensure that the `hash_config`
--- /dev/null
+++ b/cloe_nessy/pipeline/actions/transform_regex_extract.py
@@ -0,0 +1,169 @@
+import re
+from typing import Any
+
+import pyspark.sql.functions as F
+
+from cloe_nessy.pipeline.pipeline_action import PipelineAction
+from cloe_nessy.pipeline.pipeline_context import PipelineContext
+
+
+class TransformRegexExtract(PipelineAction):
+    r"""Extract values from a specified column in a DataFrame using regex patterns.
+
+    This action extracts values from a column based on a regex pattern and stores
+    the result in a new column. Optionally, you can replace the matched pattern in
+    the original column with a different string, remove the original column, or add
+    a boolean column indicating which rows matched the pattern.
+
+    Example:
+        ```yaml
+        Extract Action:
+            action: TRANSFORM_REGEX_EXTRACT
+            options:
+                source_column_name: Email
+                extract_column_name: org_domain
+                pattern: (?<=@)([A-Za-z0-9-]+)
+                replace_by: exampledomain.org
+        ```
+
+    This action also supports processing multiple columns simultaneously. To use this
+    functionality, structure the configuration as a dictionary mapping each source
+    column name to its extraction parameters.
+
+    Example:
+        ```yaml
+        Extract Action:
+            action: TRANSFORM_REGEX_EXTRACT
+            options:
+                extract_columns:
+                    Name:
+                        pattern: (?<=\w+) (\w+)
+                        replace_by: ''
+                        extract_column_name: last_name
+                        match_info_column_name: has_last_name
+                    Email:
+                        pattern: @\w+\.\w+
+                        extract_column_name: domain
+                        keep_original_column: False
+        ```
+
+    """
+
+    name: str = "TRANSFORM_REGEX_EXTRACT"
+
+    def run(
+        self,
+        context: PipelineContext,
+        source_column_name: str = "",
+        extract_column_name: str = "",
+        pattern: str = "",
+        keep_original_column: bool = True,
+        replace_by: str = "",
+        match_info_column_name: str = "",
+        extract_columns: dict | None = None,
+        **_: Any,
+    ) -> PipelineContext:
+        """Performs a regex extract (and replace) on a specified column in a DataFrame.
+
+        This function performs a regex extract (and optionally a replace) on one or more columns.
+
+        Args:
+            context: The context in which this action is executed.
+            source_column_name: Column name to perform the regex replace on.
+            pattern: Regex pattern to match.
+            replace_by: String that should replace the extracted pattern in the source column.
+            extract_column_name: Column name to store the extract, default: <source_column_name>_extract
+            keep_original_column: Whether to keep the original column, default: True
+            match_info_column_name: Column name to store a boolean column whether a match was found, default: None
+            extract_columns: Dictionary of column names and their corresponding 1-column-case.
+
+        Raises:
+            ValueError: If any of the required arguments are not provided.
+            ValueError: If the regex pattern is invalid.
+
+        Returns:
+            PipelineContext: Transformed context with the modified DataFrame.
+        """
+        if context.data is None:
+            raise ValueError("Data from the context is required for the operation.")
+        if not extract_columns and not source_column_name:
+            raise ValueError("Either extract_columns or source_column_name must be provided.")
+
+        df = context.data
+
+        if source_column_name:
+            self._console_logger.info(f"Extracting from column '{source_column_name}' using pattern: {pattern}")
+            df = self._process_one_column(
+                df,
+                source_column_name,
+                pattern,
+                extract_column_name,
+                replace_by,
+                keep_original_column,
+                match_info_column_name,
+            )
+
+        elif isinstance(extract_columns, dict):
+            self._console_logger.info(f"Extracting from {len(extract_columns)} columns")
+            for one_source_column_name in extract_columns:
+                parameter_dict = self._get_default_dict() | extract_columns[one_source_column_name]
+                df = self._process_one_column(df, one_source_column_name, **parameter_dict)
+
+        else:
+            raise ValueError("extract_columns must be a dictionary. See documentation for proper format.")
+
+        return context.from_existing(data=df)
+
+    def _process_one_column(
+        self,
+        df,
+        source_column_name,
+        pattern,
+        extract_column_name,
+        replace_by,
+        keep_original_column,
+        match_info_column_name,
+    ):
+        # Extract the first captured group (group 0 is the entire match)
+        matched_group_id = 0
+
+        if not extract_column_name:
+            extract_column_name = f"{source_column_name}_extracted"
+
+        if not pattern:
+            raise ValueError(f"The regex pattern (pattern) for column {source_column_name} must be provided.")
+
+        # Validate regex pattern
+        try:
+            re.compile(pattern)
+        except re.error as e:
+            raise ValueError(f"Invalid regex pattern '{pattern}' for column {source_column_name}: {e}") from e
+
+        df = df.withColumn(extract_column_name, F.regexp_extract(source_column_name, pattern, matched_group_id))
+
+        if replace_by:
+            df = df.withColumn(source_column_name, F.regexp_replace(source_column_name, pattern, replace_by))
+
+        if match_info_column_name:
+            # Check if extraction is null or empty string
+            df = df.withColumn(
+                match_info_column_name,
+                F.when((F.col(extract_column_name).isNull()) | (F.col(extract_column_name) == ""), False).otherwise(
+                    True
+                ),
+            )
+
+        if not keep_original_column:
+            df = df.drop(source_column_name)
+
+        return df
+
+    def _get_default_dict(self) -> dict[str, Any]:
+        """Return default parameters for single column extraction."""
+        return {
+            "pattern": "",
+            "extract_column_name": "",
+            "replace_by": "",
+            "keep_original_column": True,
+            "match_info_column_name": "",
+        }
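For readers unfamiliar with the Spark functions this new action wraps, a minimal standalone sketch of the underlying calls (not part of the package; data and column names are illustrative). `F.regexp_extract(..., 0)` returns the whole match, or an empty string when nothing matches, which is what the optional match-info column checks for:

```python
# Sketch of the building blocks used by TransformRegexExtract.
from pyspark.sql import SparkSession, functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([("jane@initions.com",), ("no-email",)], ["Email"])

pattern = r"(?<=@)([A-Za-z0-9-]+)"  # same pattern as the docstring example

df = (
    df.withColumn("org_domain", F.regexp_extract("Email", pattern, 0))    # extract
    .withColumn("Email", F.regexp_replace("Email", pattern, "redacted"))  # optional replace
    .withColumn("has_match", F.col("org_domain") != "")                   # match-info flag
)
df.show(truncate=False)
# row 1: org_domain = "initions", has_match = true
# row 2: org_domain = "",         has_match = false
```

The action additionally validates the pattern with `re.compile` up front and can drop the source column when `keep_original_column` is false.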
--- /dev/null
+++ b/cloe_nessy/pipeline/actions/transform_with_column.py
@@ -0,0 +1,104 @@
+"""Transform action to add or update a column using a SQL expression."""
+
+from typing import Any
+
+from pyspark.sql import functions as F
+
+from cloe_nessy.pipeline.pipeline_action import PipelineAction
+from cloe_nessy.pipeline.pipeline_context import PipelineContext
+
+
+class TransformWithColumnAction(PipelineAction):
+    """Add or update a column in the DataFrame using a SQL expression.
+
+    This action uses PySpark's expr() function to evaluate SQL expressions and
+    create or update columns in the DataFrame.
+
+    Examples:
+        === "Create new column"
+            ```yaml
+            Create Full Name:
+                action: TRANSFORM_WITH_COLUMN
+                options:
+                    column_name: full_name
+                    expression: concat(first_name, ' ', last_name)
+            ```
+
+        === "Update existing column"
+            ```yaml
+            Lowercase Email:
+                action: TRANSFORM_WITH_COLUMN
+                options:
+                    column_name: email
+                    expression: lower(email)
+            ```
+
+        === "Calculated column"
+            ```yaml
+            Calculate Total:
+                action: TRANSFORM_WITH_COLUMN
+                options:
+                    column_name: total_price
+                    expression: price * quantity * (1 + tax_rate)
+            ```
+
+        === "Extract date parts"
+            ```yaml
+            Extract Year:
+                action: TRANSFORM_WITH_COLUMN
+                options:
+                    column_name: year
+                    expression: year(order_date)
+            ```
+    """
+
+    name: str = "TRANSFORM_WITH_COLUMN"
+
+    def run(
+        self,
+        context: PipelineContext,
+        *,
+        column_name: str = "",
+        expression: str = "",
+        **_: Any,
+    ) -> PipelineContext:
+        """Add or update a column using a SQL expression.
+
+        Args:
+            context: The pipeline context containing the DataFrame
+            column_name: Name of the column to create or update
+            expression: SQL expression to evaluate for the column value
+            **_: Additional unused keyword arguments
+
+        Returns:
+            PipelineContext: Updated context with the modified DataFrame
+
+        Raises:
+            ValueError: If column_name is not provided
+            ValueError: If expression is not provided
+            ValueError: If context.data is None
+            Exception: If the SQL expression is invalid
+        """
+        if not column_name:
+            raise ValueError("No column_name provided.")
+
+        if not expression:
+            raise ValueError("No expression provided.")
+
+        if context.data is None:
+            raise ValueError("Data from context is required for transform_with_column")
+
+        self._console_logger.info(f"Adding/updating column '{column_name}' with expression: {expression}")
+
+        df = context.data
+
+        try:
+            # Use F.expr() to evaluate the SQL expression
+            df = df.withColumn(column_name, F.expr(expression))
+        except Exception as e:
+            self._console_logger.error(f"Failed to evaluate expression '{expression}' for column '{column_name}': {e}")
+            raise
+
+        self._console_logger.info(f"Successfully added/updated column '{column_name}'")
+
+        return context.from_existing(data=df)
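The action is a thin wrapper around `df.withColumn(column_name, F.expr(expression))`, so anything valid in a Spark SQL SELECT expression can be used for the new or updated column. A minimal standalone sketch of that same call outside the pipeline (not part of the package; column names are illustrative):

```python
# Sketch: the withColumn/expr call that TransformWithColumnAction performs.
from pyspark.sql import SparkSession, functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame(
    [("Ada", "Lovelace", 2, 10.0)], ["first_name", "last_name", "quantity", "price"]
)

df = (
    df.withColumn("full_name", F.expr("concat(first_name, ' ', last_name)"))
    .withColumn("total_price", F.expr("price * quantity"))
)
df.show()
```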
--- a/cloe_nessy/pipeline/actions/write_delta_append.py
+++ b/cloe_nessy/pipeline/actions/write_delta_append.py
@@ -19,9 +19,6 @@ class WriteDeltaAppendAction(PipelineAction):
             table_identifier: my_catalog.my_schema.my_table
             ignore_empty_df: false
         ```
-
-    Returns:
-        None.
     """
 
     name: str = "WRITE_DELTA_APPEND"
--- a/cloe_nessy/pipeline/actions/write_delta_merge.py
+++ b/cloe_nessy/pipeline/actions/write_delta_merge.py
@@ -28,9 +28,6 @@ class WriteDeltaMergeAction(PipelineAction):
             when_not_matched_insert: true
             use_partition_pruning: true
         ```
-
-    Returns:
-        None.
     """
 
     name: str = "WRITE_DELTA_MERGE"
--- /dev/null
+++ b/cloe_nessy/pipeline/pipeline_builder.py
@@ -0,0 +1,210 @@
+from collections import OrderedDict
+from collections.abc import Callable
+from typing import Any, Self
+
+from .pipeline import Pipeline
+from .pipeline_step import PipelineStep
+
+
+class PipelineBuilder:
+    """Fluent API builder for creating Nessy pipelines programmatically.
+
+    This class provides a chainable interface for building pipelines using method calls
+    instead of YAML configuration. It dynamically creates methods for all available
+    PipelineActions.
+
+    Example:
+        ```python
+        pipeline = (PipelineBuilder("My Pipeline")
+            .read_files(location="data/*.csv", extension="csv")
+            .transform_clean_column_names()
+            .transform_filter(condition="amount > 1000")
+            .write_catalog_table(catalog="prod", schema="sales", table="results")
+            .build())
+
+        pipeline.run()
+        ```
+    """
+
+    def __init__(self, name: str) -> None:
+        """Initialize the pipeline builder.
+
+        Args:
+            name: The name of the pipeline.
+        """
+        self.name = name
+        self.steps: OrderedDict[str, PipelineStep] = OrderedDict()
+        self._step_counter = 0
+
+    def __getattr__(self, name: str) -> Callable[..., "PipelineBuilder"]:
+        """Dynamically create methods for pipeline actions.
+
+        This method is called when an attribute that doesn't exist is accessed.
+        It converts method calls like `read_files()` into the corresponding PipelineAction.
+
+        Args:
+            name: The method name being called.
+
+        Returns:
+            A callable that adds the corresponding pipeline step.
+
+        Raises:
+            AttributeError: If the method name doesn't correspond to a known action.
+        """
+        # Lazy import to avoid circular import issues
+        from .actions import pipeline_actions
+
+        # Convert method name to action name (e.g., read_files -> READ_FILES)
+        action_name = name.upper()
+
+        if action_name in pipeline_actions:
+            action_class = pipeline_actions[action_name]
+
+            def method(**kwargs: Any) -> "PipelineBuilder":
+                return self._add_step(action_class, **kwargs)
+
+            return method
+
+        raise AttributeError(
+            f"PipelineBuilder has no method '{name}'. Available actions: {list(pipeline_actions.keys())}"
+        )
+
+    def _add_step(self, action_class: type, step_name: str | None = None, **options: Any) -> Self:
+        """Add a step to the pipeline.
+
+        Args:
+            action_class: The PipelineAction class to instantiate.
+            step_name: Optional custom name for the step.
+            **options: Options to pass to the action.
+
+        Returns:
+            Self for method chaining.
+
+        Raises:
+            ValueError: If a step with the given name already exists.
+        """
+        if step_name is None:
+            step_name = f"step_{self._step_counter:03d}_{action_class.__name__}"
+
+        # Validate that step name is unique
+        if step_name in self.steps:
+            raise ValueError(
+                f"A step with name '{step_name}' already exists in the pipeline. "
+                f"Please provide a unique step_name. "
+                f"Existing steps: {list(self.steps.keys())}"
+            )
+
+        # Convert any PipelineBuilder instances in options to PipelineStep references
+        options = self._convert_builder_references(options)
+
+        # Set up context reference to previous step
+        context_ref = None
+        if self.steps:
+            context_ref = list(self.steps.keys())[-1]
+
+        step = PipelineStep(name=step_name, action=action_class(), options=options, _context_ref=context_ref)
+
+        # Remove any predecessors that are from already-executed external pipelines
+        # (these steps have results but aren't in our pipeline)
+        external_predecessors = set()
+        for pred_name in step._predecessors:
+            if pred_name not in self.steps and pred_name != context_ref:
+                # Check if this is a reference to an executed step from options
+                for opt_val in options.values():
+                    if isinstance(opt_val, PipelineStep) and opt_val.name == pred_name:
+                        # This is an external executed step, remove from predecessors
+                        external_predecessors.add(pred_name)
+                        break
+
+        step._predecessors -= external_predecessors
+
+        self.steps[step_name] = step
+        self._step_counter += 1
+        return self
+
+    def _convert_builder_references(self, options: dict[str, Any]) -> dict[str, Any]:
+        """Convert any PipelineBuilder instances in options to PipelineStep references.
+
+        This method recursively processes options to find PipelineBuilder instances and
+        converts them to their last step's PipelineStep reference. This allows users to
+        pass PipelineBuilder instances directly to actions that expect PipelineStep references.
+
+        Handles PipelineBuilder instances in:
+        - Direct values
+        - Lists
+        - Nested dictionaries
+
+        Args:
+            options: Dictionary of options that may contain PipelineBuilder instances.
+
+        Returns:
+            Dictionary with PipelineBuilder instances converted to PipelineStep references.
+
+        Raises:
+            ValueError: If a PipelineBuilder has no steps.
+        """
+        converted = {}
+        for key, value in options.items():
+            converted[key] = self._convert_value(value, key)
+        return converted
+
+    def _convert_value(self, value: Any, context: str = "") -> Any:
+        """Recursively convert a value, handling PipelineBuilder instances.
+
+        When a PipelineBuilder is passed as a value, it is executed immediately
+        and its last step is returned as the reference. This allows the pipeline
+        to be run before the main pipeline that references it.
+
+        Args:
+            value: The value to convert.
+            context: Context string for error messages (e.g., key name).
+
+        Returns:
+            The converted value.
+        """
+        if isinstance(value, PipelineBuilder):
+            # Build and run the referenced pipeline immediately
+            pipeline = value.build()
+            if not pipeline.steps:
+                context_msg = f" in '{context}'" if context else ""
+                raise ValueError(f"PipelineBuilder{context_msg} must have at least one step")
+
+            # Run the pipeline to populate the results
+            pipeline.run()
+
+            # Get the last step which now has results
+            last_step_name = list(pipeline.steps.keys())[-1]
+            last_step = pipeline.steps[last_step_name]
+
+            # Clear predecessors since this step is already executed and has its result
+            # This prevents the main pipeline from trying to resolve dependencies
+            # that don't exist in its own step dictionary
+            last_step._predecessors = set()
+            last_step._context_ref = None
+
+            return last_step
+        if isinstance(value, dict):
+            # Recursively convert nested dictionaries
+            return {k: self._convert_value(v, f"{context}.{k}" if context else k) for k, v in value.items()}
+        if isinstance(value, list):
+            # Recursively convert lists
+            return [
+                self._convert_value(item, f"{context}[{i}]" if context else f"[{i}]") for i, item in enumerate(value)
+            ]
+        return value
+
+    def build(self) -> Pipeline:
+        """Build the pipeline from the configured steps.
+
+        Returns:
+            A Pipeline object ready for execution.
+        """
+        return Pipeline(name=self.name, steps=self.steps)
+
+    def run(self) -> None:
+        """Build and run the pipeline immediately.
+
+        This is a convenience method equivalent to calling build().run().
+        """
+        pipeline = self.build()
+        pipeline.run()
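The core mechanism of the new builder is the `__getattr__` hook: an unknown method name is upper-cased, looked up in the action registry, and turned into a closure that appends a step and returns the builder so calls can be chained. A minimal standalone sketch of that dispatch pattern (not the package's implementation; the registry and step representation here are simplified stand-ins):

```python
# Sketch: dynamic method dispatch via __getattr__, as used by PipelineBuilder.
from collections.abc import Callable
from typing import Any


class TinyBuilder:
    """Illustrative only; the real builder creates PipelineStep objects and validates options."""

    # Stand-in for the package's pipeline_actions registry.
    registry: dict[str, Callable[..., str]] = {
        "READ_FILES": lambda **kw: f"READ_FILES({kw})",
        "TRANSFORM_FILTER": lambda **kw: f"TRANSFORM_FILTER({kw})",
    }

    def __init__(self) -> None:
        self.steps: list[str] = []

    def __getattr__(self, name: str) -> Callable[..., "TinyBuilder"]:
        action_name = name.upper()  # read_files -> READ_FILES
        if action_name not in self.registry:
            raise AttributeError(f"No action named {action_name!r}")

        def method(**kwargs: Any) -> "TinyBuilder":
            self.steps.append(self.registry[action_name](**kwargs))
            return self  # returning self keeps the chain going

        return method


builder = TinyBuilder().read_files(location="data/*.csv").transform_filter(condition="amount > 1000")
print(builder.steps)
```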
--- a/cloe_nessy-0.3.19.dist-info/METADATA
+++ b/cloe_nessy-1.0.1.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cloe-nessy
-Version: 0.3.19
+Version: 1.0.1
 Summary: Your friendly datalake monster.
 Project-URL: homepage, https://initions.com/
 Author-email: initions <ICSMC_EXT_PYPIORG@accenture.com>
@@ -12,11 +12,11 @@ Classifier: License :: OSI Approved :: MIT License
 Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python :: 3
 Classifier: Topic :: Database
-Requires-Python: <3.13,>=3.11
+Requires-Python: <3.14,>=3.11
 Requires-Dist: azure-identity<2.0.0,>=1.19.0
 Requires-Dist: cloe-logging[databricks,log-analytics]<0.4,>=0.3.8
 Requires-Dist: databricks-sdk<1.0.0,>=0.36.0
-Requires-Dist: fsspec<2025.7.1,>=2025.7.0
+Requires-Dist: fsspec<2025.12.1,>=2025.12.0
 Requires-Dist: httpx<1.0.0,>=0.27.2
 Requires-Dist: jinja2<4.0.0,>=3.1.4
 Requires-Dist: matplotlib<4.0.0,>=3.9.2
--- a/cloe_nessy-0.3.19.dist-info/RECORD
+++ b/cloe_nessy-1.0.1.dist-info/RECORD
@@ -11,13 +11,13 @@ cloe_nessy/clients/api_client/pagination_strategy.py,sha256=YcvAee8CrJiOxEvuFQ4K
 cloe_nessy/file_utilities/__init__.py,sha256=nY8H48jYHvTy0VYSRHVhZaFMlzfch4-T7y3N73tgMpI,73
 cloe_nessy/file_utilities/exceptions.py,sha256=RDeV2S6AQnFhFINRo84HDV_hk2RMrf5oNQ7GhHmAZy0,97
 cloe_nessy/file_utilities/factory.py,sha256=JONYGI8MCkNwG2_ujvjN3iB7BIdl7SqXKgV05YY_i4E,1735
-cloe_nessy/file_utilities/get_file_paths.py,sha256=wQCNBi7kgM32BSFlCuKFnORd9myjZUygpNm2-tF1F54,2980
+cloe_nessy/file_utilities/get_file_paths.py,sha256=Hgfwtat7SWIjmyQG0WCrp5kOW5O0RWtfv3tHmT3igBE,3008
 cloe_nessy/file_utilities/location_types.py,sha256=G0FjpEu4_inmWbu5tvs2FyZv2TIhmPgjWU_Rtvmd6i8,801
 cloe_nessy/file_utilities/strategies/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-cloe_nessy/file_utilities/strategies/base_strategy.py,sha256=2BdGdP8ThjIP4e_fv7apx7Hg_L6q3nsPdek4oPgN7CI,2833
-cloe_nessy/file_utilities/strategies/local_strategy.py,sha256=6OcEjzLvRTBT8FKXhkLI0befT48SHutGHFIXMq5Sq8E,2217
-cloe_nessy/file_utilities/strategies/onelake_strategy.py,sha256=RnQjWtWIFzFj-zPqzyZaPYIjtjXkgP-K7-VA8GhkNmg,1980
-cloe_nessy/file_utilities/strategies/utils_strategy.py,sha256=urayKfOUpSaXKgTs1KVK0TS7FWVrJ3k4OLKh35sCxAU,3194
+cloe_nessy/file_utilities/strategies/base_strategy.py,sha256=HwARDqb59i5HJyF-URbXKNGkOVcXEQn41_xD4W0DrXw,2861
+cloe_nessy/file_utilities/strategies/local_strategy.py,sha256=LxCCggFgH7s1heySy_JtROJCNsSyXkV5kd-VRLIf3ng,2245
+cloe_nessy/file_utilities/strategies/onelake_strategy.py,sha256=Pni_JkKqfbKoEMOCWbBJJdUIhpIFUPTUyIxSCSlPZRM,2009
+cloe_nessy/file_utilities/strategies/utils_strategy.py,sha256=w4nrS6IcPPN7UBFBwszCfxgTI6xSE5BdY2WiqGYsFyI,3223
 cloe_nessy/integration/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 cloe_nessy/integration/delta_loader/__init__.py,sha256=ZdBDde1uPtTCL_KAhilVmtVmmGvH5dHb05QsOozkteE,438
 cloe_nessy/integration/delta_loader/delta_load_options.py,sha256=bbPGhC0n8L6CmcmV91Xqq6fWRimxlUHUkr22uVqG0g4,1363
@@ -30,19 +30,19 @@ cloe_nessy/integration/delta_loader/strategies/delta_timestamp_loader.py,sha256=
 cloe_nessy/integration/reader/__init__.py,sha256=NWQx-v6aKE8YOHhsxfeaZnMVq4KLKyRWXzUduf5aVsk,265
 cloe_nessy/integration/reader/api_reader.py,sha256=FbOyfLVG1ryL2GC-MgE1uClHICsQKBj9yZbY4TG5qrk,19637
 cloe_nessy/integration/reader/catalog_reader.py,sha256=DlnykmFjV_v8SCBh3qaCvf24QM-6TdMFVHx5Mqv7Nvs,4850
-cloe_nessy/integration/reader/excel_reader.py,sha256=JGmxQ16ux0HT-MLvAUp-9XMdKUToMb7cdObciZNsYSs,8027
+cloe_nessy/integration/reader/excel_reader.py,sha256=QXm0MaE_-tW5ix-f_3Pgn-Vx7VG5jA_uSp858rVV7lA,8042
 cloe_nessy/integration/reader/exceptions.py,sha256=_A9jFpe_RIDZCGY76qzjic9bsshxns6yXPSl141dq1c,203
-cloe_nessy/integration/reader/file_reader.py,sha256=t5zF-cmZo1X0a1rki6ry1rSiFEu5uXRP2rNGd90fwoY,8163
+cloe_nessy/integration/reader/file_reader.py,sha256=FFqqu1h003FY2Df3ru-G1JO4Bg2Ai8Rzh58fjOCN7NM,8262
 cloe_nessy/integration/reader/reader.py,sha256=YHriYkzsBduBjfI2FnP03VEo15a8UCRZ_sXtre8eaEs,1041
 cloe_nessy/integration/writer/__init__.py,sha256=3yzCAGiWZdQWtsbzlTih01sxVTJV2DDYwvl34lEAUlE,243
 cloe_nessy/integration/writer/catalog_writer.py,sha256=dQeXmtfs7J6rP6Ye3OCvxBraFScFX_3SHs7Md58hEeM,5296
 cloe_nessy/integration/writer/file_writer.py,sha256=SUDbN13ZzDhbM8DpOGFgM_Gkg70To4L6Q182pXx2HRM,5454
 cloe_nessy/integration/writer/writer.py,sha256=elFPLFrWR-qVE9qnBtzzzhyRALLQcRVuOsPS0rNmRt4,1741
 cloe_nessy/integration/writer/delta_writer/__init__.py,sha256=h2CT6Hllmk0nodlek27uqwniCzVZKMkYcPGyG9K2Z24,164
-cloe_nessy/integration/writer/delta_writer/delta_append_writer.py,sha256=TbpW-j87_H9dcUza34uR6VWslJez406y3_5N1ip0SnM,4740
+cloe_nessy/integration/writer/delta_writer/delta_append_writer.py,sha256=nribgHmapp59v3Rw_AfJg0_BRYhP7x2IJIeE74Ia_6A,4748
 cloe_nessy/integration/writer/delta_writer/delta_merge_writer.py,sha256=Yp_q_ycasW2_wwmzty_6fZeBVcW_0o8gLrr6F1gaUjQ,10195
 cloe_nessy/integration/writer/delta_writer/delta_table_operation_type.py,sha256=m4YFY9_WgaOcnpBviVt3Km-w3wf3NF25wPS-n0NBGcE,970
-cloe_nessy/integration/writer/delta_writer/delta_writer_base.py,sha256=upUtDZMzwYFU0kzmkelVgkpFToXkrypcR3h_jvGjz14,8596
+cloe_nessy/integration/writer/delta_writer/delta_writer_base.py,sha256=O7hw7YOa2FEzBlzjwPfxQTxm0ZrlszIjjfsHTwE_OhU,8609
 cloe_nessy/integration/writer/delta_writer/exceptions.py,sha256=xPmGiYV0xQXauln5Oh34E5vbm0rVcs6xCh-SJSb2bw0,107
 cloe_nessy/logging/__init__.py,sha256=ySVCVbdyR3Dno_tl2ZfiER_7EVaDoQMHVkNyfdMZumY,65
 cloe_nessy/logging/logger_mixin.py,sha256=H8MyMEyb_kEDP0Ow5QStAFLuOkTIeUnneGaj916fKlU,7443
@@ -65,17 +65,18 @@ cloe_nessy/models/templates/create_volume.sql.j2,sha256=XIUf1cHcvAxcGTyhzUiv4xpQ
 cloe_nessy/object_manager/__init__.py,sha256=3sle0vNpPwBOkycxA3XVS9m4XZf5LD3Qd4NGxdqcHno,186
 cloe_nessy/object_manager/table_manager.py,sha256=4eQG-zMiuBpeJmvWdL3KdhHRiPFf8TS0RFNRp8Yz6rY,13887
 cloe_nessy/object_manager/volume_manager.py,sha256=6epd3KXzcNH04EvaKubAfLsaUm9qBMeT3KNvMK04gGs,2727
-cloe_nessy/pipeline/__init__.py,sha256=sespmJ5JsgyiFyZiedTiL2kg--zGIX7cjTYsD5vemEg,325
+cloe_nessy/pipeline/__init__.py,sha256=BUzL4HJaCXWmK7OgKaxdwK72JrrdzfzIvyxOGtM28U0,417
 cloe_nessy/pipeline/pipeline.py,sha256=L4wk3b06LNWRj01nnAkuQpeRrwFTyaV1xTpgYAg4sak,10819
 cloe_nessy/pipeline/pipeline_action.py,sha256=S7IVFdmG12fRBzHuE_DiWn7qlMtApz6IloVd2Fj31Sg,1944
+cloe_nessy/pipeline/pipeline_builder.py,sha256=_BBl43two0pherkTXZ-Yrpt6XcLW8Q-Z98qxbFIsMao,7929
 cloe_nessy/pipeline/pipeline_config.py,sha256=oVQ-IH4etTGZVVEnE-5iDPLYOtWpvDlltWFv1nevnqQ,3229
 cloe_nessy/pipeline/pipeline_context.py,sha256=eCOcjyE16rGRom3L85Gy_BbncfQD6i1x31yrWqZws-4,1881
 cloe_nessy/pipeline/pipeline_parsing_service.py,sha256=eeC4RbGBILGN6zkbUyjH-qGgEMtOWV4Kv_VxrHbHMY0,9021
 cloe_nessy/pipeline/pipeline_plotting_service.py,sha256=goMQj73FzUVchKn5c2SsPcWR6fr7DtVkVrcQfJsKCq4,13111
 cloe_nessy/pipeline/pipeline_step.py,sha256=oTnlvRpB0fbOBQXbPe1URstA5fv-97igCHt_41fKCAk,2082
-cloe_nessy/pipeline/actions/__init__.py,sha256=Qad9kxOQHoMQ1sj-4AxABNNIdaN5QkZAB14DUFKAtUA,2808
+cloe_nessy/pipeline/actions/__init__.py,sha256=FfAnSIl-0T6pnaWhClkDqV8nfTdvLvZZJdwycsZMLPw,2990
 cloe_nessy/pipeline/actions/read_api.py,sha256=MAc7QfmhnaRUMdE09Ywt41RSAsuW4co8zF0zXHwbM8U,16193
-cloe_nessy/pipeline/actions/read_catalog_table.py,sha256=EkP3JSI7VQMkvUsb97ieUeGnnfvyyUI7egvqNWMqK0I,6894
+cloe_nessy/pipeline/actions/read_catalog_table.py,sha256=sx3dezd33c1FawMrxORwhK5GNo1IpjCyuLATWz7esZ0,6735
 cloe_nessy/pipeline/actions/read_excel.py,sha256=IG_VmDEt1TvGVEO0SY9Fm3awHNjfisR1_7DUmhC3NEE,7968
 cloe_nessy/pipeline/actions/read_files.py,sha256=hRcM7wG35vxxLVajW3SK5euHW02qxiXCYSkIl11xiQ0,7308
 cloe_nessy/pipeline/actions/read_metadata_yaml.py,sha256=i8fQceV63eAqx_x0ANisCkXWfMHyhqsfFHVFH5yP2po,3544
@@ -89,16 +90,18 @@ cloe_nessy/pipeline/actions/transform_distinct.py,sha256=c7aBxANyqT4aKhm0cSELDtD
 cloe_nessy/pipeline/actions/transform_filter.py,sha256=Nz_ggRfKIcNzYFfFOsgq1QeatjdEis0up4I7cOWBdyo,1446
 cloe_nessy/pipeline/actions/transform_generic_sql.py,sha256=_naWfmPdYAUKjPNeHu5qJAohOL7DHCSYz_kwoeRv3OI,2741
 cloe_nessy/pipeline/actions/transform_group_aggregate.py,sha256=KUHeeP-RIDi34dpbsPEJkzea5zFJA6MuyjNpOsFud9o,4045
-cloe_nessy/pipeline/actions/transform_hash_columns.py,sha256=H8j_Xadnm3npVNA_nu7Be7v0bJV20ELKMxSsVHHl6CY,8407
+cloe_nessy/pipeline/actions/transform_hash_columns.py,sha256=M5_wolJwzJpPTSrZq4yWV3TH7H6BGqbjJkJCwtqPlQo,8507
 cloe_nessy/pipeline/actions/transform_join.py,sha256=ez1M1wVc9khOZj1swMArJbBKXxEpjenUHrW1wL8H330,7200
 cloe_nessy/pipeline/actions/transform_json_normalize.py,sha256=petF7pnNq1EKc8MqVdG0weFALAHNILSe_eAu4Z5XxIo,4833
+cloe_nessy/pipeline/actions/transform_regex_extract.py,sha256=vMtUW0s_oXy8DC1-4Xh-WQN3CCp8jXYsJiFYvGdYrqE,6390
 cloe_nessy/pipeline/actions/transform_rename_columns.py,sha256=4zJcPCONMU4C67qeuzsrX3AORRRHoq_selUI7FJyeg0,1952
 cloe_nessy/pipeline/actions/transform_replace_values.py,sha256=1OPHTrjcphfyGepcO7ozYfeqfwA18pjlyHpVKUS_AAU,2049
 cloe_nessy/pipeline/actions/transform_select_columns.py,sha256=-GhSEsb7iNnZIsYRm3BG9BX4_qUDJMbpj1DsKPY046w,4574
 cloe_nessy/pipeline/actions/transform_union.py,sha256=SZtEzh567CIExUj9yMEgshE28h4dXKT7Wr2TDj4zB4k,2718
+cloe_nessy/pipeline/actions/transform_with_column.py,sha256=c-E1yYkeYmovbN1maT7ImpdQlW0nYvYsHCtDvfe4wt8,3357
 cloe_nessy/pipeline/actions/write_catalog_table.py,sha256=FyC0scQU8Ul3Uigpk6IN2IJpf_4jRjAqF5yHtDVwG00,4852
-cloe_nessy/pipeline/actions/write_delta_append.py,sha256=2F5qnKPsY_F-2672Ce4Gub7qdna157jEqHHc429fO2A,2962
-cloe_nessy/pipeline/actions/write_delta_merge.py,sha256=zcOk4ytZFUxyGY8U2fdFPLFnw2g_yhaS_vOx_e3wCuE,5847
+cloe_nessy/pipeline/actions/write_delta_append.py,sha256=e1g4mDhwAZdKyt4Gb7ZzHcQrJ1duSl8qOn6ONizRsoM,2934
+cloe_nessy/pipeline/actions/write_delta_merge.py,sha256=fwinlTeZoDuTyrbln5vMu1UJ1LG8ZQrus3LoCVF__I4,5819
 cloe_nessy/pipeline/actions/write_file.py,sha256=JZ8UZslxUn_ttYt5wDyvtHFq2FqYk3vOR8kvExJI8pk,3212
 cloe_nessy/pipeline/utils/__init__.py,sha256=xi02UjBMiXWD7b9gDvww4gyRyowb0eRd_6Wbu0F_cro,118
 cloe_nessy/pipeline/utils/delta_load_utils.py,sha256=KitMNruxePEkecI0h4Jint1JwJpaEog5mCOchMkgan8,1495
@@ -110,6 +113,6 @@ cloe_nessy/settings/settings.py,sha256=I4n129lrujriW-d8q4as2Kb4_kI932ModfZ5Ow_Up
 cloe_nessy/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 cloe_nessy/utils/column_names.py,sha256=dCNtm61mc5aLkY2oE4rlfN3VLCrpot6fOESjAZmCmhA,361
 cloe_nessy/utils/file_and_directory_handler.py,sha256=r2EVt9xG81p6ScaJCwETC5an6pMT6WseB0jMOR-JlpU,602
-cloe_nessy-0.3.19.dist-info/METADATA,sha256=O3LES1mWSPONQE6q47c_j9s_sQcvU2a5RfL1WQW3JPk,3290
-cloe_nessy-0.3.19.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-cloe_nessy-0.3.19.dist-info/RECORD,,
+cloe_nessy-1.0.1.dist-info/METADATA,sha256=qLn3XYfGsw2pW-pPtUUidtcHZiUtIwOESWY8LCenGEY,3291
+cloe_nessy-1.0.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+cloe_nessy-1.0.1.dist-info/RECORD,,