cloe-nessy 0.3.19__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cloe_nessy/file_utilities/get_file_paths.py +2 -1
- cloe_nessy/file_utilities/strategies/base_strategy.py +2 -1
- cloe_nessy/file_utilities/strategies/local_strategy.py +2 -1
- cloe_nessy/file_utilities/strategies/onelake_strategy.py +3 -1
- cloe_nessy/file_utilities/strategies/utils_strategy.py +3 -1
- cloe_nessy/integration/writer/delta_writer/delta_append_writer.py +1 -1
- cloe_nessy/integration/writer/delta_writer/delta_writer_base.py +1 -1
- cloe_nessy/pipeline/__init__.py +9 -1
- cloe_nessy/pipeline/actions/__init__.py +2 -0
- cloe_nessy/pipeline/actions/read_catalog_table.py +0 -2
- cloe_nessy/pipeline/actions/transform_hash_columns.py +4 -2
- cloe_nessy/pipeline/actions/transform_with_column.py +104 -0
- cloe_nessy/pipeline/actions/write_delta_append.py +0 -3
- cloe_nessy/pipeline/actions/write_delta_merge.py +0 -3
- cloe_nessy/pipeline/pipeline_builder.py +210 -0
- {cloe_nessy-0.3.19.dist-info → cloe_nessy-1.0.0.dist-info}/METADATA +2 -2
- {cloe_nessy-0.3.19.dist-info → cloe_nessy-1.0.0.dist-info}/RECORD +18 -16
- {cloe_nessy-0.3.19.dist-info → cloe_nessy-1.0.0.dist-info}/WHEEL +0 -0
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import os
|
|
2
|
+
from typing import Any
|
|
2
3
|
|
|
3
4
|
from ..logging.logger_mixin import LoggerMixin
|
|
4
5
|
from .factory import FileRetrievalFactory
|
|
@@ -9,7 +10,7 @@ def get_file_paths(
|
|
|
9
10
|
location: str,
|
|
10
11
|
file_name_pattern: str | None = None,
|
|
11
12
|
search_subdirs: bool = True,
|
|
12
|
-
**kwargs,
|
|
13
|
+
**kwargs: Any,
|
|
13
14
|
) -> list[str]:
|
|
14
15
|
"""Retrieves file paths from a specified location based on the provided criteria.
|
|
15
16
|
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
from abc import ABC, abstractmethod
|
|
2
|
+
from typing import Any
|
|
2
3
|
|
|
3
4
|
|
|
4
5
|
class FileRetrievalStrategy(ABC):
|
|
@@ -15,7 +16,7 @@ class FileRetrievalStrategy(ABC):
|
|
|
15
16
|
location: str,
|
|
16
17
|
extension: str | None = None,
|
|
17
18
|
search_subdirs: bool = True,
|
|
18
|
-
**kwargs,
|
|
19
|
+
**kwargs: Any,
|
|
19
20
|
) -> list[str]:
|
|
20
21
|
"""Retrieves a list of file paths based on the specified criteria.
|
|
21
22
|
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import os
|
|
2
|
+
from typing import Any
|
|
2
3
|
|
|
3
4
|
from ..exceptions import FileUtilitiesError
|
|
4
5
|
from .base_strategy import FileRetrievalStrategy
|
|
@@ -16,7 +17,7 @@ class LocalDirectoryStrategy(FileRetrievalStrategy):
|
|
|
16
17
|
location: str,
|
|
17
18
|
extension: str | None = None,
|
|
18
19
|
search_subdirs: bool = True,
|
|
19
|
-
**kwargs, # noqa: ARG004
|
|
20
|
+
**kwargs: Any, # noqa: ARG004
|
|
20
21
|
) -> list[str]:
|
|
21
22
|
"""Recursively retrieves all files with a specified extension from a given directory and its subdirectories.
|
|
22
23
|
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
|
|
1
3
|
from .base_strategy import FileRetrievalStrategy
|
|
2
4
|
from .local_strategy import LocalDirectoryStrategy
|
|
3
5
|
|
|
@@ -10,7 +12,7 @@ class OneLakeStrategy(FileRetrievalStrategy):
|
|
|
10
12
|
location: str,
|
|
11
13
|
extension: str | None = None,
|
|
12
14
|
search_subdirs: bool = True,
|
|
13
|
-
**kwargs,
|
|
15
|
+
**kwargs: Any,
|
|
14
16
|
) -> list:
|
|
15
17
|
"""Recursively retrieves all files with a specified extension from a given directory and its subdirectories.
|
|
16
18
|
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
|
|
1
3
|
from ...session import SessionManager
|
|
2
4
|
from ..exceptions import FileUtilitiesError
|
|
3
5
|
from .base_strategy import FileRetrievalStrategy
|
|
@@ -15,7 +17,7 @@ class UtilsStrategy(FileRetrievalStrategy):
|
|
|
15
17
|
location: str,
|
|
16
18
|
extension: str | None = None,
|
|
17
19
|
search_subdirs: bool = True,
|
|
18
|
-
**kwargs, # noqa: ARG004
|
|
20
|
+
**kwargs: Any, # noqa: ARG004
|
|
19
21
|
) -> list:
|
|
20
22
|
"""Recursively retrieves all files with a specified extension from a given directory and its subdirectories.
|
|
21
23
|
|
|
@@ -151,7 +151,7 @@ class BaseDeltaWriter(BaseWriter, ABC):
|
|
|
151
151
|
return " AND ".join([f"target.`{c}` <=> source.`{c}`" for c in columns])
|
|
152
152
|
|
|
153
153
|
@staticmethod
|
|
154
|
-
def _partition_pruning_conditions(df, partition_cols: list[str] | None) -> str:
|
|
154
|
+
def _partition_pruning_conditions(df: "DataFrame", partition_cols: list[str] | None) -> str:
|
|
155
155
|
"""Generates partition pruning conditions for an SQL query.
|
|
156
156
|
|
|
157
157
|
This function is used to optimize the performance of an SQL query by only scanning the
|
cloe_nessy/pipeline/__init__.py
CHANGED
|
@@ -1,7 +1,15 @@
|
|
|
1
1
|
from .pipeline import Pipeline
|
|
2
2
|
from .pipeline_action import PipelineAction
|
|
3
|
+
from .pipeline_builder import PipelineBuilder
|
|
3
4
|
from .pipeline_context import PipelineContext
|
|
4
5
|
from .pipeline_parsing_service import PipelineParsingService
|
|
5
6
|
from .pipeline_step import PipelineStep
|
|
6
7
|
|
|
7
|
-
__all__ = [
|
|
8
|
+
__all__ = [
|
|
9
|
+
"Pipeline",
|
|
10
|
+
"PipelineBuilder",
|
|
11
|
+
"PipelineParsingService",
|
|
12
|
+
"PipelineContext",
|
|
13
|
+
"PipelineAction",
|
|
14
|
+
"PipelineStep",
|
|
15
|
+
]
|
|
@@ -23,6 +23,7 @@ from .transform_rename_columns import TransformRenameColumnsAction
|
|
|
23
23
|
from .transform_replace_values import TransformReplaceValuesAction
|
|
24
24
|
from .transform_select_columns import TransformSelectColumnsAction
|
|
25
25
|
from .transform_union import TransformUnionAction
|
|
26
|
+
from .transform_with_column import TransformWithColumnAction
|
|
26
27
|
from .write_catalog_table import WriteCatalogTableAction
|
|
27
28
|
from .write_delta_append import WriteDeltaAppendAction
|
|
28
29
|
from .write_delta_merge import WriteDeltaMergeAction
|
|
@@ -58,6 +59,7 @@ __all__ = [
|
|
|
58
59
|
"TransformRenameColumnsAction",
|
|
59
60
|
"TransformReplaceValuesAction",
|
|
60
61
|
"TransformSelectColumnsAction",
|
|
62
|
+
"TransformWithColumnAction",
|
|
61
63
|
"WriteCatalogTableAction",
|
|
62
64
|
"WriteDeltaAppendAction",
|
|
63
65
|
"WriteDeltaMergeAction",
|
|
@@ -96,8 +96,6 @@ class ReadCatalogTableAction(PipelineAction):
|
|
|
96
96
|
configuration for the streaming query, such as processing time or
|
|
97
97
|
continuous processing.
|
|
98
98
|
behavior, such as filters or reading modes. Defaults to None.
|
|
99
|
-
delta_load_options: Options for delta loading, if applicable.
|
|
100
|
-
Configures the [`DeltaLoader`][cloe_nessy.integration.delta_loader].
|
|
101
99
|
|
|
102
100
|
Raises:
|
|
103
101
|
ValueError: If neither `table_identifier` nor `table_metadata.identifier` in the `context` is provided.
|
|
@@ -25,7 +25,8 @@ class HashSettings(BaseModel):
|
|
|
25
25
|
bits: int | None = Field(default=None, description="Only required for sha2")
|
|
26
26
|
|
|
27
27
|
@model_validator(mode="before")
|
|
28
|
-
|
|
28
|
+
@classmethod
|
|
29
|
+
def validate_all(cls: type["HashSettings"], values: Any) -> Any:
|
|
29
30
|
"""Validates the input values for a hashing operation before model instantiation.
|
|
30
31
|
|
|
31
32
|
This method performs the following checks:
|
|
@@ -91,7 +92,8 @@ class HashConfig(BaseModel):
|
|
|
91
92
|
hash_config: dict[str, HashSettings]
|
|
92
93
|
|
|
93
94
|
@model_validator(mode="before")
|
|
94
|
-
|
|
95
|
+
@classmethod
|
|
96
|
+
def validate_config(cls: type["HashConfig"], values: Any) -> Any:
|
|
95
97
|
"""Validates the hash configuration provided in the model.
|
|
96
98
|
|
|
97
99
|
This method is executed in "before" mode to ensure that the `hash_config`
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""Transform action to add or update a column using a SQL expression."""
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from pyspark.sql import functions as F
|
|
6
|
+
|
|
7
|
+
from cloe_nessy.pipeline.pipeline_action import PipelineAction
|
|
8
|
+
from cloe_nessy.pipeline.pipeline_context import PipelineContext
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class TransformWithColumnAction(PipelineAction):
    """Create or overwrite a single DataFrame column from a SQL expression.

    The expression string is handed to PySpark's `expr()` and the resulting
    column is attached to the DataFrame with `withColumn()`.

    Examples:
        === "Create new column"
            ```yaml
            Create Full Name:
                action: TRANSFORM_WITH_COLUMN
                options:
                    column_name: full_name
                    expression: concat(first_name, ' ', last_name)
            ```

        === "Update existing column"
            ```yaml
            Lowercase Email:
                action: TRANSFORM_WITH_COLUMN
                options:
                    column_name: email
                    expression: lower(email)
            ```

        === "Calculated column"
            ```yaml
            Calculate Total:
                action: TRANSFORM_WITH_COLUMN
                options:
                    column_name: total_price
                    expression: price * quantity * (1 + tax_rate)
            ```

        === "Extract date parts"
            ```yaml
            Extract Year:
                action: TRANSFORM_WITH_COLUMN
                options:
                    column_name: year
                    expression: year(order_date)
            ```
    """

    name: str = "TRANSFORM_WITH_COLUMN"

    def run(
        self,
        context: PipelineContext,
        *,
        column_name: str = "",
        expression: str = "",
        **_: Any,
    ) -> PipelineContext:
        """Evaluate `expression` and store the result in `column_name`.

        Args:
            context: Pipeline context whose `data` holds the DataFrame to transform.
            column_name: Target column; it is passed to `withColumn()`.
            expression: SQL expression string evaluated via `F.expr()`.
            **_: Extra keyword arguments, accepted and ignored.

        Returns:
            A context created with `context.from_existing(data=...)` carrying
            the transformed DataFrame.

        Raises:
            ValueError: If `column_name` is empty.
            ValueError: If `expression` is empty.
            ValueError: If `context.data` is None.
            Exception: Whatever `withColumn()` / `F.expr()` raises is logged
                and re-raised unchanged.
        """
        # Required options are checked in declaration order so the first
        # missing one determines the error message.
        required_options = (
            (column_name, "No column_name provided."),
            (expression, "No expression provided."),
        )
        for supplied, complaint in required_options:
            if not supplied:
                raise ValueError(complaint)

        source_df = context.data
        if source_df is None:
            raise ValueError("Data from context is required for transform_with_column")

        self._console_logger.info(f"Adding/updating column '{column_name}' with expression: {expression}")

        try:
            result_df = source_df.withColumn(column_name, F.expr(expression))
        except Exception as exc:
            # Log for the operator, then let the original exception propagate.
            self._console_logger.error(f"Failed to evaluate expression '{expression}' for column '{column_name}': {exc}")
            raise
        else:
            self._console_logger.info(f"Successfully added/updated column '{column_name}'")

        return context.from_existing(data=result_df)
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
from collections import OrderedDict
|
|
2
|
+
from collections.abc import Callable
|
|
3
|
+
from typing import Any, Self
|
|
4
|
+
|
|
5
|
+
from .pipeline import Pipeline
|
|
6
|
+
from .pipeline_step import PipelineStep
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class PipelineBuilder:
    """Fluent API builder for creating Nessy pipelines programmatically.

    This class provides a chainable interface for building pipelines using method calls
    instead of YAML configuration. Methods for pipeline actions are resolved dynamically
    in `__getattr__` by upper-casing the attribute name and looking it up in the
    `pipeline_actions` registry.

    Example:
        ```python
        pipeline = (PipelineBuilder("My Pipeline")
            .read_files(location="data/*.csv", extension="csv")
            .transform_clean_column_names()
            .transform_filter(condition="amount > 1000")
            .write_catalog_table(catalog="prod", schema="sales", table="results")
            .build())

        pipeline.run()
        ```
    """

    def __init__(self, name: str) -> None:
        """Initialize the pipeline builder.

        Args:
            name: The name of the pipeline.
        """
        self.name = name
        # Insertion order doubles as execution order: each new step takes the
        # most recently added step as its context reference.
        self.steps: OrderedDict[str, PipelineStep] = OrderedDict()
        # Only used to generate default step names; bumped once per added step.
        self._step_counter = 0

    def __getattr__(self, name: str) -> Callable[..., "PipelineBuilder"]:
        """Dynamically create methods for pipeline actions.

        This method is called when an attribute that doesn't exist is accessed.
        It converts method calls like `read_files()` into the corresponding PipelineAction.

        Args:
            name: The method name being called.

        Returns:
            A callable that adds the corresponding pipeline step. It accepts keyword
            arguments only and forwards them to `_add_step`, so a `step_name` keyword
            sets the step's name and everything else becomes an action option.

        Raises:
            AttributeError: If the method name doesn't correspond to a known action.
        """
        # Lazy import to avoid circular import issues
        from .actions import pipeline_actions

        # Convert method name to action name (e.g., read_files -> READ_FILES)
        action_name = name.upper()

        if action_name not in pipeline_actions:
            raise AttributeError(
                f"PipelineBuilder has no method '{name}'. Available actions: {list(pipeline_actions.keys())}"
            )

        action_class = pipeline_actions[action_name]

        def method(**kwargs: Any) -> "PipelineBuilder":
            return self._add_step(action_class, **kwargs)

        return method

    def _add_step(self, action_class: type, step_name: str | None = None, **options: Any) -> Self:
        """Add a step to the pipeline.

        Args:
            action_class: The PipelineAction class to instantiate.
            step_name: Optional custom name for the step. Defaults to
                `step_<counter>_<ActionClassName>`.
            **options: Options to pass to the action. PipelineBuilder instances
                (direct, in lists, or in nested dicts) are converted first, which
                builds and runs them immediately (see `_convert_value`).

        Returns:
            Self for method chaining.

        Raises:
            ValueError: If a step with the given name already exists.
        """
        if step_name is None:
            step_name = f"step_{self._step_counter:03d}_{action_class.__name__}"

        # Validate that step name is unique
        if step_name in self.steps:
            raise ValueError(
                f"A step with name '{step_name}' already exists in the pipeline. "
                f"Please provide a unique step_name. "
                f"Existing steps: {list(self.steps.keys())}"
            )

        # Convert any PipelineBuilder instances in options to PipelineStep references
        options = self._convert_builder_references(options)

        # The previously added step (if any) supplies the context for this one.
        context_ref = next(reversed(self.steps), None)

        step = PipelineStep(name=step_name, action=action_class(), options=options, _context_ref=context_ref)

        # Remove any predecessors that are from already-executed external pipelines
        # (these steps have results but aren't in our pipeline). Step references are
        # collected recursively so that steps nested in lists or dicts are treated
        # the same way as directly passed ones.
        # NOTE(review): a referenced external step whose name equals one of this
        # builder's own step names is not pruned; auto-generated names of two
        # builders can coincide — confirm how Pipeline resolves that case.
        referenced_names = self._referenced_step_names(options)
        external_predecessors = {
            pred_name
            for pred_name in step._predecessors
            if pred_name not in self.steps and pred_name != context_ref and pred_name in referenced_names
        }
        step._predecessors -= external_predecessors

        self.steps[step_name] = step
        self._step_counter += 1
        return self

    @staticmethod
    def _referenced_step_names(value: Any) -> set[str]:
        """Collect the names of all PipelineStep objects found in `value`.

        Walks the same container shapes that `_convert_value` handles: a direct
        value, lists, and (nested) dictionaries.

        Args:
            value: An option value or a container of option values.

        Returns:
            The set of `name` attributes of every PipelineStep encountered.
        """
        if isinstance(value, PipelineStep):
            return {value.name}
        if isinstance(value, dict):
            children = list(value.values())
        elif isinstance(value, list):
            children = value
        else:
            return set()

        names: set[str] = set()
        for child in children:
            names |= PipelineBuilder._referenced_step_names(child)
        return names

    def _convert_builder_references(self, options: dict[str, Any]) -> dict[str, Any]:
        """Convert any PipelineBuilder instances in options to PipelineStep references.

        This method recursively processes options to find PipelineBuilder instances and
        converts them to their last step's PipelineStep reference. This allows users to
        pass PipelineBuilder instances directly to actions that expect PipelineStep references.

        Handles PipelineBuilder instances in:
        - Direct values
        - Lists
        - Nested dictionaries

        Args:
            options: Dictionary of options that may contain PipelineBuilder instances.

        Returns:
            A new dictionary with PipelineBuilder instances converted to PipelineStep
            references; the input dictionary is not modified.

        Raises:
            ValueError: If a PipelineBuilder has no steps.
        """
        return {key: self._convert_value(value, key) for key, value in options.items()}

    def _convert_value(self, value: Any, context: str = "") -> Any:
        """Recursively convert a value, handling PipelineBuilder instances.

        When a PipelineBuilder is passed as a value, it is built and executed
        immediately and its last step is returned as the reference. This allows the
        pipeline to be run before the main pipeline that references it.

        Args:
            value: The value to convert.
            context: Context string for error messages (e.g., key name).

        Returns:
            The converted value: a PipelineStep for a PipelineBuilder, a new dict or
            list for dicts and lists, and the value itself otherwise.

        Raises:
            ValueError: If a PipelineBuilder has no steps.
        """
        if isinstance(value, PipelineBuilder):
            # Reject empty builders up front so nothing is built or run for them.
            if not value.steps:
                context_msg = f" in '{context}'" if context else ""
                raise ValueError(f"PipelineBuilder{context_msg} must have at least one step")

            # Build and run the referenced pipeline immediately so that its
            # last step carries results by the time it is referenced.
            pipeline = value.build()
            pipeline.run()

            # Get the last step which now has results
            last_step = list(pipeline.steps.values())[-1]

            # Clear predecessors since this step is already executed and has its result
            # This prevents the main pipeline from trying to resolve dependencies
            # that don't exist in its own step dictionary
            last_step._predecessors = set()
            last_step._context_ref = None

            return last_step

        if isinstance(value, dict):
            # Recursively convert nested dictionaries
            return {k: self._convert_value(v, f"{context}.{k}" if context else k) for k, v in value.items()}

        if isinstance(value, list):
            # Recursively convert lists
            return [
                self._convert_value(item, f"{context}[{i}]" if context else f"[{i}]") for i, item in enumerate(value)
            ]

        return value

    def build(self) -> Pipeline:
        """Build the pipeline from the configured steps.

        The pipeline receives a shallow copy of the step mapping, so steps added to
        this builder afterwards do not appear in a pipeline that was already built.
        The PipelineStep objects themselves are shared, not copied.

        Returns:
            A Pipeline object ready for execution.
        """
        return Pipeline(name=self.name, steps=OrderedDict(self.steps))

    def run(self) -> None:
        """Build and run the pipeline immediately.

        This is a convenience method equivalent to calling build().run().
        """
        self.build().run()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cloe-nessy
|
|
3
|
-
Version: 0.3.19
|
|
3
|
+
Version: 1.0.0
|
|
4
4
|
Summary: Your friendly datalake monster.
|
|
5
5
|
Project-URL: homepage, https://initions.com/
|
|
6
6
|
Author-email: initions <ICSMC_EXT_PYPIORG@accenture.com>
|
|
@@ -16,7 +16,7 @@ Requires-Python: <3.13,>=3.11
|
|
|
16
16
|
Requires-Dist: azure-identity<2.0.0,>=1.19.0
|
|
17
17
|
Requires-Dist: cloe-logging[databricks,log-analytics]<0.4,>=0.3.8
|
|
18
18
|
Requires-Dist: databricks-sdk<1.0.0,>=0.36.0
|
|
19
|
-
Requires-Dist: fsspec<2025.
|
|
19
|
+
Requires-Dist: fsspec<2025.12.1,>=2025.12.0
|
|
20
20
|
Requires-Dist: httpx<1.0.0,>=0.27.2
|
|
21
21
|
Requires-Dist: jinja2<4.0.0,>=3.1.4
|
|
22
22
|
Requires-Dist: matplotlib<4.0.0,>=3.9.2
|
|
@@ -11,13 +11,13 @@ cloe_nessy/clients/api_client/pagination_strategy.py,sha256=YcvAee8CrJiOxEvuFQ4K
|
|
|
11
11
|
cloe_nessy/file_utilities/__init__.py,sha256=nY8H48jYHvTy0VYSRHVhZaFMlzfch4-T7y3N73tgMpI,73
|
|
12
12
|
cloe_nessy/file_utilities/exceptions.py,sha256=RDeV2S6AQnFhFINRo84HDV_hk2RMrf5oNQ7GhHmAZy0,97
|
|
13
13
|
cloe_nessy/file_utilities/factory.py,sha256=JONYGI8MCkNwG2_ujvjN3iB7BIdl7SqXKgV05YY_i4E,1735
|
|
14
|
-
cloe_nessy/file_utilities/get_file_paths.py,sha256=
|
|
14
|
+
cloe_nessy/file_utilities/get_file_paths.py,sha256=Hgfwtat7SWIjmyQG0WCrp5kOW5O0RWtfv3tHmT3igBE,3008
|
|
15
15
|
cloe_nessy/file_utilities/location_types.py,sha256=G0FjpEu4_inmWbu5tvs2FyZv2TIhmPgjWU_Rtvmd6i8,801
|
|
16
16
|
cloe_nessy/file_utilities/strategies/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
17
|
-
cloe_nessy/file_utilities/strategies/base_strategy.py,sha256=
|
|
18
|
-
cloe_nessy/file_utilities/strategies/local_strategy.py,sha256=
|
|
19
|
-
cloe_nessy/file_utilities/strategies/onelake_strategy.py,sha256=
|
|
20
|
-
cloe_nessy/file_utilities/strategies/utils_strategy.py,sha256=
|
|
17
|
+
cloe_nessy/file_utilities/strategies/base_strategy.py,sha256=HwARDqb59i5HJyF-URbXKNGkOVcXEQn41_xD4W0DrXw,2861
|
|
18
|
+
cloe_nessy/file_utilities/strategies/local_strategy.py,sha256=LxCCggFgH7s1heySy_JtROJCNsSyXkV5kd-VRLIf3ng,2245
|
|
19
|
+
cloe_nessy/file_utilities/strategies/onelake_strategy.py,sha256=Pni_JkKqfbKoEMOCWbBJJdUIhpIFUPTUyIxSCSlPZRM,2009
|
|
20
|
+
cloe_nessy/file_utilities/strategies/utils_strategy.py,sha256=w4nrS6IcPPN7UBFBwszCfxgTI6xSE5BdY2WiqGYsFyI,3223
|
|
21
21
|
cloe_nessy/integration/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
22
22
|
cloe_nessy/integration/delta_loader/__init__.py,sha256=ZdBDde1uPtTCL_KAhilVmtVmmGvH5dHb05QsOozkteE,438
|
|
23
23
|
cloe_nessy/integration/delta_loader/delta_load_options.py,sha256=bbPGhC0n8L6CmcmV91Xqq6fWRimxlUHUkr22uVqG0g4,1363
|
|
@@ -39,10 +39,10 @@ cloe_nessy/integration/writer/catalog_writer.py,sha256=dQeXmtfs7J6rP6Ye3OCvxBraF
|
|
|
39
39
|
cloe_nessy/integration/writer/file_writer.py,sha256=SUDbN13ZzDhbM8DpOGFgM_Gkg70To4L6Q182pXx2HRM,5454
|
|
40
40
|
cloe_nessy/integration/writer/writer.py,sha256=elFPLFrWR-qVE9qnBtzzzhyRALLQcRVuOsPS0rNmRt4,1741
|
|
41
41
|
cloe_nessy/integration/writer/delta_writer/__init__.py,sha256=h2CT6Hllmk0nodlek27uqwniCzVZKMkYcPGyG9K2Z24,164
|
|
42
|
-
cloe_nessy/integration/writer/delta_writer/delta_append_writer.py,sha256=
|
|
42
|
+
cloe_nessy/integration/writer/delta_writer/delta_append_writer.py,sha256=nribgHmapp59v3Rw_AfJg0_BRYhP7x2IJIeE74Ia_6A,4748
|
|
43
43
|
cloe_nessy/integration/writer/delta_writer/delta_merge_writer.py,sha256=Yp_q_ycasW2_wwmzty_6fZeBVcW_0o8gLrr6F1gaUjQ,10195
|
|
44
44
|
cloe_nessy/integration/writer/delta_writer/delta_table_operation_type.py,sha256=m4YFY9_WgaOcnpBviVt3Km-w3wf3NF25wPS-n0NBGcE,970
|
|
45
|
-
cloe_nessy/integration/writer/delta_writer/delta_writer_base.py,sha256=
|
|
45
|
+
cloe_nessy/integration/writer/delta_writer/delta_writer_base.py,sha256=O7hw7YOa2FEzBlzjwPfxQTxm0ZrlszIjjfsHTwE_OhU,8609
|
|
46
46
|
cloe_nessy/integration/writer/delta_writer/exceptions.py,sha256=xPmGiYV0xQXauln5Oh34E5vbm0rVcs6xCh-SJSb2bw0,107
|
|
47
47
|
cloe_nessy/logging/__init__.py,sha256=ySVCVbdyR3Dno_tl2ZfiER_7EVaDoQMHVkNyfdMZumY,65
|
|
48
48
|
cloe_nessy/logging/logger_mixin.py,sha256=H8MyMEyb_kEDP0Ow5QStAFLuOkTIeUnneGaj916fKlU,7443
|
|
@@ -65,17 +65,18 @@ cloe_nessy/models/templates/create_volume.sql.j2,sha256=XIUf1cHcvAxcGTyhzUiv4xpQ
|
|
|
65
65
|
cloe_nessy/object_manager/__init__.py,sha256=3sle0vNpPwBOkycxA3XVS9m4XZf5LD3Qd4NGxdqcHno,186
|
|
66
66
|
cloe_nessy/object_manager/table_manager.py,sha256=4eQG-zMiuBpeJmvWdL3KdhHRiPFf8TS0RFNRp8Yz6rY,13887
|
|
67
67
|
cloe_nessy/object_manager/volume_manager.py,sha256=6epd3KXzcNH04EvaKubAfLsaUm9qBMeT3KNvMK04gGs,2727
|
|
68
|
-
cloe_nessy/pipeline/__init__.py,sha256=
|
|
68
|
+
cloe_nessy/pipeline/__init__.py,sha256=BUzL4HJaCXWmK7OgKaxdwK72JrrdzfzIvyxOGtM28U0,417
|
|
69
69
|
cloe_nessy/pipeline/pipeline.py,sha256=L4wk3b06LNWRj01nnAkuQpeRrwFTyaV1xTpgYAg4sak,10819
|
|
70
70
|
cloe_nessy/pipeline/pipeline_action.py,sha256=S7IVFdmG12fRBzHuE_DiWn7qlMtApz6IloVd2Fj31Sg,1944
|
|
71
|
+
cloe_nessy/pipeline/pipeline_builder.py,sha256=_BBl43two0pherkTXZ-Yrpt6XcLW8Q-Z98qxbFIsMao,7929
|
|
71
72
|
cloe_nessy/pipeline/pipeline_config.py,sha256=oVQ-IH4etTGZVVEnE-5iDPLYOtWpvDlltWFv1nevnqQ,3229
|
|
72
73
|
cloe_nessy/pipeline/pipeline_context.py,sha256=eCOcjyE16rGRom3L85Gy_BbncfQD6i1x31yrWqZws-4,1881
|
|
73
74
|
cloe_nessy/pipeline/pipeline_parsing_service.py,sha256=eeC4RbGBILGN6zkbUyjH-qGgEMtOWV4Kv_VxrHbHMY0,9021
|
|
74
75
|
cloe_nessy/pipeline/pipeline_plotting_service.py,sha256=goMQj73FzUVchKn5c2SsPcWR6fr7DtVkVrcQfJsKCq4,13111
|
|
75
76
|
cloe_nessy/pipeline/pipeline_step.py,sha256=oTnlvRpB0fbOBQXbPe1URstA5fv-97igCHt_41fKCAk,2082
|
|
76
|
-
cloe_nessy/pipeline/actions/__init__.py,sha256=
|
|
77
|
+
cloe_nessy/pipeline/actions/__init__.py,sha256=Yf6-EoF_iTXOIOhgMN-GwhqH5DeaogUklVulh9OVj4s,2902
|
|
77
78
|
cloe_nessy/pipeline/actions/read_api.py,sha256=MAc7QfmhnaRUMdE09Ywt41RSAsuW4co8zF0zXHwbM8U,16193
|
|
78
|
-
cloe_nessy/pipeline/actions/read_catalog_table.py,sha256=
|
|
79
|
+
cloe_nessy/pipeline/actions/read_catalog_table.py,sha256=sx3dezd33c1FawMrxORwhK5GNo1IpjCyuLATWz7esZ0,6735
|
|
79
80
|
cloe_nessy/pipeline/actions/read_excel.py,sha256=IG_VmDEt1TvGVEO0SY9Fm3awHNjfisR1_7DUmhC3NEE,7968
|
|
80
81
|
cloe_nessy/pipeline/actions/read_files.py,sha256=hRcM7wG35vxxLVajW3SK5euHW02qxiXCYSkIl11xiQ0,7308
|
|
81
82
|
cloe_nessy/pipeline/actions/read_metadata_yaml.py,sha256=i8fQceV63eAqx_x0ANisCkXWfMHyhqsfFHVFH5yP2po,3544
|
|
@@ -89,16 +90,17 @@ cloe_nessy/pipeline/actions/transform_distinct.py,sha256=c7aBxANyqT4aKhm0cSELDtD
|
|
|
89
90
|
cloe_nessy/pipeline/actions/transform_filter.py,sha256=Nz_ggRfKIcNzYFfFOsgq1QeatjdEis0up4I7cOWBdyo,1446
|
|
90
91
|
cloe_nessy/pipeline/actions/transform_generic_sql.py,sha256=_naWfmPdYAUKjPNeHu5qJAohOL7DHCSYz_kwoeRv3OI,2741
|
|
91
92
|
cloe_nessy/pipeline/actions/transform_group_aggregate.py,sha256=KUHeeP-RIDi34dpbsPEJkzea5zFJA6MuyjNpOsFud9o,4045
|
|
92
|
-
cloe_nessy/pipeline/actions/transform_hash_columns.py,sha256=
|
|
93
|
+
cloe_nessy/pipeline/actions/transform_hash_columns.py,sha256=M5_wolJwzJpPTSrZq4yWV3TH7H6BGqbjJkJCwtqPlQo,8507
|
|
93
94
|
cloe_nessy/pipeline/actions/transform_join.py,sha256=ez1M1wVc9khOZj1swMArJbBKXxEpjenUHrW1wL8H330,7200
|
|
94
95
|
cloe_nessy/pipeline/actions/transform_json_normalize.py,sha256=petF7pnNq1EKc8MqVdG0weFALAHNILSe_eAu4Z5XxIo,4833
|
|
95
96
|
cloe_nessy/pipeline/actions/transform_rename_columns.py,sha256=4zJcPCONMU4C67qeuzsrX3AORRRHoq_selUI7FJyeg0,1952
|
|
96
97
|
cloe_nessy/pipeline/actions/transform_replace_values.py,sha256=1OPHTrjcphfyGepcO7ozYfeqfwA18pjlyHpVKUS_AAU,2049
|
|
97
98
|
cloe_nessy/pipeline/actions/transform_select_columns.py,sha256=-GhSEsb7iNnZIsYRm3BG9BX4_qUDJMbpj1DsKPY046w,4574
|
|
98
99
|
cloe_nessy/pipeline/actions/transform_union.py,sha256=SZtEzh567CIExUj9yMEgshE28h4dXKT7Wr2TDj4zB4k,2718
|
|
100
|
+
cloe_nessy/pipeline/actions/transform_with_column.py,sha256=c-E1yYkeYmovbN1maT7ImpdQlW0nYvYsHCtDvfe4wt8,3357
|
|
99
101
|
cloe_nessy/pipeline/actions/write_catalog_table.py,sha256=FyC0scQU8Ul3Uigpk6IN2IJpf_4jRjAqF5yHtDVwG00,4852
|
|
100
|
-
cloe_nessy/pipeline/actions/write_delta_append.py,sha256=
|
|
101
|
-
cloe_nessy/pipeline/actions/write_delta_merge.py,sha256=
|
|
102
|
+
cloe_nessy/pipeline/actions/write_delta_append.py,sha256=e1g4mDhwAZdKyt4Gb7ZzHcQrJ1duSl8qOn6ONizRsoM,2934
|
|
103
|
+
cloe_nessy/pipeline/actions/write_delta_merge.py,sha256=fwinlTeZoDuTyrbln5vMu1UJ1LG8ZQrus3LoCVF__I4,5819
|
|
102
104
|
cloe_nessy/pipeline/actions/write_file.py,sha256=JZ8UZslxUn_ttYt5wDyvtHFq2FqYk3vOR8kvExJI8pk,3212
|
|
103
105
|
cloe_nessy/pipeline/utils/__init__.py,sha256=xi02UjBMiXWD7b9gDvww4gyRyowb0eRd_6Wbu0F_cro,118
|
|
104
106
|
cloe_nessy/pipeline/utils/delta_load_utils.py,sha256=KitMNruxePEkecI0h4Jint1JwJpaEog5mCOchMkgan8,1495
|
|
@@ -110,6 +112,6 @@ cloe_nessy/settings/settings.py,sha256=I4n129lrujriW-d8q4as2Kb4_kI932ModfZ5Ow_Up
|
|
|
110
112
|
cloe_nessy/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
111
113
|
cloe_nessy/utils/column_names.py,sha256=dCNtm61mc5aLkY2oE4rlfN3VLCrpot6fOESjAZmCmhA,361
|
|
112
114
|
cloe_nessy/utils/file_and_directory_handler.py,sha256=r2EVt9xG81p6ScaJCwETC5an6pMT6WseB0jMOR-JlpU,602
|
|
113
|
-
cloe_nessy-0.
|
|
114
|
-
cloe_nessy-0.
|
|
115
|
-
cloe_nessy-0.
|
|
115
|
+
cloe_nessy-1.0.0.dist-info/METADATA,sha256=jaztxDdomowmj0qTJ4i5jtF0dE_T5TmqA8j9zywl0U0,3291
|
|
116
|
+
cloe_nessy-1.0.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
117
|
+
cloe_nessy-1.0.0.dist-info/RECORD,,
|
|
File without changes
|