cloe-nessy 1.0.1__py3-none-any.whl → 1.0.5__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.
@@ -8,7 +8,7 @@ from pydantic import BaseModel
8
8
  class DeltaLoadOptions(BaseModel):
9
9
  """Options to configure the DeltaLoader.
10
10
 
11
- Args:
11
+ Attributes:
12
12
  strategy: Delta load strategy to use.
13
13
  delta_load_identifier: Unique delta load identifier used to track the delta load metadata.
14
14
  strategy_options: Options used to configure the chosen delta load strategy.
@@ -11,7 +11,7 @@ from ..delta_loader import DeltaLoader
11
11
  class DeltaCDFConfig(BaseModel):
12
12
  """This class holds the config for the DeltaCDFLoader.
13
13
 
14
- Args:
14
+ Attributes:
15
15
  deduplication_columns: A list of columns used for deduplication.
16
16
  from_commit_version: The starting commit version. If None, it starts from the first viable version.
17
17
  to_commit_version: The ending commit version. If None, it goes up to the latest version.
@@ -13,7 +13,7 @@ from ..delta_loader import DeltaLoader
13
13
  class DeltaTimestampConfig(BaseModel):
14
14
  """This class holds the config for the DeltaTimestampLoader.
15
15
 
16
- Args:
16
+ Attributes:
17
17
  timestamp_filter_cols: A list of columns used for timestamp filtering.
18
18
  from_timestamp: The starting timestamp. If None, it starts from the beginning.
19
19
  to_timestamp: The ending timestamp. If None, it goes up to the latest timestamp.
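The three hunks above only retitle the docstring section from `Args:` to `Attributes:`, which matches how these Pydantic models are used: the documented names are declared fields set at construction time. A minimal sketch of what instantiation might look like, using a stand-in that mirrors the documented attributes of `DeltaTimestampConfig`; the concrete field types are assumptions, not taken from the package.

```python
from pydantic import BaseModel


# Stand-in mirroring the documented attributes of DeltaTimestampConfig.
# The real class lives in cloe_nessy.integration.delta_loader.strategies;
# the string timestamps here are an assumed representation.
class DeltaTimestampConfig(BaseModel):
    timestamp_filter_cols: list[str]
    from_timestamp: str | None = None  # None -> start from the beginning
    to_timestamp: str | None = None    # None -> read up to the latest timestamp


config = DeltaTimestampConfig(
    timestamp_filter_cols=["updated_at"],
    from_timestamp="2024-01-01T00:00:00",
    # to_timestamp omitted -> latest, per the docstring above
)
print(config.model_dump())
```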
@@ -343,8 +343,9 @@ class APIReader(BaseReader):
343
343
  pagination_config: Configuration for pagination.
344
344
  max_retries: The maximum number of retries for the request.
345
345
  backoff_factor: Factor for exponential backoff between retries.
346
- options: Additional options for the createDataFrame function.
347
- dynamic_requests: .
346
+ dynamic_requests: A list of RequestSet dictionaries for making multiple API requests dynamically.
347
+ Each RequestSet should contain 'endpoint', 'params', and optionally 'headers', 'data', 'json_body'.
348
+ When provided, the reader will execute all requests and combine the results.
348
349
 
349
350
  Returns:
350
351
  DataFrame: The Spark DataFrame containing the read data in the json_object column.
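The new `dynamic_requests` documentation describes a list of RequestSet dictionaries. A hedged sketch of such a list, using only the keys named in the docstring (`endpoint`, `params`, and optionally `headers`, `data`, `json_body`); the endpoint and parameter values are illustrative, and how the list is passed to `APIReader` is not shown here.

```python
# Two requests to be executed by the reader and combined into one DataFrame.
dynamic_requests = [
    {
        "endpoint": "orders",
        "params": {"page": 1, "page_size": 100},
        "headers": {"Accept": "application/json"},
    },
    {
        "endpoint": "orders",
        "params": {"page": 2, "page_size": 100},
        # 'data' and 'json_body' are optional and omitted here
    },
]
```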
@@ -457,6 +458,8 @@ class APIReader(BaseReader):
457
458
  backoff_factor=backoff_factor,
458
459
  )
459
460
  else:
461
+ if not pagination_config:
462
+ raise ValueError("pagination_config must be provided for paginated requests.")
460
463
  response_data = APIReader._read_from_api_with_pagination(
461
464
  api_client=api_client,
462
465
  endpoint=endpoint,
@@ -92,7 +92,6 @@ class ExcelDataFrameReader(BaseReader):
92
92
  pyspark.pandas.read_excel and handed to TextFileReader.
93
93
  load_as_strings: If True, converts all columns to string type to avoid datatype conversion errors in Spark.
94
94
  add_metadata_column: If True, adds a metadata column containing the file location and sheet name.
95
- **kwargs: Additional keyword arguments to maintain compatibility with the base class method.
96
95
  """
97
96
  if options is None:
98
97
  options = {}
@@ -152,7 +152,7 @@ class FileReader(BaseReader):
152
152
  """Reads specified location as a stream and returns streaming DataFrame.
153
153
 
154
154
  Arguments:
155
- location : Location of files to read.
155
+ location: Location of files to read.
156
156
  format: Format of files to read.
157
157
  schema: Schema of the file.
158
158
  add_metadata_column: Whether to include __metadata column in the DataFrame.
@@ -13,7 +13,7 @@ from .delta_writer_base import BaseDeltaWriter
13
13
  class DeltaMergeConfig(BaseModel):
14
14
  """Configuration for Merge options.
15
15
 
16
- Args:
16
+ Attributes:
17
17
  dataframe_columns: The columns of the DataFrame.
18
18
  key_columns: List of column names that form the key for the merge
19
19
  operation.
@@ -29,6 +29,9 @@ class DeltaMergeConfig(BaseModel):
29
29
  use_partition_pruning: Flag to specify whether to use partition
30
30
  pruning to optimize the performance of the merge operation.
31
31
  partition_by: List of column names to partition by.
32
+ column_mapping: Mapping from target column names to source column names.
33
+ If a column is not in the mapping, it's assumed to have the same name
34
+ in both source and target.
32
35
  """
33
36
 
34
37
  dataframe_columns: list[str]
@@ -39,6 +42,7 @@ class DeltaMergeConfig(BaseModel):
39
42
  when_not_matched_insert: bool = True
40
43
  use_partition_pruning: bool = True
41
44
  partition_by: list[str] = Field(default_factory=list)
45
+ column_mapping: dict[str, str] = Field(default_factory=dict)
42
46
  cols_to_merge: list[str] = Field(default_factory=list, alias="_cols_to_merge")
43
47
  cols_to_update: set[str] = Field(default_factory=set, alias="_cols_to_update")
44
48
  cols_to_insert: set[str] = Field(default_factory=set, alias="_cols_to_insert")
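Putting the new `column_mapping` field together with the validators shown in the next hunks, a `DeltaMergeConfig` for a source DataFrame whose column names differ from the target might be built as below. This is a sketch based only on the fields visible in this diff; the import path is taken from the docstring link further down.

```python
from cloe_nessy.integration.writer.delta_writer.delta_merge_writer import DeltaMergeConfig

# Source DataFrame uses short names; the target Delta table uses long names.
config = DeltaMergeConfig(
    dataframe_columns=["cust_id", "name", "email"],  # source column names
    key_columns=["customer_id"],                     # target-side key column
    column_mapping={                                 # target -> source
        "customer_id": "cust_id",
        "full_name": "name",
        "email_address": "email",
    },
    use_partition_pruning=False,  # pruning requires partition_by, which is not set here
)
# The before-validators derive the target-side merge columns and the
# source-qualified expressions, e.g. final_cols_to_update maps
# "customer_id" -> "source.`cust_id`".
```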
@@ -58,11 +62,20 @@ class DeltaMergeConfig(BaseModel):
58
62
  @model_validator(mode="before")
59
63
  @classmethod
60
64
  def _validate_key_columns(cls, config: Any):
61
- """Key columns must exist in the data frame."""
65
+ """Key columns must exist in the data frame (considering column mapping)."""
62
66
  key_columns = config.get("key_columns")
63
67
  dataframe_columns = config.get("dataframe_columns")
64
- if not set(key_columns).issubset(set(dataframe_columns)):
65
- raise ValueError("Key columns must exist in the DataFrame columns.")
68
+ column_mapping = config.get("column_mapping", {})
69
+
70
+ # For each key column (target name), find the corresponding source column
71
+ missing_columns = []
72
+ for key_col in key_columns:
73
+ source_col = column_mapping.get(key_col, key_col)
74
+ if source_col not in dataframe_columns:
75
+ missing_columns.append(f"{key_col} (maps to {source_col})" if key_col != source_col else key_col)
76
+
77
+ if missing_columns:
78
+ raise ValueError(f"Key columns must exist in the DataFrame. Missing columns: {', '.join(missing_columns)}")
66
79
  return config
67
80
 
68
81
  @model_validator(mode="before")
@@ -70,37 +83,57 @@ class DeltaMergeConfig(BaseModel):
70
83
  def _derive_merge_columns(cls, config: Any):
71
84
  """Derive update and insert columns from the DataFrame columns."""
72
85
  dataframe_columns = config.get("dataframe_columns", [])
73
- config["_cols_to_merge"] = list(set(dataframe_columns))
86
+ column_mapping = config.get("column_mapping", {})
87
+
88
+ # Build reverse mapping: source_col -> target_col
89
+ reverse_mapping = {v: k for k, v in column_mapping.items()}
90
+
91
+ # Determine which target columns we're working with
92
+ # For each dataframe column, find its corresponding target column
93
+ target_columns = []
94
+ for df_col in dataframe_columns:
95
+ target_col = reverse_mapping.get(df_col, df_col)
96
+ target_columns.append(target_col)
97
+
98
+ config["_cols_to_merge"] = list(set(target_columns))
99
+
74
100
  if config.get("cols_to_exclude_from_update"):
75
101
  config["_cols_to_update"] = set(config["_cols_to_merge"]) - set(config["cols_to_exclude_from_update"])
76
102
  else:
77
103
  config["_cols_to_update"] = set(config["_cols_to_merge"])
78
104
 
79
105
  config["_cols_to_insert"] = config["_cols_to_merge"]
80
- config["final_cols_to_update"] = {col: f"source.{col}" for col in config["_cols_to_update"]}
81
- config["final_cols_to_insert"] = {col: f"source.{col}" for col in config["_cols_to_insert"]}
106
+
107
+ # Build final mappings using column_mapping (target -> source)
108
+ # For each target column, find the corresponding source column
109
+ config["final_cols_to_update"] = {
110
+ target_col: f"source.`{column_mapping.get(target_col, target_col)}`"
111
+ for target_col in config["_cols_to_update"]
112
+ }
113
+ config["final_cols_to_insert"] = {
114
+ target_col: f"source.`{column_mapping.get(target_col, target_col)}`"
115
+ for target_col in config["_cols_to_insert"]
116
+ }
82
117
  return config
83
118
 
84
119
  @model_validator(mode="after")
85
- @classmethod
86
- def _validate_partition_pruning(cls, config: Self):
120
+ def _validate_partition_pruning(self) -> Self:
87
121
  """If partition_pruning is set, the partition by columns must be known."""
88
- if config.use_partition_pruning is True and not config.partition_by:
122
+ if self.use_partition_pruning is True and not self.partition_by:
89
123
  raise ValueError("Partition columns must be specified when using partition pruning.")
90
- return config
124
+ return self
91
125
 
92
126
  @model_validator(mode="after")
93
- @classmethod
94
- def _validate_cols_exist(cls, config: Any):
127
+ def _validate_cols_exist(self) -> Self:
95
128
  """If partition_pruning is set, the partition by columns must be known."""
96
- if any(col not in config.cols_to_merge for col in config.cols_to_update) or any(
97
- col not in config.cols_to_merge for col in config.cols_to_insert
129
+ if any(col not in self.cols_to_merge for col in self.cols_to_update) or any(
130
+ col not in self.cols_to_merge for col in self.cols_to_insert
98
131
  ):
99
132
  raise ValueError(
100
133
  "You specified column names for UPDATE or INSERT that either don't exist in the dataframe "
101
134
  "or are explicitly excluded from the MERGE.",
102
135
  )
103
- return config
136
+ return self
104
137
 
105
138
 
106
139
  class DeltaMergeWriter(BaseDeltaWriter):
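To make the derivation above concrete, here is the same reverse-mapping step in isolation with hypothetical column names; it mirrors the logic of `_derive_merge_columns` rather than calling it.

```python
column_mapping = {"customer_id": "cust_id", "full_name": "name"}  # target -> source
dataframe_columns = ["cust_id", "name", "email"]                  # source DataFrame columns

# Invert the mapping so each source column can be traced back to its target name.
reverse_mapping = {source: target for target, source in column_mapping.items()}

# Columns the MERGE will address, expressed in target-table terms.
target_columns = [reverse_mapping.get(col, col) for col in dataframe_columns]
print(target_columns)  # ['customer_id', 'full_name', 'email']

# Source-qualified expressions used for UPDATE/INSERT, keyed by target column.
final_cols = {t: f"source.`{column_mapping.get(t, t)}`" for t in target_columns}
print(final_cols)
# {'customer_id': 'source.`cust_id`', 'full_name': 'source.`name`', 'email': 'source.`email`'}
```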
@@ -127,7 +160,7 @@ class DeltaMergeWriter(BaseDeltaWriter):
127
160
 
128
161
  def _build_match_conditions(self, data_frame: DataFrame, config: DeltaMergeConfig) -> str:
129
162
  """Builds match conditions for the Delta table merge."""
130
- match_conditions = self._merge_match_conditions(config.key_columns)
163
+ match_conditions = self._merge_match_conditions_with_mapping(config.key_columns, config.column_mapping)
131
164
  if config.use_partition_pruning:
132
165
  match_conditions_list = [match_conditions] + [
133
166
  self._partition_pruning_conditions(data_frame, config.partition_by),
@@ -169,6 +202,11 @@ class DeltaMergeWriter(BaseDeltaWriter):
169
202
  function also supports partition pruning to optimize the performance of
170
203
  the merge operation.
171
204
 
205
+ When source and target tables have different column names, use the
206
+ `column_mapping` parameter to map target column names to source column names.
207
+ For any columns not in the mapping, the same name is assumed for both source
208
+ and target.
209
+
172
210
  Args:
173
211
  table: The Table object representing the Delta table.
174
212
  table_identifier: The identifier of the Delta table in the format
@@ -178,7 +216,17 @@ class DeltaMergeWriter(BaseDeltaWriter):
178
216
  ignore_empty_df: A flag indicating whether to ignore an empty source
179
217
  dataframe.
180
218
  kwargs: Passed to the
181
- [`DeltaMergeConfig`][cloe_nessy.integration.writer.delta_merge_writer.DeltaMergeConfig].
219
+ [`DeltaMergeConfig`][cloe_nessy.integration.writer.delta_writer.delta_merge_writer.DeltaMergeConfig].
220
+ Common kwargs include:
221
+
222
+ - key_columns: List of target column names to use as merge keys.
223
+ - column_mapping: Dict mapping target column names to source column names.
224
+ - when_matched_update: Whether to update matching records.
225
+ - when_matched_delete: Whether to delete matching records.
226
+ - when_not_matched_insert: Whether to insert non-matching records.
227
+ - cols_to_exclude_from_update: Target columns to exclude from updates.
228
+ - use_partition_pruning: Whether to use partition pruning.
229
+ - partition_by: List of partition columns.
182
230
 
183
231
  Raises:
184
232
  ValueError: If both, table and table_identifier or storage_path are provided.
@@ -189,6 +237,21 @@ class DeltaMergeWriter(BaseDeltaWriter):
189
237
  merge operation.
190
238
  ValueError: If partition columns are not specified when using
191
239
  partition pruning.
240
+
241
+ Example:
242
+ ```python
243
+ # Merge with different column names
244
+ writer.write(
245
+ data_frame=source_df,
246
+ table=target_table,
247
+ key_columns=["customer_id"],
248
+ column_mapping={
249
+ "customer_id": "cust_id",
250
+ "full_name": "name",
251
+ "email_address": "email"
252
+ }
253
+ )
254
+ ```
192
255
  """
193
256
  if self._empty_dataframe_check(data_frame, ignore_empty_df):
194
257
  return
@@ -150,6 +150,42 @@ class BaseDeltaWriter(BaseWriter, ABC):
150
150
  """
151
151
  return " AND ".join([f"target.`{c}` <=> source.`{c}`" for c in columns])
152
152
 
153
+ @staticmethod
154
+ def _merge_match_conditions_with_mapping(
155
+ key_columns: list[str], column_mapping: dict[str, str] | None = None
156
+ ) -> str:
157
+ """Merges match conditions with support for column name mapping.
158
+
159
+ This function generates SQL match conditions for merging tables where source and target
160
+ columns may have different names.
161
+
162
+ Args:
163
+ key_columns: A list of target column names to use as keys for the merge operation.
164
+ column_mapping: A dictionary mapping target column names to source column names.
165
+ If None or empty, assumes source and target columns have the same names.
166
+
167
+ Returns:
168
+ A string containing the match conditions, separated by " AND "
169
+
170
+ Example:
171
+ ```python
172
+ # Without mapping (same column names):
173
+ _merge_match_conditions_with_mapping(["id", "customer_id"])
174
+ # "target.`id` <=> source.`id` AND target.`customer_id` <=> source.`customer_id`"
175
+
176
+ # With mapping (different column names):
177
+ _merge_match_conditions_with_mapping(
178
+ ["id", "customer_id"],
179
+ {"customer_id": "cust_id"}
180
+ )
181
+ # "target.`id` <=> source.`id` AND target.`customer_id` <=> source.`cust_id`"
182
+ ```
183
+ """
184
+ mapping = column_mapping or {}
185
+ return " AND ".join(
186
+ [f"target.`{target_col}` <=> source.`{mapping.get(target_col, target_col)}`" for target_col in key_columns]
187
+ )
188
+
153
189
  @staticmethod
154
190
  def _partition_pruning_conditions(df: "DataFrame", partition_cols: list[str] | None) -> str:
155
191
  """Generates partition pruning conditions for an SQL query.
@@ -1,5 +1,5 @@
1
1
  import logging
2
- from typing import cast
2
+ from typing import Any, cast
3
3
 
4
4
  from cloe_logging import LoggerFactory
5
5
 
@@ -35,7 +35,7 @@ class LoggerMixin:
35
35
  logging_level=level if level is not None else logging_settings.log_level_console,
36
36
  log_format=log_format if log_format is not None else logging_settings.log_format_console,
37
37
  )
38
- return cast(logging.Logger, logger)
38
+ return logger
39
39
 
40
40
  def get_tabular_logger(
41
41
  self,
@@ -110,13 +110,13 @@ class LoggerMixin:
110
110
  log_type=log_type,
111
111
  test_connectivity=False,
112
112
  )
113
- return cast(logging.Logger, logger)
113
+ return logger
114
114
 
115
115
  @staticmethod
116
116
  def should_add_log_analytics_handler(
117
117
  logging_settings: LoggingSettings,
118
118
  add_log_analytics_logger: bool | None,
119
- **kwargs, # noqa: ARG004
119
+ **kwargs: Any, # noqa: ARG004
120
120
  ) -> bool:
121
121
  """Determines if a LogAnalyticsHandler should be added to the logger.
122
122
 
@@ -60,7 +60,7 @@ class ReadInstancesMixin(BaseModel):
60
60
  @classmethod
61
61
  def read_instances_from_directory(
62
62
  cls,
63
- instance_path: pathlib.Path,
63
+ instance_path: str | pathlib.Path,
64
64
  fail_on_missing_subfolder: bool = True,
65
65
  **_: Any, # allow subclasses to pass additional arguments
66
66
  ) -> tuple[list[Self], list[ValidationErrorType]]:
@@ -77,6 +77,10 @@ class ReadInstancesMixin(BaseModel):
77
77
  instances: list[Self] = []
78
78
  errors: list[ValidationErrorType] = []
79
79
 
80
+ # Convert to Path if string
81
+ if isinstance(instance_path, str):
82
+ instance_path = pathlib.Path(instance_path)
83
+
80
84
  if not instance_path.exists() or not instance_path.is_dir():
81
85
  if fail_on_missing_subfolder:
82
86
  raise FileNotFoundError(f"Directory not found: {instance_path}")
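With this change, callers no longer need to wrap plain strings in `pathlib.Path` themselves. A hedged usage sketch with a hypothetical subclass; the import path is assumed from the module listed in the RECORD below.

```python
from cloe_nessy.models.mixins.read_instance_mixin import ReadInstancesMixin


# Hypothetical model used only for this sketch; any subclass of the mixin
# is read from a directory the same way.
class SourceSystem(ReadInstancesMixin):
    name: str
    url: str


# A plain string path is now accepted and converted to pathlib.Path internally.
instances, errors = SourceSystem.read_instances_from_directory(
    "configs/source_systems",
    fail_on_missing_subfolder=False,  # don't raise if the folder is missing
)
for err in errors:
    print(err)
```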
@@ -1,5 +1,5 @@
1
1
  from pathlib import Path
2
- from typing import Any, Self
2
+ from typing import Any, Self, cast
3
3
 
4
4
  from pydantic import Field
5
5
 
@@ -43,6 +43,7 @@ class Schema(ReadInstancesMixin):
43
43
  raise FileNotFoundError("Schema file not found.")
44
44
 
45
45
  schema, schema_errors = super().read_instance_from_file(processed_instance_path)
46
+ schema = cast(Self | None, schema)
46
47
  table_errors: list[ValidationErrorType] = []
47
48
  if schema:
48
49
  schema.storage_path = "" if not schema.storage_path else schema.storage_path
@@ -1,5 +1,6 @@
1
+ import pathlib
1
2
  from pathlib import Path
2
- from typing import Any, Self
3
+ from typing import Any, Self, cast
3
4
 
4
5
  import yaml
5
6
  from jinja2 import TemplateNotFound
@@ -96,16 +97,16 @@ class Table(TemplateLoaderMixin, ReadInstancesMixin, LoggerMixin):
96
97
  return v
97
98
 
98
99
  @model_validator(mode="after")
99
- def _validate_is_external(cls, table: Self):
100
+ def _validate_is_external(self) -> Self:
100
101
  """If is_external is set to True, storage_path has to be set."""
101
- if table.is_external and table.storage_path is None:
102
+ if self.is_external and self.storage_path is None:
102
103
  raise ValueError("is_external cannot be true while storage_path is None.")
103
- return table
104
+ return self
104
105
 
105
106
  @classmethod
106
- def read_instances_from_directory(
107
+ def read_instances_from_directory( # type: ignore[override]
107
108
  cls,
108
- instance_path: str | Path,
109
+ instance_path: str | pathlib.Path,
109
110
  fail_on_missing_subfolder: bool = True,
110
111
  catalog_name: str | None = None,
111
112
  schema_name: str | None = None,
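The hunk above (like the `DeltaMergeConfig` hunks earlier) migrates `mode="after"` validators from the classmethod-with-argument form to Pydantic v2's instance-method form, which receives the constructed model as `self` and must return it. A minimal standalone illustration of that pattern on a hypothetical model; the rule mirrors `_validate_is_external` shown above.

```python
from typing import Self

from pydantic import BaseModel, model_validator


class ExternalTable(BaseModel):
    is_external: bool = False
    storage_path: str | None = None

    @model_validator(mode="after")
    def _check_storage_path(self) -> Self:
        # An external table needs a storage path, as in Table._validate_is_external.
        if self.is_external and self.storage_path is None:
            raise ValueError("is_external cannot be true while storage_path is None.")
        return self


ExternalTable(is_external=True, storage_path="abfss://container@account/path")  # passes validation
```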
@@ -236,7 +237,7 @@ class Table(TemplateLoaderMixin, ReadInstancesMixin, LoggerMixin):
236
237
  def get_create_statement(
237
238
  self,
238
239
  replace: bool = True,
239
- ):
240
+ ) -> str:
240
241
  """Get the create statement for the Table.
241
242
 
242
243
  Args:
@@ -257,7 +258,7 @@ class Table(TemplateLoaderMixin, ReadInstancesMixin, LoggerMixin):
257
258
  self._console_logger.error(f"Template [ {template_name} ] not found.")
258
259
  raise err
259
260
  render = template.render(table=self, replace=replace)
260
- return render
261
+ return cast(str, render)
261
262
 
262
263
  def get_column_by_name(self, column_name: str) -> Column | None:
263
264
  """Get a column by name.
@@ -1,5 +1,5 @@
1
1
  from pathlib import Path
2
- from typing import Any
2
+ from typing import Any, cast
3
3
 
4
4
  from jinja2 import TemplateNotFound
5
5
  from pydantic import BaseModel, field_validator
@@ -25,7 +25,7 @@ class Volume(TemplateLoaderMixin, LoggerMixin, BaseModel):
25
25
  @property
26
26
  def storage_identifier(self) -> str:
27
27
  """Return the storage identifier."""
28
- return f"/Volumes/{self.catalog}/{self.schema}/{self.name}/"
28
+ return f"/Volumes/{self.catalog}/{self.schema_name}/{self.name}/"
29
29
 
30
30
  @property
31
31
  def catalog(self) -> str:
@@ -55,7 +55,7 @@ class Volume(TemplateLoaderMixin, LoggerMixin, BaseModel):
55
55
  def get_create_statement(
56
56
  self,
57
57
  if_not_exists: bool = True,
58
- ):
58
+ ) -> str:
59
59
  """Get the create statement for the Volume.
60
60
 
61
61
  Args:
@@ -73,4 +73,4 @@ class Volume(TemplateLoaderMixin, LoggerMixin, BaseModel):
73
73
  self._console_logger.error(f"Template [ {template_name} ] not found.")
74
74
  raise err
75
75
  render = template.render(volume=self, if_not_exists=if_not_exists)
76
- return render
76
+ return cast(str, render)
@@ -1,6 +1,7 @@
1
1
  import functools
2
2
  import logging
3
3
  from dataclasses import dataclass, field
4
+ from typing import Any
4
5
 
5
6
  from delta import DeltaTable # type: ignore
6
7
 
@@ -23,7 +24,7 @@ class TableManagerLogs:
23
24
  )
24
25
 
25
26
 
26
- def table_log_decorator(operation: str):
27
+ def table_log_decorator(operation: str) -> Any:
27
28
  """Creates a decorator that logs the start, failure (if any), and completion of a table operation.
28
29
 
29
30
  The created decorator wraps a function that performs an operation on a table. The decorator logs
@@ -35,7 +36,7 @@ def table_log_decorator(operation: str):
35
36
  operation: The name of the operation to be logged. This will be included in the log messages.
36
37
 
37
38
  Returns:
38
- inner_decorator: A decorator that can be used to wrap a function that performs an operation on a table.
39
+ A decorator that can be used to wrap a function that performs an operation on a table.
39
40
 
40
41
  Example:
41
42
  ```python
@@ -183,7 +184,7 @@ class TableManager(LoggerMixin):
183
184
  SessionManager.get_utils().fs.rm(location, recurse=True)
184
185
  self._console_logger.info("... deleting physical data.")
185
186
 
186
- def get_delta_table(self, table: Table | None = None, location: str | None = None, spark=None) -> DeltaTable:
187
+ def get_delta_table(self, table: Table | None = None, location: str | None = None, spark: Any = None) -> DeltaTable:
187
188
  """Get the DeltaTable object from the Table objects location or a location string.
188
189
 
189
190
  For managed tables, uses the table identifier to access the DeltaTable.
@@ -18,10 +18,10 @@ def process_auth(
18
18
  result: AuthBase | None = None
19
19
 
20
20
  if isinstance(auth, list):
21
- auths = [process_auth(sub_auth) for sub_auth in auth]
21
+ auths = [process_auth(sub_auth) for sub_auth in auth] # type: ignore[arg-type]
22
22
  result = ChainedAuth(*auths)
23
23
  elif isinstance(auth, dict):
24
- match auth.get("type"):
24
+ match auth.get("type"): # type: ignore[arg-type]
25
25
  case "basic":
26
26
  result = HTTPBasicAuth(auth["username"], auth["password"])
27
27
  case "secret_scope":
@@ -65,8 +65,8 @@ class ReadCatalogTableAction(PipelineAction):
65
65
 
66
66
  name: str = "READ_CATALOG_TABLE"
67
67
 
68
- @staticmethod
69
68
  def run(
69
+ self,
70
70
  context: PipelineContext,
71
71
  *,
72
72
  table_identifier: str | None = None,
@@ -90,12 +90,6 @@ class ReadCatalogTableAction(PipelineAction):
90
90
  Configures the [`DeltaLoader`][cloe_nessy.integration.delta_loader].
91
91
  behavior, such as filters or reading modes.
92
92
  stream: If True, the action will read the table as a stream.
93
- checkpoint_location: The location for storing
94
- checkpoints if streaming is enabled.
95
- trigger_dict: A dictionary specifying the trigger
96
- configuration for the streaming query, such as processing time or
97
- continuous processing.
98
- behavior, such as filters or reading modes. Defaults to None.
99
93
 
100
94
  Raises:
101
95
  ValueError: If neither `table_identifier` nor `table_metadata.identifier` in the `context` is provided.
@@ -79,8 +79,8 @@ class ReadFilesAction(PipelineAction):
79
79
 
80
80
  name: str = "READ_FILES"
81
81
 
82
- @staticmethod
83
82
  def run(
83
+ self,
84
84
  context: PipelineContext,
85
85
  *,
86
86
  location: str | None = None,
@@ -71,7 +71,7 @@ class TransformDeduplication(PipelineAction):
71
71
  raise ValueError("The key_columns and order_by_columns cannot contain the same column")
72
72
 
73
73
  # check if the key_columns and order_by_columns are not null
74
- df_nulls = context.data.filter(F.greatest(*[F.col(c).isNull() for c in key_columns + order_by_columns]) == 1)
74
+ df_nulls = context.data.filter(F.greatest(*[F.col(c).isNull() for c in key_columns + order_by_columns]) == 1) # type: ignore[misc]
75
75
  if df_nulls.head(1): # if the filteredDataFrame is not empty
76
76
  raise ValueError(
77
77
  "The key_columns and order_by_columns cannot be null. Please check the quality of the provided columns (null handling)"
@@ -100,9 +100,9 @@ class TransformDeduplication(PipelineAction):
100
100
 
101
101
  # sort the order_by columns in the preferred order
102
102
  if descending:
103
- order_by_list = [F.col(col_name).desc() for col_name in order_by_columns]
103
+ order_by_list = [F.col(col_name).desc() for col_name in order_by_columns] # type: ignore[misc]
104
104
  else:
105
- order_by_list = [F.col(col_name).asc() for col_name in order_by_columns]
105
+ order_by_list = [F.col(col_name).asc() for col_name in order_by_columns] # type: ignore[misc]
106
106
 
107
107
  window_specification = (
108
108
  Window.partitionBy(key_columns)
@@ -148,7 +148,7 @@ class TransformRegexExtract(PipelineAction):
148
148
  # Check if extraction is null or empty string
149
149
  df = df.withColumn(
150
150
  match_info_column_name,
151
- F.when((F.col(extract_column_name).isNull()) | (F.col(extract_column_name) == ""), False).otherwise(
151
+ F.when((F.col(extract_column_name).isNull()) | (F.col(extract_column_name) == ""), False).otherwise( # type: ignore[misc]
152
152
  True
153
153
  ),
154
154
  )
@@ -39,8 +39,8 @@ class WriteCatalogTableAction(PipelineAction):
39
39
 
40
40
  name: str = "WRITE_CATALOG_TABLE"
41
41
 
42
- @staticmethod
43
42
  def run(
43
+ self,
44
44
  context: PipelineContext,
45
45
  *,
46
46
  table_identifier: str | None = None,
@@ -13,6 +13,7 @@ class WriteDeltaMergeAction(PipelineAction):
13
13
 
14
14
  Example:
15
15
  ```yaml
16
+ # Basic merge with same column names
16
17
  Write Delta Merge:
17
18
  action: WRITE_DELTA_MERGE
18
19
  options:
@@ -20,13 +21,25 @@ class WriteDeltaMergeAction(PipelineAction):
20
21
  key_columns:
21
22
  - id
22
23
  - customer_id
23
- cols_to_update:
24
- - name
25
- - email
26
- - updated_at
24
+ cols_to_exclude_from_update:
25
+ - created_at
27
26
  when_matched_update: true
28
27
  when_not_matched_insert: true
29
28
  use_partition_pruning: true
29
+
30
+ # Merge with different source and target column names
31
+ Write Delta Merge with Mapping:
32
+ action: WRITE_DELTA_MERGE
33
+ options:
34
+ table_identifier: my_catalog.my_schema.my_table
35
+ key_columns:
36
+ - customer_id
37
+ column_mapping:
38
+ customer_id: cust_id
39
+ full_name: name
40
+ email_address: email
41
+ when_matched_update: true
42
+ when_not_matched_insert: true
30
43
  ```
31
44
  """
32
45
 
@@ -38,11 +51,10 @@ class WriteDeltaMergeAction(PipelineAction):
38
51
  *,
39
52
  table_identifier: str | None = None,
40
53
  key_columns: list[str] | None = None,
41
- cols_to_update: list[str] | None = None,
42
- cols_to_insert: list[str] | None = None,
43
- cols_to_exclude: list[str] | None = None,
54
+ cols_to_exclude_from_update: list[str] | None = None,
55
+ column_mapping: dict[str, str] | None = None,
44
56
  when_matched_update: bool = True,
45
- when_matched_deleted: bool = False,
57
+ when_matched_delete: bool = False,
46
58
  when_not_matched_insert: bool = True,
47
59
  use_partition_pruning: bool = True,
48
60
  ignore_empty_df: bool = False,
@@ -57,23 +69,23 @@ class WriteDeltaMergeAction(PipelineAction):
57
69
  table_identifier: The identifier of the table. If passed, the
58
70
  UC Adapter will be used to create a table object. Otherwise the Table
59
71
  object will be created from the table metadata in the context.
60
- key_columns: List of column names that form the
72
+ key_columns: List of target column names that form the
61
73
  key for the merge operation.
74
+ cols_to_exclude_from_update: List of target column names to be
75
+ excluded from the update operation in the target Delta table.
76
+ column_mapping: Mapping from target column names to source column names.
77
+ Use this when source and target tables have different column names.
78
+ If a column is not in the mapping, it's assumed to have the same name
79
+ in both source and target.
62
80
  when_matched_update: Flag to specify whether to
63
- perform an update operation whenmatching records are found in
81
+ perform an update operation when matching records are found in
64
82
  the target Delta table.
65
- when_matched_deleted: Flag to specify whether to
83
+ when_matched_delete: Flag to specify whether to
66
84
  perform a delete operation when matching records are found in
67
85
  the target Delta table.
68
86
  when_not_matched_insert: Flag to specify whether to perform an
69
87
  insert operation when matching records are not found in the target
70
88
  Delta table.
71
- cols_to_update: List of column names to be
72
- updated in the target Delta table.
73
- cols_to_insert: List of column names to be
74
- inserted into the target Delta table.
75
- cols_to_exclude: List of column names to be
76
- excluded from the merge operation.
77
89
  use_partition_pruning: Flag to specify whether to use partition
78
90
  pruning to optimize the performance of the merge operation.
79
91
  ignore_empty_df: A flag indicating whether to ignore an empty source dataframe.
@@ -113,16 +125,15 @@ class WriteDeltaMergeAction(PipelineAction):
113
125
  assert key_columns is not None, "Key columns must be provided."
114
126
 
115
127
  delta_merge_writer.write(
116
- table_identifier=context.table_metadata.identifier,
128
+ data_frame=context.data,
117
129
  table=context.table_metadata,
130
+ table_identifier=context.table_metadata.identifier,
118
131
  storage_path=str(context.table_metadata.storage_path),
119
- data_frame=context.data,
120
132
  key_columns=key_columns,
121
- cols_to_update=cols_to_update,
122
- cols_to_insert=cols_to_insert,
123
- cols_to_exclude=cols_to_exclude,
133
+ cols_to_exclude_from_update=cols_to_exclude_from_update or [],
134
+ column_mapping=column_mapping or {},
124
135
  when_matched_update=when_matched_update,
125
- when_matched_deleted=when_matched_deleted,
136
+ when_matched_delete=when_matched_delete,
126
137
  when_not_matched_insert=when_not_matched_insert,
127
138
  use_partition_pruning=use_partition_pruning,
128
139
  partition_by=context.table_metadata.partition_by,
@@ -1,6 +1,7 @@
1
1
  """Pipeline plotting service for visualizing pipeline graphs using matplotlib."""
2
2
 
3
3
  import textwrap
4
+ from typing import Any
4
5
 
5
6
  import matplotlib.patches as patches
6
7
  import matplotlib.pyplot as plt
@@ -25,7 +26,7 @@ class PipelinePlottingService(LoggerMixin):
25
26
  }
26
27
  self._console_logger = self.get_console_logger()
27
28
 
28
- def plot_graph(self, pipeline, save_path: str | None = None):
29
+ def plot_graph(self, pipeline: Any, save_path: str | None = None):
29
30
  """Plot and save the pipeline graph as an image.
30
31
 
31
32
  Args:
@@ -234,22 +234,19 @@ class SessionManager(LoggerMixin):
234
234
  """Get the SparkSession builder based on the current environment."""
235
235
  if cls._env is None:
236
236
  cls._detect_env()
237
- builders = {
238
- cls.Environment.DATABRICKS_UI: SparkSession.builder,
239
- cls.Environment.FABRIC_UI: SparkSession.builder,
240
- cls.Environment.DATABRICKS_CONNECT: cls._get_databricks_connect_builder,
241
- cls.Environment.OTHER_REMOTE_SPARK: SparkSession.builder,
242
- cls.Environment.STANDALONE_SPARK: SparkSession.builder,
243
- }
244
- builder = builders.get(cls._env)
245
- if builder is None:
246
- raise ValueError(f"Unsupported environment: {cls._env}")
247
237
 
248
238
  match cls._env:
249
239
  case cls.Environment.DATABRICKS_CONNECT:
250
- return builder()
240
+ return cls._get_databricks_connect_builder()
241
+ case (
242
+ cls.Environment.DATABRICKS_UI
243
+ | cls.Environment.FABRIC_UI
244
+ | cls.Environment.OTHER_REMOTE_SPARK
245
+ | cls.Environment.STANDALONE_SPARK
246
+ ):
247
+ return SparkSession.builder
251
248
  case _:
252
- return builder
249
+ raise ValueError(f"Unsupported environment: {cls._env}")
253
250
 
254
251
  @staticmethod
255
252
  def _get_databricks_connect_builder():
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cloe-nessy
3
- Version: 1.0.1
3
+ Version: 1.0.5
4
4
  Summary: Your friendly datalake monster.
5
5
  Project-URL: homepage, https://initions.com/
6
6
  Author-email: initions <ICSMC_EXT_PYPIORG@accenture.com>
@@ -16,7 +16,7 @@ Requires-Python: <3.14,>=3.11
16
16
  Requires-Dist: azure-identity<2.0.0,>=1.19.0
17
17
  Requires-Dist: cloe-logging[databricks,log-analytics]<0.4,>=0.3.8
18
18
  Requires-Dist: databricks-sdk<1.0.0,>=0.36.0
19
- Requires-Dist: fsspec<2025.12.1,>=2025.12.0
19
+ Requires-Dist: fsspec<2026.1.1,>=2026.1.0
20
20
  Requires-Dist: httpx<1.0.0,>=0.27.2
21
21
  Requires-Dist: jinja2<4.0.0,>=3.1.4
22
22
  Requires-Dist: matplotlib<4.0.0,>=3.9.2
@@ -20,19 +20,19 @@ cloe_nessy/file_utilities/strategies/onelake_strategy.py,sha256=Pni_JkKqfbKoEMOC
20
20
  cloe_nessy/file_utilities/strategies/utils_strategy.py,sha256=w4nrS6IcPPN7UBFBwszCfxgTI6xSE5BdY2WiqGYsFyI,3223
21
21
  cloe_nessy/integration/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
22
  cloe_nessy/integration/delta_loader/__init__.py,sha256=ZdBDde1uPtTCL_KAhilVmtVmmGvH5dHb05QsOozkteE,438
23
- cloe_nessy/integration/delta_loader/delta_load_options.py,sha256=bbPGhC0n8L6CmcmV91Xqq6fWRimxlUHUkr22uVqG0g4,1363
23
+ cloe_nessy/integration/delta_loader/delta_load_options.py,sha256=W3P3gwtkBW3pWbUEOK6agvJfj7QivXhEr_s0dwxhJxQ,1369
24
24
  cloe_nessy/integration/delta_loader/delta_loader.py,sha256=WOl44Udvo6hZ5PVFgabpehs8tt5nl9AYyDnnYBba5Ck,6872
25
25
  cloe_nessy/integration/delta_loader/delta_loader_factory.py,sha256=vB1cL6-Nc3SkLH1xtazMbMF1MnNYq8-g3GHZzRE3QmE,2251
26
26
  cloe_nessy/integration/delta_loader/delta_loader_metadata_table.py,sha256=G_EWUY76ZlbsPZB9LCGlOLVezk7DK6peYXEgt7-sTQE,1683
27
27
  cloe_nessy/integration/delta_loader/strategies/__init__.py,sha256=1o5fRWenL5KnUg1hf7kmTuTpG9pbMxchiQTub52Qvwo,255
28
- cloe_nessy/integration/delta_loader/strategies/delta_cdf_loader.py,sha256=FOOZqtMwp8_LoyG2ab2N19a074CFa2ArCEvNkl7wRWM,16682
29
- cloe_nessy/integration/delta_loader/strategies/delta_timestamp_loader.py,sha256=YYFH0DkdRPvITUc1JMgkmgIHjwDyZDCjqvEk2qhBMfE,6185
28
+ cloe_nessy/integration/delta_loader/strategies/delta_cdf_loader.py,sha256=sDilieWLmaw8JsjQcNRL4znmLZVvkJAzy0exjuTHUKk,16688
29
+ cloe_nessy/integration/delta_loader/strategies/delta_timestamp_loader.py,sha256=QV2smynYVfi3W7goKotPrGMPkahvIFEWT32LO56eWEI,6191
30
30
  cloe_nessy/integration/reader/__init__.py,sha256=NWQx-v6aKE8YOHhsxfeaZnMVq4KLKyRWXzUduf5aVsk,265
31
- cloe_nessy/integration/reader/api_reader.py,sha256=FbOyfLVG1ryL2GC-MgE1uClHICsQKBj9yZbY4TG5qrk,19637
31
+ cloe_nessy/integration/reader/api_reader.py,sha256=H6alDoCuGsxuhxpuN-JbL9-eMDchE9ZMq9P0hnz7t1I,20007
32
32
  cloe_nessy/integration/reader/catalog_reader.py,sha256=DlnykmFjV_v8SCBh3qaCvf24QM-6TdMFVHx5Mqv7Nvs,4850
33
- cloe_nessy/integration/reader/excel_reader.py,sha256=QXm0MaE_-tW5ix-f_3Pgn-Vx7VG5jA_uSp858rVV7lA,8042
33
+ cloe_nessy/integration/reader/excel_reader.py,sha256=dq_XbuQ_BcQwFuxld-12vzBA2F7hVhNkmMJvgAM-_R8,7937
34
34
  cloe_nessy/integration/reader/exceptions.py,sha256=_A9jFpe_RIDZCGY76qzjic9bsshxns6yXPSl141dq1c,203
35
- cloe_nessy/integration/reader/file_reader.py,sha256=FFqqu1h003FY2Df3ru-G1JO4Bg2Ai8Rzh58fjOCN7NM,8262
35
+ cloe_nessy/integration/reader/file_reader.py,sha256=1pf3kVk8UMEf0JPQiwhMLIszl55aLYaEDwS2Fp_9TT8,8261
36
36
  cloe_nessy/integration/reader/reader.py,sha256=YHriYkzsBduBjfI2FnP03VEo15a8UCRZ_sXtre8eaEs,1041
37
37
  cloe_nessy/integration/writer/__init__.py,sha256=3yzCAGiWZdQWtsbzlTih01sxVTJV2DDYwvl34lEAUlE,243
38
38
  cloe_nessy/integration/writer/catalog_writer.py,sha256=dQeXmtfs7J6rP6Ye3OCvxBraFScFX_3SHs7Md58hEeM,5296
@@ -40,30 +40,30 @@ cloe_nessy/integration/writer/file_writer.py,sha256=SUDbN13ZzDhbM8DpOGFgM_Gkg70T
40
40
  cloe_nessy/integration/writer/writer.py,sha256=elFPLFrWR-qVE9qnBtzzzhyRALLQcRVuOsPS0rNmRt4,1741
41
41
  cloe_nessy/integration/writer/delta_writer/__init__.py,sha256=h2CT6Hllmk0nodlek27uqwniCzVZKMkYcPGyG9K2Z24,164
42
42
  cloe_nessy/integration/writer/delta_writer/delta_append_writer.py,sha256=nribgHmapp59v3Rw_AfJg0_BRYhP7x2IJIeE74Ia_6A,4748
43
- cloe_nessy/integration/writer/delta_writer/delta_merge_writer.py,sha256=Yp_q_ycasW2_wwmzty_6fZeBVcW_0o8gLrr6F1gaUjQ,10195
43
+ cloe_nessy/integration/writer/delta_writer/delta_merge_writer.py,sha256=FvYNXK5k7tHC7Ek4j-q_oAQnuWyDnqWSDtkA9111wvk,13118
44
44
  cloe_nessy/integration/writer/delta_writer/delta_table_operation_type.py,sha256=m4YFY9_WgaOcnpBviVt3Km-w3wf3NF25wPS-n0NBGcE,970
45
- cloe_nessy/integration/writer/delta_writer/delta_writer_base.py,sha256=O7hw7YOa2FEzBlzjwPfxQTxm0ZrlszIjjfsHTwE_OhU,8609
45
+ cloe_nessy/integration/writer/delta_writer/delta_writer_base.py,sha256=B7PwPHKrsJL0ZxBT-H9wWSy0gn7shqNDJ0AbrpMHyMg,10135
46
46
  cloe_nessy/integration/writer/delta_writer/exceptions.py,sha256=xPmGiYV0xQXauln5Oh34E5vbm0rVcs6xCh-SJSb2bw0,107
47
47
  cloe_nessy/logging/__init__.py,sha256=ySVCVbdyR3Dno_tl2ZfiER_7EVaDoQMHVkNyfdMZumY,65
48
- cloe_nessy/logging/logger_mixin.py,sha256=H8MyMEyb_kEDP0Ow5QStAFLuOkTIeUnneGaj916fKlU,7443
48
+ cloe_nessy/logging/logger_mixin.py,sha256=xA12LIVn4yUEoaKmm7nGC1-U3ddSo_HiL3I5MfkvEwU,7409
49
49
  cloe_nessy/models/__init__.py,sha256=-FmWEJ1Oq1njSopjc0R7GmT64mLSmALkm8PkHNzy9Y8,327
50
50
  cloe_nessy/models/catalog.py,sha256=ayC1sMp4cNLAZtu0ICVV3Us6-o4hn8U9tpzzvxC9RAs,177
51
51
  cloe_nessy/models/column.py,sha256=W4V1Ls1d60VyZ1Ko9Yu9eSipcMbxSzKicn0aloHPiR0,2027
52
52
  cloe_nessy/models/constraint.py,sha256=hsFlhn4n928z81O3dl3v5bMetewPWzMjkJK3_4kASSM,178
53
53
  cloe_nessy/models/foreign_key.py,sha256=DwRVHs9sShqqPV-NL7ow_3AmPPWX0Od26yZn_I565pU,1001
54
- cloe_nessy/models/schema.py,sha256=cNSrH7K4hLRrkg1E6fW6DUIBMZdR2A5B21POj5iQ4GA,3429
55
- cloe_nessy/models/table.py,sha256=3AUBUKLJv1x-xN9KYc5Ndjf-lAlT83rUYdhRKy8wFU4,12105
54
+ cloe_nessy/models/schema.py,sha256=9RA31esHyj9saLeHvvYzK9wjK3GNnr15UO66NtSM368,3478
55
+ cloe_nessy/models/table.py,sha256=XG6MazeamF53AV44L-MCnkKBceXbnVU76mCs9GTB5Lg,12171
56
56
  cloe_nessy/models/types.py,sha256=XRbuJGdTNa6aXyE3IAzs_J9gVjbfkzMDLfGl-k6jI_4,223
57
- cloe_nessy/models/volume.py,sha256=51BE06FrL1Wv6zblFwJ_HTiR6WQqH7pSmrdH90rqwLg,2444
57
+ cloe_nessy/models/volume.py,sha256=kfDDaCL6GzZsv6SshsyXO0NMe7a-yYv_GlLEWk6DOjo,2473
58
58
  cloe_nessy/models/adapter/__init__.py,sha256=m36W_mqwB3dCYnCIt0fLOSHS4E1VU8FRGoaum4Gf95o,90
59
59
  cloe_nessy/models/adapter/unity_catalog_adapter.py,sha256=a-14Ys-AevVYQd0xeJU1syLxjT5Wzo4uog1hFSEs76M,12651
60
60
  cloe_nessy/models/mixins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
61
- cloe_nessy/models/mixins/read_instance_mixin.py,sha256=j5Y4aNWOh1jlskEaxNooZFJgPyxRmik00gAVLJnAaRs,4507
61
+ cloe_nessy/models/mixins/read_instance_mixin.py,sha256=HT42qor6IltR-BTfe_DYa5ylntE7qbArs-3cSjo4KXU,4649
62
62
  cloe_nessy/models/mixins/template_loader_mixin.py,sha256=5MXhEGBFlq3dwZvINEyBowSlipNnVun2H_TmhI_fsS4,549
63
63
  cloe_nessy/models/templates/create_table.sql.j2,sha256=71JpUyUZ_ZYO2M0tfIrTXHR7JycypAGsELt2-2d3oO0,2479
64
64
  cloe_nessy/models/templates/create_volume.sql.j2,sha256=XIUf1cHcvAxcGTyhzUiv4xpQ1cfDw_ra3_FKmOuLoBs,289
65
65
  cloe_nessy/object_manager/__init__.py,sha256=3sle0vNpPwBOkycxA3XVS9m4XZf5LD3Qd4NGxdqcHno,186
66
- cloe_nessy/object_manager/table_manager.py,sha256=4eQG-zMiuBpeJmvWdL3KdhHRiPFf8TS0RFNRp8Yz6rY,13887
66
+ cloe_nessy/object_manager/table_manager.py,sha256=1LcwHvwRlHF8o4iiECg1gkAGVy5Wkpo1HBXsZlFGJXU,13907
67
67
  cloe_nessy/object_manager/volume_manager.py,sha256=6epd3KXzcNH04EvaKubAfLsaUm9qBMeT3KNvMK04gGs,2727
68
68
  cloe_nessy/pipeline/__init__.py,sha256=BUzL4HJaCXWmK7OgKaxdwK72JrrdzfzIvyxOGtM28U0,417
69
69
  cloe_nessy/pipeline/pipeline.py,sha256=L4wk3b06LNWRj01nnAkuQpeRrwFTyaV1xTpgYAg4sak,10819
@@ -72,20 +72,20 @@ cloe_nessy/pipeline/pipeline_builder.py,sha256=_BBl43two0pherkTXZ-Yrpt6XcLW8Q-Z9
72
72
  cloe_nessy/pipeline/pipeline_config.py,sha256=oVQ-IH4etTGZVVEnE-5iDPLYOtWpvDlltWFv1nevnqQ,3229
73
73
  cloe_nessy/pipeline/pipeline_context.py,sha256=eCOcjyE16rGRom3L85Gy_BbncfQD6i1x31yrWqZws-4,1881
74
74
  cloe_nessy/pipeline/pipeline_parsing_service.py,sha256=eeC4RbGBILGN6zkbUyjH-qGgEMtOWV4Kv_VxrHbHMY0,9021
75
- cloe_nessy/pipeline/pipeline_plotting_service.py,sha256=goMQj73FzUVchKn5c2SsPcWR6fr7DtVkVrcQfJsKCq4,13111
75
+ cloe_nessy/pipeline/pipeline_plotting_service.py,sha256=OjdYDQW19yXjdcqs7bDMlyWmv0cQz7Qn4I2cakBgN2E,13139
76
76
  cloe_nessy/pipeline/pipeline_step.py,sha256=oTnlvRpB0fbOBQXbPe1URstA5fv-97igCHt_41fKCAk,2082
77
77
  cloe_nessy/pipeline/actions/__init__.py,sha256=FfAnSIl-0T6pnaWhClkDqV8nfTdvLvZZJdwycsZMLPw,2990
78
- cloe_nessy/pipeline/actions/read_api.py,sha256=MAc7QfmhnaRUMdE09Ywt41RSAsuW4co8zF0zXHwbM8U,16193
79
- cloe_nessy/pipeline/actions/read_catalog_table.py,sha256=sx3dezd33c1FawMrxORwhK5GNo1IpjCyuLATWz7esZ0,6735
78
+ cloe_nessy/pipeline/actions/read_api.py,sha256=YMOWPCyxitU5v6HHH_AZqpbHQenPU4-WlaOrb-NsIIk,16245
79
+ cloe_nessy/pipeline/actions/read_catalog_table.py,sha256=MK67NIB5qXTDwCC0EFwHYoOkelFnwY6Z4REpC5BlFb4,6359
80
80
  cloe_nessy/pipeline/actions/read_excel.py,sha256=IG_VmDEt1TvGVEO0SY9Fm3awHNjfisR1_7DUmhC3NEE,7968
81
- cloe_nessy/pipeline/actions/read_files.py,sha256=hRcM7wG35vxxLVajW3SK5euHW02qxiXCYSkIl11xiQ0,7308
81
+ cloe_nessy/pipeline/actions/read_files.py,sha256=o251vfM0S3JxAaEeRQHlBMIlRqhcLlX3fsaPWVKtkBg,7304
82
82
  cloe_nessy/pipeline/actions/read_metadata_yaml.py,sha256=i8fQceV63eAqx_x0ANisCkXWfMHyhqsfFHVFH5yP2po,3544
83
83
  cloe_nessy/pipeline/actions/transform_change_datatype.py,sha256=24Tn6R3TvUkWCh8V6naLdyNbCbqvyPOOoer-hy_Ebq4,2077
84
84
  cloe_nessy/pipeline/actions/transform_clean_column_names.py,sha256=VxvWqENW63c50L96JA1V_ioe4By6gGzx_iY86njOXEM,3044
85
85
  cloe_nessy/pipeline/actions/transform_concat_columns.py,sha256=Nk8YbhxDnFZsWzW9Dj5Yl76Uq6VrcMlevQPHGms65L8,3777
86
86
  cloe_nessy/pipeline/actions/transform_convert_timestamp.py,sha256=2SL078tBcOmytDbt-cR81jZbclwqELsUB4XDLjaCnNo,3579
87
87
  cloe_nessy/pipeline/actions/transform_decode.py,sha256=_TQc2GFcgdJvtt6BVrCe1xVnJiSHB_J6mEHH01xIKMY,4464
88
- cloe_nessy/pipeline/actions/transform_deduplication.py,sha256=SfTDrOL0TNSC4wITbozabC0jYvceTLnqU4urnEjYk9g,4910
88
+ cloe_nessy/pipeline/actions/transform_deduplication.py,sha256=wOsyiBZOFCFyS9xIPyO1Tie6uBWJuj7XFqlt-cDCEWg,4976
89
89
  cloe_nessy/pipeline/actions/transform_distinct.py,sha256=c7aBxANyqT4aKhm0cSELDtD-bP0Se9vxlBF0K4AgQWs,1976
90
90
  cloe_nessy/pipeline/actions/transform_filter.py,sha256=Nz_ggRfKIcNzYFfFOsgq1QeatjdEis0up4I7cOWBdyo,1446
91
91
  cloe_nessy/pipeline/actions/transform_generic_sql.py,sha256=_naWfmPdYAUKjPNeHu5qJAohOL7DHCSYz_kwoeRv3OI,2741
@@ -93,26 +93,26 @@ cloe_nessy/pipeline/actions/transform_group_aggregate.py,sha256=KUHeeP-RIDi34dpb
93
93
  cloe_nessy/pipeline/actions/transform_hash_columns.py,sha256=M5_wolJwzJpPTSrZq4yWV3TH7H6BGqbjJkJCwtqPlQo,8507
94
94
  cloe_nessy/pipeline/actions/transform_join.py,sha256=ez1M1wVc9khOZj1swMArJbBKXxEpjenUHrW1wL8H330,7200
95
95
  cloe_nessy/pipeline/actions/transform_json_normalize.py,sha256=petF7pnNq1EKc8MqVdG0weFALAHNILSe_eAu4Z5XxIo,4833
96
- cloe_nessy/pipeline/actions/transform_regex_extract.py,sha256=vMtUW0s_oXy8DC1-4Xh-WQN3CCp8jXYsJiFYvGdYrqE,6390
96
+ cloe_nessy/pipeline/actions/transform_regex_extract.py,sha256=HCn2OzUOkxaiFg5GssVvsIwrBvAo-xb4sRu4lMgKMcE,6412
97
97
  cloe_nessy/pipeline/actions/transform_rename_columns.py,sha256=4zJcPCONMU4C67qeuzsrX3AORRRHoq_selUI7FJyeg0,1952
98
98
  cloe_nessy/pipeline/actions/transform_replace_values.py,sha256=1OPHTrjcphfyGepcO7ozYfeqfwA18pjlyHpVKUS_AAU,2049
99
99
  cloe_nessy/pipeline/actions/transform_select_columns.py,sha256=-GhSEsb7iNnZIsYRm3BG9BX4_qUDJMbpj1DsKPY046w,4574
100
100
  cloe_nessy/pipeline/actions/transform_union.py,sha256=SZtEzh567CIExUj9yMEgshE28h4dXKT7Wr2TDj4zB4k,2718
101
101
  cloe_nessy/pipeline/actions/transform_with_column.py,sha256=c-E1yYkeYmovbN1maT7ImpdQlW0nYvYsHCtDvfe4wt8,3357
102
- cloe_nessy/pipeline/actions/write_catalog_table.py,sha256=FyC0scQU8Ul3Uigpk6IN2IJpf_4jRjAqF5yHtDVwG00,4852
102
+ cloe_nessy/pipeline/actions/write_catalog_table.py,sha256=rb4UOzJhe4tU3GEhAerPKWlzxW87FftyeqgMrRGTnYk,4848
103
103
  cloe_nessy/pipeline/actions/write_delta_append.py,sha256=e1g4mDhwAZdKyt4Gb7ZzHcQrJ1duSl8qOn6ONizRsoM,2934
104
- cloe_nessy/pipeline/actions/write_delta_merge.py,sha256=fwinlTeZoDuTyrbln5vMu1UJ1LG8ZQrus3LoCVF__I4,5819
104
+ cloe_nessy/pipeline/actions/write_delta_merge.py,sha256=kZL2PTIwB6Mj4UKg5f9SvU1VaakuYfFoymlcLf-L7dA,6443
105
105
  cloe_nessy/pipeline/actions/write_file.py,sha256=JZ8UZslxUn_ttYt5wDyvtHFq2FqYk3vOR8kvExJI8pk,3212
106
106
  cloe_nessy/pipeline/utils/__init__.py,sha256=xi02UjBMiXWD7b9gDvww4gyRyowb0eRd_6Wbu0F_cro,118
107
107
  cloe_nessy/pipeline/utils/delta_load_utils.py,sha256=KitMNruxePEkecI0h4Jint1JwJpaEog5mCOchMkgan8,1495
108
108
  cloe_nessy/session/__init__.py,sha256=-MifkekjFu_3A9rWm30CGFQZ4yjruGaydNpbu3uq7Ww,155
109
109
  cloe_nessy/session/pyspark_compat.py,sha256=NrgSWAaWz3GgMNLmzpY4cPgneQytNQlOq_dWrD1MveE,444
110
- cloe_nessy/session/session_manager.py,sha256=d6qMTkaWJcN4QG261IoxIfQlNlB0ELtVNjTCNx2Elas,9717
110
+ cloe_nessy/session/session_manager.py,sha256=Bn-AKR0H8LvMRTqZIw2OPo-ta8C4_TNNdyl_RtftnZY,9541
111
111
  cloe_nessy/settings/__init__.py,sha256=ZbkneO3WaKOxon7qHFHnou7EnBOSnBFyKMDZblIEvzM,101
112
112
  cloe_nessy/settings/settings.py,sha256=I4n129lrujriW-d8q4as2Kb4_kI932ModfZ5Ow_UpVM,3653
113
113
  cloe_nessy/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
114
114
  cloe_nessy/utils/column_names.py,sha256=dCNtm61mc5aLkY2oE4rlfN3VLCrpot6fOESjAZmCmhA,361
115
115
  cloe_nessy/utils/file_and_directory_handler.py,sha256=r2EVt9xG81p6ScaJCwETC5an6pMT6WseB0jMOR-JlpU,602
116
- cloe_nessy-1.0.1.dist-info/METADATA,sha256=qLn3XYfGsw2pW-pPtUUidtcHZiUtIwOESWY8LCenGEY,3291
117
- cloe_nessy-1.0.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
118
- cloe_nessy-1.0.1.dist-info/RECORD,,
116
+ cloe_nessy-1.0.5.dist-info/METADATA,sha256=NnYdhZTmeAouyXDAUTj4ELCOKUzNtDBkd_Rfvlf7ygM,3289
117
+ cloe_nessy-1.0.5.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
118
+ cloe_nessy-1.0.5.dist-info/RECORD,,