cloe-nessy 1.0.0__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
@@ -160,7 +160,7 @@ class ExcelDataFrameReader(BaseReader):
                 "__metadata",
                 F.create_map(
                     F.lit("timestamp"),
-                    F.current_timestamp(),
+                    F.current_timestamp().cast("string"),
                     F.lit("file_location"),
                     F.lit(location),
                     F.lit("sheet_name"),
@@ -192,7 +192,8 @@ class FileReader(BaseReader):
         """Add all metadata columns to the DataFrame."""
         metadata_columns = df.select("_metadata.*").columns

-        entries = [(F.lit(field), F.col(f"_metadata.{field}")) for field in metadata_columns]
+        # Cast all metadata values to strings to ensure type consistency in the map
+        entries = [(F.lit(field), F.col(f"_metadata.{field}").cast("string")) for field in metadata_columns]
         flat_list = [item for tup in entries for item in tup]

         df = df.withColumn("__metadata", F.create_map(flat_list))
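Both reader changes address the same Spark constraint: `F.create_map` requires every map value to share one type, so a raw timestamp mixed with string literals fails analysis. A minimal standalone sketch (not package code) of the behavior:

```python
# Minimal sketch, not part of cloe-nessy: why the readers cast metadata values to string.
from pyspark.sql import SparkSession
import pyspark.sql.functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([("abfss://container/file.xlsx",)], ["location"])

# Mixing a TimestampType value with StringType values in one map raises an
# AnalysisException ("The given values of function map should all be the same type").
# df.select(F.create_map(F.lit("timestamp"), F.current_timestamp(),
#                        F.lit("file_location"), F.col("location")))

# Casting every value to string gives the map a single value type.
ok = df.select(
    F.create_map(
        F.lit("timestamp"), F.current_timestamp().cast("string"),
        F.lit("file_location"), F.col("location"),
    ).alias("__metadata")
)
ok.printSchema()  # __metadata: map<string,string>
```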
@@ -29,6 +29,9 @@ class DeltaMergeConfig(BaseModel):
         use_partition_pruning: Flag to specify whether to use partition
             pruning to optimize the performance of the merge operation.
         partition_by: List of column names to partition by.
+        column_mapping: Mapping from target column names to source column names.
+            If a column is not in the mapping, it's assumed to have the same name
+            in both source and target.
     """

     dataframe_columns: list[str]
@@ -39,6 +42,7 @@ class DeltaMergeConfig(BaseModel):
     when_not_matched_insert: bool = True
     use_partition_pruning: bool = True
     partition_by: list[str] = Field(default_factory=list)
+    column_mapping: dict[str, str] = Field(default_factory=dict)
     cols_to_merge: list[str] = Field(default_factory=list, alias="_cols_to_merge")
     cols_to_update: set[str] = Field(default_factory=set, alias="_cols_to_update")
     cols_to_insert: set[str] = Field(default_factory=set, alias="_cols_to_insert")
@@ -58,11 +62,20 @@ class DeltaMergeConfig(BaseModel):
     @model_validator(mode="before")
     @classmethod
     def _validate_key_columns(cls, config: Any):
-        """Key columns must exist in the data frame."""
+        """Key columns must exist in the data frame (considering column mapping)."""
         key_columns = config.get("key_columns")
         dataframe_columns = config.get("dataframe_columns")
-        if not set(key_columns).issubset(set(dataframe_columns)):
-            raise ValueError("Key columns must exist in the DataFrame columns.")
+        column_mapping = config.get("column_mapping", {})
+
+        # For each key column (target name), find the corresponding source column
+        missing_columns = []
+        for key_col in key_columns:
+            source_col = column_mapping.get(key_col, key_col)
+            if source_col not in dataframe_columns:
+                missing_columns.append(f"{key_col} (maps to {source_col})" if key_col != source_col else key_col)
+
+        if missing_columns:
+            raise ValueError(f"Key columns must exist in the DataFrame. Missing columns: {', '.join(missing_columns)}")
         return config

     @model_validator(mode="before")
@@ -70,15 +83,37 @@ class DeltaMergeConfig(BaseModel):
     def _derive_merge_columns(cls, config: Any):
         """Derive update and insert columns from the DataFrame columns."""
         dataframe_columns = config.get("dataframe_columns", [])
-        config["_cols_to_merge"] = list(set(dataframe_columns))
+        column_mapping = config.get("column_mapping", {})
+
+        # Build reverse mapping: source_col -> target_col
+        reverse_mapping = {v: k for k, v in column_mapping.items()}
+
+        # Determine which target columns we're working with
+        # For each dataframe column, find its corresponding target column
+        target_columns = []
+        for df_col in dataframe_columns:
+            target_col = reverse_mapping.get(df_col, df_col)
+            target_columns.append(target_col)
+
+        config["_cols_to_merge"] = list(set(target_columns))
+
         if config.get("cols_to_exclude_from_update"):
             config["_cols_to_update"] = set(config["_cols_to_merge"]) - set(config["cols_to_exclude_from_update"])
         else:
             config["_cols_to_update"] = set(config["_cols_to_merge"])

         config["_cols_to_insert"] = config["_cols_to_merge"]
-        config["final_cols_to_update"] = {col: f"source.{col}" for col in config["_cols_to_update"]}
-        config["final_cols_to_insert"] = {col: f"source.{col}" for col in config["_cols_to_insert"]}
+
+        # Build final mappings using column_mapping (target -> source)
+        # For each target column, find the corresponding source column
+        config["final_cols_to_update"] = {
+            target_col: f"source.`{column_mapping.get(target_col, target_col)}`"
+            for target_col in config["_cols_to_update"]
+        }
+        config["final_cols_to_insert"] = {
+            target_col: f"source.`{column_mapping.get(target_col, target_col)}`"
+            for target_col in config["_cols_to_insert"]
+        }
         return config

     @model_validator(mode="after")
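To make the derivation above concrete, here is a small standalone recreation (plain Python, not the pydantic model itself) of how `column_mapping` reshapes the derived columns: source DataFrame columns are translated to target names for `_cols_to_merge`, and the final assignment dicts point each target column back at its mapped source column.

```python
# Standalone sketch of the derivation above; DeltaMergeConfig wires the same
# logic through its pydantic validators.
dataframe_columns = ["cust_id", "name", "email"]   # source DataFrame columns
column_mapping = {"customer_id": "cust_id"}          # target -> source

reverse_mapping = {v: k for k, v in column_mapping.items()}  # source -> target
cols_to_merge = [reverse_mapping.get(c, c) for c in dataframe_columns]
# ['customer_id', 'name', 'email']

final_cols_to_update = {
    target: f"source.`{column_mapping.get(target, target)}`" for target in cols_to_merge
}
# {'customer_id': 'source.`cust_id`', 'name': 'source.`name`', 'email': 'source.`email`'}
print(cols_to_merge, final_cols_to_update)
```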
@@ -127,7 +162,7 @@ class DeltaMergeWriter(BaseDeltaWriter):

     def _build_match_conditions(self, data_frame: DataFrame, config: DeltaMergeConfig) -> str:
         """Builds match conditions for the Delta table merge."""
-        match_conditions = self._merge_match_conditions(config.key_columns)
+        match_conditions = self._merge_match_conditions_with_mapping(config.key_columns, config.column_mapping)
         if config.use_partition_pruning:
             match_conditions_list = [match_conditions] + [
                 self._partition_pruning_conditions(data_frame, config.partition_by),
@@ -169,6 +204,11 @@ class DeltaMergeWriter(BaseDeltaWriter):
         function also supports partition pruning to optimize the performance of
         the merge operation.

+        When source and target tables have different column names, use the
+        `column_mapping` parameter to map target column names to source column names.
+        For any columns not in the mapping, the same name is assumed for both source
+        and target.
+
         Args:
             table: The Table object representing the Delta table.
             table_identifier: The identifier of the Delta table in the format
@@ -178,7 +218,16 @@ class DeltaMergeWriter(BaseDeltaWriter):
             ignore_empty_df: A flag indicating whether to ignore an empty source
                 dataframe.
             kwargs: Passed to the
-                [`DeltaMergeConfig`][cloe_nessy.integration.writer.delta_merge_writer.DeltaMergeConfig].
+                [`DeltaMergeConfig`][cloe_nessy.integration.writer.delta_writer.delta_merge_writer.DeltaMergeConfig].
+                Common kwargs include:
+                - key_columns: List of target column names to use as merge keys.
+                - column_mapping: Dict mapping target column names to source column names.
+                - when_matched_update: Whether to update matching records.
+                - when_matched_delete: Whether to delete matching records.
+                - when_not_matched_insert: Whether to insert non-matching records.
+                - cols_to_exclude_from_update: Target columns to exclude from updates.
+                - use_partition_pruning: Whether to use partition pruning.
+                - partition_by: List of partition columns.

         Raises:
             ValueError: If both, table and table_identifier or storage_path are provided.
@@ -189,6 +238,21 @@ class DeltaMergeWriter(BaseDeltaWriter):
                 merge operation.
             ValueError: If partition columns are not specified when using
                 partition pruning.
+
+        Example:
+            ```python
+            # Merge with different column names
+            writer.write(
+                data_frame=source_df,
+                table=target_table,
+                key_columns=["customer_id"],
+                column_mapping={
+                    "customer_id": "cust_id",
+                    "full_name": "name",
+                    "email_address": "email"
+                }
+            )
+            ```
         """
         if self._empty_dataframe_check(data_frame, ignore_empty_df):
             return
@@ -150,6 +150,42 @@ class BaseDeltaWriter(BaseWriter, ABC):
         """
         return " AND ".join([f"target.`{c}` <=> source.`{c}`" for c in columns])

+    @staticmethod
+    def _merge_match_conditions_with_mapping(
+        key_columns: list[str], column_mapping: dict[str, str] | None = None
+    ) -> str:
+        """Merges match conditions with support for column name mapping.
+
+        This function generates SQL match conditions for merging tables where source and target
+        columns may have different names.
+
+        Args:
+            key_columns: A list of target column names to use as keys for the merge operation.
+            column_mapping: A dictionary mapping target column names to source column names.
+                If None or empty, assumes source and target columns have the same names.
+
+        Returns:
+            A string containing the match conditions, separated by " AND "
+
+        Example:
+            ```python
+            # Without mapping (same column names):
+            _merge_match_conditions_with_mapping(["id", "customer_id"])
+            # "target.`id` <=> source.`id` AND target.`customer_id` <=> source.`customer_id`"
+
+            # With mapping (different column names):
+            _merge_match_conditions_with_mapping(
+                ["id", "customer_id"],
+                {"customer_id": "cust_id"}
+            )
+            # "target.`id` <=> source.`id` AND target.`customer_id` <=> source.`cust_id`"
+            ```
+        """
+        mapping = column_mapping or {}
+        return " AND ".join(
+            [f"target.`{target_col}` <=> source.`{mapping.get(target_col, target_col)}`" for target_col in key_columns]
+        )
+
     @staticmethod
     def _partition_pruning_conditions(df: "DataFrame", partition_cols: list[str] | None) -> str:
         """Generates partition pruning conditions for an SQL query.
@@ -19,6 +19,7 @@ from .transform_group_aggregate import TransformGroupAggregate
 from .transform_hash_columns import TransformHashColumnsAction
 from .transform_join import TransformJoinAction
 from .transform_json_normalize import TransformJsonNormalize
+from .transform_regex_extract import TransformRegexExtract
 from .transform_rename_columns import TransformRenameColumnsAction
 from .transform_replace_values import TransformReplaceValuesAction
 from .transform_select_columns import TransformSelectColumnsAction
@@ -56,6 +57,7 @@ __all__ = [
     "TransformGroupAggregate",
     "TransformJoinAction",
     "TransformJsonNormalize",
+    "TransformRegexExtract",
     "TransformRenameColumnsAction",
     "TransformReplaceValuesAction",
     "TransformSelectColumnsAction",
@@ -0,0 +1,169 @@
+import re
+from typing import Any
+
+import pyspark.sql.functions as F
+
+from cloe_nessy.pipeline.pipeline_action import PipelineAction
+from cloe_nessy.pipeline.pipeline_context import PipelineContext
+
+
+class TransformRegexExtract(PipelineAction):
+    r"""Extract values from a specified column in a DataFrame using regex patterns.
+
+    This action extracts values from a column based on a regex pattern and stores
+    the result in a new column. Optionally, you can replace the matched pattern in
+    the original column with a different string, remove the original column, or add
+    a boolean column indicating which rows matched the pattern.
+
+    Example:
+        ```yaml
+        Extract Action:
+            action: TRANSFORM_REGEX_EXTRACT
+            options:
+                source_column_name: Email
+                extract_column_name: org_domain
+                pattern: (?<=@)([A-Za-z0-9-]+)
+                replace_by: exampledomain.org
+        ```
+
+    This action also supports processing multiple columns simultaneously. To use this
+    functionality, structure the configuration as a dictionary mapping each source
+    column name to its extraction parameters.
+
+    Example:
+        ```yaml
+        Extract Action:
+            action: TRANSFORM_REGEX_EXTRACT
+            options:
+                extract_columns:
+                    Name:
+                        pattern: (?<=\w+) (\w+)
+                        replace_by: ''
+                        extract_column_name: last_name
+                        match_info_column_name: has_last_name
+                    Email:
+                        pattern: @\w+\.\w+
+                        extract_column_name: domain
+                        keep_original_column: False
+        ```
+
+    """
+
+    name: str = "TRANSFORM_REGEX_EXTRACT"
+
+    def run(
+        self,
+        context: PipelineContext,
+        source_column_name: str = "",
+        extract_column_name: str = "",
+        pattern: str = "",
+        keep_original_column: bool = True,
+        replace_by: str = "",
+        match_info_column_name: str = "",
+        extract_columns: dict | None = None,
+        **_: Any,
+    ) -> PipelineContext:
+        """Performs a regex extract (and replace) on a specified column in a DataFrame.
+
+        This function performs a regex extract (and optionally a replace) on one or more columns.
+
+        Args:
+            context: The context in which this action is executed.
+            source_column_name: Column name to perform the regex replace on.
+            pattern: Regex pattern to match.
+            replace_by: String that should replace the extracted pattern in the source column.
+            extract_column_name: Column name to store the extract, default: <source_column_name>_extract
+            keep_original_column: Whether to keep the original column, default: True
+            match_info_column_name: Column name to store a boolean column whether a match was found, default: None
+            extract_columns: Dictionary of column names and their corresponding 1-column-case.
+
+        Raises:
+            ValueError: If any of the required arguments are not provided.
+            ValueError: If the regex pattern is invalid.
+
+        Returns:
+            PipelineContext: Transformed context with the modified DataFrame.
+        """
+        if context.data is None:
+            raise ValueError("Data from the context is required for the operation.")
+        if not extract_columns and not source_column_name:
+            raise ValueError("Either extract_columns or source_column_name must be provided.")
+
+        df = context.data
+
+        if source_column_name:
+            self._console_logger.info(f"Extracting from column '{source_column_name}' using pattern: {pattern}")
+            df = self._process_one_column(
+                df,
+                source_column_name,
+                pattern,
+                extract_column_name,
+                replace_by,
+                keep_original_column,
+                match_info_column_name,
+            )
+
+        elif isinstance(extract_columns, dict):
+            self._console_logger.info(f"Extracting from {len(extract_columns)} columns")
+            for one_source_column_name in extract_columns:
+                parameter_dict = self._get_default_dict() | extract_columns[one_source_column_name]
+                df = self._process_one_column(df, one_source_column_name, **parameter_dict)
+
+        else:
+            raise ValueError("extract_columns must be a dictionary. See documentation for proper format.")
+
+        return context.from_existing(data=df)
+
+    def _process_one_column(
+        self,
+        df,
+        source_column_name,
+        pattern,
+        extract_column_name,
+        replace_by,
+        keep_original_column,
+        match_info_column_name,
+    ):
+        # Extract the first captured group (group 0 is the entire match)
+        matched_group_id = 0
+
+        if not extract_column_name:
+            extract_column_name = f"{source_column_name}_extracted"
+
+        if not pattern:
+            raise ValueError(f"The regex pattern (pattern) for column {source_column_name} must be provided.")
+
+        # Validate regex pattern
+        try:
+            re.compile(pattern)
+        except re.error as e:
+            raise ValueError(f"Invalid regex pattern '{pattern}' for column {source_column_name}: {e}") from e
+
+        df = df.withColumn(extract_column_name, F.regexp_extract(source_column_name, pattern, matched_group_id))
+
+        if replace_by:
+            df = df.withColumn(source_column_name, F.regexp_replace(source_column_name, pattern, replace_by))
+
+        if match_info_column_name:
+            # Check if extraction is null or empty string
+            df = df.withColumn(
+                match_info_column_name,
+                F.when((F.col(extract_column_name).isNull()) | (F.col(extract_column_name) == ""), False).otherwise(
+                    True
+                ),
+            )
+
+        if not keep_original_column:
+            df = df.drop(source_column_name)
+
+        return df
+
+    def _get_default_dict(self) -> dict[str, Any]:
+        """Return default parameters for single column extraction."""
+        return {
+            "pattern": "",
+            "extract_column_name": "",
+            "replace_by": "",
+            "keep_original_column": True,
+            "match_info_column_name": "",
+        }
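For orientation, the new action reduces to `F.regexp_extract` (group index 0, i.e. the whole match), an optional `F.regexp_replace` on the source column, and an optional match-indicator column. A standalone sketch of those Spark primitives (not the pipeline action itself, and the column values are illustrative):

```python
# Standalone sketch of the Spark calls used by TransformRegexExtract.
from pyspark.sql import SparkSession
import pyspark.sql.functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([("alice@example.org",), ("no-at-sign",)], ["Email"])

pattern = "(?<=@)([A-Za-z0-9-]+)"

# Group index 0 returns the whole match; regexp_extract yields "" when nothing matches.
df = df.withColumn("org_domain", F.regexp_extract("Email", pattern, 0))

# Optional replace of the matched part in the source column ("alice@redacted.org").
df = df.withColumn("Email", F.regexp_replace("Email", pattern, "redacted"))

# Optional match-indicator column, mirroring match_info_column_name.
df = df.withColumn(
    "has_domain",
    F.when((F.col("org_domain").isNull()) | (F.col("org_domain") == ""), False).otherwise(True),
)
df.show(truncate=False)
# Row 1: org_domain = "example", has_domain = true
# Row 2: org_domain = "",        has_domain = false
```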
@@ -13,6 +13,7 @@ class WriteDeltaMergeAction(PipelineAction):

     Example:
         ```yaml
+        # Basic merge with same column names
         Write Delta Merge:
             action: WRITE_DELTA_MERGE
             options:
@@ -20,13 +21,25 @@ class WriteDeltaMergeAction(PipelineAction):
                 key_columns:
                     - id
                     - customer_id
-                cols_to_update:
-                    - name
-                    - email
-                    - updated_at
+                cols_to_exclude_from_update:
+                    - created_at
                 when_matched_update: true
                 when_not_matched_insert: true
                 use_partition_pruning: true
+
+        # Merge with different source and target column names
+        Write Delta Merge with Mapping:
+            action: WRITE_DELTA_MERGE
+            options:
+                table_identifier: my_catalog.my_schema.my_table
+                key_columns:
+                    - customer_id
+                column_mapping:
+                    customer_id: cust_id
+                    full_name: name
+                    email_address: email
+                when_matched_update: true
+                when_not_matched_insert: true
         ```
     """

@@ -38,11 +51,10 @@ class WriteDeltaMergeAction(PipelineAction):
         *,
         table_identifier: str | None = None,
         key_columns: list[str] | None = None,
-        cols_to_update: list[str] | None = None,
-        cols_to_insert: list[str] | None = None,
-        cols_to_exclude: list[str] | None = None,
+        cols_to_exclude_from_update: list[str] | None = None,
+        column_mapping: dict[str, str] | None = None,
         when_matched_update: bool = True,
-        when_matched_deleted: bool = False,
+        when_matched_delete: bool = False,
         when_not_matched_insert: bool = True,
         use_partition_pruning: bool = True,
         ignore_empty_df: bool = False,
@@ -57,23 +69,23 @@ class WriteDeltaMergeAction(PipelineAction):
            table_identifier: The identifier of the table. If passed, the
                UC Adapter will be used to create a table object. Otherwise the Table
                object will be created from the table metadata in the context.
-           key_columns: List of column names that form the
+           key_columns: List of target column names that form the
                key for the merge operation.
+           cols_to_exclude_from_update: List of target column names to be
+               excluded from the update operation in the target Delta table.
+           column_mapping: Mapping from target column names to source column names.
+               Use this when source and target tables have different column names.
+               If a column is not in the mapping, it's assumed to have the same name
+               in both source and target.
            when_matched_update: Flag to specify whether to
-               perform an update operation whenmatching records are found in
+               perform an update operation when matching records are found in
                the target Delta table.
-           when_matched_deleted: Flag to specify whether to
+           when_matched_delete: Flag to specify whether to
                perform a delete operation when matching records are found in
                the target Delta table.
            when_not_matched_insert: Flag to specify whether to perform an
                insert operation when matching records are not found in the target
                Delta table.
-           cols_to_update: List of column names to be
-               updated in the target Delta table.
-           cols_to_insert: List of column names to be
-               inserted into the target Delta table.
-           cols_to_exclude: List of column names to be
-               excluded from the merge operation.
            use_partition_pruning: Flag to specify whether to use partition
                pruning to optimize the performance of the merge operation.
            ignore_empty_df: A flag indicating whether to ignore an empty source dataframe.
@@ -113,16 +125,15 @@ class WriteDeltaMergeAction(PipelineAction):
         assert key_columns is not None, "Key columns must be provided."

         delta_merge_writer.write(
-            table_identifier=context.table_metadata.identifier,
+            data_frame=context.data,
             table=context.table_metadata,
+            table_identifier=context.table_metadata.identifier,
             storage_path=str(context.table_metadata.storage_path),
-            data_frame=context.data,
             key_columns=key_columns,
-            cols_to_update=cols_to_update,
-            cols_to_insert=cols_to_insert,
-            cols_to_exclude=cols_to_exclude,
+            cols_to_exclude_from_update=cols_to_exclude_from_update or [],
+            column_mapping=column_mapping or {},
             when_matched_update=when_matched_update,
-            when_matched_deleted=when_matched_deleted,
+            when_matched_delete=when_matched_delete,
             when_not_matched_insert=when_not_matched_insert,
             use_partition_pruning=use_partition_pruning,
             partition_by=context.table_metadata.partition_by,
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cloe-nessy
-Version: 1.0.0
+Version: 1.0.3
 Summary: Your friendly datalake monster.
 Project-URL: homepage, https://initions.com/
 Author-email: initions <ICSMC_EXT_PYPIORG@accenture.com>
@@ -12,7 +12,7 @@ Classifier: License :: OSI Approved :: MIT License
 Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python :: 3
 Classifier: Topic :: Database
-Requires-Python: <3.13,>=3.11
+Requires-Python: <3.14,>=3.11
 Requires-Dist: azure-identity<2.0.0,>=1.19.0
 Requires-Dist: cloe-logging[databricks,log-analytics]<0.4,>=0.3.8
 Requires-Dist: databricks-sdk<1.0.0,>=0.36.0
@@ -30,9 +30,9 @@ cloe_nessy/integration/delta_loader/strategies/delta_timestamp_loader.py,sha256=
 cloe_nessy/integration/reader/__init__.py,sha256=NWQx-v6aKE8YOHhsxfeaZnMVq4KLKyRWXzUduf5aVsk,265
 cloe_nessy/integration/reader/api_reader.py,sha256=FbOyfLVG1ryL2GC-MgE1uClHICsQKBj9yZbY4TG5qrk,19637
 cloe_nessy/integration/reader/catalog_reader.py,sha256=DlnykmFjV_v8SCBh3qaCvf24QM-6TdMFVHx5Mqv7Nvs,4850
-cloe_nessy/integration/reader/excel_reader.py,sha256=JGmxQ16ux0HT-MLvAUp-9XMdKUToMb7cdObciZNsYSs,8027
+cloe_nessy/integration/reader/excel_reader.py,sha256=QXm0MaE_-tW5ix-f_3Pgn-Vx7VG5jA_uSp858rVV7lA,8042
 cloe_nessy/integration/reader/exceptions.py,sha256=_A9jFpe_RIDZCGY76qzjic9bsshxns6yXPSl141dq1c,203
-cloe_nessy/integration/reader/file_reader.py,sha256=t5zF-cmZo1X0a1rki6ry1rSiFEu5uXRP2rNGd90fwoY,8163
+cloe_nessy/integration/reader/file_reader.py,sha256=FFqqu1h003FY2Df3ru-G1JO4Bg2Ai8Rzh58fjOCN7NM,8262
 cloe_nessy/integration/reader/reader.py,sha256=YHriYkzsBduBjfI2FnP03VEo15a8UCRZ_sXtre8eaEs,1041
 cloe_nessy/integration/writer/__init__.py,sha256=3yzCAGiWZdQWtsbzlTih01sxVTJV2DDYwvl34lEAUlE,243
 cloe_nessy/integration/writer/catalog_writer.py,sha256=dQeXmtfs7J6rP6Ye3OCvxBraFScFX_3SHs7Md58hEeM,5296
@@ -40,9 +40,9 @@ cloe_nessy/integration/writer/file_writer.py,sha256=SUDbN13ZzDhbM8DpOGFgM_Gkg70T
 cloe_nessy/integration/writer/writer.py,sha256=elFPLFrWR-qVE9qnBtzzzhyRALLQcRVuOsPS0rNmRt4,1741
 cloe_nessy/integration/writer/delta_writer/__init__.py,sha256=h2CT6Hllmk0nodlek27uqwniCzVZKMkYcPGyG9K2Z24,164
 cloe_nessy/integration/writer/delta_writer/delta_append_writer.py,sha256=nribgHmapp59v3Rw_AfJg0_BRYhP7x2IJIeE74Ia_6A,4748
-cloe_nessy/integration/writer/delta_writer/delta_merge_writer.py,sha256=Yp_q_ycasW2_wwmzty_6fZeBVcW_0o8gLrr6F1gaUjQ,10195
+cloe_nessy/integration/writer/delta_writer/delta_merge_writer.py,sha256=aMpWa8GcnW9xu5eGE_AsVyfkL5hRIeJwfCLPniM8lak,13170
 cloe_nessy/integration/writer/delta_writer/delta_table_operation_type.py,sha256=m4YFY9_WgaOcnpBviVt3Km-w3wf3NF25wPS-n0NBGcE,970
-cloe_nessy/integration/writer/delta_writer/delta_writer_base.py,sha256=O7hw7YOa2FEzBlzjwPfxQTxm0ZrlszIjjfsHTwE_OhU,8609
+cloe_nessy/integration/writer/delta_writer/delta_writer_base.py,sha256=B7PwPHKrsJL0ZxBT-H9wWSy0gn7shqNDJ0AbrpMHyMg,10135
 cloe_nessy/integration/writer/delta_writer/exceptions.py,sha256=xPmGiYV0xQXauln5Oh34E5vbm0rVcs6xCh-SJSb2bw0,107
 cloe_nessy/logging/__init__.py,sha256=ySVCVbdyR3Dno_tl2ZfiER_7EVaDoQMHVkNyfdMZumY,65
 cloe_nessy/logging/logger_mixin.py,sha256=H8MyMEyb_kEDP0Ow5QStAFLuOkTIeUnneGaj916fKlU,7443
@@ -74,7 +74,7 @@ cloe_nessy/pipeline/pipeline_context.py,sha256=eCOcjyE16rGRom3L85Gy_BbncfQD6i1x3
 cloe_nessy/pipeline/pipeline_parsing_service.py,sha256=eeC4RbGBILGN6zkbUyjH-qGgEMtOWV4Kv_VxrHbHMY0,9021
 cloe_nessy/pipeline/pipeline_plotting_service.py,sha256=goMQj73FzUVchKn5c2SsPcWR6fr7DtVkVrcQfJsKCq4,13111
 cloe_nessy/pipeline/pipeline_step.py,sha256=oTnlvRpB0fbOBQXbPe1URstA5fv-97igCHt_41fKCAk,2082
-cloe_nessy/pipeline/actions/__init__.py,sha256=Yf6-EoF_iTXOIOhgMN-GwhqH5DeaogUklVulh9OVj4s,2902
+cloe_nessy/pipeline/actions/__init__.py,sha256=FfAnSIl-0T6pnaWhClkDqV8nfTdvLvZZJdwycsZMLPw,2990
 cloe_nessy/pipeline/actions/read_api.py,sha256=MAc7QfmhnaRUMdE09Ywt41RSAsuW4co8zF0zXHwbM8U,16193
 cloe_nessy/pipeline/actions/read_catalog_table.py,sha256=sx3dezd33c1FawMrxORwhK5GNo1IpjCyuLATWz7esZ0,6735
 cloe_nessy/pipeline/actions/read_excel.py,sha256=IG_VmDEt1TvGVEO0SY9Fm3awHNjfisR1_7DUmhC3NEE,7968
@@ -93,6 +93,7 @@ cloe_nessy/pipeline/actions/transform_group_aggregate.py,sha256=KUHeeP-RIDi34dpb
 cloe_nessy/pipeline/actions/transform_hash_columns.py,sha256=M5_wolJwzJpPTSrZq4yWV3TH7H6BGqbjJkJCwtqPlQo,8507
 cloe_nessy/pipeline/actions/transform_join.py,sha256=ez1M1wVc9khOZj1swMArJbBKXxEpjenUHrW1wL8H330,7200
 cloe_nessy/pipeline/actions/transform_json_normalize.py,sha256=petF7pnNq1EKc8MqVdG0weFALAHNILSe_eAu4Z5XxIo,4833
+cloe_nessy/pipeline/actions/transform_regex_extract.py,sha256=vMtUW0s_oXy8DC1-4Xh-WQN3CCp8jXYsJiFYvGdYrqE,6390
 cloe_nessy/pipeline/actions/transform_rename_columns.py,sha256=4zJcPCONMU4C67qeuzsrX3AORRRHoq_selUI7FJyeg0,1952
 cloe_nessy/pipeline/actions/transform_replace_values.py,sha256=1OPHTrjcphfyGepcO7ozYfeqfwA18pjlyHpVKUS_AAU,2049
 cloe_nessy/pipeline/actions/transform_select_columns.py,sha256=-GhSEsb7iNnZIsYRm3BG9BX4_qUDJMbpj1DsKPY046w,4574
@@ -100,7 +101,7 @@ cloe_nessy/pipeline/actions/transform_union.py,sha256=SZtEzh567CIExUj9yMEgshE28h
 cloe_nessy/pipeline/actions/transform_with_column.py,sha256=c-E1yYkeYmovbN1maT7ImpdQlW0nYvYsHCtDvfe4wt8,3357
 cloe_nessy/pipeline/actions/write_catalog_table.py,sha256=FyC0scQU8Ul3Uigpk6IN2IJpf_4jRjAqF5yHtDVwG00,4852
 cloe_nessy/pipeline/actions/write_delta_append.py,sha256=e1g4mDhwAZdKyt4Gb7ZzHcQrJ1duSl8qOn6ONizRsoM,2934
-cloe_nessy/pipeline/actions/write_delta_merge.py,sha256=fwinlTeZoDuTyrbln5vMu1UJ1LG8ZQrus3LoCVF__I4,5819
+cloe_nessy/pipeline/actions/write_delta_merge.py,sha256=kZL2PTIwB6Mj4UKg5f9SvU1VaakuYfFoymlcLf-L7dA,6443
 cloe_nessy/pipeline/actions/write_file.py,sha256=JZ8UZslxUn_ttYt5wDyvtHFq2FqYk3vOR8kvExJI8pk,3212
 cloe_nessy/pipeline/utils/__init__.py,sha256=xi02UjBMiXWD7b9gDvww4gyRyowb0eRd_6Wbu0F_cro,118
 cloe_nessy/pipeline/utils/delta_load_utils.py,sha256=KitMNruxePEkecI0h4Jint1JwJpaEog5mCOchMkgan8,1495
@@ -112,6 +113,6 @@ cloe_nessy/settings/settings.py,sha256=I4n129lrujriW-d8q4as2Kb4_kI932ModfZ5Ow_Up
 cloe_nessy/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 cloe_nessy/utils/column_names.py,sha256=dCNtm61mc5aLkY2oE4rlfN3VLCrpot6fOESjAZmCmhA,361
 cloe_nessy/utils/file_and_directory_handler.py,sha256=r2EVt9xG81p6ScaJCwETC5an6pMT6WseB0jMOR-JlpU,602
-cloe_nessy-1.0.0.dist-info/METADATA,sha256=jaztxDdomowmj0qTJ4i5jtF0dE_T5TmqA8j9zywl0U0,3291
-cloe_nessy-1.0.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-cloe_nessy-1.0.0.dist-info/RECORD,,
+cloe_nessy-1.0.3.dist-info/METADATA,sha256=fqBGuiBnOft_b6Q3yS_hxFPi5pqduBX7V7bBeXYwkvQ,3291
+cloe_nessy-1.0.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+cloe_nessy-1.0.3.dist-info/RECORD,,