cloe-nessy 0.3.8__py3-none-any.whl → 0.3.10__py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (27)
  1. cloe_nessy/integration/reader/api_reader.py +4 -2
  2. cloe_nessy/integration/reader/catalog_reader.py +6 -3
  3. cloe_nessy/integration/reader/excel_reader.py +1 -1
  4. cloe_nessy/integration/reader/file_reader.py +78 -5
  5. cloe_nessy/integration/writer/__init__.py +8 -1
  6. cloe_nessy/integration/writer/delta_writer/__init__.py +7 -0
  7. cloe_nessy/integration/writer/delta_writer/delta_append_writer.py +108 -0
  8. cloe_nessy/integration/writer/delta_writer/delta_merge_writer.py +215 -0
  9. cloe_nessy/integration/writer/delta_writer/delta_table_operation_type.py +21 -0
  10. cloe_nessy/integration/writer/delta_writer/delta_writer_base.py +210 -0
  11. cloe_nessy/integration/writer/delta_writer/exceptions.py +4 -0
  12. cloe_nessy/integration/writer/file_writer.py +132 -0
  13. cloe_nessy/integration/writer/writer.py +54 -0
  14. cloe_nessy/models/adapter/unity_catalog_adapter.py +5 -1
  15. cloe_nessy/models/schema.py +1 -1
  16. cloe_nessy/models/table.py +32 -10
  17. cloe_nessy/models/volume.py +13 -4
  18. cloe_nessy/object_manager/table_manager.py +73 -19
  19. cloe_nessy/pipeline/actions/__init__.py +7 -1
  20. cloe_nessy/pipeline/actions/read_catalog_table.py +1 -4
  21. cloe_nessy/pipeline/actions/write_delta_append.py +69 -0
  22. cloe_nessy/pipeline/actions/write_delta_merge.py +118 -0
  23. cloe_nessy/pipeline/actions/write_file.py +94 -0
  24. {cloe_nessy-0.3.8.dist-info → cloe_nessy-0.3.10.dist-info}/METADATA +28 -4
  25. {cloe_nessy-0.3.8.dist-info → cloe_nessy-0.3.10.dist-info}/RECORD +27 -16
  26. {cloe_nessy-0.3.8.dist-info → cloe_nessy-0.3.10.dist-info}/WHEEL +1 -1
  27. {cloe_nessy-0.3.8.dist-info → cloe_nessy-0.3.10.dist-info}/top_level.txt +0 -0
cloe_nessy/object_manager/table_manager.py

@@ -48,9 +48,8 @@ def table_log_decorator(operation: str):
     def inner_decorator(func):
         @functools.wraps(func)
         def wrapper(self, *args, **kwargs):
-            table_identifier = kwargs.get("table_identifier") or kwargs.get("table").identifier or args[0]
-            if not isinstance(table_identifier, str):
-                # assume its a Table object
+            table_identifier = kwargs.get("table_identifier") or kwargs.get("table") or args[0]
+            if isinstance(table_identifier, Table):
                 table_identifier = table_identifier.identifier
             self._tabular_logger.info(
                 "operation:%s | identifier:%s | status:start | error:''",
@@ -84,7 +83,6 @@ class TableManager(LoggerMixin):
 
     def __init__(self, tabular_logger: logging.Logger | None = None):
         self._spark = SessionManager.get_spark_session()
-        self._utils = SessionManager.get_utils()
         self._console_logger = self.get_console_logger()
         self._console_logger.debug("TableManager initialized...")
         self._tabular_logger = tabular_logger or self.get_tabular_logger(**TableManagerLogs().__dict__)
@@ -115,51 +113,83 @@ class TableManager(LoggerMixin):
         if statement and statement != "\n":
             self._spark.sql(statement)
 
-    def drop_table(self, table_identifier: str, delete_physical_data: bool = False):
+    def drop_table(
+        self,
+        table: Table | None = None,
+        storage_location: str | None = None,
+        table_identifier: str | None = None,
+        delete_physical_data: bool = False,
+    ):
         """Deletes a Table. For security reasons you are forced to pass the table_name.
 
         If delete_physical_data is True the actual physical data on the ADLS will be deleted.
         Use with caution!
 
         Args:
+            table: The Table object representing the Delta table.
+            storage_location: The location of the Delta table on the ADLS.
             table_identifier: The table identifier in the catalog. Must be in the format 'catalog.schema.table'.
            delete_physical_data: If set to True, deletes not only the metadata
                within the Catalog but also the physical data.
+
+        Raises:
+            ValueError: If neither table nor table_identifier is provided, or if both are provided.
+            ValueError: If the table storage path is not provided by the table object.
         """
         self._console_logger.info(f"Deleting table [ '{table_identifier}' ] ...")
-        if not isinstance(table_identifier, str):
-            raise NotImplementedError("table_identifier must be a string, can be a Table object in the future.")
-
+        if table is not None and (table_identifier is not None or storage_location is not None):
+            raise ValueError("Either table or table_identifier and storage_location must be provided, but not both.")
+        if table is not None:
+            table_identifier = table.identifier
+            storage_location = str(table.storage_path)
         if delete_physical_data:
-            self._delete_physical_data()
-        self.drop_table_from_catalog(table_identifier)
+            self._delete_physical_data(location=storage_location)
+        self.drop_table_from_catalog(table_identifier=table_identifier)
 
-    def drop_table_from_catalog(self, table_identifier: str) -> None:
+    def drop_table_from_catalog(self, table_identifier: str | None = None, table: Table | None = None) -> None:
         """Removes a table from the catalog. Physical data is retained.
 
         Args:
             table_identifier: The table identifier in the catalog. Must be in the format 'catalog.schema.table'.
+            table: The Table object representing the Delta table.
+
+        Raises:
+            ValueError: If neither table nor table_identifier is provided, or if both are provided.
         """
+        if (table is None and table_identifier is None) or (table is not None and table_identifier is not None):
+            raise ValueError("Either table or table_identifier must be provided, but not both.")
+        if table is not None:
+            table_identifier = table.identifier
         self._console_logger.info(f"... deleting table [ '{table_identifier}' ] from Catalog.")
-        if not isinstance(table_identifier, str):
-            raise NotImplementedError("table_identifier must be a string, can be a Table object in the future.")
         self._spark.sql(f"DROP TABLE IF EXISTS {table_identifier};")
 
-    def _delete_physical_data(self):
+    def _delete_physical_data(self, table: Table | None = None, location: str | None = None):
         """Removes the physical data on the ADLS for the location of this table.
 
+        Args:
+            table: The Table object representing the Delta table to be deleted.
+            location: The location of the Delta table to be deleted.
+
         Raises:
-            NotImplementedError: This can be implemented, once a Table object is available.
+            ValueError: If neither table nor location is provided, or if both are provided.
+            ValueError: If the table storage path is not provided by the table object.
         """
-        self._console_logger.info("... deleting physical data for table [ '' ] from Catalog.")
-        raise NotImplementedError("This can be implemented, once a Table object is available.")
+        if (table is None and location is None) or (table is not None and location is not None):
+            raise ValueError("Either table or location must be provided, but not both.")
+        if table is not None:
+            if table.storage_path is None:
+                raise ValueError("Table storage path must be provided.")
+            location = str(table.storage_path)
+        SessionManager.get_utils().fs.rm(location, recurse=True)
+        self._console_logger.info("... deleting physical data.")
 
-    def get_delta_table(self, table: Table | None = None, location: str | None = None) -> DeltaTable:
+    def get_delta_table(self, table: Table | None = None, location: str | None = None, spark=None) -> DeltaTable:
         """Get the DeltaTable object from the Table objects location or a location string.
 
         Args:
             table: A Table object representing the Delta table.
             location: A string representing the table location.
+            spark: An optional Spark session. If not provided, the current Spark session will be used.
 
         Returns:
             The DeltaTable object corresponding to the given Table object or location string.
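
The reworked `drop_table` accepts either a `Table` object or an identifier (plus an optional storage location), but not both. A hedged usage sketch, assuming `orders_table` is a populated `Table` model:

```python
manager = TableManager()

# Drop by identifier; catalog metadata is removed, physical files are kept.
manager.drop_table(table_identifier="catalog.schema.orders")

# Drop by Table object and also delete the underlying ADLS data. Use with caution!
manager.drop_table(table=orders_table, delete_physical_data=True)

# Supplying both a Table object and an identifier raises ValueError.
```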
@@ -173,7 +203,7 @@ class TableManager(LoggerMixin):
         if table is not None:
             location = str(table.storage_path)
         self._console_logger.info(f"Getting DeltaTable object for location: {location}")
-        return DeltaTable.forPath(self._spark, str(location))
+        return DeltaTable.forPath(spark or self._spark, str(location))
 
     def table_exists(self, table: Table | None = None, table_identifier: str | None = None) -> bool:
         """Checks if a table exists in the catalog.
@@ -232,3 +262,27 @@
 
         self._console_logger.info(f"Refreshing table: {table_identifier}")
         self._spark.sql(f"REFRESH TABLE {table_identifier};")
+
+    @table_log_decorator(operation="truncate")
+    def truncate_table(
+        self,
+        table: Table | None = None,
+        table_identifier: str | None = None,
+    ):
+        """Truncates a table.
+
+        Args:
+            table: A Table object representing the Delta table.
+            table_identifier: The identifier of the Delta table in the format 'catalog.schema.table'.
+
+        Raises:
+            ValueError: If neither table nor table_identifier is provided, or if both are provided.
+        """
+        if (table is None and table_identifier is None) or (table is not None and table_identifier is not None):
+            raise ValueError("Either table or table_identifier must be provided, but not both.")
+
+        if table is not None:
+            table_identifier = table.escaped_identifier
+
+        self._console_logger.info(f"Truncating table: {table_identifier}")
+        self._spark.sql(f"TRUNCATE TABLE {table_identifier};")
cloe_nessy/pipeline/actions/__init__.py

@@ -22,6 +22,9 @@ from .transform_replace_values import TransformReplaceValuesAction
 from .transform_select_columns import TransformSelectColumnsAction
 from .transform_union import TransformUnionAction
 from .write_catalog_table import WriteCatalogTableAction
+from .write_delta_append import WriteDeltaAppendAction
+from .write_delta_merge import WriteDeltaMergeAction
+from .write_file import WriteFileAction
 
 # Get all subclasses of PipelineAction defined in this submodule
 pipeline_actions = {cls.name: cls for cls in PipelineAction.__subclasses__()}
@@ -36,7 +39,6 @@ __all__ = [
     "ReadExcelAction",
     "ReadFilesAction",
     "ReadMetadataYAMLAction",
-    "WriteCatalogTableAction",
     "PipelineActionType",
     "TransformFilterAction",
     "TransformUnionAction",
@@ -52,5 +54,9 @@ __all__ = [
     "TransformRenameColumnsAction",
     "TransformReplaceValuesAction",
     "TransformSelectColumnsAction",
+    "WriteCatalogTableAction",
+    "WriteDeltaAppendAction",
+    "WriteDeltaMergeAction",
+    "WriteFileAction",
     "TransformHashColumnsAction",
 ]
cloe_nessy/pipeline/actions/read_catalog_table.py

@@ -61,8 +61,5 @@ class ReadCatalogTableAction(PipelineAction):
             raise ValueError("Table name must be specified or a valid Table object with identifier must be set.")
 
         table_reader = CatalogReader()
-        df = table_reader.read(
-            table_identifier=table_identifier,
-            **options,
-        )
+        df = table_reader.read(table_identifier=table_identifier, options=options)
         return context.from_existing(data=df)
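
The reader call now passes the options dict as a single `options` argument instead of splatting it into keyword arguments, so option keys can no longer collide with the reader's own parameters. An illustrative before/after (`versionAsOf` is an example option, not taken from this diff):

```python
# 0.3.8: options were splatted into the call signature.
df = table_reader.read(table_identifier="catalog.schema.orders", versionAsOf="3")

# 0.3.10: options travel as one dict.
df = table_reader.read(table_identifier="catalog.schema.orders", options={"versionAsOf": "3"})
```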
cloe_nessy/pipeline/actions/write_delta_append.py (new file)

@@ -0,0 +1,69 @@
+from typing import Any
+
+from ...integration.writer import DeltaAppendWriter
+from ...models.adapter import UnityCatalogAdapter
+from ...pipeline import PipelineAction, PipelineContext
+
+
+class WriteDeltaAppendAction(PipelineAction):
+    """This class implements an Append action for an ETL pipeline.
+
+    The WriteDeltaAppendAction appends a Dataframe to a Delta table.
+
+    Returns:
+        None.
+    """
+
+    name: str = "WRITE_DELTA_APPEND"
+
+    def run(
+        self,
+        context: PipelineContext,
+        *,
+        table_identifier: str | None = None,
+        ignore_empty_df: bool = False,
+        options: dict[str, Any] | None = None,
+        **_: Any,
+    ) -> PipelineContext:
+        """Append the dataframe to the delta table.
+
+        Args:
+            context: Context in which this Action is executed.
+            table_identifier: The identifier of the table. If passed, the
+                UC Adapter will be used to create a table object. Otherwise the Table
+                object will be created from the table metadata in the context.
+            ignore_empty_df: A flag indicating whether to ignore an empty source dataframe.
+            options: Additional options for the append writer.
+
+        Raises:
+            ValueError: If the table does not exist.
+            ValueError: If the data is not set in the pipeline context.
+            ValueError: If the table metadata is empty.
+
+        Returns:
+            Pipeline Context
+        """
+        delta_append_writer = DeltaAppendWriter()
+
+        if context.data is None:
+            raise ValueError("Data is required for the append operation.")
+        if context.table_metadata is None and table_identifier is None:
+            raise ValueError("Table metadata or a table identifier are required for the append operation.")
+
+        if table_identifier is not None:
+            context.table_metadata = UnityCatalogAdapter().get_table_by_name(table_identifier)
+        else:
+            if context.table_metadata is None:
+                raise ValueError("Table metadata is required.")
+
+        if context.table_metadata is None:
+            raise ValueError("Table metadata is required.")
+
+        delta_append_writer.write(
+            table_identifier=context.table_metadata.identifier,
+            table_location=context.table_metadata.storage_path,
+            data_frame=context.data,
+            ignore_empty_df=ignore_empty_df,
+            options=options,
+        )
+        return context.from_existing()
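
A hedged sketch of invoking the new action directly; in practice the pipeline parser dispatches on the action `name` (`WRITE_DELTA_APPEND`). It assumes `context` is a `PipelineContext` whose `data` was populated by an earlier read step, and the identifier is illustrative:

```python
action = WriteDeltaAppendAction()
context = action.run(
    context,
    table_identifier="catalog.schema.orders",  # resolved to a Table via UnityCatalogAdapter
    ignore_empty_df=True,
    options={"mergeSchema": "true"},
)
```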
cloe_nessy/pipeline/actions/write_delta_merge.py (new file)

@@ -0,0 +1,118 @@
+from typing import Any
+
+from ...integration.writer import DeltaMergeWriter
+from ...models.adapter import UnityCatalogAdapter
+from ...pipeline import PipelineAction, PipelineContext
+
+
+class WriteDeltaMergeAction(PipelineAction):
+    """This class implements a Merge action for an ETL pipeline.
+
+    The WriteDeltaMergeAction merges a Dataframe into a Delta table.
+
+    Returns:
+        None.
+    """
+
+    name: str = "WRITE_DELTA_MERGE"
+
+    def run(
+        self,
+        context: PipelineContext,
+        *,
+        table_identifier: str | None = None,
+        key_columns: list[str] | None = None,
+        cols_to_update: list[str] | None = None,
+        cols_to_insert: list[str] | None = None,
+        cols_to_exclude: list[str] | None = None,
+        when_matched_update: bool = True,
+        when_matched_deleted: bool = False,
+        when_not_matched_insert: bool = True,
+        use_partition_pruning: bool = True,
+        ignore_empty_df: bool = False,
+        create_if_not_exists: bool = True,
+        refresh_table: bool = True,
+        **_: Any,
+    ) -> PipelineContext:
+        """Merge the dataframe into the delta table.
+
+        Args:
+            context: Context in which this Action is executed.
+            table_identifier: The identifier of the table. If passed, the
+                UC Adapter will be used to create a table object. Otherwise the Table
+                object will be created from the table metadata in the context.
+            key_columns: List of column names that form the
+                key for the merge operation.
+            when_matched_update: Flag to specify whether to
+                perform an update operation when matching records are found in
+                the target Delta table.
+            when_matched_deleted: Flag to specify whether to
+                perform a delete operation when matching records are found in
+                the target Delta table.
+            when_not_matched_insert: Flag to specify whether to perform an
+                insert operation when matching records are not found in the target
+                Delta table.
+            cols_to_update: List of column names to be
+                updated in the target Delta table.
+            cols_to_insert: List of column names to be
+                inserted into the target Delta table.
+            cols_to_exclude: List of column names to be
+                excluded from the merge operation.
+            use_partition_pruning: Flag to specify whether to use partition
+                pruning to optimize the performance of the merge operation.
+            ignore_empty_df: A flag indicating whether to ignore an empty source dataframe.
+            create_if_not_exists: Create the table if it does not exist.
+            refresh_table: Refresh the table after the transaction.
+
+        Raises:
+            ValueError: If the table does not exist.
+            ValueError: If the data is not set in the pipeline context.
+            ValueError: If the table metadata is empty.
+
+        Returns:
+            Pipeline Context
+        """
+        delta_merge_writer = DeltaMergeWriter()
+
+        if context.data is None:
+            raise ValueError("Data is required for the merge operation.")
+        if context.table_metadata is None and table_identifier is None:
+            raise ValueError("Table metadata or a table identifier are required for the merge operation.")
+
+        if table_identifier is not None:
+            context.table_metadata = UnityCatalogAdapter().get_table_by_name(table_identifier)
+        else:
+            if context.table_metadata is None:
+                raise ValueError("Table metadata is required.")
+
+        if context.table_metadata is None:
+            raise ValueError("Table metadata is required.")
+
+        if create_if_not_exists:
+            delta_merge_writer.table_manager.create_table(table=context.table_metadata, ignore_if_exists=True)
+
+        if not delta_merge_writer.table_manager.table_exists(context.table_metadata):
+            raise ValueError(f"Table {context.table_metadata.name} does not exist.")
+
+        assert key_columns is not None, "Key columns must be provided."
+
+        delta_merge_writer.write(
+            table_identifier=context.table_metadata.identifier,
+            storage_path=str(context.table_metadata.storage_path),
+            data_frame=context.data,
+            key_columns=key_columns,
+            cols_to_update=cols_to_update,
+            cols_to_insert=cols_to_insert,
+            cols_to_exclude=cols_to_exclude,
+            when_matched_update=when_matched_update,
+            when_matched_deleted=when_matched_deleted,
+            when_not_matched_insert=when_not_matched_insert,
+            use_partition_pruning=use_partition_pruning,
+            partition_by=context.table_metadata.partition_by,
+            ignore_empty_df=ignore_empty_df,
+        )
+
+        if refresh_table:
+            delta_merge_writer.table_manager.refresh_table(table_identifier=context.table_metadata.identifier)
+
+        return context.from_existing()
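
A similar hedged sketch for the merge action; note that `key_columns` is effectively required (the action asserts it is not None). Identifier and column names here are illustrative assumptions:

```python
action = WriteDeltaMergeAction()
context = action.run(
    context,
    table_identifier="catalog.schema.orders",
    key_columns=["order_id"],          # join key for the MERGE
    cols_to_exclude=["_ingested_at"],  # illustrative column name
    when_matched_update=True,
    when_not_matched_insert=True,
    use_partition_pruning=True,
    ignore_empty_df=True,
)
```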
cloe_nessy/pipeline/actions/write_file.py (new file)

@@ -0,0 +1,94 @@
+from typing import Any
+
+from ...integration.writer import FileWriter
+from ...pipeline import PipelineAction, PipelineContext
+
+
+class WriteFileAction(PipelineAction):
+    """This class implements a Write action for an ETL pipeline.
+
+    The WriteFileAction writes a Dataframe to a storage location defined in the
+    options using the [`FileWriter`][cloe_nessy.integration.writer.FileWriter] class.
+
+    Example:
+        ```yaml
+        Write to File:
+            action: WRITE_FILE
+            options:
+                path: "path/to/location"
+                format: "parquet"
+                partition_cols: ["date"]
+                mode: "append"
+                is_stream: False
+                options:
+                    mergeSchema: "true"
+        ```
+    """
+
+    name: str = "WRITE_FILE"
+
+    def run(
+        self,
+        context: PipelineContext,
+        *,
+        path: str = "",
+        format: str = "delta",
+        partition_cols: list[str] | None = None,
+        mode: str = "append",
+        is_stream: bool = False,
+        options: dict[str, str] | None = None,
+        **_: Any,
+    ) -> PipelineContext:
+        """Writes a file to a location.
+
+        Args:
+            context: Context in which this Action is executed.
+            path: Location to write data to.
+            format: Format of files to write.
+            partition_cols: Columns to partition on. If None, the writer will try to get the partition
+                columns from the metadata. Default None.
+            mode: Specifies the behavior when data or table already exists.
+            is_stream: If True, use the `write_stream` method of the writer.
+            options: Additional options passed to the writer.
+
+        Raises:
+            ValueError: If no path is provided.
+            ValueError: If the table metadata is empty.
+
+        Returns:
+            Pipeline Context
+        """
+        if not path:
+            raise ValueError("No path provided. Please specify path to write data to.")
+        if not options:
+            options = {}
+
+        if context.data is None:
+            raise ValueError("Data context is required for the operation.")
+
+        if partition_cols is None:
+            if context.table_metadata is None:
+                partition_cols = []
+            else:
+                partition_cols = context.table_metadata.partition_by
+        writer = FileWriter()
+        if not is_stream:
+            writer.write(
+                data_frame=context.data,
+                location=path,
+                format=format,
+                partition_cols=partition_cols,
+                mode=mode,
+                options=options,
+            )
+        else:
+            writer.write_stream(
+                data_frame=context.data,
+                location=path,
+                format=format,
+                mode=mode,
+                partition_cols=partition_cols,
+                options=options,
+            )
+
+        return context.from_existing()
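
A Python-level sketch mirroring the YAML example in the docstring above; `context` is again assumed to carry the DataFrame produced by earlier pipeline steps:

```python
action = WriteFileAction()
context = action.run(
    context,
    path="path/to/location",
    format="parquet",
    partition_cols=["date"],
    mode="append",
    is_stream=False,
    options={"mergeSchema": "true"},
)
```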
{cloe_nessy-0.3.8.dist-info → cloe_nessy-0.3.10.dist-info}/METADATA

@@ -1,9 +1,10 @@
 Metadata-Version: 2.4
 Name: cloe-nessy
-Version: 0.3.8
+Version: 0.3.10
 Summary: Your friendly datalake monster.
 Author-email: initions <ICSMC_EXT_PYPIORG@accenture.com>
 License: MIT
+Project-URL: homepage, https://initions.com/
 Classifier: Development Status :: 5 - Production/Stable
 Classifier: Environment :: Console
 Classifier: License :: OSI Approved :: MIT License
@@ -28,13 +29,13 @@ Requires-Dist: databricks-sdk<1.0.0,>=0.36.0
 Requires-Dist: networkx<4.0,>=3.3
 Requires-Dist: matplotlib<4.0.0,>=3.9.2
 Requires-Dist: types-networkx<4.0.0.0,>=3.2.1.20240820
-Requires-Dist: fsspec<2025.0.0,>=2024.9.0
+Requires-Dist: fsspec<2025.6.0,>=2025.5.1
 Requires-Dist: cloe-logging[databricks,log-analytics]<0.4,>=0.3.7
 
 # cloe-nessy
 
 [![Copier](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/copier-org/copier/master/img/badge/badge-grayscale-inverted-border-orange.json)](https://github.com/copier-org/copier)
-[![python](https://img.shields.io/badge/Python-3.12-3776AB.svg?style=flat&logo=python&logoColor=white)](https://www.python.org)
+[![python](https://img.shields.io/badge/Python-3.11-3776AB.svg?style=flat&logo=python&logoColor=white)](https://www.python.org)
 [![uv](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/uv/main/assets/badge/v0.json)](https://github.com/astral-sh/uv)
 [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/charliermarsh/ruff/main/assets/badge/v0.json)](https://github.com/charliermarsh/ruff)
 [![Checked with mypy](https://www.mypy-lang.org/static/mypy_badge.svg)](https://mypy-lang.org/)
@@ -43,4 +44,27 @@ Requires-Dist: cloe-logging[databricks,log-analytics]<0.4,>=0.3.7
 
 Owner: initions
 
-Your friendly datalake monster.
+Nessy is diving deep into Data, unleashing insights with ease.
+
+## Introduction
+
+"Nessy" is a comprehensive Python datalake framework that provides a seamless,
+efficient, and user-friendly platform for managing and analyzing datasets using
+pyspark. Its advanced features allow operations on each level of an
+Extract-Transform-Load (ETL) Workflow.
+
+## Contributing
+
+When you are contributing, please refer to our Contribution Guide in the *nessy*
+Docs
+[here](https://white-rock-0cabbc003.1.azurestaticapps.net/tool_docs/nessy/Developer-Guide/)!
+
+## Usage
+
+Please find the User Guide
+[here](https://white-rock-0cabbc003.1.azurestaticapps.net/tool_docs/nessy/User-Guide/)!
+
+## Contact
+
+Please reach out to the *nessy* Team for any questions around this package and
+repository.
{cloe_nessy-0.3.8.dist-info → cloe_nessy-0.3.10.dist-info}/RECORD

@@ -18,14 +18,22 @@ cloe_nessy/file_utilities/strategies/onelake_strategy.py,sha256=RnQjWtWIFzFj-zPq
 cloe_nessy/file_utilities/strategies/utils_strategy.py,sha256=urayKfOUpSaXKgTs1KVK0TS7FWVrJ3k4OLKh35sCxAU,3194
 cloe_nessy/integration/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 cloe_nessy/integration/reader/__init__.py,sha256=J5vlORqHLBpHEvzIwfIjzN5xEdOat-8jlmdLcGj8nsA,239
-cloe_nessy/integration/reader/api_reader.py,sha256=j3Z5O1oH-Zc43TyA_aYtnDNYC9xFMxMqDsRQWtEZGD8,5636
-cloe_nessy/integration/reader/catalog_reader.py,sha256=tGK-Y0jZQGOrF9eZUzSr7ils-L58uex6qH9PZ81ZLy8,1835
-cloe_nessy/integration/reader/excel_reader.py,sha256=4kifpIakHpGmap0-P0SUgjJoQdY-eeiZBIDrQp87wK8,8012
+cloe_nessy/integration/reader/api_reader.py,sha256=3Mf-txOTJ1dXCzdNtRTLC8UKftKms4NxOoLVgzcc2eo,5691
+cloe_nessy/integration/reader/catalog_reader.py,sha256=lwDeWBVXfFh75XknPawetL9ZBtqS-Oss5rNzbrEeIQg,2070
+cloe_nessy/integration/reader/excel_reader.py,sha256=8KCqKBYFE6RGCiahJimQOAtbYZzaUzlnoslW9yca5P8,8035
 cloe_nessy/integration/reader/exceptions.py,sha256=_A9jFpe_RIDZCGY76qzjic9bsshxns6yXPSl141dq1c,203
-cloe_nessy/integration/reader/file_reader.py,sha256=1os8pZIXAGTJBZjGREmHOTlZeabbikC7sDv5xn3bIjE,3950
+cloe_nessy/integration/reader/file_reader.py,sha256=3DcZhyyL-Cf_R7Px1UDHJwpO8Un31dWey2Q-f4DtWfY,6879
 cloe_nessy/integration/reader/reader.py,sha256=e2KVPePQme8SBQJEbL-3zpGasOgTiEvKFTslow2wGPw,1034
-cloe_nessy/integration/writer/__init__.py,sha256=NIh0t1RYlG3J1Y5_CvnR36N9tISmcElD5Tq06ksmqoA,71
+cloe_nessy/integration/writer/__init__.py,sha256=3yzCAGiWZdQWtsbzlTih01sxVTJV2DDYwvl34lEAUlE,243
 cloe_nessy/integration/writer/catalog_writer.py,sha256=Gb-hMdADgO_uUJ7mZPHBYyNme2qXsdFFnzwo7GcShHM,2192
+cloe_nessy/integration/writer/file_writer.py,sha256=SUDbN13ZzDhbM8DpOGFgM_Gkg70To4L6Q182pXx2HRM,5454
+cloe_nessy/integration/writer/writer.py,sha256=elFPLFrWR-qVE9qnBtzzzhyRALLQcRVuOsPS0rNmRt4,1741
+cloe_nessy/integration/writer/delta_writer/__init__.py,sha256=h2CT6Hllmk0nodlek27uqwniCzVZKMkYcPGyG9K2Z24,164
+cloe_nessy/integration/writer/delta_writer/delta_append_writer.py,sha256=TbpW-j87_H9dcUza34uR6VWslJez406y3_5N1ip0SnM,4740
+cloe_nessy/integration/writer/delta_writer/delta_merge_writer.py,sha256=no2GOLqMAJd0fEy2mqMevMj_CvutcJPRmXJC2tD4icA,10112
+cloe_nessy/integration/writer/delta_writer/delta_table_operation_type.py,sha256=kiacqQ2FYQSzakJqZ9-ZHH3os4X7--QuER_2xx9y21k,971
+cloe_nessy/integration/writer/delta_writer/delta_writer_base.py,sha256=upUtDZMzwYFU0kzmkelVgkpFToXkrypcR3h_jvGjz14,8596
+cloe_nessy/integration/writer/delta_writer/exceptions.py,sha256=xPmGiYV0xQXauln5Oh34E5vbm0rVcs6xCh-SJSb2bw0,107
 cloe_nessy/logging/__init__.py,sha256=ySVCVbdyR3Dno_tl2ZfiER_7EVaDoQMHVkNyfdMZumY,65
 cloe_nessy/logging/logger_mixin.py,sha256=9iy7BF6drYme-f7Rrt_imbVBRgVqQ89xjcP1X5aMtfY,7467
 cloe_nessy/models/__init__.py,sha256=-FmWEJ1Oq1njSopjc0R7GmT64mLSmALkm8PkHNzy9Y8,327
@@ -33,17 +41,17 @@ cloe_nessy/models/catalog.py,sha256=ayC1sMp4cNLAZtu0ICVV3Us6-o4hn8U9tpzzvxC9RAs,
 cloe_nessy/models/column.py,sha256=53fBwRnino72XKACsHZpN9QfCBqqSXyKLHZlM0huumg,1988
 cloe_nessy/models/constraint.py,sha256=hsFlhn4n928z81O3dl3v5bMetewPWzMjkJK3_4kASSM,178
 cloe_nessy/models/foreign_key.py,sha256=DwRVHs9sShqqPV-NL7ow_3AmPPWX0Od26yZn_I565pU,1001
-cloe_nessy/models/schema.py,sha256=DHlJJ4JA8392vVrFatH22GoPOCJN-eqkmHVihIHpPCg,3382
-cloe_nessy/models/table.py,sha256=W9gqMTCQBRo3Z_rLY5cJ8WgPEUEslNLbaRrODuTy15Y,11744
+cloe_nessy/models/schema.py,sha256=yUrjjEhAH5zbCymE67Az_jPnVB8hGO-_UNfqzeZCD_Y,3376
+cloe_nessy/models/table.py,sha256=1N79hc79uJbNw5tHuoQAhLLS6y-9TFx5LIQT-C1X-wU,12075
 cloe_nessy/models/types.py,sha256=XRbuJGdTNa6aXyE3IAzs_J9gVjbfkzMDLfGl-k6jI_4,223
-cloe_nessy/models/volume.py,sha256=7_lH4X-Au8jYNRVFQ5F2x-fRy2J8Z5-cLTn3Z3mpLQs,2197
+cloe_nessy/models/volume.py,sha256=51BE06FrL1Wv6zblFwJ_HTiR6WQqH7pSmrdH90rqwLg,2444
 cloe_nessy/models/adapter/__init__.py,sha256=m36W_mqwB3dCYnCIt0fLOSHS4E1VU8FRGoaum4Gf95o,90
-cloe_nessy/models/adapter/unity_catalog_adapter.py,sha256=T5v8gN57vRj9OLrfj8N4TEsH_Z_5N38wTqu_7rYgXb0,12572
+cloe_nessy/models/adapter/unity_catalog_adapter.py,sha256=a-14Ys-AevVYQd0xeJU1syLxjT5Wzo4uog1hFSEs76M,12651
 cloe_nessy/models/mixins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 cloe_nessy/models/mixins/read_instance_mixin.py,sha256=j5Y4aNWOh1jlskEaxNooZFJgPyxRmik00gAVLJnAaRs,4507
 cloe_nessy/models/mixins/template_loader_mixin.py,sha256=5MXhEGBFlq3dwZvINEyBowSlipNnVun2H_TmhI_fsS4,549
 cloe_nessy/object_manager/__init__.py,sha256=3sle0vNpPwBOkycxA3XVS9m4XZf5LD3Qd4NGxdqcHno,186
-cloe_nessy/object_manager/table_manager.py,sha256=B_00UXaEQQHTAsyZGivhW3TQQpejC3obljAi_IwyFNA,9993
+cloe_nessy/object_manager/table_manager.py,sha256=oYcYiZR0-JyoadcCcDelxfFb-ATeKDIZerYaZc-moiI,12634
 cloe_nessy/object_manager/volume_manager.py,sha256=6epd3KXzcNH04EvaKubAfLsaUm9qBMeT3KNvMK04gGs,2727
 cloe_nessy/pipeline/__init__.py,sha256=sespmJ5JsgyiFyZiedTiL2kg--zGIX7cjTYsD5vemEg,325
 cloe_nessy/pipeline/pipeline.py,sha256=-1tJVs9rZf8CcwieH4IP7mqJZ6mL7bQUZ56TNKt8eO8,11154
@@ -52,9 +60,9 @@ cloe_nessy/pipeline/pipeline_config.py,sha256=BN3ZSbr6bC-X9edoh-n5vRfPHFMbgtAU7m
 cloe_nessy/pipeline/pipeline_context.py,sha256=csElDc6BsynDUtRXgQOSCH7ONc_b-ag0YEg0zlQTz58,1874
 cloe_nessy/pipeline/pipeline_parsing_service.py,sha256=c_nAsgw81QYBM9AFiTxGgqRhNXABkDKplbeoCJPtbpE,6434
 cloe_nessy/pipeline/pipeline_step.py,sha256=UlnmpS6gm_dZ7m9dD1mZvye7mvUF_DA7HjOZo0oGYDU,1977
-cloe_nessy/pipeline/actions/__init__.py,sha256=dvIaWVR-L3IQkxMI-TIazn2udeFGsKmhUxw9E0VTz0g,2370
+cloe_nessy/pipeline/actions/__init__.py,sha256=9gjSQKLGrPcaYaJrTYZde8d4yNrN1SoXN_DDHq5KrvY,2600
 cloe_nessy/pipeline/actions/read_api.py,sha256=RBv5XeHtjTXuCP09Fqo6JNx6iIhQQI-nuAHCuSaGs2s,7778
-cloe_nessy/pipeline/actions/read_catalog_table.py,sha256=-k2wezkv8bE_xwoW7WM1ORhrCXQagKTUuXkhI2ZEROs,2783
+cloe_nessy/pipeline/actions/read_catalog_table.py,sha256=oXbqbc6BfR82dSIGclwzWiTN8EVmpFjNIYLKm4qOU50,2754
 cloe_nessy/pipeline/actions/read_excel.py,sha256=Mhl3r_2Hqk2XN7Fl5WqqAyE4JdnwSiivbhWMglyBtkE,7961
 cloe_nessy/pipeline/actions/read_files.py,sha256=N9bFgtG1tovhp2JayxE5YiN9PiO2lgG2-6h_Y6tD2eU,5220
 cloe_nessy/pipeline/actions/read_metadata_yaml.py,sha256=3ZDy9qiDYtM1oDQzHPC23hLOvHjhdk5zg1wVHE60m9k,2295
@@ -75,13 +83,16 @@ cloe_nessy/pipeline/actions/transform_replace_values.py,sha256=1OPHTrjcphfyGepcO
 cloe_nessy/pipeline/actions/transform_select_columns.py,sha256=-GhSEsb7iNnZIsYRm3BG9BX4_qUDJMbpj1DsKPY046w,4574
 cloe_nessy/pipeline/actions/transform_union.py,sha256=s81Vge0AbYPc7VkskCYfOQ_LEjqcmfNFyDkytfjcZyo,2720
 cloe_nessy/pipeline/actions/write_catalog_table.py,sha256=j7gRuG3Fedh8JgevIFBbHKock3laJVq4l6Mx3CGU5eo,2676
+cloe_nessy/pipeline/actions/write_delta_append.py,sha256=fuL29SK9G5K14ycckU3iPexeK0XNXUfQscCwhXHxbKA,2498
+cloe_nessy/pipeline/actions/write_delta_merge.py,sha256=Hir7QZZZJ9hmQZXiJ9iz6u06OCmcHFpyKFVB_I1saSM,5043
+cloe_nessy/pipeline/actions/write_file.py,sha256=H8LRst045yij-8XJ5pRB9m5d1lZpZjFa0WSVdSFesPo,2984
 cloe_nessy/session/__init__.py,sha256=t7_YjUhJYW3km_FrucaUdbIl1boQtwkyhw_8yE10qzc,74
 cloe_nessy/session/session_manager.py,sha256=f4OeeyGD3becDQGkdDbck3jVH9ulOCBWjW6Jaj_MIrc,7765
 cloe_nessy/settings/__init__.py,sha256=ZbkneO3WaKOxon7qHFHnou7EnBOSnBFyKMDZblIEvzM,101
 cloe_nessy/settings/settings.py,sha256=I4n129lrujriW-d8q4as2Kb4_kI932ModfZ5Ow_UpVM,3653
 cloe_nessy/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 cloe_nessy/utils/file_and_directory_handler.py,sha256=r2EVt9xG81p6ScaJCwETC5an6pMT6WseB0jMOR-JlpU,602
-cloe_nessy-0.3.8.dist-info/METADATA,sha256=fnnrNC7Vo8REfpUO5hdflH0_zaZAF437CyRrUuCM8dI,2374
-cloe_nessy-0.3.8.dist-info/WHEEL,sha256=wXxTzcEDnjrTwFYjLPcsW_7_XihufBwmpiBeiXNBGEA,91
-cloe_nessy-0.3.8.dist-info/top_level.txt,sha256=Z7izn8HmQpg2wBUb-0jzaKlYKMU7Ypzuc9__9vPtW_I,11
-cloe_nessy-0.3.8.dist-info/RECORD,,
+cloe_nessy-0.3.10.dist-info/METADATA,sha256=NBGGYODGPrVIhK3HBYkRSCUkd3tvBnU0AsYqB2j90Js,3162
+cloe_nessy-0.3.10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+cloe_nessy-0.3.10.dist-info/top_level.txt,sha256=Z7izn8HmQpg2wBUb-0jzaKlYKMU7Ypzuc9__9vPtW_I,11
+cloe_nessy-0.3.10.dist-info/RECORD,,
{cloe_nessy-0.3.8.dist-info → cloe_nessy-0.3.10.dist-info}/WHEEL

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.1.0)
+Generator: setuptools (80.9.0)
 Root-Is-Purelib: true
 Tag: py3-none-any
 